/* @license
Papa Parse
v5.0.1
https://github.com/mholt/PapaParse
License: MIT
*/
( function ( root , factory )
{
/* globals define */
if ( typeof define === 'function' && define . amd )
{
// AMD. Register as an anonymous module.
define ( [ ] , factory ) ;
}
else if ( typeof module === 'object' && typeof exports !== 'undefined' )
{
// Node. Does not work with strict CommonJS, but
// only CommonJS-like environments that support module.exports,
// like Node.
module . exports = factory ( ) ;
}
else
{
// Browser globals (root is window)
root . Papa = factory ( ) ;
}
// in strict mode we cannot access arguments.callee, so we need a named reference to
// stringify the factory method for the blob worker
// eslint-disable-next-line func-names
} ( this , function moduleFactory ( )
{
'use strict' ;
var global = ( function ( ) {
// alternative method, similar to `Function('return this')()`
// but without using `eval` (which is disabled when
// using Content Security Policy).
if ( typeof self !== 'undefined' ) { return self ; }
if ( typeof window !== 'undefined' ) { return window ; }
if ( typeof global !== 'undefined' ) { return global ; }
// When running tests none of the above have been defined
return { } ;
} ) ( ) ;
/**
 * Lazily builds — and caches on Papa.BLOB_URL — an object URL for a Blob
 * containing this module's source wrapped as a self-invoking script, so a
 * Worker can be spawned from it.
 */
function getWorkerBlob() {
    if (!Papa.BLOB_URL) {
        var urlApi = global.URL || global.webkitURL || null;
        var factorySource = moduleFactory.toString();
        var scriptBlob = new Blob(['(', factorySource, ')();'], {type: 'text/javascript'});
        Papa.BLOB_URL = urlApi.createObjectURL(scriptBlob);
    }
    return Papa.BLOB_URL;
}
// Worker/runtime detection: a worker context has postMessage but no document;
// a Papa-spawned worker additionally runs from a blob: URL (see getWorkerBlob).
var IS_WORKER = !global.document && !!global.postMessage,
    IS_PAPA_WORKER = IS_WORKER && /blob:/i.test((global.location || {}).protocol);
// Registry of spawned workers, keyed by id.
var workers = {}, workerIdCounter = 0;

var Papa = {};

Papa.parse = CsvToJson;
Papa.unparse = JsonToCsv;

Papa.RECORD_SEP = String.fromCharCode(30);
Papa.UNIT_SEP = String.fromCharCode(31);
Papa.BYTE_ORDER_MARK = '\ufeff';
Papa.BAD_DELIMITERS = ['\r', '\n', '"', Papa.BYTE_ORDER_MARK];
Papa.WORKERS_SUPPORTED = !IS_WORKER && !!global.Worker;
// Sentinel value passed as the input to Papa.parse to request a Node duplex stream.
Papa.NODE_STREAM_INPUT = 1;

// Configurable chunk sizes for local and remote files, respectively
Papa.LocalChunkSize = 1024 * 1024 * 10;  // 10 MB
Papa.RemoteChunkSize = 1024 * 1024 * 5;  // 5 MB
Papa.DefaultDelimiter = ',';             // Used if not specified and detection fails

// Exposed for testing and development only
Papa.Parser = Parser;
Papa.ParserHandle = ParserHandle;
Papa.NetworkStreamer = NetworkStreamer;
Papa.FileStreamer = FileStreamer;
Papa.StringStreamer = StringStreamer;
Papa.ReadableStreamStreamer = ReadableStreamStreamer;
if (typeof PAPA_BROWSER_CONTEXT === 'undefined') {
    Papa.DuplexStreamStreamer = DuplexStreamStreamer;
}
if ( global . jQuery )
{
var $ = global . jQuery ;
$ . fn . parse = function ( options )
{
var config = options . config || { } ;
var queue = [ ] ;
this . each ( function ( idx )
{
var supported = $ ( this ) . prop ( 'tagName' ) . toUpperCase ( ) === 'INPUT'
&& $ ( this ) . attr ( 'type' ) . toLowerCase ( ) === 'file'
&& global . FileReader ;
if ( ! supported || ! this . files || this . files . length === 0 )
return true ; // continue to next input element
for ( var i = 0 ; i < this . files . length ; i ++ )
{
queue . push ( {
file : this . files [ i ] ,
inputElem : this ,
instanceConfig : $ . extend ( { } , config )
} ) ;
}
} ) ;
parseNextFile ( ) ; // begin parsing
return this ; // maintains chainability
function parseNextFile ( )
{
if ( queue . length === 0 )
{
if ( isFunction ( options . complete ) )
options . complete ( ) ;
return ;
}
var f = queue [ 0 ] ;
if ( isFunction ( options . before ) )
{
var returned = options . before ( f . file , f . inputElem ) ;
if ( typeof returned === 'object' )
{
if ( returned . action === 'abort' )
{
error ( 'AbortError' , f . file , f . inputElem , returned . reason ) ;
return ; // Aborts all queued files immediately
}
else if ( returned . action === 'skip' )
{
fileComplete ( ) ; // parse the next file in the queue, if any
return ;
}
else if ( typeof returned . config === 'object' )
f . instanceConfig = $ . extend ( f . instanceConfig , returned . config ) ;
}
else if ( returned === 'skip' )
{
fileComplete ( ) ; // parse the next file in the queue, if any
return ;
}
}
// Wrap up the user's complete callback, if any, so that ours also gets executed
var userCompleteFunc = f . instanceConfig . complete ;
f . instanceConfig . complete = function ( results )
{
if ( isFunction ( userCompleteFunc ) )
userCompleteFunc ( results , f . file , f . inputElem ) ;
fileComplete ( ) ;
} ;
Papa . parse ( f . file , f . instanceConfig ) ;
}
function error ( name , file , elem , reason )
{
if ( isFunction ( options . error ) )
options . error ( { name : name } , file , elem , reason ) ;
}
function fileComplete ( )
{
queue . splice ( 0 , 1 ) ;
parseNextFile ( ) ;
}
} ;
}
// When running as a Papa-spawned worker, handle parse requests from the main thread.
if (IS_PAPA_WORKER)
{
    global.onmessage = workerThreadReceivedMessage;
}
/**
 * Papa.parse entry point: normalizes the config, then dispatches the input
 * either to a worker or to the appropriate streamer.
 * @param _input string (URL or CSV text), File, Node readable stream, or Papa.NODE_STREAM_INPUT
 * @param _config per-parse configuration (mutated/normalized here)
 * @returns undefined when delegated to a worker; a duplex stream for
 *          NODE_STREAM_INPUT; otherwise whatever the streamer's stream() returns
 */
function CsvToJson(_input, _config)
{
    _config = _config || {};
    var dynamicTyping = _config.dynamicTyping || false;
    if (isFunction(dynamicTyping)) {
        _config.dynamicTypingFunction = dynamicTyping;
        // Will be filled on first row call
        dynamicTyping = {};
    }
    _config.dynamicTyping = dynamicTyping;

    _config.transform = isFunction(_config.transform) ? _config.transform : false;

    if (_config.worker && Papa.WORKERS_SUPPORTED)
    {
        var w = newWorker();

        // Keep the user's callbacks on the worker handle; the config posted to
        // the worker carries only booleans (functions cannot be cloned).
        w.userStep = _config.step;
        w.userChunk = _config.chunk;
        w.userComplete = _config.complete;
        w.userError = _config.error;

        _config.step = isFunction(_config.step);
        _config.chunk = isFunction(_config.chunk);
        _config.complete = isFunction(_config.complete);
        _config.error = isFunction(_config.error);
        delete _config.worker; // prevent infinite loop

        w.postMessage({
            input: _input,
            config: _config,
            workerId: w.id
        });

        return;
    }

    var streamer = null;
    if (_input === Papa.NODE_STREAM_INPUT && typeof PAPA_BROWSER_CONTEXT === 'undefined')
    {
        // create a node Duplex stream for use
        // with .pipe
        streamer = new DuplexStreamStreamer(_config);
        return streamer.getStream();
    }
    else if (typeof _input === 'string')
    {
        if (_config.download)
            streamer = new NetworkStreamer(_config);
        else
            streamer = new StringStreamer(_config);
    }
    // Duck-type a Node readable stream. NOTE(review): a null/undefined _input
    // throws here before reaching the catch-all below — confirm that's intended.
    else if (_input.readable === true && isFunction(_input.read) && isFunction(_input.on))
    {
        streamer = new ReadableStreamStreamer(_config);
    }
    else if ((global.File && _input instanceof File) || _input instanceof Object) // ...Safari. (see issue #106)
        streamer = new FileStreamer(_config);

    return streamer.stream(_input);
}
/**
 * Papa.unparse: serializes an array of arrays, an array of objects, a JSON
 * string of either, or a {fields, data} bundle into a CSV string.
 * @param _input the data to serialize
 * @param _config optional options: delimiter, quotes, quoteChar, escapeChar,
 *                header, newline, skipEmptyLines, columns
 * @returns the CSV string
 * @throws Error when the input shape is unrecognized or `columns` is empty
 */
function JsonToCsv(_input, _config)
{
    // Default configuration

    /** whether to surround every datum with quotes */
    var _quotes = false;

    /** whether to write headers */
    var _writeHeader = true;

    /** delimiting character(s) */
    var _delimiter = ',';

    /** newline character(s) */
    var _newline = '\r\n';

    /** quote character */
    var _quoteChar = '"';

    /** escaped quote character, either "" or <config.escapeChar>" */
    var _escapedQuote = _quoteChar + _quoteChar;

    /** whether to skip empty lines */
    var _skipEmptyLines = false;

    /** the columns (keys) we expect when we unparse objects */
    var _columns = null;

    unpackConfig();

    var quoteCharRegex = new RegExp(escapeRegExp(_quoteChar), 'g');

    if (typeof _input === 'string')
        _input = JSON.parse(_input);

    if (Array.isArray(_input))
    {
        if (!_input.length || Array.isArray(_input[0]))
            return serialize(null, _input, _skipEmptyLines);
        else if (typeof _input[0] === 'object')
            return serialize(_columns || objectKeys(_input[0]), _input, _skipEmptyLines);
    }
    else if (typeof _input === 'object')
    {
        if (typeof _input.data === 'string')
            _input.data = JSON.parse(_input.data);

        if (Array.isArray(_input.data))
        {
            if (!_input.fields)
                _input.fields = _input.meta && _input.meta.fields;

            if (!_input.fields)
                _input.fields = Array.isArray(_input.data[0])
                    ? _input.fields
                    : objectKeys(_input.data[0]);

            if (!(Array.isArray(_input.data[0])) && typeof _input.data[0] !== 'object')
                _input.data = [_input.data]; // handles input like [1,2,3] or ['asdf']
        }

        return serialize(_input.fields || [], _input.data || [], _skipEmptyLines);
    }

    // Default (any valid paths should return before this)
    throw new Error('Unable to serialize unrecognized input');


    /** Copies recognized options from _config onto the serialization locals above. */
    function unpackConfig()
    {
        if (typeof _config !== 'object')
            return;

        // Reject delimiters containing any reserved CSV character.
        if (typeof _config.delimiter === 'string'
            && !Papa.BAD_DELIMITERS.filter(function(value) { return _config.delimiter.indexOf(value) !== -1; }).length)
        {
            _delimiter = _config.delimiter;
        }

        if (typeof _config.quotes === 'boolean'
            || Array.isArray(_config.quotes))
            _quotes = _config.quotes;

        if (typeof _config.skipEmptyLines === 'boolean'
            || typeof _config.skipEmptyLines === 'string')
            _skipEmptyLines = _config.skipEmptyLines;

        if (typeof _config.newline === 'string')
            _newline = _config.newline;

        if (typeof _config.quoteChar === 'string')
            _quoteChar = _config.quoteChar;

        if (typeof _config.header === 'boolean')
            _writeHeader = _config.header;

        if (Array.isArray(_config.columns)) {
            if (_config.columns.length === 0) throw new Error('Option columns is empty');
            _columns = _config.columns;
        }

        if (_config.escapeChar !== undefined) {
            _escapedQuote = _config.escapeChar + _quoteChar;
        }
    }

    /** Turns an object's keys into an array */
    function objectKeys(obj)
    {
        if (typeof obj !== 'object')
            return [];
        var keys = [];
        for (var key in obj)
            keys.push(key);
        return keys;
    }

    /** The double for loop that iterates the data and writes out a CSV string including header row */
    function serialize(fields, data, skipEmptyLines)
    {
        var csv = '';

        if (typeof fields === 'string')
            fields = JSON.parse(fields);
        if (typeof data === 'string')
            data = JSON.parse(data);

        var hasHeader = Array.isArray(fields) && fields.length > 0;
        var dataKeyedByField = !(Array.isArray(data[0]));

        // If there a header row, write it first
        if (hasHeader && _writeHeader)
        {
            for (var i = 0; i < fields.length; i++)
            {
                if (i > 0)
                    csv += _delimiter;
                csv += safe(fields[i], i);
            }
            if (data.length > 0)
                csv += _newline;
        }

        // Then write out the data
        for (var row = 0; row < data.length; row++)
        {
            var maxCol = hasHeader ? fields.length : data[row].length;

            var emptyLine = false;
            // A "null line" has no cells at all; it suppresses delimiters below.
            var nullLine = hasHeader ? Object.keys(data[row]).length === 0 : data[row].length === 0;
            if (skipEmptyLines && !hasHeader)
            {
                emptyLine = skipEmptyLines === 'greedy' ? data[row].join('').trim() === '' : data[row].length === 1 && data[row][0].length === 0;
            }
            if (skipEmptyLines === 'greedy' && hasHeader) {
                var line = [];
                for (var c = 0; c < maxCol; c++) {
                    var cx = dataKeyedByField ? fields[c] : c;
                    line.push(data[row][cx]);
                }
                emptyLine = line.join('').trim() === '';
            }
            if (!emptyLine)
            {
                for (var col = 0; col < maxCol; col++)
                {
                    if (col > 0 && !nullLine)
                        csv += _delimiter;
                    var colIdx = hasHeader && dataKeyedByField ? fields[col] : col;
                    csv += safe(data[row][colIdx], col);
                }
                // Newline after every row except the last (and except skipped rows).
                if (row < data.length - 1 && (!skipEmptyLines || (maxCol > 0 && !nullLine)))
                {
                    csv += _newline;
                }
            }
        }
        return csv;
    }

    /** Encloses a value around quotes if needed (makes a value safe for CSV insertion) */
    function safe(str, col)
    {
        if (typeof str === 'undefined' || str === null)
            return '';

        // Dates serialize as their ISO string (strip the JSON quotes).
        if (str.constructor === Date)
            return JSON.stringify(str).slice(1, 25);

        str = str.toString().replace(quoteCharRegex, _escapedQuote);

        var needsQuotes = (typeof _quotes === 'boolean' && _quotes)
                        || (Array.isArray(_quotes) && _quotes[col])
                        || hasAny(str, Papa.BAD_DELIMITERS)
                        || str.indexOf(_delimiter) > -1
                        || str.charAt(0) === ' '
                        || str.charAt(str.length - 1) === ' ';

        return needsQuotes ? _quoteChar + str + _quoteChar : str;
    }

    /** True when str contains any of the given substrings. */
    function hasAny(str, substrings)
    {
        for (var i = 0; i < substrings.length; i++)
            if (str.indexOf(substrings[i]) > -1)
                return true;
        return false;
    }
}
/** ChunkStreamer is the base prototype for various streamer implementations. */
function ChunkStreamer(config)
{
    this._handle = null;        // ParserHandle driving the actual parse
    this._finished = false;     // no more input will arrive
    this._completed = false;    // complete callback already fired
    this._halted = false;       // parsing stopped by pause/abort mid-chunk
    this._input = null;         // source (url, file, stream, ...) set by stream()
    this._baseIndex = 0;        // absolute cursor at the start of the current chunk
    this._partialLine = '';     // trailing partial line carried to the next chunk
    this._rowCount = 0;
    this._start = 0;            // byte offset for chunked readers
    this._nextChunk = null;     // supplied by each concrete streamer
    this.isFirstChunk = true;
    this._completeResults = {
        data: [],
        errors: [],
        meta: {}
    };
    replaceConfig.call(this, config);

    /**
     * Parses one chunk. isFakeChunk suppresses the user's chunk callback
     * (used when re-feeding leftover input, e.g. on resume).
     */
    this.parseChunk = function(chunk, isFakeChunk)
    {
        // First chunk pre-processing
        if (this.isFirstChunk && isFunction(this._config.beforeFirstChunk))
        {
            var modifiedChunk = this._config.beforeFirstChunk(chunk);
            if (modifiedChunk !== undefined)
                chunk = modifiedChunk;
        }
        this.isFirstChunk = false;
        this._halted = false;

        // Rejoin the line we likely just split in two by chunking the file
        var aggregate = this._partialLine + chunk;
        this._partialLine = '';

        var results = this._handle.parse(aggregate, this._baseIndex, !this._finished);

        if (this._handle.paused() || this._handle.aborted()) {
            this._halted = true;
            return;
        }

        var lastIndex = results.meta.cursor;

        // Carry any unconsumed tail of this chunk over to the next one.
        if (!this._finished)
        {
            this._partialLine = aggregate.substring(lastIndex - this._baseIndex);
            this._baseIndex = lastIndex;
        }

        if (results && results.data)
            this._rowCount += results.data.length;

        var finishedIncludingPreview = this._finished || (this._config.preview && this._rowCount >= this._config.preview);

        if (IS_PAPA_WORKER)
        {
            // Inside a Papa worker: ship results back to the main thread.
            global.postMessage({
                results: results,
                workerId: Papa.WORKER_ID,
                finished: finishedIncludingPreview
            });
        }
        else if (isFunction(this._config.chunk) && !isFakeChunk)
        {
            this._config.chunk(results, this._handle);
            if (this._handle.paused() || this._handle.aborted()) {
                this._halted = true;
                return;
            }
            // Results were handed to the chunk callback; don't accumulate them.
            results = undefined;
            this._completeResults = undefined;
        }

        if (!this._config.step && !this._config.chunk) {
            this._completeResults.data = this._completeResults.data.concat(results.data);
            this._completeResults.errors = this._completeResults.errors.concat(results.errors);
            this._completeResults.meta = results.meta;
        }

        if (!this._completed && finishedIncludingPreview && isFunction(this._config.complete) && (!results || !results.meta.aborted)) {
            this._config.complete(this._completeResults, this._input);
            this._completed = true;
        }

        if (!finishedIncludingPreview && (!results || !results.meta.paused))
            this._nextChunk();

        return results;
    };

    /** Reports an error via the config callback or, in a worker, to the main thread. */
    this._sendError = function(error)
    {
        if (isFunction(this._config.error))
            this._config.error(error);
        else if (IS_PAPA_WORKER && this._config.error)
        {
            global.postMessage({
                workerId: Papa.WORKER_ID,
                error: error,
                finished: false
            });
        }
    };

    // Installs a private deep copy of the config and builds the ParserHandle.
    function replaceConfig(config)
    {
        // Deep-copy the config so we can edit it
        var configCopy = copy(config);
        configCopy.chunkSize = parseInt(configCopy.chunkSize); // parseInt VERY important so we don't concatenate strings!
        if (!config.step && !config.chunk)
            configCopy.chunkSize = null; // disable Range header if not streaming; bad values break IIS - see issue #196
        this._handle = new ParserHandle(configCopy);
        this._handle.streamer = this;
        this._config = configCopy; // persist the copy to the caller
    }
}
/** Streams a remote resource over XMLHttpRequest, optionally in byte-range chunks. */
function NetworkStreamer(config)
{
    config = config || {};
    if (!config.chunkSize)
        config.chunkSize = Papa.RemoteChunkSize;
    ChunkStreamer.call(this, config);

    var xhr;

    if (IS_WORKER)
    {
        // Workers use synchronous XHR, so load + parse happen back to back.
        this._nextChunk = function()
        {
            this._readChunk();
            this._chunkLoaded();
        };
    }
    else
    {
        this._nextChunk = function()
        {
            this._readChunk();
        };
    }

    this.stream = function(url)
    {
        this._input = url;
        this._nextChunk(); // Starts streaming
    };

    /** Issues the GET (range-limited when chunking) for the next chunk. */
    this._readChunk = function()
    {
        if (this._finished)
        {
            this._chunkLoaded();
            return;
        }

        xhr = new XMLHttpRequest();

        if (this._config.withCredentials)
        {
            xhr.withCredentials = this._config.withCredentials;
        }

        if (!IS_WORKER)
        {
            xhr.onload = bindFunction(this._chunkLoaded, this);
            xhr.onerror = bindFunction(this._chunkError, this);
        }

        // Synchronous inside a worker (third argument false), async otherwise.
        xhr.open('GET', this._input, !IS_WORKER);
        // Headers can only be set when once the request state is OPENED
        if (this._config.downloadRequestHeaders)
        {
            var headers = this._config.downloadRequestHeaders;

            for (var headerName in headers)
            {
                xhr.setRequestHeader(headerName, headers[headerName]);
            }
        }

        if (this._config.chunkSize)
        {
            var end = this._start + this._config.chunkSize - 1; // minus one because byte range is inclusive
            xhr.setRequestHeader('Range', 'bytes=' + this._start + '-' + end);
        }

        try {
            xhr.send();
        }
        catch (err) {
            this._chunkError(err.message);
        }

        if (IS_WORKER && xhr.status === 0)
            this._chunkError();
        else
            this._start += this._config.chunkSize;
    };

    this._chunkLoaded = function()
    {
        if (xhr.readyState !== 4)
            return;

        if (xhr.status < 200 || xhr.status >= 400)
        {
            this._chunkError();
            return;
        }

        // Finished when not chunking, or when we've read past the reported total size.
        this._finished = !this._config.chunkSize || this._start > getFileSize(xhr);
        this.parseChunk(xhr.responseText);
    };

    this._chunkError = function(errorMessage)
    {
        var errorText = xhr.statusText || errorMessage;
        this._sendError(new Error(errorText));
    };

    /** Total resource size from the Content-Range header, or -1 when absent. */
    function getFileSize(xhr)
    {
        var contentRange = xhr.getResponseHeader('Content-Range');
        if (contentRange === null) { // no content range, then finish!
            return -1;
        }
        return parseInt(contentRange.substr(contentRange.lastIndexOf('/') + 1));
    }
}
NetworkStreamer.prototype = Object.create(ChunkStreamer.prototype);
NetworkStreamer.prototype.constructor = NetworkStreamer;
function FileStreamer ( config )
{
config = config || { } ;
if ( ! config . chunkSize )
config . chunkSize = Papa . LocalChunkSize ;
ChunkStreamer . call ( this , config ) ;
var reader , slice ;
// FileReader is better than FileReaderSync (even in worker) - see http://stackoverflow.com/q/24708649/1048862
// But Firefox is a pill, too - see issue #76: https://github.com/mholt/PapaParse/issues/76
var usingAsyncReader = typeof FileReader !== 'undefined' ; // Safari doesn't consider it a function - see issue #105
this . stream = function ( file )
{
this . _input = file ;
slice = file . slice || file . webkitSlice || file . mozSlice ;
if ( usingAsyncReader )
{
reader = new FileReader ( ) ; // Preferred method of reading files, even in workers
reader . onload = bindFunction ( this . _chunkLoaded , this ) ;
reader . onerror = bindFunction ( this . _chunkError , this ) ;
}
else
reader = new FileReaderSync ( ) ; // Hack for running in a web worker in Firefox
this . _nextChunk ( ) ; // Starts streaming
} ;
this . _nextChunk = function ( )
{
if ( ! this . _finished && ( ! this . _config . preview || this . _rowCount < this . _config . preview ) )
this . _readChunk ( ) ;
} ;
this . _readChunk = function ( )
{
var input = this . _input ;
if ( this . _config . chunkSize )
{
var end = Math . min ( this . _start + this . _config . chunkSize , this . _input . size ) ;
input = slice . call ( input , this . _start , end ) ;
}
var txt = reader . readAsText ( input , this . _config . encoding ) ;
if ( ! usingAsyncReader )
this . _chunkLoaded ( { target : { result : txt } } ) ; // mimic the async signature
} ;
this . _chunkLoaded = function ( event )
{
// Very important to increment start each time before handling results
this . _start += this . _config . chunkSize ;
this . _finished = ! this . _config . chunkSize || this . _start >= this . _input . size ;
this . parseChunk ( event . target . result ) ;
} ;
this . _chunkError = function ( )
{
this . _sendError ( reader . error ) ;
} ;
}
FileStreamer . prototype = Object . create ( ChunkStreamer . prototype ) ;
FileStreamer . prototype . constructor = FileStreamer ;
/** Streams an in-memory string, consuming it chunkSize characters at a time. */
function StringStreamer(config)
{
    config = config || {};
    ChunkStreamer.call(this, config);

    // The portion of the input string not yet handed to the parser.
    var pending;

    this.stream = function(s)
    {
        pending = s;
        return this._nextChunk();
    };

    this._nextChunk = function()
    {
        if (this._finished)
            return;

        var size = this._config.chunkSize;
        var chunk;
        if (size)
        {
            chunk = pending.substr(0, size);
            pending = pending.substr(size);
        }
        else
        {
            chunk = pending;
            pending = '';
        }
        this._finished = !pending;
        return this.parseChunk(chunk);
    };
}
// StringStreamer extends ChunkStreamer like every other streamer.
// Fixes the self-referential `Object.create(StringStreamer.prototype)`, which
// left StringStreamer off the ChunkStreamer prototype chain (inconsistent with
// NetworkStreamer/FileStreamer/ReadableStreamStreamer above).
StringStreamer.prototype = Object.create(ChunkStreamer.prototype);
StringStreamer.prototype.constructor = StringStreamer;
function ReadableStreamStreamer ( config )
{
config = config || { } ;
ChunkStreamer . call ( this , config ) ;
var queue = [ ] ;
var parseOnData = true ;
var streamHasEnded = false ;
this . pause = function ( )
{
ChunkStreamer . prototype . pause . apply ( this , arguments ) ;
this . _input . pause ( ) ;
} ;
this . resume = function ( )
{
ChunkStreamer . prototype . resume . apply ( this , arguments ) ;
this . _input . resume ( ) ;
} ;
this . stream = function ( stream )
{
this . _input = stream ;
this . _input . on ( 'data' , this . _streamData ) ;
this . _input . on ( 'end' , this . _streamEnd ) ;
this . _input . on ( 'error' , this . _streamError ) ;
} ;
this . _checkIsFinished = function ( )
{
if ( streamHasEnded && queue . length === 1 ) {
this . _finished = true ;
}
} ;
this . _nextChunk = function ( )
{
this . _checkIsFinished ( ) ;
if ( queue . length )
{
this . parseChunk ( queue . shift ( ) ) ;
}
else
{
parseOnData = true ;
}
} ;
this . _streamData = bindFunction ( function ( chunk )
{
try
{
queue . push ( typeof chunk === 'string' ? chunk : chunk . toString ( this . _config . encoding ) ) ;
if ( parseOnData )
{
parseOnData = false ;
this . _checkIsFinished ( ) ;
this . parseChunk ( queue . shift ( ) ) ;
}
}
catch ( error )
{
this . _streamError ( error ) ;
}
} , this ) ;
this . _streamError = bindFunction ( function ( error )
{
this . _streamCleanUp ( ) ;
this . _sendError ( error ) ;
} , this ) ;
this . _streamEnd = bindFunction ( function ( )
{
this . _streamCleanUp ( ) ;
streamHasEnded = true ;
this . _streamData ( '' ) ;
} , this ) ;
this . _streamCleanUp = bindFunction ( function ( )
{
this . _input . removeListener ( 'data' , this . _streamData ) ;
this . _input . removeListener ( 'end' , this . _streamEnd ) ;
this . _input . removeListener ( 'error' , this . _streamError ) ;
} , this ) ;
}
ReadableStreamStreamer . prototype = Object . create ( ChunkStreamer . prototype ) ;
ReadableStreamStreamer . prototype . constructor = ReadableStreamStreamer ;
/**
 * Node-only: exposes parsing as a Duplex stream — CSV text is written in,
 * parsed row data is pushed out in object mode (for use with .pipe).
 */
function DuplexStreamStreamer(_config) {
    var Duplex = require('stream').Duplex;
    var config = copy(_config);
    // True when the parser is idle and the next write should be parsed immediately.
    var parseOnWrite = true;
    var writeStreamHasFinished = false;
    var parseCallbackQueue = [];
    var stream = null;

    // Step callback: push each parsed row to the readable side; pause the
    // parser when the consumer's buffer is full.
    this._onCsvData = function(results)
    {
        var data = results.data;
        if (!stream.push(data) && !this._handle.paused()) {
            // the writeable consumer buffer has filled up
            // so we need to pause until more items
            // can be processed
            this._handle.pause();
        }
    };

    this._onCsvComplete = function()
    {
        // node will finish the read stream when
        // null is pushed
        stream.push(null);
    };

    config.step = bindFunction(this._onCsvData, this);
    config.complete = bindFunction(this._onCsvComplete, this);
    ChunkStreamer.call(this, config);

    this._nextChunk = function()
    {
        // The last queued entry is the empty flush chunk from _onWriteComplete.
        if (writeStreamHasFinished && parseCallbackQueue.length === 1) {
            this._finished = true;
        }
        if (parseCallbackQueue.length) {
            parseCallbackQueue.shift()();
        } else {
            parseOnWrite = true;
        }
    };

    this._addToParseQueue = function(chunk, callback)
    {
        // add to queue so that we can indicate
        // completion via callback
        // node will automatically pause the incoming stream
        // when too many items have been added without their
        // callback being invoked
        parseCallbackQueue.push(bindFunction(function() {
            this.parseChunk(typeof chunk === 'string' ? chunk : chunk.toString(config.encoding));
            if (isFunction(callback)) {
                return callback();
            }
        }, this));
        if (parseOnWrite) {
            parseOnWrite = false;
            this._nextChunk();
        }
    };

    // Readable-side backpressure released: resume parsing if paused.
    this._onRead = function()
    {
        if (this._handle.paused()) {
            // the writeable consumer can handle more data
            // so resume the chunk parsing
            this._handle.resume();
        }
    };

    this._onWrite = function(chunk, encoding, callback)
    {
        this._addToParseQueue(chunk, callback);
    };

    this._onWriteComplete = function()
    {
        writeStreamHasFinished = true;
        // have to write empty string
        // so parser knows its done
        this._addToParseQueue('');
    };

    this.getStream = function()
    {
        return stream;
    };
    stream = new Duplex({
        readableObjectMode: true,
        decodeStrings: false,
        read: bindFunction(this._onRead, this),
        write: bindFunction(this._onWrite, this)
    });
    stream.once('finish', bindFunction(this._onWriteComplete, this));
}
if (typeof PAPA_BROWSER_CONTEXT === 'undefined') {
    DuplexStreamStreamer.prototype = Object.create(ChunkStreamer.prototype);
    DuplexStreamStreamer.prototype.constructor = DuplexStreamStreamer;
}
// Use one ParserHandle per entire CSV file or string
function ParserHandle(_config)
{
    // One goal is to minimize the use of regular expressions...
    var FLOAT = /^\s*-?(\d*\.?\d+|\d+\.?\d*)(e[-+]?\d+)?\s*$/i;
    var ISO_DATE = /(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d\.\d+([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))/;

    var self = this;
    var _stepCounter = 0;   // Number of times step was called (number of rows parsed)
    var _rowCounter = 0;    // Number of rows that have been parsed so far
    var _input;             // The input being parsed
    var _parser;            // The core parser being used
    var _paused = false;    // Whether we are paused or not
    var _aborted = false;   // Whether the parser has aborted or not
    var _delimiterError;    // Temporary state between delimiter detection and processing results
    var _fields = [];       // Fields are from the header row of the input, if there is one
    var _results = {        // The last results returned from the parser
        data: [],
        errors: [],
        meta: {}
    };
if ( isFunction ( _config . step ) )
{
var userStep = _config . step ;
_config . step = function ( results )
{
_results = results ;
if ( needsHeaderRow ( ) )
processResults ( ) ;
else // only call user's step function after header row
{
processResults ( ) ;
// It's possbile that this line was empty and there's no row here after all
if ( _results . data . length === 0 )
return ;
_stepCounter += results . data . length ;
if ( _config . preview && _stepCounter > _config . preview )
_parser . abort ( ) ;
else
userStep ( _results , self ) ;
}
} ;
}
    /**
     * Parses input. Most users won't need, and shouldn't mess with, the baseIndex
     * and ignoreLastRow parameters. They are used by streamers (wrapper functions)
     * when an input comes in multiple chunks, like from a file.
     */
this . parse = function ( input , baseIndex , ignoreLastRow )
{
var quoteChar = _config . quoteChar || '"' ;
if ( ! _config . newline )
_config . newline = guessLineEndings ( input , quoteChar ) ;
_delimiterError = false ;
if ( ! _config . delimiter )
{
var delimGuess = guessDelimiter ( input , _config . newline , _config . skipEmptyLines , _config . comments , _config . delimitersToGuess ) ;
if ( delimGuess . successful )
_config . delimiter = delimGuess . bestDelimiter ;
else
{
_delimiterError = true ; // add error after parsing (otherwise it would be overwritten)
_config . delimiter = Papa . DefaultDelimiter ;
}
_results . meta . delimiter = _config . delimiter ;
}
else if ( isFunction ( _config . delimiter ) )
{
_config . delimiter = _config . delimiter ( input ) ;
_results . meta . delimiter = _config . delimiter ;
}
var parserConfig = copy ( _config ) ;
if ( _config . preview && _config . header )
parserConfig . preview ++ ; // to compensate for header row
_input = input ;
_parser = new Parser ( parserConfig ) ;
_results = _parser . parse ( _input , baseIndex , ignoreLastRow ) ;
processResults ( ) ;
return _paused ? { meta : { paused : true } } : ( _results || { meta : { paused : false } } ) ;
} ;
this . paused = function ( )
{
return _paused ;
} ;
this . pause = function ( )
{
_paused = true ;
_parser . abort ( ) ;
_input = _input . substr ( _parser . getCharIndex ( ) ) ;
} ;
this . resume = function ( )
{
if ( self . streamer . _halted ) {
_paused = false ;
self . streamer . parseChunk ( _input , true ) ;
} else {
// Bugfix: #636 In case the processing hasn't halted yet
// wait for it to halt in order to resume
setTimeout ( this . resume , 3 ) ;
}
} ;
this . aborted = function ( )
{
return _aborted ;
} ;
this . abort = function ( )
{
_aborted = true ;
_parser . abort ( ) ;
_results . meta . aborted = true ;
if ( isFunction ( _config . complete ) )
_config . complete ( _results ) ;
_input = '' ;
} ;
function testEmptyLine ( s ) {
return _config . skipEmptyLines === 'greedy' ? s . join ( '' ) . trim ( ) === '' : s . length === 1 && s [ 0 ] . length === 0 ;
}
function processResults ( )
{
if ( _results && _delimiterError )
{
addError ( 'Delimiter' , 'UndetectableDelimiter' , 'Unable to auto-detect delimiting character; defaulted to \'' + Papa . DefaultDelimiter + '\'' ) ;
_delimiterError = false ;
}
if ( _config . skipEmptyLines )
{
for ( var i = 0 ; i < _results . data . length ; i ++ )
if ( testEmptyLine ( _results . data [ i ] ) )
_results . data . splice ( i -- , 1 ) ;
}
if ( needsHeaderRow ( ) )
fillHeaderFields ( ) ;
return applyHeaderAndDynamicTypingAndTransformation ( ) ;
}
function needsHeaderRow ( )
{
return _config . header && _fields . length === 0 ;
}
function fillHeaderFields ( )
{
if ( ! _results )
return ;
function addHeder ( header )
{
if ( isFunction ( _config . transformHeader ) )
header = _config . transformHeader ( header ) ;
_fields . push ( header ) ;
}
if ( Array . isArray ( _results . data [ 0 ] ) )
{
for ( var i = 0 ; needsHeaderRow ( ) && i < _results . data . length ; i ++ )
_results . data [ i ] . forEach ( addHeder ) ;
_results . data . splice ( 0 , 1 ) ;
}
// if _results.data[0] is not an array, we are in a step where _results.data is the row.
else
_results . data . forEach ( addHeder ) ;
}
function shouldApplyDynamicTyping ( field ) {
// Cache function values to avoid calling it for each row
if ( _config . dynamicTypingFunction && _config . dynamicTyping [ field ] === undefined ) {
_config . dynamicTyping [ field ] = _config . dynamicTypingFunction ( field ) ;
}
return ( _config . dynamicTyping [ field ] || _config . dynamicTyping ) === true ;
}
function parseDynamic ( field , value )
{
if ( shouldApplyDynamicTyping ( field ) )
{
if ( value === 'true' || value === 'TRUE' )
return true ;
else if ( value === 'false' || value === 'FALSE' )
return false ;
else if ( FLOAT . test ( value ) )
return parseFloat ( value ) ;
else if ( ISO _DATE . test ( value ) )
return new Date ( value ) ;
else
return ( value === '' ? null : value ) ;
}
return value ;
}
function applyHeaderAndDynamicTypingAndTransformation ( )
{
if ( ! _results || ( ! _config . header && ! _config . dynamicTyping && ! _config . transform ) )
return _results ;
function processRow ( rowSource , i )
{
var row = _config . header ? { } : [ ] ;
var j ;
for ( j = 0 ; j < rowSource . length ; j ++ )
{
var field = j ;
var value = rowSource [ j ] ;
if ( _config . header )
field = j >= _fields . length ? '__parsed_extra' : _fields [ j ] ;
if ( _config . transform )
value = _config . transform ( value , field ) ;
value = parseDynamic ( field , value ) ;
if ( field === '__parsed_extra' )
{
row [ field ] = row [ field ] || [ ] ;
row [ field ] . push ( value ) ;
}
else
row [ field ] = value ;
}
if ( _config . header )
{
if ( j > _fields . length )
addError ( 'FieldMismatch' , 'TooManyFields' , 'Too many fields: expected ' + _fields . length + ' fields but parsed ' + j , _rowCounter + i ) ;
else if ( j < _fields . length )
addError ( 'FieldMismatch' , 'TooFewFields' , 'Too few fields: expected ' + _fields . length + ' fields but parsed ' + j , _rowCounter + i ) ;
}
return row ;
}
var incrementBy = 1 ;
if ( ! _results . data [ 0 ] || Array . isArray ( _results . data [ 0 ] ) )
{
_results . data = _results . data . map ( processRow ) ;
incrementBy = _results . data . length ;
}
else
_results . data = processRow ( _results . data , 0 ) ;
if ( _config . header && _results . meta )
_results . meta . fields = _fields ;
_rowCounter += incrementBy ;
return _results ;
}
/**
 * Tries each candidate delimiter on a 10-row preview of the input and
 * picks the one yielding the most consistent (and largest average)
 * field count per row. Sets _config.delimiter as a side effect.
 * Returns { successful, bestDelimiter }.
 */
function guessDelimiter(input, newline, skipEmptyLines, comments, delimitersToGuess) {
	var bestDelim, bestDelta, fieldCountPrevRow, maxFieldCount;

	delimitersToGuess = delimitersToGuess || [',', '\t', '|', ';', Papa.RECORD_SEP, Papa.UNIT_SEP];

	for (var i = 0; i < delimitersToGuess.length; i++) {
		var delim = delimitersToGuess[i];
		// delta: total row-to-row variation in field count (lower is better);
		// avgFieldCount: mean fields per row (higher is better).
		var delta = 0, avgFieldCount = 0, emptyLinesCount = 0;
		fieldCountPrevRow = undefined;

		var preview = new Parser({
			comments: comments,
			delimiter: delim,
			newline: newline,
			preview: 10
		}).parse(input);

		for (var j = 0; j < preview.data.length; j++) {
			if (skipEmptyLines && testEmptyLine(preview.data[j])) {
				emptyLinesCount++;
				continue;
			}
			var fieldCount = preview.data[j].length;
			avgFieldCount += fieldCount;

			// First counted row only seeds the comparison baseline.
			if (typeof fieldCountPrevRow === 'undefined') {
				fieldCountPrevRow = fieldCount;
				continue;
			}
			else if (fieldCount > 0) {
				delta += Math.abs(fieldCount - fieldCountPrevRow);
				fieldCountPrevRow = fieldCount;
			}
		}

		// NOTE(review): if every previewed line is empty this divides by zero,
		// making avgFieldCount Infinity/NaN; the > 1.99 check below then
		// rejects the NaN case — confirm intended for the Infinity case.
		if (preview.data.length > 0)
			avgFieldCount /= (preview.data.length - emptyLinesCount);

		// Accept a candidate only if it is no less consistent, has a higher
		// average field count, and averages at least ~2 fields per row.
		if ((typeof bestDelta === 'undefined' || delta <= bestDelta)
			&& (typeof maxFieldCount === 'undefined' || avgFieldCount > maxFieldCount) && avgFieldCount > 1.99) {
			bestDelta = delta;
			bestDelim = delim;
			maxFieldCount = avgFieldCount;
		}
	}

	_config.delimiter = bestDelim;

	return {
		successful: !!bestDelim,
		bestDelimiter: bestDelim
	};
}
/**
 * Samples (up to) the first 1 MB of input, strips quoted sections, and
 * guesses the line ending in use: '\n', '\r', or '\r\n'.
 */
function guessLineEndings(input, quoteChar)
{
	// Sampling more than 1 MB adds cost without improving the guess.
	input = input.substr(0, 1024 * 1024);

	// Remove quoted field contents so embedded newlines don't skew the count.
	var quotedRe = new RegExp(escapeRegExp(quoteChar) + '([^]*?)' + escapeRegExp(quoteChar), 'gm');
	input = input.replace(quotedRe, '');

	var byCR = input.split('\r');
	var byLF = input.split('\n');

	var lfComesFirst = byLF.length > 1 && byLF[0].length < byCR[0].length;
	if (byCR.length === 1 || lfComesFirst)
		return '\n';

	// Count how many \r are immediately followed by \n.
	var crlfCount = 0;
	for (var i = 0; i < byCR.length; i++)
	{
		if (byCR[i][0] === '\n')
			crlfCount++;
	}

	return crlfCount >= byCR.length / 2 ? '\r\n' : '\r';
}
/** Records one parse error of the given type/code on the shared results object. */
function addError(type, code, msg, row)
{
	var error = {
		type: type,
		code: code,
		message: msg,
		row: row
	};
	_results.errors.push(error);
}
}
/**
 * Escapes regex metacharacters so `string` can be embedded verbatim in a
 * RegExp pattern.
 * https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions
 */
function escapeRegExp(string)
{
	// '$&' in the replacement re-inserts the matched character after the backslash.
	var metachars = /[.*+?^${}()|[\]\\]/g;
	return string.replace(metachars, '\\$&');
}
/** The core parser implements speedy and correct CSV parsing */
function Parser(config)
{
	// Unpack the config object
	config = config || {};
	var delim = config.delimiter;
	var newline = config.newline;
	var comments = config.comments;
	var step = config.step;
	var preview = config.preview;
	var fastMode = config.fastMode;
	var quoteChar;
	/** Allows for no quoteChar by setting quoteChar to undefined in config */
	if (config.quoteChar === undefined) {
		quoteChar = '"';
	} else {
		quoteChar = config.quoteChar;
	}
	// By default the escape character equals the quote character ("" doubling).
	var escapeChar = quoteChar;
	if (config.escapeChar !== undefined) {
		escapeChar = config.escapeChar;
	}

	// Delimiter must be valid
	if (typeof delim !== 'string'
		|| Papa.BAD_DELIMITERS.indexOf(delim) > -1)
		delim = ',';

	// Comment character must be valid
	if (comments === delim)
		throw new Error('Comment character same as delimiter');
	else if (comments === true)
		comments = '#';
	else if (typeof comments !== 'string'
		|| Papa.BAD_DELIMITERS.indexOf(comments) > -1)
		comments = false;

	// Newline must be valid: \r, \n, or \r\n
	if (newline !== '\n' && newline !== '\r' && newline !== '\r\n')
		newline = '\n';

	// We're gonna need these at the Parser scope
	var cursor = 0;
	var aborted = false;

	/**
	 * Parses `input` (a CSV string). `baseIndex` offsets the reported
	 * cursor (used when parsing in chunks); `ignoreLastRow` drops the
	 * final, possibly-incomplete row (used while streaming).
	 */
	this.parse = function(input, baseIndex, ignoreLastRow)
	{
		// For some reason, in Chrome, this speeds things up (!?)
		if (typeof input !== 'string')
			throw new Error('Input must be a string');

		// We don't need to compute some of these every time parse() is called,
		// but having them in a more local scope seems to perform better
		var inputLen = input.length,
			delimLen = delim.length,
			newlineLen = newline.length,
			commentsLen = comments.length;
		var stepIsFunction = isFunction(step);

		// Establish starting state
		cursor = 0;
		var data = [], errors = [], row = [], lastCursor = 0;

		if (!input)
			return returnable();

		// Fast mode: with no quote characters present (or when forced by
		// config), rows can be produced with plain string splits.
		if (fastMode || (fastMode !== false && input.indexOf(quoteChar) === -1))
		{
			var rows = input.split(newline);
			for (var i = 0; i < rows.length; i++)
			{
				row = rows[i];
				cursor += row.length;
				if (i !== rows.length - 1)
					cursor += newline.length;
				else if (ignoreLastRow)
					return returnable();
				if (comments && row.substr(0, commentsLen) === comments)
					continue;
				if (stepIsFunction)
				{
					data = [];
					pushRow(row.split(delim));
					doStep();
					if (aborted)
						return returnable();
				}
				else
					pushRow(row.split(delim));
				if (preview && i >= preview)
				{
					data = data.slice(0, preview);
					return returnable(true);
				}
			}
			return returnable();
		}

		var nextDelim = input.indexOf(delim, cursor);
		var nextNewline = input.indexOf(newline, cursor);
		// Matches an escaped quote so it can be collapsed to a literal quote.
		var quoteCharRegex = new RegExp(escapeRegExp(escapeChar) + escapeRegExp(quoteChar), 'g');
		var quoteSearch = input.indexOf(quoteChar, cursor);

		// Parser loop
		for (;;)
		{
			// Field has opening quote
			if (input[cursor] === quoteChar)
			{
				// Start our search for the closing quote where the cursor is
				quoteSearch = cursor;

				// Skip the opening quote
				cursor++;

				for (;;)
				{
					// Find closing quote
					quoteSearch = input.indexOf(quoteChar, quoteSearch + 1);

					//No other quotes are found - no other delimiters
					if (quoteSearch === -1)
					{
						if (!ignoreLastRow) {
							// No closing quote... what a pity
							errors.push({
								type: 'Quotes',
								code: 'MissingQuotes',
								message: 'Quoted field unterminated',
								row: data.length,	// row has yet to be inserted
								index: cursor
							});
						}
						return finish();
					}

					// Closing quote at EOF
					if (quoteSearch === inputLen - 1)
					{
						var value = input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar);
						return finish(value);
					}

					// If this quote is escaped, it's part of the data; skip it
					// If the quote character is the escape character, then check if the next character is the escape character
					if (quoteChar === escapeChar && input[quoteSearch + 1] === escapeChar)
					{
						quoteSearch++;
						continue;
					}

					// If the quote character is not the escape character, then check if the previous character was the escape character
					if (quoteChar !== escapeChar && quoteSearch !== 0 && input[quoteSearch - 1] === escapeChar)
					{
						continue;
					}

					// Check up to nextDelim or nextNewline, whichever is closest
					var checkUpTo = nextNewline === -1 ? nextDelim : Math.min(nextDelim, nextNewline);
					var spacesBetweenQuoteAndDelimiter = extraSpaces(checkUpTo);

					// Closing quote followed by delimiter or 'unnecessary spaces + delimiter'
					if (input[quoteSearch + 1 + spacesBetweenQuoteAndDelimiter] === delim)
					{
						row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar));
						cursor = quoteSearch + 1 + spacesBetweenQuoteAndDelimiter + delimLen;

						// If char after following delimiter is not quoteChar, we find next quote char position
						if (input[quoteSearch + 1 + spacesBetweenQuoteAndDelimiter + delimLen] !== quoteChar)
						{
							quoteSearch = input.indexOf(quoteChar, cursor);
						}
						nextDelim = input.indexOf(delim, cursor);
						nextNewline = input.indexOf(newline, cursor);
						break;
					}

					var spacesBetweenQuoteAndNewLine = extraSpaces(nextNewline);

					// Closing quote followed by newline or 'unnecessary spaces + newLine'
					if (input.substr(quoteSearch + 1 + spacesBetweenQuoteAndNewLine, newlineLen) === newline)
					{
						row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar));
						saveRow(quoteSearch + 1 + spacesBetweenQuoteAndNewLine + newlineLen);
						nextDelim = input.indexOf(delim, cursor);	// because we may have skipped the nextDelim in the quoted field
						quoteSearch = input.indexOf(quoteChar, cursor);	// we search for first quote in next line

						if (stepIsFunction)
						{
							doStep();
							if (aborted)
								return returnable();
						}

						if (preview && data.length >= preview)
							return returnable(true);

						break;
					}

					// Checks for valid closing quotes are complete (escaped quotes or quote followed by EOF/delimiter/newline) -- assume these quotes are part of an invalid text string
					errors.push({
						type: 'Quotes',
						code: 'InvalidQuotes',
						message: 'Trailing quote on quoted field is malformed',
						row: data.length,	// row has yet to be inserted
						index: cursor
					});

					quoteSearch++;
					continue;
				}

				continue;
			}

			// Comment found at start of new line
			if (comments && row.length === 0 && input.substr(cursor, commentsLen) === comments)
			{
				if (nextNewline === -1)	// Comment ends at EOF
					return returnable();
				cursor = nextNewline + newlineLen;
				nextNewline = input.indexOf(newline, cursor);
				nextDelim = input.indexOf(delim, cursor);
				continue;
			}

			// Next delimiter comes before next newline, so we've reached end of field
			if (nextDelim !== -1 && (nextDelim < nextNewline || nextNewline === -1))
			{
				// we check, if we have quotes, because delimiter char may be part of field enclosed in quotes
				if (quoteSearch !== -1) {
					// we have quotes, so we try to find the next delimiter not enclosed in quotes and also next starting quote char
					var nextDelimObj = getNextUnqotedDelimiter(nextDelim, quoteSearch, nextNewline);

					// if we have next delimiter char which is not enclosed in quotes
					if (nextDelimObj && nextDelimObj.nextDelim) {
						nextDelim = nextDelimObj.nextDelim;
						quoteSearch = nextDelimObj.quoteSearch;
						row.push(input.substring(cursor, nextDelim));
						cursor = nextDelim + delimLen;
						// we look for next delimiter char
						nextDelim = input.indexOf(delim, cursor);
						continue;
					}
				} else {
					row.push(input.substring(cursor, nextDelim));
					cursor = nextDelim + delimLen;
					nextDelim = input.indexOf(delim, cursor);
					continue;
				}
			}

			// End of row
			if (nextNewline !== -1)
			{
				row.push(input.substring(cursor, nextNewline));
				saveRow(nextNewline + newlineLen);

				if (stepIsFunction)
				{
					doStep();
					if (aborted)
						return returnable();
				}

				if (preview && data.length >= preview)
					return returnable(true);

				continue;
			}

			break;
		}

		return finish();

		// Appends a finished row to the results and records where it ended.
		function pushRow(row)
		{
			data.push(row);
			lastCursor = cursor;
		}

		/**
		 * checks if there are extra spaces after closing quote and given index without any text
		 * if Yes, returns the number of spaces
		 */
		function extraSpaces(index) {
			var spaceLength = 0;
			if (index !== -1) {
				var textBetweenClosingQuoteAndIndex = input.substring(quoteSearch + 1, index);
				if (textBetweenClosingQuoteAndIndex && textBetweenClosingQuoteAndIndex.trim() === '') {
					spaceLength = textBetweenClosingQuoteAndIndex.length;
				}
			}
			return spaceLength;
		}

		/**
		 * Appends the remaining input from cursor to the end into
		 * row, saves the row, calls step, and returns the results.
		 */
		function finish(value)
		{
			if (ignoreLastRow)
				return returnable();
			if (typeof value === 'undefined')
				value = input.substr(cursor);
			row.push(value);
			cursor = inputLen;	// important in case parsing is paused
			pushRow(row);
			if (stepIsFunction)
				doStep();
			return returnable();
		}

		/**
		 * Appends the current row to the results. It sets the cursor
		 * to newCursor and finds the nextNewline. The caller should
		 * take care to execute user's step function and check for
		 * preview and end parsing if necessary.
		 */
		function saveRow(newCursor)
		{
			cursor = newCursor;
			pushRow(row);
			row = [];
			nextNewline = input.indexOf(newline, cursor);
		}

		/** Returns an object with the results, errors, and meta. */
		function returnable(stopped, step)
		{
			// In step mode only the single parsed row is returned as data.
			var isStep = step || false;
			return {
				data: isStep ? data[0] : data,
				errors: errors,
				meta: {
					delimiter: delim,
					linebreak: newline,
					aborted: aborted,
					truncated: !!stopped,
					cursor: lastCursor + (baseIndex || 0)
				}
			};
		}

		/** Executes the user's step function and resets data & errors. */
		function doStep()
		{
			step(returnable(undefined, true));
			data = [];
			errors = [];
		}

		/** Gets the delimiter character, which is not inside the quoted field */
		function getNextUnqotedDelimiter(nextDelim, quoteSearch, newLine) {
			var result = {
				nextDelim: undefined,
				quoteSearch: undefined
			};
			// get the next closing quote character
			var nextQuoteSearch = input.indexOf(quoteChar, quoteSearch + 1);

			// if next delimiter is part of a field enclosed in quotes
			if (nextDelim > quoteSearch && nextDelim < nextQuoteSearch && (nextQuoteSearch < newLine || newLine === -1)) {
				// get the next delimiter character after this one
				var nextNextDelim = input.indexOf(delim, nextQuoteSearch);

				// if there is no next delimiter, return default result
				if (nextNextDelim === -1) {
					return result;
				}

				// find the next opening quote char position
				if (nextNextDelim > nextQuoteSearch) {
					nextQuoteSearch = input.indexOf(quoteChar, nextQuoteSearch + 1);
				}

				// try to get the next delimiter position (recursing past this quoted field)
				result = getNextUnqotedDelimiter(nextNextDelim, nextQuoteSearch, newLine);
			} else {
				result = {
					nextDelim: nextDelim,
					quoteSearch: quoteSearch
				};
			}
			return result;
		}
	};

	/** Sets the abort flag */
	this.abort = function()
	{
		aborted = true;
	};

	/** Gets the cursor position */
	this.getCharIndex = function()
	{
		return cursor;
	};
}
/**
 * Spawns a web worker running this module (via the blob URL) and
 * registers it in the worker table. Returns false when workers are
 * unsupported in this environment.
 */
function newWorker()
{
	if (!Papa.WORKERS_SUPPORTED)
		return false;

	var worker = new global.Worker(getWorkerBlob());
	worker.onmessage = mainThreadReceivedMessage;
	worker.id = workerIdCounter++;
	workers[worker.id] = worker;
	return worker;
}
/** Callback when main thread receives a message */
function mainThreadReceivedMessage ( e )
{
var msg = e . data ;
var worker = workers [ msg . workerId ] ;
var aborted = false ;
if ( msg . error )
worker . userError ( msg . error , msg . file ) ;
else if ( msg . results && msg . results . data )
{
var abort = function ( ) {
aborted = true ;
completeWorker ( msg . workerId , { data : [ ] , errors : [ ] , meta : { aborted : true } } ) ;
} ;
var handle = {
abort : abort ,
pause : notImplemented ,
resume : notImplemented
} ;
if ( isFunction ( worker . userStep ) )
{
for ( var i = 0 ; i < msg . results . data . length ; i ++ )
{
worker . userStep ( {
data : msg . results . data [ i ] ,
errors : msg . results . errors ,
meta : msg . results . meta
} , handle ) ;
if ( aborted )
break ;
}
delete msg . results ; // free memory ASAP
}
else if ( isFunction ( worker . userChunk ) )
{
worker . userChunk ( msg . results , handle , msg . file ) ;
delete msg . results ;
}
}
if ( msg . finished && ! aborted )
completeWorker ( msg . workerId , msg . results ) ;
}
/** Invokes the user's complete callback (if any), then tears the worker down. */
function completeWorker(workerId, results) {
	var finishedWorker = workers[workerId];
	if (isFunction(finishedWorker.userComplete))
		finishedWorker.userComplete(results);
	finishedWorker.terminate();
	delete workers[workerId];
}
/** Placeholder for handle methods (pause/resume) that worker mode does not support. */
function notImplemented() {
	var message = 'Not implemented.';
	throw new Error(message);
}
/** Callback when worker thread receives a message */
function workerThreadReceivedMessage(e)
{
	var msg = e.data;

	// Remember the id the main thread assigned to this worker.
	if (typeof Papa.WORKER_ID === 'undefined' && msg)
		Papa.WORKER_ID = msg.workerId;

	if (typeof msg.input === 'string')
	{
		// Raw CSV string: parse it and post the full result set back.
		global.postMessage({
			workerId: Papa.WORKER_ID,
			results: Papa.parse(msg.input, msg.config),
			finished: true
		});
		return;
	}

	// File, or file-like object (thank you, Safari — see issue #106):
	// parse it and post results back only when parsing produced any.
	if ((global.File && msg.input instanceof File) || msg.input instanceof Object)
	{
		var results = Papa.parse(msg.input, msg.config);
		if (results)
			global.postMessage({
				workerId: Papa.WORKER_ID,
				results: results,
				finished: true
			});
	}
}
/** Makes a deep copy of an array or object (mostly) */
function copy(obj)
{
	// Primitives (and null) are returned as-is.
	if (typeof obj !== 'object' || obj === null)
		return obj;

	var duplicate = Array.isArray(obj) ? [] : {};
	// for...in deliberately mirrors the original's key walk.
	for (var key in obj)
	{
		duplicate[key] = copy(obj[key]);
	}
	return duplicate;
}
/**
 * Returns a wrapper that invokes `f` with `this` bound to `self`,
 * forwarding all arguments.
 *
 * Fix: the previous wrapper discarded the callee's return value
 * (`{ f.apply(self, arguments); }` with no `return`), so any bound
 * function whose result mattered yielded undefined. Propagating the
 * return value is backward-compatible: callers that ignored it are
 * unaffected.
 */
function bindFunction(f, self)
{
	return function() { return f.apply(self, arguments); };
}
/** Returns true when the given value is callable. */
function isFunction(func)
{
	var valueType = typeof func;
	return valueType === 'function';
}
return Papa ;
} ) ) ;