/*
    Papa Parse
    v3.0.1
    https://github.com/mholt/PapaParse
*/
( function ( global )
{
"use strict" ;
var IS _WORKER = ! global . document , SCRIPT _PATH ;
var workers = { } , workerIdCounter = 0 ;
// A configuration object from which to draw default settings
var DEFAULTS = {
delimiter : "" , // empty: auto-detect
header : false ,
dynamicTyping : false ,
preview : 0 ,
step : undefined ,
encoding : "" , // browser should default to "UTF-8"
worker : false ,
comments : false ,
complete : undefined ,
download : false ,
chunk : undefined ,
keepEmptyRows : false
} ;
global . Papa = { } ;
global . Papa . parse = CsvToJson ;
global . Papa . unparse = JsonToCsv ;
global . Papa . RECORD _SEP = String . fromCharCode ( 30 ) ;
global . Papa . UNIT _SEP = String . fromCharCode ( 31 ) ;
global . Papa . BYTE _ORDER _MARK = "\ufeff" ;
global . Papa . BAD _DELIMITERS = [ "\r" , "\n" , "\"" , global . Papa . BYTE _ORDER _MARK ] ;
global . Papa . WORKERS _SUPPORTED = ! ! global . Worker ;
// Configurable chunk sizes for local and remote files, respectively
global . Papa . LocalChunkSize = 1024 * 1024 * 10 ; // 10 MB
global . Papa . RemoteChunkSize = 1024 * 1024 * 5 ; // 5 MB
// Exposed for testing and development only
global . Papa . Parser = Parser ;
global . Papa . ParserHandle = ParserHandle ;
global . Papa . NetworkStreamer = NetworkStreamer ;
global . Papa . FileStreamer = FileStreamer ;
// Optional jQuery plugin, registered only when jQuery is present on the global object.
if (global.jQuery)
{
    var $ = global.jQuery;

    /**
     * Parses every file selected in the matched <input type="file"> elements,
     * one file at a time, in selection order.
     *
     * @param {Object} [options]  may carry:
     *   config   - parse configuration applied to every file
     *   before   - function(file, inputElem) called before each file; may return
     *              "skip", {action: "skip"}, {action: "abort", reason: ...} or
     *              {config: {...}} to override the configuration for that file
     *   error    - function(err, file, inputElem, reason) called when `before` aborts
     *   complete - function() called once the queue is exhausted
     * @returns {jQuery} the matched set, to maintain chainability
     */
    $.fn.parse = function(options)
    {
        // Tolerate a bare .parse() call
        options = options || {};

        var config = options.config || {};
        var queue = [];

        this.each(function(idx)
        {
            // .attr('type') is undefined for an <input> that has no type attribute,
            // so normalize both values to strings before comparing them
            var tagName = String($(this).prop('tagName') || "").toUpperCase();
            var inputType = String($(this).attr('type') || "").toLowerCase();
            var supported = tagName === "INPUT"
                            && inputType === "file"
                            && global.FileReader;

            if (!supported || !this.files || this.files.length === 0)
                return true;    // continue to next input element

            for (var i = 0; i < this.files.length; i++)
            {
                queue.push({
                    file: this.files[i],
                    inputElem: this,
                    instanceConfig: $.extend({}, config)
                });
            }
        });

        parseNextFile();    // begin parsing
        return this;        // maintains chainability

        // Parses the file at the head of the queue, or reports completion when the queue is empty
        function parseNextFile()
        {
            if (queue.length === 0)
            {
                if (isFunction(options.complete))
                    options.complete();
                return;
            }

            var f = queue[0];

            if (isFunction(options.before))
            {
                var returned = options.before(f.file, f.inputElem);

                // typeof null is 'object', so null has to be excluded explicitly
                if (returned !== null && typeof returned === 'object')
                {
                    if (returned.action === "abort")
                    {
                        error("AbortError", f.file, f.inputElem, returned.reason);
                        return; // Aborts all queued files immediately
                    }
                    else if (returned.action === "skip")
                    {
                        fileComplete(); // parse the next file in the queue, if any
                        return;
                    }
                    else if (typeof returned.config === 'object')
                        f.instanceConfig = $.extend(f.instanceConfig, returned.config);
                }
                else if (returned === "skip")
                {
                    fileComplete(); // parse the next file in the queue, if any
                    return;
                }
            }

            // Wrap up the user's complete callback, if any, so that ours also gets executed
            var userCompleteFunc = f.instanceConfig.complete;
            f.instanceConfig.complete = function(results)
            {
                if (isFunction(userCompleteFunc))
                    userCompleteFunc(results, f.file, f.inputElem);
                fileComplete();
            };

            Papa.parse(f.file, f.instanceConfig);
        }

        // Forwards an error to the user's error callback, if one was given
        function error(name, file, elem, reason)
        {
            if (isFunction(options.error))
                options.error({name: name}, file, elem, reason);
        }

        // Drops the file that was just handled and moves on to the next one
        function fileComplete()
        {
            queue.splice(0, 1);
            parseNextFile();
        }
    };
}
// In worker context, listen for messages from the main thread. Otherwise, when
// workers are supported, record where this script lives.
if (IS_WORKER)
{
    global.onmessage = workerThreadReceivedMessage;
}
else if (Papa.WORKERS_SUPPORTED)
{
    SCRIPT_PATH = getScriptPath();
}
/**
 * Parses CSV input (exposed as Papa.parse).
 *
 * Dispatch, in order:
 *   - config.worker set, workers supported and script path known: the input and a
 *     function-free config are posted to a new worker; results arrive via callbacks.
 *   - string input with config.download: the string is a URL, streamed by NetworkStreamer.
 *   - string input otherwise: parsed synchronously; results are returned and also
 *     passed to config.complete.
 *   - File input with config.step or config.chunk: streamed by FileStreamer.
 *   - File input otherwise: read whole (synchronously inside a worker, where the
 *     results are returned; asynchronously on the main thread, where they go to
 *     config.complete).
 * Any other input is ignored.
 *
 * @param {string|File} _input  CSV text, a URL (with config.download) or a File
 * @param {Object} [_config]  parse configuration; validated against the defaults
 *                            unless running inside a worker
 * @returns {Object|undefined} parse results on the synchronous paths, otherwise undefined
 */
function CsvToJson(_input, _config)
{
    var config = IS_WORKER ? _config : copyAndValidateConfig(_config);
    var useWorker = config.worker && Papa.WORKERS_SUPPORTED && SCRIPT_PATH;

    if (useWorker)
    {
        var w = newWorker();

        // Functions cannot be posted to a worker: keep them on the worker handle
        // and only tell the worker whether each callback exists
        w.userStep = config.step;
        w.userChunk = config.chunk;
        w.userComplete = config.complete;
        w.userError = config.error;

        config.step = isFunction(config.step);
        config.chunk = isFunction(config.chunk);
        config.complete = isFunction(config.complete);
        config.error = isFunction(config.error);
        delete config.worker;   // prevent infinite loop

        w.postMessage({
            input: _input,
            config: config,
            workerId: w.id
        });
        return;
    }

    if (typeof _input === 'string')
    {
        if (config.download)
        {
            new NetworkStreamer(config).stream(_input);
            return;
        }

        var results = new ParserHandle(config).parse(_input);
        if (isFunction(config.complete))
            config.complete(results);
        return results;
    }

    // A bare `instanceof File` throws a ReferenceError where the File API is missing
    if (typeof File === 'undefined' || !(_input instanceof File))
        return;

    if (config.step || config.chunk)
    {
        new FileStreamer(config).stream(_input);
        return;
    }

    if (IS_WORKER)
    {
        var syncReader = new FileReaderSync();
        var text = syncReader.readAsText(_input, config.encoding);
        return new ParserHandle(config).parse(text);
    }

    var reader = new FileReader();
    reader.onload = function(event)
    {
        var fileResults = new ParserHandle(config).parse(event.target.result);
        if (isFunction(config.complete))
            config.complete(fileResults);
    };
    reader.onerror = function()
    {
        if (isFunction(config.error))
            config.error(reader.error, _input);
    };
    reader.readAsText(_input, config.encoding);
}
/**
 * Serializes JSON data to a CSV string (exposed as Papa.unparse).
 *
 * Accepted shapes for _input (a JSON string of any of them is parsed first):
 *   - an array of arrays: rows only, no header row
 *   - an array of objects: header row taken from the keys of the first object
 *   - an object {fields: [...], data: [...]}: explicit header row and rows;
 *     `data` may itself be a JSON string
 *
 * The caller's _input object is never modified.
 *
 * @param {string|Array|Object} _input  data to serialize
 * @param {Object} [_config]  optional settings: `delimiter` (a single character that is
 *                            not a bad delimiter), `quotes` (boolean, or an array of
 *                            per-column booleans) and `newline` (string)
 * @returns {string} the CSV text
 * @throws {string} when _input is none of the recognized shapes
 * @throws {SyntaxError} from JSON.parse when a string input is not valid JSON
 */
function JsonToCsv(_input, _config)
{
    // Default configuration
    var _quotes = false;    // whether to surround every datum with quotes
    var _delimiter = ",";   // delimiting character
    var _newline = "\r\n";  // newline character(s)

    unpackConfig();

    if (typeof _input === 'string')
        _input = JSON.parse(_input);

    if (_input instanceof Array)
    {
        if (!_input.length || _input[0] instanceof Array)
            return serialize(null, _input);
        else if (typeof _input[0] === 'object')
            return serialize(objectKeys(_input[0]), _input);
    }
    else if (_input !== null && typeof _input === 'object')
    {
        // Work on locals so that the caller's object is left untouched
        var fields = _input.fields;
        var data = _input.data;

        if (typeof data === 'string')
            data = JSON.parse(data);

        if (data instanceof Array)
        {
            if (!fields)
                fields = data[0] instanceof Array
                            ? fields
                            : objectKeys(data[0]);

            if (!(data[0] instanceof Array) && typeof data[0] !== 'object')
                data = [data];  // handles input like [1,2,3] or ["asdf"]
        }

        return serialize(fields || [], data || []);
    }

    // Default (any valid paths should return before this)
    throw "exception: Unable to serialize unrecognized input";


    // Reads the recognized settings out of _config, ignoring invalid values
    function unpackConfig()
    {
        // typeof null is 'object', so null has to be excluded explicitly
        if (_config === null || typeof _config !== 'object')
            return;

        if (typeof _config.delimiter === 'string'
            && _config.delimiter.length == 1
            && global.Papa.BAD_DELIMITERS.indexOf(_config.delimiter) == -1)
        {
            _delimiter = _config.delimiter;
        }

        if (typeof _config.quotes === 'boolean'
            || _config.quotes instanceof Array)
            _quotes = _config.quotes;

        if (typeof _config.newline === 'string')
            _newline = _config.newline;
    }

    // Turns an object's keys into an array
    function objectKeys(obj)
    {
        if (typeof obj !== 'object')
            return [];
        var keys = [];
        for (var key in obj)
            keys.push(key);
        return keys;
    }

    // The double for loop that iterates the data and writes out a CSV string including header row
    function serialize(fields, data)
    {
        var csv = "";

        if (typeof fields === 'string')
            fields = JSON.parse(fields);
        if (typeof data === 'string')
            data = JSON.parse(data);

        var hasHeader = fields instanceof Array && fields.length > 0;
        var dataKeyedByField = !(data[0] instanceof Array);

        // If there is a header row, write it first
        if (hasHeader)
        {
            for (var i = 0; i < fields.length; i++)
            {
                if (i > 0)
                    csv += _delimiter;
                csv += safe(fields[i], i);
            }
            if (data.length > 0)
                csv += _newline;
        }

        // Then write out the data
        for (var row = 0; row < data.length; row++)
        {
            var maxCol = hasHeader ? fields.length : data[row].length;

            for (var col = 0; col < maxCol; col++)
            {
                if (col > 0)
                    csv += _delimiter;
                // Rows that are objects are read by field name, rows that are arrays by position
                var colIdx = hasHeader && dataKeyedByField ? fields[col] : col;
                csv += safe(data[row][colIdx], col);
            }

            if (row < data.length - 1)
                csv += _newline;
        }

        return csv;
    }

    // Encloses a value around quotes if needed (makes a value safe for CSV insertion)
    function safe(str, col)
    {
        if (typeof str === "undefined" || str === null)
            return "";

        // Embedded quotes are escaped by doubling them
        str = str.toString().replace(/"/g, '""');

        var needsQuotes = (typeof _quotes === 'boolean' && _quotes)
                        || (_quotes instanceof Array && _quotes[col])
                        || hasAny(str, global.Papa.BAD_DELIMITERS)
                        || str.indexOf(_delimiter) > -1
                        || str.charAt(0) == ' '
                        || str.charAt(str.length - 1) == ' ';

        return needsQuotes ? '"' + str + '"' : str;
    }

    // Whether str contains at least one of the given substrings
    function hasAny(str, substrings)
    {
        for (var i = 0; i < substrings.length; i++)
            if (str.indexOf(substrings[i]) > -1)
                return true;
        return false;
    }
}
// NOTE/TODO: Many of the functions of NetworkStreamer and FileStreamer are the same. Consolidate?
/**
 * Downloads a remote file with XMLHttpRequest and feeds it to a ParserHandle.
 *
 * When config.step is set, the file is requested in byte ranges of config.chunkSize
 * (default Papa.RemoteChunkSize) and each chunk is cut at its last line break so that
 * only whole lines are parsed; the remainder is carried into the next chunk.
 * Without config.step the whole file is requested at once.
 *
 * Inside a worker the requests are synchronous and results are posted to the main
 * thread; on the main thread they are asynchronous and results go to config.chunk
 * and config.complete.
 *
 * @param {Object} [config]  parse configuration (chunkSize, step, chunk, complete, error, ...)
 */
function NetworkStreamer(config)
{
    config = config || {};
    if (!config.chunkSize)
        config.chunkSize = Papa.RemoteChunkSize;

    var start = 0, fileSize = 0;
    var aggregate = "";
    var partialLine = "";
    var xhr, nextChunk;
    var handle = new ParserHandle(copy(config));

    /**
     * Starts downloading and parsing.
     * @param {string} url  location of the CSV file
     */
    this.stream = function(url)
    {
        if (IS_WORKER)
        {
            // Synchronous request: the response is available as soon as readChunk returns
            nextChunk = function()
            {
                readChunk();
                chunkLoaded();
            };
        }
        else
        {
            // Asynchronous request: chunkLoaded runs from xhr.onload
            nextChunk = function()
            {
                readChunk();
            };
        }

        nextChunk();    // Starts streaming

        // Requests the next byte range (or the whole file when not stepping)
        function readChunk()
        {
            xhr = new XMLHttpRequest();
            if (!IS_WORKER)
            {
                xhr.onload = chunkLoaded;
                xhr.onerror = chunkError;
            }
            xhr.open("GET", url, !IS_WORKER);
            if (config.step)
            {
                var end = start + config.chunkSize - 1; // minus one because byte range is inclusive
                // Hack around a Chrome bug: http://stackoverflow.com/q/24745095/1048862
                // NOTE(review): fileSize is never assigned in this function, so this clamp never triggers
                if (fileSize && end > fileSize)
                    end = fileSize;
                xhr.setRequestHeader("Range", "bytes=" + start + "-" + end);
            }
            xhr.send();
            if (IS_WORKER && xhr.status == 0)
                chunkError();
            else
                start += config.chunkSize;
        }

        // Parses the chunk that just arrived and decides whether to fetch another one
        function chunkLoaded()
        {
            if (xhr.readyState != 4)
                return;

            if (xhr.status < 200 || xhr.status >= 400)
            {
                chunkError();
                return;
            }

            // Rejoin the line we likely just split in two by chunking the file
            aggregate += partialLine + xhr.responseText;
            partialLine = "";

            var finishedWithEntireFile = !config.step || start > getFileSize(xhr);

            if (!finishedWithEntireFile)
            {
                var lastLineEnd = aggregate.lastIndexOf("\n");

                if (lastLineEnd < 0)
                    lastLineEnd = aggregate.lastIndexOf("\r");

                if (lastLineEnd > -1)
                {
                    partialLine = aggregate.substring(lastLineEnd + 1); // skip the line ending character
                    aggregate = aggregate.substring(0, lastLineEnd);
                }
                else
                {
                    // For chunk sizes smaller than a line (a line could not fit in a single chunk)
                    // we simply build our aggregate by reading in the next chunk, until we find a newline
                    nextChunk();
                    return;
                }
            }

            var results = handle.parse(aggregate);
            aggregate = "";

            if (IS_WORKER)
            {
                global.postMessage({
                    results: results,
                    workerId: Papa.WORKER_ID,
                    finished: finishedWithEntireFile
                });
            }
            else if (isFunction(config.chunk))
            {
                config.chunk(results);  // TODO: Implement abort? (like step)
                results = undefined;
            }

            if (finishedWithEntireFile && isFunction(config.complete))
                config.complete(results);
            else if (results && results.meta.aborted && isFunction(config.complete))
                config.complete(results);
            else if (!finishedWithEntireFile)
                nextChunk();
        }

        // Reports a failed request to the user's callback, or to the main thread when in a worker
        function chunkError()
        {
            if (isFunction(config.error))
                config.error(xhr.statusText);
            else if (IS_WORKER && config.error)
            {
                global.postMessage({
                    workerId: Papa.WORKER_ID,
                    error: xhr.statusText,
                    finished: false
                });
            }
        }

        // Total size of the remote file as reported by the Content-Range response header,
        // or -1 when the header is absent
        function getFileSize(xhr)
        {
            var contentRange = xhr.getResponseHeader("Content-Range");
            // getResponseHeader returns null when the header was not sent. Returning -1
            // makes `start > size` true, so the response is treated as the entire file.
            if (!contentRange)
                return -1;
            return parseInt(contentRange.substr(contentRange.lastIndexOf("/") + 1), 10);
        }
    };
}
/**
 * Reads a local File in slices of config.chunkSize (default Papa.LocalChunkSize)
 * and feeds them to a ParserHandle. Every chunk except the last is cut at its last
 * line break so that only whole lines are parsed; the remainder is prepended to
 * the next chunk.
 *
 * @param {Object} [config]  parse configuration (chunkSize, encoding, chunk, complete, error, ...)
 */
function FileStreamer(config)
{
    config = config || {};
    if (!config.chunkSize)
        config.chunkSize = Papa.LocalChunkSize;

    var start = 0;          // offset of the next slice to read
    var aggregate = "";     // text waiting to be parsed
    var partialLine = "";   // trailing incomplete line carried over from the previous chunk
    var reader;
    var handle = new ParserHandle(copy(config));

    // FileReader is better than FileReaderSync (even in worker) - see http://stackoverflow.com/q/24708649/1048862
    // But Firefox is a pill, too - see issue #76: https://github.com/mholt/PapaParse/issues/76
    var usingAsyncReader = typeof FileReader === 'function';

    /**
     * Starts reading and parsing.
     * @param {File} file  the file to stream
     */
    this.stream = function(file)
    {
        var slice = file.slice || file.webkitSlice || file.mozSlice;

        if (!usingAsyncReader)
        {
            reader = new FileReaderSync();  // Hack for running in a web worker in Firefox
        }
        else
        {
            reader = new FileReader();      // Preferred method of reading files, even in workers
            reader.onload = chunkLoaded;
            reader.onerror = chunkError;
        }

        nextChunk();    // Starts streaming

        // Reads another slice unless the whole file has been consumed already
        function nextChunk()
        {
            if (start < file.size)
                readChunk();
        }

        // Reads the slice [start, start + chunkSize), clamped to the end of the file
        function readChunk()
        {
            var end = Math.min(start + config.chunkSize, file.size);
            var blob = slice.call(file, start, end);
            var txt = reader.readAsText(blob, config.encoding);
            if (!usingAsyncReader)
                chunkLoaded({ target: { result: txt } });   // mimic the async signature
        }

        // Parses the slice that was just read and decides whether to read another one
        function chunkLoaded(event)
        {
            // Very important to increment start each time before handling results
            start += config.chunkSize;

            // Rejoin the line we likely just split in two by chunking the file
            aggregate += partialLine + event.target.result;
            partialLine = "";

            var finishedWithEntireFile = start >= file.size;

            if (!finishedWithEntireFile)
            {
                var cut = aggregate.lastIndexOf("\n");
                if (cut === -1)
                    cut = aggregate.lastIndexOf("\r");

                if (cut === -1)
                {
                    // For chunk sizes smaller than a line (a line could not fit in a single chunk)
                    // we simply build our aggregate by reading in the next chunk, until we find a newline
                    nextChunk();
                    return;
                }

                partialLine = aggregate.substring(cut + 1); // skip the line ending character
                aggregate = aggregate.substring(0, cut);
            }

            var results = handle.parse(aggregate);
            aggregate = "";

            if (IS_WORKER)
            {
                global.postMessage({
                    results: results,
                    workerId: Papa.WORKER_ID,
                    finished: finishedWithEntireFile
                });
            }
            else if (isFunction(config.chunk))
            {
                config.chunk(results, file);
                results = undefined;
            }

            var canComplete = isFunction(config.complete);

            if (finishedWithEntireFile && canComplete)
                config.complete(undefined, file);
            // TODO: Abort needs reworking like pause/resume need it (if streaming, no results object is returned, so it has no meta to say aborted: true...)
            else if (results && results.meta.aborted && canComplete)
                config.complete(results, file);
            else if (!finishedWithEntireFile)
                nextChunk();
        }

        // Reports a read failure to the user's callback, or to the main thread when in a worker
        function chunkError()
        {
            if (isFunction(config.error))
            {
                config.error(reader.error, file);
            }
            else if (IS_WORKER && config.error)
            {
                global.postMessage({
                    workerId: Papa.WORKER_ID,
                    error: reader.error,
                    file: file,
                    finished: false
                });
            }
        }
    };
}
// Use one ParserHandle per entire CSV file or string
/**
 * Wraps the core Parser with delimiter auto-detection, header-row handling and
 * dynamic typing. A handle keeps the detected delimiter and the header fields
 * between parse() calls, so a file can be fed to it chunk by chunk.
 *
 * @param {Object} _config  parse configuration; copied, the caller's object is not modified
 */
function ParserHandle(_config)
{
    // One goal is to minimize the use of regular expressions...
    var FLOAT = /^\s*-?(\d*\.?\d+|\d+\.?\d*)(e[-+]?\d+)?\s*$/i;

    var _delimiterError;    // Temporary state between delimiter detection and processing results
    var _fields = [];       // Fields are from the header row of the input, if there is one
    var _results = {        // The last results returned from the parser
        data: [],
        errors: [],
        meta: {}
    };

    _config = copy(_config) || {};

    // Wrap the user's step callback exactly once. Wrapping it inside parse() would
    // stack one wrapper per call, and every row of the second and later chunks
    // would then be post-processed (header mapping, typing) more than once.
    if (isFunction(_config.step))
    {
        var userStep = _config.step;
        _config.step = function(results, parser)
        {
            _results = results;
            if (needsHeaderRow())
                processResults();   // consumes the header row; nothing to hand to the user yet
            else
                userStep(processResults(), parser);
        };
    }

    /**
     * Parses a string (or one chunk of a longer input).
     * @param {string} input  CSV text
     * @returns {Object|undefined} processed results {data, errors, meta}, or whatever
     *          the underlying parser returned when it returned no results object
     */
    this.parse = function(input)
    {
        _delimiterError = false;
        if (!_config.delimiter)
        {
            var delimGuess = guessDelimiter(input);
            if (delimGuess.successful)
                _config.delimiter = delimGuess.bestDelimiter;
            else
            {
                _delimiterError = true; // add error after parsing (otherwise it would be overwritten)
                _config.delimiter = ",";
            }
            if (_results)
                _results.meta.delimiter = _config.delimiter;
        }

        _results = new Parser(_config).parse(input);
        return processResults();
    };

    // Post-processes the latest parser results: delimiter error, header row, typing
    function processResults()
    {
        if (_results && _delimiterError)
        {
            addError("Delimiter", "UndetectableDelimiter", "Unable to auto-detect delimiting character; defaulted to comma");
            _delimiterError = false;
        }

        if (needsHeaderRow())
            fillHeaderFields();

        return applyHeaderAndDynamicTyping();
    }

    // True while a header row is expected but has not been captured yet
    function needsHeaderRow()
    {
        return _config.header && _fields.length == 0;
    }

    // Takes the field names from the first non-empty row and removes the first row from the data
    function fillHeaderFields()
    {
        if (!_results)
            return;
        for (var i = 0; needsHeaderRow() && i < _results.data.length; i++)
            for (var j = 0; j < _results.data[i].length; j++)
                _fields.push(_results.data[i][j]);
        _results.data.splice(0, 1);
    }

    // Converts values when dynamicTyping is set, and turns rows into objects keyed by field when header is set
    function applyHeaderAndDynamicTyping()
    {
        if (!_results || (!_config.header && !_config.dynamicTyping))
            return _results;

        for (var i = 0; i < _results.data.length; i++)
        {
            var row = {};

            for (var j = 0; j < _results.data[i].length; j++)
            {
                if (_config.dynamicTyping)
                {
                    var value = _results.data[i][j];
                    if (value == "true")
                        _results.data[i][j] = true;
                    else if (value == "false")
                        _results.data[i][j] = false;
                    else
                        _results.data[i][j] = tryParseFloat(value);
                }

                if (_config.header)
                {
                    if (j >= _fields.length)
                    {
                        // Surplus values have no field name; collect them separately
                        if (!row["__parsed_extra"])
                            row["__parsed_extra"] = [];
                        row["__parsed_extra"].push(_results.data[i][j]);
                    }
                    else
                        row[_fields[j]] = _results.data[i][j];
                }
            }

            if (_config.header)
            {
                _results.data[i] = row;
                if (j > _fields.length)
                    addError("FieldMismatch", "TooManyFields", "Too many fields: expected " + _fields.length + " fields but parsed " + j, i);
                else if (j < _fields.length)
                    addError("FieldMismatch", "TooFewFields", "Too few fields: expected " + _fields.length + " fields but parsed " + j, i);
            }
        }

        if (_config.header && _results.meta)
            _results.meta.fields = _fields;

        return _results;
    }

    // Tries each candidate delimiter on a 10-row preview and picks the one whose
    // rows have the most consistent field count, requiring about 2 or more fields per row on average
    function guessDelimiter(input)
    {
        var delimChoices = [",", "\t", "|", ";", Papa.RECORD_SEP, Papa.UNIT_SEP];
        var bestDelim, bestDelta, fieldCountPrevRow;

        for (var i = 0; i < delimChoices.length; i++)
        {
            var delim = delimChoices[i];
            var delta = 0, avgFieldCount = 0;
            fieldCountPrevRow = undefined;

            var preview = new Parser({
                delimiter: delim,
                preview: 10
            }).parse(input);

            for (var j = 0; j < preview.data.length; j++)
            {
                var fieldCount = preview.data[j].length;
                avgFieldCount += fieldCount;

                if (typeof fieldCountPrevRow === 'undefined')
                {
                    fieldCountPrevRow = fieldCount;
                    continue;
                }
                else if (fieldCount > 1)
                {
                    delta += Math.abs(fieldCount - fieldCountPrevRow);
                    fieldCountPrevRow = fieldCount;
                }
            }

            avgFieldCount /= preview.data.length;

            if ((typeof bestDelta === 'undefined' || delta < bestDelta)
                && avgFieldCount > 1.99)
            {
                bestDelta = delta;
                bestDelim = delim;
            }
        }

        // The caller stores the outcome in _config.delimiter
        return {
            successful: !!bestDelim,
            bestDelimiter: bestDelim
        };
    }

    // Returns val as a number when it looks like one, otherwise val unchanged
    function tryParseFloat(val)
    {
        var isNumber = FLOAT.test(val);
        return isNumber ? parseFloat(val) : val;
    }

    // Appends an error record to the current results
    function addError(type, code, msg, row)
    {
        _results.errors.push({
            type: type,
            code: code,
            message: msg,
            row: row
        });
    }
}
/**
 * The core CSV parser: walks the input one character at a time, tracking whether
 * it is inside a quoted field, and builds rows of string fields.
 *
 * Recognized config settings: delimiter, comments, step, preview, keepEmptyRows.
 * With a step function, each finished row is handed to it and parse() returns
 * nothing; without one, parse() returns {data, errors, meta}.
 *
 * @param {Object} [config]  parser configuration
 */
function Parser(config)
{
    var self = this;
    var EMPTY = /^\s*$/;

    // Parsing state, initialized by reset() at the start of every parse() call
    var _input;             // The input text being parsed
    var _ch;                // Current character
    var _i;                 // Current character's positional index
    var _inQuotes;          // Whether in quotes or not
    var _lineNum;           // Current line number (1-based indexing)
    var _data;              // Parsed data (results)
    var _errors;            // Parse errors
    var _rowIdx;            // Current row index within results (0-based)
    var _colIdx;            // Current col index within result row (0-based)
    var _runningRowIdx;     // Cumulative row index, used by the preview feature
    var _aborted = false;   // Abort flag
    var _paused = false;    // Pause flag

    // Unpack the config object
    config = config || {};
    var _delimiter = config.delimiter;  // The delimiting character
    var _comments = config.comments;    // Comment character (default '#') or boolean
    var _step = config.step;            // The step (streaming) function
    var _preview = config.preview;      // Maximum number of lines (not rows) to parse

    // Delimiter integrity check
    if (!isUsableSingleChar(_delimiter))
        _delimiter = ",";

    // Comment character integrity check
    if (_comments === true)
        _comments = "#";
    else if (!isUsableSingleChar(_comments) || _comments === _delimiter)
        _comments = false;

    /**
     * Parses the given text from scratch.
     * @param {string} input  CSV text
     * @returns {Object|undefined} {data, errors, meta} unless a step function is configured
     * @throws {string} when input is not a string
     */
    this.parse = function(input)
    {
        if (typeof input !== 'string')
            throw "Input must be a string";
        reset(input);
        return parserLoop();
    };

    /*
    // TODO: Pause and resume just doesn't work well.
    // I suspect this may need to be implemented at a higher-level
    // scope than just this core Parser.
    this.pause = function()
    {
        _paused = true;
    };
    this.resume = function()
    {
        _paused = false;
        if (_i < _input.length)
            return parserLoop();
    };
    */

    // Requests that parsing stop; the loop checks the flag before each character
    this.abort = function()
    {
        _aborted = true;
    };

    // A single-character string that is not one of the characters reserved by the CSV syntax
    function isUsableSingleChar(value)
    {
        return typeof value === 'string'
            && value.length == 1
            && Papa.BAD_DELIMITERS.indexOf(value) === -1;
    }

    // Main loop: dispatches on the current character until the input, the preview
    // limit or an abort ends it
    function parserLoop()
    {
        for (; _i < _input.length; nextChar())
        {
            if (_aborted)
                break;
            if (_preview > 0 && _runningRowIdx >= _preview)
                break;
            if (_paused)
                return finishParsing();

            if (_ch === '"')
                parseQuotes();
            else if (_inQuotes)
                parseInQuotes();
            else
                parseNotInQuotes();
        }

        return finishParsing();
    }

    // Advances to the next character
    function nextChar()
    {
        _i++;
        _ch = _input[_i];
    }

    // Records end-of-parse errors, closes the last row and returns the results
    // (nothing is returned when a step function consumes the rows)
    function finishParsing()
    {
        if (_aborted)
            addError("Abort", "ParseAbort", "Parsing was aborted by the user's step function");
        if (_inQuotes)
            addError("Quotes", "MissingQuotes", "Unescaped or mismatched quotes");

        endRow();   // End of input is also end of the last row

        return isFunction(_step) ? undefined : returnable();
    }

    // Handles a quote character: toggles quoted mode on a field boundary,
    // otherwise stores it as data
    function parseQuotes()
    {
        if (quotesOnBoundary() && !quotesEscaped())
        {
            _inQuotes = !_inQuotes;
            return;
        }

        saveChar();
        if (_inQuotes && quotesEscaped())
            _i++;   // skip the second quote of the escaped pair
        else
            addError("Quotes", "UnexpectedQuotes", "Unexpected quotes");
    }

    // Inside quotes everything is data; line breaks only advance the line counter
    function parseInQuotes()
    {
        if (twoCharLineBreak(_i) || oneCharLineBreak(_i))
            _lineNum++;
        saveChar();
    }

    // Outside quotes: delimiters start a field, line breaks start a row,
    // comment lines are skipped, everything else is data
    function parseNotInQuotes()
    {
        if (_ch == _delimiter)
        {
            newField();
        }
        else if (twoCharLineBreak(_i))
        {
            newRow();
            nextChar();     // consume the second character of the line break
        }
        else if (oneCharLineBreak(_i))
        {
            newRow();
        }
        else if (isCommentStart())
        {
            skipLine();
        }
        else
        {
            saveChar();
        }
    }

    // True when comments are enabled and the current character is the comment
    // character at the very start of a line
    function isCommentStart()
    {
        if (!_comments)
            return false;

        var firstCharOfLine = _i == 0
                            || oneCharLineBreak(_i - 1)
                            || twoCharLineBreak(_i - 2);
        return firstCharOfLine && _input[_i] === _comments;
    }

    // Advances to the next line break (or the end of the input)
    function skipLine()
    {
        while (!twoCharLineBreak(_i)
            && !oneCharLineBreak(_i)
            && _i < _input.length)
        {
            nextChar();
        }
    }

    // Appends the current character to the current field
    function saveChar()
    {
        _data[_rowIdx][_colIdx] += _ch;
    }

    // Starts a new, empty field in the current row
    function newField()
    {
        _data[_rowIdx].push("");
        _colIdx = _data[_rowIdx].length - 1;
    }

    // Closes the current row and starts a new one with one empty field
    function newRow()
    {
        endRow();
        _lineNum++;
        _runningRowIdx++;
        _data.push([]);
        _rowIdx = _data.length - 1;
        newField();
    }

    // Finalizes the current row; in step mode, hands it to the step function and
    // clears the accumulated data and errors
    function endRow()
    {
        trimEmptyLastRow();

        if (!isFunction(_step))
            return;

        if (_data[_rowIdx])
            _step(returnable(), self);
        clearErrorsAndData();
    }

    // A row consisting of a single blank field is emptied (keepEmptyRows) or removed
    function trimEmptyLastRow()
    {
        var row = _data[_rowIdx];
        if (row.length != 1 || !EMPTY.test(row[0]))
            return;

        if (config.keepEmptyRows)
            row.splice(0, 1);           // leave row, but no fields
        else
            _data.splice(_rowIdx, 1);   // cut out row entirely
        _rowIdx = _data.length - 1;
    }

    // True when positions i and i+1 hold "\r\n" or "\n\r"
    function twoCharLineBreak(i)
    {
        if (!(i < _input.length - 1))
            return false;

        var first = _input[i];
        var second = _input[i + 1];
        return (first == "\r" && second == "\n")
            || (first == "\n" && second == "\r");
    }

    // True when position i holds "\r" or "\n"
    function oneCharLineBreak(i)
    {
        var c = _input[i];
        return c == "\r" || c == "\n";
    }

    // True when the current quote is the first of a doubled (escaped) quote
    function quotesEscaped()
    {
        // Quotes as data cannot be on boundary, for example: ,"", are not escaped quotes
        return !quotesOnBoundary() && _i < _input.length - 1 && _input[_i + 1] == '"';
    }

    // True when the current quote opens a field (boundary before it, not yet in quotes)
    // or is directly followed by a boundary
    function quotesOnBoundary()
    {
        return (!_inQuotes && isBoundary(_i - 1)) || isBoundary(_i + 1);
    }

    // A boundary is either edge of the input, a delimiter or a line break character
    function isBoundary(i)
    {
        if (typeof i != 'number')
            i = _i;

        if (i <= -1 || i >= _input.length)
            return true;

        var ch = _input[i];
        return ch == _delimiter
            || ch == "\r"
            || ch == "\n";
    }

    // Records a parse error at the current position
    function addError(type, code, msg)
    {
        _errors.push({
            type: type,
            code: code,
            message: msg,
            line: _lineNum,
            row: _rowIdx,
            index: _i
        });
    }

    // Re-initializes all parsing state for a new input
    function reset(input)
    {
        _input = input;
        _inQuotes = false;
        _i = 0;
        _runningRowIdx = 0;
        _lineNum = 1;
        clearErrorsAndData();
        _data = [ [""] ];   // starting parsing requires an empty field
        _ch = _input[_i];
    }

    // Empties the accumulated rows and errors and rewinds the row/column indexes
    function clearErrorsAndData()
    {
        _data = [];
        _errors = [];
        _rowIdx = 0;
        _colIdx = 0;
    }

    // Snapshot of the current results in the shape handed to callers and step functions
    function returnable()
    {
        return {
            data: _data,
            errors: _errors,
            meta: {
                lines: _lineNum,
                delimiter: _delimiter,
                aborted: _aborted,
                truncated: _preview > 0 && _i < _input.length
            }
        };
    }
}
/**
 * Determines the URL this script was loaded from: writes an empty marker <script>
 * element with a random id into the document and reads the `src` of the node
 * immediately preceding it.
 *
 * @returns {string} the `src` of the marker's previous sibling
 */
function getScriptPath()
{
    var markerId = "worker" + String(Math.random()).substr(2);
    var markerHtml = '<script id="' + markerId + '"></script>';

    document.write(markerHtml);

    var marker = document.getElementById(markerId);
    return marker.previousSibling.src;
}
/**
 * Spawns a worker running this same script and registers it in the workers table.
 *
 * @returns {Worker|boolean} the new worker, tagged with a unique `id`,
 *          or false when workers are not supported
 */
function newWorker()
{
    if (!Papa.WORKERS_SUPPORTED)
        return false;

    var worker = new global.Worker(SCRIPT_PATH);
    var id = workerIdCounter++;

    worker.onmessage = mainThreadReceivedMessage;
    worker.id = id;
    workers[id] = worker;

    return worker;
}
/**
 * Callback when main thread receives a message from a worker.
 *
 * The message data carries `workerId` and may carry `results`, `error`, `file`
 * and `finished`. Results are handed to the user's step callback row by row, or
 * to the chunk callback as a whole; a finished message fires the complete
 * callback and terminates the worker.
 *
 * @param {MessageEvent} e  the message event; e.data is the worker's message
 */
function mainThreadReceivedMessage(e)
{
    var msg = e.data;
    var worker = workers[msg.workerId];

    // Ignore messages for a worker that is unknown or has already been terminated
    if (!worker)
        return;

    if (msg.error)
    {
        if (isFunction(worker.userError))
            worker.userError(msg.error, msg.file);
    }
    else if (msg.results && msg.results.data)
    {
        if (isFunction(worker.userStep))
        {
            // The worker sends rows in bulk; replay them one at a time for the step callback
            for (var i = 0; i < msg.results.data.length; i++)
            {
                worker.userStep({
                    data: [msg.results.data[i]],
                    errors: msg.results.errors,
                    meta: msg.results.meta
                });
            }
            delete msg.results; // free memory ASAP
        }
        else if (isFunction(worker.userChunk))
        {
            worker.userChunk(msg.results, msg.file);
            delete msg.results;
        }
    }

    if (msg.finished)
    {
        if (isFunction(worker.userComplete))
            worker.userComplete(msg.results);
        worker.terminate();
        delete workers[msg.workerId];
    }
}
/**
 * Callback when worker thread receives a message from the main thread.
 *
 * Remembers the worker id from the first message, parses the input and posts
 * the results back. String input always produces a "finished" reply; File input
 * only does when the parse call returned results.
 *
 * @param {MessageEvent} e  the message event; e.data carries `input`, `config` and `workerId`
 */
function workerThreadReceivedMessage(e)
{
    var msg = e.data;

    // Nothing to do for an empty message
    if (!msg)
        return;

    if (typeof Papa.WORKER_ID === 'undefined')
        Papa.WORKER_ID = msg.workerId;

    if (typeof msg.input === 'string')
    {
        global.postMessage({
            workerId: Papa.WORKER_ID,
            results: Papa.parse(msg.input, msg.config),
            finished: true
        });
    }
    // A bare `instanceof File` throws a ReferenceError where the File API is missing
    else if (typeof File !== 'undefined' && msg.input instanceof File)
    {
        var results = Papa.parse(msg.input, msg.config);
        if (results)
            global.postMessage({
                workerId: Papa.WORKER_ID,
                results: results,
                finished: true
            });
    }
}
/**
 * Replaces bad config values with good, default ones.
 *
 * Works on a copy, so the caller's object is left untouched. Every setting that
 * DEFAULTS declares is checked; settings it does not declare are copied through
 * unchanged.
 *
 * @param {Object} [origConfig]  user-supplied configuration
 * @returns {Object} a validated copy
 */
function copyAndValidateConfig(origConfig)
{
    // typeof null is 'object', so null has to be excluded explicitly
    if (origConfig === null || typeof origConfig !== 'object')
        origConfig = {};

    var config = copy(origConfig);

    if (typeof config.delimiter !== 'string'
        || config.delimiter.length != 1
        || Papa.BAD_DELIMITERS.indexOf(config.delimiter) > -1)
        config.delimiter = DEFAULTS.delimiter;

    if (typeof config.header !== 'boolean')
        config.header = DEFAULTS.header;

    if (typeof config.dynamicTyping !== 'boolean')
        config.dynamicTyping = DEFAULTS.dynamicTyping;

    if (typeof config.preview !== 'number')
        config.preview = DEFAULTS.preview;

    if (typeof config.step !== 'function')
        config.step = DEFAULTS.step;

    // A non-function chunk value would otherwise be treated as "streaming requested"
    if (typeof config.chunk !== 'function')
        config.chunk = DEFAULTS.chunk;

    if (typeof config.complete !== 'function')
        config.complete = DEFAULTS.complete;

    if (typeof config.encoding !== 'string')
        config.encoding = DEFAULTS.encoding;

    if (typeof config.worker !== 'boolean')
        config.worker = DEFAULTS.worker;

    // comments is either `true` or a string; any other value falls back to the default
    if (config.comments !== true && typeof config.comments !== 'string')
        config.comments = DEFAULTS.comments;

    if (typeof config.download !== 'boolean')
        config.download = DEFAULTS.download;

    if (typeof config.keepEmptyRows !== 'boolean')
        config.keepEmptyRows = DEFAULTS.keepEmptyRows;

    return config;
}
/**
 * Recursively copies objects and arrays; values of any other type (including
 * functions) are returned as-is.
 *
 * A null property value is preserved as null. A null passed as the top-level
 * argument still yields an empty object, which callers rely on as a usable
 * default.
 *
 * @param {*} obj  the value to copy
 * @returns {*} the copy, or the value itself when it is not an object
 */
function copy(obj)
{
    if (typeof obj !== 'object')
        return obj;

    var cpy = obj instanceof Array ? [] : {};
    for (var key in obj)
    {
        var value = obj[key];
        // typeof null is 'object'; recursing would turn a null setting into a truthy {}
        cpy[key] = value === null ? null : copy(value);
    }
    return cpy;
}
/**
 * Tells whether a value is callable.
 * @param {*} func  the value to test
 * @returns {boolean} true when `typeof func` is 'function'
 */
function isFunction(func)
{
    var kind = typeof func;
    return kind === 'function';
}
} ) ( this ) ;