Browse Source

Latest; updated docs to include new features

pull/89/head
Matthew Holt 11 years ago
parent
commit
1b0ce42908
  1. 33
      docs.html
  2. 76
      resources/js/papaparse.js

33
docs.html

@@ -75,7 +75,6 @@
</ul>
</li>
<li><a href="#extras">Extras</a></li>
<!--<li><a href="#examples"><b>Examples</b></a>-->
</ol>
</div>
<div class="clear"></div>
@@ -275,7 +274,7 @@ var csv = Papa.unparse({
delimiter: ",",
newline: "\r\n"
}</code>
Set <code>quotes</code> to <code>true</code> to force enclosing each datum around quotes. The <code>delimiter</code> can be any valid delimiting character. And the <code>newline</code> character(s) may also be customized.
Set <code>quotes</code> to <code>true</code> to force enclosing each datum around quotes, or an array of true/false values correlating to specific columns to force-quote. The <code>delimiter</code> can be any valid delimiting character. The <code>newline</code> character(s) may also be customized.
</li>
</ul>
</div>
@@ -318,17 +317,15 @@ var csv = Papa.unparse({
worker: false,
comments: false,
complete: undefined,
download: false
download: false,
keepEmptyRows: false,
chunk: undefined
}</code>
<ul>
<li><code>delimiter</code> The delimiting character. Leave blank to auto-detect. If specified, it must be a string of length 1, and cannot be found in <a href="#extras">Papa.BAD_DELIMITERS</a>.</li>
<li><code>header</code> If true, the first row of parsed data will be interpreted as field names. Fields will be returned in the <a href="#meta">meta</a>, and each row will be an object of data keyed by field name. If false, the parser simply returns an array of arrays, including the first row.</li>
<li><code>dynamicTyping</code> If true, numeric and boolean data will be converted to their type instead of remaining strings.</li>
<li><code>preview</code> If > 0, only that many rows will be parsed.</li>
</ul>
</div>
<div class="grid-50">
<ul>
<li>
<code>step</code> To <a href="faq.html#streaming">stream</a> the input, define a callback function to receive <a href="#results">results</a> row-by-row rather than together at the end:
<code class="block">step: function(results, parser) {
@@ -337,6 +334,10 @@ var csv = Papa.unparse({
}</code>
You can call <code>parser.abort()</code> to halt parsing that input (not available if using a worker).
</li>
</ul>
</div>
<div class="grid-50">
<ul>
<li><code>encoding</code> The encoding to use when opening files locally.</li>
<li><code>worker</code> Whether or not to use a <a href="faq.html#workers">worker thread</a>. Using a worker will keep your page reactive, but may be slightly slower.</li>
<li><code>comments</code> Specify a comment character (like <code>"#"</code>) if your CSV file has commented lines, and Papa will skip them. This feature is disabled by default.</li>
@@ -348,7 +349,9 @@ var csv = Papa.unparse({
</code>
If streaming, results will <i>not</i> be available in this function.
</li>
<li><code>download</code> If <code>true</code>, this indicates that the string you passed in is actually a URL from which to download a file and parse it.
<li><code>download</code> If <code>true</code>, this indicates that the string you passed in is actually a URL from which to download a file and parse it.</li>
<li><code>keepEmptyRows</code> If <code>true</code>, rows that are empty will be included in the results as an empty array. This is useful if you want to maintain line (or at least <i>row</i>) parity with the original input.</li>
<li><code>chunk</code> A callback, much like step, which activates streaming and is executed after every chunk (piece) is loaded and parsed. Works only with local and remote files. Do not use both chunk and step callbacks together. This function can be used to receive results one chunk at a time rather than one row at a time. If your file has a million rows, this results in, say, 10,000 function invocations rather than 1,000,000. In some cases, this may be faster.</li>
</ul>
</div>
<div class="clear"></div>
@@ -527,6 +530,20 @@ var csv = Papa.unparse({
Whether or not the browser supports HTML5 Web Workers. If false, <code>worker: true</code> will have no effect.
</li>
</ul>
<p>
Some settings you may change:
</p>
<ul>
<li>
<code>Papa.LocalChunkSize</code> &nbsp;
The size in bytes of each file chunk. Used when streaming files obtained from the DOM that exist on the local computer. Default 10 MB.
</li>
<li>
<code>Papa.RemoteChunkSize</code> &nbsp;
Same as LocalChunkSize, but for downloading files from remote locations. Default 5 MB.
</li>
</div>
<div class="grid-50">
<p>

76
resources/js/papaparse.js

@@ -21,7 +21,9 @@
worker: false,
comments: false,
complete: undefined,
download: false
download: false,
chunk: undefined,
keepEmptyRows: false
};
global.Papa = {};
@@ -35,6 +37,10 @@
global.Papa.BAD_DELIMITERS = ["\r", "\n", "\"", global.Papa.BYTE_ORDER_MARK];
global.Papa.WORKERS_SUPPORTED = !!global.Worker;
// Configurable chunk sizes for local and remote files, respectively
global.Papa.LocalChunkSize = 1024 * 1024 * 10; // 10 MB
global.Papa.RemoteChunkSize = 1024 * 1024 * 5; // 5 MB
// Exposed for testing and development only
global.Papa.Parser = Parser;
global.Papa.ParserHandle = ParserHandle;
@@ -154,10 +160,12 @@
var w = newWorker();
w.userStep = config.step;
w.userChunk = config.chunk;
w.userComplete = config.complete;
w.userError = config.error;
config.step = isFunction(config.step);
config.chunk = isFunction(config.chunk);
config.complete = isFunction(config.complete);
config.error = isFunction(config.error);
delete config.worker; // prevent infinite loop
@@ -188,7 +196,7 @@
}
else if (_input instanceof File)
{
if (config.step)
if (config.step || config.chunk)
{
var streamer = new FileStreamer(config);
streamer.stream(_input);
@@ -253,7 +261,7 @@
_input.data = JSON.parse(_input.data);
if (_input.data instanceof Array)
{
{
if (!_input.fields)
_input.fields = _input.data[0] instanceof Array
? _input.fields
@@ -261,9 +269,9 @@
if (!(_input.data[0] instanceof Array) && typeof _input.data[0] !== 'object')
_input.data = [_input.data]; // handles input like [1,2,3] or ["asdf"]
return serialize(_input.fields, _input.data);
}
return serialize(_input.fields || [], _input.data || []);
}
// Default (any valid paths should return before this)
@@ -282,7 +290,8 @@
_delimiter = _config.delimiter;
}
if (typeof _config.quotes === 'boolean')
if (typeof _config.quotes === 'boolean'
|| _config.quotes instanceof Array)
_quotes = _config.quotes;
if (typeof _config.newline === 'string')
@@ -321,7 +330,7 @@
{
if (i > 0)
csv += _delimiter;
csv += safe(fields[i]);
csv += safe(fields[i], i);
}
if (data.length > 0)
csv += _newline;
@@ -337,7 +346,7 @@
if (col > 0)
csv += _delimiter;
var colIdx = hasHeader && dataKeyedByField ? fields[col] : col;
csv += safe(data[row][colIdx]);
csv += safe(data[row][colIdx], col);
}
if (row < data.length - 1)
@@ -348,14 +357,15 @@
}
// Encloses a value around quotes if needed (makes a value safe for CSV insertion)
function safe(str)
function safe(str, col)
{
if (typeof str === "undefined")
return "";
str = str.toString().replace(/"/g, '""');
var needsQuotes = _quotes
var needsQuotes = (typeof _quotes === 'boolean' && _quotes)
|| (_quotes instanceof Array && _quotes[col])
|| hasAny(str, global.Papa.BAD_DELIMITERS)
|| str.indexOf(_delimiter) > -1
|| str.charAt(0) == ' '
@@ -380,7 +390,7 @@
{
config = config || {};
if (!config.chunkSize)
config.chunkSize = 1024 * 1024 * 5; // 5 MB
config.chunkSize = Papa.RemoteChunkSize;
var start = 0, fileSize = 0;
var aggregate = "";
@@ -481,6 +491,11 @@
finished: finishedWithEntireFile
});
}
else if (isFunction(config.chunk))
{
config.chunk(results); // TODO: Implement abort? (like step)
results = undefined;
}
if (finishedWithEntireFile && isFunction(config.complete))
config.complete(results);
@@ -524,7 +539,7 @@
{
config = config || {};
if (!config.chunkSize)
config.chunkSize = 1024 * 1024 * 10; // 10 MB
config.chunkSize = Papa.LocalChunkSize;
var start = 0;
var aggregate = "";
@@ -596,6 +611,11 @@
finished: finishedWithEntireFile
});
}
else if (isFunction(config.chunk))
{
config.chunk(results, file);
results = undefined;
}
if (finishedWithEntireFile && isFunction(config.complete))
config.complete(undefined, file);
@@ -1034,7 +1054,10 @@
{
if (_data[_rowIdx].length == 1 && EMPTY.test(_data[_rowIdx][0]))
{
_data.splice(_rowIdx, 1);
if (config.keepEmptyRows)
_data[_rowIdx].splice(0, 1); // leave row, but no fields
else
_data.splice(_rowIdx, 1); // cut out row entirely
_rowIdx = _data.length - 1;
}
}
@@ -1150,20 +1173,26 @@
var msg = e.data;
var worker = workers[msg.workerId];
if (msg.results && msg.results.data && isFunction(worker.userStep))
if (msg.error)
worker.userError(msg.error, msg.file);
else if (msg.results && msg.results.data)
{
for (var i = 0; i < msg.results.data.length; i++)
if (isFunction(worker.userStep))
{
worker.userStep({
data: [msg.results.data[i]],
errors: msg.results.errors,
meta: msg.results.meta
});
for (var i = 0; i < msg.results.data.length; i++)
{
worker.userStep({
data: [msg.results.data[i]],
errors: msg.results.errors,
meta: msg.results.meta
});
}
}
else if (isFunction(worker.userChunk))
worker.userChunk(msg.results, msg.file);
delete msg.results; // free memory ASAP
}
else if (msg.error)
worker.userError(msg.error, msg.file);
if (msg.finished)
{
@@ -1239,6 +1268,9 @@
if (typeof config.download !== 'boolean')
config.download = DEFAULTS.download;
if (typeof config.keepEmptyRows !== 'boolean')
config.keepEmptyRows = DEFAULTS.keepEmptyRows;
return config;
}

Loading…
Cancel
Save