From 13fb8fc05f4c991b4e3e72693af6fdc2c86e7946 Mon Sep 17 00:00:00 2001
From: Matthew Holt <Matthew.Holt+git@gmail.com>
Date: Wed, 15 Oct 2014 21:25:40 -0600
Subject: [PATCH] Bug fixes related to pause/resume and preview (fixes #92)

---
 papaparse.js | 393 ++++++++++++++++++++++++++++-----------------------
 1 file changed, 219 insertions(+), 174 deletions(-)

diff --git a/papaparse.js b/papaparse.js
index 941dca6..6eff648 100644
--- a/papaparse.js
+++ b/papaparse.js
@@ -394,11 +394,23 @@
 		var start = 0, fileSize = 0;
 		var aggregate = "";
 		var partialLine = "";
-		var xhr, nextChunk;
+		var xhr, url, nextChunk, finishedWithEntireFile;
 		var handle = new ParserHandle(copy(config));
+		handle.streamer = this;
 
-		this.stream = function(url)
+		this.resume = function()
+		{
+			nextChunk();
+		};
+
+		this.finished = function()
+		{
+			return finishedWithEntireFile;
+		};
+
+		this.stream = function(u)
 		{
+			url = u;
 			if (IS_WORKER)
 			{
 				nextChunk = function()
@@ -416,110 +428,116 @@
 			}
 
 			nextChunk();	// Starts streaming
+		};
 
-
-			function readChunk()
+		function readChunk()
+		{
+			if (finishedWithEntireFile)
 			{
-				xhr = new XMLHttpRequest();
-				if (!IS_WORKER)
-				{
-					xhr.onload = chunkLoaded;
-					xhr.onerror = chunkError;
-				}
-				xhr.open("GET", url, !IS_WORKER);
-				if (config.step)
-				{
-					var end = start + config.chunkSize - 1;	// minus one because byte range is inclusive
-					if (fileSize && end > fileSize) // Hack around a Chrome bug: http://stackoverflow.com/q/24745095/1048862
-						end = fileSize;
-					xhr.setRequestHeader("Range", "bytes="+start+"-"+end);
-				}
-				xhr.send();
-				if (IS_WORKER && xhr.status == 0)
-					chunkError();
-				else
-					start += config.chunkSize;
+				chunkLoaded();
+				return;
 			}
 
-			function chunkLoaded()
+			xhr = new XMLHttpRequest();
+			if (!IS_WORKER)
 			{
-				if (xhr.readyState != 4)
-					return;
-
-				if (xhr.status < 200 || xhr.status >= 400)
-				{
-					chunkError();
-					return;
-				}
+				xhr.onload = chunkLoaded;
+				xhr.onerror = chunkError;
+			}
+			xhr.open("GET", url, !IS_WORKER);
+			if (config.step)
+			{
+				var end = start + config.chunkSize - 1;	// minus one because byte range is inclusive
+				if (fileSize && end > fileSize) // Hack around a Chrome bug: http://stackoverflow.com/q/24745095/1048862
+					end = fileSize;
+				xhr.setRequestHeader("Range", "bytes="+start+"-"+end);
+			}
+			xhr.send();
+			if (IS_WORKER && xhr.status == 0)
+				chunkError();
+			else
+				start += config.chunkSize;
+		}
 
-				// Rejoin the line we likely just split in two by chunking the file
-				aggregate += partialLine + xhr.responseText;
-				partialLine = "";
+		function chunkLoaded()
+		{
+			if (xhr.readyState != 4)
+				return;
 
-				var finishedWithEntireFile = !config.step || start > getFileSize(xhr);
+			if (xhr.status < 200 || xhr.status >= 400)
+			{
+				chunkError();
+				return;
+			}
 
-				if (!finishedWithEntireFile)
-				{
-					var lastLineEnd = aggregate.lastIndexOf("\n");
+			// Rejoin the line we likely just split in two by chunking the file
+			aggregate += partialLine + xhr.responseText;
+			partialLine = "";
 
-					if (lastLineEnd < 0)
-						lastLineEnd = aggregate.lastIndexOf("\r");
+			finishedWithEntireFile = !config.step || start > getFileSize(xhr);
 
-					if (lastLineEnd > -1)
-					{
-						partialLine = aggregate.substring(lastLineEnd + 1);	// skip the line ending character
-						aggregate = aggregate.substring(0, lastLineEnd);
-					}
-					else
-					{
-						// For chunk sizes smaller than a line (a line could not fit in a single chunk)
-						// we simply build our aggregate by reading in the next chunk, until we find a newline
-						nextChunk();
-						return;
-					}
-				}
+			if (!finishedWithEntireFile)
+			{
+				var lastLineEnd = aggregate.lastIndexOf("\n");
 
-				var results = handle.parse(aggregate);
-				aggregate = "";
+				if (lastLineEnd < 0)
+					lastLineEnd = aggregate.lastIndexOf("\r");
 
-				if (IS_WORKER)
+				if (lastLineEnd > -1)
 				{
-					global.postMessage({
-						results: results,
-						workerId: Papa.WORKER_ID,
-						finished: finishedWithEntireFile
-					});
+					partialLine = aggregate.substring(lastLineEnd + 1);	// skip the line ending character
+					aggregate = aggregate.substring(0, lastLineEnd);
 				}
-				else if (isFunction(config.chunk))
+				else
 				{
-					config.chunk(results);
-					results = undefined;
-				}
-
-				if (!finishedWithEntireFile && !results.meta.paused)
+					// For chunk sizes smaller than a line (a line could not fit in a single chunk)
+					// we simply build our aggregate by reading in the next chunk, until we find a newline
 					nextChunk();
+					return;
+				}
 			}
 
-			function chunkError()
+			var results = handle.parse(aggregate);
+			aggregate = "";
+
+			if (IS_WORKER)
 			{
-				if (isFunction(config.error))
-					config.error(xhr.statusText);
-				else if (IS_WORKER && config.error)
-				{
-					global.postMessage({
-						workerId: Papa.WORKER_ID,
-						error: xhr.statusText,
-						finished: false
-					});
-				}
+				global.postMessage({
+					results: results,
+					workerId: Papa.WORKER_ID,
+					finished: finishedWithEntireFile
+				});
 			}
+			else if (isFunction(config.chunk))
+			{
+				// Hand the parsed chunk to the user's callback, then drop our reference to it
+				config.chunk(results);
+				results = undefined;
+			}
+
+			if (!finishedWithEntireFile && (!results || !results.meta.paused))	// results is undefined after config.chunk
+				nextChunk();
+		}
 
-			function getFileSize(xhr)
+		function chunkError()
+		{
+			if (isFunction(config.error))
+				config.error(xhr.statusText);
+			else if (IS_WORKER && config.error)
 			{
-				var contentRange = xhr.getResponseHeader("Content-Range");
-				return parseInt(contentRange.substr(contentRange.lastIndexOf("/") + 1));
+				global.postMessage({
+					workerId: Papa.WORKER_ID,
+					error: xhr.statusText,
+					finished: false
+				});
 			}
-		};
+		}
+
+		function getFileSize(xhr)
+		{
+			var contentRange = xhr.getResponseHeader("Content-Range");	// null when the header is absent
+			return contentRange === null ? -1 : parseInt(contentRange.substr(contentRange.lastIndexOf("/") + 1), 10);	// -1 makes the caller treat this response as the whole file
+		}
 	}
 
 
@@ -537,18 +555,22 @@
 			config.chunkSize = Papa.LocalChunkSize;
 
 		var start = 0;
+		var file;
+		var slice;
 		var aggregate = "";
 		var partialLine = "";
-		var reader, nextChunk, slice;
+		var reader, nextChunk, finishedWithEntireFile;	// slice is declared on its own line above
 		var handle = new ParserHandle(copy(config));
+		handle.streamer = this;
 
 		// FileReader is better than FileReaderSync (even in worker) - see http://stackoverflow.com/q/24708649/1048862
 		// But Firefox is a pill, too - see issue #76: https://github.com/mholt/PapaParse/issues/76
 		var usingAsyncReader = typeof FileReader === 'function';
 
-		this.stream = function(file)
+		this.stream = function(f)
 		{
-			var slice = file.slice || file.webkitSlice || file.mozSlice;
+			file = f;
+			slice = file.slice || file.webkitSlice || file.mozSlice;
 
 			if (usingAsyncReader)
 			{
@@ -560,89 +582,100 @@
 				reader = new FileReaderSync();	// Hack for running in a web worker in Firefox
 
 			nextChunk();	// Starts streaming
+		};
 
-			function nextChunk()
-			{
-				if (start < file.size)
-					readChunk();
-			}
+		this.finished = function()
+		{
+			return finishedWithEntireFile;
+		};
 
-			function readChunk()
-			{
-				var end = Math.min(start + config.chunkSize, file.size);
-				var txt = reader.readAsText(slice.call(file, start, end), config.encoding);
-				if (!usingAsyncReader)
-					chunkLoaded({ target: { result: txt } });	// mimic the async signature
-			}
+		this.resume = function()
+		{
+			nextChunk();
+		};
 
-			function chunkLoaded(event)
-			{
-				// Very important to increment start each time before handling results
-				start += config.chunkSize;
+		function nextChunk()
+		{
+			if (!finishedWithEntireFile)
+				readChunk();
+		}
 
-				// Rejoin the line we likely just split in two by chunking the file
-				aggregate += partialLine + event.target.result;
-				partialLine = "";
+		function readChunk()
+		{
+			var end = Math.min(start + config.chunkSize, file.size);
+			var txt = reader.readAsText(slice.call(file, start, end), config.encoding);
+			if (!usingAsyncReader)
+				chunkLoaded({ target: { result: txt } });	// mimic the async signature
+		}
 
-				var finishedWithEntireFile = start >= file.size;
+		function chunkLoaded(event)
+		{
+			// Very important to increment start each time before handling results
+			start += config.chunkSize;
 
-				if (!finishedWithEntireFile)
-				{
-					var lastLineEnd = aggregate.lastIndexOf("\n");
+			// Rejoin the line we likely just split in two by chunking the file
+			aggregate += partialLine + event.target.result;
+			partialLine = "";
 
-					if (lastLineEnd < 0)
-						lastLineEnd = aggregate.lastIndexOf("\r");
+			finishedWithEntireFile = start >= file.size;
 
-					if (lastLineEnd > -1)
-					{
-						partialLine = aggregate.substring(lastLineEnd + 1);	// skip the line ending character
-						aggregate = aggregate.substring(0, lastLineEnd);
-					}
-					else
-					{
-						// For chunk sizes smaller than a line (a line could not fit in a single chunk)
-						// we simply build our aggregate by reading in the next chunk, until we find a newline
-						nextChunk();
-						return;
-					}
-				}
+			if (!finishedWithEntireFile)
+			{
+				var lastLineEnd = aggregate.lastIndexOf("\n");
 
-				var results = handle.parse(aggregate);
-				aggregate = "";
+				if (lastLineEnd < 0)
+					lastLineEnd = aggregate.lastIndexOf("\r");
 
-				if (IS_WORKER)
+				if (lastLineEnd > -1)
 				{
-					global.postMessage({
-						results: results,
-						workerId: Papa.WORKER_ID,
-						finished: finishedWithEntireFile
-					});
+					partialLine = aggregate.substring(lastLineEnd + 1);	// skip the line ending character
+					aggregate = aggregate.substring(0, lastLineEnd);
 				}
-				else if (isFunction(config.chunk))
+				else
 				{
-					config.chunk(results, file);
-					results = undefined;
+					// For chunk sizes smaller than a line (a line could not fit in a single chunk)
+					// we simply build our aggregate by reading in the next chunk, until we find a newline
+					nextChunk();
+					return;
 				}
+			}
 
-				if (!finishedWithEntireFile && !results.meta.paused)
-					nextChunk();
+			var results = handle.parse(aggregate);
+			aggregate = "";
+
+			if (IS_WORKER)
+			{
+				global.postMessage({
+					results: results,
+					workerId: Papa.WORKER_ID,
+					finished: finishedWithEntireFile
+				});
+			}
+			else if (isFunction(config.chunk))
+			{
+				config.chunk(results, file);
+				results = undefined;
 			}
 
-			function chunkError()
+			if (!results || !results.meta.paused)
+				nextChunk();
+		}
+
+		function chunkError()
+		{
+			if (isFunction(config.error))
+				config.error(reader.error, file);
+			else if (IS_WORKER && config.error)
 			{
-				if (isFunction(config.error))
-					config.error(reader.error, file);
-				else if (IS_WORKER && config.error)
-				{
-					global.postMessage({
-						workerId: Papa.WORKER_ID,
-						error: reader.error,
-						file: file,
-						finished: false
-					});
-				}
+				global.postMessage({
+					workerId: Papa.WORKER_ID,
+					error: reader.error,
+					file: file,
+					finished: false
+				});
 			}
-		};
+		}
+
 	}
 
 
@@ -656,6 +689,7 @@
 		var FLOAT = /^\s*-?(\d*\.?\d+|\d+\.?\d*)(e[-+]?\d+)?\s*$/i;
 
 		var self = this;
+		var _stepCounter = 0;	// Number of times step was called (number of rows parsed)
 		var _input;				// The input being parsed
 		var _parser;			// The core parser being used
 		var _paused = false;	// Whether we are paused or not
@@ -666,10 +700,29 @@
 			errors: [],
 			meta: {}
 		};
-		_config = copy(_config);
+
+		if (isFunction(_config.step))
+		{
+			var userStep = _config.step;
+			_config.step = function(results)
+			{
+				_results = results;
+				if (needsHeaderRow())
+					processResults();
+				else	// only call user's step function after header row
+				{
+					_stepCounter += results.data.length;
+					if (_config.preview && _stepCounter > _config.preview)
+						_parser.abort();
+					else
+						userStep(processResults(), self);
+				}
+			};
+		}
 
 		this.parse = function(input)
 		{
+			// NOTE(review): _stepCounter is not reset here; the streamers call parse() once per chunk, so it presumably has to keep counting across chunks for preview - confirm
 			_delimiterError = false;
 			if (!_config.delimiter)
 			{
@@ -684,29 +737,17 @@
 				_results.meta.delimiter = _config.delimiter;
 			}
 
-			if (isFunction(_config.step))
-			{
-				var userStep = _config.step;
-				_config.step = function(results)
-				{
-					_results = results;
-					if (needsHeaderRow())
-						processResults();
-					else
-						userStep(processResults(), self);
-				};
-			}
-
+			var parserConfig = copy(_config);
 			if (_config.preview && _config.header)
-				_config.preview++;	// to compensate for header row
+				parserConfig.preview++;	// to compensate for header row
 
 			_input = input;
-			_parser = new Parser(_config);
+			_parser = new Parser(parserConfig);
 			_results = _parser.parse(_input);
 			processResults();
-			if (isFunction(_config.complete) && !_paused)
-				_config.complete(_results);
-			return _paused ? { meta: { paused: true } } : _results;
+			if (isFunction(_config.complete) && !_paused && (!self.streamer || self.streamer.finished()))
+				_config.complete(_results);	// TODO: In some cases, when chunk is specified, this executes before the chunk function...
+			return _paused ? { meta: { paused: true } } : (_results || { meta: { paused: false } });
 		};
 
 		this.pause = function()
@@ -721,8 +762,13 @@
 			_paused = false;
 			_parser = new Parser(_config);
 			_parser.parse(_input);
-			if (isFunction(_config.complete) && !_paused)
-				_config.complete(_results);
+			if (!_paused)
+			{
+				if (self.streamer && !self.streamer.finished())
+					self.streamer.resume();		// more of the file yet to come
+				else if (isFunction(_config.complete))
+					_config.complete(_results);
+			}
 		};
 
 		this.abort = function()
@@ -731,7 +777,7 @@
 			if (isFunction(_config.complete))
 				_config.complete(_results);
 			_input = "";
-		}
+		};
 
 		function processResults()
 		{
@@ -743,7 +789,6 @@
 
 			if (needsHeaderRow())
 				fillHeaderFields();
-
 			return applyHeaderAndDynamicTyping();
 		}
 
@@ -770,6 +815,7 @@
 			for (var i = 0; i < _results.data.length; i++)
 			{
 				var row = {};
+
 				for (var j = 0; j < _results.data[i].length; j++)
 				{
 					if (_config.dynamicTyping)
@@ -808,7 +854,6 @@
 
 			if (_config.header && _results.meta)
 				_results.meta.fields = _fields;
-
 			return _results;
 		}