Browse Source

Fix stream reset; yield the worker thread between pages during text indexing so other requests can be processed

notmasteryet 14 years ago
parent
commit
4d44eb6184
  1. 4
      src/core.js
  2. 21
      src/worker.js

4
src/core.js

@ -205,7 +205,7 @@ var Page = (function PageClosure() {
streams.push(xref.fetchIfRef(content[i])); streams.push(xref.fetchIfRef(content[i]));
content = new StreamsSequenceStream(streams); content = new StreamsSequenceStream(streams);
} else if (isStream(content)) } else if (isStream(content))
content.pos = 0; content.reset();
var pe = this.pe = new PartialEvaluator( var pe = this.pe = new PartialEvaluator(
xref, handler, 'p' + this.pageNumber + '_'); xref, handler, 'p' + this.pageNumber + '_');
@ -236,7 +236,7 @@ var Page = (function PageClosure() {
streams.push(xref.fetchIfRef(content[i])); streams.push(xref.fetchIfRef(content[i]));
content = new StreamsSequenceStream(streams); content = new StreamsSequenceStream(streams);
} else if (isStream(content)) } else if (isStream(content))
content.pos = 0; content.reset();
var pe = new PartialEvaluator( var pe = new PartialEvaluator(
xref, handler, 'p' + this.pageNumber + '_'); xref, handler, 'p' + this.pageNumber + '_');

21
src/worker.js

@ -164,23 +164,34 @@ var WorkerMessageHandler = {
handler.on('extract_text', function wphExtractText() { handler.on('extract_text', function wphExtractText() {
var numPages = pdfDoc.numPages; var numPages = pdfDoc.numPages;
var index = []; var index = [];
for (var i = 0; i < numPages; i++) {
var start = Date.now(); var start = Date.now();
function indexPage(pageNum) {
if (pageNum > numPages) {
console.log('text indexing=: time=%dms', Date.now() - start);
handler.send('text_extracted', { index: index });
return;
}
var textContent = ''; var textContent = '';
try { try {
var page = pdfDoc.getPage(i + 1); var page = pdfDoc.getPage(pageNum);
textContent = page.extractTextContent(); textContent = page.extractTextContent();
} catch (e) { } catch (e) {
// Skip errored pages // Skip errored pages
} }
index.push(textContent); index.push(textContent);
}
console.log('text indexing=: time=%dms', Date.now() - start); // processing one page, interrupting thread to process
// other requests
setTimeout(function extractTextNextPage() {
indexPage(pageNum + 1);
}, 0);
}
handler.send('text_extracted', { index: index }); indexPage(1);
}); });
} }
}; };

Loading…
Cancel
Save