Browse Source

PDF.js version 1.3.34 - See mozilla/pdf.js@0819d71a027020d44b37a31808bf778fc2c0ea14

master v1.3.34
Pdf Bot 9 years ago
parent
commit
58277c8b76
  1. 2
      bower.json
  2. 58
      build/pdf.combined.js
  3. 20
      build/pdf.js
  4. 42
      build/pdf.worker.js
  5. 2
      package.json
  6. 4
      web/pdf_viewer.js

2
bower.json

@ -1,6 +1,6 @@
{ {
"name": "pdfjs-dist", "name": "pdfjs-dist",
"version": "1.3.32", "version": "1.3.34",
"main": [ "main": [
"build/pdf.js", "build/pdf.js",
"build/pdf.worker.js" "build/pdf.worker.js"

58
build/pdf.combined.js

@ -20,8 +20,8 @@ if (typeof PDFJS === 'undefined') {
(typeof window !== 'undefined' ? window : this).PDFJS = {}; (typeof window !== 'undefined' ? window : this).PDFJS = {};
} }
PDFJS.version = '1.3.32'; PDFJS.version = '1.3.34';
PDFJS.build = 'c2dfe9e'; PDFJS.build = '0819d71';
(function pdfjsWrapper() { (function pdfjsWrapper() {
// Use strict in our context only - users might not want it // Use strict in our context only - users might not want it
@ -2354,6 +2354,14 @@ var PDFDocumentProxy = (function PDFDocumentProxyClosure() {
return PDFDocumentProxy; return PDFDocumentProxy;
})(); })();
/**
* Page getTextContent parameters.
*
* @typedef {Object} getTextContentParameters
* @property {boolean} normalizeWhitespace - replaces all occurrences of
* whitespace with standard spaces (0x20). The default value is `false`.
*/
/** /**
* Page text content. * Page text content.
* *
@ -2632,12 +2640,16 @@ var PDFPageProxy = (function PDFPageProxyClosure() {
}, },
/** /**
* @param {getTextContentParameters} [params] - Optional getTextContent parameters.
* @return {Promise} That is resolved a {@link TextContent} * @return {Promise} That is resolved a {@link TextContent}
* object that represent the page text content. * object that represent the page text content.
*/ */
getTextContent: function PDFPageProxy_getTextContent() { getTextContent: function PDFPageProxy_getTextContent(params) {
var normalizeWhitespace = (params && params.normalizeWhitespace) || false;
return this.transport.messageHandler.sendWithPromise('GetTextContent', { return this.transport.messageHandler.sendWithPromise('GetTextContent', {
pageIndex: this.pageNumber - 1 pageIndex: this.pageNumber - 1,
normalizeWhitespace: normalizeWhitespace,
}); });
}, },
@ -9909,7 +9921,8 @@ var Page = (function PageClosure() {
}); });
}, },
extractTextContent: function Page_extractTextContent(task) { extractTextContent: function Page_extractTextContent(task,
normalizeWhitespace) {
var handler = { var handler = {
on: function nullHandlerOn() {}, on: function nullHandlerOn() {},
send: function nullHandlerSend() {} send: function nullHandlerSend() {}
@ -9939,7 +9952,9 @@ var Page = (function PageClosure() {
return partialEvaluator.getTextContent(contentStream, return partialEvaluator.getTextContent(contentStream,
task, task,
self.resources); self.resources,
/* stateManager = */ null,
normalizeWhitespace);
}); });
}, },
@ -18842,12 +18857,15 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
}); });
}, },
getTextContent: function PartialEvaluator_getTextContent(stream, task, getTextContent:
resources, function PartialEvaluator_getTextContent(stream, task, resources,
stateManager) { stateManager,
normalizeWhitespace) {
stateManager = (stateManager || new StateManager(new TextState())); stateManager = (stateManager || new StateManager(new TextState()));
var WhitespaceRegexp = /\s/g;
var textContent = { var textContent = {
items: [], items: [],
styles: Object.create(null) styles: Object.create(null)
@ -18961,11 +18979,23 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
return textContentItem; return textContentItem;
} }
/**
 * Normalizes whitespace in a text chunk so that the textLayer lines up with
 * the canvas even when the text contains e.g. tabs (fixes issue6612.pdf).
 *
 * @param {string} str - the text to normalize.
 * @return {string} `str` itself when every character lies in the range
 *   0x20-0x7F; otherwise a copy of `str` in which each whitespace character
 *   has been replaced by a standard space (0x20).
 */
function replaceWhitespace(str) {
  var length = str.length;
  for (var pos = 0; pos < length; pos++) {
    var charCode = str.charCodeAt(pos);
    // Within 0x20-0x7F the only whitespace character is the standard space,
    // so the regexp pass is needed only once a character outside that range
    // has been seen.
    if (charCode < 0x20 || charCode > 0x7F) {
      return str.replace(WhitespaceRegexp, ' ');
    }
  }
  // Fast path: nothing outside 0x20-0x7F, hence nothing to replace.
  return str;
}
function runBidiTransform(textChunk) { function runBidiTransform(textChunk) {
var str = textChunk.str.join(''); var str = textChunk.str.join('');
var bidiResult = PDFJS.bidi(str, -1, textChunk.vertical); var bidiResult = PDFJS.bidi(str, -1, textChunk.vertical);
return { return {
str: bidiResult.str, str: (normalizeWhitespace ? replaceWhitespace(bidiResult.str) :
bidiResult.str),
dir: bidiResult.dir, dir: bidiResult.dir,
width: textChunk.width, width: textChunk.width,
height: textChunk.height, height: textChunk.height,
@ -19286,8 +19316,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
} }
return self.getTextContent(xobj, task, return self.getTextContent(xobj, task,
xobj.dict.get('Resources') || resources, stateManager). xobj.dict.get('Resources') || resources, stateManager,
then(function (formTextContent) { normalizeWhitespace).then(function (formTextContent) {
Util.appendToArray(textContent.items, formTextContent.items); Util.appendToArray(textContent.items, formTextContent.items);
Util.extendObj(textContent.styles, formTextContent.styles); Util.extendObj(textContent.styles, formTextContent.styles);
stateManager.restore(); stateManager.restore();
@ -41996,12 +42026,14 @@ var WorkerMessageHandler = PDFJS.WorkerMessageHandler = {
handler.on('GetTextContent', function wphExtractText(data) { handler.on('GetTextContent', function wphExtractText(data) {
var pageIndex = data.pageIndex; var pageIndex = data.pageIndex;
var normalizeWhitespace = data.normalizeWhitespace;
return pdfManager.getPage(pageIndex).then(function(page) { return pdfManager.getPage(pageIndex).then(function(page) {
var task = new WorkerTask('GetTextContent: page ' + pageIndex); var task = new WorkerTask('GetTextContent: page ' + pageIndex);
startWorkerTask(task); startWorkerTask(task);
var pageNum = pageIndex + 1; var pageNum = pageIndex + 1;
var start = Date.now(); var start = Date.now();
return page.extractTextContent(task).then(function(textContent) { return page.extractTextContent(task, normalizeWhitespace).then(
function(textContent) {
finishWorkerTask(task); finishWorkerTask(task);
info('text indexing: page=' + pageNum + ' - time=' + info('text indexing: page=' + pageNum + ' - time=' +
(Date.now() - start) + 'ms'); (Date.now() - start) + 'ms');

20
build/pdf.js

@ -20,8 +20,8 @@ if (typeof PDFJS === 'undefined') {
(typeof window !== 'undefined' ? window : this).PDFJS = {}; (typeof window !== 'undefined' ? window : this).PDFJS = {};
} }
PDFJS.version = '1.3.32'; PDFJS.version = '1.3.34';
PDFJS.build = 'c2dfe9e'; PDFJS.build = '0819d71';
(function pdfjsWrapper() { (function pdfjsWrapper() {
// Use strict in our context only - users might not want it // Use strict in our context only - users might not want it
@ -2354,6 +2354,14 @@ var PDFDocumentProxy = (function PDFDocumentProxyClosure() {
return PDFDocumentProxy; return PDFDocumentProxy;
})(); })();
/**
* Page getTextContent parameters.
*
* @typedef {Object} getTextContentParameters
* @property {boolean} normalizeWhitespace - replaces all occurrences of
* whitespace with standard spaces (0x20). The default value is `false`.
*/
/** /**
* Page text content. * Page text content.
* *
@ -2632,12 +2640,16 @@ var PDFPageProxy = (function PDFPageProxyClosure() {
}, },
/** /**
* @param {getTextContentParameters} [params] - Optional getTextContent parameters.
* @return {Promise} That is resolved a {@link TextContent} * @return {Promise} That is resolved a {@link TextContent}
* object that represent the page text content. * object that represent the page text content.
*/ */
getTextContent: function PDFPageProxy_getTextContent() { getTextContent: function PDFPageProxy_getTextContent(params) {
var normalizeWhitespace = (params && params.normalizeWhitespace) || false;
return this.transport.messageHandler.sendWithPromise('GetTextContent', { return this.transport.messageHandler.sendWithPromise('GetTextContent', {
pageIndex: this.pageNumber - 1 pageIndex: this.pageNumber - 1,
normalizeWhitespace: normalizeWhitespace,
}); });
}, },

42
build/pdf.worker.js vendored

@ -20,8 +20,8 @@ if (typeof PDFJS === 'undefined') {
(typeof window !== 'undefined' ? window : this).PDFJS = {}; (typeof window !== 'undefined' ? window : this).PDFJS = {};
} }
PDFJS.version = '1.3.32'; PDFJS.version = '1.3.34';
PDFJS.build = 'c2dfe9e'; PDFJS.build = '0819d71';
(function pdfjsWrapper() { (function pdfjsWrapper() {
// Use strict in our context only - users might not want it // Use strict in our context only - users might not want it
@ -2849,7 +2849,8 @@ var Page = (function PageClosure() {
}); });
}, },
extractTextContent: function Page_extractTextContent(task) { extractTextContent: function Page_extractTextContent(task,
normalizeWhitespace) {
var handler = { var handler = {
on: function nullHandlerOn() {}, on: function nullHandlerOn() {},
send: function nullHandlerSend() {} send: function nullHandlerSend() {}
@ -2879,7 +2880,9 @@ var Page = (function PageClosure() {
return partialEvaluator.getTextContent(contentStream, return partialEvaluator.getTextContent(contentStream,
task, task,
self.resources); self.resources,
/* stateManager = */ null,
normalizeWhitespace);
}); });
}, },
@ -11782,12 +11785,15 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
}); });
}, },
getTextContent: function PartialEvaluator_getTextContent(stream, task, getTextContent:
resources, function PartialEvaluator_getTextContent(stream, task, resources,
stateManager) { stateManager,
normalizeWhitespace) {
stateManager = (stateManager || new StateManager(new TextState())); stateManager = (stateManager || new StateManager(new TextState()));
var WhitespaceRegexp = /\s/g;
var textContent = { var textContent = {
items: [], items: [],
styles: Object.create(null) styles: Object.create(null)
@ -11901,11 +11907,23 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
return textContentItem; return textContentItem;
} }
/**
 * Normalizes whitespace in a text chunk so that the textLayer lines up with
 * the canvas even when the text contains e.g. tabs (fixes issue6612.pdf).
 *
 * @param {string} str - the text to normalize.
 * @return {string} `str` itself when every character lies in the range
 *   0x20-0x7F; otherwise a copy of `str` in which each whitespace character
 *   has been replaced by a standard space (0x20).
 */
function replaceWhitespace(str) {
  var length = str.length;
  for (var pos = 0; pos < length; pos++) {
    var charCode = str.charCodeAt(pos);
    // Within 0x20-0x7F the only whitespace character is the standard space,
    // so the regexp pass is needed only once a character outside that range
    // has been seen.
    if (charCode < 0x20 || charCode > 0x7F) {
      return str.replace(WhitespaceRegexp, ' ');
    }
  }
  // Fast path: nothing outside 0x20-0x7F, hence nothing to replace.
  return str;
}
function runBidiTransform(textChunk) { function runBidiTransform(textChunk) {
var str = textChunk.str.join(''); var str = textChunk.str.join('');
var bidiResult = PDFJS.bidi(str, -1, textChunk.vertical); var bidiResult = PDFJS.bidi(str, -1, textChunk.vertical);
return { return {
str: bidiResult.str, str: (normalizeWhitespace ? replaceWhitespace(bidiResult.str) :
bidiResult.str),
dir: bidiResult.dir, dir: bidiResult.dir,
width: textChunk.width, width: textChunk.width,
height: textChunk.height, height: textChunk.height,
@ -12226,8 +12244,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
} }
return self.getTextContent(xobj, task, return self.getTextContent(xobj, task,
xobj.dict.get('Resources') || resources, stateManager). xobj.dict.get('Resources') || resources, stateManager,
then(function (formTextContent) { normalizeWhitespace).then(function (formTextContent) {
Util.appendToArray(textContent.items, formTextContent.items); Util.appendToArray(textContent.items, formTextContent.items);
Util.extendObj(textContent.styles, formTextContent.styles); Util.extendObj(textContent.styles, formTextContent.styles);
stateManager.restore(); stateManager.restore();
@ -34936,12 +34954,14 @@ var WorkerMessageHandler = PDFJS.WorkerMessageHandler = {
handler.on('GetTextContent', function wphExtractText(data) { handler.on('GetTextContent', function wphExtractText(data) {
var pageIndex = data.pageIndex; var pageIndex = data.pageIndex;
var normalizeWhitespace = data.normalizeWhitespace;
return pdfManager.getPage(pageIndex).then(function(page) { return pdfManager.getPage(pageIndex).then(function(page) {
var task = new WorkerTask('GetTextContent: page ' + pageIndex); var task = new WorkerTask('GetTextContent: page ' + pageIndex);
startWorkerTask(task); startWorkerTask(task);
var pageNum = pageIndex + 1; var pageNum = pageIndex + 1;
var start = Date.now(); var start = Date.now();
return page.extractTextContent(task).then(function(textContent) { return page.extractTextContent(task, normalizeWhitespace).then(
function(textContent) {
finishWorkerTask(task); finishWorkerTask(task);
info('text indexing: page=' + pageNum + ' - time=' + info('text indexing: page=' + pageNum + ' - time=' +
(Date.now() - start) + 'ms'); (Date.now() - start) + 'ms');

2
package.json

@ -1,6 +1,6 @@
{ {
"name": "pdfjs-dist", "name": "pdfjs-dist",
"version": "1.3.32", "version": "1.3.34",
"description": "Generic build of Mozilla's PDF.js library.", "description": "Generic build of Mozilla's PDF.js library.",
"keywords": [ "keywords": [
"Mozilla", "Mozilla",

4
web/pdf_viewer.js

@ -1345,7 +1345,7 @@ var PDFPageView = (function PDFPageViewClosure() {
function pdfPageRenderCallback() { function pdfPageRenderCallback() {
pageViewDrawCallback(null); pageViewDrawCallback(null);
if (textLayer) { if (textLayer) {
self.pdfPage.getTextContent().then( self.pdfPage.getTextContent({ normalizeWhitespace: true }).then(
function textContentResolved(textContent) { function textContentResolved(textContent) {
textLayer.setTextContent(textContent); textLayer.setTextContent(textContent);
textLayer.render(TEXT_LAYER_RENDER_DELAY); textLayer.render(TEXT_LAYER_RENDER_DELAY);
@ -2626,7 +2626,7 @@ var PDFViewer = (function pdfViewer() {
getPageTextContent: function (pageIndex) { getPageTextContent: function (pageIndex) {
return this.pdfDocument.getPage(pageIndex + 1).then(function (page) { return this.pdfDocument.getPage(pageIndex + 1).then(function (page) {
return page.getTextContent(); return page.getTextContent({ normalizeWhitespace: true });
}); });
}, },

Loading…
Cancel
Save