Generic build of PDF.js library.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

548 lines
18 KiB

14 years ago
/* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */
/* Copyright 2012 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* globals warn, Dict, isDict, shadow, isArray, Util, StreamsSequenceStream,
isStream, NullStream, ObjectLoader, PartialEvaluator, Promise,
OperatorList, Annotation, error, assert, XRef, isArrayBuffer, Stream,
isString, isName, info, Linearization, MissingDataException, Lexer,
Catalog, stringToPDFString, stringToBytes, calculateMD5,
AnnotationFactory */
14 years ago
'use strict';
var Page = (function PageClosure() {
var LETTER_SIZE_MEDIABOX = [0, 0, 612, 792];
function Page(pdfManager, xref, pageIndex, pageDict, ref, fontCache) {
this.pdfManager = pdfManager;
this.pageIndex = pageIndex;
14 years ago
this.pageDict = pageDict;
this.xref = xref;
this.ref = ref;
this.fontCache = fontCache;
this.idCounters = {
obj: 0
};
this.resourcesPromise = null;
14 years ago
}
Page.prototype = {
getPageProp: function Page_getPageProp(key) {
return this.pageDict.get(key);
14 years ago
},
getInheritedPageProp: function Page_getInheritedPageProp(key) {
var dict = this.pageDict, valueArray = null, loopCount = 0;
var MAX_LOOP_COUNT = 100;
// Always walk up the entire parent chain, to be able to find
// e.g. \Resources placed on multiple levels of the tree.
while (dict) {
var value = dict.get(key);
if (value) {
if (!valueArray) {
valueArray = [];
}
valueArray.push(value);
}
if (++loopCount > MAX_LOOP_COUNT) {
warn('Page_getInheritedPageProp: maximum loop count exceeded.');
14 years ago
break;
}
dict = dict.get('Parent');
14 years ago
}
if (!valueArray) {
return Dict.empty;
}
if (valueArray.length === 1 || !isDict(valueArray[0]) ||
loopCount > MAX_LOOP_COUNT) {
return valueArray[0];
}
return Dict.merge(this.xref, valueArray);
14 years ago
},
14 years ago
get content() {
return this.getPageProp('Contents');
14 years ago
},
14 years ago
get resources() {
// For robustness: The spec states that a \Resources entry has to be
// present, but can be empty. Some document omit it still, in this case
// we return an empty dictionary.
return shadow(this, 'resources', this.getInheritedPageProp('Resources'));
14 years ago
},
14 years ago
get mediaBox() {
var obj = this.getInheritedPageProp('MediaBox');
14 years ago
// Reset invalid media box to letter size.
if (!isArray(obj) || obj.length !== 4) {
obj = LETTER_SIZE_MEDIABOX;
}
14 years ago
return shadow(this, 'mediaBox', obj);
},
14 years ago
get view() {
var mediaBox = this.mediaBox;
var cropBox = this.getInheritedPageProp('CropBox');
if (!isArray(cropBox) || cropBox.length !== 4) {
return shadow(this, 'view', mediaBox);
}
13 years ago
// From the spec, 6th ed., p.963:
// "The crop, bleed, trim, and art boxes should not ordinarily
// extend beyond the boundaries of the media box. If they do, they are
// effectively reduced to their intersection with the media box."
cropBox = Util.intersect(cropBox, mediaBox);
if (!cropBox) {
return shadow(this, 'view', mediaBox);
}
return shadow(this, 'view', cropBox);
14 years ago
},
14 years ago
get rotate() {
var rotate = this.getInheritedPageProp('Rotate') || 0;
14 years ago
// Normalize rotation so it's a multiple of 90 and between 0 and 270
if (rotate % 90 !== 0) {
14 years ago
rotate = 0;
} else if (rotate >= 360) {
rotate = rotate % 360;
} else if (rotate < 0) {
// The spec doesn't cover negatives, assume its counterclockwise
// rotation. The following is the other implementation of modulo.
rotate = ((rotate % 360) + 360) % 360;
}
return shadow(this, 'rotate', rotate);
},
getContentStream: function Page_getContentStream() {
var content = this.content;
var stream;
14 years ago
if (isArray(content)) {
// fetching items
var xref = this.xref;
14 years ago
var i, n = content.length;
var streams = [];
for (i = 0; i < n; ++i) {
streams.push(xref.fetchIfRef(content[i]));
}
stream = new StreamsSequenceStream(streams);
} else if (isStream(content)) {
stream = content;
} else {
// replacing non-existent page content with empty one
stream = new NullStream();
14 years ago
}
return stream;
},
loadResources: function Page_loadResources(keys) {
if (!this.resourcesPromise) {
// TODO: add async getInheritedPageProp and remove this.
this.resourcesPromise = this.pdfManager.ensure(this, 'resources');
}
return this.resourcesPromise.then(function resourceSuccess() {
var objectLoader = new ObjectLoader(this.resources.map,
keys,
this.xref);
return objectLoader.load();
}.bind(this));
},
getOperatorList: function Page_getOperatorList(handler, intent) {
var self = this;
var pdfManager = this.pdfManager;
var contentStreamPromise = pdfManager.ensure(this, 'getContentStream',
[]);
var resourcesPromise = this.loadResources([
'ExtGState',
'ColorSpace',
'Pattern',
'Shading',
'XObject',
'Font'
// ProcSet
// Properties
]);
var partialEvaluator = new PartialEvaluator(pdfManager, this.xref,
handler, this.pageIndex,
'p' + this.pageIndex + '_',
this.idCounters,
this.fontCache);
var dataPromises = Promise.all([contentStreamPromise, resourcesPromise]);
var pageListPromise = dataPromises.then(function(data) {
var contentStream = data[0];
var opList = new OperatorList(intent, handler, self.pageIndex);
handler.send('StartRenderPage', {
transparency: partialEvaluator.hasBlendModes(self.resources),
pageIndex: self.pageIndex,
intent: intent
});
return partialEvaluator.getOperatorList(contentStream, self.resources,
opList).then(function () {
return opList;
});
});
var annotationsPromise = pdfManager.ensure(this, 'annotations');
return Promise.all([pageListPromise, annotationsPromise]).then(
function(datas) {
var pageOpList = datas[0];
var annotations = datas[1];
if (annotations.length === 0) {
pageOpList.flush(true);
return pageOpList;
}
var annotationsReadyPromise = Annotation.appendToOperatorList(
annotations, pageOpList, pdfManager, partialEvaluator, intent);
return annotationsReadyPromise.then(function () {
pageOpList.flush(true);
return pageOpList;
});
});
14 years ago
},
extractTextContent: function Page_extractTextContent() {
var handler = {
on: function nullHandlerOn() {},
send: function nullHandlerSend() {}
};
var self = this;
var pdfManager = this.pdfManager;
var contentStreamPromise = pdfManager.ensure(this, 'getContentStream',
[]);
var resourcesPromise = this.loadResources([
'ExtGState',
'XObject',
'Font'
]);
var dataPromises = Promise.all([contentStreamPromise,
resourcesPromise]);
return dataPromises.then(function(data) {
var contentStream = data[0];
var partialEvaluator = new PartialEvaluator(pdfManager, self.xref,
handler, self.pageIndex,
'p' + self.pageIndex + '_',
self.idCounters,
self.fontCache);
return partialEvaluator.getTextContent(contentStream,
self.resources);
});
14 years ago
},
getAnnotationsData: function Page_getAnnotationsData() {
var annotations = this.annotations;
var annotationsData = [];
for (var i = 0, n = annotations.length; i < n; ++i) {
annotationsData.push(annotations[i].data);
}
return annotationsData;
},
get annotations() {
var annotations = [];
var annotationRefs = this.getInheritedPageProp('Annots') || [];
var annotationFactory = new AnnotationFactory();
for (var i = 0, n = annotationRefs.length; i < n; ++i) {
var annotationRef = annotationRefs[i];
var annotation = annotationFactory.create(this.xref, annotationRef);
if (annotation &&
(annotation.isViewable() || annotation.isPrintable())) {
annotations.push(annotation);
}
14 years ago
}
return shadow(this, 'annotations', annotations);
14 years ago
}
};
return Page;
14 years ago
})();
/**
13 years ago
* The `PDFDocument` holds all the data of the PDF file. Compared to the
14 years ago
* `PDFDoc`, this one doesn't have any job management code.
13 years ago
* Right now there exists one PDFDocument on the main thread + one object
14 years ago
* for each worker. If there is no worker support enabled, there are two
13 years ago
* `PDFDocument` objects on the main thread created.
14 years ago
*/
13 years ago
var PDFDocument = (function PDFDocumentClosure() {
var FINGERPRINT_FIRST_BYTES = 1024;
var EMPTY_FINGERPRINT = '\x00\x00\x00\x00\x00\x00\x00' +
'\x00\x00\x00\x00\x00\x00\x00\x00\x00';
function PDFDocument(pdfManager, arg, password) {
if (isStream(arg)) {
init.call(this, pdfManager, arg, password);
} else if (isArrayBuffer(arg)) {
init.call(this, pdfManager, new Stream(arg), password);
} else {
13 years ago
error('PDFDocument: Unknown argument type');
}
14 years ago
}
function init(pdfManager, stream, password) {
assert(stream.length > 0, 'stream must have data');
this.pdfManager = pdfManager;
14 years ago
this.stream = stream;
var xref = new XRef(this.stream, password, pdfManager);
this.xref = xref;
14 years ago
}
function find(stream, needle, limit, backwards) {
var pos = stream.pos;
var end = stream.end;
var strBuf = [];
if (pos + limit > end) {
14 years ago
limit = end - pos;
}
for (var n = 0; n < limit; ++n) {
strBuf.push(String.fromCharCode(stream.getByte()));
}
var str = strBuf.join('');
14 years ago
stream.pos = pos;
var index = backwards ? str.lastIndexOf(needle) : str.indexOf(needle);
if (index === -1) {
14 years ago
return false; /* not found */
}
14 years ago
stream.pos += index;
return true; /* found */
}
var DocumentInfoValidators = {
get entries() {
// Lazily build this since all the validation functions below are not
// defined until after this file loads.
return shadow(this, 'entries', {
Title: isString,
Author: isString,
Subject: isString,
Keywords: isString,
Creator: isString,
Producer: isString,
CreationDate: isString,
ModDate: isString,
Trapped: isName
});
}
};
13 years ago
PDFDocument.prototype = {
parse: function PDFDocument_parse(recoveryMode) {
this.setup(recoveryMode);
var version = this.catalog.catDict.get('Version');
if (isName(version)) {
this.pdfFormatVersion = version.name;
}
try {
// checking if AcroForm is present
this.acroForm = this.catalog.catDict.get('AcroForm');
if (this.acroForm) {
this.xfa = this.acroForm.get('XFA');
var fields = this.acroForm.get('Fields');
if ((!fields || !isArray(fields) || fields.length === 0) &&
!this.xfa) {
// no fields and no XFA -- not a form (?)
this.acroForm = null;
}
}
} catch (ex) {
info('Something wrong with AcroForm entry');
this.acroForm = null;
}
},
14 years ago
get linearization() {
var linearization = null;
if (this.stream.length) {
try {
linearization = Linearization.create(this.stream);
} catch (err) {
if (err instanceof MissingDataException) {
throw err;
}
info(err);
}
14 years ago
}
// shadow the prototype getter with a data property
return shadow(this, 'linearization', linearization);
},
get startXRef() {
var stream = this.stream;
var startXRef = 0;
var linearization = this.linearization;
if (linearization) {
// Find end of first obj.
stream.reset();
if (find(stream, 'endobj', 1024)) {
14 years ago
startXRef = stream.pos + 6;
}
14 years ago
} else {
14 years ago
// Find startxref by jumping backward from the end of the file.
var step = 1024;
var found = false, pos = stream.end;
14 years ago
while (!found && pos > 0) {
pos -= step - 'startxref'.length;
if (pos < 0) {
pos = 0;
}
stream.pos = pos;
14 years ago
found = find(stream, 'startxref', step, true);
}
if (found) {
14 years ago
stream.skip(9);
var ch;
do {
ch = stream.getByte();
14 years ago
} while (Lexer.isSpace(ch));
var str = '';
while (ch >= 0x20 && ch <= 0x39) { // < '9'
str += String.fromCharCode(ch);
ch = stream.getByte();
14 years ago
}
startXRef = parseInt(str, 10);
if (isNaN(startXRef)) {
14 years ago
startXRef = 0;
}
14 years ago
}
}
// shadow the prototype getter with a data property
return shadow(this, 'startXRef', startXRef);
},
get mainXRefEntriesOffset() {
var mainXRefEntriesOffset = 0;
var linearization = this.linearization;
if (linearization) {
14 years ago
mainXRefEntriesOffset = linearization.mainXRefEntriesOffset;
}
14 years ago
// shadow the prototype getter with a data property
return shadow(this, 'mainXRefEntriesOffset', mainXRefEntriesOffset);
},
// Find the header, remove leading garbage and setup the stream
// starting from the header.
13 years ago
checkHeader: function PDFDocument_checkHeader() {
14 years ago
var stream = this.stream;
stream.reset();
if (find(stream, '%PDF-', 1024)) {
// Found the header, trim off any garbage before it.
stream.moveStart();
// Reading file format version
var MAX_VERSION_LENGTH = 12;
var version = '', ch;
while ((ch = stream.getByte()) > 0x20) { // SPACE
if (version.length >= MAX_VERSION_LENGTH) {
break;
}
version += String.fromCharCode(ch);
}
if (!this.pdfFormatVersion) {
// removing "%PDF-"-prefix
this.pdfFormatVersion = version.substring(5);
}
14 years ago
return;
}
// May not be a PDF file, continue anyway.
},
parseStartXRef: function PDFDocument_parseStartXRef() {
var startXRef = this.startXRef;
this.xref.setStartXRef(startXRef);
},
setup: function PDFDocument_setup(recoveryMode) {
this.xref.parse(recoveryMode);
this.catalog = new Catalog(this.pdfManager, this.xref);
14 years ago
},
get numPages() {
var linearization = this.linearization;
var num = linearization ? linearization.numPages : this.catalog.numPages;
// shadow the prototype getter
return shadow(this, 'numPages', num);
},
get documentInfo() {
var docInfo = {
PDFFormatVersion: this.pdfFormatVersion,
IsAcroFormPresent: !!this.acroForm,
IsXFAPresent: !!this.xfa
};
var infoDict;
try {
infoDict = this.xref.trailer.get('Info');
} catch (err) {
info('The document information dictionary is invalid.');
}
if (infoDict) {
var validEntries = DocumentInfoValidators.entries;
// Only fill the document info with valid entries from the spec.
for (var key in validEntries) {
if (infoDict.has(key)) {
var value = infoDict.get(key);
// Make sure the value conforms to the spec.
if (validEntries[key](value)) {
docInfo[key] = (typeof value !== 'string' ?
value : stringToPDFString(value));
} else {
info('Bad value in document info for "' + key + '"');
}
}
}
}
return shadow(this, 'documentInfo', docInfo);
},
get fingerprint() {
var xref = this.xref, idArray, hash, fileID = '';
if (xref.trailer.has('ID')) {
idArray = xref.trailer.get('ID');
}
if (idArray && isArray(idArray) && idArray[0] !== EMPTY_FINGERPRINT) {
hash = stringToBytes(idArray[0]);
} else {
if (this.stream.ensureRange) {
this.stream.ensureRange(0,
Math.min(FINGERPRINT_FIRST_BYTES, this.stream.end));
}
hash = calculateMD5(this.stream.bytes.subarray(0,
FINGERPRINT_FIRST_BYTES), 0, FINGERPRINT_FIRST_BYTES);
}
for (var i = 0, n = hash.length; i < n; i++) {
var hex = hash[i].toString(16);
fileID += hex.length === 1 ? '0' + hex : hex;
}
return shadow(this, 'fingerprint', fileID);
},
getPage: function PDFDocument_getPage(pageIndex) {
return this.catalog.getPage(pageIndex);
},
cleanup: function PDFDocument_cleanup() {
return this.catalog.cleanup();
14 years ago
}
};
13 years ago
return PDFDocument;
14 years ago
})();