/** * @licstart The following is the entire license notice for the * Javascript code in this page * * Copyright 2018 Mozilla Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * @licend The above is the entire license notice for the * Javascript code in this page */ "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.PDFDocument = exports.Page = void 0; var _util = require("../shared/util"); var _obj = require("./obj"); var _primitives = require("./primitives"); var _stream2 = require("./stream"); var _annotation = require("./annotation"); var _crypto = require("./crypto"); var _parser = require("./parser"); var _operator_list = require("./operator_list"); var _evaluator = require("./evaluator"); var _function = require("./function"); function _slicedToArray(arr, i) { return _arrayWithHoles(arr) || _iterableToArrayLimit(arr, i) || _nonIterableRest(); } function _nonIterableRest() { throw new TypeError("Invalid attempt to destructure non-iterable instance"); } function _iterableToArrayLimit(arr, i) { var _arr = []; var _n = true; var _d = false; var _e = undefined; try { for (var _i = arr[Symbol.iterator](), _s; !(_n = (_s = _i.next()).done); _n = true) { _arr.push(_s.value); if (i && _arr.length === i) break; } } catch (err) { _d = true; _e = err; } finally { try { if (!_n && _i["return"] != null) _i["return"](); } finally { if (_d) throw _e; } } return _arr; } function _arrayWithHoles(arr) { if (Array.isArray(arr)) return arr; } function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } } function _defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } function _createClass(Constructor, protoProps, staticProps) { if (protoProps) _defineProperties(Constructor.prototype, protoProps); if (staticProps) _defineProperties(Constructor, staticProps); return Constructor; } var DEFAULT_USER_UNIT = 1.0; var LETTER_SIZE_MEDIABOX = [0, 0, 612, 792]; function isAnnotationRenderable(annotation, intent) { return intent === 'display' && annotation.viewable || intent === 'print' && annotation.printable; } var Page = /*#__PURE__*/ function () { function Page(_ref) { var pdfManager = _ref.pdfManager, xref = _ref.xref, pageIndex = _ref.pageIndex, pageDict = _ref.pageDict, ref = _ref.ref, fontCache = _ref.fontCache, builtInCMapCache = _ref.builtInCMapCache, pdfFunctionFactory = _ref.pdfFunctionFactory; _classCallCheck(this, Page); this.pdfManager = pdfManager; this.pageIndex = pageIndex; this.pageDict = pageDict; this.xref = xref; this.ref = ref; this.fontCache = fontCache; this.builtInCMapCache = builtInCMapCache; this.pdfFunctionFactory = pdfFunctionFactory; this.evaluatorOptions = pdfManager.evaluatorOptions; this.resourcesPromise = null; var uniquePrefix = "p".concat(this.pageIndex, "_"); var idCounters = { obj: 0 }; this.idFactory = { createObjId: function createObjId() { return uniquePrefix + ++idCounters.obj; } }; } _createClass(Page, [{ key: "_getInheritableProperty", value: function _getInheritableProperty(key) { var getArray = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : false; var value = (0, _util.getInheritableProperty)({ dict: this.pageDict, key: key, getArray: getArray, stopWhenFound: false }); if (!Array.isArray(value)) { return value; } if (value.length === 1 || !(0, _primitives.isDict)(value[0])) { return value[0]; } return _primitives.Dict.merge(this.xref, value); } }, { key: "getContentStream", value: function getContentStream() { var content = this.content; var stream; if (Array.isArray(content)) { var xref = this.xref; var streams = []; var _iteratorNormalCompletion = true; var _didIteratorError = false; var _iteratorError = undefined; try { for (var _iterator = content[Symbol.iterator](), _step; !(_iteratorNormalCompletion = (_step = _iterator.next()).done); _iteratorNormalCompletion = true) { var _stream = _step.value; streams.push(xref.fetchIfRef(_stream)); } } catch (err) { _didIteratorError = true; _iteratorError = err; } finally { try { if (!_iteratorNormalCompletion && _iterator.return != null) { _iterator.return(); } } finally { if (_didIteratorError) { throw _iteratorError; } } } stream = new _stream2.StreamsSequenceStream(streams); } else if ((0, _primitives.isStream)(content)) { stream = content; } else { stream = new _stream2.NullStream(); } return stream; } }, { key: "loadResources", value: function loadResources(keys) { var _this = this; if (!this.resourcesPromise) { this.resourcesPromise = this.pdfManager.ensure(this, 'resources'); } return this.resourcesPromise.then(function () { var objectLoader = new _obj.ObjectLoader(_this.resources, keys, _this.xref); return objectLoader.load(); }); } }, { key: "getOperatorList", value: function getOperatorList(_ref2) { var _this2 = this; var handler = _ref2.handler, task = _ref2.task, intent = _ref2.intent, renderInteractiveForms = _ref2.renderInteractiveForms; var contentStreamPromise = this.pdfManager.ensure(this, 'getContentStream'); var resourcesPromise = this.loadResources(['ExtGState', 'ColorSpace', 'Pattern', 'Shading', 'XObject', 'Font']); var partialEvaluator = new _evaluator.PartialEvaluator({ pdfManager: this.pdfManager, xref: this.xref, handler: handler, pageIndex: this.pageIndex, idFactory: this.idFactory, fontCache: this.fontCache, builtInCMapCache: this.builtInCMapCache, options: this.evaluatorOptions, pdfFunctionFactory: this.pdfFunctionFactory }); var dataPromises = Promise.all([contentStreamPromise, resourcesPromise]); var pageListPromise = dataPromises.then(function (_ref3) { var _ref4 = _slicedToArray(_ref3, 1), contentStream = _ref4[0]; var opList = new _operator_list.OperatorList(intent, handler, _this2.pageIndex); handler.send('StartRenderPage', { transparency: partialEvaluator.hasBlendModes(_this2.resources), pageIndex: _this2.pageIndex, intent: intent }); return partialEvaluator.getOperatorList({ stream: contentStream, task: task, resources: _this2.resources, operatorList: opList }).then(function () { return opList; }); }); return Promise.all([pageListPromise, this._parsedAnnotations]).then(function (_ref5) { var _ref6 = _slicedToArray(_ref5, 2), pageOpList = _ref6[0], annotations = _ref6[1]; if (annotations.length === 0) { pageOpList.flush(true); return pageOpList; } var opListPromises = []; var _iteratorNormalCompletion2 = true; var _didIteratorError2 = false; var _iteratorError2 = undefined; try { for (var _iterator2 = annotations[Symbol.iterator](), _step2; !(_iteratorNormalCompletion2 = (_step2 = _iterator2.next()).done); _iteratorNormalCompletion2 = true) { var annotation = _step2.value; if (isAnnotationRenderable(annotation, intent)) { opListPromises.push(annotation.getOperatorList(partialEvaluator, task, renderInteractiveForms)); } } } catch (err) { _didIteratorError2 = true; _iteratorError2 = err; } finally { try { if (!_iteratorNormalCompletion2 && _iterator2.return != null) { _iterator2.return(); } } finally { if (_didIteratorError2) { throw _iteratorError2; } } } return Promise.all(opListPromises).then(function (opLists) { pageOpList.addOp(_util.OPS.beginAnnotations, []); var _iteratorNormalCompletion3 = true; var _didIteratorError3 = false; var _iteratorError3 = undefined; try { for (var _iterator3 = opLists[Symbol.iterator](), _step3; !(_iteratorNormalCompletion3 = (_step3 = _iterator3.next()).done); _iteratorNormalCompletion3 = true) { var opList = _step3.value; pageOpList.addOpList(opList); } } catch (err) { _didIteratorError3 = true; _iteratorError3 = err; } finally { try { if (!_iteratorNormalCompletion3 && _iterator3.return != null) { _iterator3.return(); } } finally { if (_didIteratorError3) { throw _iteratorError3; } } } pageOpList.addOp(_util.OPS.endAnnotations, []); pageOpList.flush(true); return pageOpList; }); }); } }, { key: "extractTextContent", value: function extractTextContent(_ref7) { var _this3 = this; var handler = _ref7.handler, task = _ref7.task, normalizeWhitespace = _ref7.normalizeWhitespace, sink = _ref7.sink, combineTextItems = _ref7.combineTextItems; var contentStreamPromise = this.pdfManager.ensure(this, 'getContentStream'); var resourcesPromise = this.loadResources(['ExtGState', 'XObject', 'Font']); var dataPromises = Promise.all([contentStreamPromise, resourcesPromise]); return dataPromises.then(function (_ref8) { var _ref9 = _slicedToArray(_ref8, 1), contentStream = _ref9[0]; var partialEvaluator = new _evaluator.PartialEvaluator({ pdfManager: _this3.pdfManager, xref: _this3.xref, handler: handler, pageIndex: _this3.pageIndex, idFactory: _this3.idFactory, fontCache: _this3.fontCache, builtInCMapCache: _this3.builtInCMapCache, options: _this3.evaluatorOptions, pdfFunctionFactory: _this3.pdfFunctionFactory }); return partialEvaluator.getTextContent({ stream: contentStream, task: task, resources: _this3.resources, normalizeWhitespace: normalizeWhitespace, combineTextItems: combineTextItems, sink: sink }); }); } }, { key: "getAnnotationsData", value: function getAnnotationsData(intent) { return this._parsedAnnotations.then(function (annotations) { var annotationsData = []; for (var i = 0, ii = annotations.length; i < ii; i++) { if (!intent || isAnnotationRenderable(annotations[i], intent)) { annotationsData.push(annotations[i].data); } } return annotationsData; }); } }, { key: "content", get: function get() { return this.pageDict.get('Contents'); } }, { key: "resources", get: function get() { return (0, _util.shadow)(this, 'resources', this._getInheritableProperty('Resources') || _primitives.Dict.empty); } }, { key: "mediaBox", get: function get() { var mediaBox = this._getInheritableProperty('MediaBox', true); if (!Array.isArray(mediaBox) || mediaBox.length !== 4) { return (0, _util.shadow)(this, 'mediaBox', LETTER_SIZE_MEDIABOX); } return (0, _util.shadow)(this, 'mediaBox', mediaBox); } }, { key: "cropBox", get: function get() { var cropBox = this._getInheritableProperty('CropBox', true); if (!Array.isArray(cropBox) || cropBox.length !== 4) { return (0, _util.shadow)(this, 'cropBox', this.mediaBox); } return (0, _util.shadow)(this, 'cropBox', cropBox); } }, { key: "userUnit", get: function get() { var obj = this.pageDict.get('UserUnit'); if (!(0, _util.isNum)(obj) || obj <= 0) { obj = DEFAULT_USER_UNIT; } return (0, _util.shadow)(this, 'userUnit', obj); } }, { key: "view", get: function get() { var mediaBox = this.mediaBox, cropBox = this.cropBox; if (mediaBox === cropBox) { return (0, _util.shadow)(this, 'view', mediaBox); } var intersection = _util.Util.intersect(cropBox, mediaBox); return (0, _util.shadow)(this, 'view', intersection || mediaBox); } }, { key: "rotate", get: function get() { var rotate = this._getInheritableProperty('Rotate') || 0; if (rotate % 90 !== 0) { rotate = 0; } else if (rotate >= 360) { rotate = rotate % 360; } else if (rotate < 0) { rotate = (rotate % 360 + 360) % 360; } return (0, _util.shadow)(this, 'rotate', rotate); } }, { key: "annotations", get: function get() { return (0, _util.shadow)(this, 'annotations', this._getInheritableProperty('Annots') || []); } }, { key: "_parsedAnnotations", get: function get() { var _this4 = this; var parsedAnnotations = this.pdfManager.ensure(this, 'annotations').then(function () { var annotationRefs = _this4.annotations; var annotationPromises = []; for (var i = 0, ii = annotationRefs.length; i < ii; i++) { annotationPromises.push(_annotation.AnnotationFactory.create(_this4.xref, annotationRefs[i], _this4.pdfManager, _this4.idFactory)); } return Promise.all(annotationPromises).then(function (annotations) { return annotations.filter(function isDefined(annotation) { return !!annotation; }); }, function (reason) { (0, _util.warn)("_parsedAnnotations: \"".concat(reason, "\".")); return []; }); }); return (0, _util.shadow)(this, '_parsedAnnotations', parsedAnnotations); } }]); return Page; }(); exports.Page = Page; var FINGERPRINT_FIRST_BYTES = 1024; var EMPTY_FINGERPRINT = '\x00\x00\x00\x00\x00\x00\x00' + '\x00\x00\x00\x00\x00\x00\x00\x00\x00'; function find(stream, needle, limit, backwards) { var pos = stream.pos; var end = stream.end; if (pos + limit > end) { limit = end - pos; } var strBuf = []; for (var i = 0; i < limit; ++i) { strBuf.push(String.fromCharCode(stream.getByte())); } var str = strBuf.join(''); stream.pos = pos; var index = backwards ? str.lastIndexOf(needle) : str.indexOf(needle); if (index === -1) { return false; } stream.pos += index; return true; } var PDFDocument = /*#__PURE__*/ function () { function PDFDocument(pdfManager, arg) { _classCallCheck(this, PDFDocument); var stream; if ((0, _primitives.isStream)(arg)) { stream = arg; } else if ((0, _util.isArrayBuffer)(arg)) { stream = new _stream2.Stream(arg); } else { throw new Error('PDFDocument: Unknown argument type'); } if (stream.length <= 0) { throw new Error('PDFDocument: Stream must have data'); } this.pdfManager = pdfManager; this.stream = stream; this.xref = new _obj.XRef(stream, pdfManager); this.pdfFunctionFactory = new _function.PDFFunctionFactory({ xref: this.xref, isEvalSupported: pdfManager.evaluatorOptions.isEvalSupported }); this._pagePromises = []; } _createClass(PDFDocument, [{ key: "parse", value: function parse(recoveryMode) { this.setup(recoveryMode); var version = this.catalog.catDict.get('Version'); if ((0, _primitives.isName)(version)) { this.pdfFormatVersion = version.name; } try { this.acroForm = this.catalog.catDict.get('AcroForm'); if (this.acroForm) { this.xfa = this.acroForm.get('XFA'); var fields = this.acroForm.get('Fields'); if ((!fields || !Array.isArray(fields) || fields.length === 0) && !this.xfa) { this.acroForm = null; } } } catch (ex) { if (ex instanceof _util.MissingDataException) { throw ex; } (0, _util.info)('Cannot fetch AcroForm entry; assuming no AcroForms are present'); this.acroForm = null; } } }, { key: "checkHeader", value: function checkHeader() { var stream = this.stream; stream.reset(); if (!find(stream, '%PDF-', 1024)) { return; } stream.moveStart(); var MAX_PDF_VERSION_LENGTH = 12; var version = '', ch; while ((ch = stream.getByte()) > 0x20) { if (version.length >= MAX_PDF_VERSION_LENGTH) { break; } version += String.fromCharCode(ch); } if (!this.pdfFormatVersion) { this.pdfFormatVersion = version.substring(5); } } }, { key: "parseStartXRef", value: function parseStartXRef() { this.xref.setStartXRef(this.startXRef); } }, { key: "setup", value: function setup(recoveryMode) { this.xref.parse(recoveryMode); this.catalog = new _obj.Catalog(this.pdfManager, this.xref); } }, { key: "_getLinearizationPage", value: function _getLinearizationPage(pageIndex) { var catalog = this.catalog, linearization = this.linearization; (0, _util.assert)(linearization && linearization.pageFirst === pageIndex); var ref = new _primitives.Ref(linearization.objectNumberFirst, 0); return this.xref.fetchAsync(ref).then(function (obj) { if ((0, _primitives.isDict)(obj, 'Page') || (0, _primitives.isDict)(obj) && !obj.has('Type') && obj.has('Contents')) { if (ref && !catalog.pageKidsCountCache.has(ref)) { catalog.pageKidsCountCache.put(ref, 1); } return [obj, ref]; } throw new _util.FormatError('The Linearization dictionary doesn\'t point ' + 'to a valid Page dictionary.'); }).catch(function (reason) { (0, _util.info)(reason); return catalog.getPageDict(pageIndex); }); } }, { key: "getPage", value: function getPage(pageIndex) { var _this5 = this; if (this._pagePromises[pageIndex] !== undefined) { return this._pagePromises[pageIndex]; } var catalog = this.catalog, linearization = this.linearization; var promise = linearization && linearization.pageFirst === pageIndex ? this._getLinearizationPage(pageIndex) : catalog.getPageDict(pageIndex); return this._pagePromises[pageIndex] = promise.then(function (_ref10) { var _ref11 = _slicedToArray(_ref10, 2), pageDict = _ref11[0], ref = _ref11[1]; return new Page({ pdfManager: _this5.pdfManager, xref: _this5.xref, pageIndex: pageIndex, pageDict: pageDict, ref: ref, fontCache: catalog.fontCache, builtInCMapCache: catalog.builtInCMapCache, pdfFunctionFactory: _this5.pdfFunctionFactory }); }); } }, { key: "checkFirstPage", value: function checkFirstPage() { var _this6 = this; return this.getPage(0).catch(function (reason) { if (reason instanceof _util.XRefEntryException) { _this6._pagePromises.length = 0; _this6.cleanup(); throw new _util.XRefParseException(); } }); } }, { key: "fontFallback", value: function fontFallback(id, handler) { return this.catalog.fontFallback(id, handler); } }, { key: "cleanup", value: function cleanup() { return this.catalog.cleanup(); } }, { key: "linearization", get: function get() { var linearization = null; try { linearization = _parser.Linearization.create(this.stream); } catch (err) { if (err instanceof _util.MissingDataException) { throw err; } (0, _util.info)(err); } return (0, _util.shadow)(this, 'linearization', linearization); } }, { key: "startXRef", get: function get() { var stream = this.stream; var startXRef = 0; if (this.linearization) { stream.reset(); if (find(stream, 'endobj', 1024)) { startXRef = stream.pos + 6; } } else { var step = 1024; var startXRefLength = 'startxref'.length; var found = false, pos = stream.end; while (!found && pos > 0) { pos -= step - startXRefLength; if (pos < 0) { pos = 0; } stream.pos = pos; found = find(stream, 'startxref', step, true); } if (found) { stream.skip(9); var ch; do { ch = stream.getByte(); } while ((0, _util.isSpace)(ch)); var str = ''; while (ch >= 0x20 && ch <= 0x39) { str += String.fromCharCode(ch); ch = stream.getByte(); } startXRef = parseInt(str, 10); if (isNaN(startXRef)) { startXRef = 0; } } } return (0, _util.shadow)(this, 'startXRef', startXRef); } }, { key: "numPages", get: function get() { var linearization = this.linearization; var num = linearization ? linearization.numPages : this.catalog.numPages; return (0, _util.shadow)(this, 'numPages', num); } }, { key: "documentInfo", get: function get() { var DocumentInfoValidators = { Title: _util.isString, Author: _util.isString, Subject: _util.isString, Keywords: _util.isString, Creator: _util.isString, Producer: _util.isString, CreationDate: _util.isString, ModDate: _util.isString, Trapped: _primitives.isName }; var docInfo = { PDFFormatVersion: this.pdfFormatVersion, IsLinearized: !!this.linearization, IsAcroFormPresent: !!this.acroForm, IsXFAPresent: !!this.xfa }; var infoDict; try { infoDict = this.xref.trailer.get('Info'); } catch (err) { if (err instanceof _util.MissingDataException) { throw err; } (0, _util.info)('The document information dictionary is invalid.'); } if ((0, _primitives.isDict)(infoDict)) { var _iteratorNormalCompletion4 = true; var _didIteratorError4 = false; var _iteratorError4 = undefined; try { for (var _iterator4 = infoDict.getKeys()[Symbol.iterator](), _step4; !(_iteratorNormalCompletion4 = (_step4 = _iterator4.next()).done); _iteratorNormalCompletion4 = true) { var key = _step4.value; var value = infoDict.get(key); if (DocumentInfoValidators[key]) { if (DocumentInfoValidators[key](value)) { docInfo[key] = typeof value !== 'string' ? value : (0, _util.stringToPDFString)(value); } else { (0, _util.info)("Bad value in document info for \"".concat(key, "\".")); } } else if (typeof key === 'string') { var customValue = void 0; if ((0, _util.isString)(value)) { customValue = (0, _util.stringToPDFString)(value); } else if ((0, _primitives.isName)(value) || (0, _util.isNum)(value) || (0, _util.isBool)(value)) { customValue = value; } else { (0, _util.info)("Unsupported value in document info for (custom) \"".concat(key, "\".")); continue; } if (!docInfo['Custom']) { docInfo['Custom'] = Object.create(null); } docInfo['Custom'][key] = customValue; } } } catch (err) { _didIteratorError4 = true; _iteratorError4 = err; } finally { try { if (!_iteratorNormalCompletion4 && _iterator4.return != null) { _iterator4.return(); } } finally { if (_didIteratorError4) { throw _iteratorError4; } } } } return (0, _util.shadow)(this, 'documentInfo', docInfo); } }, { key: "fingerprint", get: function get() { var hash; var idArray = this.xref.trailer.get('ID'); if (Array.isArray(idArray) && idArray[0] && (0, _util.isString)(idArray[0]) && idArray[0] !== EMPTY_FINGERPRINT) { hash = (0, _util.stringToBytes)(idArray[0]); } else { if (this.stream.ensureRange) { this.stream.ensureRange(0, Math.min(FINGERPRINT_FIRST_BYTES, this.stream.end)); } hash = (0, _crypto.calculateMD5)(this.stream.bytes.subarray(0, FINGERPRINT_FIRST_BYTES), 0, FINGERPRINT_FIRST_BYTES); } var fingerprint = ''; for (var i = 0, ii = hash.length; i < ii; i++) { var hex = hash[i].toString(16); fingerprint += hex.length === 1 ? '0' + hex : hex; } return (0, _util.shadow)(this, 'fingerprint', fingerprint); } }]); return PDFDocument; }(); exports.PDFDocument = PDFDocument;