diff --git a/src/core/obj.js b/src/core/obj.js
index 98d6d3d6d..923cf18b5 100644
--- a/src/core/obj.js
+++ b/src/core/obj.js
@@ -48,6 +48,7 @@ var stringToPDFString = sharedUtil.stringToPDFString;
 var stringToUTF8String = sharedUtil.stringToUTF8String;
 var warn = sharedUtil.warn;
 var isValidUrl = sharedUtil.isValidUrl;
+var Util = sharedUtil.Util;
 var Ref = corePrimitives.Ref;
 var RefSet = corePrimitives.RefSet;
 var RefSetCache = corePrimitives.RefSetCache;
@@ -262,6 +263,119 @@ var Catalog = (function CatalogClosure() {
         }
         return dest;
       },
+
+    /**
+     * The page labels as computed by `readPageLabels`, or `null` when
+     * reading them fails. A MissingDataException is re-thrown to the
+     * caller; any other error only produces a warning.
+     */
+    get pageLabels() {
+      var obj = null;
+      try {
+        obj = this.readPageLabels();
+      } catch (ex) {
+        if (ex instanceof MissingDataException) {
+          throw ex;
+        }
+        warn('Unable to read page labels.');
+      }
+      return shadow(this, 'pageLabels', obj);
+    },
+    /**
+     * Computes the label of every page from the 'PageLabels' number tree of
+     * the catalog dictionary.
+     * @return {Array|null} `null` when there is no 'PageLabels' entry; an
+     *   empty Array when every label equals the standard page numbering
+     *   ('1', '2', '3', ...); otherwise one label string per page index.
+     */
+    readPageLabels: function Catalog_readPageLabels() {
+      var obj = this.catDict.getRaw('PageLabels');
+      if (!obj) {
+        return null;
+      }
+      var pageLabels = new Array(this.numPages);
+      var style = null;
+      var prefix = '';
+      var start = 1;
+
+      var numberTree = new NumberTree(obj, this.xref);
+      var nums = numberTree.getAll();
+      var currentLabel = '', currentIndex = 1;
+
+      for (var i = 0, ii = this.numPages; i < ii; i++) {
+        // A PageLabel dictionary at this index starts a new labelling range;
+        // its settings stay in effect until the next dictionary is found.
+        if (nums.hasOwnProperty(i)) {
+          var labelDict = nums[i];
+          assert(isDict(labelDict), 'The PageLabel is not a dictionary.');
+
+          var type = labelDict.get('Type');
+          assert(!type || (isName(type) && type.name === 'PageLabel'),
+                 'Invalid type in PageLabel dictionary.');
+
+          var s = labelDict.get('S');
+          assert(!s || isName(s), 'Invalid style in PageLabel dictionary.');
+          style = (s ? s.name : null);
+
+          prefix = labelDict.get('P') || '';
+          assert(isString(prefix), 'Invalid prefix in PageLabel dictionary.');
+          // The prefix is a PDF text string, hence it must be decoded (it
+          // may e.g. be UTF-16BE encoded) before being put into the labels.
+          prefix = stringToPDFString(prefix);
+
+          start = labelDict.get('St') || 1;
+          // Numbering has to start at one or above; smaller values cannot
+          // be expressed in the Roman ('R'/'r') or letter ('A'/'a') styles.
+          assert(isInt(start) && start >= 1,
+                 'Invalid start in PageLabel dictionary.');
+          currentIndex = start;
+        }
+
+        switch (style) {
+          case 'D':
+            currentLabel = currentIndex;
+            break;
+          case 'R':
+          case 'r':
+            currentLabel = Util.toRoman(currentIndex, style === 'r');
+            break;
+          case 'A':
+          case 'a':
+            var LIMIT = 26; // Use only the characters A--Z, or a--z.
+            var A_UPPER_CASE = 0x41, A_LOWER_CASE = 0x61;
+
+            var baseCharCode = (style === 'a' ? A_LOWER_CASE : A_UPPER_CASE);
+            var letterIndex = currentIndex - 1;
+            var character = String.fromCharCode(baseCharCode +
+                                                (letterIndex % LIMIT));
+            // The letter is repeated once more for every complete pass
+            // through the alphabet, i.e. A..Z, AA..ZZ, AAA..ZZZ, etc.
+            var charBuf = [];
+            for (var j = 0, jj = (letterIndex / LIMIT) | 0; j <= jj; j++) {
+              charBuf.push(character);
+            }
+            currentLabel = charBuf.join('');
+            break;
+          default:
+            // Without a style the label consists solely of the prefix.
+            assert(!style,
+                   'Invalid style "' + style + '" in PageLabel dictionary.');
+        }
+        pageLabels[i] = prefix + currentLabel;
+
+        currentLabel = '';
+        currentIndex++;
+      }
+
+      // Ignore PageLabels if they correspond to standard page numbering.
+      for (i = 0, ii = this.numPages; i < ii; i++) {
+        if (pageLabels[i] !== (i + 1).toString()) {
+          break;
+        }
+      }
+      return (i === ii ? [] : pageLabels);
+    },
+
     get attachments() {
       var xref = this.xref;
       var attachments = null, nameTreeRef;
@@ -1137,24 +1251,23 @@ var XRef = (function XRefClosure() {
 })();
 
 /**
- * A NameTree is like a Dict but has some advantageous properties, see the
- * spec (7.9.6) for more details.
- * TODO: implement all the Dict functions and make this more efficent.
+ * A NameTree/NumberTree is like a Dict but has some advantageous properties,
+ * see the specification (7.9.6 and 7.9.7) for additional details.
+ * TODO: implement all the Dict functions and make this more efficient.
  */
-var NameTree = (function NameTreeClosure() {
-  function NameTree(root, xref) {
-    this.root = root;
-    this.xref = xref;
+var NameOrNumberTree = (function NameOrNumberTreeClosure() {
+  function NameOrNumberTree(root, xref) {
+    throw new Error('Cannot initialize NameOrNumberTree.');
   }
 
-  NameTree.prototype = {
-    getAll: function NameTree_getAll() {
+  NameOrNumberTree.prototype = {
+    getAll: function NameOrNumberTree_getAll() {
       var dict = {};
       if (!this.root) {
         return dict;
       }
       var xref = this.xref;
-      // reading name tree
+      // Reading Name/Number tree.
       var processed = new RefSet();
       processed.put(this.root);
       var queue = [this.root];
@@ -1168,45 +1281,43 @@ var NameTree = (function NameTreeClosure() {
           var kids = obj.get('Kids');
           for (i = 0, n = kids.length; i < n; i++) {
             var kid = kids[i];
-            if (processed.has(kid)) {
-              error('invalid destinations');
-            }
+            assert(!processed.has(kid),
+                   'Duplicate entry in "' + this._type + '" tree.');
             queue.push(kid);
             processed.put(kid);
           }
           continue;
         }
-        var names = obj.get('Names');
-        if (names) {
-          for (i = 0, n = names.length; i < n; i += 2) {
-            dict[xref.fetchIfRef(names[i])] = xref.fetchIfRef(names[i + 1]);
+        var entries = obj.get(this._type);
+        if (isArray(entries)) {
+          for (i = 0, n = entries.length; i < n; i += 2) {
+            dict[xref.fetchIfRef(entries[i])] = xref.fetchIfRef(entries[i + 1]);
           }
         }
       }
       return dict;
     },
 
-    get: function NameTree_get(destinationId) {
+    get: function NameOrNumberTree_get(key) {
       if (!this.root) {
         return null;
       }
 
       var xref = this.xref;
-      var kidsOrNames = xref.fetchIfRef(this.root);
+      var kidsOrEntries = xref.fetchIfRef(this.root);
       var loopCount = 0;
-      var MAX_NAMES_LEVELS = 10;
+      var MAX_LEVELS = 10;
       var l, r, m;
 
       // Perform a binary search to quickly find the entry that
-      // contains the named destination we are looking for.
-      while (kidsOrNames.has('Kids')) {
-        loopCount++;
-        if (loopCount > MAX_NAMES_LEVELS) {
-          warn('Search depth limit for named destionations has been reached.');
+      // contains the key we are looking for.
+      while (kidsOrEntries.has('Kids')) {
+        if (++loopCount > MAX_LEVELS) {
+          warn('Search depth limit reached for "' + this._type + '" tree.');
           return null;
         }
 
-        var kids = kidsOrNames.get('Kids');
+        var kids = kidsOrEntries.get('Kids');
         if (!isArray(kids)) {
           return null;
         }
@@ -1218,12 +1329,12 @@ var NameTree = (function NameTreeClosure() {
           var kid = xref.fetchIfRef(kids[m]);
           var limits = kid.get('Limits');
 
-          if (destinationId < xref.fetchIfRef(limits[0])) {
+          if (key < xref.fetchIfRef(limits[0])) {
             r = m - 1;
-          } else if (destinationId > xref.fetchIfRef(limits[1])) {
+          } else if (key > xref.fetchIfRef(limits[1])) {
             l = m + 1;
           } else {
-            kidsOrNames = xref.fetchIfRef(kids[m]);
+            kidsOrEntries = xref.fetchIfRef(kids[m]);
             break;
           }
         }
@@ -1232,33 +1343,63 @@ var NameTree = (function NameTreeClosure() {
         }
       }
 
-      // If we get here, then we have found the right entry. Now
-      // go through the named destinations in the Named dictionary
-      // until we find the exact destination we're looking for.
-      var names = kidsOrNames.get('Names');
-      if (isArray(names)) {
+      // If we get here, then we have found the right entry. Now go through the
+      // entries in the dictionary until we find the key we're looking for.
+      var entries = kidsOrEntries.get(this._type);
+      if (isArray(entries)) {
         // Perform a binary search to reduce the lookup time.
         l = 0;
-        r = names.length - 2;
+        r = entries.length - 2;
         while (l <= r) {
           // Check only even indices (0, 2, 4, ...) because the
-          // odd indices contain the actual D array.
+          // odd indices contain the actual data.
           m = (l + r) & ~1;
-          if (destinationId < xref.fetchIfRef(names[m])) {
+          var currentKey = xref.fetchIfRef(entries[m]);
+          if (key < currentKey) {
             r = m - 2;
-          } else if (destinationId > xref.fetchIfRef(names[m])) {
+          } else if (key > currentKey) {
             l = m + 2;
           } else {
-            return xref.fetchIfRef(names[m + 1]);
+            return xref.fetchIfRef(entries[m + 1]);
           }
         }
       }
       return null;
     }
   };
+  return NameOrNumberTree;
+})();
+
+/**
+ * A NameOrNumberTree whose leaf entries are read from the 'Names' array.
+ */
+var NameTree = (function NameTreeClosure() {
+  function NameTree(root, xref) {
+    this.root = root;
+    this.xref = xref;
+    this._type = 'Names';
+  }
+
+  Util.inherit(NameTree, NameOrNumberTree, {});
+
   return NameTree;
 })();
 
+/**
+ * A NameOrNumberTree whose leaf entries are read from the 'Nums' array.
+ */
+var NumberTree = (function NumberTreeClosure() {
+  function NumberTree(root, xref) {
+    this.root = root;
+    this.xref = xref;
+    this._type = 'Nums';
+  }
+
+  Util.inherit(NumberTree, NameOrNumberTree, {});
+
+  return NumberTree;
+})();
+
 /**
  * "A PDF file can refer to the contents of another file by using a File
  * Specification (PDF 1.1)", see the spec (7.11) for more details.
diff --git a/src/core/worker.js b/src/core/worker.js
index 37f2fc756..f71e42b93 100644
--- a/src/core/worker.js
+++ b/src/core/worker.js
@@ -451,6 +451,12 @@ var WorkerMessageHandler = PDFJS.WorkerMessageHandler = {
       }
     );
 
+    handler.on('GetPageLabels',
+      function wphSetupGetPageLabels(data) {
+        return pdfManager.ensureCatalog('pageLabels');
+      }
+    );
+
     handler.on('GetAttachments',
       function wphSetupGetAttachments(data) {
         return pdfManager.ensureCatalog('attachments');
diff --git a/src/display/api.js b/src/display/api.js
index 9a1ad451a..d94a4bd3a 100644
--- a/src/display/api.js
+++ b/src/display/api.js
@@ -690,6 +690,16 @@ var PDFDocumentProxy = (function PDFDocumentProxyClosure() {
     getDestination: function PDFDocumentProxy_getDestination(id) {
       return this.transport.getDestination(id);
     },
+    /**
+     * @return {Promise} A promise that is resolved with: an Array containing
+     *   the pageLabels (as strings) for the pageIndexes; or null, when no
+     *   pageLabels are present in the PDF file or they cannot be read.
+     *   NOTE: If the pageLabels are all identical to standard page numbering,
+     *   i.e. ['1', '2', '3', ...], the promise is resolved with an empty Array.
+     */
+    getPageLabels: function PDFDocumentProxy_getPageLabels() {
+      return this.transport.getPageLabels();
+    },
     /**
      * @return {Promise} A promise that is resolved with a lookup table for
      *   mapping named attachments to their content.
@@ -1804,6 +1814,10 @@ var WorkerTransport = (function WorkerTransportClosure() {
       return this.messageHandler.sendWithPromise('GetDestination', { id: id });
     },
 
+    getPageLabels: function WorkerTransport_getPageLabels() {
+      return this.messageHandler.sendWithPromise('GetPageLabels', null);
+    },
+
     getAttachments: function WorkerTransport_getAttachments() {
       return this.messageHandler.sendWithPromise('GetAttachments', null);
     },
diff --git a/src/shared/util.js b/src/shared/util.js
index 23b359227..9a9926667 100644
--- a/src/shared/util.js
+++ b/src/shared/util.js
@@ -808,6 +808,44 @@ var Util = PDFJS.Util = (function UtilClosure() {
     return num < 0 ? -1 : 1;
   };
 
+  // Lookup table for Util.toRoman, made up of three rows with ten entries
+  // each: the hundreds (index 0-9), the tens (10-19) and the ones (20-29).
+  var ROMAN_NUMBER_MAP = [
+    '', 'C', 'CC', 'CCC', 'CD', 'D', 'DC', 'DCC', 'DCCC', 'CM',
+    '', 'X', 'XX', 'XXX', 'XL', 'L', 'LX', 'LXX', 'LXXX', 'XC',
+    '', 'I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII', 'IX'
+  ];
+  /**
+   * Converts positive integers to (upper case) Roman numerals.
+   * @param {integer} number - The number that should be converted.
+   * @param {boolean} lowerCase - Indicates if the result should be converted
+   *   to lower case letters. The default is false.
+   * @return {string} The resulting Roman numeral.
+   */
+  Util.toRoman = function Util_toRoman(number, lowerCase) {
+    assert(isInt(number) && number > 0,
+           'The number should be a positive integer.');
+    var pos, romanBuf = [];
+    // Thousands
+    while (number >= 1000) {
+      number -= 1000;
+      romanBuf.push('M');
+    }
+    // Hundreds
+    pos = (number / 100) | 0;
+    number %= 100;
+    romanBuf.push(ROMAN_NUMBER_MAP[pos]);
+    // Tens
+    pos = (number / 10) | 0;
+    number %= 10;
+    romanBuf.push(ROMAN_NUMBER_MAP[10 + pos]);
+    // Ones
+    romanBuf.push(ROMAN_NUMBER_MAP[20 + number]);
+
+    var romanStr = romanBuf.join('');
+    return (lowerCase ? romanStr.toLowerCase() : romanStr);
+  };
+
   Util.appendToArray = function Util_appendToArray(arr1, arr2) {
     Array.prototype.push.apply(arr1, arr2);
   };
diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore
index 6ce31a348..5be1fc3f4 100644
--- a/test/pdfs/.gitignore
+++ b/test/pdfs/.gitignore
@@ -24,6 +24,7 @@
 !arial_unicode_ab_cidfont.pdf
 !arial_unicode_en_cidfont.pdf
 !asciihexdecode.pdf
+!bug793632.pdf
 !bug1020858.pdf
 !bug1050040.pdf
 !bug1200096.pdf
diff --git a/test/pdfs/bug793632.pdf b/test/pdfs/bug793632.pdf
new file mode 100644
index 000000000..e17e69b6b
Binary files /dev/null and b/test/pdfs/bug793632.pdf differ
diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js
index a1213bec0..466d035e1 100644
--- a/test/unit/api_spec.js
+++ b/test/unit/api_spec.js
@@ -329,6 +329,36 @@ describe('api', function() {
         expect(data).toEqual(null);
       });
     });
+    it('gets non-existent page labels', function () {
+      var promise = doc.getPageLabels();
+      waitsForPromiseResolved(promise, function (data) {
+        expect(data).toEqual(null);
+      });
+    });
+    it('gets page labels', function () {
+      // PageLabels with Roman/Arabic numerals.
+      var url0 = combineUrl(window.location.href, '../pdfs/bug793632.pdf');
+      var promise0 = PDFJS.getDocument(url0).promise.then(function (pdfDoc) {
+        return pdfDoc.getPageLabels();
+      });
+      // PageLabels with only a label prefix.
+      var url1 = combineUrl(window.location.href, '../pdfs/issue1453.pdf');
+      var promise1 = PDFJS.getDocument(url1).promise.then(function (pdfDoc) {
+        return pdfDoc.getPageLabels();
+      });
+      // PageLabels identical to standard page numbering.
+      var url2 = combineUrl(window.location.href, '../pdfs/rotation.pdf');
+      var promise2 = PDFJS.getDocument(url2).promise.then(function (pdfDoc) {
+        return pdfDoc.getPageLabels();
+      });
+
+      waitsForPromiseResolved(Promise.all([promise0, promise1, promise2]),
+          function (pageLabels) {
+        expect(pageLabels[0]).toEqual(['i', 'ii', 'iii', '1']);
+        expect(pageLabels[1]).toEqual(['Front Page1']);
+        expect(pageLabels[2]).toEqual([]);
+      });
+    });
     it('gets attachments', function() {
       var promise = doc.getAttachments();
       waitsForPromiseResolved(promise, function (data) {