From eeefe5d851661c0e5801546269964731fbb55273 Mon Sep 17 00:00:00 2001 From: Pdf Bot Date: Thu, 10 Mar 2016 19:48:29 +0000 Subject: [PATCH] PDF.js version 1.4.133 - See mozilla/pdf.js@4784863ef7ef3198631cb3299fff16a0843f7a88 --- bower.json | 2 +- build/pdf.combined.js | 94 +++++++++++++++++++++++++++++++++++++------ build/pdf.js | 4 +- build/pdf.worker.js | 94 +++++++++++++++++++++++++++++++++++++------ package.json | 2 +- 5 files changed, 168 insertions(+), 28 deletions(-) diff --git a/bower.json b/bower.json index 170eb9e6d..f59050458 100644 --- a/bower.json +++ b/bower.json @@ -1,6 +1,6 @@ { "name": "pdfjs-dist", - "version": "1.4.131", + "version": "1.4.133", "main": [ "build/pdf.js", "build/pdf.worker.js" diff --git a/build/pdf.combined.js b/build/pdf.combined.js index 16d02d1aa..d4969abe6 100644 --- a/build/pdf.combined.js +++ b/build/pdf.combined.js @@ -28,8 +28,8 @@ factory((root.pdfjsDistBuildPdfCombined = {})); // Use strict in our context only - users might not want it 'use strict'; -var pdfjsVersion = '1.4.131'; -var pdfjsBuild = '1475984'; +var pdfjsVersion = '1.4.133'; +var pdfjsBuild = '4784863'; var pdfjsFilePath = typeof document !== 'undefined' && document.currentScript ? @@ -16673,6 +16673,36 @@ exports.isStream = isStream; return code; } + function getUnicodeForGlyph(name, glyphsUnicodeMap) { + var unicode = glyphsUnicodeMap[name]; + if (unicode !== undefined) { + return unicode; + } + if (!name) { + return -1; + } + // Try to recover valid Unicode values from 'uniXXXX'/'uXXXX{XX}' glyphs. + if (name[0] === 'u') { + var nameLen = name.length, hexStr; + + if (nameLen === 7 && name[1] === 'n' && name[2] === 'i') { // 'uniXXXX' + hexStr = name.substr(3); + } else if (nameLen >= 5 && nameLen <= 7) { // 'uXXXX{XX}' + hexStr = name.substr(1); + } else { + return -1; + } + // Check for upper-case hexadecimal characters, to avoid false positives. + if (hexStr === hexStr.toUpperCase()) { + unicode = parseInt(hexStr, 16); + if (unicode >= 0) { + return unicode; + } + } + } + return -1; + } + var UnicodeRanges = [ { 'begin': 0x0000, 'end': 0x007F }, // Basic Latin { 'begin': 0x0080, 'end': 0x00FF }, // Latin-1 Supplement @@ -18220,6 +18250,7 @@ exports.isStream = isStream; exports.reverseIfRtl = reverseIfRtl; exports.getUnicodeRangeFor = getUnicodeRangeFor; exports.getNormalizedUnicodes = getNormalizedUnicodes; + exports.getUnicodeForGlyph = getUnicodeForGlyph; })); @@ -33822,6 +33853,7 @@ var getSupplementalGlyphMapForArialBlack = coreStandardFonts.getSupplementalGlyphMapForArialBlack; var getUnicodeRangeFor = coreUnicode.getUnicodeRangeFor; var mapSpecialUnicodeValues = coreUnicode.mapSpecialUnicodeValues; +var getUnicodeForGlyph = coreUnicode.getUnicodeForGlyph; // Unicode Private Use Area var PRIVATE_USE_OFFSET_START = 0xE000; @@ -34207,7 +34239,7 @@ var ProblematicCharRanges = new Int32Array([ */ var Font = (function FontClosure() { function Font(name, file, properties) { - var charCode, glyphName, fontChar; + var charCode, glyphName, unicode, fontChar; this.name = name; this.loadedName = properties.loadedName; @@ -34351,21 +34383,25 @@ var Font = (function FontClosure() { this.toFontChar[charCode] = fontChar; } } else if (isStandardFont) { - this.toFontChar = []; glyphsUnicodeMap = getGlyphsUnicode(); for (charCode in properties.defaultEncoding) { glyphName = (properties.differences[charCode] || properties.defaultEncoding[charCode]); - this.toFontChar[charCode] = glyphsUnicodeMap[glyphName]; + unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap); + if (unicode !== -1) { + this.toFontChar[charCode] = unicode; + } } } else { - var unicodeCharCode, notCidFont = (type.indexOf('CIDFontType') === -1); glyphsUnicodeMap = getGlyphsUnicode(); this.toUnicode.forEach(function(charCode, unicodeCharCode) { - if (notCidFont) { + if (!this.composite) { glyphName = (properties.differences[charCode] || properties.defaultEncoding[charCode]); - unicodeCharCode = (glyphsUnicodeMap[glyphName] || unicodeCharCode); + unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap); + if (unicode !== -1) { + unicodeCharCode = unicode; + } } this.toFontChar[charCode] = unicodeCharCode; }.bind(this)); @@ -34464,7 +34500,7 @@ var Font = (function FontClosure() { function int16(b0, b1) { return (b0 << 8) + b1; } - + function signedInt16(b0, b1) { var value = (b0 << 8) + b1; return value & (1 << 15) ? value - 0x10000 : value; @@ -36025,6 +36061,26 @@ var Font = (function FontClosure() { return false; } + // Some bad PDF generators, e.g. Scribus PDF, include glyph names + // in a 'uniXXXX' format -- attempting to recover proper ones. + function recoverGlyphName(name, glyphsUnicodeMap) { + if (glyphsUnicodeMap[name] !== undefined) { + return name; + } + // The glyph name is non-standard, trying to recover. + var unicode = getUnicodeForGlyph(name, glyphsUnicodeMap); + if (unicode !== -1) { + for (var key in glyphsUnicodeMap) { + if (glyphsUnicodeMap[key] === unicode) { + return key; + } + } + } + warn('Unable to recover a standard glyph name for: ' + name); + return name; + } + + if (properties.type === 'CIDFontType2') { var cidToGidMap = properties.cidToGidMap || []; var isCidToGidMapEmpty = cidToGidMap.length === 0; @@ -36079,7 +36135,7 @@ var Font = (function FontClosure() { } var glyphsUnicodeMap = getGlyphsUnicode(); for (charCode = 0; charCode < 256; charCode++) { - var glyphName; + var glyphName, standardGlyphName; if (this.differences && charCode in this.differences) { glyphName = this.differences[charCode]; } else if (charCode in baseEncoding && @@ -36091,13 +36147,16 @@ var Font = (function FontClosure() { if (!glyphName) { continue; } + // Ensure that non-standard glyph names are resolved to valid ones. + standardGlyphName = recoverGlyphName(glyphName, glyphsUnicodeMap); + var unicodeOrCharCode, isUnicode = false; if (cmapPlatformId === 3 && cmapEncodingId === 1) { - unicodeOrCharCode = glyphsUnicodeMap[glyphName]; + unicodeOrCharCode = glyphsUnicodeMap[standardGlyphName]; isUnicode = true; } else if (cmapPlatformId === 1 && cmapEncodingId === 0) { // TODO: the encoding needs to be updated with mac os table. - unicodeOrCharCode = MacRomanEncoding.indexOf(glyphName); + unicodeOrCharCode = MacRomanEncoding.indexOf(standardGlyphName); } var found = false; @@ -36115,6 +36174,11 @@ var Font = (function FontClosure() { if (!found && properties.glyphNames) { // Try to map using the post table. var glyphId = properties.glyphNames.indexOf(glyphName); + // The post table ought to use the same kind of glyph names as the + // `differences` array, but check the standard ones as a fallback. + if (glyphId === -1 && standardGlyphName !== glyphName) { + glyphId = properties.glyphNames.indexOf(standardGlyphName); + } if (glyphId > 0 && hasGlyph(glyphId, -1, -1)) { charCodeToGlyphId[charCode] = glyphId; found = true; @@ -36428,6 +36492,12 @@ var Font = (function FontClosure() { code = +glyphName.substr(1); } break; + default: + // 'uniXXXX'/'uXXXX{XX}' glyphs + var unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap); + if (unicode !== -1) { + code = unicode; + } } if (code) { // If |baseEncodingName| is one the predefined encodings, diff --git a/build/pdf.js b/build/pdf.js index f0278fa2e..126ddcb00 100644 --- a/build/pdf.js +++ b/build/pdf.js @@ -28,8 +28,8 @@ factory((root.pdfjsDistBuildPdf = {})); // Use strict in our context only - users might not want it 'use strict'; -var pdfjsVersion = '1.4.131'; -var pdfjsBuild = '1475984'; +var pdfjsVersion = '1.4.133'; +var pdfjsBuild = '4784863'; var pdfjsFilePath = typeof document !== 'undefined' && document.currentScript ? diff --git a/build/pdf.worker.js b/build/pdf.worker.js index 5a1f01982..bc265bc20 100644 --- a/build/pdf.worker.js +++ b/build/pdf.worker.js @@ -28,8 +28,8 @@ factory((root.pdfjsDistBuildPdfWorker = {})); // Use strict in our context only - users might not want it 'use strict'; -var pdfjsVersion = '1.4.131'; -var pdfjsBuild = '1475984'; +var pdfjsVersion = '1.4.133'; +var pdfjsBuild = '4784863'; var pdfjsFilePath = typeof document !== 'undefined' && document.currentScript ? @@ -16605,6 +16605,36 @@ exports.isStream = isStream; return code; } + function getUnicodeForGlyph(name, glyphsUnicodeMap) { + var unicode = glyphsUnicodeMap[name]; + if (unicode !== undefined) { + return unicode; + } + if (!name) { + return -1; + } + // Try to recover valid Unicode values from 'uniXXXX'/'uXXXX{XX}' glyphs. + if (name[0] === 'u') { + var nameLen = name.length, hexStr; + + if (nameLen === 7 && name[1] === 'n' && name[2] === 'i') { // 'uniXXXX' + hexStr = name.substr(3); + } else if (nameLen >= 5 && nameLen <= 7) { // 'uXXXX{XX}' + hexStr = name.substr(1); + } else { + return -1; + } + // Check for upper-case hexadecimal characters, to avoid false positives. + if (hexStr === hexStr.toUpperCase()) { + unicode = parseInt(hexStr, 16); + if (unicode >= 0) { + return unicode; + } + } + } + return -1; + } + var UnicodeRanges = [ { 'begin': 0x0000, 'end': 0x007F }, // Basic Latin { 'begin': 0x0080, 'end': 0x00FF }, // Latin-1 Supplement @@ -18152,6 +18182,7 @@ exports.isStream = isStream; exports.reverseIfRtl = reverseIfRtl; exports.getUnicodeRangeFor = getUnicodeRangeFor; exports.getNormalizedUnicodes = getNormalizedUnicodes; + exports.getUnicodeForGlyph = getUnicodeForGlyph; })); @@ -25842,6 +25873,7 @@ var getSupplementalGlyphMapForArialBlack = coreStandardFonts.getSupplementalGlyphMapForArialBlack; var getUnicodeRangeFor = coreUnicode.getUnicodeRangeFor; var mapSpecialUnicodeValues = coreUnicode.mapSpecialUnicodeValues; +var getUnicodeForGlyph = coreUnicode.getUnicodeForGlyph; // Unicode Private Use Area var PRIVATE_USE_OFFSET_START = 0xE000; @@ -26227,7 +26259,7 @@ var ProblematicCharRanges = new Int32Array([ */ var Font = (function FontClosure() { function Font(name, file, properties) { - var charCode, glyphName, fontChar; + var charCode, glyphName, unicode, fontChar; this.name = name; this.loadedName = properties.loadedName; @@ -26371,21 +26403,25 @@ var Font = (function FontClosure() { this.toFontChar[charCode] = fontChar; } } else if (isStandardFont) { - this.toFontChar = []; glyphsUnicodeMap = getGlyphsUnicode(); for (charCode in properties.defaultEncoding) { glyphName = (properties.differences[charCode] || properties.defaultEncoding[charCode]); - this.toFontChar[charCode] = glyphsUnicodeMap[glyphName]; + unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap); + if (unicode !== -1) { + this.toFontChar[charCode] = unicode; + } } } else { - var unicodeCharCode, notCidFont = (type.indexOf('CIDFontType') === -1); glyphsUnicodeMap = getGlyphsUnicode(); this.toUnicode.forEach(function(charCode, unicodeCharCode) { - if (notCidFont) { + if (!this.composite) { glyphName = (properties.differences[charCode] || properties.defaultEncoding[charCode]); - unicodeCharCode = (glyphsUnicodeMap[glyphName] || unicodeCharCode); + unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap); + if (unicode !== -1) { + unicodeCharCode = unicode; + } } this.toFontChar[charCode] = unicodeCharCode; }.bind(this)); @@ -26484,7 +26520,7 @@ var Font = (function FontClosure() { function int16(b0, b1) { return (b0 << 8) + b1; } - + function signedInt16(b0, b1) { var value = (b0 << 8) + b1; return value & (1 << 15) ? value - 0x10000 : value; @@ -28045,6 +28081,26 @@ var Font = (function FontClosure() { return false; } + // Some bad PDF generators, e.g. Scribus PDF, include glyph names + // in a 'uniXXXX' format -- attempting to recover proper ones. + function recoverGlyphName(name, glyphsUnicodeMap) { + if (glyphsUnicodeMap[name] !== undefined) { + return name; + } + // The glyph name is non-standard, trying to recover. + var unicode = getUnicodeForGlyph(name, glyphsUnicodeMap); + if (unicode !== -1) { + for (var key in glyphsUnicodeMap) { + if (glyphsUnicodeMap[key] === unicode) { + return key; + } + } + } + warn('Unable to recover a standard glyph name for: ' + name); + return name; + } + + if (properties.type === 'CIDFontType2') { var cidToGidMap = properties.cidToGidMap || []; var isCidToGidMapEmpty = cidToGidMap.length === 0; @@ -28099,7 +28155,7 @@ var Font = (function FontClosure() { } var glyphsUnicodeMap = getGlyphsUnicode(); for (charCode = 0; charCode < 256; charCode++) { - var glyphName; + var glyphName, standardGlyphName; if (this.differences && charCode in this.differences) { glyphName = this.differences[charCode]; } else if (charCode in baseEncoding && @@ -28111,13 +28167,16 @@ var Font = (function FontClosure() { if (!glyphName) { continue; } + // Ensure that non-standard glyph names are resolved to valid ones. + standardGlyphName = recoverGlyphName(glyphName, glyphsUnicodeMap); + var unicodeOrCharCode, isUnicode = false; if (cmapPlatformId === 3 && cmapEncodingId === 1) { - unicodeOrCharCode = glyphsUnicodeMap[glyphName]; + unicodeOrCharCode = glyphsUnicodeMap[standardGlyphName]; isUnicode = true; } else if (cmapPlatformId === 1 && cmapEncodingId === 0) { // TODO: the encoding needs to be updated with mac os table. - unicodeOrCharCode = MacRomanEncoding.indexOf(glyphName); + unicodeOrCharCode = MacRomanEncoding.indexOf(standardGlyphName); } var found = false; @@ -28135,6 +28194,11 @@ var Font = (function FontClosure() { if (!found && properties.glyphNames) { // Try to map using the post table. var glyphId = properties.glyphNames.indexOf(glyphName); + // The post table ought to use the same kind of glyph names as the + // `differences` array, but check the standard ones as a fallback. + if (glyphId === -1 && standardGlyphName !== glyphName) { + glyphId = properties.glyphNames.indexOf(standardGlyphName); + } if (glyphId > 0 && hasGlyph(glyphId, -1, -1)) { charCodeToGlyphId[charCode] = glyphId; found = true; @@ -28448,6 +28512,12 @@ var Font = (function FontClosure() { code = +glyphName.substr(1); } break; + default: + // 'uniXXXX'/'uXXXX{XX}' glyphs + var unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap); + if (unicode !== -1) { + code = unicode; + } } if (code) { // If |baseEncodingName| is one the predefined encodings, diff --git a/package.json b/package.json index 6ce9b6271..584dd17fd 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "pdfjs-dist", - "version": "1.4.131", + "version": "1.4.133", "main": "build/pdf.js", "description": "Generic build of Mozilla's PDF.js library.", "keywords": [