|
|
|
@ -24,7 +24,7 @@
@@ -24,7 +24,7 @@
|
|
|
|
|
'pdfjs/core/fonts', 'pdfjs/core/function', 'pdfjs/core/pattern', |
|
|
|
|
'pdfjs/core/cmap', 'pdfjs/core/metrics', 'pdfjs/core/bidi', |
|
|
|
|
'pdfjs/core/encodings', 'pdfjs/core/standard_fonts', |
|
|
|
|
'pdfjs/core/unicode'], factory); |
|
|
|
|
'pdfjs/core/unicode', 'pdfjs/core/glyphlist'], factory); |
|
|
|
|
} else if (typeof exports !== 'undefined') { |
|
|
|
|
factory(exports, require('../shared/util.js'), require('./primitives.js'), |
|
|
|
|
require('./stream.js'), require('./parser.js'), require('./image.js'), |
|
|
|
@ -32,7 +32,7 @@
@@ -32,7 +32,7 @@
|
|
|
|
|
require('./fonts.js'), require('./function.js'), require('./pattern.js'), |
|
|
|
|
require('./cmap.js'), require('./metrics.js'), require('./bidi.js'), |
|
|
|
|
require('./encodings.js'), require('./standard_fonts.js'), |
|
|
|
|
require('./unicode.js')); |
|
|
|
|
require('./unicode.js'), require('./glyphlist.js')); |
|
|
|
|
} else { |
|
|
|
|
factory((root.pdfjsCoreEvaluator = {}), root.pdfjsSharedUtil, |
|
|
|
|
root.pdfjsCorePrimitives, root.pdfjsCoreStream, root.pdfjsCoreParser, |
|
|
|
@ -40,12 +40,13 @@
@@ -40,12 +40,13 @@
|
|
|
|
|
root.pdfjsCoreFonts, root.pdfjsCoreFunction, root.pdfjsCorePattern, |
|
|
|
|
root.pdfjsCoreCMap, root.pdfjsCoreMetrics, root.pdfjsCoreBidi, |
|
|
|
|
root.pdfjsCoreEncodings, root.pdfjsCoreStandardFonts, |
|
|
|
|
root.pdfjsCoreUnicode); |
|
|
|
|
root.pdfjsCoreUnicode, root.pdfjsCoreGlyphList); |
|
|
|
|
} |
|
|
|
|
}(this, function (exports, sharedUtil, corePrimitives, coreStream, coreParser, |
|
|
|
|
coreImage, coreColorSpace, coreMurmurHash3, coreFonts, |
|
|
|
|
coreFunction, corePattern, coreCMap, coreMetrics, coreBidi, |
|
|
|
|
coreEncodings, coreStandardFonts, coreUnicode) { |
|
|
|
|
coreEncodings, coreStandardFonts, coreUnicode, |
|
|
|
|
coreGlyphList) { |
|
|
|
|
|
|
|
|
|
var FONT_IDENTITY_MATRIX = sharedUtil.FONT_IDENTITY_MATRIX; |
|
|
|
|
var IDENTITY_MATRIX = sharedUtil.IDENTITY_MATRIX; |
|
|
|
@ -103,6 +104,8 @@ var getSerifFonts = coreStandardFonts.getSerifFonts;
@@ -103,6 +104,8 @@ var getSerifFonts = coreStandardFonts.getSerifFonts;
|
|
|
|
|
var getSymbolsFonts = coreStandardFonts.getSymbolsFonts; |
|
|
|
|
var getNormalizedUnicodes = coreUnicode.getNormalizedUnicodes; |
|
|
|
|
var reverseIfRtl = coreUnicode.reverseIfRtl; |
|
|
|
|
var getUnicodeForGlyph = coreUnicode.getUnicodeForGlyph; |
|
|
|
|
var getGlyphsUnicode = coreGlyphList.getGlyphsUnicode; |
|
|
|
|
|
|
|
|
|
var PartialEvaluator = (function PartialEvaluatorClosure() { |
|
|
|
|
function PartialEvaluator(pdfManager, xref, handler, pageIndex, |
|
|
|
@ -651,8 +654,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
@@ -651,8 +654,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|
|
|
|
// TODO move promises into translate font
|
|
|
|
|
var translatedPromise; |
|
|
|
|
try { |
|
|
|
|
translatedPromise = Promise.resolve( |
|
|
|
|
this.translateFont(preEvaluatedFont, xref)); |
|
|
|
|
translatedPromise = this.translateFont(preEvaluatedFont, xref); |
|
|
|
|
} catch (e) { |
|
|
|
|
translatedPromise = Promise.reject(e); |
|
|
|
|
} |
|
|
|
@ -1550,9 +1552,9 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
@@ -1550,9 +1552,9 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|
|
|
|
xref, properties) { |
|
|
|
|
// 9.10.2
|
|
|
|
|
var toUnicode = (dict.get('ToUnicode') || baseDict.get('ToUnicode')); |
|
|
|
|
if (toUnicode) { |
|
|
|
|
properties.toUnicode = this.readToUnicode(toUnicode); |
|
|
|
|
} |
|
|
|
|
var toUnicodePromise = toUnicode ? |
|
|
|
|
this.readToUnicode(toUnicode) : Promise.resolve(undefined); |
|
|
|
|
|
|
|
|
|
if (properties.composite) { |
|
|
|
|
// CIDSystemInfo helps to match CID to glyphs
|
|
|
|
|
var cidSystemInfo = dict.get('CIDSystemInfo'); |
|
|
|
@ -1637,20 +1639,164 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
@@ -1637,20 +1639,164 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|
|
|
|
properties.differences = differences; |
|
|
|
|
properties.baseEncodingName = baseEncodingName; |
|
|
|
|
properties.dict = dict; |
|
|
|
|
return toUnicodePromise.then(function(toUnicode) { |
|
|
|
|
properties.toUnicode = toUnicode; |
|
|
|
|
return this.buildToUnicode(properties); |
|
|
|
|
}.bind(this)).then(function (toUnicode) { |
|
|
|
|
properties.toUnicode = toUnicode; |
|
|
|
|
return properties; |
|
|
|
|
}); |
|
|
|
|
}, |
|
|
|
|
|
|
|
|
|
/** |
|
|
|
|
* Builds a char code to unicode map based on section 9.10 of the spec. |
|
|
|
|
* @param {Object} properties Font properties object. |
|
|
|
|
* @return {Promise} A Promise resolving to ToUnicodeMap object. |
|
|
|
|
*/ |
|
|
|
|
buildToUnicode: function partialEvaluator_buildToUnicode(properties) { |
|
|
|
|
// Section 9.10.2 Mapping Character Codes to Unicode Values
|
|
|
|
|
if (properties.toUnicode && properties.toUnicode.length !== 0) { |
|
|
|
|
return Promise.resolve(properties.toUnicode); |
|
|
|
|
} |
|
|
|
|
// According to the spec if the font is a simple font we should only map
|
|
|
|
|
// to unicode if the base encoding is MacRoman, MacExpert, or WinAnsi or
|
|
|
|
|
// the differences array only contains adobe standard or symbol set names,
|
|
|
|
|
// in pratice it seems better to always try to create a toUnicode
|
|
|
|
|
// map based of the default encoding.
|
|
|
|
|
var toUnicode, charcode; |
|
|
|
|
if (!properties.composite /* is simple font */) { |
|
|
|
|
toUnicode = []; |
|
|
|
|
var encoding = properties.defaultEncoding.slice(); |
|
|
|
|
var baseEncodingName = properties.baseEncodingName; |
|
|
|
|
// Merge in the differences array.
|
|
|
|
|
var differences = properties.differences; |
|
|
|
|
for (charcode in differences) { |
|
|
|
|
encoding[charcode] = differences[charcode]; |
|
|
|
|
} |
|
|
|
|
var glyphsUnicodeMap = getGlyphsUnicode(); |
|
|
|
|
for (charcode in encoding) { |
|
|
|
|
// a) Map the character code to a character name.
|
|
|
|
|
var glyphName = encoding[charcode]; |
|
|
|
|
// b) Look up the character name in the Adobe Glyph List (see the
|
|
|
|
|
// Bibliography) to obtain the corresponding Unicode value.
|
|
|
|
|
if (glyphName === '') { |
|
|
|
|
continue; |
|
|
|
|
} else if (glyphsUnicodeMap[glyphName] === undefined) { |
|
|
|
|
// (undocumented) c) Few heuristics to recognize unknown glyphs
|
|
|
|
|
// NOTE: Adobe Reader does not do this step, but OSX Preview does
|
|
|
|
|
var code = 0; |
|
|
|
|
switch (glyphName[0]) { |
|
|
|
|
case 'G': // Gxx glyph
|
|
|
|
|
if (glyphName.length === 3) { |
|
|
|
|
code = parseInt(glyphName.substr(1), 16); |
|
|
|
|
} |
|
|
|
|
break; |
|
|
|
|
case 'g': // g00xx glyph
|
|
|
|
|
if (glyphName.length === 5) { |
|
|
|
|
code = parseInt(glyphName.substr(1), 16); |
|
|
|
|
} |
|
|
|
|
break; |
|
|
|
|
case 'C': // Cddd glyph
|
|
|
|
|
case 'c': // cddd glyph
|
|
|
|
|
if (glyphName.length >= 3) { |
|
|
|
|
code = +glyphName.substr(1); |
|
|
|
|
} |
|
|
|
|
break; |
|
|
|
|
default: |
|
|
|
|
// 'uniXXXX'/'uXXXX{XX}' glyphs
|
|
|
|
|
var unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap); |
|
|
|
|
if (unicode !== -1) { |
|
|
|
|
code = unicode; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
if (code) { |
|
|
|
|
// If |baseEncodingName| is one the predefined encodings,
|
|
|
|
|
// and |code| equals |charcode|, using the glyph defined in the
|
|
|
|
|
// baseEncoding seems to yield a better |toUnicode| mapping
|
|
|
|
|
// (fixes issue 5070).
|
|
|
|
|
if (baseEncodingName && code === +charcode) { |
|
|
|
|
var baseEncoding = getEncoding(baseEncodingName); |
|
|
|
|
if (baseEncoding && (glyphName = baseEncoding[charcode])) { |
|
|
|
|
toUnicode[charcode] = |
|
|
|
|
String.fromCharCode(glyphsUnicodeMap[glyphName]); |
|
|
|
|
continue; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
toUnicode[charcode] = String.fromCharCode(code); |
|
|
|
|
} |
|
|
|
|
continue; |
|
|
|
|
} |
|
|
|
|
toUnicode[charcode] = |
|
|
|
|
String.fromCharCode(glyphsUnicodeMap[glyphName]); |
|
|
|
|
} |
|
|
|
|
return Promise.resolve(new ToUnicodeMap(toUnicode)); |
|
|
|
|
} |
|
|
|
|
// If the font is a composite font that uses one of the predefined CMaps
|
|
|
|
|
// listed in Table 118 (except Identity–H and Identity–V) or whose
|
|
|
|
|
// descendant CIDFont uses the Adobe-GB1, Adobe-CNS1, Adobe-Japan1, or
|
|
|
|
|
// Adobe-Korea1 character collection:
|
|
|
|
|
if (properties.composite && ( |
|
|
|
|
(properties.cMap.builtInCMap && |
|
|
|
|
!(properties.cMap instanceof IdentityCMap)) || |
|
|
|
|
(properties.cidSystemInfo.registry === 'Adobe' && |
|
|
|
|
(properties.cidSystemInfo.ordering === 'GB1' || |
|
|
|
|
properties.cidSystemInfo.ordering === 'CNS1' || |
|
|
|
|
properties.cidSystemInfo.ordering === 'Japan1' || |
|
|
|
|
properties.cidSystemInfo.ordering === 'Korea1')))) { |
|
|
|
|
// Then:
|
|
|
|
|
// a) Map the character code to a character identifier (CID) according
|
|
|
|
|
// to the font’s CMap.
|
|
|
|
|
// b) Obtain the registry and ordering of the character collection used
|
|
|
|
|
// by the font’s CMap (for example, Adobe and Japan1) from its
|
|
|
|
|
// CIDSystemInfo dictionary.
|
|
|
|
|
var registry = properties.cidSystemInfo.registry; |
|
|
|
|
var ordering = properties.cidSystemInfo.ordering; |
|
|
|
|
// c) Construct a second CMap name by concatenating the registry and
|
|
|
|
|
// ordering obtained in step (b) in the format registry–ordering–UCS2
|
|
|
|
|
// (for example, Adobe–Japan1–UCS2).
|
|
|
|
|
var ucs2CMapName = new Name(registry + '-' + ordering + '-UCS2'); |
|
|
|
|
// d) Obtain the CMap with the name constructed in step (c) (available
|
|
|
|
|
// from the ASN Web site; see the Bibliography).
|
|
|
|
|
return CMapFactory.create(ucs2CMapName, |
|
|
|
|
{ url: PDFJS.cMapUrl, packed: PDFJS.cMapPacked }, null).then( |
|
|
|
|
function (ucs2CMap) { |
|
|
|
|
var cMap = properties.cMap; |
|
|
|
|
toUnicode = []; |
|
|
|
|
cMap.forEach(function(charcode, cid) { |
|
|
|
|
assert(cid <= 0xffff, 'Max size of CID is 65,535'); |
|
|
|
|
// e) Map the CID obtained in step (a) according to the CMap
|
|
|
|
|
// obtained in step (d), producing a Unicode value.
|
|
|
|
|
var ucs2 = ucs2CMap.lookup(cid); |
|
|
|
|
if (ucs2) { |
|
|
|
|
toUnicode[charcode] = |
|
|
|
|
String.fromCharCode((ucs2.charCodeAt(0) << 8) + |
|
|
|
|
ucs2.charCodeAt(1)); |
|
|
|
|
} |
|
|
|
|
}); |
|
|
|
|
return new ToUnicodeMap(toUnicode); |
|
|
|
|
}); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// The viewer's choice, just use an identity map.
|
|
|
|
|
return Promise.resolve(new IdentityToUnicodeMap(properties.firstChar, |
|
|
|
|
properties.lastChar)); |
|
|
|
|
}, |
|
|
|
|
|
|
|
|
|
readToUnicode: function PartialEvaluator_readToUnicode(toUnicode) { |
|
|
|
|
var cmap, cmapObj = toUnicode; |
|
|
|
|
var cmapObj = toUnicode; |
|
|
|
|
if (isName(cmapObj)) { |
|
|
|
|
cmap = CMapFactory.create(cmapObj, |
|
|
|
|
{ url: PDFJS.cMapUrl, packed: PDFJS.cMapPacked }, null); |
|
|
|
|
return CMapFactory.create(cmapObj, |
|
|
|
|
{ url: PDFJS.cMapUrl, packed: PDFJS.cMapPacked }, null).then( |
|
|
|
|
function (cmap) { |
|
|
|
|
if (cmap instanceof IdentityCMap) { |
|
|
|
|
return new IdentityToUnicodeMap(0, 0xFFFF); |
|
|
|
|
} |
|
|
|
|
return new ToUnicodeMap(cmap.getMap()); |
|
|
|
|
}); |
|
|
|
|
} else if (isStream(cmapObj)) { |
|
|
|
|
cmap = CMapFactory.create(cmapObj, |
|
|
|
|
{ url: PDFJS.cMapUrl, packed: PDFJS.cMapPacked }, null); |
|
|
|
|
return CMapFactory.create(cmapObj, |
|
|
|
|
{ url: PDFJS.cMapUrl, packed: PDFJS.cMapPacked }, null).then( |
|
|
|
|
function (cmap) { |
|
|
|
|
if (cmap instanceof IdentityCMap) { |
|
|
|
|
return new IdentityToUnicodeMap(0, 0xFFFF); |
|
|
|
|
} |
|
|
|
@ -1673,8 +1819,9 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
@@ -1673,8 +1819,9 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|
|
|
|
map[charCode] = String.fromCharCode.apply(String, str); |
|
|
|
|
}); |
|
|
|
|
return new ToUnicodeMap(map); |
|
|
|
|
}); |
|
|
|
|
} |
|
|
|
|
return null; |
|
|
|
|
return Promise.resolve(null); |
|
|
|
|
}, |
|
|
|
|
|
|
|
|
|
readCidToGidMap: function PartialEvaluator_readCidToGidMap(cidToGidStream) { |
|
|
|
@ -1978,10 +2125,12 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
@@ -1978,10 +2125,12 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|
|
|
|
firstChar: 0, |
|
|
|
|
lastChar: maxCharIndex |
|
|
|
|
}; |
|
|
|
|
this.extractDataStructures(dict, dict, xref, properties); |
|
|
|
|
return this.extractDataStructures(dict, dict, xref, properties).then( |
|
|
|
|
function (properties) { |
|
|
|
|
properties.widths = this.buildCharCodeToWidth(metrics.widths, |
|
|
|
|
properties); |
|
|
|
|
return new Font(baseFontName, null, properties); |
|
|
|
|
}.bind(this)); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
@ -2058,16 +2207,25 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
@@ -2058,16 +2207,25 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|
|
|
|
coded: false |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
var cMapPromise; |
|
|
|
|
if (composite) { |
|
|
|
|
var cidEncoding = baseDict.get('Encoding'); |
|
|
|
|
if (isName(cidEncoding)) { |
|
|
|
|
properties.cidEncoding = cidEncoding.name; |
|
|
|
|
} |
|
|
|
|
properties.cMap = CMapFactory.create(cidEncoding, |
|
|
|
|
{ url: PDFJS.cMapUrl, packed: PDFJS.cMapPacked }, null); |
|
|
|
|
cMapPromise = CMapFactory.create(cidEncoding, |
|
|
|
|
{ url: PDFJS.cMapUrl, packed: PDFJS.cMapPacked }, null).then( |
|
|
|
|
function (cMap) { |
|
|
|
|
properties.cMap = cMap; |
|
|
|
|
properties.vertical = properties.cMap.vertical; |
|
|
|
|
}); |
|
|
|
|
} else { |
|
|
|
|
cMapPromise = Promise.resolve(undefined); |
|
|
|
|
} |
|
|
|
|
this.extractDataStructures(dict, baseDict, xref, properties); |
|
|
|
|
|
|
|
|
|
return cMapPromise.then(function () { |
|
|
|
|
return this.extractDataStructures(dict, baseDict, xref, properties); |
|
|
|
|
}.bind(this)).then(function (properties) { |
|
|
|
|
this.extractWidths(dict, xref, descriptor, properties); |
|
|
|
|
|
|
|
|
|
if (type === 'Type3') { |
|
|
|
@ -2075,6 +2233,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
@@ -2075,6 +2233,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
return new Font(fontName.name, fontFile, properties); |
|
|
|
|
}.bind(this)); |
|
|
|
|
} |
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|