Browse Source

Merge pull request #5150 from nnethercote/toUnicode

Fix #4935
Yury Delendik 11 years ago
parent
commit
4ce1b1e987
  1. 13
      src/core/evaluator.js
  2. 113
      src/core/fonts.js

13
src/core/evaluator.js

@ -20,8 +20,8 @@
isNum, isStream, isString, JpegStream, Lexer, Metrics, isNum, isStream, isString, JpegStream, Lexer, Metrics,
MurmurHash3_64, Name, Parser, Pattern, PDFImage, PDFJS, serifFonts, MurmurHash3_64, Name, Parser, Pattern, PDFImage, PDFJS, serifFonts,
stdFontMap, symbolsFonts, getTilingPatternIR, warn, Util, Promise, stdFontMap, symbolsFonts, getTilingPatternIR, warn, Util, Promise,
RefSetCache, isRef, TextRenderingMode, CMapFactory, OPS, RefSetCache, isRef, TextRenderingMode, ToUnicodeMap, CMapFactory,
UNSUPPORTED_FEATURES, UnsupportedManager, NormalizedUnicodes, OPS, UNSUPPORTED_FEATURES, UnsupportedManager, NormalizedUnicodes,
IDENTITY_MATRIX, reverseIfRtl, createPromiseCapability, IDENTITY_MATRIX, reverseIfRtl, createPromiseCapability,
getFontType */ getFontType */
@ -1309,12 +1309,13 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
}, },
readToUnicode: function PartialEvaluator_readToUnicode(toUnicode) { readToUnicode: function PartialEvaluator_readToUnicode(toUnicode) {
var cmapObj = toUnicode; var cmap, cmapObj = toUnicode;
if (isName(cmapObj)) { if (isName(cmapObj)) {
return CMapFactory.create(cmapObj, cmap = CMapFactory.create(cmapObj,
{ url: PDFJS.cMapUrl, packed: PDFJS.cMapPacked }, null).getMap(); { url: PDFJS.cMapUrl, packed: PDFJS.cMapPacked }, null).getMap();
return new ToUnicodeMap(cmap);
} else if (isStream(cmapObj)) { } else if (isStream(cmapObj)) {
var cmap = CMapFactory.create(cmapObj, cmap = CMapFactory.create(cmapObj,
{ url: PDFJS.cMapUrl, packed: PDFJS.cMapPacked }, null).getMap(); { url: PDFJS.cMapUrl, packed: PDFJS.cMapPacked }, null).getMap();
// Convert UTF-16BE // Convert UTF-16BE
// NOTE: cmap can be a sparse array, so use forEach instead of for(;;) // NOTE: cmap can be a sparse array, so use forEach instead of for(;;)
@ -1333,7 +1334,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
} }
cmap[i] = String.fromCharCode.apply(String, str); cmap[i] = String.fromCharCode.apply(String, str);
}); });
return cmap; return new ToUnicodeMap(cmap);
} }
return null; return null;
}, },

113
src/core/fonts.js

@ -2161,6 +2161,68 @@ var Glyph = (function GlyphClosure() {
return Glyph; return Glyph;
})(); })();
/**
 * Wraps a char-code -> unicode lookup table behind a small accessor API
 * (length / forEach / get / charCodeOf) so callers do not index the
 * underlying table directly.
 */
var ToUnicodeMap = (function ToUnicodeMapClosure() {
  /**
   * @param {Array} cmap Table indexed by char code. It may be sparse, and
   *   its elements can be integers or strings, depending on how |cmap| was
   *   created.
   */
  function ToUnicodeMap(cmap) {
    // The elements of this._map can be integers or strings, depending on how
    // |cmap| was created.
    this._map = cmap;
  }

  ToUnicodeMap.prototype = {
    /**
     * @return {number} The `length` of the underlying table.
     */
    get length() {
      return this._map.length;
    },

    /**
     * Invokes `callback(charCode, unicodeCharCode)` for every entry that is
     * present in the table.
     * @param {function} callback Receives the char code (as the property key,
     *   i.e. a string) and the numeric unicode value of the entry: the first
     *   UTF-16 code unit for string entries, the value itself for integer
     *   entries.
     */
    forEach: function(callback) {
      // for-in is used on purpose: the table can be sparse and for-in only
      // visits the indices that actually exist.
      for (var charCode in this._map) {
        var entry = this._map[charCode];
        // Integer entries have no charCodeAt(); pass them through unchanged
        // instead of throwing a TypeError.
        callback(charCode,
                 typeof entry === 'number' ? entry : entry.charCodeAt(0));
      }
    },

    /**
     * @param {number} i Char code to look up.
     * @return {string|number|undefined} The stored entry, or undefined when
     *   there is none.
     */
    get: function(i) {
      return this._map[i];
    },

    /**
     * Reverse lookup of a stored entry.
     * @param {string|number} v Value to search for.
     * @return {number} Index of the first entry strictly equal to |v|, or -1
     *   if it is not present.
     */
    charCodeOf: function(v) {
      return this._map.indexOf(v);
    }
  };

  return ToUnicodeMap;
})();
/**
 * ToUnicode map in which every char code in [firstChar, lastChar] maps to
 * itself. No table is stored; values are computed on demand.
 */
var IdentityToUnicodeMap = (function IdentityToUnicodeMapClosure() {
  /**
   * @param {number} firstChar Lowest char code covered (inclusive).
   * @param {number} lastChar Highest char code covered (inclusive).
   */
  function IdentityToUnicodeMap(firstChar, lastChar) {
    this.firstChar = firstChar;
    this.lastChar = lastChar;
  }

  IdentityToUnicodeMap.prototype = {
    // Not supported on this map: reading it reports an error.
    get length() {
      error('should not access .length');
    },

    /**
     * Calls `callback(code, code)` for each code from firstChar to lastChar,
     * both ends included.
     */
    forEach: function(callback) {
      var code = this.firstChar;
      var last = this.lastChar;
      while (code <= last) {
        callback(code, code);
        code++;
      }
    },

    /**
     * @param {number} i Char code to look up.
     * @return {string|undefined} The one-character string for |i| when it
     *   lies inside the covered range, otherwise undefined.
     */
    get: function(i) {
      var covered = (this.firstChar <= i && i <= this.lastChar);
      return covered ? String.fromCharCode(i) : undefined;
    },

    // Not supported on this map: calling it reports an error.
    charCodeOf: function(v) {
      error('should not call .charCodeOf');
    }
  };

  return IdentityToUnicodeMap;
})();
/** /**
* 'Font' is the class the outside world should use, it encapsulate all the font * 'Font' is the class the outside world should use, it encapsulate all the font
* decoding logics whatever type it is (assuming the font type is supported). * decoding logics whatever type it is (assuming the font type is supported).
@ -2204,9 +2266,7 @@ var Font = (function FontClosure() {
this.descent = properties.descent / PDF_GLYPH_SPACE_UNITS; this.descent = properties.descent / PDF_GLYPH_SPACE_UNITS;
this.fontMatrix = properties.fontMatrix; this.fontMatrix = properties.fontMatrix;
var unicode = this.buildToUnicode(properties); this.toUnicode = properties.toUnicode = this.buildToUnicode(properties);
this.toUnicode = properties.toUnicode = unicode.toUnicode;
this.isIdentityUnicode = properties.isIdentityUnicode = unicode.isIdentity;
this.toFontChar = []; this.toFontChar = [];
@ -2259,7 +2319,7 @@ var Font = (function FontClosure() {
map[+code] = GlyphMapForStandardFonts[code]; map[+code] = GlyphMapForStandardFonts[code];
} }
this.toFontChar = map; this.toFontChar = map;
this.toUnicode = map; this.toUnicode = new ToUnicodeMap(map);
} else if (/Symbol/i.test(fontName)) { } else if (/Symbol/i.test(fontName)) {
var symbols = Encodings.SymbolSetEncoding; var symbols = Encodings.SymbolSetEncoding;
for (charCode in symbols) { for (charCode in symbols) {
@ -2278,15 +2338,14 @@ var Font = (function FontClosure() {
} }
} else { } else {
var unicodeCharCode, notCidFont = (type.indexOf('CIDFontType') === -1); var unicodeCharCode, notCidFont = (type.indexOf('CIDFontType') === -1);
for (charCode in this.toUnicode) { this.toUnicode.forEach(function(charCode, unicodeCharCode) {
unicodeCharCode = this.toUnicode[charCode].charCodeAt(0);
if (notCidFont) { if (notCidFont) {
glyphName = (properties.differences[charCode] || glyphName = (properties.differences[charCode] ||
properties.defaultEncoding[charCode]); properties.defaultEncoding[charCode]);
unicodeCharCode = (GlyphsUnicode[glyphName] || unicodeCharCode); unicodeCharCode = (GlyphsUnicode[glyphName] || unicodeCharCode);
} }
this.toFontChar[charCode] = unicodeCharCode; this.toFontChar[charCode] = unicodeCharCode;
} }.bind(this));
} }
this.loadedName = fontName.split('-')[0]; this.loadedName = fontName.split('-')[0];
this.loading = false; this.loading = false;
@ -2499,7 +2558,8 @@ var Font = (function FontClosure() {
function adjustMapping(charCodeToGlyphId, properties) { function adjustMapping(charCodeToGlyphId, properties) {
var toUnicode = properties.toUnicode; var toUnicode = properties.toUnicode;
var isSymbolic = !!(properties.flags & FontFlags.Symbolic); var isSymbolic = !!(properties.flags & FontFlags.Symbolic);
var isIdentityUnicode = properties.isIdentityUnicode; var isIdentityUnicode =
properties.toUnicode instanceof IdentityToUnicodeMap;
var isCidFontType2 = (properties.type === 'CIDFontType2'); var isCidFontType2 = (properties.type === 'CIDFontType2');
var newMap = Object.create(null); var newMap = Object.create(null);
var toFontChar = []; var toFontChar = [];
@ -2512,8 +2572,8 @@ var Font = (function FontClosure() {
// First try to map the value to a unicode position if a non identity map // First try to map the value to a unicode position if a non identity map
// was created. // was created.
if (!isIdentityUnicode) { if (!isIdentityUnicode) {
if (toUnicode[originalCharCode] !== undefined) { if (toUnicode.get(originalCharCode) !== undefined) {
var unicode = toUnicode[fontCharCode]; var unicode = toUnicode.get(fontCharCode);
// TODO: Try to map ligatures to the correct spot. // TODO: Try to map ligatures to the correct spot.
if (unicode.length === 1) { if (unicode.length === 1) {
fontCharCode = unicode.charCodeAt(0); fontCharCode = unicode.charCodeAt(0);
@ -3852,7 +3912,7 @@ var Font = (function FontClosure() {
var dupFirstEntry = false; var dupFirstEntry = false;
if (properties.type === 'CIDFontType2' && properties.toUnicode && if (properties.type === 'CIDFontType2' && properties.toUnicode &&
properties.toUnicode[0] > '\u0000') { properties.toUnicode.get(0) > '\u0000') {
// oracle's defect (see 3427), duplicating first entry // oracle's defect (see 3427), duplicating first entry
dupFirstEntry = true; dupFirstEntry = true;
numGlyphs++; numGlyphs++;
@ -4298,19 +4358,12 @@ var Font = (function FontClosure() {
/** /**
* Builds a char code to unicode map based on section 9.10 of the spec. * Builds a char code to unicode map based on section 9.10 of the spec.
* @param {Object} properties Font properties object. * @param {Object} properties Font properties object.
* @return {Object} Has two properties: 'toUnicode' which maps char codes to * @return {Object} A ToUnicodeMap object.
* unicode (string) values and 'isIdentity' which is true if an identity map
* is used.
*/ */
buildToUnicode: function Font_buildToUnicode(properties) { buildToUnicode: function Font_buildToUnicode(properties) {
var map = {
isIdentity: false,
toUnicode: null
};
// Section 9.10.2 Mapping Character Codes to Unicode Values // Section 9.10.2 Mapping Character Codes to Unicode Values
if (properties.toUnicode && properties.toUnicode.length !== 0) { if (properties.toUnicode && properties.toUnicode.length !== 0) {
map.toUnicode = properties.toUnicode; return properties.toUnicode;
return map;
} }
// According to the spec if the font is a simple font we should only map // According to the spec if the font is a simple font we should only map
// to unicode if the base encoding is MacRoman, MacExpert, or WinAnsi or // to unicode if the base encoding is MacRoman, MacExpert, or WinAnsi or
@ -4375,8 +4428,7 @@ var Font = (function FontClosure() {
} }
toUnicode[charcode] = String.fromCharCode(GlyphsUnicode[glyphName]); toUnicode[charcode] = String.fromCharCode(GlyphsUnicode[glyphName]);
} }
map.toUnicode = toUnicode; return new ToUnicodeMap(toUnicode);
return map;
} }
// If the font is a composite font that uses one of the predefined CMaps // If the font is a composite font that uses one of the predefined CMaps
// listed in Table 118 (except Identity–H and Identity–V) or whose // listed in Table 118 (except Identity–H and Identity–V) or whose
@ -4419,19 +4471,12 @@ var Font = (function FontClosure() {
ucs2.charCodeAt(1)); ucs2.charCodeAt(1));
} }
}); });
map.toUnicode = toUnicode; return new ToUnicodeMap(toUnicode);
return map;
} }
// The viewer's choice, just use an identity map. // The viewer's choice, just use an identity map.
toUnicode = []; return new IdentityToUnicodeMap(properties.firstChar,
var firstChar = properties.firstChar, lastChar = properties.lastChar; properties.lastChar);
for (var i = firstChar; i <= lastChar; i++) {
toUnicode[i] = String.fromCharCode(i);
}
map.isIdentity = true;
map.toUnicode = toUnicode;
return map;
}, },
get spaceWidth() { get spaceWidth() {
@ -4459,7 +4504,7 @@ var Font = (function FontClosure() {
} }
// ... via toUnicode map // ... via toUnicode map
if (!charcode && 'toUnicode' in this) { if (!charcode && 'toUnicode' in this) {
charcode = this.toUnicode.indexOf(glyphUnicode); charcode = this.toUnicode.charCodeOf(glyphUnicode);
} }
// setting it to unicode if negative or undefined // setting it to unicode if negative or undefined
if (charcode <= 0) { if (charcode <= 0) {
@ -4489,7 +4534,7 @@ var Font = (function FontClosure() {
width = isNum(width) ? width : this.defaultWidth; width = isNum(width) ? width : this.defaultWidth;
var vmetric = this.vmetrics && this.vmetrics[widthCode]; var vmetric = this.vmetrics && this.vmetrics[widthCode];
var unicode = this.toUnicode[charcode] || charcode; var unicode = this.toUnicode.get(charcode) || charcode;
if (typeof unicode === 'number') { if (typeof unicode === 'number') {
unicode = String.fromCharCode(unicode); unicode = String.fromCharCode(unicode);
} }

Loading…
Cancel
Save