Browse Source

Merge pull request #4550 from yurydelendik/macenc

Heuristics to recognize the unknown glyphs for toUnicode
Yury Delendik 11 years ago
parent
commit
938547276e
  1. 30
      src/core/fonts.js
  2. 1
      test/pdfs/.gitignore
  3. BIN
      test/pdfs/issue4550.pdf
  4. 6
      test/test_manifest.json

30
src/core/fonts.js

@ -4193,7 +4193,7 @@ var Font = (function FontClosure() {
toUnicode: null toUnicode: null
}; };
// Section 9.10.2 Mapping Character Codes to Unicode Values // Section 9.10.2 Mapping Character Codes to Unicode Values
if (properties.toUnicode) { if (properties.toUnicode && properties.toUnicode.length !== 0) {
map.toUnicode = properties.toUnicode; map.toUnicode = properties.toUnicode;
return map; return map;
} }
@ -4217,6 +4217,21 @@ var Font = (function FontClosure() {
// b) Look up the character name in the Adobe Glyph List (see the // b) Look up the character name in the Adobe Glyph List (see the
// Bibliography) to obtain the corresponding Unicode value. // Bibliography) to obtain the corresponding Unicode value.
if (glyphName === '' || !(glyphName in GlyphsUnicode)) { if (glyphName === '' || !(glyphName in GlyphsUnicode)) {
// (undocumented) c) A few heuristics to recognize unknown glyphs.
// NOTE: Adobe Reader does not perform this step, but OS X Preview does.
var code;
// Gxx glyph: 'G' followed by two characters parsed as a hexadecimal code
if (glyphName.length === 3 &&
glyphName[0] === 'G' &&
(code = parseInt(glyphName.substr(1), 16))) {
toUnicode[charcode] = String.fromCharCode(code);
}
// Cddd glyph: 'C' followed by a numeric character code
if (glyphName.length >= 3 &&
glyphName[0] === 'C' &&
(code = +glyphName.substr(1))) {
toUnicode[charcode] = String.fromCharCode(code);
}
continue; continue;
} }
toUnicode[charcode] = String.fromCharCode(GlyphsUnicode[glyphName]); toUnicode[charcode] = String.fromCharCode(GlyphsUnicode[glyphName]);
@ -5467,17 +5482,19 @@ var CFFFont = (function CFFFontClosure() {
}, },
getGlyphMapping: function CFFFont_getGlyphMapping() { getGlyphMapping: function CFFFont_getGlyphMapping() {
var cff = this.cff; var cff = this.cff;
var properties = this.properties;
var charsets = cff.charset.charset; var charsets = cff.charset.charset;
var charCodeToGlyphId = Object.create(null); var charCodeToGlyphId;
var glyphId; var glyphId;
if (this.properties.composite) { if (properties.composite) {
if (this.cff.isCIDFont) { charCodeToGlyphId = Object.create(null);
if (cff.isCIDFont) {
// If the font is actually a CID font then we should use the charset // If the font is actually a CID font then we should use the charset
// to map CIDs to GIDs. // to map CIDs to GIDs.
for (glyphId = 0; glyphId < charsets.length; glyphId++) { for (glyphId = 0; glyphId < charsets.length; glyphId++) {
var cidString = String.fromCharCode(charsets[glyphId]); var cidString = String.fromCharCode(charsets[glyphId]);
var charCode = this.properties.cMap.map.indexOf(cidString); var charCode = properties.cMap.map.indexOf(cidString);
charCodeToGlyphId[charCode] = glyphId; charCodeToGlyphId[charCode] = glyphId;
} }
} else { } else {
@ -5491,7 +5508,8 @@ var CFFFont = (function CFFFontClosure() {
} }
var encoding = cff.encoding ? cff.encoding.encoding : null; var encoding = cff.encoding ? cff.encoding.encoding : null;
return type1FontGlyphMapping(this.properties, encoding, charsets); charCodeToGlyphId = type1FontGlyphMapping(properties, encoding, charsets);
return charCodeToGlyphId;
} }
}; };

1
test/pdfs/.gitignore vendored

@ -33,6 +33,7 @@
!issue918.pdf !issue918.pdf
!issue1905.pdf !issue1905.pdf
!issue2833.pdf !issue2833.pdf
!issue4550.pdf
!rotated.pdf !rotated.pdf
!issue1249.pdf !issue1249.pdf
!issue1171.pdf !issue1171.pdf

BIN
test/pdfs/issue4550.pdf

Binary file not shown.

6
test/test_manifest.json

@ -352,6 +352,12 @@
"lastPage": 1, "lastPage": 1,
"type": "eq" "type": "eq"
}, },
{ "id": "issue4550-text",
"file": "pdfs/issue4550.pdf",
"md5": "d64cfc4b50e225f596130d9938e8d5cc",
"rounds": 1,
"type": "text"
},
{ "id": "jai-pdf", { "id": "jai-pdf",
"file": "pdfs/jai.pdf", "file": "pdfs/jai.pdf",
"md5": "1f5dd128c3757420a881a155f2f8ace3", "md5": "1f5dd128c3757420a881a155f2f8ace3",

Loading…
Cancel
Save