diff --git a/src/evaluator.js b/src/evaluator.js index e07394201..23c9d1f65 100644 --- a/src/evaluator.js +++ b/src/evaluator.js @@ -481,7 +481,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { var toUnicode = dict.get('ToUnicode') || baseDict.get('ToUnicode'); if (toUnicode) - properties.toUnicode = this.readToUnicode(toUnicode, xref); + properties.toUnicode = this.readToUnicode(toUnicode, xref, properties); if (properties.composite) { // CIDSystemInfo helps to match CID to glyphs @@ -537,7 +537,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { properties.hasEncoding = hasEncoding; }, - readToUnicode: function PartialEvaluator_readToUnicode(toUnicode, xref) { + readToUnicode: function PartialEvaluator_readToUnicode(toUnicode, xref, + properties) { var cmapObj = toUnicode; var charToUnicode = []; if (isName(cmapObj)) { @@ -626,6 +627,10 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { } } else if (octet == 0x3E) { if (token.length) { + // XXX guessing chars size by checking number size in the CMap + if (token.length <= 2 && properties.composite) + properties.wideChars = false; + if (token.length <= 4) { // parsing hex number tokens.push(parseInt(token, 16)); @@ -843,6 +848,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() { length1: length1, length2: length2, composite: composite, + wideChars: composite, fixedPitch: false, fontMatrix: dict.get('FontMatrix') || IDENTITY_MATRIX, firstChar: firstChar || 0, diff --git a/src/fonts.js b/src/fonts.js index 7bd3ddd06..22037e724 100644 --- a/src/fonts.js +++ b/src/fonts.js @@ -789,6 +789,7 @@ var Font = (function FontClosure() { this.widths = properties.widths; this.defaultWidth = properties.defaultWidth; this.composite = properties.composite; + this.wideChars = properties.wideChars; this.hasEncoding = properties.hasEncoding; this.fontMatrix = properties.fontMatrix; @@ -2520,7 +2521,7 @@ var Font = (function FontClosure() { glyphs = []; - if (this.composite) { + if (this.wideChars) { // composite fonts have multi-byte strings convert the string from // single-byte to multi-byte // XXX assuming CIDFonts are two-byte - later need to extract the diff --git a/test/pdfs/issue1597.pdf.link b/test/pdfs/issue1597.pdf.link new file mode 100644 index 000000000..94d54084e --- /dev/null +++ b/test/pdfs/issue1597.pdf.link @@ -0,0 +1 @@ +http://content1d.omroep.nl/227cbd4ae54f95dd466a7a8475fec2ea/4f95b377/nos/docs/230412_brief_koningin.pdf diff --git a/test/test_manifest.json b/test/test_manifest.json index b6879e1ec..38e7d7ed2 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -560,6 +560,13 @@ "link": true, "type": "eq" }, + { "id": "issue1597", + "file": "pdfs/issue1597.pdf", + "md5": "a5ebef467fd6e2fc0aeb56c9eb725ae3", + "rounds": 1, + "link": true, + "type": "eq" + }, { "id": "issue1317", "file": "pdfs/issue1317.pdf", "md5": "6fb46275b30c48c8985617d4f86199e3",