From 19b670458e82b579b81ecfa17033c5073106072a Mon Sep 17 00:00:00 2001 From: notmasteryet Date: Tue, 13 Mar 2012 18:59:16 -0500 Subject: [PATCH 1/6] Remove empty gryphs; improve glyph unicode movements --- src/fonts.js | 84 +++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 70 insertions(+), 14 deletions(-) diff --git a/src/fonts.js b/src/fonts.js index df0acbbc5..e3178ae9d 100644 --- a/src/fonts.js +++ b/src/fonts.js @@ -1656,6 +1656,30 @@ var Font = (function FontClosure() { glyf.data = newGlyfData.subarray(0, writeOffset); } + function findEmptyGlyphs(locaTable, isGlyphLocationsLong, emptyGlyphIds) { + var itemSize, itemDecode; + if (isGlyphLocationsLong) { + itemSize = 4; + itemDecode = function fontItemDecodeLong(data, offset) { + return (data[offset] << 24) | (data[offset + 1] << 16) | + (data[offset + 2] << 8) | data[offset + 3]; + }; + } else { + itemSize = 2; + itemDecode = function fontItemDecode(data, offset) { + return (data[offset] << 9) | (data[offset + 1] << 1); + }; + } + var data = locaTable.data, length = data.length; + var lastOffset = itemDecode(data, 0); + for (var i = itemSize, j = 0; i < length; i += itemSize, j++) { + var offset = itemDecode(data, i); + if (offset == lastOffset) + emptyGlyphIds[j] = true; + lastOffset = offset; + } + } + function readGlyphNameMap(post, properties) { var start = (font.start ? font.start : 0) + post.offset; font.pos = start; @@ -1782,11 +1806,15 @@ var Font = (function FontClosure() { sanitizeMetrics(font, hhea, hmtx, numGlyphs); sanitizeMetrics(font, vhea, vmtx, numGlyphs); + var isGlyphLocationsLong = int16([head.data[50], head.data[51]]); if (head && loca && glyf) { - var isGlyphLocationsLong = int16([head.data[50], head.data[51]]); sanitizeGlyphLocations(loca, glyf, numGlyphs, isGlyphLocationsLong); } + var emptyGlyphIds = []; + if (glyf) + findEmptyGlyphs(loca, isGlyphLocationsLong, emptyGlyphIds); + // Sanitizer reduces the glyph advanceWidth to the maxAdvanceWidth // Sometimes it's 0. That needs to be fixed if (hhea.data[10] == 0 && hhea.data[11] == 0) { @@ -1918,6 +1946,15 @@ var Font = (function FontClosure() { } } + // remove glyph references outside range of avaialable glyphs or empty + for (var i = ids.length - 1; i >= 0; i--) { + if (ids[i] < numGlyphs && + (!emptyGlyphIds[ids[i]] || this.isSymbolicFont)) + continue; + ids.splice(i, 1); + glyphs.splice(i, 1); + } + if (hasShortCmap && this.hasEncoding && !this.isSymbolicFont) { // Re-encode short map encoding to unicode -- that simplifies the // resolution of MacRoman encoded glyphs logic for TrueType fonts: @@ -1951,9 +1988,11 @@ var Font = (function FontClosure() { // Re-encode cmap encoding to unicode, based on the 'post' table data // diffrence array or base encoding var reverseMap = []; - for (var i = 0, ii = glyphs.length; i < ii; i++) + for (var i = 0, ii = glyphs.length; i < ii; i++) { reverseMap[glyphs[i].unicode] = i; + } + var backtrackReplacements = []; for (var i = 0, ii = glyphs.length; i < ii; i++) { var code = glyphs[i].unicode; var changeCode = false; @@ -1966,13 +2005,36 @@ var Font = (function FontClosure() { } if (glyphName in GlyphsUnicode) { var unicode = GlyphsUnicode[glyphName]; - if (!unicode || (unicode in reverseMap)) - continue; // unknown glyph name or its place is taken + if (!unicode || reverseMap[unicode] === i) + continue; // unknown glyph name or in its own place + + if (unicode in reverseMap) { + backtrackReplacements[unicode] = { + index: i, + code: code, + changeCode: changeCode + }; + continue; // its place is taken + } - glyphs[i].unicode = unicode; - reverseMap[unicode] = i; - if (changeCode) - toFontChar[code] = unicode; + var index = i; + while (true) { + glyphs[index].unicode = unicode; + reverseMap[unicode] = index; + if (changeCode) + toFontChar[code] = unicode; + + // checking if available place can be used by other glyph + var backtrack = backtrackReplacements[code]; + if (!backtrack) + break; + + delete backtrackReplacements[code]; + index = backtrack.index; + code = backtrack.code; + changeCode = backtrack.changeCode; + unicode = code; + } } this.useToFontChar = true; } @@ -1988,12 +2050,6 @@ var Font = (function FontClosure() { this.useToFontChar = true; } - // remove glyph references outside range of avaialable glyphs - for (var i = 0, ii = ids.length; i < ii; i++) { - if (ids[i] >= numGlyphs) - ids[i] = 0; - } - createGlyphNameMap(glyphs, ids, properties); this.glyphNameMap = properties.glyphNameMap; From a66b1a7ad3a271daabd6d25ed6e420c476905fe0 Mon Sep 17 00:00:00 2001 From: notmasteryet Date: Sat, 17 Mar 2012 23:05:22 -0500 Subject: [PATCH 2/6] Fix unicode re-assignment; MacRomanEncoding detection --- src/fonts.js | 56 ++++++++++++++++++++++++---------------------------- 1 file changed, 26 insertions(+), 30 deletions(-) diff --git a/src/fonts.js b/src/fonts.js index e3178ae9d..0c26ec884 100644 --- a/src/fonts.js +++ b/src/fonts.js @@ -174,7 +174,6 @@ var Encodings = { '', '', 'Lslash', 'Oslash', 'OE', 'ordmasculine', '', '', '', '', '', 'ae', '', '', '', 'dotlessi', '', '', 'lslash', 'oslash', 'oe', 'germandbls'], WinAnsiEncoding: ['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', - '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', 'space', 'exclam', 'quotedbl', 'numbersign', 'dollar', 'percent', 'ampersand', 'quotesingle', 'parenleft', 'parenright', 'asterisk', 'plus', @@ -1947,12 +1946,22 @@ var Font = (function FontClosure() { } // remove glyph references outside range of avaialable glyphs or empty + var glyphsRemoved = 0; for (var i = ids.length - 1; i >= 0; i--) { if (ids[i] < numGlyphs && (!emptyGlyphIds[ids[i]] || this.isSymbolicFont)) continue; ids.splice(i, 1); glyphs.splice(i, 1); + glyphsRemoved++; + } + + // heuristics: if removed more than 2 glyphs encoding WinAnsiEncoding + // does not set properly + if (glyphsRemoved > 2) { + warn('Switching TrueType encoding to MacRomanEncoding for ' + + this.name + ' font'); + encoding = Encodings.MacRomanEncoding; } if (hasShortCmap && this.hasEncoding && !this.isSymbolicFont) { @@ -1992,7 +2001,7 @@ var Font = (function FontClosure() { reverseMap[glyphs[i].unicode] = i; } - var backtrackReplacements = []; + var newGlyphUnicodes = []; for (var i = 0, ii = glyphs.length; i < ii; i++) { var code = glyphs[i].unicode; var changeCode = false; @@ -2008,36 +2017,23 @@ var Font = (function FontClosure() { if (!unicode || reverseMap[unicode] === i) continue; // unknown glyph name or in its own place - if (unicode in reverseMap) { - backtrackReplacements[unicode] = { - index: i, - code: code, - changeCode: changeCode - }; - continue; // its place is taken - } - - var index = i; - while (true) { - glyphs[index].unicode = unicode; - reverseMap[unicode] = index; - if (changeCode) - toFontChar[code] = unicode; - - // checking if available place can be used by other glyph - var backtrack = backtrackReplacements[code]; - if (!backtrack) - break; - - delete backtrackReplacements[code]; - index = backtrack.index; - code = backtrack.code; - changeCode = backtrack.changeCode; - unicode = code; - } + newGlyphUnicodes[i] = unicode; + if (changeCode) + toFontChar[code] = unicode; + delete reverseMap[code]; } - this.useToFontChar = true; } + for (var index in newGlyphUnicodes) { + var unicode = newGlyphUnicodes[index]; + if (reverseMap[unicode]) { + // avoiding assigning to the same unicode + glyphs[index].unicode = unusedUnicode++; + continue; + } + glyphs[index].unicode = unicode; + reverseMap[unicode] = index; + } + this.useToFontChar = true; } // Moving all symbolic font glyphs into 0xF000 - 0xF0FF range. From b2b78cfeb2a07e6301d6f3238954b77c07bc7322 Mon Sep 17 00:00:00 2001 From: notmasteryet Date: Sat, 17 Mar 2012 23:13:54 -0500 Subject: [PATCH 3/6] Add tests from #1309 and #1317 --- test/test_manifest.json | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/test/test_manifest.json b/test/test_manifest.json index 16d924151..9cfe673f2 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -515,5 +515,19 @@ "pageLimit": 2, "link": true, "type": "eq" + }, + { "id": "issue1309", + "file": "pdfs/issue1309.pdf", + "md5": "e835fb7f3dab3073ad37d0bd3c6399fa", + "rounds": 1, + "link": true, + "type": "eq" + }, + { "id": "issue1317", + "file": "pdfs/issue1317.pdf", + "md5": "6fb46275b30c48c8985617d4f86199e3", + "rounds": 1, + "link": true, + "type": "eq" } ] From 73c9f8797b7c35f62f7d29f3cc77bfbd4153be46 Mon Sep 17 00:00:00 2001 From: notmasteryet Date: Sat, 17 Mar 2012 23:22:42 -0500 Subject: [PATCH 4/6] Add missing .link files; minor fix --- src/fonts.js | 3 +-- test/pdfs/issue1309.pdf.link | 1 + test/pdfs/issue1317.pdf.link | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) create mode 100644 test/pdfs/issue1309.pdf.link create mode 100644 test/pdfs/issue1317.pdf.link diff --git a/src/fonts.js b/src/fonts.js index 0c26ec884..a13c948ab 100644 --- a/src/fonts.js +++ b/src/fonts.js @@ -1997,9 +1997,8 @@ var Font = (function FontClosure() { // Re-encode cmap encoding to unicode, based on the 'post' table data // diffrence array or base encoding var reverseMap = []; - for (var i = 0, ii = glyphs.length; i < ii; i++) { + for (var i = 0, ii = glyphs.length; i < ii; i++) reverseMap[glyphs[i].unicode] = i; - } var newGlyphUnicodes = []; for (var i = 0, ii = glyphs.length; i < ii; i++) { diff --git a/test/pdfs/issue1309.pdf.link b/test/pdfs/issue1309.pdf.link new file mode 100644 index 000000000..1351d45ef --- /dev/null +++ b/test/pdfs/issue1309.pdf.link @@ -0,0 +1 @@ +http://www.lufthansa.com/mediapool/pdf/31/media_907231.pdf diff --git a/test/pdfs/issue1317.pdf.link b/test/pdfs/issue1317.pdf.link new file mode 100644 index 000000000..67c4d50ef --- /dev/null +++ b/test/pdfs/issue1317.pdf.link @@ -0,0 +1 @@ +http://iliad.fr/presse/2012/CP_080312_Free_mobile.pdf From 8e060bdbca7fab0d7f65d4a247b63d24a825fd58 Mon Sep 17 00:00:00 2001 From: Kalervo Kujala Date: Thu, 22 Mar 2012 15:15:27 +0200 Subject: [PATCH 5/6] Use [] instead of new Array(...). --- src/bidi.js | 12 ++++-------- src/colorspace.js | 2 +- src/fonts.js | 4 ++-- src/function.js | 4 ++-- 4 files changed, 9 insertions(+), 13 deletions(-) diff --git a/src/bidi.js b/src/bidi.js index aab477dbc..5f18e5303 100644 --- a/src/bidi.js +++ b/src/bidi.js @@ -132,9 +132,9 @@ var bidi = PDFJS.bidi = (function bidiClosure() { // get types, fill arrays - var chars = new Array(strLength); - var types = new Array(strLength); - var oldtypes = new Array(strLength); + var chars = []; + var types = []; + var oldtypes = []; var numBidi = 0; for (var i = 0; i < strLength; ++i) { @@ -176,16 +176,12 @@ var bidi = PDFJS.bidi = (function bidiClosure() { } } - var levels = new Array(strLength); + var levels = []; for (var i = 0; i < strLength; ++i) { levels[i] = startLevel; } - var diffChars = new Array(strLength); - var diffLevels = new Array(strLength); - var diffTypes = new Array(strLength); - /* X1-X10: skip most of this, since we are NOT doing the embeddings. */ diff --git a/src/colorspace.js b/src/colorspace.js index d3d392361..e1df7c725 100644 --- a/src/colorspace.js +++ b/src/colorspace.js @@ -220,7 +220,7 @@ var AlternateCS = (function AlternateCSClosure() { var baseNumComps = base.numComps; var baseBuf = new Uint8Array(baseNumComps * length); var numComps = this.numComps; - var scaled = new Array(numComps); + var scaled = []; for (var i = 0; i < length; i += numComps) { for (var z = 0; z < numComps; ++z) diff --git a/src/fonts.js b/src/fonts.js index 8473196a0..43efea34d 100644 --- a/src/fonts.js +++ b/src/fonts.js @@ -3652,7 +3652,7 @@ var CFFParser = (function CFFParserClosure() { var name = index.get(i); // OTS doesn't allow names to be over 127 characters. var length = Math.min(name.length, 127); - var data = new Array(length); + var data = []; // OTS also only permits certain characters in the name. for (var j = 0; j < length; ++j) { var c = name[j]; @@ -4502,7 +4502,7 @@ var CFFCompiler = (function CFFCompilerClosure() { return this.compileTypedArray(fdSelect); }, compileTypedArray: function compileTypedArray(data) { - var out = new Array(data.length); + var out = []; for (var i = 0, ii = data.length; i < ii; ++i) out[i] = data[i]; return out; diff --git a/src/function.js b/src/function.js index 4f81158f0..5ff5840c5 100644 --- a/src/function.js +++ b/src/function.js @@ -81,7 +81,7 @@ var PDFFunction = (function PDFFunctionClosure() { function toMultiArray(arr) { var inputLength = arr.length; var outputLength = arr.length / 2; - var out = new Array(outputLength); + var out = []; var index = 0; for (var i = 0; i < inputLength; i += 2) { out[index] = [arr[i], arr[i + 1]]; @@ -364,7 +364,7 @@ var PDFFunction = (function PDFFunctionClosure() { return cache.get(key); var stack = evaluator.execute(initialStack); - var transformed = new Array(numOutputs); + var transformed = []; for (i = numOutputs - 1; i >= 0; --i) { var out = stack.pop(); var rangeIndex = 2 * i; From be8daec13f1faa2c3d7ad44e43f7bf0346e1ca8a Mon Sep 17 00:00:00 2001 From: gigaherz Date: Sat, 24 Mar 2012 16:22:55 +0100 Subject: [PATCH 6/6] Adding myself to the contributor list. --- LICENSE | 1 + 1 file changed, 1 insertion(+) diff --git a/LICENSE b/LICENSE index a3e99545a..f01ded412 100644 --- a/LICENSE +++ b/LICENSE @@ -12,6 +12,7 @@ Jakob Miland Artur Adib Brendan Dahl + David Quintana Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"),