From 6b6e97dff6c6dc6f85bed0de3c3861630628e9f5 Mon Sep 17 00:00:00 2001 From: Adil Allawi <adil@diwan.com> Date: Tue, 12 Jul 2011 03:58:09 +0100 Subject: [PATCH] cleanup, added new flag to the font properties called compositeFont. If true, the encoding and ShowText strings must be multi-byte --- fonts.js | 21 +++++++++------------ pdf.js | 53 +++++++++++++++++++++++++++-------------------------- 2 files changed, 36 insertions(+), 38 deletions(-) diff --git a/fonts.js b/fonts.js index 2728afd89..3e6756624 100755 --- a/fonts.js +++ b/fonts.js @@ -389,6 +389,7 @@ var Font = (function() { var data; switch (properties.type) { case 'Type1': + case 'CIDFontType0': var cff = new CFF(name, file, properties); this.mimetype = 'font/opentype'; @@ -397,15 +398,7 @@ var Font = (function() { break; case 'TrueType': - this.mimetype = 'font/opentype'; - - // Repair the TrueType file if it is can be damaged in the point of - // view of the sanitizer - data = this.checkAndRepair(name, file, properties); - break; - - case 'Type0': - //this is a Truetype font + case 'CIDFontType2': this.mimetype = 'font/opentype'; // Repair the TrueType file if it is can be damaged in the point of @@ -421,6 +414,7 @@ var Font = (function() { this.type = properties.type; //use the type to test if the string is single or multi-byte this.id = Fonts.registerFont(name, data, properties); this.loadedName = 'pdfFont' + this.id; + this.compositeFont = properties.compositeFont; }; function stringToArray(str) { @@ -1150,8 +1144,11 @@ var Font = (function() { if (!charsCache) charsCache = this.charsCache = Object.create(null); - if (this.type == "Type0") { - //string needs to be converted from byte to multi-byte assume for now two-byte + if (this.compositeFont) { + // composite fonts have multi-byte strings + // convert the string from single-byte to multi-byte + // XXX assuming CIDFonts are two-byte - later need to extract the correct byte encoding + // according to the PDF spec str = ''; var multiByteStr = ""; var length = chars.length; @@ -1162,7 +1159,7 @@ var Font = (function() { byte2 = 0; else byte2 = chars.charCodeAt(i) & 0xFF; - multiByteStr += String.fromCharCode((byte1<<8) | byte2); + multiByteStr += String.fromCharCode((byte1 << 8) | byte2); } str = multiByteStr; } diff --git a/pdf.js b/pdf.js index 888addd1b..2df2f3f14 100644 --- a/pdf.js +++ b/pdf.js @@ -64,14 +64,6 @@ function stringToBytes(str) { return bytes; } -function singleByteToMultiByteString (str) { - var multiByteStr = ""; - var bytes = stringToBytes(e); - for (var j = 0; j<bytes.length; j++) { - multiByteStr += String.fromCharCode((bytes[j++]<<16) | bytes[j]); - } - return multiByteStr; -} var Stream = (function() { function constructor(arrayBuffer, start, length, dict) { this.bytes = new Uint8Array(arrayBuffer); @@ -3635,16 +3627,22 @@ var PartialEvaluator = (function() { var fd; var descendant = []; var subType = fontDict.get('Subtype'); + var compositeFont = false; assertWellFormed(IsName(subType), 'invalid font Subtype'); - //If font is a composite get the FontDescriptor from the descendant font - if (subType.name == "Type0") + //If font is a composite + // - get the descendant font + // - set the type according to the descendant font + // - get the FontDescriptor from the descendant font + if (subType.name == 'Type0') { - var df = fontDict.get("DescendantFonts"); + var df = fontDict.get('DescendantFonts'); if (!df) return null; + compositeFont = true; descendant = xref.fetch(df[0]); - fd = descendant.get("FontDescriptor"); + subType = descendant.get('Subtype'); + fd = descendant.get('FontDescriptor'); } else { fd = fontDict.get('FontDescriptor'); } @@ -3665,24 +3663,26 @@ var PartialEvaluator = (function() { var encodingMap = {}; var charset = []; - if (subType.name == 'Type0') { - //XXX CIDFont support - only identity CID Encoding for now + if (compositeFont) { + //Special CIDFont support + //XXX only identity CID Encodings supported for now var encoding = xref.fetchIfRef(fontDict.get('Encoding')); if (IsName(encoding)) { //Encoding is a predefined CMap if (encoding.name == 'Identity-H') { - if (descendant.get('Subtype').name == 'CIDFontType2') - { - //Extract an encoding from the CIDToGIDMap - var glyphsStream = xref.fetchIfRef(descendant.get('CIDToGIDMap')); - var glyphsData = glyphsStream.getBytes(0); - var i = 0; - for (var j=0; j<glyphsData.length; j++) { - var glyphID = (glyphsData[j++]*0x100)+glyphsData[j]; - //encodingMap[glyphID] = i++; - charset.push(glyphID); + if (subType.name == 'CIDFontType2') { + var cidToGidMap = descendant.get('CIDToGIDMap'); + if (cidToGidMap) { + //Extract the charset from the CIDToGIDMap + var glyphsStream = xref.fetchIfRef(cidToGidMap); + var glyphsData = glyphsStream.getBytes(0); + var i = 0; + //glyph ids are big-endian 2-byte values + for (var j=0; j<glyphsData.length; j++) { + var glyphID = (glyphsData[j++] << 8) | glyphsData[j]; + charset.push(glyphID); + } } - encoding[0] = 0; } } else { TODO ('Need to support predefined CMaps see PDF 32000-1:2008 9.7.5.2 Predefined CMaps') @@ -3820,7 +3820,8 @@ var PartialEvaluator = (function() { flags: descriptor.get('Flags'), italicAngle: descriptor.get('ItalicAngle'), fixedPitch: false, - textMatrix: IDENTITY_MATRIX + textMatrix: IDENTITY_MATRIX, + compositeFont: compositeFont }; return {