From d58dac0fd384a803d41b92928706446bdbaae53c Mon Sep 17 00:00:00 2001 From: notmasteryet Date: Mon, 22 Aug 2011 22:50:17 -0500 Subject: [PATCH 1/2] Encoding for standard fonts; symbols encoding --- fonts.js | 3 -- pdf.js | 109 ++++++++++++++++++++++++++++++++++++++++--------------- 2 files changed, 79 insertions(+), 33 deletions(-) diff --git a/fonts.js b/fonts.js index 3d47ef4a2..9049255e7 100755 --- a/fonts.js +++ b/fonts.js @@ -404,9 +404,6 @@ var Font = (function Font() { (fontName.indexOf('Italic') != -1); this.loadedName = fontName.split('-')[0]; this.loading = false; - this.charsToUnicode = function(s) { - return s; - }; return; } diff --git a/pdf.js b/pdf.js index 469eec578..53d3b9853 100644 --- a/pdf.js +++ b/pdf.js @@ -3851,6 +3851,44 @@ var Encodings = { 'ucircumflex', 'udieresis', 'yacute', 'thorn', 'ydieresis' ]); }, + get symbolsEncoding() { + return shadow(this, 'symbolsEncoding', + [,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, + 'space', 'exclam', 'universal', 'numbersign', 'existential', 'percent', + 'ampersand', 'suchthat', 'parenleft', 'parenright', 'asteriskmath', + 'plus', 'comma', 'minus', 'period', 'slash', 'zero', 'one', 'two', + 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'colon', + 'semicolon', 'less', 'equal', 'greater', 'question', 'congruent', + 'Alpha', 'Beta', 'Chi', 'Delta', 'Epsilon', 'Phi', 'Gamma', 'Eta', + 'Iota', 'theta1', 'Kappa', 'Lambda', 'Mu', 'Nu', 'Omicron', 'Pi', + 'Theta', 'Rho', 'Sigma', 'Tau', 'Upsilon', 'sigma1', 'Omega', 'Xi', + 'Psi', 'Zeta', 'bracketleft', 'therefore', 'bracketright', + 'perpendicular', 'underscore', 'radicalex', 'alpha', 'beta', 'chi', + 'delta', 'epsilon', 'phi', 'gamma', 'eta', 'iota', 'phi1', 'kappa', + 'lambda', 'mu', 'nu', 'omicron', 'pi', 'theta', 'rho', 'sigma', 'tau', + 'upsilon', 'omega1', 'omega', 'xi', 'psi', 'zeta', 'braceleft', 'bar', + 'braceright', 'similar',,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 'Euro', + 'Upsilon1', 'minute', 'lessequal', 'fraction', 'infinity', 'florin', + 'club', 
'diamond', 'heart', 'spade', 'arrowboth', 'arrowleft', 'arrowup', + 'arrowright', 'arrowdown', 'degree', 'plusminus', 'second', + 'greaterequal', 'multiply', 'proportional', 'partialdiff', 'bullet', + 'divide', 'notequal', 'equivalence', 'approxequal', 'ellipsis', + 'arrowvertex', 'arrowhorizex', 'carriagereturn', 'aleph', 'Ifraktur', + 'Rfraktur', 'weierstrass', 'circlemultiply', 'circleplus', 'emptyset', + 'intersection', 'union', 'propersuperset', 'reflexsuperset', 'notsubset', + 'propersubset', 'reflexsubset', 'element', 'notelement', 'angle', + 'gradient', 'registerserif', 'copyrightserif', 'trademarkserif', + 'product', 'radical', 'dotmath', 'logicalnot', 'logicaland', 'logicalor', + 'arrowdblboth', 'arrowdblleft', 'arrowdblup', 'arrowdblright', + 'arrowdbldown', 'lozenge', 'angleleft', 'registersans', 'copyrightsans', + 'trademarksans', 'summation', 'parenlefttp', 'parenleftex', + 'parenleftbt', 'bracketlefttp', 'bracketleftex', 'bracketleftbt', + 'bracelefttp', 'braceleftmid', 'braceleftbt', 'braceex', ,'angleright', + 'integral', 'integraltp', 'integralex', 'integralbt', 'parenrighttp', + 'parenrightex', 'parenrightbt', 'bracketrighttp', 'bracketrightex', + 'bracketrightbt', 'bracerighttp', 'bracerightmid', 'bracerightbt' + ]); + }, get zapfDingbatsEncoding() { return shadow(this, 'zapfDingbatsEncoding', [,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, @@ -4118,24 +4156,6 @@ var PartialEvaluator = (function() { fd = fontDict.get('FontDescriptor'); } - if (!fd) { - var baseFontName = fontDict.get('BaseFont'); - if (!IsName(baseFontName)) - return null; - // Using base font name as a font name. 
- return { - name: baseFontName.name.replace(/[\+,\-]/g, '_'), - fontDict: fontDict, - properties: {} - }; - } - - var descriptor = xref.fetch(fd); - - var fontName = xref.fetchIfRef(descriptor.get('FontName')); - assertWellFormed(IsName(fontName), 'invalid font name'); - fontName = fontName.name.replace(/[\+,\-]/g, '_'); - var encodingMap = {}; var charset = []; if (compositeFont) { @@ -4200,14 +4220,6 @@ var PartialEvaluator = (function() { GlyphsUnicode[data.name]; } } - - // Get the font charset if any - var charset = descriptor.get('CharSet'); - if (charset) { - assertWellFormed(IsString(charset), 'invalid charset'); - charset = charset.split('/'); - charset.shift(); - } } else if (IsName(encoding)) { var encoding = Encodings[encoding.name]; if (!encoding) @@ -4217,11 +4229,10 @@ var PartialEvaluator = (function() { for (var j = 0; j < encoding.length; j++) encodingMap[index++] = GlyphsUnicode[encoding[j]]; + // FirstChar and Widths are required + // (except for the 14 standard fonts) var firstChar = xref.fetchIfRef(fontDict.get('FirstChar')); - var widths = xref.fetchIfRef(fontDict.get('Widths')); - assertWellFormed(IsArray(widths) && IsInt(firstChar), - 'invalid font Widths or FirstChar'); - + var widths = xref.fetchIfRef(fontDict.get('Widths')) || []; for (var j = 0; j < widths.length; j++) { if (widths[j]) charset.push(encoding[j + firstChar]); @@ -4296,6 +4307,36 @@ var PartialEvaluator = (function() { } } + if (!fd) { + var baseFontName = fontDict.get('BaseFont'); + if (!IsName(baseFontName)) + return null; + // Using base font name as a font name. 
+ baseFontName = baseFontName.name.replace(/[\+,\-]/g, '_'); + if (baseFontName == 'Symbol') { + // special case for symbols + var encoding = Encodings.symbolsEncoding; + for (var i = 0, n = encoding.length, j; i < n; i++) { + if (!(j = encoding[i])) + continue; + encodingMap[i] = GlyphsUnicode[j] || 0; + } + } + return { + name: baseFontName, + fontDict: fontDict, + properties: { + encoding: encodingMap + } + }; + } + + var descriptor = xref.fetch(fd); + + var fontName = xref.fetchIfRef(descriptor.get('FontName')); + assertWellFormed(IsName(fontName), 'invalid font name'); + fontName = fontName.name.replace(/[\+,\-]/g, '_'); + var fontFile = descriptor.get('FontFile', 'FontFile2', 'FontFile3'); if (fontFile) { fontFile = xref.fetchIfRef(fontFile); @@ -4307,6 +4348,14 @@ var PartialEvaluator = (function() { } } + if (descriptor.has('CharSet')) { + // Get the font charset if any (meaningful only in Type 1) + charset = descriptor.get('CharSet'); + assertWellFormed(IsString(charset), 'invalid charset'); + charset = charset.split('/'); + charset.shift(); + } + var widths = fontDict.get('Widths'); if (widths) { var glyphWidths = {}; From b90c6945b81db9de81f0a55f25b0bd75d01f770b Mon Sep 17 00:00:00 2001 From: notmasteryet Date: Tue, 23 Aug 2011 17:06:45 -0500 Subject: [PATCH 2/2] Sanitizing base font name before comparison with 'Symbol' --- pdf.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pdf.js b/pdf.js index 53d3b9853..47363401b 100644 --- a/pdf.js +++ b/pdf.js @@ -4313,7 +4313,7 @@ var PartialEvaluator = (function() { return null; // Using base font name as a font name. baseFontName = baseFontName.name.replace(/[\+,\-]/g, '_'); - if (baseFontName == 'Symbol') { + if (/^Symbol(_?(Bold|Italic))*$/.test(baseFontName)) { // special case for symbols var encoding = Encodings.symbolsEncoding; for (var i = 0, n = encoding.length, j; i < n; i++) {