Browse Source

Fix some bugs and add the beginning of a Type2 reader

Vivien Nicolas 14 years ago
parent
commit
c098f0b31f
  1. 279
      PDFFont.js
  2. 552
      cffStandardStrings.js
  3. 5
      pdf.js
  4. 1
      test.html

279
PDFFont.js

@ -17,6 +17,8 @@ var Base64Encoder = {
}; };
var TrueTypeFont = function(aFontName, aFontFile) { var TrueTypeFont = function(aFontName, aFontFile) {
if (Fonts.get(aFontName)) if (Fonts.get(aFontName))
return; return;
@ -30,6 +32,7 @@ var TrueTypeFont = function(aFontName, aFontFile) {
document.styleSheets[0].insertRule("@font-face { font-family: '" + aFontName + "'; src: " + url + " }", 0); document.styleSheets[0].insertRule("@font-face { font-family: '" + aFontName + "'; src: " + url + " }", 0);
}; };
var Type1Parser = function(aAsciiStream, aBinaryStream) { var Type1Parser = function(aAsciiStream, aBinaryStream) {
var lexer = new Lexer(aAsciiStream); var lexer = new Lexer(aAsciiStream);
@ -211,6 +214,8 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
}, },
pop: function() { pop: function() {
if (!this.length)
throw new Error("stackunderflow");
return this.__innerStack__.pop(); return this.__innerStack__.pop();
}, },
@ -220,10 +225,10 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
return this.__innerStack__[this.__innerStack__.length - 1]; return this.__innerStack__[this.__innerStack__.length - 1];
}, },
toString: function() { dump: function() {
log("=== Start Dumping operandStack ==="); log("=== Start Dumping operandStack ===");
var str = []; var str = [];
for (var i = 0; i < this.__innerStack__.length; i++) for (var i = 0; i < this.length; i++)
log(this.__innerStack__[i]); log(this.__innerStack__[i]);
log("=== End Dumping operandStack ==="); log("=== End Dumping operandStack ===");
}, },
@ -257,7 +262,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
}, },
pop: function() { pop: function() {
if (this.__innerStack__.length == 2) if (this.__innerStack__.length == 3)
return null; return null;
return this.__innerStack__.pop(); return this.__innerStack__.pop();
@ -275,7 +280,15 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
get length() { get length() {
return this.__innerStack__.length; return this.__innerStack__.length;
} },
dump: function() {
log("=== Start Dumping dictionaryStack ===");
var str = [];
for (var i = 0; i < this.length; i++)
log(this.__innerStack__[i]);
log("=== End Dumping dictionaryStack ===");
},
}; };
/* /*
@ -433,10 +446,9 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
var data = operandStack.pop(); var data = operandStack.pop();
var indexOrKey = operandStack.pop(); var indexOrKey = operandStack.pop();
var object = operandStack.pop(); var object = operandStack.pop();
//dump("put " + data + " in " + object + "[" + indexOrKey + "]"); dump("put " + data + " in " + object + "[" + indexOrKey + "]");
object.set ? object.set(indexOrKey, data) object.set ? object.set(indexOrKey, data)
: object[indexOrKey] = data; : object[indexOrKey] = data;
break; break;
case "pop": case "pop":
@ -454,7 +466,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
var indexOrKey = operandStack.pop(); var indexOrKey = operandStack.pop();
var object = operandStack.pop(); var object = operandStack.pop();
var data = object.get ? object.get(indexOrKey) : object[indexOrKey]; var data = object.get ? object.get(indexOrKey) : object[indexOrKey];
dump("get " + obj + "[" + indexOrKey + "]: " + data); dump("get " + object + "[" + indexOrKey + "]: " + data);
operandStack.push(data); operandStack.push(data);
break; break;
@ -501,6 +513,8 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
var value = operandStack.pop(); var value = operandStack.pop();
var key = operandStack.pop(); var key = operandStack.pop();
// XXX we don't want to do that here but for some reasons the names
// are different between what is declared and the FontName directive
if (key == "FontName" && Fonts.get(value)) { if (key == "FontName" && Fonts.get(value)) {
// The font has already be decoded, stop! // The font has already be decoded, stop!
return true; return true;
@ -515,6 +529,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
var key = operandStack.pop(); var key = operandStack.pop();
dump("definefont " + font + " with key: " + key); dump("definefont " + font + " with key: " + key);
Fonts.set(key, font); Fonts.set(key, font);
operandStack.push(font);
break; break;
case "known": case "known":
@ -532,7 +547,9 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
case "eexec": case "eexec":
// All the first segment data has been read, decrypt the second segment // All the first segment data has been read, decrypt the second segment
// and start interpreting it in order to decode it // and start interpreting it in order to decode it
var file = operandStack.pop();
var eexecString = decrypt(aBinaryStream, kEexecEncryptionKey, 4).join(""); var eexecString = decrypt(aBinaryStream, kEexecEncryptionKey, 4).join("");
dump(eexecString);
lexer = new Lexer(new StringStream(eexecString)); lexer = new Lexer(new StringStream(eexecString));
break; break;
@ -553,7 +570,7 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
var newOperand = operandStack.peek(); var newOperand = operandStack.peek();
for (var i = 0; i < operands.length; i++) while (operands.length)
operandStack.push(operands.pop()); operandStack.push(operands.pop());
operandStack.push(newOperand); operandStack.push(newOperand);
@ -620,11 +637,14 @@ var Type1Parser = function(aAsciiStream, aBinaryStream) {
}; };
var type1hack = false;
var Type1Font = function(aFontName, aFontFile) { var Type1Font = function(aFontName, aFontFile) {
// All Type1 font program should begin with the comment %! // All Type1 font program should begin with the comment %!
if (aFontFile.getByte() != 0x25 || aFontFile.getByte() != 0x21) if (aFontFile.getByte() != 0x25 || aFontFile.getByte() != 0x21)
error("Invalid file header"); error("Invalid file header");
if (!type1hack) {
type1hack= true;
var start = Date.now(); var start = Date.now();
var ASCIIStream = aFontFile.makeSubStream(0, aFontFile.dict.get("Length1"), aFontFile.dict); var ASCIIStream = aFontFile.makeSubStream(0, aFontFile.dict.get("Length1"), aFontFile.dict);
@ -635,5 +655,248 @@ var Type1Font = function(aFontName, aFontFile) {
var end = Date.now(); var end = Date.now();
//log("Time to parse font is:" + (end - start)); //log("Time to parse font is:" + (end - start));
this.convert();
}
}; };
// Module-level guard: the experimental conversion below runs at most once,
// no matter how many Type1Font instances call convert().
var hack = false;

Type1Font.prototype = {
  /**
   * Experimental Type1 -> Type2 charstring conversion.
   *
   * XXX This is a debugging aid: only the hard-coded font named below is
   * looked up in Fonts, and the conversion is attempted a single time
   * (see the `hack` guard).
   *
   * @return {Dict|undefined} the converted charstrings keyed by glyph name,
   *   or undefined when the font is not available or the conversion has
   *   already been done.
   * @throws {Error} "Type1 Only token" when a charstring contains a Type1
   *   operator for which no Type2 translation exists yet.
   */
  convert: function() {
    var fontName = "TACTGM+NimbusRomNo9L-Medi";
    var fontData = null;
    for (var name in Fonts.map) {
      if (name === fontName) {
        fontData = Fonts.get(name);
        break;
      }
    }

    if (!fontData || hack)
      return;
    hack = true;

    // Operators that exist in Type1 charstrings but not in Type2 ones.
    var t1Only = [
      "callothersubr",
      "closepath",
      "dotsection",
      "hsbw",
      "hstem3",
      "pop",
      "sbw",
      "seac",
      "setcurrentpoint",
      "vstem3"
    ];

    /*
     * The sequence and form of a Type 2 charstring program may be
     * represented as:
     * w? {hs* vs* cm* hm* mt subpath}? {mt subpath}* endchar
     *
     */
    var t2CharStrings = new Dict();
    var t1CharStrings = fontData.get("CharStrings");
    for (var key in t1CharStrings.map) {
      var t1CharString = t1CharStrings.get(key);
      var t2CharString = [];
      for (var i = 0; i < t1CharString.length; i++) {
        var token = t1CharString[i];
        switch (token) {
          case "hsbw":
            // Type1: "sbx wx hsbw". A Type2 charstring only starts with the
            // width, so the left sidebearing is dropped and the width is
            // written to the *output* program (never to the Type1 source,
            // which is being iterated).
            var width = t2CharString.pop();
            t2CharString.pop(); // left sidebearing point, unused
            t2CharString.push(width);
            break;

          default:
            if (t1Only.indexOf(token) != -1) {
              log(token + " need convert!\n");
              throw new Error("Type1 Only token");
            }
            t2CharString.push(token);
            break;
        }
      }
      log(key + "::" + t1CharStrings.get(key));
      log("type2::" + t2CharString);
      t2CharStrings.set(key, t2CharString);
    }
    return t2CharStrings;
  }
};
/**
 * Decode the raw bytes of a CFF DICT into a flat list of tokens.
 *
 * Operands are decoded to numbers; operators are replaced by whatever entry
 * aDictionary holds for them (aDictionary[b0], or aDictionary["12"][b1] for
 * two-byte "escape" operators), which is undefined for unknown operators.
 *
 * Operand encodings handled:
 *   28 b1 b2          signed 16-bit integer
 *   29 b1 b2 b3 b4    signed 32-bit integer
 *   30 nibbles...     real number, packed BCD terminated by an 0xf nibble
 *   32..246           b0 - 139
 *   247..250 b1       (b0 - 247) * 256 + b1 + 108
 *   251..254 b1       -(b0 - 251) * 256 - b1 - 108
 *
 * @param {Array} aString the DICT bytes.
 * @param {Object} aDictionary operator table (see CFFDictOps).
 * @return {Array} decoded operands and operator entries, in file order.
 * @throws {Error} if the reserved byte 255 is met.
 */
function decodeType2DictData(aString, aDictionary) {
  // Characters for the nibbles 0x0..0xe of a real number operand; 0xd is
  // reserved and contributes nothing, 0xf terminates the number.
  var realNibbles = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
                     ".", "E", "E-", "", "-"];

  var data = [];
  var count = aString.length;
  var i = 0;

  while (i < count) {
    var value = aString[i++];
    if (value < 0) {
      continue;
    } else if (value == 28) {
      value = aString[i++] << 8 | aString[i++];
      // The operand is a two's complement 16-bit integer: sign-extend it.
      value = (value << 16) >> 16;
    } else if (value == 29) {
      // The shifts already yield a signed 32-bit integer.
      value = aString[i++] << 24 |
              aString[i++] << 16 |
              aString[i++] << 8 |
              aString[i++];
    } else if (value == 30) {
      var text = "";
      var done = false;
      // Bounded by count so that a truncated operand cannot loop forever.
      while (!done && i < count) {
        var packed = aString[i++];
        var nibbles = [packed >> 4, packed & 15];
        for (var n = 0; n < nibbles.length; n++) {
          if (nibbles[n] == 15) {
            done = true;
            break;
          }
          text += realNibbles[nibbles[n]];
        }
      }
      value = parseFloat(text);
    } else if (value < 32) {
      if (value == 12) {
        value = aDictionary["12"][aString[i++]];
      } else {
        value = aDictionary[value];
      }
    } else if (value <= 246) {
      value = parseInt(value, 10) - 139;
    } else if (value <= 250) {
      value = ((value - 247) * 256) + parseInt(aString[i++], 10) + 108;
    } else if (value <= 254) {
      value = -((value - 251) * 256) - parseInt(aString[i++], 10) - 108;
    } else {
      throw new Error("Value should not be 255");
    }

    data.push(value);
  }

  return data;
}
/**
 * Beginning of a CFF (Type2) font reader.
 *
 * @param {string} aFilePath currently unused by the parser.
 *
 * The instance exposes a single method, parse(aStream), which reads the CFF
 * header, the Name, Top DICT and String INDEXes, and stores what it finds in
 * a private Dict that is logged at the end of the parsing.
 */
var Type2Parser = function(aFilePath) {
  var font = new Dict();

  // Turn on this flag for additional debugging logs
  var debug = true;

  function dump(aStr) {
    if (debug)
      log(aStr);
  }

  /**
   * Read a CFF INDEX structure starting at the current stream position:
   *   count (2 bytes, big-endian), offSize (1 byte),
   *   count + 1 offsets of offSize bytes each (big-endian, 1-based,
   *   relative to the byte preceding the object data), then the data.
   * An empty INDEX is made of the count field alone.
   *
   * @param aStream the stream to read from; left right after the INDEX.
   * @param aIsByte when true objects are arrays of bytes (getByte),
   *   otherwise arrays of characters (getChar).
   * @return {Array} one array per object stored in the INDEX.
   */
  function readIndex(aStream, aIsByte) {
    var count = (aStream.getByte() << 8) | aStream.getByte();
    if (count == 0) {
      dump("Found an empty index");
      return [];
    }

    var offsize = aStream.getByte();
    var offsets = [];
    for (var i = 0; i <= count; i++) {
      var offset = 0;
      // Multiplying (rather than shifting) keeps 4 bytes offsets positive.
      for (var j = 0; j < offsize; j++)
        offset = offset * 256 + aStream.getByte();
      offsets.push(offset);
    }
    dump("Found " + count + " objects at offsets :" + offsets + " (offsize: " + offsize + ")");

    var dataOffset = aStream.pos;
    var objects = [];
    for (var i = 0; i < count; i++) {
      var start = offsets[i];
      var end = offsets[i + 1];
      aStream.pos = dataOffset + start - 1;

      var data = [];
      for (var k = start; k < end; k++)
        data.push(aIsByte ? aStream.getByte() : aStream.getChar());
      dump("object at offset " + start + " is: " + data);
      objects.push(data);
    }

    // The last offset points one past the end of the object data.
    aStream.pos = dataOffset + offsets[count] - 1;
    return objects;
  }

  /**
   * Decode every DICT of aArray and store its entries in `font`.
   * Operands are accumulated on a stack until an operator consumes them.
   *
   * @param {Array} aArray list of DICT byte arrays.
   * @return {Array} always an empty array for now.
   */
  function parseAsToken(aArray) {
    var objects = [];

    for (var i = 0; i < aArray.length; i++) {
      var decoded = decodeType2DictData(aArray[i], CFFDictOps);

      var stack = [];
      // A dedicated index is required here: sharing the outer one would
      // stop the iteration over aArray after the first DICT.
      for (var j = 0; j < decoded.length; j++) {
        var token = decoded[j];
        if (IsNum(token)) {
          stack.push(token);
          continue;
        }

        if (!token) {
          // Operator missing from CFFDictOps, nothing sensible can be done.
          dump("Skipping an unknown DICT operator");
          continue;
        }

        switch (token.operand) {
          case "SID":
            font.set(token.name, CFFStrings[stack.pop()]);
            break;

          case "number number":
            // Operands are pushed as "size offset", so offset comes first.
            var offset = stack.pop();
            var size = stack.pop();
            font.set(token.name, {
              size: size,
              offset: offset
            });
            break;

          case "boolean":
            font.set(token.name, stack.pop());
            break;

          case "delta":
            font.set(token.name, stack.pop());
            break;

          default:
            if (token.operand && token.operand.length) {
              // Popping reverses the operands, unshift restores their order.
              var array = [];
              for (var k = 0; k < token.operand.length; k++)
                array.unshift(stack.pop());
              font.set(token.name, array);
            } else {
              font.set(token.name, stack.pop());
            }
            break;
        }
      }
    }

    return objects;
  }

  /**
   * Parse the beginning of a CFF font program.
   * NOTE: the strings found in the String INDEX are appended to the shared
   * CFFStrings table.
   */
  this.parse = function(aStream) {
    font.set("major", aStream.getByte());
    font.set("minor", aStream.getByte());
    font.set("hdrSize", aStream.getByte());
    font.set("offsize", aStream.getByte());

    // Move the cursor after the header
    aStream.skip(font.get("hdrSize") - aStream.pos);

    // Read the NAME Index
    dump("Reading Index: Names");
    font.set("Names", readIndex(aStream));
    dump(font.get("Names"));

    // Read the Top Dict Index
    dump("Reading Index: TopDict");
    var topDict = readIndex(aStream, true);

    // Read the String Index
    dump("Reading Index: Strings");
    var strings = readIndex(aStream);

    // Fill up the Strings dictionary with the new unique strings
    for (var i = 0; i < strings.length; i++)
      CFFStrings.push(strings[i].join(""));

    // Parse the TopDict operator
    parseAsToken(topDict);

    for (var p in font.map) {
      log(p + "::" + font.get(p));
    }
  };
};
// Debugging hook executed as soon as this file is loaded: fetch the sample
// font "titi.cff" and run it through the Type2Parser defined above.
var xhr = new XMLHttpRequest();
// The third argument (false) makes the request synchronous.
xhr.open("GET", "titi.cff", false);
// Ask for binary data, using both the moz-prefixed and unprefixed names.
xhr.mozResponseType = xhr.responseType = "arraybuffer";
// Expected status: 0 when the page comes from a file: URL, 200 otherwise.
// NOTE(review): nothing visible here reads xhr.expected afterwards.
xhr.expected = (document.URL.indexOf("file:") == 0) ? 0 : 200;
xhr.send(null);
// Use whichever response property the browser provides.
var cffData = xhr.mozResponseArrayBuffer || xhr.mozResponse ||
xhr.responseArrayBuffer || xhr.response;
// The constructor argument is not used by Type2Parser; the data is handed
// over through parse().
var cff = new Type2Parser("titi.cff");
cff.parse(new Stream(cffData));

552
cffStandardStrings.js

@ -0,0 +1,552 @@
/**
 * CFF standard strings: the array index of an entry is its String ID (SID),
 * from 0 (".notdef") to 390 ("Semibold").
 * Type2Parser.parse() appends the strings of a font's String INDEX to this
 * array, so the positions of the entries below must not change.
 */
var CFFStrings = [
  ".notdef",
  "space",
  "exclam",
  "quotedbl",
  "numbersign",
  "dollar",
  "percent",
  "ampersand",
  "quoteright",
  "parenleft",
  "parenright",
  "asterisk",
  "plus",
  "comma",
  "hyphen",
  "period",
  "slash",
  "zero",
  "one",
  "two",
  "three",
  "four",
  "five",
  "six",
  "seven",
  "eight",
  "nine",
  "colon",
  "semicolon",
  "less",
  "equal",
  "greater",
  "question",
  "at",
  "A",
  "B",
  "C",
  "D",
  "E",
  "F",
  "G",
  "H",
  "I",
  "J",
  "K",
  "L",
  "M",
  "N",
  "O",
  "P",
  "Q",
  "R",
  "S",
  "T",
  "U",
  "V",
  "W",
  "X",
  "Y",
  "Z",
  "bracketleft",
  "backslash",
  "bracketright",
  "asciicircum",
  "underscore",
  "quoteleft",
  "a",
  "b",
  "c",
  "d",
  "e",
  "f",
  "g",
  "h",
  "i",
  "j",
  "k",
  "l",
  "m",
  "n",
  "o",
  "p",
  "q",
  "r",
  "s",
  "t",
  "u",
  "v",
  "w",
  "x",
  "y",
  "z",
  "braceleft",
  "bar",
  "braceright",
  "asciitilde",
  "exclamdown",
  "cent",
  "sterling",
  "fraction",
  "yen",
  "florin",
  "section",
  "currency",
  "quotesingle",
  "quotedblleft",
  "guillemotleft",
  "guilsinglleft",
  "guilsinglright",
  "fi",
  "fl",
  "endash",
  "dagger",
  "daggerdbl",
  "periodcentered",
  "paragraph",
  "bullet",
  "quotesinglbase",
  "quotedblbase",
  "quotedblright",
  "guillemotright",
  "ellipsis",
  "perthousand",
  "questiondown",
  "grave",
  "acute",
  "circumflex",
  "tilde",
  "macron",
  "breve",
  "dotaccent",
  "dieresis",
  "ring",
  "cedilla",
  "hungarumlaut",
  "ogonek",
  "caron",
  "emdash",
  "AE",
  "ordfeminine",
  "Lslash",
  "Oslash",
  "OE",
  "ordmasculine",
  "ae",
  "dotlessi",
  "lslash",
  "oslash",
  "oe",
  "germandbls",
  "onesuperior",
  "logicalnot",
  "mu",
  "trademark",
  "Eth",
  "onehalf",
  "plusminus",
  "Thorn",
  "onequarter",
  "divide",
  "brokenbar",
  "degree",
  "thorn",
  "threequarters",
  "twosuperior",
  "registered",
  "minus",
  "eth",
  "multiply",
  "threesuperior",
  "copyright",
  "Aacute",
  "Acircumflex",
  "Adieresis",
  "Agrave",
  "Aring",
  "Atilde",
  "Ccedilla",
  "Eacute",
  "Ecircumflex",
  "Edieresis",
  "Egrave",
  "Iacute",
  "Icircumflex",
  "Idieresis",
  "Igrave",
  "Ntilde",
  "Oacute",
  "Ocircumflex",
  "Odieresis",
  "Ograve",
  "Otilde",
  "Scaron",
  "Uacute",
  "Ucircumflex",
  "Udieresis",
  "Ugrave",
  "Yacute",
  "Ydieresis",
  "Zcaron",
  "aacute",
  "acircumflex",
  "adieresis",
  "agrave",
  "aring",
  "atilde",
  "ccedilla",
  "eacute",
  "ecircumflex",
  "edieresis",
  "egrave",
  "iacute",
  "icircumflex",
  "idieresis",
  "igrave",
  "ntilde",
  "oacute",
  "ocircumflex",
  "odieresis",
  "ograve",
  "otilde",
  "scaron",
  "uacute",
  "ucircumflex",
  "udieresis",
  "ugrave",
  "yacute",
  "ydieresis",
  "zcaron",
  "exclamsmall",
  "Hungarumlautsmall",
  "dollaroldstyle",
  "dollarsuperior",
  "ampersandsmall",
  "Acutesmall",
  "parenleftsuperior",
  "parenrightsuperior",
  "twodotenleader",
  "onedotenleader",
  "zerooldstyle",
  "oneoldstyle",
  "twooldstyle",
  "threeoldstyle",
  "fouroldstyle",
  "fiveoldstyle",
  "sixoldstyle",
  "sevenoldstyle",
  "eightoldstyle",
  "nineoldstyle",
  "commasuperior",
  "threequartersemdash",
  "periodsuperior",
  "questionsmall",
  "asuperior",
  "bsuperior",
  "centsuperior",
  "dsuperior",
  "esuperior",
  "isuperior",
  "lsuperior",
  "msuperior",
  "nsuperior",
  "osuperior",
  "rsuperior",
  "ssuperior",
  "tsuperior",
  "ff",
  "ffi",
  "ffl",
  "parenleftinferior",
  "parenrightinferior",
  "Circumflexsmall",
  "hyphensuperior",
  "Gravesmall",
  "Asmall",
  "Bsmall",
  "Csmall",
  "Dsmall",
  "Esmall",
  "Fsmall",
  "Gsmall",
  "Hsmall",
  "Ismall",
  "Jsmall",
  "Ksmall",
  "Lsmall",
  "Msmall",
  "Nsmall",
  "Osmall",
  "Psmall",
  "Qsmall",
  "Rsmall",
  "Ssmall",
  "Tsmall",
  "Usmall",
  "Vsmall",
  "Wsmall",
  "Xsmall",
  "Ysmall",
  "Zsmall",
  "colonmonetary",
  "onefitted",
  "rupiah",
  "Tildesmall",
  "exclamdownsmall",
  "centoldstyle",
  "Lslashsmall",
  "Scaronsmall",
  "Zcaronsmall",
  "Dieresissmall",
  "Brevesmall",
  "Caronsmall",
  "Dotaccentsmall",
  "Macronsmall",
  "figuredash",
  "hypheninferior",
  "Ogoneksmall",
  "Ringsmall",
  "Cedillasmall",
  "questiondownsmall",
  "oneeighth",
  "threeeighths",
  "fiveeighths",
  "seveneighths",
  "onethird",
  "twothirds",
  "zerosuperior",
  "foursuperior",
  "fivesuperior",
  "sixsuperior",
  "sevensuperior",
  "eightsuperior",
  "ninesuperior",
  "zeroinferior",
  "oneinferior",
  "twoinferior",
  "threeinferior",
  "fourinferior",
  "fiveinferior",
  "sixinferior",
  "seveninferior",
  "eightinferior",
  "nineinferior",
  "centinferior",
  "dollarinferior",
  "periodinferior",
  "commainferior",
  "Agravesmall",
  "Aacutesmall",
  "Acircumflexsmall",
  "Atildesmall",
  "Adieresissmall",
  "Aringsmall",
  "AEsmall",
  "Ccedillasmall",
  "Egravesmall",
  "Eacutesmall",
  "Ecircumflexsmall",
  "Edieresissmall",
  "Igravesmall",
  "Iacutesmall",
  "Icircumflexsmall",
  "Idieresissmall",
  "Ethsmall",
  "Ntildesmall",
  "Ogravesmall",
  "Oacutesmall",
  "Ocircumflexsmall",
  "Otildesmall",
  "Odieresissmall",
  "OEsmall",
  "Oslashsmall",
  "Ugravesmall",
  "Uacutesmall",
  "Ucircumflexsmall",
  "Udieresissmall",
  "Yacutesmall",
  "Thornsmall",
  "Ydieresissmall",
  "001.000",
  "001.001",
  "001.002",
  "001.003",
  "Black",
  "Bold",
  "Book",
  "Light",
  "Medium",
  "Regular",
  "Roman",
  "Semibold"
];
/**
 * CFF DICT operators, keyed by operator byte. The "12" entry holds the
 * two-byte (escape) operators, keyed by their second byte.
 *
 * Each entry has a `name` and, optionally, an `operand` which is either a
 * type tag understood by Type2Parser ("SID", "number number", "delta") or
 * the default value of the operator (an array for multi-operand operators).
 */
var CFFDictOps = {
  "0": {
    name: "version",
    operand: "SID"
  },
  "1": {
    name: "Notice",
    operand: "SID"
  },
  "2": {
    name: "FullName",
    operand: "SID"
  },
  "3": {
    name: "FamilyName",
    operand: "SID"
  },
  "4": {
    name: "Weight",
    operand: "SID"
  },
  "5": {
    name: "FontBBox",
    operand: [0, 0, 0, 0]
  },
  "6": {
    name: "BlueValues"
  },
  "7": {
    name: "OtherBlues"
  },
  "8": {
    name: "FamilyBlues"
  },
  "9": {
    name: "FamilyOtherBlues"
  },
  "10": {
    name: "StdHW"
  },
  "11": {
    name: "StdVW"
  },
  "12": {
    "0": {
      name: "Copyright",
      operand: "SID"
    },
    "1": {
      name: "IsFixedPitch",
      operand: false
    },
    "2": {
      name: "ItalicAngle",
      operand: 0
    },
    "3": {
      name: "UnderlinePosition",
      operand: -100
    },
    "4": {
      name: "UnderlineThickness",
      operand: 50
    },
    "5": {
      name: "PaintType",
      operand: 0
    },
    "6": {
      name: "CharstringType",
      operand: 2
    },
    "7": {
      name: "FontMatrix",
      operand: [0.001, 0, 0, 0.001, 0, 0]
    },
    "8": {
      name: "StrokeWidth",
      operand: 0
    },
    "9": {
      name: "BlueScale"
    },
    "10": {
      name: "BlueShift"
    },
    "11": {
      name: "BlueFuzz"
    },
    "12": {
      name: "StemSnapH"
    },
    "13": {
      name: "StemSnapV"
    },
    "14": {
      name: "ForceBold"
    },
    "17": {
      name: "LanguageGroup"
    },
    "18": {
      name: "ExpansionFactor"
    },
    // initialRandomSeed is operator 12 19; keying it "9" would silently
    // replace BlueScale above.
    "19": {
      name: "initialRandomSeed"
    },
    "20": {
      name: "SyntheticBase",
      operand: null
    },
    "21": {
      name: "PostScript",
      operand: "SID"
    },
    "22": {
      name: "BaseFontName",
      operand: "SID"
    },
    "23": {
      name: "BaseFontBlend",
      operand: "delta"
    }
  },
  "13": {
    name: "UniqueID",
    operand: null
  },
  "14": {
    name: "XUID",
    operand: []
  },
  "15": {
    name: "charset",
    operand: 0
  },
  "16": {
    name: "Encoding",
    operand: 0
  },
  "17": {
    name: "CharStrings",
    operand: null
  },
  "18": {
    name: "Private",
    operand: "number number"
  },
  "19": {
    name: "Subrs"
  },
  "20": {
    name: "defaultWidthX"
  },
  "21": {
    name: "nominalWidthX"
  }
};

5
pdf.js

@ -5,6 +5,7 @@ var ERRORS = 0, WARNINGS = 1, TODOS = 5;
var verbosity = WARNINGS; var verbosity = WARNINGS;
function log(msg) { function log(msg) {
msg = msg.toString ? msg.toString() : msg;
if (console && console.log) if (console && console.log)
console.log(msg); console.log(msg);
else if (print) else if (print)
@ -78,7 +79,7 @@ var Stream = (function() {
return ch; return ch;
}, },
skip: function(n) { skip: function(n) {
if (!n) if (!n && !IsNum(n))
n = 1; n = 1;
this.pos += n; this.pos += n;
}, },
@ -2279,6 +2280,7 @@ var CanvasGraphics = (function() {
var subtype = font.get("Subtype").name; var subtype = font.get("Subtype").name;
switch (subtype) { switch (subtype) {
case "Type1": case "Type1":
break;
var fontDescriptor = font.get("FontDescriptor"); var fontDescriptor = font.get("FontDescriptor");
if (fontDescriptor.num) { if (fontDescriptor.num) {
var fontDescriptor = this.xref.fetchIfRef(fontDescriptor); var fontDescriptor = this.xref.fetchIfRef(fontDescriptor);
@ -2292,6 +2294,7 @@ var CanvasGraphics = (function() {
break; break;
case "TrueType": case "TrueType":
break;
var fontDescriptor = font.get("FontDescriptor"); var fontDescriptor = font.get("FontDescriptor");
if (fontDescriptor.num) { if (fontDescriptor.num) {
var fontDescriptor = this.xref.fetchIfRef(fontDescriptor); var fontDescriptor = this.xref.fetchIfRef(fontDescriptor);

1
test.html

@ -5,6 +5,7 @@
<script type="text/javascript" src="pdf.js"></script> <script type="text/javascript" src="pdf.js"></script>
<script type="text/javascript" src="test.js"></script> <script type="text/javascript" src="test.js"></script>
<script type="text/javascript" src="cffStandardStrings.js"></script>
<script type="text/javascript" src="PDFFont.js"></script> <script type="text/javascript" src="PDFFont.js"></script>
</head> </head>

Loading…
Cancel
Save