You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
630 lines
20 KiB
630 lines
20 KiB
/* -*- Mode: Java; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/* vim: set shiftwidth=4 tabstop=8 autoindent cindent expandtab: */
|
|
|
var HashMap = (function() {
    // Each entry lives on the instance itself as a property whose name is
    // the key with a "$" prefix, which keeps entries apart from the
    // get/set/contains methods found on the prototype.
    function slotName(key) {
        return "$" + key;
    }

    function constructor() {
    }

    constructor.prototype = {
        // Returns the value stored under |key|, or undefined when absent.
        get: function(key) {
            var slot = slotName(key);
            return this[slot];
        },
        // Stores |value| under |key|, replacing any previous value.
        set: function(key, value) {
            var slot = slotName(key);
            this[slot] = value;
        },
        // Reports whether a value has been stored under |key|.
        contains: function(key) {
            var slot = slotName(key);
            return slot in this;
        }
    };

    return constructor;
})();
|
|
|
var Stream = (function() {
    // Sentinel handed back by lookChar()/getChar() once every byte has been
    // consumed.  Also published as Stream.EOF.
    var EOF = -1;

    // Wraps |arrayBuffer| in a byte view with a read cursor at offset 0.
    function constructor(arrayBuffer) {
        // Typed-array constructors must be invoked with |new|.
        this.bytes = new Uint8Array(arrayBuffer);
        this.pos = 0;
    }

    constructor.prototype = {
        // Moves the cursor back to the first byte.
        reset: function() {
            this.pos = 0;
        },
        // Returns the character at the cursor without consuming it, or EOF
        // when the cursor is at or past the end of the data.
        lookChar: function() {
            var bytes = this.bytes;
            if (this.pos >= bytes.length)
                return EOF;
            return String.fromCharCode(bytes[this.pos]);
        },
        // Returns the character at the cursor (or EOF) and advances.  The
        // cursor advances even at EOF so that putBack() stays symmetric.
        getChar: function() {
            var ch = this.lookChar();
            this.pos++;
            return ch;
        },
        // Steps the cursor back by one position.
        putBack: function() {
            this.pos--;
        },
        // Advances the cursor by one position without reading.
        skipChar: function() {
            this.pos++;
        },
        // Drops the first |delta| bytes so that the byte previously at
        // offset |delta| becomes offset 0; the cursor keeps pointing at the
        // same byte.  subarray() creates a new view over the same buffer,
        // so no data is copied.
        moveStart: function(delta) {
            this.bytes = this.bytes.subarray(delta);
            this.pos -= delta;
        }
    };

    constructor.EOF = EOF;

    return constructor;
})();
|
|
|
var Obj = (function() {
    // A tagged value: |type| is one of the numeric tags published on Obj
    // (Obj.Bool, Obj.Int, ...), |value| is the payload for that tag.
    function constructor(type, value) {
        this.type = type;
        this.value = value;
    }

    constructor.prototype = {
    };

    var types = [
        "Bool", "Int", "Real", "String", "Name", "Null",
        "Array", "Dict", "Stream", "Ref",
        "Cmd", "Error", "EOF", "None"
    ];

    // Builds the is<Type>() predicate for a single tag.  The tag is passed
    // in as an argument so each predicate keeps its own copy; closing over
    // the loop counter directly would leave every predicate comparing
    // against the counter's final value.
    function makeTypeTest(typeTag) {
        return function(value) {
            // With no argument only the tag is checked; with an argument
            // the payload must also compare equal to it.
            return this.type === typeTag &&
                   (typeof value == "undefined" || value == this.value);
        };
    }

    // For every type name publish its numeric tag (Obj.Int, ...) and an
    // instance predicate (obj.isInt(), ...).
    for (var i = 0; i < types.length; ++i) {
        var typeName = types[i];
        constructor[typeName] = i;
        constructor.prototype["is" + typeName] = makeTypeTest(i);
    }

    // Looks |key| up in this object's payload, which must provide
    // contains(key) and get(key).  Returns Obj.nullObj for a missing key.
    constructor.prototype.lookup = function(key) {
        if (!this.value.contains(key))
            return constructor.nullObj;
        return this.value.get(key);
    };

    // Shared singletons for the payload-free / two-valued types.
    constructor.trueObj = new constructor(constructor.Bool, true);
    constructor.falseObj = new constructor(constructor.Bool, false);
    constructor.nullObj = new constructor(constructor.Null);
    constructor.errorObj = new constructor(constructor.Error);

    return constructor;
})();
|
|
|
var Lexer = (function() {
    // End-of-input sentinel handed back by the stream's lookChar() and
    // getChar().  It is defined locally because no EOF binding is otherwise
    // in scope here; the value has to agree with the one Stream publishes
    // as Stream.EOF.
    var EOF = -1;

    // Tokenizer over |stream|, which must provide lookChar(), getChar() and
    // putBack().
    function constructor(stream) {
        this.stream = stream;
    }

    // A '1' in this array means the character is white space.  A '1' or
    // '2' means the character ends a name or command.
    var specialChars = [
        1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0,   // 0x
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 1x
        1, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2,   // 2x
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0,   // 3x
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 4x
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0,   // 5x
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 6x
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0,   // 7x
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 8x
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 9x
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // ax
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // bx
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // cx
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // dx
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // ex
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0    // fx
    ];

    // Bounds of the integers accepted by getNumber(): signed 32-bit minimum
    // up to unsigned 32-bit maximum.
    const MIN_INT = (1<<31) | 0;
    const MAX_UINT = ((1<<30) * 4) - 1;

    // Returns the specialChars entry for a one-character string.
    function charClass(ch) {
        return specialChars[ch.charCodeAt(0)];
    }

    // True when |ch| is a character in the range "0"..|maxDigit|.  The
    // typeof test rejects the numeric EOF sentinel.
    function isDigitUpTo(ch, maxDigit) {
        return typeof ch == "string" && ch >= "0" && ch <= maxDigit;
    }

    // Numeric value of a decimal digit character.
    function digitValue(ch) {
        return ch.charCodeAt(0) - 48;
    }

    // Returns the value (0..15) of the hex digit |ch|, or -1 when |ch| is
    // not a hex digit (this includes the EOF sentinel).
    function ToHexDigit(ch) {
        if (typeof ch != "string")
            return -1;
        if (isDigitUpTo(ch, "9"))
            return digitValue(ch);
        ch = ch.toLowerCase();
        if (ch >= "a" && ch <= "f")
            return ch.charCodeAt(0) - 97 + 10;
        return -1;
    }

    constructor.prototype = {
        // Error reporting hook; currently a no-op.
        error: function(msg) {
            // TODO
        },
        // Reads the rest of a number whose first character |ch| has already
        // been consumed.  Returns an Obj.Int or Obj.Real object, or
        // Obj.errorObj when the text is not a number or the integer is out
        // of range.
        getNumber: function(ch) {
            // A leading '.' already makes this a real number.
            var floating = (ch === ".");
            var str = ch;
            var stream = this.stream;
            while (true) {
                ch = stream.getChar();
                if (ch === "." && !floating) {
                    str += ch;
                    floating = true;
                } else if (ch === "-") {
                    // ignore minus signs in the middle of numbers to match
                    // Adobe's behavior
                    this.error("Badly formated number");
                } else if (isDigitUpTo(ch, "9")) {
                    str += ch;
                } else if (ch === "e" || ch === "E") {
                    // NOTE(review): the exponent marker is consumed and
                    // marks the number as real but is not added to the
                    // text, as in the original code -- confirm this is the
                    // intended handling.
                    floating = true;
                } else {
                    // put back the last character, it doesn't belong to us
                    stream.putBack();
                    break;
                }
            }
            var value = parseFloat(str);
            if (isNaN(value))
                return Obj.errorObj;
            if (floating)
                return new Obj(Obj.Real, value);
            // There is no separate unsigned tag among the Obj types, so the
            // whole accepted range is reported as Obj.Int.
            if (value >= MIN_INT && value <= MAX_UINT)
                return new Obj(Obj.Int, value);
            return Obj.errorObj;
        },
        // Reads a literal string whose opening '(' has already been
        // consumed.  Returns an Obj.String holding the text between the
        // outer parentheses with escapes decoded.
        getString: function(ch) {
            var numParen = 1;       // nesting depth of unescaped parens
            var done = false;
            var str = "";
            var stream = this.stream;
            do {
                switch (ch = stream.getChar()) {
                case EOF:
                    this.error("Unterminated string");
                    done = true;
                    break;
                case '(':
                    ++numParen;
                    str += ch;
                    break;
                case ')':
                    if (--numParen == 0) {
                        done = true;
                    } else {
                        str += ch;
                    }
                    break;
                case '\\':
                    switch (ch = stream.getChar()) {
                    case 'n':
                        str += '\n';
                        break;
                    case 'r':
                        str += '\r';
                        break;
                    case 't':
                        str += '\t';
                        break;
                    case 'b':
                        str += '\b';
                        break;
                    case 'f':
                        str += '\f';
                        break;
                    case '\\':
                    case '(':
                    case ')':
                        str += ch;
                        break;
                    case '0': case '1': case '2': case '3':
                    case '4': case '5': case '6': case '7':
                        // octal escape: one to three octal digits
                        var x = digitValue(ch);
                        ch = stream.lookChar();
                        if (isDigitUpTo(ch, "7")) {
                            stream.getChar();
                            x = (x << 3) + digitValue(ch);
                            ch = stream.lookChar();
                            if (isDigitUpTo(ch, "7")) {
                                stream.getChar();
                                x = (x << 3) + digitValue(ch);
                            }
                        }
                        // keep the result within one byte
                        str += String.fromCharCode(x & 0xff);
                        break;
                    case '\r':
                        // backslash + end-of-line: line continuation
                        ch = stream.lookChar();
                        if (ch == '\n')
                            stream.getChar();
                        break;
                    case '\n':
                        break;
                    case EOF:
                        this.error("Unterminated string");
                        done = true;
                        break;
                    default:
                        str += ch;
                        break;
                    }
                    break;
                default:
                    str += ch;
                    break;
                }
            } while (!done);
            return new Obj(Obj.String, str);
        },
        // Reads a name whose leading '/' has already been consumed.
        // "#xx" sequences are decoded as two-digit hex character codes.
        getName: function(ch) {
            var str = "";
            var stream = this.stream;
            while ((ch = stream.lookChar()) !== EOF && !charClass(ch)) {
                stream.getChar();
                if (ch == "#") {
                    var x = ToHexDigit(stream.lookChar());
                    if (x != -1) {
                        stream.getChar();
                        var x2 = ToHexDigit(stream.getChar());
                        if (x2 == -1) {
                            this.error("Illegal digit in hex char in name");
                        } else {
                            str += String.fromCharCode((x << 4) | x2);
                        }
                    } else {
                        // Not an escape.  Only the '#' is added here: the
                        // following character was merely peeked at and is
                        // handled by the next loop iteration.
                        str += "#";
                    }
                } else {
                    str += ch;
                }
            }
            if (str.length > 128)
                this.error("Warning: name token is longer than allowed by the specification");
            return new Obj(Obj.Name, str);
        },
        // Reads a hex string whose opening '<' has already been consumed.
        // White space between digits is skipped and an odd trailing digit
        // is treated as if followed by 0.
        getHexString: function(ch) {
            var str = "";
            var stream = this.stream;
            var pending = -1;       // high nibble waiting for its partner
            while (true) {
                ch = stream.getChar();
                if (ch === '>')
                    break;
                if (ch === EOF) {
                    this.error("Unterminated hex string");
                    break;
                }
                if (charClass(ch) == 1)
                    continue;
                var digit = ToHexDigit(ch);
                if (digit == -1) {
                    this.error("Illegal character in hex string");
                    break;
                }
                if (pending == -1) {
                    pending = digit;
                } else {
                    str += String.fromCharCode((pending << 4) | digit);
                    pending = -1;
                }
            }
            if (pending != -1)
                str += String.fromCharCode(pending << 4);
            return new Obj(Obj.String, str);
        },
        // Returns the next token as an Obj; an Obj.EOF object once the
        // input is exhausted.
        getObj: function() {
            // skip whitespace and comments
            var comment = false;
            var stream = this.stream;
            var ch;
            while (true) {
                if ((ch = stream.getChar()) === EOF)
                    return new Obj(Obj.EOF);
                if (comment) {
                    if (ch == '\r' || ch == '\n')
                        comment = false;
                } else if (ch == '%') {
                    comment = true;
                } else if (charClass(ch) != 1) {
                    break;
                }
            }

            // start reading token
            switch (ch) {
            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
            case '+': case '-': case '.':
                return this.getNumber(ch);
            case '(':
                return this.getString(ch);
            case '/':
                return this.getName(ch);
            // array punctuation
            case '[':
            case ']':
                return new Obj(Obj.Cmd, ch);
            // hex string or dict punctuation
            case '<':
                ch = stream.lookChar();
                if (ch == '<') {
                    // dict punctuation
                    stream.getChar();
                    return new Obj(Obj.Cmd, "<<");
                }
                return this.getHexString(ch);
            // dict punctuation
            case '>':
                ch = stream.lookChar();
                if (ch == '>') {
                    stream.getChar();
                    return new Obj(Obj.Cmd, ">>");
                }
                // fall through
            case ')':
            case '{':
            case '}':
                this.error("Illegal character");
                return Obj.errorObj;
            }

            // command
            var str = ch;
            while ((ch = stream.lookChar()) !== EOF && !charClass(ch)) {
                stream.getChar();
                if (str.length == 128) {
                    this.error("Command token too long");
                    break;
                }
                str += ch;
            }
            if (str == "true")
                return Obj.trueObj;
            if (str == "false")
                return Obj.falseObj;
            if (str == "null")
                return Obj.nullObj;
            return new Obj(Obj.Cmd, str);
        }
    };

    return constructor;
})();
|
|
|
var Parser = (function() {
    // Builds objects out of the tokens produced by |lexer|, keeping two
    // tokens of lookahead in buf1/buf2.  |allowStreams| controls whether a
    // dictionary followed by the "stream" command is turned into a stream.
    function constructor(lexer, allowStreams) {
        this.lexer = lexer;
        this.allowStreams = allowStreams;
        this.inlineImg = 0;
        this.refill();
    }

    constructor.prototype = {
        // Error reporting hook; currently a no-op.
        error: function(msg) {
            // TODO
        },
        // Loads both lookahead slots from the lexer.
        refill: function() {
            this.buf1 = this.lexer.getObj();
            this.buf2 = this.lexer.getObj();
        },
        // Advances the lookahead by one token.
        shift: function() {
            if (this.inlineImg > 0) {
                if (this.inlineImg < 2) {
                    this.inlineImg++;
                } else {
                    // in a damaged content stream, if 'ID' shows up in the middle
                    // of a dictionary, we need to reset
                    this.inlineImg = 0;
                }
            } else if (this.buf2.isCmd("ID")) {
                // skip char after 'ID' command; the lexer itself has no
                // skipChar, so go through the stream it reads from
                this.lexer.stream.skipChar();
                this.inlineImg = 1;
            }
            this.buf1 = this.buf2;
            // don't buffer inline image data
            this.buf2 = (this.inlineImg > 0) ? Obj.nullObj : this.lexer.getObj();
        },
        // Parses and returns the next complete object: an array, a
        // dictionary (or stream), an indirect reference, or a simple token.
        getObj: function() {
            var obj;

            // refill buffer after inline image data
            if (this.inlineImg == 2)
                this.refill();

            if (this.buf1.isCmd("[")) { // array
                // step past the '[' before collecting elements
                this.shift();
                obj = new Obj(Obj.Array, []);
                while (!this.buf1.isCmd("]") && !this.buf1.isEOF())
                    obj.value.push(this.getObj());
                if (this.buf1.isEOF())
                    this.error("End of file inside array");
                this.shift();
                return obj;
            }

            if (this.buf1.isCmd("<<")) { // dictionary or stream
                this.shift();
                obj = new Obj(Obj.Dict, new HashMap());
                while (!this.buf1.isCmd(">>") && !this.buf1.isEOF()) {
                    if (!this.buf1.isName()) {
                        this.error("Dictionary key must be a name object");
                        this.shift();
                    } else {
                        var key = this.buf1.value;
                        this.shift();
                        if (this.buf1.isEOF() || this.buf1.isError())
                            break;
                        obj.value.set(key, this.getObj());
                    }
                }
                if (this.buf1.isEOF())
                    this.error("End of file inside dictionary");

                // stream objects are not allowed inside content streams or
                // object streams
                if (this.allowStreams && this.buf2.isCmd("stream"))
                    return this.makeStream();
                this.shift();
                return obj;
            }

            if (this.buf1.isInt()) { // indirect reference or integer
                var num = this.buf1.value;
                this.shift();
                if (this.buf1.isInt() && this.buf2.isCmd("R")) {
                    obj = new Obj(Obj.Ref, [num, this.buf1.value]);
                    this.shift();
                    this.shift();
                    return obj;
                }
                return new Obj(Obj.Int, num);
            }

            if (this.buf1.isString()) { // string
                obj = this.decrypt(this.buf1);
                this.shift();
                return obj;
            }

            // simple object
            obj = this.buf1;
            this.shift();
            return obj;
        },
        // Decryption hook; currently returns |obj| unchanged.
        decrypt: function(obj) {
            // TODO
            return obj;
        },
        // Stream construction hook; currently always yields an error object.
        makeStream: function() {
            // TODO
            return new Obj(Obj.Error);
        }
    };

    return constructor;
})();
|
|
|
var Linearization = (function () {
    // Reads the first four objects from |stream| and keeps the fourth as
    // this.linDict.  When the first three look like "<int> <int> obj" and
    // the fourth is a dictionary, its "Linearized" entry must be a positive
    // number; otherwise linDict is replaced by Obj.nullObj.
    function constructor(stream) {
        this.parser = new Parser(new Lexer(stream), false);
        var obj1 = this.parser.getObj();
        var obj2 = this.parser.getObj();
        var obj3 = this.parser.getObj();
        this.linDict = this.parser.getObj();
        if (obj1.isInt() && obj2.isInt() && obj3.isCmd("obj") &&
            this.linDict.isDict()) {
            var obj = this.linDict.lookup("Linearized");
            var isNumber = obj.isInt() || obj.isReal();
            if (!(isNumber && obj.value > 0))
                this.linDict = Obj.nullObj;
        }
    }

    constructor.prototype = {
        // Error reporting hook; currently a no-op.
        error: function(msg) {
            // TODO
        },
        // Returns the positive integer stored under |name| in the
        // linearization dictionary, or 0 (after reporting an error) when
        // the entry is missing or invalid.
        getInt: function(name) {
            var linDict = this.linDict;
            var obj;
            if (linDict.isDict() &&
                (obj = linDict.lookup(name)).isInt() &&
                obj.value > 0) {
                return obj.value;
            }
            this.error("'" + name + "' field in linearization table is invalid");
            return 0;
        },
        // Returns element |index| of the "H" array in the linearization
        // dictionary when it is a positive integer, or 0 (after reporting
        // an error) otherwise.
        getHint: function(index) {
            var linDict = this.linDict;
            var obj1, obj2;
            if (linDict.isDict() &&
                (obj1 = linDict.lookup("H")).isArray() &&
                obj1.value.length >= 2 &&
                // the array may be too short for the second hint pair
                obj1.value.length > index &&
                (obj2 = obj1.value[index]).isInt() &&
                obj2.value > 0) {
                return obj2.value;
            }
            this.error("Hints table in linearization table is invalid");
            return 0;
        },
        get length() {
            return this.getInt("L");
        },
        get hintsOffset() {
            return this.getHint(0);
        },
        get hintsLength() {
            return this.getHint(1);
        },
        get hintsOffset2() {
            return this.getHint(2);
        },
        get hintsLength2() {
            return this.getHint(3);
        },
        // Misspelled name kept so existing callers continue to work.
        get hintsLenth2() {
            return this.hintsLength2;
        },
        get objectNumberFirst() {
            return this.getInt("O");
        },
        get endFirst() {
            return this.getInt("E");
        },
        get numPages() {
            return this.getInt("N");
        },
        get mainXRefEntriesOffset() {
            return this.getInt("T");
        },
        get pageFirst() {
            return this.getInt("P");
        }
    };

    return constructor;
})();
|
|
|
var PDFDoc = (function () {
    // |stream| must expose bytes, pos, reset(), getChar() and moveStart().
    function constructor(stream) {
        this.setup(stream);
    }

    // Defines |name| as an own data property of |obj| holding |value| and
    // returns |value|.  Used by the lazy getters below: a plain assignment
    // cannot replace a getter-only accessor inherited from the prototype.
    function shadow(obj, name, value) {
        Object.defineProperty(obj, name, {
            value: value,
            enumerable: true,
            configurable: true,
            writable: true
        });
        return value;
    }

    constructor.prototype = {
        // Linearization info for the document, or false when the stream is
        // empty or the recorded length does not match the stream length.
        // Computed on first access and then cached on the instance.
        get linearization() {
            var length = this.stream.bytes.length;
            var linearization = false;
            if (length) {
                linearization = new Linearization(this.stream);
                if (linearization.length != length)
                    linearization = false;
            }
            // shadow the prototype getter
            return shadow(this, "linearization", linearization);
        },
        // Not implemented yet: currently always yields undefined.
        get startXRef() {
            var startXRef;
            var linearization = this.linearization;
            if (linearization) {
                // TODO
            } else {
                // TODO
            }
            // shadow the prototype getter
            return shadow(this, "startXRef", startXRef);
        },
        // Find the header, remove leading garbage and setup the stream
        // starting from the header.
        checkHeader: function(stream) {
            const headerSearchSize = 1024;
            var header = "%PDF-";

            // Keep the stream whether or not a header is found: it may not
            // be a PDF file, but we continue anyway.
            this.stream = stream;

            for (var skip = 0; skip < headerSearchSize; ++skip) {
                stream.pos = skip;
                var matched = 0;
                while (matched < header.length &&
                       stream.getChar() === header.charAt(matched)) {
                    ++matched;
                }

                // Found the header, trim off any garbage before it.
                if (matched == header.length) {
                    stream.moveStart(skip);
                    stream.reset();
                    return;
                }
            }

            // No header within the search window; start over from the top.
            stream.reset();
        },
        // Prepares the document for reading.  Despite its name the first
        // argument is the stream object handed to the constructor; the
        // password arguments are not used yet.
        setup: function(arrayBuffer, ownerPassword, userPassword) {
            this.checkHeader(arrayBuffer);
        }
    };

    return constructor;
})();
|
|
|