Browse Source

Improve robustness of stream parser (invalid length)

When the parser finds a stream, it retrieves the Length from the stream
dictionary and advances the lexer to the offset as specified in Length.
If this Length is incorrect, the lexer could end up anywhere.

When the lexer gets into an invalid state, it can throw errors. For
example, in issue 6108, the lexer ends up inside the stream data. This
stream uses the ASCIIHexDecode filter, so its data consists entirely of
ASCII characters, and the lexer interprets it as a command token. Tokens
cannot be longer than 127 bytes, so eventually 128 bytes are consumed
and the lexer throws a "Command token too long" error.

Another possible error is "Illegal character: 41" when the lexer happens
to end up at a ')' due to the length mismatch.

These problems are solved by catching lexer errors and recovering the
parser via the existing stream length detection branch.
Rob Wu 10 years ago
parent
commit
e211c25f06
  1. 15
      src/core/parser.js
  2. 1
      test/pdfs/.gitignore
  3. 28
      test/pdfs/issue6108.pdf
  4. 6
      test/test_manifest.json

15
src/core/parser.js

@ -53,6 +53,16 @@ var Parser = (function ParserClosure() {
this.buf2 = this.lexer.getObj(); this.buf2 = this.lexer.getObj();
} }
}, },
tryShift: function Parser_tryShift() {
try {
this.shift();
return true;
} catch (e) {
// Upon failure, the caller should reset this.lexer.pos to a known good
// state and call this.shift() twice to reset the buffers.
return false;
}
},
getObj: function Parser_getObj(cipherTransform) { getObj: function Parser_getObj(cipherTransform) {
var buf1 = this.buf1; var buf1 = this.buf1;
this.shift(); this.shift();
@ -426,9 +436,10 @@ var Parser = (function ParserClosure() {
stream.pos = pos + length; stream.pos = pos + length;
lexer.nextChar(); lexer.nextChar();
this.shift(); // '>>' // Shift '>>' and check whether the new object marks the end of the stream
if (this.tryShift() && isCmd(this.buf2, 'endstream')) {
this.shift(); // 'stream' this.shift(); // 'stream'
if (!isCmd(this.buf1, 'endstream')) { } else {
// bad stream length, scanning for endstream // bad stream length, scanning for endstream
stream.pos = pos; stream.pos = pos;
var SCAN_BLOCK_SIZE = 2048; var SCAN_BLOCK_SIZE = 2048;

1
test/pdfs/.gitignore vendored

@ -146,3 +146,4 @@
!issue6068.pdf !issue6068.pdf
!issue6081.pdf !issue6081.pdf
!issue6069.pdf !issue6069.pdf
!issue6108.pdf

28
test/pdfs/issue6108.pdf

@ -0,0 +1,28 @@
%PDF-1.0
1 0 obj
<</Type/Catalog/Pages 2 0 R>>
endobj
2 0 obj
<</Type/Pages/Count 1/Kids[3 0 R]/MediaBox [0 0 400 50]>>
endobj
3 0 obj
<</Type/Page/Parent 2 0 R/Resources<</Font<</F1<</Type/Font/Subtype/Type1/BaseFont/Arial>>>>>>/Contents 4 0 R>>
endobj
4 0 obj
<</Length 9/Filter/ASCIIHexDecode>>
stream
42542F4631203132205466203230203230205464202841534349494865784465636F64652066696C7465722C20736D616C6C204C656E6774682C2073697A652064696666206973206D6F7265207468616E203133352062797465732920546A204554>
endstream
endobj
xref
0 5
0000000000 65535 f
0000000009 00000 n
0000000054 00000 n
0000000127 00000 n
0000000254 00000 n
trailer
<</Root 1 0 R/Size 5>>
startxref
519
%%EOF

6
test/test_manifest.json

@ -2284,5 +2284,11 @@
"md5": "d0ad8871f4116bca8e39513ffa8b7d8e", "md5": "d0ad8871f4116bca8e39513ffa8b7d8e",
"rounds": 1, "rounds": 1,
"type": "load" "type": "load"
},
{ "id": "issue6108",
"file": "pdfs/issue6108.pdf",
"md5": "8961cb55149495989a80bf0487e0f076",
"rounds": 1,
"type": "load"
} }
] ]

Loading…
Cancel
Save