Browse Source

Merge pull request #8829 from Snuffleupagus/issue-8823

Attempt to improve the `EI` detection heuristics, for inline images, in streams containing `NUL` bytes (issue 8823)
Tim van der Meij 8 years ago committed by GitHub
parent
commit
7c7ba9a2ad
  1. 20
      src/core/parser.js
  2. 1
      test/pdfs/.gitignore
  3. BIN
      test/pdfs/issue8823.pdf
  4. 10
      test/test_manifest.json

20
src/core/parser.js

@@ -148,7 +148,8 @@ var Parser = (function ParserClosure() {
* @returns {number} The inline stream length.
*/
findDefaultInlineStreamEnd(stream) {
const E = 0x45, I = 0x49, SPACE = 0x20, LF = 0xA, CR = 0xD, n = 5;
const E = 0x45, I = 0x49, SPACE = 0x20, LF = 0xA, CR = 0xD;
const n = 10, NUL = 0x0;
let startPos = stream.pos, state = 0, ch, maybeEIPos;
while ((ch = stream.getByte()) !== -1) {
if (state === 0) {
@@ -159,10 +160,23 @@ var Parser = (function ParserClosure() {
assert(state === 2);
if (ch === SPACE || ch === LF || ch === CR) {
maybeEIPos = stream.pos;
// Let's check the next `n` bytes are ASCII... just be sure.
// Let's check that the next `n` bytes are ASCII... just to be sure.
let followingBytes = stream.peekBytes(n);
for (let i = 0; i < n; i++) {
for (let i = 0, ii = followingBytes.length; i < ii; i++) {
ch = followingBytes[i];
if (ch === NUL && followingBytes[i + 1] !== NUL) {
// NUL bytes are not supposed to occur *outside* of inline
// images, but some PDF generators violate that assumption,
// thus breaking the EI detection heuristics used below.
//
// However, we can't unconditionally treat NUL bytes as "ASCII",
// since that *could* result in inline images being truncated.
//
// To attempt to address this, we'll still treat any *sequence*
// of NUL bytes as non-ASCII, but for a *single* NUL byte we'll
// continue checking the `followingBytes` (fixes issue8823.pdf).
continue;
}
if (ch !== LF && ch !== CR && (ch < SPACE || ch > 0x7F)) {
// Not a LF, CR, SPACE or any visible ASCII character, i.e.
// it's binary stuff. Resetting the state.

1
test/pdfs/.gitignore vendored

@@ -58,6 +58,7 @@
!issue8697.pdf
!issue8707.pdf
!issue8798r.pdf
!issue8823.pdf
!bad-PageLabels.pdf
!filled-background.pdf
!ArabicCIDTrueType.pdf

BIN
test/pdfs/issue8823.pdf

Binary file not shown.

10
test/test_manifest.json

@@ -2976,9 +2976,15 @@
{ "id": "issue8798",
"file": "pdfs/issue8798r.pdf",
"md5": "3a0e29f013d9edcceb5d852e37738a77",
"link": false,
"rounds": 1,
"type": "eq"
},
{ "id": "issue8823",
"file": "pdfs/issue8823.pdf",
"md5": "ad02d4aa374b315bf1766038d002d57a",
"link": false,
"rounds": 1,
"lastPage": 1,
"link": true,
"type": "eq"
},
{ "id": "issue8613",

Loading…
Cancel
Save