From 09f8f951c8e4363f4545f0fa942bc4fb65606878 Mon Sep 17 00:00:00 2001
From: Yury Delendik <ydelendik@mozilla.com>
Date: Fri, 17 Jan 2014 07:16:52 -0600
Subject: [PATCH] Extracts evaluator preprocessor and refactors text extraction

---
 src/core/evaluator.js       | 496 +++++++++++++++++++-----------------
 src/core/parser.js          |  15 --
 src/display/canvas.js       |   9 +-
 test/unit/evaluator_spec.js |  33 ++-
 4 files changed, 289 insertions(+), 264 deletions(-)

diff --git a/src/core/evaluator.js b/src/core/evaluator.js
index 27e512ea8..bdeb1af80 100644
--- a/src/core/evaluator.js
+++ b/src/core/evaluator.js
@@ -40,119 +40,6 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
     this.fontCache = fontCache;
   }
 
-  // Specifies properties for each command
-  //
-  // If variableArgs === true: [0, `numArgs`] expected
-  // If variableArgs === false: exactly `numArgs` expected
-  var OP_MAP = {
-    // Graphic state
-    w: { id: OPS.setLineWidth, numArgs: 1, variableArgs: false },
-    J: { id: OPS.setLineCap, numArgs: 1, variableArgs: false },
-    j: { id: OPS.setLineJoin, numArgs: 1, variableArgs: false },
-    M: { id: OPS.setMiterLimit, numArgs: 1, variableArgs: false },
-    d: { id: OPS.setDash, numArgs: 2, variableArgs: false },
-    ri: { id: OPS.setRenderingIntent, numArgs: 1, variableArgs: false },
-    i: { id: OPS.setFlatness, numArgs: 1, variableArgs: false },
-    gs: { id: OPS.setGState, numArgs: 1, variableArgs: false },
-    q: { id: OPS.save, numArgs: 0, variableArgs: false },
-    Q: { id: OPS.restore, numArgs: 0, variableArgs: false },
-    cm: { id: OPS.transform, numArgs: 6, variableArgs: false },
-
-    // Path
-    m: { id: OPS.moveTo, numArgs: 2, variableArgs: false },
-    l: { id: OPS.lineTo, numArgs: 2, variableArgs: false },
-    c: { id: OPS.curveTo, numArgs: 6, variableArgs: false },
-    v: { id: OPS.curveTo2, numArgs: 4, variableArgs: false },
-    y: { id: OPS.curveTo3, numArgs: 4, variableArgs: false },
-    h: { id: OPS.closePath, numArgs: 0, variableArgs: false },
-    re: { id: OPS.rectangle, numArgs: 4, variableArgs: false },
-    S: { id: OPS.stroke, numArgs: 0, variableArgs: false },
-    s: { id: OPS.closeStroke, numArgs: 0, variableArgs: false },
-    f: { id: OPS.fill, numArgs: 0, variableArgs: false },
-    F: { id: OPS.fill, numArgs: 0, variableArgs: false },
-    'f*': { id: OPS.eoFill, numArgs: 0, variableArgs: false },
-    B: { id: OPS.fillStroke, numArgs: 0, variableArgs: false },
-    'B*': { id: OPS.eoFillStroke, numArgs: 0, variableArgs: false },
-    b: { id: OPS.closeFillStroke, numArgs: 0, variableArgs: false },
-    'b*': { id: OPS.closeEOFillStroke, numArgs: 0, variableArgs: false },
-    n: { id: OPS.endPath, numArgs: 0, variableArgs: false },
-
-    // Clipping
-    W: { id: OPS.clip, numArgs: 0, variableArgs: false },
-    'W*': { id: OPS.eoClip, numArgs: 0, variableArgs: false },
-
-    // Text
-    BT: { id: OPS.beginText, numArgs: 0, variableArgs: false },
-    ET: { id: OPS.endText, numArgs: 0, variableArgs: false },
-    Tc: { id: OPS.setCharSpacing, numArgs: 1, variableArgs: false },
-    Tw: { id: OPS.setWordSpacing, numArgs: 1, variableArgs: false },
-    Tz: { id: OPS.setHScale, numArgs: 1, variableArgs: false },
-    TL: { id: OPS.setLeading, numArgs: 1, variableArgs: false },
-    Tf: { id: OPS.setFont, numArgs: 2, variableArgs: false },
-    Tr: { id: OPS.setTextRenderingMode, numArgs: 1, variableArgs: false },
-    Ts: { id: OPS.setTextRise, numArgs: 1, variableArgs: false },
-    Td: { id: OPS.moveText, numArgs: 2, variableArgs: false },
-    TD: { id: OPS.setLeadingMoveText, numArgs: 2, variableArgs: false },
-    Tm: { id: OPS.setTextMatrix, numArgs: 6, variableArgs: false },
-    'T*': { id: OPS.nextLine, numArgs: 0, variableArgs: false },
-    Tj: { id: OPS.showText, numArgs: 1, variableArgs: false },
-    TJ: { id: OPS.showSpacedText, numArgs: 1, variableArgs: false },
-    '\'': { id: OPS.nextLineShowText, numArgs: 1, variableArgs: false },
-    '"': { id: OPS.nextLineSetSpacingShowText, numArgs: 3,
-      variableArgs: false },
-
-    // Type3 fonts
-    d0: { id: OPS.setCharWidth, numArgs: 2, variableArgs: false },
-    d1: { id: OPS.setCharWidthAndBounds, numArgs: 6, variableArgs: false },
-
-    // Color
-    CS: { id: OPS.setStrokeColorSpace, numArgs: 1, variableArgs: false },
-    cs: { id: OPS.setFillColorSpace, numArgs: 1, variableArgs: false },
-    SC: { id: OPS.setStrokeColor, numArgs: 4, variableArgs: true },
-    SCN: { id: OPS.setStrokeColorN, numArgs: 33, variableArgs: true },
-    sc: { id: OPS.setFillColor, numArgs: 4, variableArgs: true },
-    scn: { id: OPS.setFillColorN, numArgs: 33, variableArgs: true },
-    G: { id: OPS.setStrokeGray, numArgs: 1, variableArgs: false },
-    g: { id: OPS.setFillGray, numArgs: 1, variableArgs: false },
-    RG: { id: OPS.setStrokeRGBColor, numArgs: 3, variableArgs: false },
-    rg: { id: OPS.setFillRGBColor, numArgs: 3, variableArgs: false },
-    K: { id: OPS.setStrokeCMYKColor, numArgs: 4, variableArgs: false },
-    k: { id: OPS.setFillCMYKColor, numArgs: 4, variableArgs: false },
-
-    // Shading
-    sh: { id: OPS.shadingFill, numArgs: 1, variableArgs: false },
-
-    // Images
-    BI: { id: OPS.beginInlineImage, numArgs: 0, variableArgs: false },
-    ID: { id: OPS.beginImageData, numArgs: 0, variableArgs: false },
-    EI: { id: OPS.endInlineImage, numArgs: 1, variableArgs: false },
-
-    // XObjects
-    Do: { id: OPS.paintXObject, numArgs: 1, variableArgs: false },
-    MP: { id: OPS.markPoint, numArgs: 1, variableArgs: false },
-    DP: { id: OPS.markPointProps, numArgs: 2, variableArgs: false },
-    BMC: { id: OPS.beginMarkedContent, numArgs: 1, variableArgs: false },
-    BDC: { id: OPS.beginMarkedContentProps, numArgs: 2,
-      variableArgs: false },
-    EMC: { id: OPS.endMarkedContent, numArgs: 0, variableArgs: false },
-
-    // Compatibility
-    BX: { id: OPS.beginCompat, numArgs: 0, variableArgs: false },
-    EX: { id: OPS.endCompat, numArgs: 0, variableArgs: false },
-
-    // (reserved partial commands for the lexer)
-    BM: null,
-    BD: null,
-    'true': null,
-    fa: null,
-    fal: null,
-    fals: null,
-    'false': null,
-    nu: null,
-    nul: null,
-    'null': null
-  };
-
   var TILING_PATTERN = 1, SHADING_PATTERN = 2;
 
   PartialEvaluator.prototype = {
@@ -198,7 +85,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
 
     buildFormXObject: function PartialEvaluator_buildFormXObject(resources,
                                                                  xobj, smask,
-                                                                 operatorList) {
+                                                                 operatorList,
+                                                                 state) {
       var self = this;
 
       var matrix = xobj.dict.get('Matrix');
@@ -226,7 +114,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
       operatorList.addOp(OPS.paintFormXObjectBegin, [matrix, bbox]);
 
       this.getOperatorList(xobj, xobj.dict.get('Resources') || resources,
-                           operatorList);
+                           operatorList, state);
       operatorList.addOp(OPS.paintFormXObjectEnd, []);
 
       if (group) {
@@ -532,7 +420,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
 
     getOperatorList: function PartialEvaluator_getOperatorList(stream,
                                                                resources,
-                                                               operatorList) {
+                                                               operatorList,
+                                                               evaluatorState) {
 
       var self = this;
       var xref = this.xref;
@@ -543,54 +432,16 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
       resources = resources || new Dict();
       var xobjs = resources.get('XObject') || new Dict();
       var patterns = resources.get('Pattern') || new Dict();
-      // TODO(mduan): pass array of knownCommands rather than OP_MAP
-      // dictionary
-      var parser = new Parser(new Lexer(stream, OP_MAP), false, xref);
+      var preprocessor = new EvaluatorPreprocessor(stream, xref);
+      if (evaluatorState) {
+        preprocessor.setState(evaluatorState);
+      }
 
       var promise = new LegacyPromise();
-      var args = [];
-      while (true) {
-
-        var obj = parser.getObj();
-
-        if (isEOF(obj)) {
-          break;
-        }
-
-        if (isCmd(obj)) {
-          var cmd = obj.cmd;
-
-          // Check that the command is valid
-          var opSpec = OP_MAP[cmd];
-          if (!opSpec) {
-            warn('Unknown command "' + cmd + '"');
-            continue;
-          }
-
-          var fn = opSpec.id;
-
-          // Validate the number of arguments for the command
-          if (opSpec.variableArgs) {
-            if (args.length > opSpec.numArgs) {
-              info('Command ' + fn + ': expected [0,' + opSpec.numArgs +
-                  '] args, but received ' + args.length + ' args');
-            }
-          } else {
-            if (args.length < opSpec.numArgs) {
-              // If we receive too few args, it's not possible to possible
-              // to execute the command, so skip the command
-              info('Command ' + fn + ': because expected ' +
-                   opSpec.numArgs + ' args, but received ' + args.length +
-                   ' args; skipping');
-              args = [];
-              continue;
-            } else if (args.length > opSpec.numArgs) {
-              info('Command ' + fn + ': expected ' + opSpec.numArgs +
-                  ' args, but received ' + args.length + ' args');
-            }
-          }
-
-          // TODO figure out how to type-check vararg functions
+      var operation;
+      while ((operation = preprocessor.read())) {
+          var args = operation.args;
+          var fn = operation.fn;
 
           switch (fn) {
             case OPS.setStrokeColorN:
@@ -642,7 +493,8 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
                 );
 
                 if ('Form' == type.name) {
-                  self.buildFormXObject(resources, xobj, null, operatorList);
+                  self.buildFormXObject(resources, xobj, null, operatorList,
+                                        preprocessor.getState());
                   args = [];
                   continue;
                 } else if ('Image' == type.name) {
@@ -733,12 +585,12 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
           } // switch
 
           operatorList.addOp(fn, args);
-          args = [];
-          parser.saveState();
-        } else if (obj !== null && obj !== undefined) {
-          args.push(obj instanceof Dict ? obj.getAll() : obj);
-          assertWellFormed(args.length <= 33, 'Too many arguments');
-        }
+      }
+
+      // Some PDFs leave saves unrestored inside an object/form;
+      // emit the missing restores for them.
+      for (var i = 0, ii = preprocessor.savedStatesDepth; i < ii; i++) {
+        operatorList.addOp(OPS.restore, []);
       }
 
       return operatorList;
@@ -775,65 +627,55 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
       // The xobj is parsed iff it's needed, e.g. if there is a `DO` cmd.
       var xobjs = null;
 
-      var parser = new Parser(new Lexer(stream), false);
+      var preprocessor = new EvaluatorPreprocessor(stream, xref);
       var res = resources;
-      var args = [], obj;
 
       var chunk = '';
       var font = null;
       var charSpace = 0, wordSpace = 0;
-      while (!isEOF(obj = parser.getObj())) {
-        if (isCmd(obj)) {
-          var cmd = obj.cmd;
-          switch (cmd) {
+      var operation;
+      while ((operation = preprocessor.read())) {
+          var fn = operation.fn;
+          var args = operation.args;
+          switch (fn) {
             // TODO: Add support for SAVE/RESTORE and XFORM here.
-            case 'Tf':
+            case OPS.setFont:
               font = handleSetFont(args[0].name).translated;
               textState.fontSize = args[1];
               break;
-            case 'Ts':
+            case OPS.setTextRise:
               textState.textRise = args[0];
               break;
-            case 'Tz':
+            case OPS.setHScale:
               textState.textHScale = args[0] / 100;
               break;
-            case 'TL':
+            case OPS.setLeading:
               textState.leading = args[0];
               break;
-            case 'Td':
+            case OPS.moveText:
               textState.translateTextMatrix(args[0], args[1]);
               break;
-            case 'TD':
+            case OPS.setLeadingMoveText:
               textState.leading = -args[1];
               textState.translateTextMatrix(args[0], args[1]);
               break;
-            case 'T*':
+            case OPS.nextLine:
               textState.translateTextMatrix(0, -textState.leading);
               break;
-            case 'Tm':
+            case OPS.setTextMatrix:
               textState.setTextMatrix(args[0], args[1],
                                        args[2], args[3], args[4], args[5]);
               break;
-            case 'Tc':
+            case OPS.setCharSpacing:
               charSpace = args[0];
               break;
-            case 'Tw':
+            case OPS.setWordSpacing:
               wordSpace = args[0];
               break;
-            case 'q':
-              textState.push();
-              break;
-            case 'Q':
-              textState.pop();
-              break;
-            case 'BT':
+            case OPS.beginText:
               textState.initialiseTextObj();
               break;
-            case 'cm':
-              textState.transformCTM(args[0], args[1], args[2],
-                                args[3], args[4], args[5]);
-              break;
-            case 'TJ':
+            case OPS.showSpacedText:
               var items = args[0];
               for (var j = 0, jj = items.length; j < jj; j++) {
                 if (typeof items[j] === 'string') {
@@ -851,20 +693,20 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
                 }
               }
               break;
-            case 'Tj':
+            case OPS.showText:
               chunk += fontCharsToUnicode(args[0], font);
               break;
-            case '\'':
+            case OPS.nextLineShowText:
               // For search, adding a extra white space for line breaks would be
               // better here, but that causes too much spaces in the
               // text-selection divs.
               chunk += fontCharsToUnicode(args[0], font);
               break;
-            case '"':
+            case OPS.nextLineSetSpacingShowText:
               // Note comment in "'"
               chunk += fontCharsToUnicode(args[2], font);
               break;
-            case 'Do':
+            case OPS.paintXObject:
               // Set the chunk such that the following if won't add something
               // to the state.
               chunk = '';
@@ -898,7 +740,7 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
                 state
               );
               break;
-            case 'gs':
+            case OPS.setGState:
               var dictName = args[0];
               var extGState = resources.get('ExtGState');
 
@@ -917,7 +759,11 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
 
           if (chunk !== '') {
             var bidiText = PDFJS.bidi(chunk, -1, font.vertical);
-            var renderParams = textState.calcRenderParams();
+            var renderParams = textState.calcRenderParams(preprocessor.ctm);
+            bidiText.x = renderParams.renderMatrix[4] - (textState.fontSize *
+                           renderParams.vScale * Math.sin(renderParams.angle));
+            bidiText.y = renderParams.renderMatrix[5] + (textState.fontSize *
+                           renderParams.vScale * Math.cos(renderParams.angle));
             var fontHeight = textState.fontSize * renderParams.vScale;
             var fontAscent = font.ascent ? font.ascent * fontHeight :
               font.descent ? (1 + font.descent) * fontHeight : fontHeight;
@@ -933,12 +779,6 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
 
             chunk = '';
           }
-
-          args = [];
-        } else if (obj !== null && obj !== undefined) {
-          assertWellFormed(args.length <= 33, 'Too many arguments');
-          args.push(obj);
-        }
       } // while
 
       return state;
@@ -1597,7 +1437,6 @@ var OperatorList = (function OperatorListClosure() {
 var TextState = (function TextStateClosure() {
   function TextState() {
     this.fontSize = 0;
-    this.ctm = [1, 0, 0, 1, 0, 0];
     this.textMatrix = [1, 0, 0, 1, 0, 0];
     this.stateStack = [];
     //textState variables
@@ -1606,15 +1445,6 @@ var TextState = (function TextStateClosure() {
     this.textRise = 0;
   }
   TextState.prototype = {
-    push: function TextState_push() {
-      this.stateStack.push(this.ctm.slice());
-    },
-    pop: function TextState_pop() {
-      var prev = this.stateStack.pop();
-      if (prev) {
-        this.ctm = prev;
-      }
-    },
     initialiseTextObj: function TextState_initialiseTextObj() {
       var m = this.textMatrix;
       m[0] = 1, m[1] = 0, m[2] = 0, m[3] = 1, m[4] = 0, m[5] = 0;
@@ -1623,24 +1453,13 @@ var TextState = (function TextStateClosure() {
       var m = this.textMatrix;
       m[0] = a, m[1] = b, m[2] = c, m[3] = d, m[4] = e, m[5] = f;
     },
-    transformCTM: function TextState_transformCTM(a, b, c, d, e, f) {
-      var m = this.ctm;
-      var m0 = m[0], m1 = m[1], m2 = m[2], m3 = m[3], m4 = m[4], m5 = m[5];
-      m[0] = m0 * a + m2 * b;
-      m[1] = m1 * a + m3 * b;
-      m[2] = m0 * c + m2 * d;
-      m[3] = m1 * c + m3 * d;
-      m[4] = m0 * e + m2 * f + m4;
-      m[5] = m1 * e + m3 * f + m5;
-    },
     translateTextMatrix: function TextState_translateTextMatrix(x, y) {
       var m = this.textMatrix;
       m[4] = m[0] * x + m[2] * y + m[4];
       m[5] = m[1] * x + m[3] * y + m[5];
     },
-    calcRenderParams: function TextState_calcRenderingParams() {
+    calcRenderParams: function TextState_calcRenderingParams(cm) {
       var tm = this.textMatrix;
-      var cm = this.ctm;
       var a = this.fontSize;
       var b = a * this.textHScale;
       var c = this.textRise;
@@ -1683,3 +1502,218 @@ var EvalState = (function EvalStateClosure() {
   return EvalState;
 })();
 
+var EvaluatorPreprocessor = (function EvaluatorPreprocessor() {
+  // Specifies properties for each command
+  //
+  // If variableArgs === true: [0, `numArgs`] expected
+  // If variableArgs === false: exactly `numArgs` expected
+  var OP_MAP = {
+    // Graphic state
+    w: { id: OPS.setLineWidth, numArgs: 1, variableArgs: false },
+    J: { id: OPS.setLineCap, numArgs: 1, variableArgs: false },
+    j: { id: OPS.setLineJoin, numArgs: 1, variableArgs: false },
+    M: { id: OPS.setMiterLimit, numArgs: 1, variableArgs: false },
+    d: { id: OPS.setDash, numArgs: 2, variableArgs: false },
+    ri: { id: OPS.setRenderingIntent, numArgs: 1, variableArgs: false },
+    i: { id: OPS.setFlatness, numArgs: 1, variableArgs: false },
+    gs: { id: OPS.setGState, numArgs: 1, variableArgs: false },
+    q: { id: OPS.save, numArgs: 0, variableArgs: false },
+    Q: { id: OPS.restore, numArgs: 0, variableArgs: false },
+    cm: { id: OPS.transform, numArgs: 6, variableArgs: false },
+
+    // Path
+    m: { id: OPS.moveTo, numArgs: 2, variableArgs: false },
+    l: { id: OPS.lineTo, numArgs: 2, variableArgs: false },
+    c: { id: OPS.curveTo, numArgs: 6, variableArgs: false },
+    v: { id: OPS.curveTo2, numArgs: 4, variableArgs: false },
+    y: { id: OPS.curveTo3, numArgs: 4, variableArgs: false },
+    h: { id: OPS.closePath, numArgs: 0, variableArgs: false },
+    re: { id: OPS.rectangle, numArgs: 4, variableArgs: false },
+    S: { id: OPS.stroke, numArgs: 0, variableArgs: false },
+    s: { id: OPS.closeStroke, numArgs: 0, variableArgs: false },
+    f: { id: OPS.fill, numArgs: 0, variableArgs: false },
+    F: { id: OPS.fill, numArgs: 0, variableArgs: false },
+    'f*': { id: OPS.eoFill, numArgs: 0, variableArgs: false },
+    B: { id: OPS.fillStroke, numArgs: 0, variableArgs: false },
+    'B*': { id: OPS.eoFillStroke, numArgs: 0, variableArgs: false },
+    b: { id: OPS.closeFillStroke, numArgs: 0, variableArgs: false },
+    'b*': { id: OPS.closeEOFillStroke, numArgs: 0, variableArgs: false },
+    n: { id: OPS.endPath, numArgs: 0, variableArgs: false },
+
+    // Clipping
+    W: { id: OPS.clip, numArgs: 0, variableArgs: false },
+    'W*': { id: OPS.eoClip, numArgs: 0, variableArgs: false },
+
+    // Text
+    BT: { id: OPS.beginText, numArgs: 0, variableArgs: false },
+    ET: { id: OPS.endText, numArgs: 0, variableArgs: false },
+    Tc: { id: OPS.setCharSpacing, numArgs: 1, variableArgs: false },
+    Tw: { id: OPS.setWordSpacing, numArgs: 1, variableArgs: false },
+    Tz: { id: OPS.setHScale, numArgs: 1, variableArgs: false },
+    TL: { id: OPS.setLeading, numArgs: 1, variableArgs: false },
+    Tf: { id: OPS.setFont, numArgs: 2, variableArgs: false },
+    Tr: { id: OPS.setTextRenderingMode, numArgs: 1, variableArgs: false },
+    Ts: { id: OPS.setTextRise, numArgs: 1, variableArgs: false },
+    Td: { id: OPS.moveText, numArgs: 2, variableArgs: false },
+    TD: { id: OPS.setLeadingMoveText, numArgs: 2, variableArgs: false },
+    Tm: { id: OPS.setTextMatrix, numArgs: 6, variableArgs: false },
+    'T*': { id: OPS.nextLine, numArgs: 0, variableArgs: false },
+    Tj: { id: OPS.showText, numArgs: 1, variableArgs: false },
+    TJ: { id: OPS.showSpacedText, numArgs: 1, variableArgs: false },
+    '\'': { id: OPS.nextLineShowText, numArgs: 1, variableArgs: false },
+    '"': { id: OPS.nextLineSetSpacingShowText, numArgs: 3,
+      variableArgs: false },
+
+    // Type3 fonts
+    d0: { id: OPS.setCharWidth, numArgs: 2, variableArgs: false },
+    d1: { id: OPS.setCharWidthAndBounds, numArgs: 6, variableArgs: false },
+
+    // Color
+    CS: { id: OPS.setStrokeColorSpace, numArgs: 1, variableArgs: false },
+    cs: { id: OPS.setFillColorSpace, numArgs: 1, variableArgs: false },
+    SC: { id: OPS.setStrokeColor, numArgs: 4, variableArgs: true },
+    SCN: { id: OPS.setStrokeColorN, numArgs: 33, variableArgs: true },
+    sc: { id: OPS.setFillColor, numArgs: 4, variableArgs: true },
+    scn: { id: OPS.setFillColorN, numArgs: 33, variableArgs: true },
+    G: { id: OPS.setStrokeGray, numArgs: 1, variableArgs: false },
+    g: { id: OPS.setFillGray, numArgs: 1, variableArgs: false },
+    RG: { id: OPS.setStrokeRGBColor, numArgs: 3, variableArgs: false },
+    rg: { id: OPS.setFillRGBColor, numArgs: 3, variableArgs: false },
+    K: { id: OPS.setStrokeCMYKColor, numArgs: 4, variableArgs: false },
+    k: { id: OPS.setFillCMYKColor, numArgs: 4, variableArgs: false },
+
+    // Shading
+    sh: { id: OPS.shadingFill, numArgs: 1, variableArgs: false },
+
+    // Images
+    BI: { id: OPS.beginInlineImage, numArgs: 0, variableArgs: false },
+    ID: { id: OPS.beginImageData, numArgs: 0, variableArgs: false },
+    EI: { id: OPS.endInlineImage, numArgs: 1, variableArgs: false },
+
+    // XObjects
+    Do: { id: OPS.paintXObject, numArgs: 1, variableArgs: false },
+    MP: { id: OPS.markPoint, numArgs: 1, variableArgs: false },
+    DP: { id: OPS.markPointProps, numArgs: 2, variableArgs: false },
+    BMC: { id: OPS.beginMarkedContent, numArgs: 1, variableArgs: false },
+    BDC: { id: OPS.beginMarkedContentProps, numArgs: 2,
+      variableArgs: false },
+    EMC: { id: OPS.endMarkedContent, numArgs: 0, variableArgs: false },
+
+    // Compatibility
+    BX: { id: OPS.beginCompat, numArgs: 0, variableArgs: false },
+    EX: { id: OPS.endCompat, numArgs: 0, variableArgs: false },
+
+    // (reserved partial commands for the lexer)
+    BM: null,
+    BD: null,
+    'true': null,
+    fa: null,
+    fal: null,
+    fals: null,
+    'false': null,
+    nu: null,
+    nul: null,
+    'null': null
+  };
+
+  function EvaluatorPreprocessor(stream, xref) {
+    // TODO(mduan): pass array of knownCommands rather than OP_MAP
+    // dictionary
+    this.parser = new Parser(new Lexer(stream, OP_MAP), false, xref);
+    this.ctm = new Float32Array([1, 0, 0, 1, 0, 0]);
+    this.savedStates = [];
+  }
+  EvaluatorPreprocessor.prototype = {
+    get savedStatesDepth() {
+      return this.savedStates.length;
+    },
+    read: function EvaluatorPreprocessor_read() {
+      var args = [];
+      while (true) {
+        var obj = this.parser.getObj();
+        if (isEOF(obj)) {
+          return null; // no more commands
+        }
+        if (!isCmd(obj)) {
+          // argument
+          if (obj !== null && obj !== undefined) {
+            args.push(obj instanceof Dict ? obj.getAll() : obj);
+            assertWellFormed(args.length <= 33, 'Too many arguments');
+          }
+          continue;
+        }
+
+        var cmd = obj.cmd;
+        // Check that the command is valid
+        var opSpec = OP_MAP[cmd];
+        if (!opSpec) {
+          warn('Unknown command "' + cmd + '"');
+          continue;
+        }
+
+        var fn = opSpec.id;
+
+        // Validate the number of arguments for the command
+        if (opSpec.variableArgs) {
+          if (args.length > opSpec.numArgs) {
+            info('Command ' + fn + ': expected [0,' + opSpec.numArgs +
+              '] args, but received ' + args.length + ' args');
+          }
+        } else {
+          if (args.length < opSpec.numArgs) {
+            // If we receive too few args, it's not possible
+            // to execute the command, so skip the command
+            info('Command ' + fn + ': because expected ' +
+              opSpec.numArgs + ' args, but received ' + args.length +
+              ' args; skipping');
+            args = [];
+            continue;
+          } else if (args.length > opSpec.numArgs) {
+            info('Command ' + fn + ': expected ' + opSpec.numArgs +
+              ' args, but received ' + args.length + ' args');
+          }
+        }
+
+        // TODO figure out how to type-check vararg functions
+
+        this.preprocessCommand(fn, args);
+
+        return {fn: fn, args: args};
+      }
+    },
+    getState: function EvaluatorPreprocessor_getState() {
+      return {
+        ctm: this.ctm
+      };
+    },
+    setState: function EvaluatorPreprocessor_setState(state) {
+      this.ctm = state.ctm;
+    },
+    preprocessCommand: function EvaluatorPreprocessor_preprocessCommand(fn,
+                                                                        args) {
+      switch (fn | 0) {
+        case OPS.save:
+          this.savedStates.push(this.getState());
+          break;
+        case OPS.restore:
+          var previousState = this.savedStates.pop();
+          if (previousState) {
+            this.setState(previousState);
+          }
+          break;
+        case OPS.transform:
+          var ctm = this.ctm;
+          var m = new Float32Array(6);
+          m[0] = ctm[0] * args[0] + ctm[2] * args[1];
+          m[1] = ctm[1] * args[0] + ctm[3] * args[1];
+          m[2] = ctm[0] * args[2] + ctm[2] * args[3];
+          m[3] = ctm[1] * args[2] + ctm[3] * args[3];
+          m[4] = ctm[0] * args[4] + ctm[2] * args[5] + ctm[4];
+          m[5] = ctm[1] * args[4] + ctm[3] * args[5] + ctm[5];
+          this.ctm = m;
+          break;
+      }
+    }
+  };
+  return EvaluatorPreprocessor;
+})();
diff --git a/src/core/parser.js b/src/core/parser.js
index bb2f17837..3cf29c982 100644
--- a/src/core/parser.js
+++ b/src/core/parser.js
@@ -36,21 +36,6 @@ var Parser = (function ParserClosure() {
   }
 
   Parser.prototype = {
-    saveState: function Parser_saveState() {
-      this.state = {
-        buf1: this.buf1,
-        buf2: this.buf2,
-        streamPos: this.lexer.stream.pos
-      };
-    },
-
-    restoreState: function Parser_restoreState() {
-      var state = this.state;
-      this.buf1 = state.buf1;
-      this.buf2 = state.buf2;
-      this.lexer.stream.pos = state.streamPos;
-    },
-
     refill: function Parser_refill() {
       this.buf1 = this.lexer.getObj();
       this.buf2 = this.lexer.getObj();
diff --git a/src/display/canvas.js b/src/display/canvas.js
index ad061a7ab..646c3f881 100644
--- a/src/display/canvas.js
+++ b/src/display/canvas.js
@@ -376,7 +376,6 @@ var CanvasExtraState = (function CanvasExtraStateClosure() {
     this.fillAlpha = 1;
     this.strokeAlpha = 1;
     this.lineWidth = 1;
-    this.paintFormXObjectDepth = 0;
 
     this.old = old;
   }
@@ -1453,7 +1452,6 @@ var CanvasGraphics = (function CanvasGraphicsClosure() {
     paintFormXObjectBegin: function CanvasGraphics_paintFormXObjectBegin(matrix,
                                                                         bbox) {
       this.save();
-      this.current.paintFormXObjectDepth++;
       this.baseTransformStack.push(this.baseTransform);
 
       if (matrix && isArray(matrix) && 6 == matrix.length)
@@ -1471,12 +1469,7 @@ var CanvasGraphics = (function CanvasGraphicsClosure() {
     },
 
     paintFormXObjectEnd: function CanvasGraphics_paintFormXObjectEnd() {
-      var depth = this.current.paintFormXObjectDepth;
-      do {
-        this.restore();
-        // some pdf don't close all restores inside object
-        // closing those for them
-      } while (this.current.paintFormXObjectDepth >= depth);
+      this.restore();
       this.baseTransform = this.baseTransformStack.pop();
     },
 
diff --git a/test/unit/evaluator_spec.js b/test/unit/evaluator_spec.js
index 48006f839..007365714 100644
--- a/test/unit/evaluator_spec.js
+++ b/test/unit/evaluator_spec.js
@@ -35,11 +35,11 @@ describe('evaluator', function() {
       var evaluator = new PartialEvaluator(new PdfManagerMock(),
                                            new XrefMock(), new HandlerMock(),
                                            'prefix');
-      var stream = new StringStream('qTT');
+      var stream = new StringStream('fTT');
       var result = evaluator.getOperatorList(stream, new ResourcesMock());
       expect(!!result.fnArray && !!result.argsArray).toEqual(true);
       expect(result.fnArray.length).toEqual(1);
-      expect(result.fnArray[0]).toEqual(OPS.save);
+      expect(result.fnArray[0]).toEqual(OPS.fill);
       expect(result.argsArray[0].length).toEqual(0);
     });
 
@@ -72,13 +72,13 @@ describe('evaluator', function() {
       var evaluator = new PartialEvaluator(new PdfManagerMock(),
                                            new XrefMock(), new HandlerMock(),
                                            'prefix');
-      var stream = new StringStream('qqq');
+      var stream = new StringStream('fff');
       var result = evaluator.getOperatorList(stream, new ResourcesMock());
       expect(!!result.fnArray && !!result.argsArray).toEqual(true);
       expect(result.fnArray.length).toEqual(3);
-      expect(result.fnArray[0]).toEqual(OPS.save);
-      expect(result.fnArray[1]).toEqual(OPS.save);
-      expect(result.fnArray[2]).toEqual(OPS.save);
+      expect(result.fnArray[0]).toEqual(OPS.fill);
+      expect(result.fnArray[1]).toEqual(OPS.fill);
+      expect(result.fnArray[2]).toEqual(OPS.fill);
     });
 
     it('should handle three glued operations #2', function() {
@@ -100,11 +100,11 @@ describe('evaluator', function() {
       var evaluator = new PartialEvaluator(new PdfManagerMock(),
                                            new XrefMock(), new HandlerMock(),
                                            'prefix');
-      var stream = new StringStream('q5 Ts');
+      var stream = new StringStream('f5 Ts');
       var result  = evaluator.getOperatorList(stream, new ResourcesMock());
       expect(!!result.fnArray && !!result.argsArray).toEqual(true);
       expect(result.fnArray.length).toEqual(2);
-      expect(result.fnArray[0]).toEqual(OPS.save);
+      expect(result.fnArray[0]).toEqual(OPS.fill);
       expect(result.fnArray[1]).toEqual(OPS.setTextRise);
       expect(result.argsArray.length).toEqual(2);
       expect(result.argsArray[1].length).toEqual(1);
@@ -115,13 +115,13 @@ describe('evaluator', function() {
       var evaluator = new PartialEvaluator(new PdfManagerMock(),
                                            new XrefMock(), new HandlerMock(),
                                            'prefix');
-      var stream = new StringStream('trueifalserinullq');
+      var stream = new StringStream('trueifalserinullh');
       var result = evaluator.getOperatorList(stream, new ResourcesMock());
       expect(!!result.fnArray && !!result.argsArray).toEqual(true);
       expect(result.fnArray.length).toEqual(3);
       expect(result.fnArray[0]).toEqual(OPS.setFlatness);
       expect(result.fnArray[1]).toEqual(OPS.setRenderingIntent);
-      expect(result.fnArray[2]).toEqual(OPS.save);
+      expect(result.fnArray[2]).toEqual(OPS.closePath);
       expect(result.argsArray.length).toEqual(3);
       expect(result.argsArray[0].length).toEqual(1);
       expect(result.argsArray[0][0]).toEqual(true);
@@ -163,6 +163,19 @@ describe('evaluator', function() {
       expect(result.argsArray).toEqual([]);
       expect(result.fnArray).toEqual([]);
     });
+    it('should close opened saves', function() {
+      var evaluator = new PartialEvaluator(new PdfManagerMock(),
+        new XrefMock(), new HandlerMock(),
+        'prefix');
+      var stream = new StringStream('qq');
+      var result = evaluator.getOperatorList(stream, new ResourcesMock());
+      expect(!!result.fnArray && !!result.argsArray).toEqual(true);
+      expect(result.fnArray.length).toEqual(4);
+      expect(result.fnArray[0]).toEqual(OPS.save);
+      expect(result.fnArray[1]).toEqual(OPS.save);
+      expect(result.fnArray[2]).toEqual(OPS.restore);
+      expect(result.fnArray[3]).toEqual(OPS.restore);
+    });
   });
 });