Use Array.join instead of += to build up strings in the Lexer.

11 years ago · c1ef7e4d63
1 changed file with 40 additions and 28 deletions
--- a/src/core/parser.js
+++ b/src/core/parser.js
@@ -335,6 +335,13 @@ var Lexer = (function LexerClosure() {
    this.stream = stream;
    this.nextChar();

+    // While lexing, we build up many strings one char at a time. Using += for
+    // this can result in lots of garbage strings. It's better to build an
+    // array of single-char strings and then join() them together at the end.
+    // And reusing a single array (i.e. |this.strBuf|) over and over for this
+    // purpose uses less memory than using a new array for each string.
+    this.strBuf = [];
+
    // The PDFs might have "glued" commands with other commands, operands or
    // literals, e.g. "q1". The knownCommands is a dictionary of the valid
    // commands and their prefixes. The prefixes are built the following way:
@@ -389,17 +396,19 @@ var Lexer = (function LexerClosure() {
    getNumber: function Lexer_getNumber() {
      var floating = false;
      var ch = this.currentChar;
-      var str = String.fromCharCode(ch);
+      var strBuf = this.strBuf;
+      strBuf.length = 0;
+      strBuf.push(String.fromCharCode(ch));
      while ((ch = this.nextChar()) >= 0) {
        if (ch === 0x2E && !floating) { // '.'
-          str += '.';
+          strBuf.push('.');
          floating = true;
        } else if (ch === 0x2D) { // '-'
          // ignore minus signs in the middle of numbers to match
          // Adobe's behavior
          warn('Badly formated number');
        } else if (ch >= 0x30 && ch <= 0x39) { // '0'-'9'
-          str += String.fromCharCode(ch);
+          strBuf.push(String.fromCharCode(ch));
        } else if (ch === 0x45 || ch === 0x65) { // 'E', 'e'
          floating = true;
        } else {
@@ -407,7 +416,7 @@ var Lexer = (function LexerClosure() {
          break;
        }
      }
-      var value = parseFloat(str);
+      var value = parseFloat(strBuf.join(''));
      if (isNaN(value))
        error('Invalid floating point number: ' + value);
      return value;
@@ -415,7 +424,8 @@ var Lexer = (function LexerClosure() {
    getString: function Lexer_getString() {
      var numParen = 1;
      var done = false;
-      var str = '';
+      var strBuf = this.strBuf;
+      strBuf.length = 0;

      var ch = this.nextChar();
      while (true) {
@@ -427,14 +437,14 @@ var Lexer = (function LexerClosure() {
            break;
          case 0x28: // '('
            ++numParen;
-            str += '(';
+            strBuf.push('(');
            break;
          case 0x29: // ')'
            if (--numParen === 0) {
              this.nextChar(); // consume strings ')'
              done = true;
            } else {
-              str += ')';
+              strBuf.push(')');
            }
            break;
          case 0x5C: // '\\'
@@ -445,24 +455,24 @@ var Lexer = (function LexerClosure() {
                done = true;
                break;
              case 0x6E: // 'n'
-                str += '\n';
+                strBuf.push('\n');
                break;
              case 0x72: // 'r'
-                str += '\r';
+                strBuf.push('\r');
                break;
              case 0x74: // 't'
-                str += '\t';
+                strBuf.push('\t');
                break;
              case 0x62: // 'b'
-                str += '\b';
+                strBuf.push('\b');
                break;
              case 0x66: // 'f'
-                str += '\f';
+                strBuf.push('\f');
                break;
              case 0x5C: // '\'
              case 0x28: // '('
              case 0x29: // ')'
-                str += String.fromCharCode(ch);
+                strBuf.push(String.fromCharCode(ch));
                break;
              case 0x30: case 0x31: case 0x32: case 0x33: // '0'-'3'
              case 0x34: case 0x35: case 0x36: case 0x37: // '4'-'7'
@@ -478,17 +488,17 @@ var Lexer = (function LexerClosure() {
                  }
                }

-                str += String.fromCharCode(x);
+                strBuf.push(String.fromCharCode(x));
                break;
              case 0x0A: case 0x0D: // LF, CR
                break;
              default:
-                str += String.fromCharCode(ch);
+                strBuf.push(String.fromCharCode(ch));
                break;
            }
            break;
          default:
-            str += String.fromCharCode(ch);
+            strBuf.push(String.fromCharCode(ch));
            break;
        }
        if (done) {
@@ -498,10 +508,12 @@ var Lexer = (function LexerClosure() {
          ch = this.nextChar();
        }
      }
-      return str;
+      return strBuf.join('');
    },
    getName: function Lexer_getName() {
-      var str = '', ch;
+      var ch;
+      var strBuf = this.strBuf;
+      strBuf.length = 0;
      while ((ch = this.nextChar()) >= 0 && !specialChars[ch]) {
        if (ch === 0x23) { // '#'
          ch = this.nextChar();
@@ -510,23 +522,23 @@ var Lexer = (function LexerClosure() {
            var x2 = toHexDigit(this.nextChar());
            if (x2 == -1)
              error('Illegal digit in hex char in name: ' + x2);
-            str += String.fromCharCode((x << 4) | x2);
+            strBuf.push(String.fromCharCode((x << 4) | x2));
          } else {
-            str += '#';
-            str += String.fromCharCode(ch);
+            strBuf.push('#', String.fromCharCode(ch));
          }
        } else {
-          str += String.fromCharCode(ch);
+          strBuf.push(String.fromCharCode(ch));
        }
      }
-      if (str.length > 128) {
+      if (strBuf.length > 128) {
        error('Warning: name token is longer than allowed by the spec: ' +
-              str.length);
+              strBuf.length);
      }
-      return new Name(str);
+      return new Name(strBuf.join(''));
    },
    getHexString: function Lexer_getHexString() {
-      var str = '';
+      var strBuf = this.strBuf;
+      strBuf.length = 0;
      var ch = this.currentChar;
      var isFirstHex = true;
      var firstDigit;
@@ -556,13 +568,13 @@ var Lexer = (function LexerClosure() {
              ch = this.nextChar();
              continue;
            }
-            str += String.fromCharCode((firstDigit << 4) | secondDigit);
+            strBuf.push(String.fromCharCode((firstDigit << 4) | secondDigit));
          }
          isFirstHex = !isFirstHex;
          ch = this.nextChar();
        }
      }
-      return str;
+      return strBuf.join('');
    },
    getObj: function Lexer_getObj() {
      // skip whitespace and comments