Browse Source

Rewrite Lexer_getNumber.

Now, it computes the numbers with only basic arithmetic operations, without first creating a string and then calling parseFloat.
The new function doesn't behave exactly the same as the old one.
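In particular, the old behaviour was that when a number was immediately followed by an 'E', the 'E' was consumed as part of the number. Now it is not consumed unless it is followed by a digit or a sign, which allows for "glued" numbers and operators (e.g. "123ET" is read as the number 123 followed by the operator ET).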
Also, the new function is faster and consumes less memory.
Ophir LOJKINE 11 years ago
parent
commit
4a66eccedc
  1. 90
      src/core/parser.js
  2. 24
      test/unit/parser_spec.js

90
src/core/parser.js

@ -393,47 +393,85 @@ var Lexer = (function LexerClosure() {
nextChar: function Lexer_nextChar() { nextChar: function Lexer_nextChar() {
return (this.currentChar = this.stream.getByte()); return (this.currentChar = this.stream.getByte());
}, },
peekChar: function Lexer_peekChar() {
return this.stream.peekBytes(1)[0];
},
getNumber: function Lexer_getNumber() { getNumber: function Lexer_getNumber() {
var floating = false;
var ch = this.currentChar; var ch = this.currentChar;
var allDigits = ch >= 0x30 && ch <= 0x39; var eNotation = false;
var strBuf = this.strBuf; var divideBy = 0; // different from 0 if it's a floating point value
strBuf.length = 0;
strBuf.push(String.fromCharCode(ch)); var sign = 1;
if (ch === 0x2D) { // '-'
sign = -1;
ch = this.nextChar();
} else if (ch === 0x2B) { // '+'
ch = this.nextChar();
}
if (ch === 0x2E) { // '.'
divideBy = 10;
ch = this.nextChar();
}
if (ch < 0x30 || ch > 0x39) { // '0' - '9'
error('Invalid number: ' + String.fromCharCode(ch));
return 0;
}
var baseValue = ch - 0x30; // '0'
var powerValue = 0;
var powerValueSign = 1;
while ((ch = this.nextChar()) >= 0) { while ((ch = this.nextChar()) >= 0) {
if (ch >= 0x30 && ch <= 0x39) { // '0'-'9' if (0x30 <= ch && ch <= 0x39) { // '0' - '9'
strBuf.push(String.fromCharCode(ch)); var currentDigit = ch - 0x30; // '0'
} else if (ch === 0x2E && !floating) { // '.' if (eNotation) { // We are after an 'e' or 'E'
strBuf.push('.'); powerValue = powerValue * 10 + currentDigit;
floating = true; } else {
allDigits = false; if (divideBy !== 0) { // We are after a point
divideBy *= 10;
}
baseValue = baseValue * 10 + currentDigit;
}
} else if (ch === 0x2E) { // '.'
if (divideBy === 0) {
divideBy = 1;
} else {
// A number can have only one '.'
break;
}
} else if (ch === 0x2D) { // '-' } else if (ch === 0x2D) { // '-'
// ignore minus signs in the middle of numbers to match // ignore minus signs in the middle of numbers to match
// Adobe's behavior // Adobe's behavior
warn('Badly formated number'); warn('Badly formated number');
allDigits = false;
} else if (ch === 0x45 || ch === 0x65) { // 'E', 'e' } else if (ch === 0x45 || ch === 0x65) { // 'E', 'e'
floating = true; // 'E' can be either a scientific notation or the beginning of a new
allDigits = false; // operator
var hasE = true;
ch = this.peekChar();
if (ch === 0x2B || ch === 0x2D) { // '+', '-'
powerValueSign = (ch === 0x2D) ? -1 : 1;
this.nextChar(); // Consume the sign character
} else if (ch < 0x30 || ch > 0x39) { // '0' - '9'
// The 'E' must be the beginning of a new operator
break;
}
eNotation = true;
} else { } else {
// the last character doesn't belong to us // the last character doesn't belong to us
break; break;
} }
} }
var value;
if (allDigits) { if (divideBy !== 0) {
value = 0; baseValue /= divideBy;
var charCodeOfZero = 48; // '0'
for (var i = 0, ii = strBuf.length; i < ii; i++) {
value = value * 10 + (strBuf[i].charCodeAt(0) - charCodeOfZero);
}
} else {
value = parseFloat(strBuf.join(''));
if (isNaN(value)) {
error('Invalid floating point number: ' + value);
} }
if (eNotation) {
baseValue *= Math.pow(10, powerValueSign * powerValue);
} }
return value; return sign * baseValue;
}, },
getString: function Lexer_getString() { getString: function Lexer_getString() {
var numParen = 1; var numParen = 1;

24
test/unit/parser_spec.js

@ -14,6 +14,30 @@ describe('parser', function() {
expect(result).toEqual(11.234); expect(result).toEqual(11.234);
}); });
it('should parse PostScript numbers', function() {
var numbers = ['-.002', '34.5', '-3.62', '123.6e10', '1E-5', '-1.', '0.0',
'123', '-98', '43445', '0', '+17'];
for (var i=0, ii=numbers.length; i<ii; i++) {
var num = numbers[i];
var input = new StringStream(num);
var lexer = new Lexer(input);
var result = lexer.getNumber();
expect(result).toEqual(parseFloat(num));
}
});
it('should handle glued numbers and operators', function() {
var input = new StringStream('123ET');
var lexer = new Lexer(input);
var value = lexer.getNumber();
expect(value).toEqual(123);
// The lexer must not have consumed the 'E'
expect(lexer.currentChar).toEqual(0x45); // 'E'
});
it('should stop parsing strings at the end of stream', function() { it('should stop parsing strings at the end of stream', function() {
var input = new StringStream('(1$4)'); var input = new StringStream('(1$4)');
input.getByte = function(super_getByte) { input.getByte = function(super_getByte) {

Loading…
Cancel
Save