Browse Source

Merge pull request #5111 from nnethercote/better-cidchars

Represent cid chars using integers, not strings.
Yury Delendik 11 years ago
parent
commit
6865c284a7
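  1. src/core/cmap.js (54 changed lines: additions + deletions)
  2. src/core/fonts.js (15 changed lines: additions + deletions)
  3. test/unit/cmap_spec.js (6 changed lines: additions + deletions)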

54
src/core/cmap.js

@ -199,6 +199,10 @@ var CMap = (function CMapClosure() {
// where nBytePairs are ranges e.g. [low1, high1, low2, high2, ...] // where nBytePairs are ranges e.g. [low1, high1, low2, high2, ...]
this.codespaceRanges = [[], [], [], []]; this.codespaceRanges = [[], [], [], []];
this.numCodespaceRanges = 0; this.numCodespaceRanges = 0;
// Map entries have one of two forms.
// - cid chars are 16-bit unsigned integers, stored as integers.
// - bf chars are variable-length byte sequences, stored as strings, with
// one byte per character.
this._map = []; this._map = [];
this.vertical = false; this.vertical = false;
this.useCMap = null; this.useCMap = null;
@ -210,18 +214,23 @@ var CMap = (function CMapClosure() {
this.numCodespaceRanges++; this.numCodespaceRanges++;
}, },
mapRange: function(low, high, dstLow) { mapCidRange: function(low, high, dstLow) {
while (low <= high) {
this._map[low++] = dstLow++;
}
},
mapBfRange: function(low, high, dstLow) {
var lastByte = dstLow.length - 1; var lastByte = dstLow.length - 1;
while (low <= high) { while (low <= high) {
this._map[low] = dstLow; this._map[low++] = dstLow;
// Only the last byte has to be incremented. // Only the last byte has to be incremented.
dstLow = dstLow.substr(0, lastByte) + dstLow = dstLow.substr(0, lastByte) +
String.fromCharCode(dstLow.charCodeAt(lastByte) + 1); String.fromCharCode(dstLow.charCodeAt(lastByte) + 1);
++low;
} }
}, },
mapRangeToArray: function(low, high, array) { mapBfRangeToArray: function(low, high, array) {
var i = 0, ii = array.length; var i = 0, ii = array.length;
while (low <= high && i < ii) { while (low <= high && i < ii) {
this._map[low] = array[i++]; this._map[low] = array[i++];
@ -229,6 +238,7 @@ var CMap = (function CMapClosure() {
} }
}, },
// This is used for both bf and cid chars.
mapOne: function(src, dst) { mapOne: function(src, dst) {
this._map[src] = dst; this._map[src] = dst;
}, },
@ -302,7 +312,7 @@ var IdentityCMap = (function IdentityCMapClosure() {
CMap.call(this); CMap.call(this);
this.vertical = vertical; this.vertical = vertical;
this.addCodespaceRange(n, 0, 0xffff); this.addCodespaceRange(n, 0, 0xffff);
this.mapRange(0, 0xffff, '\u0000'); this.mapCidRange(0, 0xffff, 0);
} }
Util.inherit(IdentityCMap, CMap, {}); Util.inherit(IdentityCMap, CMap, {});
@ -522,7 +532,7 @@ var BinaryCMapReader = (function BinaryCMapReaderClosure() {
case 2: // cidchar case 2: // cidchar
stream.readHex(char, dataSize); stream.readHex(char, dataSize);
code = stream.readNumber(); code = stream.readNumber();
cMap.mapOne(hexToInt(char, dataSize), String.fromCharCode(code)); cMap.mapOne(hexToInt(char, dataSize), code);
for (i = 1; i < subitemsCount; i++) { for (i = 1; i < subitemsCount; i++) {
incHex(char, dataSize); incHex(char, dataSize);
if (!sequence) { if (!sequence) {
@ -530,7 +540,7 @@ var BinaryCMapReader = (function BinaryCMapReaderClosure() {
addHex(char, tmp, dataSize); addHex(char, tmp, dataSize);
} }
code = stream.readSigned() + (code + 1); code = stream.readSigned() + (code + 1);
cMap.mapOne(hexToInt(char, dataSize), String.fromCharCode(code)); cMap.mapOne(hexToInt(char, dataSize), code);
} }
break; break;
case 3: // cidrange case 3: // cidrange
@ -538,8 +548,8 @@ var BinaryCMapReader = (function BinaryCMapReaderClosure() {
stream.readHexNumber(end, dataSize); stream.readHexNumber(end, dataSize);
addHex(end, start, dataSize); addHex(end, start, dataSize);
code = stream.readNumber(); code = stream.readNumber();
cMap.mapRange(hexToInt(start, dataSize), hexToInt(end, dataSize), cMap.mapCidRange(hexToInt(start, dataSize), hexToInt(end, dataSize),
String.fromCharCode(code)); code);
for (i = 1; i < subitemsCount; i++) { for (i = 1; i < subitemsCount; i++) {
incHex(end, dataSize); incHex(end, dataSize);
if (!sequence) { if (!sequence) {
@ -551,8 +561,8 @@ var BinaryCMapReader = (function BinaryCMapReaderClosure() {
stream.readHexNumber(end, dataSize); stream.readHexNumber(end, dataSize);
addHex(end, start, dataSize); addHex(end, start, dataSize);
code = stream.readNumber(); code = stream.readNumber();
cMap.mapRange(hexToInt(start, dataSize), hexToInt(end, dataSize), cMap.mapCidRange(hexToInt(start, dataSize), hexToInt(end, dataSize),
String.fromCharCode(code)); code);
} }
break; break;
case 4: // bfchar case 4: // bfchar
@ -578,9 +588,9 @@ var BinaryCMapReader = (function BinaryCMapReaderClosure() {
stream.readHexNumber(end, ucs2DataSize); stream.readHexNumber(end, ucs2DataSize);
addHex(end, start, ucs2DataSize); addHex(end, start, ucs2DataSize);
stream.readHex(charCode, dataSize); stream.readHex(charCode, dataSize);
cMap.mapRange(hexToInt(start, ucs2DataSize), cMap.mapBfRange(hexToInt(start, ucs2DataSize),
hexToInt(end, ucs2DataSize), hexToInt(end, ucs2DataSize),
hexToStr(charCode, dataSize)); hexToStr(charCode, dataSize));
for (i = 1; i < subitemsCount; i++) { for (i = 1; i < subitemsCount; i++) {
incHex(end, ucs2DataSize); incHex(end, ucs2DataSize);
if (!sequence) { if (!sequence) {
@ -592,9 +602,9 @@ var BinaryCMapReader = (function BinaryCMapReaderClosure() {
stream.readHexNumber(end, ucs2DataSize); stream.readHexNumber(end, ucs2DataSize);
addHex(end, start, ucs2DataSize); addHex(end, start, ucs2DataSize);
stream.readHex(charCode, dataSize); stream.readHex(charCode, dataSize);
cMap.mapRange(hexToInt(start, ucs2DataSize), cMap.mapBfRange(hexToInt(start, ucs2DataSize),
hexToInt(end, ucs2DataSize), hexToInt(end, ucs2DataSize),
hexToStr(charCode, dataSize)); hexToStr(charCode, dataSize));
} }
break; break;
default: default:
@ -675,7 +685,7 @@ var CMapFactory = (function CMapFactoryClosure() {
obj = lexer.getObj(); obj = lexer.getObj();
if (isInt(obj) || isString(obj)) { if (isInt(obj) || isString(obj)) {
var dstLow = isInt(obj) ? String.fromCharCode(obj) : obj; var dstLow = isInt(obj) ? String.fromCharCode(obj) : obj;
cMap.mapRange(low, high, dstLow); cMap.mapBfRange(low, high, dstLow);
} else if (isCmd(obj, '[')) { } else if (isCmd(obj, '[')) {
obj = lexer.getObj(); obj = lexer.getObj();
var array = []; var array = [];
@ -683,7 +693,7 @@ var CMapFactory = (function CMapFactoryClosure() {
array.push(obj); array.push(obj);
obj = lexer.getObj(); obj = lexer.getObj();
} }
cMap.mapRangeToArray(low, high, array); cMap.mapBfRangeToArray(low, high, array);
} else { } else {
break; break;
} }
@ -704,7 +714,7 @@ var CMapFactory = (function CMapFactoryClosure() {
var src = strToInt(obj); var src = strToInt(obj);
obj = lexer.getObj(); obj = lexer.getObj();
expectInt(obj); expectInt(obj);
var dst = String.fromCharCode(obj); var dst = obj;
cMap.mapOne(src, dst); cMap.mapOne(src, dst);
} }
} }
@ -725,8 +735,8 @@ var CMapFactory = (function CMapFactoryClosure() {
var high = strToInt(obj); var high = strToInt(obj);
obj = lexer.getObj(); obj = lexer.getObj();
expectInt(obj); expectInt(obj);
var dstLow = String.fromCharCode(obj); var dstLow = obj;
cMap.mapRange(low, high, dstLow); cMap.mapCidRange(low, high, dstLow);
} }
} }

15
src/core/fonts.js

@ -3899,8 +3899,7 @@ var Font = (function FontClosure() {
var cidToGidMap = properties.cidToGidMap || []; var cidToGidMap = properties.cidToGidMap || [];
var cidToGidMapLength = cidToGidMap.length; var cidToGidMapLength = cidToGidMap.length;
properties.cMap.forEach(function(charCode, cid) { properties.cMap.forEach(function(charCode, cid) {
assert(cid.length === 1, 'Max size of CID is 65,535'); assert(cid <= 0xffff, 'Max size of CID is 65,535');
cid = cid.charCodeAt(0);
var glyphId = -1; var glyphId = -1;
if (cidToGidMapLength === 0) { if (cidToGidMapLength === 0) {
glyphId = charCode; glyphId = charCode;
@ -4370,10 +4369,10 @@ var Font = (function FontClosure() {
var cMap = properties.cMap; var cMap = properties.cMap;
toUnicode = []; toUnicode = [];
cMap.forEach(function(charcode, cid) { cMap.forEach(function(charcode, cid) {
assert(cid.length === 1, 'Max size of CID is 65,535'); assert(cid <= 0xffff, 'Max size of CID is 65,535');
// e) Map the CID obtained in step (a) according to the CMap obtained // e) Map the CID obtained in step (a) according to the CMap obtained
// in step (d), producing a Unicode value. // in step (d), producing a Unicode value.
var ucs2 = ucs2CMap.lookup(cid.charCodeAt(0)); var ucs2 = ucs2CMap.lookup(cid);
if (ucs2) { if (ucs2) {
toUnicode[charcode] = toUnicode[charcode] =
String.fromCharCode((ucs2.charCodeAt(0) << 8) + String.fromCharCode((ucs2.charCodeAt(0) << 8) +
@ -4415,7 +4414,7 @@ var Font = (function FontClosure() {
var charcode = 0; var charcode = 0;
if (this.composite) { if (this.composite) {
if (this.cMap.contains(glyphUnicode)) { if (this.cMap.contains(glyphUnicode)) {
charcode = this.cMap.lookup(glyphUnicode).charCodeAt(0); charcode = this.cMap.lookup(glyphUnicode);
} }
} }
// ... via toUnicode map // ... via toUnicode map
@ -4444,7 +4443,7 @@ var Font = (function FontClosure() {
var widthCode = charcode; var widthCode = charcode;
if (this.cMap && this.cMap.contains(charcode)) { if (this.cMap && this.cMap.contains(charcode)) {
widthCode = this.cMap.lookup(charcode).charCodeAt(0); widthCode = this.cMap.lookup(charcode);
} }
width = this.widths[widthCode]; width = this.widths[widthCode];
width = isNum(width) ? width : this.defaultWidth; width = isNum(width) ? width : this.defaultWidth;
@ -5626,8 +5625,8 @@ var CFFFont = (function CFFFontClosure() {
// If the font is actually a CID font then we should use the charset // If the font is actually a CID font then we should use the charset
// to map CIDs to GIDs. // to map CIDs to GIDs.
for (glyphId = 0; glyphId < charsets.length; glyphId++) { for (glyphId = 0; glyphId < charsets.length; glyphId++) {
var cidString = String.fromCharCode(charsets[glyphId]); var cid = charsets[glyphId];
var charCode = properties.cMap.charCodeOf(cidString); var charCode = properties.cMap.charCodeOf(cid);
charCodeToGlyphId[charCode] = glyphId; charCodeToGlyphId[charCode] = glyphId;
} }
} else { } else {

6
test/unit/cmap_spec.js

@ -44,7 +44,7 @@ describe('cmap', function() {
'endcidchar\n'; 'endcidchar\n';
var stream = new StringStream(str); var stream = new StringStream(str);
var cmap = CMapFactory.create(stream); var cmap = CMapFactory.create(stream);
expect(cmap.lookup(0x14)).toEqual(String.fromCharCode(0x00)); expect(cmap.lookup(0x14)).toEqual(0x00);
expect(cmap.lookup(0x15)).toBeUndefined(); expect(cmap.lookup(0x15)).toBeUndefined();
}); });
it('parses begincidrange', function() { it('parses begincidrange', function() {
@ -54,8 +54,8 @@ describe('cmap', function() {
var stream = new StringStream(str); var stream = new StringStream(str);
var cmap = CMapFactory.create(stream); var cmap = CMapFactory.create(stream);
expect(cmap.lookup(0x15)).toBeUndefined(); expect(cmap.lookup(0x15)).toBeUndefined();
expect(cmap.lookup(0x16)).toEqual(String.fromCharCode(0x00)); expect(cmap.lookup(0x16)).toEqual(0x00);
expect(cmap.lookup(0x1B)).toEqual(String.fromCharCode(0x05)); expect(cmap.lookup(0x1B)).toEqual(0x05);
expect(cmap.lookup(0x1C)).toBeUndefined(); expect(cmap.lookup(0x1C)).toBeUndefined();
}); });
it('decodes codespace ranges', function() { it('decodes codespace ranges', function() {

Loading…
Cancel
Save