Pure Javascript OCR for more than 100 Languages 📖🎉🖥
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

1138 lines
25 KiB

<script type="text/javascript">
var setlang = (function(){
var crc32 = (function() {
var table = [],
poly = 0xEDB88320; // reverse polynomial
// build the table
function makeTable() {
var c, n, k;
for (n = 0; n < 256; n += 1) {
c = n;
for (k = 0; k < 8; k += 1) {
if (c & 1) {
c = poly ^ (c >>> 1);
} else {
c = c >>> 1;
}
}
table[n] = c >>> 0;
}
}
function strToArr(str) {
// sweet hack to turn string into a 'byte' array
return Array.prototype.map.call(str, function (c) {
return c.charCodeAt(0);
});
}
/*
* Compute CRC of array directly.
*
* This is slower for repeated calls, so append mode is not supported.
*/
function crcDirect(arr) {
var crc = -1, // initial contents of LFBSR
i, j, l, temp;
for (i = 0, l = arr.length; i < l; i += 1) {
temp = (crc ^ arr[i]) & 0xff;
// read 8 bits one at a time
for (j = 0; j < 8; j += 1) {
if ((temp & 1) === 1) {
temp = (temp >>> 1) ^ poly;
} else {
temp = (temp >>> 1);
}
}
crc = (crc >>> 8) ^ temp;
}
// flip bits
return crc ^ -1;
}
/*
* Compute CRC with the help of a pre-calculated table.
*
* This supports append mode, if the second parameter is set.
*/
function crcTable(arr, append) {
var crc, i, l;
// if we're in append mode, don't reset crc
// if arr is null or undefined, reset table and return
if (typeof crcTable.crc === 'undefined' || !append || !arr) {
crcTable.crc = 0 ^ -1;
if (!arr) {
return;
}
}
// store in temp variable for minor speed gain
crc = crcTable.crc;
for (i = 0, l = arr.length; i < l; i += 1) {
crc = (crc >>> 8) ^ table[(crc ^ arr[i]) & 0xff];
}
crcTable.crc = crc;
return crc ^ -1;
}
// build the table
// this isn't that costly, and most uses will be for table assisted mode
makeTable();
var exports = function (val, direct) {
var val = (typeof val === 'string') ? strToArr(val) : val,
ret = direct ? crcDirect(val) : crcTable(val);
// convert to 2's complement hex
return (ret >>> 0).toString(16);
};
exports.direct = crcDirect;
exports.table = crcTable;
return exports;
})()
/*
* $Id: rawinflate.js,v 0.2 2009/03/01 18:32:24 dankogai Exp $
*
* original:
* http://www.onicos.com/staff/iz/amuse/javascript/expert/inflate.txt
*/
/* Copyright (C) 1999 Masanao Izumo <iz@onicos.co.jp>
* Version: 1.0.0.1
* LastModified: Dec 25 1999
*/
/* Interface:
* data = inflate(src);
*/
/*
 * Raw DEFLATE decompressor.
 *
 * inflate(arr) takes an array-like of byte values holding a raw deflate
 * stream and returns a plain array with the decompressed byte values.
 * Errors in the stream are not thrown: decoding simply stops and whatever
 * was produced so far is returned, so callers must validate the result
 * (for example with a checksum).
 *
 * All decoder state lives in the closure below, so the decoder is not
 * re-entrant; the sliding window and the fixed Huffman tables are kept
 * between calls.
 */
var inflate = (function () {
  /* constant parameters */
  var WSIZE = 32768, // Sliding Window size
    STORED_BLOCK = 0,
    STATIC_TREES = 1,
    DYN_TREES = 2,
    // Reads past the end of the input that are tolerated before the stream
    // is treated as truncated. The bit buffer never holds more than a few
    // bytes of lookahead, so a complete stream stays well below this.
    MAX_EOF_READS = 8,
    /* for inflate */
    lbits = 9, // bits in base literal/length lookup table
    dbits = 6, // bits in base distance lookup table
    /* variables (inflate) */
    slide,
    wp, // current position in slide
    fixed_tl = null, // inflate static
    fixed_td, // inflate static
    fixed_bl, // inflate static
    fixed_bd, // inflate static
    bit_buf, // bit buffer
    bit_len, // bits in bit buffer
    method,
    eof,
    eof_reads, // number of GET_BYTE calls made after the input ran out
    copy_leng,
    copy_dist,
    tl, // literal length decoder table
    td, // literal distance decoder table
    bl, // number of bits decoded by tl
    bd, // number of bits decoded by td
    inflate_data,
    inflate_pos,
    /* constant tables (inflate) */
    MASK_BITS = [
      0x0000,
      0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff,
      0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff
    ],
    // Tables for deflate from PKZIP's appnote.txt.
    // Copy lengths for literal codes 257..285
    // (the 258 entry belongs to code 285, which takes no extra bits)
    cplens = [
      3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
      35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0
    ],
    // Extra bits for literal codes 257..285
    cplext = [
      0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
      3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 99, 99 // 99==invalid
    ],
    // Copy offsets for distance codes 0..29
    cpdist = [
      1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
      257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
      8193, 12289, 16385, 24577
    ],
    // Extra bits for distance codes
    cpdext = [
      0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
      7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
      12, 12, 13, 13
    ],
    // Order of the bit length code lengths
    border = [
      16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
    ];

  /* objects (inflate) */

  // One link in the chain of lookup tables produced by HuftBuild.
  function HuftList() {
    this.next = null;
    this.list = null;
  }

  // One entry of a Huffman lookup table.
  function HuftNode() {
    this.e = 0; // number of extra bits or operation
    this.b = 0; // number of bits in this code or subcode
    // union
    this.n = 0; // literal, length base, or distance base
    this.t = null; // (HuftNode) pointer to next level of table
  }

  /*
   * Build the lookup tables for one set of Huffman code lengths.
   *
   * The result is left on the constructed object: `status` (0 success,
   * 1 incomplete code set, 2 bad input), `root` (HuftList whose `list` is
   * the first-level table, or null when every length is zero) and `m`
   * (number of bits indexed by the first-level table).
   *
   * @param b- code lengths in bits (all assumed <= BMAX)
   * @param n- number of codes (assumed <= N_MAX)
   * @param s- number of simple-valued codes (0..s-1)
   * @param d- list of base values for non-simple codes
   * @param e- list of extra bits for non-simple codes
   * @param mm- maximum lookup bits
   */
  function HuftBuild(b, n, s, d, e, mm) {
    this.BMAX = 16; // maximum bit length of any code
    this.N_MAX = 288; // maximum number of codes in any set
    this.status = 0; // 0: success, 1: incomplete table, 2: bad input
    this.root = null; // (HuftList) starting table
    this.m = 0; // maximum lookup bits, returns actual
    /* Given a list of code lengths and a maximum table size, make a set of
       tables to decode that set of codes. Return zero on success, one if
       the given code set is incomplete (the tables are still built in this
       case), two if the input is invalid (all zero length codes or an
       oversubscribed set of lengths), and three if not enough memory.
       The code with value 256 is special, and the tables are constructed
       so that no bits beyond that code are fetched when that code is
       decoded. */
    var a; // counter for codes of length k
    var c = [];
    var el; // length of EOB code (value 256)
    var f; // i repeats in table every f entries
    var g; // maximum code length
    var h; // table level
    var i; // counter, current code
    var j; // counter
    var k; // number of bits in current code
    var lx = [];
    var p; // pointer into c[], b[], or v[]
    var pidx; // index of p
    var q; // (HuftNode) points to current table
    var r = new HuftNode(); // table entry for structure assignment
    var u = [];
    var v = [];
    var w;
    var x = [];
    var xp; // pointer into x or c
    var y; // number of dummy codes added
    var z; // number of entries in current table
    var o;
    var tail; // (HuftList)
    tail = this.root = null;
    // bit length count table
    for (i = 0; i < this.BMAX + 1; i++) {
      c[i] = 0;
    }
    // stack of bits per table
    for (i = 0; i < this.BMAX + 1; i++) {
      lx[i] = 0;
    }
    // HuftNode[BMAX][] table stack
    for (i = 0; i < this.BMAX; i++) {
      u[i] = null;
    }
    // values in order of bit length
    for (i = 0; i < this.N_MAX; i++) {
      v[i] = 0;
    }
    // bit offsets, then code stack
    for (i = 0; i < this.BMAX + 1; i++) {
      x[i] = 0;
    }
    // Generate counts for each bit length
    el = n > 256 ? b[256] : this.BMAX; // set length of EOB code, if any
    p = b; pidx = 0;
    i = n;
    do {
      c[p[pidx]]++; // assume all entries <= BMAX
      pidx++;
    } while (--i > 0);
    if (c[0] === n) { // null input--all zero length codes
      // note: reported as success, but with no table at all (root is null)
      this.root = null;
      this.m = 0;
      this.status = 0;
      return;
    }
    // Find minimum and maximum length, bound *m by those
    for (j = 1; j <= this.BMAX; j++) {
      if (c[j] !== 0) {
        break;
      }
    }
    k = j; // minimum code length
    if (mm < j) {
      mm = j;
    }
    for (i = this.BMAX; i !== 0; i--) {
      if (c[i] !== 0) {
        break;
      }
    }
    g = i; // maximum code length
    if (mm > i) {
      mm = i;
    }
    // Adjust last length count to fill out codes, if needed
    for (y = 1 << j; j < i; j++, y <<= 1) {
      if ((y -= c[j]) < 0) {
        this.status = 2; // bad input: more codes than bits
        this.m = mm;
        return;
      }
    }
    if ((y -= c[i]) < 0) {
      this.status = 2;
      this.m = mm;
      return;
    }
    c[i] += y;
    // Generate starting offsets into the value table for each length
    x[1] = j = 0;
    p = c;
    pidx = 1;
    xp = 2;
    while (--i > 0) { // note that i == g from above
      x[xp++] = (j += p[pidx++]);
    }
    // Make a table of values in order of bit lengths
    p = b; pidx = 0;
    i = 0;
    do {
      if ((j = p[pidx++]) !== 0) {
        v[x[j]++] = i;
      }
    } while (++i < n);
    n = x[g]; // set n to length of v
    // Generate the Huffman codes and for each, make the table entries
    x[0] = i = 0; // first Huffman code is zero
    p = v; pidx = 0; // grab values in bit order
    h = -1; // no tables yet--level -1
    w = lx[0] = 0; // no bits decoded yet
    q = null; // ditto
    z = 0; // ditto
    // go through the bit lengths (k already is bits in shortest code)
    for (null; k <= g; k++) {
      a = c[k];
      while (a-- > 0) {
        // here i is the Huffman code of length k bits for value p[pidx]
        // make tables up to required level
        while (k > w + lx[1 + h]) {
          w += lx[1 + h]; // add bits already decoded
          h++;
          // compute minimum size table less than or equal to *m bits
          z = (z = g - w) > mm ? mm : z; // upper limit
          if ((f = 1 << (j = k - w)) > a + 1) { // try a k-w bit table
            // too few codes for k-w bit table
            f -= a + 1; // deduct codes from patterns left
            xp = k;
            while (++j < z) { // try smaller tables up to z bits
              if ((f <<= 1) <= c[++xp]) {
                break; // enough codes to use up j bits
              }
              f -= c[xp]; // else deduct codes from patterns
            }
          }
          if (w + j > el && w < el) {
            j = el - w; // make EOB code end at table
          }
          z = 1 << j; // table entries for j-bit table
          lx[1 + h] = j; // set table size in stack
          // allocate and link in new table
          q = [];
          for (o = 0; o < z; o++) {
            q[o] = new HuftNode();
          }
          if (!tail) {
            tail = this.root = new HuftList();
          } else {
            tail = tail.next = new HuftList();
          }
          tail.next = null;
          tail.list = q;
          u[h] = q; // table starts after link
          /* connect to last table, if there is one */
          if (h > 0) {
            x[h] = i; // save pattern for backing up
            r.b = lx[h]; // bits to dump before this table
            r.e = 16 + j; // bits in this table
            r.t = q; // pointer to this table
            j = (i & ((1 << w) - 1)) >> (w - lx[h]);
            u[h - 1][j].e = r.e;
            u[h - 1][j].b = r.b;
            u[h - 1][j].n = r.n;
            u[h - 1][j].t = r.t;
          }
        }
        // set up table entry in r
        r.b = k - w;
        if (pidx >= n) {
          r.e = 99; // out of values--invalid code
        } else if (p[pidx] < s) {
          r.e = (p[pidx] < 256 ? 16 : 15); // 256 is end-of-block code
          r.n = p[pidx++]; // simple code is just the value
        } else {
          r.e = e[p[pidx] - s]; // non-simple--look up in lists
          r.n = d[p[pidx++] - s];
        }
        // fill code-like entries with r //
        f = 1 << (k - w);
        for (j = i >> w; j < z; j += f) {
          q[j].e = r.e;
          q[j].b = r.b;
          q[j].n = r.n;
          q[j].t = r.t;
        }
        // backwards increment the k-bit code i
        for (j = 1 << (k - 1); (i & j) !== 0; j >>= 1) {
          i ^= j;
        }
        i ^= j;
        // backup over finished tables
        while ((i & ((1 << w) - 1)) !== x[h]) {
          w -= lx[h]; // don't need to update q
          h--;
        }
      }
    }
    /* return actual size of base table */
    this.m = lx[1];
    /* Return true (1) if we were given an incomplete table */
    this.status = ((y !== 0 && g !== 1) ? 1 : 0);
  }

  /* routines (inflate) */

  // Next input byte, or -1 once the input is exhausted. Reads past the end
  // are counted so inflate() can detect a truncated stream.
  function GET_BYTE() {
    if (inflate_data.length === inflate_pos) {
      eof_reads++;
      return -1;
    }
    return inflate_data[inflate_pos++] & 0xff;
  }

  // Make sure at least n bits are available in the bit buffer.
  function NEEDBITS(n) {
    while (bit_len < n) {
      bit_buf |= GET_BYTE() << bit_len;
      bit_len += 8;
    }
  }

  // Peek at the low n bits of the bit buffer (n must index MASK_BITS).
  function GETBITS(n) {
    return bit_buf & MASK_BITS[n];
  }

  // Discard the low n bits of the bit buffer.
  function DUMPBITS(n) {
    bit_buf >>= n;
    bit_len -= n;
  }

  /*
   * Decode literal/length and distance codes of the current block using
   * tl/td, writing at most `size` bytes to buff starting at `off`.
   * Returns the number of bytes written, or -1 on an invalid code. Sets
   * method to -1 when the end-of-block code is reached. A back-reference
   * interrupted by the size limit is left in copy_leng/copy_dist for
   * inflate_internal to finish.
   */
  function inflate_codes(buff, off, size) {
    // inflate (decompress) the codes in a deflated (compressed) block.
    // Return an error code or zero if it all goes ok.
    var e; // table entry flag/number of extra bits
    var t; // (HuftNode) pointer to table entry
    var n;
    if (size === 0) {
      return 0;
    }
    // inflate the coded data
    n = 0;
    for (;;) { // do until end of block
      NEEDBITS(bl);
      t = tl.list[GETBITS(bl)];
      e = t.e;
      while (e > 16) {
        if (e === 99) {
          return -1;
        }
        DUMPBITS(t.b);
        e -= 16;
        NEEDBITS(e);
        t = t.t[GETBITS(e)];
        e = t.e;
      }
      DUMPBITS(t.b);
      if (e === 16) { // then it's a literal
        wp &= WSIZE - 1;
        buff[off + n++] = slide[wp++] = t.n;
        if (n === size) {
          return size;
        }
        continue;
      }
      // exit if end of block
      if (e === 15) {
        break;
      }
      // it's an EOB or a length
      // get length of block to copy
      NEEDBITS(e);
      copy_leng = t.n + GETBITS(e);
      DUMPBITS(e);
      // decode distance of block to copy
      NEEDBITS(bd);
      t = td.list[GETBITS(bd)];
      e = t.e;
      while (e > 16) {
        if (e === 99) {
          return -1;
        }
        DUMPBITS(t.b);
        e -= 16;
        NEEDBITS(e);
        t = t.t[GETBITS(e)];
        e = t.e;
      }
      DUMPBITS(t.b);
      NEEDBITS(e);
      copy_dist = wp - t.n - GETBITS(e);
      DUMPBITS(e);
      // do the copy
      while (copy_leng > 0 && n < size) {
        copy_leng--;
        copy_dist &= WSIZE - 1;
        wp &= WSIZE - 1;
        buff[off + n++] = slide[wp++] = slide[copy_dist++];
      }
      if (n === size) {
        return size;
      }
    }
    method = -1; // done
    return n;
  }

  /*
   * Copy a stored (type 0) block, at most `size` bytes per call.
   * Returns the number of bytes written, or -1 when the length field does
   * not match its one's complement.
   */
  function inflate_stored(buff, off, size) {
    /* "decompress" an inflated type 0 (stored) block. */
    var n;
    // go to byte boundary
    n = bit_len & 7;
    DUMPBITS(n);
    // get the length and its complement
    NEEDBITS(16);
    n = GETBITS(16);
    DUMPBITS(16);
    NEEDBITS(16);
    if (n !== ((~bit_buf) & 0xffff)) {
      return -1; // error in compressed data
    }
    DUMPBITS(16);
    // read and output the compressed data
    copy_leng = n;
    n = 0;
    while (copy_leng > 0 && n < size) {
      copy_leng--;
      wp &= WSIZE - 1;
      NEEDBITS(8);
      buff[off + n++] = slide[wp++] = GETBITS(8);
      DUMPBITS(8);
    }
    if (copy_leng === 0) {
      method = -1; // done
    }
    return n;
  }

  /*
   * Decode a type 1 (fixed Huffman codes) block. The fixed tables are built
   * on first use and cached in fixed_tl/fixed_td for later calls.
   */
  function inflate_fixed(buff, off, size) {
    // decompress an inflated type 1 (fixed Huffman codes) block. We should
    // either replace this with a custom decoder, or at least precompute the
    // Huffman tables.
    // if first time, set up tables for fixed blocks
    if (!fixed_tl) {
      var i; // temporary variable
      var l = []; // 288 length list for huft_build (initialized below)
      var h; // HuftBuild
      // literal table
      for (i = 0; i < 144; i++) {
        l[i] = 8;
      }
      for (null; i < 256; i++) {
        l[i] = 9;
      }
      for (null; i < 280; i++) {
        l[i] = 7;
      }
      for (null; i < 288; i++) { // make a complete, but wrong code set
        l[i] = 8;
      }
      fixed_bl = 7;
      h = new HuftBuild(l, 288, 257, cplens, cplext, fixed_bl);
      if (h.status !== 0) {
        console.error("HufBuild error: " + h.status);
        return -1;
      }
      fixed_tl = h.root;
      fixed_bl = h.m;
      // distance table
      for (i = 0; i < 30; i++) { // make an incomplete code set
        l[i] = 5;
      }
      fixed_bd = 5;
      h = new HuftBuild(l, 30, 0, cpdist, cpdext, fixed_bd);
      if (h.status > 1) {
        fixed_tl = null;
        console.error("HufBuild error: " + h.status);
        return -1;
      }
      fixed_td = h.root;
      fixed_bd = h.m;
    }
    tl = fixed_tl;
    td = fixed_td;
    bl = fixed_bl;
    bd = fixed_bd;
    return inflate_codes(buff, off, size);
  }

  /*
   * Decode a type 2 (dynamic Huffman codes) block: read the code-length
   * tables from the stream, build tl/td, then decode the block data.
   * Returns the number of bytes written or -1 when the header is invalid.
   */
  function inflate_dynamic(buff, off, size) {
    // decompress an inflated type 2 (dynamic Huffman codes) block.
    var i; // temporary variables
    var j;
    var l; // last length
    var n; // number of lengths to get
    var t; // (HuftNode) literal/length code table
    var nb; // number of bit length codes
    var nl; // number of literal/length codes
    var nd; // number of distance codes
    var ll = [];
    var h; // (HuftBuild)
    // literal/length and distance code lengths
    for (i = 0; i < 286 + 30; i++) {
      ll[i] = 0;
    }
    // read in table lengths
    NEEDBITS(5);
    nl = 257 + GETBITS(5); // number of literal/length codes
    DUMPBITS(5);
    NEEDBITS(5);
    nd = 1 + GETBITS(5); // number of distance codes
    DUMPBITS(5);
    NEEDBITS(4);
    nb = 4 + GETBITS(4); // number of bit length codes
    DUMPBITS(4);
    if (nl > 286 || nd > 30) {
      return -1; // bad lengths
    }
    // read in bit-length-code lengths
    for (j = 0; j < nb; j++) {
      NEEDBITS(3);
      ll[border[j]] = GETBITS(3);
      DUMPBITS(3);
    }
    for (null; j < 19; j++) {
      ll[border[j]] = 0;
    }
    // build decoding table for trees--single level, 7 bit lookup
    bl = 7;
    h = new HuftBuild(ll, 19, 19, null, null, bl);
    // HuftBuild reports "success" with no table when every length is zero;
    // there is nothing to decode the code lengths with, so reject it here
    // instead of dereferencing a null table below.
    if (h.status !== 0 || h.root === null) {
      return -1; // incomplete code set
    }
    tl = h.root;
    bl = h.m;
    // read in literal and distance code lengths
    n = nl + nd;
    i = l = 0;
    while (i < n) {
      NEEDBITS(bl);
      t = tl.list[GETBITS(bl)];
      j = t.b;
      DUMPBITS(j);
      j = t.n;
      if (j < 16) { // length of code in bits (0..15)
        ll[i++] = l = j; // save last length in l
      } else if (j === 16) { // repeat last length 3 to 6 times
        NEEDBITS(2);
        j = 3 + GETBITS(2);
        DUMPBITS(2);
        if (i + j > n) {
          return -1;
        }
        while (j-- > 0) {
          ll[i++] = l;
        }
      } else if (j === 17) { // 3 to 10 zero length codes
        NEEDBITS(3);
        j = 3 + GETBITS(3);
        DUMPBITS(3);
        if (i + j > n) {
          return -1;
        }
        while (j-- > 0) {
          ll[i++] = 0;
        }
        l = 0;
      } else { // j === 18: 11 to 138 zero length codes
        NEEDBITS(7);
        j = 11 + GETBITS(7);
        DUMPBITS(7);
        if (i + j > n) {
          return -1;
        }
        while (j-- > 0) {
          ll[i++] = 0;
        }
        l = 0;
      }
    }
    // build the decoding tables for literal/length and distance codes
    bl = lbits;
    h = new HuftBuild(ll, nl, 257, cplens, cplext, bl);
    // Status 2 is bad input. A null root means there are no literal/length
    // codes at all, so not even end-of-block could be decoded. An incomplete
    // tree (status 1) is still tolerated, as it was before.
    if ((h.status !== 0 && h.status !== 1) || h.root === null) {
      return -1; // incomplete code set
    }
    tl = h.root;
    bl = h.m;
    for (i = 0; i < nd; i++) {
      ll[i] = ll[i + nl];
    }
    bd = dbits;
    h = new HuftBuild(ll, nd, 0, cpdist, cpdext, bd);
    td = h.root;
    bd = h.m;
    if (bd === 0 && nl > 257) { // lengths but no distances
      // **incomplete distance tree**
      return -1;
    }
    if (h.status !== 0) {
      return -1;
    }
    // decompress until an end-of-block code
    return inflate_codes(buff, off, size);
  }

  // Reset the per-stream decoder state; the sliding window array is reused.
  function inflate_start() {
    if (!slide) {
      slide = []; // new Array(2 * WSIZE); // slide.length is never called
    }
    wp = 0;
    bit_buf = 0;
    bit_len = 0;
    method = -1;
    eof = false;
    eof_reads = 0;
    copy_leng = copy_dist = 0;
    tl = null;
  }

  /*
   * Produce up to `size` more output bytes into buff starting at `off`,
   * resuming any copy left unfinished by the previous call and moving on
   * to the next block when the current one is done.
   * Returns the number of bytes written. On a decoding error it returns 0
   * if the final-block flag has already been seen and -1 otherwise.
   */
  function inflate_internal(buff, off, size) {
    // decompress an inflated entry
    var n, i;
    n = 0;
    while (n < size) {
      if (eof && method === -1) {
        return n;
      }
      if (copy_leng > 0) {
        if (method !== STORED_BLOCK) {
          // STATIC_TREES or DYN_TREES
          while (copy_leng > 0 && n < size) {
            copy_leng--;
            copy_dist &= WSIZE - 1;
            wp &= WSIZE - 1;
            buff[off + n++] = slide[wp++] = slide[copy_dist++];
          }
        } else {
          while (copy_leng > 0 && n < size) {
            copy_leng--;
            wp &= WSIZE - 1;
            NEEDBITS(8);
            buff[off + n++] = slide[wp++] = GETBITS(8);
            DUMPBITS(8);
          }
          if (copy_leng === 0) {
            method = -1; // done
          }
        }
        if (n === size) {
          return n;
        }
      }
      if (method === -1) {
        if (eof) {
          break;
        }
        // read in last block bit
        NEEDBITS(1);
        if (GETBITS(1) !== 0) {
          eof = true;
        }
        DUMPBITS(1);
        // read in block type
        NEEDBITS(2);
        method = GETBITS(2);
        DUMPBITS(2);
        tl = null;
        copy_leng = 0;
      }
      switch (method) {
      case STORED_BLOCK:
        i = inflate_stored(buff, off + n, size - n);
        break;
      case STATIC_TREES:
        // tl is still set when resuming a block that was interrupted by
        // the size limit, so the tables are not rebuilt
        if (tl) {
          i = inflate_codes(buff, off + n, size - n);
        } else {
          i = inflate_fixed(buff, off + n, size - n);
        }
        break;
      case DYN_TREES:
        if (tl) {
          i = inflate_codes(buff, off + n, size - n);
        } else {
          i = inflate_dynamic(buff, off + n, size - n);
        }
        break;
      default: // error
        i = -1;
        break;
      }
      if (i === -1) {
        if (eof) {
          return 0;
        }
        return -1;
      }
      n += i;
    }
    return n;
  }

  /*
   * Decompress a raw deflate stream.
   *
   * @param arr- array-like of byte values
   * @return plain array of decompressed byte values; on a corrupt or
   *         truncated stream this is whatever was decoded before the
   *         problem was noticed (and may end with bogus bytes).
   */
  function inflate(arr) {
    var buff = [], i;
    inflate_start();
    inflate_data = arr;
    inflate_pos = 0;
    // Decode in 1024-byte steps. Once the input is exhausted GET_BYTE feeds
    // endless 1-bits, which can decode as valid literals forever; stop as
    // soon as the decoder has clearly run off the end of the input.
    do {
      i = inflate_internal(buff, buff.length, 1024);
    } while (i > 0 && eof_reads <= MAX_EOF_READS);
    inflate_data = null; // G.C.
    return buff;
  }

  return inflate;
}());
// The two identification bytes that open every GZIP file.
var ID1 = 0x1F;
var ID2 = 0x8B;

// Compression method name -> value of the header's method byte.
var compressionMethods = {
  'deflate': 8
};

// Bit masks for the header's flag byte.
var possibleFlags = {
  'FTEXT': 0x01,
  'FHCRC': 0x02,
  'FEXTRA': 0x04,
  'FNAME': 0x08,
  'FCOMMENT': 0x10
};

// Operating system name -> value of the header's OS byte.
var osMap = {
  'fat': 0, // FAT file system (DOS, OS/2, NT) + PKZIPW 2.50 VFAT, NTFS
  'amiga': 1, // Amiga
  'vmz': 2, // VMS (VAX or Alpha AXP)
  'unix': 3, // Unix
  'vm/cms': 4, // VM/CMS
  'atari': 5, // Atari
  'hpfs': 6, // HPFS file system (OS/2, NT 3.x)
  'macintosh': 7, // Macintosh
  'z-system': 8, // Z-System
  'cplm': 9, // CP/M
  'tops-20': 10, // TOPS-20
  'ntfs': 11, // NTFS file system (NT)
  'qdos': 12, // SMS/QDOS
  'acorn': 13, // Acorn RISC OS
  'vfat': 14, // VFAT file system (Win95, NT)
  'vms': 15, // MVS (code also taken for PRIMOS)
  'beos': 16, // BeOS (BeBox or PowerMac)
  'tandem': 17, // Tandem/NSK
  'theos': 18 // THEOS
};

// Defaults: operating system name and compression level.
var os = 'unix';
var DEFAULT_LEVEL = 6;
/*
 * Append the low 8 bits of n to arr.
 *
 * @param n- number to write
 * @param arr- array to append to
 */
function putByte(n, arr) {
  var octet = n & 0xFF;
  arr.push(octet);
}
/*
 * Append n to arr as a 16-bit value, least significant byte first.
 *
 * Both bytes are masked to 8 bits, so a negative n or one wider than
 * 16 bits never puts a non-byte value into the array.
 *
 * @param n- number to write (only its low 16 bits are used)
 * @param arr- array to append to
 */
function putShort(n, arr) {
  arr.push(n & 0xFF);
  arr.push((n >>> 8) & 0xFF);
}
/*
 * Append n to arr as a 32-bit value, least significant byte first,
 * written as two 16-bit halves (low half first).
 *
 * @param n- number to write
 * @param arr- array to append to
 */
function putLong(n, arr) {
  var lowHalf = n & 0xffff;
  var highHalf = n >>> 16;
  putShort(lowHalf, arr);
  putShort(highHalf, arr);
}
/*
 * Append every UTF-16 code unit of s to arr, one byte per unit (only the
 * low 8 bits of each unit are kept by putByte). No terminator is written.
 *
 * @param s- string to write
 * @param arr- array to append to
 */
function putString(s, arr) {
  var total = s.length;
  var pos = 0;
  while (pos < total) {
    putByte(s.charCodeAt(pos), arr);
    pos += 1;
  }
}
/*
 * Remove the first element of arr and return it
 * (undefined when arr is empty).
 *
 * @param arr- array of bytes to read from; it is shortened by one element
 */
function readByte(arr) {
  var first = arr.shift();
  return first;
}
/*
 * Remove two bytes from the front of arr and combine them as a 16-bit
 * value, least significant byte first.
 *
 * @param arr- array of bytes to read from; it is shortened by two elements
 */
function readShort(arr) {
  var low = arr.shift();
  var high = arr.shift();
  return low | (high << 8);
}
/*
 * Remove four bytes from the front of arr and combine them as an unsigned
 * 32-bit value, least significant byte first.
 *
 * The bytes are read inline (same order as two readShort calls would use).
 *
 * @param arr- array of bytes to read from; it is shortened by four elements
 * @return a number in the range 0..4294967295
 */
function readLong(arr) {
  var b0 = arr.shift(),
    b1 = arr.shift(),
    b2 = arr.shift(),
    b3 = arr.shift();
  // Bitwise operators work on signed 32-bit integers, so a set top bit
  // would produce a negative number; `>>> 0` reinterprets the result as
  // unsigned. This also covers a high half of exactly 0x8000.
  return (b0 | (b1 << 8) | (b2 << 16) | (b3 << 24)) >>> 0;
}
/*
 * Remove a zero-terminated string from the front of arr and return it
 * without the terminator. Each byte becomes one character.
 *
 * @param arr- array of bytes to read from; the string and its terminating
 *             zero byte are removed from it
 * @return the decoded string
 * @throws Error when arr runs out before a terminating zero byte is found
 *         (looping on an exhausted array would never end)
 */
function readString(arr) {
  var chars = [],
    code;
  while (arr.length > 0) {
    code = arr.shift();
    if (code === 0) {
      // terminator consumed; join all characters into a cohesive string
      return chars.join('');
    }
    chars.push(String.fromCharCode(code));
  }
  throw new Error('Unterminated string in GZIP header');
}
/*
 * Remove n elements from the front of arr and return them as a new array.
 * Reading past the end of arr yields undefined for the missing elements.
 *
 * @param arr- Array of bytes to read from
 * @param n- Number of bytes to read
 */
function readBytes(arr, n) {
  var taken = [];
  while (taken.length < n) {
    taken.push(arr.shift());
  }
  return taken;
}
/*
 * Decompress a GZIP file held in memory.
 *
 * The header is parsed (optional extra field, file name, comment and header
 * CRC are skipped), the deflate payload is inflated, and the result is
 * checked against the CRC-32 and size stored in the last 8 bytes.
 *
 * @param data- array-like of the file's bytes; it is copied, not modified
 * @param options- currently unused
 * @return array of decompressed byte values, or a string built from those
 *         bytes when the header's FTEXT flag is set
 * @throws a string: 'Not a GZIP file', 'Unsupported compression method',
 *         'Checksum does not match' or
 *         'Size of decompressed file not correct'
 */
function unzip(data, options) {
  // work on a private copy, because reading consumes the array
  var bytes = Array.prototype.slice.call(data, 0);
  var methodId,
    methodName,
    isSupported,
    flags,
    mtime,
    xFlags,
    osId,
    os,
    extraLength,
    expectedCrc,
    expectedSize,
    res;

  // the file must open with the two magic numbers
  if (readByte(bytes) !== ID1 || readByte(bytes) !== ID2) {
    throw 'Not a GZIP file';
  }

  // map the method byte back to a known method name
  methodId = readByte(bytes);
  isSupported = Object.keys(compressionMethods).some(function (name) {
    methodName = name;
    return compressionMethods[name] === methodId;
  });
  if (!isSupported) {
    throw 'Unsupported compression method';
  }

  flags = readByte(bytes);
  mtime = readLong(bytes);
  xFlags = readByte(bytes);

  // look up the name of the originating operating system
  osId = readByte(bytes);
  Object.keys(osMap).some(function (name) {
    if (osMap[name] === osId) {
      os = name;
      return true;
    }
    return false;
  });

  // optional header fields are read only to get past them
  if (flags & possibleFlags['FEXTRA']) {
    extraLength = readShort(bytes);
    readBytes(bytes, extraLength);
  }
  if (flags & possibleFlags['FNAME']) {
    readString(bytes);
  }
  if (flags & possibleFlags['FCOMMENT']) {
    readString(bytes);
  }
  if (flags & possibleFlags['FHCRC']) {
    readShort(bytes);
  }

  if (methodName === 'deflate') {
    // everything except the trailing 8 bytes (CRC32 checksum and file
    // size) belongs to the deflate stream
    res = inflate(bytes.splice(0, bytes.length - 8));
  }

  // text files are handed back as a string, one character per byte
  if (flags & possibleFlags['FTEXT']) {
    res = Array.prototype.map.call(res, function (code) {
      return String.fromCharCode(code);
    }).join('');
  }

  expectedCrc = readLong(bytes);
  if (expectedCrc !== parseInt(crc32(res), 16)) {
    throw 'Checksum does not match';
  }

  expectedSize = readLong(bytes);
  if (expectedSize !== res.length) {
    throw 'Size of decompressed file not correct';
  }

  return res;
}
// lang ='eng'
return (function(lang){
var xhr = new XMLHttpRequest();
xhr.open('GET', 'https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/'+lang+'.traineddata.gz', true);
xhr.responseType = 'arraybuffer';
xhr.onerror = function(){ cb(xhr, null) }
xhr.onload = function(){
if (xhr.status == 200 || (xhr.status == 0 && xhr.response)) {
var arr = new Uint8Array(xhr.response)
console.log(arr.length)
window.result = new Uint8Array(unzip(arr))
console.log(result.length)
} else cb(xhr, null);
}
xhr.send(null)
})
})()
// Start the asynchronous download of the 'eng' trained data as soon as the
// script runs; on success the decompressed bytes end up on window.result.
setlang('eng')
</script>