Browse Source

building new version

pull/34/head 1.0.5
Kevin Kwok 8 years ago
parent
commit
2733c9a030
  1. 767
      dist/tesseract.js
  2. 578
      dist/worker.js
  3. 5
      package.json
  4. 5
      src/browser/index.js
  5. 2
      src/index.js

767
dist/tesseract.js vendored

@ -84,80 +84,341 @@ module.exports = shouldUseNative() ? Object.assign : function (target, source) {
}; };
},{}],2:[function(require,module,exports){ },{}],2:[function(require,module,exports){
exports.defaultOptions = { // shim for using process in browser
langPath: 'https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/', var process = module.exports = {};
// workerPath: 'dist/worker.js',
workerPath: 'https://cdn.rawgit.com/naptha/tesseract.js/0.1.3/dist/worker.js', // cached from whatever global is present so that test runners that stub it
tesseractPath: 'https://cdn.rawgit.com/naptha/tesseract.js-core/0.1.0/index.js', // don't break things. But we need to wrap it in a try catch in case it is
// wrapped in strict mode code which doesn't define any globals. It's inside a
// function because try/catches deoptimize in certain engines.
var cachedSetTimeout;
var cachedClearTimeout;
function defaultSetTimout() {
throw new Error('setTimeout has not been defined');
}
function defaultClearTimeout () {
throw new Error('clearTimeout has not been defined');
} }
(function () {
try {
if (typeof setTimeout === 'function') {
cachedSetTimeout = setTimeout;
} else {
cachedSetTimeout = defaultSetTimout;
}
} catch (e) {
cachedSetTimeout = defaultSetTimout;
}
try {
if (typeof clearTimeout === 'function') {
cachedClearTimeout = clearTimeout;
} else {
cachedClearTimeout = defaultClearTimeout;
}
} catch (e) {
cachedClearTimeout = defaultClearTimeout;
}
} ())
function runTimeout(fun) {
if (cachedSetTimeout === setTimeout) {
//normal enviroments in sane situations
return setTimeout(fun, 0);
}
// if setTimeout wasn't available but was latter defined
if ((cachedSetTimeout === defaultSetTimout || !cachedSetTimeout) && setTimeout) {
cachedSetTimeout = setTimeout;
return setTimeout(fun, 0);
}
try {
// when when somebody has screwed with setTimeout but no I.E. maddness
return cachedSetTimeout(fun, 0);
} catch(e){
try {
// When we are in I.E. but the script has been evaled so I.E. doesn't trust the global object when called normally
return cachedSetTimeout.call(null, fun, 0);
} catch(e){
// same as above but when it's a version of I.E. that must have the global object for 'this', hopfully our context correct otherwise it will throw a global error
return cachedSetTimeout.call(this, fun, 0);
}
}
exports.spawnWorker = function spawnWorker(instance, workerOptions){
if(window.Blob && window.URL){ }
var blob = new Blob(['importScripts("' + workerOptions.workerPath + '");']) function runClearTimeout(marker) {
var worker = new Worker(window.URL.createObjectURL(blob)); if (cachedClearTimeout === clearTimeout) {
}else{ //normal enviroments in sane situations
var worker = new Worker(workerOptions.workerPath) return clearTimeout(marker);
}
// if clearTimeout wasn't available but was latter defined
if ((cachedClearTimeout === defaultClearTimeout || !cachedClearTimeout) && clearTimeout) {
cachedClearTimeout = clearTimeout;
return clearTimeout(marker);
}
try {
// when when somebody has screwed with setTimeout but no I.E. maddness
return cachedClearTimeout(marker);
} catch (e){
try {
// When we are in I.E. but the script has been evaled so I.E. doesn't trust the global object when called normally
return cachedClearTimeout.call(null, marker);
} catch (e){
// same as above but when it's a version of I.E. that must have the global object for 'this', hopfully our context correct otherwise it will throw a global error.
// Some versions of I.E. have different rules for clearTimeout vs setTimeout
return cachedClearTimeout.call(this, marker);
}
} }
worker.onmessage = function(e){
var packet = e.data;
instance._recv(packet) }
var queue = [];
var draining = false;
var currentQueue;
var queueIndex = -1;
function cleanUpNextTick() {
if (!draining || !currentQueue) {
return;
}
draining = false;
if (currentQueue.length) {
queue = currentQueue.concat(queue);
} else {
queueIndex = -1;
}
if (queue.length) {
drainQueue();
} }
return worker
} }
exports.terminateWorker = function(instance){ function drainQueue() {
instance.worker.terminate() if (draining) {
return;
}
var timeout = runTimeout(cleanUpNextTick);
draining = true;
var len = queue.length;
while(len) {
currentQueue = queue;
queue = [];
while (++queueIndex < len) {
if (currentQueue) {
currentQueue[queueIndex].run();
}
}
queueIndex = -1;
len = queue.length;
}
currentQueue = null;
draining = false;
runClearTimeout(timeout);
} }
exports.sendPacket = function sendPacket(instance, packet){ process.nextTick = function (fun) {
loadImage(packet.payload.image, function(img){ var args = new Array(arguments.length - 1);
packet.payload.image = img if (arguments.length > 1) {
instance.worker.postMessage(packet) for (var i = 1; i < arguments.length; i++) {
}) args[i - 1] = arguments[i];
}
}
queue.push(new Item(fun, args));
if (queue.length === 1 && !draining) {
runTimeout(drainQueue);
}
};
// v8 likes predictible objects
function Item(fun, array) {
this.fun = fun;
this.array = array;
}
Item.prototype.run = function () {
this.fun.apply(null, this.array);
};
process.title = 'browser';
process.browser = true;
process.env = {};
process.argv = [];
process.version = ''; // empty string to avoid regexp issues
process.versions = {};
function noop() {}
process.on = noop;
process.addListener = noop;
process.once = noop;
process.off = noop;
process.removeListener = noop;
process.removeAllListeners = noop;
process.emit = noop;
process.binding = function (name) {
throw new Error('process.binding is not supported');
};
process.cwd = function () { return '/' };
process.chdir = function (dir) {
throw new Error('process.chdir is not supported');
};
process.umask = function() { return 0; };
},{}],3:[function(require,module,exports){
module.exports={
"name": "tesseract.js",
"version": "1.0.5",
"description": "Pure Javascript Multilingual OCR",
"main": "src/index.js",
"scripts": {
"test": "echo \"Error: no test specified\" & exit 1",
"start": "watchify src/index.js -t [ envify --NODE_ENV development ] -t [ babelify --presets [ es2015 ] ] -o dist/tesseract.dev.js --standalone Tesseract & watchify src/browser/worker.js -t [ envify --NODE_ENV development ] -t [ babelify --presets [ es2015 ] ] -o dist/worker.dev.js & http-server -p 7355",
"build": "browserify src/index.js -t [ babelify --presets [ es2015 ] ] -o dist/tesseract.js --standalone Tesseract && browserify src/browser/worker.js -t [ babelify --presets [ es2015 ] ] -o dist/worker.js",
"release": "git tag `jq -r '.version' package.json`"
},
"browser": {
"./src/node/index.js": "./src/browser/index.js"
},
"author": "",
"license": "Apache",
"devDependencies": {
"babel-preset-es2015": "^6.16.0",
"babelify": "^7.3.0",
"browserify": "^13.1.0",
"envify": "^3.4.1",
"http-server": "^0.9.0",
"watchify": "^3.7.0",
"pako": "^1.0.3"
},
"dependencies": {
"file-type": "^3.8.0",
"jpeg-js": "^0.2.0",
"level-js": "^2.2.4",
"object-assign": "^4.1.0",
"png.js": "^0.2.1",
"tesseract.js-core": "^1.0.2"
},
"repository": {
"type": "git",
"url": "https://github.com/naptha/tesseract.js.git"
},
"bugs": {
"url": "https://github.com/naptha/tesseract.js/issues"
},
"homepage": "https://github.com/naptha/tesseract.js"
} }
},{}],4:[function(require,module,exports){
(function (process){
'use strict';
function loadImage(image, cb){ var defaultOptions = {
if(typeof image === 'string'){ // workerPath: 'https://cdn.rawgit.com/naptha/tesseract.js/0.2.0/dist/worker.js',
if(/^\#/.test(image)){ corePath: 'https://cdn.rawgit.com/naptha/tesseract.js-core/0.1.0/index.js',
langPath: 'https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/'
};
if (process.env.NODE_ENV === "development") {
console.debug('Using Development Configuration');
defaultOptions.workerPath = location.protocol + '//' + location.host + '/dist/worker.dev.js';
} else {
var version = require('../../package.json').version;
defaultOptions.workerPath = 'https://cdn.rawgit.com/naptha/tesseract.js/' + version + '/dist/worker.js';
}
exports.defaultOptions = defaultOptions;
exports.spawnWorker = function spawnWorker(instance, workerOptions) {
if (window.Blob && window.URL) {
var blob = new Blob(['importScripts("' + workerOptions.workerPath + '");']);
var worker = new Worker(window.URL.createObjectURL(blob));
} else {
var worker = new Worker(workerOptions.workerPath);
}
worker.onmessage = function (e) {
var packet = e.data;
instance._recv(packet);
};
return worker;
};
exports.terminateWorker = function (instance) {
instance.worker.terminate();
};
exports.sendPacket = function sendPacket(instance, packet) {
loadImage(packet.payload.image, function (img) {
packet.payload.image = img;
instance.worker.postMessage(packet);
});
};
function loadImage(image, cb) {
if (typeof image === 'string') {
if (/^\#/.test(image)) {
// element css selector // element css selector
return loadImage(document.querySelector(image), cb) return loadImage(document.querySelector(image), cb);
}else{ } else if (/(blob|data)\:/.test(image)) {
// url or path // data url
var im = new Image var im = new Image();
im.src = image; im.src = image;
im.onload = e => loadImage(im, cb); im.onload = function (e) {
return return loadImage(im, cb);
};
return;
} else {
var xhr = new XMLHttpRequest();
xhr.open('GET', image, true);
xhr.responseType = "blob";
xhr.onload = function (e) {
return loadImage(xhr.response, cb);
};
xhr.onerror = function (e) {
if (/^https?:\/\//.test(image) && !/^https:\/\/crossorigin.me/.test(image)) {
console.debug('Attempting to load image with CORS proxy');
loadImage('https://crossorigin.me/' + image, cb);
}
};
xhr.send(null);
return;
} }
}else if(image instanceof File){ } else if (image instanceof File) {
// files // files
var fr = new FileReader() var fr = new FileReader();
fr.onload = e => loadImage(fr.result, cb); fr.onload = function (e) {
fr.readAsDataURL(image) return loadImage(fr.result, cb);
return };
}else if(image instanceof Blob){ fr.readAsDataURL(image);
return loadImage(URL.createObjectURL(image), cb) return;
}else if(image.getContext){ } else if (image instanceof Blob) {
return loadImage(URL.createObjectURL(image), cb);
} else if (image.getContext) {
// canvas element // canvas element
return loadImage(image.getContext('2d'), cb) return loadImage(image.getContext('2d'), cb);
}else if(image.tagName == "IMG" || image.tagName == "VIDEO"){ } else if (image.tagName == "IMG" || image.tagName == "VIDEO") {
// image element or video element // image element or video element
var c = document.createElement('canvas'); var c = document.createElement('canvas');
c.width = image.naturalWidth || image.videoWidth; c.width = image.naturalWidth || image.videoWidth;
c.height = image.naturalHeight || image.videoHeight; c.height = image.naturalHeight || image.videoHeight;
var ctx = c.getContext('2d'); var ctx = c.getContext('2d');
ctx.drawImage(image, 0, 0); ctx.drawImage(image, 0, 0);
return loadImage(ctx, cb) return loadImage(ctx, cb);
}else if(image.getImageData){ } else if (image.getImageData) {
// canvas context // canvas context
var data = image.getImageData(0, 0, image.canvas.width, image.canvas.height); var data = image.getImageData(0, 0, image.canvas.width, image.canvas.height);
return loadImage(data, cb) return loadImage(data, cb);
} else {
return cb(image);
} }
cb(image) throw new Error('Missing return in loadImage cascade');
} }
},{}],3:[function(require,module,exports){ }).call(this,require('_process'))
},{"../../package.json":3,"_process":2}],5:[function(require,module,exports){
"use strict";
// The result of dump.js is a big JSON tree // The result of dump.js is a big JSON tree
// which can be easily serialized (for instance // which can be easily serialized (for instance
// to be sent from a webworker to the main app // to be sent from a webworker to the main app
@ -165,225 +426,283 @@ function loadImage(image, cb){
// a (circular) DOM-like interface for walking // a (circular) DOM-like interface for walking
// through the data. // through the data.
module.exports = function circularize(page){ module.exports = function circularize(page) {
page.paragraphs = [] page.paragraphs = [];
page.lines = [] page.lines = [];
page.words = [] page.words = [];
page.symbols = [] page.symbols = [];
page.blocks.forEach(function(block){ page.blocks.forEach(function (block) {
block.page = page; block.page = page;
block.lines = [] block.lines = [];
block.words = [] block.words = [];
block.symbols = [] block.symbols = [];
block.paragraphs.forEach(function(para){ block.paragraphs.forEach(function (para) {
para.block = block; para.block = block;
para.page = page; para.page = page;
para.words = [] para.words = [];
para.symbols = [] para.symbols = [];
para.lines.forEach(function(line){ para.lines.forEach(function (line) {
line.paragraph = para; line.paragraph = para;
line.block = block; line.block = block;
line.page = page; line.page = page;
line.symbols = [] line.symbols = [];
line.words.forEach(function(word){ line.words.forEach(function (word) {
word.line = line; word.line = line;
word.paragraph = para; word.paragraph = para;
word.block = block; word.block = block;
word.page = page; word.page = page;
word.symbols.forEach(function(sym){ word.symbols.forEach(function (sym) {
sym.word = word; sym.word = word;
sym.line = line; sym.line = line;
sym.paragraph = para; sym.paragraph = para;
sym.block = block; sym.block = block;
sym.page = page; sym.page = page;
sym.line.symbols.push(sym)
sym.paragraph.symbols.push(sym)
sym.block.symbols.push(sym)
sym.page.symbols.push(sym)
})
word.paragraph.words.push(word)
word.block.words.push(word)
word.page.words.push(word)
})
line.block.lines.push(line)
line.page.lines.push(line)
})
para.page.paragraphs.push(para)
})
})
return page
}
},{}],4:[function(require,module,exports){
const adapter = require('../node/index.js')
let jobCounter = 0;
module.exports = class TesseractJob {
constructor(instance){
this.id = 'Job-' + (++jobCounter) + '-' + Math.random().toString(16).slice(3, 8)
this._instance = instance;
this._resolve = []
this._reject = []
this._progress = []
this._finally = []
}
then(resolve, reject){
if(this._resolve.push){
this._resolve.push(resolve)
}else{
resolve(this._resolve)
}
if(reject) this.catch(reject); sym.line.symbols.push(sym);
return this; sym.paragraph.symbols.push(sym);
} sym.block.symbols.push(sym);
catch(reject){ sym.page.symbols.push(sym);
if(this._reject.push){ });
this._reject.push(reject) word.paragraph.words.push(word);
}else{ word.block.words.push(word);
reject(this._reject) word.page.words.push(word);
});
line.block.lines.push(line);
line.page.lines.push(line);
});
para.page.paragraphs.push(para);
});
});
return page;
};
},{}],6:[function(require,module,exports){
'use strict';
var _createClass = function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; }();
function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } }
var adapter = require('../node/index.js');
var jobCounter = 0;
module.exports = function () {
function TesseractJob(instance) {
_classCallCheck(this, TesseractJob);
this.id = 'Job-' + ++jobCounter + '-' + Math.random().toString(16).slice(3, 8);
this._instance = instance;
this._resolve = [];
this._reject = [];
this._progress = [];
this._finally = [];
} }
return this;
} _createClass(TesseractJob, [{
progress(fn){ key: 'then',
this._progress.push(fn) value: function then(resolve, reject) {
return this; if (this._resolve.push) {
} this._resolve.push(resolve);
finally(fn) { } else {
this._finally.push(fn) resolve(this._resolve);
return this; }
}
_send(action, payload){ if (reject) this.catch(reject);
adapter.sendPacket(this._instance, { return this;
jobId: this.id,
action: action,
payload: payload
})
}
_handle(packet){
var data = packet.data;
let runFinallyCbs = false;
if(packet.status === 'resolve'){
if(this._resolve.length === 0) console.debug(data);
this._resolve.forEach(fn => {
var ret = fn(data);
if(ret && typeof ret.then == 'function'){
console.warn('TesseractJob instances do not chain like ES6 Promises. To convert it into a real promise, use Promise.resolve.')
} }
}) }, {
this._resolve = data; key: 'catch',
this._instance._dequeue() value: function _catch(reject) {
runFinallyCbs = true; if (this._reject.push) {
}else if(packet.status === 'reject'){ this._reject.push(reject);
if(this._reject.length === 0) console.error(data); } else {
this._reject.forEach(fn => fn(data)) reject(this._reject);
this._reject = data; }
this._instance._dequeue() return this;
runFinallyCbs = true; }
}else if(packet.status === 'progress'){ }, {
this._progress.forEach(fn => fn(data)) key: 'progress',
}else{ value: function progress(fn) {
console.warn('Message type unknown', packet.status) this._progress.push(fn);
} return this;
}
}, {
key: 'finally',
value: function _finally(fn) {
this._finally.push(fn);
return this;
}
}, {
key: '_send',
value: function _send(action, payload) {
adapter.sendPacket(this._instance, {
jobId: this.id,
action: action,
payload: payload
});
}
}, {
key: '_handle',
value: function _handle(packet) {
var data = packet.data;
var runFinallyCbs = false;
if (packet.status === 'resolve') {
if (this._resolve.length === 0) console.debug(data);
this._resolve.forEach(function (fn) {
var ret = fn(data);
if (ret && typeof ret.then == 'function') {
console.warn('TesseractJob instances do not chain like ES6 Promises. To convert it into a real promise, use Promise.resolve.');
}
});
this._resolve = data;
this._instance._dequeue();
runFinallyCbs = true;
} else if (packet.status === 'reject') {
if (this._reject.length === 0) console.error(data);
this._reject.forEach(function (fn) {
return fn(data);
});
this._reject = data;
this._instance._dequeue();
runFinallyCbs = true;
} else if (packet.status === 'progress') {
this._progress.forEach(function (fn) {
return fn(data);
});
} else {
console.warn('Message type unknown', packet.status);
}
if (runFinallyCbs) {
this._finally.forEach(function (fn) {
return fn(data);
});
}
}
}]);
if (runFinallyCbs) { return TesseractJob;
this._finally.forEach(fn => fn(data)); }();
}
} },{"../node/index.js":4}],7:[function(require,module,exports){
} 'use strict';
},{"../node/index.js":2}],5:[function(require,module,exports){
const adapter = require('./node/index.js') var _createClass = function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; }();
const circularize = require('./common/circularize.js')
const TesseractJob = require('./common/job'); function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } }
const objectAssign = require('object-assign');
var adapter = require('./node/index.js');
var circularize = require('./common/circularize.js');
var TesseractJob = require('./common/job');
var objectAssign = require('object-assign');
var version = require('../package.json').version;
function create(workerOptions){ function create(workerOptions) {
workerOptions = workerOptions || {}; workerOptions = workerOptions || {};
var worker = new TesseractWorker(objectAssign({}, adapter.defaultOptions, workerOptions)) var worker = new TesseractWorker(objectAssign({}, adapter.defaultOptions, workerOptions));
worker.create = create; worker.create = create;
worker.version = version;
return worker; return worker;
} }
class TesseractWorker { var TesseractWorker = function () {
constructor(workerOptions){ function TesseractWorker(workerOptions) {
_classCallCheck(this, TesseractWorker);
this.worker = null; this.worker = null;
this.workerOptions = workerOptions; this.workerOptions = workerOptions;
this._currentJob = null; this._currentJob = null;
this._queue = [] this._queue = [];
} }
recognize(image, options){ _createClass(TesseractWorker, [{
return this._delay(job => { key: 'recognize',
if(typeof options === 'string'){ value: function recognize(image, options) {
options = { lang: options }; var _this = this;
}else{
options = options || {} return this._delay(function (job) {
options.lang = options.lang || 'eng'; if (typeof options === 'string') {
} options = { lang: options };
} else {
job._send('recognize', { image: image, options: options, workerOptions: this.workerOptions }) options = options || {};
}) options.lang = options.lang || 'eng';
} }
detect(image, options){
options = options || {}
return this._delay(job => {
job._send('detect', { image: image, options: options, workerOptions: this.workerOptions })
})
}
terminate(){ job._send('recognize', { image: image, options: options, workerOptions: _this.workerOptions });
if(this.worker) adapter.terminateWorker(this); });
this.worker = null; }
} }, {
key: 'detect',
value: function detect(image, options) {
var _this2 = this;
options = options || {};
return this._delay(function (job) {
job._send('detect', { image: image, options: options, workerOptions: _this2.workerOptions });
});
}
}, {
key: 'terminate',
value: function terminate() {
if (this.worker) adapter.terminateWorker(this);
this.worker = null;
}
}, {
key: '_delay',
value: function _delay(fn) {
var _this3 = this;
if (!this.worker) this.worker = adapter.spawnWorker(this, this.workerOptions);
var job = new TesseractJob(this);
this._queue.push(function (e) {
_this3._queue.shift();
_this3._currentJob = job;
fn(job);
});
if (!this._currentJob) this._dequeue();
return job;
}
}, {
key: '_dequeue',
value: function _dequeue() {
this._currentJob = null;
if (this._queue.length > 0) {
this._queue[0]();
}
}
}, {
key: '_recv',
value: function _recv(packet) {
_delay(fn){ if (packet.status === 'resolve' && packet.action === 'recognize') {
if(!this.worker) this.worker = adapter.spawnWorker(this, this.workerOptions); packet.data = circularize(packet.data);
}
var job = new TesseractJob(this);
this._queue.push(e => {
this._queue.shift()
this._currentJob = job;
fn(job)
})
if(!this._currentJob) this._dequeue();
return job
}
_dequeue(){ if (this._currentJob.id === packet.jobId) {
this._currentJob = null; this._currentJob._handle(packet);
if(this._queue.length > 0){ } else {
this._queue[0]() console.warn('Job ID ' + packet.jobId + ' not known.');
}
} }
} }]);
_recv(packet){
if(packet.status === 'resolve' && packet.action === 'recognize'){ return TesseractWorker;
packet.data = circularize(packet.data); }();
}
if(this._currentJob.id === packet.jobId){ var DefaultTesseract = create();
this._currentJob._handle(packet)
}else{
console.warn('Job ID ' + packet.jobId + ' not known.')
}
}
}
var DefaultTesseract = create() module.exports = DefaultTesseract;
module.exports = DefaultTesseract },{"../package.json":3,"./common/circularize.js":5,"./common/job":6,"./node/index.js":4,"object-assign":1}]},{},[7])(7)
},{"./common/circularize.js":3,"./common/job":4,"./node/index.js":2,"object-assign":1}]},{},[5])(5)
}); });

578
dist/worker.js vendored

@ -82,7 +82,7 @@ AbstractChainedBatch.prototype.write = function (options, callback) {
module.exports = AbstractChainedBatch module.exports = AbstractChainedBatch
}).call(this,require('_process')) }).call(this,require('_process'))
},{"_process":36}],2:[function(require,module,exports){ },{"_process":37}],2:[function(require,module,exports){
(function (process){ (function (process){
/* Copyright (c) 2013 Rod Vagg, MIT License */ /* Copyright (c) 2013 Rod Vagg, MIT License */
@ -135,7 +135,7 @@ AbstractIterator.prototype.end = function (callback) {
module.exports = AbstractIterator module.exports = AbstractIterator
}).call(this,require('_process')) }).call(this,require('_process'))
},{"_process":36}],3:[function(require,module,exports){ },{"_process":37}],3:[function(require,module,exports){
(function (Buffer,process){ (function (Buffer,process){
/* Copyright (c) 2013 Rod Vagg, MIT License */ /* Copyright (c) 2013 Rod Vagg, MIT License */
@ -395,7 +395,7 @@ module.exports.AbstractIterator = AbstractIterator
module.exports.AbstractChainedBatch = AbstractChainedBatch module.exports.AbstractChainedBatch = AbstractChainedBatch
}).call(this,{"isBuffer":require("../is-buffer/index.js")},require('_process')) }).call(this,{"isBuffer":require("../is-buffer/index.js")},require('_process'))
},{"../is-buffer/index.js":10,"./abstract-chained-batch":1,"./abstract-iterator":2,"_process":36,"xtend":4}],4:[function(require,module,exports){ },{"../is-buffer/index.js":9,"./abstract-chained-batch":1,"./abstract-iterator":2,"_process":37,"xtend":4}],4:[function(require,module,exports){
module.exports = extend module.exports = extend
function extend() { function extend() {
@ -2323,7 +2323,7 @@ function isnan (val) {
} }
}).call(this,typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? window : {}) }).call(this,typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? window : {})
},{"base64-js":5,"ieee754":8,"isarray":11}],7:[function(require,module,exports){ },{"base64-js":5,"ieee754":8,"isarray":10}],7:[function(require,module,exports){
/*global window:false, self:false, define:false, module:false */ /*global window:false, self:false, define:false, module:false */
/** /**
@ -3817,31 +3817,6 @@ exports.write = function (buffer, value, offset, isLE, mLen, nBytes) {
} }
},{}],9:[function(require,module,exports){ },{}],9:[function(require,module,exports){
if (typeof Object.create === 'function') {
// implementation from standard node.js 'util' module
module.exports = function inherits(ctor, superCtor) {
ctor.super_ = superCtor
ctor.prototype = Object.create(superCtor.prototype, {
constructor: {
value: ctor,
enumerable: false,
writable: true,
configurable: true
}
});
};
} else {
// old school shim for old browsers
module.exports = function inherits(ctor, superCtor) {
ctor.super_ = superCtor
var TempCtor = function () {}
TempCtor.prototype = superCtor.prototype
ctor.prototype = new TempCtor()
ctor.prototype.constructor = ctor
}
}
},{}],10:[function(require,module,exports){
/*! /*!
* Determine if an object is a Buffer * Determine if an object is a Buffer
* *
@ -3864,14 +3839,14 @@ function isSlowBuffer (obj) {
return typeof obj.readFloatLE === 'function' && typeof obj.slice === 'function' && isBuffer(obj.slice(0, 0)) return typeof obj.readFloatLE === 'function' && typeof obj.slice === 'function' && isBuffer(obj.slice(0, 0))
} }
},{}],11:[function(require,module,exports){ },{}],10:[function(require,module,exports){
var toString = {}.toString; var toString = {}.toString;
module.exports = Array.isArray || function (arr) { module.exports = Array.isArray || function (arr) {
return toString.call(arr) == '[object Array]'; return toString.call(arr) == '[object Array]';
}; };
},{}],12:[function(require,module,exports){ },{}],11:[function(require,module,exports){
var Buffer = require('buffer').Buffer; var Buffer = require('buffer').Buffer;
module.exports = isBuffer; module.exports = isBuffer;
@ -3881,7 +3856,7 @@ function isBuffer (o) {
|| /\[object (.+Array|Array.+)\]/.test(Object.prototype.toString.call(o)); || /\[object (.+Array|Array.+)\]/.test(Object.prototype.toString.call(o));
} }
},{"buffer":6}],13:[function(require,module,exports){ },{"buffer":6}],12:[function(require,module,exports){
(function (Buffer){ (function (Buffer){
module.exports = Level module.exports = Level
@ -4059,7 +4034,7 @@ var checkKeyValue = Level.prototype._checkKeyValue = function (obj, type) {
} }
}).call(this,require("buffer").Buffer) }).call(this,require("buffer").Buffer)
},{"./iterator":14,"abstract-leveldown":3,"buffer":6,"idb-wrapper":7,"isbuffer":12,"typedarray-to-buffer":37,"util":39,"xtend":41}],14:[function(require,module,exports){ },{"./iterator":13,"abstract-leveldown":3,"buffer":6,"idb-wrapper":7,"isbuffer":11,"typedarray-to-buffer":38,"util":41,"xtend":15}],13:[function(require,module,exports){
var util = require('util') var util = require('util')
var AbstractIterator = require('abstract-leveldown').AbstractIterator var AbstractIterator = require('abstract-leveldown').AbstractIterator
var ltgt = require('ltgt') var ltgt = require('ltgt')
@ -4133,7 +4108,43 @@ Iterator.prototype._next = function (callback) {
this.callback = callback this.callback = callback
} }
},{"abstract-leveldown":3,"ltgt":15,"util":39}],15:[function(require,module,exports){ },{"abstract-leveldown":3,"ltgt":16,"util":41}],14:[function(require,module,exports){
module.exports = hasKeys
function hasKeys(source) {
return source !== null &&
(typeof source === "object" ||
typeof source === "function")
}
},{}],15:[function(require,module,exports){
var Keys = require("object-keys")
var hasKeys = require("./has-keys")
module.exports = extend
function extend() {
var target = {}
for (var i = 0; i < arguments.length; i++) {
var source = arguments[i]
if (!hasKeys(source)) {
continue
}
var keys = Keys(source)
for (var j = 0; j < keys.length; j++) {
var name = keys[j]
target[name] = source[name]
}
}
return target
}
},{"./has-keys":14,"object-keys":18}],16:[function(require,module,exports){
(function (Buffer){ (function (Buffer){
exports.compare = function (a, b) { exports.compare = function (a, b) {
@ -4283,7 +4294,7 @@ exports.filter = function (range, compare) {
} }
}).call(this,{"isBuffer":require("../is-buffer/index.js")}) }).call(this,{"isBuffer":require("../is-buffer/index.js")})
},{"../is-buffer/index.js":10}],16:[function(require,module,exports){ },{"../is-buffer/index.js":9}],17:[function(require,module,exports){
var hasOwn = Object.prototype.hasOwnProperty; var hasOwn = Object.prototype.hasOwnProperty;
var toString = Object.prototype.toString; var toString = Object.prototype.toString;
@ -4325,11 +4336,11 @@ module.exports = function forEach(obj, fn) {
}; };
},{}],17:[function(require,module,exports){ },{}],18:[function(require,module,exports){
module.exports = Object.keys || require('./shim'); module.exports = Object.keys || require('./shim');
},{"./shim":19}],18:[function(require,module,exports){ },{"./shim":20}],19:[function(require,module,exports){
var toString = Object.prototype.toString; var toString = Object.prototype.toString;
module.exports = function isArguments(value) { module.exports = function isArguments(value) {
@ -4347,7 +4358,7 @@ module.exports = function isArguments(value) {
}; };
},{}],19:[function(require,module,exports){ },{}],20:[function(require,module,exports){
(function () { (function () {
"use strict"; "use strict";
@ -4411,7 +4422,7 @@ module.exports = function isArguments(value) {
}()); }());
},{"./foreach":16,"./isArguments":18}],20:[function(require,module,exports){ },{"./foreach":17,"./isArguments":19}],21:[function(require,module,exports){
// Top level file is just a mixin of submodules & constants // Top level file is just a mixin of submodules & constants
'use strict'; 'use strict';
@ -4427,7 +4438,7 @@ assign(pako, deflate, inflate, constants);
module.exports = pako; module.exports = pako;
},{"./lib/deflate":21,"./lib/inflate":22,"./lib/utils/common":23,"./lib/zlib/constants":26}],21:[function(require,module,exports){ },{"./lib/deflate":22,"./lib/inflate":23,"./lib/utils/common":24,"./lib/zlib/constants":27}],22:[function(require,module,exports){
'use strict'; 'use strict';
@ -4829,7 +4840,7 @@ exports.deflate = deflate;
exports.deflateRaw = deflateRaw; exports.deflateRaw = deflateRaw;
exports.gzip = gzip; exports.gzip = gzip;
},{"./utils/common":23,"./utils/strings":24,"./zlib/deflate":28,"./zlib/messages":33,"./zlib/zstream":35}],22:[function(require,module,exports){ },{"./utils/common":24,"./utils/strings":25,"./zlib/deflate":29,"./zlib/messages":34,"./zlib/zstream":36}],23:[function(require,module,exports){
'use strict'; 'use strict';
@ -5249,7 +5260,7 @@ exports.inflate = inflate;
exports.inflateRaw = inflateRaw; exports.inflateRaw = inflateRaw;
exports.ungzip = inflate; exports.ungzip = inflate;
},{"./utils/common":23,"./utils/strings":24,"./zlib/constants":26,"./zlib/gzheader":29,"./zlib/inflate":31,"./zlib/messages":33,"./zlib/zstream":35}],23:[function(require,module,exports){ },{"./utils/common":24,"./utils/strings":25,"./zlib/constants":27,"./zlib/gzheader":30,"./zlib/inflate":32,"./zlib/messages":34,"./zlib/zstream":36}],24:[function(require,module,exports){
'use strict'; 'use strict';
@ -5353,7 +5364,7 @@ exports.setTyped = function (on) {
exports.setTyped(TYPED_OK); exports.setTyped(TYPED_OK);
},{}],24:[function(require,module,exports){ },{}],25:[function(require,module,exports){
// String encode/decode helpers // String encode/decode helpers
'use strict'; 'use strict';
@ -5540,7 +5551,7 @@ exports.utf8border = function (buf, max) {
return (pos + _utf8len[buf[pos]] > max) ? pos : max; return (pos + _utf8len[buf[pos]] > max) ? pos : max;
}; };
},{"./common":23}],25:[function(require,module,exports){ },{"./common":24}],26:[function(require,module,exports){
'use strict'; 'use strict';
// Note: adler32 takes 12% for level 0 and 2% for level 6. // Note: adler32 takes 12% for level 0 and 2% for level 6.
@ -5574,7 +5585,7 @@ function adler32(adler, buf, len, pos) {
module.exports = adler32; module.exports = adler32;
},{}],26:[function(require,module,exports){ },{}],27:[function(require,module,exports){
'use strict'; 'use strict';
@ -5626,7 +5637,7 @@ module.exports = {
//Z_NULL: null // Use -1 or null inline, depending on var type //Z_NULL: null // Use -1 or null inline, depending on var type
}; };
},{}],27:[function(require,module,exports){ },{}],28:[function(require,module,exports){
'use strict'; 'use strict';
// Note: we can't get significant speed boost here. // Note: we can't get significant speed boost here.
@ -5669,7 +5680,7 @@ function crc32(crc, buf, len, pos) {
module.exports = crc32; module.exports = crc32;
},{}],28:[function(require,module,exports){ },{}],29:[function(require,module,exports){
'use strict'; 'use strict';
var utils = require('../utils/common'); var utils = require('../utils/common');
@ -7526,7 +7537,7 @@ exports.deflatePrime = deflatePrime;
exports.deflateTune = deflateTune; exports.deflateTune = deflateTune;
*/ */
},{"../utils/common":23,"./adler32":25,"./crc32":27,"./messages":33,"./trees":34}],29:[function(require,module,exports){ },{"../utils/common":24,"./adler32":26,"./crc32":28,"./messages":34,"./trees":35}],30:[function(require,module,exports){
'use strict'; 'use strict';
@ -7568,7 +7579,7 @@ function GZheader() {
module.exports = GZheader; module.exports = GZheader;
},{}],30:[function(require,module,exports){ },{}],31:[function(require,module,exports){
'use strict'; 'use strict';
// See state defs from inflate.js // See state defs from inflate.js
@ -7896,7 +7907,7 @@ module.exports = function inflate_fast(strm, start) {
return; return;
}; };
},{}],31:[function(require,module,exports){ },{}],32:[function(require,module,exports){
'use strict'; 'use strict';
@ -9436,7 +9447,7 @@ exports.inflateSyncPoint = inflateSyncPoint;
exports.inflateUndermine = inflateUndermine; exports.inflateUndermine = inflateUndermine;
*/ */
},{"../utils/common":23,"./adler32":25,"./crc32":27,"./inffast":30,"./inftrees":32}],32:[function(require,module,exports){ },{"../utils/common":24,"./adler32":26,"./crc32":28,"./inffast":31,"./inftrees":33}],33:[function(require,module,exports){
'use strict'; 'use strict';
@ -9765,7 +9776,7 @@ module.exports = function inflate_table(type, lens, lens_index, codes, table, ta
return 0; return 0;
}; };
},{"../utils/common":23}],33:[function(require,module,exports){ },{"../utils/common":24}],34:[function(require,module,exports){
'use strict'; 'use strict';
module.exports = { module.exports = {
@ -9780,7 +9791,7 @@ module.exports = {
'-6': 'incompatible version' /* Z_VERSION_ERROR (-6) */ '-6': 'incompatible version' /* Z_VERSION_ERROR (-6) */
}; };
},{}],34:[function(require,module,exports){ },{}],35:[function(require,module,exports){
'use strict'; 'use strict';
@ -10984,7 +10995,7 @@ exports._tr_flush_block = _tr_flush_block;
exports._tr_tally = _tr_tally; exports._tr_tally = _tr_tally;
exports._tr_align = _tr_align; exports._tr_align = _tr_align;
},{"../utils/common":23}],35:[function(require,module,exports){ },{"../utils/common":24}],36:[function(require,module,exports){
'use strict'; 'use strict';
@ -11015,7 +11026,7 @@ function ZStream() {
module.exports = ZStream; module.exports = ZStream;
},{}],36:[function(require,module,exports){ },{}],37:[function(require,module,exports){
// shim for using process in browser // shim for using process in browser
var process = module.exports = {}; var process = module.exports = {};
@ -11197,7 +11208,7 @@ process.chdir = function (dir) {
}; };
process.umask = function() { return 0; }; process.umask = function() { return 0; };
},{}],37:[function(require,module,exports){ },{}],38:[function(require,module,exports){
(function (Buffer){ (function (Buffer){
/** /**
* Convert a typed array to a Buffer without a copy * Convert a typed array to a Buffer without a copy
@ -11220,14 +11231,39 @@ module.exports = function (arr) {
} }
}).call(this,require("buffer").Buffer) }).call(this,require("buffer").Buffer)
},{"buffer":6}],38:[function(require,module,exports){ },{"buffer":6}],39:[function(require,module,exports){
if (typeof Object.create === 'function') {
// implementation from standard node.js 'util' module
module.exports = function inherits(ctor, superCtor) {
ctor.super_ = superCtor
ctor.prototype = Object.create(superCtor.prototype, {
constructor: {
value: ctor,
enumerable: false,
writable: true,
configurable: true
}
});
};
} else {
// old school shim for old browsers
module.exports = function inherits(ctor, superCtor) {
ctor.super_ = superCtor
var TempCtor = function () {}
TempCtor.prototype = superCtor.prototype
ctor.prototype = new TempCtor()
ctor.prototype.constructor = ctor
}
}
},{}],40:[function(require,module,exports){
module.exports = function isBuffer(arg) { module.exports = function isBuffer(arg) {
return arg && typeof arg === 'object' return arg && typeof arg === 'object'
&& typeof arg.copy === 'function' && typeof arg.copy === 'function'
&& typeof arg.fill === 'function' && typeof arg.fill === 'function'
&& typeof arg.readUInt8 === 'function'; && typeof arg.readUInt8 === 'function';
} }
},{}],39:[function(require,module,exports){ },{}],41:[function(require,module,exports){
(function (process,global){ (function (process,global){
// Copyright Joyent, Inc. and other Node contributors. // Copyright Joyent, Inc. and other Node contributors.
// //
@ -11817,70 +11853,39 @@ function hasOwnProperty(obj, prop) {
} }
}).call(this,require('_process'),typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? window : {}) }).call(this,require('_process'),typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? window : {})
},{"./support/isBuffer":38,"_process":36,"inherits":9}],40:[function(require,module,exports){ },{"./support/isBuffer":40,"_process":37,"inherits":39}],42:[function(require,module,exports){
module.exports = hasKeys 'use strict';
function hasKeys(source) {
return source !== null &&
(typeof source === "object" ||
typeof source === "function")
}
},{}],41:[function(require,module,exports){
var Keys = require("object-keys")
var hasKeys = require("./has-keys")
module.exports = extend
function extend() {
var target = {}
for (var i = 0; i < arguments.length; i++) {
var source = arguments[i]
if (!hasKeys(source)) {
continue
}
var keys = Keys(source)
for (var j = 0; j < keys.length; j++) {
var name = keys[j]
target[name] = source[name]
}
}
return target
}
},{"./has-keys":40,"object-keys":17}],42:[function(require,module,exports){ var leveljs = require('level-js');
var leveljs = require('level-js') var db = typeof indexedDB === 'undefined' ? { open: function open(_, cb) {
var db = typeof indexedDB === 'undefined' ? { open: (_, cb) => cb(true) } : leveljs('./tessdata2') return cb(true);
} } : leveljs('./tessdata2');
var langdata = require('../common/langdata.json') var langdata = require('../common/langdata.json');
module.exports = function getLanguageData(req, res, cb){ module.exports = function getLanguageData(req, res, cb) {
var lang = req.options.lang; var lang = req.options.lang;
function saveDataFile(data){ function saveDataFile(data) {
db.put(lang, data, err => console.log('cached', lang, err)) db.put(lang, data, function (err) {
cb(data) return console.log('cached', lang, err);
});
cb(data);
} }
db.open({ compression: false }, err => { db.open({ compression: false }, function (err) {
if (err) return fetchLanguageData(req, res, cb); if (err) return fetchLanguageData(req, res, cb);
db.get(lang, (err, data) => { db.get(lang, function (err, data) {
if (err) return fetchLanguageData(req, res, saveDataFile); if (err) return fetchLanguageData(req, res, saveDataFile);
res.progress({ status: 'found in cache ' + lang + '.traineddata' }) res.progress({ status: 'found in cache ' + lang + '.traineddata' });
cb(data) cb(data);
}) });
}) });
} };
var ungzip = require('pako').ungzip; var ungzip = require('pako').ungzip;
function fetchLanguageData(req, res, cb){ function fetchLanguageData(req, res, cb) {
var lang = req.options.lang; var lang = req.options.lang;
var langfile = lang + '.traineddata.gz'; var langfile = lang + '.traineddata.gz';
var url = req.workerOptions.langPath + langfile; var url = req.workerOptions.langPath + langfile;
@ -11888,114 +11893,134 @@ function fetchLanguageData(req, res, cb){
var xhr = new XMLHttpRequest(); var xhr = new XMLHttpRequest();
xhr.open('GET', url, true); xhr.open('GET', url, true);
xhr.responseType = 'arraybuffer'; xhr.responseType = 'arraybuffer';
xhr.onerror = e => { xhr.onerror = function (e) {
xhr.onprogress = xhr.onload = null xhr.onprogress = xhr.onload = null;
cb(xhr, null) cb(xhr, null);
} };
xhr.onprogress = e => xhr.onprogress = function (e) {
res.progress({ return res.progress({
status: 'downloading ' + langfile, status: 'downloading ' + langfile,
loaded: e.loaded, loaded: e.loaded,
progress: Math.min(1, e.loaded / langdata[lang]) progress: Math.min(1, e.loaded / langdata[lang])
}); });
};
xhr.onload = e => { xhr.onload = function (e) {
if (!(xhr.status == 200 || (xhr.status == 0 && xhr.response))) return res.reject('Error downloading language ' + url); if (!(xhr.status == 200 || xhr.status == 0 && xhr.response)) return res.reject('Error downloading language ' + url);
res.progress({ status: 'unzipping ' + langfile }) res.progress({ status: 'unzipping ' + langfile });
// in case the gzips are already ungzipped or extra gzipped // in case the gzips are already ungzipped or extra gzipped
var response = new Uint8Array(xhr.response) var response = new Uint8Array(xhr.response);
try { try {
while(response[0] == 0x1f && response[1] == 0x8b) response = ungzip(response); while (response[0] == 0x1f && response[1] == 0x8b) {
response = ungzip(response);
}
} catch (err) { } catch (err) {
return res.reject('Error unzipping language file ' + langfile + '\n' + err.message) return res.reject('Error unzipping language file ' + langfile + '\n' + err.message);
} }
cb(response) cb(response);
} };
xhr.send() xhr.send();
} }
},{"../common/langdata.json":46,"level-js":13,"pako":20}],43:[function(require,module,exports){ },{"../common/langdata.json":46,"level-js":12,"pako":21}],43:[function(require,module,exports){
(function (global){ (function (process,global){
"use strict"; 'use strict';
var workerUtils = require('../common/worker.js') var workerUtils = require('../common/worker.js');
global.addEventListener('message', function(e){ if (process.env.NODE_ENV === "development") {
console.debug('Using Development Worker');
}
global.addEventListener('message', function (e) {
var packet = e.data; var packet = e.data;
workerUtils.dispatchHandlers(packet, obj => postMessage(obj)) workerUtils.dispatchHandlers(packet, function (obj) {
}) return postMessage(obj);
});
});
exports.getCore = function(req, res){ exports.getCore = function (req, res) {
if(!global.TesseractCore){ if (!global.TesseractCore) {
res.progress({ status: 'loading tesseract core' }) res.progress({ status: 'loading tesseract core', progress: 0 });
importScripts(req.workerOptions.tesseractPath) importScripts(req.workerOptions.corePath);
res.progress({ status: 'loaded tesseract core' }) res.progress({ status: 'loading tesseract core', progress: 1 });
} }
return TesseractCore return TesseractCore;
} };
exports.getLanguageData = require('./lang.js') exports.getLanguageData = require('./lang.js');
workerUtils.setAdapter(module.exports); workerUtils.setAdapter(module.exports);
}).call(this,typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? window : {}) }).call(this,require('_process'),typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? window : {})
},{"../common/worker.js":47,"./lang.js":42}],44:[function(require,module,exports){ },{"../common/worker.js":47,"./lang.js":42,"_process":37}],44:[function(require,module,exports){
'use strict';
// This converts an image to grayscale // This converts an image to grayscale
module.exports = function desaturate(image){ module.exports = function desaturate(image) {
var width, height; var width, height;
if(image.data){ if (image.data) {
var src = image.data; var src = image.data;
width = image.width, width = image.width, height = image.height;
height = image.height; var dst = new Uint8Array(width * height);
var dst = new Uint8Array(width * height); var srcLength = src.length | 0,
var srcLength = src.length | 0, srcLength_16 = (srcLength - 16) | 0; srcLength_16 = srcLength - 16 | 0;
for (var i = 0, j = 0; i <= srcLength_16; i += 16, j += 4) { for (var i = 0, j = 0; i <= srcLength_16; i += 16, j += 4) {
// convert to grayscale 4 pixels at a time; eveything with alpha gets put in front of 50% gray // convert to grayscale 4 pixels at a time; eveything with alpha gets put in front of 50% gray
dst[j] = (((src[i] * 77 + src[i+1] * 151 + src[i+2] * 28) * src[i+3]) + ((255-src[i+3]) << 15) + 32768) >> 16 dst[j] = (src[i] * 77 + src[i + 1] * 151 + src[i + 2] * 28) * src[i + 3] + (255 - src[i + 3] << 15) + 32768 >> 16;
dst[j+1] = (((src[i+4] * 77 + src[i+5] * 151 + src[i+6] * 28) * src[i+7]) + ((255-src[i+7]) << 15) + 32768) >> 16 dst[j + 1] = (src[i + 4] * 77 + src[i + 5] * 151 + src[i + 6] * 28) * src[i + 7] + (255 - src[i + 7] << 15) + 32768 >> 16;
dst[j+2] = (((src[i+8] * 77 + src[i+9] * 151 + src[i+10] * 28) * src[i+11]) + ((255-src[i+11]) << 15) + 32768) >> 16 dst[j + 2] = (src[i + 8] * 77 + src[i + 9] * 151 + src[i + 10] * 28) * src[i + 11] + (255 - src[i + 11] << 15) + 32768 >> 16;
dst[j+3] = (((src[i+12] * 77 + src[i+13] * 151 + src[i+14] * 28) * src[i+15]) + ((255-src[i+15]) << 15) + 32768) >> 16 dst[j + 3] = (src[i + 12] * 77 + src[i + 13] * 151 + src[i + 14] * 28) * src[i + 15] + (255 - src[i + 15] << 15) + 32768 >> 16;
} }
for (; i < srcLength; i += 4, ++j) //finish up for (; i < srcLength; i += 4, ++j) {
dst[j] = (((src[i] * 77 + src[i+1] * 151 + src[i+2] * 28) * src[i+3]) + ((255-src[i+3]) << 15) + 32768) >> 16 //finish up
image = dst; dst[j] = (src[i] * 77 + src[i + 1] * 151 + src[i + 2] * 28) * src[i + 3] + (255 - src[i + 3] << 15) + 32768 >> 16;
} else { throw 'Invalid ImageData' } }image = dst;
return image } else {
} throw 'Invalid ImageData';
}
return image;
};
},{}],45:[function(require,module,exports){ },{}],45:[function(require,module,exports){
module.exports = function DumpLiterallyEverything(Module, base){ 'use strict';
module.exports = function DumpLiterallyEverything(Module, base) {
var ri = base.GetIterator(); var ri = base.GetIterator();
var blocks = []; var blocks = [];
var block, para, textline, word, symbol; var block, para, textline, word, symbol;
function enumToString(value, prefix){ function enumToString(value, prefix) {
return (Object.keys(Module) return Object.keys(Module).filter(function (e) {
.filter(function(e){ return e.substr(0, prefix.length + 1) == prefix + '_' }) return e.substr(0, prefix.length + 1) == prefix + '_';
.filter(function(e){ return Module[e] === value }) }).filter(function (e) {
.map(function(e){ return e.slice(prefix.length + 1) })[0]) return Module[e] === value;
}).map(function (e) {
return e.slice(prefix.length + 1);
})[0];
} }
ri.Begin() ri.Begin();
do { do {
if(ri.IsAtBeginningOf(Module.RIL_BLOCK)){ if (ri.IsAtBeginningOf(Module.RIL_BLOCK)) {
var poly = ri.BlockPolygon(); var poly = ri.BlockPolygon();
var polygon = null; var polygon = null;
// BlockPolygon() returns null when automatic page segmentation is off // BlockPolygon() returns null when automatic page segmentation is off
if(Module.getPointer(poly) > 0){ if (Module.getPointer(poly) > 0) {
var n = poly.get_n(), var n = poly.get_n(),
px = poly.get_x(), px = poly.get_x(),
py = poly.get_y(), py = poly.get_y(),
polygon = []; polygon = [];
for(var i = 0; i < n; i++){ for (var i = 0; i < n; i++) {
polygon.push([px.getValue(i), py.getValue(i)]); polygon.push([px.getValue(i), py.getValue(i)]);
} }
Module._ptaDestroy(Module.getPointer(poly)); Module._ptaDestroy(Module.getPointer(poly));
} }
block = { block = {
paragraphs: [], paragraphs: [],
@ -12006,10 +12031,10 @@ module.exports = function DumpLiterallyEverything(Module, base){
blocktype: enumToString(ri.BlockType(), 'PT'), blocktype: enumToString(ri.BlockType(), 'PT'),
polygon: polygon polygon: polygon
} };
blocks.push(block) blocks.push(block);
} }
if(ri.IsAtBeginningOf(Module.RIL_PARA)){ if (ri.IsAtBeginningOf(Module.RIL_PARA)) {
para = { para = {
lines: [], lines: [],
@ -12019,10 +12044,10 @@ module.exports = function DumpLiterallyEverything(Module, base){
bbox: ri.getBoundingBox(Module.RIL_PARA), bbox: ri.getBoundingBox(Module.RIL_PARA),
is_ltr: !!ri.ParagraphIsLtr() is_ltr: !!ri.ParagraphIsLtr()
} };
block.paragraphs.push(para) block.paragraphs.push(para);
} }
if(ri.IsAtBeginningOf(Module.RIL_TEXTLINE)){ if (ri.IsAtBeginningOf(Module.RIL_TEXTLINE)) {
textline = { textline = {
words: [], words: [],
@ -12030,10 +12055,10 @@ module.exports = function DumpLiterallyEverything(Module, base){
confidence: ri.Confidence(Module.RIL_TEXTLINE), confidence: ri.Confidence(Module.RIL_TEXTLINE),
baseline: ri.getBaseline(Module.RIL_TEXTLINE), baseline: ri.getBaseline(Module.RIL_TEXTLINE),
bbox: ri.getBoundingBox(Module.RIL_TEXTLINE) bbox: ri.getBoundingBox(Module.RIL_TEXTLINE)
} };
para.lines.push(textline) para.lines.push(textline);
} }
if(ri.IsAtBeginningOf(Module.RIL_WORD)){ if (ri.IsAtBeginningOf(Module.RIL_WORD)) {
var fontInfo = ri.getWordFontAttributes(), var fontInfo = ri.getWordFontAttributes(),
wordDir = ri.WordDirection(); wordDir = ri.WordDirection();
word = { word = {
@ -12058,25 +12083,25 @@ module.exports = function DumpLiterallyEverything(Module, base){
is_smallcaps: fontInfo.is_smallcaps, is_smallcaps: fontInfo.is_smallcaps,
font_size: fontInfo.pointsize, font_size: fontInfo.pointsize,
font_id: fontInfo.font_id, font_id: fontInfo.font_id,
font_name: fontInfo.font_name, font_name: fontInfo.font_name
} };
var wc = new Module.WordChoiceIterator(ri); var wc = new Module.WordChoiceIterator(ri);
do { do {
word.choices.push({ word.choices.push({
text: wc.GetUTF8Text(), text: wc.GetUTF8Text(),
confidence: wc.Confidence() confidence: wc.Confidence()
}) });
} while (wc.Next()); } while (wc.Next());
Module.destroy(wc) Module.destroy(wc);
textline.words.push(word) textline.words.push(word);
} }
var image = null; var image = null;
// var pix = ri.GetBinaryImage(Module.RIL_SYMBOL) // var pix = ri.GetBinaryImage(Module.RIL_SYMBOL)
// var image = pix2array(pix); // var image = pix2array(pix);
// // for some reason it seems that things stop working if you destroy pics // // for some reason it seems that things stop working if you destroy pics
// Module._pixDestroy(Module.getPointer(pix)); // Module._pixDestroy(Module.getPointer(pix));
if(ri.IsAtBeginningOf(Module.RIL_SYMBOL)){ if (ri.IsAtBeginningOf(Module.RIL_SYMBOL)) {
symbol = { symbol = {
choices: [], choices: [],
image: image, image: image,
@ -12088,20 +12113,20 @@ module.exports = function DumpLiterallyEverything(Module, base){
is_superscript: !!ri.SymbolIsSuperscript(), is_superscript: !!ri.SymbolIsSuperscript(),
is_subscript: !!ri.SymbolIsSubscript(), is_subscript: !!ri.SymbolIsSubscript(),
is_dropcap: !!ri.SymbolIsDropcap(), is_dropcap: !!ri.SymbolIsDropcap()
} };
word.symbols.push(symbol) word.symbols.push(symbol);
var ci = new Module.ChoiceIterator(ri); var ci = new Module.ChoiceIterator(ri);
do { do {
symbol.choices.push({ symbol.choices.push({
text: ci.GetUTF8Text(), text: ci.GetUTF8Text(),
confidence: ci.Confidence() confidence: ci.Confidence()
}) });
} while (ci.Next()); } while (ci.Next());
Module.destroy(ci) Module.destroy(ci);
} }
} while (ri.Next(Module.RIL_SYMBOL)); } while (ri.Next(Module.RIL_SYMBOL));
Module.destroy(ri) Module.destroy(ri);
return { return {
text: base.GetUTF8Text(), text: base.GetUTF8Text(),
@ -12113,120 +12138,129 @@ module.exports = function DumpLiterallyEverything(Module, base){
psm: enumToString(base.GetPageSegMode(), 'PSM'), psm: enumToString(base.GetPageSegMode(), 'PSM'),
oem: enumToString(base.oem(), 'OEM'), oem: enumToString(base.oem(), 'OEM'),
version: base.Version(), version: base.Version()
} };
} };
// the generated HOCR is excessively indented, so // the generated HOCR is excessively indented, so
// we get rid of that indentation // we get rid of that indentation
function deindent(html){ function deindent(html) {
var lines = html.split('\n') var lines = html.split('\n');
if(lines[0].substring(0, 2) === " "){ if (lines[0].substring(0, 2) === " ") {
for (var i = 0; i < lines.length; i++) { for (var i = 0; i < lines.length; i++) {
if (lines[i].substring(0,2) === " ") { if (lines[i].substring(0, 2) === " ") {
lines[i] = lines[i].slice(2) lines[i] = lines[i].slice(2);
} }
}; };
} }
return lines.join('\n') return lines.join('\n');
} }
},{}],46:[function(require,module,exports){ },{}],46:[function(require,module,exports){
module.exports={"afr": 1079573, "ara": 1701536, "aze": 1420865, "bel": 1276820, "ben": 6772012, "bul": 1605615, "cat": 1652368, "ces": 1035441, "chi_sim": 17710414, "chi_tra": 24717749, "chr": 320649, "dan-frak": 677656, "dan": 1972936, "deu-frak": 822644, "deu": 991656, "ell": 859719, "eng": 9453554, "enm": 619254, "epo": 1241212, "equ": 821130, "est": 1905040, "eus": 1641190, "fin": 979418, "fra": 1376221, "frk": 5912963, "frm": 5147082, "glg": 1674938, "grc": 3012615, "heb": 1051501, "hin": 6590065, "hrv": 1926995, "hun": 3074473, "ind": 1874776, "isl": 1634041, "ita": 948593, "ita_old": 3436571, "jpn": 13507168, "kan": 4390317, "kor": 5353098, "lav": 1843944, "lit": 1779240, "mal": 5966263, "meme": 88453, "mkd": 1163087, "mlt": 1463001, "msa": 1665427, "nld": 1134708, "nor": 2191610, "osd": 4274649, "pol": 7024662, "por": 909359, "ron": 915680, "rus": 5969957, "slk-frak": 289885, "slk": 2217342, "slv": 1611338, "spa": 883170, "spa_old": 5647453, "sqi": 1667041, "srp": 1770244, "swa": 757916, "swe": 2451917, "tam": 3498763, "tel": 5795246, "tgl": 1496256, "tha": 3811136, "tur": 3563264, "ukr": 937566, "vie": 2195922} module.exports={"afr": 1079573, "ara": 1701536, "aze": 1420865, "bel": 1276820, "ben": 6772012, "bul": 1605615, "cat": 1652368, "ces": 1035441, "chi_sim": 17710414, "chi_tra": 24717749, "chr": 320649, "dan-frak": 677656, "dan": 1972936, "deu-frak": 822644, "deu": 991656, "ell": 859719, "eng": 9453554, "enm": 619254, "epo": 1241212, "equ": 821130, "est": 1905040, "eus": 1641190, "fin": 979418, "fra": 1376221, "frk": 5912963, "frm": 5147082, "glg": 1674938, "grc": 3012615, "heb": 1051501, "hin": 6590065, "hrv": 1926995, "hun": 3074473, "ind": 1874776, "isl": 1634041, "ita": 948593, "ita_old": 3436571, "jpn": 13507168, "kan": 4390317, "kor": 5353098, "lav": 1843944, "lit": 1779240, "mal": 5966263, "meme": 88453, "mkd": 1163087, "mlt": 1463001, "msa": 1665427, "nld": 1134708, "nor": 2191610, "osd": 4274649, "pol": 7024662, "por": 909359, "ron": 915680, "rus": 5969957, "slk-frak": 289885, "slk": 2217342, "slv": 1611338, "spa": 883170, "spa_old": 5647453, "sqi": 1667041, "srp": 1770244, "swa": 757916, "swe": 2451917, "tam": 3498763, "tel": 5795246, "tgl": 1496256, "tha": 3811136, "tur": 3563264, "ukr": 937566, "vie": 2195922}
},{}],47:[function(require,module,exports){ },{}],47:[function(require,module,exports){
'use strict';
var latestJob; var latestJob;
var Module; var Module;
var base; var base;
var adapter = {}; var adapter = {};
function dispatchHandlers(packet, send){ function dispatchHandlers(packet, send) {
function respond(status, data){ function respond(status, data) {
send({ send({
jobId: packet.jobId, jobId: packet.jobId,
status: status, status: status,
action: packet.action, action: packet.action,
data: data data: data
}) });
} }
respond.resolve = respond.bind(this, 'resolve') respond.resolve = respond.bind(this, 'resolve');
respond.reject = respond.bind(this, 'reject') respond.reject = respond.bind(this, 'reject');
respond.progress = respond.bind(this, 'progress') respond.progress = respond.bind(this, 'progress');
latestJob = respond; latestJob = respond;
if(packet.action === 'recognize'){ try {
handleRecognize(packet.payload, respond) if (packet.action === 'recognize') {
}else if(packet.action === 'detect'){ handleRecognize(packet.payload, respond);
handleDetect(packet.payload, respond) } else if (packet.action === 'detect') {
handleDetect(packet.payload, respond);
}
} catch (err) {
respond.reject(err);
} }
} }
exports.dispatchHandlers = dispatchHandlers; exports.dispatchHandlers = dispatchHandlers;
exports.setAdapter = function setAdapter(impl){ exports.setAdapter = function setAdapter(impl) {
adapter = impl; adapter = impl;
} };
function handleInit(req, res) {
var MIN_MEMORY = 100663296;
if (['chi_sim', 'chi_tra', 'jpn'].indexOf(req.options.lang) != -1) {
MIN_MEMORY = 167772160;
}
function handleInit(req, res){ if (!Module || Module.TOTAL_MEMORY < MIN_MEMORY) {
if(!Module){
var Core = adapter.getCore(req, res); var Core = adapter.getCore(req, res);
res.progress({ status: 'initializing tesseract api' }) res.progress({ status: 'initializing tesseract', progress: 0 });
Module = Core({ Module = Core({
TOTAL_MEMORY: req.memory, TOTAL_MEMORY: MIN_MEMORY,
TesseractProgress(percent){ TesseractProgress: function TesseractProgress(percent) {
latestJob.progress({ status: 'recognizing text', progress: Math.max(0, (percent-30)/70) }) latestJob.progress({ status: 'recognizing text', progress: Math.max(0, (percent - 30) / 70) });
}, },
onRuntimeInitialized() {} onRuntimeInitialized: function onRuntimeInitialized() {}
}) });
Module.FS_createPath("/", "tessdata", true, true)
base = new Module.TessBaseAPI() Module.FS_createPath("/", "tessdata", true, true);
res.progress({ status: 'initialized tesseract api' }) base = new Module.TessBaseAPI();
res.progress({ status: 'initializing tesseract', progress: 1 });
} }
} }
var dump = require('./dump.js');
var desaturate = require('./desaturate.js');
function setImage(Module, base, image) {
var dump = require('./dump.js')
var desaturate = require('./desaturate.js')
function setImage(Module, base, image){
var imgbin = desaturate(image), var imgbin = desaturate(image),
width = image.width, width = image.width,
height = image.height; height = image.height;
var ptr = Module.allocate(imgbin, 'i8', Module.ALLOC_NORMAL); var ptr = Module.allocate(imgbin, 'i8', Module.ALLOC_NORMAL);
base.SetImage(Module.wrapPointer(ptr), width, height, 1, width); base.SetImage(Module.wrapPointer(ptr), width, height, 1, width);
base.SetRectangle(0, 0, width, height) base.SetRectangle(0, 0, width, height);
return ptr; return ptr;
} }
function loadLanguage(req, res, cb){ function loadLanguage(req, res, cb) {
var lang = req.options.lang; var lang = req.options.lang;
if(!Module._loadedLanguages) Module._loadedLanguages = {};
if(lang in Module._loadedLanguages) return cb();
adapter.getLanguageData(req, res, function(data){ if (!Module._loadedLanguages) Module._loadedLanguages = {};
if (lang in Module._loadedLanguages) return cb();
adapter.getLanguageData(req, res, function (data) {
Module.FS_createDataFile('tessdata', lang + ".traineddata", data, true, false); Module.FS_createDataFile('tessdata', lang + ".traineddata", data, true, false);
res.progress({ status: 'loaded ' + lang + '.traineddata' }) res.progress({ status: 'loading ' + lang + '.traineddata', progress: 1 });
Module._loadedLanguages[lang] = true; Module._loadedLanguages[lang] = true;
cb() cb();
}) });
} }
function handleRecognize(req, res) {
handleInit(req, res);
loadLanguage(req, res, function () {
function handleRecognize(req, res){
handleInit(req, res)
loadLanguage(req, res, function(){
var lang = req.options.lang; var lang = req.options.lang;
base.Init(null, lang) res.progress({ status: 'initializing api', progress: 0 });
res.progress({ status: 'initialized with language' }) base.Init(null, lang);
res.progress({ status: 'initializing api', progress: 0.3 });
var options = req.options; var options = req.options;
for (var option in options) { for (var option in options) {
@ -12235,39 +12269,41 @@ function handleRecognize(req, res){
} }
} }
res.progress({ status: 'initializing api', progress: 0.6 });
var ptr = setImage(Module, base, req.image); var ptr = setImage(Module, base, req.image);
base.Recognize(null) res.progress({ status: 'initializing api', progress: 1 });
var result = dump(Module, base) base.Recognize(null);
var result = dump(Module, base);
base.End(); base.End();
Module._free(ptr); Module._free(ptr);
res.resolve(result); res.resolve(result);
}) });
} }
function handleDetect(req, res) {
function handleDetect(req, res){ handleInit(req, res);
handleInit(req, res)
req.options.lang = 'osd'; req.options.lang = 'osd';
loadLanguage(req, res, function(){ loadLanguage(req, res, function () {
base.Init(null, 'osd');
base.SetPageSegMode(Module.PSM_OSD_ONLY);
base.Init(null, 'osd')
base.SetPageSegMode(Module.PSM_OSD_ONLY)
var ptr = setImage(Module, base, req.image); var ptr = setImage(Module, base, req.image);
var results = new Module.OSResults(); var results = new Module.OSResults();
var success = base.DetectOS(results); var success = base.DetectOS(results);
if(!success){ if (!success) {
base.End(); base.End();
Module._free(ptr); Module._free(ptr);
res.reject("failed to detect os") res.reject("failed to detect os");
} else { } else {
var charset = results.get_unicharset() var charset = results.get_unicharset();
var best = results.get_best_result() var best = results.get_best_result();
var oid = best.get_orientation_id(), var oid = best.get_orientation_id(),
sid = best.get_script_id(); sid = best.get_script_id();
@ -12277,14 +12313,14 @@ function handleDetect(req, res){
script_confidence: best.get_sconfidence(), script_confidence: best.get_sconfidence(),
orientation_degrees: [0, 270, 180, 90][oid], orientation_degrees: [0, 270, 180, 90][oid],
orientation_confidence: best.get_oconfidence() orientation_confidence: best.get_oconfidence()
} };
base.End(); base.End();
Module._free(ptr); Module._free(ptr);
res.resolve(result) res.resolve(result);
} }
}) });
} }
},{"./desaturate.js":44,"./dump.js":45}]},{},[43]); },{"./desaturate.js":44,"./dump.js":45}]},{},[43]);

5
package.json

@ -1,12 +1,13 @@
{ {
"name": "tesseract.js", "name": "tesseract.js",
"version": "1.0.3", "version": "1.0.5",
"description": "Pure Javascript Multilingual OCR", "description": "Pure Javascript Multilingual OCR",
"main": "src/index.js", "main": "src/index.js",
"scripts": { "scripts": {
"test": "echo \"Error: no test specified\" & exit 1", "test": "echo \"Error: no test specified\" & exit 1",
"start": "watchify src/index.js -t [ envify --NODE_ENV development ] -t [ babelify --presets [ es2015 ] ] -o dist/tesseract.dev.js --standalone Tesseract & watchify src/browser/worker.js -t [ envify --NODE_ENV development ] -t [ babelify --presets [ es2015 ] ] -o dist/worker.dev.js & http-server -p 7355", "start": "watchify src/index.js -t [ envify --NODE_ENV development ] -t [ babelify --presets [ es2015 ] ] -o dist/tesseract.dev.js --standalone Tesseract & watchify src/browser/worker.js -t [ envify --NODE_ENV development ] -t [ babelify --presets [ es2015 ] ] -o dist/worker.dev.js & http-server -p 7355",
"build": "browserify src/index.js -t [ babelify --presets [ es2015 ] ] -o dist/tesseract.js --standalone Tesseract && browserify src/browser/worker.js -t [ babelify --presets [ es2015 ] ] -o dist/worker.js" "build": "browserify src/index.js -t [ babelify --presets [ es2015 ] ] -o dist/tesseract.js --standalone Tesseract && browserify src/browser/worker.js -t [ babelify --presets [ es2015 ] ] -o dist/worker.js",
"release": "git tag `jq -r '.version' package.json`"
}, },
"browser": { "browser": {
"./src/node/index.js": "./src/browser/index.js" "./src/node/index.js": "./src/browser/index.js"

5
src/browser/index.js

@ -1,5 +1,5 @@
var defaultOptions = { var defaultOptions = {
workerPath: 'https://cdn.rawgit.com/naptha/tesseract.js/0.2.0/dist/worker.js', // workerPath: 'https://cdn.rawgit.com/naptha/tesseract.js/0.2.0/dist/worker.js',
corePath: 'https://cdn.rawgit.com/naptha/tesseract.js-core/0.1.0/index.js', corePath: 'https://cdn.rawgit.com/naptha/tesseract.js-core/0.1.0/index.js',
langPath: 'https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/', langPath: 'https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/',
} }
@ -7,6 +7,9 @@ var defaultOptions = {
if (process.env.NODE_ENV === "development") { if (process.env.NODE_ENV === "development") {
console.debug('Using Development Configuration') console.debug('Using Development Configuration')
defaultOptions.workerPath = location.protocol + '//' + location.host + '/dist/worker.dev.js' defaultOptions.workerPath = location.protocol + '//' + location.host + '/dist/worker.dev.js'
}else{
var version = require('../../package.json').version;
defaultOptions.workerPath = 'https://cdn.rawgit.com/naptha/tesseract.js/' + version + '/dist/worker.js'
} }
exports.defaultOptions = defaultOptions; exports.defaultOptions = defaultOptions;

2
src/index.js

@ -2,11 +2,13 @@ const adapter = require('./node/index.js')
const circularize = require('./common/circularize.js') const circularize = require('./common/circularize.js')
const TesseractJob = require('./common/job'); const TesseractJob = require('./common/job');
const objectAssign = require('object-assign'); const objectAssign = require('object-assign');
const version = require('../package.json').version;
function create(workerOptions){ function create(workerOptions){
workerOptions = workerOptions || {}; workerOptions = workerOptions || {};
var worker = new TesseractWorker(objectAssign({}, adapter.defaultOptions, workerOptions)) var worker = new TesseractWorker(objectAssign({}, adapter.defaultOptions, workerOptions))
worker.create = create; worker.create = create;
worker.version = version;
return worker; return worker;
} }

Loading…
Cancel
Save