Browse Source

update contrib

pull/12/head
Guillermo 8 years ago
parent
commit
c61ee098d7
  1. 6
      README.md
  2. 376
      dist/tesseract.js
  3. 332
      dist/worker.js

6
README.md

@ -265,8 +265,10 @@ Then, cd in to the folder, `npm install`, and `npm start`
... a bunch of npm stuff ... ... a bunch of npm stuff ...
tesseract.js@1.0.0 start /Users/guillermo/Desktop/code_static/tesseract.js Starting up http-server, serving ./
node devServer.js Available on:
http://127.0.0.1:7355
http://18.111.40.187:7355
Listening at http://localhost:7355 Listening at http://localhost:7355
``` ```

376
dist/tesseract.js vendored

@ -1,85 +1,78 @@
(function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.Tesseract = f()}})(function(){var define,module,exports;return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s})({1:[function(require,module,exports){ (function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.Tesseract = f()}})(function(){var define,module,exports;return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s})({1:[function(require,module,exports){
'use strict';
exports.defaultOptions = { exports.defaultOptions = {
langPath: 'https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/', langPath: 'https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/',
// workerPath: 'dist/worker.js', // workerPath: 'dist/worker.js',
workerPath: 'https://cdn.rawgit.com/naptha/tesseract.js/0.1.3/dist/worker.js', workerPath: 'https://cdn.rawgit.com/naptha/tesseract.js/0.1.3/dist/worker.js',
tesseractPath: 'https://cdn.rawgit.com/naptha/tesseract.js-core/0.1.0/index.js' tesseractPath: 'https://cdn.rawgit.com/naptha/tesseract.js-core/0.1.0/index.js',
}; }
exports.spawnWorker = function spawnWorker(instance, workerOptions) { exports.spawnWorker = function spawnWorker(instance, workerOptions){
if (window.Blob && window.URL) { if(window.Blob && window.URL){
var blob = new Blob(['importScripts("' + workerOptions.workerPath + '");']); var blob = new Blob(['importScripts("' + workerOptions.workerPath + '");'])
var worker = new Worker(window.URL.createObjectURL(blob)); var worker = new Worker(window.URL.createObjectURL(blob));
} else { }else{
var worker = new Worker(workerOptions.workerPath); var worker = new Worker(workerOptions.workerPath)
} }
worker.onmessage = function (e) { worker.onmessage = function(e){
var packet = e.data; var packet = e.data;
instance._recv(packet); instance._recv(packet)
}; }
return worker; return worker
}; }
exports.terminateWorker = function (instance) { exports.terminateWorker = function(instance){
instance.worker.terminate(); instance.worker.terminate()
}; }
exports.sendPacket = function sendPacket(instance, packet) { exports.sendPacket = function sendPacket(instance, packet){
loadImage(packet.payload.image, function (img) { loadImage(packet.payload.image, function(img){
packet.payload.image = img; packet.payload.image = img
instance.worker.postMessage(packet); instance.worker.postMessage(packet)
}); })
}; }
function loadImage(image, cb) {
if (typeof image === 'string') { function loadImage(image, cb){
if (/^\#/.test(image)) { if(typeof image === 'string'){
if(/^\#/.test(image)){
// element css selector // element css selector
return loadImage(document.querySelector(image), cb); return loadImage(document.querySelector(image), cb)
} else { }else{
// url or path // url or path
var im = new Image(); var im = new Image
im.src = image; im.src = image;
im.onload = function (e) { im.onload = e => loadImage(im, cb);
return loadImage(im, cb); return
};
return;
} }
} else if (image instanceof File) { }else if(image instanceof File){
// files // files
var fr = new FileReader(); var fr = new FileReader()
fr.onload = function (e) { fr.onload = e => loadImage(fr.result, cb);
return loadImage(fr.result, cb); fr.readAsDataURL(image)
}; return
fr.readAsDataURL(image); }else if(image instanceof Blob){
return; return loadImage(URL.createObjectURL(image), cb)
} else if (image instanceof Blob) { }else if(image.getContext){
return loadImage(URL.createObjectURL(image), cb);
} else if (image.getContext) {
// canvas element // canvas element
return loadImage(image.getContext('2d'), cb); return loadImage(image.getContext('2d'), cb)
} else if (image.tagName == "IMG" || image.tagName == "VIDEO") { }else if(image.tagName == "IMG" || image.tagName == "VIDEO"){
// image element or video element // image element or video element
var c = document.createElement('canvas'); var c = document.createElement('canvas');
c.width = image.naturalWidth || image.videoWidth; c.width = image.naturalWidth || image.videoWidth;
c.height = image.naturalHeight || image.videoHeight; c.height = image.naturalHeight || image.videoHeight;
var ctx = c.getContext('2d'); var ctx = c.getContext('2d');
ctx.drawImage(image, 0, 0); ctx.drawImage(image, 0, 0);
return loadImage(ctx, cb); return loadImage(ctx, cb)
} else if (image.getImageData) { }else if(image.getImageData){
// canvas context // canvas context
var data = image.getImageData(0, 0, image.canvas.width, image.canvas.height); var data = image.getImageData(0, 0, image.canvas.width, image.canvas.height);
return loadImage(data, cb); return loadImage(data, cb)
} }
cb(image); cb(image)
} }
},{}],2:[function(require,module,exports){ },{}],2:[function(require,module,exports){
"use strict";
// The result of dump.js is a big JSON tree // The result of dump.js is a big JSON tree
// which can be easily serialized (for instance // which can be easily serialized (for instance
// to be sent from a webworker to the main app // to be sent from a webworker to the main app
@ -87,251 +80,212 @@ function loadImage(image, cb) {
// a (circular) DOM-like interface for walking // a (circular) DOM-like interface for walking
// through the data. // through the data.
module.exports = function circularize(page) { module.exports = function circularize(page){
page.paragraphs = []; page.paragraphs = []
page.lines = []; page.lines = []
page.words = []; page.words = []
page.symbols = []; page.symbols = []
page.blocks.forEach(function (block) { page.blocks.forEach(function(block){
block.page = page; block.page = page;
block.lines = []; block.lines = []
block.words = []; block.words = []
block.symbols = []; block.symbols = []
block.paragraphs.forEach(function (para) { block.paragraphs.forEach(function(para){
para.block = block; para.block = block;
para.page = page; para.page = page;
para.words = []; para.words = []
para.symbols = []; para.symbols = []
para.lines.forEach(function (line) { para.lines.forEach(function(line){
line.paragraph = para; line.paragraph = para;
line.block = block; line.block = block;
line.page = page; line.page = page;
line.symbols = []; line.symbols = []
line.words.forEach(function (word) { line.words.forEach(function(word){
word.line = line; word.line = line;
word.paragraph = para; word.paragraph = para;
word.block = block; word.block = block;
word.page = page; word.page = page;
word.symbols.forEach(function (sym) { word.symbols.forEach(function(sym){
sym.word = word; sym.word = word;
sym.line = line; sym.line = line;
sym.paragraph = para; sym.paragraph = para;
sym.block = block; sym.block = block;
sym.page = page; sym.page = page;
sym.line.symbols.push(sym); sym.line.symbols.push(sym)
sym.paragraph.symbols.push(sym); sym.paragraph.symbols.push(sym)
sym.block.symbols.push(sym); sym.block.symbols.push(sym)
sym.page.symbols.push(sym); sym.page.symbols.push(sym)
}); })
word.paragraph.words.push(word); word.paragraph.words.push(word)
word.block.words.push(word); word.block.words.push(word)
word.page.words.push(word); word.page.words.push(word)
}); })
line.block.lines.push(line); line.block.lines.push(line)
line.page.lines.push(line); line.page.lines.push(line)
}); })
para.page.paragraphs.push(para); para.page.paragraphs.push(para)
}); })
}); })
return page; return page
}; }
},{}],3:[function(require,module,exports){ },{}],3:[function(require,module,exports){
"use strict"; "use strict";
var _createClass = function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; }(); var adapter = require('./node/index.js')
var circularize = require('./common/circularize.js')
function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } }
var adapter = require('./node/index.js');
var circularize = require('./common/circularize.js');
function createWorker(workerOptions) { function createWorker(workerOptions){
return new TesseractWorker(workerOptions); return new TesseractWorker(workerOptions)
} }
var TesseractWorker = function () { class TesseractWorker {
function TesseractWorker(workerOptions) { constructor(workerOptions){
_classCallCheck(this, TesseractWorker);
this.worker = null; this.worker = null;
this.workerOptions = workerOptions; this.workerOptions = workerOptions;
this._currentJob = null; this._currentJob = null;
this._queue = []; this._queue = []
} }
_createClass(TesseractWorker, [{ recognize(image, options){
key: 'recognize', return this._delay(job => {
value: function recognize(image, options) { if(typeof options === 'string'){
var _this = this;
return this._delay(function (job) {
if (typeof options === 'string') {
options = { lang: options }; options = { lang: options };
} else { }else{
options = options || {}; options = options || {}
options.lang = options.lang || 'eng'; options.lang = options.lang || 'eng';
} }
job._send('recognize', { image: image, options: options, workerOptions: _this.workerOptions }); job._send('recognize', { image: image, options: options, workerOptions: this.workerOptions })
}); })
} }
}, { detect(image, options){
key: 'detect', options = options || {}
value: function detect(image, options) { return this._delay(job => {
var _this2 = this; job._send('detect', { image: image, options: options, workerOptions: this.workerOptions })
})
options = options || {};
return this._delay(function (job) {
job._send('detect', { image: image, options: options, workerOptions: _this2.workerOptions });
});
} }
}, {
key: 'terminate', terminate(){
value: function terminate() { if(this.worker) adapter.terminateWorker(this);
if (this.worker) adapter.terminateWorker(this);
this.worker = null; this.worker = null;
} }
}, {
key: '_delay',
value: function _delay(fn) {
var _this3 = this;
if (!this.worker) this.worker = adapter.spawnWorker(this, this.workerOptions); _delay(fn){
if(!this.worker) this.worker = adapter.spawnWorker(this, this.workerOptions);
var job = new TesseractJob(this); var job = new TesseractJob(this);
this._queue.push(function (e) { this._queue.push(e => {
_this3._queue.shift(); this._queue.shift()
_this3._currentJob = job; this._currentJob = job;
fn(job); fn(job)
}); })
if (!this._currentJob) this._dequeue(); if(!this._currentJob) this._dequeue();
return job; return job
} }
}, {
key: '_dequeue', _dequeue(){
value: function _dequeue() {
this._currentJob = null; this._currentJob = null;
if (this._queue.length > 0) { if(this._queue.length > 0){
this._queue[0](); this._queue[0]()
} }
} }
}, {
key: '_recv',
value: function _recv(packet) {
if (packet.status === 'resolve' && packet.action === 'recognize') { _recv(packet){
if(packet.status === 'resolve' && packet.action === 'recognize'){
packet.data = circularize(packet.data); packet.data = circularize(packet.data);
} }
if (this._currentJob.id === packet.jobId) { if(this._currentJob.id === packet.jobId){
this._currentJob._handle(packet); this._currentJob._handle(packet)
} else { }else{
console.warn('Job ID ' + packet.jobId + ' not known.'); console.warn('Job ID ' + packet.jobId + ' not known.')
} }
} }
}]); }
return TesseractWorker;
}();
var jobCounter = 0; var jobCounter = 0;
var TesseractJob = function () { class TesseractJob {
function TesseractJob(instance) { constructor(instance){
_classCallCheck(this, TesseractJob); this.id = 'Job-' + (++jobCounter) + '-' + Math.random().toString(16).slice(3, 8)
this.id = 'Job-' + ++jobCounter + '-' + Math.random().toString(16).slice(3, 8);
this._instance = instance; this._instance = instance;
this._resolve = []; this._resolve = []
this._reject = []; this._reject = []
this._progress = []; this._progress = []
} }
_createClass(TesseractJob, [{ then(resolve, reject){
key: 'then', if(this._resolve.push){
value: function then(resolve, reject) { this._resolve.push(resolve)
if (this._resolve.push) { }else{
this._resolve.push(resolve); resolve(this._resolve)
} else {
resolve(this._resolve);
} }
if (reject) this.catch(reject); if(reject) this.catch(reject);
return this; return this;
} }
}, { catch(reject){
key: 'catch', if(this._reject.push){
value: function _catch(reject) { this._reject.push(reject)
if (this._reject.push) { }else{
this._reject.push(reject); reject(this._reject)
} else {
reject(this._reject);
} }
return this; return this;
} }
}, { progress(fn){
key: 'progress', this._progress.push(fn)
value: function progress(fn) {
this._progress.push(fn);
return this; return this;
} }
}, { _send(action, payload){
key: '_send',
value: function _send(action, payload) {
adapter.sendPacket(this._instance, { adapter.sendPacket(this._instance, {
jobId: this.id, jobId: this.id,
action: action, action: action,
payload: payload payload: payload
}); })
} }
}, {
key: '_handle', _handle(packet){
value: function _handle(packet) {
var data = packet.data; var data = packet.data;
if (packet.status === 'resolve') { if(packet.status === 'resolve'){
if (this._resolve.length === 0) console.debug(data); if(this._resolve.length === 0) console.debug(data);
this._resolve.forEach(function (fn) { this._resolve.forEach(fn => {
var ret = fn(data); var ret = fn(data);
if (ret && typeof ret.then == 'function') { if(ret && typeof ret.then == 'function'){
console.warn('TesseractJob instances do not chain like ES6 Promises. To convert it into a real promise, use Promise.resolve.'); console.warn('TesseractJob instances do not chain like ES6 Promises. To convert it into a real promise, use Promise.resolve.')
} }
}); })
this._resolve = data; this._resolve = data;
this._instance._dequeue(); this._instance._dequeue()
} else if (packet.status === 'reject') { }else if(packet.status === 'reject'){
if (this._reject.length === 0) console.error(data); if(this._reject.length === 0) console.error(data);
this._reject.forEach(function (fn) { this._reject.forEach(fn => fn(data))
return fn(data);
});
this._reject = data; this._reject = data;
this._instance._dequeue(); this._instance._dequeue()
} else if (packet.status === 'progress') { }else if(packet.status === 'progress'){
this._progress.forEach(function (fn) { this._progress.forEach(fn => fn(data))
return fn(data); }else{
}); console.warn('Message type unknown', packet.status)
} else {
console.warn('Message type unknown', packet.status);
} }
} }
}]); }
return TesseractJob;
}();
var DefaultTesseract = createWorker(adapter.defaultOptions); var DefaultTesseract = createWorker(adapter.defaultOptions)
DefaultTesseract.createWorker = createWorker; DefaultTesseract.createWorker = createWorker;
module.exports = DefaultTesseract; module.exports = DefaultTesseract
},{"./common/circularize.js":2,"./node/index.js":1}]},{},[3])(3) },{"./common/circularize.js":2,"./node/index.js":1}]},{},[3])(3)
}); });

332
dist/worker.js vendored

@ -11854,38 +11854,33 @@ function extend() {
} }
},{"./has-keys":40,"object-keys":17}],42:[function(require,module,exports){ },{"./has-keys":40,"object-keys":17}],42:[function(require,module,exports){
'use strict'; var leveljs = require('level-js')
var db = typeof indexedDB === 'undefined' ? { open: (_, cb) => cb(true) } : leveljs('./tessdata2')
var leveljs = require('level-js');
var db = typeof indexedDB === 'undefined' ? { open: function open(_, cb) {
return cb(true);
} } : leveljs('./tessdata2');
var langdata = require('../common/langdata.json'); var langdata = require('../common/langdata.json')
module.exports = function getLanguageData(req, res, cb) { module.exports = function getLanguageData(req, res, cb){
var lang = req.options.lang; var lang = req.options.lang;
function saveDataFile(data) { function saveDataFile(data){
db.put(lang, data, function (err) { db.put(lang, data, err => console.log('cached', lang, err))
return console.log('cached', lang, err); cb(data)
});
cb(data);
} }
db.open({ compression: false }, function (err) { db.open({ compression: false }, err => {
if (err) return fetchLanguageData(req, res, cb); if (err) return fetchLanguageData(req, res, cb);
db.get(lang, function (err, data) { db.get(lang, (err, data) => {
if (err) return fetchLanguageData(req, res, saveDataFile); if (err) return fetchLanguageData(req, res, saveDataFile);
res.progress({ status: 'found in cache ' + lang + '.traineddata' }); res.progress({ status: 'found in cache ' + lang + '.traineddata' })
cb(data); cb(data)
}); })
}); })
}; }
var ungzip = require('pako').ungzip; var ungzip = require('pako').ungzip;
function fetchLanguageData(req, res, cb) { function fetchLanguageData(req, res, cb){
var lang = req.options.lang; var lang = req.options.lang;
var langfile = lang + '.traineddata.gz'; var langfile = lang + '.traineddata.gz';
var url = req.workerOptions.langPath + langfile; var url = req.workerOptions.langPath + langfile;
@ -11893,125 +11888,109 @@ function fetchLanguageData(req, res, cb) {
var xhr = new XMLHttpRequest(); var xhr = new XMLHttpRequest();
xhr.responseType = 'arraybuffer'; xhr.responseType = 'arraybuffer';
xhr.open('GET', url, true); xhr.open('GET', url, true);
xhr.onerror = function (e) { xhr.onerror = e => {
xhr.onprogress = xhr.onload = null; xhr.onprogress = xhr.onload = null
cb(xhr, null); cb(xhr, null)
}; }
xhr.onprogress = function (e) { xhr.onprogress = e =>
return res.progress({ res.progress({
status: 'downloading ' + langfile, status: 'downloading ' + langfile,
loaded: e.loaded, loaded: e.loaded,
progress: Math.min(1, e.loaded / langdata[lang]) progress: Math.min(1, e.loaded / langdata[lang])
}); });
};
xhr.onload = function (e) { xhr.onload = e => {
if (!(xhr.status == 200 || xhr.status == 0 && xhr.response)) return res.reject('Error downloading language ' + url); if (!(xhr.status == 200 || (xhr.status == 0 && xhr.response))) return res.reject('Error downloading language ' + url);
res.progress({ status: 'unzipping ' + langfile }); res.progress({ status: 'unzipping ' + langfile })
// in case the gzips are already ungzipped or extra gzipped // in case the gzips are already ungzipped or extra gzipped
var response = new Uint8Array(xhr.response); var response = new Uint8Array(xhr.response)
try { try {
while (response[0] == 0x1f && response[1] == 0x8b) { while(response[0] == 0x1f && response[1] == 0x8b) response = ungzip(response);
response = ungzip(response);
}
} catch (err) { } catch (err) {
return res.reject('Error unzipping language file ' + langfile + '\n' + err.message); return res.reject('Error unzipping language file ' + langfile + '\n' + err.message)
} }
cb(response); cb(response)
}; }
xhr.send(); xhr.send()
} }
},{"../common/langdata.json":46,"level-js":13,"pako":20}],43:[function(require,module,exports){ },{"../common/langdata.json":46,"level-js":13,"pako":20}],43:[function(require,module,exports){
(function (global){ (function (global){
"use strict"; "use strict";
var workerUtils = require('../common/worker.js'); var workerUtils = require('../common/worker.js')
global.addEventListener('message', function (e) { global.addEventListener('message', function(e){
var packet = e.data; var packet = e.data;
workerUtils.dispatchHandlers(packet, function (obj) { workerUtils.dispatchHandlers(packet, obj => postMessage(obj))
return postMessage(obj); })
});
});
exports.getCore = function (req, res) { exports.getCore = function(req, res){
if (!global.TesseractCore) { if(!global.TesseractCore){
res.progress({ status: 'loading tesseract core' }); res.progress({ status: 'loading tesseract core' })
importScripts(req.workerOptions.tesseractPath); importScripts(req.workerOptions.tesseractPath)
res.progress({ status: 'loaded tesseract core' }); res.progress({ status: 'loaded tesseract core' })
} }
return TesseractCore; return TesseractCore
}; }
exports.getLanguageData = require('./lang.js'); exports.getLanguageData = require('./lang.js')
workerUtils.setAdapter(module.exports); workerUtils.setAdapter(module.exports);
}).call(this,typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? window : {}) }).call(this,typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? window : {})
},{"../common/worker.js":47,"./lang.js":42}],44:[function(require,module,exports){ },{"../common/worker.js":47,"./lang.js":42}],44:[function(require,module,exports){
'use strict';
// This converts an image to grayscale // This converts an image to grayscale
module.exports = function desaturate(image) { module.exports = function desaturate(image){
var width, height; var width, height;
if (image.data) { if(image.data){
var src = image.data; var src = image.data;
width = image.width, height = image.height; width = image.width,
height = image.height;
var dst = new Uint8Array(width * height); var dst = new Uint8Array(width * height);
var srcLength = src.length | 0, var srcLength = src.length | 0, srcLength_16 = (srcLength - 16) | 0;
srcLength_16 = srcLength - 16 | 0;
for (var i = 0, j = 0; i <= srcLength_16; i += 16, j += 4) { for (var i = 0, j = 0; i <= srcLength_16; i += 16, j += 4) {
// convert to grayscale 4 pixels at a time; eveything with alpha gets put in front of 50% gray // convert to grayscale 4 pixels at a time; eveything with alpha gets put in front of 50% gray
dst[j] = (src[i] * 77 + src[i + 1] * 151 + src[i + 2] * 28) * src[i + 3] + (255 - src[i + 3] << 15) + 32768 >> 16; dst[j] = (((src[i] * 77 + src[i+1] * 151 + src[i+2] * 28) * src[i+3]) + ((255-src[i+3]) << 15) + 32768) >> 16
dst[j + 1] = (src[i + 4] * 77 + src[i + 5] * 151 + src[i + 6] * 28) * src[i + 7] + (255 - src[i + 7] << 15) + 32768 >> 16; dst[j+1] = (((src[i+4] * 77 + src[i+5] * 151 + src[i+6] * 28) * src[i+7]) + ((255-src[i+7]) << 15) + 32768) >> 16
dst[j + 2] = (src[i + 8] * 77 + src[i + 9] * 151 + src[i + 10] * 28) * src[i + 11] + (255 - src[i + 11] << 15) + 32768 >> 16; dst[j+2] = (((src[i+8] * 77 + src[i+9] * 151 + src[i+10] * 28) * src[i+11]) + ((255-src[i+11]) << 15) + 32768) >> 16
dst[j + 3] = (src[i + 12] * 77 + src[i + 13] * 151 + src[i + 14] * 28) * src[i + 15] + (255 - src[i + 15] << 15) + 32768 >> 16; dst[j+3] = (((src[i+12] * 77 + src[i+13] * 151 + src[i+14] * 28) * src[i+15]) + ((255-src[i+15]) << 15) + 32768) >> 16
}
for (; i < srcLength; i += 4, ++j) {
//finish up
dst[j] = (src[i] * 77 + src[i + 1] * 151 + src[i + 2] * 28) * src[i + 3] + (255 - src[i + 3] << 15) + 32768 >> 16;
}image = dst;
} else {
throw 'Invalid ImageData';
} }
return image; for (; i < srcLength; i += 4, ++j) //finish up
}; dst[j] = (((src[i] * 77 + src[i+1] * 151 + src[i+2] * 28) * src[i+3]) + ((255-src[i+3]) << 15) + 32768) >> 16
image = dst;
} else { throw 'Invalid ImageData' }
return image
}
},{}],45:[function(require,module,exports){ },{}],45:[function(require,module,exports){
'use strict'; module.exports = function DumpLiterallyEverything(Module, base){
module.exports = function DumpLiterallyEverything(Module, base) {
var ri = base.GetIterator(); var ri = base.GetIterator();
var blocks = []; var blocks = [];
var block, para, textline, word, symbol; var block, para, textline, word, symbol;
function enumToString(value, prefix) { function enumToString(value, prefix){
return Object.keys(Module).filter(function (e) { return (Object.keys(Module)
return e.substr(0, prefix.length + 1) == prefix + '_'; .filter(function(e){ return e.substr(0, prefix.length + 1) == prefix + '_' })
}).filter(function (e) { .filter(function(e){ return Module[e] === value })
return Module[e] === value; .map(function(e){ return e.slice(prefix.length + 1) })[0])
}).map(function (e) {
return e.slice(prefix.length + 1);
})[0];
} }
ri.Begin(); ri.Begin()
do { do {
if (ri.IsAtBeginningOf(Module.RIL_BLOCK)) { if(ri.IsAtBeginningOf(Module.RIL_BLOCK)){
var poly = ri.BlockPolygon(); var poly = ri.BlockPolygon();
var polygon = null; var polygon = null;
// BlockPolygon() returns null when automatic page segmentation is off // BlockPolygon() returns null when automatic page segmentation is off
if (Module.getPointer(poly) > 0) { if(Module.getPointer(poly) > 0){
var n = poly.get_n(), var n = poly.get_n(),
px = poly.get_x(), px = poly.get_x(),
py = poly.get_y(), py = poly.get_y(),
polygon = []; polygon = [];
for (var i = 0; i < n; i++) { for(var i = 0; i < n; i++){
polygon.push([px.getValue(i), py.getValue(i)]); polygon.push([px.getValue(i), py.getValue(i)]);
} }
Module._ptaDestroy(Module.getPointer(poly)); Module._ptaDestroy(Module.getPointer(poly));
@ -12027,10 +12006,10 @@ module.exports = function DumpLiterallyEverything(Module, base) {
blocktype: enumToString(ri.BlockType(), 'PT'), blocktype: enumToString(ri.BlockType(), 'PT'),
polygon: polygon polygon: polygon
};
blocks.push(block);
} }
if (ri.IsAtBeginningOf(Module.RIL_PARA)) { blocks.push(block)
}
if(ri.IsAtBeginningOf(Module.RIL_PARA)){
para = { para = {
lines: [], lines: [],
@ -12040,10 +12019,10 @@ module.exports = function DumpLiterallyEverything(Module, base) {
bbox: ri.getBoundingBox(Module.RIL_PARA), bbox: ri.getBoundingBox(Module.RIL_PARA),
is_ltr: !!ri.ParagraphIsLtr() is_ltr: !!ri.ParagraphIsLtr()
};
block.paragraphs.push(para);
} }
if (ri.IsAtBeginningOf(Module.RIL_TEXTLINE)) { block.paragraphs.push(para)
}
if(ri.IsAtBeginningOf(Module.RIL_TEXTLINE)){
textline = { textline = {
words: [], words: [],
@ -12051,10 +12030,10 @@ module.exports = function DumpLiterallyEverything(Module, base) {
confidence: ri.Confidence(Module.RIL_TEXTLINE), confidence: ri.Confidence(Module.RIL_TEXTLINE),
baseline: ri.getBaseline(Module.RIL_TEXTLINE), baseline: ri.getBaseline(Module.RIL_TEXTLINE),
bbox: ri.getBoundingBox(Module.RIL_TEXTLINE) bbox: ri.getBoundingBox(Module.RIL_TEXTLINE)
};
para.lines.push(textline);
} }
if (ri.IsAtBeginningOf(Module.RIL_WORD)) { para.lines.push(textline)
}
if(ri.IsAtBeginningOf(Module.RIL_WORD)){
var fontInfo = ri.getWordFontAttributes(), var fontInfo = ri.getWordFontAttributes(),
wordDir = ri.WordDirection(); wordDir = ri.WordDirection();
word = { word = {
@ -12079,17 +12058,17 @@ module.exports = function DumpLiterallyEverything(Module, base) {
is_smallcaps: fontInfo.is_smallcaps, is_smallcaps: fontInfo.is_smallcaps,
font_size: fontInfo.pointsize, font_size: fontInfo.pointsize,
font_id: fontInfo.font_id, font_id: fontInfo.font_id,
font_name: fontInfo.font_name font_name: fontInfo.font_name,
}; }
var wc = new Module.WordChoiceIterator(ri); var wc = new Module.WordChoiceIterator(ri);
do { do {
word.choices.push({ word.choices.push({
text: wc.GetUTF8Text(), text: wc.GetUTF8Text(),
confidence: wc.Confidence() confidence: wc.Confidence()
}); })
} while (wc.Next()); } while (wc.Next());
Module.destroy(wc); Module.destroy(wc)
textline.words.push(word); textline.words.push(word)
} }
var image = null; var image = null;
@ -12097,7 +12076,7 @@ module.exports = function DumpLiterallyEverything(Module, base) {
// var image = pix2array(pix); // var image = pix2array(pix);
// // for some reason it seems that things stop working if you destroy pics // // for some reason it seems that things stop working if you destroy pics
// Module._pixDestroy(Module.getPointer(pix)); // Module._pixDestroy(Module.getPointer(pix));
if (ri.IsAtBeginningOf(Module.RIL_SYMBOL)) { if(ri.IsAtBeginningOf(Module.RIL_SYMBOL)){
symbol = { symbol = {
choices: [], choices: [],
image: image, image: image,
@ -12109,20 +12088,20 @@ module.exports = function DumpLiterallyEverything(Module, base) {
is_superscript: !!ri.SymbolIsSuperscript(), is_superscript: !!ri.SymbolIsSuperscript(),
is_subscript: !!ri.SymbolIsSubscript(), is_subscript: !!ri.SymbolIsSubscript(),
is_dropcap: !!ri.SymbolIsDropcap() is_dropcap: !!ri.SymbolIsDropcap(),
}; }
word.symbols.push(symbol); word.symbols.push(symbol)
var ci = new Module.ChoiceIterator(ri); var ci = new Module.ChoiceIterator(ri);
do { do {
symbol.choices.push({ symbol.choices.push({
text: ci.GetUTF8Text(), text: ci.GetUTF8Text(),
confidence: ci.Confidence() confidence: ci.Confidence()
}); })
} while (ci.Next()); } while (ci.Next());
Module.destroy(ci); Module.destroy(ci)
} }
} while (ri.Next(Module.RIL_SYMBOL)); } while (ri.Next(Module.RIL_SYMBOL));
Module.destroy(ri); Module.destroy(ri)
return { return {
text: base.GetUTF8Text(), text: base.GetUTF8Text(),
@ -12134,116 +12113,120 @@ module.exports = function DumpLiterallyEverything(Module, base) {
psm: enumToString(base.GetPageSegMode(), 'PSM'), psm: enumToString(base.GetPageSegMode(), 'PSM'),
oem: enumToString(base.oem(), 'OEM'), oem: enumToString(base.oem(), 'OEM'),
version: base.Version() version: base.Version(),
}; }
}; }
// the generated HOCR is excessively indented, so // the generated HOCR is excessively indented, so
// we get rid of that indentation // we get rid of that indentation
function deindent(html) { function deindent(html){
var lines = html.split('\n'); var lines = html.split('\n')
if (lines[0].substring(0, 2) === " ") { if(lines[0].substring(0, 2) === " "){
for (var i = 0; i < lines.length; i++) { for (var i = 0; i < lines.length; i++) {
if (lines[i].substring(0, 2) === " ") { if (lines[i].substring(0,2) === " ") {
lines[i] = lines[i].slice(2); lines[i] = lines[i].slice(2)
} }
}; };
} }
return lines.join('\n'); return lines.join('\n')
} }
},{}],46:[function(require,module,exports){ },{}],46:[function(require,module,exports){
module.exports={"afr": 1079573, "ara": 1701536, "aze": 1420865, "bel": 1276820, "ben": 6772012, "bul": 1605615, "cat": 1652368, "ces": 1035441, "chi_sim": 17710414, "chi_tra": 24717749, "chr": 320649, "dan-frak": 677656, "dan": 1972936, "deu-frak": 822644, "deu": 991656, "ell": 859719, "eng": 9453554, "enm": 619254, "epo": 1241212, "equ": 821130, "est": 1905040, "eus": 1641190, "fin": 979418, "fra": 1376221, "frk": 5912963, "frm": 5147082, "glg": 1674938, "grc": 3012615, "heb": 1051501, "hin": 6590065, "hrv": 1926995, "hun": 3074473, "ind": 1874776, "isl": 1634041, "ita": 948593, "ita_old": 3436571, "jpn": 13507168, "kan": 4390317, "kor": 5353098, "lav": 1843944, "lit": 1779240, "mal": 5966263, "meme": 88453, "mkd": 1163087, "mlt": 1463001, "msa": 1665427, "nld": 1134708, "nor": 2191610, "osd": 4274649, "pol": 7024662, "por": 909359, "ron": 915680, "rus": 5969957, "slk-frak": 289885, "slk": 2217342, "slv": 1611338, "spa": 883170, "spa_old": 5647453, "sqi": 1667041, "srp": 1770244, "swa": 757916, "swe": 2451917, "tam": 3498763, "tel": 5795246, "tgl": 1496256, "tha": 3811136, "tur": 3563264, "ukr": 937566, "vie": 2195922} module.exports={"afr": 1079573, "ara": 1701536, "aze": 1420865, "bel": 1276820, "ben": 6772012, "bul": 1605615, "cat": 1652368, "ces": 1035441, "chi_sim": 17710414, "chi_tra": 24717749, "chr": 320649, "dan-frak": 677656, "dan": 1972936, "deu-frak": 822644, "deu": 991656, "ell": 859719, "eng": 9453554, "enm": 619254, "epo": 1241212, "equ": 821130, "est": 1905040, "eus": 1641190, "fin": 979418, "fra": 1376221, "frk": 5912963, "frm": 5147082, "glg": 1674938, "grc": 3012615, "heb": 1051501, "hin": 6590065, "hrv": 1926995, "hun": 3074473, "ind": 1874776, "isl": 1634041, "ita": 948593, "ita_old": 3436571, "jpn": 13507168, "kan": 4390317, "kor": 5353098, "lav": 1843944, "lit": 1779240, "mal": 5966263, "meme": 88453, "mkd": 1163087, "mlt": 1463001, "msa": 1665427, "nld": 1134708, "nor": 2191610, "osd": 4274649, "pol": 7024662, "por": 909359, "ron": 915680, "rus": 5969957, "slk-frak": 289885, "slk": 2217342, "slv": 1611338, "spa": 883170, "spa_old": 5647453, "sqi": 1667041, "srp": 1770244, "swa": 757916, "swe": 2451917, "tam": 3498763, "tel": 5795246, "tgl": 1496256, "tha": 3811136, "tur": 3563264, "ukr": 937566, "vie": 2195922}
},{}],47:[function(require,module,exports){ },{}],47:[function(require,module,exports){
'use strict';
var latestJob; var latestJob;
var Module; var Module;
var base; var base;
var adapter = {}; var adapter = {};
function dispatchHandlers(packet, send) { function dispatchHandlers(packet, send){
function respond(status, data) { function respond(status, data){
send({ send({
jobId: packet.jobId, jobId: packet.jobId,
status: status, status: status,
action: packet.action, action: packet.action,
data: data data: data
}); })
} }
respond.resolve = respond.bind(this, 'resolve'); respond.resolve = respond.bind(this, 'resolve')
respond.reject = respond.bind(this, 'reject'); respond.reject = respond.bind(this, 'reject')
respond.progress = respond.bind(this, 'progress'); respond.progress = respond.bind(this, 'progress')
latestJob = respond; latestJob = respond;
if (packet.action === 'recognize') { if(packet.action === 'recognize'){
handleRecognize(packet.payload, respond); handleRecognize(packet.payload, respond)
} else if (packet.action === 'detect') { }else if(packet.action === 'detect'){
handleDetect(packet.payload, respond); handleDetect(packet.payload, respond)
} }
} }
exports.dispatchHandlers = dispatchHandlers; exports.dispatchHandlers = dispatchHandlers;
exports.setAdapter = function setAdapter(impl) { exports.setAdapter = function setAdapter(impl){
adapter = impl; adapter = impl;
}; }
function handleInit(req, res) { function handleInit(req, res){
if (!Module) { if(!Module){
var Core = adapter.getCore(req, res); var Core = adapter.getCore(req, res);
res.progress({ status: 'initializing tesseract api' }); res.progress({ status: 'initializing tesseract api' })
Module = Core({ Module = Core({
TOTAL_MEMORY: req.memory, TOTAL_MEMORY: req.memory,
TesseractProgress: function TesseractProgress(percent) { TesseractProgress(percent){
latestJob.progress({ status: 'recognizing text', progress: Math.max(0, (percent - 30) / 70) }); latestJob.progress({ status: 'recognizing text', progress: Math.max(0, (percent-30)/70) })
}, },
onRuntimeInitialized: function onRuntimeInitialized() {} onRuntimeInitialized() {}
}); })
Module.FS_createPath("/", "tessdata", true, true); Module.FS_createPath("/", "tessdata", true, true)
base = new Module.TessBaseAPI(); base = new Module.TessBaseAPI()
res.progress({ status: 'initialized tesseract api' }); res.progress({ status: 'initialized tesseract api' })
} }
} }
var dump = require('./dump.js');
var desaturate = require('./desaturate.js');
function setImage(Module, base, image) {
var dump = require('./dump.js')
var desaturate = require('./desaturate.js')
function setImage(Module, base, image){
var imgbin = desaturate(image), var imgbin = desaturate(image),
width = image.width, width = image.width,
height = image.height; height = image.height;
var ptr = Module.allocate(imgbin, 'i8', Module.ALLOC_NORMAL); var ptr = Module.allocate(imgbin, 'i8', Module.ALLOC_NORMAL);
base.SetImage(Module.wrapPointer(ptr), width, height, 1, width); base.SetImage(Module.wrapPointer(ptr), width, height, 1, width);
base.SetRectangle(0, 0, width, height); base.SetRectangle(0, 0, width, height)
return ptr; return ptr;
} }
function loadLanguage(req, res, cb) { function loadLanguage(req, res, cb){
var lang = req.options.lang; var lang = req.options.lang;
if (!Module._loadedLanguages) Module._loadedLanguages = {}; if(!Module._loadedLanguages) Module._loadedLanguages = {};
if (lang in Module._loadedLanguages) return cb(); if(lang in Module._loadedLanguages) return cb();
adapter.getLanguageData(req, res, function (data) { adapter.getLanguageData(req, res, function(data){
Module.FS_createDataFile('tessdata', lang + ".traineddata", data, true, false); Module.FS_createDataFile('tessdata', lang + ".traineddata", data, true, false);
res.progress({ status: 'loaded ' + lang + '.traineddata' }); res.progress({ status: 'loaded ' + lang + '.traineddata' })
Module._loadedLanguages[lang] = true; Module._loadedLanguages[lang] = true;
cb(); cb()
}); })
} }
function handleRecognize(req, res) {
handleInit(req, res);
loadLanguage(req, res, function () {
function handleRecognize(req, res){
handleInit(req, res)
loadLanguage(req, res, function(){
var lang = req.options.lang; var lang = req.options.lang;
base.Init(null, lang); base.Init(null, lang)
res.progress({ status: 'initialized with language' }); res.progress({ status: 'initialized with language' })
var options = req.options; var options = req.options;
for (var option in options) { for (var option in options) {
@ -12253,37 +12236,38 @@ function handleRecognize(req, res) {
} }
var ptr = setImage(Module, base, req.image); var ptr = setImage(Module, base, req.image);
base.Recognize(null); base.Recognize(null)
var result = dump(Module, base); var result = dump(Module, base)
base.End(); base.End();
Module._free(ptr); Module._free(ptr);
res.resolve(result); res.resolve(result);
}); })
} }
function handleDetect(req, res) {
handleInit(req, res); function handleDetect(req, res){
handleInit(req, res)
req.options.lang = 'osd'; req.options.lang = 'osd';
loadLanguage(req, res, function () { loadLanguage(req, res, function(){
base.Init(null, 'osd'); base.Init(null, 'osd')
base.SetPageSegMode(Module.PSM_OSD_ONLY); base.SetPageSegMode(Module.PSM_OSD_ONLY)
var ptr = setImage(Module, base, req.image); var ptr = setImage(Module, base, req.image);
var results = new Module.OSResults(); var results = new Module.OSResults();
var success = base.DetectOS(results); var success = base.DetectOS(results);
if (!success) { if(!success){
base.End(); base.End();
Module._free(ptr); Module._free(ptr);
res.reject("failed to detect os"); res.reject("failed to detect os")
} else { } else {
var charset = results.get_unicharset(); var charset = results.get_unicharset()
var best = results.get_best_result(); var best = results.get_best_result()
var oid = best.get_orientation_id(), var oid = best.get_orientation_id(),
sid = best.get_script_id(); sid = best.get_script_id();
@ -12293,14 +12277,14 @@ function handleDetect(req, res) {
script_confidence: best.get_sconfidence(), script_confidence: best.get_sconfidence(),
orientation_degrees: [0, 270, 180, 90][oid], orientation_degrees: [0, 270, 180, 90][oid],
orientation_confidence: best.get_oconfidence() orientation_confidence: best.get_oconfidence()
}; }
base.End(); base.End();
Module._free(ptr); Module._free(ptr);
res.resolve(result); res.resolve(result)
} }
}); })
} }
},{"./desaturate.js":44,"./dump.js":45}]},{},[43]); },{"./desaturate.js":44,"./dump.js":45}]},{},[43]);

Loading…
Cancel
Save