Browse Source

ES6 and code cleanup (#41)

* Fixed syntax error

and added process.exit() to the examples

* ES6 and code cleanup

Arrow functions were already used sporadically, so the Node parts are
evidently meant to be ES6-compatible; applied ES6 syntax where applicable.

Spacing and semicolons

Version 1.0.8

* revert changes to dist

* attempt to use standard spacing

* resolve conflict
pull/73/head
Kiran Kota 8 years ago committed by Guillermo Webster
parent
commit
91f9be7e81
  1. 25
      examples/node/basic.js
  2. 19
      examples/node/detect.js
  3. 2
      package.json
  4. 126
      src/common/worker.js
  5. 46
      src/index.js
  6. 72
      src/node/index.js
  7. 17
      src/node/lang.js

25
examples/node/basic.js

@ -1,14 +1,15 @@
var path = require('path'); // replace this with require('tesseract.js')
var Tesseract = require('../../') // replace this with require('tesseract.js') var Tesseract = require('../../'),
var image = path.resolve(__dirname, 'cosmic.png'); image = require('path').resolve(__dirname, 'cosmic.png');
Tesseract.recognize(image) Tesseract.recognize(image)
.then(data => { .then(data => {
console.log('then\n', data.text) console.log('then\n', data.text)
}) })
.catch(err => { .catch(err => {
console.log('catch\n', err); console.log('catch\n', err);
}) })
.finally(e => { .finally(e => {
console.log('finally\n'); console.log('finally\n');
}); process.exit();
});

19
examples/node/detect.js

@ -1,11 +1,12 @@
var path = require('path'); // replace this with require('tesseract.js')
var Tesseract = require('../../') // replace this with require('tesseract.js') var Tesseract = require('../../'),
var image = path.resolve(__dirname, 'cosmic.png'); image = require('path').resolve(__dirname, 'cosmic.png');
Tesseract.detect(image) Tesseract.detect(image)
.progress(function(info){ .progress(function(info){
console.log(info) console.log(info);
}) })
.then(function(data){ .then(function(data){
console.log('done', data) console.log('done', data);
}) process.exit();
})

2
package.json

@ -41,4 +41,4 @@
"url": "https://github.com/naptha/tesseract.js/issues" "url": "https://github.com/naptha/tesseract.js/issues"
}, },
"homepage": "https://github.com/naptha/tesseract.js" "homepage": "https://github.com/naptha/tesseract.js"
} }

126
src/common/worker.js

@ -1,28 +1,30 @@
var latestJob; var latestJob,
var Module; Module,
var base; base,
var adapter = {}; adapter = {},
dump = require('./dump.js'),
desaturate = require('./desaturate.js');
function dispatchHandlers(packet, send){ function dispatchHandlers(packet, send){
function respond(status, data){ function respond(status, data){
send({ send({
jobId: packet.jobId, jobId: packet.jobId,
status: status, status,
action: packet.action, action: packet.action,
data: data data
}) });
} }
respond.resolve = respond.bind(this, 'resolve') respond.resolve = respond.bind(this, 'resolve');
respond.reject = respond.bind(this, 'reject') respond.reject = respond.bind(this, 'reject');
respond.progress = respond.bind(this, 'progress') respond.progress = respond.bind(this, 'progress');
latestJob = respond; latestJob = respond;
try { try {
if(packet.action === 'recognize'){ if(packet.action === 'recognize'){
handleRecognize(packet.payload, respond) handleRecognize(packet.payload, respond);
}else if(packet.action === 'detect'){ } else if (packet.action === 'detect'){
handleDetect(packet.payload, respond) handleDetect(packet.payload, respond);
} }
} catch (err) { } catch (err) {
respond.reject(err) respond.reject(err)
@ -32,13 +34,13 @@ exports.dispatchHandlers = dispatchHandlers;
exports.setAdapter = function setAdapter(impl){ exports.setAdapter = function setAdapter(impl){
adapter = impl; adapter = impl;
} };
function handleInit(req, res){ function handleInit(req, res){
var MIN_MEMORY = 100663296; var MIN_MEMORY = 100663296;
if(['chi_sim', 'chi_tra', 'jpn'].indexOf(req.options.lang) != -1){ if(['chi_sim', 'chi_tra', 'jpn'].includes(req.options.lang)){
MIN_MEMORY = 167772160; MIN_MEMORY = 167772160;
} }
@ -50,23 +52,17 @@ function handleInit(req, res){
Module = Core({ Module = Core({
TOTAL_MEMORY: MIN_MEMORY, TOTAL_MEMORY: MIN_MEMORY,
TesseractProgress(percent){ TesseractProgress(percent){
latestJob.progress({ status: 'recognizing text', progress: Math.max(0, (percent-30)/70) }) latestJob.progress({ status: 'recognizing text', progress: Math.max(0, (percent-30)/70) });
}, },
onRuntimeInitialized() {} onRuntimeInitialized() {}
}) });
Module.FS_createPath("/", "tessdata", true, true) Module.FS_createPath("/", "tessdata", true, true);
base = new Module.TessBaseAPI() base = new Module.TessBaseAPI();
res.progress({ status: 'initializing tesseract', progress: 1 }) res.progress({ status: 'initializing tesseract', progress: 1 });
} }
} }
var dump = require('./dump.js')
var desaturate = require('./desaturate.js')
function setImage(Module, base, image){ function setImage(Module, base, image){
var imgbin = desaturate(image), var imgbin = desaturate(image),
width = image.width, width = image.width,
@ -74,51 +70,55 @@ function setImage(Module, base, image){
var ptr = Module.allocate(imgbin, 'i8', Module.ALLOC_NORMAL); var ptr = Module.allocate(imgbin, 'i8', Module.ALLOC_NORMAL);
base.SetImage(Module.wrapPointer(ptr), width, height, 1, width); base.SetImage(Module.wrapPointer(ptr), width, height, 1, width);
base.SetRectangle(0, 0, width, height) base.SetRectangle(0, 0, width, height);
return ptr; return ptr;
} }
function loadLanguage(req, res, cb){ function loadLanguage(req, res, cb){
var lang = req.options.lang; var lang = req.options.lang,
langFile = lang + '.traineddata';
if(!Module._loadedLanguages) Module._loadedLanguages = {}; if(!Module._loadedLanguages) Module._loadedLanguages = {};
if(lang in Module._loadedLanguages) return cb(); if(lang in Module._loadedLanguages) return cb();
adapter.getLanguageData(req, res, function(data){ adapter.getLanguageData(req, res, function(data){
res.progress({ status: 'loading ' + lang + '.traineddata', progress: 0 }) res.progress({ status: 'loading ' + langFile, progress: 0 });
Module.FS_createDataFile('tessdata', lang + ".traineddata", data, true, false); Module.FS_createDataFile('tessdata', langFile, data, true, false);
Module._loadedLanguages[lang] = true; Module._loadedLanguages[lang] = true;
res.progress({ status: 'loading ' + lang + '.traineddata', progress: 1 }) res.progress({ status: 'loading ' + langFile, progress: 1 });
cb() cb();
}) })
} }
function handleRecognize(req, res){ function handleRecognize(req, res){
handleInit(req, res) handleInit(req, res);
loadLanguage(req, res, function(){ loadLanguage(req, res, () => {
var lang = req.options.lang; var options = req.options;
res.progress({ status: 'initializing api', progress: 0 }) function progressUpdate(progress){
base.Init(null, lang) res.progress({ status: 'initializing api', progress: progress });
res.progress({ status: 'initializing api', progress: 0.3 }) }
progressUpdate(0);
base.Init(null, req.options.lang);
progressUpdate(.3);
var options = req.options;
for (var option in options) { for (var option in options) {
if (options.hasOwnProperty(option)) { if (options.hasOwnProperty(option)) {
base.SetVariable(option, options[option]); base.SetVariable(option, options[option]);
} }
} }
res.progress({ status: 'initializing api', progress: 0.6 }) progressUpdate(.6);
var ptr = setImage(Module, base, req.image); var ptr = setImage(Module, base, req.image);
res.progress({ status: 'initializing api', progress: 1 }) progressUpdate(1);
base.Recognize(null) base.Recognize(null);
var result = dump(Module, base) var result = dump(Module, base);
base.End(); base.End();
Module._free(ptr); Module._free(ptr);
@ -129,40 +129,34 @@ function handleRecognize(req, res){
function handleDetect(req, res){ function handleDetect(req, res){
handleInit(req, res) handleInit(req, res);
req.options.lang = 'osd'; req.options.lang = 'osd';
loadLanguage(req, res, function(){ loadLanguage(req, res, () => {
base.Init(null, 'osd');
base.SetPageSegMode(Module.PSM_OSD_ONLY);
base.Init(null, 'osd') var ptr = setImage(Module, base, req.image),
base.SetPageSegMode(Module.PSM_OSD_ONLY) results = new Module.OSResults();
var ptr = setImage(Module, base, req.image);
var results = new Module.OSResults(); if(!base.DetectOS(results)){
var success = base.DetectOS(results);
if(!success){
base.End(); base.End();
Module._free(ptr); Module._free(ptr);
res.reject("failed to detect os") res.reject("Failed to detect OS");
} else { } else {
var charset = results.get_unicharset() var best = results.get_best_result(),
oid = best.get_orientation_id(),
var best = results.get_best_result()
var oid = best.get_orientation_id(),
sid = best.get_script_id(); sid = best.get_script_id();
var result = { base.End();
Module._free(ptr);
res.resolve({
tesseract_script_id: sid, tesseract_script_id: sid,
script: charset.get_script_from_script_id(sid), script: results.get_unicharset().get_script_from_script_id(sid),
script_confidence: best.get_sconfidence(), script_confidence: best.get_sconfidence(),
orientation_degrees: [0, 270, 180, 90][oid], orientation_degrees: [0, 270, 180, 90][oid],
orientation_confidence: best.get_oconfidence() orientation_confidence: best.get_oconfidence()
} });
base.End();
Module._free(ptr);
res.resolve(result)
} }
}) });
} }

46
src/index.js

@ -1,15 +1,10 @@
const adapter = require('./node/index.js') const adapter = require('./node/index.js')
const circularize = require('./common/circularize.js') const circularize = require('./common/circularize.js')
const TesseractJob = require('./common/job'); const TesseractJob = require('./common/job');
const objectAssign = require('object-assign');
const version = require('../package.json').version; const version = require('../package.json').version;
function create(workerOptions){ function create(workerOptions = {}){
workerOptions = workerOptions || {}; return new TesseractWorker(Object.assign({}, adapter.defaultOptions, workerOptions, {create, version}));
var worker = new TesseractWorker(objectAssign({}, adapter.defaultOptions, workerOptions))
worker.create = create;
worker.version = version;
return worker;
} }
class TesseractWorker { class TesseractWorker {
@ -17,25 +12,19 @@ class TesseractWorker {
this.worker = null; this.worker = null;
this.workerOptions = workerOptions; this.workerOptions = workerOptions;
this._currentJob = null; this._currentJob = null;
this._queue = [] this._queue = [];
} }
recognize(image, options){ recognize(image, options = {}){
return this._delay(job => { return this._delay(job => {
if(typeof options === 'string'){ options.lang = options.lang || 'eng';
options = { lang: options };
}else{
options = options || {}
options.lang = options.lang || 'eng';
}
job._send('recognize', { image: image, options: options, workerOptions: this.workerOptions }) job._send('recognize', { image, options, workerOptions: this.workerOptions });
}) })
} }
detect(image, options){ detect(image, options = {}){
options = options || {}
return this._delay(job => { return this._delay(job => {
job._send('detect', { image: image, options: options, workerOptions: this.workerOptions }) job._send('detect', { image, options, workerOptions: this.workerOptions });
}) })
} }
@ -49,35 +38,32 @@ class TesseractWorker {
var job = new TesseractJob(this); var job = new TesseractJob(this);
this._queue.push(e => { this._queue.push(e => {
this._queue.shift() this._queue.shift();
this._currentJob = job; this._currentJob = job;
fn(job) fn(job);
}) });
if(!this._currentJob) this._dequeue(); if(!this._currentJob) this._dequeue();
return job return job;
} }
_dequeue(){ _dequeue(){
this._currentJob = null; this._currentJob = null;
if(this._queue.length > 0){ if(this._queue.length){
this._queue[0]() this._queue[0]();
} }
} }
_recv(packet){ _recv(packet){
if(packet.status === 'resolve' && packet.action === 'recognize'){ if(packet.status === 'resolve' && packet.action === 'recognize'){
packet.data = circularize(packet.data); packet.data = circularize(packet.data);
} }
if(this._currentJob.id === packet.jobId){ if(this._currentJob.id === packet.jobId){
this._currentJob._handle(packet) this._currentJob._handle(packet)
}else{ } else {
console.warn('Job ID ' + packet.jobId + ' not known.') console.warn('Job ID ' + packet.jobId + ' not known.')
} }
} }
} }
var DefaultTesseract = create() module.exports = create();
module.exports = DefaultTesseract

72
src/node/index.js

@ -1,56 +1,50 @@
const path = require('path') const fetch = require('isomorphic-fetch'),
const fetch = require('node-fetch') isURL = require('is-url'),
const isURL = require('is-url') fork = require('child_process').fork,
fs = require('fs');
exports.defaultOptions = { exports.defaultOptions = {
workerPath: path.join(__dirname, 'worker.js'), workerPath: require('path').join(__dirname, 'worker.js'),
langPath: 'http://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/', langPath: 'http://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/',
} }
const fork = require('child_process').fork;
const fs = require('fs')
exports.spawnWorker = function spawnWorker(instance, workerOptions){ exports.spawnWorker = function spawnWorker(instance, workerOptions){
var cp = fork(workerOptions.workerPath); var cp = fork(workerOptions.workerPath);
cp.on('message', function(packet){ cp.on('message', packet => {
instance._recv(packet) instance._recv(packet);
}) });
return cp; return cp;
} }
exports.terminateWorker = function(instance){ exports.terminateWorker = function(instance){
instance.worker.kill() instance.worker.kill();
} }
exports.sendPacket = function sendPacket(instance, packet){ exports.sendPacket = function sendPacket(instance, packet){
loadImage(packet.payload.image, function(img){ loadImage(packet.payload.image, img => {
packet.payload.image = img packet.payload.image = img;
instance.worker.send(packet) instance.worker.send(packet);
}) });
} }
function loadImage(image, cb){ function loadImage(image, cb){
if(isURL(image)) { if(isURL(image)) {
fetch(image).then(function (resp) { fetch(image)
return resp.buffer(); .then(resp => resp.buffer())
}).then(function (buffer) { .then(buffer => loadImage(buffer, cb))
return loadImage(buffer, cb); .catch(err => console.error(err));
}).catch(function (err) {
return console.error(err);
});
} }
if(typeof image === 'string'){ if(typeof image === 'string'){
fs.readFile(image, function(err, buffer){ fs.readFile(image, function(err, buffer){
if (err) throw err; if (err) throw err;
loadImage(buffer, cb) loadImage(buffer, cb);
}) });
return return;
}else if(image instanceof Buffer){ } else if (image instanceof Buffer){
var fileType = require('file-type'); var mime = require('file-type')(image).mime
var mime = fileType(image).mime
if(mime === 'image/png'){ if(mime === 'image/png'){
var PNGReader = require('png.js'); var PNGReader = require('png.js');
@ -68,20 +62,18 @@ function loadImage(image, cb){
var offset = 4 * (i + j * image.width), var offset = 4 * (i + j * image.width),
pix = png.getPixel(i, j); pix = png.getPixel(i, j);
image.data[offset] = pix[0] image.data[offset] = pix[0];
image.data[offset + 1] = pix[1] image.data[offset + 1] = pix[1];
image.data[offset + 2] = pix[2] image.data[offset + 2] = pix[2];
image.data[offset + 3] = pix[3]; image.data[offset + 3] = pix[3];
} }
} }
// console.log(image) loadImage(image, cb);
loadImage(image, cb)
}); });
return return;
}else if(mime === 'image/jpeg'){ } else if (mime === 'image/jpeg'){
var jpeg = require('jpeg-js'); loadImage(require('jpeg-js').decode(image), cb);
loadImage(jpeg.decode(image), cb) return;
return
} }
// TODO: support for TIFF, NetPBM, BMP, etc. // TODO: support for TIFF, NetPBM, BMP, etc.
@ -90,8 +82,8 @@ function loadImage(image, cb){
// node uses json.stringify for ipc which means we need to turn // node uses json.stringify for ipc which means we need to turn
// fancy arrays into raw arrays // fancy arrays into raw arrays
if(image && image.data && image.data.length && !Array.isArray(image.data)){ if(image && image.data && image.data.length && !Array.isArray(image.data)){
image.data = Array.from(image.data) image.data = Array.from(image.data);
return loadImage(image, cb) return loadImage(image, cb)
} }
cb(image) cb(image);
} }

17
src/node/lang.js

@ -6,29 +6,30 @@ const http = require("http"),
var langdata = require('../common/langdata.json') var langdata = require('../common/langdata.json')
function getLanguageData(req, res, cb){ function getLanguageData(req, res, cb){
var lang = req.options.lang; var lang = req.options.lang,
var langfile = lang + '.traineddata.gz'; langfile = lang + '.traineddata.gz';
var url = req.workerOptions.langPath + langfile;
fs.readFile(lang + '.traineddata', function (err, data) { fs.readFile(lang + '.traineddata', function (err, data) {
if(!err) return cb(new Uint8Array(data)); if(!err) return cb(new Uint8Array(data));
http.get(url, function(stream){ http.get(req.workerOptions.langPath + langfile, stream => {
var received_bytes = 0; var received_bytes = 0;
stream.on('data', function(chunk) { stream.on('data', function(chunk) {
received_bytes += chunk.length; received_bytes += chunk.length;
res.progress({ res.progress({
status: 'downloading ' + langfile, status: 'downloading ' + langfile,
loaded: received_bytes, loaded: received_bytes,
progress: Math.min(1, received_bytes / langdata[lang]) progress: Math.min(1, received_bytes / langdata[lang])
}); });
}); });
var gunzip = zlib.createGunzip(); var gunzip = zlib.createGunzip();
stream.pipe(gunzip).pipe(fs.createWriteStream(lang + '.traineddata')) stream.pipe(gunzip).pipe(fs.createWriteStream(lang + '.traineddata'))
gunzip.on('end', function(){ getLanguageData(req, stream, cb) }) gunzip.on('end',() => {
}) getLanguageData(req, stream, cb)
});
});
}); });
} }

Loading…
Cancel
Save