Browse Source

ES6 and code cleanup (#41)

* Fixed syntax error

and added process.exit() to the examples

* ES6 and code cleanup

Arrow functions were already used sporadically, which suggests the Node
parts are meant to be ES6-compatible, so ES6 syntax is now used wherever applicable.

Spacing and semicolons

Version 1.0.8

* revert changes to dist

* attempt to use standard spacing

* resolve conflict
pull/73/head
Kiran Kota 8 years ago committed by Guillermo Webster
parent
commit
91f9be7e81
  1. 7
      examples/node/basic.js
  2. 11
      examples/node/detect.js
  3. 124
      src/common/worker.js
  4. 42
      src/index.js
  5. 68
      src/node/index.js
  6. 13
      src/node/lang.js

7
examples/node/basic.js

@ -1,6 +1,6 @@ @@ -1,6 +1,6 @@
var path = require('path');
var Tesseract = require('../../') // replace this with require('tesseract.js')
var image = path.resolve(__dirname, 'cosmic.png');
// replace this with require('tesseract.js')
var Tesseract = require('../../'),
image = require('path').resolve(__dirname, 'cosmic.png');
Tesseract.recognize(image)
.then(data => {
@ -11,4 +11,5 @@ Tesseract.recognize(image) @@ -11,4 +11,5 @@ Tesseract.recognize(image)
})
.finally(e => {
console.log('finally\n');
process.exit();
});

11
examples/node/detect.js

@ -1,11 +1,12 @@ @@ -1,11 +1,12 @@
var path = require('path');
var Tesseract = require('../../') // replace this with require('tesseract.js')
var image = path.resolve(__dirname, 'cosmic.png');
// replace this with require('tesseract.js')
var Tesseract = require('../../'),
image = require('path').resolve(__dirname, 'cosmic.png');
Tesseract.detect(image)
.progress(function(info){
console.log(info)
console.log(info);
})
.then(function(data){
console.log('done', data)
console.log('done', data);
process.exit();
})

124
src/common/worker.js

@ -1,28 +1,30 @@ @@ -1,28 +1,30 @@
var latestJob;
var Module;
var base;
var adapter = {};
var latestJob,
Module,
base,
adapter = {},
dump = require('./dump.js'),
desaturate = require('./desaturate.js');
function dispatchHandlers(packet, send){
function respond(status, data){
send({
jobId: packet.jobId,
status: status,
status,
action: packet.action,
data: data
})
data
});
}
respond.resolve = respond.bind(this, 'resolve')
respond.reject = respond.bind(this, 'reject')
respond.progress = respond.bind(this, 'progress')
respond.resolve = respond.bind(this, 'resolve');
respond.reject = respond.bind(this, 'reject');
respond.progress = respond.bind(this, 'progress');
latestJob = respond;
try {
if(packet.action === 'recognize'){
handleRecognize(packet.payload, respond)
handleRecognize(packet.payload, respond);
} else if (packet.action === 'detect'){
handleDetect(packet.payload, respond)
handleDetect(packet.payload, respond);
}
} catch (err) {
respond.reject(err)
@ -32,13 +34,13 @@ exports.dispatchHandlers = dispatchHandlers; @@ -32,13 +34,13 @@ exports.dispatchHandlers = dispatchHandlers;
exports.setAdapter = function setAdapter(impl){
adapter = impl;
}
};
function handleInit(req, res){
var MIN_MEMORY = 100663296;
if(['chi_sim', 'chi_tra', 'jpn'].indexOf(req.options.lang) != -1){
if(['chi_sim', 'chi_tra', 'jpn'].includes(req.options.lang)){
MIN_MEMORY = 167772160;
}
@ -50,23 +52,17 @@ function handleInit(req, res){ @@ -50,23 +52,17 @@ function handleInit(req, res){
Module = Core({
TOTAL_MEMORY: MIN_MEMORY,
TesseractProgress(percent){
latestJob.progress({ status: 'recognizing text', progress: Math.max(0, (percent-30)/70) })
latestJob.progress({ status: 'recognizing text', progress: Math.max(0, (percent-30)/70) });
},
onRuntimeInitialized() {}
})
});
Module.FS_createPath("/", "tessdata", true, true)
base = new Module.TessBaseAPI()
res.progress({ status: 'initializing tesseract', progress: 1 })
Module.FS_createPath("/", "tessdata", true, true);
base = new Module.TessBaseAPI();
res.progress({ status: 'initializing tesseract', progress: 1 });
}
}
var dump = require('./dump.js')
var desaturate = require('./desaturate.js')
function setImage(Module, base, image){
var imgbin = desaturate(image),
width = image.width,
@ -74,51 +70,55 @@ function setImage(Module, base, image){ @@ -74,51 +70,55 @@ function setImage(Module, base, image){
var ptr = Module.allocate(imgbin, 'i8', Module.ALLOC_NORMAL);
base.SetImage(Module.wrapPointer(ptr), width, height, 1, width);
base.SetRectangle(0, 0, width, height)
base.SetRectangle(0, 0, width, height);
return ptr;
}
function loadLanguage(req, res, cb){
var lang = req.options.lang;
var lang = req.options.lang,
langFile = lang + '.traineddata';
if(!Module._loadedLanguages) Module._loadedLanguages = {};
if(lang in Module._loadedLanguages) return cb();
adapter.getLanguageData(req, res, function(data){
res.progress({ status: 'loading ' + lang + '.traineddata', progress: 0 })
Module.FS_createDataFile('tessdata', lang + ".traineddata", data, true, false);
res.progress({ status: 'loading ' + langFile, progress: 0 });
Module.FS_createDataFile('tessdata', langFile, data, true, false);
Module._loadedLanguages[lang] = true;
res.progress({ status: 'loading ' + lang + '.traineddata', progress: 1 })
cb()
res.progress({ status: 'loading ' + langFile, progress: 1 });
cb();
})
}
function handleRecognize(req, res){
handleInit(req, res)
handleInit(req, res);
loadLanguage(req, res, () => {
var options = req.options;
loadLanguage(req, res, function(){
var lang = req.options.lang;
function progressUpdate(progress){
res.progress({ status: 'initializing api', progress: progress });
}
res.progress({ status: 'initializing api', progress: 0 })
base.Init(null, lang)
res.progress({ status: 'initializing api', progress: 0.3 })
progressUpdate(0);
base.Init(null, req.options.lang);
progressUpdate(.3);
var options = req.options;
for (var option in options) {
if (options.hasOwnProperty(option)) {
base.SetVariable(option, options[option]);
}
}
res.progress({ status: 'initializing api', progress: 0.6 })
progressUpdate(.6);
var ptr = setImage(Module, base, req.image);
res.progress({ status: 'initializing api', progress: 1 })
progressUpdate(1);
base.Recognize(null)
base.Recognize(null);
var result = dump(Module, base)
var result = dump(Module, base);
base.End();
Module._free(ptr);
@ -129,40 +129,34 @@ function handleRecognize(req, res){ @@ -129,40 +129,34 @@ function handleRecognize(req, res){
function handleDetect(req, res){
handleInit(req, res)
handleInit(req, res);
req.options.lang = 'osd';
loadLanguage(req, res, function(){
loadLanguage(req, res, () => {
base.Init(null, 'osd');
base.SetPageSegMode(Module.PSM_OSD_ONLY);
base.Init(null, 'osd')
base.SetPageSegMode(Module.PSM_OSD_ONLY)
var ptr = setImage(Module, base, req.image),
results = new Module.OSResults();
var ptr = setImage(Module, base, req.image);
var results = new Module.OSResults();
var success = base.DetectOS(results);
if(!success){
if(!base.DetectOS(results)){
base.End();
Module._free(ptr);
res.reject("failed to detect os")
res.reject("Failed to detect OS");
} else {
var charset = results.get_unicharset()
var best = results.get_best_result()
var oid = best.get_orientation_id(),
var best = results.get_best_result(),
oid = best.get_orientation_id(),
sid = best.get_script_id();
var result = {
base.End();
Module._free(ptr);
res.resolve({
tesseract_script_id: sid,
script: charset.get_script_from_script_id(sid),
script: results.get_unicharset().get_script_from_script_id(sid),
script_confidence: best.get_sconfidence(),
orientation_degrees: [0, 270, 180, 90][oid],
orientation_confidence: best.get_oconfidence()
});
}
base.End();
Module._free(ptr);
res.resolve(result)
}
})
});
}

42
src/index.js

@ -1,15 +1,10 @@ @@ -1,15 +1,10 @@
const adapter = require('./node/index.js')
const circularize = require('./common/circularize.js')
const TesseractJob = require('./common/job');
const objectAssign = require('object-assign');
const version = require('../package.json').version;
function create(workerOptions){
workerOptions = workerOptions || {};
var worker = new TesseractWorker(objectAssign({}, adapter.defaultOptions, workerOptions))
worker.create = create;
worker.version = version;
return worker;
function create(workerOptions = {}){
return new TesseractWorker(Object.assign({}, adapter.defaultOptions, workerOptions, {create, version}));
}
class TesseractWorker {
@ -17,25 +12,19 @@ class TesseractWorker { @@ -17,25 +12,19 @@ class TesseractWorker {
this.worker = null;
this.workerOptions = workerOptions;
this._currentJob = null;
this._queue = []
this._queue = [];
}
recognize(image, options){
recognize(image, options = {}){
return this._delay(job => {
if(typeof options === 'string'){
options = { lang: options };
}else{
options = options || {}
options.lang = options.lang || 'eng';
}
job._send('recognize', { image: image, options: options, workerOptions: this.workerOptions })
job._send('recognize', { image, options, workerOptions: this.workerOptions });
})
}
detect(image, options){
options = options || {}
detect(image, options = {}){
return this._delay(job => {
job._send('detect', { image: image, options: options, workerOptions: this.workerOptions })
job._send('detect', { image, options, workerOptions: this.workerOptions });
})
}
@ -49,23 +38,22 @@ class TesseractWorker { @@ -49,23 +38,22 @@ class TesseractWorker {
var job = new TesseractJob(this);
this._queue.push(e => {
this._queue.shift()
this._queue.shift();
this._currentJob = job;
fn(job)
})
fn(job);
});
if(!this._currentJob) this._dequeue();
return job
return job;
}
_dequeue(){
this._currentJob = null;
if(this._queue.length > 0){
this._queue[0]()
if(this._queue.length){
this._queue[0]();
}
}
_recv(packet){
if(packet.status === 'resolve' && packet.action === 'recognize'){
packet.data = circularize(packet.data);
}
@ -78,6 +66,4 @@ class TesseractWorker { @@ -78,6 +66,4 @@ class TesseractWorker {
}
}
var DefaultTesseract = create()
module.exports = DefaultTesseract
module.exports = create();

68
src/node/index.js

@ -1,56 +1,50 @@ @@ -1,56 +1,50 @@
const path = require('path')
const fetch = require('node-fetch')
const isURL = require('is-url')
const fetch = require('isomorphic-fetch'),
isURL = require('is-url'),
fork = require('child_process').fork,
fs = require('fs');
exports.defaultOptions = {
workerPath: path.join(__dirname, 'worker.js'),
workerPath: require('path').join(__dirname, 'worker.js'),
langPath: 'http://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/',
}
const fork = require('child_process').fork;
const fs = require('fs')
exports.spawnWorker = function spawnWorker(instance, workerOptions){
var cp = fork(workerOptions.workerPath);
cp.on('message', function(packet){
instance._recv(packet)
})
cp.on('message', packet => {
instance._recv(packet);
});
return cp;
}
exports.terminateWorker = function(instance){
instance.worker.kill()
instance.worker.kill();
}
exports.sendPacket = function sendPacket(instance, packet){
loadImage(packet.payload.image, function(img){
packet.payload.image = img
instance.worker.send(packet)
})
loadImage(packet.payload.image, img => {
packet.payload.image = img;
instance.worker.send(packet);
});
}
function loadImage(image, cb){
if(isURL(image)) {
fetch(image).then(function (resp) {
return resp.buffer();
}).then(function (buffer) {
return loadImage(buffer, cb);
}).catch(function (err) {
return console.error(err);
});
fetch(image)
.then(resp => resp.buffer())
.then(buffer => loadImage(buffer, cb))
.catch(err => console.error(err));
}
if(typeof image === 'string'){
fs.readFile(image, function(err, buffer){
if (err) throw err;
loadImage(buffer, cb)
})
return
loadImage(buffer, cb);
});
return;
} else if (image instanceof Buffer){
var fileType = require('file-type');
var mime = fileType(image).mime
var mime = require('file-type')(image).mime
if(mime === 'image/png'){
var PNGReader = require('png.js');
@ -68,20 +62,18 @@ function loadImage(image, cb){ @@ -68,20 +62,18 @@ function loadImage(image, cb){
var offset = 4 * (i + j * image.width),
pix = png.getPixel(i, j);
image.data[offset] = pix[0]
image.data[offset + 1] = pix[1]
image.data[offset + 2] = pix[2]
image.data[offset] = pix[0];
image.data[offset + 1] = pix[1];
image.data[offset + 2] = pix[2];
image.data[offset + 3] = pix[3];
}
}
// console.log(image)
loadImage(image, cb)
loadImage(image, cb);
});
return
return;
} else if (mime === 'image/jpeg'){
var jpeg = require('jpeg-js');
loadImage(jpeg.decode(image), cb)
return
loadImage(require('jpeg-js').decode(image), cb);
return;
}
// TODO: support for TIFF, NetPBM, BMP, etc.
@ -90,8 +82,8 @@ function loadImage(image, cb){ @@ -90,8 +82,8 @@ function loadImage(image, cb){
// node uses json.stringify for ipc which means we need to turn
// fancy arrays into raw arrays
if(image && image.data && image.data.length && !Array.isArray(image.data)){
image.data = Array.from(image.data)
image.data = Array.from(image.data);
return loadImage(image, cb)
}
cb(image)
cb(image);
}

13
src/node/lang.js

@ -6,14 +6,13 @@ const http = require("http"), @@ -6,14 +6,13 @@ const http = require("http"),
var langdata = require('../common/langdata.json')
function getLanguageData(req, res, cb){
var lang = req.options.lang;
var langfile = lang + '.traineddata.gz';
var url = req.workerOptions.langPath + langfile;
var lang = req.options.lang,
langfile = lang + '.traineddata.gz';
fs.readFile(lang + '.traineddata', function (err, data) {
if(!err) return cb(new Uint8Array(data));
http.get(url, function(stream){
http.get(req.workerOptions.langPath + langfile, stream => {
var received_bytes = 0;
stream.on('data', function(chunk) {
received_bytes += chunk.length;
@ -27,8 +26,10 @@ function getLanguageData(req, res, cb){ @@ -27,8 +26,10 @@ function getLanguageData(req, res, cb){
var gunzip = zlib.createGunzip();
stream.pipe(gunzip).pipe(fs.createWriteStream(lang + '.traineddata'))
gunzip.on('end', function(){ getLanguageData(req, stream, cb) })
})
gunzip.on('end',() => {
getLanguageData(req, stream, cb)
});
});
});
}

Loading…
Cancel
Save