diff --git a/examples/node/basic.js b/examples/node/basic.js index 9bba8b0..f3deeb4 100644 --- a/examples/node/basic.js +++ b/examples/node/basic.js @@ -1,14 +1,15 @@ -var path = require('path'); -var Tesseract = require('../../') // replace this with require('tesseract.js') -var image = path.resolve(__dirname, 'cosmic.png'); +// replace this with require('tesseract.js') +var Tesseract = require('../../'), + image = require('path').resolve(__dirname, 'cosmic.png'); Tesseract.recognize(image) -.then(data => { - console.log('then\n', data.text) -}) -.catch(err => { - console.log('catch\n', err); -}) -.finally(e => { - console.log('finally\n'); -}); \ No newline at end of file + .then(data => { + console.log('then\n', data.text) + }) + .catch(err => { + console.log('catch\n', err); + }) + .finally(e => { + console.log('finally\n'); + process.exit(); + }); \ No newline at end of file diff --git a/examples/node/detect.js b/examples/node/detect.js index 760b275..27def3a 100644 --- a/examples/node/detect.js +++ b/examples/node/detect.js @@ -1,11 +1,12 @@ -var path = require('path'); -var Tesseract = require('../../') // replace this with require('tesseract.js') -var image = path.resolve(__dirname, 'cosmic.png'); +// replace this with require('tesseract.js') +var Tesseract = require('../../'), + image = require('path').resolve(__dirname, 'cosmic.png'); Tesseract.detect(image) -.progress(function(info){ - console.log(info) -}) -.then(function(data){ - console.log('done', data) -}) \ No newline at end of file + .progress(function(info){ + console.log(info); + }) + .then(function(data){ + console.log('done', data); + process.exit(); + }) \ No newline at end of file diff --git a/package.json b/package.json index 9ea3c08..1251d7b 100644 --- a/package.json +++ b/package.json @@ -41,4 +41,4 @@ "url": "https://github.com/naptha/tesseract.js/issues" }, "homepage": "https://github.com/naptha/tesseract.js" -} +} \ No newline at end of file diff --git a/src/common/worker.js b/src/common/worker.js index 1ff024a..5273cef 100644 --- a/src/common/worker.js +++ b/src/common/worker.js @@ -1,28 +1,30 @@ -var latestJob; -var Module; -var base; -var adapter = {}; +var latestJob, + Module, + base, + adapter = {}, + dump = require('./dump.js'), + desaturate = require('./desaturate.js'); function dispatchHandlers(packet, send){ function respond(status, data){ send({ jobId: packet.jobId, - status: status, + status, action: packet.action, - data: data - }) + data + }); } - respond.resolve = respond.bind(this, 'resolve') - respond.reject = respond.bind(this, 'reject') - respond.progress = respond.bind(this, 'progress') + respond.resolve = respond.bind(this, 'resolve'); + respond.reject = respond.bind(this, 'reject'); + respond.progress = respond.bind(this, 'progress'); latestJob = respond; try { if(packet.action === 'recognize'){ - handleRecognize(packet.payload, respond) - }else if(packet.action === 'detect'){ - handleDetect(packet.payload, respond) + handleRecognize(packet.payload, respond); + } else if (packet.action === 'detect'){ + handleDetect(packet.payload, respond); } } catch (err) { respond.reject(err) @@ -32,13 +34,13 @@ exports.dispatchHandlers = dispatchHandlers; exports.setAdapter = function setAdapter(impl){ adapter = impl; -} +}; function handleInit(req, res){ var MIN_MEMORY = 100663296; - if(['chi_sim', 'chi_tra', 'jpn'].indexOf(req.options.lang) != -1){ + if(['chi_sim', 'chi_tra', 'jpn'].includes(req.options.lang)){ MIN_MEMORY = 167772160; } @@ -50,23 +52,17 @@ function handleInit(req, res){ Module = Core({ TOTAL_MEMORY: MIN_MEMORY, TesseractProgress(percent){ - latestJob.progress({ status: 'recognizing text', progress: Math.max(0, (percent-30)/70) }) + latestJob.progress({ status: 'recognizing text', progress: Math.max(0, (percent-30)/70) }); }, onRuntimeInitialized() {} - }) + }); - Module.FS_createPath("/", "tessdata", true, true) - base = new Module.TessBaseAPI() - res.progress({ status: 'initializing tesseract', progress: 1 }) + Module.FS_createPath("/", "tessdata", true, true); + base = new Module.TessBaseAPI(); + res.progress({ status: 'initializing tesseract', progress: 1 }); } } - - -var dump = require('./dump.js') -var desaturate = require('./desaturate.js') - - function setImage(Module, base, image){ var imgbin = desaturate(image), width = image.width, @@ -74,51 +70,55 @@ function setImage(Module, base, image){ var ptr = Module.allocate(imgbin, 'i8', Module.ALLOC_NORMAL); base.SetImage(Module.wrapPointer(ptr), width, height, 1, width); - base.SetRectangle(0, 0, width, height) + base.SetRectangle(0, 0, width, height); return ptr; } function loadLanguage(req, res, cb){ - var lang = req.options.lang; + var lang = req.options.lang, + langFile = lang + '.traineddata'; if(!Module._loadedLanguages) Module._loadedLanguages = {}; if(lang in Module._loadedLanguages) return cb(); adapter.getLanguageData(req, res, function(data){ - res.progress({ status: 'loading ' + lang + '.traineddata', progress: 0 }) - Module.FS_createDataFile('tessdata', lang + ".traineddata", data, true, false); + res.progress({ status: 'loading ' + langFile, progress: 0 }); + Module.FS_createDataFile('tessdata', langFile, data, true, false); Module._loadedLanguages[lang] = true; - res.progress({ status: 'loading ' + lang + '.traineddata', progress: 1 }) - cb() + res.progress({ status: 'loading ' + langFile, progress: 1 }); + cb(); }) } function handleRecognize(req, res){ - handleInit(req, res) + handleInit(req, res); - loadLanguage(req, res, function(){ - var lang = req.options.lang; + loadLanguage(req, res, () => { + var options = req.options; - res.progress({ status: 'initializing api', progress: 0 }) - base.Init(null, lang) - res.progress({ status: 'initializing api', progress: 0.3 }) + function progressUpdate(progress){ + res.progress({ status: 'initializing api', progress: progress }); + } + + progressUpdate(0); + base.Init(null, req.options.lang); + progressUpdate(.3); - var options = req.options; for (var option in options) { if (options.hasOwnProperty(option)) { base.SetVariable(option, options[option]); } } - res.progress({ status: 'initializing api', progress: 0.6 }) + progressUpdate(.6); var ptr = setImage(Module, base, req.image); - res.progress({ status: 'initializing api', progress: 1 }) + progressUpdate(1); - base.Recognize(null) + base.Recognize(null); - var result = dump(Module, base) + var result = dump(Module, base); base.End(); Module._free(ptr); @@ -129,40 +129,34 @@ function handleRecognize(req, res){ function handleDetect(req, res){ - handleInit(req, res) + handleInit(req, res); req.options.lang = 'osd'; - loadLanguage(req, res, function(){ + loadLanguage(req, res, () => { + base.Init(null, 'osd'); + base.SetPageSegMode(Module.PSM_OSD_ONLY); - base.Init(null, 'osd') - base.SetPageSegMode(Module.PSM_OSD_ONLY) - - var ptr = setImage(Module, base, req.image); + var ptr = setImage(Module, base, req.image), + results = new Module.OSResults(); - var results = new Module.OSResults(); - var success = base.DetectOS(results); - if(!success){ + if(!base.DetectOS(results)){ base.End(); Module._free(ptr); - res.reject("failed to detect os") + res.reject("Failed to detect OS"); } else { - var charset = results.get_unicharset() - - var best = results.get_best_result() - var oid = best.get_orientation_id(), + var best = results.get_best_result(), + oid = best.get_orientation_id(), sid = best.get_script_id(); - var result = { + base.End(); + Module._free(ptr); + + res.resolve({ tesseract_script_id: sid, - script: charset.get_script_from_script_id(sid), + script: results.get_unicharset().get_script_from_script_id(sid), script_confidence: best.get_sconfidence(), orientation_degrees: [0, 270, 180, 90][oid], orientation_confidence: best.get_oconfidence() - } - - base.End(); - Module._free(ptr); - - res.resolve(result) + }); } - }) + }); } diff --git a/src/index.js b/src/index.js index 6d479d1..86da1b1 100644 --- a/src/index.js +++ b/src/index.js @@ -1,15 +1,10 @@ const adapter = require('./node/index.js') const circularize = require('./common/circularize.js') const TesseractJob = require('./common/job'); -const objectAssign = require('object-assign'); const version = require('../package.json').version; -function create(workerOptions){ - workerOptions = workerOptions || {}; - var worker = new TesseractWorker(objectAssign({}, adapter.defaultOptions, workerOptions)) - worker.create = create; - worker.version = version; - return worker; +function create(workerOptions = {}){ + return new TesseractWorker(Object.assign({}, adapter.defaultOptions, workerOptions, {create, version})); } class TesseractWorker { @@ -17,25 +12,19 @@ class TesseractWorker { this.worker = null; this.workerOptions = workerOptions; this._currentJob = null; - this._queue = [] + this._queue = []; } - recognize(image, options){ + recognize(image, options = {}){ return this._delay(job => { - if(typeof options === 'string'){ - options = { lang: options }; - }else{ - options = options || {} - options.lang = options.lang || 'eng'; - } + options.lang = options.lang || 'eng'; - job._send('recognize', { image: image, options: options, workerOptions: this.workerOptions }) + job._send('recognize', { image, options, workerOptions: this.workerOptions }); }) } - detect(image, options){ - options = options || {} + detect(image, options = {}){ return this._delay(job => { - job._send('detect', { image: image, options: options, workerOptions: this.workerOptions }) + job._send('detect', { image, options, workerOptions: this.workerOptions }); }) } @@ -49,35 +38,32 @@ class TesseractWorker { var job = new TesseractJob(this); this._queue.push(e => { - this._queue.shift() + this._queue.shift(); this._currentJob = job; - fn(job) - }) + fn(job); + }); if(!this._currentJob) this._dequeue(); - return job + return job; } _dequeue(){ this._currentJob = null; - if(this._queue.length > 0){ - this._queue[0]() + if(this._queue.length){ + this._queue[0](); } } _recv(packet){ - if(packet.status === 'resolve' && packet.action === 'recognize'){ packet.data = circularize(packet.data); } if(this._currentJob.id === packet.jobId){ this._currentJob._handle(packet) - }else{ + } else { console.warn('Job ID ' + packet.jobId + ' not known.') } } } -var DefaultTesseract = create() - -module.exports = DefaultTesseract \ No newline at end of file +module.exports = create(); \ No newline at end of file diff --git a/src/node/index.js b/src/node/index.js index 54c9d74..9755e9a 100644 --- a/src/node/index.js +++ b/src/node/index.js @@ -1,56 +1,50 @@ -const path = require('path') -const fetch = require('node-fetch') -const isURL = require('is-url') +const fetch = require('isomorphic-fetch'), + isURL = require('is-url'), + fork = require('child_process').fork, + fs = require('fs'); exports.defaultOptions = { - workerPath: path.join(__dirname, 'worker.js'), + workerPath: require('path').join(__dirname, 'worker.js'), langPath: 'http://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/', } -const fork = require('child_process').fork; -const fs = require('fs') - exports.spawnWorker = function spawnWorker(instance, workerOptions){ var cp = fork(workerOptions.workerPath); - cp.on('message', function(packet){ - instance._recv(packet) - }) + cp.on('message', packet => { + instance._recv(packet); + }); return cp; } exports.terminateWorker = function(instance){ - instance.worker.kill() + instance.worker.kill(); } exports.sendPacket = function sendPacket(instance, packet){ - loadImage(packet.payload.image, function(img){ - packet.payload.image = img - instance.worker.send(packet) - }) + loadImage(packet.payload.image, img => { + packet.payload.image = img; + instance.worker.send(packet); + }); } function loadImage(image, cb){ if(isURL(image)) { - fetch(image).then(function (resp) { - return resp.buffer(); - }).then(function (buffer) { - return loadImage(buffer, cb); - }).catch(function (err) { - return console.error(err); - }); + fetch(image) + .then(resp => resp.buffer()) + .then(buffer => loadImage(buffer, cb)) + .catch(err => console.error(err)); } if(typeof image === 'string'){ fs.readFile(image, function(err, buffer){ if (err) throw err; - loadImage(buffer, cb) - }) - return - }else if(image instanceof Buffer){ - var fileType = require('file-type'); - var mime = fileType(image).mime + loadImage(buffer, cb); + }); + return; + } else if (image instanceof Buffer){ + var mime = require('file-type')(image).mime if(mime === 'image/png'){ var PNGReader = require('png.js'); @@ -68,20 +62,18 @@ function loadImage(image, cb){ var offset = 4 * (i + j * image.width), pix = png.getPixel(i, j); - image.data[offset] = pix[0] - image.data[offset + 1] = pix[1] - image.data[offset + 2] = pix[2] + image.data[offset] = pix[0]; + image.data[offset + 1] = pix[1]; + image.data[offset + 2] = pix[2]; image.data[offset + 3] = pix[3]; } } - // console.log(image) - loadImage(image, cb) + loadImage(image, cb); }); - return - }else if(mime === 'image/jpeg'){ - var jpeg = require('jpeg-js'); - loadImage(jpeg.decode(image), cb) - return + return; + } else if (mime === 'image/jpeg'){ + loadImage(require('jpeg-js').decode(image), cb); + return; } // TODO: support for TIFF, NetPBM, BMP, etc. @@ -90,8 +82,8 @@ function loadImage(image, cb){ // node uses json.stringify for ipc which means we need to turn // fancy arrays into raw arrays if(image && image.data && image.data.length && !Array.isArray(image.data)){ - image.data = Array.from(image.data) + image.data = Array.from(image.data); return loadImage(image, cb) } - cb(image) + cb(image); } \ No newline at end of file diff --git a/src/node/lang.js b/src/node/lang.js index 2b9eb96..3e3d858 100644 --- a/src/node/lang.js +++ b/src/node/lang.js @@ -6,29 +6,30 @@ const http = require("http"), var langdata = require('../common/langdata.json') function getLanguageData(req, res, cb){ - var lang = req.options.lang; - var langfile = lang + '.traineddata.gz'; - var url = req.workerOptions.langPath + langfile; - + var lang = req.options.lang, + langfile = lang + '.traineddata.gz'; + fs.readFile(lang + '.traineddata', function (err, data) { if(!err) return cb(new Uint8Array(data)); - http.get(url, function(stream){ + http.get(req.workerOptions.langPath + langfile, stream => { var received_bytes = 0; stream.on('data', function(chunk) { received_bytes += chunk.length; res.progress({ status: 'downloading ' + langfile, loaded: received_bytes, - progress: Math.min(1, received_bytes / langdata[lang]) + progress: Math.min(1, received_bytes / langdata[lang]) }); }); var gunzip = zlib.createGunzip(); stream.pipe(gunzip).pipe(fs.createWriteStream(lang + '.traineddata')) - gunzip.on('end', function(){ getLanguageData(req, stream, cb) }) - }) + gunzip.on('end',() => { + getLanguageData(req, stream, cb) + }); + }); }); }