From 91f9be7e81092282d4023639895a2ffb8fa6f620 Mon Sep 17 00:00:00 2001 From: Kiran Kota Date: Sat, 22 Oct 2016 11:42:09 -0400 Subject: [PATCH] Es6 and code cleanup (#41) * Fixed syntax error and added process.exit() to the examples * es6 and code cleanup Saw arrow notation used sporadically, so figured the node parts are meant to be es6 compatible anyway, so implemented it where applicable. Spacing and semicolons Version 1.0.8 * revert changes to dist * attempt to use standard spacing * resolve conflict --- examples/node/basic.js | 25 ++++---- examples/node/detect.js | 19 +++--- package.json | 2 +- src/common/worker.js | 126 +++++++++++++++++++--------------------- src/index.js | 46 +++++---------- src/node/index.js | 72 ++++++++++------------- src/node/lang.js | 17 +++--- 7 files changed, 141 insertions(+), 166 deletions(-) diff --git a/examples/node/basic.js b/examples/node/basic.js index 9bba8b0..f3deeb4 100644 --- a/examples/node/basic.js +++ b/examples/node/basic.js @@ -1,14 +1,15 @@ -var path = require('path'); -var Tesseract = require('../../') // replace this with require('tesseract.js') -var image = path.resolve(__dirname, 'cosmic.png'); +// replace this with require('tesseract.js') +var Tesseract = require('../../'), + image = require('path').resolve(__dirname, 'cosmic.png'); Tesseract.recognize(image) -.then(data => { - console.log('then\n', data.text) -}) -.catch(err => { - console.log('catch\n', err); -}) -.finally(e => { - console.log('finally\n'); -}); \ No newline at end of file + .then(data => { + console.log('then\n', data.text) + }) + .catch(err => { + console.log('catch\n', err); + }) + .finally(e => { + console.log('finally\n'); + process.exit(); + }); \ No newline at end of file diff --git a/examples/node/detect.js b/examples/node/detect.js index 760b275..27def3a 100644 --- a/examples/node/detect.js +++ b/examples/node/detect.js @@ -1,11 +1,12 @@ -var path = require('path'); -var Tesseract = require('../../') // replace this with require('tesseract.js') -var image = path.resolve(__dirname, 'cosmic.png'); +// replace this with require('tesseract.js') +var Tesseract = require('../../'), + image = require('path').resolve(__dirname, 'cosmic.png'); Tesseract.detect(image) -.progress(function(info){ - console.log(info) -}) -.then(function(data){ - console.log('done', data) -}) \ No newline at end of file + .progress(function(info){ + console.log(info); + }) + .then(function(data){ + console.log('done', data); + process.exit(); + }) \ No newline at end of file diff --git a/package.json b/package.json index 9ea3c08..1251d7b 100644 --- a/package.json +++ b/package.json @@ -41,4 +41,4 @@ "url": "https://github.com/naptha/tesseract.js/issues" }, "homepage": "https://github.com/naptha/tesseract.js" -} +} \ No newline at end of file diff --git a/src/common/worker.js b/src/common/worker.js index 1ff024a..5273cef 100644 --- a/src/common/worker.js +++ b/src/common/worker.js @@ -1,28 +1,30 @@ -var latestJob; -var Module; -var base; -var adapter = {}; +var latestJob, + Module, + base, + adapter = {}, + dump = require('./dump.js'), + desaturate = require('./desaturate.js'); function dispatchHandlers(packet, send){ function respond(status, data){ send({ jobId: packet.jobId, - status: status, + status, action: packet.action, - data: data - }) + data + }); } - respond.resolve = respond.bind(this, 'resolve') - respond.reject = respond.bind(this, 'reject') - respond.progress = respond.bind(this, 'progress') + respond.resolve = respond.bind(this, 'resolve'); + respond.reject = respond.bind(this, 'reject'); + respond.progress = respond.bind(this, 'progress'); latestJob = respond; try { if(packet.action === 'recognize'){ - handleRecognize(packet.payload, respond) - }else if(packet.action === 'detect'){ - handleDetect(packet.payload, respond) + handleRecognize(packet.payload, respond); + } else if (packet.action === 'detect'){ + handleDetect(packet.payload, respond); } } catch (err) { respond.reject(err) @@ -32,13 +34,13 @@ exports.dispatchHandlers = dispatchHandlers; exports.setAdapter = function setAdapter(impl){ adapter = impl; -} +}; function handleInit(req, res){ var MIN_MEMORY = 100663296; - if(['chi_sim', 'chi_tra', 'jpn'].indexOf(req.options.lang) != -1){ + if(['chi_sim', 'chi_tra', 'jpn'].includes(req.options.lang)){ MIN_MEMORY = 167772160; } @@ -50,23 +52,17 @@ function handleInit(req, res){ Module = Core({ TOTAL_MEMORY: MIN_MEMORY, TesseractProgress(percent){ - latestJob.progress({ status: 'recognizing text', progress: Math.max(0, (percent-30)/70) }) + latestJob.progress({ status: 'recognizing text', progress: Math.max(0, (percent-30)/70) }); }, onRuntimeInitialized() {} - }) + }); - Module.FS_createPath("/", "tessdata", true, true) - base = new Module.TessBaseAPI() - res.progress({ status: 'initializing tesseract', progress: 1 }) + Module.FS_createPath("/", "tessdata", true, true); + base = new Module.TessBaseAPI(); + res.progress({ status: 'initializing tesseract', progress: 1 }); } } - - -var dump = require('./dump.js') -var desaturate = require('./desaturate.js') - - function setImage(Module, base, image){ var imgbin = desaturate(image), width = image.width, @@ -74,51 +70,55 @@ function setImage(Module, base, image){ var ptr = Module.allocate(imgbin, 'i8', Module.ALLOC_NORMAL); base.SetImage(Module.wrapPointer(ptr), width, height, 1, width); - base.SetRectangle(0, 0, width, height) + base.SetRectangle(0, 0, width, height); return ptr; } function loadLanguage(req, res, cb){ - var lang = req.options.lang; + var lang = req.options.lang, + langFile = lang + '.traineddata'; if(!Module._loadedLanguages) Module._loadedLanguages = {}; if(lang in Module._loadedLanguages) return cb(); adapter.getLanguageData(req, res, function(data){ - res.progress({ status: 'loading ' + lang + '.traineddata', progress: 0 }) - Module.FS_createDataFile('tessdata', lang + ".traineddata", data, true, false); + res.progress({ status: 'loading ' + langFile, progress: 0 }); + Module.FS_createDataFile('tessdata', langFile, data, true, false); Module._loadedLanguages[lang] = true; - res.progress({ status: 'loading ' + lang + '.traineddata', progress: 1 }) - cb() + res.progress({ status: 'loading ' + langFile, progress: 1 }); + cb(); }) } function handleRecognize(req, res){ - handleInit(req, res) + handleInit(req, res); - loadLanguage(req, res, function(){ - var lang = req.options.lang; + loadLanguage(req, res, () => { + var options = req.options; - res.progress({ status: 'initializing api', progress: 0 }) - base.Init(null, lang) - res.progress({ status: 'initializing api', progress: 0.3 }) + function progressUpdate(progress){ + res.progress({ status: 'initializing api', progress: progress }); + } + + progressUpdate(0); + base.Init(null, req.options.lang); + progressUpdate(.3); - var options = req.options; for (var option in options) { if (options.hasOwnProperty(option)) { base.SetVariable(option, options[option]); } } - res.progress({ status: 'initializing api', progress: 0.6 }) + progressUpdate(.6); var ptr = setImage(Module, base, req.image); - res.progress({ status: 'initializing api', progress: 1 }) + progressUpdate(1); - base.Recognize(null) + base.Recognize(null); - var result = dump(Module, base) + var result = dump(Module, base); base.End(); Module._free(ptr); @@ -129,40 +129,34 @@ function handleRecognize(req, res){ function handleDetect(req, res){ - handleInit(req, res) + handleInit(req, res); req.options.lang = 'osd'; - loadLanguage(req, res, function(){ + loadLanguage(req, res, () => { + base.Init(null, 'osd'); + base.SetPageSegMode(Module.PSM_OSD_ONLY); - base.Init(null, 'osd') - base.SetPageSegMode(Module.PSM_OSD_ONLY) - - var ptr = setImage(Module, base, req.image); + var ptr = setImage(Module, base, req.image), + results = new Module.OSResults(); - var results = new Module.OSResults(); - var success = base.DetectOS(results); - if(!success){ + if(!base.DetectOS(results)){ base.End(); Module._free(ptr); - res.reject("failed to detect os") + res.reject("Failed to detect OS"); } else { - var charset = results.get_unicharset() - - var best = results.get_best_result() - var oid = best.get_orientation_id(), + var best = results.get_best_result(), + oid = best.get_orientation_id(), sid = best.get_script_id(); - var result = { + base.End(); + Module._free(ptr); + + res.resolve({ tesseract_script_id: sid, - script: charset.get_script_from_script_id(sid), + script: results.get_unicharset().get_script_from_script_id(sid), script_confidence: best.get_sconfidence(), orientation_degrees: [0, 270, 180, 90][oid], orientation_confidence: best.get_oconfidence() - } - - base.End(); - Module._free(ptr); - - res.resolve(result) + }); } - }) + }); } diff --git a/src/index.js b/src/index.js index 6d479d1..86da1b1 100644 --- a/src/index.js +++ b/src/index.js @@ -1,15 +1,10 @@ const adapter = require('./node/index.js') const circularize = require('./common/circularize.js') const TesseractJob = require('./common/job'); -const objectAssign = require('object-assign'); const version = require('../package.json').version; -function create(workerOptions){ - workerOptions = workerOptions || {}; - var worker = new TesseractWorker(objectAssign({}, adapter.defaultOptions, workerOptions)) - worker.create = create; - worker.version = version; - return worker; +function create(workerOptions = {}){ + return new TesseractWorker(Object.assign({}, adapter.defaultOptions, workerOptions, {create, version})); } class TesseractWorker { @@ -17,25 +12,19 @@ class TesseractWorker { this.worker = null; this.workerOptions = workerOptions; this._currentJob = null; - this._queue = [] + this._queue = []; } - recognize(image, options){ + recognize(image, options = {}){ return this._delay(job => { - if(typeof options === 'string'){ - options = { lang: options }; - }else{ - options = options || {} - options.lang = options.lang || 'eng'; - } + options.lang = options.lang || 'eng'; - job._send('recognize', { image: image, options: options, workerOptions: this.workerOptions }) + job._send('recognize', { image, options, workerOptions: this.workerOptions }); }) } - detect(image, options){ - options = options || {} + detect(image, options = {}){ return this._delay(job => { - job._send('detect', { image: image, options: options, workerOptions: this.workerOptions }) + job._send('detect', { image, options, workerOptions: this.workerOptions }); }) } @@ -49,35 +38,32 @@ class TesseractWorker { var job = new TesseractJob(this); this._queue.push(e => { - this._queue.shift() + this._queue.shift(); this._currentJob = job; - fn(job) - }) + fn(job); + }); if(!this._currentJob) this._dequeue(); - return job + return job; } _dequeue(){ this._currentJob = null; - if(this._queue.length > 0){ - this._queue[0]() + if(this._queue.length){ + this._queue[0](); } } _recv(packet){ - if(packet.status === 'resolve' && packet.action === 'recognize'){ packet.data = circularize(packet.data); } if(this._currentJob.id === packet.jobId){ this._currentJob._handle(packet) - }else{ + } else { console.warn('Job ID ' + packet.jobId + ' not known.') } } } -var DefaultTesseract = create() - -module.exports = DefaultTesseract \ No newline at end of file +module.exports = create(); \ No newline at end of file diff --git a/src/node/index.js b/src/node/index.js index 54c9d74..9755e9a 100644 --- a/src/node/index.js +++ b/src/node/index.js @@ -1,56 +1,50 @@ -const path = require('path') -const fetch = require('node-fetch') -const isURL = require('is-url') +const fetch = require('isomorphic-fetch'), + isURL = require('is-url'), + fork = require('child_process').fork, + fs = require('fs'); exports.defaultOptions = { - workerPath: path.join(__dirname, 'worker.js'), + workerPath: require('path').join(__dirname, 'worker.js'), langPath: 'http://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/', } -const fork = require('child_process').fork; -const fs = require('fs') - exports.spawnWorker = function spawnWorker(instance, workerOptions){ var cp = fork(workerOptions.workerPath); - cp.on('message', function(packet){ - instance._recv(packet) - }) + cp.on('message', packet => { + instance._recv(packet); + }); return cp; } exports.terminateWorker = function(instance){ - instance.worker.kill() + instance.worker.kill(); } exports.sendPacket = function sendPacket(instance, packet){ - loadImage(packet.payload.image, function(img){ - packet.payload.image = img - instance.worker.send(packet) - }) + loadImage(packet.payload.image, img => { + packet.payload.image = img; + instance.worker.send(packet); + }); } function loadImage(image, cb){ if(isURL(image)) { - fetch(image).then(function (resp) { - return resp.buffer(); - }).then(function (buffer) { - return loadImage(buffer, cb); - }).catch(function (err) { - return console.error(err); - }); + fetch(image) + .then(resp => resp.buffer()) + .then(buffer => loadImage(buffer, cb)) + .catch(err => console.error(err)); } if(typeof image === 'string'){ fs.readFile(image, function(err, buffer){ if (err) throw err; - loadImage(buffer, cb) - }) - return - }else if(image instanceof Buffer){ - var fileType = require('file-type'); - var mime = fileType(image).mime + loadImage(buffer, cb); + }); + return; + } else if (image instanceof Buffer){ + var mime = require('file-type')(image).mime if(mime === 'image/png'){ var PNGReader = require('png.js'); @@ -68,20 +62,18 @@ function loadImage(image, cb){ var offset = 4 * (i + j * image.width), pix = png.getPixel(i, j); - image.data[offset] = pix[0] - image.data[offset + 1] = pix[1] - image.data[offset + 2] = pix[2] + image.data[offset] = pix[0]; + image.data[offset + 1] = pix[1]; + image.data[offset + 2] = pix[2]; image.data[offset + 3] = pix[3]; } } - // console.log(image) - loadImage(image, cb) + loadImage(image, cb); }); - return - }else if(mime === 'image/jpeg'){ - var jpeg = require('jpeg-js'); - loadImage(jpeg.decode(image), cb) - return + return; + } else if (mime === 'image/jpeg'){ + loadImage(require('jpeg-js').decode(image), cb); + return; } // TODO: support for TIFF, NetPBM, BMP, etc. @@ -90,8 +82,8 @@ function loadImage(image, cb){ // node uses json.stringify for ipc which means we need to turn // fancy arrays into raw arrays if(image && image.data && image.data.length && !Array.isArray(image.data)){ - image.data = Array.from(image.data) + image.data = Array.from(image.data); return loadImage(image, cb) } - cb(image) + cb(image); } \ No newline at end of file diff --git a/src/node/lang.js b/src/node/lang.js index 2b9eb96..3e3d858 100644 --- a/src/node/lang.js +++ b/src/node/lang.js @@ -6,29 +6,30 @@ const http = require("http"), var langdata = require('../common/langdata.json') function getLanguageData(req, res, cb){ - var lang = req.options.lang; - var langfile = lang + '.traineddata.gz'; - var url = req.workerOptions.langPath + langfile; - + var lang = req.options.lang, + langfile = lang + '.traineddata.gz'; + fs.readFile(lang + '.traineddata', function (err, data) { if(!err) return cb(new Uint8Array(data)); - http.get(url, function(stream){ + http.get(req.workerOptions.langPath + langfile, stream => { var received_bytes = 0; stream.on('data', function(chunk) { received_bytes += chunk.length; res.progress({ status: 'downloading ' + langfile, loaded: received_bytes, - progress: Math.min(1, received_bytes / langdata[lang]) + progress: Math.min(1, received_bytes / langdata[lang]) }); }); var gunzip = zlib.createGunzip(); stream.pipe(gunzip).pipe(fs.createWriteStream(lang + '.traineddata')) - gunzip.on('end', function(){ getLanguageData(req, stream, cb) }) - }) + gunzip.on('end',() => { + getLanguageData(req, stream, cb) + }); + }); }); }