diff --git a/src/common/worker.js b/src/common/worker.js index d97d21c..1c021e4 100644 --- a/src/common/worker.js +++ b/src/common/worker.js @@ -48,19 +48,24 @@ function handleInit(req, res){ } if(!Module || Module.TOTAL_MEMORY < MIN_MEMORY){ - var Core = adapter.getCore(req, res); + var Core = adapter.getCore(req, res); res.progress({ status: 'initializing tesseract', progress: 0 }) - Module = Core({ - TOTAL_MEMORY: MIN_MEMORY, - TesseractProgress(percent){ - latestJob.progress({ status: 'recognizing text', progress: Math.max(0, (percent-30)/70) }); - }, + return Core({ + // TOTAL_MEMORY: MIN_MEMORY, + TesseractProgress(percent){ + latestJob.progress({ status: 'recognizing text', progress: Math.max(0, (percent-30)/70) }); + }, + }) + .then((TessModule) => { + Module = TessModule; + base = new Module.TessBaseAPI(); + res.progress({ status: 'initializing tesseract', progress: 1 }); }); - base = new Module.TessBaseAPI(); - res.progress({ status: 'initializing tesseract', progress: 1 }); } + + return new Promise(); } function setImage(Module, base, image) { @@ -93,13 +98,13 @@ function loadLanguage(req, res, cb){ function handleRecognize(req, res){ - handleInit(req, res); - - loadLanguage(req, res, () => { + handleInit(req, res) + .then(() => { + loadLanguage(req, res, () => { var options = req.options; function progressUpdate(progress){ - res.progress({ status: 'initializing api', progress: progress }); + res.progress({ status: 'initializing api', progress: progress }); } progressUpdate(0); @@ -107,9 +112,9 @@ function handleRecognize(req, res){ progressUpdate(.3); for (var option in options) { - if (options.hasOwnProperty(option)) { - base.SetVariable(option, options[option]); - } + if (options.hasOwnProperty(option)) { + base.SetVariable(option, options[option]); + } } progressUpdate(.6); @@ -124,39 +129,42 @@ function handleRecognize(req, res){ Module._free(ptr); res.resolve(result); - }) + }) + }); } function handleDetect(req, res){ - handleInit(req, res); - req.options.lang = 'osd'; - loadLanguage(req, res, () => { + handleInit(req, res) + .then(() => { + req.options.lang = 'osd'; + loadLanguage(req, res, () => { base.Init(null, 'osd'); base.SetPageSegMode(Module.PSM_OSD_ONLY); var ptr = setImage(Module, base, req.image), - results = new Module.OSResults(); + results = new Module.OSResults(); if(!base.DetectOS(results)){ - base.End(); - Module._free(ptr); - res.reject("Failed to detect OS"); + base.End(); + Module._free(ptr); + res.reject("Failed to detect OS"); } else { - var best = results.get_best_result(), - oid = best.get_orientation_id(), - sid = best.get_script_id(); - - base.End(); - Module._free(ptr); - - res.resolve({ - tesseract_script_id: sid, - script: results.get_unicharset().get_script_from_script_id(sid), - script_confidence: best.get_sconfidence(), - orientation_degrees: [0, 270, 180, 90][oid], - orientation_confidence: best.get_oconfidence() - }); + var best = results.get_best_result(), + oid = best.get_orientation_id(), + sid = best.get_script_id(); + + base.End(); + Module._free(ptr); + + res.resolve({ + tesseract_script_id: sid, + script: results.get_unicharset().get_script_from_script_id(sid), + script_confidence: best.get_sconfidence(), + orientation_degrees: [0, 270, 180, 90][oid], + orientation_confidence: best.get_oconfidence() + }); } + }); }); } diff --git a/src/index.js b/src/index.js index fc018d1..7c6079b 100644 --- a/src/index.js +++ b/src/index.js @@ -75,10 +75,10 @@ class TesseractWorker { _recv(packet) { if (this._currentJob.id === packet.jobId) { this._currentJob._handle({ + ...packet, data: packet.status === 'resolve' && packet.action === 'recognize' ? circularize(packet.data) : packet.data, - ...packet, }); } else { console.warn(`Job ID ${packet.jobId} not known.`); diff --git a/src/node/index.js b/src/node/index.js index 33ead58..03bc46b 100644 --- a/src/node/index.js +++ b/src/node/index.js @@ -1,89 +1,96 @@ -const fetch = require('isomorphic-fetch'), - isURL = require('is-url'), - fork = require('child_process').fork, - fs = require('fs'); +const check = require('check-types'); +const fetch = require('isomorphic-fetch'); +const isURL = require('is-url'); +const { fork } = require('child_process'); +const fs = require('fs'); +const path = require('path'); +const fileType = require('file-type'); +const PNGReader = require('png.js'); +const JPGReader = require('jpeg-js'); -exports.defaultOptions = { - workerPath: require('path').join(__dirname, 'worker.js'), - langPath: 'https://cdn.jsdelivr.net/gh/naptha/tessdata@gh-pages/4.0.0/', -} - -exports.spawnWorker = function spawnWorker(instance, workerOptions){ - var cp = fork(workerOptions.workerPath); - cp.on('message', packet => { - instance._recv(packet); - }); - return cp; -} +function loadImage(image, cb) { + if (check.string(image)) { + if (isURL(image)) { + fetch(image) + .then(resp => resp.buffer()) + .then(buffer => loadImage(buffer, cb)) + .catch(err => console.error(err)); + } else { + fs.readFile(image, (err, buffer) => { + if (err) throw err; + loadImage(buffer, cb); + }); + } + return; + } + if (image instanceof Buffer) { + const { mime } = fileType(image); -exports.terminateWorker = function(instance){ - instance.worker.kill(); -} + if (mime === 'image/png') { + const reader = new PNGReader(image); + reader.parse((err, png) => { + if (err) throw err; -exports.sendPacket = function sendPacket(instance, packet){ - loadImage(packet.payload.image, img => { - packet.payload.image = img; - instance.worker.send(packet); - }); -} + const width = png.getWidth(); + const height = png.getHeight(); + const data = new Uint8Array(width * height * 4); + for (let j = 0; j < height; j += 1) { + for (let i = 0; i < width; i += 1) { + const offset = 4 * (i + (j * width)); + const pix = png.getPixel(i, j); -function loadImage(image, cb){ - - if(typeof image === 'string'){ - if (isURL(image)) { - fetch(image) - .then(resp => resp.buffer()) - .then(buffer => loadImage(buffer, cb)) - .catch(err => console.error(err)); - } else { - fs.readFile(image, function(err, buffer){ - if (err) throw err; - loadImage(buffer, cb); + Array(4).fill(0).forEach((v, idx) => { + data[offset + idx] = pix[idx]; }); + } } - return; - } else if (image instanceof Buffer){ - var mime = require('file-type')(image).mime + loadImage({ width, height, data }, cb); + }); + return; + } + if (mime === 'image/jpeg') { + loadImage(JPGReader.decode(image), cb); + return; + } - if(mime === 'image/png'){ - var PNGReader = require('png.js'); - var reader = new PNGReader(image); - reader.parse(function(err, png){ - if (err) throw err; + // TODO: support for TIFF, NetPBM, BMP, etc. + } - var image = { - width: png.getWidth(), - height: png.getHeight() - } - image.data = new Uint8Array(image.width * image.height * 4) - for(var j = 0; j < image.height; j++){ - for(var i = 0; i < image.width; i++){ - var offset = 4 * (i + j * image.width), - pix = png.getPixel(i, j); + // node uses json.stringify for ipc which means we need to turn + // fancy arrays into raw arrays + if (image && image.data && image.data.length && !Array.isArray(image.data)) { + loadImage({ ...image, data: Array.from(image.data) }, cb); + return; + } + cb(image); +} - image.data[offset] = pix[0]; - image.data[offset + 1] = pix[1]; - image.data[offset + 2] = pix[2]; - image.data[offset + 3] = pix[3]; - } - } - loadImage(image, cb); - }); - return; - } else if (mime === 'image/jpeg'){ - loadImage(require('jpeg-js').decode(image), cb); - return; - } +exports.defaultOptions = { + workerPath: path.join(__dirname, 'worker.js'), + langPath: 'https://cdn.jsdelivr.net/gh/naptha/tessdata@gh-pages/4.0.0/', +}; - // TODO: support for TIFF, NetPBM, BMP, etc. - } +exports.spawnWorker = (instance, { workerPath }) => { + const cp = fork(workerPath); + cp.on('message', (packet) => { + instance._recv(packet); + }); + return cp; +}; - // node uses json.stringify for ipc which means we need to turn - // fancy arrays into raw arrays - if(image && image.data && image.data.length && !Array.isArray(image.data)){ - image.data = Array.from(image.data); - return loadImage(image, cb) - } - cb(image); -} +exports.terminateWorker = (instance) => { + instance.worker.kill(); +}; + +exports.sendPacket = (instance, packet) => { + loadImage(packet.payload.image, (img) => { + instance.worker.send({ + ...packet, + payload: { + ...packet.payload, + image: img, + }, + }); + }); +}; diff --git a/src/node/worker.js b/src/node/worker.js index 129c58a..1531d34 100644 --- a/src/node/worker.js +++ b/src/node/worker.js @@ -11,7 +11,7 @@ workerUtils.setAdapter({ getCore: (req, res) => { if (check.null(TesseractCore)) { res.progress({ status: 'loading tesseract core' }); - TesseractCore = require('tesseract.js-core/src/tesseract-core.asm'); + TesseractCore = require('tesseract.js-core'); res.progress({ status: 'loaded tesseract core' }); } return TesseractCore;