Browse Source

Fix Module async issue and rewrite src/node/index.js

pull/265/head
Jerome Wu 6 years ago
parent
commit
6dcef53dbc
  1. 82
      src/common/worker.js
  2. 2
      src/index.js
  3. 161
      src/node/index.js
  4. 2
      src/node/worker.js

82
src/common/worker.js

@ -48,19 +48,24 @@ function handleInit(req, res){
} }
if(!Module || Module.TOTAL_MEMORY < MIN_MEMORY){ if(!Module || Module.TOTAL_MEMORY < MIN_MEMORY){
var Core = adapter.getCore(req, res); var Core = adapter.getCore(req, res);
res.progress({ status: 'initializing tesseract', progress: 0 }) res.progress({ status: 'initializing tesseract', progress: 0 })
Module = Core({ return Core({
TOTAL_MEMORY: MIN_MEMORY, // TOTAL_MEMORY: MIN_MEMORY,
TesseractProgress(percent){ TesseractProgress(percent){
latestJob.progress({ status: 'recognizing text', progress: Math.max(0, (percent-30)/70) }); latestJob.progress({ status: 'recognizing text', progress: Math.max(0, (percent-30)/70) });
}, },
})
.then((TessModule) => {
Module = TessModule;
base = new Module.TessBaseAPI();
res.progress({ status: 'initializing tesseract', progress: 1 });
}); });
base = new Module.TessBaseAPI();
res.progress({ status: 'initializing tesseract', progress: 1 });
} }
return new Promise();
} }
function setImage(Module, base, image) { function setImage(Module, base, image) {
@ -93,13 +98,13 @@ function loadLanguage(req, res, cb){
function handleRecognize(req, res){ function handleRecognize(req, res){
handleInit(req, res); handleInit(req, res)
.then(() => {
loadLanguage(req, res, () => { loadLanguage(req, res, () => {
var options = req.options; var options = req.options;
function progressUpdate(progress){ function progressUpdate(progress){
res.progress({ status: 'initializing api', progress: progress }); res.progress({ status: 'initializing api', progress: progress });
} }
progressUpdate(0); progressUpdate(0);
@ -107,9 +112,9 @@ function handleRecognize(req, res){
progressUpdate(.3); progressUpdate(.3);
for (var option in options) { for (var option in options) {
if (options.hasOwnProperty(option)) { if (options.hasOwnProperty(option)) {
base.SetVariable(option, options[option]); base.SetVariable(option, options[option]);
} }
} }
progressUpdate(.6); progressUpdate(.6);
@ -124,39 +129,42 @@ function handleRecognize(req, res){
Module._free(ptr); Module._free(ptr);
res.resolve(result); res.resolve(result);
}) })
});
} }
function handleDetect(req, res){ function handleDetect(req, res){
handleInit(req, res); handleInit(req, res)
req.options.lang = 'osd'; .then(() => {
loadLanguage(req, res, () => { req.options.lang = 'osd';
loadLanguage(req, res, () => {
base.Init(null, 'osd'); base.Init(null, 'osd');
base.SetPageSegMode(Module.PSM_OSD_ONLY); base.SetPageSegMode(Module.PSM_OSD_ONLY);
var ptr = setImage(Module, base, req.image), var ptr = setImage(Module, base, req.image),
results = new Module.OSResults(); results = new Module.OSResults();
if(!base.DetectOS(results)){ if(!base.DetectOS(results)){
base.End(); base.End();
Module._free(ptr); Module._free(ptr);
res.reject("Failed to detect OS"); res.reject("Failed to detect OS");
} else { } else {
var best = results.get_best_result(), var best = results.get_best_result(),
oid = best.get_orientation_id(), oid = best.get_orientation_id(),
sid = best.get_script_id(); sid = best.get_script_id();
base.End(); base.End();
Module._free(ptr); Module._free(ptr);
res.resolve({ res.resolve({
tesseract_script_id: sid, tesseract_script_id: sid,
script: results.get_unicharset().get_script_from_script_id(sid), script: results.get_unicharset().get_script_from_script_id(sid),
script_confidence: best.get_sconfidence(), script_confidence: best.get_sconfidence(),
orientation_degrees: [0, 270, 180, 90][oid], orientation_degrees: [0, 270, 180, 90][oid],
orientation_confidence: best.get_oconfidence() orientation_confidence: best.get_oconfidence()
}); });
} }
});
}); });
} }

2
src/index.js

@ -75,10 +75,10 @@ class TesseractWorker {
_recv(packet) { _recv(packet) {
if (this._currentJob.id === packet.jobId) { if (this._currentJob.id === packet.jobId) {
this._currentJob._handle({ this._currentJob._handle({
...packet,
data: packet.status === 'resolve' && packet.action === 'recognize' data: packet.status === 'resolve' && packet.action === 'recognize'
? circularize(packet.data) ? circularize(packet.data)
: packet.data, : packet.data,
...packet,
}); });
} else { } else {
console.warn(`Job ID ${packet.jobId} not known.`); console.warn(`Job ID ${packet.jobId} not known.`);

161
src/node/index.js

@ -1,89 +1,96 @@
const fetch = require('isomorphic-fetch'), const check = require('check-types');
isURL = require('is-url'), const fetch = require('isomorphic-fetch');
fork = require('child_process').fork, const isURL = require('is-url');
fs = require('fs'); const { fork } = require('child_process');
const fs = require('fs');
const path = require('path');
const fileType = require('file-type');
const PNGReader = require('png.js');
const JPGReader = require('jpeg-js');
exports.defaultOptions = { function loadImage(image, cb) {
workerPath: require('path').join(__dirname, 'worker.js'), if (check.string(image)) {
langPath: 'https://cdn.jsdelivr.net/gh/naptha/tessdata@gh-pages/4.0.0/', if (isURL(image)) {
} fetch(image)
.then(resp => resp.buffer())
exports.spawnWorker = function spawnWorker(instance, workerOptions){ .then(buffer => loadImage(buffer, cb))
var cp = fork(workerOptions.workerPath); .catch(err => console.error(err));
cp.on('message', packet => { } else {
instance._recv(packet); fs.readFile(image, (err, buffer) => {
}); if (err) throw err;
return cp; loadImage(buffer, cb);
} });
}
exports.terminateWorker = function(instance){ return;
instance.worker.kill(); }
} if (image instanceof Buffer) {
const { mime } = fileType(image);
exports.sendPacket = function sendPacket(instance, packet){ if (mime === 'image/png') {
loadImage(packet.payload.image, img => { const reader = new PNGReader(image);
packet.payload.image = img; reader.parse((err, png) => {
instance.worker.send(packet); if (err) throw err;
});
}
const width = png.getWidth();
const height = png.getHeight();
const data = new Uint8Array(width * height * 4);
function loadImage(image, cb){ for (let j = 0; j < height; j += 1) {
for (let i = 0; i < width; i += 1) {
const offset = 4 * (i + (j * width));
const pix = png.getPixel(i, j);
if(typeof image === 'string'){ Array(4).fill(0).forEach((v, idx) => {
if (isURL(image)) { data[offset + idx] = pix[idx];
fetch(image)
.then(resp => resp.buffer())
.then(buffer => loadImage(buffer, cb))
.catch(err => console.error(err));
} else {
fs.readFile(image, function(err, buffer){
if (err) throw err;
loadImage(buffer, cb);
}); });
}
} }
return; loadImage({ width, height, data }, cb);
} else if (image instanceof Buffer){ });
var mime = require('file-type')(image).mime return;
}
if (mime === 'image/jpeg') {
loadImage(JPGReader.decode(image), cb);
return;
}
if(mime === 'image/png'){ // TODO: support for TIFF, NetPBM, BMP, etc.
var PNGReader = require('png.js'); }
var reader = new PNGReader(image);
reader.parse(function(err, png){
if (err) throw err;
var image = { // node uses json.stringify for ipc which means we need to turn
width: png.getWidth(), // fancy arrays into raw arrays
height: png.getHeight() if (image && image.data && image.data.length && !Array.isArray(image.data)) {
} loadImage({ ...image, data: Array.from(image.data) }, cb);
image.data = new Uint8Array(image.width * image.height * 4) return;
for(var j = 0; j < image.height; j++){ }
for(var i = 0; i < image.width; i++){ cb(image);
var offset = 4 * (i + j * image.width), }
pix = png.getPixel(i, j);
image.data[offset] = pix[0]; exports.defaultOptions = {
image.data[offset + 1] = pix[1]; workerPath: path.join(__dirname, 'worker.js'),
image.data[offset + 2] = pix[2]; langPath: 'https://cdn.jsdelivr.net/gh/naptha/tessdata@gh-pages/4.0.0/',
image.data[offset + 3] = pix[3]; };
}
}
loadImage(image, cb);
});
return;
} else if (mime === 'image/jpeg'){
loadImage(require('jpeg-js').decode(image), cb);
return;
}
// TODO: support for TIFF, NetPBM, BMP, etc. exports.spawnWorker = (instance, { workerPath }) => {
} const cp = fork(workerPath);
cp.on('message', (packet) => {
instance._recv(packet);
});
return cp;
};
// node uses json.stringify for ipc which means we need to turn exports.terminateWorker = (instance) => {
// fancy arrays into raw arrays instance.worker.kill();
if(image && image.data && image.data.length && !Array.isArray(image.data)){ };
image.data = Array.from(image.data);
return loadImage(image, cb) exports.sendPacket = (instance, packet) => {
} loadImage(packet.payload.image, (img) => {
cb(image); instance.worker.send({
} ...packet,
payload: {
...packet.payload,
image: img,
},
});
});
};

2
src/node/worker.js

@ -11,7 +11,7 @@ workerUtils.setAdapter({
getCore: (req, res) => { getCore: (req, res) => {
if (check.null(TesseractCore)) { if (check.null(TesseractCore)) {
res.progress({ status: 'loading tesseract core' }); res.progress({ status: 'loading tesseract core' });
TesseractCore = require('tesseract.js-core/src/tesseract-core.asm'); TesseractCore = require('tesseract.js-core');
res.progress({ status: 'loaded tesseract core' }); res.progress({ status: 'loaded tesseract core' });
} }
return TesseractCore; return TesseractCore;

Loading…
Cancel
Save