Browse Source

Fix Module async issue and rewrite src/node/index.js

pull/265/head
Jerome Wu 6 years ago
parent
commit
6dcef53dbc
  1. 20
      src/common/worker.js
  2. 2
      src/index.js
  3. 117
      src/node/index.js
  4. 2
      src/node/worker.js

20
src/common/worker.js

@ -52,15 +52,20 @@ function handleInit(req, res){
res.progress({ status: 'initializing tesseract', progress: 0 }) res.progress({ status: 'initializing tesseract', progress: 0 })
Module = Core({ return Core({
TOTAL_MEMORY: MIN_MEMORY, // TOTAL_MEMORY: MIN_MEMORY,
TesseractProgress(percent){ TesseractProgress(percent){
latestJob.progress({ status: 'recognizing text', progress: Math.max(0, (percent-30)/70) }); latestJob.progress({ status: 'recognizing text', progress: Math.max(0, (percent-30)/70) });
}, },
}); })
.then((TessModule) => {
Module = TessModule;
base = new Module.TessBaseAPI(); base = new Module.TessBaseAPI();
res.progress({ status: 'initializing tesseract', progress: 1 }); res.progress({ status: 'initializing tesseract', progress: 1 });
});
} }
return new Promise();
} }
function setImage(Module, base, image) { function setImage(Module, base, image) {
@ -93,8 +98,8 @@ function loadLanguage(req, res, cb){
function handleRecognize(req, res){ function handleRecognize(req, res){
handleInit(req, res); handleInit(req, res)
.then(() => {
loadLanguage(req, res, () => { loadLanguage(req, res, () => {
var options = req.options; var options = req.options;
@ -125,11 +130,13 @@ function handleRecognize(req, res){
res.resolve(result); res.resolve(result);
}) })
});
} }
function handleDetect(req, res){ function handleDetect(req, res){
handleInit(req, res); handleInit(req, res)
.then(() => {
req.options.lang = 'osd'; req.options.lang = 'osd';
loadLanguage(req, res, () => { loadLanguage(req, res, () => {
base.Init(null, 'osd'); base.Init(null, 'osd');
@ -159,4 +166,5 @@ function handleDetect(req, res){
}); });
} }
}); });
});
} }

2
src/index.js

@ -75,10 +75,10 @@ class TesseractWorker {
_recv(packet) { _recv(packet) {
if (this._currentJob.id === packet.jobId) { if (this._currentJob.id === packet.jobId) {
this._currentJob._handle({ this._currentJob._handle({
...packet,
data: packet.status === 'resolve' && packet.action === 'recognize' data: packet.status === 'resolve' && packet.action === 'recognize'
? circularize(packet.data) ? circularize(packet.data)
: packet.data, : packet.data,
...packet,
}); });
} else { } else {
console.warn(`Job ID ${packet.jobId} not known.`); console.warn(`Job ID ${packet.jobId} not known.`);

117
src/node/index.js

@ -1,78 +1,56 @@
const fetch = require('isomorphic-fetch'), const check = require('check-types');
isURL = require('is-url'), const fetch = require('isomorphic-fetch');
fork = require('child_process').fork, const isURL = require('is-url');
fs = require('fs'); const { fork } = require('child_process');
const fs = require('fs');
exports.defaultOptions = { const path = require('path');
workerPath: require('path').join(__dirname, 'worker.js'), const fileType = require('file-type');
langPath: 'https://cdn.jsdelivr.net/gh/naptha/tessdata@gh-pages/4.0.0/', const PNGReader = require('png.js');
} const JPGReader = require('jpeg-js');
exports.spawnWorker = function spawnWorker(instance, workerOptions){
var cp = fork(workerOptions.workerPath);
cp.on('message', packet => {
instance._recv(packet);
});
return cp;
}
exports.terminateWorker = function(instance){
instance.worker.kill();
}
exports.sendPacket = function sendPacket(instance, packet){
loadImage(packet.payload.image, img => {
packet.payload.image = img;
instance.worker.send(packet);
});
}
function loadImage(image, cb) { function loadImage(image, cb) {
if (check.string(image)) {
if(typeof image === 'string'){
if (isURL(image)) { if (isURL(image)) {
fetch(image) fetch(image)
.then(resp => resp.buffer()) .then(resp => resp.buffer())
.then(buffer => loadImage(buffer, cb)) .then(buffer => loadImage(buffer, cb))
.catch(err => console.error(err)); .catch(err => console.error(err));
} else { } else {
fs.readFile(image, function(err, buffer){ fs.readFile(image, (err, buffer) => {
if (err) throw err; if (err) throw err;
loadImage(buffer, cb); loadImage(buffer, cb);
}); });
} }
return; return;
} else if (image instanceof Buffer){ }
var mime = require('file-type')(image).mime if (image instanceof Buffer) {
const { mime } = fileType(image);
if (mime === 'image/png') { if (mime === 'image/png') {
var PNGReader = require('png.js'); const reader = new PNGReader(image);
var reader = new PNGReader(image); reader.parse((err, png) => {
reader.parse(function(err, png){
if (err) throw err; if (err) throw err;
var image = { const width = png.getWidth();
width: png.getWidth(), const height = png.getHeight();
height: png.getHeight() const data = new Uint8Array(width * height * 4);
}
image.data = new Uint8Array(image.width * image.height * 4) for (let j = 0; j < height; j += 1) {
for(var j = 0; j < image.height; j++){ for (let i = 0; i < width; i += 1) {
for(var i = 0; i < image.width; i++){ const offset = 4 * (i + (j * width));
var offset = 4 * (i + j * image.width), const pix = png.getPixel(i, j);
pix = png.getPixel(i, j);
image.data[offset] = pix[0]; Array(4).fill(0).forEach((v, idx) => {
image.data[offset + 1] = pix[1]; data[offset + idx] = pix[idx];
image.data[offset + 2] = pix[2]; });
image.data[offset + 3] = pix[3];
} }
} }
loadImage(image, cb); loadImage({ width, height, data }, cb);
}); });
return; return;
} else if (mime === 'image/jpeg'){ }
loadImage(require('jpeg-js').decode(image), cb); if (mime === 'image/jpeg') {
loadImage(JPGReader.decode(image), cb);
return; return;
} }
@ -82,8 +60,37 @@ function loadImage(image, cb){
// node uses json.stringify for ipc which means we need to turn // node uses json.stringify for ipc which means we need to turn
// fancy arrays into raw arrays // fancy arrays into raw arrays
if (image && image.data && image.data.length && !Array.isArray(image.data)) { if (image && image.data && image.data.length && !Array.isArray(image.data)) {
image.data = Array.from(image.data); loadImage({ ...image, data: Array.from(image.data) }, cb);
return loadImage(image, cb) return;
} }
cb(image); cb(image);
} }
exports.defaultOptions = {
workerPath: path.join(__dirname, 'worker.js'),
langPath: 'https://cdn.jsdelivr.net/gh/naptha/tessdata@gh-pages/4.0.0/',
};
exports.spawnWorker = (instance, { workerPath }) => {
const cp = fork(workerPath);
cp.on('message', (packet) => {
instance._recv(packet);
});
return cp;
};
exports.terminateWorker = (instance) => {
instance.worker.kill();
};
exports.sendPacket = (instance, packet) => {
loadImage(packet.payload.image, (img) => {
instance.worker.send({
...packet,
payload: {
...packet.payload,
image: img,
},
});
});
};

2
src/node/worker.js

@ -11,7 +11,7 @@ workerUtils.setAdapter({
getCore: (req, res) => { getCore: (req, res) => {
if (check.null(TesseractCore)) { if (check.null(TesseractCore)) {
res.progress({ status: 'loading tesseract core' }); res.progress({ status: 'loading tesseract core' });
TesseractCore = require('tesseract.js-core/src/tesseract-core.asm'); TesseractCore = require('tesseract.js-core');
res.progress({ status: 'loaded tesseract core' }); res.progress({ status: 'loaded tesseract core' });
} }
return TesseractCore; return TesseractCore;

Loading…
Cancel
Save