Browse Source

Fix Module async issue and rewrite src/node/index.js

pull/265/head
Jerome Wu 6 years ago
parent
commit
6dcef53dbc
  1. 82
      src/common/worker.js
  2. 2
      src/index.js
  3. 161
      src/node/index.js
  4. 2
      src/node/worker.js

82
src/common/worker.js

@ -48,19 +48,24 @@ function handleInit(req, res){ @@ -48,19 +48,24 @@ function handleInit(req, res){
}
if(!Module || Module.TOTAL_MEMORY < MIN_MEMORY){
var Core = adapter.getCore(req, res);
var Core = adapter.getCore(req, res);
res.progress({ status: 'initializing tesseract', progress: 0 })
Module = Core({
TOTAL_MEMORY: MIN_MEMORY,
TesseractProgress(percent){
latestJob.progress({ status: 'recognizing text', progress: Math.max(0, (percent-30)/70) });
},
return Core({
// TOTAL_MEMORY: MIN_MEMORY,
TesseractProgress(percent){
latestJob.progress({ status: 'recognizing text', progress: Math.max(0, (percent-30)/70) });
},
})
.then((TessModule) => {
Module = TessModule;
base = new Module.TessBaseAPI();
res.progress({ status: 'initializing tesseract', progress: 1 });
});
base = new Module.TessBaseAPI();
res.progress({ status: 'initializing tesseract', progress: 1 });
}
return new Promise();
}
function setImage(Module, base, image) {
@ -93,13 +98,13 @@ function loadLanguage(req, res, cb){ @@ -93,13 +98,13 @@ function loadLanguage(req, res, cb){
function handleRecognize(req, res){
handleInit(req, res);
loadLanguage(req, res, () => {
handleInit(req, res)
.then(() => {
loadLanguage(req, res, () => {
var options = req.options;
function progressUpdate(progress){
res.progress({ status: 'initializing api', progress: progress });
res.progress({ status: 'initializing api', progress: progress });
}
progressUpdate(0);
@ -107,9 +112,9 @@ function handleRecognize(req, res){ @@ -107,9 +112,9 @@ function handleRecognize(req, res){
progressUpdate(.3);
for (var option in options) {
if (options.hasOwnProperty(option)) {
base.SetVariable(option, options[option]);
}
if (options.hasOwnProperty(option)) {
base.SetVariable(option, options[option]);
}
}
progressUpdate(.6);
@ -124,39 +129,42 @@ function handleRecognize(req, res){ @@ -124,39 +129,42 @@ function handleRecognize(req, res){
Module._free(ptr);
res.resolve(result);
})
})
});
}
function handleDetect(req, res){
handleInit(req, res);
req.options.lang = 'osd';
loadLanguage(req, res, () => {
handleInit(req, res)
.then(() => {
req.options.lang = 'osd';
loadLanguage(req, res, () => {
base.Init(null, 'osd');
base.SetPageSegMode(Module.PSM_OSD_ONLY);
var ptr = setImage(Module, base, req.image),
results = new Module.OSResults();
results = new Module.OSResults();
if(!base.DetectOS(results)){
base.End();
Module._free(ptr);
res.reject("Failed to detect OS");
base.End();
Module._free(ptr);
res.reject("Failed to detect OS");
} else {
var best = results.get_best_result(),
oid = best.get_orientation_id(),
sid = best.get_script_id();
base.End();
Module._free(ptr);
res.resolve({
tesseract_script_id: sid,
script: results.get_unicharset().get_script_from_script_id(sid),
script_confidence: best.get_sconfidence(),
orientation_degrees: [0, 270, 180, 90][oid],
orientation_confidence: best.get_oconfidence()
});
var best = results.get_best_result(),
oid = best.get_orientation_id(),
sid = best.get_script_id();
base.End();
Module._free(ptr);
res.resolve({
tesseract_script_id: sid,
script: results.get_unicharset().get_script_from_script_id(sid),
script_confidence: best.get_sconfidence(),
orientation_degrees: [0, 270, 180, 90][oid],
orientation_confidence: best.get_oconfidence()
});
}
});
});
}

2
src/index.js

@ -75,10 +75,10 @@ class TesseractWorker { @@ -75,10 +75,10 @@ class TesseractWorker {
_recv(packet) {
if (this._currentJob.id === packet.jobId) {
this._currentJob._handle({
...packet,
data: packet.status === 'resolve' && packet.action === 'recognize'
? circularize(packet.data)
: packet.data,
...packet,
});
} else {
console.warn(`Job ID ${packet.jobId} not known.`);

161
src/node/index.js

@ -1,89 +1,96 @@ @@ -1,89 +1,96 @@
const fetch = require('isomorphic-fetch'),
isURL = require('is-url'),
fork = require('child_process').fork,
fs = require('fs');
const check = require('check-types');
const fetch = require('isomorphic-fetch');
const isURL = require('is-url');
const { fork } = require('child_process');
const fs = require('fs');
const path = require('path');
const fileType = require('file-type');
const PNGReader = require('png.js');
const JPGReader = require('jpeg-js');
exports.defaultOptions = {
workerPath: require('path').join(__dirname, 'worker.js'),
langPath: 'https://cdn.jsdelivr.net/gh/naptha/tessdata@gh-pages/4.0.0/',
}
exports.spawnWorker = function spawnWorker(instance, workerOptions){
var cp = fork(workerOptions.workerPath);
cp.on('message', packet => {
instance._recv(packet);
});
return cp;
}
function loadImage(image, cb) {
if (check.string(image)) {
if (isURL(image)) {
fetch(image)
.then(resp => resp.buffer())
.then(buffer => loadImage(buffer, cb))
.catch(err => console.error(err));
} else {
fs.readFile(image, (err, buffer) => {
if (err) throw err;
loadImage(buffer, cb);
});
}
return;
}
if (image instanceof Buffer) {
const { mime } = fileType(image);
exports.terminateWorker = function(instance){
instance.worker.kill();
}
if (mime === 'image/png') {
const reader = new PNGReader(image);
reader.parse((err, png) => {
if (err) throw err;
exports.sendPacket = function sendPacket(instance, packet){
loadImage(packet.payload.image, img => {
packet.payload.image = img;
instance.worker.send(packet);
});
}
const width = png.getWidth();
const height = png.getHeight();
const data = new Uint8Array(width * height * 4);
for (let j = 0; j < height; j += 1) {
for (let i = 0; i < width; i += 1) {
const offset = 4 * (i + (j * width));
const pix = png.getPixel(i, j);
function loadImage(image, cb){
if(typeof image === 'string'){
if (isURL(image)) {
fetch(image)
.then(resp => resp.buffer())
.then(buffer => loadImage(buffer, cb))
.catch(err => console.error(err));
} else {
fs.readFile(image, function(err, buffer){
if (err) throw err;
loadImage(buffer, cb);
Array(4).fill(0).forEach((v, idx) => {
data[offset + idx] = pix[idx];
});
}
}
return;
} else if (image instanceof Buffer){
var mime = require('file-type')(image).mime
loadImage({ width, height, data }, cb);
});
return;
}
if (mime === 'image/jpeg') {
loadImage(JPGReader.decode(image), cb);
return;
}
if(mime === 'image/png'){
var PNGReader = require('png.js');
var reader = new PNGReader(image);
reader.parse(function(err, png){
if (err) throw err;
// TODO: support for TIFF, NetPBM, BMP, etc.
}
var image = {
width: png.getWidth(),
height: png.getHeight()
}
image.data = new Uint8Array(image.width * image.height * 4)
for(var j = 0; j < image.height; j++){
for(var i = 0; i < image.width; i++){
var offset = 4 * (i + j * image.width),
pix = png.getPixel(i, j);
// node uses json.stringify for ipc which means we need to turn
// fancy arrays into raw arrays
if (image && image.data && image.data.length && !Array.isArray(image.data)) {
loadImage({ ...image, data: Array.from(image.data) }, cb);
return;
}
cb(image);
}
image.data[offset] = pix[0];
image.data[offset + 1] = pix[1];
image.data[offset + 2] = pix[2];
image.data[offset + 3] = pix[3];
}
}
loadImage(image, cb);
});
return;
} else if (mime === 'image/jpeg'){
loadImage(require('jpeg-js').decode(image), cb);
return;
}
exports.defaultOptions = {
workerPath: path.join(__dirname, 'worker.js'),
langPath: 'https://cdn.jsdelivr.net/gh/naptha/tessdata@gh-pages/4.0.0/',
};
// TODO: support for TIFF, NetPBM, BMP, etc.
}
exports.spawnWorker = (instance, { workerPath }) => {
const cp = fork(workerPath);
cp.on('message', (packet) => {
instance._recv(packet);
});
return cp;
};
// node uses json.stringify for ipc which means we need to turn
// fancy arrays into raw arrays
if(image && image.data && image.data.length && !Array.isArray(image.data)){
image.data = Array.from(image.data);
return loadImage(image, cb)
}
cb(image);
}
exports.terminateWorker = (instance) => {
instance.worker.kill();
};
exports.sendPacket = (instance, packet) => {
loadImage(packet.payload.image, (img) => {
instance.worker.send({
...packet,
payload: {
...packet.payload,
image: img,
},
});
});
};

2
src/node/worker.js

@ -11,7 +11,7 @@ workerUtils.setAdapter({ @@ -11,7 +11,7 @@ workerUtils.setAdapter({
getCore: (req, res) => {
if (check.null(TesseractCore)) {
res.progress({ status: 'loading tesseract core' });
TesseractCore = require('tesseract.js-core/src/tesseract-core.asm');
TesseractCore = require('tesseract.js-core');
res.progress({ status: 'loaded tesseract core' });
}
return TesseractCore;

Loading…
Cancel
Save