Browse Source

Rewrite src to use tesseract.js-utils and fit lint

pull/265/head
Jerome Wu 7 years ago
parent
commit
30d3421d15
  1. 33
      package-lock.json
  2. 3
      package.json
  3. 202
      src/common/worker.js
  4. 47
      src/node/lang.js
  5. 1
      src/node/worker.js

33
package-lock.json generated

@ -2959,6 +2959,11 @@ @@ -2959,6 +2959,11 @@
"resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.18.tgz",
"integrity": "sha512-sr1ZQph3UwHTR0XftSbK85OvBbxe/abLGzEnPENCQwmHf7sck8Oyu4ob3LgBxWWxRoM+QszeUyl7jbqapu2TqA=="
},
"idb-keyval": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/idb-keyval/-/idb-keyval-3.1.0.tgz",
"integrity": "sha512-iFwFN5n00KNNnVxlOOK280SJJfXWY7pbMUOQXdIXehvvc/mGCV/6T2Ae+Pk2KwAkkATDTwfMavOiDH5lrJKWXQ=="
},
"idb-wrapper": {
"version": "1.7.1",
"resolved": "https://registry.npmjs.org/idb-wrapper/-/idb-wrapper-1.7.1.tgz",
@ -4822,17 +4827,30 @@ @@ -4822,17 +4827,30 @@
}
},
"tesseract.js-core": {
"version": "2.0.0-beta.2",
"resolved": "https://registry.npmjs.org/tesseract.js-core/-/tesseract.js-core-2.0.0-beta.2.tgz",
"integrity": "sha512-aExaof0ZxYc2q0T7HN26wxcARN/N+0q9z9gjkyaEFsuUj82VtUZr0A6yDQ0rpQyIdk22K15fASUX5XcwFIb0rw==",
"requires": {
"node-fetch": "^2.3.0"
"version": "2.0.0-beta.4",
"resolved": "https://registry.npmjs.org/tesseract.js-core/-/tesseract.js-core-2.0.0-beta.4.tgz",
"integrity": "sha512-s9uw+s3Rgsw4DfWzUN7LKuLUdhzOxGMRZ+NIKLFGajTfczMTOkYE7PKngz3ob0ze3JLOnimnM7A6mJ/U2xMn5w=="
},
"tesseract.js-utils": {
"version": "1.0.0-beta.1",
"resolved": "https://registry.npmjs.org/tesseract.js-utils/-/tesseract.js-utils-1.0.0-beta.1.tgz",
"integrity": "sha512-jQrP7umpOAdaeBoAE8aFl+6HUPpGpLdMozTQINVt+4allstur5dho7u8YWNWu4WXFk9XHSUl6isWKIpmBGbGcA==",
"requires": {
"idb-keyval": "^3.1.0",
"node-fetch": "^2.3.0",
"whatwg-fetch": "^3.0.0",
"zlibjs": "^0.3.1"
},
"dependencies": {
"node-fetch": {
"version": "2.3.0",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.3.0.tgz",
"integrity": "sha512-MOd8pV3fxENbryESLgVIeaGKrdl+uaYhCSSVkjeOb/31/njTpcis5aWfdqgNlHIrKOLRbMnfPINPOML2CIFeXA=="
},
"whatwg-fetch": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/whatwg-fetch/-/whatwg-fetch-3.0.0.tgz",
"integrity": "sha512-9GSJUgz1D4MfyKU7KRqwOjXCXTqWdFNvEr7eUBYchQiVc744mqK/MzXPNR2WsPkmkOa4ywfg8C2n8h+13Bey1Q=="
}
}
},
@ -5207,6 +5225,11 @@ @@ -5207,6 +5225,11 @@
"requires": {
"object-keys": "~0.4.0"
}
},
"zlibjs": {
"version": "0.3.1",
"resolved": "https://registry.npmjs.org/zlibjs/-/zlibjs-0.3.1.tgz",
"integrity": "sha1-UBl+2yihxCymWcyLTmqd3W1ERVQ="
}
}
}

3
package.json

@ -42,7 +42,8 @@ @@ -42,7 +42,8 @@
"node-fetch": "^1.6.3",
"object-assign": "^4.1.0",
"png.js": "^0.2.1",
"tesseract.js-core": "^2.0.0-beta.2"
"tesseract.js-core": "^2.0.0-beta.4",
"tesseract.js-utils": "^1.0.0-beta.1"
},
"repository": {
"type": "git",

202
src/common/worker.js

@ -1,138 +1,93 @@ @@ -1,138 +1,93 @@
const readImage = require('tesseract.js-core/src/utils/readImage');
var latestJob,
Module,
base,
adapter = {},
dump = require('./dump.js'),
desaturate = require('./desaturate.js');
function dispatchHandlers(packet, send){
function respond(status, data){
send({
jobId: packet.jobId,
status,
action: packet.action,
data
});
}
respond.resolve = respond.bind(this, 'resolve');
respond.reject = respond.bind(this, 'reject');
respond.progress = respond.bind(this, 'progress');
latestJob = respond;
try {
if(packet.action === 'recognize'){
handleRecognize(packet.payload, respond);
} else if (packet.action === 'detect'){
handleDetect(packet.payload, respond);
}
} catch (err) {
// Prepare exception to travel through postMessage
err = err.toString();
const { readImage, loadLang } = require('tesseract.js-utils');
const dump = require('./dump');
respond.reject(err)
}
}
exports.dispatchHandlers = dispatchHandlers;
let Module;
let base;
let latestJob;
let adapter = {};
exports.setAdapter = function setAdapter(impl){
adapter = impl;
};
/**
 * Make sure the Tesseract core module (`Module`) and its base API object
 * (`base`) exist before a job runs.
 *
 * A new core is created through `adapter.getCore(req, res)` when no module has
 * been loaded yet, or when the loaded module reports a `TOTAL_MEMORY` below the
 * threshold chosen for the requested language. Otherwise the existing module
 * is reused and the returned promise is already resolved.
 *
 * @param {object} req - Job payload; `req.options.lang` selects the threshold.
 * @param {object} res - Job responder; `res.progress` receives init updates.
 * @returns {Promise<void>} Settles once `Module` and `base` are usable.
 */
const handleInit = (req, res) => {
  // Thresholds compared against Module.TOTAL_MEMORY
  // (presumably bytes, i.e. 96 MiB and 160 MiB -- confirm against the core build).
  const DEFAULT_MIN_MEMORY = 100663296;
  const LARGE_LANG_MIN_MEMORY = 167772160;
  const LARGE_LANGS = ['chi_sim', 'chi_tra', 'jpn'];

  const minMemory = LARGE_LANGS.includes(req.options.lang)
    ? LARGE_LANG_MIN_MEMORY
    : DEFAULT_MIN_MEMORY;

  if (!Module || Module.TOTAL_MEMORY < minMemory) {
    const Core = adapter.getCore(req, res);

    res.progress({ status: 'initializing tesseract', progress: 0 });

    // NOTE: the threshold is only used for the check above; it is not passed
    // to Core (the original left `TOTAL_MEMORY: MIN_MEMORY` commented out).
    return Core({
      TesseractProgress(percent) {
        // Reported through `latestJob`, not `res`: the module is reused by
        // later jobs, so progress goes to whichever job was dispatched last.
        latestJob.progress({ status: 'recognizing text', progress: Math.max(0, (percent - 30) / 70) });
      },
    })
      .then((TessModule) => {
        Module = TessModule;
        base = new Module.TessBaseAPI();
        res.progress({ status: 'initialized tesseract', progress: 1 });
      });
  }

  // Already initialized. `new Promise()` without an executor throws a
  // TypeError, so hand back an already-resolved promise instead.
  return Promise.resolve();
};
// NOTE(review): this span comes from a rendered diff with the +/- markers
// stripped. It interleaves the removed `function setImage(Module, base, image)`
// header, the added `const setImage = (image) =>` header, and the removed
// callback-based loadLanguage, so it is not valid JavaScript as it stands.
//
// setImage: decodes `image` with readImage(Module, Array.from(image)), passes
// the decoded `data` to base.SetImage, sets the recognition rectangle to the
// full w x h area, and returns `data` (callers later hand it to Module._free).
function setImage(Module, base, image) {
const setImage = (image) => {
const { w, h, data } = readImage(Module, Array.from(image));
base.SetImage(data);
base.SetRectangle(0, 0, w, h);
return data;
}
// Removed loadLanguage: skips the work when `lang` is already recorded in
// Module._loadedLanguages; otherwise asks adapter.getLanguageData for the data,
// writes it to `<lang>.traineddata` via Module.FS.writeFile, records the
// language as loaded, and then calls `cb`.
function loadLanguage(req, res, cb){
var lang = req.options.lang,
langFile = lang + '.traineddata';
if(!Module._loadedLanguages) Module._loadedLanguages = {};
if(lang in Module._loadedLanguages) return cb();
adapter.getLanguageData(req, res, function(data){
res.progress({ status: 'loading ' + langFile, progress: 0 });
Module.FS.writeFile(langFile, data);
Module._loadedLanguages[lang] = true;
res.progress({ status: 'loading ' + langFile, progress: 1 });
cb();
})
}
};
/**
 * Load the requested language through `loadLang` from tesseract.js-utils and
 * run `cb` once it has finished.
 *
 * @param {object} req - Job payload; reads `req.options.lang` and
 *   `req.workerOptions.langPath`.
 * @param {object} res - Job responder (not used here).
 * @param {Function} cb - Invoked with whatever `loadLang` resolves to.
 * @returns {Promise<*>} Resolves to the return value of `cb`.
 */
const loadLanguage = (req, res, cb) => {
  const requestedLang = req.options.lang;
  const languageSource = req.workerOptions.langPath;

  const loadRequest = {
    langs: requestedLang,
    tessModule: Module,
    langURI: languageSource,
    cache: true,
  };

  return loadLang(loadRequest).then(cb);
};
/**
 * Run a full recognition job: initialize the core, load the language,
 * configure the API from `req.options`, recognize `req.image`, and report the
 * dumped result through `res.resolve`.
 *
 * Any failure along the way (core initialization, language loading, or an
 * exception thrown by the Tesseract API calls) is reported through
 * `res.reject` as a string, so the job always settles. Without this the
 * asynchronous part of the job is outside the dispatcher's try/catch and a
 * failure would leave the caller waiting forever.
 *
 * @param {object} req - Job payload; reads `req.options` and `req.image`.
 * @param {object} res - Job responder with `progress`, `resolve` and `reject`.
 * @returns {Promise<void>} Settles after the job has been resolved or
 *   rejected through `res`; callers may ignore it.
 */
const handleRecognize = (req, res) => (
  handleInit(req, res)
    // Return the loadLanguage promise so its failures reach the catch below.
    .then(() => loadLanguage(req, res, () => {
      const { options } = req;
      const progressUpdate = (progress) => {
        res.progress({ status: 'initializing api', progress });
      };

      progressUpdate(0);
      base.Init(null, options.lang);
      progressUpdate(0.3);

      // Every option, `lang` included, is forwarded as a Tesseract variable.
      Object.keys(options).forEach((key) => {
        base.SetVariable(key, options[key]);
      });

      progressUpdate(0.6);
      const ptr = setImage(req.image);
      progressUpdate(1);

      base.Recognize(null);

      const result = dump(Module, base);

      base.End();
      Module._free(ptr);

      res.resolve(result);
    }))
    .catch((err) => {
      // Stringify so the error can travel through postMessage.
      res.reject(String(err));
    })
);
function handleDetect(req, res){
const handleDetect = (req, res) => {
handleInit(req, res)
.then(() => {
req.options.lang = 'osd';
@ -140,17 +95,17 @@ function handleDetect(req, res){ @@ -140,17 +95,17 @@ function handleDetect(req, res){
base.Init(null, 'osd');
base.SetPageSegMode(Module.PSM_OSD_ONLY);
var ptr = setImage(Module, base, req.image),
results = new Module.OSResults();
const ptr = setImage(req.image);
const results = new Module.OSResults();
if(!base.DetectOS(results)){
if (!base.DetectOS(results)) {
base.End();
Module._free(ptr);
res.reject("Failed to detect OS");
res.reject('Failed to detect OS');
} else {
var best = results.get_best_result(),
oid = best.get_orientation_id(),
sid = best.get_script_id();
const best = results.get_best_result();
const oid = best.get_orientation_id();
const sid = best.get_script_id();
base.End();
Module._free(ptr);
@ -160,9 +115,40 @@ function handleDetect(req, res){ @@ -160,9 +115,40 @@ function handleDetect(req, res){
script: results.get_unicharset().get_script_from_script_id(sid),
script_confidence: best.get_sconfidence(),
orientation_degrees: [0, 270, 180, 90][oid],
orientation_confidence: best.get_oconfidence()
orientation_confidence: best.get_oconfidence(),
});
}
});
});
}
};
exports.dispatchHandlers = (packet, send) => {
const respond = (status, data) => {
send({
jobId: packet.jobId,
status,
action: packet.action,
data,
});
};
respond.resolve = respond.bind(this, 'resolve');
respond.reject = respond.bind(this, 'reject');
respond.progress = respond.bind(this, 'progress');
latestJob = respond;
try {
if (packet.action === 'recognize') {
handleRecognize(packet.payload, respond);
} else if (packet.action === 'detect') {
handleDetect(packet.payload, respond);
}
} catch (err) {
// Prepare exception to travel through postMessage
respond.reject(err.toString());
}
};
exports.setAdapter = (impl) => {
adapter = impl;
};

47
src/node/lang.js

@ -1,47 +0,0 @@ @@ -1,47 +0,0 @@
const https = require("https"),
http = require("http"),
zlib = require("zlib"),
fs = require("fs"),
path = require("path"),
isURL = require("is-url");
var langdata = require('../common/langdata.json')
/**
 * Provide the traineddata for `req.options.lang` to `cb` as a Uint8Array.
 *
 * The file is first looked up on disk. `req.workerOptions.langPath` defaults to
 * a URL where languages can be downloaded; when it is a URL the lookup uses
 * `<lang>.traineddata` in the current working directory, otherwise it uses
 * `<langPath>/<lang>.traineddata`. If the file cannot be read,
 * `<langPath><lang>.traineddata.gz` is downloaded, gunzipped into
 * `<lang>.traineddata` in the current working directory, and the lookup is
 * retried.
 *
 * NOTE(review): download, gunzip and write errors are not handled, and `cb`
 * has no error argument to report them through.
 *
 * @param {object} req - Reads `req.options.lang` and `req.workerOptions.langPath`.
 * @param {object} res - `res.progress` receives download progress updates.
 * @param {Function} cb - Called with the traineddata bytes.
 */
function getLanguageData(req, res, cb) {
  const { lang } = req.options;
  const { langPath } = req.workerOptions;
  const langfile = `${lang}.traineddata.gz`;
  const cachedFile = `${lang}.traineddata`;

  const localPath = isURL(langPath) ? cachedFile : path.join(langPath, cachedFile);
  const fetchProtocol = langPath.startsWith('http://') ? http : https;

  fs.readFile(localPath, (err, data) => {
    if (!err) {
      cb(new Uint8Array(data));
      return;
    }

    fetchProtocol.get(langPath + langfile, (stream) => {
      let receivedBytes = 0;
      stream.on('data', (chunk) => {
        receivedBytes += chunk.length;
        res.progress({
          status: `downloading ${langfile}`,
          loaded: receivedBytes,
          progress: Math.min(1, receivedBytes / langdata[lang]),
        });
      });

      const output = fs.createWriteStream(cachedFile);
      // Retry only after the file has been flushed ('finish'); gunzip's 'end'
      // can fire while the write stream still has buffered data. Pass the
      // original `res` on, not the HTTP response stream.
      output.on('finish', () => {
        getLanguageData(req, res, cb);
      });
      stream.pipe(zlib.createGunzip()).pipe(output);
    });
  });
}
module.exports = getLanguageData;

1
src/node/worker.js

@ -16,5 +16,4 @@ workerUtils.setAdapter({ @@ -16,5 +16,4 @@ workerUtils.setAdapter({
}
return TesseractCore;
},
getLanguageData: require('./lang'),
});

Loading…
Cancel
Save