Browse Source

Add 3rd parameter (params) to recognize()

pull/265/head
Jerome Wu 6 years ago
parent
commit
7fa06577d5
  1. 65
      src/common/TesseractWorker.js
  2. 23
      src/common/worker.js
  3. 11
      src/index.js

65
src/common/TesseractWorker.js

@ -1,39 +1,82 @@ @@ -1,39 +1,82 @@
/**
*
* The core part of tesseract.js to execute the OCR jobs.
*
* @fileoverview Worker for OCR jobs
* @author Kevin Kwok <antimatter15@gmail.com>
* @author Guillermo Webster <gui@mit.edu>
* @author Jerome Wu <jeromewus@gmail.com>
*/
const check = require('check-types');
const adapter = require('../node');
const circularize = require('./circularize');
const TesseractJob = require('./TesseractJob');
/**
* TesseractWorker
* @name TesseractWorker
* @function execute TesseractJob with a queue mechanism
* @access public
*/
class TesseractWorker {
constructor(workerOptions = {}) {
/**
* constructor
*
* @name constructor
* @function initialize the worker
* @access public
* @param {object} options - worker configurations
* @param {string} options.workerPath -
* A remote path to load worker script.
* In browser-like environment, it is downloaded from a CDN service.
* Please update this option if you self-host the worker script.
* In a Node.js environment, this option is not used because the worker script is available locally.
* @param {string} options.corePath -
* A remote path to load tesseract.js-core script.
* In browser-like environment, it is downloaded from a CDN service.
* Please update this option if you self-host the core script.
* In a Node.js environment, this option is not used because the core script is available locally.
* @param {string} options.langPath -
* A remote path to load *.traineddata.gz from; it is downloaded from a CDN service.
* Please update this option if you self-host the language data.
* @param {string} [options.cachePath=.] - @see {@link https://github.com/jeromewu/tesseract.js-utils/blob/master/src/loadLang.js}
* @param {string} [options.cacheMethod=write] - @see {@link https://github.com/jeromewu/tesseract.js-utils/blob/master/src/loadLang.js}
* @param {string} [options.dataPath=.] - @see {@link https://github.com/jeromewu/tesseract.js-utils/blob/master/src/loadLang.js}
*
*/
constructor(options = {}) {
this.worker = null;
this.workerOptions = Object.assign({}, adapter.defaultOptions, workerOptions);
this.options = {
...adapter.defaultOptions,
...options,
};
this._currentJob = null;
this._queue = [];
}
recognize(image, options = { lang: 'eng' }) {
recognize(image, lang = 'eng', params = {}) {
return this._delay((job) => {
job._send(
'recognize',
{
image,
options: check.string(options)
? { lang: options || 'eng' }
: options,
workerOptions: this.workerOptions,
lang,
params,
options: this.options,
},
);
});
}
detect(image, options = {}) {
detect(image, params = {}) {
return this._delay((job) => {
job._send(
'detect',
{
image,
options,
workerOptions: this.workerOptions,
lang: 'osd',
params,
options: this.options,
},
);
});
@ -50,7 +93,7 @@ class TesseractWorker { @@ -50,7 +93,7 @@ class TesseractWorker {
_delay(fn) {
if (check.null(this.worker)) {
this.worker = adapter.spawnWorker(this, this.workerOptions);
this.worker = adapter.spawnWorker(this, this.options);
}
const job = new TesseractJob(this);

23
src/common/worker.js

@ -23,7 +23,7 @@ const setImage = (image) => { @@ -23,7 +23,7 @@ const setImage = (image) => {
const handleInit = (req, res) => {
let MIN_MEMORY = 100663296;
if (['chi_sim', 'chi_tra', 'jpn'].includes(req.options.lang)) {
if (['chi_sim', 'chi_tra', 'jpn'].includes(req.lang)) {
MIN_MEMORY = 167772160;
}
@ -49,8 +49,8 @@ const handleInit = (req, res) => { @@ -49,8 +49,8 @@ const handleInit = (req, res) => {
};
const loadLanguage = ({
options: { lang },
workerOptions: {
lang,
options: {
langPath, cachePath, cacheMethod, dataPath,
},
}) => (
@ -69,18 +69,18 @@ const handleRecognize = (req, res) => ( @@ -69,18 +69,18 @@ const handleRecognize = (req, res) => (
.then(() => (
loadLanguage(req)
.then(() => {
const { options } = req;
const { image, lang, params } = req;
const progressUpdate = (progress) => {
res.progress({ status: 'initializing api', progress });
};
progressUpdate(0);
base.Init(null, options.lang);
base.Init(null, lang);
progressUpdate(0.3);
Object.keys(options).forEach((key) => {
base.SetVariable(key, options[key]);
Object.keys(params).forEach((key) => {
base.SetVariable(key, params[key]);
});
progressUpdate(0.6);
const ptr = setImage(req.image);
const ptr = setImage(image);
progressUpdate(1);
base.Recognize(null);
const result = dump(Module, base);
@ -95,12 +95,13 @@ const handleRecognize = (req, res) => ( @@ -95,12 +95,13 @@ const handleRecognize = (req, res) => (
const handleDetect = (req, res) => (
handleInit(req, res)
.then(() => (
loadLanguage({ ...req, options: { ...req.options, lang: 'osd' } })
loadLanguage(req)
.then(() => {
base.Init(null, 'osd');
const { image, lang } = req;
base.Init(null, lang);
base.SetPageSegMode(Module.PSM_OSD_ONLY);
const ptr = setImage(req.image);
const ptr = setImage(image);
const results = new Module.OSResults();
if (!base.DetectOS(results)) {

11
src/index.js

@ -1,7 +1,18 @@ @@ -1,7 +1,18 @@
/**
*
* Entry point for tesseract.js, should be the entry when bundling.
*
* @fileoverview entry point for tesseract.js
* @author Kevin Kwok <antimatter15@gmail.com>
* @author Guillermo Webster <gui@mit.edu>
* @author Jerome Wu <jeromewus@gmail.com>
*/
const utils = require('tesseract.js-utils');
const TesseractWorker = require('./common/TesseractWorker');
/**
 * Public API of the package entry point.
 *
 * Exposes exactly two members, in this order:
 *  - TesseractWorker: the class loaded from ./common/TesseractWorker
 *  - utils: the module loaded from the tesseract.js-utils package
 */
const publicApi = {
  /** Worker for OCR, @see common/TesseractWorker.js */
  TesseractWorker,
  /** Utilities for tesseract.js, @see {@link https://www.npmjs.com/package/tesseract.js-utils} */
  utils,
};

module.exports = publicApi;

Loading…
Cancel
Save