Browse Source

Add 3rd parameter (params) to recognize()

pull/265/head
Jerome Wu 6 years ago
parent
commit
7fa06577d5
  1. 65
      src/common/TesseractWorker.js
  2. 23
      src/common/worker.js
  3. 11
      src/index.js

65
src/common/TesseractWorker.js

@ -1,39 +1,82 @@
/**
*
* The core part of tesseract.js to execute the OCR jobs.
*
* @fileoverview Worker for OCR jobs
* @author Kevin Kwok <antimatter15@gmail.com>
* @author Guillermo Webster <gui@mit.edu>
* @author Jerome Wu <jeromewus@gmail.com>
*/
const check = require('check-types'); const check = require('check-types');
const adapter = require('../node'); const adapter = require('../node');
const circularize = require('./circularize'); const circularize = require('./circularize');
const TesseractJob = require('./TesseractJob'); const TesseractJob = require('./TesseractJob');
/**
* TesseractWorker
* @name TesseractWorker
* @function execute TesseractJob with a queue mechanism
* @access public
*/
class TesseractWorker { class TesseractWorker {
constructor(workerOptions = {}) { /**
* constructor
*
* @name constructor
* @function initialize the worker
* @access public
* @param {object} options - worker configurations
* @param {string} options.workerPath -
* A remote path to load worker script.
* In a browser-like environment, it is downloaded from a CDN service.
* Please update this option if you self-host the worker script.
* In a Node.js environment, this option is not used because the worker script is loaded locally.
* @param {string} options.corePath -
* A remote path to load tesseract.js-core script.
* In a browser-like environment, it is downloaded from a CDN service.
* Please update this option if you self-host the core script.
* In a Node.js environment, this option is not used because the core script is loaded locally.
* @param {string} options.langPath -
* A remote path to load *.traineddata.gz; by default it is downloaded from a CDN service.
* Please update this option if you self-host the traineddata files.
* @param {string} [options.cachePath=.] - @see {@link https://github.com/jeromewu/tesseract.js-utils/blob/master/src/loadLang.js}
* @param {string} [options.cacheMethod=write] - @see {@link https://github.com/jeromewu/tesseract.js-utils/blob/master/src/loadLang.js}
* @param {string} [options.dataPath=.] - @see {@link https://github.com/jeromewu/tesseract.js-utils/blob/master/src/loadLang.js}
*
*/
constructor(options = {}) {
this.worker = null; this.worker = null;
this.workerOptions = Object.assign({}, adapter.defaultOptions, workerOptions); this.options = {
...adapter.defaultOptions,
...options,
};
this._currentJob = null; this._currentJob = null;
this._queue = []; this._queue = [];
} }
recognize(image, options = { lang: 'eng' }) { recognize(image, lang = 'eng', params = {}) {
return this._delay((job) => { return this._delay((job) => {
job._send( job._send(
'recognize', 'recognize',
{ {
image, image,
options: check.string(options) lang,
? { lang: options || 'eng' } params,
: options, options: this.options,
workerOptions: this.workerOptions,
}, },
); );
}); });
} }
detect(image, options = {}) { detect(image, params = {}) {
return this._delay((job) => { return this._delay((job) => {
job._send( job._send(
'detect', 'detect',
{ {
image, image,
options, lang: 'osd',
workerOptions: this.workerOptions, params,
options: this.options,
}, },
); );
}); });
@ -50,7 +93,7 @@ class TesseractWorker {
_delay(fn) { _delay(fn) {
if (check.null(this.worker)) { if (check.null(this.worker)) {
this.worker = adapter.spawnWorker(this, this.workerOptions); this.worker = adapter.spawnWorker(this, this.options);
} }
const job = new TesseractJob(this); const job = new TesseractJob(this);

23
src/common/worker.js

@ -23,7 +23,7 @@ const setImage = (image) => {
const handleInit = (req, res) => { const handleInit = (req, res) => {
let MIN_MEMORY = 100663296; let MIN_MEMORY = 100663296;
if (['chi_sim', 'chi_tra', 'jpn'].includes(req.options.lang)) { if (['chi_sim', 'chi_tra', 'jpn'].includes(req.lang)) {
MIN_MEMORY = 167772160; MIN_MEMORY = 167772160;
} }
@ -49,8 +49,8 @@ const handleInit = (req, res) => {
}; };
const loadLanguage = ({ const loadLanguage = ({
options: { lang }, lang,
workerOptions: { options: {
langPath, cachePath, cacheMethod, dataPath, langPath, cachePath, cacheMethod, dataPath,
}, },
}) => ( }) => (
@ -69,18 +69,18 @@ const handleRecognize = (req, res) => (
.then(() => ( .then(() => (
loadLanguage(req) loadLanguage(req)
.then(() => { .then(() => {
const { options } = req; const { image, lang, params } = req;
const progressUpdate = (progress) => { const progressUpdate = (progress) => {
res.progress({ status: 'initializing api', progress }); res.progress({ status: 'initializing api', progress });
}; };
progressUpdate(0); progressUpdate(0);
base.Init(null, options.lang); base.Init(null, lang);
progressUpdate(0.3); progressUpdate(0.3);
Object.keys(options).forEach((key) => { Object.keys(params).forEach((key) => {
base.SetVariable(key, options[key]); base.SetVariable(key, params[key]);
}); });
progressUpdate(0.6); progressUpdate(0.6);
const ptr = setImage(req.image); const ptr = setImage(image);
progressUpdate(1); progressUpdate(1);
base.Recognize(null); base.Recognize(null);
const result = dump(Module, base); const result = dump(Module, base);
@ -95,12 +95,13 @@ const handleRecognize = (req, res) => (
const handleDetect = (req, res) => ( const handleDetect = (req, res) => (
handleInit(req, res) handleInit(req, res)
.then(() => ( .then(() => (
loadLanguage({ ...req, options: { ...req.options, lang: 'osd' } }) loadLanguage(req)
.then(() => { .then(() => {
base.Init(null, 'osd'); const { image, lang } = req;
base.Init(null, lang);
base.SetPageSegMode(Module.PSM_OSD_ONLY); base.SetPageSegMode(Module.PSM_OSD_ONLY);
const ptr = setImage(req.image); const ptr = setImage(image);
const results = new Module.OSResults(); const results = new Module.OSResults();
if (!base.DetectOS(results)) { if (!base.DetectOS(results)) {

11
src/index.js

@ -1,7 +1,18 @@
/**
*
* Entry point for tesseract.js; use this file as the entry when bundling.
*
* @fileoverview entry point for tesseract.js
* @author Kevin Kwok <antimatter15@gmail.com>
* @author Guillermo Webster <gui@mit.edu>
* @author Jerome Wu <jeromewus@gmail.com>
*/
const utils = require('tesseract.js-utils'); const utils = require('tesseract.js-utils');
const TesseractWorker = require('./common/TesseractWorker'); const TesseractWorker = require('./common/TesseractWorker');
module.exports = { module.exports = {
/** Worker for OCR, @see common/TesseractWorker.js */
TesseractWorker, TesseractWorker,
/** Utilities for tesseract.js, @see {@link https://www.npmjs.com/package/tesseract.js-utils} */
utils, utils,
}; };

Loading…
Cancel
Save