diff --git a/src/common/TesseractWorker.js b/src/common/TesseractWorker.js index 063f44f..25db027 100644 --- a/src/common/TesseractWorker.js +++ b/src/common/TesseractWorker.js @@ -1,39 +1,82 @@ +/** + * + * The core part of tesseract.js to execute the OCR jobs. + * + * @fileoverview Worker for OCR jobs + * @author Kevin Kwok + * @author Guillermo Webster + * @author Jerome Wu + */ const check = require('check-types'); const adapter = require('../node'); const circularize = require('./circularize'); const TesseractJob = require('./TesseractJob'); +/** + * TesseractWorker + * @name TesseractWorker + * @function execute TesseractJob with a queue mechanism + * @access public + */ class TesseractWorker { - constructor(workerOptions = {}) { + /** + * constructor + * + * @name constructor + * @function initialize the worker + * @access public + * @param {object} options - worker configurations + * @param {string} options.workerPath - + * A remote path to load worker script. + * In a browser-like environment, it is downloaded from a CDN service. + * Please update this option if you self-host the worker script. + * In the Node.js environment, this option is not used as the worker script is available locally. + * @param {string} options.corePath - + * A remote path to load tesseract.js-core script. + * In a browser-like environment, it is downloaded from a CDN service. + * Please update this option if you self-host the core script. + * In the Node.js environment, this option is not used as the core script is available locally. + * @param {string} options.langPath - + * A remote path to load *.traineddata.gz; it is downloaded from a CDN service. + * Please update this option if you self-host the language data. + * @param {string} [options.cachePath=.] 
- @see {@link https://github.com/jeromewu/tesseract.js-utils/blob/master/src/loadLang.js} + * @param {string} [options.cacheMethod=write] - @see {@link https://github.com/jeromewu/tesseract.js-utils/blob/master/src/loadLang.js} + * @param {string} [options.dataPath=.] - @see {@link https://github.com/jeromewu/tesseract.js-utils/blob/master/src/loadLang.js} + * + */ + constructor(options = {}) { this.worker = null; - this.workerOptions = Object.assign({}, adapter.defaultOptions, workerOptions); + this.options = { + ...adapter.defaultOptions, + ...options, + }; this._currentJob = null; this._queue = []; } - recognize(image, options = { lang: 'eng' }) { + recognize(image, lang = 'eng', params = {}) { return this._delay((job) => { job._send( 'recognize', { image, - options: check.string(options) - ? { lang: options || 'eng' } - : options, - workerOptions: this.workerOptions, + lang, + params, + options: this.options, }, ); }); } - detect(image, options = {}) { + detect(image, params = {}) { return this._delay((job) => { job._send( 'detect', { image, - options, - workerOptions: this.workerOptions, + lang: 'osd', + params, + options: this.options, }, ); }); @@ -50,7 +93,7 @@ class TesseractWorker { _delay(fn) { if (check.null(this.worker)) { - this.worker = adapter.spawnWorker(this, this.workerOptions); + this.worker = adapter.spawnWorker(this, this.options); } const job = new TesseractJob(this); diff --git a/src/common/worker.js b/src/common/worker.js index 441c486..50faf0d 100644 --- a/src/common/worker.js +++ b/src/common/worker.js @@ -23,7 +23,7 @@ const setImage = (image) => { const handleInit = (req, res) => { let MIN_MEMORY = 100663296; - if (['chi_sim', 'chi_tra', 'jpn'].includes(req.options.lang)) { + if (['chi_sim', 'chi_tra', 'jpn'].includes(req.lang)) { MIN_MEMORY = 167772160; } @@ -49,8 +49,8 @@ const handleInit = (req, res) => { }; const loadLanguage = ({ - options: { lang }, - workerOptions: { + lang, + options: { langPath, cachePath, cacheMethod, 
dataPath, }, }) => ( @@ -69,18 +69,18 @@ const handleRecognize = (req, res) => ( .then(() => ( loadLanguage(req) .then(() => { - const { options } = req; + const { image, lang, params } = req; const progressUpdate = (progress) => { res.progress({ status: 'initializing api', progress }); }; progressUpdate(0); - base.Init(null, options.lang); + base.Init(null, lang); progressUpdate(0.3); - Object.keys(options).forEach((key) => { - base.SetVariable(key, options[key]); + Object.keys(params).forEach((key) => { + base.SetVariable(key, params[key]); }); progressUpdate(0.6); - const ptr = setImage(req.image); + const ptr = setImage(image); progressUpdate(1); base.Recognize(null); const result = dump(Module, base); @@ -95,12 +95,13 @@ const handleRecognize = (req, res) => ( const handleDetect = (req, res) => ( handleInit(req, res) .then(() => ( - loadLanguage({ ...req, options: { ...req.options, lang: 'osd' } }) + loadLanguage(req) .then(() => { - base.Init(null, 'osd'); + const { image, lang } = req; + base.Init(null, lang); base.SetPageSegMode(Module.PSM_OSD_ONLY); - const ptr = setImage(req.image); + const ptr = setImage(image); const results = new Module.OSResults(); if (!base.DetectOS(results)) { diff --git a/src/index.js b/src/index.js index 4db7640..71bc77e 100644 --- a/src/index.js +++ b/src/index.js @@ -1,7 +1,18 @@ +/** + * + * Entry point for tesseract.js, should be the entry when bundling. + * + * @fileoverview entry point for tesseract.js + * @author Kevin Kwok + * @author Guillermo Webster + * @author Jerome Wu + */ const utils = require('tesseract.js-utils'); const TesseractWorker = require('./common/TesseractWorker'); module.exports = { + /** Worker for OCR, @see common/TesseractWorker.js */ TesseractWorker, + /** Utilities for tesseract.js, @see {@link https://www.npmjs.com/package/tesseract.js-utils} */ utils, };