From f1c9e2f08caa87b456073696a2207443d66b2801 Mon Sep 17 00:00:00 2001 From: Jerome Wu Date: Tue, 14 May 2019 18:28:33 +0800 Subject: [PATCH] Add init_oem parameters in recognize() for switching modes --- src/common/types.js | 17 +++++++++++++++++ src/common/workerUtils.js | 8 ++++++-- src/index.js | 3 +++ 3 files changed, 26 insertions(+), 2 deletions(-) create mode 100644 src/common/types.js diff --git a/src/common/types.js b/src/common/types.js new file mode 100644 index 0000000..faadd9f --- /dev/null +++ b/src/common/types.js @@ -0,0 +1,17 @@ +module.exports = { + /* + * OEM = OCR Engine Mode, and there are 4 possible modes (COUNT is the number of modes). + * + * By default tesseract.js uses DEFAULT mode, which uses LSTM when possible. + * If you need to use some tesseract v3 features (like tessedit_char_whitelist), + * you need to use TESSERACT_ONLY mode. + * + */ + OEM: { + TESSERACT_ONLY: 0, + LSTM_ONLY: 1, + TESSERACT_LSTM_COMBINED: 2, + DEFAULT: 3, + COUNT: 4, + }, +}; diff --git a/src/common/workerUtils.js b/src/common/workerUtils.js index caea2a3..0f3905e 100644 --- a/src/common/workerUtils.js +++ b/src/common/workerUtils.js @@ -8,6 +8,7 @@ * @author Jerome Wu */ const { readImage, loadLang } = require('tesseract.js-utils'); +const check = require('check-types'); const dump = require('./dump'); /* @@ -122,13 +123,16 @@ const handleRecognize = ({ .then(() => ( loadLanguage({ lang, options }, res) .then(() => { + const OEM = check.undefined(params['init_oem']) + ? 
TessModule.OEM_DEFAULT + : params['init_oem']; const progressUpdate = (progress) => { res.progress({ status: 'initializing api', progress }); }; progressUpdate(0); - api.Init(null, lang); + api.Init(null, lang, OEM); progressUpdate(0.3); - Object.keys(params).forEach((key) => { + Object.keys(params).filter(key => !key.startsWith('init_')).forEach((key) => { api.SetVariable(key, params[key]); }); progressUpdate(0.6); diff --git a/src/index.js b/src/index.js index 71bc77e..ab9a505 100644 --- a/src/index.js +++ b/src/index.js @@ -9,10 +9,13 @@ */ const utils = require('tesseract.js-utils'); const TesseractWorker = require('./common/TesseractWorker'); +const { OEM } = require('./common/types'); module.exports = { /** Worker for OCR, @see common/TesseractWorker.js */ TesseractWorker, /** Utilities for tesseract.js, @see {@link https://www.npmjs.com/package/tesseract.js-utils} */ utils, + /** Check ./common/types for more details */ + OEM, };