Browse Source

Upgrade to latest tesseract.js-utils

feature/aio
Jerome Wu 6 years ago
parent
commit
de4b98ae23
  1. 6
      package-lock.json
  2. 4
      package.json
  3. 1
      scripts/webpack.config.common.js
  4. 20
      scripts/webpack.config.dev.js
  5. 22
      scripts/webpack.config.prod.js
  6. 10
      src/common/TesseractWorker.js
  7. 36
      src/common/workerUtils.js
  8. 4
      tests/detect.test.js
  9. 16
      tests/recognize.test.js

6
package-lock.json generated

@ -8537,9 +8537,9 @@
"integrity": "sha512-QmNgMA9m5ES5uMTqpOAPysrUA80vUx/6WKQlfkK3zhOeAgqv8DjwwcDv9tQv2TgRzOQ+LFKrJn94Y2rw5b2IGw==" "integrity": "sha512-QmNgMA9m5ES5uMTqpOAPysrUA80vUx/6WKQlfkK3zhOeAgqv8DjwwcDv9tQv2TgRzOQ+LFKrJn94Y2rw5b2IGw=="
}, },
"tesseract.js-utils": { "tesseract.js-utils": {
"version": "1.0.0-beta.6", "version": "1.0.0-beta.7",
"resolved": "https://registry.npmjs.org/tesseract.js-utils/-/tesseract.js-utils-1.0.0-beta.6.tgz", "resolved": "https://registry.npmjs.org/tesseract.js-utils/-/tesseract.js-utils-1.0.0-beta.7.tgz",
"integrity": "sha512-AENYhkqafwysayWmKtyApV0gR4abLJ426plLNHs/++8oHt+ekooyp77ew/q4+QLE7cbUDyxiNGawcraOWE/RuQ==", "integrity": "sha512-MvBQNxVoueDg/8iN8jb26Tnj0lBRfmHVMcnaNdab+JeQKg/SuNXWMCpTD4D17+S6zeE9AAlVQymDmXH2/3vaMg==",
"requires": { "requires": {
"axios": "^0.18.0", "axios": "^0.18.0",
"bmp-js": "^0.1.0", "bmp-js": "^0.1.0",

4
package.json

@ -5,7 +5,7 @@
"main": "src/index.js", "main": "src/index.js",
"scripts": { "scripts": {
"start": "node scripts/server.js", "start": "node scripts/server.js",
"build": "webpack --config scripts/webpack.config.prod.js", "build": "webpack --progress --config scripts/webpack.config.prod.js",
"prepublishOnly": "npm run build", "prepublishOnly": "npm run build",
"wait": "wait-on http://localhost:3000/package.json", "wait": "wait-on http://localhost:3000/package.json",
"test": "npm-run-all -p -r start test:all", "test": "npm-run-all -p -r start test:all",
@ -54,7 +54,7 @@
"node-fetch": "^2.3.0", "node-fetch": "^2.3.0",
"resolve-url": "^0.2.1", "resolve-url": "^0.2.1",
"tesseract.js-core": "^2.0.0-beta.10", "tesseract.js-core": "^2.0.0-beta.10",
"tesseract.js-utils": "^1.0.0-beta.6" "tesseract.js-utils": "^1.0.0-beta.7"
}, },
"repository": { "repository": {
"type": "git", "type": "git",

1
scripts/webpack.config.common.js

@ -4,6 +4,7 @@ module.exports = {
{ {
test: /\.m?js$/, test: /\.m?js$/,
// exclude: /(node_modules|bower_components)/, // exclude: /(node_modules|bower_components)/,
exclude: /(tesseract.js-core)/,
use: { use: {
loader: 'babel-loader', loader: 'babel-loader',
options: { options: {

20
scripts/webpack.config.dev.js

@ -3,9 +3,10 @@ const webpack = require('webpack');
const common = require('./webpack.config.common'); const common = require('./webpack.config.common');
const genConfig = ({ const genConfig = ({
entry, filename, library, libraryTarget, entry, filename, library, libraryTarget, ...config
}) => ({ }) => ({
...common, ...common,
...config,
mode: 'development', mode: 'development',
entry, entry,
output: { output: {
@ -29,10 +30,15 @@ module.exports = [
library: 'Tesseract', library: 'Tesseract',
libraryTarget: 'umd', libraryTarget: 'umd',
}), }),
/* genConfig({
*genConfig({ entry: path.resolve(__dirname, '..', 'src', 'index.js'),
* entry: path.resolve(__dirname, '..', 'src', 'browser', 'worker.js'), filename: 'tesseract.asm.dev.js',
* filename: 'worker.dev.js', library: 'Tesseract',
*}), libraryTarget: 'umd',
*/ resolve: {
alias: {
'tesseract.js-core/tesseract-core.wasm.js': 'tesseract.js-core/tesseract-core.asm.js',
},
},
}),
]; ];

22
scripts/webpack.config.prod.js

@ -2,11 +2,12 @@ const path = require('path');
const common = require('./webpack.config.common'); const common = require('./webpack.config.common');
const genConfig = ({ const genConfig = ({
entry, filename, library, libraryTarget, entry, filename, library, libraryTarget, ...config
}) => ({ }) => ({
...common, ...common,
...config,
mode: 'production', mode: 'production',
devtool: 'source-map', // devtool: 'source-map',
entry, entry,
output: { output: {
path: path.resolve(__dirname, '..', 'dist'), path: path.resolve(__dirname, '..', 'dist'),
@ -23,10 +24,15 @@ module.exports = [
library: 'Tesseract', library: 'Tesseract',
libraryTarget: 'umd', libraryTarget: 'umd',
}), }),
/* genConfig({
*genConfig({ entry: path.resolve(__dirname, '..', 'src', 'index.js'),
* entry: path.resolve(__dirname, '..', 'src', 'browser', 'worker.js'), filename: 'tesseract.asm.min.js',
* filename: 'worker.min.js', library: 'Tesseract',
*}), libraryTarget: 'umd',
*/ resolve: {
alias: {
'tesseract.js-core/tesseract-core.wasm.js': 'tesseract.js-core/tesseract-core.asm.js',
},
},
}),
]; ];

10
src/common/TesseractWorker.js

@ -67,12 +67,12 @@ class TesseractWorker {
* @function recognize text in given image * @function recognize text in given image
* @access public * @access public
* @param {Buffer, string} image - image to be recognized * @param {Buffer, string} image - image to be recognized
* @param {string} [lang=eng] - language to recognize * @param {string, array} [langs='eng'] - language to recognize
* @param {object} params - tesseract parameters * @param {object} params - tesseract parameters
* *
*/ */
recognize(image, lang = 'eng', params = {}) { recognize(image, langs = 'eng', params = {}) {
return this._sendJob('recognize', image, lang, params); return this._sendJob('recognize', image, langs, params);
} }
/** /**
@ -152,13 +152,13 @@ class TesseractWorker {
* @param {string} lang language to recognize * @param {string} lang language to recognize
* @param {object} params tesseract parameters * @param {object} params tesseract parameters
*/ */
_sendJob(type, image, lang, params) { _sendJob(type, image, langs, params) {
return this._delay((job) => { return this._delay((job) => {
job.send( job.send(
type, type,
{ {
image, image,
lang, langs,
params, params,
options: this.options, options: this.options,
}, },

36
src/common/workerUtils.js

@ -52,16 +52,24 @@ const setImage = (image) => {
return data === null ? pix : data; return data === null ? pix : data;
}; };
/**
 * getLangsStr
 *
 * @name getLangsStr
 * @function build the '+'-separated language string (e.g. eng+chi_tra)
 * @access private
 * @param {string, array} langs - a ready-made language string, which is
 *   returned untouched, or an array whose entries are either language
 *   strings or objects carrying the language in their `code` property
 * @returns {string} language codes joined with '+'
 */
const getLangsStr = (langs) => {
  // An array entry is used directly when it is a string; otherwise its `code` is read.
  const toCode = entry => (typeof entry === 'string' ? entry : entry.code);
  return typeof langs === 'string' ? langs : langs.map(toCode).join('+');
};
/** /**
* handleParams * handleParams
* *
* @name handleParams * @name handleParams
* @function handle params from users * @function handle params from users
* @access private * @access private
* @param {string} lang - lang string for Init() * @param {string} langs - lang string for Init()
* @param {object} customParams - an object of params * @param {object} customParams - an object of params
*/ */
const handleParams = (lang, customParams) => { const handleParams = (langs, customParams) => {
const { const {
tessedit_ocr_engine_mode, tessedit_ocr_engine_mode,
...params ...params
@ -69,7 +77,7 @@ const handleParams = (lang, customParams) => {
...defaultParams, ...defaultParams,
...customParams, ...customParams,
}; };
api.Init(null, lang, tessedit_ocr_engine_mode); api.Init(null, getLangsStr(langs), tessedit_ocr_engine_mode);
Object.keys(params).forEach((key) => { Object.keys(params).forEach((key) => {
api.SetVariable(key, params[key]); api.SetVariable(key, params[key]);
}); });
@ -158,14 +166,14 @@ const handleInit = ({ corePath }, res) => {
* @function load language from remote or local cache * @function load language from remote or local cache
* @access public * @access public
* @param {object} req - job payload * @param {object} req - job payload
* @param {string} req.lang - languages to load, ex: eng, eng+chi_tra * @param {string} req.langs - languages to load, ex: eng, eng+chi_tra
* @param {object} req.options - other options for loadLang function * @param {object} req.options - other options for loadLang function
* @param {object} res - job instance * @param {object} res - job instance
* @returns {Promise} A Promise for callback * @returns {Promise} A Promise for callback
*/ */
const loadLanguage = ({ lang, options }, res) => { const loadLanguage = ({ langs, options }, res) => {
res.progress({ status: 'loading language traineddata', progress: 0 }); res.progress({ status: 'loading language traineddata', progress: 0 });
return loadLang({ lang, TessModule, ...options }).then((...args) => { return loadLang({ langs, TessModule, ...options }).then((...args) => {
res.progress({ status: 'loaded language traineddata', progress: 1 }); res.progress({ status: 'loaded language traineddata', progress: 1 });
return args; return args;
}); });
@ -179,17 +187,17 @@ const loadLanguage = ({ lang, options }, res) => {
* @access public * @access public
* @param {object} req - job payload * @param {object} req - job payload
* @param {array} req.image - binary image in array format * @param {array} req.image - binary image in array format
* @param {string} req.lang - languages to load, ex: eng, eng+chi_tra * @param {string} req.langs - languages to load, ex: eng, eng+chi_tra
* @param {object} req.options - other options for loadLang function * @param {object} req.options - other options for loadLang function
* @param {object} req.params - parameters for tesseract * @param {object} req.params - parameters for tesseract
* @param {object} res - job instance * @param {object} res - job instance
*/ */
const handleRecognize = ({ const handleRecognize = ({
image, lang, options, params, image, langs, options, params,
}, res) => ( }, res) => (
handleInit(options, res) handleInit(options, res)
.then(() => ( .then(() => (
loadLanguage({ lang, options }, res) loadLanguage({ langs, options }, res)
.catch((e) => { .catch((e) => {
if (e instanceof DOMException) { if (e instanceof DOMException) {
/* /*
@ -206,7 +214,7 @@ const handleRecognize = ({
res.progress({ status: 'initializing api', progress }); res.progress({ status: 'initializing api', progress });
}; };
progressUpdate(0); progressUpdate(0);
handleParams(lang, params); handleParams(langs, params);
progressUpdate(0.5); progressUpdate(0.5);
const ptr = setImage(image); const ptr = setImage(image);
progressUpdate(1); progressUpdate(1);
@ -228,18 +236,18 @@ const handleRecognize = ({
* @access public * @access public
* @param {object} req - job payload * @param {object} req - job payload
* @param {array} req.image - binary image in array format * @param {array} req.image - binary image in array format
* @param {string} req.lang - languages to load, ex: eng, eng+chi_tra * @param {string} req.langs - languages to load, ex: eng, eng+chi_tra
* @param {object} req.options - other options for loadLang function * @param {object} req.options - other options for loadLang function
* @param {object} res - job instance * @param {object} res - job instance
*/ */
const handleDetect = ({ const handleDetect = ({
image, lang, options, image, langs, options,
}, res) => ( }, res) => (
handleInit(options, res) handleInit(options, res)
.then(() => ( .then(() => (
loadLanguage({ lang, options }, res) loadLanguage({ langs, options }, res)
.then(() => { .then(() => {
api.Init(null, lang); api.Init(null, getLangsStr(langs));
api.SetPageSegMode(TessModule.PSM_OSD_ONLY); api.SetPageSegMode(TessModule.PSM_OSD_ONLY);
const ptr = setImage(image); const ptr = setImage(image);

4
tests/detect.test.js

@ -1,5 +1,4 @@
const { TesseractWorker, utils: { loadLang } } = Tesseract; const { TesseractWorker } = Tesseract;
const isBrowser = typeof window !== 'undefined' && typeof window.document !== 'undefined';
const IMAGE_PATH = 'http://localhost:3000/tests/assets/images'; const IMAGE_PATH = 'http://localhost:3000/tests/assets/images';
const loadLangOptions = { const loadLangOptions = {
langPath: 'http://localhost:3000/tests/assets/traineddata', langPath: 'http://localhost:3000/tests/assets/traineddata',
@ -9,7 +8,6 @@ const loadLangOptions = {
const getWorker = options => ( const getWorker = options => (
new TesseractWorker({ new TesseractWorker({
cacheMethod: 'readOnly', cacheMethod: 'readOnly',
...(isBrowser ? { workerPath: 'http://localhost:3000/dist/worker.dev.js' } : {}),
...loadLangOptions, ...loadLangOptions,
...options, ...options,
}) })

16
tests/recognize.test.js

@ -1,6 +1,5 @@
const { TesseractWorker, utils: { loadLang } } = Tesseract; const { TesseractWorker } = Tesseract;
const isBrowser = typeof window !== 'undefined' && typeof window.document !== 'undefined';
const IMAGE_PATH = 'http://localhost:3000/tests/assets/images'; const IMAGE_PATH = 'http://localhost:3000/tests/assets/images';
const SIMPLE_TEXT = 'Tesseract.js\n'; const SIMPLE_TEXT = 'Tesseract.js\n';
const COMSIC_TEXT = 'HellO World\nfrom beyond\nthe Cosmic Void\n'; const COMSIC_TEXT = 'HellO World\nfrom beyond\nthe Cosmic Void\n';
@ -16,7 +15,6 @@ const loadLangOptions = {
const getWorker = options => ( const getWorker = options => (
new TesseractWorker({ new TesseractWorker({
cacheMethod: 'readOnly', cacheMethod: 'readOnly',
...(isBrowser ? { workerPath: 'http://localhost:3000/dist/worker.dev.js' } : {}),
...loadLangOptions, ...loadLangOptions,
...options, ...options,
}) })
@ -133,7 +131,7 @@ describe('recognize()', () => {
}).timeout(10000) }).timeout(10000)
)); ));
}); });
(isBrowser ? describe : describe.skip)('should read image from video DOM element (browser only)', () => { (isBrowser ? describe : describe.skip)('should read image from video DOM element (browser only)', () => {
FORMATS.forEach(format => ( FORMATS.forEach(format => (
it(`support ${format} format`, (done) => { it(`support ${format} format`, (done) => {
@ -160,22 +158,23 @@ describe('recognize()', () => {
let canvasDOM = null; let canvasDOM = null;
let imageDOM = null; let imageDOM = null;
let idx = 0; let idx = 0;
beforeEach(function cb(done) { beforeEach((done) => {
canvasDOM = document.createElement('canvas'); canvasDOM = document.createElement('canvas');
imageDOM = document.createElement('img'); imageDOM = document.createElement('img');
imageDOM.setAttribute('crossOrigin', 'Anonymous'); imageDOM.setAttribute('crossOrigin', 'Anonymous');
imageDOM.onload = () => { imageDOM.onload = () => {
canvasDOM.getContext('2d').drawImage(imageDOM, 0, 0); canvasDOM.getContext('2d').drawImage(imageDOM, 0, 0);
done(); done();
} };
imageDOM.setAttribute('src', `${IMAGE_PATH}/simple.${formats[idx++]}`); imageDOM.setAttribute('src', `${IMAGE_PATH}/simple.${formats[idx]}`);
idx += 1;
}); });
afterEach(() => { afterEach(() => {
canvasDOM.remove(); canvasDOM.remove();
imageDOM.remove(); imageDOM.remove();
}); });
formats.forEach(format => ( formats.forEach(format => (
it(`support ${format} format`, (done) => { it(`support ${format} format`, (done) => {
const worker = getWorker(); const worker = getWorker();
@ -189,5 +188,4 @@ describe('recognize()', () => {
}).timeout(10000) }).timeout(10000)
)); ));
}); });
}); });

Loading…
Cancel
Save