diff --git a/docs/examples.md b/docs/examples.md index ac4924e..d476d9e 100644 --- a/docs/examples.md +++ b/docs/examples.md @@ -6,6 +6,7 @@ Example repositories: - Offline version: https://github.com/jeromewu/tesseract.js-offline - With Vue (similar with React/Angular): https://github.com/jeromewu/tesseract-vue-app +- Chrome Extension: https://github.com/jeromewu/tesseract.js-chrome-extension ### basic @@ -16,7 +17,7 @@ const { TesseractWorker } = Tesseract; const worker = new TesseractWorker(); worker - .recognize('http://jeroen.github.io/images/testocr.png') + .recognize('https://tesseract.projectnaptha.com/img/eng_bw.png') .then((result) => { console.log(result); }); @@ -31,7 +32,7 @@ const { TesseractWorker } = Tesseract; const worker = new TesseractWorker(); worker - .recognize('http://jeroen.github.io/images/testocr.png') + .recognize('https://tesseract.projectnaptha.com/img/eng_bw.png') .progress((p) => { console.log('progress', p); }) @@ -50,7 +51,7 @@ const worker = new TesseractWorker(); worker .recognize( - 'http://jeroen.github.io/images/testocr.png', + 'https://tesseract.projectnaptha.com/img/eng_bw.png', 'eng+chi_tra' ) .progress((p) => { @@ -73,7 +74,7 @@ const worker = new TesseractWorker(); worker .recognize( - 'http://jeroen.github.io/images/testocr.png', + 'https://tesseract.projectnaptha.com/img/eng_bw.png', 'eng', { 'tessedit_ocr_engine_mode': OEM.TESSERACT_ONLY, @@ -100,7 +101,7 @@ const worker = new TesseractWorker(); worker .recognize( - 'http://jeroen.github.io/images/testocr.png', + 'https://tesseract.projectnaptha.com/img/eng_bw.png', 'eng', { 'tessedit_pageseg_mode': PSM.SINGLE_BLOCK, @@ -126,7 +127,7 @@ const worker = new TesseractWorker(); worker .recognize( - 'http://jeroen.github.io/images/testocr.png', + 'https://tesseract.projectnaptha.com/img/eng_bw.png', 'eng', { 'tessedit_create_pdf': '1', @@ -150,7 +151,7 @@ const worker = new TesseractWorker(); worker .recognize( - 'http://jeroen.github.io/images/testocr.png', + 'https://tesseract.projectnaptha.com/img/eng_bw.png', 'eng', { 'tessedit_create_pdf': '1', diff --git a/package-lock.json b/package-lock.json index 1a2a3dd..c041946 100644 --- a/package-lock.json +++ b/package-lock.json @@ -7833,12 +7833,28 @@ "dev": true }, "rimraf": { - "version": "2.6.2", - "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-2.6.2.tgz", - "integrity": "sha512-lreewLK/BlghmxtfH36YYVg1i8IAce4TI7oao75I1g245+6BctqTVQiBP3YUJ9C6DQOXJmkYR9X9fCLtCOJc5w==", + "version": "2.6.3", + "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-2.6.3.tgz", + "integrity": "sha512-mwqeW5XsA2qAejG46gYdENaxXjx9onRNCfn7L0duuP4hCuTIi/QO7PDK07KJfp1d+izWPrzEJDcSqBa0OZQriA==", "dev": true, "requires": { - "glob": "^7.0.5" + "glob": "^7.1.3" + }, + "dependencies": { + "glob": { + "version": "7.1.4", + "resolved": "https://registry.npmjs.org/glob/-/glob-7.1.4.tgz", + "integrity": "sha512-hkLPepehmnKk41pUGm3sYxoFs/umurYfYJCerbXEyFIWcAzvpipAgVkBqqT9RBKMGjnq6kMuyYwha6csxbiM1A==", + "dev": true, + "requires": { + "fs.realpath": "^1.0.0", + "inflight": "^1.0.4", + "inherits": "2", + "minimatch": "^3.0.4", + "once": "^1.3.0", + "path-is-absolute": "^1.0.0" + } + } } }, "ripemd160": { @@ -8537,16 +8553,15 @@ "integrity": "sha512-QmNgMA9m5ES5uMTqpOAPysrUA80vUx/6WKQlfkK3zhOeAgqv8DjwwcDv9tQv2TgRzOQ+LFKrJn94Y2rw5b2IGw==" }, "tesseract.js-utils": { - "version": "1.0.0-beta.6", - "resolved": "https://registry.npmjs.org/tesseract.js-utils/-/tesseract.js-utils-1.0.0-beta.6.tgz", - "integrity": "sha512-AENYhkqafwysayWmKtyApV0gR4abLJ426plLNHs/++8oHt+ekooyp77ew/q4+QLE7cbUDyxiNGawcraOWE/RuQ==", + "version": "1.0.0-beta.8", + "resolved": "https://registry.npmjs.org/tesseract.js-utils/-/tesseract.js-utils-1.0.0-beta.8.tgz", + "integrity": "sha512-qjHBfWfzo2o1ZY9XI0Wh2hmpp38+mIgCMOk60W5Yyie/pBl421VLBKOZUEwQgpbLnOJ24VU6Q8yXsVgtFFHcFg==", "requires": { "axios": "^0.18.0", "bmp-js": "^0.1.0", "file-type": "^10.5.0", "idb-keyval": "^3.1.0", "is-url": "^1.2.4", - "resolve-url": "^0.2.1", "zlibjs": "^0.3.1" }, "dependencies": { diff --git a/package.json b/package.json index d90ee44..47cf8a8 100644 --- a/package.json +++ b/package.json @@ -5,7 +5,7 @@ "main": "src/index.js", "scripts": { "start": "node scripts/server.js", - "build": "webpack --config scripts/webpack.config.prod.js", + "build": "rimraf dist && webpack --config scripts/webpack.config.prod.js", "prepublishOnly": "npm run build", "wait": "wait-on http://localhost:3000/package.json", "test": "npm-run-all -p -r start test:all", @@ -41,6 +41,7 @@ "mocha-headless-chrome": "^2.0.2", "npm-run-all": "^4.1.5", "nyc": "^13.1.0", + "rimraf": "^2.6.3", "wait-on": "^3.2.0", "webpack": "^4.26.0", "webpack-cli": "^3.1.2", @@ -53,7 +54,7 @@ "node-fetch": "^2.3.0", "resolve-url": "^0.2.1", "tesseract.js-core": "^2.0.0-beta.10", - "tesseract.js-utils": "^1.0.0-beta.6" + "tesseract.js-utils": "^1.0.0-beta.8" }, "repository": { "type": "git", diff --git a/src/common/TesseractWorker.js b/src/common/TesseractWorker.js index 9c92afc..0a46d51 100644 --- a/src/common/TesseractWorker.js +++ b/src/common/TesseractWorker.js @@ -67,12 +67,12 @@ class TesseractWorker { * @function recognize text in given image * @access public * @param {Buffer, string} image - image to be recognized - * @param {string} [lang=eng] - language to recognize + * @param {string, array} [langs=eng] - languages to recognize * @param {object} params - tesseract parameters * */ - recognize(image, lang = 'eng', params = {}) { - return this._sendJob('recognize', image, lang, params); + recognize(image, langs = 'eng', params = {}) { + return this._sendJob('recognize', image, langs, params); } /** @@ -152,13 +152,13 @@ class TesseractWorker { * @param {string} lang language to recognize * @param {object} params tesseract parameters */ - _sendJob(type, image, lang, params) { + _sendJob(type, image, langs, params) { return this._delay((job) => { job.send( type, { image, - lang, + langs, params, options: this.options, }, diff --git a/src/common/workerUtils.js b/src/common/workerUtils.js index 93f9bce..43aeb74 100644 --- a/src/common/workerUtils.js +++ b/src/common/workerUtils.js @@ -52,16 +52,22 @@ const setImage = (image) => { return data === null ? pix : data; }; +const getLangsStr = langs => ( + typeof langs === 'string' + ? langs + : langs.map(lang => (typeof lang === 'string' ? lang : lang.data)).join('+') +); + /** * handleParams * * @name handleParams * @function hanlde params from users * @access private - * @param {string} lang - lang string for Init() + * @param {string} langs - lang string for Init() * @param {object} customParams - an object of params */ -const handleParams = (lang, customParams) => { +const handleParams = (langs, customParams) => { const { tessedit_ocr_engine_mode, ...params @@ -69,7 +75,7 @@ const handleParams = (lang, customParams) => { ...defaultParams, ...customParams, }; - api.Init(null, lang, tessedit_ocr_engine_mode); + api.Init(null, getLangsStr(langs), tessedit_ocr_engine_mode); Object.keys(params).forEach((key) => { api.SetVariable(key, params[key]); }); @@ -158,14 +164,14 @@ const handleInit = ({ corePath }, res) => { * @function load language from remote or local cache * @access public * @param {object} req - job payload - * @param {string} req.lang - languages to load, ex: eng, eng+chi_tra + * @param {string} req.langs - languages to load, ex: eng, eng+chi_tra * @param {object} req.options - other options for loadLang function * @param {object} res - job instance * @returns {Promise} A Promise for callback */ -const loadLanguage = ({ lang, options }, res) => { +const loadLanguage = ({ langs, options }, res) => { res.progress({ status: 'loading language traineddata', progress: 0 }); - return loadLang({ lang, TessModule, ...options }).then((...args) => { + return loadLang({ langs, TessModule, ...options }).then((...args) => { res.progress({ status: 'loaded language traineddata', progress: 1 }); return args; }); @@ -179,17 +185,17 @@ const loadLanguage = ({ lang, options }, res) => { * @access public * @param {object} req - job payload * @param {array} req.image - binary image in array format - * @param {string} req.lang - languages to load, ex: eng, eng+chi_tra + * @param {string} req.langs - languages to load, ex: eng, eng+chi_tra * @param {object} req.options - other options for loadLang function * @param {object} req.params - parameters for tesseract * @param {object} res - job instance */ const handleRecognize = ({ - image, lang, options, params, + image, langs, options, params, }, res) => ( handleInit(options, res) .then(() => ( - loadLanguage({ lang, options }, res) + loadLanguage({ langs, options }, res) .catch((e) => { if (e instanceof DOMException) { /* @@ -206,7 +212,7 @@ const handleRecognize = ({ res.progress({ status: 'initializing api', progress }); }; progressUpdate(0); - handleParams(lang, params); + handleParams(langs, params); progressUpdate(0.5); const ptr = setImage(image); progressUpdate(1); @@ -228,18 +234,18 @@ const handleRecognize = ({ * @access public * @param {object} req - job payload * @param {array} req.image - binary image in array format - * @param {string} req.lang - languages to load, ex: eng, eng+chi_tra + * @param {string} req.langs - languages to load, ex: eng, eng+chi_tra * @param {object} req.options - other options for loadLang function * @param {object} res - job instance */ const handleDetect = ({ - image, lang, options, + image, langs, options, }, res) => ( handleInit(options, res) .then(() => ( - loadLanguage({ lang, options }, res) + loadLanguage({ langs, options }, res) .then(() => { - api.Init(null, lang); + api.Init(null, getLangsStr(langs)); api.SetPageSegMode(TessModule.PSM_OSD_ONLY); const ptr = setImage(image); diff --git a/tests/detect.test.js b/tests/detect.test.js index 4d49d86..6bf5095 100644 --- a/tests/detect.test.js +++ b/tests/detect.test.js @@ -1,4 +1,4 @@ -const { TesseractWorker, utils: { loadLang } } = Tesseract; +const { TesseractWorker } = Tesseract; const isBrowser = typeof window !== 'undefined' && typeof window.document !== 'undefined'; const IMAGE_PATH = 'http://localhost:3000/tests/assets/images'; const loadLangOptions = { @@ -15,32 +15,6 @@ const getWorker = options => ( }) ); -before(function cb(done) { - this.timeout(30000); - const load = () => ( - loadLang({ - lang: 'osd', - cacheMethod: 'write', - ...loadLangOptions, - }).then(() => { - done(); - }) - ); - if (typeof startServer !== 'undefined') { - startServer(load); - } else { - load(); - } -}); - -after((done) => { - if (typeof stopServer !== 'undefined') { - stopServer(done); - } else { - done(); - } -}); - describe('detect()', () => { it('should detect OSD', (done) => { [ diff --git a/tests/recognize.test.js b/tests/recognize.test.js index c7559d5..f9271a6 100644 --- a/tests/recognize.test.js +++ b/tests/recognize.test.js @@ -1,4 +1,4 @@ -const { TesseractWorker, utils: { loadLang } } = Tesseract; +const { TesseractWorker } = Tesseract; const isBrowser = typeof window !== 'undefined' && typeof window.document !== 'undefined'; const IMAGE_PATH = 'http://localhost:3000/tests/assets/images'; @@ -22,34 +22,7 @@ const getWorker = options => ( }) ); -before(function cb(done) { - this.timeout(30000); - const load = () => ( - loadLang({ - lang: 'eng+chi_tra', - cacheMethod: 'write', - ...loadLangOptions, - }).then(() => { - done(); - }) - ); - if (typeof startServer !== 'undefined') { - startServer(load); - } else { - load(); - } -}); - -after((done) => { - if (typeof stopServer !== 'undefined') { - stopServer(done); - } else { - done(); - } -}); - -describe('recognize()',() => { - +describe('recognize()', () => { describe('should recognize different langs', () => { [ { name: 'chinese.png', lang: 'chi_tra', ans: CHINESE_TEXT }, @@ -187,22 +160,23 @@ describe('recognize()',() => { let canvasDOM = null; let imageDOM = null; let idx = 0; - beforeEach(function cb(done) { + beforeEach((done) => { canvasDOM = document.createElement('canvas'); imageDOM = document.createElement('img'); imageDOM.setAttribute('crossOrigin', 'Anonymous'); imageDOM.onload = () => { canvasDOM.getContext('2d').drawImage(imageDOM, 0, 0); done(); - } - imageDOM.setAttribute('src', `${IMAGE_PATH}/simple.${formats[idx++]}`); + }; + imageDOM.setAttribute('src', `${IMAGE_PATH}/simple.${formats[idx]}`); + idx += 1; }); afterEach(() => { canvasDOM.remove(); imageDOM.remove(); }); - + formats.forEach(format => ( it(`support ${format} format`, (done) => { const worker = getWorker(); @@ -216,5 +190,4 @@ describe('recognize()',() => { }).timeout(10000) )); }); - });