Browse Source

Update docs and tesseract.js-utils to latest version

pull/288/head
Jerome Wu 6 years ago
parent
commit
559cabd69c
  1. 15
      docs/examples.md
  2. 31
      package-lock.json
  3. 5
      package.json
  4. 10
      src/common/TesseractWorker.js
  5. 34
      src/common/workerUtils.js
  6. 28
      tests/detect.test.js
  7. 41
      tests/recognize.test.js

15
docs/examples.md

@ -6,6 +6,7 @@ Example repositories: @@ -6,6 +6,7 @@ Example repositories:
- Offline version: https://github.com/jeromewu/tesseract.js-offline
- With Vue (similar to React/Angular): https://github.com/jeromewu/tesseract-vue-app
- Chrome Extension: https://github.com/jeromewu/tesseract.js-chrome-extension
### basic
@ -16,7 +17,7 @@ const { TesseractWorker } = Tesseract; @@ -16,7 +17,7 @@ const { TesseractWorker } = Tesseract;
const worker = new TesseractWorker();
worker
.recognize('http://jeroen.github.io/images/testocr.png')
.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png')
.then((result) => {
console.log(result);
});
@ -31,7 +32,7 @@ const { TesseractWorker } = Tesseract; @@ -31,7 +32,7 @@ const { TesseractWorker } = Tesseract;
const worker = new TesseractWorker();
worker
.recognize('http://jeroen.github.io/images/testocr.png')
.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png')
.progress((p) => {
console.log('progress', p);
})
@ -50,7 +51,7 @@ const worker = new TesseractWorker(); @@ -50,7 +51,7 @@ const worker = new TesseractWorker();
worker
.recognize(
'http://jeroen.github.io/images/testocr.png',
'https://tesseract.projectnaptha.com/img/eng_bw.png',
'eng+chi_tra'
)
.progress((p) => {
@ -73,7 +74,7 @@ const worker = new TesseractWorker(); @@ -73,7 +74,7 @@ const worker = new TesseractWorker();
worker
.recognize(
'http://jeroen.github.io/images/testocr.png',
'https://tesseract.projectnaptha.com/img/eng_bw.png',
'eng',
{
'tessedit_ocr_engine_mode': OEM.TESSERACT_ONLY,
@ -100,7 +101,7 @@ const worker = new TesseractWorker(); @@ -100,7 +101,7 @@ const worker = new TesseractWorker();
worker
.recognize(
'http://jeroen.github.io/images/testocr.png',
'https://tesseract.projectnaptha.com/img/eng_bw.png',
'eng',
{
'tessedit_pageseg_mode': PSM.SINGLE_BLOCK,
@ -126,7 +127,7 @@ const worker = new TesseractWorker(); @@ -126,7 +127,7 @@ const worker = new TesseractWorker();
worker
.recognize(
'http://jeroen.github.io/images/testocr.png',
'https://tesseract.projectnaptha.com/img/eng_bw.png',
'eng',
{
'tessedit_create_pdf': '1',
@ -150,7 +151,7 @@ const worker = new TesseractWorker(); @@ -150,7 +151,7 @@ const worker = new TesseractWorker();
worker
.recognize(
'http://jeroen.github.io/images/testocr.png',
'https://tesseract.projectnaptha.com/img/eng_bw.png',
'eng',
{
'tessedit_create_pdf': '1',

31
package-lock.json generated

@ -7833,12 +7833,28 @@ @@ -7833,12 +7833,28 @@
"dev": true
},
"rimraf": {
"version": "2.6.2",
"resolved": "https://registry.npmjs.org/rimraf/-/rimraf-2.6.2.tgz",
"integrity": "sha512-lreewLK/BlghmxtfH36YYVg1i8IAce4TI7oao75I1g245+6BctqTVQiBP3YUJ9C6DQOXJmkYR9X9fCLtCOJc5w==",
"version": "2.6.3",
"resolved": "https://registry.npmjs.org/rimraf/-/rimraf-2.6.3.tgz",
"integrity": "sha512-mwqeW5XsA2qAejG46gYdENaxXjx9onRNCfn7L0duuP4hCuTIi/QO7PDK07KJfp1d+izWPrzEJDcSqBa0OZQriA==",
"dev": true,
"requires": {
"glob": "^7.0.5"
"glob": "^7.1.3"
},
"dependencies": {
"glob": {
"version": "7.1.4",
"resolved": "https://registry.npmjs.org/glob/-/glob-7.1.4.tgz",
"integrity": "sha512-hkLPepehmnKk41pUGm3sYxoFs/umurYfYJCerbXEyFIWcAzvpipAgVkBqqT9RBKMGjnq6kMuyYwha6csxbiM1A==",
"dev": true,
"requires": {
"fs.realpath": "^1.0.0",
"inflight": "^1.0.4",
"inherits": "2",
"minimatch": "^3.0.4",
"once": "^1.3.0",
"path-is-absolute": "^1.0.0"
}
}
}
},
"ripemd160": {
@ -8537,16 +8553,15 @@ @@ -8537,16 +8553,15 @@
"integrity": "sha512-QmNgMA9m5ES5uMTqpOAPysrUA80vUx/6WKQlfkK3zhOeAgqv8DjwwcDv9tQv2TgRzOQ+LFKrJn94Y2rw5b2IGw=="
},
"tesseract.js-utils": {
"version": "1.0.0-beta.6",
"resolved": "https://registry.npmjs.org/tesseract.js-utils/-/tesseract.js-utils-1.0.0-beta.6.tgz",
"integrity": "sha512-AENYhkqafwysayWmKtyApV0gR4abLJ426plLNHs/++8oHt+ekooyp77ew/q4+QLE7cbUDyxiNGawcraOWE/RuQ==",
"version": "1.0.0-beta.8",
"resolved": "https://registry.npmjs.org/tesseract.js-utils/-/tesseract.js-utils-1.0.0-beta.8.tgz",
"integrity": "sha512-qjHBfWfzo2o1ZY9XI0Wh2hmpp38+mIgCMOk60W5Yyie/pBl421VLBKOZUEwQgpbLnOJ24VU6Q8yXsVgtFFHcFg==",
"requires": {
"axios": "^0.18.0",
"bmp-js": "^0.1.0",
"file-type": "^10.5.0",
"idb-keyval": "^3.1.0",
"is-url": "^1.2.4",
"resolve-url": "^0.2.1",
"zlibjs": "^0.3.1"
},
"dependencies": {

5
package.json

@ -5,7 +5,7 @@ @@ -5,7 +5,7 @@
"main": "src/index.js",
"scripts": {
"start": "node scripts/server.js",
"build": "webpack --config scripts/webpack.config.prod.js",
"build": "rimraf dist && webpack --config scripts/webpack.config.prod.js",
"prepublishOnly": "npm run build",
"wait": "wait-on http://localhost:3000/package.json",
"test": "npm-run-all -p -r start test:all",
@ -41,6 +41,7 @@ @@ -41,6 +41,7 @@
"mocha-headless-chrome": "^2.0.2",
"npm-run-all": "^4.1.5",
"nyc": "^13.1.0",
"rimraf": "^2.6.3",
"wait-on": "^3.2.0",
"webpack": "^4.26.0",
"webpack-cli": "^3.1.2",
@ -53,7 +54,7 @@ @@ -53,7 +54,7 @@
"node-fetch": "^2.3.0",
"resolve-url": "^0.2.1",
"tesseract.js-core": "^2.0.0-beta.10",
"tesseract.js-utils": "^1.0.0-beta.6"
"tesseract.js-utils": "^1.0.0-beta.8"
},
"repository": {
"type": "git",

10
src/common/TesseractWorker.js

@ -67,12 +67,12 @@ class TesseractWorker { @@ -67,12 +67,12 @@ class TesseractWorker {
* @function recognize text in given image
* @access public
* @param {Buffer, string} image - image to be recognized
* @param {string} [lang=eng] - language to recognize
* @param {string, array} [langs=eng] - languages to recognize
* @param {object} params - tesseract parameters
*
*/
recognize(image, lang = 'eng', params = {}) {
return this._sendJob('recognize', image, lang, params);
recognize(image, langs = 'eng', params = {}) {
return this._sendJob('recognize', image, langs, params);
}
/**
@ -152,13 +152,13 @@ class TesseractWorker { @@ -152,13 +152,13 @@ class TesseractWorker {
* @param {string, array} langs languages to recognize
* @param {object} params tesseract parameters
*/
_sendJob(type, image, lang, params) {
_sendJob(type, image, langs, params) {
return this._delay((job) => {
job.send(
type,
{
image,
lang,
langs,
params,
options: this.options,
},

34
src/common/workerUtils.js

@ -52,16 +52,22 @@ const setImage = (image) => { @@ -52,16 +52,22 @@ const setImage = (image) => {
return data === null ? pix : data;
};
/**
 * getLangsStr
 *
 * @name getLangsStr
 * @function build a single '+'-joined langs string
 * @access private
 * @param {string, array} langs - a string is returned unchanged; otherwise
 *   each entry is mapped to itself (when it is a string) or to its `data`
 *   property, and the results are joined with '+'
 * @returns {string} langs string, ex: eng, eng+chi_tra
 */
const getLangsStr = (langs) => {
  // Already in string form: hand it back untouched.
  if (typeof langs === 'string') {
    return langs;
  }
  const codes = langs.map((entry) => {
    if (typeof entry === 'string') {
      return entry;
    }
    // Non-string entries contribute their `data` property.
    return entry.data;
  });
  return codes.join('+');
};
/**
* handleParams
*
* @name handleParams
* @function handle params from users
* @access private
* @param {string} lang - lang string for Init()
* @param {string, array} langs - langs (string or array) converted to the lang string for Init()
* @param {object} customParams - an object of params
*/
const handleParams = (lang, customParams) => {
const handleParams = (langs, customParams) => {
const {
tessedit_ocr_engine_mode,
...params
@ -69,7 +75,7 @@ const handleParams = (lang, customParams) => { @@ -69,7 +75,7 @@ const handleParams = (lang, customParams) => {
...defaultParams,
...customParams,
};
api.Init(null, lang, tessedit_ocr_engine_mode);
api.Init(null, getLangsStr(langs), tessedit_ocr_engine_mode);
Object.keys(params).forEach((key) => {
api.SetVariable(key, params[key]);
});
@ -158,14 +164,14 @@ const handleInit = ({ corePath }, res) => { @@ -158,14 +164,14 @@ const handleInit = ({ corePath }, res) => {
* @function load language from remote or local cache
* @access public
* @param {object} req - job payload
* @param {string} req.lang - languages to load, ex: eng, eng+chi_tra
* @param {string} req.langs - languages to load, ex: eng, eng+chi_tra
* @param {object} req.options - other options for loadLang function
* @param {object} res - job instance
* @returns {Promise} A Promise for callback
*/
const loadLanguage = ({ lang, options }, res) => {
const loadLanguage = ({ langs, options }, res) => {
res.progress({ status: 'loading language traineddata', progress: 0 });
return loadLang({ lang, TessModule, ...options }).then((...args) => {
return loadLang({ langs, TessModule, ...options }).then((...args) => {
res.progress({ status: 'loaded language traineddata', progress: 1 });
return args;
});
@ -179,17 +185,17 @@ const loadLanguage = ({ lang, options }, res) => { @@ -179,17 +185,17 @@ const loadLanguage = ({ lang, options }, res) => {
* @access public
* @param {object} req - job payload
* @param {array} req.image - binary image in array format
* @param {string} req.lang - languages to load, ex: eng, eng+chi_tra
* @param {string} req.langs - languages to load, ex: eng, eng+chi_tra
* @param {object} req.options - other options for loadLang function
* @param {object} req.params - parameters for tesseract
* @param {object} res - job instance
*/
const handleRecognize = ({
image, lang, options, params,
image, langs, options, params,
}, res) => (
handleInit(options, res)
.then(() => (
loadLanguage({ lang, options }, res)
loadLanguage({ langs, options }, res)
.catch((e) => {
if (e instanceof DOMException) {
/*
@ -206,7 +212,7 @@ const handleRecognize = ({ @@ -206,7 +212,7 @@ const handleRecognize = ({
res.progress({ status: 'initializing api', progress });
};
progressUpdate(0);
handleParams(lang, params);
handleParams(langs, params);
progressUpdate(0.5);
const ptr = setImage(image);
progressUpdate(1);
@ -228,18 +234,18 @@ const handleRecognize = ({ @@ -228,18 +234,18 @@ const handleRecognize = ({
* @access public
* @param {object} req - job payload
* @param {array} req.image - binary image in array format
* @param {string} req.lang - languages to load, ex: eng, eng+chi_tra
* @param {string} req.langs - languages to load, ex: eng, eng+chi_tra
* @param {object} req.options - other options for loadLang function
* @param {object} res - job instance
*/
const handleDetect = ({
image, lang, options,
image, langs, options,
}, res) => (
handleInit(options, res)
.then(() => (
loadLanguage({ lang, options }, res)
loadLanguage({ langs, options }, res)
.then(() => {
api.Init(null, lang);
api.Init(null, getLangsStr(langs));
api.SetPageSegMode(TessModule.PSM_OSD_ONLY);
const ptr = setImage(image);

28
tests/detect.test.js

@ -1,4 +1,4 @@ @@ -1,4 +1,4 @@
const { TesseractWorker, utils: { loadLang } } = Tesseract;
const { TesseractWorker } = Tesseract;
const isBrowser = typeof window !== 'undefined' && typeof window.document !== 'undefined';
const IMAGE_PATH = 'http://localhost:3000/tests/assets/images';
const loadLangOptions = {
@ -15,32 +15,6 @@ const getWorker = options => ( @@ -15,32 +15,6 @@ const getWorker = options => (
})
);
before(function cb(done) {
this.timeout(30000);
const load = () => (
loadLang({
lang: 'osd',
cacheMethod: 'write',
...loadLangOptions,
}).then(() => {
done();
})
);
if (typeof startServer !== 'undefined') {
startServer(load);
} else {
load();
}
});
after((done) => {
if (typeof stopServer !== 'undefined') {
stopServer(done);
} else {
done();
}
});
describe('detect()', () => {
it('should detect OSD', (done) => {
[

41
tests/recognize.test.js

@ -1,4 +1,4 @@ @@ -1,4 +1,4 @@
const { TesseractWorker, utils: { loadLang } } = Tesseract;
const { TesseractWorker } = Tesseract;
const isBrowser = typeof window !== 'undefined' && typeof window.document !== 'undefined';
const IMAGE_PATH = 'http://localhost:3000/tests/assets/images';
@ -22,34 +22,7 @@ const getWorker = options => ( @@ -22,34 +22,7 @@ const getWorker = options => (
})
);
before(function cb(done) {
this.timeout(30000);
const load = () => (
loadLang({
lang: 'eng+chi_tra',
cacheMethod: 'write',
...loadLangOptions,
}).then(() => {
done();
})
);
if (typeof startServer !== 'undefined') {
startServer(load);
} else {
load();
}
});
after((done) => {
if (typeof stopServer !== 'undefined') {
stopServer(done);
} else {
done();
}
});
describe('recognize()',() => {
describe('recognize()', () => {
describe('should recognize different langs', () => {
[
{ name: 'chinese.png', lang: 'chi_tra', ans: CHINESE_TEXT },
@ -187,22 +160,23 @@ describe('recognize()',() => { @@ -187,22 +160,23 @@ describe('recognize()',() => {
let canvasDOM = null;
let imageDOM = null;
let idx = 0;
beforeEach(function cb(done) {
beforeEach((done) => {
canvasDOM = document.createElement('canvas');
imageDOM = document.createElement('img');
imageDOM.setAttribute('crossOrigin', 'Anonymous');
imageDOM.onload = () => {
canvasDOM.getContext('2d').drawImage(imageDOM, 0, 0);
done();
}
imageDOM.setAttribute('src', `${IMAGE_PATH}/simple.${formats[idx++]}`);
};
imageDOM.setAttribute('src', `${IMAGE_PATH}/simple.${formats[idx]}`);
idx += 1;
});
afterEach(() => {
canvasDOM.remove();
imageDOM.remove();
});
formats.forEach(format => (
it(`support ${format} format`, (done) => {
const worker = getWorker();
@ -216,5 +190,4 @@ describe('recognize()',() => { @@ -216,5 +190,4 @@ describe('recognize()',() => {
}).timeout(10000)
));
});
});

Loading…
Cancel
Save