Browse Source

Add Chinese test case

pull/265/head
Jerome Wu 6 years ago
parent
commit
a45e35467e
  1. 2
      package.json
  2. 9
      src/common/worker.js
  3. BIN
      tests/assets/images/chinese.png
  4. BIN
      tests/assets/traineddata/chi_tra.traineddata
  5. BIN
      tests/assets/traineddata/chi_tra.traineddata.gz
  6. 55
      tests/recognize.test.js

2
package.json

@ -7,7 +7,7 @@
"start": "node scripts/server.js", "start": "node scripts/server.js",
"build": "webpack --config scripts/webpack.config.prod.js", "build": "webpack --config scripts/webpack.config.prod.js",
"prepublishOnly": "npm run build", "prepublishOnly": "npm run build",
"test:node": "nyc mocha --exit --require ./scripts/test-helper.js ./tests/*.test.js", "test:node": "nyc mocha --exit --bail --require ./scripts/test-helper.js ./tests/*.test.js",
"lint": "eslint src" "lint": "eslint src"
}, },
"browser": { "browser": {

9
src/common/worker.js

@ -21,19 +21,12 @@ const setImage = (image) => {
}; };
const handleInit = (req, res) => { const handleInit = (req, res) => {
let MIN_MEMORY = 100663296; if (!Module) {
if (['chi_sim', 'chi_tra', 'jpn'].includes(req.lang)) {
MIN_MEMORY = 167772160;
}
if (!Module || Module.TOTAL_MEMORY < MIN_MEMORY) {
const Core = adapter.getCore(req, res); const Core = adapter.getCore(req, res);
res.progress({ status: 'initializing tesseract', progress: 0 }); res.progress({ status: 'initializing tesseract', progress: 0 });
return Core({ return Core({
// TOTAL_MEMORY: MIN_MEMORY,
TesseractProgress(percent) { TesseractProgress(percent) {
latestJob.progress({ status: 'recognizing text', progress: Math.max(0, (percent - 30) / 70) }); latestJob.progress({ status: 'recognizing text', progress: Math.max(0, (percent - 30) / 70) });
}, },

BIN
tests/assets/images/chinese.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.4 KiB

BIN
tests/assets/traineddata/chi_tra.traineddata

Binary file not shown.

BIN
tests/assets/traineddata/chi_tra.traineddata.gz

Binary file not shown.

55
tests/recognize.test.js

@ -4,6 +4,7 @@ const IMAGE_PATH = 'http://localhost:3000/tests/assets/images';
const SIMPLE_TEXT = 'Tesseract.js\n'; const SIMPLE_TEXT = 'Tesseract.js\n';
const COMSIC_TEXT = 'HellO World\nfrom beyond\nthe Cosmic Void\n'; const COMSIC_TEXT = 'HellO World\nfrom beyond\nthe Cosmic Void\n';
const TESTOCR_TEXT = 'This is a lot of 12 point text to test the\nocr code and see if it works on all types\nof file format.\n\nThe quick brown dog jumped over the\nlazy fox. The quick brown dog jumped\nover the lazy fox. The quick brown dog\njumped over the lazy fox. The quick\nbrown dog jumped over the lazy fox.\n'; const TESTOCR_TEXT = 'This is a lot of 12 point text to test the\nocr code and see if it works on all types\nof file format.\n\nThe quick brown dog jumped over the\nlazy fox. The quick brown dog jumped\nover the lazy fox. The quick brown dog\njumped over the lazy fox. The quick\nbrown dog jumped over the lazy fox.\n';
const CHINESE_TEXT = '繁 體 中 文 測 試\n';
const loadLangOptions = { const loadLangOptions = {
langPath: 'http://localhost:3000/tests/assets/traineddata', langPath: 'http://localhost:3000/tests/assets/traineddata',
@ -19,19 +20,22 @@ const getWorker = options => (
); );
before(function cb(done) { before(function cb(done) {
this.timeout(10000); this.timeout(30000);
loadLang({ const load = () => (
langs: 'eng', loadLang({
cacheMethod: 'write', langs: 'eng+chi_tra',
langURI: loadLangOptions.langPath, cacheMethod: 'write',
...loadLangOptions, langURI: loadLangOptions.langPath,
}).then(() => { ...loadLangOptions,
if (typeof startServer !== 'undefined') { }).then(() => {
startServer(done);
} else {
done(); done();
} })
}); );
if (typeof startServer !== 'undefined') {
startServer(load);
} else {
load();
}
}); });
after((done) => { after((done) => {
@ -43,14 +47,31 @@ after((done) => {
}); });
describe('recognize()', () => { describe('recognize()', () => {
describe('supports multiple formats', () => { describe('should recognize different langs', () => {
[
{ name: 'chinese.png', lang: 'chi_tra', ans: CHINESE_TEXT },
].forEach(({ name, lang, ans }) => (
it(`recongize ${lang}`, (done) => {
const worker = getWorker();
worker
.recognize(`${IMAGE_PATH}/${name}`, lang)
.then(({ text }) => {
expect(text).to.be(ans);
worker.terminate();
done();
});
}).timeout(30000)
));
});
describe('should read bmp, jpg, png and pbm format images', () => {
['bmp', 'jpg', 'png', 'pbm'].forEach(format => ( ['bmp', 'jpg', 'png', 'pbm'].forEach(format => (
it(`support ${format} format`, (done) => { it(`support ${format} format`, (done) => {
const worker = getWorker(); const worker = getWorker();
worker worker
.recognize(`${IMAGE_PATH}/simple.${format}`) .recognize(`${IMAGE_PATH}/simple.${format}`)
.then((result) => { .then(({ text }) => {
expect(result.text).to.be(SIMPLE_TEXT); expect(text).to.be(SIMPLE_TEXT);
worker.terminate(); worker.terminate();
done(); done();
}); });
@ -58,7 +79,7 @@ describe('recognize()', () => {
)); ));
}); });
describe('1 worker multiple recognition', () => { describe('should be able to recognize multiple images with 1 worker', () => {
[3, 10, 20].forEach(num => ( [3, 10, 20].forEach(num => (
it(`recognize ${num} images with 1 worker`, (done) => { it(`recognize ${num} images with 1 worker`, (done) => {
const worker = getWorker(); const worker = getWorker();
@ -75,7 +96,7 @@ describe('recognize()', () => {
)); ));
}); });
describe('should recognize in order', () => { describe('should recognize multiple images in order', () => {
[1, 2].forEach((num) => { [1, 2].forEach((num) => {
it(`recognize ${num * 2} images with 1 worker in order`, (done) => { it(`recognize ${num * 2} images with 1 worker in order`, (done) => {
const worker = getWorker(); const worker = getWorker();

Loading…
Cancel
Save