Browse Source

Add partial recognize unit tests

pull/265/head
Jerome Wu 6 years ago
parent
commit
e0b263b88b
  1. 3
      .eslintrc
  2. 4
      .gitignore
  3. 2
      dist/tesseract.min.js
  4. 2
      dist/tesseract.min.js.map
  5. 4
      dist/worker.min.js
  6. 2
      dist/worker.min.js.map
  7. 1601
      package-lock.json
  8. 6
      package.json
  9. 2
      scripts/dist/tesseract.min.js
  10. 1
      scripts/dist/tesseract.min.js.map
  11. 9
      scripts/dist/worker.min.js
  12. 1
      scripts/dist/worker.min.js.map
  13. 6
      scripts/server.js
  14. 17
      scripts/test-helper.js
  15. 2
      scripts/webpack.config.prod.js
  16. 17
      src/common/worker.js
  17. 2
      src/node/index.js
  18. 5
      tests/.eslintrc
  19. BIN
      tests/assets/images/cosmic.png
  20. BIN
      tests/assets/images/simple.bmp
  21. BIN
      tests/assets/images/simple.jpg
  22. BIN
      tests/assets/images/simple.pbm
  23. BIN
      tests/assets/images/simple.png
  24. BIN
      tests/assets/images/testocr.jpg
  25. BIN
      tests/assets/images/testocr.pbm
  26. BIN
      tests/assets/images/testocr.png
  27. BIN
      tests/assets/testocr.bmp
  28. BIN
      tests/assets/traineddata/eng.traineddata
  29. 100
      tests/recognize.test.js

3
.eslintrc

@ -2,7 +2,8 @@
"extends": "airbnb", "extends": "airbnb",
"env": { "env": {
"browser": true, "browser": true,
"node": true "node": true,
"mocha": true
}, },
"rules": { "rules": {
"no-underscore-dangle": 0, "no-underscore-dangle": 0,

4
.gitignore vendored

@ -3,4 +3,6 @@ node_modules/*
yarn.lock yarn.lock
tesseract.dev.js tesseract.dev.js
worker.dev.js worker.dev.js
*.traineddata /*.traineddata
/examples/**/*.traineddata
.nyc_output

2
dist/tesseract.min.js vendored

File diff suppressed because one or more lines are too long

2
dist/tesseract.min.js.map vendored

File diff suppressed because one or more lines are too long

4
dist/worker.min.js vendored

File diff suppressed because one or more lines are too long

2
dist/worker.min.js.map vendored

File diff suppressed because one or more lines are too long

1601
package-lock.json generated

File diff suppressed because it is too large Load Diff

6
package.json

@ -7,6 +7,7 @@
"start": "node scripts/server.js", "start": "node scripts/server.js",
"build": "webpack --config scripts/webpack.config.prod.js", "build": "webpack --config scripts/webpack.config.prod.js",
"prepublishOnly": "npm run build", "prepublishOnly": "npm run build",
"test:node": "nyc mocha --exit --require ./scripts/test-helper.js ./tests/*.test.js",
"lint": "eslint src" "lint": "eslint src"
}, },
"browser": { "browser": {
@ -23,7 +24,10 @@
"eslint-plugin-import": "^2.14.0", "eslint-plugin-import": "^2.14.0",
"eslint-plugin-jsx-a11y": "^6.1.2", "eslint-plugin-jsx-a11y": "^6.1.2",
"eslint-plugin-react": "^7.11.1", "eslint-plugin-react": "^7.11.1",
"expect.js": "^0.3.1",
"express": "^4.16.4", "express": "^4.16.4",
"mocha": "^5.2.0",
"nyc": "^13.1.0",
"webpack": "^4.26.0", "webpack": "^4.26.0",
"webpack-cli": "^3.1.2", "webpack-cli": "^3.1.2",
"webpack-dev-middleware": "^3.4.0" "webpack-dev-middleware": "^3.4.0"
@ -31,7 +35,7 @@
"dependencies": { "dependencies": {
"check-types": "^7.4.0", "check-types": "^7.4.0",
"is-url": "1.2.2", "is-url": "1.2.2",
"node-fetch": "^1.7.3", "node-fetch": "^2.3.0",
"resolve-url": "^0.2.1", "resolve-url": "^0.2.1",
"tesseract.js-core": "^2.0.0-beta.5", "tesseract.js-core": "^2.0.0-beta.5",
"tesseract.js-utils": "^1.0.0-beta.2" "tesseract.js-utils": "^1.0.0-beta.2"

2
scripts/dist/tesseract.min.js vendored

File diff suppressed because one or more lines are too long

1
scripts/dist/tesseract.min.js.map vendored

File diff suppressed because one or more lines are too long

9
scripts/dist/worker.min.js vendored

File diff suppressed because one or more lines are too long

1
scripts/dist/worker.min.js.map vendored

File diff suppressed because one or more lines are too long

6
scripts/server.js

@ -7,9 +7,9 @@ const webpackConfig = require('./webpack.config.dev');
const compiler = webpack(webpackConfig); const compiler = webpack(webpackConfig);
const app = express(); const app = express();
express.static.mime.types.wasm = 'application/wasm';
app.use('/', express.static(path.resolve(__dirname, '..'))); app.use('/', express.static(path.resolve(__dirname, '..')));
app.use(middleware(compiler, { publicPath: '/dist' })); app.use(middleware(compiler, { publicPath: '/dist' }));
app.listen(3000, () => console.log('Server is running on port 3000')); module.exports = app.listen(3000, () => {
console.log('Server is running on port 3000');
});

17
scripts/test-helper.js

@ -0,0 +1,17 @@
/**
 * Test bootstrap for the Node test run.
 *
 * Publishes the assertion library, a fetch implementation and the library
 * under test as globals, and exposes `startServer` / `stopServer` so spec
 * files can bring a static file server for the repository root up and down.
 */
const express = require('express');
const path = require('path');

global.expect = require('expect.js');
global.fetch = require('node-fetch');
global.Tesseract = require('../src');

const app = express();

// Register the static handler once, at load time, so that calling
// startServer() more than once never stacks duplicate middleware.
app.use('/', express.static(path.resolve(__dirname, '..')));

// The listening server, or null while no server is running.
let devServer = null;

/**
 * Starts serving the repository root on port 3000.
 * A call made while a server is already running only invokes `done`.
 *
 * @param {Function} done - callback invoked once the server is listening.
 */
global.startServer = (done) => {
  if (devServer !== null) {
    done();
    return;
  }
  devServer = app.listen(3000, done);
};

/**
 * Stops the server started by `startServer`.
 * A call made while no server is running only invokes `done` instead of
 * throwing on a null server reference.
 *
 * @param {Function} done - callback invoked once the server has closed.
 */
global.stopServer = (done) => {
  if (devServer === null) {
    done();
    return;
  }
  const server = devServer;
  devServer = null;
  server.close(done);
};

2
scripts/webpack.config.prod.js

@ -7,7 +7,7 @@ const genConfig = ({
devtool: 'source-map', devtool: 'source-map',
entry, entry,
output: { output: {
path: path.resolve(__dirname, 'dist'), path: path.resolve(__dirname, '..', 'dist'),
filename, filename,
library, library,
libraryTarget, libraryTarget,

17
src/common/worker.js

@ -48,14 +48,21 @@ const handleInit = (req, res) => {
return Promise.resolve(); return Promise.resolve();
}; };
const loadLanguage = (req) => { const loadLanguage = ({
const { options: { lang }, workerOptions: { langPath } } = req; options: { lang },
return loadLang({ workerOptions: {
langPath, cachePath, cacheMethod, dataPath,
},
}) => (
loadLang({
langs: lang, langs: lang,
tessModule: Module, tessModule: Module,
langURI: langPath, langURI: langPath,
}); cachePath,
}; cacheMethod,
dataPath,
})
);
const handleRecognize = (req, res) => ( const handleRecognize = (req, res) => (
handleInit(req, res) handleInit(req, res)

2
src/node/index.js

@ -11,7 +11,7 @@ const readFile = util.promisify(fs.readFile);
const loadImage = (imageURI) => { const loadImage = (imageURI) => {
if (isURL(imageURI)) { if (isURL(imageURI)) {
return fetch(imageURI) return fetch(imageURI)
.then(resp => resp.buffer()); .then(resp => resp.arrayBuffer());
} }
return readFile(imageURI); return readFile(imageURI);
}; };

5
tests/.eslintrc

@ -0,0 +1,5 @@
{
"rules": {
"no-undef": 0
}
}

BIN
tests/assets/images/cosmic.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 13 KiB

BIN
tests/assets/images/simple.bmp

Binary file not shown.

After

Width:  |  Height:  |  Size: 169 KiB

BIN
tests/assets/images/simple.jpg

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.9 KiB

BIN
tests/assets/images/simple.pbm

Binary file not shown.

BIN
tests/assets/images/simple.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.3 KiB

BIN
tests/assets/images/testocr.jpg

Binary file not shown.

After

Width:  |  Height:  |  Size: 69 KiB

BIN
tests/assets/images/testocr.pbm

Binary file not shown.

BIN
tests/assets/images/testocr.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 23 KiB

BIN
tests/assets/testocr.bmp

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.2 MiB

BIN
tests/assets/traineddata/eng.traineddata

Binary file not shown.

100
tests/recognize.test.js

@ -0,0 +1,100 @@
// `Tesseract` is expected to exist as a global; for the Node run,
// scripts/test-helper.js assigns it before the spec files execute.
const { TesseractWorker } = Tesseract;

// Base URL of the fixture images (scripts/test-helper.js serves the
// repository root on port 3000).
const IMAGE_PATH = 'http://localhost:3000/tests/assets/images';

// Options handed to every TesseractWorker created by these tests.
// NOTE(review): presumably 'readOnly' makes the worker read traineddata from
// cachePath without writing to it; confirm against the language loader.
const cacheMethod = 'readOnly';
const cachePath = './tests/assets/traineddata';

// Exact text each fixture image is expected to be recognized as.
const SIMPLE_TEXT = 'Tesseract.js\n';
const COMSIC_TEXT = 'HellO World\nfrom beyond\nthe Cosmic Void\n';
const TESTOCR_TEXT = 'This is a lot of 12 point text to test the\nocr code and see if it works on all types\nof file format.\n\nThe quick brown dog jumped over the\nlazy fox. The quick brown dog jumped\nover the lazy fox. The quick brown dog\njumped over the lazy fox. The quick\nbrown dog jumped over the lazy fox.\n';
// Root-level hook: start the fixture server when a `startServer` helper is
// available; otherwise there is nothing to start and the hook completes
// immediately.
before((done) => {
  const hasServerHelper = typeof startServer !== 'undefined';
  if (!hasServerHelper) {
    done();
    return;
  }
  startServer(done);
});
// Root-level hook: stop the fixture server when a `stopServer` helper is
// available. `done` has to be reached on every path; without the else
// branch mocha would wait for this hook until it timed out whenever no
// helper is defined.
after((done) => {
  if (typeof stopServer !== 'undefined') {
    stopServer(done);
  } else {
    done();
  }
});
// Specs for TesseractWorker#recognize(). Every test returns a promise to
// mocha instead of using a `done` callback, so a failed expectation or a
// failed recognition is reported as that test's failure rather than as an
// unhandled rejection followed by a timeout.
describe('recognize()', () => {
  /**
   * Creates a worker, hands it to `run`, and terminates the worker once the
   * promise returned by `run` settles, whether it fulfilled or rejected.
   *
   * @param {Function} run - receives the worker and returns a promise.
   * @returns {Promise} settles the same way as the promise returned by `run`.
   */
  const withWorker = (run) => {
    const worker = new TesseractWorker({ cachePath, cacheMethod });
    return Promise.resolve()
      .then(() => run(worker))
      .then(
        (value) => {
          worker.terminate();
          return value;
        },
        (err) => {
          worker.terminate();
          throw err;
        },
      );
  };

  /**
   * Recognizes one fixture image by file name.
   * Promise.resolve() turns whatever recognize() returns into a native
   * promise, so the assertions chained after it propagate their errors.
   *
   * @param {Object} worker - the worker to run the recognition on.
   * @param {string} name - file name below IMAGE_PATH.
   * @returns {Promise} resolves with the recognition result.
   */
  const recognizeImage = (worker, name) => (
    Promise.resolve(worker.recognize(`${IMAGE_PATH}/${name}`))
  );

  describe('supports multiple formats', () => {
    ['bmp', 'jpg', 'png', 'pbm'].forEach((format) => {
      it(`support ${format} format`, () => (
        withWorker(worker => (
          recognizeImage(worker, `simple.${format}`).then(({ text }) => {
            expect(text).to.be(SIMPLE_TEXT);
          })
        ))
      )).timeout(10000);
    });
  });

  describe('1 worker multiple recognition', () => {
    [3, 10, 20].forEach((num) => {
      it(`recognize ${num} images with 1 worker`, () => (
        withWorker(worker => (
          Promise.all(
            Array(num).fill(0).map(() => recognizeImage(worker, 'simple.png')),
          ).then((results) => {
            results.forEach(({ text }) => {
              expect(text).to.be(SIMPLE_TEXT);
            });
          })
        ))
      )).timeout(30000);
    });
  });

  describe('should recognize in order', () => {
    [1, 2].forEach((num) => {
      it(`recognize ${num * 2} images with 1 worker in order`, () => {
        // `num` repetitions of the (simple, cosmic) pair, queued on one worker.
        const cases = Array(num).fill(0)
          .reduce(acc => (
            acc.concat([
              { name: 'simple.png', ans: SIMPLE_TEXT },
              { name: 'cosmic.png', ans: COMSIC_TEXT },
            ])
          ),
          []);
        return withWorker(worker => (
          Promise.all(
            cases.map(({ name }) => recognizeImage(worker, name)),
          ).then((results) => {
            // Results must line up index-for-index with the submitted cases.
            results.forEach(({ text }, idx) => {
              expect(text).to.be(cases[idx].ans);
            });
          })
        ));
      }).timeout(30000);
    });
  });

  describe('supports different complexity', () => {
    [
      { name: 'simple.png', desc: 'simple', ans: SIMPLE_TEXT },
      { name: 'cosmic.png', desc: 'normal', ans: COMSIC_TEXT },
      { name: 'testocr.png', desc: 'complex', ans: TESTOCR_TEXT },
    ].forEach(({ name, desc, ans }) => {
      it(`recongize ${desc} image`, () => (
        withWorker(worker => (
          recognizeImage(worker, name).then(({ text }) => {
            expect(text).to.be(ans);
          })
        ))
      )).timeout(10000);
    });
  });
});
Loading…
Cancel
Save