Browse Source

Add partial recognize unit tests

pull/265/head
Jerome Wu 6 years ago
parent
commit
e0b263b88b
  1. 3
      .eslintrc
  2. 4
      .gitignore
  3. 2
      dist/tesseract.min.js
  4. 2
      dist/tesseract.min.js.map
  5. 4
      dist/worker.min.js
  6. 2
      dist/worker.min.js.map
  7. 1601
      package-lock.json
  8. 6
      package.json
  9. 2
      scripts/dist/tesseract.min.js
  10. 1
      scripts/dist/tesseract.min.js.map
  11. 9
      scripts/dist/worker.min.js
  12. 1
      scripts/dist/worker.min.js.map
  13. 6
      scripts/server.js
  14. 17
      scripts/test-helper.js
  15. 2
      scripts/webpack.config.prod.js
  16. 17
      src/common/worker.js
  17. 2
      src/node/index.js
  18. 5
      tests/.eslintrc
  19. BIN
      tests/assets/images/cosmic.png
  20. BIN
      tests/assets/images/simple.bmp
  21. BIN
      tests/assets/images/simple.jpg
  22. BIN
      tests/assets/images/simple.pbm
  23. BIN
      tests/assets/images/simple.png
  24. BIN
      tests/assets/images/testocr.jpg
  25. BIN
      tests/assets/images/testocr.pbm
  26. BIN
      tests/assets/images/testocr.png
  27. BIN
      tests/assets/testocr.bmp
  28. BIN
      tests/assets/traineddata/eng.traineddata
  29. 100
      tests/recognize.test.js

3
.eslintrc

@ -2,7 +2,8 @@ @@ -2,7 +2,8 @@
"extends": "airbnb",
"env": {
"browser": true,
"node": true
"node": true,
"mocha": true
},
"rules": {
"no-underscore-dangle": 0,

4
.gitignore vendored

@ -3,4 +3,6 @@ node_modules/* @@ -3,4 +3,6 @@ node_modules/*
yarn.lock
tesseract.dev.js
worker.dev.js
*.traineddata
/*.traineddata
/examples/**/*.traineddata
.nyc_output

2
dist/tesseract.min.js vendored

File diff suppressed because one or more lines are too long

2
dist/tesseract.min.js.map vendored

File diff suppressed because one or more lines are too long

4
dist/worker.min.js vendored

File diff suppressed because one or more lines are too long

2
dist/worker.min.js.map vendored

File diff suppressed because one or more lines are too long

1601
package-lock.json generated

File diff suppressed because it is too large Load Diff

6
package.json

@ -7,6 +7,7 @@ @@ -7,6 +7,7 @@
"start": "node scripts/server.js",
"build": "webpack --config scripts/webpack.config.prod.js",
"prepublishOnly": "npm run build",
"test:node": "nyc mocha --exit --require ./scripts/test-helper.js ./tests/*.test.js",
"lint": "eslint src"
},
"browser": {
@ -23,7 +24,10 @@ @@ -23,7 +24,10 @@
"eslint-plugin-import": "^2.14.0",
"eslint-plugin-jsx-a11y": "^6.1.2",
"eslint-plugin-react": "^7.11.1",
"expect.js": "^0.3.1",
"express": "^4.16.4",
"mocha": "^5.2.0",
"nyc": "^13.1.0",
"webpack": "^4.26.0",
"webpack-cli": "^3.1.2",
"webpack-dev-middleware": "^3.4.0"
@ -31,7 +35,7 @@ @@ -31,7 +35,7 @@
"dependencies": {
"check-types": "^7.4.0",
"is-url": "1.2.2",
"node-fetch": "^1.7.3",
"node-fetch": "^2.3.0",
"resolve-url": "^0.2.1",
"tesseract.js-core": "^2.0.0-beta.5",
"tesseract.js-utils": "^1.0.0-beta.2"

2
scripts/dist/tesseract.min.js vendored

File diff suppressed because one or more lines are too long

1
scripts/dist/tesseract.min.js.map vendored

File diff suppressed because one or more lines are too long

9
scripts/dist/worker.min.js vendored

File diff suppressed because one or more lines are too long

1
scripts/dist/worker.min.js.map vendored

File diff suppressed because one or more lines are too long

6
scripts/server.js

@ -7,9 +7,9 @@ const webpackConfig = require('./webpack.config.dev'); @@ -7,9 +7,9 @@ const webpackConfig = require('./webpack.config.dev');
const compiler = webpack(webpackConfig);
const app = express();
express.static.mime.types.wasm = 'application/wasm';
app.use('/', express.static(path.resolve(__dirname, '..')));
app.use(middleware(compiler, { publicPath: '/dist' }));
app.listen(3000, () => console.log('Server is running on port 3000'));
module.exports = app.listen(3000, () => {
console.log('Server is running on port 3000');
});

17
scripts/test-helper.js

@ -0,0 +1,17 @@ @@ -0,0 +1,17 @@
// Mocha bootstrap for the Node test run: publishes the globals the test files
// rely on and prepares an express app used to serve files over HTTP.
const express = require('express');
const path = require('path');
// Exposed as globals so the test files can use them without requiring them.
global.expect = require('expect.js');
global.fetch = require('node-fetch');
global.Tesseract = require('../src');
// Express application that startServer mounts the static handler on.
const app = express();
// Handle of the listening server; stays null until startServer has run.
let devServer = null;
/**
 * Serves the directory one level above this script at "/" and starts
 * listening on port 3000.
 *
 * @param {Function} done - passed to `app.listen` as its callback.
 */
global.startServer = (done) => {
  const parentDir = path.resolve(__dirname, '..');
  const staticHandler = express.static(parentDir);
  app.use('/', staticHandler);
  devServer = app.listen(3000, done);
};
/**
 * Shuts down the server started by startServer.
 *
 * If no server is currently tracked (startServer never ran, or the server was
 * already stopped) the callback is invoked right away instead of throwing on
 * a null handle.
 *
 * @param {Function} done - called once the server has closed, or immediately
 *   when there is nothing to close.
 */
global.stopServer = (done) => {
  if (!devServer) {
    done();
    return;
  }
  // Drop the handle before closing so a repeated call takes the no-op path.
  const server = devServer;
  devServer = null;
  server.close(done);
};

2
scripts/webpack.config.prod.js

@ -7,7 +7,7 @@ const genConfig = ({ @@ -7,7 +7,7 @@ const genConfig = ({
devtool: 'source-map',
entry,
output: {
path: path.resolve(__dirname, 'dist'),
path: path.resolve(__dirname, '..', 'dist'),
filename,
library,
libraryTarget,

17
src/common/worker.js

@ -48,14 +48,21 @@ const handleInit = (req, res) => { @@ -48,14 +48,21 @@ const handleInit = (req, res) => {
return Promise.resolve();
};
const loadLanguage = (req) => {
const { options: { lang }, workerOptions: { langPath } } = req;
return loadLang({
const loadLanguage = ({
options: { lang },
workerOptions: {
langPath, cachePath, cacheMethod, dataPath,
},
}) => (
loadLang({
langs: lang,
tessModule: Module,
langURI: langPath,
});
};
cachePath,
cacheMethod,
dataPath,
})
);
const handleRecognize = (req, res) => (
handleInit(req, res)

2
src/node/index.js

@ -11,7 +11,7 @@ const readFile = util.promisify(fs.readFile); @@ -11,7 +11,7 @@ const readFile = util.promisify(fs.readFile);
const loadImage = (imageURI) => {
if (isURL(imageURI)) {
return fetch(imageURI)
.then(resp => resp.buffer());
.then(resp => resp.arrayBuffer());
}
return readFile(imageURI);
};

5
tests/.eslintrc

@ -0,0 +1,5 @@ @@ -0,0 +1,5 @@
{
"rules": {
"no-undef": 0
}
}

BIN
tests/assets/images/cosmic.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 13 KiB

BIN
tests/assets/images/simple.bmp

Binary file not shown.

After

Width:  |  Height:  |  Size: 169 KiB

BIN
tests/assets/images/simple.jpg

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.9 KiB

BIN
tests/assets/images/simple.pbm

Binary file not shown.

BIN
tests/assets/images/simple.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.3 KiB

BIN
tests/assets/images/testocr.jpg

Binary file not shown.

After

Width:  |  Height:  |  Size: 69 KiB

BIN
tests/assets/images/testocr.pbm

Binary file not shown.

BIN
tests/assets/images/testocr.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 23 KiB

BIN
tests/assets/testocr.bmp

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.2 MiB

BIN
tests/assets/traineddata/eng.traineddata

Binary file not shown.

100
tests/recognize.test.js

@ -0,0 +1,100 @@ @@ -0,0 +1,100 @@
// Shared fixtures for the recognize() tests.
// `Tesseract` is read as a bare global; it is not required in this file.
const { TesseractWorker } = Tesseract;
// Base URL from which the fixture images are fetched (local server, port 3000).
const IMAGE_PATH = 'http://localhost:3000/tests/assets/images';
// Options handed to every TesseractWorker created below.
// NOTE(review): presumably these make the worker read the bundled traineddata
// without writing to the cache — confirm against the language loader.
const cachePath = './tests/assets/traineddata';
const cacheMethod = 'readOnly';
// Exact text each fixture image is expected to produce.
const SIMPLE_TEXT = 'Tesseract.js\n';
// NOTE(review): the identifier is misspelt ("COMSIC"); it is the expected text for cosmic.png.
const COMSIC_TEXT = 'HellO World\nfrom beyond\nthe Cosmic Void\n';
const TESTOCR_TEXT = 'This is a lot of 12 point text to test the\nocr code and see if it works on all types\nof file format.\n\nThe quick brown dog jumped over the\nlazy fox. The quick brown dog jumped\nover the lazy fox. The quick brown dog\njumped over the lazy fox. The quick\nbrown dog jumped over the lazy fox.\n';
// Root-level hook: start the fixture server when a `startServer` helper is
// defined; otherwise there is nothing to start and the hook completes at once.
before((done) => {
  const hasServerHelper = typeof startServer !== 'undefined';
  if (!hasServerHelper) {
    done();
    return;
  }
  startServer(done);
});
// Root-level hook: stop the fixture server when a `stopServer` helper is
// defined. When it is not, `done` must still be called; otherwise mocha waits
// for the callback until the hook times out.
after((done) => {
  if (typeof stopServer !== 'undefined') {
    stopServer(done);
  } else {
    done();
  }
});
describe('recognize()', () => {
// The same "simple" fixture, stored in several image formats, must always be
// recognized as SIMPLE_TEXT.
describe('supports multiple formats', () => {
  ['bmp', 'jpg', 'png', 'pbm'].forEach((format) => {
    // The test returns a promise instead of taking `done`, so a failed
    // assertion or a rejected recognition is reported as that failure rather
    // than as a timeout.
    it(`support ${format} format`, () => {
      const worker = new TesseractWorker({ cachePath, cacheMethod });
      // Promise.resolve adopts whatever thenable recognize() hands back.
      return Promise.resolve(worker.recognize(`${IMAGE_PATH}/simple.${format}`))
        .then((result) => {
          expect(result.text).to.be(SIMPLE_TEXT);
        })
        .then(
          () => {
            worker.terminate();
          },
          (err) => {
            // Release the worker on failure too, then surface the error.
            worker.terminate();
            throw err;
          },
        );
    }).timeout(10000);
  });
});
// A single worker is handed several recognitions of the same image at once;
// every one of them must come back as SIMPLE_TEXT.
describe('1 worker multiple recognition', () => {
  [3, 10, 20].forEach((num) => {
    // The test returns a promise instead of taking `done`, so a failed
    // assertion or a rejected recognition is reported as that failure rather
    // than as a timeout.
    it(`recognize ${num} images with 1 worker`, () => {
      const worker = new TesseractWorker({ cachePath, cacheMethod });
      const jobs = Array.from(
        { length: num },
        () => worker.recognize(`${IMAGE_PATH}/simple.png`),
      );
      return Promise.all(jobs)
        .then((results) => {
          results.forEach(({ text }) => {
            expect(text).to.be(SIMPLE_TEXT);
          });
        })
        .then(
          () => {
            worker.terminate();
          },
          (err) => {
            // Release the worker on failure too, then surface the error.
            worker.terminate();
            throw err;
          },
        );
    }).timeout(30000);
  });
});
// Alternating images are submitted to one worker; the i-th result must match
// the i-th submitted image.
describe('should recognize in order', () => {
  [1, 2].forEach((num) => {
    // The test returns a promise instead of taking `done`, so a failed
    // assertion or a rejected recognition is reported as that failure rather
    // than as a timeout.
    it(`recognize ${num * 2} images with 1 worker in order`, () => {
      const worker = new TesseractWorker({ cachePath, cacheMethod });
      // `num` repetitions of the (simple, cosmic) pair, in submission order.
      const cases = Array(num).fill(0)
        .reduce(acc => (
          acc.concat([
            { name: 'simple.png', ans: SIMPLE_TEXT },
            { name: 'cosmic.png', ans: COMSIC_TEXT },
          ])
        ),
        []);
      const jobs = cases.map(({ name }) => worker.recognize(`${IMAGE_PATH}/${name}`));
      return Promise.all(jobs)
        .then((results) => {
          results.forEach(({ text }, idx) => {
            expect(text).to.be(cases[idx].ans);
          });
        })
        .then(
          () => {
            worker.terminate();
          },
          (err) => {
            // Release the worker on failure too, then surface the error.
            worker.terminate();
            throw err;
          },
        );
    }).timeout(30000);
  });
});
// One fixture per difficulty level, each compared against its exact expected text.
describe('supports different complexity', () => {
  [
    { name: 'simple.png', desc: 'simple', ans: SIMPLE_TEXT },
    { name: 'cosmic.png', desc: 'normal', ans: COMSIC_TEXT },
    { name: 'testocr.png', desc: 'complex', ans: TESTOCR_TEXT },
  ].forEach(({ name, desc, ans }) => {
    // The test returns a promise instead of taking `done`, so a failed
    // assertion or a rejected recognition is reported as that failure rather
    // than as a timeout.
    it(`recongize ${desc} image`, () => {
      const worker = new TesseractWorker({ cachePath, cacheMethod });
      // Promise.resolve adopts whatever thenable recognize() hands back.
      return Promise.resolve(worker.recognize(`${IMAGE_PATH}/${name}`))
        .then(({ text }) => {
          expect(text).to.be(ans);
        })
        .then(
          () => {
            worker.terminate();
          },
          (err) => {
            // Release the worker on failure too, then surface the error.
            worker.terminate();
            throw err;
          },
        );
    }).timeout(10000);
  });
});
});
Loading…
Cancel
Save