Browse Source

Update tests

develop
Jerome Wu 5 years ago
parent
commit
a470b836d5
  1. 3
      package.json
  2. 2
      scripts/server.js
  3. 5
      scripts/test-helper.js
  4. 16
      src/createJob.js
  5. 37
      src/createScheduler.js
  6. 74
      src/createWorker.js
  7. 3
      src/utils/getId.js
  8. 11
      src/worker-script/index.js
  9. 4
      src/worker/browser/defaultOptions.js
  10. 18
      src/worker/browser/defaultOptions.js~
  11. 4
      src/worker/browser/loadImage.js
  12. 33
      tests/constants.js
  13. 3
      tests/detect.test.html
  14. 46
      tests/detect.test.js
  15. 3
      tests/recognize.test.html
  16. 284
      tests/recognize.test.js
  17. 18
      tests/scheduler.test.html
  18. 35
      tests/scheduler.test.js

3
package.json

@ -10,13 +10,14 @@
"start": "node scripts/server.js", "start": "node scripts/server.js",
"build": "rimraf dist && webpack --config scripts/webpack.config.prod.js", "build": "rimraf dist && webpack --config scripts/webpack.config.prod.js",
"prepublishOnly": "npm run build", "prepublishOnly": "npm run build",
"wait": "wait-on http://localhost:3000/package.json", "wait": "rimraf dist && wait-on http://localhost:3000/dist/tesseract.dev.js",
"test": "npm-run-all -p -r start test:all", "test": "npm-run-all -p -r start test:all",
"test:all": "npm-run-all wait test:browser:* test:node", "test:all": "npm-run-all wait test:browser:* test:node",
"test:node": "nyc mocha --exit --bail --require ./scripts/test-helper.js ./tests/*.test.js", "test:node": "nyc mocha --exit --bail --require ./scripts/test-helper.js ./tests/*.test.js",
"test:browser-tpl": "mocha-headless-chrome -a incognito -a no-sandbox -a disable-setuid-sandbox -t 300000", "test:browser-tpl": "mocha-headless-chrome -a incognito -a no-sandbox -a disable-setuid-sandbox -t 300000",
"test:browser:detect": "npm run test:browser-tpl -- -f ./tests/detect.test.html", "test:browser:detect": "npm run test:browser-tpl -- -f ./tests/detect.test.html",
"test:browser:recognize": "npm run test:browser-tpl -- -f ./tests/recognize.test.html", "test:browser:recognize": "npm run test:browser-tpl -- -f ./tests/recognize.test.html",
"test:browser:scheduler": "npm run test:browser-tpl -- -f ./tests/scheduler.test.html",
"lint": "eslint src", "lint": "eslint src",
"postinstall": "opencollective-postinstall || true" "postinstall": "opencollective-postinstall || true"
}, },

2
scripts/server.js

@ -10,7 +10,7 @@ const app = express();
app.use(cors()); app.use(cors());
app.use('/', express.static(path.resolve(__dirname, '..'))); app.use('/', express.static(path.resolve(__dirname, '..')));
app.use(middleware(compiler, { publicPath: '/dist' })); app.use(middleware(compiler, { publicPath: '/dist', writeToDisk: true }));
module.exports = app.listen(3000, () => { module.exports = app.listen(3000, () => {
console.log('Server is running on port 3000'); console.log('Server is running on port 3000');

5
scripts/test-helper.js

@ -1,4 +1,9 @@
const constants = require('../tests/constants');
global.expect = require('expect.js'); global.expect = require('expect.js');
global.fs = require('fs'); global.fs = require('fs');
global.path = require('path'); global.path = require('path');
global.Tesseract = require('../src'); global.Tesseract = require('../src');
Object.keys(constants).forEach((key) => {
global[key] = constants[key];
});

16
src/createJob.js

@ -1,11 +1,17 @@
let jobCounter = 1; const getId = require('./utils/getId');
module.exports = ( let jobCounter = 0;
module.exports = ({
id: _id,
action, action,
payload, payload = {},
) => { }) => {
const id = `Job-${jobCounter}-${Math.random().toString(16).slice(3, 8)}`; let id = _id;
if (typeof id === 'undefined') {
id = getId('Job', jobCounter);
jobCounter += 1; jobCounter += 1;
}
return { return {
id, id,

37
src/createScheduler.js

@ -1,8 +1,20 @@
const createJob = require('./createJob');
const log = require('./utils/log');
const getId = require('./utils/getId');
let schedulerCounter = 0;
module.exports = () => { module.exports = () => {
const id = getId('Scheduler', schedulerCounter);
const workers = {}; const workers = {};
const runningWorkers = {}; const runningWorkers = {};
let jobQueue = []; let jobQueue = [];
schedulerCounter += 1;
const getQueueLen = () => jobQueue.length;
const getNumWorkers = () => Object.keys(workers).length;
const dequeue = () => { const dequeue = () => {
if (jobQueue.length !== 0) { if (jobQueue.length !== 0) {
const wIds = Object.keys(workers); const wIds = Object.keys(workers);
@ -17,11 +29,12 @@ module.exports = () => {
const queue = (action, payload) => ( const queue = (action, payload) => (
new Promise((resolve, reject) => { new Promise((resolve, reject) => {
const job = createJob({ action, payload });
jobQueue.push(async (w) => { jobQueue.push(async (w) => {
jobQueue.shift(); jobQueue.shift();
runningWorkers[w.id] = true; runningWorkers[w.id] = job;
try { try {
resolve(await w[action].apply(this, payload)); resolve(await w[action].apply(this, [...payload, job.id]));
} catch (err) { } catch (err) {
reject(err); reject(err);
} finally { } finally {
@ -29,22 +42,30 @@ module.exports = () => {
dequeue(); dequeue();
} }
}); });
log(`[${id}]: add ${job.id} to JobQueue`);
log(`[${id}]: JobQueue length=${jobQueue.length}`);
dequeue(); dequeue();
}) })
); );
const addWorker = (w) => { const addWorker = (w) => {
workers[w.id] = w; workers[w.id] = w;
log(`[${id}]: add ${w.id}`);
log(`[${id}]: number of workers=${getNumWorkers()}`);
dequeue();
return w.id; return w.id;
}; };
const addJob = (action, ...payload) => ( const addJob = async (action, ...payload) => {
queue(action, payload) if (getNumWorkers() === 0) {
); throw Error(`[${id}]: You need to have at least one worker before adding jobs`);
}
return queue(action, payload);
};
const terminate = async () => { const terminate = async () => {
Object.keys(workers).forEach(async (id) => { Object.keys(workers).forEach(async (wid) => {
await workers[id].terminate(); await workers[wid].terminate();
}); });
jobQueue = []; jobQueue = [];
}; };
@ -53,5 +74,7 @@ module.exports = () => {
addWorker, addWorker,
addJob, addJob,
terminate, terminate,
getQueueLen,
getNumWorkers,
}; };
}; };

74
src/createWorker.js

@ -2,6 +2,7 @@ const resolvePaths = require('./utils/resolvePaths');
const circularize = require('./utils/circularize'); const circularize = require('./utils/circularize');
const createJob = require('./createJob'); const createJob = require('./createJob');
const log = require('./utils/log'); const log = require('./utils/log');
const getId = require('./utils/getId');
const { defaultOEM } = require('./constants/config'); const { defaultOEM } = require('./constants/config');
const { const {
defaultOptions, defaultOptions,
@ -12,11 +13,10 @@ const {
send, send,
} = require('./worker/node'); } = require('./worker/node');
let workerCounter = 1; let workerCounter = 0;
module.exports = (_options = {}) => { module.exports = (_options = {}) => {
const id = `Worker-${workerCounter}-${Math.random().toString(16).slice(3, 8)}`; const id = getId('Worker', workerCounter);
workerCounter += 1;
const { const {
logger, logger,
...options ...options
@ -28,6 +28,8 @@ module.exports = (_options = {}) => {
const rejects = {}; const rejects = {};
let worker = spawnWorker(options); let worker = spawnWorker(options);
workerCounter += 1;
const setResolve = (action, res) => { const setResolve = (action, res) => {
resolves[action] = res; resolves[action] = res;
}; };
@ -36,10 +38,9 @@ module.exports = (_options = {}) => {
rejects[action] = rej; rejects[action] = rej;
}; };
const startJob = (action, payload = {}) => ( const startJob = ({ id: jobId, action, payload }) => (
new Promise((resolve, reject) => { new Promise((resolve, reject) => {
const { id: jobId } = createJob(action, payload); log(`[${id}]: Start ${jobId}, action=${action}, payload=`, payload);
log(`[${id}]: Start ${jobId}, action=${action}`);
setResolve(action, resolve); setResolve(action, resolve);
setReject(action, reject); setReject(action, reject);
send(worker, { send(worker, {
@ -51,32 +52,58 @@ module.exports = (_options = {}) => {
}) })
); );
const load = () => ( const load = jobId => (
startJob('load', { options }) startJob(createJob({
id: jobId, action: 'load', payload: { options },
}))
); );
const loadLanguage = (langs = 'eng') => ( const loadLanguage = (langs = 'eng', jobId) => (
startJob('loadLanguage', { langs, options }) startJob(createJob({
id: jobId,
action: 'loadLanguage',
payload: { langs, options },
}))
); );
const initialize = (langs = 'eng', oem = defaultOEM) => ( const initialize = (langs = 'eng', oem = defaultOEM, jobId) => (
startJob('initialize', { langs, oem }) startJob(createJob({
id: jobId,
action: 'initialize',
payload: { langs, oem },
}))
); );
const setParameters = (params = {}) => ( const setParameters = (params = {}, jobId) => (
startJob('setParameters', { params }) startJob(createJob({
id: jobId,
action: 'setParameters',
payload: { params },
}))
); );
const recognize = async (image, opts = {}) => ( const recognize = async (image, opts = {}, jobId) => (
startJob('recognize', { image: await loadImage(image), options: opts }) startJob(createJob({
id: jobId,
action: 'recognize',
payload: { image: await loadImage(image), options: opts },
}))
); );
const getPDF = (title = 'Tesseract OCR Result', textonly = false) => ( const getPDF = (title = 'Tesseract OCR Result', textonly = false, jobId) => (
startJob('getPDF', { title, textonly }) startJob(createJob({
id: jobId,
action: 'getPDF',
payload: { title, textonly },
}))
); );
const detect = async image => ( const detect = async (image, jobId) => (
startJob('detect', { image: await loadImage(image) }) startJob(createJob({
id: jobId,
action: 'detect',
payload: { image: await loadImage(image) },
}))
); );
const terminate = async () => { const terminate = async () => {
@ -88,15 +115,18 @@ module.exports = (_options = {}) => {
return Promise.resolve(); return Promise.resolve();
}; };
onMessage(worker, ({ status, action, data }) => { onMessage(worker, ({
workerId, jobId, status, action, data,
}) => {
if (status === 'resolve') { if (status === 'resolve') {
log(`[${workerId}]: Complete ${jobId}, data=`, data);
let d = data; let d = data;
if (action === 'recognize') { if (action === 'recognize') {
d = circularize(data); d = circularize(data);
} else if (action === 'getPDF') { } else if (action === 'getPDF') {
d = Array.from({ ...data, length: Object.keys(data).length }); d = Array.from({ ...data, length: Object.keys(data).length });
} }
resolves[action](d); resolves[action]({ jobId, data: d });
} else if (status === 'reject') { } else if (status === 'reject') {
rejects[action](data); rejects[action](data);
throw Error(data); throw Error(data);

3
src/utils/getId.js

@ -0,0 +1,3 @@
module.exports = (prefix, cnt) => (
`${prefix}-${cnt}-${Math.random().toString(16).slice(3, 8)}`
);

11
src/worker-script/index.js

@ -15,6 +15,7 @@ const dump = require('./utils/dump');
const isBrowser = require('../utils/getEnvironment')('type') === 'browser'; const isBrowser = require('../utils/getEnvironment')('type') === 'browser';
const setImage = require('./utils/setImage'); const setImage = require('./utils/setImage');
const defaultParams = require('./constants/defaultParams'); const defaultParams = require('./constants/defaultParams');
const log = require('../utils/log');
/* /*
* Tesseract Module returned by TesseractCore. * Tesseract Module returned by TesseractCore.
@ -23,7 +24,7 @@ let TessModule;
/* /*
* TessBaseAPI instance * TessBaseAPI instance
*/ */
let api; let api = null;
let latestJob; let latestJob;
let adapter = {}; let adapter = {};
let params = defaultParams; let params = defaultParams;
@ -77,11 +78,13 @@ const loadLanguage = async ({
try { try {
const _data = await readCache(`${cachePath || '.'}/${lang}.traineddata`); const _data = await readCache(`${cachePath || '.'}/${lang}.traineddata`);
if (typeof _data !== 'undefined') { if (typeof _data !== 'undefined') {
log(`[${workerId}]: Load ${lang}.traineddata from cache`);
data = _data; data = _data;
} else { } else {
throw Error('Not found in cache'); throw Error('Not found in cache');
} }
} catch (e) { } catch (e) {
log(`[${workerId}]: Load ${lang}.traineddata from ${langPath}`);
if (typeof _lang === 'string') { if (typeof _lang === 'string') {
let path = null; let path = null;
@ -173,8 +176,12 @@ const initialize = ({
res.progress({ res.progress({
workerId, status: 'initializing api', progress: 0, workerId, status: 'initializing api', progress: 0,
}); });
if (api !== null) {
api.End();
}
api = new TessModule.TessBaseAPI(); api = new TessModule.TessBaseAPI();
api.Init(null, langs, oem); api.Init(null, langs, oem);
params = defaultParams;
setParameters({ payload: { params } }); setParameters({ payload: { params } });
res.progress({ res.progress({
workerId, status: 'initialized api', progress: 1, workerId, status: 'initialized api', progress: 1,
@ -242,7 +249,9 @@ const detect = ({ payload: { image } }, res) => {
const terminate = (_, res) => { const terminate = (_, res) => {
try { try {
if (api !== null) {
api.End(); api.End();
}
res.resolve({ terminated: true }); res.resolve({ terminated: true });
} catch (err) { } catch (err) {
res.reject(err.toString()); res.reject(err.toString());

4
src/worker/browser/defaultOptions.js

@ -1,5 +1,5 @@
const resolveURL = require('resolve-url'); const resolveURL = require('resolve-url');
const { version } = require('../../../package.json'); const { version, dependencies } = require('../../../package.json');
const defaultOptions = require('../../constants/defaultOptions'); const defaultOptions = require('../../constants/defaultOptions');
/* /*
@ -14,5 +14,5 @@ module.exports = {
* If browser doesn't support WebAssembly, * If browser doesn't support WebAssembly,
* load ASM version instead * load ASM version instead
*/ */
corePath: `https://unpkg.com/tesseract.js-core@v2.0.0-beta.10/tesseract-core.${typeof WebAssembly === 'object' ? 'wasm' : 'asm'}.js`, corePath: `https://unpkg.com/tesseract.js-core@v${dependencies['tesseract.js-core'].substring(1)}/tesseract-core.${typeof WebAssembly === 'object' ? 'wasm' : 'asm'}.js`,
}; };

18
src/worker/browser/defaultOptions.js~

@ -0,0 +1,18 @@
const resolveURL = require('resolve-url');
const { version, dependencies } = require('../../../package.json');
const defaultOptions = require('../../constants/defaultOptions');
/*
* Default options for browser worker
*/
module.exports = {
...defaultOptions,
workerPath: (typeof process !== 'undefined' && process.env.TESS_ENV === 'development')
? resolveURL(`/dist/worker.dev.js?nocache=${Math.random().toString(36).slice(3)}`)
: `https://unpkg.com/tesseract.js@v${version}/dist/worker.min.js`,
/*
* If browser doesn't support WebAssembly,
* load ASM version instead
*/
corePath: `https://unpkg.com/tesseract.js-core@v${dependencies['tesseract.js-core'].substring(1)}/tesseract-core.${typeof WebAssembly === 'object' ? 'wasm' : 'asm'}.js`,
};

4
src/worker/browser/loadImage.js

@ -56,10 +56,10 @@ const loadImage = async (image) => {
} }
} else if (image instanceof HTMLElement) { } else if (image instanceof HTMLElement) {
if (image.tagName === 'IMG') { if (image.tagName === 'IMG') {
data = loadImage(image.src); data = await loadImage(image.src);
} }
if (image.tagName === 'VIDEO') { if (image.tagName === 'VIDEO') {
data = loadImage(image.poster); data = await loadImage(image.poster);
} }
if (image.tagName === 'CANVAS') { if (image.tagName === 'CANVAS') {
await new Promise((resolve) => { await new Promise((resolve) => {

33
tests/constants.js

File diff suppressed because one or more lines are too long

3
tests/detect.test.html

@ -7,7 +7,8 @@
<div id="mocha"></div> <div id="mocha"></div>
<script src="../node_modules/mocha/mocha.js"></script> <script src="../node_modules/mocha/mocha.js"></script>
<script src="../node_modules/expect.js/index.js"></script> <script src="../node_modules/expect.js/index.js"></script>
<script src="http://localhost:3000/dist/tesseract.dev.js"></script> <script src="../dist/tesseract.dev.js"></script>
<script src="./constants.js"></script>
<script>mocha.setup('bdd');</script> <script>mocha.setup('bdd');</script>
<script src="./detect.test.js"></script> <script src="./detect.test.js"></script>
<script> <script>

46
tests/detect.test.js

@ -1,33 +1,21 @@
const { TesseractWorker } = Tesseract; const { createScheduler, createWorker } = Tesseract;
const isBrowser = typeof window !== 'undefined' && typeof window.document !== 'undefined'; const scheduler = createScheduler();
const IMAGE_PATH = 'http://localhost:3000/tests/assets/images'; const worker = createWorker(OPTIONS);
const loadLangOptions = { scheduler.addWorker(worker);
langPath: 'http://localhost:3000/tests/assets/traineddata', before(function cb() {
cachePath: './tests/assets/traineddata', this.timeout(0);
}; return worker.load();
});
const getWorker = options => (
new TesseractWorker({
cacheMethod: 'readOnly',
...(isBrowser ? { workerPath: 'http://localhost:3000/dist/worker.dev.js' } : {}),
...loadLangOptions,
...options,
})
);
describe('detect()', () => { describe('detect()', async () => {
it('should detect OSD', (done) => { it('should detect OSD', () => {
[ [
{ name: 'cosmic.png', ans: { id: 12, degree: 0 } }, { name: 'cosmic.png', ans: { script: 'Latin' } },
].forEach(({ name, ans: { id, degree } }) => { ].forEach(async ({ name, ans: { script } }) => {
const worker = getWorker(); await worker.loadLanguage('osd');
worker await worker.initialize('osd');
.detect(`${IMAGE_PATH}/${name}`) const { data: { script: s } } = await scheduler.addJob('detect', `${IMAGE_PATH}/${name}`);
.then(({ tesseract_script_id, orientation_degrees }) => { expect(s).to.be(script);
expect(tesseract_script_id).to.be(id);
expect(orientation_degrees).to.be(degree);
done();
});
}); });
}).timeout(10000); }).timeout(TIMEOUT);
}); });

3
tests/recognize.test.html

@ -7,7 +7,8 @@
<div id="mocha"></div> <div id="mocha"></div>
<script src="../node_modules/mocha/mocha.js"></script> <script src="../node_modules/mocha/mocha.js"></script>
<script src="../node_modules/expect.js/index.js"></script> <script src="../node_modules/expect.js/index.js"></script>
<script src="http://localhost:3000/dist/tesseract.dev.js"></script> <script src="../dist/tesseract.dev.js"></script>
<script src="./constants.js"></script>
<script>mocha.setup('bdd');</script> <script>mocha.setup('bdd');</script>
<script src="./recognize.test.js"></script> <script src="./recognize.test.js"></script>
<script> <script>

284
tests/recognize.test.js

File diff suppressed because one or more lines are too long

18
tests/scheduler.test.html

@ -0,0 +1,18 @@
<!--
Browser harness page for the scheduler tests.
Scripts are loaded in document order, so each one may rely on the globals
defined by the scripts before it.
-->
<html>
<head>
<meta charset="utf-8">
<link rel="stylesheet" href="../node_modules/mocha/mocha.css">
</head>
<body>
<!-- Container element with the id "mocha"; presumably where mocha renders its report. -->
<div id="mocha"></div>
<!-- Test runner and assertion library. -->
<script src="../node_modules/mocha/mocha.js"></script>
<script src="../node_modules/expect.js/index.js"></script>
<!-- Library build under test, loaded from the local dist directory. -->
<script src="../dist/tesseract.dev.js"></script>
<!-- Shared test constants; presumably provides OPTIONS and IMAGE_PATH used by the test file - confirm in constants.js. -->
<script src="./constants.js"></script>
<!-- Select the BDD interface before the test file is evaluated. -->
<script>mocha.setup('bdd');</script>
<script src="./scheduler.test.js"></script>
<!-- Start the run only after every test has been registered. -->
<script>
mocha.run();
</script>
</body>
</html>

35
tests/scheduler.test.js

@ -0,0 +1,35 @@
const { createScheduler, createWorker } = Tesseract;

/*
 * Scheduler tests: a pool of workers is prepared once, then NUM_JOBS
 * recognize jobs are queued through a fresh scheduler that is given
 * 1, 2, ... NUM_WORKERS of those workers.
 *
 * Relies on names provided outside this file: Tesseract, OPTIONS,
 * IMAGE_PATH, expect and mocha's describe/it/before.
 */
describe('scheduler', () => {
  /* Size of the worker pool and the largest pool size exercised. */
  const NUM_WORKERS = 10;
  /* Number of recognize jobs queued in every test case. */
  const NUM_JOBS = 30;

  let workers = [];

  /*
   * Registered inside the suite instead of at file level: a file-level hook
   * is a root hook, which mocha runs before the tests of every loaded file.
   */
  before(async function cb() {
    this.timeout(0); // worker start-up time is not bounded here
    console.log(`Initializing ${NUM_WORKERS} workers`);
    workers = await Promise.all(Array.from({ length: NUM_WORKERS }, async () => {
      const w = createWorker(OPTIONS);
      await w.load();
      await w.loadLanguage('eng');
      await w.initialize('eng');
      return w;
    }));
    console.log(`Initialized ${NUM_WORKERS} workers`);
  });

  /* The job count in the title is derived from NUM_JOBS so the two cannot drift apart. */
  describe(`should speed up with more workers (running ${NUM_JOBS} jobs)`, () => {
    for (let numWorkers = 1; numWorkers <= NUM_WORKERS; numWorkers += 1) {
      it(`support using ${numWorkers} workers`, async () => {
        const scheduler = createScheduler();
        workers.slice(0, numWorkers).forEach((w) => {
          scheduler.addWorker(w);
        });
        // Alternate between the two sample images.
        const rets = await Promise.all(Array.from({ length: NUM_JOBS }, (_, idx) => (
          scheduler.addJob('recognize', `${IMAGE_PATH}/${idx % 2 === 0 ? 'simple' : 'cosmic'}.png`)
        )));
        expect(rets.length).to.be(NUM_JOBS);
      }).timeout(60000);
    }
  });
});
Loading…
Cancel
Save