Browse Source

Update test & examples

develop
Jerome Wu 5 years ago
parent
commit
0e968d1829
  1. 2
      examples/browser/basic.html
  2. 52
      examples/browser/download-pdf.html
  3. 2
      examples/node/detect.js
  4. 22
      examples/node/download-pdf.js
  5. 7
      examples/node/recognize.js
  6. 17
      src/Tesseract.js
  7. 2
      src/createScheduler.js
  8. 9
      src/createWorker.js
  9. 2
      src/index.js
  10. 11
      src/utils/log.js
  11. 5
      src/worker-script/index.js
  12. 6
      tests/detect.test.js
  13. 25
      tests/recognize.test.js

2
examples/browser/basic.html

@ -6,7 +6,7 @@ @@ -6,7 +6,7 @@
<input type="file" id="uploader">
<script>
const recognize = async ({ target: { files } }) => {
const { text } = await Tesseract.recognize(files[0], 'eng', {
const { data: { text } } = await Tesseract.recognize(files[0], 'eng', {
corePath: '../../node_modules/tesseract.js-core/tesseract-core.wasm.js',
logger: m => console.log(m),
});

52
examples/browser/download-pdf.html

@ -0,0 +1,52 @@ @@ -0,0 +1,52 @@
<html>
<head>
<script src="/dist/tesseract.dev.js"></script>
</head>
<body>
<div>
<input type="file" id="uploader">
<button id="download-pdf" disabled="true">Download PDF</button>
</div>
<textarea id="board" readonly rows="8" cols="80">Upload an image file</textarea>
<script>
// Page-level handles used by the handlers further down in this script:
// the file input, the PDF download button and one Tesseract worker.
const uploader = document.getElementById('uploader');
const dlBtn = document.getElementById('download-pdf');
const { createWorker } = Tesseract;
const worker = createWorker({
  corePath: '/node_modules/tesseract.js-core/tesseract-core.wasm.js',
  // Print whatever the logger callback is handed to the console.
  logger: (message) => {
    console.log(message);
  },
});
// Change handler for the file input: runs OCR on the first selected file,
// writes the recognized text into the #board textarea and then enables the
// PDF download button.
const recognize = async ({ target: { files } }) => {
  // A change event can arrive with an empty selection; there is nothing to
  // recognize then, so leave the page as it is.
  if (!files || files.length === 0) {
    return;
  }
  // Keep the button disabled until this run has produced a result, so a PDF
  // is not requested while recognition is still in progress.
  dlBtn.disabled = true;
  await worker.load();
  await worker.loadLanguage('eng');
  await worker.initialize('eng');
  const { data: { text } } = await worker.recognize(files[0]);
  const board = document.getElementById('board');
  board.value = text;
  dlBtn.disabled = false;
};
// Click handler for the download button: asks the worker for the PDF of the
// last recognition and hands it to the browser as a file download.
const downloadPDF = async () => {
  const filename = 'tesseract-ocr-result.pdf';
  const { data } = await worker.getPDF('Tesseract OCR Result');
  const blob = new Blob([new Uint8Array(data)], { type: 'application/pdf' });
  if (navigator.msSaveBlob) {
    // IE 10+
    navigator.msSaveBlob(blob, filename);
    return;
  }
  const link = document.createElement('a');
  // Without support for the `download` attribute there is no way to name the
  // file, so nothing is downloaded (same as before).
  if (link.download === undefined) {
    return;
  }
  const url = URL.createObjectURL(blob);
  link.setAttribute('href', url);
  link.setAttribute('download', filename);
  link.style.visibility = 'hidden';
  document.body.appendChild(link);
  link.click();
  document.body.removeChild(link);
  // Release the blob URL so the PDF bytes are not kept alive for the lifetime
  // of the page. Deferred rather than immediate to give the browser time to
  // start the download first.
  setTimeout(() => URL.revokeObjectURL(url), 10000);
};
// Run OCR when the file input changes; build and download the PDF when the
// button is clicked.
uploader.addEventListener('change', recognize);
dlBtn.addEventListener('click', downloadPDF);
</script>
</body>
</html>

2
examples/node/detect.js

@ -8,6 +8,6 @@ const image = path.resolve(__dirname, (imagePath || '../../tests/assets/images/c @@ -8,6 +8,6 @@ const image = path.resolve(__dirname, (imagePath || '../../tests/assets/images/c
console.log(`Recognizing ${image}`);
Tesseract.detect(image, { logger: m => console.log(m) })
.then((data) => {
.then(({ data }) => {
console.log(data);
});

22
examples/node/download-pdf.js

@ -0,0 +1,22 @@ @@ -0,0 +1,22 @@
#!/usr/bin/env node
const path = require('path');
const fs = require('fs');
const { createWorker } = require('../../');
const [,, imagePath] = process.argv;
const image = path.resolve(__dirname, (imagePath || '../../tests/assets/images/cosmic.png'));
console.log(`Recognizing ${image}`);
// Recognize `image` with an English worker, print the text, then write the
// PDF rendering of the result to tesseract-ocr-result.pdf in the current
// working directory.
(async () => {
  const worker = createWorker();
  try {
    await worker.load();
    await worker.loadLanguage('eng');
    await worker.initialize('eng');
    const { data: { text } } = await worker.recognize(image);
    console.log(text);
    const { data } = await worker.getPDF('Tesseract OCR Result');
    fs.writeFileSync('tesseract-ocr-result.pdf', Buffer.from(data));
    console.log('Generate PDF: tesseract-ocr-result.pdf');
  } finally {
    // Terminate the worker even when a step above throws, so a failure does
    // not leave it running.
    await worker.terminate();
  }
})().catch((err) => {
  // Report the failure and signal it through the exit status instead of
  // leaving the rejection unhandled.
  console.error(err);
  process.exitCode = 1;
});

7
examples/node/recognize.js

@ -8,11 +8,6 @@ const image = path.resolve(__dirname, (imagePath || '../../tests/assets/images/c @@ -8,11 +8,6 @@ const image = path.resolve(__dirname, (imagePath || '../../tests/assets/images/c
console.log(`Recognizing ${image}`);
Tesseract.recognize(image, 'eng', { logger: m => console.log(m) })
.then(({ text }) => {
.then(({ data: { text } }) => {
console.log(text);
});
//Tesseract.detect(image, { logger: m => console.log(m) })
// .then((data) => {
// console.log(data);
// });

17
src/Tesseract.js

@ -1,29 +1,24 @@ @@ -1,29 +1,24 @@
const createScheduler = require('./createScheduler');
const createWorker = require('./createWorker');
const recognize = async (image, langs, options) => {
const scheduler = createScheduler();
const worker = createWorker(options);
await worker.load();
await worker.loadLanguage(langs);
await worker.initialize(langs);
scheduler.addWorker(worker);
return scheduler.addJob('recognize', image)
.finally(() => {
scheduler.terminate();
return worker.recognize(image)
.finally(async () => {
await worker.terminate();
});
};
const detect = async (image, options) => {
const scheduler = createScheduler();
const worker = createWorker(options);
await worker.load();
await worker.loadLanguage('osd');
await worker.initialize('osd');
scheduler.addWorker(worker);
return scheduler.addJob('detect', image)
.finally(() => {
scheduler.terminate();
return worker.detect(image)
.finally(async () => {
await worker.terminate();
});
};

2
src/createScheduler.js

@ -1,5 +1,5 @@ @@ -1,5 +1,5 @@
const createJob = require('./createJob');
const log = require('./utils/log');
const { log } = require('./utils/log');
const getId = require('./utils/getId');
let schedulerCounter = 0;

9
src/createWorker.js

@ -1,7 +1,7 @@ @@ -1,7 +1,7 @@
const resolvePaths = require('./utils/resolvePaths');
const circularize = require('./utils/circularize');
const createJob = require('./createJob');
const log = require('./utils/log');
const { log } = require('./utils/log');
const getId = require('./utils/getId');
const { defaultOEM } = require('./constants/config');
const {
@ -106,9 +106,12 @@ module.exports = (_options = {}) => { @@ -106,9 +106,12 @@ module.exports = (_options = {}) => {
}))
);
const terminate = async () => {
const terminate = async (jobId) => {
if (worker !== null) {
await startJob('terminate');
await startJob(createJob({
id: jobId,
action: 'terminate',
}));
terminateWorker(worker);
worker = null;
}

2
src/index.js

@ -14,6 +14,7 @@ const createJob = require('./createJob'); @@ -14,6 +14,7 @@ const createJob = require('./createJob');
const Tesseract = require('./Tesseract');
const OEM = require('./constants/OEM');
const PSM = require('./constants/PSM');
const { setLogging } = require('./utils/log');
module.exports = {
OEM,
@ -21,5 +22,6 @@ module.exports = { @@ -21,5 +22,6 @@ module.exports = {
createScheduler,
createWorker,
createJob,
setLogging,
...Tesseract,
};

11
src/utils/log.js

@ -1,2 +1,9 @@ @@ -1,2 +1,9 @@
module.exports = (typeof process.env !== 'undefined' && process.env.NODE_ENV === 'development')
? console.log : () => {};
let logging = false;
exports.logging = logging;
exports.setLogging = (_logging) => {
logging = _logging;
};
exports.log = (...args) => (logging ? console.log.apply(this, args) : null);

5
src/worker-script/index.js

@ -15,7 +15,7 @@ const dump = require('./utils/dump'); @@ -15,7 +15,7 @@ const dump = require('./utils/dump');
const isBrowser = require('../utils/getEnvironment')('type') === 'browser';
const setImage = require('./utils/setImage');
const defaultParams = require('./constants/defaultParams');
const log = require('../utils/log');
const { log, setLogging } = require('../utils/log');
/*
* Tesseract Module returned by TesseractCore.
@ -29,7 +29,8 @@ let latestJob; @@ -29,7 +29,8 @@ let latestJob;
let adapter = {};
let params = defaultParams;
const load = ({ workerId, jobId, payload: { options: { corePath } } }, res) => {
const load = ({ workerId, jobId, payload: { options: { corePath, logging } } }, res) => {
setLogging(logging);
if (!TessModule) {
const Core = adapter.getCore(corePath, res);

6
tests/detect.test.js

@ -1,7 +1,5 @@ @@ -1,7 +1,5 @@
const { createScheduler, createWorker } = Tesseract;
const scheduler = createScheduler();
const { createWorker } = Tesseract;
const worker = createWorker(OPTIONS);
scheduler.addWorker(worker);
before(function cb() {
this.timeout(0);
return worker.load();
@ -14,7 +12,7 @@ describe('detect()', async () => { @@ -14,7 +12,7 @@ describe('detect()', async () => {
].forEach(async ({ name, ans: { script } }) => {
await worker.loadLanguage('osd');
await worker.initialize('osd');
const { data: { script: s } } = await scheduler.addJob('detect', `${IMAGE_PATH}/${name}`);
const { data: { script: s } } = await worker.detect(`${IMAGE_PATH}/${name}`);
expect(s).to.be(script);
});
}).timeout(TIMEOUT);

25
tests/recognize.test.js

@ -1,7 +1,5 @@ @@ -1,7 +1,5 @@
const { createScheduler, createWorker, PSM } = Tesseract;
const scheduler = createScheduler();
const { createWorker, PSM } = Tesseract;
const worker = createWorker(OPTIONS);
scheduler.addWorker(worker);
before(async function cb() {
this.timeout(0);
await worker.load();
@ -13,7 +11,7 @@ describe('recognize()', () => { @@ -13,7 +11,7 @@ describe('recognize()', () => {
FORMATS.forEach(format => (
it(`support ${format} format`, async () => {
await worker.initialize('eng');
const { data: { text } } = await scheduler.addJob('recognize', `${IMAGE_PATH}/simple.${format}`);
const { data: { text } } = await worker.recognize(`${IMAGE_PATH}/simple.${format}`);
expect(text).to.be(SIMPLE_TEXT);
}).timeout(TIMEOUT)
));
@ -26,7 +24,7 @@ describe('recognize()', () => { @@ -26,7 +24,7 @@ describe('recognize()', () => {
].forEach(({ format, image, ans }) => (
it(`recongize ${format} in base64`, async () => {
await worker.initialize('eng');
const { data: { text } } = await scheduler.addJob('recognize', image);
const { data: { text } } = await worker.recognize(image);
expect(text).to.be(ans);
}).timeout(TIMEOUT)
));
@ -38,7 +36,7 @@ describe('recognize()', () => { @@ -38,7 +36,7 @@ describe('recognize()', () => {
].forEach(({ name, lang, ans }) => (
it(`recongize ${lang}`, async () => {
await worker.initialize(lang);
const { data: { text } } = await scheduler.addJob('recognize', `${IMAGE_PATH}/${name}`);
const { data: { text } } = await worker.recognize(`${IMAGE_PATH}/${name}`);
expect(text).to.be(ans);
}).timeout(TIMEOUT)
));
@ -52,7 +50,7 @@ describe('recognize()', () => { @@ -52,7 +50,7 @@ describe('recognize()', () => {
].forEach(({ name, desc, ans }) => (
it(`recongize ${desc} image`, async () => {
await worker.initialize('eng');
const { data: { text } } = await scheduler.addJob('recognize', `${IMAGE_PATH}/${name}`);
const { data: { text } } = await worker.recognize(`${IMAGE_PATH}/${name}`);
expect(text).to.be(ans);
}).timeout(TIMEOUT)
));
@ -68,8 +66,7 @@ describe('recognize()', () => { @@ -68,8 +66,7 @@ describe('recognize()', () => {
}) => (
it(`recongize half ${name}`, async () => {
await worker.initialize('eng');
const { data: { text } } = await scheduler.addJob(
'recognize',
const { data: { text } } = await worker.recognize(
`${IMAGE_PATH}/${name}`,
{
rectangles: [
@ -94,7 +91,7 @@ describe('recognize()', () => { @@ -94,7 +91,7 @@ describe('recognize()', () => {
await worker.setParameters({
tessedit_pageseg_mode: mode,
});
const { data } = await scheduler.addJob('recognize', `${IMAGE_PATH}/simple.png`);
const { data } = await worker.recognize(`${IMAGE_PATH}/simple.png`);
expect(Object.keys(data).length).not.to.be(0);
}).timeout(TIMEOUT)
));
@ -105,7 +102,7 @@ describe('recognize()', () => { @@ -105,7 +102,7 @@ describe('recognize()', () => {
it(`support ${format} format`, async () => {
const buf = fs.readFileSync(path.join(__dirname, 'assets', 'images', `simple.${format}`));
await worker.initialize('eng');
const { data: { text } } = await scheduler.addJob('recognize', buf);
const { data: { text } } = await worker.recognize(buf);
expect(text).to.be(SIMPLE_TEXT);
}).timeout(TIMEOUT)
));
@ -117,7 +114,7 @@ describe('recognize()', () => { @@ -117,7 +114,7 @@ describe('recognize()', () => {
const imageDOM = document.createElement('img');
imageDOM.setAttribute('src', `${IMAGE_PATH}/simple.${format}`);
await worker.initialize('eng');
const { data: { text } } = await scheduler.addJob('recognize', imageDOM);
const { data: { text } } = await worker.recognize(imageDOM);
expect(text).to.be(SIMPLE_TEXT);
}).timeout(TIMEOUT)
));
@ -129,7 +126,7 @@ describe('recognize()', () => { @@ -129,7 +126,7 @@ describe('recognize()', () => {
const videoDOM = document.createElement('video');
videoDOM.setAttribute('poster', `${IMAGE_PATH}/simple.${format}`);
await worker.initialize('eng');
const { data: { text } } = await scheduler.addJob('recognize', videoDOM);
const { data: { text } } = await worker.recognize(videoDOM);
expect(text).to.be(SIMPLE_TEXT);
}).timeout(TIMEOUT)
));
@ -161,7 +158,7 @@ describe('recognize()', () => { @@ -161,7 +158,7 @@ describe('recognize()', () => {
formats.forEach(format => (
it(`support ${format} format`, async () => {
await worker.initialize('eng');
const { data: { text } } = await scheduler.addJob('recognize', canvasDOM);
const { data: { text } } = await worker.recognize(canvasDOM);
expect(text).to.be(SIMPLE_TEXT);
}).timeout(TIMEOUT)
));

Loading…
Cancel
Save