Browse Source

Update test & examples

develop
Jerome Wu 5 years ago
parent
commit
0e968d1829
  1. 2
      examples/browser/basic.html
  2. 52
      examples/browser/download-pdf.html
  3. 2
      examples/node/detect.js
  4. 22
      examples/node/download-pdf.js
  5. 7
      examples/node/recognize.js
  6. 17
      src/Tesseract.js
  7. 2
      src/createScheduler.js
  8. 9
      src/createWorker.js
  9. 2
      src/index.js
  10. 11
      src/utils/log.js
  11. 5
      src/worker-script/index.js
  12. 6
      tests/detect.test.js
  13. 25
      tests/recognize.test.js

2
examples/browser/basic.html

@ -6,7 +6,7 @@
<input type="file" id="uploader"> <input type="file" id="uploader">
<script> <script>
const recognize = async ({ target: { files } }) => { const recognize = async ({ target: { files } }) => {
const { text } = await Tesseract.recognize(files[0], 'eng', { const { data: { text } } = await Tesseract.recognize(files[0], 'eng', {
corePath: '../../node_modules/tesseract.js-core/tesseract-core.wasm.js', corePath: '../../node_modules/tesseract.js-core/tesseract-core.wasm.js',
logger: m => console.log(m), logger: m => console.log(m),
}); });

52
examples/browser/download-pdf.html

@ -0,0 +1,52 @@
<html>
<head>
<script src="/dist/tesseract.dev.js"></script>
</head>
<body>
<div>
<input type="file" id="uploader">
<button id="download-pdf" disabled="true">Download PDF</button>
</div>
<textarea id="board" readonly rows="8" cols="80">Upload an image file</textarea>
<script>
// Shared state for this page: one OCR worker plus the two controls the
// handlers further down read from and update.
const createWorker = Tesseract.createWorker;
const worker = createWorker({
  corePath: '/node_modules/tesseract.js-core/tesseract-core.wasm.js',
  // Forward every message the worker reports to the console.
  logger: (message) => console.log(message),
});
const dlBtn = document.getElementById('download-pdf');
const uploader = document.getElementById('uploader');
/**
 * 'change' handler for the file input: runs OCR on the first selected file,
 * shows the recognized text in the #board textarea and enables the PDF button.
 *
 * @param {Event} event - change event whose target exposes the chosen `files`.
 */
const recognize = async (event) => {
  const { files } = event.target;

  // Prepare the worker for English before recognizing.
  await worker.load();
  await worker.loadLanguage('eng');
  await worker.initialize('eng');

  const ocrResult = await worker.recognize(files[0]);
  const ocrText = ocrResult.data.text;

  const outputArea = document.getElementById('board');
  outputArea.value = ocrText;

  // A result now exists, so the PDF export can be offered.
  dlBtn.disabled = false;
};
/**
 * 'click' handler for the download button: asks the worker for a PDF and
 * hands it to the browser as a file download named
 * 'tesseract-ocr-result.pdf'.
 *
 * Uses navigator.msSaveBlob when the browser provides it; otherwise clicks a
 * temporary hidden <a download> pointing at an object URL. If neither
 * mechanism is available nothing is downloaded.
 *
 * @returns {Promise<void>} settles once the download has been triggered.
 */
const downloadPDF = async () => {
  const filename = 'tesseract-ocr-result.pdf';
  const { data } = await worker.getPDF('Tesseract OCR Result');
  const blob = new Blob([new Uint8Array(data)], { type: 'application/pdf' });

  if (navigator.msSaveBlob) {
    // IE 10+
    navigator.msSaveBlob(blob, filename);
    return;
  }

  const link = document.createElement('a');
  if (link.download === undefined) {
    // No support for the download attribute: nothing we can trigger here.
    return;
  }

  const url = URL.createObjectURL(blob);
  link.setAttribute('href', url);
  link.setAttribute('download', filename);
  link.style.visibility = 'hidden';
  document.body.appendChild(link);
  try {
    link.click();
  } finally {
    // Always detach the temporary link, even if click() throws.
    document.body.removeChild(link);
    // Release the blob behind the object URL; deferred to the next task so
    // the browser has started the download before the URL becomes invalid.
    setTimeout(() => URL.revokeObjectURL(url), 0);
  }
};
// Run OCR whenever the user picks a file.
uploader.addEventListener('change', recognize);
// Offer the worker's PDF output as a download when the button is clicked.
dlBtn.addEventListener('click', downloadPDF);
</script>
</body>
</html>

2
examples/node/detect.js

@ -8,6 +8,6 @@ const image = path.resolve(__dirname, (imagePath || '../../tests/assets/images/c
console.log(`Recognizing ${image}`); console.log(`Recognizing ${image}`);
Tesseract.detect(image, { logger: m => console.log(m) }) Tesseract.detect(image, { logger: m => console.log(m) })
.then((data) => { .then(({ data }) => {
console.log(data); console.log(data);
}); });

22
examples/node/download-pdf.js

@ -0,0 +1,22 @@
#!/usr/bin/env node
const path = require('path');
const fs = require('fs');
const { createWorker } = require('../../');
// The first CLI argument (if any) is the image to process; it is resolved
// against this script's directory and falls back to the bundled sample image.
const imagePath = process.argv[2];
const image = path.resolve(__dirname, imagePath || '../../tests/assets/images/cosmic.png');

console.log(`Recognizing ${image}`);
// Recognize the image, print its text, then write the worker's PDF output to
// tesseract-ocr-result.pdf in the current working directory.
(async () => {
  const worker = createWorker();
  try {
    await worker.load();
    await worker.loadLanguage('eng');
    await worker.initialize('eng');
    const { data: { text } } = await worker.recognize(image);
    console.log(text);
    const { data } = await worker.getPDF('Tesseract OCR Result');
    fs.writeFileSync('tesseract-ocr-result.pdf', Buffer.from(data));
    console.log('Generate PDF: tesseract-ocr-result.pdf');
  } finally {
    // Shut the worker down on failure too, so it is not left running after
    // an error in any of the steps above.
    await worker.terminate();
  }
})().catch((err) => {
  // Report the failure and signal it through the exit code instead of
  // leaving the rejection unhandled.
  console.error(err);
  process.exitCode = 1;
});

7
examples/node/recognize.js

@ -8,11 +8,6 @@ const image = path.resolve(__dirname, (imagePath || '../../tests/assets/images/c
console.log(`Recognizing ${image}`); console.log(`Recognizing ${image}`);
Tesseract.recognize(image, 'eng', { logger: m => console.log(m) }) Tesseract.recognize(image, 'eng', { logger: m => console.log(m) })
.then(({ text }) => { .then(({ data: { text } }) => {
console.log(text); console.log(text);
}); });
//Tesseract.detect(image, { logger: m => console.log(m) })
// .then((data) => {
// console.log(data);
// });

17
src/Tesseract.js

@ -1,29 +1,24 @@
const createScheduler = require('./createScheduler');
const createWorker = require('./createWorker'); const createWorker = require('./createWorker');
const recognize = async (image, langs, options) => { const recognize = async (image, langs, options) => {
const scheduler = createScheduler();
const worker = createWorker(options); const worker = createWorker(options);
await worker.load(); await worker.load();
await worker.loadLanguage(langs); await worker.loadLanguage(langs);
await worker.initialize(langs); await worker.initialize(langs);
scheduler.addWorker(worker); return worker.recognize(image)
return scheduler.addJob('recognize', image) .finally(async () => {
.finally(() => { await worker.terminate();
scheduler.terminate();
}); });
}; };
const detect = async (image, options) => { const detect = async (image, options) => {
const scheduler = createScheduler();
const worker = createWorker(options); const worker = createWorker(options);
await worker.load(); await worker.load();
await worker.loadLanguage('osd'); await worker.loadLanguage('osd');
await worker.initialize('osd'); await worker.initialize('osd');
scheduler.addWorker(worker); return worker.detect(image)
return scheduler.addJob('detect', image) .finally(async () => {
.finally(() => { await worker.terminate();
scheduler.terminate();
}); });
}; };

2
src/createScheduler.js

@ -1,5 +1,5 @@
const createJob = require('./createJob'); const createJob = require('./createJob');
const log = require('./utils/log'); const { log } = require('./utils/log');
const getId = require('./utils/getId'); const getId = require('./utils/getId');
let schedulerCounter = 0; let schedulerCounter = 0;

9
src/createWorker.js

@ -1,7 +1,7 @@
const resolvePaths = require('./utils/resolvePaths'); const resolvePaths = require('./utils/resolvePaths');
const circularize = require('./utils/circularize'); const circularize = require('./utils/circularize');
const createJob = require('./createJob'); const createJob = require('./createJob');
const log = require('./utils/log'); const { log } = require('./utils/log');
const getId = require('./utils/getId'); const getId = require('./utils/getId');
const { defaultOEM } = require('./constants/config'); const { defaultOEM } = require('./constants/config');
const { const {
@ -106,9 +106,12 @@ module.exports = (_options = {}) => {
})) }))
); );
const terminate = async () => { const terminate = async (jobId) => {
if (worker !== null) { if (worker !== null) {
await startJob('terminate'); await startJob(createJob({
id: jobId,
action: 'terminate',
}));
terminateWorker(worker); terminateWorker(worker);
worker = null; worker = null;
} }

2
src/index.js

@ -14,6 +14,7 @@ const createJob = require('./createJob');
const Tesseract = require('./Tesseract'); const Tesseract = require('./Tesseract');
const OEM = require('./constants/OEM'); const OEM = require('./constants/OEM');
const PSM = require('./constants/PSM'); const PSM = require('./constants/PSM');
const { setLogging } = require('./utils/log');
module.exports = { module.exports = {
OEM, OEM,
@ -21,5 +22,6 @@ module.exports = {
createScheduler, createScheduler,
createWorker, createWorker,
createJob, createJob,
setLogging,
...Tesseract, ...Tesseract,
}; };

11
src/utils/log.js

@ -1,2 +1,9 @@
module.exports = (typeof process.env !== 'undefined' && process.env.NODE_ENV === 'development') let logging = false;
? console.log : () => {};
exports.logging = logging;
exports.setLogging = (_logging) => {
logging = _logging;
};
exports.log = (...args) => (logging ? console.log.apply(this, args) : null);

5
src/worker-script/index.js

@ -15,7 +15,7 @@ const dump = require('./utils/dump');
const isBrowser = require('../utils/getEnvironment')('type') === 'browser'; const isBrowser = require('../utils/getEnvironment')('type') === 'browser';
const setImage = require('./utils/setImage'); const setImage = require('./utils/setImage');
const defaultParams = require('./constants/defaultParams'); const defaultParams = require('./constants/defaultParams');
const log = require('../utils/log'); const { log, setLogging } = require('../utils/log');
/* /*
* Tesseract Module returned by TesseractCore. * Tesseract Module returned by TesseractCore.
@ -29,7 +29,8 @@ let latestJob;
let adapter = {}; let adapter = {};
let params = defaultParams; let params = defaultParams;
const load = ({ workerId, jobId, payload: { options: { corePath } } }, res) => { const load = ({ workerId, jobId, payload: { options: { corePath, logging } } }, res) => {
setLogging(logging);
if (!TessModule) { if (!TessModule) {
const Core = adapter.getCore(corePath, res); const Core = adapter.getCore(corePath, res);

6
tests/detect.test.js

@ -1,7 +1,5 @@
const { createScheduler, createWorker } = Tesseract; const { createWorker } = Tesseract;
const scheduler = createScheduler();
const worker = createWorker(OPTIONS); const worker = createWorker(OPTIONS);
scheduler.addWorker(worker);
before(function cb() { before(function cb() {
this.timeout(0); this.timeout(0);
return worker.load(); return worker.load();
@ -14,7 +12,7 @@ describe('detect()', async () => {
].forEach(async ({ name, ans: { script } }) => { ].forEach(async ({ name, ans: { script } }) => {
await worker.loadLanguage('osd'); await worker.loadLanguage('osd');
await worker.initialize('osd'); await worker.initialize('osd');
const { data: { script: s } } = await scheduler.addJob('detect', `${IMAGE_PATH}/${name}`); const { data: { script: s } } = await worker.detect(`${IMAGE_PATH}/${name}`);
expect(s).to.be(script); expect(s).to.be(script);
}); });
}).timeout(TIMEOUT); }).timeout(TIMEOUT);

25
tests/recognize.test.js

@ -1,7 +1,5 @@
const { createScheduler, createWorker, PSM } = Tesseract; const { createWorker, PSM } = Tesseract;
const scheduler = createScheduler();
const worker = createWorker(OPTIONS); const worker = createWorker(OPTIONS);
scheduler.addWorker(worker);
before(async function cb() { before(async function cb() {
this.timeout(0); this.timeout(0);
await worker.load(); await worker.load();
@ -13,7 +11,7 @@ describe('recognize()', () => {
FORMATS.forEach(format => ( FORMATS.forEach(format => (
it(`support ${format} format`, async () => { it(`support ${format} format`, async () => {
await worker.initialize('eng'); await worker.initialize('eng');
const { data: { text } } = await scheduler.addJob('recognize', `${IMAGE_PATH}/simple.${format}`); const { data: { text } } = await worker.recognize(`${IMAGE_PATH}/simple.${format}`);
expect(text).to.be(SIMPLE_TEXT); expect(text).to.be(SIMPLE_TEXT);
}).timeout(TIMEOUT) }).timeout(TIMEOUT)
)); ));
@ -26,7 +24,7 @@ describe('recognize()', () => {
].forEach(({ format, image, ans }) => ( ].forEach(({ format, image, ans }) => (
it(`recongize ${format} in base64`, async () => { it(`recongize ${format} in base64`, async () => {
await worker.initialize('eng'); await worker.initialize('eng');
const { data: { text } } = await scheduler.addJob('recognize', image); const { data: { text } } = await worker.recognize(image);
expect(text).to.be(ans); expect(text).to.be(ans);
}).timeout(TIMEOUT) }).timeout(TIMEOUT)
)); ));
@ -38,7 +36,7 @@ describe('recognize()', () => {
].forEach(({ name, lang, ans }) => ( ].forEach(({ name, lang, ans }) => (
it(`recongize ${lang}`, async () => { it(`recongize ${lang}`, async () => {
await worker.initialize(lang); await worker.initialize(lang);
const { data: { text } } = await scheduler.addJob('recognize', `${IMAGE_PATH}/${name}`); const { data: { text } } = await worker.recognize(`${IMAGE_PATH}/${name}`);
expect(text).to.be(ans); expect(text).to.be(ans);
}).timeout(TIMEOUT) }).timeout(TIMEOUT)
)); ));
@ -52,7 +50,7 @@ describe('recognize()', () => {
].forEach(({ name, desc, ans }) => ( ].forEach(({ name, desc, ans }) => (
it(`recongize ${desc} image`, async () => { it(`recongize ${desc} image`, async () => {
await worker.initialize('eng'); await worker.initialize('eng');
const { data: { text } } = await scheduler.addJob('recognize', `${IMAGE_PATH}/${name}`); const { data: { text } } = await worker.recognize(`${IMAGE_PATH}/${name}`);
expect(text).to.be(ans); expect(text).to.be(ans);
}).timeout(TIMEOUT) }).timeout(TIMEOUT)
)); ));
@ -68,8 +66,7 @@ describe('recognize()', () => {
}) => ( }) => (
it(`recongize half ${name}`, async () => { it(`recongize half ${name}`, async () => {
await worker.initialize('eng'); await worker.initialize('eng');
const { data: { text } } = await scheduler.addJob( const { data: { text } } = await worker.recognize(
'recognize',
`${IMAGE_PATH}/${name}`, `${IMAGE_PATH}/${name}`,
{ {
rectangles: [ rectangles: [
@ -94,7 +91,7 @@ describe('recognize()', () => {
await worker.setParameters({ await worker.setParameters({
tessedit_pageseg_mode: mode, tessedit_pageseg_mode: mode,
}); });
const { data } = await scheduler.addJob('recognize', `${IMAGE_PATH}/simple.png`); const { data } = await worker.recognize(`${IMAGE_PATH}/simple.png`);
expect(Object.keys(data).length).not.to.be(0); expect(Object.keys(data).length).not.to.be(0);
}).timeout(TIMEOUT) }).timeout(TIMEOUT)
)); ));
@ -105,7 +102,7 @@ describe('recognize()', () => {
it(`support ${format} format`, async () => { it(`support ${format} format`, async () => {
const buf = fs.readFileSync(path.join(__dirname, 'assets', 'images', `simple.${format}`)); const buf = fs.readFileSync(path.join(__dirname, 'assets', 'images', `simple.${format}`));
await worker.initialize('eng'); await worker.initialize('eng');
const { data: { text } } = await scheduler.addJob('recognize', buf); const { data: { text } } = await worker.recognize(buf);
expect(text).to.be(SIMPLE_TEXT); expect(text).to.be(SIMPLE_TEXT);
}).timeout(TIMEOUT) }).timeout(TIMEOUT)
)); ));
@ -117,7 +114,7 @@ describe('recognize()', () => {
const imageDOM = document.createElement('img'); const imageDOM = document.createElement('img');
imageDOM.setAttribute('src', `${IMAGE_PATH}/simple.${format}`); imageDOM.setAttribute('src', `${IMAGE_PATH}/simple.${format}`);
await worker.initialize('eng'); await worker.initialize('eng');
const { data: { text } } = await scheduler.addJob('recognize', imageDOM); const { data: { text } } = await worker.recognize(imageDOM);
expect(text).to.be(SIMPLE_TEXT); expect(text).to.be(SIMPLE_TEXT);
}).timeout(TIMEOUT) }).timeout(TIMEOUT)
)); ));
@ -129,7 +126,7 @@ describe('recognize()', () => {
const videoDOM = document.createElement('video'); const videoDOM = document.createElement('video');
videoDOM.setAttribute('poster', `${IMAGE_PATH}/simple.${format}`); videoDOM.setAttribute('poster', `${IMAGE_PATH}/simple.${format}`);
await worker.initialize('eng'); await worker.initialize('eng');
const { data: { text } } = await scheduler.addJob('recognize', videoDOM); const { data: { text } } = await worker.recognize(videoDOM);
expect(text).to.be(SIMPLE_TEXT); expect(text).to.be(SIMPLE_TEXT);
}).timeout(TIMEOUT) }).timeout(TIMEOUT)
)); ));
@ -161,7 +158,7 @@ describe('recognize()', () => {
formats.forEach(format => ( formats.forEach(format => (
it(`support ${format} format`, async () => { it(`support ${format} format`, async () => {
await worker.initialize('eng'); await worker.initialize('eng');
const { data: { text } } = await scheduler.addJob('recognize', canvasDOM); const { data: { text } } = await worker.recognize(canvasDOM);
expect(text).to.be(SIMPLE_TEXT); expect(text).to.be(SIMPLE_TEXT);
}).timeout(TIMEOUT) }).timeout(TIMEOUT)
)); ));

Loading…
Cancel
Save