diff --git a/docs/api.md b/docs/api.md index b9a4f43..ba3b14d 100644 --- a/docs/api.md +++ b/docs/api.md @@ -179,7 +179,7 @@ Figures out what words are in `image`, where the words are in `image`, etc. - `image` see [Image Format](./image-format.md) for more details. - `options` a object of customized optons - - `rectangles` an array of objects to specify the region you want to recognized in the image, the object should contain top, left, width and height, see example below. + - `rectangle` an object to specify the region you want to recognize in the image, the object should contain top, left, width and height, see example below. - `jobId` Please see details above **Output:** @@ -198,7 +198,7 @@ const { createWorker } = Tesseract; })(); ``` -With rectangles +With rectangle ```javascript const { createWorker } = Tesseract; @@ -208,9 +208,7 @@ const { createWorker } = Tesseract; await worker.loadLanguage('eng'); await worker.initialize('eng'); const { data: { text } } = await worker.recognize(image, { - rectangles: [ - { top: 0, left: 0, width: 100, height: 100 }, - ], + rectangle: { top: 0, left: 0, width: 100, height: 100 }, }); console.log(text); })(); diff --git a/docs/examples.md b/docs/examples.md index e01db0a..21c93c1 100644 --- a/docs/examples.md +++ b/docs/examples.md @@ -5,7 +5,7 @@ You can also check [examples](../examples) folder.
### basic ```javascript -import { createWorker } from 'tesseract.js'; +const { createWorker } = require('tesseract.js'); const worker = createWorker(); @@ -22,7 +22,7 @@ const worker = createWorker(); ### with detailed progress ```javascript -import { createWorker } from 'tesseract.js'; +const { createWorker } = require('tesseract.js'); const worker = createWorker({ logger: m => console.log(m), // Add logger here @@ -41,7 +41,7 @@ const worker = createWorker({ ### with multiple languages, separate by '+' ```javascript -import { createWorker } from 'tesseract.js'; +const { createWorker } = require('tesseract.js'); const worker = createWorker(); @@ -57,7 +57,7 @@ const worker = createWorker(); ### with whitelist char (^2.0.0-beta.1) ```javascript -import { createWorker } from 'tesseract.js'; +const { createWorker } = require('tesseract.js'); const worker = createWorker(); @@ -79,7 +79,7 @@ const worker = createWorker(); Check here for more details of pageseg mode: https://github.com/tesseract-ocr/tesseract/blob/4.0.0/src/ccstruct/publictypes.h#L163 ```javascript -import { createWorker, PSM } from 'tesseract.js'; +const { createWorker, PSM } = require('tesseract.js'); const worker = createWorker(); @@ -103,30 +103,105 @@ Please check **examples** folder for details. 
Browser: [download-pdf.html](../examples/browser/download-pdf.html) Node: [download-pdf.js](../examples/node/download-pdf.js) -### with only part of the image (^2.0.0-beta.1) +### with only part of the image (^2.0.1) + +**One rectangle** ```javascript -import { createWorker } from 'tesseract.js'; +const { createWorker } = require('tesseract.js'); + +const worker = createWorker(); +const rectangle = { left: 0, top: 0, width: 500, height: 250 }; + +(async () => { + await worker.load(); + await worker.loadLanguage('eng'); + await worker.initialize('eng'); + const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png', { rectangle }); + console.log(text); + await worker.terminate(); +})(); +``` + +**Multiple Rectangles** + +```javascript +const { createWorker } = require('tesseract.js'); const worker = createWorker(); const rectangles = [ - { left: 0, top: 0, width: 500, height: 250 }, + { + left: 0, + top: 0, + width: 500, + height: 250, + }, + { + left: 500, + top: 0, + width: 500, + height: 250, + }, ]; (async () => { await worker.load(); await worker.loadLanguage('eng'); await worker.initialize('eng'); - const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png', 'eng', { rectangles }); - console.log(text); + const values = []; + for (let i = 0; i < rectangles.length; i++) { + const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png', { rectangle: rectangles[i] }); + values.push(text); + } + console.log(values); await worker.terminate(); })(); ``` +**Multiple Rectangles (with scheduler to do recognition in parallel)** + +```javascript +const { createWorker, createScheduler } = require('tesseract.js'); + +const scheduler = createScheduler(); +const worker1 = createWorker(); +const worker2 = createWorker(); +const rectangles = [ + { + left: 0, + top: 0, + width: 500, + height: 250, + }, + { + left: 500, + top: 0, + width: 500, + height: 
250, + }, +]; + +(async () => { + await worker1.load(); + await worker2.load(); + await worker1.loadLanguage('eng'); + await worker2.loadLanguage('eng'); + await worker1.initialize('eng'); + await worker2.initialize('eng'); + scheduler.addWorker(worker1); + scheduler.addWorker(worker2); + const results = await Promise.all(rectangles.map((rectangle) => ( + scheduler.addJob('recognize', 'https://tesseract.projectnaptha.com/img/eng_bw.png', { rectangle }) + ))); + console.log(results.map(r => r.data.text)); + await scheduler.terminate(); +})(); +``` + ### with multiple workers to speed up (^2.0.0-beta.1) ```javascript -import { createWorker, createScheduler } from 'tesseract.js'; +const { createWorker, createScheduler } = require('tesseract.js'); const scheduler = createScheduler(); const worker1 = createWorker(); @@ -143,7 +218,7 @@ const worker2 = createWorker(); scheduler.addWorker(worker2); /** Add 10 recognition jobs */ const results = await Promise.all(Array(10).fill(0).map(() => ( - await scheduler.addJob('recognize', 'https://tesseract.projectnaptha.com/img/eng_bw.png') + scheduler.addJob('recognize', 'https://tesseract.projectnaptha.com/img/eng_bw.png') ))) console.log(results); await scheduler.terminate(); // It also terminates all workers. 
diff --git a/src/worker-script/index.js b/src/worker-script/index.js index f157c23..c090416 100644 --- a/src/worker-script/index.js +++ b/src/worker-script/index.js @@ -187,14 +187,12 @@ const initialize = ({ } }; -const recognize = ({ payload: { image, options: { rectangles = [] } } }, res) => { +const recognize = ({ payload: { image, options: { rectangle: rec } } }, res) => { try { const ptr = setImage(TessModule, api, image); - rectangles.forEach(({ - left, top, width, height, - }) => { - api.SetRectangle(left, top, width, height); - }); + if (typeof rec === 'object') { + api.SetRectangle(rec.left, rec.top, rec.width, rec.height); + } api.Recognize(null); res.resolve(dump(TessModule, api, params)); TessModule._free(ptr); diff --git a/tests/recognize.test.js b/tests/recognize.test.js index 6904489..a65f108 100644 --- a/tests/recognize.test.js +++ b/tests/recognize.test.js @@ -69,11 +69,9 @@ describe('recognize()', () => { const { data: { text } } = await worker.recognize( `${IMAGE_PATH}/${name}`, { - rectangles: [ - { - top, left, width, height, - }, - ], + rectangle: { + top, left, width, height, + }, }, ); expect(text).to.be(ans);