Browse Source

Update rectangles to rectangle as only one region can be assigned, fix #378

pull/379/head
Jerome Wu 5 years ago
parent
commit
945f5d3f35
  1. 8
      docs/api.md
  2. 99
      docs/examples.md
  3. 10
      src/worker-script/index.js
  4. 8
      tests/recognize.test.js

8
docs/api.md

@@ -179,7 +179,7 @@ Figures out what words are in `image`, where the words are in `image`, etc.
- `image` see [Image Format](./image-format.md) for more details.
- `options` an object of customized options
- `rectangles` an array of objects to specify the region you want to recognized in the image, the object should contain top, left, width and height, see example below.
- `rectangle` an object to specify the region you want to recognize in the image; the object should contain top, left, width and height, see example below.
- `jobId` Please see details above
**Output:**
@@ -198,7 +198,7 @@ const { createWorker } = Tesseract;
})();
```
With rectangles
With rectangle
```javascript
const { createWorker } = Tesseract;
@@ -208,9 +208,7 @@ const { createWorker } = Tesseract;
await worker.loadLanguage('eng');
await worker.initialize('eng');
const { data: { text } } = await worker.recognize(image, {
rectangles: [
{ top: 0, left: 0, width: 100, height: 100 },
],
rectangle: { top: 0, left: 0, width: 100, height: 100 },
});
console.log(text);
})();

99
docs/examples.md

@@ -5,7 +5,7 @@ You can also check [examples](../examples) folder.
### basic
```javascript
import { createWorker } from 'tesseract.js';
const { createWorker } = require('tesseract.js');
const worker = createWorker();
@@ -22,7 +22,7 @@ const worker = createWorker();
### with detailed progress
```javascript
import { createWorker } from 'tesseract.js';
const { createWorker } = require('tesseract.js');
const worker = createWorker({
logger: m => console.log(m), // Add logger here
@@ -41,7 +41,7 @@ const worker = createWorker({
### with multiple languages, separated by '+'
```javascript
import { createWorker } from 'tesseract.js';
const { createWorker } = require('tesseract.js');
const worker = createWorker();
@@ -57,7 +57,7 @@ const worker = createWorker();
### with whitelist char (^2.0.0-beta.1)
```javascript
import { createWorker } from 'tesseract.js';
const { createWorker } = require('tesseract.js');
const worker = createWorker();
@@ -79,7 +79,7 @@ const worker = createWorker();
Check here for more details of pageseg mode: https://github.com/tesseract-ocr/tesseract/blob/4.0.0/src/ccstruct/publictypes.h#L163
```javascript
import { createWorker, PSM } from 'tesseract.js';
const { createWorker, PSM } = require('tesseract.js');
const worker = createWorker();
@@ -103,30 +103,105 @@ Please check **examples** folder for details.
Browser: [download-pdf.html](../examples/browser/download-pdf.html)
Node: [download-pdf.js](../examples/node/download-pdf.js)
### with only part of the image (^2.0.0-beta.1)
### with only part of the image (^2.0.1)
**One rectangle**
```javascript
import { createWorker } from 'tesseract.js';
const { createWorker } = require('tesseract.js');
const worker = createWorker();
const rectangle = { left: 0, top: 0, width: 500, height: 250 };
(async () => {
await worker.load();
await worker.loadLanguage('eng');
await worker.initialize('eng');
const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png', { rectangle });
console.log(text);
await worker.terminate();
})();
```
**Multiple Rectangles**
```javascript
const { createWorker } = require('tesseract.js');
const worker = createWorker();
const rectangles = [
{ left: 0, top: 0, width: 500, height: 250 },
{
left: 0,
top: 0,
width: 500,
height: 250,
},
{
left: 500,
top: 0,
width: 500,
height: 250,
},
];
(async () => {
await worker.load();
await worker.loadLanguage('eng');
await worker.initialize('eng');
const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png', 'eng', { rectangles });
console.log(text);
const values = [];
for (let i = 0; i < rectangles.length; i++) {
const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png', { rectangle: rectangles[i] });
values.push(text);
}
console.log(values);
await worker.terminate();
})();
```
**Multiple Rectangles (with scheduler to do recognition in parallel)**
```javascript
const { createWorker, createScheduler } = require('tesseract.js');
const scheduler = createScheduler();
const worker1 = createWorker();
const worker2 = createWorker();
const rectangles = [
{
left: 0,
top: 0,
width: 500,
height: 250,
},
{
left: 500,
top: 0,
width: 500,
height: 250,
},
];
(async () => {
await worker1.load();
await worker2.load();
await worker1.loadLanguage('eng');
await worker2.loadLanguage('eng');
await worker1.initialize('eng');
await worker2.initialize('eng');
scheduler.addWorker(worker1);
scheduler.addWorker(worker2);
const results = await Promise.all(rectangles.map((rectangle) => (
scheduler.addJob('recognize', 'https://tesseract.projectnaptha.com/img/eng_bw.png', { rectangle })
)));
console.log(results.map(r => r.data.text));
await scheduler.terminate();
})();
```
### with multiple workers to speed up (^2.0.0-beta.1)
```javascript
import { createWorker, createScheduler } from 'tesseract.js';
const { createWorker, createScheduler } = require('tesseract.js');
const scheduler = createScheduler();
const worker1 = createWorker();
@@ -143,7 +218,7 @@ const worker2 = createWorker();
scheduler.addWorker(worker2);
/** Add 10 recognition jobs */
const results = await Promise.all(Array(10).fill(0).map(() => (
await scheduler.addJob('recognize', 'https://tesseract.projectnaptha.com/img/eng_bw.png')
scheduler.addJob('recognize', 'https://tesseract.projectnaptha.com/img/eng_bw.png')
)))
console.log(results);
await scheduler.terminate(); // It also terminates all workers.

10
src/worker-script/index.js

@@ -187,14 +187,12 @@ const initialize = ({
}
};
const recognize = ({ payload: { image, options: { rectangles = [] } } }, res) => {
const recognize = ({ payload: { image, options: { rectangle: rec } } }, res) => {
try {
const ptr = setImage(TessModule, api, image);
rectangles.forEach(({
left, top, width, height,
}) => {
api.SetRectangle(left, top, width, height);
});
if (typeof rec === 'object') {
api.SetRectangle(rec.left, rec.top, rec.width, rec.height);
}
api.Recognize(null);
res.resolve(dump(TessModule, api, params));
TessModule._free(ptr);

8
tests/recognize.test.js

@@ -69,11 +69,9 @@ describe('recognize()', () => {
const { data: { text } } = await worker.recognize(
`${IMAGE_PATH}/${name}`,
{
rectangles: [
{
top, left, width, height,
},
],
rectangle: {
top, left, width, height,
},
},
);
expect(text).to.be(ans);

Loading…
Cancel
Save