@ -5,7 +5,7 @@ You can also check [examples](../examples) folder.
@@ -5,7 +5,7 @@ You can also check [examples](../examples) folder.
### basic
```javascript
import { createWorker } from 'tesseract.js' ;
const { createWorker } = require('tesseract.js') ;
const worker = createWorker();
@ -22,7 +22,7 @@ const worker = createWorker();
@@ -22,7 +22,7 @@ const worker = createWorker();
### with detailed progress
```javascript
import { createWorker } from 'tesseract.js' ;
const { createWorker } = require('tesseract.js') ;
const worker = createWorker({
logger: m => console.log(m), // Add logger here
@ -41,7 +41,7 @@ const worker = createWorker({
@@ -41,7 +41,7 @@ const worker = createWorker({
### with multiple languages, separate by '+'
```javascript
import { createWorker } from 'tesseract.js' ;
const { createWorker } = require('tesseract.js') ;
const worker = createWorker();
@ -57,7 +57,7 @@ const worker = createWorker();
@@ -57,7 +57,7 @@ const worker = createWorker();
### with whitelist char (^2.0.0-beta.1)
```javascript
import { createWorker } from 'tesseract.js' ;
const { createWorker } = require('tesseract.js') ;
const worker = createWorker();
@ -79,7 +79,7 @@ const worker = createWorker();
@@ -79,7 +79,7 @@ const worker = createWorker();
Check here for more details of pageseg mode: https://github.com/tesseract-ocr/tesseract/blob/4.0.0/src/ccstruct/publictypes.h#L163
```javascript
import { createWorker, PSM } from 'tesseract.js' ;
const { createWorker, PSM } = require('tesseract.js') ;
const worker = createWorker();
@ -103,30 +103,105 @@ Please check **examples** folder for details.
@@ -103,30 +103,105 @@ Please check **examples** folder for details.
Browser: [download-pdf.html](../examples/browser/download-pdf.html)
Node: [download-pdf.js](../examples/node/download-pdf.js)
### with only part of the image (^2.0.0-beta.1)
### with only part of the image (^2.0.1)
**One rectangle**
```javascript
// Recognize only one region of the image by passing a `rectangle` option.
const { createWorker } = require('tesseract.js');

const worker = createWorker();
// Region of the image handed to `recognize` below.
const rectangle = { left: 0, top: 0, width: 500, height: 250 };

(async () => {
  // The worker is loaded and initialized for English before recognizing.
  await worker.load();
  await worker.loadLanguage('eng');
  await worker.initialize('eng');
  const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png', { rectangle });
  console.log(text);
  await worker.terminate();
})();
```
**Multiple Rectangles**
```javascript
// Recognize several regions of the same image, one after another, with one worker.
const { createWorker } = require('tesseract.js');

const worker = createWorker();
// Each entry is passed as the `rectangle` option of its own `recognize` call.
const rectangles = [
  {
    left: 0,
    top: 0,
    width: 500,
    height: 250,
  },
  {
    left: 500,
    top: 0,
    width: 500,
    height: 250,
  },
];

(async () => {
  await worker.load();
  await worker.loadLanguage('eng');
  await worker.initialize('eng');
  // Collected texts, in the same order as `rectangles`.
  const values = [];
  // Each call is awaited before the next one starts, so the single worker
  // handles the rectangles sequentially.
  for (const rectangle of rectangles) {
    const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png', { rectangle });
    values.push(text);
  }
  console.log(values);
  await worker.terminate();
})();
```
**Multiple Rectangles (with scheduler to do recognition in parallel)**
```javascript
// Recognize several regions of one image through a scheduler backed by two workers.
const { createWorker, createScheduler } = require('tesseract.js');

const scheduler = createScheduler();
const worker1 = createWorker();
const worker2 = createWorker();
// One `recognize` job is queued per rectangle.
const rectangles = [
  { left: 0, top: 0, width: 500, height: 250 },
  { left: 500, top: 0, width: 500, height: 250 },
];

(async () => {
  const workers = [worker1, worker2];

  // Each setup step runs for worker1, then worker2, before the next step begins.
  for (const worker of workers) {
    await worker.load();
  }
  for (const worker of workers) {
    await worker.loadLanguage('eng');
  }
  for (const worker of workers) {
    await worker.initialize('eng');
  }
  for (const worker of workers) {
    scheduler.addWorker(worker);
  }

  // Queue every job first, then wait for all of them together.
  const jobs = rectangles.map((rectangle) => (
    scheduler.addJob('recognize', 'https://tesseract.projectnaptha.com/img/eng_bw.png', { rectangle })
  ));
  const results = await Promise.all(jobs);
  console.log(results.map(({ data }) => data.text));
  await scheduler.terminate();
})();
```
### with multiple workers to speed up (^2.0.0-beta.1)
```javascript
import { createWorker, createScheduler } from 'tesseract.js';
const { createWorker, createScheduler } = require('tesseract.js') ;
const scheduler = createScheduler();
const worker1 = createWorker();
@ -143,7 +218,7 @@ const worker2 = createWorker();
@@ -143,7 +218,7 @@ const worker2 = createWorker();
scheduler.addWorker(worker2);
/** Add 10 recognition jobs */
// The callback returns each job's promise without awaiting it (it is not an
// async function); Promise.all then waits for all 10 jobs together.
const results = await Promise.all(Array(10).fill(0).map(() => (
  scheduler.addJob('recognize', 'https://tesseract.projectnaptha.com/img/eng_bw.png')
)));
console.log(results);
await scheduler.terminate(); // It also terminates all workers.