From ca99c35d141999e9df1aded1c9df6b861691da95 Mon Sep 17 00:00:00 2001 From: Balearica Date: Sat, 17 Sep 2022 13:50:24 -0700 Subject: [PATCH] Reworked createWorker to be async and throw errors per #654 --- README.md | 3 +- docs/api.md | 32 +- docs/examples.md | 33 +- docs/faq.md | 3 +- docs/local-installation.md | 2 +- examples/browser/basic-edge.html | 18 +- examples/browser/benchmark.html | 3 +- examples/browser/download-pdf.html | 3 +- examples/browser/image-processing.html | 5 +- package-lock.json | 2887 +++++++++++------------- package.json | 4 +- src/Tesseract.js | 6 +- src/createWorker.js | 13 +- src/index.d.ts | 2 +- src/worker-script/index.js | 8 - tests/FS.test.js | 1 - tests/detect.test.js | 13 +- tests/error.test.js | 45 +- tests/recognize.test.js | 14 +- tests/scheduler.test.js | 1 - 20 files changed, 1374 insertions(+), 1722 deletions(-) diff --git a/README.md b/README.md index f945ad9..241972a 100644 --- a/README.md +++ b/README.md @@ -46,12 +46,11 @@ Or more imperative ```javascript import { createWorker } from 'tesseract.js'; -const worker = createWorker({ +const worker = await createWorker({ logger: m => console.log(m) }); (async () => { - await worker.load(); await worker.loadLanguage('eng'); await worker.initialize('eng'); const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png'); diff --git a/docs/api.md b/docs/api.md index f66e359..8e795b0 100644 --- a/docs/api.md +++ b/docs/api.md @@ -1,7 +1,6 @@ # API - [createWorker()](#create-worker) - - [Worker.load](#worker-load) - [Worker.writeText](#worker-writeText) - [Worker.readText](#worker-readText) - [Worker.removeFile](#worker-removeFile) @@ -53,7 +52,7 @@ createWorker is a factory function that creates a tesseract worker, a worker is ```javascript const { createWorker } = Tesseract; -const worker = createWorker({ +const worker = await createWorker({ langPath: '...', logger: m => console.log(m), }); @@ -63,7 +62,6 @@ const worker = createWorker({ A Worker helps you to do the OCR related tasks, it takes few steps to setup Worker before it is fully functional. The full flow is: -- load - FS functions // optional - loadLanguauge - initialize @@ -82,23 +80,6 @@ Each function is async, so using async/await or Promise is required. When it is jobId is generated by Tesseract.js, but you can put your own when calling any of the function above. - -### Worker.load(jobId): Promise - -Worker.load() loads tesseract.js-core scripts (download from remote if not presented), it makes Web Worker/Child Process ready for next action. - -**Arguments:** - -- `jobId` Please see details above - -**Examples:** - -```javascript -(async () => { - await worker.load(); -})(); -``` - ### Worker.writeText(path, text, jobId): Promise @@ -273,8 +254,7 @@ Figures out what words are in `image`, where the words are in `image`, etc. ```javascript const { createWorker } = Tesseract; (async () => { - const worker = createWorker(); - await worker.load(); + const worker = await createWorker(); await worker.loadLanguage('eng'); await worker.initialize('eng'); const { data: { text } } = await worker.recognize(image); @@ -287,8 +267,7 @@ With rectangle ```javascript const { createWorker } = Tesseract; (async () => { - const worker = createWorker(); - await worker.load(); + const worker = await createWorker(); await worker.loadLanguage('eng'); await worker.initialize('eng'); const { data: { text } } = await worker.recognize(image, { @@ -313,8 +292,7 @@ Worker.detect() does OSD (Orientation and Script Detection) to the image instead ```javascript const { createWorker } = Tesseract; (async () => { - const worker = createWorker(); - await worker.load(); + const worker = await createWorker(); await worker.loadLanguage('eng'); await worker.initialize('eng'); const { data } = await worker.detect(image); @@ -361,7 +339,7 @@ Scheduler.addWorker() adds a worker into the worker pool inside scheduler, it is ```javascript const { createWorker, createScheduler } = Tesseract; const scheduler = createScheduler(); -const worker = createWorker(); +const worker = await createWorker(); scheduler.addWorker(worker); ``` diff --git a/docs/examples.md b/docs/examples.md index 21c93c1..188fa13 100644 --- a/docs/examples.md +++ b/docs/examples.md @@ -7,10 +7,9 @@ You can also check [examples](../examples) folder. ```javascript const { createWorker } = require('tesseract.js'); -const worker = createWorker(); +const worker = await createWorker(); (async () => { - await worker.load(); await worker.loadLanguage('eng'); await worker.initialize('eng'); const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png'); @@ -24,12 +23,11 @@ const worker = createWorker(); ```javascript const { createWorker } = require('tesseract.js'); -const worker = createWorker({ +const worker = await createWorker({ logger: m => console.log(m), // Add logger here }); (async () => { - await worker.load(); await worker.loadLanguage('eng'); await worker.initialize('eng'); const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png'); @@ -43,10 +41,9 @@ const worker = createWorker({ ```javascript const { createWorker } = require('tesseract.js'); -const worker = createWorker(); +const worker = await createWorker(); (async () => { - await worker.load(); await worker.loadLanguage('eng+chi_tra'); await worker.initialize('eng+chi_tra'); const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png'); @@ -59,10 +56,9 @@ const worker = createWorker(); ```javascript const { createWorker } = require('tesseract.js'); -const worker = createWorker(); +const worker = await createWorker(); (async () => { - await worker.load(); await worker.loadLanguage('eng'); await worker.initialize('eng'); await worker.setParameters({ @@ -81,10 +77,9 @@ Check here for more details of pageseg mode: https://github.com/tesseract-ocr/te ```javascript const { createWorker, PSM } = require('tesseract.js'); -const worker = createWorker(); +const worker = await createWorker(); (async () => { - await worker.load(); await worker.loadLanguage('eng'); await worker.initialize('eng'); await worker.setParameters({ @@ -110,11 +105,10 @@ Node: [download-pdf.js](../examples/node/download-pdf.js) ```javascript const { createWorker } = require('tesseract.js'); -const worker = createWorker(); +const worker = await createWorker(); const rectangle = { left: 0, top: 0, width: 500, height: 250 }; (async () => { - await worker.load(); await worker.loadLanguage('eng'); await worker.initialize('eng'); const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png', { rectangle }); @@ -128,7 +122,7 @@ const rectangle = { left: 0, top: 0, width: 500, height: 250 }; ```javascript const { createWorker } = require('tesseract.js'); -const worker = createWorker(); +const worker = await createWorker(); const rectangles = [ { left: 0, @@ -145,7 +139,6 @@ const rectangles = [ ]; (async () => { - await worker.load(); await worker.loadLanguage('eng'); await worker.initialize('eng'); const values = []; @@ -164,8 +157,8 @@ const rectangles = [ const { createWorker, createScheduler } = require('tesseract.js'); const scheduler = createScheduler(); -const worker1 = createWorker(); -const worker2 = createWorker(); +const worker1 = await createWorker(); +const worker2 = await createWorker(); const rectangles = [ { left: 0, @@ -182,8 +175,6 @@ const rectangles = [ ]; (async () => { - await worker1.load(); - await worker2.load(); await worker1.loadLanguage('eng'); await worker2.loadLanguage('eng'); await worker1.initialize('eng'); @@ -204,12 +195,10 @@ const rectangles = [ const { createWorker, createScheduler } = require('tesseract.js'); const scheduler = createScheduler(); -const worker1 = createWorker(); -const worker2 = createWorker(); +const worker1 = await createWorker(); +const worker2 = await createWorker(); (async () => { - await worker1.load(); - await worker2.load(); await worker1.loadLanguage('eng'); await worker2.loadLanguage('eng'); await worker1.initialize('eng'); diff --git a/docs/faq.md b/docs/faq.md index 8f0738e..900ea7a 100644 --- a/docs/faq.md +++ b/docs/faq.md @@ -19,12 +19,11 @@ Starting from 2.0.0-beta.1, you can get all these information in the final resul ```javascript import { createWorker } from 'tesseract.js'; -const worker = createWorker({ +const worker = await createWorker({ logger: m => console.log(m) }); (async () => { - await worker.load(); await worker.loadLanguage('eng'); await worker.initialize('eng'); await worker.setParameters({ diff --git a/docs/local-installation.md b/docs/local-installation.md index 6832bba..f3fd35b 100644 --- a/docs/local-installation.md +++ b/docs/local-installation.md @@ -19,7 +19,7 @@ Tesseract.recognize(image, langs, { Or ```javascript -const worker = createWorker({ +const worker = await createWorker({ workerPath: 'https://unpkg.com/tesseract.js@v2.0.0/dist/worker.min.js', langPath: 'https://tessdata.projectnaptha.com/4.0.0', corePath: 'https://unpkg.com/tesseract.js-core@v2.0.0/tesseract-core.wasm.js', diff --git a/examples/browser/basic-edge.html b/examples/browser/basic-edge.html index 11125a7..a2bee27 100644 --- a/examples/browser/basic-edge.html +++ b/examples/browser/basic-edge.html @@ -6,9 +6,9 @@