Compare commits
14 Commits
master
...
support/1.
Author | SHA1 | Date |
---|---|---|
Jerome Wu | aba237af2e | 6 years ago |
Jerome Wu | a26566be04 | 6 years ago |
Jerome Wu | 55e355eff5 | 6 years ago |
Jerome Wu | 1f497271b5 | 6 years ago |
jeromewu | 1a12ead46f | 6 years ago |
Urs Wolfer | 5c930514f5 | 6 years ago |
jeromewu | 9268572644 | 6 years ago |
HoldYourWaffle | 7911518b39 | 6 years ago |
Jerome Wu | 613a19c7e1 | 6 years ago |
Jerome Wu | 07ea31a9cd | 6 years ago |
Jerome Wu | 741ff413b3 | 6 years ago |
Jerome Wu | cdb86c694a | 6 years ago |
Jerome Wu | 06d32c6804 | 6 years ago |
Jerome Wu | 8e1b21cd2c | 6 years ago |
@ -1,17 +0,0 @@ |
|||||||
{ |
|
||||||
"extends": "airbnb-base", |
|
||||||
"parser": "babel-eslint", |
|
||||||
"env": { |
|
||||||
"browser": true, |
|
||||||
"node": true, |
|
||||||
"mocha": true, |
|
||||||
"worker": true |
|
||||||
}, |
|
||||||
"rules": { |
|
||||||
"no-underscore-dangle": 0, |
|
||||||
"no-console": 0, |
|
||||||
"global-require": 0, |
|
||||||
"camelcase": 0, |
|
||||||
"no-control-regex": 0 |
|
||||||
} |
|
||||||
} |
|
@ -1,9 +0,0 @@ |
|||||||
# These are supported funding model platforms |
|
||||||
|
|
||||||
github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] |
|
||||||
patreon: # Replace with a single Patreon username |
|
||||||
open_collective: tesseractjs |
|
||||||
ko_fi: # Replace with a single Ko-fi username |
|
||||||
tidelift: npm/tesseract.js |
|
||||||
community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry |
|
||||||
custom: ["https://etherscan.io/address/0x74ace8c74535d6dac03ebdc708ca2fba54796ef2"] |
|
@ -1,38 +0,0 @@ |
|||||||
--- |
|
||||||
name: Bug report |
|
||||||
about: Create a report to help us improve |
|
||||||
title: '' |
|
||||||
labels: '' |
|
||||||
assignees: '' |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
**Describe the bug** |
|
||||||
A clear and concise description of what the bug is. |
|
||||||
|
|
||||||
**To Reproduce** |
|
||||||
Steps to reproduce the behavior: |
|
||||||
1. Go to '...' |
|
||||||
2. Click on '....' |
|
||||||
3. Scroll down to '....' |
|
||||||
4. See error |
|
||||||
|
|
||||||
**Expected behavior** |
|
||||||
A clear and concise description of what you expected to happen. |
|
||||||
|
|
||||||
**Screenshots** |
|
||||||
If applicable, add screenshots to help explain your problem. |
|
||||||
|
|
||||||
**Desktop (please complete the following information):** |
|
||||||
- OS: [e.g. iOS] |
|
||||||
- Browser [e.g. chrome, safari] |
|
||||||
- Version [e.g. 22] |
|
||||||
|
|
||||||
**Smartphone (please complete the following information):** |
|
||||||
- Device: [e.g. iPhone6] |
|
||||||
- OS: [e.g. iOS8.1] |
|
||||||
- Browser [e.g. stock browser, safari] |
|
||||||
- Version [e.g. 22] |
|
||||||
|
|
||||||
**Additional context** |
|
||||||
Add any other context about the problem here. |
|
@ -1,20 +0,0 @@ |
|||||||
--- |
|
||||||
name: Feature request |
|
||||||
about: Suggest an idea for this project |
|
||||||
title: '' |
|
||||||
labels: '' |
|
||||||
assignees: '' |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
**Is your feature request related to a problem? Please describe.** |
|
||||||
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] |
|
||||||
|
|
||||||
**Describe the solution you'd like** |
|
||||||
A clear and concise description of what you want to happen. |
|
||||||
|
|
||||||
**Describe alternatives you've considered** |
|
||||||
A clear and concise description of any alternative solutions or features you've considered. |
|
||||||
|
|
||||||
**Additional context** |
|
||||||
Add any other context or screenshots about the feature request here. |
|
@ -1,5 +0,0 @@ |
|||||||
## Security contact information |
|
||||||
|
|
||||||
To report a security vulnerability, please use the |
|
||||||
[Tidelift security contact](https://tidelift.com/security). |
|
||||||
Tidelift will coordinate the fix and disclosure. |
|
@ -1,29 +0,0 @@ |
|||||||
# This workflow will do a clean install of node dependencies, build the source code and run tests across different versions of node |
|
||||||
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-nodejs-with-github-actions |
|
||||||
|
|
||||||
name: Node.js CI |
|
||||||
|
|
||||||
on: |
|
||||||
push: |
|
||||||
branches: [ master ] |
|
||||||
pull_request: |
|
||||||
branches: [ master ] |
|
||||||
|
|
||||||
jobs: |
|
||||||
build: |
|
||||||
|
|
||||||
runs-on: ubuntu-latest |
|
||||||
|
|
||||||
strategy: |
|
||||||
matrix: |
|
||||||
node-version: [14.x, 16.x] |
|
||||||
|
|
||||||
steps: |
|
||||||
- uses: actions/checkout@v2 |
|
||||||
- name: Use Node.js ${{ matrix.node-version }} |
|
||||||
uses: actions/setup-node@v1 |
|
||||||
with: |
|
||||||
node-version: ${{ matrix.node-version }} |
|
||||||
- run: npm ci |
|
||||||
- run: npm run lint |
|
||||||
- run: npm test |
|
@ -1,2 +0,0 @@ |
|||||||
FROM gitpod/workspace-full |
|
||||||
RUN sudo apt-get update && sudo apt-get install -y libgtk-3-0 libx11-xcb1 libnss3 libxss1 libasound2 |
|
@ -1,9 +0,0 @@ |
|||||||
image: |
|
||||||
file: .gitpod.Dockerfile |
|
||||||
tasks: |
|
||||||
- command: gp await-port 3000 && sleep 3 && gp preview $(gp url 3000)/examples/browser/demo.html |
|
||||||
- init: npm install |
|
||||||
command: npm start |
|
||||||
ports: |
|
||||||
- port: 3000 |
|
||||||
onOpen: ignore |
|
@ -1,197 +1,303 @@ |
|||||||
<p align="center"> |
# [Tesseract.js](http://tesseract.projectnaptha.com/) |
||||||
<a href="https://tesseract.projectnaptha.com/"><img width="256px" height="256px" alt="Tesseract.js" src="./docs/images/tesseract.png"></a> |
|
||||||
</p> |
|
||||||
|
|
||||||
![Lint & Test](https://github.com/naptha/tesseract.js/workflows/Node.js%20CI/badge.svg) |
|
||||||
![CodeQL](https://github.com/naptha/tesseract.js/workflows/CodeQL/badge.svg) |
|
||||||
[![Gitpod Ready-to-Code](https://img.shields.io/badge/Gitpod-ready--to--code-blue?logo=gitpod)](https://github.com/naptha/tesseract.js) |
|
||||||
[![Financial Contributors on Open Collective](https://opencollective.com/tesseractjs/all/badge.svg?label=financial+contributors)](https://opencollective.com/tesseractjs) [![npm version](https://badge.fury.io/js/tesseract.js.svg)](https://badge.fury.io/js/tesseract.js) |
|
||||||
[![Maintenance](https://img.shields.io/badge/Maintained%3F-yes-green.svg)](https://github.com/naptha/tesseract.js/graphs/commit-activity) |
|
||||||
[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) |
|
||||||
[![Code Style](https://badgen.net/badge/code%20style/airbnb/ff5a5f?icon=airbnb)](https://github.com/airbnb/javascript) |
|
||||||
[![Downloads Total](https://img.shields.io/npm/dt/tesseract.js.svg)](https://www.npmjs.com/package/tesseract.js) |
|
||||||
[![Downloads Month](https://img.shields.io/npm/dm/tesseract.js.svg)](https://www.npmjs.com/package/tesseract.js) |
|
||||||
|
|
||||||
Tesseract.js is a javascript library that gets words in [almost any language](./docs/tesseract_lang_list.md) out of images. ([Demo](http://tesseract.projectnaptha.com/)) |
|
||||||
|
|
||||||
Image Recognition |
[![NPM version][tesseractjs-npm-image]][tesseractjs-npm-url] |
||||||
|
|
||||||
|
[tesseractjs-npm-image]: https://img.shields.io/npm/v/tesseract.js.svg |
||||||
|
[tesseractjs-npm-url]: https://npmjs.org/package/tesseract.js |
||||||
|
|
||||||
|
**Tesseract.js v2 alpha is now available!! Check [HERE](https://github.com/naptha/tesseract.js) for more information.** |
||||||
|
|
||||||
[![fancy demo gif](./docs/images/demo.gif)](http://tesseract.projectnaptha.com) |
Tesseract.js is a javascript library that gets words in [almost any language](./docs/tesseract_lang_list.md) out of images. ([Demo](http://tesseract.projectnaptha.com/)) |
||||||
|
|
||||||
|
[![fancy demo gif](https://raw.githubusercontent.com/naptha/tesseract.js/support/1.x/docs/images/demo.gif)](http://tesseract.projectnaptha.com) |
||||||
|
|
||||||
Video Real-time Recognition |
Tesseract.js works with script tags, [webpack](https://webpack.js.org/)/[Browserify](http://browserify.org/), and [Node.js](https://nodejs.org/en/). [After you install it](#installation), using it is as simple as |
||||||
|
|
||||||
<p align="center"> |
```javascript |
||||||
<a href="https://github.com/jeromewu/tesseract.js-video"><img alt="Tesseract.js Video" src="./docs/images/video-demo.gif"></a> |
Tesseract.recognize(myImage) |
||||||
</p> |
.progress(function (p) { console.log('progress', p) }) |
||||||
|
.then(function (result) { console.log('result', result) }) |
||||||
|
``` |
||||||
|
|
||||||
|
[Check out the docs](#docs) for a full treatment of the API. |
||||||
|
|
||||||
|
## Provenance |
||||||
Tesseract.js wraps an [emscripten](https://github.com/kripken/emscripten) [port](https://github.com/naptha/tesseract.js-core) of the [Tesseract](https://github.com/tesseract-ocr/tesseract) [OCR](https://en.wikipedia.org/wiki/Optical_character_recognition) Engine. |
Tesseract.js wraps an [emscripten](https://github.com/kripken/emscripten) [port](https://github.com/naptha/tesseract.js-core) of the [Tesseract](https://github.com/tesseract-ocr/tesseract) [OCR](https://en.wikipedia.org/wiki/Optical_character_recognition) Engine. |
||||||
It works in the browser using [webpack](https://webpack.js.org/) or plain script tags with a [CDN](#CDN) and on the server with [Node.js](https://nodejs.org/en/). |
|
||||||
After you [install it](#installation), using it is as simple as: |
|
||||||
|
|
||||||
|
|
||||||
|
# Installation |
||||||
|
|
||||||
|
Tesseract.js works with a `<script>` tag via local copy or CDN, with webpack and Browserify via `npm`, and on Node.js via `npm`. [Check out the docs](#docs) for a full treatment of the API. |
||||||
|
|
||||||
|
## <script /> |
||||||
|
|
||||||
|
You can simply include Tesseract.js with a CDN like this: |
||||||
|
```html |
||||||
|
<script src='https://cdn.jsdelivr.net/gh/naptha/tesseract.js@v1.0.14/dist/tesseract.min.js'></script> |
||||||
|
``` |
||||||
|
|
||||||
|
After including your scripts, the `Tesseract` variable will be defined globally! |
||||||
|
|
||||||
|
## Dependency |
||||||
|
First: |
||||||
|
```shell |
||||||
|
> yarn add tesseract.js |
||||||
|
``` |
||||||
|
or |
||||||
|
``` |
||||||
|
> npm install tesseract.js --save |
||||||
|
``` |
||||||
|
> Note: Tesseract.js currently requires Node.js v6.8.0 or higher. |
||||||
|
|
||||||
|
|
||||||
|
## Usage |
||||||
|
```javascript |
||||||
|
var Tesseract = require('tesseract.js') |
||||||
|
``` |
||||||
|
|
||||||
|
or |
||||||
|
```javascript |
||||||
|
import Tesseract from 'tesseract.js' |
||||||
|
``` |
||||||
|
|
||||||
|
|
||||||
|
# Docs |
||||||
|
|
||||||
|
* [Tesseract.recognize](#tesseractrecognizeimage-imagelike-options---tesseractjob) |
||||||
|
+ [Simple Example](#simple-example) |
||||||
|
+ [More Complicated Example](#more-complicated-example) |
||||||
|
* [Tesseract.detect](#tesseractdetectimage-imagelike---tesseractjob) |
||||||
|
* [ImageLike](#imagelike) |
||||||
|
* [TesseractJob](#tesseractjob) |
||||||
|
+ [TesseractJob.progress](#tesseractjobprogresscallback-function---tesseractjob) |
||||||
|
+ [TesseractJob.then](#tesseractjobthencallback-function---tesseractjob) |
||||||
|
+ [TesseractJob.catch](#tesseractjobcatchcallback-function---tesseractjob) |
||||||
|
+ [TesseractJob.finally](#tesseractjobfinallycallback-function---tesseractjob) |
||||||
|
* [Local Installation](#local-installation) |
||||||
|
+ [corePath](#corepath) |
||||||
|
+ [workerPath](#workerpath) |
||||||
|
+ [langPath](#langpath) |
||||||
|
* [Contributing](#contributing) |
||||||
|
+ [Development](#development) |
||||||
|
+ [Building Static Files](#building-static-files) |
||||||
|
+ [Send us a Pull Request!](#send-us-a-pull-request) |
||||||
|
|
||||||
|
|
||||||
|
## Tesseract.recognize(image: [ImageLike](#imagelike)[, options]) -> [TesseractJob](#tesseractjob) |
||||||
|
Figures out what words are in `image`, where the words are in `image`, etc. |
||||||
|
> Note: `image` should be sufficiently high resolution. |
||||||
|
> Often, the same image will get much better results if you upscale it before calling `recognize`. |
||||||
|
|
||||||
|
- `image` is any [ImageLike](#imagelike) object. |
||||||
|
- `options` is either absent (in which case it is interpreted as `'eng'`), a string specifying a language short code from the [language list](./docs/tesseract_lang_list.md), or a flat json object that may: |
||||||
|
+ include properties that override some subset of the [default tesseract parameters](./docs/tesseract_parameters.md) |
||||||
|
+ include a `lang` property with a value from the [list of lang parameters](./docs/tesseract_lang_list.md) |
||||||
|
|
||||||
|
Returns a [TesseractJob](#tesseractjob) whose `then`, `progress`, `catch` and `finally` methods can be used to act on the result. |
||||||
|
|
||||||
|
### Simple Example: |
||||||
```javascript |
```javascript |
||||||
import Tesseract from 'tesseract.js'; |
Tesseract.recognize(myImage) |
||||||
|
.then(function(result){ |
||||||
Tesseract.recognize( |
console.log(result) |
||||||
'https://tesseract.projectnaptha.com/img/eng_bw.png', |
}) |
||||||
'eng', |
``` |
||||||
{ logger: m => console.log(m) } |
|
||||||
).then(({ data: { text } }) => { |
### More Complicated Example: |
||||||
console.log(text); |
```javascript |
||||||
|
// if we know our image is of spanish words without the letter 'e': |
||||||
|
Tesseract.recognize(myImage, { |
||||||
|
lang: 'spa', |
||||||
|
tessedit_char_blacklist: 'e' |
||||||
|
}) |
||||||
|
.then(function(result){ |
||||||
|
console.log(result) |
||||||
}) |
}) |
||||||
``` |
``` |
||||||
|
|
||||||
Or more imperative |
|
||||||
|
|
||||||
|
|
||||||
|
## Tesseract.detect(image: [ImageLike](#imagelike)) -> [TesseractJob](#tesseractjob) |
||||||
|
|
||||||
|
Figures out what script (e.g. 'Latin', 'Chinese') the words in image are written in. |
||||||
|
|
||||||
|
- `image` is any [ImageLike](#imagelike) object. |
||||||
|
|
||||||
|
Returns a [TesseractJob](#tesseractjob) whose `then`, `progress`, `catch` and `finally` methods can be used to act on the result of the script. |
||||||
|
|
||||||
|
|
||||||
```javascript |
```javascript |
||||||
import { createWorker } from 'tesseract.js'; |
Tesseract.detect(myImage) |
||||||
|
.then(function(result){ |
||||||
const worker = createWorker({ |
console.log(result) |
||||||
logger: m => console.log(m) |
}) |
||||||
}); |
``` |
||||||
|
|
||||||
(async () => { |
|
||||||
await worker.load(); |
|
||||||
await worker.loadLanguage('eng'); |
|
||||||
await worker.initialize('eng'); |
|
||||||
const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png'); |
|
||||||
console.log(text); |
|
||||||
await worker.terminate(); |
|
||||||
})(); |
|
||||||
``` |
|
||||||
|
|
||||||
[Check out the docs](#documentation) for a full explanation of the API. |
|
||||||
|
|
||||||
## Major changes in v3 |
|
||||||
- Significantly faster performance |
|
||||||
- Runtime reduction of 84% for Browser and 96% for Node.js when recognizing the [example images](./examples/data) |
|
||||||
- Upgrade to Tesseract v5.1.0 (using emscripten 3.1.18) |
|
||||||
- Added SIMD-enabled build for supported devices |
|
||||||
- Added support: |
|
||||||
- Node.js version 18 |
|
||||||
- Removed support: |
|
||||||
- ASM.js version, any other old versions of Tesseract.js-core (<3.0.0) |
|
||||||
- Node.js versions 10 and 12 |
|
||||||
|
|
||||||
## Major changes in v2 |
|
||||||
- Upgrade to tesseract v4.1.1 (using emscripten 1.39.10 upstream) |
|
||||||
- Support multiple languages at the same time, eg: eng+chi\_tra for English and Traditional Chinese |
|
||||||
- Supported image formats: png, jpg, bmp, pbm |
|
||||||
- Support WebAssembly (falls back to ASM.js when the browser doesn't support it) |
|
||||||
- Support Typescript |
|
||||||
|
|
||||||
Read a story about v2: <a href="https://jeromewu.github.io/why-i-refactor-tesseract.js-v2/">Why I refactor tesseract.js v2?</a><br> |
|
||||||
Check the <a href="https://github.com/naptha/tesseract.js/tree/support/1.x">support/1.x</a> branch for version 1 |
|
||||||
## Installation |
|
||||||
Tesseract.js works with a `<script>` tag via local copy or CDN, with webpack via `npm` and on Node.js with `npm/yarn`. |
|
||||||
|
|
||||||
|
|
||||||
### CDN |
|
||||||
```html |
|
||||||
<!-- v2 --> |
|
||||||
<script src='https://unpkg.com/tesseract.js@v2.1.0/dist/tesseract.min.js'></script> |
|
||||||
|
|
||||||
<!-- v1 --> |
|
||||||
<script src='https://unpkg.com/tesseract.js@1.0.19/src/index.js'></script> |
## ImageLike |
||||||
|
|
||||||
|
The main Tesseract.js functions take an `image` parameter, which should be something that is like an image. What's considered "image-like" differs depending on whether it is being run from the browser or through NodeJS. |
||||||
|
|
||||||
|
|
||||||
|
On a browser, an image can be: |
||||||
|
- an `img`, `video`, or `canvas` element |
||||||
|
- a CanvasRenderingContext2D (returned by `canvas.getContext('2d')`) |
||||||
|
- a `File` object (from a file `<input>` or drag-drop event) |
||||||
|
- a `Blob` object |
||||||
|
- a `ImageData` instance (an object containing `width`, `height` and `data` properties) |
||||||
|
- a path or URL to an accessible image (the image must either be hosted locally or accessible by CORS) |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
In Node.js, an image can be |
||||||
|
- a path to a local image |
||||||
|
- a `Buffer` instance containing a `PNG` or `JPEG` image |
||||||
|
- a `ImageData` instance (an object containing `width`, `height` and `data` properties) |
||||||
|
|
||||||
|
|
||||||
|
## TesseractJob |
||||||
|
|
||||||
|
A TesseractJob is an object returned by a call to `recognize` or `detect`. It's inspired by the ES6 Promise interface and provides `then` and `catch` methods. It also provides a `finally` method, which will be fired regardless of the job's fate. One important difference is that these methods return the job itself (to enable chaining) rather than a new object. |
||||||
|
|
||||||
|
Typical use is: |
||||||
|
```javascript |
||||||
|
Tesseract.recognize(myImage) |
||||||
|
.progress(message => console.log(message)) |
||||||
|
.catch(err => console.error(err)) |
||||||
|
.then(result => console.log(result)) |
||||||
|
.finally(resultOrError => console.log(resultOrError)) |
||||||
``` |
``` |
||||||
After including the script the `Tesseract` variable will be globally available. |
|
||||||
|
|
||||||
|
Which is equivalent to: |
||||||
|
```javascript |
||||||
|
var job1 = Tesseract.recognize(myImage); |
||||||
|
|
||||||
### Node.js |
job1.progress(message => console.log(message)); |
||||||
|
|
||||||
**Tesseract.js v3 requires Node.js v14 or higher** |
job1.catch(err => console.error(err)); |
||||||
|
|
||||||
```shell |
job1.then(result => console.log(result)); |
||||||
# For v3 |
|
||||||
npm install tesseract.js |
|
||||||
yarn add tesseract.js |
|
||||||
|
|
||||||
# For v2 |
job1.finally(resultOrError => console.log(resultOrError)); |
||||||
npm install tesseract.js@2 |
|
||||||
yarn add tesseract.js@2 |
|
||||||
``` |
``` |
||||||
|
|
||||||
|
|
||||||
## Documentation |
|
||||||
|
|
||||||
* [Examples](./docs/examples.md) |
### TesseractJob.progress(callback: function) -> TesseractJob |
||||||
* [Image Format](./docs/image-format.md) |
Sets `callback` as the function that will be called every time the job progresses. |
||||||
* [API](./docs/api.md) |
- `callback` is a function with the signature `callback(progress)` where `progress` is a json object. |
||||||
* [Local Installation](./docs/local-installation.md) |
|
||||||
* [FAQ](./docs/faq.md) |
|
||||||
|
|
||||||
## Use tesseract.js the way you like! |
For example: |
||||||
|
```javascript |
||||||
|
Tesseract.recognize(myImage) |
||||||
|
.progress(function(message){console.log('progress is: ', message)}) |
||||||
|
``` |
||||||
|
|
||||||
- Offline Version: https://github.com/jeromewu/tesseract.js-offline |
The console will show something like: |
||||||
- Electron Version: https://github.com/jeromewu/tesseract.js-electron |
```javascript |
||||||
- Custom Traineddata: https://github.com/jeromewu/tesseract.js-custom-traineddata |
progress is: {loaded_lang_model: "eng", from_cache: true} |
||||||
- Chrome Extension #1: https://github.com/jeromewu/tesseract.js-chrome-extension |
progress is: {initialized_with_lang: "eng"} |
||||||
- Chrome Extension #2: https://github.com/fxnoob/image-to-text |
progress is: {set_variable: Object} |
||||||
- Firefox Extension: https://github.com/gnonio/korporize |
progress is: {set_variable: Object} |
||||||
- With Vue: https://github.com/jeromewu/tesseract.js-vue-app |
progress is: {recognized: 0} |
||||||
- With Angular: https://github.com/jeromewu/tesseract.js-angular-app |
progress is: {recognized: 0.3} |
||||||
- With React: https://github.com/jeromewu/tesseract.js-react-app |
progress is: {recognized: 0.6} |
||||||
- Typescript: https://github.com/jeromewu/tesseract.js-typescript |
progress is: {recognized: 0.9} |
||||||
- Video Real-time Recognition: https://github.com/jeromewu/tesseract.js-video |
progress is: {recognized: 1} |
||||||
|
``` |
||||||
|
|
||||||
## Contributing |
|
||||||
|
|
||||||
### Development |
### TesseractJob.then(callback: function) -> TesseractJob |
||||||
To run a development copy of Tesseract.js do the following: |
Sets `callback` as the function that will be called if and when the job successfully completes. |
||||||
```shell |
- `callback` is a function with the signature `callback(result)` where `result` is a json object. |
||||||
# First we clone the repository |
|
||||||
git clone https://github.com/naptha/tesseract.js.git |
|
||||||
cd tesseract.js |
|
||||||
|
|
||||||
# Then we install the dependencies |
|
||||||
npm install |
|
||||||
|
|
||||||
# And finally we start the development server |
For example: |
||||||
npm start |
```javascript |
||||||
|
Tesseract.recognize(myImage) |
||||||
|
.then(function(result){console.log('result is: ', result)}) |
||||||
``` |
``` |
||||||
|
|
||||||
The development server will be available at http://localhost:3000/examples/browser/demo.html in your favorite browser. |
The console will show something like: |
||||||
It will automatically rebuild `tesseract.dev.js` and `worker.dev.js` when you change files in the **src** folder. |
```javascript |
||||||
|
result is: { |
||||||
|
blocks: Array[1] |
||||||
|
confidence: 87 |
||||||
|
html: "<div class='ocr_page' id='page_1' ..." |
||||||
|
lines: Array[3] |
||||||
|
oem: "DEFAULT" |
||||||
|
paragraphs: Array[1] |
||||||
|
psm: "SINGLE_BLOCK" |
||||||
|
symbols: Array[33] |
||||||
|
text: "Hello World↵from beyond↵the Cosmic Void↵↵" |
||||||
|
version: "3.04.00" |
||||||
|
words: Array[7] |
||||||
|
} |
||||||
|
``` |
||||||
|
|
||||||
### Online Setup with a single Click |
### TesseractJob.catch(callback: function) -> TesseractJob |
||||||
|
Sets `callback` as the function that will be called if the job fails. |
||||||
|
- `callback` is a function with the signature `callback(error)` where `error` is a json object. |
||||||
|
|
||||||
You can use Gitpod (a free online VS Code-like IDE) for contributing. With a single click it will launch a ready-to-code workspace with the build & start scripts already in progress, and within a few seconds it will spin up the dev server so that you can start contributing straight away without wasting any time. |
### TesseractJob.finally(callback: function) -> TesseractJob |
||||||
|
Sets `callback` as the function that will be called regardless of whether the job fails or succeeds. |
||||||
|
- `callback` is a function with the signature `callback(resultOrError)` where `resultOrError` is a json object. |
||||||
|
|
||||||
[![Open in Gitpod](https://gitpod.io/button/open-in-gitpod.svg)](https://gitpod.io/#https://github.com/naptha/tesseract.js/blob/master/examples/browser/demo.html) |
## Local Installation |
||||||
|
|
||||||
### Building Static Files |
In the browser, `tesseract.js` simply provides the API layer. Internally, it opens a WebWorker to handle requests. That worker itself loads code from the Emscripten-built `tesseract.js-core` which itself is hosted on a CDN. Then it dynamically loads language files hosted on another CDN. |
||||||
To build the compiled static files just execute the following: |
|
||||||
```shell |
Because of this we recommend loading `tesseract.js` from a CDN. But if you really need to have all your files local, you can use the `Tesseract.create` function which allows you to specify custom paths for workers, languages, and core. |
||||||
npm run build |
|
||||||
|
```javascript |
||||||
|
window.Tesseract = Tesseract.create({ |
||||||
|
workerPath: '/path/to/worker.js', |
||||||
|
langPath: 'https://tessdata.projectnaptha.com/3.02/', |
||||||
|
corePath: 'https://cdn.jsdelivr.net/gh/naptha/tesseract.js-core@0.1.0/index.js', |
||||||
|
}) |
||||||
``` |
``` |
||||||
This will output the files into the `dist` directory. |
|
||||||
|
|
||||||
## Contributors |
### corePath |
||||||
|
A string specifying the location of the [tesseract.js-core library](https://github.com/naptha/tesseract.js-core), with default value 'https://cdn.jsdelivr.net/gh/naptha/tesseract.js-core@0.1.0/index.js'. Set this string before calling `Tesseract.recognize` and `Tesseract.detect` if you want Tesseract.js to use a different file. |
||||||
|
|
||||||
### Code Contributors |
### workerPath |
||||||
|
A string specifying the location of the [worker.js](./dist/worker.js) file. Set this string before calling `Tesseract.recognize` and `Tesseract.detect` if you want Tesseract.js to use a different file. |
||||||
|
|
||||||
This project exists thanks to all the people who contribute. [[Contribute](CONTRIBUTING.md)]. |
### langPath |
||||||
<a href="https://github.com/naptha/tesseract.js/graphs/contributors"><img src="https://opencollective.com/tesseractjs/contributors.svg?width=890&button=false" /></a> |
A string specifying the location of the tesseract language files, with default value 'https://cdn.jsdelivr.net/gh/naptha/tessdata@gh-pages/3.02/'. Language file URLs are calculated according to the formula `langPath + langCode + '.traineddata.gz'`. Set this string before calling `Tesseract.recognize` and `Tesseract.detect` if you want Tesseract.js to use different language files. |
||||||
|
|
||||||
### Financial Contributors |
|
||||||
|
|
||||||
Become a financial contributor and help us sustain our community. [[Contribute](https://opencollective.com/tesseractjs/contribute)] |
## Contributing |
||||||
|
### Development |
||||||
|
To run a development copy of tesseract.js, first clone this repo. |
||||||
|
```shell |
||||||
|
> git clone https://github.com/naptha/tesseract.js.git |
||||||
|
``` |
||||||
|
|
||||||
|
Then, `cd tesseract.js && npm install && npm start` |
||||||
|
```shell |
||||||
|
> cd tesseract.js |
||||||
|
> npm install && npm start |
||||||
|
|
||||||
#### Individuals |
... a bunch of npm stuff ... |
||||||
|
|
||||||
<a href="https://opencollective.com/tesseractjs"><img src="https://opencollective.com/tesseractjs/individuals.svg?width=890"></a> |
Starting up http-server, serving ./ |
||||||
|
Available on: |
||||||
|
http://127.0.0.1:7355 |
||||||
|
http://[your ip]:7355 |
||||||
|
|
||||||
#### Organizations |
``` |
||||||
|
|
||||||
|
Then open `http://localhost:7355/examples/file-input/demo.html` in your favorite browser. The devServer automatically rebuilds `tesseract.js` and `tesseract.worker.js` when you change files in the src folder. |
||||||
|
|
||||||
Support this project with your organization. Your logo will show up here with a link to your website. [[Contribute](https://opencollective.com/tesseractjs/contribute)] |
### Building Static Files |
||||||
|
After you've cloned the repo and run `npm install` as described in the [Development Section](#development), you can build static library files in the dist folder with |
||||||
|
```shell |
||||||
|
> npm run build |
||||||
|
``` |
||||||
|
|
||||||
<a href="https://opencollective.com/tesseractjs/organization/0/website"><img src="https://opencollective.com/tesseractjs/organization/0/avatar.svg"></a> |
### Send us a Pull Request! |
||||||
<a href="https://opencollective.com/tesseractjs/organization/1/website"><img src="https://opencollective.com/tesseractjs/organization/1/avatar.svg"></a> |
Thanks :) |
||||||
<a href="https://opencollective.com/tesseractjs/organization/2/website"><img src="https://opencollective.com/tesseractjs/organization/2/avatar.svg"></a> |
|
||||||
<a href="https://opencollective.com/tesseractjs/organization/3/website"><img src="https://opencollective.com/tesseractjs/organization/3/avatar.svg"></a> |
|
||||||
<a href="https://opencollective.com/tesseractjs/organization/4/website"><img src="https://opencollective.com/tesseractjs/organization/4/avatar.svg"></a> |
|
||||||
<a href="https://opencollective.com/tesseractjs/organization/5/website"><img src="https://opencollective.com/tesseractjs/organization/5/avatar.svg"></a> |
|
||||||
<a href="https://opencollective.com/tesseractjs/organization/6/website"><img src="https://opencollective.com/tesseractjs/organization/6/avatar.svg"></a> |
|
||||||
<a href="https://opencollective.com/tesseractjs/organization/7/website"><img src="https://opencollective.com/tesseractjs/organization/7/avatar.svg"></a> |
|
||||||
<a href="https://opencollective.com/tesseractjs/organization/8/website"><img src="https://opencollective.com/tesseractjs/organization/8/avatar.svg"></a> |
|
||||||
<a href="https://opencollective.com/tesseractjs/organization/9/website"><img src="https://opencollective.com/tesseractjs/organization/9/avatar.svg"></a> |
|
||||||
|
@ -0,0 +1,640 @@ |
|||||||
|
(function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.Tesseract = f()}})(function(){var define,module,exports;return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s})({1:[function(require,module,exports){ |
||||||
|
// shim for using process in browser
|
||||||
|
var process = module.exports = {}; |
||||||
|
|
||||||
|
// cached from whatever global is present so that test runners that stub it
|
||||||
|
// don't break things. But we need to wrap it in a try catch in case it is
|
||||||
|
// wrapped in strict mode code which doesn't define any globals. It's inside a
|
||||||
|
// function because try/catches deoptimize in certain engines.
|
||||||
|
|
||||||
|
var cachedSetTimeout; |
||||||
|
var cachedClearTimeout; |
||||||
|
|
||||||
|
function defaultSetTimout() { |
||||||
|
throw new Error('setTimeout has not been defined'); |
||||||
|
} |
||||||
|
function defaultClearTimeout () { |
||||||
|
throw new Error('clearTimeout has not been defined'); |
||||||
|
} |
||||||
|
(function () { |
||||||
|
try { |
||||||
|
if (typeof setTimeout === 'function') { |
||||||
|
cachedSetTimeout = setTimeout; |
||||||
|
} else { |
||||||
|
cachedSetTimeout = defaultSetTimout; |
||||||
|
} |
||||||
|
} catch (e) { |
||||||
|
cachedSetTimeout = defaultSetTimout; |
||||||
|
} |
||||||
|
try { |
||||||
|
if (typeof clearTimeout === 'function') { |
||||||
|
cachedClearTimeout = clearTimeout; |
||||||
|
} else { |
||||||
|
cachedClearTimeout = defaultClearTimeout; |
||||||
|
} |
||||||
|
} catch (e) { |
||||||
|
cachedClearTimeout = defaultClearTimeout; |
||||||
|
} |
||||||
|
} ()) |
||||||
|
/**
 * Schedules `fun` with a delay of 0 through the cached `setTimeout`.
 *
 * @param {Function} fun Callback to schedule.
 * @returns {*} Whatever the underlying timer function returns.
 */
function runTimeout(fun) {
    if (cachedSetTimeout === setTimeout) {
        // Normal environments in sane situations.
        return setTimeout(fun, 0);
    }
    // setTimeout wasn't available when the module loaded but was defined later.
    if ((cachedSetTimeout === defaultSetTimout || !cachedSetTimeout) && setTimeout) {
        cachedSetTimeout = setTimeout;
        return setTimeout(fun, 0);
    }
    try {
        // Somebody has replaced setTimeout, but no I.E. madness.
        return cachedSetTimeout(fun, 0);
    } catch (directCallError) {
        try {
            // I.E. with an eval'ed script: it doesn't trust the global object
            // when the function is called normally.
            return cachedSetTimeout.call(null, fun, 0);
        } catch (nullThisError) {
            // Same as above, for a version of I.E. that must have the global
            // object as 'this'. Hopefully our context is correct, otherwise
            // this throws a global error.
            return cachedSetTimeout.call(this, fun, 0);
        }
    }
}
||||||
|
/**
 * Cancels a timer through the cached `clearTimeout`.
 *
 * @param {*} marker Handle previously returned by `runTimeout`.
 * @returns {*} Whatever the underlying clear function returns.
 */
function runClearTimeout(marker) {
    if (cachedClearTimeout === clearTimeout) {
        // Normal environments in sane situations.
        return clearTimeout(marker);
    }
    // clearTimeout wasn't available when the module loaded but was defined later.
    if ((cachedClearTimeout === defaultClearTimeout || !cachedClearTimeout) && clearTimeout) {
        cachedClearTimeout = clearTimeout;
        return clearTimeout(marker);
    }
    try {
        // Somebody has replaced clearTimeout, but no I.E. madness.
        return cachedClearTimeout(marker);
    } catch (directCallError) {
        try {
            // I.E. with an eval'ed script: it doesn't trust the global object
            // when the function is called normally.
            return cachedClearTimeout.call(null, marker);
        } catch (nullThisError) {
            // Same as above, for a version of I.E. that must have the global
            // object as 'this'. Some versions of I.E. have different rules
            // for clearTimeout vs setTimeout.
            return cachedClearTimeout.call(this, marker);
        }
    }
}
||||||
|
// State shared by process.nextTick, drainQueue and cleanUpNextTick.
var queue = [];         // items waiting to be run
var draining = false;   // true while drainQueue is running items
var currentQueue;       // the batch drainQueue is currently iterating
var queueIndex = -1;    // index into currentQueue of the item being run
||||||
|
|
||||||
|
/**
 * Timer callback armed by drainQueue. It only does work when a drain is still
 * marked as in progress: it clears the flag, puts whatever is left of the
 * current batch back in front of the pending queue, and restarts draining.
 */
function cleanUpNextTick() {
    if (!draining || !currentQueue) {
        return;
    }
    draining = false;
    if (currentQueue.length) {
        queue = currentQueue.concat(queue);
    } else {
        queueIndex = -1;
    }
    if (queue.length) {
        drainQueue();
    }
}
||||||
|
|
||||||
|
/**
 * Runs every queued item, including items queued while draining.
 *
 * A cleanup timer is armed before any item runs and cancelled once the queue
 * has been emptied normally; if an item throws, the timer is left armed and
 * cleanUpNextTick takes over.
 */
function drainQueue() {
    if (draining) {
        return;
    }
    var timeout = runTimeout(cleanUpNextTick);
    draining = true;

    var len = queue.length;
    while (len) {
        // Swap in a fresh queue so items scheduled by the callbacks below are
        // collected separately and handled by the next pass of this loop.
        currentQueue = queue;
        queue = [];
        while (++queueIndex < len) {
            if (currentQueue) {
                currentQueue[queueIndex].run();
            }
        }
        queueIndex = -1;
        len = queue.length;
    }
    currentQueue = null;
    draining = false;
    runClearTimeout(timeout);
}
||||||
|
|
||||||
|
/**
 * Queues `fun` to be called later with any additional arguments.
 * A drain is scheduled only when this call put the first item into an idle
 * queue.
 *
 * @param {Function} fun Callback to queue.
 * @param {...*} args Arguments forwarded to `fun`.
 */
process.nextTick = function (fun) {
    // Copy arguments[1..] by hand into a pre-sized array.
    var args = new Array(arguments.length - 1);
    if (arguments.length > 1) {
        for (var i = 1; i < arguments.length; i++) {
            args[i - 1] = arguments[i];
        }
    }
    queue.push(new Item(fun, args));
    if (queue.length === 1 && !draining) {
        runTimeout(drainQueue);
    }
};
||||||
|
|
||||||
|
// v8 likes predictible objects
|
||||||
|
/**
 * A queued nextTick call: a function plus the arguments to call it with.
 *
 * @param {Function} fun Function to call.
 * @param {Array} array Arguments for `fun`.
 */
function Item(fun, array) {
    this.fun = fun;
    this.array = array;
}

/** Calls the stored function with the stored arguments and a null `this`. */
Item.prototype.run = function () {
    this.fun.apply(null, this.array);
};
||||||
|
// Static fields of the browser `process` stand-in.
process.title = 'browser';
process.browser = true;
process.env = {};
process.argv = [];
process.version = ''; // empty string to avoid regexp issues
process.versions = {};

function noop() {}

// Event-emitter methods are accepted but do nothing.
process.on = noop;
process.addListener = noop;
process.once = noop;
process.off = noop;
process.removeListener = noop;
process.removeAllListeners = noop;
process.emit = noop;
process.prependListener = noop;
process.prependOnceListener = noop;

process.listeners = function (name) { return []; };

process.binding = function (name) {
    throw new Error('process.binding is not supported');
};

process.cwd = function () { return '/'; };
process.chdir = function (dir) {
    throw new Error('process.chdir is not supported');
};
process.umask = function () { return 0; };
||||||
|
|
||||||
|
},{}],2:[function(require,module,exports){ |
||||||
|
module.exports={ |
||||||
|
"name": "tesseract.js", |
||||||
|
"version": "1.0.19", |
||||||
|
"description": "Pure Javascript Multilingual OCR", |
||||||
|
"main": "src/index.js", |
||||||
|
"scripts": { |
||||||
|
"start": "concurrently --kill-others \"watchify src/index.js -t [ envify --TESS_ENV development ] -t [ babelify --presets [ es2015 ] ] -o dist/tesseract.dev.js --standalone Tesseract\" \"watchify src/browser/worker.js -t [ envify --TESS_ENV development ] -t [ babelify --presets [ es2015 ] ] -o dist/worker.dev.js\" \"http-server -p 7355\"", |
||||||
|
"build": "browserify src/index.js -t [ babelify --presets [ es2015 ] ] -o dist/tesseract.js --standalone Tesseract && browserify src/browser/worker.js -t [ babelify --presets [ es2015 ] ] -o dist/worker.js && uglifyjs dist/tesseract.js --source-map -o dist/tesseract.min.js && uglifyjs dist/worker.js --source-map -o dist/worker.min.js", |
||||||
|
"release": "npm run build && git commit -am 'new release' && git push && git tag `jq -r '.version' package.json` && git push origin --tags && npm publish" |
||||||
|
}, |
||||||
|
"browser": { |
||||||
|
"./src/node/index.js": "./src/browser/index.js" |
||||||
|
}, |
||||||
|
"author": "", |
||||||
|
"license": "Apache-2.0", |
||||||
|
"devDependencies": { |
||||||
|
"babel-preset-es2015": "^6.16.0", |
||||||
|
"babelify": "^7.3.0", |
||||||
|
"browserify": "^13.1.0", |
||||||
|
"concurrently": "^3.1.0", |
||||||
|
"envify": "^3.4.1", |
||||||
|
"http-server": "^0.9.0", |
||||||
|
"pako": "^1.0.3", |
||||||
|
"uglify-js": "^3.4.9", |
||||||
|
"watchify": "^3.7.0" |
||||||
|
}, |
||||||
|
"dependencies": { |
||||||
|
"file-type": "^3.8.0", |
||||||
|
"isomorphic-fetch": "^2.2.1", |
||||||
|
"is-url": "1.2.2", |
||||||
|
"jpeg-js": "^0.2.0", |
||||||
|
"level-js": "^2.2.4", |
||||||
|
"node-fetch": "^1.6.3", |
||||||
|
"object-assign": "^4.1.0", |
||||||
|
"png.js": "^0.2.1", |
||||||
|
"tesseract.js-core": "^1.0.2" |
||||||
|
}, |
||||||
|
"repository": { |
||||||
|
"type": "git", |
||||||
|
"url": "https://github.com/naptha/tesseract.js.git" |
||||||
|
}, |
||||||
|
"bugs": { |
||||||
|
"url": "https://github.com/naptha/tesseract.js/issues" |
||||||
|
}, |
||||||
|
"homepage": "https://github.com/naptha/tesseract.js" |
||||||
|
} |
||||||
|
|
||||||
|
},{}],3:[function(require,module,exports){ |
||||||
|
(function (process){ |
||||||
|
'use strict'; |
||||||
|
|
||||||
|
var defaultOptions = { |
||||||
|
// workerPath: 'https://cdn.jsdelivr.net/gh/naptha/tesseract.js@0.2.0/dist/worker.js',
|
||||||
|
corePath: 'https://cdn.jsdelivr.net/gh/naptha/tesseract.js-core@0.1.0/index.js', |
||||||
|
langPath: 'https://tessdata.projectnaptha.com/3.02/' |
||||||
|
}; |
||||||
|
|
||||||
|
if (process.env.TESS_ENV === "development") { |
||||||
|
console.debug('Using Development Configuration'); |
||||||
|
defaultOptions.workerPath = location.protocol + '//' + location.host + '/dist/worker.dev.js?nocache=' + Math.random().toString(36).slice(3); |
||||||
|
} else { |
||||||
|
var version = require('../../package.json').version; |
||||||
|
defaultOptions.workerPath = 'https://cdn.jsdelivr.net/gh/naptha/tesseract.js@' + version + '/dist/worker.js'; |
||||||
|
} |
||||||
|
|
||||||
|
exports.defaultOptions = defaultOptions; |
||||||
|
|
||||||
|
exports.spawnWorker = function spawnWorker(instance, workerOptions) { |
||||||
|
if (Blob && URL) { |
||||||
|
var blob = new Blob(['importScripts("' + workerOptions.workerPath + '");'], { |
||||||
|
type: 'application/javascript' |
||||||
|
}); |
||||||
|
var worker = new Worker(URL.createObjectURL(blob)); |
||||||
|
} else { |
||||||
|
var worker = new Worker(workerOptions.workerPath); |
||||||
|
} |
||||||
|
|
||||||
|
worker.onmessage = function (e) { |
||||||
|
var packet = e.data; |
||||||
|
instance._recv(packet); |
||||||
|
}; |
||||||
|
return worker; |
||||||
|
}; |
||||||
|
|
||||||
|
exports.terminateWorker = function (instance) { |
||||||
|
instance.worker.terminate(); |
||||||
|
}; |
||||||
|
|
||||||
|
exports.sendPacket = function sendPacket(instance, packet) { |
||||||
|
loadImage(packet.payload.image, function (img) { |
||||||
|
packet.payload.image = img; |
||||||
|
instance.worker.postMessage(packet); |
||||||
|
}); |
||||||
|
}; |
||||||
|
|
||||||
|
/**
 * Converts any supported image source into pixel data and passes it to `cb`.
 *
 * Sources are unwrapped one step at a time, each step re-entering loadImage:
 *   '#...' string         -> element found with document.querySelector
 *   'blob:' / 'data:' URL -> Image element
 *   any other string      -> fetched as a Blob with XMLHttpRequest
 *   File                  -> data URL (FileReader)
 *   Blob                  -> object URL (revoked once decoding has finished)
 *   canvas element        -> its 2d context
 *   IMG / VIDEO element   -> drawn onto a temporary canvas
 *   2d context            -> ImageData covering the whole canvas
 * Any other value is handed to `cb` unchanged.
 *
 * Failures in the asynchronous steps are thrown from browser event handlers,
 * so the caller of loadImage cannot catch them.
 *
 * @param {*} image Image source, see the list above.
 * @param {Function} cb Receives the final value.
 * @returns {*} The return value of `cb` when the whole chain ran
 *     synchronously, otherwise undefined.
 * @throws {TypeError} If `image` is null or undefined, e.g. when a selector
 *     matched no element.
 */
function loadImage(image, cb) {
  if (image === null || image === undefined) {
    throw new TypeError('loadImage: no image source (got ' + image + ')');
  }

  if (typeof image === 'string') {
    if (/^#/.test(image)) {
      // element css selector
      return loadImage(document.querySelector(image), cb);
    }

    // Anchored on purpose: an ordinary URL that merely contains "data:" or
    // "blob:" must be fetched, not treated as a data URL.
    if (/^(blob|data):/.test(image)) {
      var im = new Image();
      // Handlers are attached before `src` so no event can be missed.
      im.onload = function () {
        loadImage(im, cb);
      };
      im.onerror = function () {
        throw new Error('Fail to load image from blob/data URL');
      };
      im.src = image;
      return;
    }

    var xhr = new XMLHttpRequest();
    xhr.open('GET', image, true);
    xhr.responseType = "blob";
    xhr.onload = function () {
      if (xhr.status >= 400) {
        throw new Error('Fail to get image as Blob');
      }
      loadImage(xhr.response, cb);
    };
    xhr.onerror = function () {
      throw new Error('Fail to get image as Blob (request error)');
    };
    xhr.send(null);
    return;
  }

  if (typeof File !== 'undefined' && image instanceof File) {
    var fr = new FileReader();
    fr.onload = function () {
      loadImage(fr.result, cb);
    };
    fr.onerror = function () {
      throw new Error('Fail to read image file');
    };
    fr.readAsDataURL(image);
    return;
  }

  if (typeof Blob !== 'undefined' && image instanceof Blob) {
    var objectUrl = URL.createObjectURL(image);
    return loadImage(objectUrl, function (img) {
      // The pixels have been extracted by now; release the object URL so the
      // Blob can be garbage collected.
      if (typeof URL.revokeObjectURL === 'function') {
        URL.revokeObjectURL(objectUrl);
      }
      return cb(img);
    });
  }

  if (image.getContext) {
    // canvas element
    return loadImage(image.getContext('2d'), cb);
  }

  if (image.tagName === "IMG" || image.tagName === "VIDEO") {
    // image element or video element
    var c = document.createElement('canvas');
    c.width = image.naturalWidth || image.videoWidth;
    c.height = image.naturalHeight || image.videoHeight;
    var ctx = c.getContext('2d');
    ctx.drawImage(image, 0, 0);
    return loadImage(ctx, cb);
  }

  if (image.getImageData) {
    // canvas context
    var data = image.getImageData(0, 0, image.canvas.width, image.canvas.height);
    return loadImage(data, cb);
  }

  return cb(image);
}
||||||
|
|
||||||
|
}).call(this,require('_process')) |
||||||
|
},{"../../package.json":2,"_process":1}],4:[function(require,module,exports){ |
||||||
|
"use strict"; |
||||||
|
|
||||||
|
// The result of dump.js is a big JSON tree
|
||||||
|
// which can be easily serialized (for instance
|
||||||
|
// to be sent from a webworker to the main app
|
||||||
|
// or through Node's IPC), but we want
|
||||||
|
// a (circular) DOM-like interface for walking
|
||||||
|
// through the data.
|
||||||
|
|
||||||
|
module.exports = function circularize(page) { |
||||||
|
page.paragraphs = []; |
||||||
|
page.lines = []; |
||||||
|
page.words = []; |
||||||
|
page.symbols = []; |
||||||
|
|
||||||
|
page.blocks.forEach(function (block) { |
||||||
|
block.page = page; |
||||||
|
|
||||||
|
block.lines = []; |
||||||
|
block.words = []; |
||||||
|
block.symbols = []; |
||||||
|
|
||||||
|
block.paragraphs.forEach(function (para) { |
||||||
|
para.block = block; |
||||||
|
para.page = page; |
||||||
|
|
||||||
|
para.words = []; |
||||||
|
para.symbols = []; |
||||||
|
|
||||||
|
para.lines.forEach(function (line) { |
||||||
|
line.paragraph = para; |
||||||
|
line.block = block; |
||||||
|
line.page = page; |
||||||
|
|
||||||
|
line.symbols = []; |
||||||
|
|
||||||
|
line.words.forEach(function (word) { |
||||||
|
word.line = line; |
||||||
|
word.paragraph = para; |
||||||
|
word.block = block; |
||||||
|
word.page = page; |
||||||
|
word.symbols.forEach(function (sym) { |
||||||
|
sym.word = word; |
||||||
|
sym.line = line; |
||||||
|
sym.paragraph = para; |
||||||
|
sym.block = block; |
||||||
|
sym.page = page; |
||||||
|
|
||||||
|
sym.line.symbols.push(sym); |
||||||
|
sym.paragraph.symbols.push(sym); |
||||||
|
sym.block.symbols.push(sym); |
||||||
|
sym.page.symbols.push(sym); |
||||||
|
}); |
||||||
|
word.paragraph.words.push(word); |
||||||
|
word.block.words.push(word); |
||||||
|
word.page.words.push(word); |
||||||
|
}); |
||||||
|
line.block.lines.push(line); |
||||||
|
line.page.lines.push(line); |
||||||
|
}); |
||||||
|
para.page.paragraphs.push(para); |
||||||
|
}); |
||||||
|
}); |
||||||
|
return page; |
||||||
|
}; |
||||||
|
|
||||||
|
},{}],5:[function(require,module,exports){ |
||||||
|
'use strict'; |
||||||
|
|
||||||
|
/**
 * Compiler helper: installs method descriptors on a constructor.
 * `protoProps` go on `Constructor.prototype`, `staticProps` on the constructor
 * itself. Each descriptor is made configurable, writable when it has a
 * `value`, and non-enumerable unless it says otherwise.
 */
var _createClass = function () {
  function defineProperties(target, props) {
    for (var i = 0; i < props.length; i++) {
      var descriptor = props[i];
      descriptor.enumerable = descriptor.enumerable || false;
      descriptor.configurable = true;
      if ("value" in descriptor) descriptor.writable = true;
      Object.defineProperty(target, descriptor.key, descriptor);
    }
  }

  return function (Constructor, protoProps, staticProps) {
    if (protoProps) defineProperties(Constructor.prototype, protoProps);
    if (staticProps) defineProperties(Constructor, staticProps);
    return Constructor;
  };
}();

/** Compiler helper: throws when a class constructor is called without `new`. */
function _classCallCheck(instance, Constructor) {
  if (!(instance instanceof Constructor)) {
    throw new TypeError("Cannot call a class as a function");
  }
}
||||||
|
|
||||||
|
var adapter = require('../node/index.js'); |
||||||
|
|
||||||
|
var jobCounter = 0; |
||||||
|
|
||||||
|
module.exports = function () { |
||||||
|
function TesseractJob(instance) { |
||||||
|
_classCallCheck(this, TesseractJob); |
||||||
|
|
||||||
|
this.id = 'Job-' + ++jobCounter + '-' + Math.random().toString(16).slice(3, 8); |
||||||
|
|
||||||
|
this._instance = instance; |
||||||
|
this._resolve = []; |
||||||
|
this._reject = []; |
||||||
|
this._progress = []; |
||||||
|
this._finally = []; |
||||||
|
} |
||||||
|
|
||||||
|
_createClass(TesseractJob, [{ |
||||||
|
key: 'then', |
||||||
|
value: function then(resolve, reject) { |
||||||
|
if (this._resolve.push) { |
||||||
|
this._resolve.push(resolve); |
||||||
|
} else { |
||||||
|
resolve(this._resolve); |
||||||
|
} |
||||||
|
|
||||||
|
if (reject) this.catch(reject); |
||||||
|
return this; |
||||||
|
} |
||||||
|
}, { |
||||||
|
key: 'catch', |
||||||
|
value: function _catch(reject) { |
||||||
|
if (this._reject.push) { |
||||||
|
this._reject.push(reject); |
||||||
|
} else { |
||||||
|
reject(this._reject); |
||||||
|
} |
||||||
|
return this; |
||||||
|
} |
||||||
|
}, { |
||||||
|
key: 'progress', |
||||||
|
value: function progress(fn) { |
||||||
|
this._progress.push(fn); |
||||||
|
return this; |
||||||
|
} |
||||||
|
}, { |
||||||
|
key: 'finally', |
||||||
|
value: function _finally(fn) { |
||||||
|
this._finally.push(fn); |
||||||
|
return this; |
||||||
|
} |
||||||
|
}, { |
||||||
|
key: '_send', |
||||||
|
value: function _send(action, payload) { |
||||||
|
adapter.sendPacket(this._instance, { |
||||||
|
jobId: this.id, |
||||||
|
action: action, |
||||||
|
payload: payload |
||||||
|
}); |
||||||
|
} |
||||||
|
}, { |
||||||
|
key: '_handle', |
||||||
|
value: function _handle(packet) { |
||||||
|
var data = packet.data; |
||||||
|
var runFinallyCbs = false; |
||||||
|
|
||||||
|
if (packet.status === 'resolve') { |
||||||
|
if (this._resolve.length === 0) console.log(data); |
||||||
|
this._resolve.forEach(function (fn) { |
||||||
|
var ret = fn(data); |
||||||
|
if (ret && typeof ret.then == 'function') { |
||||||
|
console.warn('TesseractJob instances do not chain like ES6 Promises. To convert it into a real promise, use Promise.resolve.'); |
||||||
|
} |
||||||
|
}); |
||||||
|
this._resolve = data; |
||||||
|
this._instance._dequeue(); |
||||||
|
runFinallyCbs = true; |
||||||
|
} else if (packet.status === 'reject') { |
||||||
|
if (this._reject.length === 0) console.error(data); |
||||||
|
this._reject.forEach(function (fn) { |
||||||
|
return fn(data); |
||||||
|
}); |
||||||
|
this._reject = data; |
||||||
|
this._instance._dequeue(); |
||||||
|
runFinallyCbs = true; |
||||||
|
} else if (packet.status === 'progress') { |
||||||
|
this._progress.forEach(function (fn) { |
||||||
|
return fn(data); |
||||||
|
}); |
||||||
|
} else { |
||||||
|
console.warn('Message type unknown', packet.status); |
||||||
|
} |
||||||
|
|
||||||
|
if (runFinallyCbs) { |
||||||
|
this._finally.forEach(function (fn) { |
||||||
|
return fn(data); |
||||||
|
}); |
||||||
|
} |
||||||
|
} |
||||||
|
}]); |
||||||
|
|
||||||
|
return TesseractJob; |
||||||
|
}(); |
||||||
|
|
||||||
|
},{"../node/index.js":3}],6:[function(require,module,exports){ |
||||||
|
'use strict'; |
||||||
|
|
||||||
|
/**
 * Compiler helper: installs method descriptors on a constructor.
 * `protoProps` go on `Constructor.prototype`, `staticProps` on the constructor
 * itself. Each descriptor is made configurable, writable when it has a
 * `value`, and non-enumerable unless it says otherwise.
 */
var _createClass = function () {
  function defineProperties(target, props) {
    for (var i = 0; i < props.length; i++) {
      var descriptor = props[i];
      descriptor.enumerable = descriptor.enumerable || false;
      descriptor.configurable = true;
      if ("value" in descriptor) descriptor.writable = true;
      Object.defineProperty(target, descriptor.key, descriptor);
    }
  }

  return function (Constructor, protoProps, staticProps) {
    if (protoProps) defineProperties(Constructor.prototype, protoProps);
    if (staticProps) defineProperties(Constructor, staticProps);
    return Constructor;
  };
}();

/** Compiler helper: throws when a class constructor is called without `new`. */
function _classCallCheck(instance, Constructor) {
  if (!(instance instanceof Constructor)) {
    throw new TypeError("Cannot call a class as a function");
  }
}
||||||
|
|
||||||
|
var adapter = require('./node/index.js'); |
||||||
|
var circularize = require('./common/circularize.js'); |
||||||
|
var TesseractJob = require('./common/job'); |
||||||
|
var version = require('../package.json').version; |
||||||
|
|
||||||
|
/**
 * Builds a TesseractWorker whose options are the adapter defaults overridden
 * by `workerOptions`. The returned worker also exposes this factory as
 * `create` and the package version as `version`.
 *
 * @param {Object} [workerOptions={}] Overrides for the adapter's default options.
 * @returns {TesseractWorker} The configured worker.
 */
var create = function create() {
  var workerOptions = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : {};

  var worker = new TesseractWorker(Object.assign({}, adapter.defaultOptions, workerOptions));
  worker.create = create;
  worker.version = version;
  return worker;
};
||||||
|
|
||||||
|
/**
 * TesseractWorker: owns one adapter worker and runs jobs on it one at a time.
 *
 * `recognize` and `detect` return a TesseractJob immediately. The request is
 * sent once every previously queued job has settled.
 */
var TesseractWorker = function () {
  /**
   * @param {Object} workerOptions Options passed to the adapter and forwarded
   *     with every request.
   */
  function TesseractWorker(workerOptions) {
    _classCallCheck(this, TesseractWorker);

    this.worker = null;
    this.workerOptions = workerOptions;
    this._currentJob = null;
    this._queue = [];
  }

  _createClass(TesseractWorker, [{
    key: 'recognize',

    /**
     * Queues an OCR request.
     * `options` is either a language string or an options object; `lang`
     * defaults to 'eng'. The caller's object is copied, never modified.
     */
    value: function recognize(image) {
      var _this = this;

      var options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {};

      return this._delay(function (job) {
        var jobOptions = typeof options === 'string' ? { lang: options } : Object.assign({}, options);
        jobOptions.lang = jobOptions.lang || 'eng';

        job._send('recognize', { image: image, options: jobOptions, workerOptions: _this.workerOptions });
      });
    }
  }, {
    key: 'detect',

    /** Queues a 'detect' request for `image`. */
    value: function detect(image) {
      var _this2 = this;

      var options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {};

      return this._delay(function (job) {
        job._send('detect', { image: image, options: options, workerOptions: _this2.workerOptions });
      });
    }
  }, {
    key: 'terminate',

    /** Terminates the underlying worker, if any, and drops all queued jobs. */
    value: function terminate() {
      if (this.worker) adapter.terminateWorker(this);
      this.worker = null;
      this._currentJob = null;
      this._queue = [];
    }
  }, {
    key: '_delay',

    /**
     * Creates a job and queues `fn(job)` to run when the job reaches the
     * front of the queue. The worker is spawned on first use.
     */
    value: function _delay(fn) {
      var _this3 = this;

      if (!this.worker) this.worker = adapter.spawnWorker(this, this.workerOptions);

      var job = new TesseractJob(this);
      this._queue.push(function () {
        _this3._queue.shift();
        _this3._currentJob = job;
        fn(job);
      });
      if (!this._currentJob) this._dequeue();
      return job;
    }
  }, {
    key: '_dequeue',

    /** Clears the current job and starts the next queued one, if any. */
    value: function _dequeue() {
      this._currentJob = null;
      if (this._queue.length) {
        this._queue[0]();
      }
    }
  }, {
    key: '_recv',

    /**
     * Routes a packet from the worker to the current job. Packets that arrive
     * when no job is active, or that carry another job's id, are reported
     * with console.warn and dropped.
     */
    value: function _recv(packet) {
      var job = this._currentJob;

      // `_currentJob` is null after terminate() and after the last queued job
      // has settled, so it must be checked before its id is read.
      if (!job || job.id !== packet.jobId) {
        console.warn('Job ID ' + packet.jobId + ' not known.');
        return;
      }

      if (packet.status === 'resolve' && packet.action === 'recognize') {
        packet.data = circularize(packet.data);
      }

      job._handle(packet);
    }
  }]);

  return TesseractWorker;
}();
||||||
|
|
||||||
|
module.exports = create(); |
||||||
|
|
||||||
|
},{"../package.json":2,"./common/circularize.js":4,"./common/job":5,"./node/index.js":3}]},{},[6])(6) |
||||||
|
}); |
@ -1,448 +0,0 @@ |
|||||||
# API |
|
||||||
|
|
||||||
- [createWorker()](#create-worker) |
|
||||||
- [Worker.load](#worker-load) |
|
||||||
- [Worker.writeText](#worker-writeText) |
|
||||||
- [Worker.readText](#worker-readText) |
|
||||||
- [Worker.removeFile](#worker-removeFile) |
|
||||||
- [Worker.FS](#worker-FS) |
|
||||||
- [Worker.loadLanguage](#worker-load-language) |
|
||||||
- [Worker.initialize](#worker-initialize) |
|
||||||
- [Worker.setParameters](#worker-set-parameters) |
|
||||||
- [Worker.recognize](#worker-recognize) |
|
||||||
- [Worker.detect](#worker-detect) |
|
||||||
- [Worker.terminate](#worker-terminate) |
|
||||||
- [createScheduler()](#create-scheduler) |
|
||||||
- [Scheduler.addWorker](#scheduler-add-worker) |
|
||||||
- [Scheduler.addJob](#scheduler-add-job) |
|
||||||
- [Scheduler.getQueueLen](#scheduler-get-queue-len) |
|
||||||
- [Scheduler.getNumWorkers](#scheduler-get-num-workers) |
|
||||||
- [setLogging()](#set-logging) |
|
||||||
- [recognize()](#recognize) |
|
||||||
- [detect()](#detect) |
|
||||||
- [PSM](#psm) |
|
||||||
- [OEM](#oem) |
|
||||||
|
|
||||||
--- |
|
||||||
|
|
||||||
<a name="create-worker"></a> |
|
||||||
## createWorker(options): Worker |
|
||||||
|
|
||||||
createWorker is a factory function that creates a Tesseract worker. A worker is basically a Web Worker in the browser and a Child Process in Node. |
|
||||||
|
|
||||||
**Arguments:** |
|
||||||
|
|
||||||
- `options` an object of customized options |
|
||||||
- `corePath` path for tesseract-core.js script |
|
||||||
- `langPath` path for downloading traineddata, do not include `/` at the end of the path |
|
||||||
- `workerPath` path for downloading worker script |
|
||||||
- `dataPath` path for saving traineddata in WebAssembly file system, not common to modify |
|
||||||
- `cachePath` path for the cached traineddata, more useful for Node, for browser it only changes the key in IndexedDB |
|
||||||
- `cacheMethod` a string to indicate the method of cache management, should be one of the following options |
|
||||||
- write: read cache and write back (default method) |
|
||||||
- readOnly: read cache and not to write back |
|
||||||
- refresh: not to read cache and write back |
|
||||||
- none: not to read cache and not to write back |
|
||||||
- `workerBlobURL` a boolean to define whether to use Blob URL for worker script, default: true |
|
||||||
- `gzip` a boolean to define whether the traineddata from the remote is gzipped, default: true |
|
||||||
- `logger` a function to log the progress, a quick example is `m => console.log(m)` |
|
||||||
- `errorHandler` a function to handle worker errors, a quick example is `err => console.error(err)` |
|
||||||
|
|
||||||
|
|
||||||
**Examples:** |
|
||||||
|
|
||||||
```javascript |
|
||||||
const { createWorker } = Tesseract; |
|
||||||
const worker = createWorker({ |
|
||||||
langPath: '...', |
|
||||||
logger: m => console.log(m), |
|
||||||
}); |
|
||||||
``` |
|
||||||
|
|
||||||
## Worker |
|
||||||
|
|
||||||
A Worker helps you do OCR-related tasks. It takes a few steps to set up a Worker before it is fully functional. The full flow is: |
|
||||||
|
|
||||||
- load |
|
||||||
- FS functions // optional |
|
||||||
- loadLanguage |
|
||||||
- initialize |
|
||||||
- setParameters // optional |
|
||||||
- recognize or detect |
|
||||||
- terminate |
|
||||||
|
|
||||||
Each function is async, so using async/await or Promise is required. When it is resolved, you get an object: |
|
||||||
|
|
||||||
```json |
|
||||||
{ |
|
||||||
"jobId": "Job-1-123", |
|
||||||
"data": { ... } |
|
||||||
} |
|
||||||
``` |
|
||||||
|
|
||||||
jobId is generated by Tesseract.js, but you can supply your own when calling any of the functions above. |
|
||||||
|
|
||||||
<a name="worker-load"></a> |
|
||||||
### Worker.load(jobId): Promise |
|
||||||
|
|
||||||
Worker.load() loads the tesseract.js-core scripts (downloading them from the remote if they are not present) and makes the Web Worker/Child Process ready for the next action. |
|
||||||
|
|
||||||
**Arguments:** |
|
||||||
|
|
||||||
- `jobId` Please see details above |
|
||||||
|
|
||||||
**Examples:** |
|
||||||
|
|
||||||
```javascript |
|
||||||
(async () => { |
|
||||||
await worker.load(); |
|
||||||
})(); |
|
||||||
``` |
|
||||||
|
|
||||||
<a name="worker-writeText"></a> |
|
||||||
### Worker.writeText(path, text, jobId): Promise |
|
||||||
|
|
||||||
Worker.writeText() writes a text file to the path specified in MEMFS. It is useful when you want to use features that require tesseract.js |
|
||||||
to read a file from the file system. |
|
||||||
|
|
||||||
**Arguments:** |
|
||||||
|
|
||||||
- `path` text file path |
|
||||||
- `text` content of the text file |
|
||||||
- `jobId` Please see details above |
|
||||||
|
|
||||||
**Examples:** |
|
||||||
|
|
||||||
```javascript |
|
||||||
(async () => { |
|
||||||
await worker.writeText('tmp.txt', 'Hi\nTesseract.js\n'); |
|
||||||
})(); |
|
||||||
``` |
|
||||||
|
|
||||||
<a name="worker-readText"></a> |
|
||||||
### Worker.readText(path, jobId): Promise |
|
||||||
|
|
||||||
Worker.readText() reads a text file from the path specified in MEMFS. It is useful when you want to check its content. |
|
||||||
|
|
||||||
**Arguments:** |
|
||||||
|
|
||||||
- `path` text file path |
|
||||||
- `jobId` Please see details above |
|
||||||
|
|
||||||
**Examples:** |
|
||||||
|
|
||||||
```javascript |
|
||||||
(async () => { |
|
||||||
const { data } = await worker.readText('tmp.txt'); |
|
||||||
console.log(data); |
|
||||||
})(); |
|
||||||
``` |
|
||||||
|
|
||||||
<a name="worker-removeFile"></a> |
|
||||||
### Worker.removeFile(path, jobId): Promise |
|
||||||
|
|
||||||
Worker.removeFile() removes a file in MEMFS. It is useful when you want to free memory. |
|
||||||
|
|
||||||
**Arguments:** |
|
||||||
|
|
||||||
- `path` file path |
|
||||||
- `jobId` Please see details above |
|
||||||
|
|
||||||
**Examples:** |
|
||||||
|
|
||||||
```javascript |
|
||||||
(async () => { |
|
||||||
await worker.removeFile('tmp.txt'); |
|
||||||
})(); |
|
||||||
``` |
|
||||||
|
|
||||||
<a name="worker-FS"></a> |
|
||||||
### Worker.FS(method, args, jobId): Promise |
|
||||||
|
|
||||||
Worker.FS() is a generic FS function to do anything you want. You can check [HERE](https://emscripten.org/docs/api_reference/Filesystem-API.html) for all functions. |
|
||||||
|
|
||||||
**Arguments:** |
|
||||||
|
|
||||||
- `method` method name |
|
||||||
- `args` array of arguments to pass |
|
||||||
- `jobId` Please see details above |
|
||||||
|
|
||||||
**Examples:** |
|
||||||
|
|
||||||
```javascript |
|
||||||
(async () => { |
|
||||||
await worker.FS('writeFile', ['tmp.txt', 'Hi\nTesseract.js\n']); |
|
||||||
// equal to: |
|
||||||
// await worker.writeText('tmp.txt', 'Hi\nTesseract.js\n'); |
|
||||||
})(); |
|
||||||
``` |
|
||||||
|
|
||||||
<a name="worker-load-language"></a> |
|
||||||
### Worker.loadLanguage(langs, jobId): Promise |
|
||||||
|
|
||||||
Worker.loadLanguage() loads traineddata from the cache, or downloads it from the remote, and puts it into the WebAssembly file system. |
|
||||||
|
|
||||||
**Arguments:** |
|
||||||
|
|
||||||
- `langs` a string to indicate the languages traineddata to download, multiple languages are concatenated with **+**, ex: **eng+chi\_tra** |
|
||||||
- `jobId` Please see details above |
|
||||||
|
|
||||||
**Examples:** |
|
||||||
|
|
||||||
```javascript |
|
||||||
(async () => { |
|
||||||
await worker.loadLanguage('eng+chi_tra'); |
|
||||||
})(); |
|
||||||
``` |
|
||||||
|
|
||||||
<a name="worker-initialize"></a> |
|
||||||
### Worker.initialize(langs, oem, jobId): Promise |
|
||||||
|
|
||||||
Worker.initialize() initializes the Tesseract API, make sure it is ready for doing OCR tasks. |
|
||||||
|
|
||||||
**Arguments:** |
|
||||||
|
|
||||||
- `langs` a string to indicate the languages loaded by Tesseract API, it can be the subset of the language traineddata you loaded from Worker.loadLanguage. |
|
||||||
- `oem` an enum to indicate the OCR Engine Mode you use |
|
||||||
- `jobId` Please see details above |
|
||||||
|
|
||||||
**Examples:** |
|
||||||
|
|
||||||
```javascript |
|
||||||
(async () => { |
|
||||||
/** You can load more languages in advance, but use only part of them in Worker.initialize() */ |
|
||||||
await worker.loadLanguage('eng+chi_tra'); |
|
||||||
await worker.initialize('eng'); |
|
||||||
})(); |
|
||||||
``` |
|
||||||
<a name="worker-set-parameters"></a> |
|
||||||
### Worker.setParameters(params, jobId): Promise |
|
||||||
|
|
||||||
Worker.setParameters() sets parameters for the Tesseract API (using SetVariable()). It changes the behavior of Tesseract, and some parameters, such as tessedit\_char\_whitelist, are very useful. |
|
||||||
|
|
||||||
**Arguments:** |
|
||||||
|
|
||||||
- `params` an object with key and value of the parameters |
|
||||||
- `jobId` Please see details above |
|
||||||
|
|
||||||
**Supported Parameters:** |
|
||||||
|
|
||||||
| name | type | default value | description | |
|
||||||
| --------------------------- | ------ | ----------------- | ------------------------------------------------------------------------------------------------------------------------------- | |
|
||||||
| tessedit\_ocr\_engine\_mode | enum | OEM.DEFAULT | Check [HERE](https://github.com/tesseract-ocr/tesseract/blob/4.0.0/src/ccstruct/publictypes.h#L268) for definition of each mode | |
|
||||||
| tessedit\_pageseg\_mode | enum | PSM.SINGLE\_BLOCK | Check [HERE](https://github.com/tesseract-ocr/tesseract/blob/4.0.0/src/ccstruct/publictypes.h#L163) for definition of each mode | |
|
||||||
| tessedit\_char\_whitelist | string | '' | setting whitelist characters makes the result contain only these characters, useful when the content in the image is limited | |
|
||||||
| preserve\_interword\_spaces | string | '0' | '0' or '1', keeps the space between words | |
|
||||||
| user\_defined\_dpi | string | '' | Define custom dpi, use to fix **Warning: Invalid resolution 0 dpi. Using 70 instead.** | |
|
||||||
| tessjs\_create\_hocr | string | '1' | only 2 values, '0' or '1', when the value is '1', tesseract.js includes hocr in the result | |
|
||||||
| tessjs\_create\_tsv | string | '1' | only 2 values, '0' or '1', when the value is '1', tesseract.js includes tsv in the result | |
|
||||||
| tessjs\_create\_box | string | '0' | only 2 values, '0' or '1', when the value is '1', tesseract.js includes box in the result | |
|
||||||
| tessjs\_create\_unlv | string | '0' | only 2 values, '0' or '1', when the value is '1', tesseract.js includes unlv in the result | |
|
||||||
| tessjs\_create\_osd | string | '0' | only 2 values, '0' or '1', when the value is '1', tesseract.js includes osd in the result | |
|
||||||
|
|
||||||
**Examples:** |
|
||||||
|
|
||||||
```javascript |
|
||||||
(async () => { |
|
||||||
await worker.setParameters({ |
|
||||||
tessedit_char_whitelist: '0123456789', |
|
||||||
}); |
|
||||||
})(); |
|
||||||
``` |
|
||||||
|
|
||||||
<a name="worker-recognize"></a> |
|
||||||
### Worker.recognize(image, options, jobId): Promise |
|
||||||
|
|
||||||
Worker.recognize() provides core function of Tesseract.js as it executes OCR |
|
||||||
|
|
||||||
Figures out what words are in `image`, where the words are in `image`, etc. |
|
||||||
> Note: `image` should be sufficiently high resolution. |
|
||||||
> Often, the same image will get much better results if you upscale it before calling `recognize`. |
|
||||||
|
|
||||||
**Arguments:** |
|
||||||
|
|
||||||
- `image` see [Image Format](./image-format.md) for more details. |
|
||||||
- `options` an object of customized options |
|
||||||
- `rectangle` an object to specify the region you want to recognize in the image; it should contain top, left, width and height, see the example below. |
|
||||||
- `jobId` Please see details above |
|
||||||
|
|
||||||
**Output:** |
|
||||||
|
|
||||||
**Examples:** |
|
||||||
|
|
||||||
```javascript |
|
||||||
const { createWorker } = Tesseract; |
|
||||||
(async () => { |
|
||||||
const worker = createWorker(); |
|
||||||
await worker.load(); |
|
||||||
await worker.loadLanguage('eng'); |
|
||||||
await worker.initialize('eng'); |
|
||||||
const { data: { text } } = await worker.recognize(image); |
|
||||||
console.log(text); |
|
||||||
})(); |
|
||||||
``` |
|
||||||
|
|
||||||
With rectangle |
|
||||||
|
|
||||||
```javascript |
|
||||||
const { createWorker } = Tesseract; |
|
||||||
(async () => { |
|
||||||
const worker = createWorker(); |
|
||||||
await worker.load(); |
|
||||||
await worker.loadLanguage('eng'); |
|
||||||
await worker.initialize('eng'); |
|
||||||
const { data: { text } } = await worker.recognize(image, { |
|
||||||
rectangle: { top: 0, left: 0, width: 100, height: 100 }, |
|
||||||
}); |
|
||||||
console.log(text); |
|
||||||
})(); |
|
||||||
``` |
|
||||||
|
|
||||||
<a name="worker-detect"></a> |
|
||||||
### Worker.detect(image, jobId): Promise |
|
||||||
|
|
||||||
Worker.detect() does OSD (Orientation and Script Detection) to the image instead of OCR. |
|
||||||
|
|
||||||
**Arguments:** |
|
||||||
|
|
||||||
- `image` see [Image Format](./image-format.md) for more details. |
|
||||||
- `jobId` Please see details above |
|
||||||
|
|
||||||
**Examples:** |
|
||||||
|
|
||||||
```javascript |
|
||||||
const { createWorker } = Tesseract; |
|
||||||
(async () => { |
|
||||||
const worker = createWorker(); |
|
||||||
await worker.load(); |
|
||||||
await worker.loadLanguage('eng'); |
|
||||||
await worker.initialize('eng'); |
|
||||||
const { data } = await worker.detect(image); |
|
||||||
console.log(data); |
|
||||||
})(); |
|
||||||
``` |
|
||||||
|
|
||||||
<a name="worker-terminate"></a> |
|
||||||
### Worker.terminate(jobId): Promise |
|
||||||
|
|
||||||
Worker.terminate() terminates the worker and cleans up |
|
||||||
|
|
||||||
```javascript |
|
||||||
(async () => { |
|
||||||
await worker.terminate(); |
|
||||||
})(); |
|
||||||
``` |
|
||||||
|
|
||||||
<a name="create-scheduler"></a> |
|
||||||
## createScheduler(): Scheduler |
|
||||||
|
|
||||||
createScheduler() is a factory function that creates a scheduler. A scheduler manages a job queue and a pool of workers so that multiple workers can work together, which is useful when you want to speed up processing. |
|
||||||
|
|
||||||
**Examples:** |
|
||||||
|
|
||||||
```javascript |
|
||||||
const { createScheduler } = Tesseract; |
|
||||||
const scheduler = createScheduler(); |
|
||||||
``` |
|
||||||
|
|
||||||
### Scheduler |
|
||||||
|
|
||||||
<a name="scheduler-add-worker"></a> |
|
||||||
### Scheduler.addWorker(worker): string |
|
||||||
|
|
||||||
Scheduler.addWorker() adds a worker into the worker pool inside scheduler, it is suggested to add one worker to only one scheduler. |
|
||||||
|
|
||||||
**Arguments:** |
|
||||||
|
|
||||||
- `worker` see Worker above |
|
||||||
|
|
||||||
**Examples:** |
|
||||||
|
|
||||||
```javascript |
|
||||||
const { createWorker, createScheduler } = Tesseract; |
|
||||||
const scheduler = createScheduler(); |
|
||||||
const worker = createWorker(); |
|
||||||
scheduler.addWorker(worker); |
|
||||||
``` |
|
||||||
|
|
||||||
<a name="scheduler-add-job"></a> |
|
||||||
### Scheduler.addJob(action, ...payload): Promise |
|
||||||
|
|
||||||
Scheduler.addJob() adds a job to the job queue and scheduler waits and finds an idle worker to take the job. |
|
||||||
|
|
||||||
**Arguments:** |
|
||||||
|
|
||||||
- `action` a string to indicate the action you want to do, right now only **recognize** and **detect** are supported |
|
||||||
- `payload` an arbitrary number of args, depending on the action you called. |
|
||||||
|
|
||||||
**Examples:** |
|
||||||
|
|
||||||
```javascript |
|
||||||
(async () => { |
|
||||||
const { data: { text } } = await scheduler.addJob('recognize', image, options); |
|
||||||
const { data } = await scheduler.addJob('detect', image); |
|
||||||
})(); |
|
||||||
``` |
|
||||||
|
|
||||||
<a name="scheduler-get-queue-len"></a> |
|
||||||
### Scheduler.getQueueLen(): number |
|
||||||
|
|
||||||
Scheduler.getQueueLen() returns the length of the job queue. |
|
||||||
|
|
||||||
<a name="scheduler-get-num-workers"></a> |
|
||||||
### Scheduler.getNumWorkers(): number |
|
||||||
|
|
||||||
Scheduler.getNumWorkers() returns number of workers added into the scheduler |
|
||||||
|
|
||||||
<a name="scheduler-terminate"></a> |
|
||||||
### Scheduler.terminate(): Promise |
|
||||||
|
|
||||||
Scheduler.terminate() terminates all workers added, useful to do quick clean up. |
|
||||||
|
|
||||||
**Examples:** |
|
||||||
|
|
||||||
```javascript |
|
||||||
(async () => { |
|
||||||
await scheduler.terminate(); |
|
||||||
})(); |
|
||||||
``` |
|
||||||
|
|
||||||
<a name="set-logging"></a> |
|
||||||
## setLogging(logging: boolean) |
|
||||||
|
|
||||||
setLogging() sets the logging flag, you can `setLogging(true)` to see detailed information, useful for debugging. |
|
||||||
|
|
||||||
**Arguments:** |
|
||||||
|
|
||||||
- `logging` boolean to define whether to see detailed logs, default: false |
|
||||||
|
|
||||||
**Examples:** |
|
||||||
|
|
||||||
```javascript |
|
||||||
const { setLogging } = Tesseract; |
|
||||||
setLogging(true); |
|
||||||
``` |
|
||||||
|
|
||||||
<a name="recognize"></a> |
|
||||||
## recognize(image, langs, options): Promise |
|
||||||
|
|
||||||
recognize() is a shortcut function for quickly running a recognition task. It is not recommended for use in real applications, but it is useful when you want to save some time. |
|
||||||
|
|
||||||
See [Tesseract.js](../src/Tesseract.js) |
|
||||||
|
|
||||||
<a name="detect"></a> |
|
||||||
## detect(image, options): Promise |
|
||||||
|
|
||||||
Same background as recognize(), but it does detect instead. |
|
||||||
|
|
||||||
See [Tesseract.js](../src/Tesseract.js) |
|
||||||
|
|
||||||
<a name="psm"></a> |
|
||||||
## PSM |
|
||||||
|
|
||||||
See [PSM.js](../src/constants/PSM.js) |
|
||||||
|
|
||||||
<a name="oem"></a> |
|
||||||
## OEM |
|
||||||
|
|
||||||
See [OEM.js](../src/constants/OEM.js) |
|
@ -1,226 +0,0 @@ |
|||||||
# Tesseract.js Examples |
|
||||||
|
|
||||||
You can also check [examples](../examples) folder. |
|
||||||
|
|
||||||
### basic |
|
||||||
|
|
||||||
```javascript |
|
||||||
const { createWorker } = require('tesseract.js'); |
|
||||||
|
|
||||||
const worker = createWorker(); |
|
||||||
|
|
||||||
(async () => { |
|
||||||
await worker.load(); |
|
||||||
await worker.loadLanguage('eng'); |
|
||||||
await worker.initialize('eng'); |
|
||||||
const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png'); |
|
||||||
console.log(text); |
|
||||||
await worker.terminate(); |
|
||||||
})(); |
|
||||||
``` |
|
||||||
|
|
||||||
### with detailed progress |
|
||||||
|
|
||||||
```javascript |
|
||||||
const { createWorker } = require('tesseract.js'); |
|
||||||
|
|
||||||
const worker = createWorker({ |
|
||||||
logger: m => console.log(m), // Add logger here |
|
||||||
}); |
|
||||||
|
|
||||||
(async () => { |
|
||||||
await worker.load(); |
|
||||||
await worker.loadLanguage('eng'); |
|
||||||
await worker.initialize('eng'); |
|
||||||
const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png'); |
|
||||||
console.log(text); |
|
||||||
await worker.terminate(); |
|
||||||
})(); |
|
||||||
``` |
|
||||||
|
|
||||||
### with multiple languages, separate by '+' |
|
||||||
|
|
||||||
```javascript |
|
||||||
const { createWorker } = require('tesseract.js'); |
|
||||||
|
|
||||||
const worker = createWorker(); |
|
||||||
|
|
||||||
(async () => { |
|
||||||
await worker.load(); |
|
||||||
await worker.loadLanguage('eng+chi_tra'); |
|
||||||
await worker.initialize('eng+chi_tra'); |
|
||||||
const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png'); |
|
||||||
console.log(text); |
|
||||||
await worker.terminate(); |
|
||||||
})(); |
|
||||||
``` |
|
||||||
### with whitelist char (^2.0.0-beta.1) |
|
||||||
|
|
||||||
```javascript |
|
||||||
const { createWorker } = require('tesseract.js'); |
|
||||||
|
|
||||||
const worker = createWorker(); |
|
||||||
|
|
||||||
(async () => { |
|
||||||
await worker.load(); |
|
||||||
await worker.loadLanguage('eng'); |
|
||||||
await worker.initialize('eng'); |
|
||||||
await worker.setParameters({ |
|
||||||
tessedit_char_whitelist: '0123456789', |
|
||||||
}); |
|
||||||
const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png'); |
|
||||||
console.log(text); |
|
||||||
await worker.terminate(); |
|
||||||
})(); |
|
||||||
``` |
|
||||||
|
|
||||||
### with different pageseg mode (^2.0.0-beta.1) |
|
||||||
|
|
||||||
Check here for more details of pageseg mode: https://github.com/tesseract-ocr/tesseract/blob/4.0.0/src/ccstruct/publictypes.h#L163 |
|
||||||
|
|
||||||
```javascript |
|
||||||
const { createWorker, PSM } = require('tesseract.js'); |
|
||||||
|
|
||||||
const worker = createWorker(); |
|
||||||
|
|
||||||
(async () => { |
|
||||||
await worker.load(); |
|
||||||
await worker.loadLanguage('eng'); |
|
||||||
await worker.initialize('eng'); |
|
||||||
await worker.setParameters({ |
|
||||||
tessedit_pageseg_mode: PSM.SINGLE_BLOCK, |
|
||||||
}); |
|
||||||
const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png'); |
|
||||||
console.log(text); |
|
||||||
await worker.terminate(); |
|
||||||
})(); |
|
||||||
``` |
|
||||||
|
|
||||||
### with pdf output (^2.0.0-beta.1) |
|
||||||
|
|
||||||
Please check **examples** folder for details. |
|
||||||
|
|
||||||
Browser: [download-pdf.html](../examples/browser/download-pdf.html) |
|
||||||
Node: [download-pdf.js](../examples/node/download-pdf.js) |
|
||||||
|
|
||||||
### with only part of the image (^2.0.1) |
|
||||||
|
|
||||||
**One rectangle** |
|
||||||
|
|
||||||
```javascript |
|
||||||
const { createWorker } = require('tesseract.js'); |
|
||||||
|
|
||||||
const worker = createWorker(); |
|
||||||
const rectangle = { left: 0, top: 0, width: 500, height: 250 }; |
|
||||||
|
|
||||||
(async () => { |
|
||||||
await worker.load(); |
|
||||||
await worker.loadLanguage('eng'); |
|
||||||
await worker.initialize('eng'); |
|
||||||
const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png', { rectangle }); |
|
||||||
console.log(text); |
|
||||||
await worker.terminate(); |
|
||||||
})(); |
|
||||||
``` |
|
||||||
|
|
||||||
**Multiple Rectangles** |
|
||||||
|
|
||||||
```javascript |
|
||||||
const { createWorker } = require('tesseract.js'); |
|
||||||
|
|
||||||
const worker = createWorker(); |
|
||||||
const rectangles = [ |
|
||||||
{ |
|
||||||
left: 0, |
|
||||||
top: 0, |
|
||||||
width: 500, |
|
||||||
height: 250, |
|
||||||
}, |
|
||||||
{ |
|
||||||
left: 500, |
|
||||||
top: 0, |
|
||||||
width: 500, |
|
||||||
height: 250, |
|
||||||
}, |
|
||||||
]; |
|
||||||
|
|
||||||
(async () => { |
|
||||||
await worker.load(); |
|
||||||
await worker.loadLanguage('eng'); |
|
||||||
await worker.initialize('eng'); |
|
||||||
const values = []; |
|
||||||
for (let i = 0; i < rectangles.length; i++) { |
|
||||||
const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png', { rectangle: rectangles[i] }); |
|
||||||
values.push(text); |
|
||||||
} |
|
||||||
console.log(values); |
|
||||||
await worker.terminate(); |
|
||||||
})(); |
|
||||||
``` |
|
||||||
|
|
||||||
**Multiple Rectangles (with scheduler to do recognition in parallel)** |
|
||||||
|
|
||||||
```javascript |
|
||||||
const { createWorker, createScheduler } = require('tesseract.js'); |
|
||||||
|
|
||||||
const scheduler = createScheduler(); |
|
||||||
const worker1 = createWorker(); |
|
||||||
const worker2 = createWorker(); |
|
||||||
const rectangles = [ |
|
||||||
{ |
|
||||||
left: 0, |
|
||||||
top: 0, |
|
||||||
width: 500, |
|
||||||
height: 250, |
|
||||||
}, |
|
||||||
{ |
|
||||||
left: 500, |
|
||||||
top: 0, |
|
||||||
width: 500, |
|
||||||
height: 250, |
|
||||||
}, |
|
||||||
]; |
|
||||||
|
|
||||||
(async () => { |
|
||||||
await worker1.load(); |
|
||||||
await worker2.load(); |
|
||||||
await worker1.loadLanguage('eng'); |
|
||||||
await worker2.loadLanguage('eng'); |
|
||||||
await worker1.initialize('eng'); |
|
||||||
await worker2.initialize('eng'); |
|
||||||
scheduler.addWorker(worker1); |
|
||||||
scheduler.addWorker(worker2); |
|
||||||
const results = await Promise.all(rectangles.map((rectangle) => ( |
|
||||||
scheduler.addJob('recognize', 'https://tesseract.projectnaptha.com/img/eng_bw.png', { rectangle }) |
|
||||||
))); |
|
||||||
console.log(results.map(r => r.data.text)); |
|
||||||
await scheduler.terminate(); |
|
||||||
})(); |
|
||||||
``` |
|
||||||
|
|
||||||
### with multiple workers to speed up (^2.0.0-beta.1) |
|
||||||
|
|
||||||
```javascript |
|
||||||
const { createWorker, createScheduler } = require('tesseract.js'); |
|
||||||
|
|
||||||
const scheduler = createScheduler(); |
|
||||||
const worker1 = createWorker(); |
|
||||||
const worker2 = createWorker(); |
|
||||||
|
|
||||||
(async () => { |
|
||||||
await worker1.load(); |
|
||||||
await worker2.load(); |
|
||||||
await worker1.loadLanguage('eng'); |
|
||||||
await worker2.loadLanguage('eng'); |
|
||||||
await worker1.initialize('eng'); |
|
||||||
await worker2.initialize('eng'); |
|
||||||
scheduler.addWorker(worker1); |
|
||||||
scheduler.addWorker(worker2); |
|
||||||
/** Add 10 recognition jobs */ |
|
||||||
const results = await Promise.all(Array(10).fill(0).map(() => ( |
|
||||||
scheduler.addJob('recognize', 'https://tesseract.projectnaptha.com/img/eng_bw.png') |
|
||||||
))) |
|
||||||
console.log(results); |
|
||||||
await scheduler.terminate(); // It also terminates all workers. |
|
||||||
})(); |
|
||||||
``` |
|
@ -1,42 +0,0 @@ |
|||||||
FAQ |
|
||||||
=== |
|
||||||
|
|
||||||
## How does tesseract.js download and keep \*.traineddata? |
|
||||||
|
|
||||||
The language model is downloaded by `worker.loadLanguage()` and you need to pass the langs to `worker.initialize()`. |
|
||||||
|
|
||||||
During the downloading of language model, Tesseract.js will first check if \*.traineddata already exists. (browser: [IndexedDB](https://developer.mozilla.org/en-US/docs/Web/API/IndexedDB_API), Node.js: fs, in the folder you execute the command) If the \*.traineddata doesn't exist, it will fetch \*.traineddata.gz from [tessdata](https://github.com/naptha/tessdata), ungzip and store in IndexedDB or fs, you can delete it manually and it will download again for you. |
|
||||||
|
|
||||||
## How can I train my own \*.traineddata? |
|
||||||
|
|
||||||
For tesseract.js v2, check [TrainingTesseract 4.00](https://tesseract-ocr.github.io/tessdoc/TrainingTesseract-4.00) |
|
||||||
|
|
||||||
For tesseract.js v1, check [Training Tesseract 3.03–3.05](https://tesseract-ocr.github.io/tessdoc/Training-Tesseract-3.03%E2%80%933.05) |
|
||||||
|
|
||||||
## How can I get HOCR, TSV, Box, UNLV, OSD? |
|
||||||
|
|
||||||
Starting from 2.0.0-beta.1, you can get all of this information in the final result. |
|
||||||
|
|
||||||
```javascript |
|
||||||
import { createWorker } from 'tesseract.js'; |
|
||||||
const worker = createWorker({ |
|
||||||
logger: m => console.log(m) |
|
||||||
}); |
|
||||||
|
|
||||||
(async () => { |
|
||||||
await worker.load(); |
|
||||||
await worker.loadLanguage('eng'); |
|
||||||
await worker.initialize('eng'); |
|
||||||
await worker.setParameters({ |
|
||||||
tessjs_create_box: '1', |
|
||||||
tessjs_create_unlv: '1', |
|
||||||
tessjs_create_osd: '1', |
|
||||||
}); |
|
||||||
const { data: { text, hocr, tsv, box, unlv } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png'); |
|
||||||
console.log(text); |
|
||||||
console.log(hocr); |
|
||||||
console.log(tsv); |
|
||||||
console.log(box); |
|
||||||
console.log(unlv); |
|
||||||
})(); |
|
||||||
``` |
|
@ -1,18 +0,0 @@ |
|||||||
# Image Format |
|
||||||
|
|
||||||
The main Tesseract.js functions (ex. recognize, detect) take an `image` parameter. The image formats and data types supported are listed below. |
|
||||||
|
|
||||||
Supported Image Formats: **bmp, jpg, png, pbm, webp** |
|
||||||
|
|
||||||
For browser and Node, supported data types are: |
|
||||||
- string with base64 encoded image (fits `data:image\/([a-zA-Z]*);base64,([^"]*)` regexp) |
|
||||||
- buffer |
|
||||||
|
|
||||||
For browser only, supported data types are: |
|
||||||
- `File` or `Blob` object |
|
||||||
- `img` or `canvas` element |
|
||||||
|
|
||||||
For Node only, supported data types are: |
|
||||||
- string containing a path to local image |
|
||||||
|
|
||||||
Note: images must be a supported image format **and** a supported data type. For example, a buffer containing a png image is supported. A buffer containing raw pixel data is not supported. |
|
Before Width: | Height: | Size: 105 KiB |
Before Width: | Height: | Size: 237 KiB |
@ -1,38 +0,0 @@ |
|||||||
## Local Installation |
|
||||||
|
|
||||||
Check here for examples: https://github.com/naptha/tesseract.js/blob/master/docs/examples.md |
|
||||||
|
|
||||||
In browser environment, `tesseract.js` simply provides the API layer. Internally, it opens a WebWorker to handle requests. That worker itself loads code from the Emscripten-built `tesseract.js-core` which itself is hosted on a CDN. Then it dynamically loads language files hosted on another CDN. |
|
||||||
|
|
||||||
Because of this, we recommend loading `tesseract.js` from a CDN. But if you really need to have all your files local, you can pass extra options to `Tesseract.recognize` or `createWorker` to specify custom paths for workers, languages, and core. |
|
||||||
|
|
||||||
In Node.js environment, the only path you may want to customize is languages/langPath. |
|
||||||
|
|
||||||
```javascript |
|
||||||
Tesseract.recognize(image, langs, { |
|
||||||
workerPath: 'https://unpkg.com/tesseract.js@v2.0.0/dist/worker.min.js', |
|
||||||
langPath: 'https://tessdata.projectnaptha.com/4.0.0', |
|
||||||
corePath: 'https://unpkg.com/tesseract.js-core@v2.0.0/tesseract-core.wasm.js', |
|
||||||
}) |
|
||||||
``` |
|
||||||
|
|
||||||
Or |
|
||||||
|
|
||||||
```javascript |
|
||||||
const worker = createWorker({ |
|
||||||
workerPath: 'https://unpkg.com/tesseract.js@v2.0.0/dist/worker.min.js', |
|
||||||
langPath: 'https://tessdata.projectnaptha.com/4.0.0', |
|
||||||
corePath: 'https://unpkg.com/tesseract.js-core@v2.0.0/tesseract-core.wasm.js', |
|
||||||
}); |
|
||||||
``` |
|
||||||
|
|
||||||
### workerPath |
|
||||||
A string specifying the location of the [worker.js](./dist/worker.min.js) file. |
|
||||||
|
|
||||||
### langPath |
|
||||||
A string specifying the location of the tesseract language files, with default value 'https://tessdata.projectnaptha.com/4.0.0'. Language file URLs are calculated according to the formula `langPath + '/' + langCode + '.traineddata.gz'`. |
|
||||||
|
|
||||||
### corePath |
|
||||||
A string specifying the location of the [tesseract.js-core library](https://github.com/naptha/tesseract.js-core), with default value 'https://unpkg.com/tesseract.js-core@v2.0.0/tesseract-core.wasm.js' (fallback to tesseract-core.asm.js when WebAssembly is not available). |
|
||||||
|
|
||||||
Another WASM option is 'https://unpkg.com/tesseract.js-core@v2.0.0/tesseract-core.js' which is a script that loads 'https://unpkg.com/tesseract.js-core@v2.0.0/tesseract-core.wasm'. But it fails to fetch at this moment. |
|
@ -1,3 +1,72 @@ |
|||||||
# Tesseract Languages |
# Tesseract Languages |
||||||
|
|
||||||
Please check [HERE](https://tesseract-ocr.github.io/tessdoc/Data-Files#data-files-for-version-400-november-29-2016) for supported languages |
The `lang` property of the options object passed to `Tesseract.recognize` can have one of the following values (the default is `'eng'`.): |
||||||
|
|
||||||
|
| `lang` | Language | |
||||||
|
|-----------|-----------------------| |
||||||
|
| 'afr' | Afrikaans | |
||||||
|
| 'ara' | Arabic | |
||||||
|
| 'aze' | Azerbaijani | |
||||||
|
| 'bel' | Belarusian | |
||||||
|
| 'ben' | Bengali | |
||||||
|
| 'bul' | Bulgarian | |
||||||
|
| 'cat' | Catalan | |
||||||
|
| 'ces' | Czech | |
||||||
|
| 'chi_sim' | Chinese | |
||||||
|
| 'chi_tra' | Traditional Chinese | |
||||||
|
| 'chr' | Cherokee | |
||||||
|
| 'dan' | Danish | |
||||||
|
| 'deu' | German | |
||||||
|
| 'ell' | Greek | |
||||||
|
| 'eng' | English | |
||||||
|
| 'enm' | English (Old) | |
||||||
|
| 'epo' | Esperanto | |
||||||
|
| 'epo_alt' | Esperanto alternative | |
||||||
|
| 'equ' | Math | |
||||||
|
| 'est' | Estonian | |
||||||
|
| 'eus' | Basque | |
||||||
|
| 'fas' |Persian (Farsi) | |
||||||
|
| 'fin' | Finnish | |
||||||
|
| 'fra' | French | |
||||||
|
| 'frk' | Frankish | |
||||||
|
| 'frm' | French (Old) | |
||||||
|
| 'glg' | Galician | |
||||||
|
| 'grc' | Ancient Greek | |
||||||
|
| 'heb' | Hebrew | |
||||||
|
| 'hin' | Hindi | |
||||||
|
| 'hrv' | Croatian | |
||||||
|
| 'hun' | Hungarian | |
||||||
|
| 'ind' | Indonesian | |
||||||
|
| 'isl' | Icelandic | |
||||||
|
| 'ita' | Italian | |
||||||
|
| 'ita_old' | Italian (Old) | |
||||||
|
| 'jpn' | Japanese | |
||||||
|
| 'kan' | Kannada | |
||||||
|
| 'kor' | Korean | |
||||||
|
| 'lav' | Latvian | |
||||||
|
| 'lit' | Lithuanian | |
||||||
|
| 'mal' | Malayalam | |
||||||
|
| 'mkd' | Macedonian | |
||||||
|
| 'mlt' | Maltese | |
||||||
|
| 'msa' | Malay | |
||||||
|
| 'nld' | Dutch | |
||||||
|
| 'nor' | Norwegian | |
||||||
|
| 'pol' | Polish | |
||||||
|
| 'por' | Portuguese | |
||||||
|
| 'ron' | Romanian | |
||||||
|
| 'rus' | Russian | |
||||||
|
| 'slk' | Slovakian | |
||||||
|
| 'slv' | Slovenian | |
||||||
|
| 'spa' | Spanish | |
||||||
|
| 'spa_old' | Old Spanish | |
||||||
|
| 'sqi' | Albanian | |
||||||
|
| 'srp' | Serbian (Latin) | |
||||||
|
| 'swa' | Swahili | |
||||||
|
| 'swe' | Swedish | |
||||||
|
| 'tam' | Tamil | |
||||||
|
| 'tel' | Telugu | |
||||||
|
| 'tgl' | Tagalog | |
||||||
|
| 'tha' | Thai | |
||||||
|
| 'tur' | Turkish | |
||||||
|
| 'ukr' | Ukrainian | |
||||||
|
| 'vie' | Vietnamese | |
||||||
|
Before Width: | Height: | Size: 169 KiB After Width: | Height: | Size: 215 KiB |
@ -1,37 +0,0 @@ |
|||||||
<!DOCTYPE HTML> |
|
||||||
<html> |
|
||||||
<head> |
|
||||||
<script src="/dist/tesseract.dev.js"></script> |
|
||||||
</head> |
|
||||||
<body> |
|
||||||
<input type="file" id="uploader"> |
|
||||||
<script> |
|
||||||
const recognize = function(evt){ |
|
||||||
const files = evt.target.files; |
|
||||||
const worker = Tesseract.createWorker({ |
|
||||||
/* |
|
||||||
* As Edge don't support webassembly, |
|
||||||
* here we force to use asm.js version. |
|
||||||
*/ |
|
||||||
corePath: '../../node_modules/tesseract.js-core/tesseract-core.asm.js', |
|
||||||
logger: function(m){console.log(m);}, |
|
||||||
/* |
|
||||||
* As there is no indexedDB in earlier version |
|
||||||
* of Edge, here we disable cache. |
|
||||||
*/ |
|
||||||
cacheMethod: 'none', |
|
||||||
}); |
|
||||||
Promise.resolve() |
|
||||||
.then(() => worker.load()) |
|
||||||
.then(() => worker.loadLanguage('eng')) |
|
||||||
.then(() => worker.initialize('eng')) |
|
||||||
.then(() => worker.recognize(files[0])) |
|
||||||
.then((ret) => { |
|
||||||
console.log(ret.data.text); |
|
||||||
}); |
|
||||||
} |
|
||||||
const elm = document.getElementById('uploader'); |
|
||||||
elm.addEventListener('change', recognize); |
|
||||||
</script> |
|
||||||
</body> |
|
||||||
</html> |
|
@ -1,19 +0,0 @@ |
|||||||
<html> |
|
||||||
<head> |
|
||||||
<script src="/dist/tesseract.dev.js"></script> |
|
||||||
</head> |
|
||||||
<body> |
|
||||||
<input type="file" id="uploader"> |
|
||||||
<script> |
|
||||||
const recognize = async ({ target: { files } }) => { |
|
||||||
const { data: { text } } = await Tesseract.recognize(files[0], 'eng', { |
|
||||||
corePath: '../../node_modules/tesseract.js-core/tesseract-core.wasm.js', |
|
||||||
logger: m => console.log(m), |
|
||||||
}); |
|
||||||
console.log(text); |
|
||||||
} |
|
||||||
const elm = document.getElementById('uploader'); |
|
||||||
elm.addEventListener('change', recognize); |
|
||||||
</script> |
|
||||||
</body> |
|
||||||
</html> |
|
@ -1,33 +0,0 @@ |
|||||||
<html> |
|
||||||
<head> |
|
||||||
<script src="/dist/tesseract.dev.js"></script> |
|
||||||
</head> |
|
||||||
<body> |
|
||||||
<textarea id="message">Working...</textarea> |
|
||||||
|
|
||||||
<script> |
|
||||||
const { createWorker } = Tesseract; |
|
||||||
const worker = createWorker(); |
|
||||||
(async () => { |
|
||||||
await worker.load(); |
|
||||||
await worker.loadLanguage('eng'); |
|
||||||
await worker.initialize('eng'); |
|
||||||
|
|
||||||
const fileArr = ["../data/meditations.jpg", "../data/tyger.jpg", "../data/testocr.png"]; |
|
||||||
let timeTotal = 0; |
|
||||||
for (let file of fileArr) { |
|
||||||
let time1 = Date.now(); |
|
||||||
for (let i=0; i < 10; i++) { |
|
||||||
await worker.recognize(file); |
|
||||||
} |
|
||||||
let time2 = Date.now(); |
|
||||||
const timeDif = (time2 - time1) / 1e3; |
|
||||||
timeTotal += timeDif; |
|
||||||
document.getElementById('message').innerHTML += "\n" + file + " [x10] runtime: " + timeDif + "s"; |
|
||||||
} |
|
||||||
document.getElementById('message').innerHTML += "\nTotal runtime: " + timeTotal + "s"; |
|
||||||
|
|
||||||
})(); |
|
||||||
</script> |
|
||||||
</body> |
|
||||||
</html> |
|
@ -1,52 +0,0 @@ |
|||||||
<html> |
|
||||||
<head> |
|
||||||
<script src="/dist/tesseract.dev.js"></script> |
|
||||||
</head> |
|
||||||
<body> |
|
||||||
<div> |
|
||||||
<input type="file" id="uploader"> |
|
||||||
<button id="download-pdf" disabled="true">Download PDF</button> |
|
||||||
</div> |
|
||||||
<textarea id="board" readonly rows="8" cols="80">Upload an image file</textarea> |
|
||||||
<script> |
|
||||||
const { createWorker } = Tesseract; |
|
||||||
const worker = createWorker({ |
|
||||||
corePath: '/node_modules/tesseract.js-core/tesseract-core.wasm.js', |
|
||||||
logger: m => console.log(m), |
|
||||||
}); |
|
||||||
const uploader = document.getElementById('uploader'); |
|
||||||
const dlBtn = document.getElementById('download-pdf'); |
|
||||||
const recognize = async ({ target: { files } }) => { |
|
||||||
await worker.load(); |
|
||||||
await worker.loadLanguage('eng'); |
|
||||||
await worker.initialize('eng'); |
|
||||||
const { data: { text } } = await worker.recognize(files[0]); |
|
||||||
const board = document.getElementById('board'); |
|
||||||
board.value = text; |
|
||||||
dlBtn.disabled = false; |
|
||||||
}; |
|
||||||
const downloadPDF = async () => { |
|
||||||
const filename = 'tesseract-ocr-result.pdf'; |
|
||||||
const { data } = await worker.getPDF('Tesseract OCR Result'); |
|
||||||
const blob = new Blob([new Uint8Array(data)], { type: 'application/pdf' }); |
|
||||||
if (navigator.msSaveBlob) { |
|
||||||
// IE 10+ |
|
||||||
navigator.msSaveBlob(blob, filename); |
|
||||||
} else { |
|
||||||
const link = document.createElement('a'); |
|
||||||
if (link.download !== undefined) { |
|
||||||
const url = URL.createObjectURL(blob); |
|
||||||
link.setAttribute('href', url); |
|
||||||
link.setAttribute('download', filename); |
|
||||||
link.style.visibility = 'hidden'; |
|
||||||
document.body.appendChild(link); |
|
||||||
link.click(); |
|
||||||
document.body.removeChild(link); |
|
||||||
} |
|
||||||
} |
|
||||||
}; |
|
||||||
uploader.addEventListener('change', recognize); |
|
||||||
dlBtn.addEventListener('click', downloadPDF); |
|
||||||
</script> |
|
||||||
</body> |
|
||||||
</html> |
|
Before Width: | Height: | Size: 1011 KiB |
Before Width: | Height: | Size: 23 KiB |
Before Width: | Height: | Size: 408 KiB |
@ -0,0 +1,2 @@ |
|||||||
|
<script src="/dist/tesseract.dev.js"></script> |
||||||
|
<input type="file" onchange="Tesseract.recognize(this.files[0]).progress(function(data){console.log(data)}).then(function(data){console.log(data)})"> |
@ -0,0 +1,15 @@ |
|||||||
|
// replace this with require('tesseract.js')
|
||||||
|
var Tesseract = require('../../'), |
||||||
|
image = require('path').resolve(__dirname, 'cosmic.png'); |
||||||
|
|
||||||
|
Tesseract.recognize(image) |
||||||
|
.then(data => { |
||||||
|
console.log('then\n', data.text) |
||||||
|
}) |
||||||
|
.catch(err => { |
||||||
|
console.log('catch\n', err); |
||||||
|
}) |
||||||
|
.finally(e => { |
||||||
|
console.log('finally\n'); |
||||||
|
process.exit(); |
||||||
|
}); |
@ -1,27 +0,0 @@ |
|||||||
#!/usr/bin/env node
|
|
||||||
const path = require('path'); |
|
||||||
const { createWorker } = require('../../'); |
|
||||||
|
|
||||||
const worker = createWorker(); |
|
||||||
|
|
||||||
(async () => { |
|
||||||
await worker.load(); |
|
||||||
await worker.loadLanguage('eng'); |
|
||||||
await worker.initialize('eng'); |
|
||||||
const fileArr = ["../data/meditations.jpg", "../data/tyger.jpg", "../data/testocr.png"]; |
|
||||||
let timeTotal = 0; |
|
||||||
for (let file of fileArr) { |
|
||||||
let time1 = Date.now(); |
|
||||||
for (let i=0; i < 10; i++) { |
|
||||||
await worker.recognize(file) |
|
||||||
} |
|
||||||
let time2 = Date.now(); |
|
||||||
const timeDif = (time2 - time1) / 1e3; |
|
||||||
timeTotal += timeDif; |
|
||||||
|
|
||||||
console.log(file + " [x10] runtime: " + timeDif + "s"); |
|
||||||
} |
|
||||||
console.log("Total runtime: " + timeTotal + "s"); |
|
||||||
|
|
||||||
await worker.terminate(); |
|
||||||
})(); |
|
Before Width: | Height: | Size: 13 KiB After Width: | Height: | Size: 13 KiB |
@ -1,13 +1,12 @@ |
|||||||
#!/usr/bin/env node
|
// replace this with require('tesseract.js')
|
||||||
const path = require('path'); |
var Tesseract = require('../../'), |
||||||
const Tesseract = require('../../'); |
image = require('path').resolve(__dirname, 'cosmic.png'); |
||||||
|
|
||||||
const [,, imagePath] = process.argv; |
Tesseract.detect(image) |
||||||
const image = path.resolve(__dirname, (imagePath || '../../tests/assets/images/cosmic.png')); |
.progress(function(info){ |
||||||
|
console.log(info); |
||||||
console.log(`Recognizing ${image}`); |
}) |
||||||
|
.then(function(data){ |
||||||
Tesseract.detect(image, { logger: m => console.log(m) }) |
console.log('done', data); |
||||||
.then(({ data }) => { |
process.exit(); |
||||||
console.log(data); |
}) |
||||||
}); |
|
@ -1,22 +0,0 @@ |
|||||||
#!/usr/bin/env node
|
|
||||||
const path = require('path'); |
|
||||||
const fs = require('fs'); |
|
||||||
const { createWorker } = require('../../'); |
|
||||||
|
|
||||||
const [,, imagePath] = process.argv; |
|
||||||
const image = path.resolve(__dirname, (imagePath || '../../tests/assets/images/cosmic.png')); |
|
||||||
|
|
||||||
console.log(`Recognizing ${image}`); |
|
||||||
|
|
||||||
(async () => { |
|
||||||
const worker = createWorker(); |
|
||||||
await worker.load(); |
|
||||||
await worker.loadLanguage('eng'); |
|
||||||
await worker.initialize('eng'); |
|
||||||
const { data: { text } } = await worker.recognize(image); |
|
||||||
console.log(text); |
|
||||||
const { data } = await worker.getPDF('Tesseract OCR Result'); |
|
||||||
fs.writeFileSync('tesseract-ocr-result.pdf', Buffer.from(data)); |
|
||||||
console.log('Generate PDF: tesseract-ocr-result.pdf'); |
|
||||||
await worker.terminate(); |
|
||||||
})(); |
|
@ -1,20 +0,0 @@ |
|||||||
#!/usr/bin/env node
|
|
||||||
const path = require('path');
const { createWorker } = require('../../');

// Optional first CLI argument: the image to read, resolved against this script's directory.
const [,, imagePath] = process.argv;
const image = path.resolve(__dirname, (imagePath || '../../tests/assets/images/cosmic.png'));

console.log(`Recognizing ${image}`);
const worker = createWorker({
  // Print every message the worker reports.
  logger: m => console.log(m),
});

/*
 * Recognize the image and print the text.
 */
(async () => {
  try {
    await worker.load();
    await worker.loadLanguage('eng');
    await worker.initialize('eng');
    const { data: { text } } = await worker.recognize(image);
    console.log(text);
  } finally {
    // Shut the worker down even when one of the steps above failed.
    await worker.terminate();
  }
})().catch((err) => {
  // Report the failure instead of leaving an unhandled rejection.
  console.error(err);
  process.exitCode = 1;
});
|
@ -1,5 +0,0 @@ |
|||||||
{ |
|
||||||
"rules": { |
|
||||||
"import/no-extraneous-dependencies": 0 |
|
||||||
} |
|
||||||
} |
|
@ -1,13 +0,0 @@ |
|||||||
import commonjs from "@rollup/plugin-commonjs"; |
|
||||||
|
|
||||||
export default [ |
|
||||||
{ |
|
||||||
input: "dist/tesseract.min.js", |
|
||||||
output: { |
|
||||||
file: "dist/tesseract.esm.min.js", |
|
||||||
format: "esm", |
|
||||||
banner: "/* eslint-disable */", |
|
||||||
}, |
|
||||||
plugins: [commonjs()], |
|
||||||
}, |
|
||||||
]; |
|
@ -1,17 +0,0 @@ |
|||||||
const webpack = require('webpack'); |
|
||||||
const middleware = require('webpack-dev-middleware'); |
|
||||||
const express = require('express'); |
|
||||||
const path = require('path'); |
|
||||||
const cors = require('cors'); |
|
||||||
const webpackConfig = require('./webpack.config.dev'); |
|
||||||
|
|
||||||
const compiler = webpack(webpackConfig); |
|
||||||
const app = express(); |
|
||||||
|
|
||||||
app.use(cors()); |
|
||||||
app.use('/', express.static(path.resolve(__dirname, '..'))); |
|
||||||
app.use(middleware(compiler, { publicPath: '/dist', writeToDisk: true })); |
|
||||||
|
|
||||||
module.exports = app.listen(3000, () => { |
|
||||||
console.log('Server is running on the port no. 3000'); |
|
||||||
}); |
|
@ -1,9 +0,0 @@ |
|||||||
const constants = require('../tests/constants');

// Make the assertion library, two Node core modules and the library under
// test available as globals.
global.expect = require('expect.js');
global.fs = require('fs');
global.path = require('path');
global.Tesseract = require('../src');

// Every exported test constant becomes a global of the same name.
for (const name of Object.keys(constants)) {
  global[name] = constants[name];
}
|
@ -1,28 +0,0 @@ |
|||||||
module.exports = { |
|
||||||
resolve: { |
|
||||||
fallback: { |
|
||||||
buffer: require.resolve('buffer/'), |
|
||||||
}, |
|
||||||
}, |
|
||||||
module: { |
|
||||||
rules: [ |
|
||||||
{ |
|
||||||
test: /\.m?js$/, |
|
||||||
// exclude: /(node_modules|bower_components)/,
|
|
||||||
use: { |
|
||||||
loader: 'babel-loader', |
|
||||||
options: { |
|
||||||
presets: [ |
|
||||||
[ |
|
||||||
'@babel/preset-env', |
|
||||||
{ |
|
||||||
targets: 'last 2 versions', |
|
||||||
}, |
|
||||||
], |
|
||||||
], |
|
||||||
}, |
|
||||||
}, |
|
||||||
}, |
|
||||||
], |
|
||||||
}, |
|
||||||
}; |
|
@ -1,48 +0,0 @@ |
|||||||
const path = require('path'); |
|
||||||
const webpack = require('webpack'); |
|
||||||
const { BundleAnalyzerPlugin } = require('webpack-bundle-analyzer'); |
|
||||||
const common = require('./webpack.config.common'); |
|
||||||
|
|
||||||
/**
 * Build one development webpack configuration on top of the shared settings.
 *
 * @param {object} options
 * @param {string} options.entry - entry module path
 * @param {string} options.filename - output bundle file name
 * @param {string} [options.library] - value for `output.library`
 * @param {string} [options.libraryTarget] - value for `output.libraryTarget`
 * @returns {object} webpack configuration object
 */
const genConfig = ({
  entry, filename, library, libraryTarget,
}) => ({
  ...common,
  mode: 'development',
  entry,
  output: {
    filename,
    library,
    libraryTarget,
  },
  plugins: [
    new webpack.ProvidePlugin({
      Buffer: ['buffer', 'Buffer'],
    }),
    new webpack.DefinePlugin({
      'process.env': {
        TESS_ENV: JSON.stringify('development'),
      },
    }),
    new BundleAnalyzerPlugin({
      // 'disabled' is the documented mode for "only write the stats file";
      // the previous value 'disable' is not one of the documented modes.
      analyzerMode: 'disabled',
      // Stats file is named after the part of the bundle name before the first dot.
      statsFilename: `${filename.split('.')[0]}-stats.json`,
      generateStatsFile: true,
    }),
  ],
  devServer: {
    allowedHosts: ['localhost', '.gitpod.io'],
  },
});
|
||||||
|
|
||||||
module.exports = [ |
|
||||||
genConfig({ |
|
||||||
entry: path.resolve(__dirname, '..', 'src', 'index.js'), |
|
||||||
filename: 'tesseract.dev.js', |
|
||||||
library: 'Tesseract', |
|
||||||
libraryTarget: 'umd', |
|
||||||
}), |
|
||||||
genConfig({ |
|
||||||
entry: path.resolve(__dirname, '..', 'src', 'worker-script', 'browser', 'index.js'), |
|
||||||
filename: 'worker.dev.js', |
|
||||||
}), |
|
||||||
]; |
|
@ -1,36 +0,0 @@ |
|||||||
const path = require('path'); |
|
||||||
const common = require('./webpack.config.common'); |
|
||||||
const webpack = require('webpack'); |
|
||||||
|
|
||||||
/**
 * Build one production webpack configuration (with source maps) that
 * writes into the `dist` directory next to this script's parent.
 *
 * @param {object} settings - `{ entry, filename, library, libraryTarget }`
 * @returns {object} webpack configuration object
 */
const genConfig = (settings) => {
  const {
    entry, filename, library, libraryTarget,
  } = settings;

  const output = {
    path: path.resolve(__dirname, '..', 'dist'),
    filename,
    library,
    libraryTarget,
  };

  // Provide `Buffer` from the `buffer` package wherever it is referenced.
  const bufferProvider = new webpack.ProvidePlugin({
    Buffer: ['buffer', 'Buffer'],
  });

  return {
    ...common,
    mode: 'production',
    devtool: 'source-map',
    entry,
    output,
    plugins: [bufferProvider],
  };
};
|
||||||
|
|
||||||
module.exports = [ |
|
||||||
genConfig({ |
|
||||||
entry: path.resolve(__dirname, '..', 'src', 'index.js'), |
|
||||||
filename: 'tesseract.min.js', |
|
||||||
library: 'Tesseract', |
|
||||||
libraryTarget: 'umd', |
|
||||||
}), |
|
||||||
genConfig({ |
|
||||||
entry: path.resolve(__dirname, '..', 'src', 'worker-script', 'browser', 'index.js'), |
|
||||||
filename: 'worker.min.js', |
|
||||||
}), |
|
||||||
]; |
|
@ -1,28 +0,0 @@ |
|||||||
const createWorker = require('./createWorker'); |
|
||||||
|
|
||||||
/**
 * One-shot recognition: create a worker, prepare it for `langs`, recognize
 * `image` and terminate the worker again.
 *
 * The worker is terminated on every path, including a failure while
 * loading or initializing (previously only a started `recognize` call
 * was followed by `terminate`).
 *
 * @param {*} image - passed to `worker.recognize`
 * @param {*} langs - passed to `worker.loadLanguage` and `worker.initialize`
 * @param {object} [options] - passed to `createWorker`
 * @returns {Promise<*>} whatever `worker.recognize` resolves with
 */
const recognize = async (image, langs, options) => {
  const worker = createWorker(options);
  try {
    await worker.load();
    await worker.loadLanguage(langs);
    await worker.initialize(langs);
    return await worker.recognize(image);
  } finally {
    await worker.terminate();
  }
};
|
||||||
|
|
||||||
/**
 * One-shot detection: create a worker, prepare it with the 'osd' data,
 * run `worker.detect` on `image` and terminate the worker again.
 *
 * The worker is terminated on every path, including a failure while
 * loading or initializing (previously only a started `detect` call was
 * followed by `terminate`).
 *
 * @param {*} image - passed to `worker.detect`
 * @param {object} [options] - passed to `createWorker`
 * @returns {Promise<*>} whatever `worker.detect` resolves with
 */
const detect = async (image, options) => {
  const worker = createWorker(options);
  try {
    await worker.load();
    await worker.loadLanguage('osd');
    await worker.initialize('osd');
    return await worker.detect(image);
  } finally {
    await worker.terminate();
  }
};
|
||||||
|
|
||||||
module.exports = { |
|
||||||
recognize, |
|
||||||
detect, |
|
||||||
}; |
|
@ -0,0 +1,105 @@ |
|||||||
|
// Default locations of the core script and the language data.
// `workerPath` is filled in below depending on the build flavour.
const defaultOptions = {
    // workerPath: 'https://cdn.jsdelivr.net/gh/naptha/tesseract.js@0.2.0/dist/worker.js',
    corePath: 'https://cdn.jsdelivr.net/gh/naptha/tesseract.js-core@0.1.0/index.js',
    langPath: 'https://tessdata.projectnaptha.com/3.02/',
};

if (process.env.TESS_ENV === "development") {
    console.debug('Using Development Configuration');
    // Development: load the worker from the current host; the random
    // query string makes every page load use a distinct URL.
    const cacheBuster = Math.random().toString(36).slice(3);
    defaultOptions.workerPath = `${location.protocol}//${location.host}/dist/worker.dev.js?nocache=${cacheBuster}`;
} else {
    // Otherwise: load the worker matching this package version from the CDN.
    const { version } = require('../../package.json');
    defaultOptions.workerPath = `https://cdn.jsdelivr.net/gh/naptha/tesseract.js@${version}/dist/worker.js`;
}

exports.defaultOptions = defaultOptions;
||||||
|
|
||||||
|
|
||||||
|
exports.spawnWorker = function spawnWorker(instance, workerOptions){ |
||||||
|
if(Blob && URL){ |
||||||
|
var blob = new Blob(['importScripts("' + workerOptions.workerPath + '");'], { |
||||||
|
type: 'application/javascript' |
||||||
|
}); |
||||||
|
var worker = new Worker(URL.createObjectURL(blob)); |
||||||
|
}else{ |
||||||
|
var worker = new Worker(workerOptions.workerPath) |
||||||
|
} |
||||||
|
|
||||||
|
worker.onmessage = function(e){ |
||||||
|
var packet = e.data; |
||||||
|
instance._recv(packet) |
||||||
|
} |
||||||
|
return worker |
||||||
|
} |
||||||
|
|
||||||
|
exports.terminateWorker = function(instance){ |
||||||
|
instance.worker.terminate() |
||||||
|
} |
||||||
|
|
||||||
|
exports.sendPacket = function sendPacket(instance, packet){ |
||||||
|
loadImage(packet.payload.image, function(img){ |
||||||
|
packet.payload.image = img |
||||||
|
instance.worker.postMessage(packet) |
||||||
|
}) |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
/**
 * Reduce any supported image source step by step until it can be handed
 * to `cb`. Each branch converts the input to the next simpler form and
 * recurses:
 *
 *   "#selector" string    -> DOM element
 *   blob:/data: URL       -> Image element
 *   any other string      -> fetched as Blob via XMLHttpRequest
 *   File                  -> data URL
 *   Blob                  -> object URL
 *   canvas element        -> 2d context
 *   IMG / VIDEO element   -> drawn onto a canvas, then its 2d context
 *   2d context            -> getImageData(...) of the whole canvas
 *   anything else         -> passed to `cb` unchanged
 *
 * NOTE(review): failures inside the asynchronous handlers are thrown from
 * the event handler (as before); `cb` has no error channel.
 *
 * @param {*} image - one of the sources listed above
 * @param {function} cb - called with the final image object
 */
function loadImage(image, cb){
    if(image == null){
        // e.g. a selector that matched nothing; fail with a clear message
        // instead of an opaque property-access TypeError further down
        throw new TypeError('loadImage: no image given (null/undefined)')
    }
    if(typeof image === 'string'){
        if(/^#/.test(image)){
            // element css selector
            return loadImage(document.querySelector(image), cb)
        }else if(/^(blob|data):/.test(image)){
            // blob/data url; anchored so that an ordinary URL that merely
            // contains "data:" or "blob:" is still fetched below
            var im = new Image
            im.onload = e => loadImage(im, cb);
            im.onerror = e => { throw e; };
            im.src = image;
            return
        }else{
            var xhr = new XMLHttpRequest();
            xhr.open('GET', image, true)
            xhr.responseType = "blob";

            xhr.onload = e => {
                if (xhr.status >= 400){
                    throw new Error('Fail to get image as Blob');
                }else{
                    loadImage(xhr.response, cb);
                }
            };
            xhr.onerror = e => { throw e; };

            xhr.send(null)
            return
        }
    }else if(typeof File !== 'undefined' && image instanceof File){
        // files
        var fr = new FileReader()
        fr.onload = e => loadImage(fr.result, cb);
        fr.onerror = e => { throw e; };
        fr.readAsDataURL(image)
        return
    }else if(typeof Blob !== 'undefined' && image instanceof Blob){
        return loadImage(URL.createObjectURL(image), cb)
    }else if(image.getContext){
        // canvas element
        return loadImage(image.getContext('2d'), cb)
    }else if(image.tagName == "IMG" || image.tagName == "VIDEO"){
        // image element or video element
        var c = document.createElement('canvas');
        c.width = image.naturalWidth || image.videoWidth;
        c.height = image.naturalHeight || image.videoHeight;
        var ctx = c.getContext('2d');
        ctx.drawImage(image, 0, 0);
        return loadImage(ctx, cb)
    }else if(image.getImageData){
        // canvas context
        var data = image.getImageData(0, 0, image.canvas.width, image.canvas.height);
        return loadImage(data, cb)
    }
    // nothing left to convert
    return cb(image)
}
@ -0,0 +1,76 @@ |
|||||||
|
const leveljs = require('level-js') |
||||||
|
|
||||||
|
// something about trying to store these language files in indexedDB
|
||||||
|
// causes iOS Safari to crash
|
||||||
|
|
||||||
|
var iOS = /iPad|iPhone|iPod/.test(navigator.userAgent); |
||||||
|
var noIDB = typeof indexedDB === 'undefined' || iOS; |
||||||
|
|
||||||
|
var db = noIDB ? { open: (_, cb) => cb(true) } : leveljs('./tessdata2') |
||||||
|
|
||||||
|
var langdata = require('../common/langdata.json') |
||||||
|
|
||||||
|
module.exports = function getLanguageData(req, res, cb){ |
||||||
|
var lang = req.options.lang; |
||||||
|
|
||||||
|
function saveDataFile(data){ |
||||||
|
try { |
||||||
|
db.put(lang, data, err => console.log('cached', lang, err)) |
||||||
|
} finally { |
||||||
|
cb(data) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
db.open({ compression: false }, err => { |
||||||
|
if (err) return fetchLanguageData(req, res, cb); |
||||||
|
db.get(lang, (err, data) => { |
||||||
|
if (err) return fetchLanguageData(req, res, saveDataFile); |
||||||
|
res.progress({ status: 'found in cache ' + lang + '.traineddata' }) |
||||||
|
cb(data) |
||||||
|
}) |
||||||
|
}) |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
const ungzip = require('pako/lib/inflate.js').ungzip; |
||||||
|
|
||||||
|
/**
 * Download `<lang>.traineddata.gz` from `req.workerOptions.langPath`,
 * gunzip it as often as needed and pass the bytes to `cb`.
 *
 * Progress is reported through `res.progress`; every failure (network
 * error, bad HTTP status, gunzip error) is reported through `res.reject`.
 *
 * @param {object} req - carries `options.lang` and `workerOptions.langPath`
 * @param {object} res - responder with `progress(msg)` and `reject(msg)`
 * @param {function(Uint8Array)} cb - receives the language data
 */
function fetchLanguageData(req, res, cb){
    var lang = req.options.lang;
    var langfile = lang + '.traineddata.gz';
    var url = req.workerOptions.langPath + langfile;

    var xhr = new XMLHttpRequest();
    xhr.open('GET', url, true);
    xhr.responseType = 'arraybuffer';
    xhr.onerror = e => {
        xhr.onprogress = xhr.onload = null
        // Previously the XHR object itself was passed to `cb`, i.e. handed
        // on as if it were language data. Reject like the other failures.
        res.reject('Error downloading language ' + url)
    }
    xhr.onprogress = e =>
        res.progress({
            status: 'downloading ' + langfile,
            loaded: e.loaded,
            // langdata[lang] is the reference size used to scale the progress
            progress: Math.min(1, e.loaded / langdata[lang])
        });

    xhr.onload = e => {
        // status 0 together with a response body is accepted as success too
        var succeeded = xhr.status === 200 || (xhr.status === 0 && xhr.response);
        if (!succeeded) return res.reject('Error downloading language ' + url);
        res.progress({ status: 'unzipping ' + langfile, progress: 0 })

        // in case the gzips are already ungzipped or extra gzipped
        var response = new Uint8Array(xhr.response)
        try {
            var n = 2;
            // 0x1f 0x8b are the first two bytes of a gzip stream
            while(response[0] === 0x1f && response[1] === 0x8b){
                response = ungzip(response);
                res.progress({ status: 'unzipping ' + langfile, progress: 1 - 1 / (n++) })
            }
        } catch (err) {
            return res.reject('Error unzipping language file ' + langfile + '\n' + err.message)
        }
        res.progress({ status: 'unzipping ' + langfile, progress: 1 })

        cb(response)
    }
    xhr.send()
}
@ -0,0 +1,23 @@ |
|||||||
|
const workerUtils = require('../common/worker.js') |
||||||
|
|
||||||
|
if (process.env.TESS_ENV === "development") { |
||||||
|
console.debug('Using Development Worker') |
||||||
|
} |
||||||
|
|
||||||
|
global.addEventListener('message', function(e){ |
||||||
|
var packet = e.data; |
||||||
|
workerUtils.dispatchHandlers(packet, obj => postMessage(obj)) |
||||||
|
}) |
||||||
|
|
||||||
|
exports.getCore = function(req, res){ |
||||||
|
if(!global.TesseractCore){ |
||||||
|
res.progress({ status: 'loading tesseract core', progress: 0 }) |
||||||
|
importScripts(req.workerOptions.corePath) |
||||||
|
res.progress({ status: 'loading tesseract core', progress: 1 }) |
||||||
|
} |
||||||
|
return TesseractCore |
||||||
|
} |
||||||
|
|
||||||
|
exports.getLanguageData = require('./lang.js') |
||||||
|
|
||||||
|
workerUtils.setAdapter(module.exports); |
@ -0,0 +1,63 @@ |
|||||||
|
// The result of dump.js is a big JSON tree
|
||||||
|
// which can be easily serialized (for instance
|
||||||
|
// to be sent from a webworker to the main app
|
||||||
|
// or through Node's IPC), but we want
|
||||||
|
// a (circular) DOM-like interface for walking
|
||||||
|
// through the data.
|
||||||
|
|
||||||
|
module.exports = function circularize(page){ |
||||||
|
page.paragraphs = [] |
||||||
|
page.lines = [] |
||||||
|
page.words = [] |
||||||
|
page.symbols = [] |
||||||
|
|
||||||
|
page.blocks.forEach(function(block){ |
||||||
|
block.page = page; |
||||||
|
|
||||||
|
block.lines = [] |
||||||
|
block.words = [] |
||||||
|
block.symbols = [] |
||||||
|
|
||||||
|
block.paragraphs.forEach(function(para){ |
||||||
|
para.block = block; |
||||||
|
para.page = page; |
||||||
|
|
||||||
|
para.words = [] |
||||||
|
para.symbols = [] |
||||||
|
|
||||||
|
para.lines.forEach(function(line){ |
||||||
|
line.paragraph = para; |
||||||
|
line.block = block; |
||||||
|
line.page = page; |
||||||
|
|
||||||
|
line.symbols = [] |
||||||
|
|
||||||
|
line.words.forEach(function(word){ |
||||||
|
word.line = line; |
||||||
|
word.paragraph = para; |
||||||
|
word.block = block; |
||||||
|
word.page = page; |
||||||
|
word.symbols.forEach(function(sym){ |
||||||
|
sym.word = word; |
||||||
|
sym.line = line; |
||||||
|
sym.paragraph = para; |
||||||
|
sym.block = block; |
||||||
|
sym.page = page; |
||||||
|
|
||||||
|
sym.line.symbols.push(sym) |
||||||
|
sym.paragraph.symbols.push(sym) |
||||||
|
sym.block.symbols.push(sym) |
||||||
|
sym.page.symbols.push(sym) |
||||||
|
}) |
||||||
|
word.paragraph.words.push(word) |
||||||
|
word.block.words.push(word) |
||||||
|
word.page.words.push(word) |
||||||
|
}) |
||||||
|
line.block.lines.push(line) |
||||||
|
line.page.lines.push(line) |
||||||
|
}) |
||||||
|
para.page.paragraphs.push(para) |
||||||
|
}) |
||||||
|
}) |
||||||
|
return page |
||||||
|
} |
@ -0,0 +1,24 @@ |
|||||||
|
// This converts an image to grayscale
|
||||||
|
|
||||||
|
module.exports = function desaturate(image){ |
||||||
|
var width, height; |
||||||
|
if(image.data){ |
||||||
|
var src = image.data; |
||||||
|
width = image.width, |
||||||
|
height = image.height; |
||||||
|
var dst = new Uint8Array(width * height); |
||||||
|
var srcLength = src.length | 0, srcLength_16 = (srcLength - 16) | 0; |
||||||
|
|
||||||
|
for (var i = 0, j = 0; i <= srcLength_16; i += 16, j += 4) { |
||||||
|
// convert to grayscale 4 pixels at a time; eveything with alpha gets put in front of 50% gray
|
||||||
|
dst[j] = (((src[i] * 77 + src[i+1] * 151 + src[i+2] * 28) * src[i+3]) + ((255-src[i+3]) << 15) + 32768) >> 16 |
||||||
|
dst[j+1] = (((src[i+4] * 77 + src[i+5] * 151 + src[i+6] * 28) * src[i+7]) + ((255-src[i+7]) << 15) + 32768) >> 16 |
||||||
|
dst[j+2] = (((src[i+8] * 77 + src[i+9] * 151 + src[i+10] * 28) * src[i+11]) + ((255-src[i+11]) << 15) + 32768) >> 16 |
||||||
|
dst[j+3] = (((src[i+12] * 77 + src[i+13] * 151 + src[i+14] * 28) * src[i+15]) + ((255-src[i+15]) << 15) + 32768) >> 16 |
||||||
|
} |
||||||
|
for (; i < srcLength; i += 4, ++j) //finish up
|
||||||
|
dst[j] = (((src[i] * 77 + src[i+1] * 151 + src[i+2] * 28) * src[i+3]) + ((255-src[i+3]) << 15) + 32768) >> 16 |
||||||
|
image = dst; |
||||||
|
} else { throw 'Invalid ImageData' } |
||||||
|
return image |
||||||
|
} |
@ -0,0 +1,164 @@ |
|||||||
|
module.exports = function DumpLiterallyEverything(Module, base){ |
||||||
|
var ri = base.GetIterator(); |
||||||
|
var blocks = []; |
||||||
|
var block, para, textline, word, symbol; |
||||||
|
|
||||||
|
function enumToString(value, prefix){ |
||||||
|
return (Object.keys(Module) |
||||||
|
.filter(function(e){ return e.substr(0, prefix.length + 1) == prefix + '_' }) |
||||||
|
.filter(function(e){ return Module[e] === value }) |
||||||
|
.map(function(e){ return e.slice(prefix.length + 1) })[0]) |
||||||
|
} |
||||||
|
|
||||||
|
ri.Begin() |
||||||
|
do { |
||||||
|
if(ri.IsAtBeginningOf(Module.RIL_BLOCK)){ |
||||||
|
var poly = ri.BlockPolygon(); |
||||||
|
var polygon = null; |
||||||
|
// BlockPolygon() returns null when automatic page segmentation is off
|
||||||
|
if(Module.getPointer(poly) > 0){ |
||||||
|
var n = poly.get_n(), |
||||||
|
px = poly.get_x(), |
||||||
|
py = poly.get_y(), |
||||||
|
polygon = []; |
||||||
|
for(var i = 0; i < n; i++){ |
||||||
|
polygon.push([px.getValue(i), py.getValue(i)]); |
||||||
|
} |
||||||
|
Module._ptaDestroy(Module.getPointer(poly)); |
||||||
|
} |
||||||
|
|
||||||
|
block = { |
||||||
|
paragraphs: [], |
||||||
|
|
||||||
|
text: ri.GetUTF8Text(Module.RIL_BLOCK), |
||||||
|
confidence: ri.Confidence(Module.RIL_BLOCK), |
||||||
|
baseline: ri.getBaseline(Module.RIL_BLOCK), |
||||||
|
bbox: ri.getBoundingBox(Module.RIL_BLOCK), |
||||||
|
|
||||||
|
blocktype: enumToString(ri.BlockType(), 'PT'), |
||||||
|
polygon: polygon |
||||||
|
} |
||||||
|
blocks.push(block) |
||||||
|
} |
||||||
|
if(ri.IsAtBeginningOf(Module.RIL_PARA)){ |
||||||
|
para = { |
||||||
|
lines: [], |
||||||
|
|
||||||
|
text: ri.GetUTF8Text(Module.RIL_PARA), |
||||||
|
confidence: ri.Confidence(Module.RIL_PARA), |
||||||
|
baseline: ri.getBaseline(Module.RIL_PARA), |
||||||
|
bbox: ri.getBoundingBox(Module.RIL_PARA), |
||||||
|
|
||||||
|
is_ltr: !!ri.ParagraphIsLtr() |
||||||
|
} |
||||||
|
block.paragraphs.push(para) |
||||||
|
} |
||||||
|
if(ri.IsAtBeginningOf(Module.RIL_TEXTLINE)){ |
||||||
|
textline = { |
||||||
|
words: [], |
||||||
|
|
||||||
|
text: ri.GetUTF8Text(Module.RIL_TEXTLINE), |
||||||
|
confidence: ri.Confidence(Module.RIL_TEXTLINE), |
||||||
|
baseline: ri.getBaseline(Module.RIL_TEXTLINE), |
||||||
|
bbox: ri.getBoundingBox(Module.RIL_TEXTLINE) |
||||||
|
} |
||||||
|
para.lines.push(textline) |
||||||
|
} |
||||||
|
if(ri.IsAtBeginningOf(Module.RIL_WORD)){ |
||||||
|
var fontInfo = ri.getWordFontAttributes(), |
||||||
|
wordDir = ri.WordDirection(); |
||||||
|
word = { |
||||||
|
symbols: [], |
||||||
|
choices: [], |
||||||
|
|
||||||
|
text: ri.GetUTF8Text(Module.RIL_WORD), |
||||||
|
confidence: ri.Confidence(Module.RIL_WORD), |
||||||
|
baseline: ri.getBaseline(Module.RIL_WORD), |
||||||
|
bbox: ri.getBoundingBox(Module.RIL_WORD), |
||||||
|
|
||||||
|
is_numeric: !!ri.WordIsNumeric(), |
||||||
|
in_dictionary: !!ri.WordIsFromDictionary(), |
||||||
|
direction: enumToString(wordDir, 'DIR'), |
||||||
|
language: ri.WordRecognitionLanguage(), |
||||||
|
|
||||||
|
is_bold: fontInfo.is_bold, |
||||||
|
is_italic: fontInfo.is_italic, |
||||||
|
is_underlined: fontInfo.is_underlined, |
||||||
|
is_monospace: fontInfo.is_monospace, |
||||||
|
is_serif: fontInfo.is_serif, |
||||||
|
is_smallcaps: fontInfo.is_smallcaps, |
||||||
|
font_size: fontInfo.pointsize, |
||||||
|
font_id: fontInfo.font_id, |
||||||
|
font_name: fontInfo.font_name, |
||||||
|
} |
||||||
|
var wc = new Module.WordChoiceIterator(ri); |
||||||
|
do { |
||||||
|
word.choices.push({ |
||||||
|
text: wc.GetUTF8Text(), |
||||||
|
confidence: wc.Confidence() |
||||||
|
}) |
||||||
|
} while (wc.Next()); |
||||||
|
Module.destroy(wc) |
||||||
|
textline.words.push(word) |
||||||
|
} |
||||||
|
|
||||||
|
var image = null; |
||||||
|
// var pix = ri.GetBinaryImage(Module.RIL_SYMBOL)
|
||||||
|
// var image = pix2array(pix);
|
||||||
|
// // for some reason it seems that things stop working if you destroy pics
|
||||||
|
// Module._pixDestroy(Module.getPointer(pix));
|
||||||
|
if(ri.IsAtBeginningOf(Module.RIL_SYMBOL)){ |
||||||
|
symbol = { |
||||||
|
choices: [], |
||||||
|
image: image, |
||||||
|
|
||||||
|
text: ri.GetUTF8Text(Module.RIL_SYMBOL), |
||||||
|
confidence: ri.Confidence(Module.RIL_SYMBOL), |
||||||
|
baseline: ri.getBaseline(Module.RIL_SYMBOL), |
||||||
|
bbox: ri.getBoundingBox(Module.RIL_SYMBOL), |
||||||
|
|
||||||
|
is_superscript: !!ri.SymbolIsSuperscript(), |
||||||
|
is_subscript: !!ri.SymbolIsSubscript(), |
||||||
|
is_dropcap: !!ri.SymbolIsDropcap(), |
||||||
|
} |
||||||
|
word.symbols.push(symbol) |
||||||
|
var ci = new Module.ChoiceIterator(ri); |
||||||
|
do { |
||||||
|
symbol.choices.push({ |
||||||
|
text: ci.GetUTF8Text(), |
||||||
|
confidence: ci.Confidence() |
||||||
|
}) |
||||||
|
} while (ci.Next()); |
||||||
|
Module.destroy(ci) |
||||||
|
} |
||||||
|
} while (ri.Next(Module.RIL_SYMBOL)); |
||||||
|
Module.destroy(ri) |
||||||
|
|
||||||
|
return { |
||||||
|
text: base.GetUTF8Text(), |
||||||
|
html: deindent(base.GetHOCRText()), |
||||||
|
|
||||||
|
confidence: base.MeanTextConf(), |
||||||
|
|
||||||
|
blocks: blocks, |
||||||
|
|
||||||
|
psm: enumToString(base.GetPageSegMode(), 'PSM'), |
||||||
|
oem: enumToString(base.oem(), 'OEM'), |
||||||
|
version: base.Version(), |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// the generated HOCR is excessively indented, so
|
||||||
|
// we get rid of that indentation
|
||||||
|
|
||||||
|
/**
 * Strip one level (two spaces) of indentation from every line that has
 * it, but only when the first line is indented that way.
 *
 * The previous check compared a two-character substring with a
 * one-character string, which is never equal, so nothing was stripped.
 *
 * @param {string} html - multi-line text
 * @returns {string} the text with the leading two spaces removed per line
 */
function deindent(html){
    var INDENT = '  '
    var lines = html.split('\n')
    if(lines[0].startsWith(INDENT)){
        lines = lines.map(function(line){
            return line.startsWith(INDENT) ? line.slice(INDENT.length) : line
        })
    }
    return lines.join('\n')
}
@ -0,0 +1,81 @@ |
|||||||
|
const adapter = require('../node/index.js') |
||||||
|
|
||||||
|
let jobCounter = 0; |
||||||
|
|
||||||
|
module.exports = class TesseractJob { |
||||||
|
constructor(instance){ |
||||||
|
this.id = 'Job-' + (++jobCounter) + '-' + Math.random().toString(16).slice(3, 8) |
||||||
|
|
||||||
|
this._instance = instance; |
||||||
|
this._resolve = [] |
||||||
|
this._reject = [] |
||||||
|
this._progress = [] |
||||||
|
this._finally = [] |
||||||
|
} |
||||||
|
|
||||||
|
then(resolve, reject){ |
||||||
|
if(this._resolve.push){ |
||||||
|
this._resolve.push(resolve) |
||||||
|
}else{ |
||||||
|
resolve(this._resolve) |
||||||
|
} |
||||||
|
|
||||||
|
if(reject) this.catch(reject); |
||||||
|
return this; |
||||||
|
} |
||||||
|
catch(reject){ |
||||||
|
if(this._reject.push){ |
||||||
|
this._reject.push(reject) |
||||||
|
}else{ |
||||||
|
reject(this._reject) |
||||||
|
} |
||||||
|
return this; |
||||||
|
} |
||||||
|
progress(fn){ |
||||||
|
this._progress.push(fn) |
||||||
|
return this; |
||||||
|
} |
||||||
|
finally(fn) { |
||||||
|
this._finally.push(fn) |
||||||
|
return this; |
||||||
|
} |
||||||
|
_send(action, payload){ |
||||||
|
adapter.sendPacket(this._instance, { |
||||||
|
jobId: this.id, |
||||||
|
action: action, |
||||||
|
payload: payload |
||||||
|
}) |
||||||
|
} |
||||||
|
|
||||||
|
_handle(packet){ |
||||||
|
var data = packet.data; |
||||||
|
let runFinallyCbs = false; |
||||||
|
|
||||||
|
if(packet.status === 'resolve'){ |
||||||
|
if(this._resolve.length === 0) console.log(data); |
||||||
|
this._resolve.forEach(fn => { |
||||||
|
var ret = fn(data); |
||||||
|
if(ret && typeof ret.then == 'function'){ |
||||||
|
console.warn('TesseractJob instances do not chain like ES6 Promises. To convert it into a real promise, use Promise.resolve.') |
||||||
|
} |
||||||
|
}) |
||||||
|
this._resolve = data; |
||||||
|
this._instance._dequeue() |
||||||
|
runFinallyCbs = true; |
||||||
|
}else if(packet.status === 'reject'){ |
||||||
|
if(this._reject.length === 0) console.error(data); |
||||||
|
this._reject.forEach(fn => fn(data)) |
||||||
|
this._reject = data; |
||||||
|
this._instance._dequeue() |
||||||
|
runFinallyCbs = true; |
||||||
|
}else if(packet.status === 'progress'){ |
||||||
|
this._progress.forEach(fn => fn(data)) |
||||||
|
}else{ |
||||||
|
console.warn('Message type unknown', packet.status) |
||||||
|
} |
||||||
|
|
||||||
|
if (runFinallyCbs) { |
||||||
|
this._finally.forEach(fn => fn(data)); |
||||||
|
} |
||||||
|
} |
||||||
|
} |
@ -0,0 +1 @@ |
|||||||
|
{"afr": 1079573, "ara": 1701536, "aze": 1420865, "bel": 1276820, "ben": 6772012, "bul": 1605615, "cat": 1652368, "ces": 1035441, "chi_sim": 17710414, "chi_tra": 24717749, "chr": 320649, "dan-frak": 677656, "dan": 1972936, "deu-frak": 822644, "deu": 991656, "ell": 859719, "eng": 9453554, "enm": 619254, "epo": 1241212, "equ": 821130, "est": 1905040, "eus": 1641190, "fin": 979418, "fra": 1376221, "frk": 5912963, "frm": 5147082, "glg": 1674938, "grc": 3012615, "heb": 1051501, "hin": 6590065, "hrv": 1926995, "hun": 3074473, "ind": 1874776, "isl": 1634041, "ita": 948593, "ita_old": 3436571, "jpn": 13507168, "kan": 4390317, "kor": 5353098, "lav": 1843944, "lit": 1779240, "mal": 5966263, "meme": 88453, "mkd": 1163087, "mlt": 1463001, "msa": 1665427, "nld": 1134708, "nor": 2191610, "osd": 4274649, "pol": 7024662, "por": 909359, "ron": 915680, "rus": 5969957, "slk-frak": 289885, "slk": 2217342, "slv": 1611338, "spa": 883170, "spa_old": 5647453, "sqi": 1667041, "srp": 1770244, "swa": 757916, "swe": 2451917, "tam": 3498763, "tel": 5795246, "tgl": 1496256, "tha": 3811136, "tur": 3563264, "ukr": 937566, "vie": 2195922} |
@ -0,0 +1,165 @@ |
|||||||
|
var latestJob, |
||||||
|
Module, |
||||||
|
base, |
||||||
|
adapter = {}, |
||||||
|
dump = require('./dump.js'), |
||||||
|
desaturate = require('./desaturate.js'); |
||||||
|
|
||||||
|
/**
 * Run the handler for `packet.action` and report through `send`.
 *
 * Every message passed to `send` has the shape
 * `{ jobId, status, action, data }` with status 'resolve', 'reject' or
 * 'progress'. The responder of the most recent packet is also stored in
 * `latestJob`.
 *
 * @param {object} packet - `{ jobId, action, payload }`
 * @param {function(object)} send - delivers a response message
 */
function dispatchHandlers(packet, send){
    function respond(status, data){
        send({
            jobId: packet.jobId,
            status,
            action: packet.action,
            data
        });
    }
    respond.resolve = data => respond('resolve', data);
    respond.reject = data => respond('reject', data);
    respond.progress = data => respond('progress', data);

    latestJob = respond;

    try {
        if(packet.action === 'recognize'){
            handleRecognize(packet.payload, respond);
        } else if (packet.action === 'detect'){
            handleDetect(packet.payload, respond);
        } else {
            // Answer unknown actions; staying silent would leave the
            // requesting job waiting for a response that never comes.
            respond.reject('Unknown action: ' + packet.action);
        }
    } catch (err) {
        // Prepare exception to travel through postMessage.
        // String() also copes with `throw null` / `throw undefined`.
        respond.reject(String(err))
    }
}
||||||
|
exports.dispatchHandlers = dispatchHandlers; |
||||||
|
|
||||||
|
exports.setAdapter = function setAdapter(impl){ |
||||||
|
adapter = impl; |
||||||
|
}; |
||||||
|
|
||||||
|
|
||||||
|
// Create the core module and the API object (module-level `Module` and
// `base`) unless a module with at least the required memory exists already.
function handleInit(req, res){
    // 'chi_sim', 'chi_tra' and 'jpn' get the larger memory requirement
    const wantsLargeHeap = ['chi_sim', 'chi_tra', 'jpn'].includes(req.options.lang);
    const MIN_MEMORY = wantsLargeHeap ? 167772160 : 100663296;

    const alreadyInitialized = Boolean(Module) && !(Module.TOTAL_MEMORY < MIN_MEMORY);
    if(alreadyInitialized) return;

    const Core = adapter.getCore(req, res);
    const status = 'initializing tesseract';

    res.progress({ status: status, progress: 0 });

    Module = Core({
        TOTAL_MEMORY: MIN_MEMORY,
        // Forwarded to the latest job; values up to 30 map to 0,
        // 100 maps to 1.
        TesseractProgress(percent){
            const progress = Math.max(0, (percent-30)/70);
            latestJob.progress({ status: 'recognizing text', progress: progress });
        },
        onRuntimeInitialized() {}
    });

    Module.FS_createPath("/", "tessdata", true, true);
    base = new Module.TessBaseAPI();
    res.progress({ status: status, progress: 1 });
}
||||||
|
|
||||||
|
// Desaturate `image`, copy the result into memory allocated through
// `Module` and register it with `base` (1 byte per pixel, `width` bytes
// per line, whole image as rectangle). Returns the allocated pointer;
// handleRecognize/handleDetect pass it to Module._free later.
function setImage(Module, base, image){
    const grayscale = desaturate(image);
    const columns = image.width;
    const rows = image.height;

    const pointer = Module.allocate(grayscale, 'i8', Module.ALLOC_NORMAL);
    const wrapped = Module.wrapPointer(pointer);
    base.SetImage(wrapped, columns, rows, 1, columns);
    base.SetRectangle(0, 0, columns, rows);
    return pointer;
}
||||||
|
|
||||||
|
// Make sure `<lang>.traineddata` exists in the module's 'tessdata'
// directory, then call `cb`. Languages already loaded into the current
// module are remembered on `Module._loadedLanguages`.
function loadLanguage(req, res, cb){
    const lang = req.options.lang;
    const langFile = `${lang}.traineddata`;
    const report = (progress) => res.progress({ status: 'loading ' + langFile, progress: progress });

    if(!Module._loadedLanguages) Module._loadedLanguages = {};
    const loaded = Module._loadedLanguages;
    if(lang in loaded) return cb();

    adapter.getLanguageData(req, res, (data) => {
        report(0);
        Module.FS_createDataFile('tessdata', langFile, data, true, false);
        Module._loadedLanguages[lang] = true;
        report(1);
        cb();
    })
}
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Handle a 'recognize' request: initialize the engine, make sure the
 * language is loaded, apply all options as variables, recognize
 * `req.image` and resolve with the dumped result.
 *
 * The language callback may run asynchronously, outside the try/catch of
 * the dispatcher. Errors are therefore caught here and turned into a
 * rejection, and the engine/image buffer are released on every path.
 *
 * @param {object} req - payload with `options` (including `lang`) and `image`
 * @param {object} res - responder with `progress`, `resolve` and `reject`
 */
function handleRecognize(req, res){
    handleInit(req, res);

    loadLanguage(req, res, () => {
        var options = req.options;

        function progressUpdate(progress){
            res.progress({ status: 'initializing api', progress: progress });
        }

        var ptr;
        var outcome;
        try {
            progressUpdate(0);
            base.Init(null, options.lang);
            progressUpdate(.3);

            // own enumerable options only, in definition order
            Object.keys(options).forEach(function(option){
                base.SetVariable(option, options[option]);
            });

            progressUpdate(.6);
            ptr = setImage(Module, base, req.image);
            progressUpdate(1);

            base.Recognize(null);

            outcome = { ok: true, value: dump(Module, base) };
        } catch (err) {
            // string form, so the error can travel through postMessage
            outcome = { ok: false, value: String(err) };
        } finally {
            // NOTE(review): assumes base.End() is safe after a failed Init — confirm
            base.End();
            if(ptr !== undefined) Module._free(ptr);
        }

        if(outcome.ok){
            res.resolve(outcome.value);
        }else{
            res.reject(outcome.value);
        }
    })
}
||||||
|
|
||||||
|
|
||||||
|
/**
 * Handle a 'detect' request: run orientation/script detection on
 * `req.image` with the 'osd' data and resolve with
 * `{ tesseract_script_id, script, script_confidence,
 *    orientation_degrees, orientation_confidence }`.
 *
 * The language callback may run asynchronously, outside the try/catch of
 * the dispatcher. Errors are therefore caught here and turned into a
 * rejection, and the engine/image buffer are released on every path.
 *
 * @param {object} req - payload with `options` and `image`; `options.lang` is set to 'osd'
 * @param {object} res - responder with `progress`, `resolve` and `reject`
 */
function handleDetect(req, res){
    handleInit(req, res);
    // loadLanguage reads the language from req.options.lang
    req.options.lang = 'osd';
    loadLanguage(req, res, () => {
        var ptr;
        var outcome;
        try {
            base.Init(null, 'osd');
            base.SetPageSegMode(Module.PSM_OSD_ONLY);

            ptr = setImage(Module, base, req.image);
            var results = new Module.OSResults();

            if(!base.DetectOS(results)){
                outcome = { ok: false, value: "Failed to detect OS" };
            } else {
                var best = results.get_best_result(),
                    oid = best.get_orientation_id(),
                    sid = best.get_script_id();

                // All values are read before the engine is shut down below.
                // NOTE(review): presumably the data behind `results` belongs
                // to the engine and is not safe to read after End() — confirm
                outcome = {
                    ok: true,
                    value: {
                        tesseract_script_id: sid,
                        script: results.get_unicharset().get_script_from_script_id(sid),
                        script_confidence: best.get_sconfidence(),
                        // orientation id -> degrees
                        orientation_degrees: [0, 270, 180, 90][oid],
                        orientation_confidence: best.get_oconfidence()
                    }
                };
            }
        } catch (err) {
            // string form, so the error can travel through postMessage
            outcome = { ok: false, value: String(err) };
        } finally {
            // NOTE(review): assumes base.End() is safe after a failed Init — confirm
            base.End();
            if(ptr !== undefined) Module._free(ptr);
        }

        if(outcome.ok){
            res.resolve(outcome.value);
        }else{
            res.reject(outcome.value);
        }
    });
}
@ -1,12 +0,0 @@ |
|||||||
/* |
|
||||||
* OEM = OCR Engine Mode, and there are 4 possible modes. |
|
||||||
* |
|
||||||
* By default tesseract.js uses LSTM_ONLY mode. |
|
||||||
* |
|
||||||
*/ |
|
||||||
module.exports = { |
|
||||||
TESSERACT_ONLY: 0, |
|
||||||
LSTM_ONLY: 1, |
|
||||||
TESSERACT_LSTM_COMBINED: 2, |
|
||||||
DEFAULT: 3, |
|
||||||
}; |
|
@ -1,19 +0,0 @@ |
|||||||
/* |
|
||||||
* PSM = Page Segmentation Mode |
|
||||||
*/ |
|
||||||
module.exports = { |
|
||||||
OSD_ONLY: '0', |
|
||||||
AUTO_OSD: '1', |
|
||||||
AUTO_ONLY: '2', |
|
||||||
AUTO: '3', |
|
||||||
SINGLE_COLUMN: '4', |
|
||||||
SINGLE_BLOCK_VERT_TEXT: '5', |
|
||||||
SINGLE_BLOCK: '6', |
|
||||||
SINGLE_LINE: '7', |
|
||||||
SINGLE_WORD: '8', |
|
||||||
CIRCLE_WORD: '9', |
|
||||||
SINGLE_CHAR: '10', |
|
||||||
SPARSE_TEXT: '11', |
|
||||||
SPARSE_TEXT_OSD: '12', |
|
||||||
RAW_LINE: '13', |
|
||||||
}; |
|
@ -1,5 +0,0 @@ |
|||||||
const OEM = require('./OEM'); |
|
||||||
|
|
||||||
module.exports = { |
|
||||||
defaultOEM: OEM.DEFAULT, |
|
||||||
}; |
|
@ -1,13 +0,0 @@ |
|||||||
module.exports = { |
|
||||||
/* |
|
||||||
* default path for downloading *.traineddata |
|
||||||
*/ |
|
||||||
langPath: 'https://tessdata.projectnaptha.com/4.0.0', |
|
||||||
/* |
|
||||||
* Use BlobURL for worker script by default |
|
||||||
* TODO: remove this option |
|
||||||
* |
|
||||||
*/ |
|
||||||
workerBlobURL: true, |
|
||||||
logger: () => {}, |
|
||||||
}; |
|
@ -1,218 +0,0 @@ |
|||||||
/* |
|
||||||
* languages with existing tesseract traineddata |
|
||||||
* https://tesseract-ocr.github.io/tessdoc/Data-Files#data-files-for-version-400-november-29-2016
|
|
||||||
*/ |
|
||||||
|
|
||||||
/** |
|
||||||
* @typedef {object} Languages |
|
||||||
* @property {string} AFR Afrikaans |
|
||||||
* @property {string} AMH Amharic |
|
||||||
* @property {string} ARA Arabic |
|
||||||
* @property {string} ASM Assamese |
|
||||||
* @property {string} AZE Azerbaijani |
|
||||||
* @property {string} AZE_CYRL Azerbaijani - Cyrillic |
|
||||||
* @property {string} BEL Belarusian |
|
||||||
* @property {string} BEN Bengali |
|
||||||
* @property {string} BOD Tibetan |
|
||||||
* @property {string} BOS Bosnian |
|
||||||
* @property {string} BUL Bulgarian |
|
||||||
* @property {string} CAT Catalan; Valencian |
|
||||||
* @property {string} CEB Cebuano |
|
||||||
* @property {string} CES Czech |
|
||||||
* @property {string} CHI_SIM Chinese - Simplified |
|
||||||
* @property {string} CHI_TRA Chinese - Traditional |
|
||||||
* @property {string} CHR Cherokee |
|
||||||
* @property {string} CYM Welsh |
|
||||||
* @property {string} DAN Danish |
|
||||||
* @property {string} DEU German |
|
||||||
* @property {string} DZO Dzongkha |
|
||||||
* @property {string} ELL Greek, Modern (1453-) |
|
||||||
* @property {string} ENG English |
|
||||||
* @property {string} ENM English, Middle (1100-1500) |
|
||||||
* @property {string} EPO Esperanto |
|
||||||
* @property {string} EST Estonian |
|
||||||
* @property {string} EUS Basque |
|
||||||
* @property {string} FAS Persian |
|
||||||
* @property {string} FIN Finnish |
|
||||||
* @property {string} FRA French |
|
||||||
* @property {string} FRK German Fraktur |
|
||||||
* @property {string} FRM French, Middle (ca. 1400-1600) |
|
||||||
* @property {string} GLE Irish |
|
||||||
* @property {string} GLG Galician |
|
||||||
* @property {string} GRC Greek, Ancient (-1453) |
|
||||||
* @property {string} GUJ Gujarati |
|
||||||
* @property {string} HAT Haitian; Haitian Creole |
|
||||||
* @property {string} HEB Hebrew |
|
||||||
* @property {string} HIN Hindi |
|
||||||
* @property {string} HRV Croatian |
|
||||||
* @property {string} HUN Hungarian |
|
||||||
* @property {string} IKU Inuktitut |
|
||||||
* @property {string} IND Indonesian |
|
||||||
* @property {string} ISL Icelandic |
|
||||||
* @property {string} ITA Italian |
|
||||||
* @property {string} ITA_OLD Italian - Old |
|
||||||
* @property {string} JAV Javanese |
|
||||||
* @property {string} JPN Japanese |
|
||||||
* @property {string} KAN Kannada |
|
||||||
* @property {string} KAT Georgian |
|
||||||
* @property {string} KAT_OLD Georgian - Old |
|
||||||
* @property {string} KAZ Kazakh |
|
||||||
* @property {string} KHM Central Khmer |
|
||||||
* @property {string} KIR Kirghiz; Kyrgyz |
|
||||||
* @property {string} KOR Korean |
|
||||||
* @property {string} KUR Kurdish |
|
||||||
* @property {string} LAO Lao |
|
||||||
* @property {string} LAT Latin |
|
||||||
* @property {string} LAV Latvian |
|
||||||
* @property {string} LIT Lithuanian |
|
||||||
* @property {string} MAL Malayalam |
|
||||||
* @property {string} MAR Marathi |
|
||||||
* @property {string} MKD Macedonian |
|
||||||
* @property {string} MLT Maltese |
|
||||||
* @property {string} MSA Malay |
|
||||||
* @property {string} MYA Burmese |
|
||||||
* @property {string} NEP Nepali |
|
||||||
* @property {string} NLD Dutch; Flemish |
|
||||||
* @property {string} NOR Norwegian |
|
||||||
* @property {string} ORI Oriya |
|
||||||
* @property {string} PAN Panjabi; Punjabi |
|
||||||
* @property {string} POL Polish |
|
||||||
* @property {string} POR Portuguese |
|
||||||
* @property {string} PUS Pushto; Pashto |
|
||||||
* @property {string} RON Romanian; Moldavian; Moldovan |
|
||||||
* @property {string} RUS Russian |
|
||||||
* @property {string} SAN Sanskrit |
|
||||||
* @property {string} SIN Sinhala; Sinhalese |
|
||||||
* @property {string} SLK Slovak |
|
||||||
* @property {string} SLV Slovenian |
|
||||||
* @property {string} SPA Spanish; Castilian |
|
||||||
* @property {string} SPA_OLD Spanish; Castilian - Old |
|
||||||
* @property {string} SQI Albanian |
|
||||||
* @property {string} SRP Serbian |
|
||||||
* @property {string} SRP_LATN Serbian - Latin |
|
||||||
* @property {string} SWA Swahili |
|
||||||
* @property {string} SWE Swedish |
|
||||||
* @property {string} SYR Syriac |
|
||||||
* @property {string} TAM Tamil |
|
||||||
* @property {string} TEL Telugu |
|
||||||
* @property {string} TGK Tajik |
|
||||||
* @property {string} TGL Tagalog |
|
||||||
* @property {string} THA Thai |
|
||||||
* @property {string} TIR Tigrinya |
|
||||||
* @property {string} TUR Turkish |
|
||||||
* @property {string} UIG Uighur; Uyghur |
|
||||||
* @property {string} UKR Ukrainian |
|
||||||
* @property {string} URD Urdu |
|
||||||
* @property {string} UZB Uzbek |
|
||||||
* @property {string} UZB_CYRL Uzbek - Cyrillic |
|
||||||
* @property {string} VIE Vietnamese |
|
||||||
* @property {string} YID Yiddish |
|
||||||
*/ |
|
||||||
|
|
||||||
/** |
|
||||||
* @type {Languages} |
|
||||||
*/ |
|
||||||
module.exports = { |
|
||||||
AFR: 'afr', |
|
||||||
AMH: 'amh', |
|
||||||
ARA: 'ara', |
|
||||||
ASM: 'asm', |
|
||||||
AZE: 'aze', |
|
||||||
AZE_CYRL: 'aze_cyrl', |
|
||||||
BEL: 'bel', |
|
||||||
BEN: 'ben', |
|
||||||
BOD: 'bod', |
|
||||||
BOS: 'bos', |
|
||||||
BUL: 'bul', |
|
||||||
CAT: 'cat', |
|
||||||
CEB: 'ceb', |
|
||||||
CES: 'ces', |
|
||||||
CHI_SIM: 'chi_sim', |
|
||||||
CHI_TRA: 'chi_tra', |
|
||||||
CHR: 'chr', |
|
||||||
CYM: 'cym', |
|
||||||
DAN: 'dan', |
|
||||||
DEU: 'deu', |
|
||||||
DZO: 'dzo', |
|
||||||
ELL: 'ell', |
|
||||||
ENG: 'eng', |
|
||||||
ENM: 'enm', |
|
||||||
EPO: 'epo', |
|
||||||
EST: 'est', |
|
||||||
EUS: 'eus', |
|
||||||
FAS: 'fas', |
|
||||||
FIN: 'fin', |
|
||||||
FRA: 'fra', |
|
||||||
FRK: 'frk', |
|
||||||
FRM: 'frm', |
|
||||||
GLE: 'gle', |
|
||||||
GLG: 'glg', |
|
||||||
GRC: 'grc', |
|
||||||
GUJ: 'guj', |
|
||||||
HAT: 'hat', |
|
||||||
HEB: 'heb', |
|
||||||
HIN: 'hin', |
|
||||||
HRV: 'hrv', |
|
||||||
HUN: 'hun', |
|
||||||
IKU: 'iku', |
|
||||||
IND: 'ind', |
|
||||||
ISL: 'isl', |
|
||||||
ITA: 'ita', |
|
||||||
ITA_OLD: 'ita_old', |
|
||||||
JAV: 'jav', |
|
||||||
JPN: 'jpn', |
|
||||||
KAN: 'kan', |
|
||||||
KAT: 'kat', |
|
||||||
KAT_OLD: 'kat_old', |
|
||||||
KAZ: 'kaz', |
|
||||||
KHM: 'khm', |
|
||||||
KIR: 'kir', |
|
||||||
KOR: 'kor', |
|
||||||
KUR: 'kur', |
|
||||||
LAO: 'lao', |
|
||||||
LAT: 'lat', |
|
||||||
LAV: 'lav', |
|
||||||
LIT: 'lit', |
|
||||||
MAL: 'mal', |
|
||||||
MAR: 'mar', |
|
||||||
MKD: 'mkd', |
|
||||||
MLT: 'mlt', |
|
||||||
MSA: 'msa', |
|
||||||
MYA: 'mya', |
|
||||||
NEP: 'nep', |
|
||||||
NLD: 'nld', |
|
||||||
NOR: 'nor', |
|
||||||
ORI: 'ori', |
|
||||||
PAN: 'pan', |
|
||||||
POL: 'pol', |
|
||||||
POR: 'por', |
|
||||||
PUS: 'pus', |
|
||||||
RON: 'ron', |
|
||||||
RUS: 'rus', |
|
||||||
SAN: 'san', |
|
||||||
SIN: 'sin', |
|
||||||
SLK: 'slk', |
|
||||||
SLV: 'slv', |
|
||||||
SPA: 'spa', |
|
||||||
SPA_OLD: 'spa_old', |
|
||||||
SQI: 'sqi', |
|
||||||
SRP: 'srp', |
|
||||||
SRP_LATN: 'srp_latn', |
|
||||||
SWA: 'swa', |
|
||||||
SWE: 'swe', |
|
||||||
SYR: 'syr', |
|
||||||
TAM: 'tam', |
|
||||||
TEL: 'tel', |
|
||||||
TGK: 'tgk', |
|
||||||
TGL: 'tgl', |
|
||||||
THA: 'tha', |
|
||||||
TIR: 'tir', |
|
||||||
TUR: 'tur', |
|
||||||
UIG: 'uig', |
|
||||||
UKR: 'ukr', |
|
||||||
URD: 'urd', |
|
||||||
UZB: 'uzb', |
|
||||||
UZB_CYRL: 'uzb_cyrl', |
|
||||||
VIE: 'vie', |
|
||||||
YID: 'yid', |
|
||||||
}; |
|
@ -1,21 +0,0 @@ |
|||||||
const getId = require('./utils/getId'); |
|
||||||
|
|
||||||
let jobCounter = 0; |
|
||||||
|
|
||||||
module.exports = ({ |
|
||||||
id: _id, |
|
||||||
action, |
|
||||||
payload = {}, |
|
||||||
}) => { |
|
||||||
let id = _id; |
|
||||||
if (typeof id === 'undefined') { |
|
||||||
id = getId('Job', jobCounter); |
|
||||||
jobCounter += 1; |
|
||||||
} |
|
||||||
|
|
||||||
return { |
|
||||||
id, |
|
||||||
action, |
|
||||||
payload, |
|
||||||
}; |
|
||||||
}; |
|
@ -1,80 +0,0 @@ |
|||||||
const createJob = require('./createJob'); |
|
||||||
const { log } = require('./utils/log'); |
|
||||||
const getId = require('./utils/getId'); |
|
||||||
|
|
||||||
let schedulerCounter = 0; |
|
||||||
|
|
||||||
module.exports = () => { |
|
||||||
const id = getId('Scheduler', schedulerCounter); |
|
||||||
const workers = {}; |
|
||||||
const runningWorkers = {}; |
|
||||||
let jobQueue = []; |
|
||||||
|
|
||||||
schedulerCounter += 1; |
|
||||||
|
|
||||||
const getQueueLen = () => jobQueue.length; |
|
||||||
const getNumWorkers = () => Object.keys(workers).length; |
|
||||||
|
|
||||||
const dequeue = () => { |
|
||||||
if (jobQueue.length !== 0) { |
|
||||||
const wIds = Object.keys(workers); |
|
||||||
for (let i = 0; i < wIds.length; i += 1) { |
|
||||||
if (typeof runningWorkers[wIds[i]] === 'undefined') { |
|
||||||
jobQueue[0](workers[wIds[i]]); |
|
||||||
break; |
|
||||||
} |
|
||||||
} |
|
||||||
} |
|
||||||
}; |
|
||||||
|
|
||||||
const queue = (action, payload) => ( |
|
||||||
new Promise((resolve, reject) => { |
|
||||||
const job = createJob({ action, payload }); |
|
||||||
jobQueue.push(async (w) => { |
|
||||||
jobQueue.shift(); |
|
||||||
runningWorkers[w.id] = job; |
|
||||||
try { |
|
||||||
resolve(await w[action].apply(this, [...payload, job.id])); |
|
||||||
} catch (err) { |
|
||||||
reject(err); |
|
||||||
} finally { |
|
||||||
delete runningWorkers[w.id]; |
|
||||||
dequeue(); |
|
||||||
} |
|
||||||
}); |
|
||||||
log(`[${id}]: Add ${job.id} to JobQueue`); |
|
||||||
log(`[${id}]: JobQueue length=${jobQueue.length}`); |
|
||||||
dequeue(); |
|
||||||
}) |
|
||||||
); |
|
||||||
|
|
||||||
const addWorker = (w) => { |
|
||||||
workers[w.id] = w; |
|
||||||
log(`[${id}]: Add ${w.id}`); |
|
||||||
log(`[${id}]: Number of workers=${getNumWorkers()}`); |
|
||||||
dequeue(); |
|
||||||
return w.id; |
|
||||||
}; |
|
||||||
|
|
||||||
const addJob = async (action, ...payload) => { |
|
||||||
if (getNumWorkers() === 0) { |
|
||||||
throw Error(`[${id}]: You need to have at least one worker before adding jobs`); |
|
||||||
} |
|
||||||
return queue(action, payload); |
|
||||||
}; |
|
||||||
|
|
||||||
const terminate = async () => { |
|
||||||
Object.keys(workers).forEach(async (wid) => { |
|
||||||
await workers[wid].terminate(); |
|
||||||
}); |
|
||||||
jobQueue = []; |
|
||||||
}; |
|
||||||
|
|
||||||
return { |
|
||||||
addWorker, |
|
||||||
addJob, |
|
||||||
terminate, |
|
||||||
getQueueLen, |
|
||||||
getNumWorkers, |
|
||||||
}; |
|
||||||
}; |
|
@ -1,198 +0,0 @@ |
|||||||
const resolvePaths = require('./utils/resolvePaths'); |
|
||||||
const circularize = require('./utils/circularize'); |
|
||||||
const createJob = require('./createJob'); |
|
||||||
const { log } = require('./utils/log'); |
|
||||||
const getId = require('./utils/getId'); |
|
||||||
const { defaultOEM } = require('./constants/config'); |
|
||||||
const { |
|
||||||
defaultOptions, |
|
||||||
spawnWorker, |
|
||||||
terminateWorker, |
|
||||||
onMessage, |
|
||||||
loadImage, |
|
||||||
send, |
|
||||||
} = require('./worker/node'); |
|
||||||
|
|
||||||
let workerCounter = 0; |
|
||||||
|
|
||||||
module.exports = (_options = {}) => { |
|
||||||
const id = getId('Worker', workerCounter); |
|
||||||
const { |
|
||||||
logger, |
|
||||||
errorHandler, |
|
||||||
...options |
|
||||||
} = resolvePaths({ |
|
||||||
...defaultOptions, |
|
||||||
..._options, |
|
||||||
}); |
|
||||||
const resolves = {}; |
|
||||||
const rejects = {}; |
|
||||||
let worker = spawnWorker(options); |
|
||||||
|
|
||||||
workerCounter += 1; |
|
||||||
|
|
||||||
const setResolve = (action, res) => { |
|
||||||
resolves[action] = res; |
|
||||||
}; |
|
||||||
|
|
||||||
const setReject = (action, rej) => { |
|
||||||
rejects[action] = rej; |
|
||||||
}; |
|
||||||
|
|
||||||
const startJob = ({ id: jobId, action, payload }) => ( |
|
||||||
new Promise((resolve, reject) => { |
|
||||||
log(`[${id}]: Start ${jobId}, action=${action}`); |
|
||||||
setResolve(action, resolve); |
|
||||||
setReject(action, reject); |
|
||||||
send(worker, { |
|
||||||
workerId: id, |
|
||||||
jobId, |
|
||||||
action, |
|
||||||
payload, |
|
||||||
}); |
|
||||||
}) |
|
||||||
); |
|
||||||
|
|
||||||
const load = (jobId) => ( |
|
||||||
startJob(createJob({ |
|
||||||
id: jobId, action: 'load', payload: { options }, |
|
||||||
})) |
|
||||||
); |
|
||||||
|
|
||||||
const writeText = (path, text, jobId) => ( |
|
||||||
startJob(createJob({ |
|
||||||
id: jobId, |
|
||||||
action: 'FS', |
|
||||||
payload: { method: 'writeFile', args: [path, text] }, |
|
||||||
})) |
|
||||||
); |
|
||||||
|
|
||||||
const readText = (path, jobId) => ( |
|
||||||
startJob(createJob({ |
|
||||||
id: jobId, |
|
||||||
action: 'FS', |
|
||||||
payload: { method: 'readFile', args: [path, { encoding: 'utf8' }] }, |
|
||||||
})) |
|
||||||
); |
|
||||||
|
|
||||||
const removeFile = (path, jobId) => ( |
|
||||||
startJob(createJob({ |
|
||||||
id: jobId, |
|
||||||
action: 'FS', |
|
||||||
payload: { method: 'unlink', args: [path] }, |
|
||||||
})) |
|
||||||
); |
|
||||||
|
|
||||||
const FS = (method, args, jobId) => ( |
|
||||||
startJob(createJob({ |
|
||||||
id: jobId, |
|
||||||
action: 'FS', |
|
||||||
payload: { method, args }, |
|
||||||
})) |
|
||||||
); |
|
||||||
|
|
||||||
const loadLanguage = (langs = 'eng', jobId) => ( |
|
||||||
startJob(createJob({ |
|
||||||
id: jobId, |
|
||||||
action: 'loadLanguage', |
|
||||||
payload: { langs, options }, |
|
||||||
})) |
|
||||||
); |
|
||||||
|
|
||||||
const initialize = (langs = 'eng', oem = defaultOEM, jobId) => ( |
|
||||||
startJob(createJob({ |
|
||||||
id: jobId, |
|
||||||
action: 'initialize', |
|
||||||
payload: { langs, oem }, |
|
||||||
})) |
|
||||||
); |
|
||||||
|
|
||||||
const setParameters = (params = {}, jobId) => ( |
|
||||||
startJob(createJob({ |
|
||||||
id: jobId, |
|
||||||
action: 'setParameters', |
|
||||||
payload: { params }, |
|
||||||
})) |
|
||||||
); |
|
||||||
|
|
||||||
const recognize = async (image, opts = {}, jobId) => ( |
|
||||||
startJob(createJob({ |
|
||||||
id: jobId, |
|
||||||
action: 'recognize', |
|
||||||
payload: { image: await loadImage(image), options: opts }, |
|
||||||
})) |
|
||||||
); |
|
||||||
|
|
||||||
const getPDF = (title = 'Tesseract OCR Result', textonly = false, jobId) => ( |
|
||||||
startJob(createJob({ |
|
||||||
id: jobId, |
|
||||||
action: 'getPDF', |
|
||||||
payload: { title, textonly }, |
|
||||||
})) |
|
||||||
); |
|
||||||
|
|
||||||
const detect = async (image, jobId) => ( |
|
||||||
startJob(createJob({ |
|
||||||
id: jobId, |
|
||||||
action: 'detect', |
|
||||||
payload: { image: await loadImage(image) }, |
|
||||||
})) |
|
||||||
); |
|
||||||
|
|
||||||
const terminate = async () => { |
|
||||||
if (worker !== null) { |
|
||||||
/* |
|
||||||
await startJob(createJob({ |
|
||||||
id: jobId, |
|
||||||
action: 'terminate', |
|
||||||
})); |
|
||||||
*/ |
|
||||||
terminateWorker(worker); |
|
||||||
worker = null; |
|
||||||
} |
|
||||||
return Promise.resolve(); |
|
||||||
}; |
|
||||||
|
|
||||||
onMessage(worker, ({ |
|
||||||
workerId, jobId, status, action, data, |
|
||||||
}) => { |
|
||||||
if (status === 'resolve') { |
|
||||||
log(`[${workerId}]: Complete ${jobId}`); |
|
||||||
let d = data; |
|
||||||
if (action === 'recognize') { |
|
||||||
d = circularize(data); |
|
||||||
} else if (action === 'getPDF') { |
|
||||||
d = Array.from({ ...data, length: Object.keys(data).length }); |
|
||||||
} |
|
||||||
resolves[action]({ jobId, data: d }); |
|
||||||
} else if (status === 'reject') { |
|
||||||
rejects[action](data); |
|
||||||
if (errorHandler) { |
|
||||||
errorHandler(data); |
|
||||||
} else { |
|
||||||
throw Error(data); |
|
||||||
} |
|
||||||
} else if (status === 'progress') { |
|
||||||
logger({ ...data, userJobId: jobId }); |
|
||||||
} |
|
||||||
}); |
|
||||||
|
|
||||||
return { |
|
||||||
id, |
|
||||||
worker, |
|
||||||
setResolve, |
|
||||||
setReject, |
|
||||||
load, |
|
||||||
writeText, |
|
||||||
readText, |
|
||||||
removeFile, |
|
||||||
FS, |
|
||||||
loadLanguage, |
|
||||||
initialize, |
|
||||||
setParameters, |
|
||||||
recognize, |
|
||||||
getPDF, |
|
||||||
detect, |
|
||||||
terminate, |
|
||||||
}; |
|
||||||
}; |
|
@ -1,231 +0,0 @@ |
|||||||
declare namespace Tesseract { |
|
||||||
function createScheduler(): Scheduler |
|
||||||
function createWorker(options?: Partial<WorkerOptions>): Worker |
|
||||||
function setLogging(logging: boolean): void |
|
||||||
function recognize(image: ImageLike, langs?: string, options?: Partial<WorkerOptions>): Promise<RecognizeResult> |
|
||||||
function detect(image: ImageLike, options?: Partial<WorkerOptions>): any |
|
||||||
|
|
||||||
interface Scheduler { |
|
||||||
addWorker(worker: Worker): string |
|
||||||
addJob(action: string, ...args: any[]): Promise<ConfigResult | RecognizeResult | DetectResult> |
|
||||||
terminate(): Promise<any> |
|
||||||
getQueueLen(): number |
|
||||||
getNumWorkers(): number |
|
||||||
} |
|
||||||
|
|
||||||
/**
 * Object returned by `createWorker`. Every method accepts an optional
 * trailing `jobId` and returns a promise for the worker's answer.
 */
interface Worker {
  load(jobId?: string): Promise<ConfigResult>
  writeText(path: string, text: string, jobId?: string): Promise<ConfigResult>
  readText(path: string, jobId?: string): Promise<ConfigResult>
  /** Delete the file at `path` from the worker's file system. */
  removeFile(path: string, jobId?: string): Promise<ConfigResult>
  /**
   * @deprecated The worker object exposes `removeFile`, not `removeText`;
   * this declaration is kept only so existing typings keep compiling.
   */
  removeText(path: string, jobId?: string): Promise<ConfigResult>
  FS(method: string, args: any[], jobId?: string): Promise<ConfigResult>
  loadLanguage(langs?: string | Lang[], jobId?: string): Promise<ConfigResult>
  initialize(langs?: string | Lang[], oem?: OEM, jobId?: string): Promise<ConfigResult>
  setParameters(params: Partial<WorkerParams>, jobId?: string): Promise<ConfigResult>
  recognize(image: ImageLike, options?: Partial<RecognizeOptions>, jobId?: string): Promise<RecognizeResult>
  detect(image: ImageLike, jobId?: string): Promise<DetectResult>
  terminate(jobId?: string): Promise<ConfigResult>
  getPDF(title?: string, textonly?: boolean, jobId?: string): Promise<GetPDFResult>
}
|
||||||
|
|
||||||
interface Lang { |
|
||||||
code: string; |
|
||||||
data: unknown; |
|
||||||
} |
|
||||||
|
|
||||||
interface WorkerOptions { |
|
||||||
corePath: string |
|
||||||
langPath: string |
|
||||||
cachePath: string |
|
||||||
dataPath: string |
|
||||||
workerPath: string |
|
||||||
cacheMethod: string |
|
||||||
workerBlobURL: boolean |
|
||||||
gzip: boolean |
|
||||||
logger: (arg: any) => void, |
|
||||||
errorHandler: (arg: any) => void |
|
||||||
} |
|
||||||
interface WorkerParams { |
|
||||||
tessedit_ocr_engine_mode: OEM |
|
||||||
tessedit_pageseg_mode: PSM |
|
||||||
tessedit_char_whitelist: string |
|
||||||
preserve_interword_spaces: string |
|
||||||
user_defined_dpi: string |
|
||||||
tessjs_create_hocr: string |
|
||||||
tessjs_create_tsv: string |
|
||||||
tessjs_create_box: string |
|
||||||
tessjs_create_unlv: string |
|
||||||
tessjs_create_osd: string |
|
||||||
} |
|
||||||
interface RecognizeOptions { |
|
||||||
rectangle: Rectangle |
|
||||||
} |
|
||||||
interface ConfigResult { |
|
||||||
jobId: string |
|
||||||
data: any |
|
||||||
} |
|
||||||
interface RecognizeResult { |
|
||||||
jobId: string |
|
||||||
data: Page |
|
||||||
} |
|
||||||
interface GetPDFResult { |
|
||||||
jobId: string |
|
||||||
data: number[] |
|
||||||
} |
|
||||||
interface DetectResult { |
|
||||||
jobId: string |
|
||||||
data: DetectData |
|
||||||
} |
|
||||||
interface DetectData { |
|
||||||
tesseract_script_id: number |
|
||||||
script: string |
|
||||||
script_confidence: number |
|
||||||
orientation_degrees: number |
|
||||||
orientation_confidence: number |
|
||||||
} |
|
||||||
interface Rectangle { |
|
||||||
left: number |
|
||||||
top: number |
|
||||||
width: number |
|
||||||
height: number |
|
||||||
} |
|
||||||
enum OEM { |
|
||||||
TESSERACT_ONLY, |
|
||||||
LSTM_ONLY, |
|
||||||
TESSERACT_LSTM_COMBINED, |
|
||||||
DEFAULT, |
|
||||||
} |
|
||||||
enum PSM { |
|
||||||
OSD_ONLY = '0', |
|
||||||
AUTO_OSD = '1', |
|
||||||
AUTO_ONLY = '2', |
|
||||||
AUTO = '3', |
|
||||||
SINGLE_COLUMN = '4', |
|
||||||
SINGLE_BLOCK_VERT_TEXT = '5', |
|
||||||
SINGLE_BLOCK = '6', |
|
||||||
SINGLE_LINE = '7', |
|
||||||
SINGLE_WORD = '8', |
|
||||||
CIRCLE_WORD = '9', |
|
||||||
SINGLE_CHAR = '10', |
|
||||||
SPARSE_TEXT = '11', |
|
||||||
SPARSE_TEXT_OSD = '12', |
|
||||||
RAW_LINE = '13' |
|
||||||
} |
|
||||||
type ImageLike = string | HTMLImageElement | HTMLCanvasElement | HTMLVideoElement |
|
||||||
| CanvasRenderingContext2D | File | Blob | ImageData | Buffer; |
|
||||||
interface Block { |
|
||||||
paragraphs: Paragraph[]; |
|
||||||
text: string; |
|
||||||
confidence: number; |
|
||||||
baseline: Baseline; |
|
||||||
bbox: Bbox; |
|
||||||
blocktype: string; |
|
||||||
polygon: any; |
|
||||||
page: Page; |
|
||||||
lines: Line[]; |
|
||||||
words: Word[]; |
|
||||||
symbols: Symbol[]; |
|
||||||
} |
|
||||||
interface Baseline { |
|
||||||
x0: number; |
|
||||||
y0: number; |
|
||||||
x1: number; |
|
||||||
y1: number; |
|
||||||
has_baseline: boolean; |
|
||||||
} |
|
||||||
interface Bbox { |
|
||||||
x0: number; |
|
||||||
y0: number; |
|
||||||
x1: number; |
|
||||||
y1: number; |
|
||||||
} |
|
||||||
interface Line { |
|
||||||
words: Word[]; |
|
||||||
text: string; |
|
||||||
confidence: number; |
|
||||||
baseline: Baseline; |
|
||||||
bbox: Bbox; |
|
||||||
paragraph: Paragraph; |
|
||||||
block: Block; |
|
||||||
page: Page; |
|
||||||
symbols: Symbol[]; |
|
||||||
} |
|
||||||
interface Paragraph { |
|
||||||
lines: Line[]; |
|
||||||
text: string; |
|
||||||
confidence: number; |
|
||||||
baseline: Baseline; |
|
||||||
bbox: Bbox; |
|
||||||
is_ltr: boolean; |
|
||||||
block: Block; |
|
||||||
page: Page; |
|
||||||
words: Word[]; |
|
||||||
symbols: Symbol[]; |
|
||||||
} |
|
||||||
interface Symbol { |
|
||||||
choices: Choice[]; |
|
||||||
image: any; |
|
||||||
text: string; |
|
||||||
confidence: number; |
|
||||||
baseline: Baseline; |
|
||||||
bbox: Bbox; |
|
||||||
is_superscript: boolean; |
|
||||||
is_subscript: boolean; |
|
||||||
is_dropcap: boolean; |
|
||||||
word: Word; |
|
||||||
line: Line; |
|
||||||
paragraph: Paragraph; |
|
||||||
block: Block; |
|
||||||
page: Page; |
|
||||||
} |
|
||||||
interface Choice { |
|
||||||
text: string; |
|
||||||
confidence: number; |
|
||||||
} |
|
||||||
interface Word { |
|
||||||
symbols: Symbol[]; |
|
||||||
choices: Choice[]; |
|
||||||
text: string; |
|
||||||
confidence: number; |
|
||||||
baseline: Baseline; |
|
||||||
bbox: Bbox; |
|
||||||
is_numeric: boolean; |
|
||||||
in_dictionary: boolean; |
|
||||||
direction: string; |
|
||||||
language: string; |
|
||||||
is_bold: boolean; |
|
||||||
is_italic: boolean; |
|
||||||
is_underlined: boolean; |
|
||||||
is_monospace: boolean; |
|
||||||
is_serif: boolean; |
|
||||||
is_smallcaps: boolean; |
|
||||||
font_size: number; |
|
||||||
font_id: number; |
|
||||||
font_name: string; |
|
||||||
line: Line; |
|
||||||
paragraph: Paragraph; |
|
||||||
block: Block; |
|
||||||
page: Page; |
|
||||||
} |
|
||||||
interface Page { |
|
||||||
blocks: Block[]; |
|
||||||
confidence: number; |
|
||||||
lines: Line[]; |
|
||||||
oem: string; |
|
||||||
osd: string; |
|
||||||
paragraphs: Paragraph[]; |
|
||||||
psm: string; |
|
||||||
symbols: Symbol[]; |
|
||||||
text: string; |
|
||||||
version: string; |
|
||||||
words: Word[]; |
|
||||||
hocr: string | null; |
|
||||||
tsv: string | null; |
|
||||||
box: string | null; |
|
||||||
unlv: string | null; |
|
||||||
sd: string | null; |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
export = Tesseract; |
|
||||||
export as namespace Tesseract; |
|
@ -1,27 +1,75 @@ |
|||||||
/** |
const adapter = require('./node/index.js') |
||||||
* |
const circularize = require('./common/circularize.js') |
||||||
* Entry point for tesseract.js, should be the entry when bundling. |
const TesseractJob = require('./common/job'); |
||||||
* |
const version = require('../package.json').version; |
||||||
* @fileoverview entry point for tesseract.js |
|
||||||
* @author Kevin Kwok <antimatter15@gmail.com> |
const create = function(workerOptions = {}){ |
||||||
* @author Guillermo Webster <gui@mit.edu> |
var worker = new TesseractWorker(Object.assign({}, adapter.defaultOptions, workerOptions)); |
||||||
* @author Jerome Wu <jeromewus@gmail.com> |
worker.create = create; |
||||||
*/ |
worker.version = version; |
||||||
require('regenerator-runtime/runtime'); |
return worker; |
||||||
const createScheduler = require('./createScheduler'); |
} |
||||||
const createWorker = require('./createWorker'); |
|
||||||
const Tesseract = require('./Tesseract'); |
class TesseractWorker { |
||||||
const languages = require('./constants/languages'); |
constructor(workerOptions){ |
||||||
const OEM = require('./constants/OEM'); |
this.worker = null; |
||||||
const PSM = require('./constants/PSM'); |
this.workerOptions = workerOptions; |
||||||
const { setLogging } = require('./utils/log'); |
this._currentJob = null; |
||||||
|
this._queue = []; |
||||||
module.exports = { |
} |
||||||
languages, |
|
||||||
OEM, |
recognize(image, options = {}){ |
||||||
PSM, |
return this._delay(job => { |
||||||
createScheduler, |
if (typeof options === 'string') options = {lang: options} |
||||||
createWorker, |
options.lang = options.lang || 'eng'; |
||||||
setLogging, |
|
||||||
...Tesseract, |
job._send('recognize', { image, options, workerOptions: this.workerOptions }); |
||||||
}; |
}) |
||||||
|
} |
||||||
|
detect(image, options = {}){ |
||||||
|
return this._delay(job => { |
||||||
|
job._send('detect', { image, options, workerOptions: this.workerOptions }); |
||||||
|
}) |
||||||
|
} |
||||||
|
|
||||||
|
terminate(){ |
||||||
|
if(this.worker) adapter.terminateWorker(this); |
||||||
|
this.worker = null; |
||||||
|
this._currentJob = null; |
||||||
|
this._queue = []; |
||||||
|
} |
||||||
|
|
||||||
|
_delay(fn){ |
||||||
|
if(!this.worker) this.worker = adapter.spawnWorker(this, this.workerOptions); |
||||||
|
|
||||||
|
var job = new TesseractJob(this); |
||||||
|
this._queue.push(e => { |
||||||
|
this._queue.shift(); |
||||||
|
this._currentJob = job; |
||||||
|
fn(job); |
||||||
|
}); |
||||||
|
if(!this._currentJob) this._dequeue(); |
||||||
|
return job; |
||||||
|
} |
||||||
|
|
||||||
|
_dequeue(){ |
||||||
|
this._currentJob = null; |
||||||
|
if(this._queue.length){ |
||||||
|
this._queue[0](); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
_recv(packet){ |
||||||
|
if(packet.status === 'resolve' && packet.action === 'recognize'){ |
||||||
|
packet.data = circularize(packet.data); |
||||||
|
} |
||||||
|
|
||||||
|
if(this._currentJob.id === packet.jobId){ |
||||||
|
this._currentJob._handle(packet) |
||||||
|
} else { |
||||||
|
console.warn('Job ID ' + packet.jobId + ' not known.') |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
module.exports = create(); |
||||||
|
@ -0,0 +1,89 @@ |
|||||||
|
const fetch = require('isomorphic-fetch'), |
||||||
|
isURL = require('is-url'), |
||||||
|
fork = require('child_process').fork, |
||||||
|
fs = require('fs'); |
||||||
|
|
||||||
|
exports.defaultOptions = { |
||||||
|
workerPath: require('path').join(__dirname, 'worker.js'), |
||||||
|
langPath: 'https://tessdata.projectnaptha.com/3.02/', |
||||||
|
} |
||||||
|
|
||||||
|
exports.spawnWorker = function spawnWorker(instance, workerOptions){ |
||||||
|
var cp = fork(workerOptions.workerPath); |
||||||
|
cp.on('message', packet => { |
||||||
|
instance._recv(packet); |
||||||
|
}); |
||||||
|
return cp; |
||||||
|
} |
||||||
|
|
||||||
|
exports.terminateWorker = function(instance){ |
||||||
|
instance.worker.kill(); |
||||||
|
} |
||||||
|
|
||||||
|
exports.sendPacket = function sendPacket(instance, packet){ |
||||||
|
loadImage(packet.payload.image, img => { |
||||||
|
packet.payload.image = img; |
||||||
|
instance.worker.send(packet); |
||||||
|
}); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
function loadImage(image, cb){ |
||||||
|
|
||||||
|
if(typeof image === 'string'){ |
||||||
|
if (isURL(image)) { |
||||||
|
fetch(image) |
||||||
|
.then(resp => resp.buffer()) |
||||||
|
.then(buffer => loadImage(buffer, cb)) |
||||||
|
.catch(err => console.error(err)); |
||||||
|
} else { |
||||||
|
fs.readFile(image, function(err, buffer){ |
||||||
|
if (err) throw err; |
||||||
|
loadImage(buffer, cb); |
||||||
|
}); |
||||||
|
} |
||||||
|
return; |
||||||
|
} else if (image instanceof Buffer){ |
||||||
|
var mime = require('file-type')(image).mime |
||||||
|
|
||||||
|
if(mime === 'image/png'){ |
||||||
|
var PNGReader = require('png.js'); |
||||||
|
var reader = new PNGReader(image); |
||||||
|
reader.parse(function(err, png){ |
||||||
|
if (err) throw err; |
||||||
|
|
||||||
|
var image = { |
||||||
|
width: png.getWidth(), |
||||||
|
height: png.getHeight() |
||||||
|
} |
||||||
|
image.data = new Uint8Array(image.width * image.height * 4) |
||||||
|
for(var j = 0; j < image.height; j++){ |
||||||
|
for(var i = 0; i < image.width; i++){ |
||||||
|
var offset = 4 * (i + j * image.width), |
||||||
|
pix = png.getPixel(i, j); |
||||||
|
|
||||||
|
image.data[offset] = pix[0]; |
||||||
|
image.data[offset + 1] = pix[1]; |
||||||
|
image.data[offset + 2] = pix[2]; |
||||||
|
image.data[offset + 3] = pix[3]; |
||||||
|
} |
||||||
|
} |
||||||
|
loadImage(image, cb); |
||||||
|
}); |
||||||
|
return; |
||||||
|
} else if (mime === 'image/jpeg'){ |
||||||
|
loadImage(require('jpeg-js').decode(image), cb); |
||||||
|
return; |
||||||
|
} |
||||||
|
|
||||||
|
// TODO: support for TIFF, NetPBM, BMP, etc.
|
||||||
|
} |
||||||
|
|
||||||
|
// node uses json.stringify for ipc which means we need to turn
|
||||||
|
// fancy arrays into raw arrays
|
||||||
|
if(image && image.data && image.data.length && !Array.isArray(image.data)){ |
||||||
|
image.data = Array.from(image.data); |
||||||
|
return loadImage(image, cb) |
||||||
|
} |
||||||
|
cb(image); |
||||||
|
} |
@ -0,0 +1,47 @@ |
|||||||
|
const https = require("https"), |
||||||
|
http = require("http"), |
||||||
|
zlib = require("zlib"), |
||||||
|
fs = require("fs"), |
||||||
|
path = require("path"), |
||||||
|
isURL = require("is-url"); |
||||||
|
|
||||||
|
var langdata = require('../common/langdata.json') |
||||||
|
|
||||||
|
/**
 * Load the trained data for `req.options.lang` and pass it to `cb`.
 *
 * The file is first read from disk. When that fails, the gzip-compressed
 * file is downloaded from `req.workerOptions.langPath`, decompressed into
 * `<lang>.traineddata` in the working directory, and the lookup is retried.
 *
 * @param {object} req - job request; reads `options.lang` and `workerOptions.langPath`
 * @param {object} res - job response; `res.progress` receives download progress
 * @param {function} cb - called with the trained data as a Uint8Array
 */
function getLanguageData(req, res, cb){
    var lang = req.options.lang,
        langfile = lang + '.traineddata.gz';

    // langPath defaults to a URL where languages can be downloaded. If a custom path is specified
    // and it is a local path, use that instead
    var localPath = isURL(req.workerOptions.langPath) ?
        lang + '.traineddata' :
        path.join(req.workerOptions.langPath, lang + '.traineddata');

    var fetchProtocol = req.workerOptions.langPath.startsWith('http://') ? http : https;

    fs.readFile(localPath, function (err, data) {
        if(!err) return cb(new Uint8Array(data));

        fetchProtocol.get(req.workerOptions.langPath + langfile, stream => {
            var received_bytes = 0;
            stream.on('data', function(chunk) {
                received_bytes += chunk.length;
                res.progress({
                    status: 'downloading ' + langfile,
                    loaded: received_bytes,
                    progress: Math.min(1, received_bytes / langdata[lang])
                });
            });

            var gunzip = zlib.createGunzip();
            var output = fs.createWriteStream(lang + '.traineddata');
            stream.pipe(gunzip).pipe(output);

            // Retry only after the file has been completely written ('finish' on
            // the write stream); gunzip 'end' can fire while data is still being
            // flushed to disk. Pass the real `res` so progress reporting keeps
            // working if the retry has to download again.
            output.on('finish', () => {
                getLanguageData(req, res, cb)
            });
        });
    });
}
||||||
|
|
||||||
|
|
||||||
|
module.exports = getLanguageData; |
@ -0,0 +1,19 @@ |
|||||||
|
const workerUtils = require('../common/worker.js') |
||||||
|
|
||||||
|
process.on('message', function(packet){ |
||||||
|
workerUtils.dispatchHandlers(packet, obj => process.send(obj)) |
||||||
|
}) |
||||||
|
|
||||||
|
var TesseractCore; |
||||||
|
exports.getCore = function(req, res){ |
||||||
|
if(!TesseractCore){ |
||||||
|
res.progress({ status: 'loading tesseract core' }) |
||||||
|
TesseractCore = require('tesseract.js-core') |
||||||
|
res.progress({ status: 'loaded tesseract core' }) |
||||||
|
} |
||||||
|
return TesseractCore |
||||||
|
} |
||||||
|
|
||||||
|
exports.getLanguageData = require('./lang.js') |
||||||
|
|
||||||
|
workerUtils.setAdapter(module.exports); |
@ -1,54 +0,0 @@ |
|||||||
/** |
|
||||||
* In the recognition result of tesseract, there |
|
||||||
* is a deep JSON object for details, it has around |
|
||||||
* |
|
||||||
* The result of dump.js is a big JSON tree |
|
||||||
* which can be easily serialized (for instance |
|
||||||
* to be sent from a webworker to the main app |
|
||||||
* or through Node's IPC), but we want |
|
||||||
* a (circular) DOM-like interface for walking |
|
||||||
* through the data. |
|
||||||
* |
|
||||||
* @fileoverview DOM-like interface for walking through data |
|
||||||
* @author Kevin Kwok <antimatter15@gmail.com> |
|
||||||
* @author Guillermo Webster <gui@mit.edu> |
|
||||||
* @author Jerome Wu <jeromewus@gmail.com> |
|
||||||
*/ |
|
||||||
|
|
||||||
module.exports = (page) => { |
|
||||||
const blocks = []; |
|
||||||
const paragraphs = []; |
|
||||||
const lines = []; |
|
||||||
const words = []; |
|
||||||
const symbols = []; |
|
||||||
|
|
||||||
page.blocks.forEach((block) => { |
|
||||||
block.paragraphs.forEach((paragraph) => { |
|
||||||
paragraph.lines.forEach((line) => { |
|
||||||
line.words.forEach((word) => { |
|
||||||
word.symbols.forEach((sym) => { |
|
||||||
symbols.push({ |
|
||||||
...sym, page, block, paragraph, line, word, |
|
||||||
}); |
|
||||||
}); |
|
||||||
words.push({ |
|
||||||
...word, page, block, paragraph, line, |
|
||||||
}); |
|
||||||
}); |
|
||||||
lines.push({ |
|
||||||
...line, page, block, paragraph, |
|
||||||
}); |
|
||||||
}); |
|
||||||
paragraphs.push({ |
|
||||||
...paragraph, page, block, |
|
||||||
}); |
|
||||||
}); |
|
||||||
blocks.push({ |
|
||||||
...block, page, |
|
||||||
}); |
|
||||||
}); |
|
||||||
|
|
||||||
return { |
|
||||||
...page, blocks, paragraphs, lines, words, symbols, |
|
||||||
}; |
|
||||||
}; |
|
@ -1,21 +0,0 @@ |
|||||||
const isElectron = require('is-electron'); |
|
||||||
|
|
||||||
module.exports = (key) => { |
|
||||||
const env = {}; |
|
||||||
|
|
||||||
if (typeof WorkerGlobalScope !== 'undefined') { |
|
||||||
env.type = 'webworker'; |
|
||||||
} else if (isElectron()) { |
|
||||||
env.type = 'electron'; |
|
||||||
} else if (typeof window === 'object') { |
|
||||||
env.type = 'browser'; |
|
||||||
} else if (typeof process === 'object' && typeof require === 'function') { |
|
||||||
env.type = 'node'; |
|
||||||
} |
|
||||||
|
|
||||||
if (typeof key === 'undefined') { |
|
||||||
return env; |
|
||||||
} |
|
||||||
|
|
||||||
return env[key]; |
|
||||||
}; |
|
@ -1,3 +0,0 @@ |
|||||||
module.exports = (prefix, cnt) => ( |
|
||||||
`${prefix}-${cnt}-${Math.random().toString(16).slice(3, 8)}` |
|
||||||
); |
|
@ -1,9 +0,0 @@ |
|||||||
let logging = false; |
|
||||||
|
|
||||||
exports.logging = logging; |
|
||||||
|
|
||||||
exports.setLogging = (_logging) => { |
|
||||||
logging = _logging; |
|
||||||
}; |
|
||||||
|
|
||||||
exports.log = (...args) => (logging ? console.log.apply(this, args) : null); |
|
@ -1,12 +0,0 @@ |
|||||||
const isBrowser = require('./getEnvironment')('type') === 'browser'; |
|
||||||
const resolveURL = isBrowser ? require('resolve-url') : s => s; // eslint-disable-line
|
|
||||||
|
|
||||||
module.exports = (options) => { |
|
||||||
const opts = { ...options }; |
|
||||||
['corePath', 'workerPath', 'langPath'].forEach((key) => { |
|
||||||
if (options[key]) { |
|
||||||
opts[key] = resolveURL(opts[key]); |
|
||||||
} |
|
||||||
}); |
|
||||||
return opts; |
|
||||||
}; |
|
@ -1,10 +0,0 @@ |
|||||||
const { set, get, del } = require('idb-keyval'); |
|
||||||
|
|
||||||
module.exports = { |
|
||||||
readCache: get, |
|
||||||
writeCache: set, |
|
||||||
deleteCache: del, |
|
||||||
checkCache: (path) => ( |
|
||||||
get(path).then((v) => typeof v !== 'undefined') |
|
||||||
), |
|
||||||
}; |
|
@ -1,30 +0,0 @@ |
|||||||
const { simd } = require('wasm-feature-detect'); |
|
||||||
const { dependencies } = require('../../../package.json'); |
|
||||||
|
|
||||||
module.exports = async (corePath, res) => { |
|
||||||
if (typeof global.TesseractCore === 'undefined') { |
|
||||||
res.progress({ status: 'loading tesseract core', progress: 0 }); |
|
||||||
|
|
||||||
// If the user specifies a core path, we use that
|
|
||||||
// Otherwise, we detect the correct core based on SIMD support
|
|
||||||
let corePathImport = corePath; |
|
||||||
if (!corePathImport) { |
|
||||||
const simdSupport = await simd(); |
|
||||||
if (simdSupport) { |
|
||||||
corePathImport = `https://unpkg.com/tesseract.js-core@v${dependencies['tesseract.js-core'].substring(1)}/tesseract-core-simd.wasm.js`; |
|
||||||
} else { |
|
||||||
corePathImport = `https://unpkg.com/tesseract.js-core@v${dependencies['tesseract.js-core'].substring(1)}/tesseract-core.wasm.js`; |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
global.importScripts(corePathImport); |
|
||||||
|
|
||||||
if (typeof global.TesseractCoreWASM !== 'undefined' && typeof WebAssembly === 'object') { |
|
||||||
global.TesseractCore = global.TesseractCoreWASM; |
|
||||||
} else { |
|
||||||
throw Error('Failed to load TesseractCore'); |
|
||||||
} |
|
||||||
res.progress({ status: 'loading tesseract core', progress: 1 }); |
|
||||||
} |
|
||||||
return global.TesseractCore; |
|
||||||
}; |
|
@ -1 +0,0 @@ |
|||||||
module.exports = require('zlibjs').gunzipSync; |
|
@ -1,32 +0,0 @@ |
|||||||
/** |
|
||||||
* |
|
||||||
* Browser worker scripts |
|
||||||
* |
|
||||||
* @fileoverview Browser worker implementation |
|
||||||
* @author Kevin Kwok <antimatter15@gmail.com> |
|
||||||
* @author Guillermo Webster <gui@mit.edu> |
|
||||||
* @author Jerome Wu <jeromewus@gmail.com> |
|
||||||
*/ |
|
||||||
|
|
||||||
const worker = require('..'); |
|
||||||
const getCore = require('./getCore'); |
|
||||||
const gunzip = require('./gunzip'); |
|
||||||
const cache = require('./cache'); |
|
||||||
|
|
||||||
/* |
|
||||||
* register message handler |
|
||||||
*/ |
|
||||||
global.addEventListener('message', ({ data }) => { |
|
||||||
worker.dispatchHandlers(data, (obj) => postMessage(obj)); |
|
||||||
}); |
|
||||||
|
|
||||||
/* |
|
||||||
* getCore is an async function that loads TesseractCore and resolves to |
|
||||||
* TesseractCore. |
|
||||||
*/ |
|
||||||
worker.setAdapter({ |
|
||||||
getCore, |
|
||||||
gunzip, |
|
||||||
fetch: () => {}, |
|
||||||
...cache, |
|
||||||
}); |
|
@ -1,14 +0,0 @@ |
|||||||
/* |
|
||||||
* default params for tesseract.js |
|
||||||
*/ |
|
||||||
const PSM = require('../../constants/PSM'); |
|
||||||
|
|
||||||
module.exports = { |
|
||||||
tessedit_pageseg_mode: PSM.SINGLE_BLOCK, |
|
||||||
tessedit_char_whitelist: '', |
|
||||||
tessjs_create_hocr: '1', |
|
||||||
tessjs_create_tsv: '1', |
|
||||||
tessjs_create_box: '0', |
|
||||||
tessjs_create_unlv: '0', |
|
||||||
tessjs_create_osd: '0', |
|
||||||
}; |
|
@ -1,313 +0,0 @@ |
|||||||
/** |
|
||||||
* |
|
||||||
* Worker script for browser and node |
|
||||||
* |
|
||||||
* @fileoverview Worker script for browser and node |
|
||||||
* @author Kevin Kwok <antimatter15@gmail.com> |
|
||||||
* @author Guillermo Webster <gui@mit.edu> |
|
||||||
* @author Jerome Wu <jeromewus@gmail.com> |
|
||||||
*/ |
|
||||||
require('regenerator-runtime/runtime'); |
|
||||||
const fileType = require('file-type'); |
|
||||||
const isURL = require('is-url'); |
|
||||||
const dump = require('./utils/dump'); |
|
||||||
const isWebWorker = require('../utils/getEnvironment')('type') === 'webworker'; |
|
||||||
const setImage = require('./utils/setImage'); |
|
||||||
const defaultParams = require('./constants/defaultParams'); |
|
||||||
const { log, setLogging } = require('../utils/log'); |
|
||||||
|
|
||||||
/* |
|
||||||
* Tesseract Module returned by TesseractCore. |
|
||||||
*/ |
|
||||||
let TessModule; |
|
||||||
/* |
|
||||||
* TessBaseAPI instance |
|
||||||
*/ |
|
||||||
let api = null; |
|
||||||
let latestJob; |
|
||||||
let adapter = {}; |
|
||||||
let params = defaultParams; |
|
||||||
|
|
||||||
/**
 * Load and initialize the Tesseract module, once per worker.
 *
 * Resolves the job with `{ loaded: true }` when the module is ready (or was
 * already loaded). Any failure while fetching or initializing the core is
 * reported through `res.reject` instead of being left as an unhandled
 * promise rejection.
 *
 * @param {object} packet - job packet; reads `workerId`, `jobId` and
 *   `payload.options.{corePath, logging}`
 * @param {object} res - job response (`progress`, `resolve`, `reject`)
 */
const load = async ({ workerId, jobId, payload: { options: { corePath, logging } } }, res) => {
  setLogging(logging);

  if (TessModule) {
    res.resolve({ loaded: true });
    return;
  }

  try {
    const Core = await adapter.getCore(corePath, res);

    res.progress({ workerId, status: 'initializing tesseract', progress: 0 });

    TessModule = await Core({
      // Progress callback handed to the core; reports against the latest job.
      TesseractProgress(percent) {
        latestJob.progress({
          workerId,
          jobId,
          status: 'recognizing text',
          progress: Math.max(0, (percent - 30) / 70),
        });
      },
    });

    res.progress({ workerId, status: 'initialized tesseract', progress: 1 });
    res.resolve({ loaded: true });
  } catch (err) {
    res.reject(err.toString());
  }
};
|
||||||
|
|
||||||
/**
 * Call a method of the Tesseract module's file system and resolve the job
 * with whatever that method returns.
 *
 * @param {object} packet - job packet; reads `workerId` and `payload.{method, args}`
 * @param {object} res - job response; only `res.resolve` is used
 */
const FS = (packet, res) => {
  const { workerId, payload: { method, args } } = packet;
  log(`[${workerId}]: FS.${method} with args ${args}`);

  const { FS: fileSystem } = TessModule;
  const result = fileSystem[method](...args);
  res.resolve(result);
};
|
||||||
|
|
||||||
/**
 * Load the trained data of every requested language, write it into the
 * Tesseract file system and (depending on `cacheMethod`) into the cache.
 *
 * `langs` is either a '+'-separated string of language codes or an array
 * whose entries are codes or `{ code, data }` objects.
 *
 * @param {object} packet - job packet; reads `workerId`, `payload.langs` and
 *   `payload.options.{langPath, dataPath, cachePath, cacheMethod, gzip}`
 * @param {object} res - job response (`progress`, `resolve`, `reject`)
 */
const loadLanguage = async ({
  workerId,
  payload: {
    langs,
    options: {
      langPath,
      dataPath,
      cachePath,
      cacheMethod,
      gzip = true,
    },
  },
},
res) => {
  // Loads one language: cache first, then langPath / inline data.
  const loadAndGunzipFile = async (_lang) => {
    const lang = typeof _lang === 'string' ? _lang : _lang.code;
    const skipCacheRead = ['refresh', 'none'].includes(cacheMethod);
    const readCache = skipCacheRead ? () => Promise.resolve() : adapter.readCache;
    const cacheKey = `${cachePath || '.'}/${lang}.traineddata`;
    let data = null;

    try {
      const cachedData = await readCache(cacheKey);
      if (typeof cachedData === 'undefined') {
        throw Error('Not found in cache');
      }
      log(`[${workerId}]: Load ${lang}.traineddata from cache`);
      res.progress({ workerId, status: 'loading language traineddata (from cache)', progress: 0.5 });
      data = cachedData;
    } catch (e) {
      log(`[${workerId}]: Load ${lang}.traineddata from ${langPath}`);
      if (typeof _lang !== 'string') {
        data = _lang.data; // eslint-disable-line
      } else {
        /** When langPath is an URL */
        const isRemote = isURL(langPath)
          || langPath.startsWith('moz-extension://')
          || langPath.startsWith('chrome-extension://')
          || langPath.startsWith('file://');
        const fileName = `${langPath}/${lang}.traineddata${gzip ? '.gz' : ''}`;

        if (isRemote) {
          const fetchFile = isWebWorker ? fetch : adapter.fetch;
          const resp = await fetchFile(fileName);
          if (!resp.ok) {
            throw Error(`Network error while fetching ${fileName}. Response code: ${resp.status}`);
          }
          data = await resp.arrayBuffer();
        } else {
          data = await adapter.readCache(fileName);
        }
      }
    }

    data = new Uint8Array(data);

    // Only gunzip when the bytes really are gzip data.
    const type = fileType(data);
    if (typeof type !== 'undefined' && type.mime === 'application/gzip') {
      data = adapter.gunzip(data);
    }

    if (TessModule) {
      if (dataPath) {
        try {
          TessModule.FS.mkdir(dataPath);
        } catch (err) {
          res.reject(err.toString());
        }
      }
      TessModule.FS.writeFile(`${dataPath || '.'}/${lang}.traineddata`, data);
    }

    if (['write', 'refresh', undefined].includes(cacheMethod)) {
      await adapter.writeCache(cacheKey, data);
    }

    return data;
  };

  res.progress({ workerId, status: 'loading language traineddata', progress: 0 });
  try {
    const requested = typeof langs === 'string' ? langs.split('+') : langs;
    await Promise.all(requested.map(loadAndGunzipFile));
    res.progress({ workerId, status: 'loaded language traineddata', progress: 1 });
    res.resolve(langs);
  } catch (err) {
    res.reject(err.toString());
  }
};
|
||||||
|
|
||||||
/**
 * Apply parameters to the Tesseract API and remember them.
 *
 * Keys starting with 'tessjs_' are not passed to `api.SetVariable`; they are
 * only merged into the stored `params`.
 *
 * @param {object} packet - job packet; reads `payload.params`
 * @param {object} [res] - job response; when given, resolved with the merged params
 */
const setParameters = ({ payload: { params: _params } }, res) => {
  for (const key of Object.keys(_params)) {
    if (!key.startsWith('tessjs_')) {
      api.SetVariable(key, _params[key]);
    }
  }

  params = { ...params, ..._params };

  if (typeof res === 'undefined') {
    return;
  }
  res.resolve(params);
};
|
||||||
|
|
||||||
/**
 * (Re)create the Tesseract API instance for the given languages.
 *
 * `langs` is either a '+'-separated string of language codes or an array
 * whose entries are codes or `{ code, data }` objects (the same shape
 * loadLanguage accepts). Any previous API instance is ended first and the
 * parameters are reset to the defaults.
 *
 * @param {object} packet - job packet; reads `workerId` and `payload.{langs, oem}`
 * @param {object} res - job response (`progress`, `resolve`, `reject`)
 */
const initialize = ({
  workerId,
  payload: { langs: _langs, oem },
}, res) => {
  // Init needs language codes. For object entries that is `code`; `data`
  // holds the traineddata payload and must not end up in the language string.
  const langs = (typeof _langs === 'string')
    ? _langs
    : _langs.map((l) => ((typeof l === 'string') ? l : l.code)).join('+');

  try {
    res.progress({
      workerId, status: 'initializing api', progress: 0,
    });
    if (api !== null) {
      api.End();
    }
    api = new TessModule.TessBaseAPI();
    const status = api.Init(null, langs, oem);
    if (status === -1) {
      // Stop here: a failed Init must not also be reported as a success.
      res.reject('initialization failed');
      return;
    }
    params = defaultParams;
    setParameters({ payload: { params } });
    res.progress({
      workerId, status: 'initialized api', progress: 1,
    });
    res.resolve();
  } catch (err) {
    res.reject(err.toString());
  }
};
|
||||||
|
|
||||||
/**
 * Run recognition on an image and resolve the job with the dumped result.
 *
 * The pointer returned by setImage is released in every case, including
 * when recognition or dumping throws.
 *
 * @param {object} packet - job packet; reads `payload.image` and
 *   `payload.options.rectangle` (`left`, `top`, `width`, `height`)
 * @param {object} res - job response (`resolve`, `reject`)
 */
const recognize = ({ payload: { image, options: { rectangle: rec } } }, res) => {
  let ptr = null;
  try {
    ptr = setImage(TessModule, api, image);
    if (typeof rec === 'object') {
      api.SetRectangle(rec.left, rec.top, rec.width, rec.height);
    }
    api.Recognize(null);
    res.resolve(dump(TessModule, api, params));
  } catch (err) {
    res.reject(err.toString());
  } finally {
    // ptr stays null when setImage itself threw; nothing to release then.
    if (ptr !== null) {
      TessModule._free(ptr);
    }
  }
};
|
||||||
|
|
||||||
/**
 * Render the current API state into a PDF and resolve the job with the
 * contents of '/tesseract-ocr.pdf' read from the Tesseract file system.
 *
 * @param {object} packet - job packet; reads `payload.title` and `payload.textonly`
 * @param {object} res - job response; only `res.resolve` is used
 */
const getPDF = (packet, res) => {
  const { payload: { title, textonly } } = packet;

  const renderer = new TessModule.TessPDFRenderer('tesseract-ocr', '/', textonly);
  renderer.BeginDocument(title);
  renderer.AddImage(api);
  renderer.EndDocument();
  TessModule._free(renderer);

  const pdf = TessModule.FS.readFile('/tesseract-ocr.pdf');
  res.resolve(pdf);
};
|
||||||
|
|
||||||
/**
 * Run orientation and script detection on an image.
 *
 * Resolves with the script id, script name, orientation in degrees and both
 * confidences taken from the best result; rejects with 'Failed to detect OS'
 * when `api.DetectOS` reports failure.
 *
 * @param {object} packet - job packet; reads `payload.image`
 * @param {object} res - job response (`resolve`, `reject`)
 */
const detect = ({ payload: { image } }, res) => {
  try {
    const ptr = setImage(TessModule, api, image);
    const results = new TessModule.OSResults();
    const detected = api.DetectOS(results);

    if (!detected) {
      api.End();
      TessModule._free(ptr);
      res.reject('Failed to detect OS');
      return;
    }

    const best = results.best_result;
    const { orientation_id: oid, script_id: sid } = best;

    TessModule._free(ptr);

    // Lookup from orientation id to degrees.
    const degreesByOrientation = [0, 270, 180, 90];
    res.resolve({
      tesseract_script_id: sid,
      script: results.unicharset.get_script_from_script_id(sid),
      script_confidence: best.sconfidence,
      orientation_degrees: degreesByOrientation[oid],
      orientation_confidence: best.oconfidence,
    });
  } catch (err) {
    res.reject(err.toString());
  }
};
|
||||||
|
|
||||||
/**
 * End the Tesseract API instance (when one exists) and resolve the job
 * with `{ terminated: true }`; failures are reported through `res.reject`.
 *
 * @param {object} _ - job packet (unused)
 * @param {object} res - job response (`resolve`, `reject`)
 */
const terminate = (_, res) => {
  const hasApi = api !== null;
  try {
    if (hasApi) {
      api.End();
    }
    const outcome = { terminated: true };
    res.resolve(outcome);
  } catch (err) {
    res.reject(err.toString());
  }
};
|
||||||
|
|
||||||
/** |
|
||||||
* dispatchHandlers |
|
||||||
* |
|
||||||
* @name dispatchHandlers |
|
||||||
* @function worker data handler |
|
||||||
* @access public |
|
||||||
* @param {object} data |
|
||||||
* @param {string} data.jobId - unique job id |
|
||||||
* @param {string} data.action - action of the job, only recognize and detect for now |
|
||||||
* @param {object} data.payload - data for the job |
|
||||||
* @param {function} send - trigger job to work |
|
||||||
*/ |
|
||||||
exports.dispatchHandlers = (packet, send) => { |
|
||||||
const res = (status, data) => { |
|
||||||
send({ |
|
||||||
...packet, |
|
||||||
status, |
|
||||||
data, |
|
||||||
}); |
|
||||||
}; |
|
||||||
res.resolve = res.bind(this, 'resolve'); |
|
||||||
res.reject = res.bind(this, 'reject'); |
|
||||||
res.progress = res.bind(this, 'progress'); |
|
||||||
|
|
||||||
latestJob = res; |
|
||||||
|
|
||||||
try { |
|
||||||
({ |
|
||||||
load, |
|
||||||
FS, |
|
||||||
loadLanguage, |
|
||||||
initialize, |
|
||||||
setParameters, |
|
||||||
recognize, |
|
||||||
getPDF, |
|
||||||
detect, |
|
||||||
terminate, |
|
||||||
})[packet.action](packet, res); |
|
||||||
} catch (err) { |
|
||||||
/** Prepare exception to travel through postMessage */ |
|
||||||
res.reject(err.toString()); |
|
||||||
} |
|
||||||
}; |
|
||||||
|
|
||||||
/** |
|
||||||
* setAdapter |
|
||||||
* |
|
||||||
* @name setAdapter |
|
||||||
* @function |
|
||||||
* @access public |
|
||||||
* @param {object} adapter - implementation of the worker, different in browser and node environment |
|
||||||
*/ |
|
||||||
exports.setAdapter = (_adapter) => { |
|
||||||
adapter = _adapter; |
|
||||||
}; |
|
@ -1,16 +0,0 @@ |
|||||||
const util = require('util'); |
|
||||||
const fs = require('fs'); |
|
||||||
|
|
||||||
module.exports = { |
|
||||||
readCache: util.promisify(fs.readFile), |
|
||||||
writeCache: util.promisify(fs.writeFile), |
|
||||||
deleteCache: (path) => ( |
|
||||||
util.promisify(fs.unlink)(path) |
|
||||||
.catch(() => {}) |
|
||||||
), |
|
||||||
checkCache: (path) => ( |
|
||||||
util.promisify(fs.access)(path, fs.F_OK) |
|
||||||
.then((err) => (err === null)) |
|
||||||
.catch(() => false) |
|
||||||
), |
|
||||||
}; |
|
@ -1,20 +0,0 @@ |
|||||||
const { simd } = require('wasm-feature-detect'); |
|
||||||
|
|
||||||
let TesseractCore = null; |
|
||||||
/* |
|
||||||
* getCore is a sync function to load and return |
|
||||||
* TesseractCore. |
|
||||||
*/ |
|
||||||
module.exports = async (_, res) => { |
|
||||||
if (TesseractCore === null) { |
|
||||||
const simdSupport = await simd(); |
|
||||||
res.progress({ status: 'loading tesseract core', progress: 0 }); |
|
||||||
if (simdSupport) { |
|
||||||
TesseractCore = require('tesseract.js-core/tesseract-core-simd'); |
|
||||||
} else { |
|
||||||
TesseractCore = require('tesseract.js-core/tesseract-core'); |
|
||||||
} |
|
||||||
res.progress({ status: 'loaded tesseract core', progress: 1 }); |
|
||||||
} |
|
||||||
return TesseractCore; |
|
||||||
}; |
|
@ -1 +0,0 @@ |
|||||||
module.exports = require('zlib').gunzipSync; |
|
@ -1,30 +0,0 @@ |
|||||||
/** |
|
||||||
* |
|
||||||
* Tesseract Worker Script for Node |
|
||||||
* |
|
||||||
* @fileoverview Node worker implementation |
|
||||||
* @author Kevin Kwok <antimatter15@gmail.com> |
|
||||||
* @author Guillermo Webster <gui@mit.edu> |
|
||||||
* @author Jerome Wu <jeromewus@gmail.com> |
|
||||||
*/ |
|
||||||
|
|
||||||
const fetch = require('node-fetch'); |
|
||||||
const { parentPort } = require('worker_threads'); |
|
||||||
const worker = require('..'); |
|
||||||
const getCore = require('./getCore'); |
|
||||||
const gunzip = require('./gunzip'); |
|
||||||
const cache = require('./cache'); |
|
||||||
|
|
||||||
/* |
|
||||||
* register message handler |
|
||||||
*/ |
|
||||||
parentPort.on('message', (packet) => { |
|
||||||
worker.dispatchHandlers(packet, (obj) => parentPort.postMessage(obj)); |
|
||||||
}); |
|
||||||
|
|
||||||
worker.setAdapter({ |
|
||||||
getCore, |
|
||||||
gunzip, |
|
||||||
fetch, |
|
||||||
...cache, |
|
||||||
}); |
|
@ -1,201 +0,0 @@ |
|||||||
/** |
|
||||||
* |
|
||||||
* Dump data to a big JSON tree |
|
||||||
* |
|
||||||
* @fileoverview dump data to JSON tree |
|
||||||
* @author Kevin Kwok <antimatter15@gmail.com> |
|
||||||
* @author Guillermo Webster <gui@mit.edu> |
|
||||||
* @author Jerome Wu <jeromewus@gmail.com> |
|
||||||
*/ |
|
||||||
|
|
||||||
/**
 * deindent
 *
 * The generated HOCR is excessively indented, so
 * we get rid of that indentation: when the first line starts with two
 * spaces, two leading spaces are removed from every line that has them.
 *
 * @name deindent
 * @function deindent string
 * @access public
 * @param {string} html - text to de-indent
 * @returns {string} the text with one level (two spaces) of indentation removed
 */
const deindent = (html) => {
  // The prefix compared against must be as long as the slice being tested;
  // comparing a 2-character slice with a single space never matches.
  const indent = '  ';
  const lines = html.split('\n');
  if (!lines[0].startsWith(indent)) {
    return html;
  }
  return lines
    .map((line) => (line.startsWith(indent) ? line.slice(indent.length) : line))
    .join('\n');
};
|
||||||
|
|
||||||
/** |
|
||||||
* dump |
|
||||||
* |
|
||||||
* @name dump |
|
||||||
* @function dump recognition result to a JSON object |
|
||||||
* @access public |
|
||||||
*/ |
|
||||||
module.exports = (TessModule, api, { |
|
||||||
tessjs_create_hocr, |
|
||||||
tessjs_create_tsv, |
|
||||||
tessjs_create_box, |
|
||||||
tessjs_create_unlv, |
|
||||||
tessjs_create_osd, |
|
||||||
}) => { |
|
||||||
const ri = api.GetIterator(); |
|
||||||
const { |
|
||||||
RIL_BLOCK, |
|
||||||
RIL_PARA, |
|
||||||
RIL_TEXTLINE, |
|
||||||
RIL_WORD, |
|
||||||
RIL_SYMBOL, |
|
||||||
} = TessModule; |
|
||||||
const blocks = []; |
|
||||||
let block; |
|
||||||
let para; |
|
||||||
let textline; |
|
||||||
let word; |
|
||||||
let symbol; |
|
||||||
|
|
||||||
const enumToString = (value, prefix) => ( |
|
||||||
Object.keys(TessModule) |
|
||||||
.filter((e) => (e.startsWith(`${prefix}_`) && TessModule[e] === value)) |
|
||||||
.map((e) => e.slice(prefix.length + 1))[0] |
|
||||||
); |
|
||||||
|
|
||||||
ri.Begin(); |
|
||||||
do { |
|
||||||
if (ri.IsAtBeginningOf(RIL_BLOCK)) { |
|
||||||
const poly = ri.BlockPolygon(); |
|
||||||
let polygon = null; |
|
||||||
// BlockPolygon() returns null when automatic page segmentation is off
|
|
||||||
if (TessModule.getPointer(poly) > 0) { |
|
||||||
const n = poly.get_n(); |
|
||||||
const px = poly.get_x(); |
|
||||||
const py = poly.get_y(); |
|
||||||
polygon = []; |
|
||||||
for (let i = 0; i < n; i += 1) { |
|
||||||
polygon.push([px.getValue(i), py.getValue(i)]); |
|
||||||
} |
|
||||||
/* |
|
||||||
* TODO: find out why _ptaDestroy doesn't work |
|
||||||
*/ |
|
||||||
// TessModule._ptaDestroy(TessModule.getPointer(poly));
|
|
||||||
} |
|
||||||
|
|
||||||
block = { |
|
||||||
paragraphs: [], |
|
||||||
text: ri.GetUTF8Text(RIL_BLOCK), |
|
||||||
confidence: ri.Confidence(RIL_BLOCK), |
|
||||||
baseline: ri.getBaseline(RIL_BLOCK), |
|
||||||
bbox: ri.getBoundingBox(RIL_BLOCK), |
|
||||||
blocktype: enumToString(ri.BlockType(), 'PT'), |
|
||||||
polygon, |
|
||||||
}; |
|
||||||
blocks.push(block); |
|
||||||
} |
|
||||||
if (ri.IsAtBeginningOf(RIL_PARA)) { |
|
||||||
para = { |
|
||||||
lines: [], |
|
||||||
text: ri.GetUTF8Text(RIL_PARA), |
|
||||||
confidence: ri.Confidence(RIL_PARA), |
|
||||||
baseline: ri.getBaseline(RIL_PARA), |
|
||||||
bbox: ri.getBoundingBox(RIL_PARA), |
|
||||||
is_ltr: !!ri.ParagraphIsLtr(), |
|
||||||
}; |
|
||||||
block.paragraphs.push(para); |
|
||||||
} |
|
||||||
if (ri.IsAtBeginningOf(RIL_TEXTLINE)) { |
|
||||||
textline = { |
|
||||||
words: [], |
|
||||||
text: ri.GetUTF8Text(RIL_TEXTLINE), |
|
||||||
confidence: ri.Confidence(RIL_TEXTLINE), |
|
||||||
baseline: ri.getBaseline(RIL_TEXTLINE), |
|
||||||
bbox: ri.getBoundingBox(RIL_TEXTLINE), |
|
||||||
}; |
|
||||||
para.lines.push(textline); |
|
||||||
} |
|
||||||
if (ri.IsAtBeginningOf(RIL_WORD)) { |
|
||||||
const fontInfo = ri.getWordFontAttributes(); |
|
||||||
const wordDir = ri.WordDirection(); |
|
||||||
word = { |
|
||||||
symbols: [], |
|
||||||
choices: [], |
|
||||||
|
|
||||||
text: ri.GetUTF8Text(RIL_WORD), |
|
||||||
confidence: ri.Confidence(RIL_WORD), |
|
||||||
baseline: ri.getBaseline(RIL_WORD), |
|
||||||
bbox: ri.getBoundingBox(RIL_WORD), |
|
||||||
|
|
||||||
is_numeric: !!ri.WordIsNumeric(), |
|
||||||
in_dictionary: !!ri.WordIsFromDictionary(), |
|
||||||
direction: enumToString(wordDir, 'DIR'), |
|
||||||
language: ri.WordRecognitionLanguage(), |
|
||||||
|
|
||||||
is_bold: fontInfo.is_bold, |
|
||||||
is_italic: fontInfo.is_italic, |
|
||||||
is_underlined: fontInfo.is_underlined, |
|
||||||
is_monospace: fontInfo.is_monospace, |
|
||||||
is_serif: fontInfo.is_serif, |
|
||||||
is_smallcaps: fontInfo.is_smallcaps, |
|
||||||
font_size: fontInfo.pointsize, |
|
||||||
font_id: fontInfo.font_id, |
|
||||||
font_name: fontInfo.font_name, |
|
||||||
}; |
|
||||||
const wc = new TessModule.WordChoiceIterator(ri); |
|
||||||
do { |
|
||||||
word.choices.push({ |
|
||||||
text: wc.GetUTF8Text(), |
|
||||||
confidence: wc.Confidence(), |
|
||||||
}); |
|
||||||
} while (wc.Next()); |
|
||||||
TessModule.destroy(wc); |
|
||||||
textline.words.push(word); |
|
||||||
} |
|
||||||
|
|
||||||
// let image = null;
|
|
||||||
// var pix = ri.GetBinaryImage(TessModule.RIL_SYMBOL)
|
|
||||||
// var image = pix2array(pix);
|
|
||||||
// // for some reason it seems that things stop working if you destroy pics
|
|
||||||
// TessModule._pixDestroy(TessModule.getPointer(pix));
|
|
||||||
if (ri.IsAtBeginningOf(RIL_SYMBOL)) { |
|
||||||
symbol = { |
|
||||||
choices: [], |
|
||||||
image: null, |
|
||||||
text: ri.GetUTF8Text(RIL_SYMBOL), |
|
||||||
confidence: ri.Confidence(RIL_SYMBOL), |
|
||||||
baseline: ri.getBaseline(RIL_SYMBOL), |
|
||||||
bbox: ri.getBoundingBox(RIL_SYMBOL), |
|
||||||
is_superscript: !!ri.SymbolIsSuperscript(), |
|
||||||
is_subscript: !!ri.SymbolIsSubscript(), |
|
||||||
is_dropcap: !!ri.SymbolIsDropcap(), |
|
||||||
}; |
|
||||||
word.symbols.push(symbol); |
|
||||||
const ci = new TessModule.ChoiceIterator(ri); |
|
||||||
do { |
|
||||||
symbol.choices.push({ |
|
||||||
text: ci.GetUTF8Text(), |
|
||||||
confidence: ci.Confidence(), |
|
||||||
}); |
|
||||||
} while (ci.Next()); |
|
||||||
// TessModule.destroy(i);
|
|
||||||
} |
|
||||||
} while (ri.Next(RIL_SYMBOL)); |
|
||||||
TessModule.destroy(ri); |
|
||||||
|
|
||||||
return { |
|
||||||
text: api.GetUTF8Text(), |
|
||||||
hocr: tessjs_create_hocr === '1' ? deindent(api.GetHOCRText()) : null, |
|
||||||
tsv: tessjs_create_tsv === '1' ? api.GetTSVText() : null, |
|
||||||
box: tessjs_create_box === '1' ? api.GetBoxText() : null, |
|
||||||
unlv: tessjs_create_unlv === '1' ? api.GetUNLVText() : null, |
|
||||||
osd: tessjs_create_osd === '1' ? api.GetOsdText() : null, |
|
||||||
confidence: api.MeanTextConf(), |
|
||||||
blocks, |
|
||||||
psm: enumToString(api.GetPageSegMode(), 'PSM'), |
|
||||||
oem: enumToString(api.oem(), 'OEM'), |
|
||||||
version: api.Version(), |
|
||||||
}; |
|
||||||
}; |
|
@ -1,63 +0,0 @@ |
|||||||
const bmp = require('bmp-js'); |
|
||||||
const fileType = require('file-type'); |
|
||||||
|
|
||||||
/** |
|
||||||
* setImage |
|
||||||
* |
|
||||||
* @name setImage |
|
||||||
* @function set image in tesseract for recognition |
|
||||||
* @access public |
|
||||||
*/ |
|
||||||
module.exports = (TessModule, api, image) => { |
|
||||||
const buf = Buffer.from(Array.from({ ...image, length: Object.keys(image).length })); |
|
||||||
const type = fileType(buf); |
|
||||||
let bytesPerPixel = 0; |
|
||||||
let data = null; |
|
||||||
let pix = null; |
|
||||||
let w = 0; |
|
||||||
let h = 0; |
|
||||||
|
|
||||||
const exif = buf.slice(0, 500).toString().match(/\x01\x12\x00\x03\x00\x00\x00\x01\x00(.)/)?.[1]?.charCodeAt(0) || 1; |
|
||||||
|
|
||||||
/* |
|
||||||
* Leptonica supports uncompressed but not compressed bmp files |
|
||||||
* @see https://github.com/DanBloomberg/leptonica/issues/607#issuecomment-1068802516
|
|
||||||
* We therefore use bmp-js to process all bmp files |
|
||||||
*/ |
|
||||||
if (type && type.mime === 'image/bmp') { |
|
||||||
const bmpBuf = bmp.decode(buf); |
|
||||||
data = TessModule._malloc(bmpBuf.data.length * Uint8Array.BYTES_PER_ELEMENT); |
|
||||||
TessModule.HEAPU8.set(bmpBuf.data, data); |
|
||||||
w = bmpBuf.width; |
|
||||||
h = bmpBuf.height; |
|
||||||
bytesPerPixel = 4; |
|
||||||
} else { |
|
||||||
const ptr = TessModule._malloc(buf.length * Uint8Array.BYTES_PER_ELEMENT); |
|
||||||
TessModule.HEAPU8.set(buf, ptr); |
|
||||||
pix = TessModule._pixReadMem(ptr, buf.length); |
|
||||||
if (TessModule.getValue(pix + (7 * 4), 'i32') === 0) { |
|
||||||
/* |
|
||||||
* Set a yres default value to prevent warning from tesseract |
|
||||||
* See kMinCredibleResolution in tesseract/src/ccstruct/publictypes.h |
|
||||||
*/ |
|
||||||
TessModule.setValue(pix + (7 * 4), 300, 'i32'); |
|
||||||
} |
|
||||||
[w, h] = Array(2).fill(0) |
|
||||||
.map((v, idx) => ( |
|
||||||
TessModule.getValue(pix + (idx * 4), 'i32') |
|
||||||
)); |
|
||||||
} |
|
||||||
|
|
||||||
/* |
|
||||||
* As some image format (ex. bmp) is not supported natiely by tesseract, |
|
||||||
* sometimes it will not return pix directly, but data and bytesPerPixel |
|
||||||
* for another SetImage usage. |
|
||||||
* |
|
||||||
*/ |
|
||||||
if (data === null) { |
|
||||||
api.SetImage(pix, undefined, undefined, undefined, undefined, exif); |
|
||||||
} else { |
|
||||||
api.SetImage(data, w, h, bytesPerPixel, w * bytesPerPixel, exif); |
|
||||||
} |
|
||||||
return data === null ? pix : data; |
|
||||||
}; |
|
@ -1,18 +0,0 @@ |
|||||||
const resolveURL = require('resolve-url'); |
|
||||||
const { version } = require('../../../package.json'); |
|
||||||
const defaultOptions = require('../../constants/defaultOptions'); |
|
||||||
|
|
||||||
/* |
|
||||||
* Default options for browser worker |
|
||||||
*/ |
|
||||||
module.exports = { |
|
||||||
...defaultOptions, |
|
||||||
workerPath: (typeof process !== 'undefined' && process.env.TESS_ENV === 'development') |
|
||||||
? resolveURL(`/dist/worker.dev.js?nocache=${Math.random().toString(36).slice(3)}`) |
|
||||||
: `https://unpkg.com/tesseract.js@v${version}/dist/worker.min.js`, |
|
||||||
/* |
|
||||||
* If browser doesn't support WebAssembly, |
|
||||||
* load ASM version instead |
|
||||||
*/ |
|
||||||
corePath: null, |
|
||||||
}; |
|
@ -1,24 +0,0 @@ |
|||||||
/** |
|
||||||
* |
|
||||||
* Tesseract Worker adapter for browser |
|
||||||
* |
|
||||||
* @fileoverview Tesseract Worker adapter for browser |
|
||||||
* @author Kevin Kwok <antimatter15@gmail.com> |
|
||||||
* @author Guillermo Webster <gui@mit.edu> |
|
||||||
* @author Jerome Wu <jeromewus@gmail.com> |
|
||||||
*/ |
|
||||||
const defaultOptions = require('./defaultOptions'); |
|
||||||
const spawnWorker = require('./spawnWorker'); |
|
||||||
const terminateWorker = require('./terminateWorker'); |
|
||||||
const onMessage = require('./onMessage'); |
|
||||||
const send = require('./send'); |
|
||||||
const loadImage = require('./loadImage'); |
|
||||||
|
|
||||||
// Browser worker adapter: gathers the sibling modules required above into a
// single object, keeping the same keys in the same order.
const browserAdapter = {
  defaultOptions,
  spawnWorker,
  terminateWorker,
  onMessage,
  send,
  loadImage,
};

module.exports = browserAdapter;
|
@ -1,68 +0,0 @@ |
|||||||
const resolveURL = require('resolve-url'); |
|
||||||
|
|
||||||
/**
 * readFromBlobOrFile
 *
 * Reads the given Blob/File through a FileReader as an ArrayBuffer.
 *
 * @name readFromBlobOrFile
 * @function
 * @access private
 * @param {Blob|File} blob - object handed to FileReader#readAsArrayBuffer
 * @returns {Promise<ArrayBuffer>} resolves with the reader's result; rejects
 *   with an Error carrying the reader's error code when reading fails
 */
const readFromBlobOrFile = (blob) => new Promise((onDone, onFail) => {
  const reader = new FileReader();

  reader.onload = () => {
    onDone(reader.result);
  };

  reader.onerror = (event) => {
    const { code } = event.target.error;
    onFail(new Error(`File could not be read! Code=${code}`));
  };

  reader.readAsArrayBuffer(blob);
});
|
||||||
|
|
||||||
/**
 * loadImage
 *
 * Normalises the supported image inputs into a Uint8Array of bytes:
 *  - base64 `data:image/...` string: decoded in place
 *  - any other string: treated as a URL, resolved and fetched
 *  - <img> / <video> element: its `src` / `poster` is loaded recursively
 *  - <canvas> element: exported with toBlob() and read
 *  - File / Blob: read through readFromBlobOrFile
 *  - anything else: passed straight to the Uint8Array constructor
 *
 * @name loadImage
 * @function load image from different source
 * @access private
 * @param {string|HTMLElement|File|Blob|ArrayBuffer|ArrayLike<number>} image
 * @returns {Promise<Uint8Array|string>} the image bytes, or the literal
 *   string 'undefined' when no image was supplied (kept for backward
 *   compatibility)
 */
const loadImage = async (image) => {
  if (typeof image === 'undefined') {
    return 'undefined';
  }

  let data = image;

  /*
   * DOM constructors are looked up with typeof first: a bare `instanceof`
   * against an undeclared global throws a ReferenceError in environments
   * that do not define them (e.g. worker scopes).
   */
  const isElement = typeof HTMLElement !== 'undefined' && image instanceof HTMLElement;
  const isBlobLike = (typeof File !== 'undefined' && image instanceof File)
    || (typeof Blob !== 'undefined' && image instanceof Blob);

  if (typeof image === 'string') {
    // Base64 Image
    if (/data:image\/([a-zA-Z]*);base64,([^"]*)/.test(image)) {
      data = atob(image.split(',')[1])
        .split('')
        .map((c) => c.charCodeAt(0));
    } else {
      const resp = await fetch(resolveURL(image));
      data = await resp.arrayBuffer();
    }
  } else if (isElement) {
    if (image.tagName === 'IMG') {
      data = await loadImage(image.src);
    }
    if (image.tagName === 'VIDEO') {
      data = await loadImage(image.poster);
    }
    if (image.tagName === 'CANVAS') {
      /*
       * Only the callback-style toBlob() is wrapped in a promise; the read is
       * awaited here so that a failure rejects loadImage instead of leaving
       * it pending forever with an unhandled rejection.
       */
      const blob = await new Promise((resolve) => {
        image.toBlob(resolve);
      });
      data = await readFromBlobOrFile(blob);
    }
  } else if (isBlobLike) {
    data = await readFromBlobOrFile(image);
  }

  return new Uint8Array(data);
};
|
||||||
|
|
||||||
module.exports = loadImage; |
|
@ -1,5 +0,0 @@ |
|||||||
module.exports = (worker, handler) => { |
|
||||||
worker.onmessage = ({ data }) => { // eslint-disable-line
|
|
||||||
handler(data); |
|
||||||
}; |
|
||||||
}; |
|
@ -1,10 +0,0 @@ |
|||||||
/** |
|
||||||
* send |
|
||||||
* |
|
||||||
* @name send |
|
||||||
* @function send packet to worker and create a job |
|
||||||
* @access public |
|
||||||
*/ |
|
||||||
module.exports = async (worker, packet) => { |
|
||||||
worker.postMessage(packet); |
|
||||||
}; |
|
@ -1,20 +0,0 @@ |
|||||||
/** |
|
||||||
* spawnWorker |
|
||||||
* |
|
||||||
* @name spawnWorker |
|
||||||
* @function create a new Worker in browser |
|
||||||
* @access public |
|
||||||
*/ |
|
||||||
module.exports = ({ workerPath, workerBlobURL }) => { |
|
||||||
let worker; |
|
||||||
if (Blob && URL && workerBlobURL) { |
|
||||||
const blob = new Blob([`importScripts("${workerPath}");`], { |
|
||||||
type: 'application/javascript', |
|
||||||
}); |
|
||||||
worker = new Worker(URL.createObjectURL(blob)); |
|
||||||
} else { |
|
||||||
worker = new Worker(workerPath); |
|
||||||
} |
|
||||||
|
|
||||||
return worker; |
|
||||||
}; |
|
@ -1,10 +0,0 @@ |
|||||||
/** |
|
||||||
* terminateWorker |
|
||||||
* |
|
||||||
* @name terminateWorker |
|
||||||
* @function terminate worker |
|
||||||
* @access public |
|
||||||
*/ |
|
||||||
module.exports = (worker) => { |
|
||||||
worker.terminate(); |
|
||||||
}; |
|
@ -1,10 +0,0 @@ |
|||||||
const path = require('path'); |
|
||||||
const defaultOptions = require('../../constants/defaultOptions'); |
|
||||||
|
|
||||||
/* |
|
||||||
* Default options for node worker |
|
||||||
*/ |
|
||||||
module.exports = { |
|
||||||
...defaultOptions, |
|
||||||
workerPath: path.join(__dirname, '..', '..', 'worker-script', 'node', 'index.js'), |
|
||||||
}; |
|