Compare commits

..

1 Commits

Author SHA1 Message Date
dependabot[bot] 249ab776f7
Bump json-schema from 0.2.3 to 0.4.0 2 years ago
  1. 25
      docs/image-format.md
  2. 38
      package-lock.json
  3. 2
      package.json
  4. 1
      src/constants/PSM.js
  5. 13
      src/index.d.ts
  6. 10
      src/worker-script/index.js

25
docs/image-format.md

@ -1,18 +1,17 @@
# Image Format # Image Format
The main Tesseract.js functions (e.g. recognize, detect) take an `image` parameter. The image formats and data types supported are listed below. Supported formats: **bmp, jpg, png, pbm**
Supported image formats: **bmp, jpg, png, pbm, webp** The main Tesseract.js functions (e.g. recognize, detect) take an `image` parameter, which should be an image-like value. What counts as "image-like" depends on whether the code is run in the browser or in Node.js.
For browser and Node, supported data types are: On a browser, an image can be:
- string with base64 encoded image (fits `data:image\/([a-zA-Z]*);base64,([^"]*)` regexp) - an `img` or `canvas` element
- buffer - a `File` object (from a file `<input>`)
- a `Blob` object
- a path or URL to an accessible image
- a base64 encoded image that fits the `data:image\/([a-zA-Z]*);base64,([^"]*)` regexp
For browser only, supported data types are: In Node.js, an image can be:
- `File` or `Blob` object - a path to a local image
- `img` or `canvas` element - a Buffer storing a binary image
- a base64 encoded image that fits the `data:image\/([a-zA-Z]*);base64,([^"]*)` regexp
For Node only, supported data types are:
- string containing a path to local image
Note: images must be a supported image format **and** a supported data type. For example, a buffer containing a png image is supported. A buffer containing raw pixel data is not supported.

38
package-lock.json generated

@ -1,12 +1,12 @@
{ {
"name": "tesseract.js", "name": "tesseract.js",
"version": "3.0.3", "version": "3.0.2",
"lockfileVersion": 2, "lockfileVersion": 2,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "tesseract.js", "name": "tesseract.js",
"version": "3.0.3", "version": "3.0.2",
"hasInstallScript": true, "hasInstallScript": true,
"license": "Apache-2.0", "license": "Apache-2.0",
"dependencies": { "dependencies": {
@ -5904,9 +5904,9 @@
"dev": true "dev": true
}, },
"node_modules/json-schema": { "node_modules/json-schema": {
"version": "0.2.3", "version": "0.4.0",
"resolved": "https://registry.npmjs.org/json-schema/-/json-schema-0.2.3.tgz", "resolved": "https://registry.npmjs.org/json-schema/-/json-schema-0.4.0.tgz",
"integrity": "sha1-tIDIkuWaLwWVTOcnvT8qTogvnhM=", "integrity": "sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA==",
"dev": true "dev": true
}, },
"node_modules/json-schema-traverse": { "node_modules/json-schema-traverse": {
@ -5947,18 +5947,18 @@
} }
}, },
"node_modules/jsprim": { "node_modules/jsprim": {
"version": "1.4.1", "version": "1.4.2",
"resolved": "https://registry.npmjs.org/jsprim/-/jsprim-1.4.1.tgz", "resolved": "https://registry.npmjs.org/jsprim/-/jsprim-1.4.2.tgz",
"integrity": "sha1-MT5mvB5cwG5Di8G3SZwuXFastqI=", "integrity": "sha512-P2bSOMAc/ciLz6DzgjVlGJP9+BrJWu5UDGK70C2iweC5QBIeFf0ZXRvGjEj2uYgrY2MkAAhsSWHDWlFtEroZWw==",
"dev": true, "dev": true,
"engines": [
"node >=0.6.0"
],
"dependencies": { "dependencies": {
"assert-plus": "1.0.0", "assert-plus": "1.0.0",
"extsprintf": "1.3.0", "extsprintf": "1.3.0",
"json-schema": "0.2.3", "json-schema": "0.4.0",
"verror": "1.10.0" "verror": "1.10.0"
},
"engines": {
"node": ">=0.6.0"
} }
}, },
"node_modules/kind-of": { "node_modules/kind-of": {
@ -14005,9 +14005,9 @@
"dev": true "dev": true
}, },
"json-schema": { "json-schema": {
"version": "0.2.3", "version": "0.4.0",
"resolved": "https://registry.npmjs.org/json-schema/-/json-schema-0.2.3.tgz", "resolved": "https://registry.npmjs.org/json-schema/-/json-schema-0.4.0.tgz",
"integrity": "sha1-tIDIkuWaLwWVTOcnvT8qTogvnhM=", "integrity": "sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA==",
"dev": true "dev": true
}, },
"json-schema-traverse": { "json-schema-traverse": {
@ -14042,14 +14042,14 @@
"dev": true "dev": true
}, },
"jsprim": { "jsprim": {
"version": "1.4.1", "version": "1.4.2",
"resolved": "https://registry.npmjs.org/jsprim/-/jsprim-1.4.1.tgz", "resolved": "https://registry.npmjs.org/jsprim/-/jsprim-1.4.2.tgz",
"integrity": "sha1-MT5mvB5cwG5Di8G3SZwuXFastqI=", "integrity": "sha512-P2bSOMAc/ciLz6DzgjVlGJP9+BrJWu5UDGK70C2iweC5QBIeFf0ZXRvGjEj2uYgrY2MkAAhsSWHDWlFtEroZWw==",
"dev": true, "dev": true,
"requires": { "requires": {
"assert-plus": "1.0.0", "assert-plus": "1.0.0",
"extsprintf": "1.3.0", "extsprintf": "1.3.0",
"json-schema": "0.2.3", "json-schema": "0.4.0",
"verror": "1.10.0" "verror": "1.10.0"
} }
}, },

2
package.json

@ -1,6 +1,6 @@
{ {
"name": "tesseract.js", "name": "tesseract.js",
"version": "3.0.3", "version": "3.0.2",
"description": "Pure Javascript Multilingual OCR", "description": "Pure Javascript Multilingual OCR",
"main": "src/index.js", "main": "src/index.js",
"types": "src/index.d.ts", "types": "src/index.d.ts",

1
src/constants/PSM.js

@ -15,5 +15,4 @@ module.exports = {
SINGLE_CHAR: '10', SINGLE_CHAR: '10',
SPARSE_TEXT: '11', SPARSE_TEXT: '11',
SPARSE_TEXT_OSD: '12', SPARSE_TEXT_OSD: '12',
RAW_LINE: '13',
}; };

13
src/index.d.ts vendored

@ -19,8 +19,8 @@ declare namespace Tesseract {
readText(path: string, jobId?: string): Promise<ConfigResult> readText(path: string, jobId?: string): Promise<ConfigResult>
removeText(path: string, jobId?: string): Promise<ConfigResult> removeText(path: string, jobId?: string): Promise<ConfigResult>
FS(method: string, args: any[], jobId?: string): Promise<ConfigResult> FS(method: string, args: any[], jobId?: string): Promise<ConfigResult>
loadLanguage(langs?: string | Lang[], jobId?: string): Promise<ConfigResult> loadLanguage(langs?: string, jobId?: string): Promise<ConfigResult>
initialize(langs?: string | Lang[], oem?: OEM, jobId?: string): Promise<ConfigResult> initialize(langs?: string, oem?: OEM, jobId?: string): Promise<ConfigResult>
setParameters(params: Partial<WorkerParams>, jobId?: string): Promise<ConfigResult> setParameters(params: Partial<WorkerParams>, jobId?: string): Promise<ConfigResult>
recognize(image: ImageLike, options?: Partial<RecognizeOptions>, jobId?: string): Promise<RecognizeResult> recognize(image: ImageLike, options?: Partial<RecognizeOptions>, jobId?: string): Promise<RecognizeResult>
detect(image: ImageLike, jobId?: string): Promise<DetectResult> detect(image: ImageLike, jobId?: string): Promise<DetectResult>
@ -28,11 +28,6 @@ declare namespace Tesseract {
getPDF(title?: string, textonly?: boolean, jobId?: string):Promise<GetPDFResult> getPDF(title?: string, textonly?: boolean, jobId?: string):Promise<GetPDFResult>
} }
interface Lang {
code: string;
data: unknown;
}
interface WorkerOptions { interface WorkerOptions {
corePath: string corePath: string
langPath: string langPath: string
@ -89,13 +84,13 @@ declare namespace Tesseract {
width: number width: number
height: number height: number
} }
enum OEM { const enum OEM {
TESSERACT_ONLY, TESSERACT_ONLY,
LSTM_ONLY, LSTM_ONLY,
TESSERACT_LSTM_COMBINED, TESSERACT_LSTM_COMBINED,
DEFAULT, DEFAULT,
} }
enum PSM { const enum PSM {
OSD_ONLY = '0', OSD_ONLY = '0',
AUTO_OSD = '1', AUTO_OSD = '1',
AUTO_ONLY = '2', AUTO_ONLY = '2',

10
src/worker-script/index.js

@ -144,7 +144,15 @@ res) => {
res.progress({ workerId, status: 'loaded language traineddata', progress: 1 }); res.progress({ workerId, status: 'loaded language traineddata', progress: 1 });
res.resolve(langs); res.resolve(langs);
} catch (err) { } catch (err) {
res.reject(err.toString()); if (isWebWorker && err instanceof DOMException) {
/*
* For some reason Google Chrome throws a DOMException in loadLang,
* while other browsers are fine. For now we ignore this exception
* and hope to find the root cause one day.
*/
} else {
res.reject(err.toString());
}
} }
}; };

Loading…
Cancel
Save