Compare commits

..

1 Commits

Author SHA1 Message Date
dependabot[bot] 965bd8a5a4
Bump y18n from 4.0.0 to 4.0.1 4 years ago
  1. 4
      .eslintrc
  2. 2
      .github/FUNDING.yml
  3. 2
      .github/workflows/node.js.yml
  4. 30
      README.md
  5. 25
      docs/image-format.md
  6. 33
      examples/browser/benchmark.html
  7. 1
      examples/browser/demo.html
  8. BIN
      examples/data/meditations.jpg
  9. BIN
      examples/data/testocr.png
  10. BIN
      examples/data/tyger.jpg
  11. 27
      examples/node/benchmark.js
  12. 20468
      package-lock.json
  13. 27
      package.json
  14. 13
      scripts/rollup.esm.js
  15. 5
      scripts/webpack.config.common.js
  16. 3
      scripts/webpack.config.dev.js
  17. 6
      scripts/webpack.config.prod.js
  18. 1
      src/constants/PSM.js
  19. 19
      src/index.d.ts
  20. 2
      src/utils/resolvePaths.js
  21. 27
      src/worker-script/browser/getCore.js
  22. 25
      src/worker-script/index.js
  23. 11
      src/worker-script/node/getCore.js
  24. 5
      src/worker-script/node/index.js
  25. 12
      src/worker-script/utils/setImage.js
  26. 4
      src/worker/browser/defaultOptions.js
  27. 36
      src/worker/browser/loadImage.js
  28. 5
      src/worker/node/loadImage.js
  29. 4
      src/worker/node/send.js
  30. 9
      src/worker/node/spawnWorker.js
  31. 2
      src/worker/node/terminateWorker.js
  32. BIN
      tests/assets/images/simple.gif
  33. BIN
      tests/assets/images/simple.webp
  34. 4
      tests/constants.js

4
.eslintrc

@ -1,6 +1,5 @@ @@ -1,6 +1,5 @@
{
"extends": "airbnb-base",
"parser": "babel-eslint",
"env": {
"browser": true,
"node": true,
@ -11,7 +10,6 @@ @@ -11,7 +10,6 @@
"no-underscore-dangle": 0,
"no-console": 0,
"global-require": 0,
"camelcase": 0,
"no-control-regex": 0
"camelcase": 0
}
}

2
.github/FUNDING.yml

@ -6,4 +6,4 @@ open_collective: tesseractjs @@ -6,4 +6,4 @@ open_collective: tesseractjs
ko_fi: # Replace with a single Ko-fi username
tidelift: npm/tesseract.js
community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
custom: ["https://etherscan.io/address/0x74ace8c74535d6dac03ebdc708ca2fba54796ef2"]
custom: ["https://etherscan.io/address/0x74ace8c74535d6dac03ebdc708ca2fba54796ef2", "https://www.paypal.me/jeromewusg"]

2
.github/workflows/node.js.yml

@ -16,7 +16,7 @@ jobs: @@ -16,7 +16,7 @@ jobs:
strategy:
matrix:
node-version: [14.x, 16.x]
node-version: [10.x, 12.x, 14.x]
steps:
- uses: actions/checkout@v2

30
README.md

@ -12,6 +12,13 @@ @@ -12,6 +12,13 @@
[![Downloads Total](https://img.shields.io/npm/dt/tesseract.js.svg)](https://www.npmjs.com/package/tesseract.js)
[![Downloads Month](https://img.shields.io/npm/dm/tesseract.js.svg)](https://www.npmjs.com/package/tesseract.js)
<h3 align="center">
Version 2 is now available and under development in the master branch. Read the story behind v2: <a href="https://jeromewu.github.io/why-i-refactor-tesseract.js-v2/">Why I refactor tesseract.js v2?</a><br>
Check the <a href="https://github.com/naptha/tesseract.js/tree/support/1.x">support/1.x</a> branch for version 1
</h3>
<br>
Tesseract.js is a javascript library that gets words in [almost any language](./docs/tesseract_lang_list.md) out of images. ([Demo](http://tesseract.projectnaptha.com/))
Image Recognition
@ -62,16 +69,6 @@ const worker = createWorker({ @@ -62,16 +69,6 @@ const worker = createWorker({
[Check out the docs](#documentation) for a full explanation of the API.
## Major changes in v3
- Significantly faster performance
- Runtime reduction of 84% for Browser and 96% for Node.js when recognizing the [example images](./examples/data)
- Upgrade to Tesseract v5.1.0 (using emscripten 3.1.18)
- Added SIMD-enabled build for supported devices
- Added support:
- Node.js version 18
- Removed support:
- ASM.js version, any other old versions of Tesseract.js-core (<3.0.0)
- Node.js versions 10 and 12
## Major changes in v2
- Upgrade to tesseract v4.1.1 (using emscripten 1.39.10 upstream)
@ -80,8 +77,7 @@ const worker = createWorker({ @@ -80,8 +77,7 @@ const worker = createWorker({
- Support WebAssembly (fallback to ASM.js when browser doesn't support)
- Support Typescript
Read a story about v2: <a href="https://jeromewu.github.io/why-i-refactor-tesseract.js-v2/">Why I refactor tesseract.js v2?</a><br>
Check the <a href="https://github.com/naptha/tesseract.js/tree/support/1.x">support/1.x</a> branch for version 1
## Installation
Tesseract.js works with a `<script>` tag via local copy or CDN, with webpack via `npm` and on Node.js with `npm/yarn`.
@ -99,16 +95,16 @@ After including the script the `Tesseract` variable will be globally available. @@ -99,16 +95,16 @@ After including the script the `Tesseract` variable will be globally available.
### Node.js
**Tesseract.js v3 requires Node.js v14 or higher**
**Tesseract.js currently requires Node.js v6.8.0 or higher**
```shell
# For v3
# For v2
npm install tesseract.js
yarn add tesseract.js
# For v2
npm install tesseract.js@2
yarn add tesseract.js@2
# For v1
npm install tesseract.js@1
yarn add tesseract.js@1
```

25
docs/image-format.md

@ -1,18 +1,17 @@ @@ -1,18 +1,17 @@
# Image Format
The main Tesseract.js functions (ex. recognize, detect) take an `image` parameter. The image formats and data types supported are listed below.
Supported formats: **bmp, jpg, png, pbm**
Supported image formats: **bmp, jpg, png, pbm, webp**
The main Tesseract.js functions (ex. recognize, detect) take an `image` parameter, which should be something that is like an image. What's considered "image-like" differs depending on whether it is being run from the browser or through NodeJS.
For browser and Node, supported data types are:
- string with base64 encoded image (fits `data:image\/([a-zA-Z]*);base64,([^"]*)` regexp)
- buffer
On a browser, an image can be:
- an `img`, `video`, or `canvas` element
- a `File` object (from a file `<input>`)
- a `Blob` object
- a path or URL to an accessible image
- a base64-encoded image that matches the `data:image\/([a-zA-Z]*);base64,([^"]*)` regexp
For browser only, supported data types are:
- `File` or `Blob` object
- `img` or `canvas` element
For Node only, supported data types are:
- string containing a path to local image
Note: images must be a supported image format **and** a supported data type. For example, a buffer containing a png image is supported. A buffer containing raw pixel data is not supported.
In Node.js, an image can be:
- a path to a local image
- a `Buffer` containing the binary image data
- a base64-encoded image that matches the `data:image\/([a-zA-Z]*);base64,([^"]*)` regexp

33
examples/browser/benchmark.html

@ -1,33 +0,0 @@ @@ -1,33 +0,0 @@
<html>
  <head>
    <script src="/dist/tesseract.dev.js"></script>
  </head>
  <body>
    <!-- Benchmark output: one line per image, followed by the total runtime. -->
    <textarea id="message">Working...</textarea>
    <script>
      const { createWorker } = Tesseract;
      const worker = createWorker();

      // Number of times each image is recognized per timing sample.
      const RUNS_PER_IMAGE = 10;

      const output = document.getElementById('message');

      // Append a line through `.value`: that is the property holding a
      // textarea's current text, and it inserts the string verbatim instead of
      // parsing it as markup the way `innerHTML` does.
      const report = (line) => {
        output.value += `\n${line}`;
      };

      (async () => {
        try {
          await worker.load();
          await worker.loadLanguage('eng');
          await worker.initialize('eng');

          const fileArr = ["../data/meditations.jpg", "../data/tyger.jpg", "../data/testocr.png"];
          let timeTotal = 0;

          for (const file of fileArr) {
            const time1 = Date.now();
            // Runs are sequential on purpose: a single worker is being timed.
            for (let i = 0; i < RUNS_PER_IMAGE; i += 1) {
              await worker.recognize(file);
            }
            const time2 = Date.now();

            // Elapsed wall-clock time in seconds.
            const timeDif = (time2 - time1) / 1e3;
            timeTotal += timeDif;
            report(`${file} [x${RUNS_PER_IMAGE}] runtime: ${timeDif}s`);
          }

          report(`Total runtime: ${timeTotal}s`);
        } catch (err) {
          // Surface the failure instead of leaving "Working..." on screen forever.
          report(`Benchmark failed: ${err}`);
        } finally {
          // Release the worker whether or not the benchmark succeeded.
          await worker.terminate();
        }
      })();
    </script>
  </body>
</html>

1
examples/browser/demo.html

@ -71,6 +71,7 @@ async function recognizeFile(file) { @@ -71,6 +71,7 @@ async function recognizeFile(file) {
<option value='meme' > Internet Meme </option>
<option value='epo' > Esperanto </option>
<option value='epo_alt' > Esperanto alternative </option>
<option value='equ' > Math </option>
<option value='est' > Estonian </option>
<option value='eus' > Basque </option>
<option value='fin' > Finnish </option>

BIN
examples/data/meditations.jpg

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1011 KiB

BIN
examples/data/testocr.png

Binary file not shown.

Before

Width:  |  Height:  |  Size: 23 KiB

BIN
examples/data/tyger.jpg

Binary file not shown.

Before

Width:  |  Height:  |  Size: 408 KiB

27
examples/node/benchmark.js

@ -1,27 +0,0 @@ @@ -1,27 +0,0 @@
#!/usr/bin/env node
const path = require('path');
const { createWorker } = require('../../');
const worker = createWorker();

// Number of times each image is recognized per timing sample.
const RUNS_PER_IMAGE = 10;

/*
 * Benchmark: recognize each example image RUNS_PER_IMAGE times with a single
 * worker and print the elapsed wall-clock time per image and in total.
 */
(async () => {
  try {
    await worker.load();
    await worker.loadLanguage('eng');
    await worker.initialize('eng');

    const fileArr = ["../data/meditations.jpg", "../data/tyger.jpg", "../data/testocr.png"];
    let timeTotal = 0;

    for (const file of fileArr) {
      // Resolve against this script's directory (not the current working
      // directory) so the benchmark can be launched from anywhere.
      const imagePath = path.join(__dirname, file);

      const time1 = Date.now();
      // Runs are sequential on purpose: a single worker is being timed.
      for (let i = 0; i < RUNS_PER_IMAGE; i += 1) {
        await worker.recognize(imagePath);
      }
      const time2 = Date.now();

      // Elapsed time in seconds.
      const timeDif = (time2 - time1) / 1e3;
      timeTotal += timeDif;
      console.log(`${file} [x${RUNS_PER_IMAGE}] runtime: ${timeDif}s`);
    }

    console.log(`Total runtime: ${timeTotal}s`);
  } finally {
    // Always release the worker, even when loading or recognition fails.
    await worker.terminate();
  }
})().catch((err) => {
  // Report the failure and signal it through the exit code.
  console.error(err);
  process.exitCode = 1;
});

20468
package-lock.json generated

File diff suppressed because it is too large Load Diff

27
package.json

@ -1,6 +1,6 @@ @@ -1,6 +1,6 @@
{
"name": "tesseract.js",
"version": "3.0.3",
"version": "2.1.4",
"description": "Pure Javascript Multilingual OCR",
"main": "src/index.js",
"types": "src/index.d.ts",
@ -8,7 +8,7 @@ @@ -8,7 +8,7 @@
"jsdelivr": "dist/tesseract.min.js",
"scripts": {
"start": "node scripts/server.js",
"build": "rimraf dist && webpack --config scripts/webpack.config.prod.js && rollup -c scripts/rollup.esm.js",
"build": "rimraf dist && webpack --config scripts/webpack.config.prod.js",
"profile:tesseract": "webpack-bundle-analyzer dist/tesseract-stats.json",
"profile:worker": "webpack-bundle-analyzer dist/worker-stats.json",
"prepublishOnly": "npm run build",
@ -35,12 +35,10 @@ @@ -35,12 +35,10 @@
],
"license": "Apache-2.0",
"devDependencies": {
"@babel/core": "^7.18.7",
"@babel/preset-env": "^7.18.7",
"@rollup/plugin-commonjs": "^22.0.2",
"@babel/core": "^7.7.7",
"@babel/preset-env": "^7.7.7",
"acorn": "^6.4.0",
"babel-loader": "^8.2.0",
"buffer": "^6.0.3",
"babel-loader": "^8.1.0",
"cors": "^2.8.5",
"eslint": "^7.2.0",
"eslint-config-airbnb-base": "^14.2.0",
@ -52,26 +50,25 @@ @@ -52,26 +50,25 @@
"npm-run-all": "^4.1.5",
"nyc": "^15.1.0",
"rimraf": "^2.7.1",
"rollup": "^2.79.0",
"wait-on": "^3.3.0",
"webpack": "^5.74.0",
"webpack-bundle-analyzer": "^4.6.0",
"webpack-cli": "^4.10.0",
"webpack-dev-middleware": "^5.3.3"
"webpack": "^4.44.2",
"webpack-bundle-analyzer": "^3.6.0",
"webpack-cli": "^3.3.12",
"webpack-dev-middleware": "^3.7.2"
},
"dependencies": {
"babel-eslint": "^10.1.0",
"blueimp-load-image": "^3.0.0",
"bmp-js": "^0.1.0",
"file-type": "^12.4.1",
"idb-keyval": "^3.2.0",
"is-electron": "^2.2.0",
"is-url": "^1.2.4",
"jpeg-autorotate": "^7.1.1",
"node-fetch": "^2.6.0",
"opencollective-postinstall": "^2.0.2",
"regenerator-runtime": "^0.13.3",
"resolve-url": "^0.2.1",
"tesseract.js-core": "^3.0.2",
"wasm-feature-detect": "^1.2.11",
"tesseract.js-core": "^2.2.0",
"zlibjs": "^0.3.1"
},
"repository": {

13
scripts/rollup.esm.js

@ -1,13 +0,0 @@ @@ -1,13 +0,0 @@
import commonjs from "@rollup/plugin-commonjs";
/*
 * Rollup build that takes dist/tesseract.min.js as input, runs it through the
 * CommonJS plugin, and writes an ES-module bundle to dist/tesseract.esm.min.js.
 * The banner disables ESLint for the generated file.
 */
const esmBuild = {
  input: "dist/tesseract.min.js",
  output: {
    file: "dist/tesseract.esm.min.js",
    format: "esm",
    banner: "/* eslint-disable */",
  },
  plugins: [commonjs()],
};

export default [esmBuild];

5
scripts/webpack.config.common.js

@ -1,9 +1,4 @@ @@ -1,9 +1,4 @@
module.exports = {
resolve: {
fallback: {
buffer: require.resolve('buffer/'),
},
},
module: {
rules: [
{

3
scripts/webpack.config.dev.js

@ -15,9 +15,6 @@ const genConfig = ({ @@ -15,9 +15,6 @@ const genConfig = ({
libraryTarget,
},
plugins: [
new webpack.ProvidePlugin({
Buffer: ['buffer', 'Buffer'],
}),
new webpack.DefinePlugin({
'process.env': {
TESS_ENV: JSON.stringify('development'),

6
scripts/webpack.config.prod.js

@ -1,6 +1,5 @@ @@ -1,6 +1,5 @@
const path = require('path');
const common = require('./webpack.config.common');
const webpack = require('webpack');
const genConfig = ({
entry, filename, library, libraryTarget,
@ -15,11 +14,6 @@ const genConfig = ({ @@ -15,11 +14,6 @@ const genConfig = ({
library,
libraryTarget,
},
plugins: [
new webpack.ProvidePlugin({
Buffer: ['buffer', 'Buffer'],
}),
]
});
module.exports = [

1
src/constants/PSM.js

@ -15,5 +15,4 @@ module.exports = { @@ -15,5 +15,4 @@ module.exports = {
SINGLE_CHAR: '10',
SPARSE_TEXT: '11',
SPARSE_TEXT_OSD: '12',
RAW_LINE: '13',
};

19
src/index.d.ts vendored

@ -19,18 +19,12 @@ declare namespace Tesseract { @@ -19,18 +19,12 @@ declare namespace Tesseract {
readText(path: string, jobId?: string): Promise<ConfigResult>
removeText(path: string, jobId?: string): Promise<ConfigResult>
FS(method: string, args: any[], jobId?: string): Promise<ConfigResult>
loadLanguage(langs?: string | Lang[], jobId?: string): Promise<ConfigResult>
initialize(langs?: string | Lang[], oem?: OEM, jobId?: string): Promise<ConfigResult>
loadLanguage(langs?: string, jobId?: string): Promise<ConfigResult>
initialize(langs?: string, oem?: OEM, jobId?: string): Promise<ConfigResult>
setParameters(params: Partial<WorkerParams>, jobId?: string): Promise<ConfigResult>
recognize(image: ImageLike, options?: Partial<RecognizeOptions>, jobId?: string): Promise<RecognizeResult>
detect(image: ImageLike, jobId?: string): Promise<DetectResult>
terminate(jobId?: string): Promise<ConfigResult>
getPDF(title?: string, textonly?: boolean, jobId?: string):Promise<GetPDFResult>
}
interface Lang {
code: string;
data: unknown;
}
interface WorkerOptions {
@ -68,10 +62,6 @@ declare namespace Tesseract { @@ -68,10 +62,6 @@ declare namespace Tesseract {
jobId: string
data: Page
}
interface GetPDFResult {
jobId: string
data: number[]
}
interface DetectResult {
jobId: string
data: DetectData
@ -89,13 +79,13 @@ declare namespace Tesseract { @@ -89,13 +79,13 @@ declare namespace Tesseract {
width: number
height: number
}
enum OEM {
const enum OEM {
TESSERACT_ONLY,
LSTM_ONLY,
TESSERACT_LSTM_COMBINED,
DEFAULT,
}
enum PSM {
const enum PSM {
OSD_ONLY = '0',
AUTO_OSD = '1',
AUTO_ONLY = '2',
@ -109,7 +99,6 @@ declare namespace Tesseract { @@ -109,7 +99,6 @@ declare namespace Tesseract {
SINGLE_CHAR = '10',
SPARSE_TEXT = '11',
SPARSE_TEXT_OSD = '12',
RAW_LINE = '13'
}
type ImageLike = string | HTMLImageElement | HTMLCanvasElement | HTMLVideoElement
| CanvasRenderingContext2D | File | Blob | ImageData | Buffer;

2
src/utils/resolvePaths.js

@ -4,7 +4,7 @@ const resolveURL = isBrowser ? require('resolve-url') : s => s; // eslint-disabl @@ -4,7 +4,7 @@ const resolveURL = isBrowser ? require('resolve-url') : s => s; // eslint-disabl
module.exports = (options) => {
const opts = { ...options };
['corePath', 'workerPath', 'langPath'].forEach((key) => {
if (options[key]) {
if (typeof options[key] !== 'undefined') {
opts[key] = resolveURL(opts[key]);
}
});

27
src/worker-script/browser/getCore.js

@ -1,26 +1,15 @@ @@ -1,26 +1,15 @@
const { simd } = require('wasm-feature-detect');
const { dependencies } = require('../../../package.json');
module.exports = async (corePath, res) => {
module.exports = (corePath, res) => {
if (typeof global.TesseractCore === 'undefined') {
res.progress({ status: 'loading tesseract core', progress: 0 });
// If the user specifies a core path, we use that
// Otherwise, we detect the correct core based on SIMD support
let corePathImport = corePath;
if (!corePathImport) {
const simdSupport = await simd();
if (simdSupport) {
corePathImport = `https://unpkg.com/tesseract.js-core@v${dependencies['tesseract.js-core'].substring(1)}/tesseract-core-simd.wasm.js`;
} else {
corePathImport = `https://unpkg.com/tesseract.js-core@v${dependencies['tesseract.js-core'].substring(1)}/tesseract-core.wasm.js`;
}
}
global.importScripts(corePathImport);
global.importScripts(corePath);
/*
* Depending on whether the browser supports WebAssembly,
* the version of the TesseractCore will be different.
*/
if (typeof global.TesseractCoreWASM !== 'undefined' && typeof WebAssembly === 'object') {
global.TesseractCore = global.TesseractCoreWASM;
} else if (typeof global.TesseractCoreASM !== 'undefined') {
global.TesseractCore = global.TesseractCoreASM;
} else {
throw Error('Failed to load TesseractCore');
}

25
src/worker-script/index.js

@ -28,10 +28,10 @@ let latestJob; @@ -28,10 +28,10 @@ let latestJob;
let adapter = {};
let params = defaultParams;
const load = async ({ workerId, jobId, payload: { options: { corePath, logging } } }, res) => {
const load = ({ workerId, jobId, payload: { options: { corePath, logging } } }, res) => {
setLogging(logging);
if (!TessModule) {
const Core = await adapter.getCore(corePath, res);
const Core = adapter.getCore(corePath, res);
res.progress({ workerId, status: 'initializing tesseract', progress: 0 });
@ -72,7 +72,7 @@ const loadLanguage = async ({ @@ -72,7 +72,7 @@ const loadLanguage = async ({
},
},
},
res) => {
res) => {
const loadAndGunzipFile = async (_lang) => {
const lang = typeof _lang === 'string' ? _lang : _lang.code;
const readCache = ['refresh', 'none'].includes(cacheMethod)
@ -99,11 +99,7 @@ res) => { @@ -99,11 +99,7 @@ res) => {
}
if (path !== null) {
const fetchUrl = `${path}/${lang}.traineddata${gzip ? '.gz' : ''}`;
const resp = await (isWebWorker ? fetch : adapter.fetch)(fetchUrl);
if (!resp.ok) {
throw Error(`Network error while fetching ${fetchUrl}. Response code: ${resp.status}`);
}
const resp = await (isWebWorker ? fetch : adapter.fetch)(`${path}/${lang}.traineddata${gzip ? '.gz' : ''}`);
data = await resp.arrayBuffer();
} else {
data = await adapter.readCache(`${langPath}/${lang}.traineddata${gzip ? '.gz' : ''}`);
@ -144,8 +140,16 @@ res) => { @@ -144,8 +140,16 @@ res) => {
res.progress({ workerId, status: 'loaded language traineddata', progress: 1 });
res.resolve(langs);
} catch (err) {
if (isWebWorker && err instanceof DOMException) {
/*
* For some reason, Google Chrome throws a DOMException in loadLang
* while other browsers do not. For now we ignore this exception,
* hoping to find the root cause one day.
*/
} else {
res.reject(err.toString());
}
}
};
const setParameters = ({ payload: { params: _params } }, res) => {
@ -177,10 +181,7 @@ const initialize = ({ @@ -177,10 +181,7 @@ const initialize = ({
api.End();
}
api = new TessModule.TessBaseAPI();
const status = api.Init(null, langs, oem);
if (status === -1) {
res.reject('initialization failed');
}
api.Init(null, langs, oem);
params = defaultParams;
setParameters({ payload: { params } });
res.progress({

11
src/worker-script/node/getCore.js

@ -1,19 +1,12 @@ @@ -1,19 +1,12 @@
const { simd } = require('wasm-feature-detect');
let TesseractCore = null;
/*
* getCore is a sync function to load and return
* TesseractCore.
*/
module.exports = async (_, res) => {
module.exports = (_, res) => {
if (TesseractCore === null) {
const simdSupport = await simd();
res.progress({ status: 'loading tesseract core', progress: 0 });
if (simdSupport) {
TesseractCore = require('tesseract.js-core/tesseract-core-simd');
} else {
TesseractCore = require('tesseract.js-core/tesseract-core');
}
TesseractCore = require('tesseract.js-core');
res.progress({ status: 'loaded tesseract core', progress: 1 });
}
return TesseractCore;

5
src/worker-script/node/index.js

@ -9,7 +9,6 @@ @@ -9,7 +9,6 @@
*/
const fetch = require('node-fetch');
const { parentPort } = require('worker_threads');
const worker = require('..');
const getCore = require('./getCore');
const gunzip = require('./gunzip');
@ -18,8 +17,8 @@ const cache = require('./cache'); @@ -18,8 +17,8 @@ const cache = require('./cache');
/*
* register message handler
*/
parentPort.on('message', (packet) => {
worker.dispatchHandlers(packet, (obj) => parentPort.postMessage(obj));
process.on('message', (packet) => {
worker.dispatchHandlers(packet, (obj) => process.send(obj));
});
worker.setAdapter({

12
src/worker-script/utils/setImage.js

@ -17,12 +17,10 @@ module.exports = (TessModule, api, image) => { @@ -17,12 +17,10 @@ module.exports = (TessModule, api, image) => {
let w = 0;
let h = 0;
const exif = buf.slice(0, 500).toString().match(/\x01\x12\x00\x03\x00\x00\x00\x01\x00(.)/)?.[1]?.charCodeAt(0) || 1;
/*
* Leptonica supports uncompressed but not compressed bmp files
* @see https://github.com/DanBloomberg/leptonica/issues/607#issuecomment-1068802516
* We therefore use bmp-js to process all bmp files
* Although Leptonica should support reading BMP, it has a bug with compressed BMP files.
* As there is no fix yet, we use bmp-js for now.
* @see https://groups.google.com/forum/#!topic/tesseract-ocr/4mPD9zTxdxE
*/
if (type && type.mime === 'image/bmp') {
const bmpBuf = bmp.decode(buf);
@ -55,9 +53,9 @@ module.exports = (TessModule, api, image) => { @@ -55,9 +53,9 @@ module.exports = (TessModule, api, image) => {
*
*/
if (data === null) {
api.SetImage(pix, undefined, undefined, undefined, undefined, exif);
api.SetImage(pix);
} else {
api.SetImage(data, w, h, bytesPerPixel, w * bytesPerPixel, exif);
api.SetImage(data, w, h, bytesPerPixel, w * bytesPerPixel);
}
return data === null ? pix : data;
};

4
src/worker/browser/defaultOptions.js

@ -1,5 +1,5 @@ @@ -1,5 +1,5 @@
const resolveURL = require('resolve-url');
const { version } = require('../../../package.json');
const { version, dependencies } = require('../../../package.json');
const defaultOptions = require('../../constants/defaultOptions');
/*
@ -14,5 +14,5 @@ module.exports = { @@ -14,5 +14,5 @@ module.exports = {
* If browser doesn't support WebAssembly,
* load ASM version instead
*/
corePath: null,
corePath: `https://unpkg.com/tesseract.js-core@v${dependencies['tesseract.js-core'].substring(1)}/tesseract-core.${typeof WebAssembly === 'object' ? 'wasm' : 'asm'}.js`,
};

36
src/worker/browser/loadImage.js

@ -1,4 +1,5 @@ @@ -1,4 +1,5 @@
const resolveURL = require('resolve-url');
const blueimpLoadImage = require('blueimp-load-image');
/**
* readFromBlobOrFile
@ -20,6 +21,19 @@ const readFromBlobOrFile = (blob) => ( @@ -20,6 +21,19 @@ const readFromBlobOrFile = (blob) => (
})
);
/**
 * fixOrientationFromUrlOrBlobOrFile
 *
 * Loads the image through blueimp-load-image with the `orientation` and
 * `canvas` options enabled, then exports the resulting canvas as a Blob.
 *
 * @name fixOrientationFromUrlOrBlobOrFile
 * @function
 * @access private
 * @param {string|Blob|File} blob image source handed to blueimp-load-image
 * @returns {Promise<Blob>} resolves with the canvas content as a Blob;
 *   rejects when the image cannot be loaded
 */
const fixOrientationFromUrlOrBlobOrFile = (blob) => (
  new Promise((resolve, reject) => {
    blueimpLoadImage(
      blob,
      (img) => {
        // On failure the callback receives an error event instead of a
        // canvas; reject so the caller is not left waiting forever.
        if (!img || img.type === 'error' || typeof img.toBlob !== 'function') {
          reject(new Error('Failed to load image'));
          return;
        }
        img.toBlob(resolve);
      },
      {
        orientation: true,
        canvas: true,
      },
    );
  })
);
/**
* loadImage
*
@ -34,14 +48,18 @@ const loadImage = async (image) => { @@ -34,14 +48,18 @@ const loadImage = async (image) => {
}
if (typeof image === 'string') {
// Base64 Image
if (/data:image\/([a-zA-Z]*);base64,([^"]*)/.test(image)) {
data = atob(image.split(',')[1])
.split('')
.map((c) => c.charCodeAt(0));
} else {
if (image.endsWith('.pbm')) {
const resp = await fetch(resolveURL(image));
data = await resp.arrayBuffer();
} else {
let img = image;
// If not Base64 Image
if (!/data:image\/([a-zA-Z]*);base64,([^"]*)/.test(image)) {
img = resolveURL(image);
}
data = await readFromBlobOrFile(
await fixOrientationFromUrlOrBlobOrFile(img),
);
}
} else if (image instanceof HTMLElement) {
if (image.tagName === 'IMG') {
@ -59,7 +77,11 @@ const loadImage = async (image) => { @@ -59,7 +77,11 @@ const loadImage = async (image) => {
});
}
} else if (image instanceof File || image instanceof Blob) {
data = await readFromBlobOrFile(image);
let img = image;
if (!image.name.endsWith('.pbm')) {
img = await fixOrientationFromUrlOrBlobOrFile(img);
}
data = await readFromBlobOrFile(img);
}
return new Uint8Array(data);

5
src/worker/node/loadImage.js

@ -2,6 +2,7 @@ const util = require('util'); @@ -2,6 +2,7 @@ const util = require('util');
const fs = require('fs');
const fetch = require('node-fetch');
const isURL = require('is-url');
const jo = require('jpeg-autorotate');
const readFile = util.promisify(fs.readFile);
@ -31,5 +32,9 @@ module.exports = async (image) => { @@ -31,5 +32,9 @@ module.exports = async (image) => {
data = image;
}
try {
data = (await jo.rotate(data, { quality: 100 })).buffer;
} catch (_) {} /* eslint-disable-line */
return new Uint8Array(data);
};

4
src/worker/node/send.js

@ -5,6 +5,6 @@ @@ -5,6 +5,6 @@
* @function send packet to worker and create a job
* @access public
*/
module.exports = async (worker, packet) => {
worker.postMessage(packet);
module.exports = (worker, packet) => {
worker.send(packet);
};

9
src/worker/node/spawnWorker.js

@ -1,4 +1,6 @@ @@ -1,4 +1,6 @@
const { Worker } = require('worker_threads');
const { fork } = require('child_process');
let debugPort = 9229;
/**
* spawnWorker
@ -7,4 +9,7 @@ const { Worker } = require('worker_threads'); @@ -7,4 +9,7 @@ const { Worker } = require('worker_threads');
* @function fork a new process in node
* @access public
*/
module.exports = ({ workerPath }) => new Worker(workerPath);
module.exports = ({ workerPath }) => {
debugPort += 1;
return fork(workerPath, { execArgv: [`--debug-port=${debugPort}`] });
};

2
src/worker/node/terminateWorker.js

@ -6,5 +6,5 @@ @@ -6,5 +6,5 @@
* @access public
*/
module.exports = (worker) => {
worker.terminate();
worker.kill();
};

BIN
tests/assets/images/simple.gif

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1011 B

BIN
tests/assets/images/simple.webp

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.7 KiB

4
tests/constants.js

File diff suppressed because one or more lines are too long
Loading…
Cancel
Save