Browse Source

Added automatic detection of simd support (#641)

pull/642/head
Balearica 2 years ago committed by GitHub
parent
commit
f372818146
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 11
      package-lock.json
  2. 1
      package.json
  3. 27
      src/worker-script/browser/getCore.js
  4. 4
      src/worker-script/index.js
  5. 11
      src/worker-script/node/getCore.js
  6. 6
      src/worker-script/utils/setImage.js
  7. 4
      src/worker/browser/defaultOptions.js

11
package-lock.json generated

@ -21,6 +21,7 @@ @@ -21,6 +21,7 @@
"regenerator-runtime": "^0.13.3",
"resolve-url": "^0.2.1",
"tesseract.js-core": "^3.0.1",
"wasm-feature-detect": "^1.2.11",
"zlibjs": "^0.3.1"
},
"devDependencies": {
@ -8740,6 +8741,11 @@ @@ -8740,6 +8741,11 @@
"node": ">=4.0.0"
}
},
"node_modules/wasm-feature-detect": {
"version": "1.2.11",
"resolved": "https://registry.npmjs.org/wasm-feature-detect/-/wasm-feature-detect-1.2.11.tgz",
"integrity": "sha512-HUqwaodrQGaZgz1lZaNioIkog9tkeEJjrM3eq4aUL04whXOVDRc/o2EGb/8kV0QX411iAYWEqq7fMBmJ6dKS6w=="
},
"node_modules/watchpack": {
"version": "2.4.0",
"resolved": "https://registry.npmjs.org/watchpack/-/watchpack-2.4.0.tgz",
@ -16052,6 +16058,11 @@ @@ -16052,6 +16058,11 @@
"rx": "^4.1.0"
}
},
"wasm-feature-detect": {
"version": "1.2.11",
"resolved": "https://registry.npmjs.org/wasm-feature-detect/-/wasm-feature-detect-1.2.11.tgz",
"integrity": "sha512-HUqwaodrQGaZgz1lZaNioIkog9tkeEJjrM3eq4aUL04whXOVDRc/o2EGb/8kV0QX411iAYWEqq7fMBmJ6dKS6w=="
},
"watchpack": {
"version": "2.4.0",
"resolved": "https://registry.npmjs.org/watchpack/-/watchpack-2.4.0.tgz",

1
package.json

@ -69,6 +69,7 @@ @@ -69,6 +69,7 @@
"regenerator-runtime": "^0.13.3",
"resolve-url": "^0.2.1",
"tesseract.js-core": "^3.0.1",
"wasm-feature-detect": "^1.2.11",
"zlibjs": "^0.3.1"
},
"repository": {

27
src/worker-script/browser/getCore.js

@ -1,15 +1,26 @@ @@ -1,15 +1,26 @@
module.exports = (corePath, res) => {
const { simd } = require('wasm-feature-detect');
const { dependencies } = require('../../../package.json');
module.exports = async (corePath, res) => {
if (typeof global.TesseractCore === 'undefined') {
res.progress({ status: 'loading tesseract core', progress: 0 });
global.importScripts(corePath);
/*
* Depending on whether the browser supports WebAssembly,
* the version of the TesseractCore will be different.
*/
// If the user specifies a core path, we use that.
// Otherwise, we detect the correct core based on SIMD support:
// the SIMD build is loaded only when SIMD support is detected,
// and the plain WASM build is used as the fallback.
let corePathImport = corePath;
if (!corePathImport) {
  // The dependency is declared as a range (e.g. "^3.0.1"); drop the leading
  // range character to get the version used in the CDN URL.
  const coreVersion = dependencies['tesseract.js-core'].substring(1);
  const simdSupport = await simd();
  const coreFile = simdSupport ? 'tesseract-core-simd.wasm.js' : 'tesseract-core.wasm.js';
  corePathImport = `https://unpkg.com/tesseract.js-core@v${coreVersion}/${coreFile}`;
}
global.importScripts(corePathImport);
if (typeof global.TesseractCoreWASM !== 'undefined' && typeof WebAssembly === 'object') {
global.TesseractCore = global.TesseractCoreWASM;
} else if (typeof global.TesseractCoreASM !== 'undefined') {
global.TesseractCore = global.TesseractCoreASM;
} else {
throw Error('Failed to load TesseractCore');
}

4
src/worker-script/index.js

@ -28,10 +28,10 @@ let latestJob; @@ -28,10 +28,10 @@ let latestJob;
let adapter = {};
let params = defaultParams;
const load = ({ workerId, jobId, payload: { options: { corePath, logging } } }, res) => {
const load = async ({ workerId, jobId, payload: { options: { corePath, logging } } }, res) => {
setLogging(logging);
if (!TessModule) {
const Core = adapter.getCore(corePath, res);
const Core = await adapter.getCore(corePath, res);
res.progress({ workerId, status: 'initializing tesseract', progress: 0 });

11
src/worker-script/node/getCore.js

@ -1,12 +1,19 @@ @@ -1,12 +1,19 @@
const { simd } = require('wasm-feature-detect');
let TesseractCore = null;
/*
* getCore is an async function that detects SIMD support,
* then loads and returns the matching TesseractCore build.
*/
module.exports = (_, res) => {
module.exports = async (_, res) => {
if (TesseractCore === null) {
const simdSupport = await simd();
res.progress({ status: 'loading tesseract core', progress: 0 });
TesseractCore = require('tesseract.js-core');
if (simdSupport) {
TesseractCore = require('tesseract.js-core/tesseract-core-simd');
} else {
TesseractCore = require('tesseract.js-core/tesseract-core');
}
res.progress({ status: 'loaded tesseract core', progress: 1 });
}
return TesseractCore;

6
src/worker-script/utils/setImage.js

@ -20,9 +20,9 @@ module.exports = (TessModule, api, image) => { @@ -20,9 +20,9 @@ module.exports = (TessModule, api, image) => {
const exif = buf.slice(0, 500).toString().match(/\x01\x12\x00\x03\x00\x00\x00\x01\x00(.)/)?.[1]?.charCodeAt(0) || 1;
/*
* Although leptonica should support reading bmp, there is a bug of "compressed BMP files".
* As there is no solution, we need to use bmp-js for now.
* @see https://groups.google.com/forum/#!topic/tesseract-ocr/4mPD9zTxdxE
* Leptonica supports uncompressed but not compressed bmp files
* @see https://github.com/DanBloomberg/leptonica/issues/607#issuecomment-1068802516
* We therefore use bmp-js to process all bmp files
*/
if (type && type.mime === 'image/bmp') {
const bmpBuf = bmp.decode(buf);

4
src/worker/browser/defaultOptions.js

@ -1,5 +1,5 @@ @@ -1,5 +1,5 @@
const resolveURL = require('resolve-url');
const { version, dependencies } = require('../../../package.json');
const { version } = require('../../../package.json');
const defaultOptions = require('../../constants/defaultOptions');
/*
@ -14,5 +14,5 @@ module.exports = { @@ -14,5 +14,5 @@ module.exports = {
* If browser doesn't support WebAssembly,
* load ASM version instead
*/
corePath: `https://unpkg.com/tesseract.js-core@v${dependencies['tesseract.js-core'].substring(1)}/tesseract-core.${typeof WebAssembly === 'object' ? 'wasm' : 'asm'}.js`,
corePath: null,
};

Loading…
Cancel
Save