Browse Source

Complete basic refactoring

develop
Jerome Wu 5 years ago
parent
commit
4100c0ed7a
  1. 21
      examples/browser/basic.html
  2. 22
      examples/browser/demo.html
  3. 13
      examples/node/detect.js
  4. 26
      examples/node/recognize.js
  5. 35
      package-lock.json
  6. 5
      package.json
  7. 2
      scripts/webpack.config.dev.js
  8. 2
      scripts/webpack.config.prod.js
  9. 3
      src/createJob.js
  10. 8
      src/createWorker.js
  11. 3
      src/index.js
  12. 2
      src/utils/log.js
  13. 10
      src/worker-script/browser/cache.js
  14. 19
      src/worker-script/browser/getCore.js
  15. 1
      src/worker-script/browser/gunzip.js
  16. 33
      src/worker-script/browser/index.js
  17. 3
      src/worker-script/index.js
  18. 2
      src/worker-script/utils/setImage.js
  19. 1
      src/worker/browser/b64toU8Array.js
  20. 18
      src/worker/browser/defaultOptions.js
  21. 184
      src/worker/browser/index.js
  22. 5
      src/worker/browser/onMessage.js
  23. 92
      src/worker/browser/send.js
  24. 23
      src/worker/browser/spawnWorker.js
  25. 11
      src/worker/browser/terminateWorker.js
  26. 55
      src/worker/browser/worker.js
  27. 45
      src/worker/node/send.js

21
examples/browser/basic.html

@ -1,2 +1,19 @@ @@ -1,2 +1,19 @@
<script src="/dist/tesseract.dev.js"></script>
<input type="file" onchange="const worker = new Tesseract.TesseractWorker({ corePath: '../../node_modules/tesseract.js-core/tesseract-core.wasm.js' });worker.recognize(this.files[0]).progress(function(data){console.log(data)}).then(function(data){console.log(data)})">
<html>
<head>
<script src="/dist/tesseract.dev.js"></script>
</head>
<body>
<input type="file" id="uploader">
<script>
const recognize = async ({ target: { files } }) => {
const { text } = await Tesseract.recognize(files[0], 'eng', {
corePath: '../../node_modules/tesseract.js-core/tesseract-core.wasm.js',
logger: m => console.log(m),
});
console.log(text);
}
const elm = document.getElementById('uploader');
elm.addEventListener('change', recognize);
</script>
</body>
</html>

22
examples/browser/demo.html

@ -37,30 +37,18 @@ function progressUpdate(packet){ @@ -37,30 +37,18 @@ function progressUpdate(packet){
}
}
function recognizeFile(file){
async function recognizeFile(file) {
document.querySelector("#log").innerHTML = ''
const corePath = window.navigator.userAgent.indexOf("Edge") > -1
? '../../node_modules/tesseract.js-core/tesseract-core.asm.js'
: '../../node_modules/tesseract.js-core/tesseract-core.wasm.js';
const { TesseractWorker } = Tesseract;
const worker = new TesseractWorker({
const lang = document.querySelector('#langsel').value
const data = await Tesseract.recognize(file, lang, {
corePath,
logger: progressUpdate,
});
worker.recognize(file,
document.querySelector('#langsel').value
)
.progress(function(packet){
console.info(packet)
progressUpdate(packet)
})
.then(function(data){
console.log(data)
progressUpdate({ status: 'done', data: data })
})
progressUpdate({ status: 'done', data });
}
</script>
<select id="langsel" onchange="window.lastFile && recognizeFile(window.lastFile)">

13
examples/node/detect.js

@ -1,18 +1,13 @@ @@ -1,18 +1,13 @@
#!/usr/bin/env node
const path = require('path');
const { TesseractWorker } = require('../../');
const Tesseract = require('../../');
const [,, imagePath] = process.argv;
const image = path.resolve(__dirname, (imagePath || '../../tests/assets/images/cosmic.png'));
const tessWorker = new TesseractWorker();
console.log(`Detecting ${image}`);
console.log(`Recognizing ${image}`);
tessWorker.detect(image)
.progress((info) => {
console.log(info);
})
Tesseract.detect(image, { logger: m => console.log(m) })
.then((data) => {
console.log('done', data);
process.exit();
console.log(data);
});

26
examples/node/recognize.js

@ -1,32 +1,16 @@ @@ -1,32 +1,16 @@
#!/usr/bin/env node
const path = require('path');
const fs = require('fs');
const {
Tesseract, createScheduler, createWorker,
} = require('../../');
const Tesseract = require('../../');
const [,, imagePath] = process.argv;
const image = path.resolve(__dirname, (imagePath || '../../tests/assets/images/cosmic.png'));
console.log(`Recognizing ${image}`);
(async () => {
const scheduler = createScheduler();
const worker = createWorker();
await worker.load();
await worker.loadLanguage('eng');
await worker.initialize('eng');
scheduler.addWorker(worker);
console.log((await scheduler.addJob('recognize', image)).text);
const data = await worker.getPDF('ocr', 'Tesseract OCR');
fs.writeFileSync('test.pdf', Buffer.from(data));
await scheduler.terminate();
})();
//Tesseract.recognize(image, 'eng', { logger: m => console.log(m) })
// .then(({ text }) => {
// console.log(text);
// });
Tesseract.recognize(image, 'eng', { logger: m => console.log(m) })
.then(({ text }) => {
console.log(text);
});
//Tesseract.detect(image, { logger: m => console.log(m) })
// .then((data) => {

35
package-lock.json generated

@ -2223,11 +2223,6 @@ @@ -2223,11 +2223,6 @@
"integrity": "sha512-mT8iDcrh03qDGRRmoA2hmBJnxpllMR+0/0qlzjqZES6NdiWDcZkCNAk4rPFZ9Q85r27unkiNNg8ZOiwZXBHwcA==",
"dev": true
},
"check-types": {
"version": "7.4.0",
"resolved": "https://registry.npmjs.org/check-types/-/check-types-7.4.0.tgz",
"integrity": "sha512-YbulWHdfP99UfZ73NcUDlNJhEIDgm9Doq9GhpyXbF+7Aegi3CVV7qqMCKTTqJxlvEvnQBp9IA+dxsGN6xK/nSg=="
},
"chownr": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/chownr/-/chownr-1.1.1.tgz",
@ -7627,6 +7622,11 @@ @@ -7627,6 +7622,11 @@
"regenerate": "^1.4.0"
}
},
"regenerator-runtime": {
"version": "0.13.3",
"resolved": "https://registry.npmjs.org/regenerator-runtime/-/regenerator-runtime-0.13.3.tgz",
"integrity": "sha512-naKIZz2GQ8JWh///G7L3X6LaQUAMp2lvb1rvwwsURe/VXwD6VMfr+/1NuNw3ag8v2kY1aQ/go5SNn79O9JU7yw=="
},
"regenerator-transform": {
"version": "0.14.0",
"resolved": "https://registry.npmjs.org/regenerator-transform/-/regenerator-transform-0.14.0.tgz",
@ -8552,31 +8552,6 @@ @@ -8552,31 +8552,6 @@
"resolved": "https://registry.npmjs.org/tesseract.js-core/-/tesseract.js-core-2.0.0-beta.13.tgz",
"integrity": "sha512-GboWV/aV5h+Whito6L6Q3WCFZ2+lgxZGgjY84wSpWbTLEkkZgHsU+dz1or+3rWSABH/nuzHDco1bZRk5+f94mw=="
},
"tesseract.js-utils": {
"version": "1.0.0-beta.8",
"resolved": "https://registry.npmjs.org/tesseract.js-utils/-/tesseract.js-utils-1.0.0-beta.8.tgz",
"integrity": "sha512-qjHBfWfzo2o1ZY9XI0Wh2hmpp38+mIgCMOk60W5Yyie/pBl421VLBKOZUEwQgpbLnOJ24VU6Q8yXsVgtFFHcFg==",
"requires": {
"axios": "^0.18.0",
"bmp-js": "^0.1.0",
"file-type": "^10.5.0",
"idb-keyval": "^3.1.0",
"is-url": "^1.2.4",
"zlibjs": "^0.3.1"
},
"dependencies": {
"file-type": {
"version": "10.11.0",
"resolved": "https://registry.npmjs.org/file-type/-/file-type-10.11.0.tgz",
"integrity": "sha512-uzk64HRpUZyTGZtVuvrjP0FYxzQrBf4rojot6J65YMEbwBLB0CWm0CLojVpwpmFmxcE/lkvYICgfcGozbBq6rw=="
},
"is-url": {
"version": "1.2.4",
"resolved": "https://registry.npmjs.org/is-url/-/is-url-1.2.4.tgz",
"integrity": "sha512-ITvGim8FhRiYe4IQ5uHSkj7pVaPDrCTkNd3yq3cV7iZAcJdHTUMPMEHcqSOy9xZ9qFenQCvi+2wjH9a1nXqHww=="
}
}
},
"text-table": {
"version": "0.2.0",
"resolved": "https://registry.npmjs.org/text-table/-/text-table-0.2.0.tgz",

5
package.json

@ -21,7 +21,7 @@ @@ -21,7 +21,7 @@
"postinstall": "opencollective-postinstall || true"
},
"browser": {
"./src/node/index.js": "./src/browser/index.js"
"./src/worker/node/index.js": "./src/worker/browser/index.js"
},
"author": "",
"contributors": [
@ -54,14 +54,13 @@ @@ -54,14 +54,13 @@
"dependencies": {
"axios": "^0.18.0",
"bmp-js": "^0.1.0",
"check-types": "^7.4.0",
"file-type": "^12.3.0",
"idb-keyval": "^3.2.0",
"is-url": "1.2.2",
"opencollective-postinstall": "^2.0.2",
"regenerator-runtime": "^0.13.3",
"resolve-url": "^0.2.1",
"tesseract.js-core": "^2.0.0-beta.13",
"tesseract.js-utils": "^1.0.0-beta.8",
"zlibjs": "^0.3.1"
},
"repository": {

2
scripts/webpack.config.dev.js

@ -33,7 +33,7 @@ module.exports = [ @@ -33,7 +33,7 @@ module.exports = [
libraryTarget: 'umd',
}),
genConfig({
entry: path.resolve(__dirname, '..', 'src', 'browser', 'worker.js'),
entry: path.resolve(__dirname, '..', 'src', 'worker-script', 'browser', 'index.js'),
filename: 'worker.dev.js',
}),
];

2
scripts/webpack.config.prod.js

@ -24,7 +24,7 @@ module.exports = [ @@ -24,7 +24,7 @@ module.exports = [
libraryTarget: 'umd',
}),
genConfig({
entry: path.resolve(__dirname, '..', 'src', 'browser', 'worker.js'),
entry: path.resolve(__dirname, '..', 'src', 'worker-script', 'browser', 'index.js'),
filename: 'worker.min.js',
}),
];

3
src/createJob.js

@ -1,4 +1,5 @@ @@ -1,4 +1,5 @@
const { send } = require('./worker/node');
const log = require('./utils/log');
let jobCounter = 0;
@ -10,7 +11,7 @@ module.exports = ( @@ -10,7 +11,7 @@ module.exports = (
const id = `Job-${jobCounter}-${Math.random().toString(16).slice(3, 8)}`;
const start = (w) => {
console.log(`[${w.id}]: Start ${id}, action=${action}`);
log(`[${w.id}]: Start ${id}, action=${action}`);
send(w.worker, {
workerId: w.id,
jobId: id,

8
src/createWorker.js

@ -14,11 +14,13 @@ let workerCounter = 0; @@ -14,11 +14,13 @@ let workerCounter = 0;
module.exports = (_options = {}) => {
workerCounter += 1;
const id = `Worker-${workerCounter}-${Math.random().toString(16).slice(3, 8)}`;
const options = resolvePaths({
const {
logger,
...options
} = resolvePaths({
...defaultOptions,
..._options,
});
const { logger } = options;
const resolves = {};
const rejects = {};
let worker = spawnWorker(options);
@ -31,7 +33,7 @@ module.exports = (_options = {}) => { @@ -31,7 +33,7 @@ module.exports = (_options = {}) => {
rejects[action] = rej;
};
const doJob = (action, payload) => (
const doJob = (action, payload = {}) => (
new Promise((resolve, reject) => {
setResolve(action, resolve);
setReject(action, reject);

3
src/index.js

@ -7,6 +7,7 @@ @@ -7,6 +7,7 @@
* @author Guillermo Webster <gui@mit.edu>
* @author Jerome Wu <jeromewus@gmail.com>
*/
require('regenerator-runtime/runtime');
const createScheduler = require('./createScheduler');
const createWorker = require('./createWorker');
const createJob = require('./createJob');
@ -20,5 +21,5 @@ module.exports = { @@ -20,5 +21,5 @@ module.exports = {
createScheduler,
createWorker,
createJob,
Tesseract,
...Tesseract,
};

2
src/utils/log.js

@ -0,0 +1,2 @@ @@ -0,0 +1,2 @@
module.exports = (typeof process.env !== 'undefined' && process.env.NODE_ENV === 'development')
? console.log : () => {};

10
src/worker-script/browser/cache.js

@ -0,0 +1,10 @@ @@ -0,0 +1,10 @@
const { set, get, del } = require('idb-keyval');
module.exports = {
readCache: get,
writeCache: set,
deleteCache: del,
checkCache: path => (
get(path).then(v => typeof v !== 'undefined')
),
};

19
src/worker-script/browser/getCore.js

@ -0,0 +1,19 @@ @@ -0,0 +1,19 @@
module.exports = (corePath, res) => {
if (typeof global.TesseractCore === 'undefined') {
res.progress({ status: 'loading tesseract core', progress: 0 });
global.importScripts(corePath);
/*
* Depending on whether the browser supports WebAssembly,
* the version of the TesseractCore will be different.
*/
if (typeof global.TesseractCoreWASM !== 'undefined' && typeof WebAssembly === 'object') {
global.TesseractCore = global.TesseractCoreWASM;
} else if (typeof global.TesseractCoreASM !== 'undefined') {
global.TesseractCore = global.TesseractCoreASM;
} else {
throw Error('Failed to load TesseractCore');
}
res.progress({ status: 'loading tesseract core', progress: 1 });
}
return global.TesseractCore;
};

1
src/worker-script/browser/gunzip.js

@ -0,0 +1 @@ @@ -0,0 +1 @@
// gunzip adapter for the browser worker script: re-exports gunzipSync from zlibjs.
module.exports = require('zlibjs').gunzipSync;

33
src/worker-script/browser/index.js

@ -0,0 +1,33 @@ @@ -0,0 +1,33 @@
/**
*
* Browser worker scripts
*
* @fileoverview Browser worker implementation
* @author Kevin Kwok <antimatter15@gmail.com>
* @author Guillermo Webster <gui@mit.edu>
* @author Jerome Wu <jeromewus@gmail.com>
*/
const worker = require('../');
const getCore = require('./getCore');
const gunzip = require('./gunzip');
const resolveURL = require('./resolveURL');
const cache = require('./cache');
/*
 * Register the message handler: every incoming message's data is handed to
 * the shared dispatcher together with a reply function that forwards its
 * argument to postMessage.
 */
const reply = obj => postMessage(obj);
const onIncomingMessage = (event) => {
  worker.dispatchHandlers(event.data, reply);
};
global.addEventListener('message', onIncomingMessage);

/*
 * Install the browser adapter. getCore loads TesseractCore synchronously
 * (see ./getCore); the cache helpers are spread in from ./cache.
 */
const browserAdapter = {
  getCore,
  gunzip,
  resolveURL,
  ...cache,
};
worker.setAdapter(browserAdapter);

3
src/worker-script/index.js

@ -7,6 +7,7 @@ @@ -7,6 +7,7 @@
* @author Guillermo Webster <gui@mit.edu>
* @author Jerome Wu <jeromewus@gmail.com>
*/
require('regenerator-runtime/runtime');
const fileType = require('file-type');
const axios = require('axios');
const isURL = require('is-url');
@ -84,7 +85,7 @@ const loadLanguage = async ({ @@ -84,7 +85,7 @@ const loadLanguage = async ({
if (typeof _lang === 'string') {
let path = null;
if (isURL(langPath)) { /** When langPath is an URL */
if (isURL(langPath) || langPath.startsWith('chrome-extension://') || langPath.startsWith('file://')) { /** When langPath is an URL */
path = langPath;
} else if (process.browser) { /** When langPath is not an URL in browser */
path = adapter.resolveURL(langPath);

2
src/worker-script/utils/setImage.js

@ -11,7 +11,7 @@ const fileType = require('file-type'); @@ -11,7 +11,7 @@ const fileType = require('file-type');
* @returns {number} - an emscripten pointer of the image
*/
module.exports = (TessModule, api, image) => {
const buf = Buffer.from(Array.from(image));
const buf = Buffer.from(Array.from({ ...image, length: Object.keys(image).length }));
const type = fileType(buf);
let bytesPerPixel = 0;
let data = null;

1
src/worker/browser/b64toU8Array.js

@ -1 +0,0 @@ @@ -1 +0,0 @@
module.exports = s => new Uint8Array(atob(s).split('').map(c => c.charCodeAt(0)));

18
src/worker/browser/defaultOptions.js

@ -0,0 +1,18 @@ @@ -0,0 +1,18 @@
const resolveURL = require('resolve-url');
const { version } = require('../../../package.json');
const defaultOptions = require('../../constants/defaultOptions');
/*
* Default options for browser worker
*/
module.exports = {
...defaultOptions,
workerPath: (typeof process !== 'undefined' && process.env.TESS_ENV === 'development')
? resolveURL(`/dist/worker.dev.js?nocache=${Math.random().toString(36).slice(3)}`)
: `https://unpkg.com/tesseract.js@v${version}/dist/worker.min.js`,
/*
* If browser doesn't support WebAssembly,
* load ASM version instead
*/
corePath: `https://unpkg.com/tesseract.js-core@v2.0.0-beta.10/tesseract-core.${typeof WebAssembly === 'object' ? 'wasm' : 'asm'}.js`,
};

184
src/worker/browser/index.js

@ -7,176 +7,16 @@ @@ -7,176 +7,16 @@
* @author Guillermo Webster <gui@mit.edu>
* @author Jerome Wu <jeromewus@gmail.com>
*/
const check = require('check-types');
const resolveURL = require('resolve-url');
const axios = require('axios');
const b64toU8Array = require('./b64toU8Array');
const { defaultOptions } = require('../common/options');
const { version } = require('../../package.json');
/**
* readFromBlobOrFile
*
* @name readFromBlobOrFile
* @function
* @access private
* @param {object} blob A blob or file object to read
* @param {function} res callback function after reading completes
*/
const readFromBlobOrFile = (blob, res) => {
const fileReader = new FileReader();
fileReader.onload = () => {
res(fileReader.result);
};
fileReader.readAsArrayBuffer(blob);
};
/**
* loadImage
*
* @name loadImage
* @function load image from different source
* @access private
* @param {string, object} image - image source, supported formats:
* string: URL string, can be relative path
* string: base64 image
* img HTMLElement: extract image source from src attribute
* video HTMLElement: extract image source from poster attribute
* canvas HTMLElement: extract image data by converting to Blob
* File instance: data from <input type="file" />
* @returns {array} binary image in array format
*/
const loadImage = (image) => {
if (check.string(image)) {
// Base64 Image
if (/data:image\/([a-zA-Z]*);base64,([^"]*)/.test(image)) {
return Promise.resolve(b64toU8Array(image.split(',')[1]));
}
// Image URL
return axios.get(resolveURL(image), {
responseType: 'arraybuffer',
})
.then(resp => resp.data);
}
if (check.instance(image, HTMLElement)) {
if (image.tagName === 'IMG') {
return loadImage(image.src);
}
if (image.tagName === 'VIDEO') {
return loadImage(image.poster);
}
if (image.tagName === 'CANVAS') {
return new Promise((res) => {
image.toBlob((blob) => {
readFromBlobOrFile(blob, res);
});
});
}
}
if (check.instance(image, File) || check.instance(image, Blob)) {
return new Promise((res) => {
readFromBlobOrFile(image, res);
});
}
return Promise.reject();
};
const downloadFile = (path, blob) => {
if (navigator.msSaveBlob) {
// IE 10+
navigator.msSaveBlob(blob, path);
} else {
const link = document.createElement('a');
// Browsers that support HTML5 download attribute
if (link.download !== undefined) {
const url = URL.createObjectURL(blob);
link.setAttribute('href', url);
link.setAttribute('download', path);
link.style.visibility = 'hidden';
document.body.appendChild(link);
link.click();
document.body.removeChild(link);
}
}
};
/*
* Default options for browser worker
*/
exports.defaultOptions = {
...defaultOptions,
workerPath: (typeof process !== 'undefined' && process.env.TESS_ENV === 'development')
? resolveURL(`/dist/worker.dev.js?nocache=${Math.random().toString(36).slice(3)}`)
: `https://unpkg.com/tesseract.js@v${version}/dist/worker.min.js`,
/*
* If browser doesn't support WebAssembly,
* load ASM version instead
*/
corePath: `https://unpkg.com/tesseract.js-core@v2.0.0-beta.10/tesseract-core.${typeof WebAssembly === 'object' ? 'wasm' : 'asm'}.js`,
};
/**
* spawnWorker
*
* @name spawnWorker
* @function create a new Worker in browser
* @access public
* @param {object} instance - TesseractWorker instance
* @param {object} options
* @param {string} options.workerPath - worker script path
* @param {boolean} options.workerBlobURL - Use a blob:// URL for the worker script
*/
exports.spawnWorker = (instance, { workerPath, workerBlobURL }) => {
let worker;
if (Blob && URL && workerBlobURL) {
const blob = new Blob([`importScripts("${workerPath}");`], {
type: 'application/javascript',
});
worker = new Worker(URL.createObjectURL(blob));
} else {
worker = new Worker(workerPath);
}
worker.onmessage = ({ data }) => {
if (data.jobId.startsWith('Job')) {
instance.recv(data);
} else if (data.jobId.startsWith('Download')) {
const { path, data: d, type } = data;
const blob = new Blob([d], { type });
downloadFile(path, blob);
}
};
return worker;
};
/**
* terminateWorker
*
* @name terminateWorker
* @function terminate worker
* @access public
* @param {object} instance TesseractWorker instance
*/
exports.terminateWorker = (instance) => {
instance.worker.terminate();
};
/**
* sendPacket
*
* @name sendPacket
* @function send packet to worker and create a job
* @access public
* @param {object} instance TesseractWorker instance
* @param {object} iPacket data for worker
*/
exports.sendPacket = (instance, iPacket) => {
const packet = { ...iPacket };
loadImage(packet.payload.image)
.then(buf => new Uint8Array(buf))
.then((img) => {
packet.payload.image = Array.from(img);
instance.worker.postMessage(packet);
});
const defaultOptions = require('./defaultOptions');
const spawnWorker = require('./spawnWorker');
const terminateWorker = require('./terminateWorker');
const onMessage = require('./onMessage');
const send = require('./send');
module.exports = {
defaultOptions,
spawnWorker,
terminateWorker,
onMessage,
send,
};

5
src/worker/browser/onMessage.js

@ -0,0 +1,5 @@ @@ -0,0 +1,5 @@
module.exports = (worker, handler) => {
worker.onmessage = ({ data }) => { // eslint-disable-line
handler(data);
};
};

92
src/worker/browser/send.js

@ -0,0 +1,92 @@ @@ -0,0 +1,92 @@
const axios = require('axios');
const resolveURL = require('resolve-url');
/**
 * readFromBlobOrFile
 *
 * @name readFromBlobOrFile
 * @function read a Blob or File through a FileReader
 * @access private
 * @param {object} blob A blob or file object to read
 * @returns {Promise} resolves with the FileReader result (requested via
 *   readAsArrayBuffer); rejects with an Error carrying the reader's error code
 */
const readFromBlobOrFile = blob => (
  new Promise((resolve, reject) => {
    const reader = new FileReader();
    const handleLoad = () => {
      resolve(reader.result);
    };
    const handleError = (event) => {
      const { code } = event.target.error;
      reject(Error(`File could not be read! Code=${code}`));
    };
    reader.onload = handleLoad;
    reader.onerror = handleError;
    reader.readAsArrayBuffer(blob);
  })
);
/**
 * loadImage
 *
 * @name loadImage
 * @function load image from different source
 * @access private
 * @param {string, object} image - image source, supported formats:
 *   string: URL string, can be relative path
 *   string: base64 image
 *   img HTMLElement: extract image source from src attribute
 *   video HTMLElement: extract image source from poster attribute
 *   canvas HTMLElement: extract image data by converting to Blob
 *   File or Blob instance: e.g. data from <input type="file" />
 *   anything else is handed to the Uint8Array constructor unchanged
 * @returns {Promise} resolves with a Uint8Array of the image bytes, or with
 *   the string 'undefined' when no image was supplied
 */
const loadImage = async (image) => {
  if (typeof image === 'undefined') {
    // Deliberately the literal string, preserved from the existing behavior.
    return 'undefined';
  }

  let data = image;
  if (typeof image === 'string') {
    // Base64 Image
    if (/data:image\/([a-zA-Z]*);base64,([^"]*)/.test(image)) {
      data = atob(image.split(',')[1])
        .split('')
        .map(c => c.charCodeAt(0));
    } else {
      // Image URL
      const response = await axios.get(resolveURL(image), { responseType: 'arraybuffer' });
      data = response.data;
    }
  } else if (image instanceof HTMLElement) {
    if (image.tagName === 'IMG') {
      // Must be awaited: wrapping the pending Promise in Uint8Array gives an empty array.
      data = await loadImage(image.src);
    } else if (image.tagName === 'VIDEO') {
      data = await loadImage(image.poster);
    } else if (image.tagName === 'CANVAS') {
      // Only toBlob is adapted to a Promise; the read is awaited outside the
      // callback so a read failure rejects loadImage instead of leaving it pending.
      const blob = await new Promise((resolve) => {
        image.toBlob(resolve);
      });
      data = await readFromBlobOrFile(blob);
    }
  } else if (image instanceof File || image instanceof Blob) {
    data = await readFromBlobOrFile(image);
  }

  return new Uint8Array(data);
};
/**
* sendPacket
*
* @name sendPacket
* @function send packet to worker and create a job
* @access public
* @param {object} instance TesseractWorker instance
* @param {object} iPacket data for worker
*/
module.exports = async (worker, _packet) => {
const packet = { ..._packet };
packet.payload.image = await loadImage(packet.payload.image);
worker.postMessage(packet);
};

23
src/worker/browser/spawnWorker.js

@ -0,0 +1,23 @@ @@ -0,0 +1,23 @@
/**
* spawnWorker
*
* @name spawnWorker
* @function create a new Worker in browser
* @access public
* @param {object} options
* @param {string} options.workerPath - worker script path
* @param {boolean} options.workerBlobURL - Use a blob:// URL for the worker script
*/
module.exports = ({ workerPath, workerBlobURL }) => {
let worker;
if (Blob && URL && workerBlobURL) {
const blob = new Blob([`importScripts("${workerPath}");`], {
type: 'application/javascript',
});
worker = new Worker(URL.createObjectURL(blob));
} else {
worker = new Worker(workerPath);
}
return worker;
};

11
src/worker/browser/terminateWorker.js

@ -0,0 +1,11 @@ @@ -0,0 +1,11 @@
/**
* terminateWorker
*
* @name terminateWorker
* @function terminate worker
* @access public
* @param {object} instance TesseractWorker instance
*/
module.exports = (worker) => {
worker.terminate();
};

55
src/worker/browser/worker.js

@ -1,55 +0,0 @@ @@ -1,55 +0,0 @@
/**
*
* Browser worker scripts
*
* @fileoverview Browser worker implementation
* @author Kevin Kwok <antimatter15@gmail.com>
* @author Guillermo Webster <gui@mit.edu>
* @author Jerome Wu <jeromewus@gmail.com>
*/
const check = require('check-types');
const workerWrapper = require('../../workerWrapper');
const b64toU8Array = require('./b64toU8Array');
/*
 * register message handler
 */
const respond = obj => postMessage(obj);
global.addEventListener('message', (event) => {
  workerWrapper.dispatchHandlers(event.data, respond);
});

/*
 * getCore is a sync function to load and return
 * TesseractCore.
 */
const getCore = (corePath, res) => {
  // Loaded by an earlier call: return it without reporting progress.
  if (!check.undefined(global.TesseractCore)) {
    return global.TesseractCore;
  }

  const status = 'loading tesseract core';
  res.progress({ status, progress: 0 });
  global.importScripts(corePath);
  /*
   * Depending on whether the browser supports WebAssembly,
   * the version of the TesseractCore will be different.
   */
  if (check.not.undefined(global.TesseractCoreWASM) && typeof WebAssembly === 'object') {
    global.TesseractCore = global.TesseractCoreWASM;
  } else if (check.not.undefined(global.TesseractCoreASM)) {
    global.TesseractCore = global.TesseractCoreASM;
  } else {
    throw Error('Failed to load TesseractCore');
  }
  res.progress({ status, progress: 1 });
  return global.TesseractCore;
};

/*
 * writeFile posts the file as a message tagged with the 'Download' jobId.
 */
const writeFile = (path, data, type) => {
  const message = {
    jobId: 'Download',
    path,
    data,
    type,
  };
  postMessage(message);
};

workerWrapper.setAdapter({
  getCore,
  b64toU8Array,
  writeFile,
});

45
src/worker/node/send.js

@ -17,23 +17,26 @@ const readFile = util.promisify(fs.readFile); @@ -17,23 +17,26 @@ const readFile = util.promisify(fs.readFile);
* buffer: image buffer
* @returns {array} binary image in array format
*/
const loadImage = (image) => {
if (isURL(image)) {
return axios.get(image, {
responseType: 'arraybuffer',
})
.then(resp => resp.data);
const loadImage = async (image) => {
let data = image;
if (typeof image === 'undefined') {
return image;
}
if (/data:image\/([a-zA-Z]*);base64,([^"]*)/.test(image)) {
return Promise.resolve(Buffer.from(image.split(',')[1], 'base64'));
if (typeof image === 'string') {
if (isURL(image) || image.startsWith('chrome-extension://') || image.startsWith('file://')) {
const { data: _data } = await axios.get(image, { responseType: 'arraybuffer' });
data = _data;
} else if (/data:image\/([a-zA-Z]*);base64,([^"]*)/.test(image)) {
data = Buffer.from(image.split(',')[1], 'base64');
} else {
data = await readFile(image);
}
} else if (Buffer.isBuffer(image)) {
data = image;
}
if (Buffer.isBuffer(image)) {
return Promise.resolve(image);
}
return readFile(image);
return new Uint8Array(data);
};
@ -46,16 +49,8 @@ const loadImage = (image) => { @@ -46,16 +49,8 @@ const loadImage = (image) => {
* @param {object} instance TesseractWorker instance
* @param {object} iPacket data for worker
*/
module.exports = (worker, packet) => {
const p = { ...packet };
if (['recognize', 'detect'].includes(p.action)) {
loadImage(p.payload.image)
.then(buf => new Uint8Array(buf))
.then((img) => {
p.payload.image = Array.from(img);
worker.send(p);
});
} else {
worker.send(p);
}
module.exports = async (worker, _packet) => {
const packet = { ..._packet };
packet.payload.image = await loadImage(packet.payload.image);
worker.send(packet);
};

Loading…
Cancel
Save