From 4100c0ed7a276d8958ef73c50ca20e1e427d62cd Mon Sep 17 00:00:00 2001 From: Jerome Wu Date: Tue, 1 Oct 2019 21:54:59 +0800 Subject: [PATCH] Complete basic refactoring --- examples/browser/basic.html | 21 ++- examples/browser/demo.html | 22 +-- examples/node/detect.js | 13 +- examples/node/recognize.js | 26 +--- package-lock.json | 35 +---- package.json | 5 +- scripts/webpack.config.dev.js | 2 +- scripts/webpack.config.prod.js | 2 +- src/createJob.js | 3 +- src/createWorker.js | 8 +- src/index.js | 3 +- src/utils/log.js | 2 + src/worker-script/browser/cache.js | 10 ++ src/worker-script/browser/getCore.js | 19 +++ src/worker-script/browser/gunzip.js | 1 + src/worker-script/browser/index.js | 33 +++++ src/worker-script/index.js | 3 +- src/worker-script/utils/setImage.js | 2 +- src/worker/browser/b64toU8Array.js | 1 - src/worker/browser/defaultOptions.js | 18 +++ src/worker/browser/index.js | 184 ++------------------------ src/worker/browser/onMessage.js | 5 + src/worker/browser/send.js | 92 +++++++++++++ src/worker/browser/spawnWorker.js | 23 ++++ src/worker/browser/terminateWorker.js | 11 ++ src/worker/browser/worker.js | 55 -------- src/worker/node/send.js | 45 +++---- 27 files changed, 300 insertions(+), 344 deletions(-) create mode 100644 src/utils/log.js create mode 100644 src/worker-script/browser/cache.js create mode 100644 src/worker-script/browser/getCore.js create mode 100644 src/worker-script/browser/gunzip.js delete mode 100644 src/worker/browser/b64toU8Array.js create mode 100644 src/worker/browser/defaultOptions.js create mode 100644 src/worker/browser/onMessage.js create mode 100644 src/worker/browser/send.js create mode 100644 src/worker/browser/spawnWorker.js create mode 100644 src/worker/browser/terminateWorker.js delete mode 100644 src/worker/browser/worker.js diff --git a/examples/browser/basic.html b/examples/browser/basic.html index 84870d5..a5a1424 100644 --- a/examples/browser/basic.html +++ b/examples/browser/basic.html @@ -1,2 +1,19 @@ - - + + + + + + + + + diff --git a/examples/browser/demo.html b/examples/browser/demo.html index 2df7679..c383283 100644 --- a/examples/browser/demo.html +++ b/examples/browser/demo.html @@ -37,30 +37,18 @@ function progressUpdate(packet){ } } -function recognizeFile(file){ +async function recognizeFile(file) { document.querySelector("#log").innerHTML = '' const corePath = window.navigator.userAgent.indexOf("Edge") > -1 ? '../../node_modules/tesseract.js-core/tesseract-core.asm.js' : '../../node_modules/tesseract.js-core/tesseract-core.wasm.js'; - const { TesseractWorker } = Tesseract; - - const worker = new TesseractWorker({ + const lang = document.querySelector('#langsel').value + const data = await Tesseract.recognize(file, lang, { corePath, + logger: progressUpdate, }); - - worker.recognize(file, - document.querySelector('#langsel').value - ) - .progress(function(packet){ - console.info(packet) - progressUpdate(packet) - - }) - .then(function(data){ - console.log(data) - progressUpdate({ status: 'done', data: data }) - }) + progressUpdate({ status: 'done', data }); } - * @returns {array} binary image in array format - */ -const loadImage = (image) => { - if (check.string(image)) { - // Base64 Image - if (/data:image\/([a-zA-Z]*);base64,([^"]*)/.test(image)) { - return Promise.resolve(b64toU8Array(image.split(',')[1])); - } - // Image URL - return axios.get(resolveURL(image), { - responseType: 'arraybuffer', - }) - .then(resp => resp.data); - } - if (check.instance(image, HTMLElement)) { - if (image.tagName === 'IMG') { - return loadImage(image.src); - } - if (image.tagName === 'VIDEO') { - return loadImage(image.poster); - } - if (image.tagName === 'CANVAS') { - return new Promise((res) => { - image.toBlob((blob) => { - readFromBlobOrFile(blob, res); - }); - }); - } - } - if (check.instance(image, File) || check.instance(image, Blob)) { - return new Promise((res) => { - readFromBlobOrFile(image, res); - }); - } - return Promise.reject(); -}; - -const downloadFile = (path, blob) => { - if (navigator.msSaveBlob) { - // IE 10+ - navigator.msSaveBlob(blob, path); - } else { - const link = document.createElement('a'); - // Browsers that support HTML5 download attribute - if (link.download !== undefined) { - const url = URL.createObjectURL(blob); - link.setAttribute('href', url); - link.setAttribute('download', path); - link.style.visibility = 'hidden'; - document.body.appendChild(link); - link.click(); - document.body.removeChild(link); - } - } -}; - -/* - * Default options for browser worker - */ -exports.defaultOptions = { - ...defaultOptions, - workerPath: (typeof process !== 'undefined' && process.env.TESS_ENV === 'development') - ? resolveURL(`/dist/worker.dev.js?nocache=${Math.random().toString(36).slice(3)}`) - : `https://unpkg.com/tesseract.js@v${version}/dist/worker.min.js`, - /* - * If browser doesn't support WebAssembly, - * load ASM version instead - */ - corePath: `https://unpkg.com/tesseract.js-core@v2.0.0-beta.10/tesseract-core.${typeof WebAssembly === 'object' ? 'wasm' : 'asm'}.js`, -}; - -/** - * spawnWorker - * - * @name spawnWorker - * @function create a new Worker in browser - * @access public - * @param {object} instance - TesseractWorker instance - * @param {object} options - * @param {string} options.workerPath - worker script path - * @param {boolean} options.workerBlobURL - Use a blob:// URL for the worker script - */ -exports.spawnWorker = (instance, { workerPath, workerBlobURL }) => { - let worker; - if (Blob && URL && workerBlobURL) { - const blob = new Blob([`importScripts("${workerPath}");`], { - type: 'application/javascript', - }); - worker = new Worker(URL.createObjectURL(blob)); - } else { - worker = new Worker(workerPath); - } - - worker.onmessage = ({ data }) => { - if (data.jobId.startsWith('Job')) { - instance.recv(data); - } else if (data.jobId.startsWith('Download')) { - const { path, data: d, type } = data; - const blob = new Blob([d], { type }); - downloadFile(path, blob); - } - }; - - return worker; -}; - -/** - * terminateWorker - * - * @name terminateWorker - * @function terminate worker - * @access public - * @param {object} instance TesseractWorker instance - */ -exports.terminateWorker = (instance) => { - instance.worker.terminate(); -}; - -/** - * sendPacket - * - * @name sendPacket - * @function send packet to worker and create a job - * @access public - * @param {object} instance TesseractWorker instance - * @param {object} iPacket data for worker - */ -exports.sendPacket = (instance, iPacket) => { - const packet = { ...iPacket }; - loadImage(packet.payload.image) - .then(buf => new Uint8Array(buf)) - .then((img) => { - packet.payload.image = Array.from(img); - instance.worker.postMessage(packet); - }); +const defaultOptions = require('./defaultOptions'); +const spawnWorker = require('./spawnWorker'); +const terminateWorker = require('./terminateWorker'); +const onMessage = require('./onMessage'); +const send = require('./send'); + +module.exports = { + defaultOptions, + spawnWorker, + terminateWorker, + onMessage, + send, }; diff --git a/src/worker/browser/onMessage.js b/src/worker/browser/onMessage.js new file mode 100644 index 0000000..ec74783 --- /dev/null +++ b/src/worker/browser/onMessage.js @@ -0,0 +1,5 @@ +module.exports = (worker, handler) => { + worker.onmessage = ({ data }) => { // eslint-disable-line + handler(data); + }; +}; diff --git a/src/worker/browser/send.js b/src/worker/browser/send.js new file mode 100644 index 0000000..94f75b2 --- /dev/null +++ b/src/worker/browser/send.js @@ -0,0 +1,92 @@ +const axios = require('axios'); +const resolveURL = require('resolve-url'); + +/** + * readFromBlobOrFile + * + * @name readFromBlobOrFile + * @function + * @access private + * @param {object} blob A blob or file objec to read + * @param {function} res callback function after reading completes + */ +const readFromBlobOrFile = blob => ( + new Promise((resolve, reject) => { + const fileReader = new FileReader(); + fileReader.onload = () => { + resolve(fileReader.result); + }; + fileReader.onerror = ({ target: { error: { code } } }) => { + reject(Error(`File could not be read! Code=${code}`)); + }; + fileReader.readAsArrayBuffer(blob); + }) +); + +/** + * loadImage + * + * @name loadImage + * @function load image from different source + * @access private + * @param {string, object} image - image source, supported formats: + * string: URL string, can be relative path + * string: base64 image + * img HTMLElement: extract image source from src attribute + * video HTMLElement: extract image source from poster attribute + * canvas HTMLElement: extract image data by converting to Blob + * File instance: data from + * @returns {array} binary image in array format + */ +const loadImage = async (image) => { + let data = image; + if (typeof image === 'undefined') { + return 'undefined'; + } + + if (typeof image === 'string') { + // Base64 Image + if (/data:image\/([a-zA-Z]*);base64,([^"]*)/.test(image)) { + data = atob(image.split(',')[1]) + .split('') + .map(c => c.charCodeAt(0)); + } else { + const { data: _data } = await axios.get(resolveURL(image), { responseType: 'arraybuffer' }); + data = _data; + } + } else if (image instanceof HTMLElement) { + if (image.tagName === 'IMG') { + data = loadImage(image.src); + } + if (image.tagName === 'VIDEO') { + data = loadImage(image.poster); + } + if (image.tagName === 'CANVAS') { + await new Promise((resolve) => { + image.toBlob(async (blob) => { + data = await readFromBlobOrFile(blob); + resolve(); + }); + }); + } + } else if (image instanceof File || image instanceof Blob) { + data = await readFromBlobOrFile(image); + } + + return new Uint8Array(data); +}; + +/** + * sendPacket + * + * @name sendPacket + * @function send packet to worker and create a job + * @access public + * @param {object} instance TesseractWorker instance + * @param {object} iPacket data for worker + */ +module.exports = async (worker, _packet) => { + const packet = { ..._packet }; + packet.payload.image = await loadImage(packet.payload.image); + worker.postMessage(packet); +}; diff --git a/src/worker/browser/spawnWorker.js b/src/worker/browser/spawnWorker.js new file mode 100644 index 0000000..6622fa0 --- /dev/null +++ b/src/worker/browser/spawnWorker.js @@ -0,0 +1,23 @@ +/** + * spawnWorker + * + * @name spawnWorker + * @function create a new Worker in browser + * @access public + * @param {object} options + * @param {string} options.workerPath - worker script path + * @param {boolean} options.workerBlobURL - Use a blob:// URL for the worker script + */ +module.exports = ({ workerPath, workerBlobURL }) => { + let worker; + if (Blob && URL && workerBlobURL) { + const blob = new Blob([`importScripts("${workerPath}");`], { + type: 'application/javascript', + }); + worker = new Worker(URL.createObjectURL(blob)); + } else { + worker = new Worker(workerPath); + } + + return worker; +}; diff --git a/src/worker/browser/terminateWorker.js b/src/worker/browser/terminateWorker.js new file mode 100644 index 0000000..93a38c7 --- /dev/null +++ b/src/worker/browser/terminateWorker.js @@ -0,0 +1,11 @@ +/** + * terminateWorker + * + * @name terminateWorker + * @function terminate worker + * @access public + * @param {object} instance TesseractWorker instance + */ +module.exports = (worker) => { + worker.terminate(); +}; diff --git a/src/worker/browser/worker.js b/src/worker/browser/worker.js deleted file mode 100644 index 84a0316..0000000 --- a/src/worker/browser/worker.js +++ /dev/null @@ -1,55 +0,0 @@ -/** - * - * Browser worker scripts - * - * @fileoverview Browser worker implementation - * @author Kevin Kwok - * @author Guillermo Webster - * @author Jerome Wu - */ - -const check = require('check-types'); -const workerWrapper = require('../../workerWrapper'); -const b64toU8Array = require('./b64toU8Array'); - -/* - * register message handler - */ -global.addEventListener('message', ({ data }) => { - workerWrapper.dispatchHandlers(data, obj => postMessage(obj)); -}); - -/* - * getCore is a sync function to load and return - * TesseractCore. - */ -workerWrapper.setAdapter({ - getCore: (corePath, res) => { - if (check.undefined(global.TesseractCore)) { - res.progress({ status: 'loading tesseract core', progress: 0 }); - global.importScripts(corePath); - /* - * Depending on whether the browser supports WebAssembly, - * the version of the TesseractCore will be different. - */ - if (check.not.undefined(global.TesseractCoreWASM) && typeof WebAssembly === 'object') { - global.TesseractCore = global.TesseractCoreWASM; - } else if (check.not.undefined(global.TesseractCoreASM)) { - global.TesseractCore = global.TesseractCoreASM; - } else { - throw Error('Failed to load TesseractCore'); - } - res.progress({ status: 'loading tesseract core', progress: 1 }); - } - return global.TesseractCore; - }, - b64toU8Array, - writeFile: (path, data, type) => { - postMessage({ - jobId: 'Download', - path, - data, - type, - }); - }, -}); diff --git a/src/worker/node/send.js b/src/worker/node/send.js index bcc9e07..476a817 100644 --- a/src/worker/node/send.js +++ b/src/worker/node/send.js @@ -17,23 +17,26 @@ const readFile = util.promisify(fs.readFile); * buffer: image buffer * @returns {array} binary image in array format */ -const loadImage = (image) => { - if (isURL(image)) { - return axios.get(image, { - responseType: 'arraybuffer', - }) - .then(resp => resp.data); +const loadImage = async (image) => { + let data = image; + if (typeof image === 'undefined') { + return image; } - if (/data:image\/([a-zA-Z]*);base64,([^"]*)/.test(image)) { - return Promise.resolve(Buffer.from(image.split(',')[1], 'base64')); + if (typeof image === 'string') { + if (isURL(image) || image.startsWith('chrome-extension://') || image.startsWith('file://')) { + const { data: _data } = await axios.get(image, { responseType: 'arraybuffer' }); + data = _data; + } else if (/data:image\/([a-zA-Z]*);base64,([^"]*)/.test(image)) { + data = Buffer.from(image.split(',')[1], 'base64'); + } else { + data = await readFile(image); + } + } else if (Buffer.isBuffer(image)) { + data = image; } - if (Buffer.isBuffer(image)) { - return Promise.resolve(image); - } - - return readFile(image); + return new Uint8Array(data); }; @@ -46,16 +49,8 @@ const loadImage = (image) => { * @param {object} instance TesseractWorker instance * @param {object} iPacket data for worker */ -module.exports = (worker, packet) => { - const p = { ...packet }; - if (['recognize', 'detect'].includes(p.action)) { - loadImage(p.payload.image) - .then(buf => new Uint8Array(buf)) - .then((img) => { - p.payload.image = Array.from(img); - worker.send(p); - }); - } else { - worker.send(p); - } +module.exports = async (worker, _packet) => { + const packet = { ..._packet }; + packet.payload.image = await loadImage(packet.payload.image); + worker.send(packet); };