diff --git a/src/createJob.js b/src/createJob.js
index 8455131..d8d9e3d 100644
--- a/src/createJob.js
+++ b/src/createJob.js
@@ -1,4 +1,6 @@
-const { send } = require('./worker/node');
+const {
+  send,
+} = require('./worker/node');
 const log = require('./utils/log');
 
 let jobCounter = 0;
@@ -10,7 +12,7 @@ module.exports = (
   jobCounter += 1;
   const id = `Job-${jobCounter}-${Math.random().toString(16).slice(3, 8)}`;
 
-  const start = (w) => {
+  const start = async (w) => {
     log(`[${w.id}]: Start ${id}, action=${action}`);
     send(w.worker, {
       workerId: w.id,
diff --git a/src/createWorker.js b/src/createWorker.js
index 5dd6f9d..c28ef22 100644
--- a/src/createWorker.js
+++ b/src/createWorker.js
@@ -7,6 +7,7 @@ const {
   spawnWorker,
   terminateWorker,
   onMessage,
+  loadImage,
 } = require('./worker/node');
 
 let workerCounter = 0;
@@ -57,17 +58,19 @@ module.exports = (_options = {}) => {
     doJob('setParameters', { params })
   );
 
-  const recognize = (image, opts = {}) => (
-    doJob('recognize', { image, options: opts })
-  );
+  const recognize = async (_image, opts = {}) => {
+    const image = await loadImage(_image);
+    return doJob('recognize', { image, options: opts });
+  };
 
   const getPDF = (title = 'Tesseract OCR Result', textonly = false) => (
     doJob('getPDF', { title, textonly })
   );
 
-  const detect = image => (
-    doJob('detect', { image })
-  );
+  const detect = async (_image) => {
+    const image = await loadImage(_image);
+    return doJob('detect', { image });
+  };
 
   const terminate = async () => {
     if (worker !== null) {
diff --git a/src/worker/browser/index.js b/src/worker/browser/index.js
index 0cecfbc..3c02f92 100644
--- a/src/worker/browser/index.js
+++ b/src/worker/browser/index.js
@@ -12,6 +12,7 @@ const spawnWorker = require('./spawnWorker');
 const terminateWorker = require('./terminateWorker');
 const onMessage = require('./onMessage');
 const send = require('./send');
+const loadImage = require('./loadImage');
 
 module.exports = {
   defaultOptions,
@@ -19,4 +20,5 @@ module.exports = {
   terminateWorker,
   onMessage,
   send,
+  loadImage,
 };
diff --git a/src/worker/browser/loadImage.js b/src/worker/browser/loadImage.js
new file mode 100644
index 0000000..a57b4f4
--- /dev/null
+++ b/src/worker/browser/loadImage.js
@@ -0,0 +1,79 @@
+const axios = require('axios');
+const resolveURL = require('resolve-url');
+
+/**
+ * readFromBlobOrFile
+ *
+ * @name readFromBlobOrFile
+ * @function
+ * @access private
+ * @param {object} blob A blob or file object to read
+ * @returns {Promise<ArrayBuffer>} resolves with the content read by FileReader, rejects on read error
+ */
+const readFromBlobOrFile = blob => (
+  new Promise((resolve, reject) => {
+    const fileReader = new FileReader();
+    fileReader.onload = () => {
+      resolve(fileReader.result);
+    };
+    fileReader.onerror = ({ target: { error: { code } } }) => {
+      reject(Error(`File could not be read! Code=${code}`));
+    };
+    fileReader.readAsArrayBuffer(blob);
+  })
+);
+
+/**
+ * loadImage
+ *
+ * @name loadImage
+ * @function load image from different source
+ * @access private
+ * @param {string, object} image - image source, supported formats:
+ *   string: URL string, can be relative path
+ *   string: base64 image
+ *   img HTMLElement: extract image source from src attribute
+ *   video HTMLElement: extract image source from poster attribute
+ *   canvas HTMLElement: extract image data by converting to Blob
+ *   File instance: data from
+ * @returns {Promise<Uint8Array|string>} binary image bytes, or the string 'undefined' when no image is given
+ */
+const loadImage = async (image) => {
+  let data = image;
+  if (typeof image === 'undefined') {
+    return 'undefined';
+  }
+
+  if (typeof image === 'string') {
+    // Base64 Image
+    if (/data:image\/([a-zA-Z]*);base64,([^"]*)/.test(image)) {
+      data = atob(image.split(',')[1])
+        .split('')
+        .map(c => c.charCodeAt(0));
+    } else {
+      const { data: _data } = await axios.get(resolveURL(image), { responseType: 'arraybuffer' });
+      data = _data;
+    }
+  } else if (image instanceof HTMLElement) {
+    if (image.tagName === 'IMG') {
+      data = await loadImage(image.src);
+    }
+    if (image.tagName === 'VIDEO') {
+      data = await loadImage(image.poster);
+    }
+    if (image.tagName === 'CANVAS') {
+      await new Promise((resolve) => {
+        image.toBlob(async (blob) => {
+          data = await readFromBlobOrFile(blob);
+          resolve();
+        });
+      });
+    }
+  } else if (image instanceof File || image instanceof Blob) {
+    data = await readFromBlobOrFile(image);
+  }
+
+  return new Uint8Array(data);
+};
+
+module.exports = loadImage;
diff --git a/src/worker/browser/send.js b/src/worker/browser/send.js
index 94f75b2..de7ed94 100644
--- a/src/worker/browser/send.js
+++ b/src/worker/browser/send.js
@@ -1,92 +1,12 @@
-const axios = require('axios');
-const resolveURL = require('resolve-url');
-
 /**
- * readFromBlobOrFile
+ * send
  *
- * @name readFromBlobOrFile
- * @function
- * @access private
- * @param {object} blob A blob or file objec to read
- * @param {function} res callback function after reading completes
- */
-const readFromBlobOrFile = blob => (
-  new Promise((resolve, reject) => {
-    const fileReader = new FileReader();
-    fileReader.onload = () => {
-      resolve(fileReader.result);
-    };
-    fileReader.onerror = ({ target: { error: { code } } }) => {
-      reject(Error(`File could not be read! Code=${code}`));
-    };
-    fileReader.readAsArrayBuffer(blob);
-  })
-);
-
-/**
- * loadImage
- *
- * @name loadImage
- * @function load image from different source
- * @access private
- * @param {string, object} image - image source, supported formats:
- *   string: URL string, can be relative path
- *   string: base64 image
- *   img HTMLElement: extract image source from src attribute
- *   video HTMLElement: extract image source from poster attribute
- *   canvas HTMLElement: extract image data by converting to Blob
- *   File instance: data from
- * @returns {array} binary image in array format
- */
-const loadImage = async (image) => {
-  let data = image;
-  if (typeof image === 'undefined') {
-    return 'undefined';
-  }
-
-  if (typeof image === 'string') {
-    // Base64 Image
-    if (/data:image\/([a-zA-Z]*);base64,([^"]*)/.test(image)) {
-      data = atob(image.split(',')[1])
-        .split('')
-        .map(c => c.charCodeAt(0));
-    } else {
-      const { data: _data } = await axios.get(resolveURL(image), { responseType: 'arraybuffer' });
-      data = _data;
-    }
-  } else if (image instanceof HTMLElement) {
-    if (image.tagName === 'IMG') {
-      data = loadImage(image.src);
-    }
-    if (image.tagName === 'VIDEO') {
-      data = loadImage(image.poster);
-    }
-    if (image.tagName === 'CANVAS') {
-      await new Promise((resolve) => {
-        image.toBlob(async (blob) => {
-          data = await readFromBlobOrFile(blob);
-          resolve();
-        });
-      });
-    }
-  } else if (image instanceof File || image instanceof Blob) {
-    data = await readFromBlobOrFile(image);
-  }
-
-  return new Uint8Array(data);
-};
-
-/**
- * sendPacket
- *
- * @name sendPacket
+ * @name send
  * @function send packet to worker and create a job
  * @access public
  * @param {object} instance TesseractWorker instance
  * @param {object} iPacket data for worker
  */
-module.exports = async (worker, _packet) => {
-  const packet = { ..._packet };
-  packet.payload.image = await loadImage(packet.payload.image);
+module.exports = async (worker, packet) => {
   worker.postMessage(packet);
 };
diff --git a/src/worker/node/index.js b/src/worker/node/index.js
index cedf1f5..f60b01b 100644
--- a/src/worker/node/index.js
+++ b/src/worker/node/index.js
@@ -12,6 +12,7 @@ const spawnWorker = require('./spawnWorker');
 const terminateWorker = require('./terminateWorker');
 const onMessage = require('./onMessage');
 const send = require('./send');
+const loadImage = require('./loadImage');
 
 module.exports = {
   defaultOptions,
@@ -19,4 +20,5 @@ module.exports = {
   terminateWorker,
   onMessage,
   send,
+  loadImage,
 };
diff --git a/src/worker/node/loadImage.js b/src/worker/node/loadImage.js
new file mode 100644
index 0000000..d4bc77f
--- /dev/null
+++ b/src/worker/node/loadImage.js
@@ -0,0 +1,40 @@
+const util = require('util');
+const fs = require('fs');
+const axios = require('axios');
+const isURL = require('is-url');
+
+const readFile = util.promisify(fs.readFile);
+
+/**
+ * loadImage
+ *
+ * @name loadImage
+ * @function load image from different source
+ * @access public
+ * @param {string} image - image source, supported formats:
+ *   string: URL string or file path
+ *   string: base64 image
+ *   buffer: image buffer
+ * @returns {array} binary image in array format
+ */
+module.exports = async (image) => {
+  let data = image;
+  if (typeof image === 'undefined') {
+    return image;
+  }
+
+  if (typeof image === 'string') {
+    if (isURL(image) || image.startsWith('chrome-extension://') || image.startsWith('file://')) {
+      const { data: _data } = await axios.get(image, { responseType: 'arraybuffer' });
+      data = _data;
+    } else if (/data:image\/([a-zA-Z]*);base64,([^"]*)/.test(image)) {
+      data = Buffer.from(image.split(',')[1], 'base64');
+    } else {
+      data = await readFile(image);
+    }
+  } else if (Buffer.isBuffer(image)) {
+    data = image;
+  }
+
+  return new Uint8Array(data);
+};
diff --git a/src/worker/node/send.js b/src/worker/node/send.js
index 476a817..da701d2 100644
--- a/src/worker/node/send.js
+++ b/src/worker/node/send.js
@@ -1,45 +1,3 @@
-const util = require('util');
-const fs = require('fs');
-const axios = require('axios');
-const isURL = require('is-url');
-
-const readFile = util.promisify(fs.readFile);
-
-/**
- * loadImage
- *
- * @name loadImage
- * @function load image from different source
- * @access public
- * @param {string} image - image source, supported formats:
- *   string: URL string or file path
- *   string: base64 image
- *   buffer: image buffer
- * @returns {array} binary image in array format
- */
-const loadImage = async (image) => {
-  let data = image;
-  if (typeof image === 'undefined') {
-    return image;
-  }
-
-  if (typeof image === 'string') {
-    if (isURL(image) || image.startsWith('chrome-extension://') || image.startsWith('file://')) {
-      const { data: _data } = await axios.get(image, { responseType: 'arraybuffer' });
-      data = _data;
-    } else if (/data:image\/([a-zA-Z]*);base64,([^"]*)/.test(image)) {
-      data = Buffer.from(image.split(',')[1], 'base64');
-    } else {
-      data = await readFile(image);
-    }
-  } else if (Buffer.isBuffer(image)) {
-    data = image;
-  }
-
-  return new Uint8Array(data);
-};
-
-
 /**
  * send
  *
@@ -49,8 +7,6 @@ const loadImage = async (image) => {
  * @param {object} instance TesseractWorker instance
  * @param {object} iPacket data for worker
  */
-module.exports = async (worker, _packet) => {
-  const packet = { ..._packet };
-  packet.payload.image = await loadImage(packet.payload.image);
+module.exports = (worker, packet) => {
   worker.send(packet);
 };