Jerome Wu
5 years ago
46 changed files with 632 additions and 998 deletions
@ -1,163 +0,0 @@
@@ -1,163 +0,0 @@
|
||||
/** |
||||
* |
||||
* The job executed by the worker; each job is basically the recognition of one image. |
||||
* |
||||
* @fileoverview Job executed by Worker |
||||
* @author Kevin Kwok <antimatter15@gmail.com> |
||||
* @author Guillermo Webster <gui@mit.edu> |
||||
* @author Jerome Wu <jeromewus@gmail.com> |
||||
*/ |
||||
const adapter = require('../node/'); |
||||
|
||||
/** A global job counter as part of job id */
let jobCounter = 0;

/**
 * A single unit of work (one recognize/detect request) handed to a worker.
 *
 * The job is a thenable: `then`, `catch`, `progress` and `finally` register
 * callbacks that `handle` fires when the worker reports back. Settlement is
 * tracked explicitly (`_status`, `_result`, `_error`) so that callbacks
 * registered after the job has finished still observe the outcome.
 */
class TesseractJob {
  /**
   * constructor
   *
   * @name constructor
   * @function initialize a TesseractJob
   * @access public
   * @param {object} worker - owner of the job; `send` passes it to the adapter
   *   and `handle` calls its `dequeue()` once the job settles
   */
  constructor(worker) {
    jobCounter += 1;
    this.id = `Job-${jobCounter}-${Math.random().toString(16).slice(3, 8)}`;

    this._worker = worker;

    /**
     * All callback functions are kept in arrays, so more than one callback
     * can be registered for then, catch, progress and finally.
     */
    this._resolve = [];
    this._reject = [];
    this._progress = [];
    this._finally = [];

    /** 'pending' | 'resolved' | 'rejected' */
    this._status = 'pending';
    this._result = undefined;
    this._error = undefined;
  }

  /**
   * then
   *
   * @name then
   * @function chain like a Promise; also works after the job has settled
   * @access public
   * @param {function} resolve - called when the job succeeds
   * @param {function} reject - called when the job fails
   * @returns {Promise} a real Promise following the job outcome
   */
  then(resolve, reject) {
    return new Promise((res, rej) => {
      if (this._status === 'resolved') {
        res(this._result);
      } else {
        this._resolve.push(res);
      }
      this.catch(rej);
    }).then(resolve, reject);
  }

  /**
   * catch
   *
   * @name catch
   * @function register a function to call when there is an error; it is
   *   invoked immediately if the job has already been rejected
   * @access public
   * @param {function} reject - callback function for error
   * @returns {TesseractJob} this job, for chaining
   */
  catch(reject) {
    if (this._status === 'rejected') {
      reject(this._error);
    } else {
      this._reject.push(reject);
    }
    return this;
  }

  /**
   * progress
   *
   * @name progress
   * @function register a function that receives the data of every
   *   'progress' packet
   * @access public
   * @param {function} fn - callback function for progress information
   * @returns {TesseractJob} this job, for chaining
   */
  progress(fn) {
    this._progress.push(fn);
    return this;
  }

  /**
   * finally
   *
   * @name finally
   * @function register a callback that runs after the job resolves or rejects
   * @access public
   * @param {function} fn - callback function, receives the packet data
   * @returns {TesseractJob} this job, for chaining
   */
  finally(fn) {
    this._finally.push(fn);
    return this;
  }

  /**
   * send
   *
   * @name send
   * @function send an action with its payload to the worker via the adapter
   * @access public
   * @param {string} action - action to trigger, should be "recognize" or "detect"
   * @param {object} payload - data to be consumed
   */
  send(action, payload) {
    adapter.sendPacket(this._worker, {
      jobId: this.id,
      action,
      payload,
    });
  }

  /**
   * handle
   *
   * @name handle
   * @function dispatch a packet coming back from the worker
   * @access public
   * @param {object} packet - `{ status, data }`; status is one of
   *   'resolve', 'reject' or 'progress'
   */
  handle(packet) {
    const { data, status } = packet;

    if (status === 'progress') {
      this._progress.forEach(fn => fn(data));
      return;
    }

    if (status !== 'resolve' && status !== 'reject') {
      console.warn('Message type unknown', packet.status);
      return;
    }

    // A job settles once; a second settle packet would otherwise re-run the
    // callbacks and dequeue the worker twice.
    if (this._status !== 'pending') {
      console.warn(`Job ${this.id} is already settled, packet ignored.`);
      return;
    }

    if (status === 'resolve') {
      // Record the outcome first so a callback that re-subscribes sees it.
      this._status = 'resolved';
      this._result = data;
      if (this._resolve.length === 0) console.log(data);
      this._resolve.forEach((fn) => {
        const ret = fn(data);
        if (ret && typeof ret.then === 'function') {
          console.warn('TesseractJob instances do not chain like ES6 Promises. To convert it into a real promise, use Promise.resolve.');
        }
      });
    } else {
      this._status = 'rejected';
      this._error = data;
      if (this._reject.length === 0) console.error(data);
      this._reject.forEach(fn => fn(data));
    }

    this._worker.dequeue();
    this._finally.forEach(fn => fn(data));
  }
}
||||
|
||||
module.exports = TesseractJob; |
@ -1,196 +0,0 @@
@@ -1,196 +0,0 @@
|
||||
/** |
||||
* |
||||
* The core part of tesseract.js to execute the OCR jobs. |
||||
* |
||||
* @fileoverview Worker for OCR jobs |
||||
* @author Kevin Kwok <antimatter15@gmail.com> |
||||
* @author Guillermo Webster <gui@mit.edu> |
||||
* @author Jerome Wu <jeromewus@gmail.com> |
||||
*/ |
||||
const check = require('check-types'); |
||||
const resolveURL = (typeof window !== 'undefined' && typeof window.document !== 'undefined') ? require('resolve-url') : s => s; |
||||
const adapter = require('../node'); |
||||
const circularize = require('./circularize'); |
||||
const TesseractJob = require('./TesseractJob'); |
||||
|
||||
/**
 * TesseractWorker
 * @name TesseractWorker
 * @function execute TesseractJob with a queue mechanism: only one job is
 *   current at a time, further jobs wait in `_queue` until `dequeue()` runs
 * @access public
 */
class TesseractWorker {
  /**
   * constructor
   *
   * @name constructor
   * @function initialize the worker
   * @access public
   * @param {object} options - worker configurations, merged over
   *   `adapter.defaultOptions`
   * @param {string} options.workerPath -
   *     A remote path to load worker script.
   *     In browser-like environment, it is downloaded from a CDN service.
   *     Please update this option if you self-host the worker script.
   *     In Node.js environment, this option is not used as the worker script is in local.
   * @param {boolean} [options.workerBlobURL=true] - Use a blob: URL for the worker script
   * @param {string} options.corePath -
   *     A remote path to load tesseract.js-core script.
   *     In browser-like environment, it is downloaded from a CDN service.
   *     Please update this option if you self-host the core script.
   *     In Node.js environment, this option is not used as the core script is in local.
   * @param {string} options.langPath -
   *     A remote path to load *.traineddata.gz, it is downloaded from a CDN service.
   *     Please update this option if you self-host the language data.
   * @param {string} [options.cachePath=.] - @see {@link https://github.com/jeromewu/tesseract.js-utils/blob/master/src/loadLang.js}
   * @param {string} [options.cacheMethod=write] - @see {@link https://github.com/jeromewu/tesseract.js-utils/blob/master/src/loadLang.js}
   * @param {string} [options.dataPath=.] - @see {@link https://github.com/jeromewu/tesseract.js-utils/blob/master/src/loadLang.js}
   */
  constructor(options = {}) {
    this.worker = null;
    this.options = {
      ...adapter.defaultOptions,
      ...options,
    };
    // Only paths supplied by the caller are passed through resolveURL.
    ['corePath', 'workerPath', 'langPath'].forEach((key) => {
      if (check.not.undefined(options[key])) {
        this.options = { ...this.options, [key]: resolveURL(options[key]) };
      }
    });
    this._currentJob = null;
    this._queue = [];
  }

  /**
   * recognize
   *
   * @name recognize
   * @function recognize text in given image
   * @access public
   * @param {Buffer, string} image - image to be recognized
   * @param {string, array} [langs=eng] - languages to recognize
   * @param {object} params - tesseract parameters
   * @returns {TesseractJob} the queued job
   */
  recognize(image, langs = 'eng', params = {}) {
    return this._sendJob('recognize', image, langs, params);
  }

  /**
   * detect
   *
   * @name detect
   * @function detect language of the text in the image (always uses 'osd')
   * @access public
   * @param {Buffer, string} image - image to be inspected
   * @param {object} params - tesseract parameters
   * @returns {TesseractJob} the queued job
   */
  detect(image, params = {}) {
    return this._sendJob('detect', image, 'osd', params);
  }

  /**
   * recv
   *
   * @name recv
   * @function route a packet from the worker to the current job; results of
   *   a resolved 'recognize' are passed through circularize first
   * @access public
   * @param {object} packet job data
   */
  recv(packet) {
    const job = this._currentJob;
    // No current job (late packet, or after terminate()) or a stale id:
    // warn instead of dereferencing null.
    if (!job || job.id !== packet.jobId) {
      console.warn(`Job ID ${packet.jobId} not known.`);
      return;
    }

    job.handle({
      ...packet,
      data: packet.status === 'resolve' && packet.action === 'recognize'
        ? circularize(packet.data)
        : packet.data,
    });
  }

  /**
   * dequeue
   *
   * @name dequeue
   * @function clear the current job and start the job at the front of the
   *   queue, if any
   * @access public
   */
  dequeue() {
    this._currentJob = null;
    if (this._queue.length) {
      this._queue[0]();
    }
  }

  /**
   * terminate
   *
   * @name terminate
   * @function terminate the worker and drop the current and queued jobs
   * @access public
   */
  terminate() {
    if (this.worker) {
      adapter.terminateWorker(this);
    }
    this.worker = null;
    this._currentJob = null;
    this._queue = [];
  }

  /**
   * _sendJob
   *
   * @name _sendJob
   * @function append a new job to the job queue
   * @access private
   * @param {string} type job type, should be recognize or detect
   * @param {Buffer, string} image image to recognize
   * @param {string, array} langs language(s) to recognize
   * @param {object} params tesseract parameters
   * @returns {TesseractJob} the queued job
   */
  _sendJob(type, image, langs, params) {
    return this._delay((job) => {
      job.send(
        type,
        {
          image,
          langs,
          params,
          options: this.options,
        },
      );
    });
  }

  /**
   * _delay
   *
   * @name _delay
   * @function create a job and defer fn until that job reaches the front of
   *   the queue; spawns the underlying worker on first use
   * @access private
   * @param {function} fn A handler function for the job
   * @returns {TesseractJob} the created job
   */
  _delay(fn) {
    if (check.null(this.worker)) {
      this.worker = adapter.spawnWorker(this, this.options);
    }

    const job = new TesseractJob(this);
    this._queue.push(() => {
      this._queue.shift();
      this._currentJob = job;
      fn(job);
    });
    // Start right away when nothing is running.
    if (check.null(this._currentJob)) {
      this.dequeue();
    }
    return job;
  }
}
||||
|
||||
module.exports = TesseractWorker; |
@ -1,86 +0,0 @@
@@ -1,86 +0,0 @@
|
||||
/** |
||||
* The result of dump.js is a big JSON tree |
||||
* which can be easily serialized (for instance |
||||
* to be sent from a webworker to the main app |
||||
* or through Node's IPC), but we want |
||||
* a (circular) DOM-like interface for walking |
||||
* through the data. |
||||
* |
||||
* @fileoverview DOM-like interface for walking through data |
||||
* @author Kevin Kwok <antimatter15@gmail.com> |
||||
* @author Guillermo Webster <gui@mit.edu> |
||||
* @author Jerome Wu <jeromewus@gmail.com> |
||||
*/ |
||||
|
||||
module.exports = (iPage) => { |
||||
const page = { |
||||
...iPage, |
||||
paragraphs: [], |
||||
lines: [], |
||||
words: [], |
||||
symbols: [], |
||||
}; |
||||
|
||||
page.blocks.forEach((iBlock) => { |
||||
const block = { |
||||
...iBlock, |
||||
page, |
||||
lines: [], |
||||
words: [], |
||||
symbols: [], |
||||
}; |
||||
|
||||
block.paragraphs.forEach((iPara) => { |
||||
const para = { |
||||
...iPara, |
||||
block, |
||||
page, |
||||
words: [], |
||||
symbols: [], |
||||
}; |
||||
|
||||
para.lines.forEach((iLine) => { |
||||
const line = { |
||||
...iLine, |
||||
paragraph: para, |
||||
block, |
||||
page, |
||||
symbols: [], |
||||
}; |
||||
|
||||
line.words.forEach((iWord) => { |
||||
const word = { |
||||
...iWord, |
||||
line, |
||||
paragraph: para, |
||||
block, |
||||
page, |
||||
}; |
||||
|
||||
word.symbols.forEach((iSym) => { |
||||
const sym = { |
||||
...iSym, |
||||
word, |
||||
line, |
||||
paragraph: para, |
||||
block, |
||||
page, |
||||
}; |
||||
|
||||
sym.line.symbols.push(sym); |
||||
sym.paragraph.symbols.push(sym); |
||||
sym.block.symbols.push(sym); |
||||
sym.page.symbols.push(sym); |
||||
}); |
||||
word.paragraph.words.push(word); |
||||
word.block.words.push(word); |
||||
word.page.words.push(word); |
||||
}); |
||||
line.block.lines.push(line); |
||||
line.page.lines.push(line); |
||||
}); |
||||
para.page.paragraphs.push(para); |
||||
}); |
||||
}); |
||||
return page; |
||||
}; |
@ -1,129 +0,0 @@
@@ -1,129 +0,0 @@
|
||||
const { isBrowser } = require('./env'); |
||||
const resolveURL = isBrowser ? require('resolve-url') : s => s; // eslint-disable-line
|
||||
const circularize = require('./circularize'); |
||||
const createJob = require('./createJob'); |
||||
const { defaultParams } = require('./options'); |
||||
const { |
||||
defaultOptions, |
||||
spawnWorker, |
||||
terminateWorker, |
||||
setOnMessage, |
||||
} = require('../node'); |
||||
|
||||
let workerCounter = 0; |
||||
|
||||
/**
 * Return a shallow copy of `options` in which every path option that is
 * defined (corePath, workerPath, langPath) has been passed through
 * resolveURL. Other keys are copied as-is; the input is not modified.
 */
const resolvePaths = (options) => {
  const resolved = { ...options };
  const pathKeys = ['corePath', 'workerPath', 'langPath'];
  for (const key of pathKeys) {
    if (typeof options[key] !== 'undefined') {
      resolved[key] = resolveURL(resolved[key]);
    }
  }
  return resolved;
};
||||
|
||||
module.exports = (options = {}) => { |
||||
workerCounter += 1; |
||||
const id = `Worker-${workerCounter}-${Math.random().toString(16).slice(3, 8)}`; |
||||
const opts = resolvePaths({ |
||||
...defaultOptions, |
||||
...options, |
||||
}); |
||||
const { logger } = opts; |
||||
const resolves = {}; |
||||
const rejects = {}; |
||||
let worker = spawnWorker(opts); |
||||
|
||||
const setResolve = (action, res) => { |
||||
resolves[action] = res; |
||||
}; |
||||
|
||||
const setReject = (action, rej) => { |
||||
rejects[action] = rej; |
||||
}; |
||||
|
||||
const load = () => ( |
||||
new Promise((resolve, reject) => { |
||||
const job = createJob( |
||||
'load', |
||||
opts, |
||||
); |
||||
setResolve('load', resolve); |
||||
setReject('load', reject); |
||||
job.start({ worker, id }); |
||||
}) |
||||
); |
||||
|
||||
const loadLanguage = (langs = 'eng') => ( |
||||
new Promise((resolve, reject) => { |
||||
const job = createJob( |
||||
'load-language', |
||||
{ |
||||
langs, |
||||
options: opts, |
||||
}, |
||||
); |
||||
setResolve('load-language', resolve); |
||||
setReject('load-language', reject); |
||||
job.start({ worker, id }); |
||||
}) |
||||
); |
||||
|
||||
const initialize = (langs = 'eng', params = {}) => ( |
||||
new Promise((resolve, reject) => { |
||||
const job = createJob( |
||||
'initialize', |
||||
{ |
||||
langs, |
||||
params: { |
||||
...defaultParams, |
||||
...params, |
||||
}, |
||||
}, |
||||
); |
||||
setResolve('initialize', resolve); |
||||
setReject('initialize', reject); |
||||
job.start({ worker, id }); |
||||
}) |
||||
); |
||||
|
||||
const terminate = () => { |
||||
if (worker !== null) { |
||||
terminateWorker({ worker }); |
||||
worker = null; |
||||
} |
||||
}; |
||||
|
||||
setOnMessage(worker, (packet) => { |
||||
const { status, action, data } = packet; |
||||
if (status === 'resolve') { |
||||
if (action === 'load') { |
||||
resolves.load(data); |
||||
} else if (action === 'initialize') { |
||||
resolves.initialize({ id }); |
||||
} else if (action === 'load-language') { |
||||
resolves['load-language'](data); |
||||
} else if (action === 'recognize') { |
||||
resolves.recognize(circularize(data)); |
||||
} else if (action === 'detect') { |
||||
resolves.detect(data); |
||||
} |
||||
} else if (status === 'reject') { |
||||
rejects[action](data); |
||||
throw Error(data); |
||||
} else if (status === 'progress') { |
||||
logger(data); |
||||
} |
||||
}); |
||||
|
||||
return { |
||||
id, |
||||
worker, |
||||
setResolve, |
||||
setReject, |
||||
load, |
||||
loadLanguage, |
||||
initialize, |
||||
terminate, |
||||
}; |
||||
}; |
@ -1 +0,0 @@
@@ -1 +0,0 @@
|
||||
exports.isBrowser = (typeof window !== 'undefined') && (typeof window.document !== 'undefined'); |
@ -1,40 +0,0 @@
@@ -1,40 +0,0 @@
|
||||
const { OEM, PSM } = require('./types'); |
||||
|
||||
module.exports = { |
||||
defaultOptions: { |
||||
/* |
||||
* default path for downloading *.traineddata, this URL basically |
||||
* points to a github page, not using jsDelivr as there is is limitation |
||||
* of 20 MB. |
||||
*/ |
||||
langPath: 'https://tessdata.projectnaptha.com/4.0.0', |
||||
/* |
||||
* Use BlobURL for worker script by default |
||||
*/ |
||||
workerBlobURL: true, |
||||
logger: () => {}, |
||||
}, |
||||
/* |
||||
* default params for recognize() |
||||
*/ |
||||
defaultParams: { |
||||
tessedit_ocr_engine_mode: OEM.LSTM_ONLY, |
||||
tessedit_pageseg_mode: PSM.SINGLE_BLOCK, |
||||
tessedit_char_whiltelist: '', |
||||
tessjs_create_pdf: '0', |
||||
tessjs_create_hocr: '1', |
||||
tessjs_create_tsv: '1', |
||||
tessjs_create_box: '0', |
||||
tessjs_create_unlv: '0', |
||||
tessjs_create_osd: '0', |
||||
tessjs_textonly_pdf: '0', |
||||
tessjs_pdf_name: 'tesseract.js-ocr-result', |
||||
tessjs_pdf_title: 'Tesseract.js OCR Result', |
||||
tessjs_pdf_auto_download: true, |
||||
tessjs_pdf_bin: false, |
||||
tessjs_image_rectangle_left: 0, |
||||
tessjs_image_rectangle_top: 0, |
||||
tessjs_image_rectangle_width: -1, |
||||
tessjs_image_rectangle_height: -1, |
||||
}, |
||||
}; |
@ -1 +0,0 @@
@@ -1 +0,0 @@
|
||||
module.exports = 'AAEAAAAKAIAAAwAgT1MvMlbeyJQAAAEoAAAAYGNtYXAACgA0AAABkAAAAB5nbHlmFSJBJAAAAbgAAAAYaGVhZAt48WUAAACsAAAANmhoZWEMAgQCAAAA5AAAACRobXR4BAAAAAAAAYgAAAAIbG9jYQAMAAAAAAGwAAAABm1heHAABAAFAAABCAAAACBuYW1l8usW2gAAAdAAAABLcG9zdAABAAEAAAIcAAAAIAABAAAAAQAAsJRxEF8PPPUEBwgAAAAAAM+a/G4AAAAA1MOn8gAAAAAEAAgAAAAAEAACAAAAAAAAAAEAAAgA//8AAAQAAAAAAAQAAAEAAAAAAAAAAAAAAAAAAAACAAEAAAACAAQAAQAAAAAAAQAAAAAAAAAAAAAAAAAAAAAAAwAAAZAABQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAUAAQABAAAAAAAAAAAAAAAAAAAAAAAAAAAAR09PRwBAAAAAAAAB//8AAAABAAGAAAAAAAAAAAAAAAAAAAABAAAAAAAABAAAAAAAAAIAAQAAAAAAFAADAAAAAAAUAAYACgAAAAAAAAAAAAAAAAAMAAAAAQAAAAAEAAgAAAMAADEhESEEAPwACAAAAAADACoAAAADAAAABQAWAAAAAQAAAAAABQALABYAAwABBAkABQAWAAAAVgBlAHIAcwBpAG8AbgAgADEALgAwVmVyc2lvbiAxLjAAAAEAAAAAAAAAAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAA='; |
@ -1,34 +0,0 @@
@@ -1,34 +0,0 @@
|
||||
module.exports = { |
||||
/* |
||||
* OEM = OCR Engine Mode, and there are 5 possible modes. |
||||
* |
||||
* By default tesseract.js uses TESSERACT_LSTM_COMBINED mode, which uses LSTM when possible. |
||||
* If you need to use some tesseract v3 features (like tessedit_char_whitelist), |
||||
* you need to use TESSERACT_ONLY mode. |
||||
* |
||||
*/ |
||||
OEM: { |
||||
TESSERACT_ONLY: 0, |
||||
LSTM_ONLY: 1, |
||||
TESSERACT_LSTM_COMBINED: 2, |
||||
DEFAULT: 3, |
||||
}, |
||||
/* |
||||
* PSM = Page Segmentation Mode |
||||
*/ |
||||
PSM: { |
||||
OSD_ONLY: '0', |
||||
AUTO_OSD: '1', |
||||
AUTO_ONLY: '2', |
||||
AUTO: '3', |
||||
SINGLE_COLUMN: '4', |
||||
SINGLE_BLOCK_VERT_TEXT: '5', |
||||
SINGLE_BLOCK: '6', |
||||
SINGLE_LINE: '7', |
||||
SINGLE_WORD: '8', |
||||
SINGLE_CHAR: '9', |
||||
SPARSE_TEXT: '10', |
||||
SPARSE_TEXT_OSD: '11', |
||||
RAW_LINE: '12', |
||||
}, |
||||
}; |
@ -1,87 +0,0 @@
@@ -1,87 +0,0 @@
|
||||
const { readImage } = require('tesseract.js-utils'); |
||||
|
||||
/** |
||||
* setImage |
||||
* |
||||
* @name setImage |
||||
* @function set image in tesseract for recognition |
||||
* @access public |
||||
* @param {array} image - binary array in array format |
||||
* @returns {number} - an emscripten pointer of the image |
||||
*/ |
||||
exports.setImage = (TessModule, api, image, params) => { |
||||
const { |
||||
tessjs_image_rectangle_left: left, |
||||
tessjs_image_rectangle_top: top, |
||||
tessjs_image_rectangle_width: width, |
||||
tessjs_image_rectangle_height: height, |
||||
} = params; |
||||
const { |
||||
w, h, bytesPerPixel, data, pix, |
||||
} = readImage(TessModule, Array.from(image)); |
||||
|
||||
/* |
||||
* As some image format (ex. bmp) is not supported natiely by tesseract, |
||||
* sometimes it will not return pix directly, but data and bytesPerPixel |
||||
* for another SetImage usage. |
||||
* |
||||
*/ |
||||
if (data === null) { |
||||
api.SetImage(pix); |
||||
} else { |
||||
api.SetImage(data, w, h, bytesPerPixel, w * bytesPerPixel); |
||||
} |
||||
api.SetRectangle( |
||||
(left < 0) ? 0 : left, |
||||
(top < 0) ? 0 : top, |
||||
(width < 0) ? w : width, |
||||
(height < 0) ? h : height, |
||||
); |
||||
return data === null ? pix : data; |
||||
}; |
||||
|
||||
exports.getLangsStr = langs => ( |
||||
typeof langs === 'string' |
||||
? langs |
||||
: langs.map(lang => (typeof lang === 'string' ? lang : lang.data)).join('+') |
||||
); |
||||
|
||||
/** |
||||
* handleOutput |
||||
* |
||||
* @name handleOutput |
||||
* @function handle file output |
||||
* @access private |
||||
* @param {object} customParams - an object of params |
||||
*/ |
||||
exports.getFiles = (TessModule, api, adapter, params) => { |
||||
let files = {}; |
||||
const { |
||||
tessjs_create_pdf, |
||||
tessjs_textonly_pdf, |
||||
tessjs_pdf_name, |
||||
tessjs_pdf_title, |
||||
tessjs_pdf_auto_download, |
||||
tessjs_pdf_bin, |
||||
} = params; |
||||
|
||||
if (tessjs_create_pdf === '1') { |
||||
const pdfRenderer = new TessModule.TessPDFRenderer(tessjs_pdf_name, '/', tessjs_textonly_pdf === '1'); |
||||
pdfRenderer.BeginDocument(tessjs_pdf_title); |
||||
pdfRenderer.AddImage(api); |
||||
pdfRenderer.EndDocument(); |
||||
TessModule._free(pdfRenderer); |
||||
|
||||
const data = TessModule.FS.readFile(`/${tessjs_pdf_name}.pdf`); |
||||
|
||||
if (tessjs_pdf_bin) { |
||||
files = { pdf: data, ...files }; |
||||
} |
||||
|
||||
if (tessjs_pdf_auto_download) { |
||||
adapter.writeFile(`${tessjs_pdf_name}.pdf`, data, 'application/pdf'); |
||||
} |
||||
} |
||||
|
||||
return files; |
||||
}; |
@ -0,0 +1,12 @@
@@ -0,0 +1,12 @@
|
||||
/* |
||||
* OEM = OCR Engine Mode, and there are 4 possible modes. |
||||
* |
||||
* By default tesseract.js uses LSTM_ONLY mode. |
||||
* |
||||
*/ |
||||
module.exports = { |
||||
TESSERACT_ONLY: 0, |
||||
LSTM_ONLY: 1, |
||||
TESSERACT_LSTM_COMBINED: 2, |
||||
DEFAULT: 3, |
||||
}; |
@ -0,0 +1,18 @@
@@ -0,0 +1,18 @@
|
||||
/* |
||||
* PSM = Page Segmentation Mode |
||||
*/ |
||||
module.exports = { |
||||
OSD_ONLY: '0', |
||||
AUTO_OSD: '1', |
||||
AUTO_ONLY: '2', |
||||
AUTO: '3', |
||||
SINGLE_COLUMN: '4', |
||||
SINGLE_BLOCK_VERT_TEXT: '5', |
||||
SINGLE_BLOCK: '6', |
||||
SINGLE_LINE: '7', |
||||
SINGLE_WORD: '8', |
||||
CIRCLE_WORD: '9', |
||||
SINGLE_CHAR: '10', |
||||
SPARSE_TEXT: '11', |
||||
SPARSE_TEXT_OSD: '12', |
||||
}; |
@ -0,0 +1,5 @@
@@ -0,0 +1,5 @@
|
||||
const OEM = require('./OEM'); |
||||
|
||||
// Engine mode exported as the project-wide default.
const defaultOEM = OEM.DEFAULT;

module.exports = { defaultOEM };
@ -0,0 +1,13 @@
@@ -0,0 +1,13 @@
|
||||
module.exports = { |
||||
/* |
||||
* default path for downloading *.traineddata |
||||
*/ |
||||
langPath: 'https://tessdata.projectnaptha.com/4.0.0', |
||||
/* |
||||
* Use BlobURL for worker script by default |
||||
* TODO: remove this option |
||||
* |
||||
*/ |
||||
workerBlobURL: true, |
||||
logger: () => {}, |
||||
}; |
@ -0,0 +1,100 @@
@@ -0,0 +1,100 @@
|
||||
const resolvePaths = require('./utils/resolvePaths'); |
||||
const circularize = require('./utils/circularize'); |
||||
const createJob = require('./createJob'); |
||||
const { defaultOEM } = require('./constants/config'); |
||||
const { |
||||
defaultOptions, |
||||
spawnWorker, |
||||
terminateWorker, |
||||
onMessage, |
||||
} = require('./worker/node'); |
||||
|
||||
let workerCounter = 0; |
||||
|
||||
module.exports = (_options = {}) => { |
||||
workerCounter += 1; |
||||
const id = `Worker-${workerCounter}-${Math.random().toString(16).slice(3, 8)}`; |
||||
const options = resolvePaths({ |
||||
...defaultOptions, |
||||
..._options, |
||||
}); |
||||
const { logger } = options; |
||||
const resolves = {}; |
||||
const rejects = {}; |
||||
let worker = spawnWorker(options); |
||||
|
||||
const setResolve = (action, res) => { |
||||
resolves[action] = res; |
||||
}; |
||||
|
||||
const setReject = (action, rej) => { |
||||
rejects[action] = rej; |
||||
}; |
||||
|
||||
const doJob = (action, payload) => ( |
||||
new Promise((resolve, reject) => { |
||||
setResolve(action, resolve); |
||||
setReject(action, reject); |
||||
createJob(action, payload).start({ worker, id }); |
||||
}) |
||||
); |
||||
|
||||
const load = () => ( |
||||
doJob('load', { options }) |
||||
); |
||||
|
||||
const loadLanguage = (langs = 'eng') => ( |
||||
doJob('load-language', { langs, options }) |
||||
); |
||||
|
||||
const initialize = (langs = 'eng', oem = defaultOEM) => ( |
||||
doJob('initialize', { langs, oem }) |
||||
); |
||||
|
||||
const setParameters = (params = {}) => ( |
||||
doJob('set-parameters', { params }) |
||||
); |
||||
|
||||
const terminate = () => { |
||||
if (worker !== null) { |
||||
terminateWorker(worker); |
||||
worker = null; |
||||
} |
||||
}; |
||||
|
||||
onMessage(worker, (packet) => { |
||||
const { status, action, data } = packet; |
||||
if (status === 'resolve') { |
||||
if (action === 'load') { |
||||
resolves.load(data); |
||||
} else if (action === 'initialize') { |
||||
resolves.initialize({ id }); |
||||
} else if (action === 'set-parameters') { |
||||
resolves['set-parameters'](data); |
||||
} else if (action === 'load-language') { |
||||
resolves['load-language'](data); |
||||
} else if (action === 'recognize') { |
||||
resolves.recognize(circularize(data)); |
||||
} else if (action === 'detect') { |
||||
resolves.detect(data); |
||||
} |
||||
} else if (status === 'reject') { |
||||
rejects[action](data); |
||||
throw Error(data); |
||||
} else if (status === 'progress') { |
||||
logger(data); |
||||
} |
||||
}); |
||||
|
||||
return { |
||||
id, |
||||
worker, |
||||
setResolve, |
||||
setReject, |
||||
load, |
||||
loadLanguage, |
||||
initialize, |
||||
setParameters, |
||||
terminate, |
||||
}; |
||||
}; |
@ -1 +0,0 @@
@@ -1 +0,0 @@
|
||||
module.exports = s => Buffer.from(s, 'base64'); |
@ -1,111 +0,0 @@
@@ -1,111 +0,0 @@
|
||||
/** |
||||
* |
||||
* Tesseract Worker adapter for node |
||||
* |
||||
* @fileoverview Tesseract Worker adapter for node |
||||
* @author Kevin Kwok <antimatter15@gmail.com> |
||||
* @author Guillermo Webster <gui@mit.edu> |
||||
* @author Jerome Wu <jeromewus@gmail.com> |
||||
*/ |
||||
const util = require('util'); |
||||
const fs = require('fs'); |
||||
const axios = require('axios'); |
||||
const isURL = require('is-url'); |
||||
const { fork } = require('child_process'); |
||||
const path = require('path'); |
||||
const b64toU8Array = require('./b64toU8Array'); |
||||
const { defaultOptions } = require('../common/options'); |
||||
|
||||
const readFile = util.promisify(fs.readFile); |
||||
|
||||
/**
 * loadImage
 *
 * @name loadImage
 * @function load image from different sources
 * @access public
 * @param {string|Buffer} image - image source, supported formats:
 *   buffer: image buffer, returned as-is
 *   string: URL string (fetched with axios as an arraybuffer)
 *   string: base64 data URI (`data:image/...;base64,...`)
 *   string: anything else is treated as a file path and read from disk
 * @returns {Promise} resolves with the binary image data
 */
const loadImage = (image) => {
  // Check for a Buffer first: running the string tests below on a Buffer
  // would stringify the whole image, and binary content that happens to
  // contain a data-URI-looking sequence would be misrouted.
  if (Buffer.isBuffer(image)) {
    return Promise.resolve(image);
  }

  if (isURL(image)) {
    return axios.get(image, {
      responseType: 'arraybuffer',
    })
      .then(resp => resp.data);
  }

  if (/data:image\/([a-zA-Z]*);base64,([^"]*)/.test(image)) {
    return Promise.resolve(b64toU8Array(image.split(',')[1]));
  }

  return readFile(image);
};
||||
|
||||
/* |
||||
* Default options for node worker |
||||
*/ |
||||
exports.defaultOptions = { |
||||
...defaultOptions, |
||||
workerPath: path.join(__dirname, 'worker.js'), |
||||
}; |
||||
|
||||
/** |
||||
* spawnWorker |
||||
* |
||||
* @name spawnWorker |
||||
* @function fork a new process in node |
||||
* @access public |
||||
* @param {object} instance - TesseractWorker instance |
||||
* @param {object} options |
||||
* @param {string} options.workerPath - worker script path |
||||
*/ |
||||
exports.spawnWorker = ({ workerPath }) => ( |
||||
fork(workerPath) |
||||
); |
||||
|
||||
exports.setOnMessage = (worker, handler) => { |
||||
worker.on('message', handler); |
||||
}; |
||||
|
||||
/** |
||||
* terminateWorker |
||||
* |
||||
* @name terminateWorker |
||||
* @function kill worker |
||||
* @access public |
||||
* @param {object} instance TesseractWorker instance |
||||
*/ |
||||
exports.terminateWorker = ({ worker }) => { |
||||
worker.kill(); |
||||
}; |
||||
|
||||
/** |
||||
* sendPacket |
||||
* |
||||
* @name sendPacket |
||||
* @function send packet to worker and create a job |
||||
* @access public |
||||
* @param {object} instance TesseractWorker instance |
||||
* @param {object} iPacket data for worker |
||||
*/ |
||||
exports.sendPacket = ({ worker }, packet) => { |
||||
const p = { ...packet }; |
||||
if (['recognize', 'detect'].includes(p.action)) { |
||||
loadImage(p.payload.image) |
||||
.then(buf => new Uint8Array(buf)) |
||||
.then((img) => { |
||||
p.payload.image = Array.from(img); |
||||
worker.send(p); |
||||
}); |
||||
} else { |
||||
worker.send(p); |
||||
} |
||||
}; |
@ -1,44 +0,0 @@
@@ -1,44 +0,0 @@
|
||||
/** |
||||
* |
||||
* Node worker implementation |
||||
* |
||||
* @fileoverview Node worker implementation |
||||
* @author Kevin Kwok <antimatter15@gmail.com> |
||||
* @author Guillermo Webster <gui@mit.edu> |
||||
* @author Jerome Wu <jeromewus@gmail.com> |
||||
*/ |
||||
|
||||
const check = require('check-types'); |
||||
const workerUtils = require('../common/workerUtils'); |
||||
const b64toU8Array = require('./b64toU8Array'); |
||||
|
||||
let TesseractCore = null; |
||||
|
||||
/*
 * register message handler: every packet from the parent process goes to
 * the shared dispatcher, which replies through process.send
 */
const replyToParent = obj => process.send(obj);

process.on('message', (packet) => {
  workerUtils.dispatchHandlers(packet, replyToParent);
});

/*
 * getCore is a sync function to load and return
 * TesseractCore; the module is required on first use and cached afterwards.
 */
const getCore = (corePath, res) => {
  if (check.null(TesseractCore)) {
    res.progress({ status: 'loading tesseract core', progress: 0 });
    TesseractCore = require('tesseract.js-core');
    res.progress({ status: 'loaded tesseract core', progress: 1 });
  }
  return TesseractCore;
};

/*
 * Write data to path asynchronously; a write error is thrown from the
 * callback.
 */
const writeFile = (path, data) => {
  const fs = require('fs');
  fs.writeFile(path, data, (err) => {
    if (err) throw err;
  });
};

workerUtils.setAdapter({
  getCore,
  b64toU8Array,
  writeFile,
});
@ -0,0 +1,57 @@
@@ -0,0 +1,57 @@
|
||||
/** |
||||
* In the recognition result of tesseract, there |
||||
* is a deep JSON object for details, nested as page > blocks > paragraphs > lines > words > symbols. |
||||
* |
||||
* The result of dump.js is a big JSON tree |
||||
* which can be easily serialized (for instance |
||||
* to be sent from a webworker to the main app |
||||
* or through Node's IPC), but we want |
||||
* a (circular) DOM-like interface for walking |
||||
* through the data. |
||||
* |
||||
* A (circular) DOM-like interface here means that |
||||
* each child element in the flattened lists also references its ancestors (page, block, paragraph, line, word). |
||||
* |
||||
* @fileoverview DOM-like interface for walking through data |
||||
* @author Kevin Kwok <antimatter15@gmail.com> |
||||
* @author Guillermo Webster <gui@mit.edu> |
||||
* @author Jerome Wu <jeromewus@gmail.com> |
||||
*/ |
||||
|
||||
module.exports = (page) => { |
||||
const blocks = []; |
||||
const paragraphs = []; |
||||
const lines = []; |
||||
const words = []; |
||||
const symbols = []; |
||||
|
||||
page.blocks.forEach((block) => { |
||||
block.paragraphs.forEach((paragraph) => { |
||||
paragraph.lines.forEach((line) => { |
||||
line.words.forEach((word) => { |
||||
word.symbols.forEach((sym) => { |
||||
symbols.push({ |
||||
...sym, page, block, paragraph, line, word, |
||||
}); |
||||
}); |
||||
words.push({ |
||||
...word, page, block, paragraph, line, |
||||
}); |
||||
}); |
||||
lines.push({ |
||||
...line, page, block, paragraph, |
||||
}); |
||||
}); |
||||
paragraphs.push({ |
||||
...paragraph, page, block, |
||||
}); |
||||
}); |
||||
blocks.push({ |
||||
...block, page, |
||||
}); |
||||
}); |
||||
|
||||
return { |
||||
...page, blocks, paragraphs, lines, words, symbols, |
||||
}; |
||||
}; |
@ -0,0 +1,10 @@
@@ -0,0 +1,10 @@
|
||||
module.exports = (key) => { |
||||
const env = { |
||||
type: (typeof window !== 'undefined') && (typeof window.document !== 'undefined') ? 'browser' : 'node', |
||||
}; |
||||
|
||||
if (typeof key === 'undefined') { |
||||
return env; |
||||
} |
||||
return env[key]; |
||||
}; |
@ -0,0 +1,12 @@
@@ -0,0 +1,12 @@
|
||||
const isBrowser = require('./getEnvironment')('type') === 'browser'; |
||||
const resolveURL = isBrowser ? require('resolve-url') : s => s; // eslint-disable-line
|
||||
|
||||
module.exports = (options) => { |
||||
const opts = { ...options }; |
||||
['corePath', 'workerPath', 'langPath'].forEach((key) => { |
||||
if (typeof options[key] !== 'undefined') { |
||||
opts[key] = resolveURL(opts[key]); |
||||
} |
||||
}); |
||||
return opts; |
||||
}; |
@ -0,0 +1,24 @@
@@ -0,0 +1,24 @@
|
||||
/* |
||||
* default params for tesseract.js |
||||
*/ |
||||
const PSM = require('../../constants/PSM'); |
||||
|
||||
module.exports = { |
||||
tessedit_pageseg_mode: PSM.SINGLE_BLOCK, |
||||
tessedit_char_whiltelist: '', |
||||
tessjs_create_pdf: '0', |
||||
tessjs_create_hocr: '1', |
||||
tessjs_create_tsv: '1', |
||||
tessjs_create_box: '0', |
||||
tessjs_create_unlv: '0', |
||||
tessjs_create_osd: '0', |
||||
tessjs_textonly_pdf: '0', |
||||
tessjs_pdf_name: 'tesseract.js-ocr-result', |
||||
tessjs_pdf_title: 'Tesseract.js OCR Result', |
||||
tessjs_pdf_auto_download: true, |
||||
tessjs_pdf_bin: false, |
||||
tessjs_image_rectangle_left: 0, |
||||
tessjs_image_rectangle_top: 0, |
||||
tessjs_image_rectangle_width: -1, |
||||
tessjs_image_rectangle_height: -1, |
||||
}; |
@ -0,0 +1,7 @@
@@ -0,0 +1,7 @@
|
||||
const fs = require('fs'); |
||||
|
||||
module.exports = (path, data) => { |
||||
fs.writeFile(path, data, (err) => { |
||||
if (err) throw err; |
||||
}); |
||||
}; |
@ -0,0 +1,39 @@
@@ -0,0 +1,39 @@
|
||||
/** |
||||
* |
||||
* Tesseract Worker Script for Node |
||||
* |
||||
* @fileoverview Node worker implementation |
||||
* @author Kevin Kwok <antimatter15@gmail.com> |
||||
* @author Guillermo Webster <gui@mit.edu> |
||||
* @author Jerome Wu <jeromewus@gmail.com> |
||||
*/ |
||||
|
||||
const worker = require('../'); |
||||
const exportFile = require('./exportFile'); |
||||
|
||||
let TesseractCore = null; |
||||
|
||||
/*
 * Register the message handler: each packet received from the parent
 * process is dispatched through worker.dispatchHandlers, and the response
 * object is sent back with process.send.
 */
process.on('message', (packet) => {
  worker.dispatchHandlers(packet, obj => process.send(obj));
});
||||
|
||||
/*
 * getCore is a sync function to load and return TesseractCore.
 *
 * The core is required on the first call only and cached in the
 * module-level TesseractCore variable; progress is reported through
 * res.progress before and after loading. The first argument is ignored.
 */
const getCore = (_, res) => {
  if (TesseractCore === null) {
    res.progress({ status: 'loading tesseract core', progress: 0 });
    TesseractCore = require('tesseract.js-core');
    res.progress({ status: 'loaded tesseract core', progress: 1 });
  }
  return TesseractCore;
};
||||
|
||||
/*
 * Install the Node-specific adapter functions on the shared worker.
 */
worker.setAdapter({
  getCore,
  exportFile,
});
@ -0,0 +1,39 @@
@@ -0,0 +1,39 @@
|
||||
/** |
||||
* handleOutput |
||||
* |
||||
* @name handleOutput |
||||
* @function handle file output |
||||
* @access private |
||||
* @param {object} customParams - an object of params |
||||
*/ |
||||
module.exports = (TessModule, api, adapter, params) => { |
||||
let files = {}; |
||||
const { |
||||
tessjs_create_pdf, |
||||
tessjs_textonly_pdf, |
||||
tessjs_pdf_name, |
||||
tessjs_pdf_title, |
||||
tessjs_pdf_auto_download, |
||||
tessjs_pdf_bin, |
||||
} = params; |
||||
|
||||
if (tessjs_create_pdf === '1') { |
||||
const pdfRenderer = new TessModule.TessPDFRenderer(tessjs_pdf_name, '/', tessjs_textonly_pdf === '1'); |
||||
pdfRenderer.BeginDocument(tessjs_pdf_title); |
||||
pdfRenderer.AddImage(api); |
||||
pdfRenderer.EndDocument(); |
||||
TessModule._free(pdfRenderer); |
||||
|
||||
const data = TessModule.FS.readFile(`/${tessjs_pdf_name}.pdf`); |
||||
|
||||
if (tessjs_pdf_bin) { |
||||
files = { pdf: data, ...files }; |
||||
} |
||||
|
||||
if (tessjs_pdf_auto_download) { |
||||
adapter.exportFile(`${tessjs_pdf_name}.pdf`, data, 'application/pdf'); |
||||
} |
||||
} |
||||
|
||||
return files; |
||||
}; |
@ -0,0 +1,5 @@
@@ -0,0 +1,5 @@
|
||||
module.exports = langs => ( |
||||
typeof langs === 'string' |
||||
? langs |
||||
: langs.map(lang => (typeof lang === 'string' ? lang : lang.data)).join('+') |
||||
); |
@ -0,0 +1,41 @@
@@ -0,0 +1,41 @@
|
||||
const { readImage } = require('tesseract.js-utils'); |
||||
|
||||
/** |
||||
* setImage |
||||
* |
||||
* @name setImage |
||||
* @function set image in tesseract for recognition |
||||
* @access public |
||||
* @param {array} image - binary array in array format |
||||
* @returns {number} - an emscripten pointer of the image |
||||
*/ |
||||
module.exports = (TessModule, api, image, params) => { |
||||
const { |
||||
tessjs_image_rectangle_left: left, |
||||
tessjs_image_rectangle_top: top, |
||||
tessjs_image_rectangle_width: width, |
||||
tessjs_image_rectangle_height: height, |
||||
} = params; |
||||
const { |
||||
w, h, bytesPerPixel, data, pix, |
||||
} = readImage(TessModule, Array.from(image)); |
||||
|
||||
/* |
||||
* As some image format (ex. bmp) is not supported natiely by tesseract, |
||||
* sometimes it will not return pix directly, but data and bytesPerPixel |
||||
* for another SetImage usage. |
||||
* |
||||
*/ |
||||
if (data === null) { |
||||
api.SetImage(pix); |
||||
} else { |
||||
api.SetImage(data, w, h, bytesPerPixel, w * bytesPerPixel); |
||||
} |
||||
api.SetRectangle( |
||||
(left < 0) ? 0 : left, |
||||
(top < 0) ? 0 : top, |
||||
(width < 0) ? w : width, |
||||
(height < 0) ? h : height, |
||||
); |
||||
return data === null ? pix : data; |
||||
}; |
@ -0,0 +1,10 @@
@@ -0,0 +1,10 @@
|
||||
const path = require('path'); |
||||
const defaultOptions = require('../../constants/defaultOptions'); |
||||
|
||||
/* |
||||
* Default options for node worker |
||||
*/ |
||||
module.exports = { |
||||
...defaultOptions, |
||||
workerPath: path.join(__dirname, '..', '..', 'worker-script', 'node', 'index.js'), |
||||
}; |
@ -0,0 +1,22 @@
@@ -0,0 +1,22 @@
|
||||
/** |
||||
* |
||||
* Tesseract Worker impl. for node (using child_process) |
||||
* |
||||
* @fileoverview Tesseract Worker impl. for node |
||||
* @author Kevin Kwok <antimatter15@gmail.com> |
||||
* @author Guillermo Webster <gui@mit.edu> |
||||
* @author Jerome Wu <jeromewus@gmail.com> |
||||
*/ |
||||
const defaultOptions = require('./defaultOptions'); |
||||
const spawnWorker = require('./spawnWorker'); |
||||
const terminateWorker = require('./terminateWorker'); |
||||
const onMessage = require('./onMessage'); |
||||
const send = require('./send'); |
||||
|
||||
module.exports = { |
||||
defaultOptions, |
||||
spawnWorker, |
||||
terminateWorker, |
||||
onMessage, |
||||
send, |
||||
}; |
@ -0,0 +1,3 @@
@@ -0,0 +1,3 @@
|
||||
module.exports = (worker, handler) => { |
||||
worker.on('message', handler); |
||||
}; |
@ -0,0 +1,61 @@
@@ -0,0 +1,61 @@
|
||||
const util = require('util'); |
||||
const fs = require('fs'); |
||||
const axios = require('axios'); |
||||
const isURL = require('is-url'); |
||||
|
||||
const readFile = util.promisify(fs.readFile); |
||||
|
||||
/**
 * loadImage
 *
 * @name loadImage
 * @function load image from different source
 * @access public
 * @param {string|Buffer} image - image source, supported formats:
 *   buffer: image buffer
 *   string: URL string or file path
 *   string: base64 image (data URI)
 * @returns {Promise} resolves with the binary image content
 */
const loadImage = (image) => {
  /*
   * Buffers are handled first: the base64 regex below would otherwise coerce
   * the whole buffer to a string, and a buffer whose bytes happen to contain
   * a data URI (e.g. an SVG with an embedded image) would reach
   * `image.split`, which does not exist on Buffer.
   */
  if (Buffer.isBuffer(image)) {
    return Promise.resolve(image);
  }

  if (isURL(image)) {
    return axios.get(image, {
      responseType: 'arraybuffer',
    })
      .then(resp => resp.data);
  }

  if (/data:image\/([a-zA-Z]*);base64,([^"]*)/.test(image)) {
    // Everything after the first comma is the base64 payload.
    return Promise.resolve(Buffer.from(image.split(',')[1], 'base64'));
  }

  // Anything else is treated as a file path.
  return readFile(image);
};
||||
|
||||
|
||||
/** |
||||
* send |
||||
* |
||||
* @name send |
||||
* @function send packet to worker and create a job |
||||
* @access public |
||||
* @param {object} instance TesseractWorker instance |
||||
* @param {object} iPacket data for worker |
||||
*/ |
||||
module.exports = ({ worker }, packet) => { |
||||
const p = { ...packet }; |
||||
if (['recognize', 'detect'].includes(p.action)) { |
||||
loadImage(p.payload.image) |
||||
.then(buf => new Uint8Array(buf)) |
||||
.then((img) => { |
||||
p.payload.image = Array.from(img); |
||||
worker.send(p); |
||||
}); |
||||
} else { |
||||
worker.send(p); |
||||
} |
||||
}; |
@ -0,0 +1,15 @@
@@ -0,0 +1,15 @@
|
||||
const { fork } = require('child_process'); |
||||
|
||||
/** |
||||
* spawnWorker |
||||
* |
||||
* @name spawnWorker |
||||
* @function fork a new process in node |
||||
* @access public |
||||
* @param {object} instance - TesseractWorker instance |
||||
* @param {object} options |
||||
* @param {string} options.workerPath - worker script path |
||||
*/ |
||||
module.exports = ({ workerPath }) => ( |
||||
fork(workerPath) |
||||
); |
Loading…
Reference in new issue