diff --git a/src/.createScheduler.js.swp b/src/.createScheduler.js.swp new file mode 100644 index 0000000..acfa2ec Binary files /dev/null and b/src/.createScheduler.js.swp differ diff --git a/src/index.d.ts b/src/index.d.ts index 59e8a75..05a59bb 100644 --- a/src/index.d.ts +++ b/src/index.d.ts @@ -3,10 +3,10 @@ declare namespace Tesseract { function createWorker(options?: Partial): Worker function setLogging(logging: boolean): void function recognize(image: ImageLike, langs?: string, options?: Partial): Promise - function detect(image: ImageLike, langs?: string, options?: Partial) + function detect(image: ImageLike, options?: Partial) interface Scheduler { - addWorker(worker: Worker): void + addWorker(worker: Worker): string addJob(action: string, ...args: any[]): Promise terminate(): Promise getQueueLen(): number @@ -16,7 +16,8 @@ declare namespace Tesseract { interface Worker { load(jobId?: string): Promise loadLanguage(langs?: string, jobId?: string): Promise - initialize(langs?: string, params?: Partial, jobId?: string): Promise + initialize(langs?: string, oem?: OEM, jobId?: string): Promise + setParameters(params: Partial, jobId?: string): Promise recognize(image: ImageLike, options?: Partial, jobId?: string): Promise detect(image: ImageLike, jobId?: string): Promise terminate(jobId?: string): Promise @@ -37,7 +38,6 @@ declare namespace Tesseract { tessedit_ocr_engine_mode: OEM tessedit_pageseg_mode: PSM tessedit_char_whiltelist: string - tessjs_create_pdf: string tessjs_create_hocr: string tessjs_create_tsv: string tessjs_create_box: string @@ -88,10 +88,10 @@ declare namespace Tesseract { SINGLE_BLOCK = '6', SINGLE_LINE = '7', SINGLE_WORD = '8', - SINGLE_CHAR = '9', - SPARSE_TEXT = '10', - SPARSE_TEXT_OSD = '11', - RAW_LINE = '12' + CIRCLE_WORD = '9', + SINGLE_CHAR = '10', + SPARSE_TEXT = '11', + SPARSE_TEXT_OSD = '12', } type ImageLike = string | HTMLImageElement | HTMLCanvasElement | HTMLVideoElement | CanvasRenderingContext2D | File | Blob | ImageData | Buffer; diff --git a/src/utils/circularize.js b/src/utils/circularize.js index 6e682a1..55486da 100644 --- a/src/utils/circularize.js +++ b/src/utils/circularize.js @@ -9,9 +9,6 @@ * a (circular) DOM-like interface for walking * through the data. * - * A (circular) DOM-like interface here means that - * each child element - * * @fileoverview DOM-like interface for walking through data * @author Kevin Kwok * @author Guillermo Webster diff --git a/src/worker-script/constants/defaultParams.js b/src/worker-script/constants/defaultParams.js index 3238f03..2e45308 100644 --- a/src/worker-script/constants/defaultParams.js +++ b/src/worker-script/constants/defaultParams.js @@ -7,15 +7,9 @@ module.exports = { tessedit_pageseg_mode: PSM.SINGLE_BLOCK, tessedit_char_whiltelist: '', user_defined_dpi: '300', - tessjs_create_pdf: '0', tessjs_create_hocr: '1', tessjs_create_tsv: '1', tessjs_create_box: '0', tessjs_create_unlv: '0', tessjs_create_osd: '0', - tessjs_textonly_pdf: '0', - tessjs_pdf_name: 'tesseract.js-ocr-result', - tessjs_pdf_title: 'Tesseract.js OCR Result', - tessjs_pdf_auto_download: true, - tessjs_pdf_bin: false, }; diff --git a/src/worker-script/utils/dump.js b/src/worker-script/utils/dump.js index abaf541..bafd9f5 100644 --- a/src/worker-script/utils/dump.js +++ b/src/worker-script/utils/dump.js @@ -17,8 +17,6 @@ * @name deindent * @function deindent string * @access public - * @param {string} html HOCR in html format - * @returns {string} deindent html string */ const deindent = (html) => { const lines = html.split('\n'); @@ -38,9 +36,6 @@ const deindent = (html) => { * @name dump * @function dump recognition result to a JSON object * @access public - * @param {object} TessModule TessModule from TesseractCore - * @param {object} api TesseractBaseAPI instance - * @returns {object} dumpped JSON object */ module.exports = (TessModule, api, { tessjs_create_hocr, diff --git a/src/worker-script/utils/setImage.js b/src/worker-script/utils/setImage.js index 6ae356d..e9918db 100644 --- a/src/worker-script/utils/setImage.js +++ b/src/worker-script/utils/setImage.js @@ -7,8 +7,6 @@ const fileType = require('file-type'); * @name setImage * @function set image in tesseract for recognition * @access public - * @param {array} image - binary array in array format - * @returns {number} - an emscripten pointer of the image */ module.exports = (TessModule, api, image) => { const buf = Buffer.from(Array.from({ ...image, length: Object.keys(image).length })); diff --git a/src/worker/browser/defaultOptions.js~ b/src/worker/browser/defaultOptions.js~ deleted file mode 100644 index cef5e58..0000000 --- a/src/worker/browser/defaultOptions.js~ +++ /dev/null @@ -1,18 +0,0 @@ -const resolveURL = require('resolve-url'); -const { version, dependencies } = require('../../../package.json'); -const defaultOptions = require('../../constants/defaultOptions'); - -/* - * Default options for browser worker - */ -module.exports = { - ...defaultOptions, - workerPath: (typeof process !== 'undefined' && process.env.TESS_ENV === 'development') - ? resolveURL(`/dist/worker.dev.js?nocache=${Math.random().toString(36).slice(3)}`) - : `https://unpkg.com/tesseract.js@v${version}/dist/worker.min.js`, - /* - * If browser doesn't support WebAssembly, - * load ASM version instead - */ - corePath: `https://unpkg.com/tesseract.js-core@v${dependencies['tesseract.js-core'].substring(1)}/tesseract-core.${typeof WebAssembly === 'object' ? 'wasm' : 'asm'}.js`, -}; diff --git a/src/worker/browser/loadImage.js b/src/worker/browser/loadImage.js index b849153..864eede 100644 --- a/src/worker/browser/loadImage.js +++ b/src/worker/browser/loadImage.js @@ -7,8 +7,6 @@ const resolveURL = require('resolve-url'); * @name readFromBlobOrFile * @function * @access private - * @param {object} blob A blob or file objec to read - * @param {function} res callback function after reading completes */ const readFromBlobOrFile = blob => ( new Promise((resolve, reject) => { @@ -29,14 +27,6 @@ const readFromBlobOrFile = blob => ( * @name loadImage * @function load image from different source * @access private - * @param {string, object} image - image source, supported formats: - * string: URL string, can be relative path - * string: base64 image - * img HTMLElement: extract image source from src attribute - * video HTMLElement: extract image source from poster attribute - * canvas HTMLElement: extract image data by converting to Blob - * File instance: data from - * @returns {array} binary image in array format */ const loadImage = async (image) => { let data = image; diff --git a/src/worker/browser/send.js b/src/worker/browser/send.js index de7ed94..88f8aaf 100644 --- a/src/worker/browser/send.js +++ b/src/worker/browser/send.js @@ -4,8 +4,6 @@ * @name send * @function send packet to worker and create a job * @access public - * @param {object} instance TesseractWorker instance - * @param {object} iPacket data for worker */ module.exports = async (worker, packet) => { worker.postMessage(packet); diff --git a/src/worker/browser/spawnWorker.js b/src/worker/browser/spawnWorker.js index 6622fa0..1a0239c 100644 --- a/src/worker/browser/spawnWorker.js +++ b/src/worker/browser/spawnWorker.js @@ -4,9 +4,6 @@ * @name spawnWorker * @function create a new Worker in browser * @access public - * @param {object} options - * @param {string} options.workerPath - worker script path - * @param {boolean} options.workerBlobURL - Use a blob:// URL for the worker script */ module.exports = ({ workerPath, workerBlobURL }) => { let worker; diff --git a/src/worker/browser/terminateWorker.js b/src/worker/browser/terminateWorker.js index 93a38c7..753a3fd 100644 --- a/src/worker/browser/terminateWorker.js +++ b/src/worker/browser/terminateWorker.js @@ -4,7 +4,6 @@ * @name terminateWorker * @function terminate worker * @access public - * @param {object} instance TesseractWorker instance */ module.exports = (worker) => { worker.terminate(); diff --git a/src/worker/node/loadImage.js b/src/worker/node/loadImage.js index d4bc77f..91ab15a 100644 --- a/src/worker/node/loadImage.js +++ b/src/worker/node/loadImage.js @@ -11,11 +11,6 @@ const readFile = util.promisify(fs.readFile); * @name loadImage * @function load image from different source * @access public - * @param {string} image - image source, supported formats: - * string: URL string or file path - * string: base64 image - * buffer: image buffer - * @returns {array} binary image in array format */ module.exports = async (image) => { let data = image; diff --git a/src/worker/node/send.js b/src/worker/node/send.js index da701d2..783c6e1 100644 --- a/src/worker/node/send.js +++ b/src/worker/node/send.js @@ -4,8 +4,6 @@ * @name send * @function send packet to worker and create a job * @access public - * @param {object} instance TesseractWorker instance - * @param {object} iPacket data for worker */ module.exports = (worker, packet) => { worker.send(packet); diff --git a/src/worker/node/spawnWorker.js b/src/worker/node/spawnWorker.js index 1dabcd7..e3c5d89 100644 --- a/src/worker/node/spawnWorker.js +++ b/src/worker/node/spawnWorker.js @@ -6,9 +6,6 @@ const { fork } = require('child_process'); * @name spawnWorker * @function fork a new process in node * @access public - * @param {object} instance - TesseractWorker instance - * @param {object} options - * @param {string} options.workerPath - worker script path */ module.exports = ({ workerPath }) => ( fork(workerPath) diff --git a/src/worker/node/terminateWorker.js b/src/worker/node/terminateWorker.js index 2ee0a78..0e8b67e 100644 --- a/src/worker/node/terminateWorker.js +++ b/src/worker/node/terminateWorker.js @@ -4,7 +4,6 @@ * @name terminateWorker * @function kill worker * @access public - * @param {object} instance TesseractWorker instance */ module.exports = (worker) => { worker.kill();