From 67848464ac6d89d00c3404e0ac90f36cf98c7233 Mon Sep 17 00:00:00 2001
From: WintrySnowman <52296952+WintrySnowman@users.noreply.github.com>
Date: Thu, 11 Aug 2022 05:02:14 +0100
Subject: [PATCH] Add support for ImageData and fix a hang in buffer handling (#610)

---
 src/worker-script/utils/setImage.js | 30 ++++++++++++++++++++---------
 src/worker/browser/loadImage.js     |  4 ++++
 2 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/src/worker-script/utils/setImage.js b/src/worker-script/utils/setImage.js
index e9918db..c8fabcd 100644
--- a/src/worker-script/utils/setImage.js
+++ b/src/worker-script/utils/setImage.js
@@ -9,27 +9,37 @@ const fileType = require('file-type');
  * @access public
  */
 module.exports = (TessModule, api, image) => {
-  const buf = Buffer.from(Array.from({ ...image, length: Object.keys(image).length }));
-  const type = fileType(buf);
+  const buf = (image instanceof Uint8Array) ? Buffer.from(image) : null;
+  const type = buf ? fileType(buf) : null;
   let bytesPerPixel = 0;
   let data = null;
   let pix = null;
   let w = 0;
   let h = 0;
 
-  /*
-   * Although leptonica should support reading bmp, there is a bug of "compressed BMP files".
-   * As there is no solution, we need to use bmp-js for now.
-   * @see https://groups.google.com/forum/#!topic/tesseract-ocr/4mPD9zTxdxE
-   */
-  if (type && type.mime === 'image/bmp') {
+
+  if (typeof ImageData !== 'undefined' && image instanceof ImageData) {
+    // ImageData is a browser global, so guard with typeof to avoid a ReferenceError
+    // elsewhere. Its pixels are RGBA while Tesseract technically expects ABGR, but it
+    // does not seem to matter in practice, so the bytes are copied without reordering.
+    data = TessModule._malloc(image.data.byteLength);
+    TessModule.HEAPU8.set(image.data, data);
+    w = image.width;
+    h = image.height;
+    bytesPerPixel = 4;
+  } else if (buf && type && type.mime === 'image/bmp') {
+    /*
+     * Although leptonica should support reading bmp, there is a bug of "compressed BMP files".
+     * As there is no solution, we need to use bmp-js for now.
+     * @see https://groups.google.com/forum/#!topic/tesseract-ocr/4mPD9zTxdxE
+     */
     const bmpBuf = bmp.decode(buf);
     data = TessModule._malloc(bmpBuf.data.length * Uint8Array.BYTES_PER_ELEMENT);
     TessModule.HEAPU8.set(bmpBuf.data, data);
     w = bmpBuf.width;
     h = bmpBuf.height;
     bytesPerPixel = 4;
-  } else {
+  } else if (buf) {
     const ptr = TessModule._malloc(buf.length * Uint8Array.BYTES_PER_ELEMENT);
     TessModule.HEAPU8.set(buf, ptr);
     pix = TessModule._pixReadMem(ptr, buf.length);
@@ -44,6 +54,8 @@ module.exports = (TessModule, api, image) => {
       .map((v, idx) => (
         TessModule.getValue(pix + (idx * 4), 'i32')
       ));
+  } else {
+    throw new Error('Unsupported image data container');
   }
 
   /*
diff --git a/src/worker/browser/loadImage.js b/src/worker/browser/loadImage.js
index 3102967..34f047d 100644
--- a/src/worker/browser/loadImage.js
+++ b/src/worker/browser/loadImage.js
@@ -82,6 +82,10 @@ const loadImage = async (image) => {
       img = await fixOrientationFromUrlOrBlobOrFile(img);
     }
     data = await readFromBlobOrFile(img);
+  } else if (image instanceof ImageData) {
+    // Support a canvas' ImageData by passing it directly,
+    // where it is handled by setImage() without conversion.
+    return image;
   }
 
   return new Uint8Array(data);