From 9e121635142e5fe7f8bc0a23e74578c85b357949 Mon Sep 17 00:00:00 2001 From: Balearica Date: Sun, 9 Oct 2022 20:27:28 -0700 Subject: [PATCH] Updated setImage to resolve memory leak per #678 --- src/worker-script/index.js | 15 +++----- src/worker-script/utils/setImage.js | 59 ++++++++--------------------- 2 files changed, 20 insertions(+), 54 deletions(-) diff --git a/src/worker-script/index.js b/src/worker-script/index.js index 2b8222b..0d7d396 100644 --- a/src/worker-script/index.js +++ b/src/worker-script/index.js @@ -294,7 +294,6 @@ const recognize = async ({ // When the auto-rotate option is True, setImage is called with no angle, // then the angle is calculated by Tesseract and then setImage is re-called. // Otherwise, setImage is called once using the user-provided rotateRadiansFinal value. - let ptr; let rotateRadiansFinal; if (options.rotateAuto) { // The angle is only detected if auto page segmentation is used @@ -306,7 +305,7 @@ const recognize = async ({ api.SetVariable('tessedit_pageseg_mode', String(PSM.AUTO)); } - ptr = setImage(TessModule, api, image); + setImage(TessModule, api, image); api.FindLines(); const rotateRadiansCalc = api.GetAngle(); @@ -318,17 +317,17 @@ const recognize = async ({ // Small angles (<0.005 radians/~0.3 degrees) are ignored to save on runtime if (Math.abs(rotateRadiansCalc) >= 0.005) { rotateRadiansFinal = rotateRadiansCalc; - ptr = setImage(TessModule, api, image, rotateRadiansFinal); + setImage(TessModule, api, image, rotateRadiansFinal); } else { // Image needs to be reset if run with different PSM setting earlier if (psmEdit) { - ptr = setImage(TessModule, api, image); + setImage(TessModule, api, image); } rotateRadiansFinal = 0; } } else { rotateRadiansFinal = options.rotateRadians || 0; - ptr = setImage(TessModule, api, image, rotateRadiansFinal); + setImage(TessModule, api, image, rotateRadiansFinal); } const rec = options.rectangle; @@ -351,7 +350,6 @@ const recognize = async ({ } res.resolve(result); - TessModule._free(ptr); } catch (err) { res.reject(err.toString()); } @@ -360,11 +358,10 @@ const recognize = async ({ const detect = async ({ payload: { image } }, res) => { try { - const ptr = setImage(TessModule, api, image); + setImage(TessModule, api, image); const results = new TessModule.OSResults(); if (!api.DetectOS(results)) { - TessModule._free(ptr); res.resolve({ tesseract_script_id: null, @@ -378,8 +375,6 @@ const detect = async ({ payload: { image } }, res) => { const oid = best.orientation_id; const sid = best.script_id; - TessModule._free(ptr); - res.resolve({ tesseract_script_id: sid, script: results.unicharset.get_script_from_script_id(sid), diff --git a/src/worker-script/utils/setImage.js b/src/worker-script/utils/setImage.js index 7ebc194..1f971fc 100644 --- a/src/worker-script/utils/setImage.js +++ b/src/worker-script/utils/setImage.js @@ -9,55 +9,26 @@ const fileType = require('file-type'); * @access public */ module.exports = (TessModule, api, image, angle = 0) => { - const buf = Buffer.from(Array.from({ ...image, length: Object.keys(image).length })); - const type = fileType(buf); - let bytesPerPixel = 0; - let data = null; - let pix = null; - let w = 0; - let h = 0; - const exif = buf.slice(0, 500).toString().match(/\x01\x12\x00\x03\x00\x00\x00\x01\x00(.)/)?.[1]?.charCodeAt(0) || 1; + const type = fileType(image); - /* - * Leptonica supports uncompressed but not compressed bmp files - * @see https://github.com/DanBloomberg/leptonica/issues/607#issuecomment-1068802516 - * We therefore use bmp-js to process all bmp files - */ + const exif = image.slice(0, 500).toString().match(/\x01\x12\x00\x03\x00\x00\x00\x01\x00(.)/)?.[1]?.charCodeAt(0) || 1; + + // /* + // * Leptonica supports some but not all bmp files + // * @see https://github.com/DanBloomberg/leptonica/issues/607#issuecomment-1068802516 + // * We therefore use bmp-js to convert all bmp files into a format Leptonica is known to support + // */ if (type && type.mime === 'image/bmp') { + // Not sure what this line actually does, but removing breaks the function + const buf = Buffer.from(Array.from({ ...image, length: Object.keys(image).length })); const bmpBuf = bmp.decode(buf); - data = TessModule._malloc(bmpBuf.data.length * Uint8Array.BYTES_PER_ELEMENT); - TessModule.HEAPU8.set(bmpBuf.data, data); - w = bmpBuf.width; - h = bmpBuf.height; - bytesPerPixel = 4; + TessModule.FS.writeFile("/input", bmp.encode(bmpBuf).data); } else { - const ptr = TessModule._malloc(buf.length * Uint8Array.BYTES_PER_ELEMENT); - TessModule.HEAPU8.set(buf, ptr); - pix = TessModule._pixReadMem(ptr, buf.length); - if (TessModule.getValue(pix + (7 * 4), 'i32') === 0) { - /* - * Set a yres default value to prevent warning from tesseract - * See kMinCredibleResolution in tesseract/src/ccstruct/publictypes.h - */ - TessModule.setValue(pix + (7 * 4), 300, 'i32'); - } - [w, h] = Array(2).fill(0) - .map((v, idx) => ( - TessModule.getValue(pix + (idx * 4), 'i32') - )); + TessModule.FS.writeFile("/input", image); } - /* - * As some image format (ex. bmp) is not supported natiely by tesseract, - * sometimes it will not return pix directly, but data and bytesPerPixel - * for another SetImage usage. - * - */ - if (data === null) { - api.SetImage(pix, undefined, undefined, undefined, undefined, exif, angle); - } else { - api.SetImage(data, w, h, bytesPerPixel, w * bytesPerPixel, exif, angle); - } - return data === null ? pix : data; + api.SetImageFile(exif, angle); + return; + };