Browse Source

Updated setImage to resolve memory leak per #678

dev/v4
Balearica 2 years ago
parent
commit
9e12163514
  1. 15
      src/worker-script/index.js
  2. 59
      src/worker-script/utils/setImage.js

15
src/worker-script/index.js

@@ -294,7 +294,6 @@ const recognize = async ({
// When the auto-rotate option is True, setImage is called with no angle,
// then the angle is calculated by Tesseract and then setImage is re-called.
// Otherwise, setImage is called once using the user-provided rotateRadiansFinal value.
let ptr;
let rotateRadiansFinal;
if (options.rotateAuto) {
// The angle is only detected if auto page segmentation is used
@@ -306,7 +305,7 @@ const recognize = async ({
api.SetVariable('tessedit_pageseg_mode', String(PSM.AUTO));
}
ptr = setImage(TessModule, api, image);
setImage(TessModule, api, image);
api.FindLines();
const rotateRadiansCalc = api.GetAngle();
@@ -318,17 +317,17 @@ const recognize = async ({
// Small angles (<0.005 radians/~0.3 degrees) are ignored to save on runtime
if (Math.abs(rotateRadiansCalc) >= 0.005) {
rotateRadiansFinal = rotateRadiansCalc;
ptr = setImage(TessModule, api, image, rotateRadiansFinal);
setImage(TessModule, api, image, rotateRadiansFinal);
} else {
// Image needs to be reset if run with different PSM setting earlier
if (psmEdit) {
ptr = setImage(TessModule, api, image);
setImage(TessModule, api, image);
}
rotateRadiansFinal = 0;
}
} else {
rotateRadiansFinal = options.rotateRadians || 0;
ptr = setImage(TessModule, api, image, rotateRadiansFinal);
setImage(TessModule, api, image, rotateRadiansFinal);
}
const rec = options.rectangle;
@@ -351,7 +350,6 @@ const recognize = async ({
}
res.resolve(result);
TessModule._free(ptr);
} catch (err) {
res.reject(err.toString());
}
@@ -360,11 +358,10 @@ const recognize = async ({
const detect = async ({ payload: { image } }, res) => {
try {
const ptr = setImage(TessModule, api, image);
setImage(TessModule, api, image);
const results = new TessModule.OSResults();
if (!api.DetectOS(results)) {
TessModule._free(ptr);
res.resolve({
tesseract_script_id: null,
@@ -378,8 +375,6 @@ const detect = async ({ payload: { image } }, res) => {
const oid = best.orientation_id;
const sid = best.script_id;
TessModule._free(ptr);
res.resolve({
tesseract_script_id: sid,
script: results.unicharset.get_script_from_script_id(sid),

59
src/worker-script/utils/setImage.js

@@ -9,55 +9,26 @@ const fileType = require('file-type');
* @access public
*/
module.exports = (TessModule, api, image, angle = 0) => {
const buf = Buffer.from(Array.from({ ...image, length: Object.keys(image).length }));
const type = fileType(buf);
let bytesPerPixel = 0;
let data = null;
let pix = null;
let w = 0;
let h = 0;
const exif = buf.slice(0, 500).toString().match(/\x01\x12\x00\x03\x00\x00\x00\x01\x00(.)/)?.[1]?.charCodeAt(0) || 1;
const type = fileType(image);
/*
* Leptonica supports uncompressed but not compressed bmp files
* @see https://github.com/DanBloomberg/leptonica/issues/607#issuecomment-1068802516
* We therefore use bmp-js to process all bmp files
*/
const exif = image.slice(0, 500).toString().match(/\x01\x12\x00\x03\x00\x00\x00\x01\x00(.)/)?.[1]?.charCodeAt(0) || 1;
// /*
// * Leptonica supports some but not all bmp files
// * @see https://github.com/DanBloomberg/leptonica/issues/607#issuecomment-1068802516
// * We therefore use bmp-js to convert all bmp files into a format Leptonica is known to support
// */
if (type && type.mime === 'image/bmp') {
// Not sure what this line actually does, but removing breaks the function
const buf = Buffer.from(Array.from({ ...image, length: Object.keys(image).length }));
const bmpBuf = bmp.decode(buf);
data = TessModule._malloc(bmpBuf.data.length * Uint8Array.BYTES_PER_ELEMENT);
TessModule.HEAPU8.set(bmpBuf.data, data);
w = bmpBuf.width;
h = bmpBuf.height;
bytesPerPixel = 4;
TessModule.FS.writeFile("/input", bmp.encode(bmpBuf).data);
} else {
const ptr = TessModule._malloc(buf.length * Uint8Array.BYTES_PER_ELEMENT);
TessModule.HEAPU8.set(buf, ptr);
pix = TessModule._pixReadMem(ptr, buf.length);
if (TessModule.getValue(pix + (7 * 4), 'i32') === 0) {
/*
* Set a yres default value to prevent warning from tesseract
* See kMinCredibleResolution in tesseract/src/ccstruct/publictypes.h
*/
TessModule.setValue(pix + (7 * 4), 300, 'i32');
}
[w, h] = Array(2).fill(0)
.map((v, idx) => (
TessModule.getValue(pix + (idx * 4), 'i32')
));
TessModule.FS.writeFile("/input", image);
}
/*
* As some image formats (e.g. bmp) are not supported natively by tesseract,
* sometimes it will not return pix directly, but data and bytesPerPixel
* for another SetImage usage.
*
*/
if (data === null) {
api.SetImage(pix, undefined, undefined, undefined, undefined, exif, angle);
} else {
api.SetImage(data, w, h, bytesPerPixel, w * bytesPerPixel, exif, angle);
}
return data === null ? pix : data;
api.SetImageFile(exif, angle);
return;
};

Loading…
Cancel
Save