From 3678dbceae3f50929b7b3748bbe6b6bd9f767bf3 Mon Sep 17 00:00:00 2001 From: Balearica Date: Wed, 14 Sep 2022 18:07:56 -0700 Subject: [PATCH] Added image preprocessing functions (rotate + save images) --- examples/browser/image-processing.html | 60 ++++++++ src/constants/imageType.js | 5 + src/createWorker.js | 9 ++ src/index.d.ts | 16 +++ src/worker-script/index.js | 136 +++++++++++++++++- .../utils/arrayBufferToBase64.js | 56 ++++++++ src/worker-script/utils/setImage.js | 6 +- 7 files changed, 282 insertions(+), 6 deletions(-) create mode 100644 examples/browser/image-processing.html create mode 100644 src/constants/imageType.js create mode 100644 src/worker-script/utils/arrayBufferToBase64.js diff --git a/examples/browser/image-processing.html b/examples/browser/image-processing.html new file mode 100644 index 0000000..cafb6d2 --- /dev/null +++ b/examples/browser/image-processing.html @@ -0,0 +1,60 @@ + + + + + + + + + + +
+
+

Input Image

+ +
+
+

Rotated, Original Color

+ +
+
+

Rotated, Grey

+ +
+
+

Rotated, Binary

+ +
+
+ + + + + \ No newline at end of file diff --git a/src/constants/imageType.js b/src/constants/imageType.js new file mode 100644 index 0000000..e21ccfe --- /dev/null +++ b/src/constants/imageType.js @@ -0,0 +1,5 @@ +module.exports = { + ORIGINAL: 0, + GREY: 1, + BINARY: 2, +}; diff --git a/src/createWorker.js b/src/createWorker.js index d6a5875..3c3b626 100644 --- a/src/createWorker.js +++ b/src/createWorker.js @@ -123,6 +123,14 @@ module.exports = (_options = {}) => { })) ); + const threshold = async (image, opts = {}, jobId) => ( + startJob(createJob({ + id: jobId, + action: 'threshold', + payload: { image: await loadImage(image), options: opts }, + })) + ); + const getPDF = (title = 'Tesseract OCR Result', textonly = false, jobId) => ( startJob(createJob({ id: jobId, @@ -191,6 +199,7 @@ module.exports = (_options = {}) => { initialize, setParameters, recognize, + threshold, getPDF, detect, terminate, diff --git a/src/index.d.ts b/src/index.d.ts index 2a7f265..bcc5d33 100644 --- a/src/index.d.ts +++ b/src/index.d.ts @@ -22,7 +22,9 @@ declare namespace Tesseract { loadLanguage(langs?: string, jobId?: string): Promise initialize(langs?: string, oem?: OEM, jobId?: string): Promise setParameters(params: Partial, jobId?: string): Promise + getImage(type: imageType): string recognize(image: ImageLike, options?: Partial, jobId?: string): Promise + threshold(image: ImageLike, options?: Partial, jobId?: string): Promise detect(image: ImageLike, jobId?: string): Promise terminate(jobId?: string): Promise } @@ -53,6 +55,11 @@ declare namespace Tesseract { } interface RecognizeOptions { rectangle: Rectangle + saveImageOriginal: boolean + saveImageGrey: boolean + saveImageBinary: boolean + rotateAuto: boolean + rotateRadians: float } interface ConfigResult { jobId: string @@ -100,6 +107,11 @@ declare namespace Tesseract { SPARSE_TEXT = '11', SPARSE_TEXT_OSD = '12', } + const enum imageType { + ORIGINAL = 0, + GREY = 1, + BINARY = 2 + } type ImageLike = string | HTMLImageElement | HTMLCanvasElement | HTMLVideoElement | CanvasRenderingContext2D | File | Blob | ImageData | Buffer; interface Block { @@ -213,6 +225,10 @@ declare namespace Tesseract { box: string | null; unlv: string | null; sd: string | null; + imageOriginal: string; + imageGrey: string; + imageBinary: string; + rotateRadians: number; } } diff --git a/src/worker-script/index.js b/src/worker-script/index.js index 1e2cfb6..3634f1c 100644 --- a/src/worker-script/index.js +++ b/src/worker-script/index.js @@ -15,6 +15,9 @@ const isWebWorker = require('../utils/getEnvironment')('type') === 'webworker'; const setImage = require('./utils/setImage'); const defaultParams = require('./constants/defaultParams'); const { log, setLogging } = require('../utils/log'); +const arrayBufferToBase64 = require('./utils/arrayBufferToBase64'); +const imageType = require('../constants/imageType'); +const PSM = require('../constants/PSM'); /* * Tesseract Module returned by TesseractCore. @@ -197,14 +200,140 @@ const initialize = ({ } }; -const recognize = ({ payload: { image, options: { rectangle: rec } } }, res) => { +const getImage = (type) => { + api.WriteImage(type, '/image.png'); + const pngBuffer = TessModule.FS.readFile('/image.png'); + const pngStr = `data:image/png;base64,${arrayBufferToBase64(pngBuffer.buffer)}`; + TessModule.FS.unlink('/image.png'); + return pngStr; +}; + +const recognize = ({ + payload: { + image, options: { + rectangle: rec, saveImageOriginal, saveImageGrey, saveImageBinary, rotateAuto, rotateRadians, + }, + }, +}, res) => { try { - const ptr = setImage(TessModule, api, image); + // When the auto-rotate option is True, setImage is called with no angle, + // then the angle is calculated by Tesseract and then setImage is re-called. + // Otherwise, setImage is called once using the user-provided rotateRadiansFinal value. + let ptr; + let rotateRadiansFinal; + if (rotateAuto) { + // The angle is only detected if auto page segmentation is used + // Therefore, if this is not the mode specified by the user, it is enabled temporarily here + const psmInit = api.GetPageSegMode(); + let psmEdit = false; + if (![PSM.AUTO, PSM.AUTO_ONLY, PSM.OSD].includes(psmInit)) { + psmEdit = true; + api.SetVariable('tessedit_pageseg_mode', String(PSM.AUTO)); + } + + ptr = setImage(TessModule, api, image); + api.FindLines(); + const rotateRadiansCalc = api.GetAngle(); + + // Restore user-provided PSM setting + if (psmEdit) { + api.SetVariable('tessedit_pageseg_mode', String(psmInit)); + } + + // Small angles (<0.005 radians/~0.3 degrees) are ignored to save on runtime + if (Math.abs(rotateRadiansCalc) >= 0.005) { + rotateRadiansFinal = rotateRadiansCalc; + ptr = setImage(TessModule, api, image, rotateRadiansFinal); + } else { + // Image needs to be reset if run with different PSM setting earlier + if (psmEdit) { + ptr = setImage(TessModule, api, image); + } + rotateRadiansFinal = 0; + } + } else { + rotateRadiansFinal = rotateRadians || 0; + ptr = setImage(TessModule, api, image, rotateRadiansFinal); + } + if (typeof rec === 'object') { api.SetRectangle(rec.left, rec.top, rec.width, rec.height); } api.Recognize(null); - res.resolve(dump(TessModule, api, params)); + const result = dump(TessModule, api, params); + if (saveImageOriginal) { + result.imageOriginal = getImage(imageType.ORIGINAL); + } + if (saveImageGrey) { + result.imageGrey = getImage(imageType.GREY); + } + if (saveImageBinary) { + result.imageBinary = getImage(imageType.BINARY); + } + result.rotateRadians = rotateRadiansFinal; + res.resolve(result); + TessModule._free(ptr); + } catch (err) { + res.reject(err.toString()); + } +}; + +// `threshold` is similar to `recognize` except it skips the recognition step +// Useful for getting rotated/binarized images without running recognition +const threshold = ({ + payload: { + image, options: { + rectangle: rec, saveImageOriginal, saveImageGrey, saveImageBinary, rotateAuto, rotateRadians, + }, + }, +}, res) => { + try { + let ptr; + let rotateRadiansFinal; + if (rotateAuto) { + const psmInit = api.GetPageSegMode(); + let psmEdit = false; + if (![PSM.AUTO, PSM.AUTO_ONLY, PSM.OSD].includes(psmInit)) { + psmEdit = true; + api.SetVariable('tessedit_pageseg_mode', String(PSM.AUTO)); + } + + ptr = setImage(TessModule, api, image); + api.FindLines(); + const rotateRadiansCalc = api.GetAngle(); + + // Restore user-provided PSM setting + if (psmEdit) { + api.SetVariable('tessedit_pageseg_mode', String(psmInit)); + } + + // Small angles (<0.005 radians/~0.3 degrees) are ignored to save on runtime + if (Math.abs(rotateRadiansCalc) >= 0.005) { + rotateRadiansFinal = rotateRadiansCalc; + ptr = setImage(TessModule, api, image, rotateRadiansFinal); + } else { + rotateRadiansFinal = 0; + } + } else { + rotateRadiansFinal = rotateRadians || 0; + ptr = setImage(TessModule, api, image, rotateRadiansFinal); + } + + if (typeof rec === 'object') { + api.SetRectangle(rec.left, rec.top, rec.width, rec.height); + } + const result = {}; + if (saveImageOriginal) { + result.imageOriginal = getImage(imageType.ORIGINAL); + } + if (saveImageGrey) { + result.imageGrey = getImage(imageType.GREY); + } + if (saveImageBinary) { + result.imageBinary = getImage(imageType.BINARY); + } + result.rotateRadians = rotateRadiansFinal; + res.resolve(result); TessModule._free(ptr); } catch (err) { res.reject(err.toString()); @@ -295,6 +424,7 @@ exports.dispatchHandlers = (packet, send) => { initialize, setParameters, recognize, + threshold, getPDF, detect, terminate, diff --git a/src/worker-script/utils/arrayBufferToBase64.js b/src/worker-script/utils/arrayBufferToBase64.js new file mode 100644 index 0000000..727f6f5 --- /dev/null +++ b/src/worker-script/utils/arrayBufferToBase64.js @@ -0,0 +1,56 @@ +// Copied from https://gist.github.com/jonleighton/958841 +// Copyright 2011 Jon Leighton, MIT LICENSE + +/* eslint no-bitwise: 0 */ +module.exports = (arrayBuffer) => { + let base64 = ''; + const encodings = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'; + + const bytes = new Uint8Array(arrayBuffer); + const { byteLength } = bytes; + const byteRemainder = byteLength % 3; + const mainLength = byteLength - byteRemainder; + + let a; let b; let c; let + d; + let chunk; + + // Main loop deals with bytes in chunks of 3 + for (let i = 0; i < mainLength; i += 3) { + // Combine the three bytes into a single integer + chunk = (bytes[i] << 16) | (bytes[i + 1] << 8) | bytes[i + 2]; + + // Use bitmasks to extract 6-bit segments from the triplet + a = (chunk & 16515072) >> 18; // 16515072 = (2^6 - 1) << 18 + b = (chunk & 258048) >> 12; // 258048 = (2^6 - 1) << 12 + c = (chunk & 4032) >> 6; // 4032 = (2^6 - 1) << 6 + d = chunk & 63; // 63 = 2^6 - 1 + + // Convert the raw binary segments to the appropriate ASCII encoding + base64 += encodings[a] + encodings[b] + encodings[c] + encodings[d]; + } + + // Deal with the remaining bytes and padding + if (byteRemainder === 1) { + chunk = bytes[mainLength]; + + a = (chunk & 252) >> 2; // 252 = (2^6 - 1) << 2 + + // Set the 4 least significant bits to zero + b = (chunk & 3) << 4; // 3 = 2^2 - 1 + + base64 += `${encodings[a] + encodings[b]}==`; + } else if (byteRemainder === 2) { + chunk = (bytes[mainLength] << 8) | bytes[mainLength + 1]; + + a = (chunk & 64512) >> 10; // 64512 = (2^6 - 1) << 10 + b = (chunk & 1008) >> 4; // 1008 = (2^6 - 1) << 4 + + // Set the 2 least significant bits to zero + c = (chunk & 15) << 2; // 15 = 2^4 - 1 + + base64 += `${encodings[a] + encodings[b] + encodings[c]}=`; + } + + return base64; +}; diff --git a/src/worker-script/utils/setImage.js b/src/worker-script/utils/setImage.js index 3e09045..7ebc194 100644 --- a/src/worker-script/utils/setImage.js +++ b/src/worker-script/utils/setImage.js @@ -8,7 +8,7 @@ const fileType = require('file-type'); * @function set image in tesseract for recognition * @access public */ -module.exports = (TessModule, api, image) => { +module.exports = (TessModule, api, image, angle = 0) => { const buf = Buffer.from(Array.from({ ...image, length: Object.keys(image).length })); const type = fileType(buf); let bytesPerPixel = 0; @@ -55,9 +55,9 @@ module.exports = (TessModule, api, image) => { * */ if (data === null) { - api.SetImage(pix, undefined, undefined, undefined, undefined, exif); + api.SetImage(pix, undefined, undefined, undefined, undefined, exif, angle); } else { - api.SetImage(data, w, h, bytesPerPixel, w * bytesPerPixel, exif); + api.SetImage(data, w, h, bytesPerPixel, w * bytesPerPixel, exif, angle); } return data === null ? pix : data; };