diff --git a/examples/browser/image-processing.html b/examples/browser/image-processing.html
new file mode 100644
index 0000000..cafb6d2
--- /dev/null
+++ b/examples/browser/image-processing.html
@@ -0,0 +1,60 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
Input Image
+
+
+
+
Rotated, Original Color
+
+
+
+
Rotated, Grey
+
+
+
+
Rotated, Binary
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/src/constants/imageType.js b/src/constants/imageType.js
new file mode 100644
index 0000000..e21ccfe
--- /dev/null
+++ b/src/constants/imageType.js
@@ -0,0 +1,5 @@
+module.exports = {
+ ORIGINAL: 0, // `type` value the worker's getImage hands to api.WriteImage for `imageOriginal`
+ GREY: 1, // `type` value the worker's getImage hands to api.WriteImage for `imageGrey`
+ BINARY: 2, // `type` value the worker's getImage hands to api.WriteImage for `imageBinary`
+};
diff --git a/src/createWorker.js b/src/createWorker.js
index d6a5875..3c3b626 100644
--- a/src/createWorker.js
+++ b/src/createWorker.js
@@ -123,6 +123,14 @@ module.exports = (_options = {}) => {
}))
);
+ const threshold = async (image, opts = {}, jobId) => (
+ startJob(createJob({
+ id: jobId,
+ action: 'threshold',
+ payload: { image: await loadImage(image), options: opts },
+ }))
+ );
+
const getPDF = (title = 'Tesseract OCR Result', textonly = false, jobId) => (
startJob(createJob({
id: jobId,
@@ -191,6 +199,7 @@ module.exports = (_options = {}) => {
initialize,
setParameters,
recognize,
+ threshold,
getPDF,
detect,
terminate,
diff --git a/src/index.d.ts b/src/index.d.ts
index 2a7f265..bcc5d33 100644
--- a/src/index.d.ts
+++ b/src/index.d.ts
@@ -22,7 +22,9 @@ declare namespace Tesseract {
loadLanguage(langs?: string, jobId?: string): Promise
initialize(langs?: string, oem?: OEM, jobId?: string): Promise
setParameters(params: Partial, jobId?: string): Promise
+ getImage(type: imageType): string
recognize(image: ImageLike, options?: Partial, jobId?: string): Promise
+ threshold(image: ImageLike, options?: Partial, jobId?: string): Promise
detect(image: ImageLike, jobId?: string): Promise
terminate(jobId?: string): Promise
}
@@ -53,6 +55,11 @@ declare namespace Tesseract {
}
interface RecognizeOptions {
rectangle: Rectangle
+ saveImageOriginal: boolean // when true, the result carries `imageOriginal` (PNG data URL)
+ saveImageGrey: boolean // when true, the result carries `imageGrey` (PNG data URL)
+ saveImageBinary: boolean // when true, the result carries `imageBinary` (PNG data URL)
+ rotateAuto: boolean // when true, the angle is calculated by Tesseract and the image is re-set with it
+ rotateRadians: number // angle used when `rotateAuto` is off (`float` is not a TypeScript type)
}
interface ConfigResult {
jobId: string
@@ -100,6 +107,11 @@ declare namespace Tesseract {
SPARSE_TEXT = '11',
SPARSE_TEXT_OSD = '12',
}
+ const enum imageType {
+ ORIGINAL = 0, // same value as ORIGINAL in src/constants/imageType.js
+ GREY = 1, // same value as GREY in src/constants/imageType.js
+ BINARY = 2 // same value as BINARY in src/constants/imageType.js
+ }
type ImageLike = string | HTMLImageElement | HTMLCanvasElement | HTMLVideoElement
| CanvasRenderingContext2D | File | Blob | ImageData | Buffer;
interface Block {
@@ -213,6 +225,10 @@ declare namespace Tesseract {
box: string | null;
unlv: string | null;
sd: string | null;
+ imageOriginal: string;
+ imageGrey: string;
+ imageBinary: string;
+ rotateRadians: number;
}
}
diff --git a/src/worker-script/index.js b/src/worker-script/index.js
index 1e2cfb6..3634f1c 100644
--- a/src/worker-script/index.js
+++ b/src/worker-script/index.js
@@ -15,6 +15,9 @@ const isWebWorker = require('../utils/getEnvironment')('type') === 'webworker';
const setImage = require('./utils/setImage');
const defaultParams = require('./constants/defaultParams');
const { log, setLogging } = require('../utils/log');
+const arrayBufferToBase64 = require('./utils/arrayBufferToBase64');
+const imageType = require('../constants/imageType');
+const PSM = require('../constants/PSM');
/*
* Tesseract Module returned by TesseractCore.
@@ -197,14 +200,140 @@ const initialize = ({
}
};
-const recognize = ({ payload: { image, options: { rectangle: rec } } }, res) => {
+const getImage = (type) => {
+ api.WriteImage(type, '/image.png');
+ const pngBuffer = TessModule.FS.readFile('/image.png');
+ const pngStr = `data:image/png;base64,${arrayBufferToBase64(pngBuffer.buffer)}`;
+ TessModule.FS.unlink('/image.png');
+ return pngStr;
+};
+
+const recognize = ({
+ payload: {
+ image, options: {
+ rectangle: rec, saveImageOriginal, saveImageGrey, saveImageBinary, rotateAuto, rotateRadians,
+ },
+ },
+}, res) => {
try {
- const ptr = setImage(TessModule, api, image);
+ // When the auto-rotate option is True, setImage is called with no angle,
+ // then the angle is calculated by Tesseract and then setImage is re-called.
+ // Otherwise, setImage is called once using the user-provided rotateRadiansFinal value.
+ let ptr;
+ let rotateRadiansFinal;
+ if (rotateAuto) {
+ // The angle is only detected if auto page segmentation is used
+ // Therefore, if this is not the mode specified by the user, it is enabled temporarily here
+ const psmInit = api.GetPageSegMode();
+ let psmEdit = false;
+ if (![PSM.AUTO, PSM.AUTO_ONLY, PSM.OSD].includes(psmInit)) {
+ psmEdit = true;
+ api.SetVariable('tessedit_pageseg_mode', String(PSM.AUTO));
+ }
+
+ ptr = setImage(TessModule, api, image);
+ api.FindLines();
+ const rotateRadiansCalc = api.GetAngle();
+
+ // Restore user-provided PSM setting
+ if (psmEdit) {
+ api.SetVariable('tessedit_pageseg_mode', String(psmInit));
+ }
+
+ // Small angles (<0.005 radians/~0.3 degrees) are ignored to save on runtime
+ if (Math.abs(rotateRadiansCalc) >= 0.005) {
+ rotateRadiansFinal = rotateRadiansCalc;
+ ptr = setImage(TessModule, api, image, rotateRadiansFinal);
+ } else {
+ // Image needs to be reset if run with different PSM setting earlier
+ if (psmEdit) {
+ ptr = setImage(TessModule, api, image);
+ }
+ rotateRadiansFinal = 0;
+ }
+ } else {
+ rotateRadiansFinal = rotateRadians || 0;
+ ptr = setImage(TessModule, api, image, rotateRadiansFinal);
+ }
+
if (typeof rec === 'object') {
api.SetRectangle(rec.left, rec.top, rec.width, rec.height);
}
api.Recognize(null);
- res.resolve(dump(TessModule, api, params));
+ const result = dump(TessModule, api, params);
+ if (saveImageOriginal) {
+ result.imageOriginal = getImage(imageType.ORIGINAL);
+ }
+ if (saveImageGrey) {
+ result.imageGrey = getImage(imageType.GREY);
+ }
+ if (saveImageBinary) {
+ result.imageBinary = getImage(imageType.BINARY);
+ }
+ result.rotateRadians = rotateRadiansFinal;
+ res.resolve(result);
+ TessModule._free(ptr);
+ } catch (err) {
+ res.reject(err.toString());
+ }
+};
+
+// `threshold` is similar to `recognize` except it skips the recognition step
+// Useful for getting rotated/binarized images without running recognition
+const threshold = ({
+ payload: {
+ image, options: {
+ rectangle: rec, saveImageOriginal, saveImageGrey, saveImageBinary, rotateAuto, rotateRadians,
+ },
+ },
+}, res) => {
+ try {
+ let ptr;
+ let rotateRadiansFinal;
+ if (rotateAuto) {
+ const psmInit = api.GetPageSegMode();
+ let psmEdit = false;
+ if (![PSM.AUTO, PSM.AUTO_ONLY, PSM.OSD].includes(psmInit)) {
+ psmEdit = true;
+ api.SetVariable('tessedit_pageseg_mode', String(PSM.AUTO));
+ }
+
+ ptr = setImage(TessModule, api, image);
+ api.FindLines();
+ const rotateRadiansCalc = api.GetAngle();
+
+ // Restore user-provided PSM setting
+ if (psmEdit) {
+ api.SetVariable('tessedit_pageseg_mode', String(psmInit));
+ }
+
+ // Small angles (<0.005 radians/~0.3 degrees) are ignored to save on runtime
+ if (Math.abs(rotateRadiansCalc) >= 0.005) {
+ rotateRadiansFinal = rotateRadiansCalc;
+ ptr = setImage(TessModule, api, image, rotateRadiansFinal);
+ } else {
+ rotateRadiansFinal = 0;
+ }
+ } else {
+ rotateRadiansFinal = rotateRadians || 0;
+ ptr = setImage(TessModule, api, image, rotateRadiansFinal);
+ }
+
+ if (typeof rec === 'object') {
+ api.SetRectangle(rec.left, rec.top, rec.width, rec.height);
+ }
+ const result = {};
+ if (saveImageOriginal) {
+ result.imageOriginal = getImage(imageType.ORIGINAL);
+ }
+ if (saveImageGrey) {
+ result.imageGrey = getImage(imageType.GREY);
+ }
+ if (saveImageBinary) {
+ result.imageBinary = getImage(imageType.BINARY);
+ }
+ result.rotateRadians = rotateRadiansFinal;
+ res.resolve(result);
TessModule._free(ptr);
} catch (err) {
res.reject(err.toString());
@@ -295,6 +424,7 @@ exports.dispatchHandlers = (packet, send) => {
initialize,
setParameters,
recognize,
+ threshold,
getPDF,
detect,
terminate,
diff --git a/src/worker-script/utils/arrayBufferToBase64.js b/src/worker-script/utils/arrayBufferToBase64.js
new file mode 100644
index 0000000..727f6f5
--- /dev/null
+++ b/src/worker-script/utils/arrayBufferToBase64.js
@@ -0,0 +1,56 @@
+// Copied from https://gist.github.com/jonleighton/958841
+// Copyright 2011 Jon Leighton, MIT LICENSE
+
+/* eslint no-bitwise: 0 */
+module.exports = (arrayBuffer) => {
+ let base64 = '';
+ const encodings = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';
+
+ const bytes = new Uint8Array(arrayBuffer);
+ const { byteLength } = bytes;
+ const byteRemainder = byteLength % 3;
+ const mainLength = byteLength - byteRemainder;
+
+ let a; let b; let c; let
+ d;
+ let chunk;
+
+ // Main loop deals with bytes in chunks of 3
+ for (let i = 0; i < mainLength; i += 3) {
+ // Combine the three bytes into a single integer
+ chunk = (bytes[i] << 16) | (bytes[i + 1] << 8) | bytes[i + 2];
+
+ // Use bitmasks to extract 6-bit segments from the triplet
+ a = (chunk & 16515072) >> 18; // 16515072 = (2^6 - 1) << 18
+ b = (chunk & 258048) >> 12; // 258048 = (2^6 - 1) << 12
+ c = (chunk & 4032) >> 6; // 4032 = (2^6 - 1) << 6
+ d = chunk & 63; // 63 = 2^6 - 1
+
+ // Convert the raw binary segments to the appropriate ASCII encoding
+ base64 += encodings[a] + encodings[b] + encodings[c] + encodings[d];
+ }
+
+ // Deal with the remaining bytes and padding
+ if (byteRemainder === 1) {
+ chunk = bytes[mainLength];
+
+ a = (chunk & 252) >> 2; // 252 = (2^6 - 1) << 2
+
+ // Set the 4 least significant bits to zero
+ b = (chunk & 3) << 4; // 3 = 2^2 - 1
+
+ base64 += `${encodings[a] + encodings[b]}==`;
+ } else if (byteRemainder === 2) {
+ chunk = (bytes[mainLength] << 8) | bytes[mainLength + 1];
+
+ a = (chunk & 64512) >> 10; // 64512 = (2^6 - 1) << 10
+ b = (chunk & 1008) >> 4; // 1008 = (2^6 - 1) << 4
+
+ // Set the 2 least significant bits to zero
+ c = (chunk & 15) << 2; // 15 = 2^4 - 1
+
+ base64 += `${encodings[a] + encodings[b] + encodings[c]}=`;
+ }
+
+ return base64;
+};
diff --git a/src/worker-script/utils/setImage.js b/src/worker-script/utils/setImage.js
index 3e09045..7ebc194 100644
--- a/src/worker-script/utils/setImage.js
+++ b/src/worker-script/utils/setImage.js
@@ -8,7 +8,7 @@ const fileType = require('file-type');
* @function set image in tesseract for recognition
* @access public
*/
-module.exports = (TessModule, api, image) => {
+module.exports = (TessModule, api, image, angle = 0) => {
const buf = Buffer.from(Array.from({ ...image, length: Object.keys(image).length }));
const type = fileType(buf);
let bytesPerPixel = 0;
@@ -55,9 +55,9 @@ module.exports = (TessModule, api, image) => {
*
*/
if (data === null) {
- api.SetImage(pix, undefined, undefined, undefined, undefined, exif);
+ api.SetImage(pix, undefined, undefined, undefined, undefined, exif, angle);
} else {
- api.SetImage(data, w, h, bytesPerPixel, w * bytesPerPixel, exif);
+ api.SetImage(data, w, h, bytesPerPixel, w * bytesPerPixel, exif, angle);
}
return data === null ? pix : data;
};