Browse Source

Added image preprocessing functions (rotate + save images)

dev/v4
Balearica 2 years ago
parent
commit
3678dbceae
  1. 60
      examples/browser/image-processing.html
  2. 5
      src/constants/imageType.js
  3. 9
      src/createWorker.js
  4. 16
      src/index.d.ts
  5. 136
      src/worker-script/index.js
  6. 56
      src/worker-script/utils/arrayBufferToBase64.js
  7. 6
      src/worker-script/utils/setImage.js

60
examples/browser/image-processing.html

@ -0,0 +1,60 @@ @@ -0,0 +1,60 @@
<html>
<head>
<script src="/dist/tesseract.dev.js"></script>
<style>
.column {
float: left;
width: 20%;
padding: 5px;
}
</style>
</head>
<body>
<input type="file" id="uploader">
<div class="row">
<div class="column">
<p>Input Image</p>
<img id="imgInput" style="max-width:500px;">
</div>
<div class="column">
<p>Rotated, Original Color</p>
<img id="imgOriginal" style="max-width:500px;">
</div>
<div class="column">
<p>Rotated, Grey</p>
<img id="imgGrey" style="max-width:500px;">
</div>
<div class="column">
<p>Rotated, Binary</p>
<img id="imgBinary" style="max-width:500px;">
</div>
</div>
<script>
// Handles a change on the file input: shows the chosen image, runs OCR on it with
// automatic rotation enabled, and displays the rotated original/grey/binary images
// returned by the worker.
const recognize = async ({ target: { files } }) => {
  // Nothing to process if the input holds no file (e.g. the selection was cleared).
  if (!files || files.length === 0) {
    return;
  }
  const [file] = files;
  document.getElementById("imgInput").src = URL.createObjectURL(file);
  const worker = Tesseract.createWorker({
    corePath: '/tesseract-core-simd.wasm.js',
    workerPath: "/dist/worker.dev.js"
  });
  try {
    await worker.load();
    await worker.loadLanguage('eng');
    // Initialize exactly once with the language that was just loaded.
    await worker.initialize('eng');
    const ret = await worker.recognize(file, { saveImageOriginal: true, saveImageGrey: true, saveImageBinary: true, rotateAuto: true });
    // Each saved image comes back as a data URL that can be assigned directly to <img>.src.
    document.getElementById("imgOriginal").src = ret.data.imageOriginal;
    document.getElementById("imgGrey").src = ret.data.imageGrey;
    document.getElementById("imgBinary").src = ret.data.imageBinary;
  } finally {
    // A new worker is created per selection, so release it whether or not recognition succeeded.
    await worker.terminate();
  }
}
const elm = document.getElementById('uploader');
elm.addEventListener('change', recognize);
</script>
</body>
</html>

5
src/constants/imageType.js

@ -0,0 +1,5 @@ @@ -0,0 +1,5 @@
// Numeric identifiers for the image variants that can be exported from the worker.
// The worker's getImage helper forwards one of these values as the `type` argument
// of `api.WriteImage`.
module.exports = {
// Requested through the `saveImageOriginal` recognize option.
ORIGINAL: 0,
// Requested through the `saveImageGrey` recognize option.
GREY: 1,
// Requested through the `saveImageBinary` recognize option.
BINARY: 2,
};

9
src/createWorker.js

@ -123,6 +123,14 @@ module.exports = (_options = {}) => { @@ -123,6 +123,14 @@ module.exports = (_options = {}) => {
}))
);
// Queues a 'threshold' job for the worker.
// `image` is first converted with loadImage; `opts` is forwarded unchanged as the
// job's options and `jobId` (optional) becomes the job id.
// Resolves with whatever startJob yields for the created job.
const threshold = async (image, opts = {}, jobId) => {
  const loaded = await loadImage(image);
  const job = createJob({
    id: jobId,
    action: 'threshold',
    payload: { image: loaded, options: opts },
  });
  return startJob(job);
};
const getPDF = (title = 'Tesseract OCR Result', textonly = false, jobId) => (
startJob(createJob({
id: jobId,
@ -191,6 +199,7 @@ module.exports = (_options = {}) => { @@ -191,6 +199,7 @@ module.exports = (_options = {}) => {
initialize,
setParameters,
recognize,
threshold,
getPDF,
detect,
terminate,

16
src/index.d.ts vendored

@ -22,7 +22,9 @@ declare namespace Tesseract { @@ -22,7 +22,9 @@ declare namespace Tesseract {
loadLanguage(langs?: string, jobId?: string): Promise<ConfigResult>
initialize(langs?: string, oem?: OEM, jobId?: string): Promise<ConfigResult>
setParameters(params: Partial<WorkerParams>, jobId?: string): Promise<ConfigResult>
getImage(type: imageType): string
recognize(image: ImageLike, options?: Partial<RecognizeOptions>, jobId?: string): Promise<RecognizeResult>
threshold(image: ImageLike, options?: Partial<RecognizeOptions>, jobId?: string): Promise<RecognizeResult>
detect(image: ImageLike, jobId?: string): Promise<DetectResult>
terminate(jobId?: string): Promise<ConfigResult>
}
@ -53,6 +55,11 @@ declare namespace Tesseract { @@ -53,6 +55,11 @@ declare namespace Tesseract {
}
/**
 * Options accepted by `recognize` and `threshold`.
 */
interface RecognizeOptions {
  /** Region of the image to restrict processing to. */
  rectangle: Rectangle
  /** When true, the result includes `imageOriginal` (PNG data URL of the original-colour image). */
  saveImageOriginal: boolean
  /** When true, the result includes `imageGrey` (PNG data URL of the grey image). */
  saveImageGrey: boolean
  /** When true, the result includes `imageBinary` (PNG data URL of the binary image). */
  saveImageBinary: boolean
  /** When true, the page angle is detected automatically and `rotateRadians` is ignored. */
  rotateAuto: boolean
  /** Rotation angle in radians applied when `rotateAuto` is not set; treated as 0 when omitted. */
  rotateRadians: number
}
interface ConfigResult {
jobId: string
@ -100,6 +107,11 @@ declare namespace Tesseract { @@ -100,6 +107,11 @@ declare namespace Tesseract {
SPARSE_TEXT = '11',
SPARSE_TEXT_OSD = '12',
}
/**
 * Identifies which image variant to retrieve (see `getImage(type)`).
 * The numeric values match the constants in src/constants/imageType.js.
 */
const enum imageType {
ORIGINAL = 0,
GREY = 1,
BINARY = 2
}
/** Image inputs accepted by `recognize`, `threshold` and `detect`. */
type ImageLike = string | HTMLImageElement | HTMLCanvasElement | HTMLVideoElement
| CanvasRenderingContext2D | File | Blob | ImageData | Buffer;
interface Block {
@ -213,6 +225,10 @@ declare namespace Tesseract { @@ -213,6 +225,10 @@ declare namespace Tesseract {
box: string | null;
unlv: string | null;
sd: string | null;
imageOriginal: string;
imageGrey: string;
imageBinary: string;
rotateRadians: number;
}
}

136
src/worker-script/index.js

@ -15,6 +15,9 @@ const isWebWorker = require('../utils/getEnvironment')('type') === 'webworker'; @@ -15,6 +15,9 @@ const isWebWorker = require('../utils/getEnvironment')('type') === 'webworker';
const setImage = require('./utils/setImage');
const defaultParams = require('./constants/defaultParams');
const { log, setLogging } = require('../utils/log');
const arrayBufferToBase64 = require('./utils/arrayBufferToBase64');
const imageType = require('../constants/imageType');
const PSM = require('../constants/PSM');
/*
* Tesseract Module returned by TesseractCore.
@ -197,14 +200,140 @@ const initialize = ({ @@ -197,14 +200,140 @@ const initialize = ({
}
};
const recognize = ({ payload: { image, options: { rectangle: rec } } }, res) => {
// Exports the image variant identified by `type` (a value from imageType) as a PNG data URL.
// The image is written to a temporary file in the module's filesystem, read back,
// base64-encoded, and the temporary file is removed before returning.
const getImage = (type) => {
  const { FS } = TessModule;
  api.WriteImage(type, '/image.png');
  const fileBytes = FS.readFile('/image.png');
  const encoded = arrayBufferToBase64(fileBytes.buffer);
  const dataUrl = `data:image/png;base64,${encoded}`;
  FS.unlink('/image.png');
  return dataUrl;
};
/**
 * Worker-side handler for the `recognize` action.
 *
 * Loads the image (optionally rotated), runs recognition and resolves `res` with the
 * output of `dump(TessModule, api, params)` extended with:
 *   - `imageOriginal` / `imageGrey` / `imageBinary`: PNG data URLs, present only when the
 *     matching `saveImage*` option is truthy;
 *   - `rotateRadians`: the rotation angle that was actually applied (0 if none).
 * Any exception is converted to a string and passed to `res.reject`.
 *
 * Options read from the payload: `rectangle`, `saveImageOriginal`, `saveImageGrey`,
 * `saveImageBinary`, `rotateAuto`, `rotateRadians`.
 */
const recognize = ({
payload: {
image, options: {
rectangle: rec, saveImageOriginal, saveImageGrey, saveImageBinary, rotateAuto, rotateRadians,
},
},
}, res) => {
try {
// NOTE(review): `ptr` is declared again with `let` a few lines below in the same scope.
// This `const` line looks like the pre-change version kept by the diff view — confirm
// it is not part of the committed code.
const ptr = setImage(TessModule, api, image);
// When the auto-rotate option is true, setImage is called with no angle,
// then the angle is calculated by Tesseract and setImage is called again with that angle.
// Otherwise, setImage is called once using the user-provided rotateRadians value (0 if omitted).
let ptr;
let rotateRadiansFinal;
if (rotateAuto) {
// The angle is only detected if auto page segmentation is used
// Therefore, if this is not the mode specified by the user, it is enabled temporarily here
const psmInit = api.GetPageSegMode();
let psmEdit = false;
if (![PSM.AUTO, PSM.AUTO_ONLY, PSM.OSD].includes(psmInit)) {
psmEdit = true;
api.SetVariable('tessedit_pageseg_mode', String(PSM.AUTO));
}
ptr = setImage(TessModule, api, image);
// Run line finding so that GetAngle() can report the detected page angle.
api.FindLines();
const rotateRadiansCalc = api.GetAngle();
// Restore user-provided PSM setting
if (psmEdit) {
api.SetVariable('tessedit_pageseg_mode', String(psmInit));
}
// Small angles (<0.005 radians/~0.3 degrees) are ignored to save on runtime
if (Math.abs(rotateRadiansCalc) >= 0.005) {
rotateRadiansFinal = rotateRadiansCalc;
// NOTE(review): the pointer from the first setImage call is overwritten here without a
// TessModule._free call — confirm whether that allocation is released elsewhere.
ptr = setImage(TessModule, api, image, rotateRadiansFinal);
} else {
// Image needs to be reset if run with different PSM setting earlier
if (psmEdit) {
// NOTE(review): same as above — the previous `ptr` value is replaced without being freed.
ptr = setImage(TessModule, api, image);
}
rotateRadiansFinal = 0;
}
} else {
// No auto-rotation: apply the caller's angle, treating a missing/falsy value as 0.
rotateRadiansFinal = rotateRadians || 0;
ptr = setImage(TessModule, api, image, rotateRadiansFinal);
}
// Restrict recognition to the requested rectangle, if one was supplied.
if (typeof rec === 'object') {
api.SetRectangle(rec.left, rec.top, rec.width, rec.height);
}
api.Recognize(null);
// NOTE(review): this resolve is followed by a second resolve of `result` below; it looks
// like the pre-change line kept by the diff view — confirm it is not in the committed code.
res.resolve(dump(TessModule, api, params));
const result = dump(TessModule, api, params);
// Attach the requested image variants as PNG data URLs.
if (saveImageOriginal) {
result.imageOriginal = getImage(imageType.ORIGINAL);
}
if (saveImageGrey) {
result.imageGrey = getImage(imageType.GREY);
}
if (saveImageBinary) {
result.imageBinary = getImage(imageType.BINARY);
}
// Report the angle that was actually applied (0 when no rotation was performed).
result.rotateRadians = rotateRadiansFinal;
res.resolve(result);
// Release the image buffer returned by setImage.
// NOTE(review): this line is not reached when an exception is thrown above.
TessModule._free(ptr);
} catch (err) {
res.reject(err.toString());
}
};
// `threshold` is similar to `recognize` except it skips the recognition step
// Useful for getting rotated/binarized images without running recognition
const threshold = ({
payload: {
image, options: {
rectangle: rec, saveImageOriginal, saveImageGrey, saveImageBinary, rotateAuto, rotateRadians,
},
},
}, res) => {
try {
let ptr;
let rotateRadiansFinal;
if (rotateAuto) {
const psmInit = api.GetPageSegMode();
let psmEdit = false;
if (![PSM.AUTO, PSM.AUTO_ONLY, PSM.OSD].includes(psmInit)) {
psmEdit = true;
api.SetVariable('tessedit_pageseg_mode', String(PSM.AUTO));
}
ptr = setImage(TessModule, api, image);
api.FindLines();
const rotateRadiansCalc = api.GetAngle();
// Restore user-provided PSM setting
if (psmEdit) {
api.SetVariable('tessedit_pageseg_mode', String(psmInit));
}
// Small angles (<0.005 radians/~0.3 degrees) are ignored to save on runtime
if (Math.abs(rotateRadiansCalc) >= 0.005) {
rotateRadiansFinal = rotateRadiansCalc;
ptr = setImage(TessModule, api, image, rotateRadiansFinal);
} else {
rotateRadiansFinal = 0;
}
} else {
rotateRadiansFinal = rotateRadians || 0;
ptr = setImage(TessModule, api, image, rotateRadiansFinal);
}
if (typeof rec === 'object') {
api.SetRectangle(rec.left, rec.top, rec.width, rec.height);
}
const result = {};
if (saveImageOriginal) {
result.imageOriginal = getImage(imageType.ORIGINAL);
}
if (saveImageGrey) {
result.imageGrey = getImage(imageType.GREY);
}
if (saveImageBinary) {
result.imageBinary = getImage(imageType.BINARY);
}
result.rotateRadians = rotateRadiansFinal;
res.resolve(result);
TessModule._free(ptr);
} catch (err) {
res.reject(err.toString());
@ -295,6 +424,7 @@ exports.dispatchHandlers = (packet, send) => { @@ -295,6 +424,7 @@ exports.dispatchHandlers = (packet, send) => {
initialize,
setParameters,
recognize,
threshold,
getPDF,
detect,
terminate,

56
src/worker-script/utils/arrayBufferToBase64.js

@ -0,0 +1,56 @@ @@ -0,0 +1,56 @@
// Copied from https://gist.github.com/jonleighton/958841
// Copyright 2011 Jon Leighton, MIT LICENSE
/* eslint no-bitwise: 0 */
module.exports = (arrayBuffer) => {
let base64 = '';
const encodings = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';
const bytes = new Uint8Array(arrayBuffer);
const { byteLength } = bytes;
const byteRemainder = byteLength % 3;
const mainLength = byteLength - byteRemainder;
let a; let b; let c; let
d;
let chunk;
// Main loop deals with bytes in chunks of 3
for (let i = 0; i < mainLength; i += 3) {
// Combine the three bytes into a single integer
chunk = (bytes[i] << 16) | (bytes[i + 1] << 8) | bytes[i + 2];
// Use bitmasks to extract 6-bit segments from the triplet
a = (chunk & 16515072) >> 18; // 16515072 = (2^6 - 1) << 18
b = (chunk & 258048) >> 12; // 258048 = (2^6 - 1) << 12
c = (chunk & 4032) >> 6; // 4032 = (2^6 - 1) << 6
d = chunk & 63; // 63 = 2^6 - 1
// Convert the raw binary segments to the appropriate ASCII encoding
base64 += encodings[a] + encodings[b] + encodings[c] + encodings[d];
}
// Deal with the remaining bytes and padding
if (byteRemainder === 1) {
chunk = bytes[mainLength];
a = (chunk & 252) >> 2; // 252 = (2^6 - 1) << 2
// Set the 4 least significant bits to zero
b = (chunk & 3) << 4; // 3 = 2^2 - 1
base64 += `${encodings[a] + encodings[b]}==`;
} else if (byteRemainder === 2) {
chunk = (bytes[mainLength] << 8) | bytes[mainLength + 1];
a = (chunk & 64512) >> 10; // 64512 = (2^6 - 1) << 10
b = (chunk & 1008) >> 4; // 1008 = (2^6 - 1) << 4
// Set the 2 least significant bits to zero
c = (chunk & 15) << 2; // 15 = 2^4 - 1
base64 += `${encodings[a] + encodings[b] + encodings[c]}=`;
}
return base64;
};

6
src/worker-script/utils/setImage.js

@ -8,7 +8,7 @@ const fileType = require('file-type'); @@ -8,7 +8,7 @@ const fileType = require('file-type');
* @function set image in tesseract for recognition
* @access public
*/
module.exports = (TessModule, api, image) => {
module.exports = (TessModule, api, image, angle = 0) => {
const buf = Buffer.from(Array.from({ ...image, length: Object.keys(image).length }));
const type = fileType(buf);
let bytesPerPixel = 0;
@ -55,9 +55,9 @@ module.exports = (TessModule, api, image) => { @@ -55,9 +55,9 @@ module.exports = (TessModule, api, image) => {
*
*/
if (data === null) {
api.SetImage(pix, undefined, undefined, undefined, undefined, exif);
api.SetImage(pix, undefined, undefined, undefined, undefined, exif, angle);
} else {
api.SetImage(data, w, h, bytesPerPixel, w * bytesPerPixel, exif);
api.SetImage(data, w, h, bytesPerPixel, w * bytesPerPixel, exif, angle);
}
return data === null ? pix : data;
};

Loading…
Cancel
Save