Browse Source

Fix lint error

pull/265/head
Jerome Wu 6 years ago
parent
commit
af05a27d35
  1. 116
      src/common/circularize.js
  2. 50
      src/common/desaturate.js
  3. 310
      src/common/dump.js
  4. 139
      src/common/job.js

116
src/common/circularize.js

@@ -5,59 +5,75 @@
// a (circular) DOM-like interface for walking
// through the data.
module.exports = function circularize(page){
page.paragraphs = []
page.lines = []
page.words = []
page.symbols = []
module.exports = (iPage) => {
const page = {
...iPage,
paragraphs: [],
lines: [],
words: [],
symbols: [],
};
page.blocks.forEach(function(block){
block.page = page;
page.blocks.forEach((iBlock) => {
const block = {
...iBlock,
page,
lines: [],
words: [],
symbols: [],
};
block.lines = []
block.words = []
block.symbols = []
block.paragraphs.forEach((iPara) => {
const para = {
...iPara,
block,
page,
words: [],
symbols: [],
};
block.paragraphs.forEach(function(para){
para.block = block;
para.page = page;
para.lines.forEach((iLine) => {
const line = {
...iLine,
paragraph: para,
block,
page,
symbols: [],
};
para.words = []
para.symbols = []
line.words.forEach((iWord) => {
const word = {
...iWord,
line,
paragraph: para,
block,
page,
};
para.lines.forEach(function(line){
line.paragraph = para;
line.block = block;
line.page = page;
word.symbols.forEach((iSym) => {
const sym = {
...iSym,
word,
line,
paragraph: para,
block,
page,
};
line.symbols = []
line.words.forEach(function(word){
word.line = line;
word.paragraph = para;
word.block = block;
word.page = page;
word.symbols.forEach(function(sym){
sym.word = word;
sym.line = line;
sym.paragraph = para;
sym.block = block;
sym.page = page;
sym.line.symbols.push(sym)
sym.paragraph.symbols.push(sym)
sym.block.symbols.push(sym)
sym.page.symbols.push(sym)
})
word.paragraph.words.push(word)
word.block.words.push(word)
word.page.words.push(word)
})
line.block.lines.push(line)
line.page.lines.push(line)
})
para.page.paragraphs.push(para)
})
})
return page
}
sym.line.symbols.push(sym);
sym.paragraph.symbols.push(sym);
sym.block.symbols.push(sym);
sym.page.symbols.push(sym);
});
word.paragraph.words.push(word);
word.block.words.push(word);
word.page.words.push(word);
});
line.block.lines.push(line);
line.page.lines.push(line);
});
para.page.paragraphs.push(para);
});
});
return page;
};

50
src/common/desaturate.js

@@ -1,24 +1,30 @@
// This converts an image to grayscale
/* eslint-disable no-bitwise */
/* eslint-disable max-len */
module.exports = function desaturate(image){
var width, height;
if(image.data){
var src = image.data;
width = image.width,
height = image.height;
var dst = new Uint8Array(width * height);
var srcLength = src.length | 0, srcLength_16 = (srcLength - 16) | 0;
// This converts an image to grayscale
module.exports = (image) => {
if (image.data) {
const src = image.data;
const { width, height } = image;
const dst = new Uint8Array(width * height);
const srcLength = src.length | 0;
const srcLength16 = (srcLength - 16) | 0;
let i = 0;
let j = 0;
for (var i = 0, j = 0; i <= srcLength_16; i += 16, j += 4) {
// convert to grayscale 4 pixels at a time; everything with alpha gets put in front of 50% gray
dst[j] = (((src[i] * 77 + src[i+1] * 151 + src[i+2] * 28) * src[i+3]) + ((255-src[i+3]) << 15) + 32768) >> 16
dst[j+1] = (((src[i+4] * 77 + src[i+5] * 151 + src[i+6] * 28) * src[i+7]) + ((255-src[i+7]) << 15) + 32768) >> 16
dst[j+2] = (((src[i+8] * 77 + src[i+9] * 151 + src[i+10] * 28) * src[i+11]) + ((255-src[i+11]) << 15) + 32768) >> 16
dst[j+3] = (((src[i+12] * 77 + src[i+13] * 151 + src[i+14] * 28) * src[i+15]) + ((255-src[i+15]) << 15) + 32768) >> 16
}
for (; i < srcLength; i += 4, ++j) //finish up
dst[j] = (((src[i] * 77 + src[i+1] * 151 + src[i+2] * 28) * src[i+3]) + ((255-src[i+3]) << 15) + 32768) >> 16
image = dst;
} else { throw 'Invalid ImageData' }
return image
}
for (; i <= srcLength16; i += 16, j += 4) {
// convert to grayscale 4 pixels at a time; everything with alpha gets put in front of 50% gray
dst[j] = (((src[i] * 77 + src[i + 1] * 151 + src[i + 2] * 28) * src[i + 3]) + ((255 - src[i + 3]) << 15) + 32768) >> 16;
dst[j + 1] = (((src[i + 4] * 77 + src[i + 5] * 151 + src[i + 6] * 28) * src[i + 7]) + ((255 - src[i + 7]) << 15) + 32768) >> 16;
dst[j + 2] = (((src[i + 8] * 77 + src[i + 9] * 151 + src[i + 10] * 28) * src[i + 11]) + ((255 - src[i + 11]) << 15) + 32768) >> 16;
dst[j + 3] = (((src[i + 12] * 77 + src[i + 13] * 151 + src[i + 14] * 28) * src[i + 15]) + ((255 - src[i + 15]) << 15) + 32768) >> 16;
}
// finish up
for (; i < srcLength; i += 4, j += 1) {
dst[j] = (((src[i] * 77 + src[i + 1] * 151 + src[i + 2] * 28) * src[i + 3]) + ((255 - src[i + 3]) << 15) + 32768) >> 16;
}
return dst;
}
return null;
// throw { err: 'Invalid ImageData' };
};

310
src/common/dump.js

@@ -1,164 +1,158 @@
module.exports = function DumpLiterallyEverything(Module, base){
var ri = base.GetIterator();
var blocks = [];
var block, para, textline, word, symbol;
// the generated HOCR is excessively indented, so
// we get rid of that indentation
function enumToString(value, prefix){
return (Object.keys(Module)
.filter(function(e){ return e.substr(0, prefix.length + 1) == prefix + '_' })
.filter(function(e){ return Module[e] === value })
.map(function(e){ return e.slice(prefix.length + 1) })[0])
const deindent = (html) => {
const lines = html.split('\n');
if (lines[0].substring(0, 2) === ' ') {
for (let i = 0; i < lines.length; i += 1) {
if (lines[i].substring(0, 2) === ' ') {
lines[i] = lines[i].slice(2);
}
}
ri.Begin()
do {
if(ri.IsAtBeginningOf(Module.RIL_BLOCK)){
var poly = ri.BlockPolygon();
var polygon = null;
// BlockPolygon() returns null when automatic page segmentation is off
if(Module.getPointer(poly) > 0){
var n = poly.get_n(),
px = poly.get_x(),
py = poly.get_y(),
polygon = [];
for(var i = 0; i < n; i++){
polygon.push([px.getValue(i), py.getValue(i)]);
}
Module._ptaDestroy(Module.getPointer(poly));
}
block = {
paragraphs: [],
text: ri.GetUTF8Text(Module.RIL_BLOCK),
confidence: ri.Confidence(Module.RIL_BLOCK),
baseline: ri.getBaseline(Module.RIL_BLOCK),
bbox: ri.getBoundingBox(Module.RIL_BLOCK),
blocktype: enumToString(ri.BlockType(), 'PT'),
polygon: polygon
}
blocks.push(block)
}
if(ri.IsAtBeginningOf(Module.RIL_PARA)){
para = {
lines: [],
text: ri.GetUTF8Text(Module.RIL_PARA),
confidence: ri.Confidence(Module.RIL_PARA),
baseline: ri.getBaseline(Module.RIL_PARA),
bbox: ri.getBoundingBox(Module.RIL_PARA),
is_ltr: !!ri.ParagraphIsLtr()
}
block.paragraphs.push(para)
}
if(ri.IsAtBeginningOf(Module.RIL_TEXTLINE)){
textline = {
words: [],
text: ri.GetUTF8Text(Module.RIL_TEXTLINE),
confidence: ri.Confidence(Module.RIL_TEXTLINE),
baseline: ri.getBaseline(Module.RIL_TEXTLINE),
bbox: ri.getBoundingBox(Module.RIL_TEXTLINE)
}
para.lines.push(textline)
}
if(ri.IsAtBeginningOf(Module.RIL_WORD)){
var fontInfo = ri.getWordFontAttributes(),
wordDir = ri.WordDirection();
word = {
symbols: [],
choices: [],
text: ri.GetUTF8Text(Module.RIL_WORD),
confidence: ri.Confidence(Module.RIL_WORD),
baseline: ri.getBaseline(Module.RIL_WORD),
bbox: ri.getBoundingBox(Module.RIL_WORD),
is_numeric: !!ri.WordIsNumeric(),
in_dictionary: !!ri.WordIsFromDictionary(),
direction: enumToString(wordDir, 'DIR'),
language: ri.WordRecognitionLanguage(),
is_bold: fontInfo.is_bold,
is_italic: fontInfo.is_italic,
is_underlined: fontInfo.is_underlined,
is_monospace: fontInfo.is_monospace,
is_serif: fontInfo.is_serif,
is_smallcaps: fontInfo.is_smallcaps,
font_size: fontInfo.pointsize,
font_id: fontInfo.font_id,
font_name: fontInfo.font_name,
}
var wc = new Module.WordChoiceIterator(ri);
do {
word.choices.push({
text: wc.GetUTF8Text(),
confidence: wc.Confidence()
})
} while (wc.Next());
Module.destroy(wc)
textline.words.push(word)
}
return lines.join('\n');
};
module.exports = (Module, base) => {
const ri = base.GetIterator();
const blocks = [];
let block;
let para;
let textline;
let word;
let symbol;
// Looks up the name of a Module constant from its value: scans the keys of
// Module for those that begin with `${prefix}_` and whose value strictly
// equals `value`, then returns the first match with the prefix and the
// underscore stripped. Yields undefined when no key matches.
const enumToString = (value, prefix) => (
Object.keys(Module)
.filter(e => (e.substr(0, prefix.length + 1) === `${prefix}_`))
.filter(e => Module[e] === value)
.map(e => e.slice(prefix.length + 1))[0]
);
ri.Begin();
do {
if (ri.IsAtBeginningOf(Module.RIL_BLOCK)) {
const poly = ri.BlockPolygon();
let polygon = null;
// BlockPolygon() returns null when automatic page segmentation is off
if (Module.getPointer(poly) > 0) {
const n = poly.get_n();
const px = poly.get_x();
const py = poly.get_y();
polygon = [];
for (let i = 0; i < n; i += 1) {
polygon.push([px.getValue(i), py.getValue(i)]);
}
var image = null;
// var pix = ri.GetBinaryImage(Module.RIL_SYMBOL)
// var image = pix2array(pix);
// // for some reason it seems that things stop working if you destroy pics
// Module._pixDestroy(Module.getPointer(pix));
if(ri.IsAtBeginningOf(Module.RIL_SYMBOL)){
symbol = {
choices: [],
image: image,
text: ri.GetUTF8Text(Module.RIL_SYMBOL),
confidence: ri.Confidence(Module.RIL_SYMBOL),
baseline: ri.getBaseline(Module.RIL_SYMBOL),
bbox: ri.getBoundingBox(Module.RIL_SYMBOL),
is_superscript: !!ri.SymbolIsSuperscript(),
is_subscript: !!ri.SymbolIsSubscript(),
is_dropcap: !!ri.SymbolIsDropcap(),
}
word.symbols.push(symbol)
var ci = new Module.ChoiceIterator(ri);
do {
symbol.choices.push({
text: ci.GetUTF8Text(),
confidence: ci.Confidence()
})
} while (ci.Next());
Module.destroy(ci)
}
} while (ri.Next(Module.RIL_SYMBOL));
Module.destroy(ri)
return {
text: base.GetUTF8Text(),
html: deindent(base.GetHOCRText()),
confidence: base.MeanTextConf(),
blocks: blocks,
psm: enumToString(base.GetPageSegMode(), 'PSM'),
oem: enumToString(base.oem(), 'OEM'),
version: base.Version(),
Module._ptaDestroy(Module.getPointer(poly));
}
block = {
paragraphs: [],
text: ri.GetUTF8Text(Module.RIL_BLOCK),
confidence: ri.Confidence(Module.RIL_BLOCK),
baseline: ri.getBaseline(Module.RIL_BLOCK),
bbox: ri.getBoundingBox(Module.RIL_BLOCK),
blocktype: enumToString(ri.BlockType(), 'PT'),
polygon,
};
blocks.push(block);
}
if (ri.IsAtBeginningOf(Module.RIL_PARA)) {
para = {
lines: [],
text: ri.GetUTF8Text(Module.RIL_PARA),
confidence: ri.Confidence(Module.RIL_PARA),
baseline: ri.getBaseline(Module.RIL_PARA),
bbox: ri.getBoundingBox(Module.RIL_PARA),
is_ltr: !!ri.ParagraphIsLtr(),
};
block.paragraphs.push(para);
}
if (ri.IsAtBeginningOf(Module.RIL_TEXTLINE)) {
textline = {
words: [],
text: ri.GetUTF8Text(Module.RIL_TEXTLINE),
confidence: ri.Confidence(Module.RIL_TEXTLINE),
baseline: ri.getBaseline(Module.RIL_TEXTLINE),
bbox: ri.getBoundingBox(Module.RIL_TEXTLINE),
};
para.lines.push(textline);
}
if (ri.IsAtBeginningOf(Module.RIL_WORD)) {
const fontInfo = ri.getWordFontAttributes();
const wordDir = ri.WordDirection();
word = {
symbols: [],
choices: [],
text: ri.GetUTF8Text(Module.RIL_WORD),
confidence: ri.Confidence(Module.RIL_WORD),
baseline: ri.getBaseline(Module.RIL_WORD),
bbox: ri.getBoundingBox(Module.RIL_WORD),
is_numeric: !!ri.WordIsNumeric(),
in_dictionary: !!ri.WordIsFromDictionary(),
direction: enumToString(wordDir, 'DIR'),
language: ri.WordRecognitionLanguage(),
is_bold: fontInfo.is_bold,
is_italic: fontInfo.is_italic,
is_underlined: fontInfo.is_underlined,
is_monospace: fontInfo.is_monospace,
is_serif: fontInfo.is_serif,
is_smallcaps: fontInfo.is_smallcaps,
font_size: fontInfo.pointsize,
font_id: fontInfo.font_id,
font_name: fontInfo.font_name,
};
const wc = new Module.WordChoiceIterator(ri);
do {
word.choices.push({
text: wc.GetUTF8Text(),
confidence: wc.Confidence(),
});
} while (wc.Next());
Module.destroy(wc);
textline.words.push(word);
}
}
// the generated HOCR is excessively indented, so
// we get rid of that indentation
function deindent(html){
var lines = html.split('\n')
if(lines[0].substring(0, 2) === " "){
for (var i = 0; i < lines.length; i++) {
if (lines[i].substring(0,2) === " ") {
lines[i] = lines[i].slice(2)
}
};
// let image = null;
// var pix = ri.GetBinaryImage(Module.RIL_SYMBOL)
// var image = pix2array(pix);
// // for some reason it seems that things stop working if you destroy pics
// Module._pixDestroy(Module.getPointer(pix));
if (ri.IsAtBeginningOf(Module.RIL_SYMBOL)) {
symbol = {
choices: [],
image: null,
text: ri.GetUTF8Text(Module.RIL_SYMBOL),
confidence: ri.Confidence(Module.RIL_SYMBOL),
baseline: ri.getBaseline(Module.RIL_SYMBOL),
bbox: ri.getBoundingBox(Module.RIL_SYMBOL),
is_superscript: !!ri.SymbolIsSuperscript(),
is_subscript: !!ri.SymbolIsSubscript(),
is_dropcap: !!ri.SymbolIsDropcap(),
};
word.symbols.push(symbol);
const ci = new Module.ChoiceIterator(ri);
do {
symbol.choices.push({
text: ci.GetUTF8Text(),
confidence: ci.Confidence(),
});
} while (ci.Next());
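// FIXME(review): the iterator created above is `ci`, not `i`; while this call
// stays disabled, nothing in this block destroys ci.
// Module.destroy(ci);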
}
return lines.join('\n')
}
} while (ri.Next(Module.RIL_SYMBOL));
Module.destroy(ri);
return {
text: base.GetUTF8Text(),
html: deindent(base.GetHOCRText()),
confidence: base.MeanTextConf(),
blocks,
psm: enumToString(base.GetPageSegMode(), 'PSM'),
oem: enumToString(base.oem(), 'OEM'),
version: base.Version(),
};
};

139
src/common/job.js

@@ -1,81 +1,86 @@
const adapter = require('../node/index.js')
const adapter = require('../node/');
let jobCounter = 0;
module.exports = class TesseractJob {
constructor(instance){
this.id = 'Job-' + (++jobCounter) + '-' + Math.random().toString(16).slice(3, 8)
constructor(instance) {
jobCounter += 1;
this.id = `Job-${jobCounter}-${Math.random().toString(16).slice(3, 8)}`;
this._instance = instance;
this._resolve = []
this._reject = []
this._progress = []
this._finally = []
this._instance = instance;
this._resolve = [];
this._reject = [];
this._progress = [];
this._finally = [];
}
then(resolve, reject) {
if (this._resolve.push) {
this._resolve.push(resolve);
} else {
resolve(this._resolve);
}
then(resolve, reject){
if(this._resolve.push){
this._resolve.push(resolve)
}else{
resolve(this._resolve)
}
if (reject) this.catch(reject);
return this;
}
if(reject) this.catch(reject);
return this;
}
// Registers a rejection callback and returns this for chaining.
// this._reject starts out as an array of callbacks; _handle overwrites it
// with the rejection data once a 'reject' packet arrives. The presence of a
// push method is therefore used to tell "still collecting callbacks" apart
// from "already rejected", in which case the callback is called right away
// with the stored value.
catch(reject){
if(this._reject.push){
this._reject.push(reject)
}else{
reject(this._reject)
}
return this;
}
// Queues fn in this._progress; _handle calls every queued callback with the
// packet data for each 'progress' packet. Returns this for chaining.
progress(fn){
this._progress.push(fn)
return this;
}
// Queues fn in this._finally; _handle calls the queued callbacks with the
// packet data after a 'resolve' or 'reject' packet. Returns this for chaining.
// Unlike then/catch there is no immediate-invocation path here: fn is only
// stored.
finally(fn) {
this._finally.push(fn)
return this;
}
_send(action, payload){
adapter.sendPacket(this._instance, {
jobId: this.id,
action: action,
payload: payload
})
// Registers a rejection callback and returns this for chaining.
// this._reject starts out as an array of callbacks; _handle overwrites it
// with the rejection data once a 'reject' packet arrives. The presence of a
// push method is therefore used to tell "still collecting callbacks" apart
// from "already rejected", in which case the callback is called right away
// with the stored value.
catch(reject) {
if (this._reject.push) {
this._reject.push(reject);
} else {
reject(this._reject);
}
return this;
}
_handle(packet){
var data = packet.data;
let runFinallyCbs = false;
// Queues fn in this._progress; _handle calls every queued callback with the
// packet data for each 'progress' packet. Returns this for chaining.
progress(fn) {
this._progress.push(fn);
return this;
}
if(packet.status === 'resolve'){
if(this._resolve.length === 0) console.log(data);
this._resolve.forEach(fn => {
var ret = fn(data);
if(ret && typeof ret.then == 'function'){
console.warn('TesseractJob instances do not chain like ES6 Promises. To convert it into a real promise, use Promise.resolve.')
}
})
this._resolve = data;
this._instance._dequeue()
runFinallyCbs = true;
}else if(packet.status === 'reject'){
if(this._reject.length === 0) console.error(data);
this._reject.forEach(fn => fn(data))
this._reject = data;
this._instance._dequeue()
runFinallyCbs = true;
}else if(packet.status === 'progress'){
this._progress.forEach(fn => fn(data))
}else{
console.warn('Message type unknown', packet.status)
}
// Queues fn in this._finally; _handle calls the queued callbacks with the
// packet data after a 'resolve' or 'reject' packet. Returns this for chaining.
// Unlike then/catch there is no immediate-invocation path here: fn is only
// stored.
finally(fn) {
this._finally.push(fn);
return this;
}
// Hands a packet to the adapter for this job's instance. The packet carries
// this job's id (as jobId) together with the given action and payload.
// Returns nothing.
_send(action, payload) {
adapter.sendPacket(this._instance, {
jobId: this.id,
action,
payload,
});
}
if (runFinallyCbs) {
this._finally.forEach(fn => fn(data));
// Dispatches an incoming packet to the registered callbacks according to
// packet.status:
//   'resolve'  - run the resolve callbacks, store the result, dequeue, then
//                run the finally callbacks
//   'reject'   - run the reject callbacks, store the error, dequeue, then
//                run the finally callbacks
//   'progress' - run the progress callbacks only
//   other      - warn about the unknown status
// Every callback receives packet.data.
_handle(packet) {
const { data } = packet;
let runFinallyCbs = false;
if (packet.status === 'resolve') {
// Nobody is listening for the result, so print it instead.
if (this._resolve.length === 0) console.log(data);
this._resolve.forEach((fn) => {
const ret = fn(data);
// A thenable return value suggests the caller expects Promise-style
// chaining, which this class does not do (then() returns the job itself).
if (ret && typeof ret.then === 'function') {
console.warn('TesseractJob instances do not chain like ES6 Promises. To convert it into a real promise, use Promise.resolve.');
}
});
// Replace the callback array with the result; then() checks for a push
// method to decide whether to queue a callback or call it immediately.
// NOTE(review): if data itself has a push method (e.g. an array), then()
// would queue instead of calling - confirm data is never array-like.
this._resolve = data;
this._instance._dequeue();
runFinallyCbs = true;
} else if (packet.status === 'reject') {
// Nobody is listening for the error, so print it instead.
if (this._reject.length === 0) console.error(data);
this._reject.forEach(fn => fn(data));
// Same replacement trick as for _resolve, consumed by catch().
this._reject = data;
this._instance._dequeue();
runFinallyCbs = true;
} else if (packet.status === 'progress') {
this._progress.forEach(fn => fn(data));
} else {
console.warn('Message type unknown', packet.status);
}
// Only reached with true after a 'resolve' or 'reject' packet.
if (runFinallyCbs) {
this._finally.forEach(fn => fn(data));
}
}
}
};

Loading…
Cancel
Save