Pure JavaScript OCR for more than 100 languages 📖🎉🖥
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

226 lines
5.5 KiB

/**
 * Ambient type declarations for the Tesseract API surface: the top-level
 * helper functions, the {@link Tesseract.Worker} and {@link Tesseract.Scheduler}
 * contracts, their option/result shapes, and the recognised-page data model.
 *
 * This namespace contains declarations only; it emits no runtime code.
 */
declare namespace Tesseract {
  /** Returns a {@link Scheduler}. */
  function createScheduler(): Scheduler;

  /**
   * Returns a {@link Worker}.
   *
   * @param options Optional subset of {@link WorkerOptions}.
   */
  function createWorker(options?: Partial<WorkerOptions>): Worker;

  /**
   * Switches logging on or off.
   *
   * @param logging `true` to enable, `false` to disable.
   */
  function setLogging(logging: boolean): void;

  /**
   * Recognises `image` without the caller managing a worker explicitly.
   *
   * @param image Image source; see {@link ImageLike} for accepted kinds.
   * @param langs Optional language string.
   * @param options Optional subset of {@link WorkerOptions}.
   * @returns A promise resolving to a {@link RecognizeResult}.
   */
  function recognize(
    image: ImageLike,
    langs?: string,
    options?: Partial<WorkerOptions>,
  ): Promise<RecognizeResult>;

  /**
   * Runs detection on `image` without the caller managing a worker explicitly.
   *
   * The return type is `Promise<DetectResult>` so the result is type-checked
   * and agrees with {@link Worker.detect}; it was previously `any`.
   *
   * @param image Image source; see {@link ImageLike} for accepted kinds.
   * @param options Optional subset of {@link WorkerOptions}.
   * @returns A promise resolving to a {@link DetectResult}.
   */
  function detect(image: ImageLike, options?: Partial<WorkerOptions>): Promise<DetectResult>;

  /** Contract for an object that holds workers and accepts jobs for them. */
  interface Scheduler {
    /**
     * Registers `worker` with the scheduler.
     *
     * @returns A string (presumably the worker's id — confirm against the implementation).
     */
    addWorker(worker: Worker): string;

    /**
     * Submits a job.
     *
     * @param action Name of the action to run (presumably a {@link Worker} method name — confirm).
     * @param args Arguments forwarded with the action.
     * @returns A promise resolving to one of the three result shapes; callers
     *          must narrow it themselves because `action` is an untyped string.
     */
    addJob(action: string, ...args: any[]): Promise<ConfigResult | RecognizeResult | DetectResult>;

    /** Terminates the scheduler; the resolved value is untyped. */
    terminate(): Promise<any>;

    /** Returns the queue length as a number. */
    getQueueLen(): number;

    /** Returns the number of workers as a number. */
    getNumWorkers(): number;
  }

  /**
   * Contract for a single worker.
   *
   * Every method accepts an optional trailing `jobId` and returns a promise.
   */
  interface Worker {
    load(jobId?: string): Promise<ConfigResult>;
    writeText(path: string, text: string, jobId?: string): Promise<ConfigResult>;
    readText(path: string, jobId?: string): Promise<ConfigResult>;
    removeText(path: string, jobId?: string): Promise<ConfigResult>;
    /** Generic call: `method` is a name and `args` its argument list. */
    FS(method: string, args: any[], jobId?: string): Promise<ConfigResult>;
    loadLanguage(langs?: string, jobId?: string): Promise<ConfigResult>;
    initialize(langs?: string, oem?: OEM, jobId?: string): Promise<ConfigResult>;
    setParameters(params: Partial<WorkerParams>, jobId?: string): Promise<ConfigResult>;
    recognize(
      image: ImageLike,
      options?: Partial<RecognizeOptions>,
      jobId?: string,
    ): Promise<RecognizeResult>;
    detect(image: ImageLike, jobId?: string): Promise<DetectResult>;
    terminate(jobId?: string): Promise<ConfigResult>;
    getPDF(title?: string, textonly?: boolean, jobId?: string): Promise<GetPDFResult>;
  }

  /** Options accepted (as a `Partial`) by `createWorker`, `recognize` and `detect`. */
  interface WorkerOptions {
    corePath: string;
    langPath: string;
    cachePath: string;
    dataPath: string;
    workerPath: string;
    cacheMethod: string;
    workerBlobURL: boolean;
    gzip: boolean;
    /** Callback taking one untyped argument. */
    logger: (arg: any) => void;
    /** Callback taking one untyped argument. */
    errorHandler: (arg: any) => void;
  }

  /**
   * Parameters accepted (as a `Partial`) by {@link Worker.setParameters}.
   *
   * Apart from the two enum-typed modes, every value is a string.
   */
  interface WorkerParams {
    tessedit_ocr_engine_mode: OEM;
    tessedit_pageseg_mode: PSM;
    tessedit_char_whitelist: string;
    preserve_interword_spaces: string;
    user_defined_dpi: string;
    // NOTE(review): the `tessjs_create_*` entries presumably toggle the matching
    // nullable output fields on `Page` — confirm the accepted string values.
    tessjs_create_hocr: string;
    tessjs_create_tsv: string;
    tessjs_create_box: string;
    tessjs_create_unlv: string;
    tessjs_create_osd: string;
  }

  /** Options accepted (as a `Partial`) by {@link Worker.recognize}. */
  interface RecognizeOptions {
    rectangle: Rectangle;
  }

  /** Result whose payload is untyped. */
  interface ConfigResult {
    jobId: string;
    data: any;
  }

  /** Result whose payload is a {@link Page}. */
  interface RecognizeResult {
    jobId: string;
    data: Page;
  }

  /** Result of {@link Worker.getPDF}; the payload is an array of numbers. */
  interface GetPDFResult {
    jobId: string;
    data: number[];
  }

  /** Result whose payload is a {@link DetectData}. */
  interface DetectResult {
    jobId: string;
    data: DetectData;
  }

  /** Script and orientation fields carried by a {@link DetectResult}. */
  interface DetectData {
    tesseract_script_id: number;
    script: string;
    script_confidence: number;
    orientation_degrees: number;
    orientation_confidence: number;
  }

  /** Rectangle described by its top-left corner and its size. */
  interface Rectangle {
    left: number;
    top: number;
    width: number;
    height: number;
  }

  // NOTE(review): both enums are ambient `const enum`s. TypeScript inlines their
  // members at use sites and rejects references to them under `isolatedModules`;
  // confirm that is acceptable for consumers before relying on it.

  /** Engine mode. Members carry no initialisers, so they are numbered 0..3 in order. */
  const enum OEM {
    TESSERACT_ONLY,
    LSTM_ONLY,
    TESSERACT_LSTM_COMBINED,
    DEFAULT,
  }

  /** Page segmentation mode. Values are numeric strings `'0'`..`'13'`, not numbers. */
  const enum PSM {
    OSD_ONLY = '0',
    AUTO_OSD = '1',
    AUTO_ONLY = '2',
    AUTO = '3',
    SINGLE_COLUMN = '4',
    SINGLE_BLOCK_VERT_TEXT = '5',
    SINGLE_BLOCK = '6',
    SINGLE_LINE = '7',
    SINGLE_WORD = '8',
    CIRCLE_WORD = '9',
    SINGLE_CHAR = '10',
    SPARSE_TEXT = '11',
    SPARSE_TEXT_OSD = '12',
    RAW_LINE = '13',
  }

  /**
   * Every kind of image input accepted by the recognise/detect entry points.
   * Mixes DOM types with `Buffer`, so both sets of typings must be available
   * to the compiler.
   */
  type ImageLike =
    | string
    | HTMLImageElement
    | HTMLCanvasElement
    | HTMLVideoElement
    | CanvasRenderingContext2D
    | File
    | Blob
    | ImageData
    | Buffer;

  /**
   * A block of a {@link Page}. Holds its paragraphs plus flattened lists of
   * the lines, words and symbols, and a back-reference to the owning page.
   */
  interface Block {
    paragraphs: Paragraph[];
    text: string;
    confidence: number;
    baseline: Baseline;
    bbox: Bbox;
    blocktype: string;
    polygon: any;
    page: Page;
    lines: Line[];
    words: Word[];
    symbols: Symbol[];
  }

  /** Baseline segment from (`x0`, `y0`) to (`x1`, `y1`), with a presence flag. */
  interface Baseline {
    x0: number;
    y0: number;
    x1: number;
    y1: number;
    has_baseline: boolean;
  }

  /** Bounding box given by two corner points (`x0`, `y0`) and (`x1`, `y1`). */
  interface Bbox {
    x0: number;
    y0: number;
    x1: number;
    y1: number;
  }

  /** A text line, with back-references to its paragraph, block and page. */
  interface Line {
    words: Word[];
    text: string;
    confidence: number;
    baseline: Baseline;
    bbox: Bbox;
    paragraph: Paragraph;
    block: Block;
    page: Page;
    symbols: Symbol[];
  }

  /** A paragraph, with back-references to its block and page. */
  interface Paragraph {
    lines: Line[];
    text: string;
    confidence: number;
    baseline: Baseline;
    bbox: Bbox;
    is_ltr: boolean;
    block: Block;
    page: Page;
    words: Word[];
    symbols: Symbol[];
  }

  /**
   * A single recognised symbol, with back-references up to the page.
   *
   * Inside this namespace the name shadows the global `Symbol` type.
   */
  interface Symbol {
    choices: Choice[];
    image: any;
    text: string;
    confidence: number;
    baseline: Baseline;
    bbox: Bbox;
    is_superscript: boolean;
    is_subscript: boolean;
    is_dropcap: boolean;
    word: Word;
    line: Line;
    paragraph: Paragraph;
    block: Block;
    page: Page;
  }

  /** One candidate text with its confidence. */
  interface Choice {
    text: string;
    confidence: number;
  }

  /** A recognised word with its style flags, font data and back-references. */
  interface Word {
    symbols: Symbol[];
    choices: Choice[];
    text: string;
    confidence: number;
    baseline: Baseline;
    bbox: Bbox;
    is_numeric: boolean;
    in_dictionary: boolean;
    direction: string;
    language: string;
    is_bold: boolean;
    is_italic: boolean;
    is_underlined: boolean;
    is_monospace: boolean;
    is_serif: boolean;
    is_smallcaps: boolean;
    font_size: number;
    font_id: number;
    font_name: string;
    line: Line;
    paragraph: Paragraph;
    block: Block;
    page: Page;
  }

  /**
   * Root of the recognition data model; the payload of a {@link RecognizeResult}.
   * Exposes the block tree plus flattened lists of paragraphs, lines, words and symbols.
   */
  interface Page {
    blocks: Block[];
    confidence: number;
    lines: Line[];
    oem: string;
    // NOTE(review): `osd` is non-nullable while the other optional outputs below
    // allow `null` — confirm whether it can also be `null`.
    osd: string;
    paragraphs: Paragraph[];
    psm: string;
    symbols: Symbol[];
    text: string;
    version: string;
    words: Word[];
    hocr: string | null;
    tsv: string | null;
    box: string | null;
    unlv: string | null;
    // NOTE(review): `WorkerParams` has no `tessjs_create_sd` flag; this may be a
    // typo for `osd`. Kept as-is for compatibility — confirm against the runtime.
    sd: string | null;
  }
}
// Export assignment: the module's entire export is the `Tesseract` namespace
// (the CommonJS `module.exports = ...` shape), so consumers import it as a whole.
export = Tesseract;
// UMD global declaration: additionally makes the namespace available as a
// global named `Tesseract` to files that are not modules.
export as namespace Tesseract;