@ -9,7 +9,9 @@
* /
* /
const { readImage , loadLang } = require ( 'tesseract.js-utils' ) ;
const { readImage , loadLang } = require ( 'tesseract.js-utils' ) ;
const check = require ( 'check-types' ) ;
const check = require ( 'check-types' ) ;
const pdfTTF = require ( './pdf-ttf' ) ;
const dump = require ( './dump' ) ;
const dump = require ( './dump' ) ;
const { defaultParams } = require ( './options' ) ;
/ *
/ *
* Tesseract Module returned by TesseractCore .
* Tesseract Module returned by TesseractCore .
@ -51,6 +53,58 @@ const setImage = (image) => {
return data === null ? pix : data ;
return data === null ? pix : data ;
} ;
} ;
/**
 * handleParams
 *
 * @name handleParams
 * @function handle params from users: initialize the api, then apply variables
 * @access private
 * @param {string} lang - lang string for Init()
 * @param {object} customParams - an object of params
 */
const handleParams = (lang, customParams) => {
  // Entries in customParams override defaultParams. The engine mode is split
  // off because it is handed to Init() instead of SetVariable().
  const {
    tessedit_ocr_engine_mode,
    ...params
  } = {
    ...defaultParams,
    ...customParams,
  };
  api.Init(null, lang, tessedit_ocr_engine_mode);
  // Every remaining key/value pair is forwarded to the api as a variable.
  Object.keys(params).forEach((key) => {
    api.SetVariable(key, params[key]);
  });
};
/**
 * handleOutput
 *
 * @name handleOutput
 * @function handle file output (writes a PDF when tessedit_create_pdf is '1')
 * @access private
 * @param {object} customParams - an object of params
 */
const handleOutput = (customParams) => {
  // Entries in customParams override defaultParams.
  const {
    tessedit_create_pdf,
    textonly_pdf,
    pdf_name,
    pdf_title,
  } = {
    ...defaultParams,
    ...customParams,
  };

  // Flags are compared as strings: only the exact value '1' enables a feature.
  if (tessedit_create_pdf === '1') {
    const pdfRenderer = new TessModule.TessPDFRenderer(pdf_name, '/', textonly_pdf === '1');
    try {
      pdfRenderer.BeginDocument(pdf_title);
      pdfRenderer.AddImage(api);
      pdfRenderer.EndDocument();
      // Read the rendered document back from the module's FS at /<pdf_name>.pdf
      // and hand it to the adapter under the same file name.
      adapter.writeFile(`${pdf_name}.pdf`, TessModule.FS.readFile(`/${pdf_name}.pdf`), 'application/pdf');
    } finally {
      // Release the renderer even when rendering or writing throws.
      TessModule._free(pdfRenderer);
    }
  }
};
/ * *
/ * *
* handleInit
* handleInit
*
*
@ -75,6 +129,7 @@ const handleInit = ({ corePath }, res) => {
} )
} )
. then ( ( tessModule ) => {
. then ( ( tessModule ) => {
TessModule = tessModule ;
TessModule = tessModule ;
TessModule . FS . writeFile ( '/pdf.ttf' , adapter . b64toU8Array ( pdfTTF ) ) ;
api = new TessModule . TessBaseAPI ( ) ;
api = new TessModule . TessBaseAPI ( ) ;
res . progress ( { status : 'initialized tesseract' , progress : 1 } ) ;
res . progress ( { status : 'initialized tesseract' , progress : 1 } ) ;
} ) ;
} ) ;
@ -123,22 +178,16 @@ const handleRecognize = ({
. then ( ( ) => (
. then ( ( ) => (
loadLanguage ( { lang , options } , res )
loadLanguage ( { lang , options } , res )
. then ( ( ) => {
. then ( ( ) => {
const OEM = check . undefined ( params [ 'init_oem' ] )
? TessModule . OEM _DEFAULT
: params [ 'init_oem' ] ;
const progressUpdate = ( progress ) => {
const progressUpdate = ( progress ) => {
res . progress ( { status : 'initializing api' , progress } ) ;
res . progress ( { status : 'initializing api' , progress } ) ;
} ;
} ;
progressUpdate ( 0 ) ;
progressUpdate ( 0 ) ;
api . Init ( null , lang , OEM ) ;
handleParams ( lang , params ) ;
progressUpdate ( 0.3 ) ;
progressUpdate ( 0.5 ) ;
Object . keys ( params ) . filter ( key => ! key . startsWith ( 'init_' ) ) . forEach ( ( key ) => {
api . SetVariable ( key , params [ key ] ) ;
} ) ;
progressUpdate ( 0.6 ) ;
const ptr = setImage ( image ) ;
const ptr = setImage ( image ) ;
progressUpdate ( 1 ) ;
progressUpdate ( 1 ) ;
api . Recognize ( null ) ;
api . Recognize ( null ) ;
handleOutput ( params ) ;
const result = dump ( TessModule , api ) ;
const result = dump ( TessModule , api ) ;
api . End ( ) ;
api . End ( ) ;
TessModule . _free ( ptr ) ;
TessModule . _free ( ptr ) ;