'use strict' ;
// The `require('level-js')` that used to provide `leveljs` is commented out,
// so `leveljs` is only usable when something else defines it.
// Something about trying to store these language files in indexedDB
// causes iOS Safari to crash, so the cache is disabled there too.
var iOS = typeof navigator !== 'undefined' && /iPad|iPhone|iPod/.test(navigator.userAgent);
var noIDB = typeof indexedDB === 'undefined' || iOS;
// Cache handle used by getLanguageData. When no cache can be used, the
// stand-in below reports an error from open(), which makes the caller
// download the language file instead of reading it from the cache.
var db = noIDB || typeof leveljs === 'undefined' ? {
  open: function open(_, cb) {
    return cb(true);
  }
} : leveljs('./tessdata2');
var langdata = require ( '../common/langdata.json' ) ;
module . exports = function getLanguageData ( req , res , cb ) {
var lang = req . options . lang ;
function saveDataFile ( data ) {
try {
db . put ( lang , data , function ( err ) {
return console . log ( 'cached' , lang , err ) ;
} ) ;
} finally {
cb ( data ) ;
db . open ( { compression : false } , function ( err ) {
if ( err ) return fetchLanguageData ( req , res , cb ) ;
db . get ( lang , function ( err , data ) {
if ( err ) return fetchLanguageData ( req , res , saveDataFile ) ;
res . progress ( { status : 'found in cache ' + lang + '.traineddata' } ) ;
cb ( data ) ;
} ) ;
} ) ;
} ;
var ungzip = require ( 'pako/lib/inflate.js' ) . ungzip ;
/**
 * Download `<lang>.traineddata.gz` from `req.workerOptions.langPath`, undo
 * any gzip layers and pass the resulting bytes to `cb`.
 *
 * @param {Object} req - job request (`options.lang`, `workerOptions.langPath`).
 * @param {Object} res - job responder; receives `progress` and `reject` calls.
 * @param {Function} cb - called with a Uint8Array of the unpacked file.
 */
function fetchLanguageData(req, res, cb) {
  var lang = req.options.lang;
  var langfile = lang + '.traineddata.gz';
  var url = req.workerOptions.langPath + langfile;
  // Denominator for download progress; undefined for languages not in the table.
  var expectedSize = langdata[lang];

  var xhr = new XMLHttpRequest();
  xhr.open('GET', url, true);
  xhr.responseType = 'arraybuffer';

  xhr.onerror = function () {
    xhr.onprogress = xhr.onload = null;
    // A failed request has no language data: fail the job rather than
    // handing the XHR object to `cb` as if it were the file contents.
    res.reject('Error downloading language ' + url);
  };

  xhr.onprogress = function (e) {
    return res.progress({
      status: 'downloading ' + langfile,
      loaded: e.loaded,
      // Report 0 instead of NaN when the expected size is unknown.
      progress: expectedSize > 0 ? Math.min(1, e.loaded / expectedSize) : 0
    });
  };

  xhr.onload = function () {
    // Status 0 with a body is accepted as well as 200.
    var ok = xhr.status === 200 || (xhr.status === 0 && xhr.response);
    if (!ok) return res.reject('Error downloading language ' + url);
    res.progress({ status: 'unzipping ' + langfile, progress: 0 });

    // in case the gzips are already ungzipped or extra gzipped
    var response = new Uint8Array(xhr.response);
    try {
      var n = 2;
      // 0x1f 0x8b is the gzip magic number; keep inflating while it is present.
      while (response[0] === 0x1f && response[1] === 0x8b) {
        response = ungzip(response);
        res.progress({ status: 'unzipping ' + langfile, progress: 1 - 1 / n++ });
      }
    } catch (err) {
      return res.reject('Error unzipping language file ' + langfile + '\n' + err.message);
    }
    res.progress({ status: 'unzipping ' + langfile, progress: 1 });

    cb(response);
  };

  xhr.send();
}
} , { "../common/langdata.json" : 18 , "pako/lib/inflate.js" : 1 } ] , 15 : [ function ( require , module , exports ) {
( function ( process , global ) {
'use strict' ;
var workerUtils = require ( '../common/worker.js' ) ;
// Development builds announce themselves in the console.
if (process.env.NODE_ENV === "development") {
  console.debug('Using Development Worker');
}

// Each message received by the worker carries a job packet; hand it to the
// shared dispatcher and post every response object back to the sender.
global.addEventListener('message', function (e) {
  var packet = e.data;
  workerUtils.dispatchHandlers(packet, function (obj) {
    return postMessage(obj);
  });
});
exports . getCore = function ( req , res ) {
if ( ! global . TesseractCore ) {
res . progress ( { status : 'loading tesseract core' , progress : 0 } ) ;
importScripts ( req . workerOptions . corePath ) ;
res . progress ( { status : 'loading tesseract core' , progress : 1 } ) ;
return TesseractCore ;
} ;
// Expose the language-data loader from ./lang.js as part of this module's exports.
exports . getLanguageData = require ( './lang.js' ) ;
// Register this module's exports (getCore, getLanguageData) as the adapter used by the shared worker code.
workerUtils . setAdapter ( module . exports ) ;
} ) . call ( this , require ( '_process' ) , typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? window : { } )
} , { "../common/worker.js" : 19 , "./lang.js" : 14 , "_process" : 13 } ] , 16 : [ function ( require , module , exports ) {
'use strict' ;
// This converts an image to grayscale
module . exports = function desaturate ( image ) {
var width , height ;
if ( image . data ) {
var src = image . data ;
width = image . width , height = image . height ;
var dst = new Uint8Array ( width * height ) ;
var srcLength = src . length | 0 ,
srcLength _16 = srcLength - 16 | 0 ;
for ( var i = 0 , j = 0 ; i <= srcLength _16 ; i += 16 , j += 4 ) {
// convert to grayscale 4 pixels at a time; eveything with alpha gets put in front of 50% gray
dst [ j ] = ( src [ i ] * 77 + src [ i + 1 ] * 151 + src [ i + 2 ] * 28 ) * src [ i + 3 ] + ( 255 - src [ i + 3 ] << 15 ) + 32768 >> 16 ;
dst [ j + 1 ] = ( src [ i + 4 ] * 77 + src [ i + 5 ] * 151 + src [ i + 6 ] * 28 ) * src [ i + 7 ] + ( 255 - src [ i + 7 ] << 15 ) + 32768 >> 16 ;
dst [ j + 2 ] = ( src [ i + 8 ] * 77 + src [ i + 9 ] * 151 + src [ i + 10 ] * 28 ) * src [ i + 11 ] + ( 255 - src [ i + 11 ] << 15 ) + 32768 >> 16 ;
dst [ j + 3 ] = ( src [ i + 12 ] * 77 + src [ i + 13 ] * 151 + src [ i + 14 ] * 28 ) * src [ i + 15 ] + ( 255 - src [ i + 15 ] << 15 ) + 32768 >> 16 ;
for ( ; i < srcLength ; i += 4 , ++ j ) {
//finish up
dst [ j ] = ( src [ i ] * 77 + src [ i + 1 ] * 151 + src [ i + 2 ] * 28 ) * src [ i + 3 ] + ( 255 - src [ i + 3 ] << 15 ) + 32768 >> 16 ;
} image = dst ;
} else {
throw 'Invalid ImageData' ;
return image ;
} ;
} , { } ] , 17 : [ function ( require , module , exports ) {
'use strict' ;
module . exports = function DumpLiterallyEverything ( Module , base ) {
var ri = base . GetIterator ( ) ;
var blocks = [ ] ;
var block , para , textline , word , symbol ;
function enumToString ( value , prefix ) {
return Object . keys ( Module ) . filter ( function ( e ) {
return e . substr ( 0 , prefix . length + 1 ) == prefix + '_' ;
} ) . filter ( function ( e ) {
return Module [ e ] === value ;
} ) . map ( function ( e ) {
return e . slice ( prefix . length + 1 ) ;
} ) [ 0 ] ;
ri . Begin ( ) ;
do {
if ( ri . IsAtBeginningOf ( Module . RIL _BLOCK ) ) {
var poly = ri . BlockPolygon ( ) ;
var polygon = null ;
// BlockPolygon() returns null when automatic page segmentation is off
if ( Module . getPointer ( poly ) > 0 ) {
var n = poly . get _n ( ) ,
px = poly . get _x ( ) ,
py = poly . get _y ( ) ,
polygon = [ ] ;
for ( var i = 0 ; i < n ; i ++ ) {
polygon . push ( [ px . getValue ( i ) , py . getValue ( i ) ] ) ;
Module . _ptaDestroy ( Module . getPointer ( poly ) ) ;
block = {
paragraphs : [ ] ,
text : ri . GetUTF8Text ( Module . RIL _BLOCK ) ,
confidence : ri . Confidence ( Module . RIL _BLOCK ) ,
baseline : ri . getBaseline ( Module . RIL _BLOCK ) ,
bbox : ri . getBoundingBox ( Module . RIL _BLOCK ) ,
blocktype : enumToString ( ri . BlockType ( ) , 'PT' ) ,
polygon : polygon
} ;
blocks . push ( block ) ;
if ( ri . IsAtBeginningOf ( Module . RIL _PARA ) ) {
para = {
lines : [ ] ,
text : ri . GetUTF8Text ( Module . RIL _PARA ) ,
confidence : ri . Confidence ( Module . RIL _PARA ) ,
baseline : ri . getBaseline ( Module . RIL _PARA ) ,
bbox : ri . getBoundingBox ( Module . RIL _PARA ) ,
is _ltr : ! ! ri . ParagraphIsLtr ( )
} ;
block . paragraphs . push ( para ) ;
if ( ri . IsAtBeginningOf ( Module . RIL _TEXTLINE ) ) {
textline = {
words : [ ] ,
text : ri . GetUTF8Text ( Module . RIL _TEXTLINE ) ,
confidence : ri . Confidence ( Module . RIL _TEXTLINE ) ,
baseline : ri . getBaseline ( Module . RIL _TEXTLINE ) ,
bbox : ri . getBoundingBox ( Module . RIL _TEXTLINE )
} ;
para . lines . push ( textline ) ;
if ( ri . IsAtBeginningOf ( Module . RIL _WORD ) ) {
var fontInfo = ri . getWordFontAttributes ( ) ,
wordDir = ri . WordDirection ( ) ;
word = {
symbols : [ ] ,
choices : [ ] ,
text : ri . GetUTF8Text ( Module . RIL _WORD ) ,
confidence : ri . Confidence ( Module . RIL _WORD ) ,
baseline : ri . getBaseline ( Module . RIL _WORD ) ,
bbox : ri . getBoundingBox ( Module . RIL _WORD ) ,
is _numeric : ! ! ri . WordIsNumeric ( ) ,
in _dictionary : ! ! ri . WordIsFromDictionary ( ) ,
direction : enumToString ( wordDir , 'DIR' ) ,
language : ri . WordRecognitionLanguage ( ) ,
is _bold : fontInfo . is _bold ,
is _italic : fontInfo . is _italic ,
is _underlined : fontInfo . is _underlined ,
is _monospace : fontInfo . is _monospace ,
is _serif : fontInfo . is _serif ,
is _smallcaps : fontInfo . is _smallcaps ,
font _size : fontInfo . pointsize ,
font _id : fontInfo . font _id ,
font _name : fontInfo . font _name
} ;
var wc = new Module . WordChoiceIterator ( ri ) ;
do {
word . choices . push ( {
text : wc . GetUTF8Text ( ) ,
confidence : wc . Confidence ( )
} ) ;
} while ( wc . Next ( ) ) ;
Module . destroy ( wc ) ;
textline . words . push ( word ) ;
var image = null ;
// var pix = ri.GetBinaryImage(Module.RIL_SYMBOL)
// var image = pix2array(pix);
// // for some reason it seems that things stop working if you destroy pics
// Module._pixDestroy(Module.getPointer(pix));
if ( ri . IsAtBeginningOf ( Module . RIL _SYMBOL ) ) {
symbol = {
choices : [ ] ,
image : image ,
text : ri . GetUTF8Text ( Module . RIL _SYMBOL ) ,
confidence : ri . Confidence ( Module . RIL _SYMBOL ) ,
baseline : ri . getBaseline ( Module . RIL _SYMBOL ) ,
bbox : ri . getBoundingBox ( Module . RIL _SYMBOL ) ,
is _superscript : ! ! ri . SymbolIsSuperscript ( ) ,
is _subscript : ! ! ri . SymbolIsSubscript ( ) ,
is _dropcap : ! ! ri . SymbolIsDropcap ( )
} ;
word . symbols . push ( symbol ) ;
var ci = new Module . ChoiceIterator ( ri ) ;
do {
symbol . choices . push ( {
text : ci . GetUTF8Text ( ) ,
confidence : ci . Confidence ( )
} ) ;
} while ( ci . Next ( ) ) ;
Module . destroy ( ci ) ;
} while ( ri . Next ( Module . RIL _SYMBOL ) ) ;
Module . destroy ( ri ) ;
return {
text : base . GetUTF8Text ( ) ,
html : deindent ( base . GetHOCRText ( ) ) ,
confidence : base . MeanTextConf ( ) ,
blocks : blocks ,
psm : enumToString ( base . GetPageSegMode ( ) , 'PSM' ) ,
oem : enumToString ( base . oem ( ) , 'OEM' ) ,
version : base . Version ( )
} ;
} ;
// The generated HOCR is excessively indented, so we get rid of that
// indentation: when the first line starts with two spaces, one level of
// two-space indentation is removed from every line that has it.
function deindent(html) {
  var lines = html.split('\n');
  // substring(0, 2) yields at most two characters, so the prefix it is
  // compared against must be the two-space indent that slice(2) removes.
  if (lines[0].substring(0, 2) === '  ') {
    for (var i = 0; i < lines.length; i++) {
      if (lines[i].substring(0, 2) === '  ') {
        lines[i] = lines[i].slice(2);
      }
    }
  }
  return lines.join('\n');
}
} , { } ] , 18 : [ function ( require , module , exports ) {
// Number per language code. The language downloader divides the bytes loaded so far by
// the entry for the requested language to compute download progress, so these are
// presumably the sizes in bytes of the .traineddata.gz downloads — TODO confirm.
module . exports = { "afr" : 1079573 , "ara" : 1701536 , "aze" : 1420865 , "bel" : 1276820 , "ben" : 6772012 , "bul" : 1605615 , "cat" : 1652368 , "ces" : 1035441 , "chi_sim" : 17710414 , "chi_tra" : 24717749 , "chr" : 320649 , "dan-frak" : 677656 , "dan" : 1972936 , "deu-frak" : 822644 , "deu" : 991656 , "ell" : 859719 , "eng" : 9453554 , "enm" : 619254 , "epo" : 1241212 , "equ" : 821130 , "est" : 1905040 , "eus" : 1641190 , "fin" : 979418 , "fra" : 1376221 , "frk" : 5912963 , "frm" : 5147082 , "glg" : 1674938 , "grc" : 3012615 , "heb" : 1051501 , "hin" : 6590065 , "hrv" : 1926995 , "hun" : 3074473 , "ind" : 1874776 , "isl" : 1634041 , "ita" : 948593 , "ita_old" : 3436571 , "jpn" : 13507168 , "kan" : 4390317 , "kor" : 5353098 , "lav" : 1843944 , "lit" : 1779240 , "mal" : 5966263 , "meme" : 88453 , "mkd" : 1163087 , "mlt" : 1463001 , "msa" : 1665427 , "nld" : 1134708 , "nor" : 2191610 , "osd" : 4274649 , "pol" : 7024662 , "por" : 909359 , "ron" : 915680 , "rus" : 5969957 , "slk-frak" : 289885 , "slk" : 2217342 , "slv" : 1611338 , "spa" : 883170 , "spa_old" : 5647453 , "sqi" : 1667041 , "srp" : 1770244 , "swa" : 757916 , "swe" : 2451917 , "tam" : 3498763 , "tel" : 5795246 , "tgl" : 1496256 , "tha" : 3811136 , "tur" : 3563264 , "ukr" : 937566 , "vie" : 2195922 }
} , { } ] , 19 : [ function ( require , module , exports ) {
'use strict' ;
// Responder of the most recently dispatched job; the core's progress callback reports through it.
var latestJob ;
// Tesseract core module instance; assigned by handleInit.
var Module ;
// TessBaseAPI instance created by handleInit together with Module.
var base ;
// Platform adapter (provides getCore and getLanguageData); replaced through setAdapter.
var adapter = { } ;
/**
 * Route one job packet to its handler and report the outcome through `send`.
 *
 * @param {Object} packet - `{ jobId, action, payload }`; `action` is
 *   'recognize' or 'detect'.
 * @param {Function} send - receives `{ jobId, status, action, data }` objects,
 *   where `status` is 'resolve', 'reject' or 'progress'.
 */
function dispatchHandlers(packet, send) {
  function respond(status, data) {
    send({
      jobId: packet.jobId,
      status: status,
      action: packet.action,
      data: data
    });
  }
  respond.resolve = respond.bind(null, 'resolve');
  respond.reject = respond.bind(null, 'reject');
  respond.progress = respond.bind(null, 'progress');

  // Remembered so progress reported by the core goes to the current job.
  latestJob = respond;

  try {
    if (packet.action === 'recognize') {
      handleRecognize(packet.payload, respond);
    } else if (packet.action === 'detect') {
      handleDetect(packet.payload, respond);
    } else {
      // Settle the job instead of leaving the sender waiting forever.
      respond.reject('Unknown action: ' + packet.action);
    }
  } catch (err) {
    respond.reject(err);
  }
}
exports . dispatchHandlers = dispatchHandlers ;
exports . setAdapter = function setAdapter ( impl ) {
adapter = impl ;
} ;
/**
 * Make sure a Tesseract core module with enough memory for the requested
 * language exists, creating `Module` and `base` when needed.
 *
 * @param {Object} req - job request; `req.options.lang` selects the memory size.
 * @param {Object} res - job responder; receives initialization progress.
 */
function handleInit(req, res) {
  var MIN_MEMORY = 100663296; // 96 MiB

  // These languages get a larger minimum.
  if (['chi_sim', 'chi_tra', 'jpn'].indexOf(req.options.lang) !== -1) {
    MIN_MEMORY = 167772160; // 160 MiB
  }

  if (!Module || Module.TOTAL_MEMORY < MIN_MEMORY) {
    var Core = adapter.getCore(req, res);

    res.progress({ status: 'initializing tesseract', progress: 0 });

    Module = Core({
      // Request the minimum checked above; without it a core whose default
      // memory is smaller would be re-created for every job.
      // NOTE(review): assumes the core honours a TOTAL_MEMORY option — confirm.
      TOTAL_MEMORY: MIN_MEMORY,
      TesseractProgress: function TesseractProgress(percent) {
        // Percentages up to 30 are reported as 0; 30..100 maps onto 0..1.
        latestJob.progress({ status: 'recognizing text', progress: Math.max(0, (percent - 30) / 70) });
      },
      onRuntimeInitialized: function onRuntimeInitialized() {}
    });

    Module.FS_createPath("/", "tessdata", true, true);
    base = new Module.TessBaseAPI();
    res.progress({ status: 'initializing tesseract', progress: 1 });
  }
}
var dump = require ( './dump.js' ) ;
var desaturate = require ( './desaturate.js' ) ;
/**
 * Desaturate `image`, copy the result into memory allocated through
 * `Module.allocate` and set it as the input image of `base`.
 *
 * @param {Object} Module - Tesseract core module.
 * @param {Object} base - API instance that receives the image.
 * @param {Object} image - image with `width` and `height`, accepted by desaturate().
 * @returns {*} the value returned by `Module.allocate`; the callers in this
 *   file release it with `Module._free` when they are done.
 */
function setImage(Module, base, image) {
  var imgbin = desaturate(image);
  var width = image.width;
  var height = image.height;

  var ptr = Module.allocate(imgbin, 'i8', Module.ALLOC_NORMAL);
  // One byte per pixel, `width` bytes per row.
  base.SetImage(Module.wrapPointer(ptr), width, height, 1, width);
  base.SetRectangle(0, 0, width, height);
  return ptr;
}
/**
 * Make sure the trained data for `req.options.lang` exists in the core's
 * file system, fetching it through the adapter on first use.
 *
 * @param {Object} req - job request; `req.options.lang` is the language code.
 * @param {Object} res - job responder; receives loading progress.
 * @param {Function} cb - called without arguments once the language is loaded.
 */
function loadLanguage(req, res, cb) {
  var lang = req.options.lang;

  if (!Module._loadedLanguages) Module._loadedLanguages = {};
  // Own-property check: a plain `in` test would also match inherited keys
  // such as 'constructor' and skip loading.
  if (Object.prototype.hasOwnProperty.call(Module._loadedLanguages, lang)) return cb();

  adapter.getLanguageData(req, res, function (data) {
    res.progress({ status: 'loading ' + lang + '.traineddata', progress: 0 });
    Module.FS_createDataFile('tessdata', lang + ".traineddata", data, true, false);
    Module._loadedLanguages[lang] = true;
    res.progress({ status: 'loading ' + lang + '.traineddata', progress: 1 });
    cb();
  });
}
/**
 * Run text recognition for one job and settle it through `res`.
 *
 * @param {Object} req - job payload: `options` (including `lang`) and `image`.
 * @param {Object} res - job responder (`progress`, `resolve`, `reject`).
 */
function handleRecognize(req, res) {
  handleInit(req, res);

  loadLanguage(req, res, function () {
    var lang = req.options.lang;
    var options = req.options;
    var ptr;
    var result;

    // This callback can run after dispatchHandlers has returned (when the
    // language had to be fetched), so failures are reported here; otherwise
    // the job would never settle.
    try {
      try {
        res.progress({ status: 'initializing api', progress: 0 });
        base.Init(null, lang);
        res.progress({ status: 'initializing api', progress: 0.3 });

        // Every own option of the request is passed to the engine as a variable.
        for (var option in options) {
          if (Object.prototype.hasOwnProperty.call(options, option)) {
            base.SetVariable(option, options[option]);
          }
        }

        res.progress({ status: 'initializing api', progress: 0.6 });
        ptr = setImage(Module, base, req.image);
        res.progress({ status: 'initializing api', progress: 1 });

        base.Recognize(null);
        result = dump(Module, base);
      } finally {
        // Always end the session and release the image buffer, even on failure.
        base.End();
        if (ptr !== undefined) Module._free(ptr);
      }
    } catch (err) {
      return res.reject(err);
    }

    res.resolve(result);
  });
}
/**
 * Run orientation and script detection for one job and settle it through `res`.
 *
 * Note: overwrites `req.options.lang` with 'osd' so that the OSD data is loaded.
 *
 * @param {Object} req - job payload: `options` and `image`.
 * @param {Object} res - job responder (`progress`, `resolve`, `reject`).
 */
function handleDetect(req, res) {
  handleInit(req, res);
  req.options.lang = 'osd';

  loadLanguage(req, res, function () {
    var ptr;
    var result = null;

    // This callback can run after dispatchHandlers has returned (when the
    // language had to be fetched), so failures are reported here; otherwise
    // the job would never settle.
    try {
      try {
        base.Init(null, 'osd');
        base.SetPageSegMode(Module.PSM_OSD_ONLY);

        ptr = setImage(Module, base, req.image);
        var results = new Module.OSResults();

        if (base.DetectOS(results)) {
          var charset = results.get_unicharset();
          var best = results.get_best_result();
          var oid = best.get_orientation_id();
          var sid = best.get_script_id();

          result = {
            tesseract_script_id: sid,
            script: charset.get_script_from_script_id(sid),
            script_confidence: best.get_sconfidence(),
            // Orientation id (0..3) translated to degrees.
            orientation_degrees: [0, 270, 180, 90][oid],
            orientation_confidence: best.get_oconfidence()
          };
        }
      } finally {
        // Always end the session and release the image buffer.
        base.End();
        if (ptr !== undefined) Module._free(ptr);
      }
    } catch (err) {
      return res.reject(err);
    }

    if (result) {
      res.resolve(result);
    } else {
      res.reject("failed to detect os");
    }
  });
}
} , { "./desaturate.js" : 16 , "./dump.js" : 17 } ] } , { } , [ 15 ] ) ;