// Pieces of the library under test, read off the `Tesseract` object.
// NOTE(review): `Tesseract` is not declared in the visible part of this file —
// presumably a global supplied by the test harness; confirm.
const TesseractWorker = Tesseract.TesseractWorker;
const PSM = Tesseract.PSM;

// True only when both `window` and `window.document` are defined. The `||`
// short-circuits, so `window.document` is never touched when `window` is absent.
const isBrowser = !(typeof window === 'undefined' || typeof window.document === 'undefined');
const SIMPLE _PNG = '' ;
const SIMPLE _JPG = ' data : image / jpeg ; base64 , / 9 j / 4 A A Q S k Z J R g A B A Q I A J Q A l A A D / 2 w B D A A M C A g I C A g M C A g I D A w M D B A Y E B A Q E B A g G B g U G C Q g K C g k I C Q k K D A 8 M C g s O C w k J D R E N D g 8 Q E B E Q C g w S E x I Q E w 8 Q E B D / 2 w B D A Q M D A w Q D B A g E B A g Q C w k L E B A Q E B A Q E B A Q E B A Q E B A Q E B A Q E B A Q E B A Q E B A Q E B A Q E B A Q E B A Q E B A Q E B A Q E B A Q E B A Q E B D / w A A R C A C 0 A U A D A R E A A h E B A x E B / 8 Q A G w A B A A M A A w E A A A A A A A A A A A A A A A Y H C A E F C Q T / x A A y E A A B A w Q C A Q M C B A U F A Q A A A A A A A Q I D B A U G B w g R E h M U I R U i C R Y j M R c k Q U N R N T h 0 d b K 1 / 8 Q A G g E B A Q E B A Q E B A A A A A A A A A A A A A A Q D A g E F B v / E A D Q R A Q A C A Q Q B A w I E B A Y C A w A A A A A B A h E D B B I h M R M i Q Q V R M k J h c R Q j U o E W N H J z k b E V M 6 G y w f / a A A w D A Q A C E Q M R A D 8 A 9 U we 3 9 z c p M I z C u t W s u H n 5 / x u l g j l i v 3 8 Q b d a v W c s a O k b 7 a Z i y N 8 H d t 7 V f u 6 7 T 9 z C 2 v 6 V d T U 1 o x W v e f O Y i I m Z x H 9 4 x 5 6 z 8 t v S i 3 C u n O Z t 5 j x i c z G P 1 6 x O f 1 x 8 K o 1 H z b 5 T 7 v x O 3 5 9 r r g f 9 T x q 4 z y Q R 3 D + K F u h + Y 5 V j l X 0 p o G S f a 5 r k + W p 3 1 8 d / u f Q r t + N 9 O N e e N b 4 n P n F Z + c R / z j y l 1 L z S d S l I z e k z E x 4 9 0 R n G f H z H f h t J q q r U V y d K q f K f 4 J p d V m Z i J m M S 5 D o A A A A A A A A A A I f r z b m v d r S Z F F g O Q f V H Y n e Z 8 f v C e 0 n g 9 t X w 9 e p D + q x v n 1 2 n 3 M 8 m r / R V F I 9 T Q 0 9 1 X 8 F + 6 z 9 / 7 e Y / u 9 1 I 9 H W t t 7 9 X r E T M f p O c f p 8 J g H i r t W 7 a z X P d h 7 F w 3 J N N X 3 E b X h d w h o 7 R f K 9 z 1 p 8 i i f 6 n l P T + U T E R r f B v f i 6 R P 1 G 9 q i / A 2 / 8 A O 2 k b i / t t N 7 1 4 z 5 i K z i L f t b z H W P t N o 7 N f + V u f Q r 7 q 8 a 2 5 R 4 z M R M 1 / e M 4 8 5 6 
n M R 0 t E A A A A A A A A A A A A A A A A A A A A A A A A A A A H x 3 j / A E m t / w C P J / 5 U g + q f 5 H W / 0 W / 6 l R t P 8 x p / 6 o / 7 Z X / C z / 2 X 4 j / 2 F 4 / + h O f o N / 8 A + v b / A O 3 X / w D U U / 5 7 d / 7 s / w D 1 q o j f X K / X 2 b 8 j s 4 1 j t 7 l l m u l 8 I 1 9 P B b r Z R 4 P D W Q 3 O 9 3 H w X 3 F R P W 0 9 P M r I o 1 V Y 0 g V O n L 4 u + F Z 2 v x N j b T 3 O l b d 2 v M 2 m 1 6 1 r H U V r W Y j l P 9 U 3 m J m P H G I 6 8 z N r d 5 F 9 C 9 N t S s Y 4 1 t a 0 + Z m 0 Z i t e 8 x F Y m J n 7 z P 5 u u E 4 4 N c k q 7 L 9 h 7 M 0 p g + 4 b x u b F s c s 0 d + w / I c g g l g u f b n K 2 S h q p Z 2 R y T q k k j E S V 6 J 8 I 7 r p v i 1 l W r O 5 1 f p W v u I p H r a d u N Z j G L x a k z T r 4 m J p M T n u 0 z M z 8 J K z t 9 P 6 j o a H O f T 1 K z a 2 e 5 r N b 1 i 2 J x 8 x b M R i Y i M R H c T m G 8 W b 3 L t z L K G v 2 P z b 2 n i u / r d f E k v u v b t X t o b P 2 y o d 5 U U F p l Y 2 O o a + l j c n 6 M i q x X p K 5 q K i I t G y r o V r p 6 u 0 n 1 a Y n l y 7 t M 4 t z j H U 1 4 z n v G K 8 c R x m M V 5 3 c 6 s 3 1 N L d x 6 d s + 3 j 4 j x 6 c 5 7 i 2 Z m s 4 n 8 c Z j N o n n M 0 5 g 5 5 c 6 b k z Y M B 3 P v T Y u m N P 1 1 h j k s l / x C p d b 2 3 K / u n 8 H w 1 V f H H I s T G x O 7 V k i J G i N R 7 l a n 3 L B s o p r b j W p r X x q R j 0 6 z O K z T E T e 3 2 m Y t 1 3 P U Y x j l i 9 e 6 m + l t 9 K + j W J p 3 6 k + b R a M 8 a 4 8 x E x 3 1 n M 5 j F s Z 0 5 t u n c d 2 4 m 8 J 3 5 f j O 4 V 2 T X z T s t u L 5 b d p I 6 1 0 s N X O 7 2 8 0 0 s P k 2 s d B T 9 q s q I v q u i 7 V v 3 K 0 7 3 + t a d X b b a / 8 q 2 p N a 2 t F f E R W b T e K / r W O v P c x a e W c T z s t P T 4 6 2 4 r n U p S J t F c x M z P V e P L M d e p M / M c Y 9 s T G I l j / A D H k t q r X O L O 2 b p 3 8 S P b O a 7 T t r 2 V s l k y C h u L 8 c v U j n f z F O y g l p G R U k b k c 5 W f q q k S I i N V F 8 X N 0 n V / h L 0 9 D T 5 6 e a x a t p z a a 5 j O L z 3 E x 5 m f x T G Y i Z m e 8 6 a c b q t v 4 q 3 
G 0 x a Y m v U R b u Y j G M T H 5 f E V z i 0 x i O L T H L f Z e 5 s k v H F + X S G d 1 G E X X Z N X O r v U n k k o 2 J U U M L 2 r U Q N V G V X o p K 5 7 G S N V i v a 3 t O l U 9 1 d n e n 1 + / 0 + L + 2 t N W J n 9 K z 3 a I / q 4 x P H u J i c Y m P L L b 7 u u r 9 C r v r 1 z a 1 t C Y + O 7 8 v b n z F Z n H L G e v i V u U + i 7 / A K Y 0 x m F u o u X + e 0 d d c W 0 9 X N m m e 3 G m v D L G 2 J z f X d A 2 o S O O G O R n m 1 U e 9 U Y r m u R e 2 / P G 7 1 N K d O u n P t 0 4 t 3 3 7 7 V t i v D n 9 / H G Y j P K Z x H e F G 2 j U 9 e 2 v a O d 5 r P W M U i 0 R a e U U j 4 i Z z N c 4 m t Y i e u 2 E M 8 5 H 6 i 0 9 J Z 8 x 4 z c / d z b E z C 3 X y k g q M a z S s r 7 n a b n Q v k 8 K h i p P S R R N X p e 0 k R 6 u R O / D p y t e 2 r 6 b a 2 v v t v o x S L a e r a K 2 z G J r F v F o z i Y m J i I x j P f f t i 1 Z m 3 1 I 0 t p r W t b G p p 1 m a z E 5 i b R 1 3 E Z i 0 Y m Z j M 4 6 j 5 x L Z H K 3 P N n 5 X u D W H F L V W d V e D T Z / T 1 9 2 y L I q F j V r q e 1 U 8 S + U N K 9 y f p S y L 5 9 S t 6 e x W s V F 6 7 R Y t v p f x e + 1 N K 9 p j T 0 a V 1 L R H U 2 5 X 4 1 i J + M T X v z E x b u J i O N t t T c R t 9 j p 6 1 a x O p r W m l c 9 x X F e V p n x m e M 9 f t O M T M W r 3 O I 8 V N m 4 P d L z j d D y s 2 j f 8 C y i w 1 V D X t y G 9 e 7 y G 1 3 F y t S G s t t x S N F h R G K 9 F Y r e m u R H f e r v s 1 n + b p W 0 d X x m t q z X q 0 T E 1 m a 2 n 5 r a I n M f 2 j G b T K I j T v X V 0 / x d x a J 7 r N Z r a I m I + L 1 t M T E / M e f w x n O f A T j f 9 R z f a G T / A M e t x U v 5 H 2 v c K L 6 f T Z T 4 U V 8 9 u 6 N 3 q 3 O L 0 v 5 q W T 9 p H d t 8 k + O k N v p + p x + l 7 X d Y i e d b e 3 8 t c x j 2 x 8 Y z m P 1 i J c / U 9 L P 1 D W 2 3 K e q 6 c 8 s + 6 c 5 n E z 9 u s R H 2 m X p C Y u m T u O m b 5 / e O R f K i z V G Q X a 9 w 4 5 d r Y y w W u u u E k l N S O d R y u 9 K B j 3 e E L X v R v f j 4 o v 7 q Y a d t X / D 8 6 + l H L V 9 X c R G f M 8 Z j j X P 2 j x E e I d 6 0 a c f W q 6 O p P H 
T 9 P S m c f G Y j l O P v 8 5 x 2 o v i z e 5 d u Z Z Q 1 + x + b e 0 8 V 3 9 b r 4 k l 9 1 7 d q 9 t D Z + 2 V D v K i g t M r G x 1 D X 0 s b k / R k V W K 9 J X N R U R F v 2 V d C t d P V 2 k + r T E 8 u X d p n F u c Y 6 m v G c 9 4 x X j i O M x i s + 7 n V m + p p b u P T t n 2 8 f E e P T n P c W z M 1 n E / j j M Z t E 8 5 9 J S d q A A A A A A A A A A A A A A A A A A A A A A A A A D 5 r l F J U W 6 q g i b 5 P k h e x q d 9 d q r V R C T f 6 V 9 f a a u l p x m b V t E f v M S 2 2 9 4 0 9 a l 7 e I m P 8 A t Q P A b V O f a V 4 x 4 7 r z Z t h + j Z B Q V l y l q K P 3 U N R 4 N l r J Z I 1 8 4 X v Y v b H N X 4 c v X f S 9 K f X 3 e r T V p o x S f w 0 r E / v G c p Z r P 8 X u d X 4 v e Z j 9 Y x W M / w D x P l A r / r X k r x x 3 / n W 3 u P m s 7 V t X F N q S U 1 b e s c n y C K z 3 C 2 3 O F j m + v F U V H c T o H I q q q d O f 5 P R E a 1 r P J 3 z t n a + 1 0 L b G 9 c 0 5 3 v W 0 T 3 E 3 m s 2 i 3 3 z O c Y j x E Z m P F q N 1 T T 3 G r X e V n G p x r S 0 Y 6 t W s Y r M T 8 T H z n 7 z j P L 2 W 3 q 3 L e V u a W r K r z s b U e H 6 4 m 9 l 6 O L 2 W o v y 3 i p 9 8 1 r / K a t q K Z E i 9 s 5 y w o 1 s S e o i N k 7 / d p p u K 2 p t b W 0 b R O r P d Y x M V i M Y x b 5 m 2 Y z m O u M x H m J Z 6 N o t u K x q V m N O P x T m M z 3 E 5 r 8 R G
// Base URL that test images are requested from.
const IMAGE_PATH = 'http://localhost:3000/tests/assets/images';

// Expected OCR output for the `simple.*` fixtures.
const SIMPLE_TEXT = 'Tesseract.js\n';

// Expected OCR output when only a sub-rectangle of `simple.png` is recognized.
const SIMPLE_TEXT_HALF = 'Tesse\n';

// Expected OCR output for `cosmic.png`. The constant name is spelled "COMSIC"
// everywhere it is used in this file, so the spelling is kept.
const COMSIC_TEXT = 'HellO World\nfrom beyond\nthe Cosmic Void\n';

// Expected OCR output for `testocr.png`, one array entry per output line.
// The trailing empty entry yields the final newline.
const TESTOCR_TEXT = [
  'This is a lot of 12 point text to test the',
  'ocr code and see if it works on all types',
  'of file format.',
  '',
  'The quick brown dog jumped over the',
  'lazy fox. The quick brown dog jumped',
  'over the lazy fox. The quick brown dog',
  'jumped over the lazy fox. The quick',
  'brown dog jumped over the lazy fox.',
  '',
].join('\n');

// Expected OCR output for `chinese.png` (recognized with the `chi_tra` language).
const CHINESE_TEXT = '繁 體 中 文 測 試\n';

// File extensions of the `simple.*` fixtures exercised by the format tests.
const FORMATS = ['png', 'jpg', 'bmp', 'pbm'];

// Language-data options merged into every worker created by the tests.
const loadLangOptions = {
  langPath: 'http://localhost:3000/tests/assets/traineddata',
  cachePath: './tests/assets/traineddata',
};
/**
 * Creates a TesseractWorker configured for the test environment.
 *
 * Settings are merged in this order, later entries winning on key clashes:
 * the fixed `cacheMethod`, a browser-only `workerPath`, `loadLangOptions`,
 * and finally the caller's `options`.
 *
 * @param {Object} [options] - Extra worker options; may be omitted.
 * @returns {TesseractWorker} A newly constructed worker.
 */
const getWorker = (options) => {
  // The worker script URL is only supplied when running in a browser.
  const browserOnly = isBrowser
    ? { workerPath: 'http://localhost:3000/dist/worker.dev.js' }
    : {};

  const settings = {
    cacheMethod: 'readOnly',
    ...browserOnly,
    ...loadLangOptions,
    ...options,
  };

  return new TesseractWorker(settings);
};
describe ( 'recognize()' , ( ) => {
// Checks OCR output for explicitly selected languages. Each case names an
// image under IMAGE_PATH, the language passed to recognize(), and the exact
// text expected back.
describe('should recognize different langs', () => {
  const cases = [
    { name: 'chinese.png', lang: 'chi_tra', ans: CHINESE_TEXT },
  ];

  cases.forEach(({ name, lang, ans }) => {
    // Async test rather than a `done` callback: a failed assertion or a
    // rejected recognize() now fails the test with its real error instead of
    // surfacing as a timeout.
    it(`recongize ${lang}`, async () => {
      const worker = getWorker();
      try {
        const { text } = await worker.recognize(`${IMAGE_PATH}/${name}`, lang);
        expect(text).to.be(ans);
      } finally {
        // Terminate the worker whether or not the test passed.
        worker.terminate();
      }
    }).timeout(30000);
  });
});
// Recognizes `simple.<format>` by URL for every extension in FORMATS and
// expects the same text each time.
describe('should read bmp, jpg, png and pbm format images', () => {
  FORMATS.forEach((format) => {
    // Async test rather than a `done` callback so that failures are reported
    // directly instead of as timeouts.
    it(`support ${format} format`, async () => {
      const worker = getWorker();
      try {
        const { text } = await worker.recognize(`${IMAGE_PATH}/simple.${format}`);
        expect(text).to.be(SIMPLE_TEXT);
      } finally {
        // Terminate the worker whether or not the test passed.
        worker.terminate();
      }
    }).timeout(10000);
  });
});
// Recognizes only a rectangle of the image. The rectangle is handed to
// recognize() through the tessjs_image_rectangle_* parameters.
describe('should recognize part of the image', () => {
  const cases = [
    {
      name: 'simple.png', left: 0, top: 0, width: 140, height: 180, ans: SIMPLE_TEXT_HALF,
    },
  ];

  cases.forEach(({
    name, left, top, width, height, ans,
  }) => {
    // Async test rather than a `done` callback so that failures are reported
    // directly instead of as timeouts.
    it(`recongize half ${name}`, async () => {
      const worker = getWorker();
      const params = {
        tessjs_image_rectangle_left: left,
        tessjs_image_rectangle_top: top,
        tessjs_image_rectangle_width: width,
        tessjs_image_rectangle_height: height,
      };
      try {
        const { text } = await worker.recognize(`${IMAGE_PATH}/${name}`, 'eng', params);
        expect(text).to.be(ans);
      } finally {
        // Terminate the worker whether or not the test passed.
        worker.terminate();
      }
    }).timeout(30000);
  });
});
// Submits several recognize() calls for the same image to a single worker
// and expects every result to match.
describe('should be able to recognize multiple images with 1 worker', () => {
  [3, 10, 20].forEach((num) => {
    // Async test rather than a `done` callback so that a failed assertion or
    // a rejected job is reported directly instead of as a timeout.
    it(`recognize ${num} images with 1 worker`, async () => {
      const worker = getWorker();
      try {
        // All jobs are submitted up front, then awaited together.
        const jobs = Array.from(
          { length: num },
          () => worker.recognize(`${IMAGE_PATH}/simple.png`),
        );
        const results = await Promise.all(jobs);
        results.forEach(({ text }) => {
          expect(text).to.be(SIMPLE_TEXT);
        });
      } finally {
        // Terminate the worker whether or not the test passed.
        worker.terminate();
      }
    }).timeout(60000);
  });
});
// Submits alternating simple/cosmic images to one worker and checks that the
// i-th result matches the i-th submitted image.
describe('should recognize multiple images in order', () => {
  [1, 2].forEach((num) => {
    // Async test rather than a `done` callback so that failures are reported
    // directly instead of as timeouts.
    it(`recognize ${num * 2} images with 1 worker in order`, async () => {
      const worker = getWorker();

      // `num` repetitions of the (simple, cosmic) pair.
      const cases = [];
      for (let round = 0; round < num; round += 1) {
        cases.push(
          { name: 'simple.png', ans: SIMPLE_TEXT },
          { name: 'cosmic.png', ans: COMSIC_TEXT },
        );
      }

      try {
        const results = await Promise.all(
          cases.map(({ name }) => worker.recognize(`${IMAGE_PATH}/${name}`)),
        );
        results.forEach(({ text }, idx) => {
          expect(text).to.be(cases[idx].ans);
        });
      } finally {
        // Terminate the worker whether or not the test passed.
        worker.terminate();
      }
    }).timeout(30000);
  });
});
// Recognizes three fixture images labelled simple / normal / complex and
// compares each result with its expected text.
describe('supports different complexity', () => {
  const cases = [
    { name: 'simple.png', desc: 'simple', ans: SIMPLE_TEXT },
    { name: 'cosmic.png', desc: 'normal', ans: COMSIC_TEXT },
    { name: 'testocr.png', desc: 'complex', ans: TESTOCR_TEXT },
  ];

  cases.forEach(({ name, desc, ans }) => {
    // Async test rather than a `done` callback so that failures are reported
    // directly instead of as timeouts.
    it(`recongize ${desc} image`, async () => {
      const worker = getWorker();
      try {
        const { text } = await worker.recognize(`${IMAGE_PATH}/${name}`);
        expect(text).to.be(ans);
      } finally {
        // Terminate the worker whether or not the test passed.
        worker.terminate();
      }
    }).timeout(60000);
  });
});
// Runs recognize() once per key of PSM, passing the corresponding value as
// `tessedit_pageseg_mode`, and only checks that a non-empty result object
// comes back (the recognized text is not compared).
describe('should support all page seg modes', () => {
  Object.keys(PSM).forEach((name) => {
    const mode = PSM[name];

    // Async test rather than a `done` callback so that failures are reported
    // directly instead of as timeouts.
    it(`support PSM.${name} mode`, async () => {
      const worker = getWorker();
      try {
        const ret = await worker.recognize(
          `${IMAGE_PATH}/testocr.png`,
          'eng',
          { tessedit_pageseg_mode: mode },
        );
        expect(Object.keys(ret).length).not.to.be(0);
      } finally {
        // Terminate the worker whether or not the test passed.
        worker.terminate();
      }
    }).timeout(30000);
  });
});
// Passes the SIMPLE_PNG / SIMPLE_JPG string constants straight to
// recognize() instead of a URL.
describe('should recognize base64 image', () => {
  const cases = [
    { format: 'png', image: SIMPLE_PNG, ans: SIMPLE_TEXT },
    { format: 'jpg', image: SIMPLE_JPG, ans: SIMPLE_TEXT },
  ];

  cases.forEach(({ format, image, ans }) => {
    // Async test rather than a `done` callback so that failures are reported
    // directly instead of as timeouts.
    it(`recongize ${format} in base64`, async () => {
      const worker = getWorker();
      try {
        const { text } = await worker.recognize(image);
        expect(text).to.be(ans);
      } finally {
        // Terminate the worker whether or not the test passed.
        worker.terminate();
      }
    }).timeout(30000);
  });
});
// Reads each `simple.<format>` fixture from disk and passes the file contents
// to recognize(). Skipped when running in a browser.
// NOTE(review): `fs` and `path` are not declared in the visible part of this
// file — presumably globals provided by the Node test setup; confirm.
(isBrowser ? describe.skip : describe)('should recognize image in Buffer (Node.js only)', () => {
  FORMATS.forEach((format) => {
    // Async test rather than a `done` callback so that failures (including a
    // failed file read) are reported directly instead of as timeouts.
    it(`support ${format} format`, async () => {
      const worker = getWorker();
      try {
        const imagePath = path.join(__dirname, 'assets', 'images', `simple.${format}`);
        const { text } = await worker.recognize(fs.readFileSync(imagePath));
        expect(text).to.be(SIMPLE_TEXT);
      } finally {
        // Terminate the worker whether or not the test passed.
        worker.terminate();
      }
    }).timeout(10000);
  });
});
// Creates an <img> whose `src` points at `simple.<format>` and passes the
// element itself to recognize(). Skipped outside a browser.
(isBrowser ? describe : describe.skip)('should read image from img DOM element (browser only)', () => {
  FORMATS.forEach((format) => {
    // Async test rather than a `done` callback so that failures are reported
    // directly instead of as timeouts.
    it(`support ${format} format`, async () => {
      const imageDOM = document.createElement('img');
      imageDOM.setAttribute('src', `${IMAGE_PATH}/simple.${format}`);
      const worker = getWorker();
      try {
        const { text } = await worker.recognize(imageDOM);
        expect(text).to.be(SIMPLE_TEXT);
      } finally {
        // Clean up the worker and the element whether or not the test passed.
        worker.terminate();
        imageDOM.remove();
      }
    }).timeout(10000);
  });
});
// Creates a <video> whose `poster` attribute points at `simple.<format>` and
// passes the element itself to recognize(). Skipped outside a browser.
(isBrowser ? describe : describe.skip)('should read image from video DOM element (browser only)', () => {
  FORMATS.forEach((format) => {
    // Async test rather than a `done` callback so that failures are reported
    // directly instead of as timeouts.
    it(`support ${format} format`, async () => {
      const videoDOM = document.createElement('video');
      videoDOM.setAttribute('poster', `${IMAGE_PATH}/simple.${format}`);
      const worker = getWorker();
      try {
        const { text } = await worker.recognize(videoDOM);
        expect(text).to.be(SIMPLE_TEXT);
      } finally {
        // Clean up the worker and the element whether or not the test passed.
        worker.terminate();
        videoDOM.remove();
      }
    }).timeout(10000);
  });
});
// Draws `simple.<format>` onto a <canvas> and passes the canvas to
// recognize(). Skipped outside a browser.
(isBrowser ? describe : describe.skip)('should read video from canvas DOM element (browser only)', () => {
  /*
   * img tag is unable to render pbm, so let's skip it.
   */
  const formats = FORMATS.filter(f => f !== 'pbm');

  /**
   * Loads `simple.<format>` into a new <img> and, once loaded, draws it at
   * (0, 0) on a new <canvas>.
   *
   * Each test builds its own elements from its own `format`, so the image
   * that gets loaded no longer depends on how many times a shared hook has
   * already run (which broke under --grep, .only or retries).
   *
   * @param {string} format - File extension of the fixture to load.
   * @returns {Promise<{canvasDOM: HTMLCanvasElement, imageDOM: HTMLImageElement}>}
   *   Resolves after the image has been drawn; rejects if it fails to load.
   */
  const loadCanvas = format => new Promise((resolve, reject) => {
    const canvasDOM = document.createElement('canvas');
    const imageDOM = document.createElement('img');
    imageDOM.setAttribute('crossOrigin', 'Anonymous');
    imageDOM.onload = () => {
      canvasDOM.getContext('2d').drawImage(imageDOM, 0, 0);
      resolve({ canvasDOM, imageDOM });
    };
    // Report a load failure immediately instead of waiting for a timeout.
    imageDOM.onerror = () => {
      reject(new Error(`Unable to load ${IMAGE_PATH}/simple.${format}`));
    };
    // Handlers are attached before `src` is set so no event can be missed.
    imageDOM.setAttribute('src', `${IMAGE_PATH}/simple.${format}`);
  });

  formats.forEach((format) => {
    // Async test rather than a `done` callback so that failures are reported
    // directly instead of as timeouts.
    it(`support ${format} format`, async () => {
      const { canvasDOM, imageDOM } = await loadCanvas(format);
      const worker = getWorker();
      try {
        const { text } = await worker.recognize(canvasDOM);
        expect(text).to.be(SIMPLE_TEXT);
      } finally {
        // Clean up the worker and both elements whether or not the test passed.
        worker.terminate();
        canvasDOM.remove();
        imageDOM.remove();
      }
    }).timeout(10000);
  });
});
} ) ;