Compare commits
661 Commits
Author | SHA1 | Date |
---|---|---|
Réda Housni Alaoui | 80aef15861 | 2 years ago |
Balearica | 263dbb87b1 | 2 years ago |
Balearica | c9200839df | 2 years ago |
Balearica | 1b87e30ae3 | 2 years ago |
Balearica | bce7cd84fe | 2 years ago |
Balearica | 2c77f33461 | 2 years ago |
Balearica | d8b29522c4 | 2 years ago |
Balearica | dd6c40b681 | 2 years ago |
Balearica | 5ff17fdeb1 | 2 years ago |
Balearica | 363690a421 | 2 years ago |
Balearica | b64eba3db0 | 2 years ago |
Balearica | 85e73216be | 2 years ago |
Balearica | b419e45114 | 2 years ago |
Balearica | ea33463120 | 2 years ago |
Balearica | 90c8d99b3c | 2 years ago |
Balearica | 0e368c69d6 | 2 years ago |
Balearica | ba394673bd | 2 years ago |
Balearica | 44d322e6ff | 2 years ago |
Balearica | e3c4a6bc6e | 2 years ago |
Balearica | f372818146 | 2 years ago |
Balearica | 8b567609e3 | 2 years ago |
Balearica | 13b95f6371 | 2 years ago |
Balearica | a9ac00ccac | 2 years ago |
Balearica | 75ddd63041 | 2 years ago |
Balearica | 1136e0a941 | 2 years ago |
Balearica | 2e478bd8a5 | 2 years ago |
WintrySnowman | 67848464ac | 2 years ago |
Balearica | be956cd889 | 2 years ago |
Balearica | 61d0e553c6 | 2 years ago |
Balearica | 74be03c5b9 | 2 years ago |
Balearica | 9442d9cb69 | 2 years ago |
Your Name | 6aba9599ec | 2 years ago |
Your Name | 58d28944d3 | 2 years ago |
Balearica | a8287a99aa | 2 years ago |
Balearica | 66085a7d70 | 2 years ago |
Susan Cheng | 50a53f51d9 | 3 years ago |
Andrei Alecu | 01e8335768 | 3 years ago |
jeromewu | adcb5b8759 | 3 years ago |
Jerome Wu | 294ced5c85 | 4 years ago |
jeromewu | 90466c3b55 | 4 years ago |
jeromewu | 7b7f9afaa6 | 4 years ago |
stonefruit | 8f2c33fd89 | 4 years ago |
Ilya | 83a424f9b9 | 4 years ago |
miguelm3 | bf43f447b6 | 4 years ago |
miguelm3 | dec119fa7b | 4 years ago |
jeromewu | 909263b94b | 4 years ago |
jeromewu | eb287e763a | 4 years ago |
Bert Verhelst | 1781487273 | 4 years ago |
Jerome Wu | 69355a7c07 | 4 years ago |
jeromewu | 4f3aa3145c | 4 years ago |
Jerome Wu | ed016bdc26 | 4 years ago |
Jerome Wu | 804c238950 | 4 years ago |
Jerome Wu | 88290dc541 | 4 years ago |
Jerome Wu | 920dd902f8 | 4 years ago |
Jerome Wu | e883f87ab7 | 4 years ago |
Jerome Wu | 34839a7a0c | 4 years ago |
Jerome Wu | e68d520155 | 4 years ago |
Jerome Wu | d98eeab91f | 4 years ago |
jeromewu | c2f4ecef9a | 4 years ago |
Jerome Wu | eb6b8594d3 | 4 years ago |
Jerome Wu | afadfffa6a | 4 years ago |
jeromewu | ccb8ec3d67 | 4 years ago |
jeromewu | d64ebcaea1 | 4 years ago |
jeromewu | 341360bf44 | 4 years ago |
jeromewu | 2cd68fb97e | 4 years ago |
jeromewu | 066c59d1f9 | 4 years ago |
Jerome Wu | 820c91fa52 | 4 years ago |
Cédric Couton | 3f98fdeb0f | 4 years ago |
abhishek7553 | cd08357833 | 4 years ago |
Jerome Wu | 2bbd1e896c | 4 years ago |
Jerome Wu | 5bad2e68ce | 4 years ago |
Jerome Wu | 59392e96ff | 4 years ago |
dependabot[bot] | 0f9cecd544 | 4 years ago |
Jerome Wu | 13ab9cec70 | 4 years ago |
dependabot[bot] | 389bf71381 | 4 years ago |
Jerome Wu | 6481256f5e | 4 years ago |
Jerome Wu | 5e295b75b6 | 4 years ago |
Jerome Wu | f3cbc3ee43 | 4 years ago |
jeromewu | 3e97749585 | 4 years ago |
dependabot[bot] | 8435f4c6e2 | 5 years ago |
jeromewu | 6ec5a5a092 | 5 years ago |
dependabot[bot] | d3cf791e6a | 5 years ago |
jeromewu | cc1f2bfe81 | 5 years ago |
jeromewu | fcd01ceb32 | 5 years ago |
Ivan Schneider | 6ef2e116cd | 5 years ago |
jeromewu | 885908f22e | 5 years ago |
Joshua Mack | 6df9ef2f1f | 5 years ago |
Joshua Mack | bb7d7093f4 | 5 years ago |
jeromewu | 40051c6761 | 5 years ago |
Connor Adams | 0ce8e139ab | 5 years ago |
rogerxaic | b221071b8e | 5 years ago |
rogerxaic | a54dbc345b | 5 years ago |
jeromewu | 1df208ffab | 5 years ago |
jeromewu | 7a398f6420 | 5 years ago |
jeromewu | ecf03503f1 | 5 years ago |
jeromewu | 0c7ade85e3 | 5 years ago |
jeromewu | d6e434338f | 5 years ago |
jeromewu | d37f045e99 | 5 years ago |
jeromewu | 50df652f33 | 5 years ago |
jeromewu | 1ff4b79f75 | 5 years ago |
jeromewu | 93dab17bf1 | 5 years ago |
Jerome Wu | 20cd04a848 | 5 years ago |
Jerome Wu | 363018bffe | 5 years ago |
Jerome Wu | 89d0a6b12d | 5 years ago |
Jerome Wu | 25d6664937 | 5 years ago |
jeromewu | a008d0780a | 5 years ago |
jeromewu | 0778add1c4 | 5 years ago |
jeromewu | 331fbbe6be | 5 years ago |
dependabot[bot] | 6d9adfd2ce | 5 years ago |
Jerome Wu | 3d456e3eb0 | 5 years ago |
Jerome Wu | 205178646b | 5 years ago |
Jerome Wu | 40aea6d886 | 5 years ago |
Jerome Wu | 6f8e69206b | 5 years ago |
Jerome Wu | c5935b2636 | 5 years ago |
jeromewu | 5a40559db2 | 5 years ago |
jeromewu | 023d484209 | 5 years ago |
jeromewu | 641d0c5fe4 | 5 years ago |
Nir | a56de79f22 | 5 years ago |
jeromewu | b418554cf0 | 5 years ago |
jeromewu | f8aa46ec9d | 5 years ago |
Tomasz Ferens | 13c16f472e | 5 years ago |
jeromewu | 247a1635f8 | 5 years ago |
Jerome Wu | 66e2ce842e | 5 years ago |
jeromewu | 2956afc013 | 5 years ago |
jeromewu | 781f2f80ba | 5 years ago |
jeromewu | 548a5a5142 | 5 years ago |
Nisar Hassan Naqvi | 9b91579b16 | 5 years ago |
jeromewu | 7cc3427a38 | 5 years ago |
Oliver Cole | 71dc2a80ed | 5 years ago |
jeromewu | 9db6ebf36e | 5 years ago |
Yvonnick FRIN | db3eed3915 | 5 years ago |
Jerome Wu | e975d29f38 | 5 years ago |
Jerome Wu | f9c76a9b7f | 5 years ago |
Jerome Wu | 945f5d3f35 | 5 years ago |
Jerome Wu | b8aba2eddd | 5 years ago |
Jerome Wu | b603d42547 | 5 years ago |
Jerome Wu | c7a74a6575 | 5 years ago |
Jerome Wu | 3bb543d3cd | 5 years ago |
Jerome Wu | fa5b267f17 | 5 years ago |
Jerome Wu | 15b7983619 | 5 years ago |
Jerome Wu | b2bc416dd2 | 5 years ago |
Jerome Wu | a5fa14cc3b | 5 years ago |
Jerome Wu | 2aba7285e9 | 5 years ago |
jeromewu | 1051b2ab41 | 5 years ago |
jeromewu | 5c22cd52bd | 5 years ago |
Andrea Giammarchi | 5a7576ceb4 | 5 years ago |
jeromewu | 2db7607496 | 5 years ago |
Andrea Giammarchi | 953689fedf | 5 years ago |
jeromewu | 0e8aee660f | 5 years ago |
jeromewu | 625ded7921 | 5 years ago |
jeromewu | 73147afd87 | 5 years ago |
jeromewu | 1a781a6ae4 | 5 years ago |
dependabot[bot] | 9c62422573 | 5 years ago |
jeromewu | 1fddc9a86c | 5 years ago |
jeromewu | 4aaf462564 | 5 years ago |
jeromewu | 7b06e0edcb | 5 years ago |
jeromewu | 663be93420 | 5 years ago |
Charles Francoise | d51d7bd8ac | 5 years ago |
Charles Francoise | 67ccfcfb6e | 5 years ago |
Charles Francoise | ebc56a1899 | 5 years ago |
Charles Francoise | bcdcace9ec | 5 years ago |
jeromewu | dd61663b31 | 5 years ago |
dependabot[bot] | f524945084 | 5 years ago |
dependabot[bot] | 7663c7c589 | 5 years ago |
dependabot[bot] | 571c8c2452 | 5 years ago |
dependabot[bot] | 775e48176c | 5 years ago |
dependabot[bot] | 4182c5d55a | 5 years ago |
jeromewu | a97e2dad98 | 5 years ago |
Johan Gustafsson | 8d926873b3 | 5 years ago |
Tomasz Martyński | b588e9f618 | 5 years ago |
jeromewu | 086c7a507a | 5 years ago |
Anthony Ng | 2de597c1a9 | 5 years ago |
Jerome Wu | 6fd12b5012 | 5 years ago |
Jerome Wu | 15da58127d | 5 years ago |
Jerome Wu | 1d28cc5e52 | 5 years ago |
Jerome Wu | 6d772c7070 | 5 years ago |
Jerome Wu | e7a57363c2 | 5 years ago |
jeromewu | fd53a9d29e | 5 years ago |
jeromewu | 06967cb7f3 | 5 years ago |
jeromewu | 61a7531acb | 5 years ago |
Jerome Wu | dc83e6f437 | 5 years ago |
jeromewu | e9017f3ed6 | 5 years ago |
Jerome Wu | 41d82da35b | 5 years ago |
Jerome Wu | d947f6e554 | 5 years ago |
Jerome Wu | 5db60be949 | 5 years ago |
Jason | 7c5172846b | 5 years ago |
Jerome Wu | 6021eca27a | 5 years ago |
Jerome Wu | b5d0651698 | 5 years ago |
Jerome Wu | 93186a5dc7 | 5 years ago |
Jerome Wu | c124f7767d | 5 years ago |
Jerome Wu | d6ada618a1 | 5 years ago |
Jerome Wu | b59d144af3 | 5 years ago |
Jerome Wu | 7f485c4461 | 5 years ago |
Jerome Wu | 3c1fba8fee | 5 years ago |
Jerome Wu | 22d16c0e65 | 5 years ago |
Jerome Wu | 0ca4f78565 | 5 years ago |
Jerome Wu | 369e794afa | 5 years ago |
Jerome Wu | 0e968d1829 | 5 years ago |
Jerome Wu | d1c4a33c9e | 5 years ago |
Jerome Wu | a470b836d5 | 5 years ago |
Jerome Wu | a31424e3e7 | 5 years ago |
Jerome Wu | 6d7eb50429 | 5 years ago |
Jerome Wu | 4100c0ed7a | 5 years ago |
Jerome Wu | 3ec7092105 | 5 years ago |
Jerome Wu | 97fa545936 | 5 years ago |
Jerome Wu | 142f4dd92d | 5 years ago |
Jerome Wu | 8fcbfecb2f | 5 years ago |
Jerome Wu | c028a8e833 | 5 years ago |
Jerome Wu | f4ddbc49b3 | 5 years ago |
Jerome Wu | 740f2cab0c | 5 years ago |
Jerome Wu | b76b7b8383 | 5 years ago |
jeromewu | 06539f7405 | 5 years ago |
York Yao | c4b53040f5 | 5 years ago |
York Yao | 7c8f4f181b | 5 years ago |
jeromewu | b6e9d6c6a2 | 5 years ago |
Jerome Wu | 6fb2195b75 | 5 years ago |
Jerome Wu | ad668166fd | 5 years ago |
Jerome Wu | 4aaeeee0a3 | 5 years ago |
Jerome Wu | 01ae9e8e36 | 5 years ago |
Jerome Wu | ce4d4d51be | 5 years ago |
Thibaut Girka | eb288fb885 | 6 years ago |
Jerome Wu | 9f1e7829d3 | 6 years ago |
jeromewu | e80056d304 | 6 years ago |
jeromewu | f655fbbacd | 6 years ago |
Blune | ba99279eef | 6 years ago |
jeromewu | 9a4f0b7eaa | 6 years ago |
Seven Lju | d2a68d5a90 | 6 years ago |
Jerome Wu | f1c4ebe2e4 | 6 years ago |
Jerome Wu | 8bc9f64177 | 6 years ago |
Jerome Wu | 26ab198a84 | 6 years ago |
Jerome Wu | f3786d8e2b | 6 years ago |
Jerome Wu | bdef5b4bb5 | 6 years ago |
jeromewu | 3c231b6d2b | 6 years ago |
nisarhassan12 | b1b41b63de | 6 years ago |
Jess | a55f68c3be | 6 years ago |
Jess | 41b4be50fc | 6 years ago |
jeromewu | 1171c7b670 | 6 years ago |
nisarhassan12 | 6a5a83790e | 6 years ago |
Jerome Wu | c87746ec5d | 6 years ago |
jeromewu | 7bd5fda7ff | 6 years ago |
jeromewu | 5cd2c083eb | 6 years ago |
jeromewu | 2a2d05150d | 6 years ago |
nisarhassan12 | 5cf6fc78ce | 6 years ago |
nisarhassan12 | 90bccf7eca | 6 years ago |
Jerome Wu | fb60a2389f | 6 years ago |
Jerome Wu | 94dc428e98 | 6 years ago |
Jerome Wu | 7cdeb6cacf | 6 years ago |
Jerome Wu | 57ae937646 | 6 years ago |
Jerome Wu | bc51e340cb | 6 years ago |
Jerome Wu | 5dd382a67a | 6 years ago |
jeromewu | c9444f3262 | 6 years ago |
Jerome Wu | b7b2148b71 | 6 years ago |
jeromewu | 42012d663a | 6 years ago |
Jerome Wu | 244a4802b0 | 6 years ago |
Jerome Wu | 7f406f438d | 6 years ago |
Jerome Wu | d3be9d662d | 6 years ago |
Jerome Wu | ed0c5c9e06 | 6 years ago |
Jerome Wu | 0d0e795b93 | 6 years ago |
jeromewu | 7f2d736baf | 6 years ago |
Ravi van Rooijen | 7ef9ad95f6 | 6 years ago |
antonrifco | 7566d3550f | 6 years ago |
Tom MacWright | d0803e4dab | 6 years ago |
Tom MacWright | eedabf1a05 | 6 years ago |
Rafael Cruz | 3f1847c05f | 6 years ago |
jeromewu | 92521d6276 | 6 years ago |
Jerome Wu | cc8fce8baf | 6 years ago |
jeromewu | 74f5b639b0 | 6 years ago |
jeromewu | 288905e6be | 6 years ago |
jeromewu | 1e8ea8f465 | 6 years ago |
Ravi van Rooijen | a2402eaeae | 6 years ago |
Jerome Wu | 2ac6b9828e | 6 years ago |
Jerome Wu | a1512f61a4 | 6 years ago |
Jerome Wu | af115e4940 | 6 years ago |
Jerome Wu | 84a9b80c46 | 6 years ago |
Jerome Wu | cc61520fe4 | 6 years ago |
Jerome Wu | c8fe295c94 | 6 years ago |
Jerome Wu | 559cabd69c | 6 years ago |
Jerome Wu | 26d3aecbcc | 6 years ago |
Jerome Wu | a1c851d6c5 | 6 years ago |
Jerome Wu | 53e9298ab6 | 6 years ago |
Jerome Wu | 9c854a4afa | 6 years ago |
Jerome Wu | 698a7d60b2 | 6 years ago |
Jerome Wu | 313e699765 | 6 years ago |
Jerome Wu | 4f14b4b79c | 6 years ago |
Jerome Wu | de4b98ae23 | 6 years ago |
Jerome Wu | 46e84ad95d | 6 years ago |
Jerome Wu | 936cfa6f48 | 6 years ago |
Jerome Wu | 87cff487e2 | 6 years ago |
Jerome Wu | 462f641482 | 6 years ago |
Jerome Wu | 3fa475195e | 6 years ago |
Jerome Wu | 6503dd4e73 | 6 years ago |
Jerome Wu | 080151711b | 6 years ago |
Jerome Wu | 269629d601 | 6 years ago |
jeromewu | 3905ecf600 | 6 years ago |
jeromewu | 46196f9afa | 6 years ago |
Jerome Wu | d41d0ce4a7 | 6 years ago |
Jerome Wu | 5e383d7a14 | 6 years ago |
Jerome Wu | 0f28382dcc | 6 years ago |
Jerome Wu | 794a17472a | 6 years ago |
Jerome Wu | 51e89ed8e5 | 6 years ago |
Jerome Wu | 154b600670 | 6 years ago |
Jerome Wu | 0b48d820b8 | 6 years ago |
Jerome Wu | e32272ea2e | 6 years ago |
Jerome Wu | 4ab9094a50 | 6 years ago |
Jerome Wu | 3cbfb044a1 | 6 years ago |
Jerome Wu | f1c9e2f08c | 6 years ago |
Jerome Wu | 4136b586b2 | 6 years ago |
Jerome Wu | 3450e28087 | 6 years ago |
Jerome Wu | b6e4f30967 | 6 years ago |
Jerome Wu | 8e73c8d1e1 | 6 years ago |
Jerome Wu | ef4fd6c61f | 6 years ago |
jeromewu | f576fca12f | 6 years ago |
Jerome Wu | dcf186b047 | 6 years ago |
Jerome Wu | 824e4710e2 | 6 years ago |
Jerome Wu | 453061d9ae | 6 years ago |
Jerome Wu | 53be7e5726 | 6 years ago |
Jerome Wu | dbd1e6b4de | 6 years ago |
jeromewu | 697b8bf79a | 6 years ago |
jeromewu | 6a44230162 | 6 years ago |
Jerome Wu | 7cff0bb097 | 6 years ago |
Jerome Wu | 6e5c09bac7 | 6 years ago |
Jerome Wu | 0b528af40f | 6 years ago |
Jerome Wu | 969b605713 | 6 years ago |
Jerome Wu | 31cf08e2e2 | 6 years ago |
Jerome Wu | 154f3f60db | 6 years ago |
Jerome Wu | f32f2cabd1 | 6 years ago |
Jerome Wu | 4624c6f404 | 6 years ago |
Jerome Wu | 8f0eeb9f84 | 6 years ago |
Jerome Wu | b322febf97 | 6 years ago |
Jerome Wu | 9206c022ad | 6 years ago |
Jerome Wu | b488097551 | 6 years ago |
Jerome Wu | a873bbbc80 | 6 years ago |
Jerome Wu | 30f36a0f76 | 6 years ago |
Jerome Wu | 38e0d0087e | 6 years ago |
Jerome Wu | 584205372d | 6 years ago |
Jerome Wu | 08be88c890 | 6 years ago |
Jerome Wu | 6ee38849d2 | 6 years ago |
Jerome Wu | d8a7bb8ba4 | 6 years ago |
Jerome Wu | 566b105c19 | 6 years ago |
Jerome Wu | 3809498e5f | 6 years ago |
Jerome Wu | 10583c57e4 | 6 years ago |
Jerome Wu | 8a3e0a6e28 | 6 years ago |
jeromewu | 9236ff0ebb | 6 years ago |
Caeman Toombs | 0ae0f71b5e | 6 years ago |
Jerome Wu | 9893557862 | 6 years ago |
Jerome Wu | 9aafe4aa24 | 6 years ago |
Jerome Wu | 71a9cb9210 | 6 years ago |
Jerome Wu | cfb7d5673c | 6 years ago |
Jerome Wu | 62a67ac430 | 6 years ago |
Jerome Wu | f6714f0055 | 6 years ago |
Jerome Wu | b1c30dfad9 | 6 years ago |
Jerome Wu | 69042fc1bf | 6 years ago |
Jerome Wu | ce520007ec | 6 years ago |
Jerome Wu | 40740db822 | 6 years ago |
Jerome Wu | 8146655c55 | 6 years ago |
Jerome Wu | ad918d349a | 6 years ago |
Jerome Wu | b4c6565acf | 6 years ago |
Jerome Wu | 3b2b8bd2b8 | 6 years ago |
Jerome Wu | a4fdded574 | 6 years ago |
Jerome Wu | a45e35467e | 6 years ago |
Jerome Wu | 7fa06577d5 | 6 years ago |
Jerome Wu | b843f08705 | 6 years ago |
Jerome Wu | b31c45faf3 | 6 years ago |
Jerome Wu | e0b263b88b | 6 years ago |
Jerome Wu | f488f4d204 | 6 years ago |
Jerome Wu | 098c070e6d | 6 years ago |
Jerome Wu | 12eaac4d2b | 6 years ago |
Jerome Wu | 07b45ec0b5 | 6 years ago |
Jerome Wu | 259562a422 | 6 years ago |
Jerome Wu | ec1a81e72d | 6 years ago |
Jerome Wu | 38b107b094 | 6 years ago |
Jerome Wu | e9cde564e5 | 6 years ago |
Jerome Wu | af05a27d35 | 6 years ago |
Jerome Wu | 84bde05028 | 6 years ago |
Jerome Wu | 30d3421d15 | 6 years ago |
Jerome Wu | 2b3221fbfe | 6 years ago |
Jerome Wu | 6dcef53dbc | 6 years ago |
Jerome Wu | 876bfc5f9a | 6 years ago |
Jerome Wu | 17b103746e | 6 years ago |
Jerome Wu | 1e80e42a6f | 6 years ago |
Jerome Wu | c6ea108325 | 6 years ago |
jeromewu | cccd9baea3 | 6 years ago |
Jerome Wu | e5c693a43c | 6 years ago |
Jerome Wu | a0e1764813 | 6 years ago |
jeromewu | a9c2fe6686 | 6 years ago |
jeromewu | 3e29fa6420 | 6 years ago |
Jerome Wu | 5f5072aca4 | 6 years ago |
Jerome Wu | 84612f2772 | 6 years ago |
jeromewu | 2fc29127db | 6 years ago |
jeromewu | 28f54693f2 | 6 years ago |
jeromewu | 281b59fe26 | 6 years ago |
jeromewu | 343246f2ca | 6 years ago |
jeromewu | 0d07f57ec5 | 6 years ago |
jeromewu | a4f69237a4 | 6 years ago |
Jerome Wu | 464565e335 | 6 years ago |
Jerome Wu | 6132f2169d | 6 years ago |
Jerome Wu | d2ab2a1486 | 6 years ago |
Jerome Wu | 244a948991 | 6 years ago |
jeromewu | 797de3aae4 | 6 years ago |
Jerome Wu | eb50d77354 | 6 years ago |
Jerome Wu | 64e7dea45c | 6 years ago |
Jerome Wu | 5841c6bbfd | 6 years ago |
Jerome Wu | a900000254 | 6 years ago |
Jerome Wu | 8b56811de7 | 6 years ago |
jeromewu | 7701c2db38 | 6 years ago |
jeromewu | 418fc4c4ff | 6 years ago |
swifmaneum | 6ad1810996 | 7 years ago |
Carlos Ferreira | fbe920d66e | 7 years ago |
Sukmin Lim | a62e4b832b | 7 years ago |
yonmey | 2cb3d92f79 | 7 years ago |
Jerome Wu | 2581b662dc | 7 years ago |
jeromewu | 16aaea19a9 | 7 years ago |
jeromewu | 0ed7584ddb | 7 years ago |
Jerome Wu | 3e23f2b602 | 7 years ago |
jeromewu | 9d6117ee7a | 7 years ago |
Jerome Wu | 46a37aa07a | 7 years ago |
George Chung | ffe2360426 | 7 years ago |
IlyaRadinsky | 958c6a394e | 7 years ago |
Kevin Kwok | 799eafab90 | 7 years ago |
Kevin Kwok | 8e9f7861ac | 7 years ago |
Michael Hadley | 69678dd86b | 8 years ago |
Michael Hadley | 724f088328 | 8 years ago |
Michael Hadley | 4bff80585c | 8 years ago |
Michael Hadley | abcb593971 | 8 years ago |
mohammad reza | 142ee885f2 | 8 years ago |
James | 0cd348dfbe | 8 years ago |
Trevor Healy | 27fa9156ad | 8 years ago |
Yevhen Tiurin | 8350b5345f | 8 years ago |
Kevin Kwok | b99bf1cee5 | 8 years ago |
Rainer Eli | 157d9b0e0a | 8 years ago |
Kevin Kwok | d7ba0e32a8 | 8 years ago |
Kevin Kwok | 61447d8ebd | 8 years ago |
Guillermo | f4289d124e | 8 years ago |
Guillermo | c27f07a292 | 8 years ago |
Guillermo | beabea0c71 | 8 years ago |
Kevin Kwok | d079a59b00 | 8 years ago |
Varun Shenoy | 5e6e6d4ceb | 8 years ago |
Kütbach Christian | 5a0d8896fd | 8 years ago |
Guillermo | 15bb5c4145 | 8 years ago |
Kütbach Christian | 4244d89998 | 8 years ago |
Kütbach Christian | 8c265dbce5 | 8 years ago |
Kütbach Christian | 01c7c4a16f | 8 years ago |
Guillermo | d08ac24b82 | 8 years ago |
Kiran Kota | 91f9be7e81 | 8 years ago |
Guillermo | 21fe5729eb | 8 years ago |
Guillermo | e01c63e864 | 8 years ago |
Kevin Kwok | fc15b0ef43 | 8 years ago |
Kevin Kwok | afabe9eab8 | 8 years ago |
Kevin Kwok | 1078830628 | 8 years ago |
Guillermo | 5a536f1599 | 8 years ago |
Guillermo | 2f3cec857d | 8 years ago |
Guillermo | f18b01fd2a | 8 years ago |
Guillermo | 8b42b232cd | 8 years ago |
Guillermo | 30d252e6f6 | 8 years ago |
Guillermo Webster | 221908f3ff | 8 years ago |
Romina Suarez | 29ff2f283c | 8 years ago |
Guillermo Webster | e8e19d383b | 8 years ago |
Romina Suarez | 39f5fcc17a | 8 years ago |
Guillermo | 35c546d062 | 8 years ago |
Guillermo | 52ddac57f3 | 8 years ago |
Kevin Kwok | 3645946a5d | 8 years ago |
hemanth.hm | 64d2b8cb85 | 8 years ago |
Rohit Jha | 7fb328e3e6 | 8 years ago |
bijection | 915fadb9bc | 8 years ago |
Hemanth.HM | 32565dc725 | 8 years ago |
Kevin Kwok | ba2841329d | 8 years ago |
Kevin Kwok | a5cde75980 | 8 years ago |
Kevin Kwok | 15058e036c | 8 years ago |
Kevin Kwok | c09d4655b5 | 8 years ago |
Kevin Kwok | 9d13b2339f | 8 years ago |
Kevin Kwok | 2b141f8aa5 | 8 years ago |
Kevin Kwok | acc2b835ce | 8 years ago |
Kevin Kwok | 2733c9a030 | 8 years ago |
Kevin Kwok | 9a9db10771 | 8 years ago |
Kevin Kwok | ea54eaa23f | 8 years ago |
Guillermo | 752038921e | 8 years ago |
Kevin Kwok | b20c4a8c6e | 8 years ago |
bijection | 46add665c0 | 8 years ago |
bijection | d8a30989c0 | 8 years ago |
Hector Leon Zarco Garcia | 122b74d7d7 | 8 years ago |
Your Name | b04e4ccf55 | 8 years ago |
Your Name | 91b326981f | 8 years ago |
Nick Heindl | 0e50365683 | 8 years ago |
bijection | 471ed0997a | 8 years ago |
bijection | b38b3a8c1f | 8 years ago |
bijection | e9735cdc9c | 8 years ago |
Hector Leon Zarco Garcia | 800c83440d | 8 years ago |
Hector Leon Zarco Garcia | 87643df023 | 8 years ago |
Andrew Han | 5a71378efc | 8 years ago |
Andrew Murray | 162deeebdb | 8 years ago |
bijection | b3cc0c0293 | 8 years ago |
Andre Bluehs | 7a47cdaa98 | 8 years ago |
Guillermo | 95769915be | 8 years ago |
Guillermo | 3fb3d2f45b | 8 years ago |
Guillermo | edfa1d8b4f | 8 years ago |
Guillermo | 7a872d0367 | 8 years ago |
Guillermo | c26cae7ee9 | 8 years ago |
bijection | 8434f59977 | 8 years ago |
Guillermo | 8585b7fa83 | 8 years ago |
Guillermo | 287e8289d4 | 8 years ago |
Kevin Kwok | 41a7b4d427 | 8 years ago |
Kevin Kwok | 048384951f | 8 years ago |
Kevin Kwok | ada7e8364c | 8 years ago |
Kevin Kwok | 5886ebba1f | 8 years ago |
Kevin Kwok | 681d08c1d5 | 8 years ago |
Kevin Kwok | 1ef222cca8 | 8 years ago |
Kevin Kwok | 16685bab65 | 8 years ago |
Kevin Kwok | 45aa21603c | 8 years ago |
Kevin Kwok | 872284b112 | 8 years ago |
Kevin Kwok | 8e88eaca5e | 8 years ago |
Kevin Kwok | 1a7429d022 | 8 years ago |
Kevin Kwok | 7bf3725fc9 | 8 years ago |
Kevin Kwok | 036f8debfc | 8 years ago |
Guillermo | ab13d7b891 | 8 years ago |
Guillermo | d8f3db3d91 | 8 years ago |
Guillermo | d296f16314 | 8 years ago |
Guillermo | 22b3ab8423 | 8 years ago |
Guillermo | 34d4fb799d | 8 years ago |
Guillermo | 0acbb71e66 | 8 years ago |
Guillermo | e8a286988f | 8 years ago |
Guillermo | e5c8df3aac | 8 years ago |
Guillermo | 274c54db1e | 8 years ago |
Guillermo | c61ee098d7 | 8 years ago |
Guillermo | c7f6364710 | 8 years ago |
Guillermo | 2fa9421ced | 8 years ago |
Guillermo | 4b43a0ef04 | 8 years ago |
Guillermo | 3042a8ca06 | 8 years ago |
Guillermo | f67374a03c | 8 years ago |
Guillermo | 7675a98673 | 8 years ago |
Kevin Kwok | dbf43bbe60 | 8 years ago |
Kevin Kwok | 090d0c786e | 8 years ago |
Kevin Kwok | 1331ef8f18 | 8 years ago |
Kevin Kwok | cb6419203f | 8 years ago |
Kevin Kwok | c2a06b4b12 | 8 years ago |
Kevin Kwok | 16d9e9d088 | 8 years ago |
Kevin Kwok | 8817639572 | 8 years ago |
Kevin Kwok | b7dd138ea4 | 8 years ago |
Guillermo | c5e3aeb5ef | 8 years ago |
Guillermo | e25f31aac5 | 8 years ago |
Kevin Kwok | 4d120482ba | 8 years ago |
Kevin Kwok | 973ed2fed5 | 8 years ago |
Kevin Kwok | 19ac89478a | 8 years ago |
Kevin Kwok | 7c66c3844c | 8 years ago |
Kevin Kwok | 699c029e89 | 8 years ago |
Kevin Kwok | c5a1726e1d | 8 years ago |
Guillermo | b5267918fd | 8 years ago |
Guillermo | d16101b686 | 8 years ago |
Guillermo | ca7f4966e4 | 8 years ago |
Guillermo | 9b1d09a72e | 8 years ago |
Guillermo | 5972594575 | 8 years ago |
Guillermo | 0c6f96de8d | 8 years ago |
Guillermo | 6a92dd3a94 | 8 years ago |
Guillermo | 5ee01d4265 | 8 years ago |
Guillermo | c88a769818 | 8 years ago |
Guillermo | 9644f87640 | 8 years ago |
Guillermo | 7d978ac813 | 8 years ago |
Guillermo | 26c0f4cc30 | 8 years ago |
Guillermo | f8b9d2e70e | 8 years ago |
Guillermo | b1aa51da67 | 8 years ago |
Guillermo | 87858dd8b6 | 8 years ago |
Guillermo | b09665cf22 | 8 years ago |
Guillermo | fb476b24b5 | 8 years ago |
Guillermo | 628febf0a6 | 8 years ago |
Guillermo | a770a67911 | 8 years ago |
Guillermo | 3501505423 | 8 years ago |
Kevin Kwok | 733328e99b | 8 years ago |
Guillermo | d31667831a | 8 years ago |
Guillermo | df676ff110 | 8 years ago |
Guillermo | 1594f7528d | 8 years ago |
Guillermo | 82ea60d2c3 | 8 years ago |
Guillermo | 3216553d62 | 8 years ago |
Guillermo | 8cdbecd085 | 8 years ago |
Guillermo | 549b2c1238 | 8 years ago |
Guillermo | 16b120b178 | 8 years ago |
Guillermo | d09e1930fc | 8 years ago |
Guillermo | 4eed6f5137 | 8 years ago |
Guillermo | 21b3136a83 | 8 years ago |
Guillermo | a91acffec6 | 8 years ago |
Guillermo | 2e6757da81 | 8 years ago |
Guillermo | 0e88969f0a | 8 years ago |
Guillermo | 905427fe95 | 8 years ago |
Guillermo | debcb1cd7a | 8 years ago |
Guillermo | 491cc2a312 | 8 years ago |
Guillermo | 0ee94b2169 | 8 years ago |
Guillermo | 4a8ee3729d | 8 years ago |
Guillermo | 2a98b0ac07 | 8 years ago |
Guillermo | 32f62de7ad | 8 years ago |
Guillermo | 886a3d6704 | 8 years ago |
Guillermo | 2f4e85237d | 8 years ago |
Guillermo | dac0fdea3e | 8 years ago |
Guillermo | 9aac85ac4f | 8 years ago |
Guillermo | dff5b07613 | 8 years ago |
Guillermo | 4eda874325 | 8 years ago |
Guillermo | a91dce8b16 | 8 years ago |
Guillermo | 2b679123fa | 8 years ago |
Guillermo | d23b6d8728 | 8 years ago |
Guillermo | 37e6de16bb | 8 years ago |
Guillermo | c80038b6cb | 8 years ago |
Guillermo | 4741312b3e | 8 years ago |
Guillermo | 53b6c3ab6e | 8 years ago |
Guillermo | c4ceacea5f | 8 years ago |
Guillermo | b7bec0f1b1 | 8 years ago |
Guillermo | 3dc7322e67 | 8 years ago |
Guillermo | fbb830e061 | 8 years ago |
Guillermo | 0e8094afa7 | 8 years ago |
Guillermo | f5fc87638c | 8 years ago |
Guillermo | 2e657b9f81 | 8 years ago |
Guillermo | 93e5b706b3 | 8 years ago |
Guillermo | 94ca863eb6 | 8 years ago |
Guillermo | 15820de3b2 | 8 years ago |
Guillermo | 6117345bec | 8 years ago |
Guillermo | 1a6c9236bc | 8 years ago |
Guillermo | 01238cb300 | 8 years ago |
Guillermo | 1e9b4748d4 | 8 years ago |
Guillermo | 28bf468cb4 | 8 years ago |
Guillermo | 20fea90b05 | 8 years ago |
Guillermo | 35f450efca | 8 years ago |
Guillermo | b130433437 | 8 years ago |
Guillermo | 1fabf60105 | 10 years ago |
Guillermo | b26adef7c6 | 10 years ago |
Guillermo | 52e64abcd9 | 10 years ago |
Guillermo | 6f219d7ed7 | 10 years ago |
Guillermo | 167746a418 | 10 years ago |
Guillermo | 6c539e5d60 | 10 years ago |
Guillermo | 4648367569 | 10 years ago |
Guillermo | 9b38d4f91b | 10 years ago |
Guillermo | 4738187d5b | 10 years ago |
Guillermo | e9423bb032 | 10 years ago |
Guillermo | d7c713386d | 10 years ago |
Guillermo | 27150fa4cd | 10 years ago |
Guillermo | 52e67dce57 | 10 years ago |
Guillermo | 1cba6a36be | 10 years ago |
Guillermo | 85304c54fb | 10 years ago |
Guillermo | f7f13245ba | 10 years ago |
Guillermo | 422e71dc4b | 10 years ago |
Guillermo | d85281eb3f | 10 years ago |
Guillermo | 71fbe3d8ea | 10 years ago |
Guillermo | 61f98fb036 | 10 years ago |
Guillermo | 7b239c67d0 | 10 years ago |
Guillermo | 1df4b5a752 | 10 years ago |
Kevin Kwok | 38ce3353ef | 10 years ago |
Kevin Kwok | 20e7916975 | 10 years ago |
Guillermo | c2df75e589 | 10 years ago |
Guillermo | f237763f29 | 10 years ago |
Guillermo | 3901492461 | 10 years ago |
Guillermo | c7771ed2cc | 10 years ago |
Guillermo | 7a207402bd | 10 years ago |
Guillermo | f056c1702b | 10 years ago |
Guillermo | 36bd7636e0 | 10 years ago |
Guillermo | 5fc117bb10 | 10 years ago |
Guillermo | b6b1b4a2c1 | 10 years ago |
Guillermo | 601605a0b9 | 10 years ago |
Guillermo | 6e89a557b4 | 10 years ago |
Guillermo | 127ade5d91 | 10 years ago |
Guillermo | 2c059becca | 10 years ago |
Guillermo | 8fe135af7f | 10 years ago |
Guillermo | 29d7e56d20 | 10 years ago |
Guillermo | 690dbab1d3 | 10 years ago |
Guillermo | 1636e1674e | 10 years ago |
Guillermo | b990b06c44 | 10 years ago |
Guillermo | 4b3b4c3f21 | 10 years ago |
@ -0,0 +1,17 @@ |
|||||||
|
{ |
||||||
|
"extends": "airbnb-base", |
||||||
|
"parser": "babel-eslint", |
||||||
|
"env": { |
||||||
|
"browser": true, |
||||||
|
"node": true, |
||||||
|
"mocha": true, |
||||||
|
"worker": true |
||||||
|
}, |
||||||
|
"rules": { |
||||||
|
"no-underscore-dangle": 0, |
||||||
|
"no-console": 0, |
||||||
|
"global-require": 0, |
||||||
|
"camelcase": 0, |
||||||
|
"no-control-regex": 0 |
||||||
|
} |
||||||
|
} |
@ -0,0 +1,9 @@ |
|||||||
|
# These are supported funding model platforms |
||||||
|
|
||||||
|
github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] |
||||||
|
patreon: # Replace with a single Patreon username |
||||||
|
open_collective: tesseractjs |
||||||
|
ko_fi: # Replace with a single Ko-fi username |
||||||
|
tidelift: npm/tesseract.js |
||||||
|
community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry |
||||||
|
custom: ["https://etherscan.io/address/0x74ace8c74535d6dac03ebdc708ca2fba54796ef2"] |
@ -0,0 +1,38 @@ |
|||||||
|
--- |
||||||
|
name: Bug report |
||||||
|
about: Create a report to help us improve |
||||||
|
title: '' |
||||||
|
labels: '' |
||||||
|
assignees: '' |
||||||
|
|
||||||
|
--- |
||||||
|
|
||||||
|
**Describe the bug** |
||||||
|
A clear and concise description of what the bug is. |
||||||
|
|
||||||
|
**To Reproduce** |
||||||
|
Steps to reproduce the behavior: |
||||||
|
1. Go to '...' |
||||||
|
2. Click on '....' |
||||||
|
3. Scroll down to '....' |
||||||
|
4. See error |
||||||
|
|
||||||
|
**Expected behavior** |
||||||
|
A clear and concise description of what you expected to happen. |
||||||
|
|
||||||
|
**Screenshots** |
||||||
|
If applicable, add screenshots to help explain your problem. |
||||||
|
|
||||||
|
**Desktop (please complete the following information):** |
||||||
|
- OS: [e.g. iOS] |
||||||
|
- Browser: [e.g. chrome, safari] |
||||||
|
- Version: [e.g. 22] |
||||||
|
|
||||||
|
**Smartphone (please complete the following information):** |
||||||
|
- Device: [e.g. iPhone6] |
||||||
|
- OS: [e.g. iOS8.1] |
||||||
|
- Browser: [e.g. stock browser, safari] |
||||||
|
- Version: [e.g. 22] |
||||||
|
|
||||||
|
**Additional context** |
||||||
|
Add any other context about the problem here. |
@ -0,0 +1,20 @@ |
|||||||
|
--- |
||||||
|
name: Feature request |
||||||
|
about: Suggest an idea for this project |
||||||
|
title: '' |
||||||
|
labels: '' |
||||||
|
assignees: '' |
||||||
|
|
||||||
|
--- |
||||||
|
|
||||||
|
**Is your feature request related to a problem? Please describe.** |
||||||
|
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] |
||||||
|
|
||||||
|
**Describe the solution you'd like** |
||||||
|
A clear and concise description of what you want to happen. |
||||||
|
|
||||||
|
**Describe alternatives you've considered** |
||||||
|
A clear and concise description of any alternative solutions or features you've considered. |
||||||
|
|
||||||
|
**Additional context** |
||||||
|
Add any other context or screenshots about the feature request here. |
@ -0,0 +1,5 @@ |
|||||||
|
## Security contact information |
||||||
|
|
||||||
|
To report a security vulnerability, please use the |
||||||
|
[Tidelift security contact](https://tidelift.com/security). |
||||||
|
Tidelift will coordinate the fix and disclosure. |
@ -0,0 +1,29 @@ |
|||||||
|
# This workflow does a clean install of Node.js dependencies, lints the source code, and runs the tests across different versions of Node.js |
||||||
|
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-nodejs-with-github-actions |
||||||
|
|
||||||
|
name: Node.js CI |
||||||
|
|
||||||
|
on: |
||||||
|
push: |
||||||
|
branches: [ master ] |
||||||
|
pull_request: |
||||||
|
branches: [ master ] |
||||||
|
|
||||||
|
jobs: |
||||||
|
build: |
||||||
|
|
||||||
|
runs-on: ubuntu-latest |
||||||
|
|
||||||
|
strategy: |
||||||
|
matrix: |
||||||
|
node-version: [14.x, 16.x] |
||||||
|
|
||||||
|
steps: |
||||||
|
- uses: actions/checkout@v2 |
||||||
|
- name: Use Node.js ${{ matrix.node-version }} |
||||||
|
uses: actions/setup-node@v1 |
||||||
|
with: |
||||||
|
node-version: ${{ matrix.node-version }} |
||||||
|
- run: npm ci |
||||||
|
- run: npm run lint |
||||||
|
- run: npm test |
@ -1,2 +1,10 @@ |
|||||||
.DS_Store |
.DS_Store |
||||||
node_modules/* |
node_modules/* |
||||||
|
yarn.lock |
||||||
|
tesseract.dev.js |
||||||
|
worker.dev.js |
||||||
|
/*.traineddata |
||||||
|
/examples/**/*.traineddata |
||||||
|
.nyc_output |
||||||
|
dist/ |
||||||
|
*.swp |
||||||
|
@ -0,0 +1,2 @@ |
|||||||
|
FROM gitpod/workspace-full |
||||||
|
RUN sudo apt-get update && sudo apt-get install -y libgtk-3-0 libx11-xcb1 libnss3 libxss1 libasound2 |
@ -0,0 +1,9 @@ |
|||||||
|
image: |
||||||
|
file: .gitpod.Dockerfile |
||||||
|
tasks: |
||||||
|
- command: gp await-port 3000 && sleep 3 && gp preview $(gp url 3000)/examples/browser/demo.html |
||||||
|
- init: npm install |
||||||
|
command: npm start |
||||||
|
ports: |
||||||
|
- port: 3000 |
||||||
|
onOpen: ignore |
@ -0,0 +1,201 @@ |
|||||||
|
Apache License |
||||||
|
Version 2.0, January 2004 |
||||||
|
http://www.apache.org/licenses/ |
||||||
|
|
||||||
|
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION |
||||||
|
|
||||||
|
1. Definitions. |
||||||
|
|
||||||
|
"License" shall mean the terms and conditions for use, reproduction, |
||||||
|
and distribution as defined by Sections 1 through 9 of this document. |
||||||
|
|
||||||
|
"Licensor" shall mean the copyright owner or entity authorized by |
||||||
|
the copyright owner that is granting the License. |
||||||
|
|
||||||
|
"Legal Entity" shall mean the union of the acting entity and all |
||||||
|
other entities that control, are controlled by, or are under common |
||||||
|
control with that entity. For the purposes of this definition, |
||||||
|
"control" means (i) the power, direct or indirect, to cause the |
||||||
|
direction or management of such entity, whether by contract or |
||||||
|
otherwise, or (ii) ownership of fifty percent (50%) or more of the |
||||||
|
outstanding shares, or (iii) beneficial ownership of such entity. |
||||||
|
|
||||||
|
"You" (or "Your") shall mean an individual or Legal Entity |
||||||
|
exercising permissions granted by this License. |
||||||
|
|
||||||
|
"Source" form shall mean the preferred form for making modifications, |
||||||
|
including but not limited to software source code, documentation |
||||||
|
source, and configuration files. |
||||||
|
|
||||||
|
"Object" form shall mean any form resulting from mechanical |
||||||
|
transformation or translation of a Source form, including but |
||||||
|
not limited to compiled object code, generated documentation, |
||||||
|
and conversions to other media types. |
||||||
|
|
||||||
|
"Work" shall mean the work of authorship, whether in Source or |
||||||
|
Object form, made available under the License, as indicated by a |
||||||
|
copyright notice that is included in or attached to the work |
||||||
|
(an example is provided in the Appendix below). |
||||||
|
|
||||||
|
"Derivative Works" shall mean any work, whether in Source or Object |
||||||
|
form, that is based on (or derived from) the Work and for which the |
||||||
|
editorial revisions, annotations, elaborations, or other modifications |
||||||
|
represent, as a whole, an original work of authorship. For the purposes |
||||||
|
of this License, Derivative Works shall not include works that remain |
||||||
|
separable from, or merely link (or bind by name) to the interfaces of, |
||||||
|
the Work and Derivative Works thereof. |
||||||
|
|
||||||
|
"Contribution" shall mean any work of authorship, including |
||||||
|
the original version of the Work and any modifications or additions |
||||||
|
to that Work or Derivative Works thereof, that is intentionally |
||||||
|
submitted to Licensor for inclusion in the Work by the copyright owner |
||||||
|
or by an individual or Legal Entity authorized to submit on behalf of |
||||||
|
the copyright owner. For the purposes of this definition, "submitted" |
||||||
|
means any form of electronic, verbal, or written communication sent |
||||||
|
to the Licensor or its representatives, including but not limited to |
||||||
|
communication on electronic mailing lists, source code control systems, |
||||||
|
and issue tracking systems that are managed by, or on behalf of, the |
||||||
|
Licensor for the purpose of discussing and improving the Work, but |
||||||
|
excluding communication that is conspicuously marked or otherwise |
||||||
|
designated in writing by the copyright owner as "Not a Contribution." |
||||||
|
|
||||||
|
"Contributor" shall mean Licensor and any individual or Legal Entity |
||||||
|
on behalf of whom a Contribution has been received by Licensor and |
||||||
|
subsequently incorporated within the Work. |
||||||
|
|
||||||
|
2. Grant of Copyright License. Subject to the terms and conditions of |
||||||
|
this License, each Contributor hereby grants to You a perpetual, |
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable |
||||||
|
copyright license to reproduce, prepare Derivative Works of, |
||||||
|
publicly display, publicly perform, sublicense, and distribute the |
||||||
|
Work and such Derivative Works in Source or Object form. |
||||||
|
|
||||||
|
3. Grant of Patent License. Subject to the terms and conditions of |
||||||
|
this License, each Contributor hereby grants to You a perpetual, |
||||||
|
worldwide, non-exclusive, no-charge, royalty-free, irrevocable |
||||||
|
(except as stated in this section) patent license to make, have made, |
||||||
|
use, offer to sell, sell, import, and otherwise transfer the Work, |
||||||
|
where such license applies only to those patent claims licensable |
||||||
|
by such Contributor that are necessarily infringed by their |
||||||
|
Contribution(s) alone or by combination of their Contribution(s) |
||||||
|
with the Work to which such Contribution(s) was submitted. If You |
||||||
|
institute patent litigation against any entity (including a |
||||||
|
cross-claim or counterclaim in a lawsuit) alleging that the Work |
||||||
|
or a Contribution incorporated within the Work constitutes direct |
||||||
|
or contributory patent infringement, then any patent licenses |
||||||
|
granted to You under this License for that Work shall terminate |
||||||
|
as of the date such litigation is filed. |
||||||
|
|
||||||
|
4. Redistribution. You may reproduce and distribute copies of the |
||||||
|
Work or Derivative Works thereof in any medium, with or without |
||||||
|
modifications, and in Source or Object form, provided that You |
||||||
|
meet the following conditions: |
||||||
|
|
||||||
|
(a) You must give any other recipients of the Work or |
||||||
|
Derivative Works a copy of this License; and |
||||||
|
|
||||||
|
(b) You must cause any modified files to carry prominent notices |
||||||
|
stating that You changed the files; and |
||||||
|
|
||||||
|
(c) You must retain, in the Source form of any Derivative Works |
||||||
|
that You distribute, all copyright, patent, trademark, and |
||||||
|
attribution notices from the Source form of the Work, |
||||||
|
excluding those notices that do not pertain to any part of |
||||||
|
the Derivative Works; and |
||||||
|
|
||||||
|
(d) If the Work includes a "NOTICE" text file as part of its |
||||||
|
distribution, then any Derivative Works that You distribute must |
||||||
|
include a readable copy of the attribution notices contained |
||||||
|
within such NOTICE file, excluding those notices that do not |
||||||
|
pertain to any part of the Derivative Works, in at least one |
||||||
|
of the following places: within a NOTICE text file distributed |
||||||
|
as part of the Derivative Works; within the Source form or |
||||||
|
documentation, if provided along with the Derivative Works; or, |
||||||
|
within a display generated by the Derivative Works, if and |
||||||
|
wherever such third-party notices normally appear. The contents |
||||||
|
of the NOTICE file are for informational purposes only and |
||||||
|
do not modify the License. You may add Your own attribution |
||||||
|
notices within Derivative Works that You distribute, alongside |
||||||
|
or as an addendum to the NOTICE text from the Work, provided |
||||||
|
that such additional attribution notices cannot be construed |
||||||
|
as modifying the License. |
||||||
|
|
||||||
|
You may add Your own copyright statement to Your modifications and |
||||||
|
may provide additional or different license terms and conditions |
||||||
|
for use, reproduction, or distribution of Your modifications, or |
||||||
|
for any such Derivative Works as a whole, provided Your use, |
||||||
|
reproduction, and distribution of the Work otherwise complies with |
||||||
|
the conditions stated in this License. |
||||||
|
|
||||||
|
5. Submission of Contributions. Unless You explicitly state otherwise, |
||||||
|
any Contribution intentionally submitted for inclusion in the Work |
||||||
|
by You to the Licensor shall be under the terms and conditions of |
||||||
|
this License, without any additional terms or conditions. |
||||||
|
Notwithstanding the above, nothing herein shall supersede or modify |
||||||
|
the terms of any separate license agreement you may have executed |
||||||
|
with Licensor regarding such Contributions. |
||||||
|
|
||||||
|
6. Trademarks. This License does not grant permission to use the trade |
||||||
|
names, trademarks, service marks, or product names of the Licensor, |
||||||
|
except as required for reasonable and customary use in describing the |
||||||
|
origin of the Work and reproducing the content of the NOTICE file. |
||||||
|
|
||||||
|
7. Disclaimer of Warranty. Unless required by applicable law or |
||||||
|
agreed to in writing, Licensor provides the Work (and each |
||||||
|
Contributor provides its Contributions) on an "AS IS" BASIS, |
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or |
||||||
|
implied, including, without limitation, any warranties or conditions |
||||||
|
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A |
||||||
|
PARTICULAR PURPOSE. You are solely responsible for determining the |
||||||
|
appropriateness of using or redistributing the Work and assume any |
||||||
|
risks associated with Your exercise of permissions under this License. |
||||||
|
|
||||||
|
8. Limitation of Liability. In no event and under no legal theory, |
||||||
|
whether in tort (including negligence), contract, or otherwise, |
||||||
|
unless required by applicable law (such as deliberate and grossly |
||||||
|
negligent acts) or agreed to in writing, shall any Contributor be |
||||||
|
liable to You for damages, including any direct, indirect, special, |
||||||
|
incidental, or consequential damages of any character arising as a |
||||||
|
result of this License or out of the use or inability to use the |
||||||
|
Work (including but not limited to damages for loss of goodwill, |
||||||
|
work stoppage, computer failure or malfunction, or any and all |
||||||
|
other commercial damages or losses), even if such Contributor |
||||||
|
has been advised of the possibility of such damages. |
||||||
|
|
||||||
|
9. Accepting Warranty or Additional Liability. While redistributing |
||||||
|
the Work or Derivative Works thereof, You may choose to offer, |
||||||
|
and charge a fee for, acceptance of support, warranty, indemnity, |
||||||
|
or other liability obligations and/or rights consistent with this |
||||||
|
License. However, in accepting such obligations, You may act only |
||||||
|
on Your own behalf and on Your sole responsibility, not on behalf |
||||||
|
of any other Contributor, and only if You agree to indemnify, |
||||||
|
defend, and hold each Contributor harmless for any liability |
||||||
|
incurred by, or claims asserted against, such Contributor by reason |
||||||
|
of your accepting any such warranty or additional liability. |
||||||
|
|
||||||
|
END OF TERMS AND CONDITIONS |
||||||
|
|
||||||
|
APPENDIX: How to apply the Apache License to your work. |
||||||
|
|
||||||
|
To apply the Apache License to your work, attach the following |
||||||
|
boilerplate notice, with the fields enclosed by brackets "{}" |
||||||
|
replaced with your own identifying information. (Don't include |
||||||
|
the brackets!) The text should be enclosed in the appropriate |
||||||
|
comment syntax for the file format. We also recommend that a |
||||||
|
file or class name and description of purpose be included on the |
||||||
|
same "printed page" as the copyright notice for easier |
||||||
|
identification within third-party archives. |
||||||
|
|
||||||
|
Copyright {yyyy} {name of copyright owner} |
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License"); |
||||||
|
you may not use this file except in compliance with the License. |
||||||
|
You may obtain a copy of the License at |
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0 |
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software |
||||||
|
distributed under the License is distributed on an "AS IS" BASIS, |
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||||
|
See the License for the specific language governing permissions and |
||||||
|
limitations under the License. |
@ -1 +1,197 @@ |
|||||||
# tesseract.js |
<p align="center"> |
||||||
|
<a href="https://tesseract.projectnaptha.com/"><img width="256px" height="256px" alt="Tesseract.js" src="./docs/images/tesseract.png"></a> |
||||||
|
</p> |
||||||
|
|
||||||
|
![Lint & Test](https://github.com/naptha/tesseract.js/workflows/Node.js%20CI/badge.svg) |
||||||
|
![CodeQL](https://github.com/naptha/tesseract.js/workflows/CodeQL/badge.svg) |
||||||
|
[![Gitpod Ready-to-Code](https://img.shields.io/badge/Gitpod-ready--to--code-blue?logo=gitpod)](https://github.com/naptha/tesseract.js) |
||||||
|
[![Financial Contributors on Open Collective](https://opencollective.com/tesseractjs/all/badge.svg?label=financial+contributors)](https://opencollective.com/tesseractjs) [![npm version](https://badge.fury.io/js/tesseract.js.svg)](https://badge.fury.io/js/tesseract.js) |
||||||
|
[![Maintenance](https://img.shields.io/badge/Maintained%3F-yes-green.svg)](https://github.com/naptha/tesseract.js/graphs/commit-activity) |
||||||
|
[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) |
||||||
|
[![Code Style](https://badgen.net/badge/code%20style/airbnb/ff5a5f?icon=airbnb)](https://github.com/airbnb/javascript) |
||||||
|
[![Downloads Total](https://img.shields.io/npm/dt/tesseract.js.svg)](https://www.npmjs.com/package/tesseract.js) |
||||||
|
[![Downloads Month](https://img.shields.io/npm/dm/tesseract.js.svg)](https://www.npmjs.com/package/tesseract.js) |
||||||
|
|
||||||
|
Tesseract.js is a JavaScript library that gets words in [almost any language](./docs/tesseract_lang_list.md) out of images. ([Demo](http://tesseract.projectnaptha.com/)) |
||||||
|
|
||||||
|
Image Recognition |
||||||
|
|
||||||
|
[![fancy demo gif](./docs/images/demo.gif)](http://tesseract.projectnaptha.com) |
||||||
|
|
||||||
|
Video Real-time Recognition |
||||||
|
|
||||||
|
<p align="center"> |
||||||
|
<a href="https://github.com/jeromewu/tesseract.js-video"><img alt="Tesseract.js Video" src="./docs/images/video-demo.gif"></a> |
||||||
|
</p> |
||||||
|
|
||||||
|
|
||||||
|
Tesseract.js wraps an [emscripten](https://github.com/kripken/emscripten) [port](https://github.com/naptha/tesseract.js-core) of the [Tesseract](https://github.com/tesseract-ocr/tesseract) [OCR](https://en.wikipedia.org/wiki/Optical_character_recognition) Engine. |
||||||
|
It works in the browser using [webpack](https://webpack.js.org/) or plain script tags with a [CDN](#CDN) and on the server with [Node.js](https://nodejs.org/en/). |
||||||
|
After you [install it](#installation), using it is as simple as: |
||||||
|
|
||||||
|
```javascript |
||||||
|
import Tesseract from 'tesseract.js'; |
||||||
|
|
||||||
|
Tesseract.recognize( |
||||||
|
'https://tesseract.projectnaptha.com/img/eng_bw.png', |
||||||
|
'eng', |
||||||
|
{ logger: m => console.log(m) } |
||||||
|
).then(({ data: { text } }) => { |
||||||
|
console.log(text); |
||||||
|
}) |
||||||
|
``` |
||||||
|
|
||||||
|
Or more imperative |
||||||
|
|
||||||
|
```javascript |
||||||
|
import { createWorker } from 'tesseract.js'; |
||||||
|
|
||||||
|
const worker = createWorker({ |
||||||
|
logger: m => console.log(m) |
||||||
|
}); |
||||||
|
|
||||||
|
(async () => { |
||||||
|
await worker.load(); |
||||||
|
await worker.loadLanguage('eng'); |
||||||
|
await worker.initialize('eng'); |
||||||
|
const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png'); |
||||||
|
console.log(text); |
||||||
|
await worker.terminate(); |
||||||
|
})(); |
||||||
|
``` |
||||||
|
|
||||||
|
[Check out the docs](#documentation) for a full explanation of the API. |
||||||
|
|
||||||
|
## Major changes in v3 |
||||||
|
- Significantly faster performance |
||||||
|
- Runtime reduction of 84% for Browser and 96% for Node.js when recognizing the [example images](./examples/data) |
||||||
|
- Upgrade to Tesseract v5.1.0 (using emscripten 3.1.18) |
||||||
|
- Added SIMD-enabled build for supported devices |
||||||
|
- Added support: |
||||||
|
- Node.js version 18 |
||||||
|
- Removed support: |
||||||
|
- ASM.js version, any other old versions of Tesseract.js-core (<3.0.0) |
||||||
|
- Node.js versions 10 and 12 |
||||||
|
|
||||||
|
## Major changes in v2 |
||||||
|
- Upgrade to tesseract v4.1.1 (using emscripten 1.39.10 upstream) |
||||||
|
- Support multiple languages at the same time, eg: eng+chi\_tra for English and Traditional Chinese |
||||||
|
- Supported image formats: png, jpg, bmp, pbm |
||||||
|
- Support WebAssembly (fallback to ASM.js when browser doesn't support) |
||||||
|
- Support Typescript |
||||||
|
|
||||||
|
Read a story about v2: <a href="https://jeromewu.github.io/why-i-refactor-tesseract.js-v2/">Why I refactor tesseract.js v2?</a><br> |
||||||
|
Check the <a href="https://github.com/naptha/tesseract.js/tree/support/1.x">support/1.x</a> branch for version 1 |
||||||
|
## Installation |
||||||
|
Tesseract.js works with a `<script>` tag via local copy or CDN, with webpack via `npm` and on Node.js with `npm/yarn`. |
||||||
|
|
||||||
|
|
||||||
|
### CDN |
||||||
|
```html |
||||||
|
<!-- v2 --> |
||||||
|
<script src='https://unpkg.com/tesseract.js@v2.1.0/dist/tesseract.min.js'></script> |
||||||
|
|
||||||
|
<!-- v1 --> |
||||||
|
<script src='https://unpkg.com/tesseract.js@1.0.19/src/index.js'></script> |
||||||
|
``` |
||||||
|
After including the script the `Tesseract` variable will be globally available. |
||||||
|
|
||||||
|
|
||||||
|
### Node.js |
||||||
|
|
||||||
|
**Tesseract.js v3 requires Node.js v14 or higher** |
||||||
|
|
||||||
|
```shell |
||||||
|
# For v3 |
||||||
|
npm install tesseract.js |
||||||
|
yarn add tesseract.js |
||||||
|
|
||||||
|
# For v2 |
||||||
|
npm install tesseract.js@2 |
||||||
|
yarn add tesseract.js@2 |
||||||
|
``` |
||||||
|
|
||||||
|
|
||||||
|
## Documentation |
||||||
|
|
||||||
|
* [Examples](./docs/examples.md) |
||||||
|
* [Image Format](./docs/image-format.md) |
||||||
|
* [API](./docs/api.md) |
||||||
|
* [Local Installation](./docs/local-installation.md) |
||||||
|
* [FAQ](./docs/faq.md) |
||||||
|
|
||||||
|
## Use tesseract.js the way you like! |
||||||
|
|
||||||
|
- Offline Version: https://github.com/jeromewu/tesseract.js-offline |
||||||
|
- Electron Version: https://github.com/jeromewu/tesseract.js-electron |
||||||
|
- Custom Traineddata: https://github.com/jeromewu/tesseract.js-custom-traineddata |
||||||
|
- Chrome Extension #1: https://github.com/jeromewu/tesseract.js-chrome-extension |
||||||
|
- Chrome Extension #2: https://github.com/fxnoob/image-to-text |
||||||
|
- Firefox Extension: https://github.com/gnonio/korporize |
||||||
|
- With Vue: https://github.com/jeromewu/tesseract.js-vue-app |
||||||
|
- With Angular: https://github.com/jeromewu/tesseract.js-angular-app |
||||||
|
- With React: https://github.com/jeromewu/tesseract.js-react-app |
||||||
|
- Typescript: https://github.com/jeromewu/tesseract.js-typescript |
||||||
|
- Video Real-time Recognition: https://github.com/jeromewu/tesseract.js-video |
||||||
|
|
||||||
|
## Contributing |
||||||
|
|
||||||
|
### Development |
||||||
|
To run a development copy of Tesseract.js do the following: |
||||||
|
```shell |
||||||
|
# First we clone the repository |
||||||
|
git clone https://github.com/naptha/tesseract.js.git |
||||||
|
cd tesseract.js |
||||||
|
|
||||||
|
# Then we install the dependencies |
||||||
|
npm install |
||||||
|
|
||||||
|
# And finally we start the development server |
||||||
|
npm start |
||||||
|
``` |
||||||
|
|
||||||
|
The development server will be available at http://localhost:3000/examples/browser/demo.html in your favorite browser. |
||||||
|
It will automatically rebuild `tesseract.dev.js` and `worker.dev.js` when you change files in the **src** folder. |
||||||
|
|
||||||
|
### Online Setup with a single Click |
||||||
|
|
||||||
|
You can use Gitpod (a free online VS Code-like IDE) for contributing. With a single click it will launch a ready-to-code workspace with the build & start scripts already in process, and within a few seconds it will spin up the dev server so that you can start contributing straight away without wasting any time. |
||||||
|
|
||||||
|
[![Open in Gitpod](https://gitpod.io/button/open-in-gitpod.svg)](https://gitpod.io/#https://github.com/naptha/tesseract.js/blob/master/examples/browser/demo.html) |
||||||
|
|
||||||
|
### Building Static Files |
||||||
|
To build the compiled static files just execute the following: |
||||||
|
```shell |
||||||
|
npm run build |
||||||
|
``` |
||||||
|
This will output the files into the `dist` directory. |
||||||
|
|
||||||
|
## Contributors |
||||||
|
|
||||||
|
### Code Contributors |
||||||
|
|
||||||
|
This project exists thanks to all the people who contribute. [[Contribute](CONTRIBUTING.md)]. |
||||||
|
<a href="https://github.com/naptha/tesseract.js/graphs/contributors"><img src="https://opencollective.com/tesseractjs/contributors.svg?width=890&button=false" /></a> |
||||||
|
|
||||||
|
### Financial Contributors |
||||||
|
|
||||||
|
Become a financial contributor and help us sustain our community. [[Contribute](https://opencollective.com/tesseractjs/contribute)] |
||||||
|
|
||||||
|
#### Individuals |
||||||
|
|
||||||
|
<a href="https://opencollective.com/tesseractjs"><img src="https://opencollective.com/tesseractjs/individuals.svg?width=890"></a> |
||||||
|
|
||||||
|
#### Organizations |
||||||
|
|
||||||
|
Support this project with your organization. Your logo will show up here with a link to your website. [[Contribute](https://opencollective.com/tesseractjs/contribute)] |
||||||
|
|
||||||
|
<a href="https://opencollective.com/tesseractjs/organization/0/website"><img src="https://opencollective.com/tesseractjs/organization/0/avatar.svg"></a> |
||||||
|
<a href="https://opencollective.com/tesseractjs/organization/1/website"><img src="https://opencollective.com/tesseractjs/organization/1/avatar.svg"></a> |
||||||
|
<a href="https://opencollective.com/tesseractjs/organization/2/website"><img src="https://opencollective.com/tesseractjs/organization/2/avatar.svg"></a> |
||||||
|
<a href="https://opencollective.com/tesseractjs/organization/3/website"><img src="https://opencollective.com/tesseractjs/organization/3/avatar.svg"></a> |
||||||
|
<a href="https://opencollective.com/tesseractjs/organization/4/website"><img src="https://opencollective.com/tesseractjs/organization/4/avatar.svg"></a> |
||||||
|
<a href="https://opencollective.com/tesseractjs/organization/5/website"><img src="https://opencollective.com/tesseractjs/organization/5/avatar.svg"></a> |
||||||
|
<a href="https://opencollective.com/tesseractjs/organization/6/website"><img src="https://opencollective.com/tesseractjs/organization/6/avatar.svg"></a> |
||||||
|
<a href="https://opencollective.com/tesseractjs/organization/7/website"><img src="https://opencollective.com/tesseractjs/organization/7/avatar.svg"></a> |
||||||
|
<a href="https://opencollective.com/tesseractjs/organization/8/website"><img src="https://opencollective.com/tesseractjs/organization/8/avatar.svg"></a> |
||||||
|
<a href="https://opencollective.com/tesseractjs/organization/9/website"><img src="https://opencollective.com/tesseractjs/organization/9/avatar.svg"></a> |
||||||
|
@ -1,60 +0,0 @@ |
|||||||
var Tesseract = {} |
|
||||||
|
|
||||||
Tesseract.recognize = function(image, options, callback){ |
|
||||||
var lang = options.lang |
|
||||||
if(typeof lang === "undefined"){ |
|
||||||
lang = 'eng' |
|
||||||
} |
|
||||||
|
|
||||||
if (typeof options === 'string') { |
|
||||||
lang = options |
|
||||||
options = {} |
|
||||||
} |
|
||||||
|
|
||||||
if (typeof options === "function") { |
|
||||||
callback = options |
|
||||||
options = {} |
|
||||||
} |
|
||||||
|
|
||||||
|
|
||||||
if(image.getContext){ |
|
||||||
image = image.getContext('2d'); |
|
||||||
}else if(image.tagName == "IMG" || image.tagName == "VIDEO"){ |
|
||||||
var c = document.createElement('canvas'); |
|
||||||
if(image.tagName == "IMG"){ |
|
||||||
c.width = image.naturalWidth; |
|
||||||
c.height = image.naturalHeight; |
|
||||||
}else if(image.tagName == "VIDEO"){ |
|
||||||
c.width = image.videoWidth; |
|
||||||
c.height = image.videoHeight; |
|
||||||
} |
|
||||||
var ctx = c.getContext('2d'); |
|
||||||
ctx.drawImage(image, 0, 0); |
|
||||||
image = ctx; |
|
||||||
} |
|
||||||
if(image.getImageData) image = image.getImageData(0, 0, image.canvas.width, image.canvas.height); |
|
||||||
|
|
||||||
var worker = new Worker('./worker.js') |
|
||||||
|
|
||||||
if(typeof callback === "function"){ |
|
||||||
worker.onmessage = function(e){ |
|
||||||
callback(e.data.err, e.data.result) |
|
||||||
} |
|
||||||
worker.postMessage({image: image, lang: lang}) |
|
||||||
console.log('callback') |
|
||||||
} |
|
||||||
else { |
|
||||||
return new Promise(function(resolve, reject){ |
|
||||||
worker.onmessage = function(e){ |
|
||||||
if(e.data.err){ |
|
||||||
reject(e.data.err) |
|
||||||
} |
|
||||||
else { |
|
||||||
resolve(e.data.result) |
|
||||||
} |
|
||||||
} |
|
||||||
worker.postMessage({image: image, lang: lang, options: options}) |
|
||||||
console.log('promise') |
|
||||||
}) |
|
||||||
} |
|
||||||
} |
|
@ -0,0 +1,448 @@ |
|||||||
|
# API |
||||||
|
|
||||||
|
- [createWorker()](#create-worker) |
||||||
|
- [Worker.load](#worker-load) |
||||||
|
- [Worker.writeText](#worker-writeText) |
||||||
|
- [Worker.readText](#worker-readText) |
||||||
|
- [Worker.removeFile](#worker-removeFile) |
||||||
|
- [Worker.FS](#worker-FS) |
||||||
|
- [Worker.loadLanguage](#worker-load-language) |
||||||
|
- [Worker.initialize](#worker-initialize) |
||||||
|
- [Worker.setParameters](#worker-set-parameters) |
||||||
|
- [Worker.recognize](#worker-recognize) |
||||||
|
- [Worker.detect](#worker-detect) |
||||||
|
- [Worker.terminate](#worker-terminate) |
||||||
|
- [createScheduler()](#create-scheduler) |
||||||
|
- [Scheduler.addWorker](#scheduler-add-worker) |
||||||
|
- [Scheduler.addJob](#scheduler-add-job) |
||||||
|
- [Scheduler.getQueueLen](#scheduler-get-queue-len) |
||||||
|
- [Scheduler.getNumWorkers](#scheduler-get-num-workers) |
||||||
|
- [setLogging()](#set-logging) |
||||||
|
- [recognize()](#recognize) |
||||||
|
- [detect()](#detect) |
||||||
|
- [PSM](#psm) |
||||||
|
- [OEM](#oem) |
||||||
|
|
||||||
|
--- |
||||||
|
|
||||||
|
<a name="create-worker"></a> |
||||||
|
## createWorker(options): Worker |
||||||
|
|
||||||
|
createWorker is a factory function that creates a tesseract worker, a worker is basically a Web Worker in browser and Child Process in Node. |
||||||
|
|
||||||
|
**Arguments:** |
||||||
|
|
||||||
|
- `options` an object of customized options |
||||||
|
- `corePath` path for tesseract-core.js script |
||||||
|
- `langPath` path for downloading traineddata, do not include `/` at the end of the path |
||||||
|
- `workerPath` path for downloading worker script |
||||||
|
- `dataPath` path for saving traineddata in WebAssembly file system, not common to modify |
||||||
|
- `cachePath` path for the cached traineddata, more useful for Node, for browser it only changes the key in IndexedDB |
||||||
|
- `cacheMethod` a string to indicate the method of cache management, should be one of the following options |
||||||
|
- write: read cache and write back (default method) |
||||||
|
- readOnly: read cache and not to write back |
||||||
|
- refresh: not to read cache and write back |
||||||
|
- none: not to read cache and not to write back |
||||||
|
- `workerBlobURL` a boolean to define whether to use Blob URL for worker script, default: true |
||||||
|
- `gzip` a boolean to define whether the traineddata from the remote is gzipped, default: true |
||||||
|
- `logger` a function to log the progress, a quick example is `m => console.log(m)` |
||||||
|
- `errorHandler` a function to handle worker errors, a quick example is `err => console.error(err)` |
||||||
|
|
||||||
|
|
||||||
|
**Examples:** |
||||||
|
|
||||||
|
```javascript |
||||||
|
const { createWorker } = Tesseract; |
||||||
|
const worker = createWorker({ |
||||||
|
langPath: '...', |
||||||
|
logger: m => console.log(m), |
||||||
|
}); |
||||||
|
``` |
||||||
|
|
||||||
|
## Worker |
||||||
|
|
||||||
|
A Worker helps you do OCR-related tasks. It takes a few steps to set up a Worker before it is fully functional. The full flow is: |
||||||
|
|
||||||
|
- load |
||||||
|
- FS functions // optional |
||||||
|
- loadLanguage |
||||||
|
- initialize |
||||||
|
- setParameters // optional |
||||||
|
- recognize or detect |
||||||
|
- terminate |
||||||
|
|
||||||
|
Each function is async, so using async/await or Promise is required. When it is resolved, you get an object: |
||||||
|
|
||||||
|
```json |
||||||
|
{ |
||||||
|
"jobId": "Job-1-123", |
||||||
|
"data": { ... } |
||||||
|
} |
||||||
|
``` |
||||||
|
|
||||||
|
jobId is generated by Tesseract.js, but you can put your own when calling any of the functions above. |
||||||
|
|
||||||
|
<a name="worker-load"></a> |
||||||
|
### Worker.load(jobId): Promise |
||||||
|
|
||||||
|
Worker.load() loads tesseract.js-core scripts (downloading them from remote if not present), making the Web Worker/Child Process ready for the next action. |
||||||
|
|
||||||
|
**Arguments:** |
||||||
|
|
||||||
|
- `jobId` Please see details above |
||||||
|
|
||||||
|
**Examples:** |
||||||
|
|
||||||
|
```javascript |
||||||
|
(async () => { |
||||||
|
await worker.load(); |
||||||
|
})(); |
||||||
|
``` |
||||||
|
|
||||||
|
<a name="worker-writeText"></a> |
||||||
|
### Worker.writeText(path, text, jobId): Promise |
||||||
|
|
||||||
|
Worker.writeText() writes a text file to the path specified in MEMFS, it is useful when you want to use some features that require tesseract.js |
||||||
|
to read file from file system. |
||||||
|
|
||||||
|
**Arguments:** |
||||||
|
|
||||||
|
- `path` text file path |
||||||
|
- `text` content of the text file |
||||||
|
- `jobId` Please see details above |
||||||
|
|
||||||
|
**Examples:** |
||||||
|
|
||||||
|
```javascript |
||||||
|
(async () => { |
||||||
|
await worker.writeText('tmp.txt', 'Hi\nTesseract.js\n'); |
||||||
|
})(); |
||||||
|
``` |
||||||
|
|
||||||
|
<a name="worker-readText"></a> |
||||||
|
### Worker.readText(path, jobId): Promise |
||||||
|
|
||||||
|
Worker.readText() reads a text file from the path specified in MEMFS, it is useful when you want to check the content. |
||||||
|
|
||||||
|
**Arguments:** |
||||||
|
|
||||||
|
- `path` text file path |
||||||
|
- `jobId` Please see details above |
||||||
|
|
||||||
|
**Examples:** |
||||||
|
|
||||||
|
```javascript |
||||||
|
(async () => { |
||||||
|
const { data } = await worker.readText('tmp.txt'); |
||||||
|
console.log(data); |
||||||
|
})(); |
||||||
|
``` |
||||||
|
|
||||||
|
<a name="worker-removeFile"></a> |
||||||
|
### Worker.removeFile(path, jobId): Promise |
||||||
|
|
||||||
|
Worker.removeFile() removes a file in MEMFS, it is useful when you want to free the memory. |
||||||
|
|
||||||
|
**Arguments:** |
||||||
|
|
||||||
|
- `path` file path |
||||||
|
- `jobId` Please see details above |
||||||
|
|
||||||
|
**Examples:** |
||||||
|
|
||||||
|
```javascript |
||||||
|
(async () => { |
||||||
|
await worker.removeFile('tmp.txt'); |
||||||
|
})(); |
||||||
|
``` |
||||||
|
|
||||||
|
<a name="worker-FS"></a> |
||||||
|
### Worker.FS(method, args, jobId): Promise |
||||||
|
|
||||||
|
Worker.FS() is a generic FS function to do anything you want, you can check [HERE](https://emscripten.org/docs/api_reference/Filesystem-API.html) for all functions. |
||||||
|
|
||||||
|
**Arguments:** |
||||||
|
|
||||||
|
- `method` method name |
||||||
|
- `args` array of arguments to pass |
||||||
|
- `jobId` Please see details above |
||||||
|
|
||||||
|
**Examples:** |
||||||
|
|
||||||
|
```javascript |
||||||
|
(async () => { |
||||||
|
await worker.FS('writeFile', ['tmp.txt', 'Hi\nTesseract.js\n']); |
||||||
|
// equal to: |
||||||
|
// await worker.writeText('tmp.txt', 'Hi\nTesseract.js\n'); |
||||||
|
})(); |
||||||
|
``` |
||||||
|
|
||||||
|
<a name="worker-load-language"></a> |
||||||
|
### Worker.loadLanguage(langs, jobId): Promise |
||||||
|
|
||||||
|
Worker.loadLanguage() loads traineddata from cache or downloads traineddata from remote, and puts traineddata into the WebAssembly file system. |
||||||
|
|
||||||
|
**Arguments:** |
||||||
|
|
||||||
|
- `langs` a string to indicate the languages traineddata to download, multiple languages are concatenated with **+**, ex: **eng+chi\_tra** |
||||||
|
- `jobId` Please see details above |
||||||
|
|
||||||
|
**Examples:** |
||||||
|
|
||||||
|
```javascript |
||||||
|
(async () => { |
||||||
|
await worker.loadLanguage('eng+chi_tra'); |
||||||
|
})(); |
||||||
|
``` |
||||||
|
|
||||||
|
<a name="worker-initialize"></a> |
||||||
|
### Worker.initialize(langs, oem, jobId): Promise |
||||||
|
|
||||||
|
Worker.initialize() initializes the Tesseract API, making sure it is ready for doing OCR tasks. |
||||||
|
|
||||||
|
**Arguments:** |
||||||
|
|
||||||
|
- `langs` a string to indicate the languages loaded by Tesseract API, it can be a subset of the language traineddata you loaded from Worker.loadLanguage. |
||||||
|
- `oem` an enum to indicate the OCR Engine Mode you use |
||||||
|
- `jobId` Please see details above |
||||||
|
|
||||||
|
**Examples:** |
||||||
|
|
||||||
|
```javascript |
||||||
|
(async () => { |
||||||
|
/** You can load more languages in advance, but use only part of them in Worker.initialize() */ |
||||||
|
await worker.loadLanguage('eng+chi_tra'); |
||||||
|
await worker.initialize('eng'); |
||||||
|
})(); |
||||||
|
``` |
||||||
|
<a name="worker-set-parameters"></a> |
||||||
|
### Worker.setParameters(params, jobId): Promise |
||||||
|
|
||||||
|
Worker.setParameters() sets parameters for Tesseract API (using SetVariable()), it changes the behavior of Tesseract and some parameters like tessedit\_char\_whitelist are very useful. |
||||||
|
|
||||||
|
**Arguments:** |
||||||
|
|
||||||
|
- `params` an object with key and value of the parameters |
||||||
|
- `jobId` Please see details above |
||||||
|
|
||||||
|
**Supported Parameters:** |
||||||
|
|
||||||
|
| name | type | default value | description | |
||||||
|
| --------------------------- | ------ | ----------------- | ------------------------------------------------------------------------------------------------------------------------------- | |
||||||
|
| tessedit\_ocr\_engine\_mode | enum | OEM.DEFAULT | Check [HERE](https://github.com/tesseract-ocr/tesseract/blob/4.0.0/src/ccstruct/publictypes.h#L268) for definition of each mode | |
||||||
|
| tessedit\_pageseg\_mode | enum | PSM.SINGLE\_BLOCK | Check [HERE](https://github.com/tesseract-ocr/tesseract/blob/4.0.0/src/ccstruct/publictypes.h#L163) for definition of each mode | |
||||||
|
| tessedit\_char\_whitelist | string | '' | setting white list characters makes the result only contain these characters, useful when the content in the image is limited | |
||||||
|
| preserve\_interword\_spaces | string | '0' | '0' or '1', keeps the space between words | |
||||||
|
| user\_defined\_dpi | string | '' | Define custom dpi, use to fix **Warning: Invalid resolution 0 dpi. Using 70 instead.** | |
||||||
|
| tessjs\_create\_hocr | string | '1' | only 2 values, '0' or '1', when the value is '1', tesseract.js includes hocr in the result | |
||||||
|
| tessjs\_create\_tsv | string | '1' | only 2 values, '0' or '1', when the value is '1', tesseract.js includes tsv in the result | |
||||||
|
| tessjs\_create\_box | string | '0' | only 2 values, '0' or '1', when the value is '1', tesseract.js includes box in the result | |
||||||
|
| tessjs\_create\_unlv | string | '0' | only 2 values, '0' or '1', when the value is '1', tesseract.js includes unlv in the result | |
||||||
|
| tessjs\_create\_osd | string | '0' | only 2 values, '0' or '1', when the value is '1', tesseract.js includes osd in the result | |
||||||
|
|
||||||
|
**Examples:** |
||||||
|
|
||||||
|
```javascript |
||||||
|
(async () => { |
||||||
|
await worker.setParameters({ |
||||||
|
tessedit_char_whitelist: '0123456789', |
||||||
|
}); |
||||||
|
})(); |
||||||
|
``` |
||||||
|
|
||||||
|
<a name="worker-recognize"></a> |
||||||
|
### Worker.recognize(image, options, jobId): Promise |
||||||
|
|
||||||
|
Worker.recognize() provides core function of Tesseract.js as it executes OCR |
||||||
|
|
||||||
|
Figures out what words are in `image`, where the words are in `image`, etc. |
||||||
|
> Note: `image` should be sufficiently high resolution. |
||||||
|
> Often, the same image will get much better results if you upscale it before calling `recognize`. |
||||||
|
|
||||||
|
**Arguments:** |
||||||
|
|
||||||
|
- `image` see [Image Format](./image-format.md) for more details. |
||||||
|
- `options` an object of customized options |
||||||
|
- `rectangle` an object to specify the region you want to recognize in the image, should contain top, left, width and height, see example below. |
||||||
|
- `jobId` Please see details above |
||||||
|
|
||||||
|
**Output:** |
||||||
|
|
||||||
|
**Examples:** |
||||||
|
|
||||||
|
```javascript |
||||||
|
const { createWorker } = Tesseract; |
||||||
|
(async () => { |
||||||
|
const worker = createWorker(); |
||||||
|
await worker.load(); |
||||||
|
await worker.loadLanguage('eng'); |
||||||
|
await worker.initialize('eng'); |
||||||
|
const { data: { text } } = await worker.recognize(image); |
||||||
|
console.log(text); |
||||||
|
})(); |
||||||
|
``` |
||||||
|
|
||||||
|
With rectangle |
||||||
|
|
||||||
|
```javascript |
||||||
|
const { createWorker } = Tesseract; |
||||||
|
(async () => { |
||||||
|
const worker = createWorker(); |
||||||
|
await worker.load(); |
||||||
|
await worker.loadLanguage('eng'); |
||||||
|
await worker.initialize('eng'); |
||||||
|
const { data: { text } } = await worker.recognize(image, { |
||||||
|
rectangle: { top: 0, left: 0, width: 100, height: 100 }, |
||||||
|
}); |
||||||
|
console.log(text); |
||||||
|
})(); |
||||||
|
``` |
||||||
|
|
||||||
|
<a name="worker-detect"></a> |
||||||
|
### Worker.detect(image, jobId): Promise |
||||||
|
|
||||||
|
Worker.detect() does OSD (Orientation and Script Detection) to the image instead of OCR. |
||||||
|
|
||||||
|
**Arguments:** |
||||||
|
|
||||||
|
- `image` see [Image Format](./image-format.md) for more details. |
||||||
|
- `jobId` Please see details above |
||||||
|
|
||||||
|
**Examples:** |
||||||
|
|
||||||
|
```javascript |
||||||
|
const { createWorker } = Tesseract; |
||||||
|
(async () => { |
||||||
|
const worker = createWorker(); |
||||||
|
await worker.load(); |
||||||
|
await worker.loadLanguage('eng'); |
||||||
|
await worker.initialize('eng'); |
||||||
|
const { data } = await worker.detect(image); |
||||||
|
console.log(data); |
||||||
|
})(); |
||||||
|
``` |
||||||
|
|
||||||
|
<a name="worker-terminate"></a> |
||||||
|
### Worker.terminate(jobId): Promise |
||||||
|
|
||||||
|
Worker.terminate() terminates the worker and cleans up |
||||||
|
|
||||||
|
```javascript |
||||||
|
(async () => { |
||||||
|
await worker.terminate(); |
||||||
|
})(); |
||||||
|
``` |
||||||
|
|
||||||
|
<a name="create-scheduler"></a> |
||||||
|
## createScheduler(): Scheduler |
||||||
|
|
||||||
|
createScheduler() is a factory function to create a scheduler, a scheduler manages a job queue and workers to enable multiple workers to work together, it is useful when you want to speed up your performance. |
||||||
|
|
||||||
|
**Examples:** |
||||||
|
|
||||||
|
```javascript |
||||||
|
const { createScheduler } = Tesseract; |
||||||
|
const scheduler = createScheduler(); |
||||||
|
``` |
||||||
|
|
||||||
|
### Scheduler |
||||||
|
|
||||||
|
<a name="scheduler-add-worker"></a> |
||||||
|
### Scheduler.addWorker(worker): string |
||||||
|
|
||||||
|
Scheduler.addWorker() adds a worker into the worker pool inside scheduler, it is suggested to add one worker to only one scheduler. |
||||||
|
|
||||||
|
**Arguments:** |
||||||
|
|
||||||
|
- `worker` see Worker above |
||||||
|
|
||||||
|
**Examples:** |
||||||
|
|
||||||
|
```javascript |
||||||
|
const { createWorker, createScheduler } = Tesseract; |
||||||
|
const scheduler = createScheduler(); |
||||||
|
const worker = createWorker(); |
||||||
|
scheduler.addWorker(worker); |
||||||
|
``` |
||||||
|
|
||||||
|
<a name="scheduler-add-job"></a> |
||||||
|
### Scheduler.addJob(action, ...payload): Promise |
||||||
|
|
||||||
|
Scheduler.addJob() adds a job to the job queue and scheduler waits and finds an idle worker to take the job. |
||||||
|
|
||||||
|
**Arguments:** |
||||||
|
|
||||||
|
- `action` a string to indicate the action you want to do, right now only **recognize** and **detect** are supported |
||||||
|
- `payload` an arbitrary number of args depending on the action you called. |
||||||
|
|
||||||
|
**Examples:** |
||||||
|
|
||||||
|
```javascript |
||||||
|
(async () => { |
||||||
|
const { data: { text } } = await scheduler.addJob('recognize', image, options); |
||||||
|
const { data } = await scheduler.addJob('detect', image); |
||||||
|
})(); |
||||||
|
``` |
||||||
|
|
||||||
|
<a name="scheduler-get-queue-len"></a> |
||||||
|
### Scheduler.getQueueLen(): number |
||||||
|
|
||||||
|
Scheduler.getQueueLen() returns the length of job queue. |
||||||
|
|
||||||
|
<a name="scheduler-get-num-workers"></a> |
||||||
|
### Scheduler.getNumWorkers(): number |
||||||
|
|
||||||
|
Scheduler.getNumWorkers() returns number of workers added into the scheduler |
||||||
|
|
||||||
|
<a name="scheduler-terminate"></a> |
||||||
|
### Scheduler.terminate(): Promise |
||||||
|
|
||||||
|
Scheduler.terminate() terminates all workers added, useful to do quick clean up. |
||||||
|
|
||||||
|
**Examples:** |
||||||
|
|
||||||
|
```javascript |
||||||
|
(async () => { |
||||||
|
await scheduler.terminate(); |
||||||
|
})(); |
||||||
|
``` |
||||||
|
|
||||||
|
<a name="set-logging"></a> |
||||||
|
## setLogging(logging: boolean) |
||||||
|
|
||||||
|
setLogging() sets the logging flag, you can `setLogging(true)` to see detailed information, useful for debugging. |
||||||
|
|
||||||
|
**Arguments:** |
||||||
|
|
||||||
|
- `logging` boolean to define whether to see detailed logs, default: false |
||||||
|
|
||||||
|
**Examples:** |
||||||
|
|
||||||
|
```javascript |
||||||
|
const { setLogging } = Tesseract; |
||||||
|
setLogging(true); |
||||||
|
``` |
||||||
|
|
||||||
|
<a name="recognize"></a> |
||||||
|
## recognize(image, langs, options): Promise |
||||||
|
|
||||||
|
recognize() is a function to quickly do recognize() task, it is not recommended to use in real application, but useful when you want to save some time. |
||||||
|
|
||||||
|
See [Tesseract.js](../src/Tesseract.js) |
||||||
|
|
||||||
|
<a name="detect"></a> |
||||||
|
## detect(image, options): Promise |
||||||
|
|
||||||
|
Same background as recognize(), but it does detect instead. |
||||||
|
|
||||||
|
See [Tesseract.js](../src/Tesseract.js) |
||||||
|
|
||||||
|
<a name="psm"></a> |
||||||
|
## PSM |
||||||
|
|
||||||
|
See [PSM.js](../src/constants/PSM.js) |
||||||
|
|
||||||
|
<a name="oem"></a> |
||||||
|
## OEM |
||||||
|
|
||||||
|
See [OEM.js](../src/constants/OEM.js) |
@ -0,0 +1,226 @@ |
|||||||
|
# Tesseract.js Examples |
||||||
|
|
||||||
|
You can also check [examples](../examples) folder. |
||||||
|
|
||||||
|
### basic |
||||||
|
|
||||||
|
```javascript |
||||||
|
const { createWorker } = require('tesseract.js'); |
||||||
|
|
||||||
|
const worker = createWorker(); |
||||||
|
|
||||||
|
(async () => { |
||||||
|
await worker.load(); |
||||||
|
await worker.loadLanguage('eng'); |
||||||
|
await worker.initialize('eng'); |
||||||
|
const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png'); |
||||||
|
console.log(text); |
||||||
|
await worker.terminate(); |
||||||
|
})(); |
||||||
|
``` |
||||||
|
|
||||||
|
### with detailed progress |
||||||
|
|
||||||
|
```javascript |
||||||
|
const { createWorker } = require('tesseract.js'); |
||||||
|
|
||||||
|
const worker = createWorker({ |
||||||
|
logger: m => console.log(m), // Add logger here |
||||||
|
}); |
||||||
|
|
||||||
|
(async () => { |
||||||
|
await worker.load(); |
||||||
|
await worker.loadLanguage('eng'); |
||||||
|
await worker.initialize('eng'); |
||||||
|
const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png'); |
||||||
|
console.log(text); |
||||||
|
await worker.terminate(); |
||||||
|
})(); |
||||||
|
``` |
||||||
|
|
||||||
|
### with multiple languages, separate by '+' |
||||||
|
|
||||||
|
```javascript |
||||||
|
const { createWorker } = require('tesseract.js'); |
||||||
|
|
||||||
|
const worker = createWorker(); |
||||||
|
|
||||||
|
(async () => { |
||||||
|
await worker.load(); |
||||||
|
await worker.loadLanguage('eng+chi_tra'); |
||||||
|
await worker.initialize('eng+chi_tra'); |
||||||
|
const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png'); |
||||||
|
console.log(text); |
||||||
|
await worker.terminate(); |
||||||
|
})(); |
||||||
|
``` |
||||||
|
### with whitelist char (^2.0.0-beta.1) |
||||||
|
|
||||||
|
```javascript |
||||||
|
const { createWorker } = require('tesseract.js'); |
||||||
|
|
||||||
|
const worker = createWorker(); |
||||||
|
|
||||||
|
(async () => { |
||||||
|
await worker.load(); |
||||||
|
await worker.loadLanguage('eng'); |
||||||
|
await worker.initialize('eng'); |
||||||
|
await worker.setParameters({ |
||||||
|
tessedit_char_whitelist: '0123456789', |
||||||
|
}); |
||||||
|
const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png'); |
||||||
|
console.log(text); |
||||||
|
await worker.terminate(); |
||||||
|
})(); |
||||||
|
``` |
||||||
|
|
||||||
|
### with different pageseg mode (^2.0.0-beta.1) |
||||||
|
|
||||||
|
Check here for more details of pageseg mode: https://github.com/tesseract-ocr/tesseract/blob/4.0.0/src/ccstruct/publictypes.h#L163 |
||||||
|
|
||||||
|
```javascript |
||||||
|
const { createWorker, PSM } = require('tesseract.js'); |
||||||
|
|
||||||
|
const worker = createWorker(); |
||||||
|
|
||||||
|
(async () => { |
||||||
|
await worker.load(); |
||||||
|
await worker.loadLanguage('eng'); |
||||||
|
await worker.initialize('eng'); |
||||||
|
await worker.setParameters({ |
||||||
|
tessedit_pageseg_mode: PSM.SINGLE_BLOCK, |
||||||
|
}); |
||||||
|
const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png'); |
||||||
|
console.log(text); |
||||||
|
await worker.terminate(); |
||||||
|
})(); |
||||||
|
``` |
||||||
|
|
||||||
|
### with pdf output (^2.0.0-beta.1) |
||||||
|
|
||||||
|
Please check **examples** folder for details. |
||||||
|
|
||||||
|
Browser: [download-pdf.html](../examples/browser/download-pdf.html) |
||||||
|
Node: [download-pdf.js](../examples/node/download-pdf.js) |
||||||
|
|
||||||
|
### with only part of the image (^2.0.1) |
||||||
|
|
||||||
|
**One rectangle** |
||||||
|
|
||||||
|
```javascript |
||||||
|
const { createWorker } = require('tesseract.js'); |
||||||
|
|
||||||
|
const worker = createWorker(); |
||||||
|
const rectangle = { left: 0, top: 0, width: 500, height: 250 }; |
||||||
|
|
||||||
|
(async () => { |
||||||
|
await worker.load(); |
||||||
|
await worker.loadLanguage('eng'); |
||||||
|
await worker.initialize('eng'); |
||||||
|
const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png', { rectangle }); |
||||||
|
console.log(text); |
||||||
|
await worker.terminate(); |
||||||
|
})(); |
||||||
|
``` |
||||||
|
|
||||||
|
**Multiple Rectangles** |
||||||
|
|
||||||
|
```javascript |
||||||
|
const { createWorker } = require('tesseract.js'); |
||||||
|
|
||||||
|
const worker = createWorker(); |
||||||
|
const rectangles = [ |
||||||
|
{ |
||||||
|
left: 0, |
||||||
|
top: 0, |
||||||
|
width: 500, |
||||||
|
height: 250, |
||||||
|
}, |
||||||
|
{ |
||||||
|
left: 500, |
||||||
|
top: 0, |
||||||
|
width: 500, |
||||||
|
height: 250, |
||||||
|
}, |
||||||
|
]; |
||||||
|
|
||||||
|
(async () => { |
||||||
|
await worker.load(); |
||||||
|
await worker.loadLanguage('eng'); |
||||||
|
await worker.initialize('eng'); |
||||||
|
const values = []; |
||||||
|
for (let i = 0; i < rectangles.length; i++) { |
||||||
|
const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png', { rectangle: rectangles[i] }); |
||||||
|
values.push(text); |
||||||
|
} |
||||||
|
console.log(values); |
||||||
|
await worker.terminate(); |
||||||
|
})(); |
||||||
|
``` |
||||||
|
|
||||||
|
**Multiple Rectangles (with scheduler to do recognition in parallel)** |
||||||
|
|
||||||
|
```javascript |
||||||
|
const { createWorker, createScheduler } = require('tesseract.js'); |
||||||
|
|
||||||
|
const scheduler = createScheduler(); |
||||||
|
const worker1 = createWorker(); |
||||||
|
const worker2 = createWorker(); |
||||||
|
const rectangles = [ |
||||||
|
{ |
||||||
|
left: 0, |
||||||
|
top: 0, |
||||||
|
width: 500, |
||||||
|
height: 250, |
||||||
|
}, |
||||||
|
{ |
||||||
|
left: 500, |
||||||
|
top: 0, |
||||||
|
width: 500, |
||||||
|
height: 250, |
||||||
|
}, |
||||||
|
]; |
||||||
|
|
||||||
|
(async () => { |
||||||
|
await worker1.load(); |
||||||
|
await worker2.load(); |
||||||
|
await worker1.loadLanguage('eng'); |
||||||
|
await worker2.loadLanguage('eng'); |
||||||
|
await worker1.initialize('eng'); |
||||||
|
await worker2.initialize('eng'); |
||||||
|
scheduler.addWorker(worker1); |
||||||
|
scheduler.addWorker(worker2); |
||||||
|
const results = await Promise.all(rectangles.map((rectangle) => ( |
||||||
|
scheduler.addJob('recognize', 'https://tesseract.projectnaptha.com/img/eng_bw.png', { rectangle }) |
||||||
|
))); |
||||||
|
console.log(results.map(r => r.data.text)); |
||||||
|
await scheduler.terminate(); |
||||||
|
})(); |
||||||
|
``` |
||||||
|
|
||||||
|
### with multiple workers to speed up (^2.0.0-beta.1) |
||||||
|
|
||||||
|
```javascript |
||||||
|
const { createWorker, createScheduler } = require('tesseract.js'); |
||||||
|
|
||||||
|
const scheduler = createScheduler(); |
||||||
|
const worker1 = createWorker(); |
||||||
|
const worker2 = createWorker(); |
||||||
|
|
||||||
|
(async () => { |
||||||
|
await worker1.load(); |
||||||
|
await worker2.load(); |
||||||
|
await worker1.loadLanguage('eng'); |
||||||
|
await worker2.loadLanguage('eng'); |
||||||
|
await worker1.initialize('eng'); |
||||||
|
await worker2.initialize('eng'); |
||||||
|
scheduler.addWorker(worker1); |
||||||
|
scheduler.addWorker(worker2); |
||||||
|
/** Add 10 recognition jobs */ |
||||||
|
const results = await Promise.all(Array(10).fill(0).map(() => ( |
||||||
|
scheduler.addJob('recognize', 'https://tesseract.projectnaptha.com/img/eng_bw.png') |
||||||
|
))); |
||||||
|
console.log(results); |
||||||
|
await scheduler.terminate(); // It also terminates all workers. |
||||||
|
})(); |
||||||
|
``` |
@ -0,0 +1,42 @@ |
|||||||
|
FAQ |
||||||
|
=== |
||||||
|
|
||||||
|
## How does tesseract.js download and keep \*.traineddata? |
||||||
|
|
||||||
|
The language model is downloaded by `worker.loadLanguage()` and you need to pass the langs to `worker.initialize()`. |
||||||
|
|
||||||
|
During the downloading of language model, Tesseract.js will first check if \*.traineddata already exists. (browser: [IndexedDB](https://developer.mozilla.org/en-US/docs/Web/API/IndexedDB_API), Node.js: fs, in the folder you execute the command) If the \*.traineddata doesn't exist, it will fetch \*.traineddata.gz from [tessdata](https://github.com/naptha/tessdata), ungzip and store in IndexedDB or fs, you can delete it manually and it will download again for you. |
||||||
|
|
||||||
|
## How can I train my own \*.traineddata? |
||||||
|
|
||||||
|
For tesseract.js v2, check [TrainingTesseract 4.00](https://tesseract-ocr.github.io/tessdoc/TrainingTesseract-4.00) |
||||||
|
|
||||||
|
For tesseract.js v1, check [Training Tesseract 3.03–3.05](https://tesseract-ocr.github.io/tessdoc/Training-Tesseract-3.03%E2%80%933.05) |
||||||
|
|
||||||
|
## How can I get HOCR, TSV, Box, UNLV, OSD? |
||||||
|
|
||||||
|
Starting from 2.0.0-beta.1, you can get all of this information in the final result. |
||||||
|
|
||||||
|
```javascript |
||||||
|
import { createWorker } from 'tesseract.js'; |
||||||
|
const worker = createWorker({ |
||||||
|
logger: m => console.log(m) |
||||||
|
}); |
||||||
|
|
||||||
|
(async () => { |
||||||
|
await worker.load(); |
||||||
|
await worker.loadLanguage('eng'); |
||||||
|
await worker.initialize('eng'); |
||||||
|
await worker.setParameters({ |
||||||
|
tessjs_create_box: '1', |
||||||
|
tessjs_create_unlv: '1', |
||||||
|
tessjs_create_osd: '1', |
||||||
|
}); |
||||||
|
const { data: { text, hocr, tsv, box, unlv } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png'); |
||||||
|
console.log(text); |
||||||
|
console.log(hocr); |
||||||
|
console.log(tsv); |
||||||
|
console.log(box); |
||||||
|
console.log(unlv); |
||||||
|
})(); |
||||||
|
``` |
@ -0,0 +1,18 @@ |
|||||||
|
# Image Format |
||||||
|
|
||||||
|
The main Tesseract.js functions (ex. recognize, detect) take an `image` parameter. The image formats and data types supported are listed below. |
||||||
|
|
||||||
|
Supported Image Formats: **bmp, jpg, png, pbm, webp** |
||||||
|
|
||||||
|
For browser and Node, supported data types are: |
||||||
|
- string with base64 encoded image (fits `data:image\/([a-zA-Z]*);base64,([^"]*)` regexp) |
||||||
|
- buffer |
||||||
|
|
||||||
|
For browser only, supported data types are: |
||||||
|
- `File` or `Blob` object |
||||||
|
- `img` or `canvas` element |
||||||
|
|
||||||
|
For Node only, supported data types are: |
||||||
|
- string containing a path to local image |
||||||
|
|
||||||
|
Note: images must be a supported image format **and** a supported data type. For example, a buffer containing a png image is supported. A buffer containing raw pixel data is not supported. |
After Width: | Height: | Size: 97 KiB |
After Width: | Height: | Size: 105 KiB |
After Width: | Height: | Size: 237 KiB |
@ -0,0 +1,38 @@ |
|||||||
|
## Local Installation |
||||||
|
|
||||||
|
Check here for examples: https://github.com/naptha/tesseract.js/blob/master/docs/examples.md |
||||||
|
|
||||||
|
In browser environment, `tesseract.js` simply provides the API layer. Internally, it opens a WebWorker to handle requests. That worker itself loads code from the Emscripten-built `tesseract.js-core` which itself is hosted on a CDN. Then it dynamically loads language files hosted on another CDN. |
||||||
|
|
||||||
|
Because of this we recommend loading `tesseract.js` from a CDN. But if you really need to have all your files local, you can pass extra arguments to `Tesseract.recognize` or `createWorker` to specify custom paths for workers, languages, and core. |
||||||
|
|
||||||
|
In Node.js environment, the only path you may want to customize is languages/langPath. |
||||||
|
|
||||||
|
```javascript |
||||||
|
Tesseract.recognize(image, langs, { |
||||||
|
workerPath: 'https://unpkg.com/tesseract.js@v2.0.0/dist/worker.min.js', |
||||||
|
langPath: 'https://tessdata.projectnaptha.com/4.0.0', |
||||||
|
corePath: 'https://unpkg.com/tesseract.js-core@v2.0.0/tesseract-core.wasm.js', |
||||||
|
}) |
||||||
|
``` |
||||||
|
|
||||||
|
Or |
||||||
|
|
||||||
|
```javascript |
||||||
|
const worker = createWorker({ |
||||||
|
workerPath: 'https://unpkg.com/tesseract.js@v2.0.0/dist/worker.min.js', |
||||||
|
langPath: 'https://tessdata.projectnaptha.com/4.0.0', |
||||||
|
corePath: 'https://unpkg.com/tesseract.js-core@v2.0.0/tesseract-core.wasm.js', |
||||||
|
}); |
||||||
|
``` |
||||||
|
|
||||||
|
### workerPath |
||||||
|
A string specifying the location of the [worker.js](./dist/worker.min.js) file. |
||||||
|
|
||||||
|
### langPath |
||||||
|
A string specifying the location of the tesseract language files, with default value 'https://tessdata.projectnaptha.com/4.0.0'. Language file URLs are calculated according to the formula `langPath + '/' + langCode + '.traineddata.gz'`. |
||||||
|
|
||||||
|
### corePath |
||||||
|
A string specifying the location of the [tesseract.js-core library](https://github.com/naptha/tesseract.js-core), with default value 'https://unpkg.com/tesseract.js-core@v2.0.0/tesseract-core.wasm.js' (fallback to tesseract-core.asm.js when WebAssembly is not available). |
||||||
|
|
||||||
|
Another WASM option is 'https://unpkg.com/tesseract.js-core@v2.0.0/tesseract-core.js' which is a script that loads 'https://unpkg.com/tesseract.js-core@v2.0.0/tesseract-core.wasm'. But it fails to fetch at this moment. |
@ -0,0 +1,3 @@ |
|||||||
|
# Tesseract Languages |
||||||
|
|
||||||
|
Please check [HERE](https://tesseract-ocr.github.io/tessdoc/Data-Files#data-files-for-version-400-november-29-2016) for supported languages |
@ -1,17 +0,0 @@ |
|||||||
<canvas id="c"></canvas> |
|
||||||
<script type="text/javascript" src="./Tesseract.js"></script> |
|
||||||
<script type="text/javascript"> |
|
||||||
var canvas = document.getElementById('c') |
|
||||||
canvas.width = 400 |
|
||||||
canvas.height = 400 |
|
||||||
var ctx = canvas.getContext('2d'); |
|
||||||
ctx.font = '30px "Arial Black"' |
|
||||||
ctx.fillText('Hell0 World', 100, 40) |
|
||||||
// ctx.fillText("囚犯離奇掙脫囚犯離奇掙脫", 100, 40) |
|
||||||
ctx.font = '30px "Times New Roman"' |
|
||||||
ctx.fillText('from beyond', 100, 80) |
|
||||||
// ctx.fillText('2小時可換乘2次2小時可換乘2次', 100, 80) |
|
||||||
ctx.font = '30px sans-serif' |
|
||||||
ctx.fillText('the Cosmic Void', 100, 120) |
|
||||||
Tesseract.recognize(canvas,{tessedit_char_blacklist:'e'}).then( function(d){ console.log(d) } ) |
|
||||||
</script> |
|
@ -0,0 +1,37 @@ |
|||||||
|
<!DOCTYPE HTML> |
||||||
|
<html> |
||||||
|
<head> |
||||||
|
<script src="/dist/tesseract.dev.js"></script> |
||||||
|
</head> |
||||||
|
<body> |
||||||
|
<input type="file" id="uploader"> |
||||||
|
<script> |
||||||
|
/**
 * Change handler for the #uploader input: runs OCR on the first selected
 * file with a freshly created worker and logs the recognized text.
 *
 * The worker is terminated once the job settles (success or failure) so that
 * repeated uploads do not accumulate idle workers.
 *
 * @param {Event} evt change event whose target exposes the selected `files`
 * @returns {Promise<void>} settles after the worker has been terminated;
 *   resolves immediately when no file is selected
 */
const recognize = function(evt){
  const files = evt.target.files;
  // Nothing was selected: avoid spinning up a worker for no input.
  if (!files || files.length === 0) {
    return Promise.resolve();
  }
  const worker = Tesseract.createWorker({
    /*
     * As Edge doesn't support WebAssembly,
     * here we force the asm.js version.
     */
    corePath: '../../node_modules/tesseract.js-core/tesseract-core.asm.js',
    logger: function(m){console.log(m);},
    /*
     * As there is no IndexedDB in earlier versions
     * of Edge, here we disable the cache.
     */
    cacheMethod: 'none',
  });
  return Promise.resolve()
    .then(() => worker.load())
    .then(() => worker.loadLanguage('eng'))
    .then(() => worker.initialize('eng'))
    .then(() => worker.recognize(files[0]))
    .then((ret) => {
      console.log(ret.data.text);
    })
    // Report failures instead of leaving an unhandled rejection.
    .catch((err) => {
      console.error(err);
    })
    // The previous catch always resolves, so this runs on every outcome.
    .then(() => worker.terminate())
    .catch((err) => {
      console.error(err);
    });
};
||||||
|
const elm = document.getElementById('uploader'); |
||||||
|
elm.addEventListener('change', recognize); |
||||||
|
</script> |
||||||
|
</body> |
||||||
|
</html> |
@ -0,0 +1,19 @@ |
|||||||
|
<html> |
||||||
|
<head> |
||||||
|
<script src="/dist/tesseract.dev.js"></script> |
||||||
|
</head> |
||||||
|
<body> |
||||||
|
<input type="file" id="uploader"> |
||||||
|
<script> |
||||||
|
/**
 * Change handler for the #uploader input: recognizes English text in the
 * first selected file via Tesseract.recognize() and logs the result.
 *
 * Does nothing when the event carries no file, and reports recognition
 * failures on the console instead of leaving the rejection unhandled.
 *
 * @param {{target: {files: ArrayLike<*>}}} event change event of the file input
 * @returns {Promise<void>}
 */
const recognize = async ({ target: { files } }) => {
  const file = files[0];
  // The change event can fire with an empty selection.
  if (!file) {
    return;
  }
  try {
    const { data: { text } } = await Tesseract.recognize(file, 'eng', {
      corePath: '../../node_modules/tesseract.js-core/tesseract-core.wasm.js',
      logger: m => console.log(m),
    });
    console.log(text);
  } catch (err) {
    console.error(err);
  }
};
||||||
|
const elm = document.getElementById('uploader'); |
||||||
|
elm.addEventListener('change', recognize); |
||||||
|
</script> |
||||||
|
</body> |
||||||
|
</html> |
@ -0,0 +1,33 @@ |
|||||||
|
<html> |
||||||
|
<head> |
||||||
|
<script src="/dist/tesseract.dev.js"></script> |
||||||
|
</head> |
||||||
|
<body> |
||||||
|
<textarea id="message">Working...</textarea> |
||||||
|
|
||||||
|
<script> |
||||||
|
// Benchmark script: recognizes each sample image 10 times in a row with a
// single English worker and writes the elapsed seconds into #message.
const { createWorker } = Tesseract;
const worker = createWorker();

(async () => {
  // Appends text to whatever the #message element currently shows.
  const appendMessage = (text) => {
    document.getElementById('message').innerHTML += text;
  };

  await worker.load();
  await worker.loadLanguage('eng');
  await worker.initialize('eng');

  const samplePaths = ["../data/meditations.jpg", "../data/tyger.jpg", "../data/testocr.png"];
  let totalSeconds = 0;

  for (let idx = 0; idx < samplePaths.length; idx++) {
    const samplePath = samplePaths[idx];
    const startedAt = Date.now();

    // Runs are sequential on purpose: each recognition is awaited before the next.
    let remaining = 10;
    while (remaining > 0) {
      await worker.recognize(samplePath);
      remaining -= 1;
    }

    const finishedAt = Date.now();
    const elapsedSeconds = (finishedAt - startedAt) / 1e3;
    totalSeconds += elapsedSeconds;
    appendMessage("\n" + samplePath + " [x10] runtime: " + elapsedSeconds + "s");
  }

  appendMessage("\nTotal runtime: " + totalSeconds + "s");
})();
||||||
|
</script> |
||||||
|
</body> |
||||||
|
</html> |
@ -0,0 +1,162 @@ |
|||||||
|
<script src="/dist/tesseract.dev.js"></script> |
||||||
|
<script> |
||||||
|
|
||||||
|
/**
 * Renders one progress packet into the #log element.
 *
 * A packet whose status equals that of the newest log entry updates that
 * entry's <progress> bar in place; any other packet inserts a new entry at
 * the top of the log. A packet with status 'done' is rendered as a <pre>
 * containing packet.data.data.text instead of a status label.
 *
 * @param {{status: string, progress?: number, data?: object}} packet
 */
function progressUpdate(packet) {
  const log = document.getElementById('log');
  const newest = log.firstChild;

  if (newest && newest.status === packet.status) {
    if ('progress' in packet) {
      // The entry may have been created by a packet without `progress`,
      // in which case it has no bar yet; create one instead of throwing.
      let bar = newest.querySelector('progress');
      if (!bar) {
        bar = document.createElement('progress');
        bar.max = 1;
        newest.appendChild(bar);
      }
      bar.value = packet.progress;
    }
    return;
  }

  const line = document.createElement('div');
  // Remember the status on the node so the next packet can be matched to it.
  line.status = packet.status;

  if (packet.status === 'done') {
    // Final result: the entry holds only the recognized text.
    const pre = document.createElement('pre');
    pre.appendChild(document.createTextNode(packet.data.data.text));
    line.appendChild(pre);
  } else {
    const status = document.createElement('div');
    status.className = 'status';
    status.appendChild(document.createTextNode(packet.status));
    line.appendChild(status);

    if ('progress' in packet) {
      const bar = document.createElement('progress');
      bar.value = packet.progress;
      bar.max = 1;
      line.appendChild(bar);
    }
  }

  // Newest entries go first.
  log.insertBefore(line, newest);
}
||||||
|
|
||||||
|
/**
 * Runs OCR on `file` in the language currently selected in #langsel,
 * reporting progress and the final result through progressUpdate().
 *
 * @param {*} file image passed through to Tesseract.recognize()
 * @returns {Promise<void>}
 */
async function recognizeFile(file) {
  // Every run starts from an empty log.
  document.querySelector("#log").innerHTML = '';

  // User agents containing "Edge" get the asm.js core; all others get the wasm core.
  let corePath = '../../node_modules/tesseract.js-core/tesseract-core.wasm.js';
  if (window.navigator.userAgent.indexOf("Edge") > -1) {
    corePath = '../../node_modules/tesseract.js-core/tesseract-core.asm.js';
  }

  const lang = document.querySelector('#langsel').value;
  const options = { corePath, logger: progressUpdate };
  const data = await Tesseract.recognize(file, lang, options);

  // Hand the finished result to the same renderer as a synthetic 'done' packet.
  progressUpdate({ status: 'done', data });
}
||||||
|
</script> |
||||||
|
<select id="langsel" onchange="window.lastFile && recognizeFile(window.lastFile)"> |
||||||
|
<option value='afr' > Afrikaans </option> |
||||||
|
<option value='ara' > Arabic </option> |
||||||
|
<option value='aze' > Azerbaijani </option> |
||||||
|
<option value='bel' > Belarusian </option> |
||||||
|
<option value='ben' > Bengali </option> |
||||||
|
<option value='bul' > Bulgarian </option> |
||||||
|
<option value='cat' > Catalan </option> |
||||||
|
<option value='ces' > Czech </option> |
||||||
|
<option value='chi_sim' > Chinese </option> |
||||||
|
<option value='chi_tra' > Traditional Chinese </option> |
||||||
|
<option value='chr' > Cherokee </option> |
||||||
|
<option value='dan' > Danish </option> |
||||||
|
<option value='deu' > German </option> |
||||||
|
<option value='ell' > Greek </option> |
||||||
|
<option value='eng' selected> English </option> |
||||||
|
<option value='enm' > English (Old) </option> |
||||||
|
<option value='meme' > Internet Meme </option> |
||||||
|
<option value='epo' > Esperanto </option> |
||||||
|
<option value='epo_alt' > Esperanto alternative </option> |
||||||
|
<option value='est' > Estonian </option> |
||||||
|
<option value='eus' > Basque </option> |
||||||
|
<option value='fin' > Finnish </option> |
||||||
|
<option value='fra' > French </option> |
||||||
|
<option value='frk' > Frankish </option> |
||||||
|
<option value='frm' > French (Old) </option> |
||||||
|
<option value='glg' > Galician </option> |
||||||
|
<option value='grc' > Ancient Greek </option> |
||||||
|
<option value='heb' > Hebrew </option> |
||||||
|
<option value='hin' > Hindi </option> |
||||||
|
<option value='hrv' > Croatian </option> |
||||||
|
<option value='hun' > Hungarian </option> |
||||||
|
<option value='ind' > Indonesian </option> |
||||||
|
<option value='isl' > Icelandic </option> |
||||||
|
<option value='ita' > Italian </option> |
||||||
|
<option value='ita_old' > Italian (Old) </option> |
||||||
|
<option value='jpn' > Japanese </option> |
||||||
|
<option value='kan' > Kannada </option> |
||||||
|
<option value='kor' > Korean </option> |
||||||
|
<option value='lav' > Latvian </option> |
||||||
|
<option value='lit' > Lithuanian </option> |
||||||
|
<option value='mal' > Malayalam </option> |
||||||
|
<option value='mkd' > Macedonian </option> |
||||||
|
<option value='mlt' > Maltese </option> |
||||||
|
<option value='msa' > Malay </option> |
||||||
|
<option value='nld' > Dutch </option> |
||||||
|
<option value='nor' > Norwegian </option> |
||||||
|
<option value='pol' > Polish </option> |
||||||
|
<option value='por' > Portuguese </option> |
||||||
|
<option value='ron' > Romanian </option> |
||||||
|
<option value='rus' > Russian </option> |
||||||
|
<option value='slk' > Slovakian </option> |
||||||
|
<option value='slv' > Slovenian </option> |
||||||
|
<option value='spa' > Spanish </option> |
||||||
|
<option value='spa_old' > Old Spanish </option> |
||||||
|
<option value='sqi' > Albanian </option> |
||||||
|
<option value='srp' > Serbian (Latin) </option> |
||||||
|
<option value='swa' > Swahili </option> |
||||||
|
<option value='swe' > Swedish </option> |
||||||
|
<option value='tam' > Tamil </option> |
||||||
|
<option value='tel' > Telugu </option> |
||||||
|
<option value='tgl' > Tagalog </option> |
||||||
|
<option value='tha' > Thai </option> |
||||||
|
<option value='tur' > Turkish </option> |
||||||
|
<option value='ukr' > Ukrainian </option> |
||||||
|
<option value='vie' > Vietnamese </option> |
||||||
|
</select> |
||||||
|
|
||||||
|
<button onclick="recognizeFile('../../tests/assets/images/simple.png')">Sample Image</button> |
||||||
|
<input type="file" onchange="recognizeFile(window.lastFile=this.files[0])"> |
||||||
|
|
||||||
|
<div id="log"></div> |
||||||
|
|
||||||
|
|
||||||
|
<style> |
||||||
|
#log > div { |
||||||
|
color: #313131; |
||||||
|
border-top: 1px solid #dadada; |
||||||
|
padding: 9px; |
||||||
|
display: flex; |
||||||
|
} |
||||||
|
#log > div:first-child { |
||||||
|
border: 0; |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
.status { |
||||||
|
min-width: 250px; |
||||||
|
} |
||||||
|
#log { |
||||||
|
border: 1px solid #dadada; |
||||||
|
padding: 10px; |
||||||
|
margin-top: 20px; |
||||||
|
min-height: 100px; |
||||||
|
} |
||||||
|
body { |
||||||
|
font-family: sans-serif; |
||||||
|
margin: 30px; |
||||||
|
} |
||||||
|
|
||||||
|
progress { |
||||||
|
display: block; |
||||||
|
width: 100%; |
||||||
|
transition: opacity 0.5s linear; |
||||||
|
} |
||||||
|
progress[value="1"] { |
||||||
|
opacity: 0.5; |
||||||
|
} |
||||||
|
</style> |
@ -0,0 +1,52 @@ |
|||||||
|
<html> |
||||||
|
<head> |
||||||
|
<script src="/dist/tesseract.dev.js"></script> |
||||||
|
</head> |
||||||
|
<body> |
||||||
|
<div> |
||||||
|
<input type="file" id="uploader"> |
||||||
|
<button id="download-pdf" disabled="true">Download PDF</button> |
||||||
|
</div> |
||||||
|
<textarea id="board" readonly rows="8" cols="80">Upload an image file</textarea> |
||||||
|
<script> |
||||||
|
const { createWorker } = Tesseract;

// One worker is shared by every upload and by the PDF export.
const worker = createWorker({
  corePath: '/node_modules/tesseract.js-core/tesseract-core.wasm.js',
  logger: m => console.log(m),
});
const uploader = document.getElementById('uploader');
const dlBtn = document.getElementById('download-pdf');

/**
 * Change handler for the file input: OCR the selected file, show the text in
 * #board and enable the "Download PDF" button.
 *
 * @param {Event} event Change event whose target exposes `files`.
 */
const recognize = async ({ target: { files } }) => {
  // Nothing selected (e.g. the selection was cleared): there is no image to recognize.
  if (!files || files.length === 0) {
    return;
  }
  await worker.load();
  await worker.loadLanguage('eng');
  await worker.initialize('eng');
  const { data: { text } } = await worker.recognize(files[0]);
  const board = document.getElementById('board');
  board.value = text;
  dlBtn.disabled = false;
};

/**
 * Click handler for the download button: ask the worker for a PDF of the
 * last recognition and hand it to the browser as a file download.
 */
const downloadPDF = async () => {
  const filename = 'tesseract-ocr-result.pdf';
  const { data } = await worker.getPDF('Tesseract OCR Result');
  const blob = new Blob([new Uint8Array(data)], { type: 'application/pdf' });

  if (navigator.msSaveBlob) {
    // IE 10+
    navigator.msSaveBlob(blob, filename);
    return;
  }

  const link = document.createElement('a');
  // Without support for the download attribute there is no way to name the file; do nothing.
  if (link.download === undefined) {
    return;
  }

  const url = URL.createObjectURL(blob);
  link.setAttribute('href', url);
  link.setAttribute('download', filename);
  link.style.visibility = 'hidden';
  document.body.appendChild(link);
  link.click();
  document.body.removeChild(link);
  // Release the blob once the click has been dispatched; otherwise every
  // download keeps its PDF alive until the page is unloaded.
  setTimeout(() => URL.revokeObjectURL(url), 0);
};

uploader.addEventListener('change', recognize);
dlBtn.addEventListener('click', downloadPDF);
||||||
|
</script> |
||||||
|
</body> |
||||||
|
</html> |
After Width: | Height: | Size: 1011 KiB |
After Width: | Height: | Size: 23 KiB |
After Width: | Height: | Size: 408 KiB |
@ -0,0 +1,27 @@ |
|||||||
|
#!/usr/bin/env node
|
||||||
|
const path = require('path'); |
||||||
|
const { createWorker } = require('../../'); |
||||||
|
|
||||||
|
const worker = createWorker();

// Benchmark: recognize each sample image 10 times with one worker and print
// the wall-clock time per image and in total.
(async () => {
  const fileArr = ["../data/meditations.jpg", "../data/tyger.jpg", "../data/testocr.png"];
  let timeTotal = 0;

  try {
    await worker.load();
    await worker.loadLanguage('eng');
    await worker.initialize('eng');

    for (const file of fileArr) {
      // Resolve against this script's directory so the benchmark works from
      // any working directory (same convention as the example scripts).
      const imagePath = path.resolve(__dirname, file);

      const time1 = Date.now();
      for (let i = 0; i < 10; i += 1) {
        await worker.recognize(imagePath);
      }
      const time2 = Date.now();

      const timeDif = (time2 - time1) / 1e3;
      timeTotal += timeDif;

      console.log(file + " [x10] runtime: " + timeDif + "s");
    }

    console.log("Total runtime: " + timeTotal + "s");
  } finally {
    // Always shut the worker down, even when a step above failed.
    await worker.terminate();
  }
})();
@ -0,0 +1,13 @@ |
|||||||
|
#!/usr/bin/env node
|
||||||
|
const path = require('path'); |
||||||
|
const Tesseract = require('../../'); |
||||||
|
|
||||||
|
// Optional first CLI argument: path of the image, relative to this script's directory.
const imagePath = process.argv[2];
const image = path.resolve(__dirname, (imagePath || '../../tests/assets/images/cosmic.png'));

console.log(`Recognizing ${image}`);

// Run detection, echoing every logger message, then print the returned data.
(async () => {
  const logMessage = (m) => console.log(m);
  const { data } = await Tesseract.detect(image, { logger: logMessage });
  console.log(data);
})();
@ -0,0 +1,22 @@ |
|||||||
|
#!/usr/bin/env node
|
||||||
|
const path = require('path'); |
||||||
|
const fs = require('fs'); |
||||||
|
const { createWorker } = require('../../'); |
||||||
|
|
||||||
|
// Optional first CLI argument: path of the image, relative to this script's directory.
const [,, imagePath] = process.argv;
const image = path.resolve(__dirname, (imagePath || '../../tests/assets/images/cosmic.png'));

console.log(`Recognizing ${image}`);

// Recognize the image, print its text, and write the PDF rendering of the
// result to tesseract-ocr-result.pdf in the current working directory.
(async () => {
  const worker = createWorker();
  try {
    await worker.load();
    await worker.loadLanguage('eng');
    await worker.initialize('eng');
    const { data: { text } } = await worker.recognize(image);
    console.log(text);
    const { data } = await worker.getPDF('Tesseract OCR Result');
    fs.writeFileSync('tesseract-ocr-result.pdf', Buffer.from(data));
    console.log('Generate PDF: tesseract-ocr-result.pdf');
  } finally {
    // Always shut the worker down, even when a step above failed.
    await worker.terminate();
  }
})();
@ -0,0 +1,20 @@ |
|||||||
|
#!/usr/bin/env node
|
||||||
|
const path = require('path'); |
||||||
|
const { createWorker } = require('../../'); |
||||||
|
|
||||||
|
// Optional first CLI argument: path of the image, relative to this script's directory.
const [,, imagePath] = process.argv;
const image = path.resolve(__dirname, (imagePath || '../../tests/assets/images/cosmic.png'));

console.log(`Recognizing ${image}`);
const worker = createWorker({
  logger: m => console.log(m),
});

// Recognize the image with the English model and print the extracted text.
(async () => {
  try {
    await worker.load();
    await worker.loadLanguage('eng');
    await worker.initialize('eng');
    const { data: { text } } = await worker.recognize(image);
    console.log(text);
  } finally {
    // Always shut the worker down, even when a step above failed.
    await worker.terminate();
  }
})();
@ -1,23 +1,89 @@ |
|||||||
{ |
{ |
||||||
"name": "tesseract.js", |
"name": "tesseract.js", |
||||||
"version": "1.0.0", |
"version": "3.0.3", |
||||||
"description": "", |
"description": "Pure Javascript Multilingual OCR", |
||||||
"main": "Tesseract.js", |
"main": "src/index.js", |
||||||
"dependencies": { |
"types": "src/index.d.ts", |
||||||
"pako": "^0.2.7" |
"unpkg": "dist/tesseract.min.js", |
||||||
}, |
"jsdelivr": "dist/tesseract.min.js", |
||||||
"devDependencies": {}, |
|
||||||
"scripts": { |
"scripts": { |
||||||
"test": "echo \"Error: no test specified\" && exit 1" |
"start": "node scripts/server.js", |
||||||
|
"build": "rimraf dist && webpack --config scripts/webpack.config.prod.js && rollup -c scripts/rollup.esm.js", |
||||||
|
"profile:tesseract": "webpack-bundle-analyzer dist/tesseract-stats.json", |
||||||
|
"profile:worker": "webpack-bundle-analyzer dist/worker-stats.json", |
||||||
|
"prepublishOnly": "npm run build", |
||||||
|
"wait": "rimraf dist && wait-on http://localhost:3000/dist/tesseract.dev.js", |
||||||
|
"test": "npm-run-all -p -r start test:all", |
||||||
|
"test:all": "npm-run-all wait test:browser:* test:node:all", |
||||||
|
"test:node": "nyc mocha --exit --bail --require ./scripts/test-helper.js", |
||||||
|
"test:node:all": "npm run test:node -- ./tests/*.test.js", |
||||||
|
"test:browser-tpl": "mocha-headless-chrome -a incognito -a no-sandbox -a disable-setuid-sandbox -a disable-logging -t 300000", |
||||||
|
"test:browser:detect": "npm run test:browser-tpl -- -f ./tests/detect.test.html", |
||||||
|
"test:browser:recognize": "npm run test:browser-tpl -- -f ./tests/recognize.test.html", |
||||||
|
"test:browser:scheduler": "npm run test:browser-tpl -- -f ./tests/scheduler.test.html", |
||||||
|
"test:browser:FS": "npm run test:browser-tpl -- -f ./tests/FS.test.html", |
||||||
|
"lint": "eslint src", |
||||||
|
"lint:fix": "eslint --fix src", |
||||||
|
"postinstall": "opencollective-postinstall || true" |
||||||
|
}, |
||||||
|
"browser": { |
||||||
|
"./src/worker/node/index.js": "./src/worker/browser/index.js" |
||||||
|
}, |
||||||
|
"author": "", |
||||||
|
"contributors": [ |
||||||
|
"jeromewu" |
||||||
|
], |
||||||
|
"license": "Apache-2.0", |
||||||
|
"devDependencies": { |
||||||
|
"@babel/core": "^7.18.7", |
||||||
|
"@babel/preset-env": "^7.18.7", |
||||||
|
"@rollup/plugin-commonjs": "^22.0.2", |
||||||
|
"acorn": "^6.4.0", |
||||||
|
"babel-loader": "^8.2.0", |
||||||
|
"buffer": "^6.0.3", |
||||||
|
"cors": "^2.8.5", |
||||||
|
"eslint": "^7.2.0", |
||||||
|
"eslint-config-airbnb-base": "^14.2.0", |
||||||
|
"eslint-plugin-import": "^2.22.1", |
||||||
|
"expect.js": "^0.3.1", |
||||||
|
"express": "^4.17.1", |
||||||
|
"mocha": "^8.1.3", |
||||||
|
"mocha-headless-chrome": "^2.0.3", |
||||||
|
"npm-run-all": "^4.1.5", |
||||||
|
"nyc": "^15.1.0", |
||||||
|
"rimraf": "^2.7.1", |
||||||
|
"rollup": "^2.79.0", |
||||||
|
"wait-on": "^3.3.0", |
||||||
|
"webpack": "^5.74.0", |
||||||
|
"webpack-bundle-analyzer": "^4.6.0", |
||||||
|
"webpack-cli": "^4.10.0", |
||||||
|
"webpack-dev-middleware": "^5.3.3" |
||||||
|
}, |
||||||
|
"dependencies": { |
||||||
|
"babel-eslint": "^10.1.0", |
||||||
|
"bmp-js": "^0.1.0", |
||||||
|
"file-type": "^12.4.1", |
||||||
|
"idb-keyval": "^3.2.0", |
||||||
|
"is-electron": "^2.2.0", |
||||||
|
"is-url": "^1.2.4", |
||||||
|
"node-fetch": "^2.6.0", |
||||||
|
"opencollective-postinstall": "^2.0.2", |
||||||
|
"regenerator-runtime": "^0.13.3", |
||||||
|
"resolve-url": "^0.2.1", |
||||||
|
"tesseract.js-core": "^3.0.2", |
||||||
|
"wasm-feature-detect": "^1.2.11", |
||||||
|
"zlibjs": "^0.3.1" |
||||||
}, |
}, |
||||||
"repository": { |
"repository": { |
||||||
"type": "git", |
"type": "git", |
||||||
"url": "https://github.com/naptha/tesseract.js.git" |
"url": "https://github.com/naptha/tesseract.js.git" |
||||||
}, |
}, |
||||||
"author": "", |
|
||||||
"license": "ISC", |
|
||||||
"bugs": { |
"bugs": { |
||||||
"url": "https://github.com/naptha/tesseract.js/issues" |
"url": "https://github.com/naptha/tesseract.js/issues" |
||||||
}, |
}, |
||||||
"homepage": "https://github.com/naptha/tesseract.js" |
"homepage": "https://github.com/naptha/tesseract.js", |
||||||
|
"collective": { |
||||||
|
"type": "opencollective", |
||||||
|
"url": "https://opencollective.com/tesseractjs" |
||||||
|
} |
||||||
} |
} |
||||||
|
@ -0,0 +1,5 @@ |
|||||||
|
{ |
||||||
|
"rules": { |
||||||
|
"import/no-extraneous-dependencies": 0 |
||||||
|
} |
||||||
|
} |
@ -0,0 +1,13 @@ |
|||||||
|
import commonjs from "@rollup/plugin-commonjs"; |
||||||
|
|
||||||
|
export default [ |
||||||
|
{ |
||||||
|
input: "dist/tesseract.min.js", |
||||||
|
output: { |
||||||
|
file: "dist/tesseract.esm.min.js", |
||||||
|
format: "esm", |
||||||
|
banner: "/* eslint-disable */", |
||||||
|
}, |
||||||
|
plugins: [commonjs()], |
||||||
|
}, |
||||||
|
]; |
@ -0,0 +1,17 @@ |
|||||||
|
const webpack = require('webpack'); |
||||||
|
const middleware = require('webpack-dev-middleware'); |
||||||
|
const express = require('express'); |
||||||
|
const path = require('path'); |
||||||
|
const cors = require('cors'); |
||||||
|
const webpackConfig = require('./webpack.config.dev'); |
||||||
|
|
||||||
|
// Development server: serves the repository root as static files and the
// webpack dev bundles under /dist, with CORS enabled.
const compiler = webpack(webpackConfig);
const app = express();

const repoRoot = path.resolve(__dirname, '..');
const bundleOptions = { publicPath: '/dist', writeToDisk: true };

app.use(cors());
app.use('/', express.static(repoRoot));
app.use(middleware(compiler, bundleOptions));

function announce() {
  console.log('Server is running on the port no. 3000');
}

// The listening server is exported so that callers can hold a reference to it.
module.exports = app.listen(3000, announce);
@ -0,0 +1,9 @@ |
|||||||
|
// Test bootstrap: expose the assertion library, a few core modules, the
// library under test and every shared test constant as globals.
const constants = require('../tests/constants');

global.expect = require('expect.js');
global.fs = require('fs');
global.path = require('path');
global.Tesseract = require('../src');

for (const key of Object.keys(constants)) {
  global[key] = constants[key];
}
@ -0,0 +1,28 @@ |
|||||||
|
module.exports = { |
||||||
|
resolve: { |
||||||
|
fallback: { |
||||||
|
buffer: require.resolve('buffer/'), |
||||||
|
}, |
||||||
|
}, |
||||||
|
module: { |
||||||
|
rules: [ |
||||||
|
{ |
||||||
|
test: /\.m?js$/, |
||||||
|
// exclude: /(node_modules|bower_components)/,
|
||||||
|
use: { |
||||||
|
loader: 'babel-loader', |
||||||
|
options: { |
||||||
|
presets: [ |
||||||
|
[ |
||||||
|
'@babel/preset-env', |
||||||
|
{ |
||||||
|
targets: 'last 2 versions', |
||||||
|
}, |
||||||
|
], |
||||||
|
], |
||||||
|
}, |
||||||
|
}, |
||||||
|
}, |
||||||
|
], |
||||||
|
}, |
||||||
|
}; |
@ -0,0 +1,48 @@ |
|||||||
|
const path = require('path'); |
||||||
|
const webpack = require('webpack'); |
||||||
|
const { BundleAnalyzerPlugin } = require('webpack-bundle-analyzer'); |
||||||
|
const common = require('./webpack.config.common'); |
||||||
|
|
||||||
|
/**
 * Build one development webpack configuration on top of the shared config.
 *
 * @param {object} params
 * @param {string} params.entry           Entry module of the bundle.
 * @param {string} params.filename        Output file name; its part before the first '.' names the stats file.
 * @param {string} [params.library]       Name the bundle is exposed under.
 * @param {string} [params.libraryTarget] How the library is exposed (e.g. 'umd').
 * @returns {object} webpack configuration object.
 */
const genConfig = ({
  entry, filename, library, libraryTarget,
}) => ({
  ...common,
  mode: 'development',
  entry,
  output: {
    filename,
    library,
    libraryTarget,
  },
  plugins: [
    new webpack.ProvidePlugin({
      Buffer: ['buffer', 'Buffer'],
    }),
    new webpack.DefinePlugin({
      'process.env': {
        TESS_ENV: JSON.stringify('development'),
      },
    }),
    new BundleAnalyzerPlugin({
      // 'disabled' is the documented value for "no report, only the stats file";
      // the previous 'disable' is not a documented analyzerMode.
      analyzerMode: 'disabled',
      statsFilename: `${filename.split('.')[0]}-stats.json`,
      generateStatsFile: true,
    }),
  ],
  devServer: {
    allowedHosts: ['localhost', '.gitpod.io'],
  },
});
||||||
|
|
||||||
|
module.exports = [ |
||||||
|
genConfig({ |
||||||
|
entry: path.resolve(__dirname, '..', 'src', 'index.js'), |
||||||
|
filename: 'tesseract.dev.js', |
||||||
|
library: 'Tesseract', |
||||||
|
libraryTarget: 'umd', |
||||||
|
}), |
||||||
|
genConfig({ |
||||||
|
entry: path.resolve(__dirname, '..', 'src', 'worker-script', 'browser', 'index.js'), |
||||||
|
filename: 'worker.dev.js', |
||||||
|
}), |
||||||
|
]; |
@ -0,0 +1,36 @@ |
|||||||
|
const path = require('path'); |
||||||
|
const common = require('./webpack.config.common'); |
||||||
|
const webpack = require('webpack'); |
||||||
|
|
||||||
|
/**
 * Build one production webpack configuration on top of the shared config.
 * Bundles are written to the repository's dist directory with source maps.
 *
 * @param {object} params
 * @param {string} params.entry           Entry module of the bundle.
 * @param {string} params.filename        Output file name.
 * @param {string} [params.library]       Name the bundle is exposed under.
 * @param {string} [params.libraryTarget] How the library is exposed (e.g. 'umd').
 * @returns {object} webpack configuration object.
 */
const genConfig = ({
  entry, filename, library, libraryTarget,
}) => {
  const output = {
    path: path.resolve(__dirname, '..', 'dist'),
    filename,
    library,
    libraryTarget,
  };
  // Makes `Buffer` resolve to the `buffer` package inside the bundle.
  const provideBuffer = new webpack.ProvidePlugin({
    Buffer: ['buffer', 'Buffer'],
  });

  return {
    ...common,
    mode: 'production',
    devtool: 'source-map',
    entry,
    output,
    plugins: [provideBuffer],
  };
};
||||||
|
|
||||||
|
module.exports = [ |
||||||
|
genConfig({ |
||||||
|
entry: path.resolve(__dirname, '..', 'src', 'index.js'), |
||||||
|
filename: 'tesseract.min.js', |
||||||
|
library: 'Tesseract', |
||||||
|
libraryTarget: 'umd', |
||||||
|
}), |
||||||
|
genConfig({ |
||||||
|
entry: path.resolve(__dirname, '..', 'src', 'worker-script', 'browser', 'index.js'), |
||||||
|
filename: 'worker.min.js', |
||||||
|
}), |
||||||
|
]; |
@ -0,0 +1,28 @@ |
|||||||
|
const createWorker = require('./createWorker'); |
||||||
|
|
||||||
|
/**
 * One-shot recognition: create a worker, load and initialize `langs`,
 * recognize `image`, and terminate the worker again.
 *
 * The worker is terminated whether recognition succeeds or any step
 * (including loading and initialization) fails.
 *
 * @param {*} image   Image passed through to worker.recognize.
 * @param {*} langs   Language(s) passed to worker.loadLanguage and worker.initialize.
 * @param {object} [options] Options passed to createWorker.
 * @returns {Promise<*>} Whatever worker.recognize resolves with.
 */
const recognize = async (image, langs, options) => {
  const worker = createWorker(options);
  try {
    await worker.load();
    await worker.loadLanguage(langs);
    await worker.initialize(langs);
    return await worker.recognize(image);
  } finally {
    await worker.terminate();
  }
};
||||||
|
|
||||||
|
/**
 * One-shot detection: create a worker, load and initialize the 'osd'
 * language data, run worker.detect on `image`, and terminate the worker again.
 *
 * The worker is terminated whether detection succeeds or any step
 * (including loading and initialization) fails.
 *
 * @param {*} image   Image passed through to worker.detect.
 * @param {object} [options] Options passed to createWorker.
 * @returns {Promise<*>} Whatever worker.detect resolves with.
 */
const detect = async (image, options) => {
  const worker = createWorker(options);
  try {
    await worker.load();
    await worker.loadLanguage('osd');
    await worker.initialize('osd');
    return await worker.detect(image);
  } finally {
    await worker.terminate();
  }
};
||||||
|
|
||||||
|
module.exports = { |
||||||
|
recognize, |
||||||
|
detect, |
||||||
|
}; |
@ -0,0 +1,12 @@ |
|||||||
|
/* |
||||||
|
* OEM = OCR Engine Mode, and there are 4 possible modes. |
||||||
|
* |
||||||
|
* By default tesseract.js uses LSTM_ONLY mode. |
||||||
|
* |
||||||
|
*/ |
||||||
|
module.exports = { |
||||||
|
TESSERACT_ONLY: 0, |
||||||
|
LSTM_ONLY: 1, |
||||||
|
TESSERACT_LSTM_COMBINED: 2, |
||||||
|
DEFAULT: 3, |
||||||
|
}; |
@ -0,0 +1,19 @@ |
|||||||
|
/* |
||||||
|
* PSM = Page Segmentation Mode |
||||||
|
*/ |
||||||
|
module.exports = { |
||||||
|
OSD_ONLY: '0', |
||||||
|
AUTO_OSD: '1', |
||||||
|
AUTO_ONLY: '2', |
||||||
|
AUTO: '3', |
||||||
|
SINGLE_COLUMN: '4', |
||||||
|
SINGLE_BLOCK_VERT_TEXT: '5', |
||||||
|
SINGLE_BLOCK: '6', |
||||||
|
SINGLE_LINE: '7', |
||||||
|
SINGLE_WORD: '8', |
||||||
|
CIRCLE_WORD: '9', |
||||||
|
SINGLE_CHAR: '10', |
||||||
|
SPARSE_TEXT: '11', |
||||||
|
SPARSE_TEXT_OSD: '12', |
||||||
|
RAW_LINE: '13', |
||||||
|
}; |
@ -0,0 +1,5 @@ |
|||||||
|
const OEM = require('./OEM'); |
||||||
|
|
||||||
|
module.exports = { |
||||||
|
defaultOEM: OEM.DEFAULT, |
||||||
|
}; |
@ -0,0 +1,13 @@ |
|||||||
|
module.exports = { |
||||||
|
/* |
||||||
|
* default path for downloading *.traineddata |
||||||
|
*/ |
||||||
|
langPath: 'https://tessdata.projectnaptha.com/4.0.0', |
||||||
|
/* |
||||||
|
* Use BlobURL for worker script by default |
||||||
|
* TODO: remove this option |
||||||
|
* |
||||||
|
*/ |
||||||
|
workerBlobURL: true, |
||||||
|
logger: () => {}, |
||||||
|
}; |
@ -0,0 +1,218 @@ |
|||||||
|
/* |
||||||
|
* languages with existing tesseract traineddata |
||||||
|
* https://tesseract-ocr.github.io/tessdoc/Data-Files#data-files-for-version-400-november-29-2016
|
||||||
|
*/ |
||||||
|
|
||||||
|
/** |
||||||
|
* @typedef {object} Languages |
||||||
|
* @property {string} AFR Afrikaans |
||||||
|
* @property {string} AMH Amharic |
||||||
|
* @property {string} ARA Arabic |
||||||
|
* @property {string} ASM Assamese |
||||||
|
* @property {string} AZE Azerbaijani |
||||||
|
* @property {string} AZE_CYRL Azerbaijani - Cyrillic |
||||||
|
* @property {string} BEL Belarusian |
||||||
|
* @property {string} BEN Bengali |
||||||
|
* @property {string} BOD Tibetan |
||||||
|
* @property {string} BOS Bosnian |
||||||
|
* @property {string} BUL Bulgarian |
||||||
|
* @property {string} CAT Catalan; Valencian |
||||||
|
* @property {string} CEB Cebuano |
||||||
|
* @property {string} CES Czech |
||||||
|
* @property {string} CHI_SIM Chinese - Simplified |
||||||
|
* @property {string} CHI_TRA Chinese - Traditional |
||||||
|
* @property {string} CHR Cherokee |
||||||
|
* @property {string} CYM Welsh |
||||||
|
* @property {string} DAN Danish |
||||||
|
* @property {string} DEU German |
||||||
|
* @property {string} DZO Dzongkha |
||||||
|
* @property {string} ELL Greek, Modern (1453-) |
||||||
|
* @property {string} ENG English |
||||||
|
* @property {string} ENM English, Middle (1100-1500) |
||||||
|
* @property {string} EPO Esperanto |
||||||
|
* @property {string} EST Estonian |
||||||
|
* @property {string} EUS Basque |
||||||
|
* @property {string} FAS Persian |
||||||
|
* @property {string} FIN Finnish |
||||||
|
* @property {string} FRA French |
||||||
|
* @property {string} FRK German Fraktur |
||||||
|
* @property {string} FRM French, Middle (ca. 1400-1600) |
||||||
|
* @property {string} GLE Irish |
||||||
|
* @property {string} GLG Galician |
||||||
|
* @property {string} GRC Greek, Ancient (-1453) |
||||||
|
* @property {string} GUJ Gujarati |
||||||
|
* @property {string} HAT Haitian; Haitian Creole |
||||||
|
* @property {string} HEB Hebrew |
||||||
|
* @property {string} HIN Hindi |
||||||
|
* @property {string} HRV Croatian |
||||||
|
* @property {string} HUN Hungarian |
||||||
|
* @property {string} IKU Inuktitut |
||||||
|
* @property {string} IND Indonesian |
||||||
|
* @property {string} ISL Icelandic |
||||||
|
* @property {string} ITA Italian |
||||||
|
* @property {string} ITA_OLD Italian - Old |
||||||
|
* @property {string} JAV Javanese |
||||||
|
* @property {string} JPN Japanese |
||||||
|
* @property {string} KAN Kannada |
||||||
|
* @property {string} KAT Georgian |
||||||
|
* @property {string} KAT_OLD Georgian - Old |
||||||
|
* @property {string} KAZ Kazakh |
||||||
|
* @property {string} KHM Central Khmer |
||||||
|
* @property {string} KIR Kirghiz; Kyrgyz |
||||||
|
* @property {string} KOR Korean |
||||||
|
* @property {string} KUR Kurdish |
||||||
|
* @property {string} LAO Lao |
||||||
|
* @property {string} LAT Latin |
||||||
|
* @property {string} LAV Latvian |
||||||
|
* @property {string} LIT Lithuanian |
||||||
|
* @property {string} MAL Malayalam |
||||||
|
* @property {string} MAR Marathi |
||||||
|
* @property {string} MKD Macedonian |
||||||
|
* @property {string} MLT Maltese |
||||||
|
* @property {string} MSA Malay |
||||||
|
* @property {string} MYA Burmese |
||||||
|
* @property {string} NEP Nepali |
||||||
|
* @property {string} NLD Dutch; Flemish |
||||||
|
* @property {string} NOR Norwegian |
||||||
|
* @property {string} ORI Oriya |
||||||
|
* @property {string} PAN Panjabi; Punjabi |
||||||
|
* @property {string} POL Polish |
||||||
|
* @property {string} POR Portuguese |
||||||
|
* @property {string} PUS Pushto; Pashto |
||||||
|
* @property {string} RON Romanian; Moldavian; Moldovan |
||||||
|
* @property {string} RUS Russian |
||||||
|
* @property {string} SAN Sanskrit |
||||||
|
* @property {string} SIN Sinhala; Sinhalese |
||||||
|
* @property {string} SLK Slovak |
||||||
|
* @property {string} SLV Slovenian |
||||||
|
* @property {string} SPA Spanish; Castilian |
||||||
|
* @property {string} SPA_OLD Spanish; Castilian - Old |
||||||
|
* @property {string} SQI Albanian |
||||||
|
* @property {string} SRP Serbian |
||||||
|
* @property {string} SRP_LATN Serbian - Latin |
||||||
|
* @property {string} SWA Swahili |
||||||
|
* @property {string} SWE Swedish |
||||||
|
* @property {string} SYR Syriac |
||||||
|
* @property {string} TAM Tamil |
||||||
|
* @property {string} TEL Telugu |
||||||
|
* @property {string} TGK Tajik |
||||||
|
* @property {string} TGL Tagalog |
||||||
|
* @property {string} THA Thai |
||||||
|
* @property {string} TIR Tigrinya |
||||||
|
* @property {string} TUR Turkish |
||||||
|
* @property {string} UIG Uighur; Uyghur |
||||||
|
* @property {string} UKR Ukrainian |
||||||
|
* @property {string} URD Urdu |
||||||
|
* @property {string} UZB Uzbek |
||||||
|
* @property {string} UZB_CYRL Uzbek - Cyrillic |
||||||
|
* @property {string} VIE Vietnamese |
||||||
|
* @property {string} YID Yiddish |
||||||
|
*/ |
||||||
|
|
||||||
|
/** |
||||||
|
* @type {Languages} |
||||||
|
*/ |
||||||
|
module.exports = { |
||||||
|
AFR: 'afr', |
||||||
|
AMH: 'amh', |
||||||
|
ARA: 'ara', |
||||||
|
ASM: 'asm', |
||||||
|
AZE: 'aze', |
||||||
|
AZE_CYRL: 'aze_cyrl', |
||||||
|
BEL: 'bel', |
||||||
|
BEN: 'ben', |
||||||
|
BOD: 'bod', |
||||||
|
BOS: 'bos', |
||||||
|
BUL: 'bul', |
||||||
|
CAT: 'cat', |
||||||
|
CEB: 'ceb', |
||||||
|
CES: 'ces', |
||||||
|
CHI_SIM: 'chi_sim', |
||||||
|
CHI_TRA: 'chi_tra', |
||||||
|
CHR: 'chr', |
||||||
|
CYM: 'cym', |
||||||
|
DAN: 'dan', |
||||||
|
DEU: 'deu', |
||||||
|
DZO: 'dzo', |
||||||
|
ELL: 'ell', |
||||||
|
ENG: 'eng', |
||||||
|
ENM: 'enm', |
||||||
|
EPO: 'epo', |
||||||
|
EST: 'est', |
||||||
|
EUS: 'eus', |
||||||
|
FAS: 'fas', |
||||||
|
FIN: 'fin', |
||||||
|
FRA: 'fra', |
||||||
|
FRK: 'frk', |
||||||
|
FRM: 'frm', |
||||||
|
GLE: 'gle', |
||||||
|
GLG: 'glg', |
||||||
|
GRC: 'grc', |
||||||
|
GUJ: 'guj', |
||||||
|
HAT: 'hat', |
||||||
|
HEB: 'heb', |
||||||
|
HIN: 'hin', |
||||||
|
HRV: 'hrv', |
||||||
|
HUN: 'hun', |
||||||
|
IKU: 'iku', |
||||||
|
IND: 'ind', |
||||||
|
ISL: 'isl', |
||||||
|
ITA: 'ita', |
||||||
|
ITA_OLD: 'ita_old', |
||||||
|
JAV: 'jav', |
||||||
|
JPN: 'jpn', |
||||||
|
KAN: 'kan', |
||||||
|
KAT: 'kat', |
||||||
|
KAT_OLD: 'kat_old', |
||||||
|
KAZ: 'kaz', |
||||||
|
KHM: 'khm', |
||||||
|
KIR: 'kir', |
||||||
|
KOR: 'kor', |
||||||
|
KUR: 'kur', |
||||||
|
LAO: 'lao', |
||||||
|
LAT: 'lat', |
||||||
|
LAV: 'lav', |
||||||
|
LIT: 'lit', |
||||||
|
MAL: 'mal', |
||||||
|
MAR: 'mar', |
||||||
|
MKD: 'mkd', |
||||||
|
MLT: 'mlt', |
||||||
|
MSA: 'msa', |
||||||
|
MYA: 'mya', |
||||||
|
NEP: 'nep', |
||||||
|
NLD: 'nld', |
||||||
|
NOR: 'nor', |
||||||
|
ORI: 'ori', |
||||||
|
PAN: 'pan', |
||||||
|
POL: 'pol', |
||||||
|
POR: 'por', |
||||||
|
PUS: 'pus', |
||||||
|
RON: 'ron', |
||||||
|
RUS: 'rus', |
||||||
|
SAN: 'san', |
||||||
|
SIN: 'sin', |
||||||
|
SLK: 'slk', |
||||||
|
SLV: 'slv', |
||||||
|
SPA: 'spa', |
||||||
|
SPA_OLD: 'spa_old', |
||||||
|
SQI: 'sqi', |
||||||
|
SRP: 'srp', |
||||||
|
SRP_LATN: 'srp_latn', |
||||||
|
SWA: 'swa', |
||||||
|
SWE: 'swe', |
||||||
|
SYR: 'syr', |
||||||
|
TAM: 'tam', |
||||||
|
TEL: 'tel', |
||||||
|
TGK: 'tgk', |
||||||
|
TGL: 'tgl', |
||||||
|
THA: 'tha', |
||||||
|
TIR: 'tir', |
||||||
|
TUR: 'tur', |
||||||
|
UIG: 'uig', |
||||||
|
UKR: 'ukr', |
||||||
|
URD: 'urd', |
||||||
|
UZB: 'uzb', |
||||||
|
UZB_CYRL: 'uzb_cyrl', |
||||||
|
VIE: 'vie', |
||||||
|
YID: 'yid', |
||||||
|
}; |
@ -0,0 +1,21 @@ |
|||||||
|
const getId = require('./utils/getId'); |
||||||
|
|
||||||
|
let jobCounter = 0; |
||||||
|
|
||||||
|
module.exports = ({ |
||||||
|
id: _id, |
||||||
|
action, |
||||||
|
payload = {}, |
||||||
|
}) => { |
||||||
|
let id = _id; |
||||||
|
if (typeof id === 'undefined') { |
||||||
|
id = getId('Job', jobCounter); |
||||||
|
jobCounter += 1; |
||||||
|
} |
||||||
|
|
||||||
|
return { |
||||||
|
id, |
||||||
|
action, |
||||||
|
payload, |
||||||
|
}; |
||||||
|
}; |
@ -0,0 +1,80 @@ |
|||||||
|
const createJob = require('./createJob'); |
||||||
|
const { log } = require('./utils/log'); |
||||||
|
const getId = require('./utils/getId'); |
||||||
|
|
||||||
|
let schedulerCounter = 0; |
||||||
|
|
||||||
|
module.exports = () => { |
||||||
|
const id = getId('Scheduler', schedulerCounter); |
||||||
|
const workers = {}; |
||||||
|
const runningWorkers = {}; |
||||||
|
let jobQueue = []; |
||||||
|
|
||||||
|
schedulerCounter += 1; |
||||||
|
|
||||||
|
const getQueueLen = () => jobQueue.length; |
||||||
|
const getNumWorkers = () => Object.keys(workers).length; |
||||||
|
|
||||||
|
const dequeue = () => { |
||||||
|
if (jobQueue.length !== 0) { |
||||||
|
const wIds = Object.keys(workers); |
||||||
|
for (let i = 0; i < wIds.length; i += 1) { |
||||||
|
if (typeof runningWorkers[wIds[i]] === 'undefined') { |
||||||
|
jobQueue[0](workers[wIds[i]]); |
||||||
|
break; |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
}; |
||||||
|
|
||||||
|
const queue = (action, payload) => ( |
||||||
|
new Promise((resolve, reject) => { |
||||||
|
const job = createJob({ action, payload }); |
||||||
|
jobQueue.push(async (w) => { |
||||||
|
jobQueue.shift(); |
||||||
|
runningWorkers[w.id] = job; |
||||||
|
try { |
||||||
|
resolve(await w[action].apply(this, [...payload, job.id])); |
||||||
|
} catch (err) { |
||||||
|
reject(err); |
||||||
|
} finally { |
||||||
|
delete runningWorkers[w.id]; |
||||||
|
dequeue(); |
||||||
|
} |
||||||
|
}); |
||||||
|
log(`[${id}]: Add ${job.id} to JobQueue`); |
||||||
|
log(`[${id}]: JobQueue length=${jobQueue.length}`); |
||||||
|
dequeue(); |
||||||
|
}) |
||||||
|
); |
||||||
|
|
||||||
|
const addWorker = (w) => { |
||||||
|
workers[w.id] = w; |
||||||
|
log(`[${id}]: Add ${w.id}`); |
||||||
|
log(`[${id}]: Number of workers=${getNumWorkers()}`); |
||||||
|
dequeue(); |
||||||
|
return w.id; |
||||||
|
}; |
||||||
|
|
||||||
|
const addJob = async (action, ...payload) => { |
||||||
|
if (getNumWorkers() === 0) { |
||||||
|
throw Error(`[${id}]: You need to have at least one worker before adding jobs`); |
||||||
|
} |
||||||
|
return queue(action, payload); |
||||||
|
}; |
||||||
|
|
||||||
|
const terminate = async () => { |
||||||
|
Object.keys(workers).forEach(async (wid) => { |
||||||
|
await workers[wid].terminate(); |
||||||
|
}); |
||||||
|
jobQueue = []; |
||||||
|
}; |
||||||
|
|
||||||
|
return { |
||||||
|
addWorker, |
||||||
|
addJob, |
||||||
|
terminate, |
||||||
|
getQueueLen, |
||||||
|
getNumWorkers, |
||||||
|
}; |
||||||
|
}; |
@ -0,0 +1,198 @@ |
|||||||
|
const resolvePaths = require('./utils/resolvePaths'); |
||||||
|
const circularize = require('./utils/circularize'); |
||||||
|
const createJob = require('./createJob'); |
||||||
|
const { log } = require('./utils/log'); |
||||||
|
const getId = require('./utils/getId'); |
||||||
|
const { defaultOEM } = require('./constants/config'); |
||||||
|
const { |
||||||
|
defaultOptions, |
||||||
|
spawnWorker, |
||||||
|
terminateWorker, |
||||||
|
onMessage, |
||||||
|
loadImage, |
||||||
|
send, |
||||||
|
} = require('./worker/node'); |
||||||
|
|
||||||
|
let workerCounter = 0; |
||||||
|
|
||||||
|
module.exports = (_options = {}) => { |
||||||
|
const id = getId('Worker', workerCounter); |
||||||
|
const { |
||||||
|
logger, |
||||||
|
errorHandler, |
||||||
|
...options |
||||||
|
} = resolvePaths({ |
||||||
|
...defaultOptions, |
||||||
|
..._options, |
||||||
|
}); |
||||||
|
const resolves = {}; |
||||||
|
const rejects = {}; |
||||||
|
let worker = spawnWorker(options); |
||||||
|
|
||||||
|
workerCounter += 1; |
||||||
|
|
||||||
|
const setResolve = (action, res) => { |
||||||
|
resolves[action] = res; |
||||||
|
}; |
||||||
|
|
||||||
|
const setReject = (action, rej) => { |
||||||
|
rejects[action] = rej; |
||||||
|
}; |
||||||
|
|
||||||
|
const startJob = ({ id: jobId, action, payload }) => ( |
||||||
|
new Promise((resolve, reject) => { |
||||||
|
log(`[${id}]: Start ${jobId}, action=${action}`); |
||||||
|
setResolve(action, resolve); |
||||||
|
setReject(action, reject); |
||||||
|
send(worker, { |
||||||
|
workerId: id, |
||||||
|
jobId, |
||||||
|
action, |
||||||
|
payload, |
||||||
|
}); |
||||||
|
}) |
||||||
|
); |
||||||
|
|
||||||
|
const load = (jobId) => ( |
||||||
|
startJob(createJob({ |
||||||
|
id: jobId, action: 'load', payload: { options }, |
||||||
|
})) |
||||||
|
); |
||||||
|
|
||||||
|
const writeText = (path, text, jobId) => ( |
||||||
|
startJob(createJob({ |
||||||
|
id: jobId, |
||||||
|
action: 'FS', |
||||||
|
payload: { method: 'writeFile', args: [path, text] }, |
||||||
|
})) |
||||||
|
); |
||||||
|
|
||||||
|
const readText = (path, jobId) => ( |
||||||
|
startJob(createJob({ |
||||||
|
id: jobId, |
||||||
|
action: 'FS', |
||||||
|
payload: { method: 'readFile', args: [path, { encoding: 'utf8' }] }, |
||||||
|
})) |
||||||
|
); |
||||||
|
|
||||||
|
const removeFile = (path, jobId) => ( |
||||||
|
startJob(createJob({ |
||||||
|
id: jobId, |
||||||
|
action: 'FS', |
||||||
|
payload: { method: 'unlink', args: [path] }, |
||||||
|
})) |
||||||
|
); |
||||||
|
|
||||||
|
const FS = (method, args, jobId) => ( |
||||||
|
startJob(createJob({ |
||||||
|
id: jobId, |
||||||
|
action: 'FS', |
||||||
|
payload: { method, args }, |
||||||
|
})) |
||||||
|
); |
||||||
|
|
||||||
|
const loadLanguage = (langs = 'eng', jobId) => ( |
||||||
|
startJob(createJob({ |
||||||
|
id: jobId, |
||||||
|
action: 'loadLanguage', |
||||||
|
payload: { langs, options }, |
||||||
|
})) |
||||||
|
); |
||||||
|
|
||||||
|
const initialize = (langs = 'eng', oem = defaultOEM, jobId) => ( |
||||||
|
startJob(createJob({ |
||||||
|
id: jobId, |
||||||
|
action: 'initialize', |
||||||
|
payload: { langs, oem }, |
||||||
|
})) |
||||||
|
); |
||||||
|
|
||||||
|
const setParameters = (params = {}, jobId) => ( |
||||||
|
startJob(createJob({ |
||||||
|
id: jobId, |
||||||
|
action: 'setParameters', |
||||||
|
payload: { params }, |
||||||
|
})) |
||||||
|
); |
||||||
|
|
||||||
|
const recognize = async (image, opts = {}, jobId) => ( |
||||||
|
startJob(createJob({ |
||||||
|
id: jobId, |
||||||
|
action: 'recognize', |
||||||
|
payload: { image: await loadImage(image), options: opts }, |
||||||
|
})) |
||||||
|
); |
||||||
|
|
||||||
|
const getPDF = (title = 'Tesseract OCR Result', textonly = false, jobId) => ( |
||||||
|
startJob(createJob({ |
||||||
|
id: jobId, |
||||||
|
action: 'getPDF', |
||||||
|
payload: { title, textonly }, |
||||||
|
})) |
||||||
|
); |
||||||
|
|
||||||
|
const detect = async (image, jobId) => ( |
||||||
|
startJob(createJob({ |
||||||
|
id: jobId, |
||||||
|
action: 'detect', |
||||||
|
payload: { image: await loadImage(image) }, |
||||||
|
})) |
||||||
|
); |
||||||
|
|
||||||
|
const terminate = async () => { |
||||||
|
if (worker !== null) { |
||||||
|
/* |
||||||
|
await startJob(createJob({ |
||||||
|
id: jobId, |
||||||
|
action: 'terminate', |
||||||
|
})); |
||||||
|
*/ |
||||||
|
terminateWorker(worker); |
||||||
|
worker = null; |
||||||
|
} |
||||||
|
return Promise.resolve(); |
||||||
|
}; |
||||||
|
|
||||||
|
onMessage(worker, ({ |
||||||
|
workerId, jobId, status, action, data, |
||||||
|
}) => { |
||||||
|
if (status === 'resolve') { |
||||||
|
log(`[${workerId}]: Complete ${jobId}`); |
||||||
|
let d = data; |
||||||
|
if (action === 'recognize') { |
||||||
|
d = circularize(data); |
||||||
|
} else if (action === 'getPDF') { |
||||||
|
d = Array.from({ ...data, length: Object.keys(data).length }); |
||||||
|
} |
||||||
|
resolves[action]({ jobId, data: d }); |
||||||
|
} else if (status === 'reject') { |
||||||
|
rejects[action](data); |
||||||
|
if (errorHandler) { |
||||||
|
errorHandler(data); |
||||||
|
} else { |
||||||
|
throw Error(data); |
||||||
|
} |
||||||
|
} else if (status === 'progress') { |
||||||
|
logger({ ...data, userJobId: jobId }); |
||||||
|
} |
||||||
|
}); |
||||||
|
|
||||||
|
return { |
||||||
|
id, |
||||||
|
worker, |
||||||
|
setResolve, |
||||||
|
setReject, |
||||||
|
load, |
||||||
|
writeText, |
||||||
|
readText, |
||||||
|
removeFile, |
||||||
|
FS, |
||||||
|
loadLanguage, |
||||||
|
initialize, |
||||||
|
setParameters, |
||||||
|
recognize, |
||||||
|
getPDF, |
||||||
|
detect, |
||||||
|
terminate, |
||||||
|
}; |
||||||
|
}; |
@ -0,0 +1,231 @@ |
|||||||
|
declare namespace Tesseract {
  function createScheduler(): Scheduler
  function createWorker(options?: Partial<WorkerOptions>): Worker
  function setLogging(logging: boolean): void
  function recognize(image: ImageLike, langs?: string, options?: Partial<WorkerOptions>): Promise<RecognizeResult>
  function detect(image: ImageLike, options?: Partial<WorkerOptions>): any

  interface Scheduler {
    addWorker(worker: Worker): string
    addJob(action: string, ...args: any[]): Promise<ConfigResult | RecognizeResult | DetectResult>
    terminate(): Promise<any>
    getQueueLen(): number
    getNumWorkers(): number
  }

  interface Worker {
    load(jobId?: string): Promise<ConfigResult>
    writeText(path: string, text: string, jobId?: string): Promise<ConfigResult>
    readText(path: string, jobId?: string): Promise<ConfigResult>
    /** Delete a file from the worker's file system (FS `unlink`). */
    removeFile(path: string, jobId?: string): Promise<ConfigResult>
    /**
     * @deprecated The worker object returned by createWorker exposes
     * `removeFile`, not `removeText`; use `removeFile` instead.
     */
    removeText(path: string, jobId?: string): Promise<ConfigResult>
    FS(method: string, args: any[], jobId?: string): Promise<ConfigResult>
    loadLanguage(langs?: string | Lang[], jobId?: string): Promise<ConfigResult>
    initialize(langs?: string | Lang[], oem?: OEM, jobId?: string): Promise<ConfigResult>
    setParameters(params: Partial<WorkerParams>, jobId?: string): Promise<ConfigResult>
    recognize(image: ImageLike, options?: Partial<RecognizeOptions>, jobId?: string): Promise<RecognizeResult>
    detect(image: ImageLike, jobId?: string): Promise<DetectResult>
    terminate(jobId?: string): Promise<ConfigResult>
    getPDF(title?: string, textonly?: boolean, jobId?: string):Promise<GetPDFResult>
  }

  interface Lang {
    code: string;
    data: unknown;
  }

  interface WorkerOptions {
    corePath: string
    langPath: string
    cachePath: string
    dataPath: string
    workerPath: string
    cacheMethod: string
    workerBlobURL: boolean
    gzip: boolean
    logger: (arg: any) => void,
    errorHandler: (arg: any) => void
  }
  interface WorkerParams {
    tessedit_ocr_engine_mode: OEM
    tessedit_pageseg_mode: PSM
    tessedit_char_whitelist: string
    preserve_interword_spaces: string
    user_defined_dpi: string
    tessjs_create_hocr: string
    tessjs_create_tsv: string
    tessjs_create_box: string
    tessjs_create_unlv: string
    tessjs_create_osd: string
  }
  interface RecognizeOptions {
    rectangle: Rectangle
  }
  interface ConfigResult {
    jobId: string
    data: any
  }
  interface RecognizeResult {
    jobId: string
    data: Page
  }
  interface GetPDFResult {
    jobId: string
    data: number[]
  }
  interface DetectResult {
    jobId: string
    data: DetectData
  }
  interface DetectData {
    tesseract_script_id: number
    script: string
    script_confidence: number
    orientation_degrees: number
    orientation_confidence: number
  }
  interface Rectangle {
    left: number
    top: number
    width: number
    height: number
  }
  enum OEM {
    TESSERACT_ONLY,
    LSTM_ONLY,
    TESSERACT_LSTM_COMBINED,
    DEFAULT,
  }
  enum PSM {
    OSD_ONLY = '0',
    AUTO_OSD = '1',
    AUTO_ONLY = '2',
    AUTO = '3',
    SINGLE_COLUMN = '4',
    SINGLE_BLOCK_VERT_TEXT = '5',
    SINGLE_BLOCK = '6',
    SINGLE_LINE = '7',
    SINGLE_WORD = '8',
    CIRCLE_WORD = '9',
    SINGLE_CHAR = '10',
    SPARSE_TEXT = '11',
    SPARSE_TEXT_OSD = '12',
    RAW_LINE = '13'
  }
  type ImageLike = string | HTMLImageElement | HTMLCanvasElement | HTMLVideoElement
    | CanvasRenderingContext2D | File | Blob | ImageData | Buffer;
  interface Block {
    paragraphs: Paragraph[];
    text: string;
    confidence: number;
    baseline: Baseline;
    bbox: Bbox;
    blocktype: string;
    polygon: any;
    page: Page;
    lines: Line[];
    words: Word[];
    symbols: Symbol[];
  }
  interface Baseline {
    x0: number;
    y0: number;
    x1: number;
    y1: number;
    has_baseline: boolean;
  }
  interface Bbox {
    x0: number;
    y0: number;
    x1: number;
    y1: number;
  }
  interface Line {
    words: Word[];
    text: string;
    confidence: number;
    baseline: Baseline;
    bbox: Bbox;
    paragraph: Paragraph;
    block: Block;
    page: Page;
    symbols: Symbol[];
  }
  interface Paragraph {
    lines: Line[];
    text: string;
    confidence: number;
    baseline: Baseline;
    bbox: Bbox;
    is_ltr: boolean;
    block: Block;
    page: Page;
    words: Word[];
    symbols: Symbol[];
  }
  interface Symbol {
    choices: Choice[];
    image: any;
    text: string;
    confidence: number;
    baseline: Baseline;
    bbox: Bbox;
    is_superscript: boolean;
    is_subscript: boolean;
    is_dropcap: boolean;
    word: Word;
    line: Line;
    paragraph: Paragraph;
    block: Block;
    page: Page;
  }
  interface Choice {
    text: string;
    confidence: number;
  }
  interface Word {
    symbols: Symbol[];
    choices: Choice[];
    text: string;
    confidence: number;
    baseline: Baseline;
    bbox: Bbox;
    is_numeric: boolean;
    in_dictionary: boolean;
    direction: string;
    language: string;
    is_bold: boolean;
    is_italic: boolean;
    is_underlined: boolean;
    is_monospace: boolean;
    is_serif: boolean;
    is_smallcaps: boolean;
    font_size: number;
    font_id: number;
    font_name: string;
    line: Line;
    paragraph: Paragraph;
    block: Block;
    page: Page;
  }
  interface Page {
    blocks: Block[];
    confidence: number;
    lines: Line[];
    oem: string;
    osd: string;
    paragraphs: Paragraph[];
    psm: string;
    symbols: Symbol[];
    text: string;
    version: string;
    words: Word[];
    hocr: string | null;
    tsv: string | null;
    box: string | null;
    unlv: string | null;
    sd: string | null;
  }
}

export = Tesseract;
export as namespace Tesseract;
@ -0,0 +1,27 @@ |
|||||||
|
/** |
||||||
|
* |
||||||
|
* Entry point for tesseract.js, should be the entry when bundling. |
||||||
|
* |
||||||
|
* @fileoverview entry point for tesseract.js |
||||||
|
* @author Kevin Kwok <antimatter15@gmail.com> |
||||||
|
* @author Guillermo Webster <gui@mit.edu> |
||||||
|
* @author Jerome Wu <jeromewus@gmail.com> |
||||||
|
*/ |
||||||
|
require('regenerator-runtime/runtime'); |
||||||
|
const createScheduler = require('./createScheduler'); |
||||||
|
const createWorker = require('./createWorker'); |
||||||
|
const Tesseract = require('./Tesseract'); |
||||||
|
const languages = require('./constants/languages'); |
||||||
|
const OEM = require('./constants/OEM'); |
||||||
|
const PSM = require('./constants/PSM'); |
||||||
|
const { setLogging } = require('./utils/log'); |
||||||
|
|
||||||
|
// Named exports of the package. Everything exported by ./Tesseract is spread
// in last, so its keys win over the ones listed here.
const namedExports = {
  languages,
  OEM,
  PSM,
  createScheduler,
  createWorker,
  setLogging,
};

module.exports = { ...namedExports, ...Tesseract };
@ -0,0 +1,54 @@ |
|||||||
|
/** |
||||||
|
* In the recognition result of tesseract, there |
||||||
|
* is a deep JSON object for details, it has around |
||||||
|
* |
||||||
|
* The result of dump.js is a big JSON tree |
||||||
|
* which can be easily serialized (for instance |
||||||
|
* to be sent from a webworker to the main app |
||||||
|
* or through Node's IPC), but we want |
||||||
|
* a (circular) DOM-like interface for walking |
||||||
|
* through the data. |
||||||
|
* |
||||||
|
* @fileoverview DOM-like interface for walking through data |
||||||
|
* @author Kevin Kwok <antimatter15@gmail.com> |
||||||
|
* @author Guillermo Webster <gui@mit.edu> |
||||||
|
* @author Jerome Wu <jeromewus@gmail.com> |
||||||
|
*/ |
||||||
|
|
||||||
|
module.exports = (page) => { |
||||||
|
const blocks = []; |
||||||
|
const paragraphs = []; |
||||||
|
const lines = []; |
||||||
|
const words = []; |
||||||
|
const symbols = []; |
||||||
|
|
||||||
|
page.blocks.forEach((block) => { |
||||||
|
block.paragraphs.forEach((paragraph) => { |
||||||
|
paragraph.lines.forEach((line) => { |
||||||
|
line.words.forEach((word) => { |
||||||
|
word.symbols.forEach((sym) => { |
||||||
|
symbols.push({ |
||||||
|
...sym, page, block, paragraph, line, word, |
||||||
|
}); |
||||||
|
}); |
||||||
|
words.push({ |
||||||
|
...word, page, block, paragraph, line, |
||||||
|
}); |
||||||
|
}); |
||||||
|
lines.push({ |
||||||
|
...line, page, block, paragraph, |
||||||
|
}); |
||||||
|
}); |
||||||
|
paragraphs.push({ |
||||||
|
...paragraph, page, block, |
||||||
|
}); |
||||||
|
}); |
||||||
|
blocks.push({ |
||||||
|
...block, page, |
||||||
|
}); |
||||||
|
}); |
||||||
|
|
||||||
|
return { |
||||||
|
...page, blocks, paragraphs, lines, words, symbols, |
||||||
|
}; |
||||||
|
}; |
@ -0,0 +1,21 @@ |
|||||||
|
const isElectron = require('is-electron'); |
||||||
|
|
||||||
|
module.exports = (key) => { |
||||||
|
const env = {}; |
||||||
|
|
||||||
|
if (typeof WorkerGlobalScope !== 'undefined') { |
||||||
|
env.type = 'webworker'; |
||||||
|
} else if (isElectron()) { |
||||||
|
env.type = 'electron'; |
||||||
|
} else if (typeof window === 'object') { |
||||||
|
env.type = 'browser'; |
||||||
|
} else if (typeof process === 'object' && typeof require === 'function') { |
||||||
|
env.type = 'node'; |
||||||
|
} |
||||||
|
|
||||||
|
if (typeof key === 'undefined') { |
||||||
|
return env; |
||||||
|
} |
||||||
|
|
||||||
|
return env[key]; |
||||||
|
}; |
@ -0,0 +1,3 @@ |
|||||||
|
module.exports = (prefix, cnt) => ( |
||||||
|
`${prefix}-${cnt}-${Math.random().toString(16).slice(3, 8)}` |
||||||
|
); |
@ -0,0 +1,9 @@ |
|||||||
|
let logging = false; |
||||||
|
|
||||||
|
exports.logging = logging; |
||||||
|
|
||||||
|
exports.setLogging = (_logging) => { |
||||||
|
logging = _logging; |
||||||
|
}; |
||||||
|
|
||||||
|
exports.log = (...args) => (logging ? console.log.apply(this, args) : null); |
@ -0,0 +1,12 @@ |
|||||||
|
const isBrowser = require('./getEnvironment')('type') === 'browser'; |
||||||
|
const resolveURL = isBrowser ? require('resolve-url') : s => s; // eslint-disable-line
|
||||||
|
|
||||||
|
module.exports = (options) => { |
||||||
|
const opts = { ...options }; |
||||||
|
['corePath', 'workerPath', 'langPath'].forEach((key) => { |
||||||
|
if (options[key]) { |
||||||
|
opts[key] = resolveURL(opts[key]); |
||||||
|
} |
||||||
|
}); |
||||||
|
return opts; |
||||||
|
}; |
@ -0,0 +1,10 @@ |
|||||||
|
const { set, get, del } = require('idb-keyval'); |
||||||
|
|
||||||
|
module.exports = { |
||||||
|
readCache: get, |
||||||
|
writeCache: set, |
||||||
|
deleteCache: del, |
||||||
|
checkCache: (path) => ( |
||||||
|
get(path).then((v) => typeof v !== 'undefined') |
||||||
|
), |
||||||
|
}; |
@ -0,0 +1,30 @@ |
|||||||
|
const { simd } = require('wasm-feature-detect'); |
||||||
|
const { dependencies } = require('../../../package.json'); |
||||||
|
|
||||||
|
module.exports = async (corePath, res) => { |
||||||
|
if (typeof global.TesseractCore === 'undefined') { |
||||||
|
res.progress({ status: 'loading tesseract core', progress: 0 }); |
||||||
|
|
||||||
|
// If the user specifies a core path, we use that
|
||||||
|
// Otherwise, we detect the correct core based on SIMD support
|
||||||
|
let corePathImport = corePath; |
||||||
|
if (!corePathImport) { |
||||||
|
const simdSupport = await simd(); |
||||||
|
if (simdSupport) { |
||||||
|
corePathImport = `https://unpkg.com/tesseract.js-core@v${dependencies['tesseract.js-core'].substring(1)}/tesseract-core-simd.wasm.js`; |
||||||
|
} else { |
||||||
|
corePathImport = `https://unpkg.com/tesseract.js-core@v${dependencies['tesseract.js-core'].substring(1)}/tesseract-core.wasm.js`; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
global.importScripts(corePathImport); |
||||||
|
|
||||||
|
if (typeof global.TesseractCoreWASM !== 'undefined' && typeof WebAssembly === 'object') { |
||||||
|
global.TesseractCore = global.TesseractCoreWASM; |
||||||
|
} else { |
||||||
|
throw Error('Failed to load TesseractCore'); |
||||||
|
} |
||||||
|
res.progress({ status: 'loading tesseract core', progress: 1 }); |
||||||
|
} |
||||||
|
return global.TesseractCore; |
||||||
|
}; |
@ -0,0 +1 @@ |
|||||||
|
// Re-export the gunzipSync function of zlibjs.
const { gunzipSync } = require('zlibjs');

module.exports = gunzipSync;
@ -0,0 +1,32 @@ |
|||||||
|
/** |
||||||
|
* |
||||||
|
* Browser worker scripts |
||||||
|
* |
||||||
|
* @fileoverview Browser worker implementation |
||||||
|
* @author Kevin Kwok <antimatter15@gmail.com> |
||||||
|
* @author Guillermo Webster <gui@mit.edu> |
||||||
|
* @author Jerome Wu <jeromewus@gmail.com> |
||||||
|
*/ |
||||||
|
|
||||||
|
const worker = require('..'); |
||||||
|
const getCore = require('./getCore'); |
||||||
|
const gunzip = require('./gunzip'); |
||||||
|
const cache = require('./cache'); |
||||||
|
|
||||||
|
/*
 * Register the message handler: every incoming packet is handed to the
 * shared worker script, which answers through postMessage.
 */
global.addEventListener('message', (event) => {
  worker.dispatchHandlers(event.data, (obj) => postMessage(obj));
});

/*
 * Install the browser adapter. getCore is an async function that loads
 * and returns TesseractCore.
 */
worker.setAdapter({
  getCore,
  gunzip,
  fetch: () => {},
  ...cache,
});
@ -0,0 +1,14 @@ |
|||||||
|
/* |
||||||
|
* default params for tesseract.js |
||||||
|
*/ |
||||||
|
const PSM = require('../../constants/PSM'); |
||||||
|
|
||||||
|
// Default parameters. Values are strings, except the page segmentation mode,
// which comes from the PSM constants.
const defaultParams = {
  tessedit_pageseg_mode: PSM.SINGLE_BLOCK,
  tessedit_char_whitelist: '',
  tessjs_create_hocr: '1',
  tessjs_create_tsv: '1',
  tessjs_create_box: '0',
  tessjs_create_unlv: '0',
  tessjs_create_osd: '0',
};

module.exports = defaultParams;
@ -0,0 +1,313 @@ |
|||||||
|
/** |
||||||
|
* |
||||||
|
* Worker script for browser and node |
||||||
|
* |
||||||
|
* @fileoverview Worker script for browser and node |
||||||
|
* @author Kevin Kwok <antimatter15@gmail.com> |
||||||
|
* @author Guillermo Webster <gui@mit.edu> |
||||||
|
* @author Jerome Wu <jeromewus@gmail.com> |
||||||
|
*/ |
||||||
|
require('regenerator-runtime/runtime'); |
||||||
|
const fileType = require('file-type'); |
||||||
|
const isURL = require('is-url'); |
||||||
|
const dump = require('./utils/dump'); |
||||||
|
const isWebWorker = require('../utils/getEnvironment')('type') === 'webworker'; |
||||||
|
const setImage = require('./utils/setImage'); |
||||||
|
const defaultParams = require('./constants/defaultParams'); |
||||||
|
const { log, setLogging } = require('../utils/log'); |
||||||
|
|
||||||
|
/*
 * Tesseract Module returned by TesseractCore.
 */
let TessModule;

/*
 * TessBaseAPI instance, created by `initialize`.
 */
let api = null;

/*
 * Response object of the most recently dispatched job.
 */
let latestJob;

/*
 * Environment specific implementation, installed through `setAdapter`.
 */
let adapter = {};

/*
 * Parameters currently in effect.
 */
let params = defaultParams;
||||||
|
|
||||||
|
/**
 * Load TesseractCore through the adapter and instantiate the module (once).
 *
 * @param {object} packet - job packet; `payload.options` supplies
 *   `corePath` and `logging`
 * @param {object} res - job response object (resolve / reject / progress)
 * @returns {Promise<void>} settles after the job has been answered
 */
const load = async ({ workerId, jobId, payload: { options: { corePath, logging } } }, res) => {
  setLogging(logging);
  if (TessModule) {
    res.resolve({ loaded: true });
    return;
  }

  try {
    const Core = await adapter.getCore(corePath, res);

    res.progress({ workerId, status: 'initializing tesseract', progress: 0 });

    // Awaiting here lets the catch below report a failed instantiation as a
    // rejected job instead of an unhandled promise rejection.
    TessModule = await Core({
      TesseractProgress(percent) {
        // Progress is reported through whichever job was dispatched last.
        latestJob.progress({
          workerId,
          jobId,
          status: 'recognizing text',
          progress: Math.max(0, (percent - 30) / 70),
        });
      },
    });

    res.progress({ workerId, status: 'initialized tesseract', progress: 1 });
    res.resolve({ loaded: true });
  } catch (err) {
    res.reject(String(err));
  }
};
||||||
|
|
||||||
|
/**
 * Call a method of the module's file system and resolve with its result.
 *
 * @param {object} packet - job packet; `payload.method` names the
 *   `TessModule.FS` method and `payload.args` holds its arguments
 * @param {object} res - job response object
 */
const FS = ({ workerId, payload: { method, args } }, res) => {
  log(`[${workerId}]: FS.${method} with args ${args}`);
  const result = TessModule.FS[method](...args);
  res.resolve(result);
};
||||||
|
|
||||||
|
/**
 * Make the traineddata of every requested language available.
 *
 * Each language is looked up in the cache first, then taken from `langPath`
 * (or from the `data` of a language object), gunzipped when needed, written
 * into the module's file system and, depending on `cacheMethod`, cached.
 *
 * @param {object} packet - job packet; `payload.langs` is a '+' separated
 *   string or an array of strings / `{ code, data }` objects
 * @param {object} res - job response object
 * @returns {Promise<void>} settles after the job has been answered
 */
const loadLanguage = async ({
  workerId,
  payload: {
    langs,
    options: {
      langPath,
      dataPath,
      cachePath,
      cacheMethod,
      gzip = true,
    },
  },
},
res) => {
  const loadAndGunzipFile = async (_lang) => {
    const lang = typeof _lang === 'string' ? _lang : _lang.code;
    const cacheKey = `${cachePath || '.'}/${lang}.traineddata`;
    // With 'refresh' and 'none' the cache is never read.
    const readCache = ['refresh', 'none'].includes(cacheMethod)
      ? () => Promise.resolve()
      : adapter.readCache;

    // Get the data from where it originally lives (cache miss path).
    const loadFromSource = async () => {
      log(`[${workerId}]: Load ${lang}.traineddata from ${langPath}`);
      if (typeof _lang !== 'string') {
        return _lang.data;
      }

      /** When langPath is an URL */
      const isRemote = isURL(langPath)
        || langPath.startsWith('moz-extension://')
        || langPath.startsWith('chrome-extension://')
        || langPath.startsWith('file://');
      const location = `${langPath}/${lang}.traineddata${gzip ? '.gz' : ''}`;

      if (!isRemote) {
        return adapter.readCache(location);
      }

      const resp = await (isWebWorker ? fetch : adapter.fetch)(location);
      if (!resp.ok) {
        throw Error(`Network error while fetching ${location}. Response code: ${resp.status}`);
      }
      return resp.arrayBuffer();
    };

    let data = null;

    try {
      const cached = await readCache(cacheKey);
      if (typeof cached === 'undefined') {
        throw Error('Not found in cache');
      }
      log(`[${workerId}]: Load ${lang}.traineddata from cache`);
      res.progress({ workerId, status: 'loading language traineddata (from cache)', progress: 0.5 });
      data = cached;
    } catch (e) {
      // Any failure while reading the cache falls back to the source.
      data = await loadFromSource();
    }

    data = new Uint8Array(data);

    const type = fileType(data);
    const isGzipped = typeof type !== 'undefined' && type.mime === 'application/gzip';
    if (isGzipped) {
      data = adapter.gunzip(data);
    }

    if (TessModule) {
      if (dataPath) {
        try {
          TessModule.FS.mkdir(dataPath);
        } catch (err) {
          res.reject(err.toString());
        }
      }
      TessModule.FS.writeFile(`${dataPath || '.'}/${lang}.traineddata`, data);
    }

    if (['write', 'refresh', undefined].includes(cacheMethod)) {
      await adapter.writeCache(cacheKey, data);
    }

    return data;
  };

  res.progress({ workerId, status: 'loading language traineddata', progress: 0 });
  try {
    const requested = typeof langs === 'string' ? langs.split('+') : langs;
    await Promise.all(requested.map(loadAndGunzipFile));
    res.progress({ workerId, status: 'loaded language traineddata', progress: 1 });
    res.resolve(langs);
  } catch (err) {
    res.reject(err.toString());
  }
};
||||||
|
|
||||||
|
/**
 * Apply parameters to the API and remember them in `params`.
 *
 * @param {object} packet - job packet; `payload.params` maps names to values
 * @param {object} [res] - job response object; when given, it is resolved
 *   with the merged parameters
 */
const setParameters = ({ payload: { params: _params } }, res) => {
  Object.entries(_params).forEach(([name, value]) => {
    // Names starting with 'tessjs_' are only stored, not passed to the API.
    if (!name.startsWith('tessjs_')) {
      api.SetVariable(name, value);
    }
  });
  params = { ...params, ..._params };

  if (typeof res === 'undefined') {
    return;
  }
  res.resolve(params);
};
||||||
|
|
||||||
|
/**
 * (Re)create the TessBaseAPI instance for the given languages.
 *
 * @param {object} packet - job packet; `payload.langs` is a '+' separated
 *   string or an array of strings / `{ code, data }` objects, `payload.oem`
 *   is passed to `api.Init`
 * @param {object} res - job response object
 */
const initialize = ({
  workerId,
  payload: { langs: _langs, oem },
}, res) => {
  // Language objects are identified by `code`: that is the name loadLanguage
  // used for the traineddata file. `data` holds the file contents.
  const langs = (typeof _langs === 'string')
    ? _langs
    : _langs.map((l) => ((typeof l === 'string') ? l : l.code)).join('+');

  try {
    res.progress({
      workerId, status: 'initializing api', progress: 0,
    });
    if (api !== null) {
      api.End();
    }
    api = new TessModule.TessBaseAPI();
    const status = api.Init(null, langs, oem);
    if (status === -1) {
      res.reject('initialization failed');
      // Do not report success for a job that has just been rejected.
      return;
    }
    params = defaultParams;
    setParameters({ payload: { params } });
    res.progress({
      workerId, status: 'initialized api', progress: 1,
    });
    res.resolve();
  } catch (err) {
    res.reject(err.toString());
  }
};
||||||
|
|
||||||
|
/**
 * Run recognition on an image and resolve with the dumped result.
 *
 * @param {object} packet - job packet; `payload.image` is the image and
 *   `payload.options.rectangle` optionally restricts recognition to
 *   `{ left, top, width, height }`
 * @param {object} res - job response object
 */
const recognize = ({ payload: { image, options: { rectangle: rec } } }, res) => {
  let ptr = null;
  try {
    ptr = setImage(TessModule, api, image);
    // typeof null is 'object' too, so null has to be excluded explicitly.
    if (typeof rec === 'object' && rec !== null) {
      api.SetRectangle(rec.left, rec.top, rec.width, rec.height);
    }
    api.Recognize(null);
    res.resolve(dump(TessModule, api, params));
  } catch (err) {
    res.reject(err.toString());
  } finally {
    // Release the image memory even when recognition or dumping failed.
    if (ptr !== null) {
      TessModule._free(ptr);
    }
  }
};
||||||
|
|
||||||
|
/**
 * Render the current API state into a PDF and resolve with the file contents.
 *
 * @param {object} packet - job packet; `payload.title` is the document
 *   title, `payload.textonly` is passed to the renderer
 * @param {object} res - job response object
 */
const getPDF = ({ payload: { title, textonly } }, res) => {
  const renderer = new TessModule.TessPDFRenderer('tesseract-ocr', '/', textonly);
  renderer.BeginDocument(title);
  renderer.AddImage(api);
  renderer.EndDocument();
  TessModule._free(renderer);

  // The renderer wrote its output to this path of the module's file system.
  const pdfContent = TessModule.FS.readFile('/tesseract-ocr.pdf');
  res.resolve(pdfContent);
};
||||||
|
|
||||||
|
/**
 * Detect orientation and script of an image.
 *
 * @param {object} packet - job packet; `payload.image` is the image
 * @param {object} res - job response object; resolved with script id,
 *   script name, orientation in degrees and both confidences
 */
const detect = ({ payload: { image } }, res) => {
  try {
    const imagePtr = setImage(TessModule, api, image);
    const osResults = new TessModule.OSResults();

    if (!api.DetectOS(osResults)) {
      api.End();
      TessModule._free(imagePtr);
      res.reject('Failed to detect OS');
      return;
    }

    const { best_result: best } = osResults;
    const { orientation_id: oid, script_id: sid } = best;

    TessModule._free(imagePtr);

    // Degrees indexed by orientation id.
    const degreesById = [0, 270, 180, 90];
    res.resolve({
      tesseract_script_id: sid,
      script: osResults.unicharset.get_script_from_script_id(sid),
      script_confidence: best.sconfidence,
      orientation_degrees: degreesById[oid],
      orientation_confidence: best.oconfidence,
    });
  } catch (err) {
    res.reject(err.toString());
  }
};
||||||
|
|
||||||
|
/**
 * End the API instance, if there is one, and confirm termination.
 *
 * @param {object} _ - job packet (unused)
 * @param {object} res - job response object
 */
const terminate = (_, res) => {
  try {
    const hasApi = api !== null;
    if (hasApi) {
      api.End();
    }
    res.resolve({ terminated: true });
  } catch (error) {
    res.reject(error.toString());
  }
};
||||||
|
|
||||||
|
/** |
||||||
|
* dispatchHandlers |
||||||
|
* |
||||||
|
* @name dispatchHandlers |
||||||
|
* @function worker data handler |
||||||
|
* @access public |
||||||
|
* @param {object} data |
||||||
|
* @param {string} data.jobId - unique job id |
||||||
|
* @param {string} data.action - action of the job, only recognize and detect for now |
||||||
|
* @param {object} data.payload - data for the job |
||||||
|
* @param {function} send - trigger job to work |
||||||
|
*/ |
||||||
|
exports.dispatchHandlers = (packet, send) => { |
||||||
|
const res = (status, data) => { |
||||||
|
send({ |
||||||
|
...packet, |
||||||
|
status, |
||||||
|
data, |
||||||
|
}); |
||||||
|
}; |
||||||
|
res.resolve = res.bind(this, 'resolve'); |
||||||
|
res.reject = res.bind(this, 'reject'); |
||||||
|
res.progress = res.bind(this, 'progress'); |
||||||
|
|
||||||
|
latestJob = res; |
||||||
|
|
||||||
|
try { |
||||||
|
({ |
||||||
|
load, |
||||||
|
FS, |
||||||
|
loadLanguage, |
||||||
|
initialize, |
||||||
|
setParameters, |
||||||
|
recognize, |
||||||
|
getPDF, |
||||||
|
detect, |
||||||
|
terminate, |
||||||
|
})[packet.action](packet, res); |
||||||
|
} catch (err) { |
||||||
|
/** Prepare exception to travel through postMessage */ |
||||||
|
res.reject(err.toString()); |
||||||
|
} |
||||||
|
}; |
||||||
|
|
||||||
|
/** |
||||||
|
* setAdapter |
||||||
|
* |
||||||
|
* @name setAdapter |
||||||
|
* @function |
||||||
|
* @access public |
||||||
|
* @param {object} adapter - implementation of the worker, different in browser and node environment |
||||||
|
*/ |
||||||
|
exports.setAdapter = (_adapter) => { |
||||||
|
adapter = _adapter; |
||||||
|
}; |
@ -0,0 +1,16 @@ |
|||||||
|
const util = require('util'); |
||||||
|
const fs = require('fs'); |
||||||
|
|
||||||
|
module.exports = { |
||||||
|
readCache: util.promisify(fs.readFile), |
||||||
|
writeCache: util.promisify(fs.writeFile), |
||||||
|
deleteCache: (path) => ( |
||||||
|
util.promisify(fs.unlink)(path) |
||||||
|
.catch(() => {}) |
||||||
|
), |
||||||
|
checkCache: (path) => ( |
||||||
|
util.promisify(fs.access)(path, fs.F_OK) |
||||||
|
.then((err) => (err === null)) |
||||||
|
.catch(() => false) |
||||||
|
), |
||||||
|
}; |
@ -0,0 +1,20 @@ |
|||||||
|
const { simd } = require('wasm-feature-detect'); |
||||||
|
|
||||||
|
let TesseractCore = null; |
||||||
|
/* |
||||||
|
* getCore is a sync function to load and return |
||||||
|
* TesseractCore. |
||||||
|
*/ |
||||||
|
module.exports = async (_, res) => { |
||||||
|
if (TesseractCore === null) { |
||||||
|
const simdSupport = await simd(); |
||||||
|
res.progress({ status: 'loading tesseract core', progress: 0 }); |
||||||
|
if (simdSupport) { |
||||||
|
TesseractCore = require('tesseract.js-core/tesseract-core-simd'); |
||||||
|
} else { |
||||||
|
TesseractCore = require('tesseract.js-core/tesseract-core'); |
||||||
|
} |
||||||
|
res.progress({ status: 'loaded tesseract core', progress: 1 }); |
||||||
|
} |
||||||
|
return TesseractCore; |
||||||
|
}; |
@ -0,0 +1 @@ |
|||||||
|
module.exports = require('zlib').gunzipSync; |
@ -0,0 +1,30 @@ |
|||||||
|
/** |
||||||
|
* |
||||||
|
* Tesseract Worker Script for Node |
||||||
|
* |
||||||
|
* @fileoverview Node worker implementation |
||||||
|
* @author Kevin Kwok <antimatter15@gmail.com> |
||||||
|
* @author Guillermo Webster <gui@mit.edu> |
||||||
|
* @author Jerome Wu <jeromewus@gmail.com> |
||||||
|
*/ |
||||||
|
|
||||||
|
const fetch = require('node-fetch'); |
||||||
|
const { parentPort } = require('worker_threads'); |
||||||
|
const worker = require('..'); |
||||||
|
const getCore = require('./getCore'); |
||||||
|
const gunzip = require('./gunzip'); |
||||||
|
const cache = require('./cache'); |
||||||
|
|
||||||
|
/*
 * register message handler: every packet received on the parent port is
 * dispatched, and each response is posted back through the same port
 */
const postToParent = (obj) => parentPort.postMessage(obj);

parentPort.on('message', (packet) => {
  worker.dispatchHandlers(packet, postToParent);
});

/*
 * hand the node implementations (core loader, gunzip, fetch, cache helpers)
 * to the worker
 */
const nodeAdapter = {
  getCore,
  gunzip,
  fetch,
  ...cache,
};

worker.setAdapter(nodeAdapter);
@ -0,0 +1,201 @@ |
|||||||
|
/** |
||||||
|
* |
||||||
|
* Dump data to a big JSON tree |
||||||
|
* |
||||||
|
* @fileoverview dump data to JSON tree |
||||||
|
* @author Kevin Kwok <antimatter15@gmail.com> |
||||||
|
* @author Guillermo Webster <gui@mit.edu> |
||||||
|
* @author Jerome Wu <jeromewus@gmail.com> |
||||||
|
*/ |
||||||
|
|
||||||
|
/**
 * deindent
 *
 * The generated HOCR is excessively indented, so
 * we get rid of that indentation: when the first line starts with two
 * spaces, two leading spaces are removed from every line that has them.
 *
 * @name deindent
 * @function deindent string
 * @access public
 * @param {string} html - HOCR markup
 * @returns {string} the markup with one two-space indentation level removed,
 *   or the input unchanged when its first line is not indented
 */
const deindent = (html) => {
  // The prefix tested must be as long as the slice removed below; comparing a
  // two-character prefix with a single space never matches indented markup.
  const INDENT = '  ';
  const lines = html.split('\n');

  if (!lines[0].startsWith(INDENT)) {
    return html;
  }

  return lines
    .map((line) => (line.startsWith(INDENT) ? line.slice(INDENT.length) : line))
    .join('\n');
};
||||||
|
|
||||||
|
/** |
||||||
|
* dump |
||||||
|
* |
||||||
|
* @name dump |
||||||
|
* @function dump recognition result to a JSON object |
||||||
|
* @access public |
||||||
|
*/ |
||||||
|
module.exports = (TessModule, api, { |
||||||
|
tessjs_create_hocr, |
||||||
|
tessjs_create_tsv, |
||||||
|
tessjs_create_box, |
||||||
|
tessjs_create_unlv, |
||||||
|
tessjs_create_osd, |
||||||
|
}) => { |
||||||
|
const ri = api.GetIterator(); |
||||||
|
const { |
||||||
|
RIL_BLOCK, |
||||||
|
RIL_PARA, |
||||||
|
RIL_TEXTLINE, |
||||||
|
RIL_WORD, |
||||||
|
RIL_SYMBOL, |
||||||
|
} = TessModule; |
||||||
|
const blocks = []; |
||||||
|
let block; |
||||||
|
let para; |
||||||
|
let textline; |
||||||
|
let word; |
||||||
|
let symbol; |
||||||
|
|
||||||
|
const enumToString = (value, prefix) => ( |
||||||
|
Object.keys(TessModule) |
||||||
|
.filter((e) => (e.startsWith(`${prefix}_`) && TessModule[e] === value)) |
||||||
|
.map((e) => e.slice(prefix.length + 1))[0] |
||||||
|
); |
||||||
|
|
||||||
|
ri.Begin(); |
||||||
|
do { |
||||||
|
if (ri.IsAtBeginningOf(RIL_BLOCK)) { |
||||||
|
const poly = ri.BlockPolygon(); |
||||||
|
let polygon = null; |
||||||
|
// BlockPolygon() returns null when automatic page segmentation is off
|
||||||
|
if (TessModule.getPointer(poly) > 0) { |
||||||
|
const n = poly.get_n(); |
||||||
|
const px = poly.get_x(); |
||||||
|
const py = poly.get_y(); |
||||||
|
polygon = []; |
||||||
|
for (let i = 0; i < n; i += 1) { |
||||||
|
polygon.push([px.getValue(i), py.getValue(i)]); |
||||||
|
} |
||||||
|
/* |
||||||
|
* TODO: find out why _ptaDestroy doesn't work |
||||||
|
*/ |
||||||
|
// TessModule._ptaDestroy(TessModule.getPointer(poly));
|
||||||
|
} |
||||||
|
|
||||||
|
block = { |
||||||
|
paragraphs: [], |
||||||
|
text: ri.GetUTF8Text(RIL_BLOCK), |
||||||
|
confidence: ri.Confidence(RIL_BLOCK), |
||||||
|
baseline: ri.getBaseline(RIL_BLOCK), |
||||||
|
bbox: ri.getBoundingBox(RIL_BLOCK), |
||||||
|
blocktype: enumToString(ri.BlockType(), 'PT'), |
||||||
|
polygon, |
||||||
|
}; |
||||||
|
blocks.push(block); |
||||||
|
} |
||||||
|
if (ri.IsAtBeginningOf(RIL_PARA)) { |
||||||
|
para = { |
||||||
|
lines: [], |
||||||
|
text: ri.GetUTF8Text(RIL_PARA), |
||||||
|
confidence: ri.Confidence(RIL_PARA), |
||||||
|
baseline: ri.getBaseline(RIL_PARA), |
||||||
|
bbox: ri.getBoundingBox(RIL_PARA), |
||||||
|
is_ltr: !!ri.ParagraphIsLtr(), |
||||||
|
}; |
||||||
|
block.paragraphs.push(para); |
||||||
|
} |
||||||
|
if (ri.IsAtBeginningOf(RIL_TEXTLINE)) { |
||||||
|
textline = { |
||||||
|
words: [], |
||||||
|
text: ri.GetUTF8Text(RIL_TEXTLINE), |
||||||
|
confidence: ri.Confidence(RIL_TEXTLINE), |
||||||
|
baseline: ri.getBaseline(RIL_TEXTLINE), |
||||||
|
bbox: ri.getBoundingBox(RIL_TEXTLINE), |
||||||
|
}; |
||||||
|
para.lines.push(textline); |
||||||
|
} |
||||||
|
if (ri.IsAtBeginningOf(RIL_WORD)) { |
||||||
|
const fontInfo = ri.getWordFontAttributes(); |
||||||
|
const wordDir = ri.WordDirection(); |
||||||
|
word = { |
||||||
|
symbols: [], |
||||||
|
choices: [], |
||||||
|
|
||||||
|
text: ri.GetUTF8Text(RIL_WORD), |
||||||
|
confidence: ri.Confidence(RIL_WORD), |
||||||
|
baseline: ri.getBaseline(RIL_WORD), |
||||||
|
bbox: ri.getBoundingBox(RIL_WORD), |
||||||
|
|
||||||
|
is_numeric: !!ri.WordIsNumeric(), |
||||||
|
in_dictionary: !!ri.WordIsFromDictionary(), |
||||||
|
direction: enumToString(wordDir, 'DIR'), |
||||||
|
language: ri.WordRecognitionLanguage(), |
||||||
|
|
||||||
|
is_bold: fontInfo.is_bold, |
||||||
|
is_italic: fontInfo.is_italic, |
||||||
|
is_underlined: fontInfo.is_underlined, |
||||||
|
is_monospace: fontInfo.is_monospace, |
||||||
|
is_serif: fontInfo.is_serif, |
||||||
|
is_smallcaps: fontInfo.is_smallcaps, |
||||||
|
font_size: fontInfo.pointsize, |
||||||
|
font_id: fontInfo.font_id, |
||||||
|
font_name: fontInfo.font_name, |
||||||
|
}; |
||||||
|
const wc = new TessModule.WordChoiceIterator(ri); |
||||||
|
do { |
||||||
|
word.choices.push({ |
||||||
|
text: wc.GetUTF8Text(), |
||||||
|
confidence: wc.Confidence(), |
||||||
|
}); |
||||||
|
} while (wc.Next()); |
||||||
|
TessModule.destroy(wc); |
||||||
|
textline.words.push(word); |
||||||
|
} |
||||||
|
|
||||||
|
// let image = null;
|
||||||
|
// var pix = ri.GetBinaryImage(TessModule.RIL_SYMBOL)
|
||||||
|
// var image = pix2array(pix);
|
||||||
|
// // for some reason it seems that things stop working if you destroy pics
|
||||||
|
// TessModule._pixDestroy(TessModule.getPointer(pix));
|
||||||
|
if (ri.IsAtBeginningOf(RIL_SYMBOL)) { |
||||||
|
symbol = { |
||||||
|
choices: [], |
||||||
|
image: null, |
||||||
|
text: ri.GetUTF8Text(RIL_SYMBOL), |
||||||
|
confidence: ri.Confidence(RIL_SYMBOL), |
||||||
|
baseline: ri.getBaseline(RIL_SYMBOL), |
||||||
|
bbox: ri.getBoundingBox(RIL_SYMBOL), |
||||||
|
is_superscript: !!ri.SymbolIsSuperscript(), |
||||||
|
is_subscript: !!ri.SymbolIsSubscript(), |
||||||
|
is_dropcap: !!ri.SymbolIsDropcap(), |
||||||
|
}; |
||||||
|
word.symbols.push(symbol); |
||||||
|
const ci = new TessModule.ChoiceIterator(ri); |
||||||
|
do { |
||||||
|
symbol.choices.push({ |
||||||
|
text: ci.GetUTF8Text(), |
||||||
|
confidence: ci.Confidence(), |
||||||
|
}); |
||||||
|
} while (ci.Next()); |
||||||
|
// TessModule.destroy(i);
|
||||||
|
} |
||||||
|
} while (ri.Next(RIL_SYMBOL)); |
||||||
|
TessModule.destroy(ri); |
||||||
|
|
||||||
|
return { |
||||||
|
text: api.GetUTF8Text(), |
||||||
|
hocr: tessjs_create_hocr === '1' ? deindent(api.GetHOCRText()) : null, |
||||||
|
tsv: tessjs_create_tsv === '1' ? api.GetTSVText() : null, |
||||||
|
box: tessjs_create_box === '1' ? api.GetBoxText() : null, |
||||||
|
unlv: tessjs_create_unlv === '1' ? api.GetUNLVText() : null, |
||||||
|
osd: tessjs_create_osd === '1' ? api.GetOsdText() : null, |
||||||
|
confidence: api.MeanTextConf(), |
||||||
|
blocks, |
||||||
|
psm: enumToString(api.GetPageSegMode(), 'PSM'), |
||||||
|
oem: enumToString(api.oem(), 'OEM'), |
||||||
|
version: api.Version(), |
||||||
|
}; |
||||||
|
}; |
@ -0,0 +1,63 @@ |
|||||||
|
const bmp = require('bmp-js'); |
||||||
|
const fileType = require('file-type'); |
||||||
|
|
||||||
|
/** |
||||||
|
* setImage |
||||||
|
* |
||||||
|
* @name setImage |
||||||
|
* @function set image in tesseract for recognition |
||||||
|
* @access public |
||||||
|
*/ |
||||||
|
module.exports = (TessModule, api, image) => { |
||||||
|
const buf = Buffer.from(Array.from({ ...image, length: Object.keys(image).length })); |
||||||
|
const type = fileType(buf); |
||||||
|
let bytesPerPixel = 0; |
||||||
|
let data = null; |
||||||
|
let pix = null; |
||||||
|
let w = 0; |
||||||
|
let h = 0; |
||||||
|
|
||||||
|
const exif = buf.slice(0, 500).toString().match(/\x01\x12\x00\x03\x00\x00\x00\x01\x00(.)/)?.[1]?.charCodeAt(0) || 1; |
||||||
|
|
||||||
|
/* |
||||||
|
* Leptonica supports uncompressed but not compressed bmp files |
||||||
|
* @see https://github.com/DanBloomberg/leptonica/issues/607#issuecomment-1068802516
|
||||||
|
* We therefore use bmp-js to process all bmp files |
||||||
|
*/ |
||||||
|
if (type && type.mime === 'image/bmp') { |
||||||
|
const bmpBuf = bmp.decode(buf); |
||||||
|
data = TessModule._malloc(bmpBuf.data.length * Uint8Array.BYTES_PER_ELEMENT); |
||||||
|
TessModule.HEAPU8.set(bmpBuf.data, data); |
||||||
|
w = bmpBuf.width; |
||||||
|
h = bmpBuf.height; |
||||||
|
bytesPerPixel = 4; |
||||||
|
} else { |
||||||
|
const ptr = TessModule._malloc(buf.length * Uint8Array.BYTES_PER_ELEMENT); |
||||||
|
TessModule.HEAPU8.set(buf, ptr); |
||||||
|
pix = TessModule._pixReadMem(ptr, buf.length); |
||||||
|
if (TessModule.getValue(pix + (7 * 4), 'i32') === 0) { |
||||||
|
/* |
||||||
|
* Set a yres default value to prevent warning from tesseract |
||||||
|
* See kMinCredibleResolution in tesseract/src/ccstruct/publictypes.h |
||||||
|
*/ |
||||||
|
TessModule.setValue(pix + (7 * 4), 300, 'i32'); |
||||||
|
} |
||||||
|
[w, h] = Array(2).fill(0) |
||||||
|
.map((v, idx) => ( |
||||||
|
TessModule.getValue(pix + (idx * 4), 'i32') |
||||||
|
)); |
||||||
|
} |
||||||
|
|
||||||
|
/* |
||||||
|
* As some image format (ex. bmp) is not supported natiely by tesseract, |
||||||
|
* sometimes it will not return pix directly, but data and bytesPerPixel |
||||||
|
* for another SetImage usage. |
||||||
|
* |
||||||
|
*/ |
||||||
|
if (data === null) { |
||||||
|
api.SetImage(pix, undefined, undefined, undefined, undefined, exif); |
||||||
|
} else { |
||||||
|
api.SetImage(data, w, h, bytesPerPixel, w * bytesPerPixel, exif); |
||||||
|
} |
||||||
|
return data === null ? pix : data; |
||||||
|
}; |
@ -0,0 +1,18 @@ |
|||||||
|
const resolveURL = require('resolve-url'); |
||||||
|
const { version } = require('../../../package.json'); |
||||||
|
const defaultOptions = require('../../constants/defaultOptions'); |
||||||
|
|
||||||
|
/* |
||||||
|
* Default options for browser worker |
||||||
|
*/ |
||||||
|
module.exports = { |
||||||
|
...defaultOptions, |
||||||
|
workerPath: (typeof process !== 'undefined' && process.env.TESS_ENV === 'development') |
||||||
|
? resolveURL(`/dist/worker.dev.js?nocache=${Math.random().toString(36).slice(3)}`) |
||||||
|
: `https://unpkg.com/tesseract.js@v${version}/dist/worker.min.js`, |
||||||
|
/* |
||||||
|
* If browser doesn't support WebAssembly, |
||||||
|
* load ASM version instead |
||||||
|
*/ |
||||||
|
corePath: null, |
||||||
|
}; |
@ -0,0 +1,24 @@ |
|||||||
|
/** |
||||||
|
* |
||||||
|
* Tesseract Worker adapter for browser |
||||||
|
* |
||||||
|
* @fileoverview Tesseract Worker adapter for browser |
||||||
|
* @author Kevin Kwok <antimatter15@gmail.com> |
||||||
|
* @author Guillermo Webster <gui@mit.edu> |
||||||
|
* @author Jerome Wu <jeromewus@gmail.com> |
||||||
|
*/ |
||||||
|
const defaultOptions = require('./defaultOptions'); |
||||||
|
const spawnWorker = require('./spawnWorker'); |
||||||
|
const terminateWorker = require('./terminateWorker'); |
||||||
|
const onMessage = require('./onMessage'); |
||||||
|
const send = require('./send'); |
||||||
|
const loadImage = require('./loadImage'); |
||||||
|
|
||||||
|
// Browser worker adapter: the six implementations required above, exported
// under their own names.
const browserAdapter = {
  defaultOptions,
  spawnWorker,
  terminateWorker,
  onMessage,
  send,
  loadImage,
};

module.exports = browserAdapter;
@ -0,0 +1,68 @@ |
|||||||
|
const resolveURL = require('resolve-url'); |
||||||
|
|
||||||
|
/**
 * readFromBlobOrFile
 *
 * Reads a Blob or File with a FileReader (readAsArrayBuffer).
 *
 * @name readFromBlobOrFile
 * @function
 * @access private
 * @param {Blob|File} blob - source to read
 * @returns {Promise} resolves with the reader's result; rejects with an Error
 *   carrying the reader's error code
 */
const readFromBlobOrFile = (blob) => {
  const startReading = (resolve, reject) => {
    const reader = new FileReader();

    reader.onload = () => {
      resolve(reader.result);
    };

    reader.onerror = (event) => {
      const { target: { error: { code } } } = event;
      reject(Error(`File could not be read! Code=${code}`));
    };

    reader.readAsArrayBuffer(blob);
  };

  return new Promise(startReading);
};
||||||
|
|
||||||
|
/**
 * loadImage
 *
 * Turns the supported image sources into a Uint8Array of bytes:
 *  - string with a base64 `data:image/...` URI: decoded in place
 *  - any other string: treated as a URL and fetched
 *  - IMG / VIDEO element: loaded from its `src` / `poster`
 *  - CANVAS element: exported through `toBlob`
 *  - File / Blob: read with readFromBlobOrFile
 *  - anything else: passed to the Uint8Array constructor as it is
 *
 * @name loadImage
 * @function load image from different source
 * @access private
 * @param {*} image - image source
 * @returns {Promise} resolves with a Uint8Array, or with the string
 *   'undefined' when `image` is undefined; rejects when the source cannot be
 *   read
 */
const loadImage = async (image) => {
  if (typeof image === 'undefined') {
    return 'undefined';
  }

  let data = image;

  if (typeof image === 'string') {
    if (/data:image\/([a-zA-Z]*);base64,([^"]*)/.test(image)) {
      // Base64 Image: everything after the first comma is the payload
      data = Uint8Array.from(atob(image.split(',')[1]), (c) => c.charCodeAt(0));
    } else {
      const resp = await fetch(resolveURL(image));
      data = await resp.arrayBuffer();
    }
  } else if (image instanceof HTMLElement) {
    if (image.tagName === 'IMG') {
      data = await loadImage(image.src);
    }
    if (image.tagName === 'VIDEO') {
      data = await loadImage(image.poster);
    }
    if (image.tagName === 'CANVAS') {
      // Only the export is wrapped in a promise. The read happens outside the
      // callback, so a failed export or a failed read rejects this function
      // instead of leaving it pending forever.
      const blob = await new Promise((resolve, reject) => {
        image.toBlob((result) => {
          if (result === null) {
            reject(new Error('Canvas content could not be exported to a Blob'));
          } else {
            resolve(result);
          }
        });
      });
      data = await readFromBlobOrFile(blob);
    }
  } else if (image instanceof File || image instanceof Blob) {
    data = await readFromBlobOrFile(image);
  }

  return new Uint8Array(data);
};
||||||
|
|
||||||
|
module.exports = loadImage; |
@ -0,0 +1,5 @@ |
|||||||
|
module.exports = (worker, handler) => { |
||||||
|
worker.onmessage = ({ data }) => { // eslint-disable-line
|
||||||
|
handler(data); |
||||||
|
}; |
||||||
|
}; |
@ -0,0 +1,10 @@ |
|||||||
|
/** |
||||||
|
* send |
||||||
|
* |
||||||
|
* @name send |
||||||
|
* @function send packet to worker and create a job |
||||||
|
* @access public |
||||||
|
*/ |
||||||
|
module.exports = async (worker, packet) => { |
||||||
|
worker.postMessage(packet); |
||||||
|
}; |
@ -0,0 +1,20 @@ |
|||||||
|
/** |
||||||
|
* spawnWorker |
||||||
|
* |
||||||
|
* @name spawnWorker |
||||||
|
* @function create a new Worker in browser |
||||||
|
* @access public |
||||||
|
*/ |
||||||
|
module.exports = ({ workerPath, workerBlobURL }) => { |
||||||
|
let worker; |
||||||
|
if (Blob && URL && workerBlobURL) { |
||||||
|
const blob = new Blob([`importScripts("${workerPath}");`], { |
||||||
|
type: 'application/javascript', |
||||||
|
}); |
||||||
|
worker = new Worker(URL.createObjectURL(blob)); |
||||||
|
} else { |
||||||
|
worker = new Worker(workerPath); |
||||||
|
} |
||||||
|
|
||||||
|
return worker; |
||||||
|
}; |
@ -0,0 +1,10 @@ |
|||||||
|
/** |
||||||
|
* terminateWorker |
||||||
|
* |
||||||
|
* @name terminateWorker |
||||||
|
* @function terminate worker |
||||||
|
* @access public |
||||||
|
*/ |
||||||
|
module.exports = (worker) => { |
||||||
|
worker.terminate(); |
||||||
|
}; |
@ -0,0 +1,10 @@ |
|||||||
|
const path = require('path'); |
||||||
|
const defaultOptions = require('../../constants/defaultOptions'); |
||||||
|
|
||||||
|
/* |
||||||
|
* Default options for node worker |
||||||
|
*/ |
||||||
|
module.exports = { |
||||||
|
...defaultOptions, |
||||||
|
workerPath: path.join(__dirname, '..', '..', 'worker-script', 'node', 'index.js'), |
||||||
|
}; |
@ -0,0 +1,24 @@ |
|||||||
|
/** |
||||||
|
* |
||||||
|
 * Tesseract Worker impl. for node (using worker_threads) |
||||||
|
* |
||||||
|
* @fileoverview Tesseract Worker impl. for node |
||||||
|
* @author Kevin Kwok <antimatter15@gmail.com> |
||||||
|
* @author Guillermo Webster <gui@mit.edu> |
||||||
|
* @author Jerome Wu <jeromewus@gmail.com> |
||||||
|
*/ |
||||||
|
const defaultOptions = require('./defaultOptions'); |
||||||
|
const spawnWorker = require('./spawnWorker'); |
||||||
|
const terminateWorker = require('./terminateWorker'); |
||||||
|
const onMessage = require('./onMessage'); |
||||||
|
const send = require('./send'); |
||||||
|
const loadImage = require('./loadImage'); |
||||||
|
|
||||||
|
// Node worker adapter: the six implementations required above, exported
// under their own names.
const nodeAdapter = {
  defaultOptions,
  spawnWorker,
  terminateWorker,
  onMessage,
  send,
  loadImage,
};

module.exports = nodeAdapter;
@ -0,0 +1,35 @@ |
|||||||
|
const util = require('util'); |
||||||
|
const fs = require('fs'); |
||||||
|
const fetch = require('node-fetch'); |
||||||
|
const isURL = require('is-url'); |
||||||
|
|
||||||
|
const readFile = util.promisify(fs.readFile); |
||||||
|
|
||||||
|
/** |
||||||
|
* loadImage |
||||||
|
* |
||||||
|
* @name loadImage |
||||||
|
* @function load image from different source |
||||||
|
* @access public |
||||||
|
*/ |
||||||
|
module.exports = async (image) => { |
||||||
|
let data = image; |
||||||
|
if (typeof image === 'undefined') { |
||||||
|
return image; |
||||||
|
} |
||||||
|
|
||||||
|
if (typeof image === 'string') { |
||||||
|
if (isURL(image) || image.startsWith('moz-extension://') || image.startsWith('chrome-extension://') || image.startsWith('file://')) { |
||||||
|
const resp = await fetch(image); |
||||||
|
data = await resp.arrayBuffer(); |
||||||
|
} else if (/data:image\/([a-zA-Z]*);base64,([^"]*)/.test(image)) { |
||||||
|
data = Buffer.from(image.split(',')[1], 'base64'); |
||||||
|
} else { |
||||||
|
data = await readFile(image); |
||||||
|
} |
||||||
|
} else if (Buffer.isBuffer(image)) { |
||||||
|
data = image; |
||||||
|
} |
||||||
|
|
||||||
|
return new Uint8Array(data); |
||||||
|
}; |
@ -0,0 +1,3 @@ |
|||||||
|
module.exports = (worker, handler) => { |
||||||
|
worker.on('message', handler); |
||||||
|
}; |
@ -0,0 +1,10 @@ |
|||||||
|
/** |
||||||
|
* send |
||||||
|
* |
||||||
|
* @name send |
||||||
|
* @function send packet to worker and create a job |
||||||
|
* @access public |
||||||
|
*/ |
||||||
|
module.exports = async (worker, packet) => { |
||||||
|
worker.postMessage(packet); |
||||||
|
}; |
@ -0,0 +1,10 @@ |
|||||||
|
const { Worker } = require('worker_threads'); |
||||||
|
|
||||||
|
/** |
||||||
|
* spawnWorker |
||||||
|
* |
||||||
|
* @name spawnWorker |
||||||
|
* @function fork a new process in node |
||||||
|
* @access public |
||||||
|
*/ |
||||||
|
module.exports = ({ workerPath }) => new Worker(workerPath); |
@ -0,0 +1,10 @@ |
|||||||
|
/** |
||||||
|
* terminateWorker |
||||||
|
* |
||||||
|
* @name terminateWorker |
||||||
|
* @function kill worker |
||||||
|
* @access public |
||||||
|
*/ |
||||||
|
module.exports = (worker) => { |
||||||
|
worker.terminate(); |
||||||
|
}; |
@ -0,0 +1,18 @@ |
|||||||
|
<!DOCTYPE html>
<html>
  <head>
    <meta charset="utf-8">
    <link rel="stylesheet" href="../node_modules/mocha/mocha.css">
  </head>
  <body>
    <!-- Element with the id mocha styles and looks up for its report -->
    <div id="mocha"></div>
    <script src="../node_modules/mocha/mocha.js"></script>
    <script src="../node_modules/expect.js/index.js"></script>
    <script src="../dist/tesseract.dev.js"></script>
    <script src="./constants.js"></script>
    <!-- The bdd interface must be set up before the test file is loaded -->
    <script>mocha.setup('bdd');</script>
    <script src="./FS.test.js"></script>
    <script>
      mocha.run();
    </script>
  </body>
</html>
@ -0,0 +1,37 @@ |
|||||||
|
const { createWorker } = Tesseract;
const FS_WAIT = 500;
const worker = createWorker(OPTIONS);

// Promise-based pause, so the wait between write and read can be awaited.
const wait = (ms) => new Promise((resolve) => {
  setTimeout(resolve, ms);
});

before(function cb() {
  this.timeout(0);
  return worker.load();
});

/*
 * Each test is an async function whose promise covers the whole
 * write / wait / read / delete / assert sequence, so a failed assertion
 * fails the test that made it instead of firing after the test has passed.
 */
describe('FS', () => {
  it('should write and read text from FS (using FS only)', async () => {
    for (const text of [SIMPLE_TEXT]) {
      const path = 'tmp.txt';
      await worker.FS('writeFile', [path, text]);
      await wait(FS_WAIT);
      const { data } = await worker.FS('readFile', [path]);
      await worker.FS('unlink', [path]);
      expect(data.toString()).to.be(text);
    }
  }).timeout(TIMEOUT);

  it('should write and read text from FS (using writeFile, readFile)', async () => {
    for (const text of [SIMPLE_TEXT]) {
      const path = 'tmp2.txt';
      await worker.writeText(path, text);
      await wait(FS_WAIT);
      const { data } = await worker.readText(path);
      await worker.removeFile(path);
      expect(data.toString()).to.be(text);
    }
  }).timeout(TIMEOUT);
});
After Width: | Height: | Size: 91 KiB |
After Width: | Height: | Size: 2.4 KiB |
After Width: | Height: | Size: 13 KiB |
After Width: | Height: | Size: 169 KiB |
After Width: | Height: | Size: 1011 B |
After Width: | Height: | Size: 4.9 KiB |
After Width: | Height: | Size: 1.3 KiB |
After Width: | Height: | Size: 3.7 KiB |
After Width: | Height: | Size: 69 KiB |