Compare commits
661 Commits
Author | SHA1 | Date |
---|---|---|
Réda Housni Alaoui | 80aef15861 | 2 years ago |
Balearica | 263dbb87b1 | 2 years ago |
Balearica | c9200839df | 2 years ago |
Balearica | 1b87e30ae3 | 2 years ago |
Balearica | bce7cd84fe | 2 years ago |
Balearica | 2c77f33461 | 2 years ago |
Balearica | d8b29522c4 | 2 years ago |
Balearica | dd6c40b681 | 2 years ago |
Balearica | 5ff17fdeb1 | 2 years ago |
Balearica | 363690a421 | 2 years ago |
Balearica | b64eba3db0 | 2 years ago |
Balearica | 85e73216be | 2 years ago |
Balearica | b419e45114 | 2 years ago |
Balearica | ea33463120 | 2 years ago |
Balearica | 90c8d99b3c | 2 years ago |
Balearica | 0e368c69d6 | 2 years ago |
Balearica | ba394673bd | 2 years ago |
Balearica | 44d322e6ff | 2 years ago |
Balearica | e3c4a6bc6e | 2 years ago |
Balearica | f372818146 | 2 years ago |
Balearica | 8b567609e3 | 2 years ago |
Balearica | 13b95f6371 | 2 years ago |
Balearica | a9ac00ccac | 2 years ago |
Balearica | 75ddd63041 | 2 years ago |
Balearica | 1136e0a941 | 2 years ago |
Balearica | 2e478bd8a5 | 2 years ago |
WintrySnowman | 67848464ac | 2 years ago |
Balearica | be956cd889 | 2 years ago |
Balearica | 61d0e553c6 | 2 years ago |
Balearica | 74be03c5b9 | 2 years ago |
Balearica | 9442d9cb69 | 2 years ago |
Your Name | 6aba9599ec | 2 years ago |
Your Name | 58d28944d3 | 2 years ago |
Balearica | a8287a99aa | 2 years ago |
Balearica | 66085a7d70 | 2 years ago |
Susan Cheng | 50a53f51d9 | 3 years ago |
Andrei Alecu | 01e8335768 | 3 years ago |
jeromewu | adcb5b8759 | 3 years ago |
Jerome Wu | 294ced5c85 | 4 years ago |
jeromewu | 90466c3b55 | 4 years ago |
jeromewu | 7b7f9afaa6 | 4 years ago |
stonefruit | 8f2c33fd89 | 4 years ago |
Ilya | 83a424f9b9 | 4 years ago |
miguelm3 | bf43f447b6 | 4 years ago |
miguelm3 | dec119fa7b | 4 years ago |
jeromewu | 909263b94b | 4 years ago |
jeromewu | eb287e763a | 4 years ago |
Bert Verhelst | 1781487273 | 4 years ago |
Jerome Wu | 69355a7c07 | 4 years ago |
jeromewu | 4f3aa3145c | 4 years ago |
Jerome Wu | ed016bdc26 | 4 years ago |
Jerome Wu | 804c238950 | 4 years ago |
Jerome Wu | 88290dc541 | 4 years ago |
Jerome Wu | 920dd902f8 | 4 years ago |
Jerome Wu | e883f87ab7 | 4 years ago |
Jerome Wu | 34839a7a0c | 4 years ago |
Jerome Wu | e68d520155 | 4 years ago |
Jerome Wu | d98eeab91f | 4 years ago |
jeromewu | c2f4ecef9a | 4 years ago |
Jerome Wu | eb6b8594d3 | 4 years ago |
Jerome Wu | afadfffa6a | 4 years ago |
jeromewu | ccb8ec3d67 | 4 years ago |
jeromewu | d64ebcaea1 | 4 years ago |
jeromewu | 341360bf44 | 4 years ago |
jeromewu | 2cd68fb97e | 4 years ago |
jeromewu | 066c59d1f9 | 4 years ago |
Jerome Wu | 820c91fa52 | 4 years ago |
Cédric Couton | 3f98fdeb0f | 4 years ago |
abhishek7553 | cd08357833 | 4 years ago |
Jerome Wu | 2bbd1e896c | 4 years ago |
Jerome Wu | 5bad2e68ce | 4 years ago |
Jerome Wu | 59392e96ff | 4 years ago |
dependabot[bot] | 0f9cecd544 | 4 years ago |
Jerome Wu | 13ab9cec70 | 4 years ago |
dependabot[bot] | 389bf71381 | 4 years ago |
Jerome Wu | 6481256f5e | 4 years ago |
Jerome Wu | 5e295b75b6 | 4 years ago |
Jerome Wu | f3cbc3ee43 | 4 years ago |
jeromewu | 3e97749585 | 4 years ago |
dependabot[bot] | 8435f4c6e2 | 5 years ago |
jeromewu | 6ec5a5a092 | 5 years ago |
dependabot[bot] | d3cf791e6a | 5 years ago |
jeromewu | cc1f2bfe81 | 5 years ago |
jeromewu | fcd01ceb32 | 5 years ago |
Ivan Schneider | 6ef2e116cd | 5 years ago |
jeromewu | 885908f22e | 5 years ago |
Joshua Mack | 6df9ef2f1f | 5 years ago |
Joshua Mack | bb7d7093f4 | 5 years ago |
jeromewu | 40051c6761 | 5 years ago |
Connor Adams | 0ce8e139ab | 5 years ago |
rogerxaic | b221071b8e | 5 years ago |
rogerxaic | a54dbc345b | 5 years ago |
jeromewu | 1df208ffab | 5 years ago |
jeromewu | 7a398f6420 | 5 years ago |
jeromewu | ecf03503f1 | 5 years ago |
jeromewu | 0c7ade85e3 | 5 years ago |
jeromewu | d6e434338f | 5 years ago |
jeromewu | d37f045e99 | 5 years ago |
jeromewu | 50df652f33 | 5 years ago |
jeromewu | 1ff4b79f75 | 5 years ago |
jeromewu | 93dab17bf1 | 5 years ago |
Jerome Wu | 20cd04a848 | 5 years ago |
Jerome Wu | 363018bffe | 5 years ago |
Jerome Wu | 89d0a6b12d | 5 years ago |
Jerome Wu | 25d6664937 | 5 years ago |
jeromewu | a008d0780a | 5 years ago |
jeromewu | 0778add1c4 | 5 years ago |
jeromewu | 331fbbe6be | 5 years ago |
dependabot[bot] | 6d9adfd2ce | 5 years ago |
Jerome Wu | 3d456e3eb0 | 5 years ago |
Jerome Wu | 205178646b | 5 years ago |
Jerome Wu | 40aea6d886 | 5 years ago |
Jerome Wu | 6f8e69206b | 5 years ago |
Jerome Wu | c5935b2636 | 5 years ago |
jeromewu | 5a40559db2 | 5 years ago |
jeromewu | 023d484209 | 5 years ago |
jeromewu | 641d0c5fe4 | 5 years ago |
Nir | a56de79f22 | 5 years ago |
jeromewu | b418554cf0 | 5 years ago |
jeromewu | f8aa46ec9d | 5 years ago |
Tomasz Ferens | 13c16f472e | 5 years ago |
jeromewu | 247a1635f8 | 5 years ago |
Jerome Wu | 66e2ce842e | 5 years ago |
jeromewu | 2956afc013 | 5 years ago |
jeromewu | 781f2f80ba | 5 years ago |
jeromewu | 548a5a5142 | 5 years ago |
Nisar Hassan Naqvi | 9b91579b16 | 5 years ago |
jeromewu | 7cc3427a38 | 5 years ago |
Oliver Cole | 71dc2a80ed | 5 years ago |
jeromewu | 9db6ebf36e | 5 years ago |
Yvonnick FRIN | db3eed3915 | 5 years ago |
Jerome Wu | e975d29f38 | 5 years ago |
Jerome Wu | f9c76a9b7f | 5 years ago |
Jerome Wu | 945f5d3f35 | 5 years ago |
Jerome Wu | b8aba2eddd | 5 years ago |
Jerome Wu | b603d42547 | 5 years ago |
Jerome Wu | c7a74a6575 | 5 years ago |
Jerome Wu | 3bb543d3cd | 5 years ago |
Jerome Wu | fa5b267f17 | 5 years ago |
Jerome Wu | 15b7983619 | 5 years ago |
Jerome Wu | b2bc416dd2 | 5 years ago |
Jerome Wu | a5fa14cc3b | 5 years ago |
Jerome Wu | 2aba7285e9 | 5 years ago |
jeromewu | 1051b2ab41 | 5 years ago |
jeromewu | 5c22cd52bd | 5 years ago |
Andrea Giammarchi | 5a7576ceb4 | 5 years ago |
jeromewu | 2db7607496 | 5 years ago |
Andrea Giammarchi | 953689fedf | 5 years ago |
jeromewu | 0e8aee660f | 5 years ago |
jeromewu | 625ded7921 | 5 years ago |
jeromewu | 73147afd87 | 5 years ago |
jeromewu | 1a781a6ae4 | 5 years ago |
dependabot[bot] | 9c62422573 | 5 years ago |
jeromewu | 1fddc9a86c | 5 years ago |
jeromewu | 4aaf462564 | 5 years ago |
jeromewu | 7b06e0edcb | 5 years ago |
jeromewu | 663be93420 | 5 years ago |
Charles Francoise | d51d7bd8ac | 5 years ago |
Charles Francoise | 67ccfcfb6e | 5 years ago |
Charles Francoise | ebc56a1899 | 5 years ago |
Charles Francoise | bcdcace9ec | 5 years ago |
jeromewu | dd61663b31 | 5 years ago |
dependabot[bot] | f524945084 | 5 years ago |
dependabot[bot] | 7663c7c589 | 5 years ago |
dependabot[bot] | 571c8c2452 | 5 years ago |
dependabot[bot] | 775e48176c | 5 years ago |
dependabot[bot] | 4182c5d55a | 5 years ago |
jeromewu | a97e2dad98 | 5 years ago |
Johan Gustafsson | 8d926873b3 | 5 years ago |
Tomasz Martyński | b588e9f618 | 5 years ago |
jeromewu | 086c7a507a | 5 years ago |
Anthony Ng | 2de597c1a9 | 5 years ago |
Jerome Wu | 6fd12b5012 | 5 years ago |
Jerome Wu | 15da58127d | 5 years ago |
Jerome Wu | 1d28cc5e52 | 5 years ago |
Jerome Wu | 6d772c7070 | 5 years ago |
Jerome Wu | e7a57363c2 | 5 years ago |
jeromewu | fd53a9d29e | 5 years ago |
jeromewu | 06967cb7f3 | 5 years ago |
jeromewu | 61a7531acb | 5 years ago |
Jerome Wu | dc83e6f437 | 5 years ago |
jeromewu | e9017f3ed6 | 5 years ago |
Jerome Wu | 41d82da35b | 5 years ago |
Jerome Wu | d947f6e554 | 5 years ago |
Jerome Wu | 5db60be949 | 5 years ago |
Jason | 7c5172846b | 5 years ago |
Jerome Wu | 6021eca27a | 5 years ago |
Jerome Wu | b5d0651698 | 5 years ago |
Jerome Wu | 93186a5dc7 | 5 years ago |
Jerome Wu | c124f7767d | 5 years ago |
Jerome Wu | d6ada618a1 | 5 years ago |
Jerome Wu | b59d144af3 | 5 years ago |
Jerome Wu | 7f485c4461 | 5 years ago |
Jerome Wu | 3c1fba8fee | 5 years ago |
Jerome Wu | 22d16c0e65 | 5 years ago |
Jerome Wu | 0ca4f78565 | 5 years ago |
Jerome Wu | 369e794afa | 5 years ago |
Jerome Wu | 0e968d1829 | 5 years ago |
Jerome Wu | d1c4a33c9e | 5 years ago |
Jerome Wu | a470b836d5 | 5 years ago |
Jerome Wu | a31424e3e7 | 5 years ago |
Jerome Wu | 6d7eb50429 | 5 years ago |
Jerome Wu | 4100c0ed7a | 5 years ago |
Jerome Wu | 3ec7092105 | 5 years ago |
Jerome Wu | 97fa545936 | 5 years ago |
Jerome Wu | 142f4dd92d | 5 years ago |
Jerome Wu | 8fcbfecb2f | 5 years ago |
Jerome Wu | c028a8e833 | 5 years ago |
Jerome Wu | f4ddbc49b3 | 5 years ago |
Jerome Wu | 740f2cab0c | 5 years ago |
Jerome Wu | b76b7b8383 | 5 years ago |
jeromewu | 06539f7405 | 5 years ago |
York Yao | c4b53040f5 | 5 years ago |
York Yao | 7c8f4f181b | 5 years ago |
jeromewu | b6e9d6c6a2 | 5 years ago |
Jerome Wu | 6fb2195b75 | 5 years ago |
Jerome Wu | ad668166fd | 5 years ago |
Jerome Wu | 4aaeeee0a3 | 5 years ago |
Jerome Wu | 01ae9e8e36 | 5 years ago |
Jerome Wu | ce4d4d51be | 5 years ago |
Thibaut Girka | eb288fb885 | 6 years ago |
Jerome Wu | 9f1e7829d3 | 6 years ago |
jeromewu | e80056d304 | 6 years ago |
jeromewu | f655fbbacd | 6 years ago |
Blune | ba99279eef | 6 years ago |
jeromewu | 9a4f0b7eaa | 6 years ago |
Seven Lju | d2a68d5a90 | 6 years ago |
Jerome Wu | f1c4ebe2e4 | 6 years ago |
Jerome Wu | 8bc9f64177 | 6 years ago |
Jerome Wu | 26ab198a84 | 6 years ago |
Jerome Wu | f3786d8e2b | 6 years ago |
Jerome Wu | bdef5b4bb5 | 6 years ago |
jeromewu | 3c231b6d2b | 6 years ago |
nisarhassan12 | b1b41b63de | 6 years ago |
Jess | a55f68c3be | 6 years ago |
Jess | 41b4be50fc | 6 years ago |
jeromewu | 1171c7b670 | 6 years ago |
nisarhassan12 | 6a5a83790e | 6 years ago |
Jerome Wu | c87746ec5d | 6 years ago |
jeromewu | 7bd5fda7ff | 6 years ago |
jeromewu | 5cd2c083eb | 6 years ago |
jeromewu | 2a2d05150d | 6 years ago |
nisarhassan12 | 5cf6fc78ce | 6 years ago |
nisarhassan12 | 90bccf7eca | 6 years ago |
Jerome Wu | fb60a2389f | 6 years ago |
Jerome Wu | 94dc428e98 | 6 years ago |
Jerome Wu | 7cdeb6cacf | 6 years ago |
Jerome Wu | 57ae937646 | 6 years ago |
Jerome Wu | bc51e340cb | 6 years ago |
Jerome Wu | 5dd382a67a | 6 years ago |
jeromewu | c9444f3262 | 6 years ago |
Jerome Wu | b7b2148b71 | 6 years ago |
jeromewu | 42012d663a | 6 years ago |
Jerome Wu | 244a4802b0 | 6 years ago |
Jerome Wu | 7f406f438d | 6 years ago |
Jerome Wu | d3be9d662d | 6 years ago |
Jerome Wu | ed0c5c9e06 | 6 years ago |
Jerome Wu | 0d0e795b93 | 6 years ago |
jeromewu | 7f2d736baf | 6 years ago |
Ravi van Rooijen | 7ef9ad95f6 | 6 years ago |
antonrifco | 7566d3550f | 6 years ago |
Tom MacWright | d0803e4dab | 6 years ago |
Tom MacWright | eedabf1a05 | 6 years ago |
Rafael Cruz | 3f1847c05f | 6 years ago |
jeromewu | 92521d6276 | 6 years ago |
Jerome Wu | cc8fce8baf | 6 years ago |
jeromewu | 74f5b639b0 | 6 years ago |
jeromewu | 288905e6be | 6 years ago |
jeromewu | 1e8ea8f465 | 6 years ago |
Ravi van Rooijen | a2402eaeae | 6 years ago |
Jerome Wu | 2ac6b9828e | 6 years ago |
Jerome Wu | a1512f61a4 | 6 years ago |
Jerome Wu | af115e4940 | 6 years ago |
Jerome Wu | 84a9b80c46 | 6 years ago |
Jerome Wu | cc61520fe4 | 6 years ago |
Jerome Wu | c8fe295c94 | 6 years ago |
Jerome Wu | 559cabd69c | 6 years ago |
Jerome Wu | 26d3aecbcc | 6 years ago |
Jerome Wu | a1c851d6c5 | 6 years ago |
Jerome Wu | 53e9298ab6 | 6 years ago |
Jerome Wu | 9c854a4afa | 6 years ago |
Jerome Wu | 698a7d60b2 | 6 years ago |
Jerome Wu | 313e699765 | 6 years ago |
Jerome Wu | 4f14b4b79c | 6 years ago |
Jerome Wu | de4b98ae23 | 6 years ago |
Jerome Wu | 46e84ad95d | 6 years ago |
Jerome Wu | 936cfa6f48 | 6 years ago |
Jerome Wu | 87cff487e2 | 6 years ago |
Jerome Wu | 462f641482 | 6 years ago |
Jerome Wu | 3fa475195e | 6 years ago |
Jerome Wu | 6503dd4e73 | 6 years ago |
Jerome Wu | 080151711b | 6 years ago |
Jerome Wu | 269629d601 | 6 years ago |
jeromewu | 3905ecf600 | 6 years ago |
jeromewu | 46196f9afa | 6 years ago |
Jerome Wu | d41d0ce4a7 | 6 years ago |
Jerome Wu | 5e383d7a14 | 6 years ago |
Jerome Wu | 0f28382dcc | 6 years ago |
Jerome Wu | 794a17472a | 6 years ago |
Jerome Wu | 51e89ed8e5 | 6 years ago |
Jerome Wu | 154b600670 | 6 years ago |
Jerome Wu | 0b48d820b8 | 6 years ago |
Jerome Wu | e32272ea2e | 6 years ago |
Jerome Wu | 4ab9094a50 | 6 years ago |
Jerome Wu | 3cbfb044a1 | 6 years ago |
Jerome Wu | f1c9e2f08c | 6 years ago |
Jerome Wu | 4136b586b2 | 6 years ago |
Jerome Wu | 3450e28087 | 6 years ago |
Jerome Wu | b6e4f30967 | 6 years ago |
Jerome Wu | 8e73c8d1e1 | 6 years ago |
Jerome Wu | ef4fd6c61f | 6 years ago |
jeromewu | f576fca12f | 6 years ago |
Jerome Wu | dcf186b047 | 6 years ago |
Jerome Wu | 824e4710e2 | 6 years ago |
Jerome Wu | 453061d9ae | 6 years ago |
Jerome Wu | 53be7e5726 | 6 years ago |
Jerome Wu | dbd1e6b4de | 6 years ago |
jeromewu | 697b8bf79a | 6 years ago |
jeromewu | 6a44230162 | 6 years ago |
Jerome Wu | 7cff0bb097 | 6 years ago |
Jerome Wu | 6e5c09bac7 | 6 years ago |
Jerome Wu | 0b528af40f | 6 years ago |
Jerome Wu | 969b605713 | 6 years ago |
Jerome Wu | 31cf08e2e2 | 6 years ago |
Jerome Wu | 154f3f60db | 6 years ago |
Jerome Wu | f32f2cabd1 | 6 years ago |
Jerome Wu | 4624c6f404 | 6 years ago |
Jerome Wu | 8f0eeb9f84 | 6 years ago |
Jerome Wu | b322febf97 | 6 years ago |
Jerome Wu | 9206c022ad | 6 years ago |
Jerome Wu | b488097551 | 6 years ago |
Jerome Wu | a873bbbc80 | 6 years ago |
Jerome Wu | 30f36a0f76 | 6 years ago |
Jerome Wu | 38e0d0087e | 6 years ago |
Jerome Wu | 584205372d | 6 years ago |
Jerome Wu | 08be88c890 | 6 years ago |
Jerome Wu | 6ee38849d2 | 6 years ago |
Jerome Wu | d8a7bb8ba4 | 6 years ago |
Jerome Wu | 566b105c19 | 6 years ago |
Jerome Wu | 3809498e5f | 6 years ago |
Jerome Wu | 10583c57e4 | 6 years ago |
Jerome Wu | 8a3e0a6e28 | 6 years ago |
jeromewu | 9236ff0ebb | 6 years ago |
Caeman Toombs | 0ae0f71b5e | 6 years ago |
Jerome Wu | 9893557862 | 6 years ago |
Jerome Wu | 9aafe4aa24 | 6 years ago |
Jerome Wu | 71a9cb9210 | 6 years ago |
Jerome Wu | cfb7d5673c | 6 years ago |
Jerome Wu | 62a67ac430 | 6 years ago |
Jerome Wu | f6714f0055 | 6 years ago |
Jerome Wu | b1c30dfad9 | 6 years ago |
Jerome Wu | 69042fc1bf | 6 years ago |
Jerome Wu | ce520007ec | 6 years ago |
Jerome Wu | 40740db822 | 6 years ago |
Jerome Wu | 8146655c55 | 6 years ago |
Jerome Wu | ad918d349a | 6 years ago |
Jerome Wu | b4c6565acf | 6 years ago |
Jerome Wu | 3b2b8bd2b8 | 6 years ago |
Jerome Wu | a4fdded574 | 6 years ago |
Jerome Wu | a45e35467e | 6 years ago |
Jerome Wu | 7fa06577d5 | 6 years ago |
Jerome Wu | b843f08705 | 6 years ago |
Jerome Wu | b31c45faf3 | 6 years ago |
Jerome Wu | e0b263b88b | 6 years ago |
Jerome Wu | f488f4d204 | 6 years ago |
Jerome Wu | 098c070e6d | 6 years ago |
Jerome Wu | 12eaac4d2b | 6 years ago |
Jerome Wu | 07b45ec0b5 | 6 years ago |
Jerome Wu | 259562a422 | 6 years ago |
Jerome Wu | ec1a81e72d | 6 years ago |
Jerome Wu | 38b107b094 | 6 years ago |
Jerome Wu | e9cde564e5 | 6 years ago |
Jerome Wu | af05a27d35 | 6 years ago |
Jerome Wu | 84bde05028 | 6 years ago |
Jerome Wu | 30d3421d15 | 6 years ago |
Jerome Wu | 2b3221fbfe | 6 years ago |
Jerome Wu | 6dcef53dbc | 6 years ago |
Jerome Wu | 876bfc5f9a | 6 years ago |
Jerome Wu | 17b103746e | 6 years ago |
Jerome Wu | 1e80e42a6f | 6 years ago |
Jerome Wu | c6ea108325 | 6 years ago |
jeromewu | cccd9baea3 | 6 years ago |
Jerome Wu | e5c693a43c | 6 years ago |
Jerome Wu | a0e1764813 | 6 years ago |
jeromewu | a9c2fe6686 | 6 years ago |
jeromewu | 3e29fa6420 | 6 years ago |
Jerome Wu | 5f5072aca4 | 6 years ago |
Jerome Wu | 84612f2772 | 6 years ago |
jeromewu | 2fc29127db | 6 years ago |
jeromewu | 28f54693f2 | 6 years ago |
jeromewu | 281b59fe26 | 6 years ago |
jeromewu | 343246f2ca | 6 years ago |
jeromewu | 0d07f57ec5 | 6 years ago |
jeromewu | a4f69237a4 | 6 years ago |
Jerome Wu | 464565e335 | 6 years ago |
Jerome Wu | 6132f2169d | 6 years ago |
Jerome Wu | d2ab2a1486 | 6 years ago |
Jerome Wu | 244a948991 | 6 years ago |
jeromewu | 797de3aae4 | 6 years ago |
Jerome Wu | eb50d77354 | 6 years ago |
Jerome Wu | 64e7dea45c | 6 years ago |
Jerome Wu | 5841c6bbfd | 6 years ago |
Jerome Wu | a900000254 | 6 years ago |
Jerome Wu | 8b56811de7 | 6 years ago |
jeromewu | 7701c2db38 | 6 years ago |
jeromewu | 418fc4c4ff | 6 years ago |
swifmaneum | 6ad1810996 | 7 years ago |
Carlos Ferreira | fbe920d66e | 7 years ago |
Sukmin Lim | a62e4b832b | 7 years ago |
yonmey | 2cb3d92f79 | 7 years ago |
Jerome Wu | 2581b662dc | 7 years ago |
jeromewu | 16aaea19a9 | 7 years ago |
jeromewu | 0ed7584ddb | 7 years ago |
Jerome Wu | 3e23f2b602 | 7 years ago |
jeromewu | 9d6117ee7a | 7 years ago |
Jerome Wu | 46a37aa07a | 7 years ago |
George Chung | ffe2360426 | 7 years ago |
IlyaRadinsky | 958c6a394e | 7 years ago |
Kevin Kwok | 799eafab90 | 7 years ago |
Kevin Kwok | 8e9f7861ac | 7 years ago |
Michael Hadley | 69678dd86b | 8 years ago |
Michael Hadley | 724f088328 | 8 years ago |
Michael Hadley | 4bff80585c | 8 years ago |
Michael Hadley | abcb593971 | 8 years ago |
mohammad reza | 142ee885f2 | 8 years ago |
James | 0cd348dfbe | 8 years ago |
Trevor Healy | 27fa9156ad | 8 years ago |
Yevhen Tiurin | 8350b5345f | 8 years ago |
Kevin Kwok | b99bf1cee5 | 8 years ago |
Rainer Eli | 157d9b0e0a | 8 years ago |
Kevin Kwok | d7ba0e32a8 | 8 years ago |
Kevin Kwok | 61447d8ebd | 8 years ago |
Guillermo | f4289d124e | 8 years ago |
Guillermo | c27f07a292 | 8 years ago |
Guillermo | beabea0c71 | 8 years ago |
Kevin Kwok | d079a59b00 | 8 years ago |
Varun Shenoy | 5e6e6d4ceb | 8 years ago |
Kütbach Christian | 5a0d8896fd | 8 years ago |
Guillermo | 15bb5c4145 | 8 years ago |
Kütbach Christian | 4244d89998 | 8 years ago |
Kütbach Christian | 8c265dbce5 | 8 years ago |
Kütbach Christian | 01c7c4a16f | 8 years ago |
Guillermo | d08ac24b82 | 8 years ago |
Kiran Kota | 91f9be7e81 | 8 years ago |
Guillermo | 21fe5729eb | 8 years ago |
Guillermo | e01c63e864 | 8 years ago |
Kevin Kwok | fc15b0ef43 | 8 years ago |
Kevin Kwok | afabe9eab8 | 8 years ago |
Kevin Kwok | 1078830628 | 8 years ago |
Guillermo | 5a536f1599 | 8 years ago |
Guillermo | 2f3cec857d | 8 years ago |
Guillermo | f18b01fd2a | 8 years ago |
Guillermo | 8b42b232cd | 8 years ago |
Guillermo | 30d252e6f6 | 8 years ago |
Guillermo Webster | 221908f3ff | 8 years ago |
Romina Suarez | 29ff2f283c | 8 years ago |
Guillermo Webster | e8e19d383b | 8 years ago |
Romina Suarez | 39f5fcc17a | 8 years ago |
Guillermo | 35c546d062 | 8 years ago |
Guillermo | 52ddac57f3 | 8 years ago |
Kevin Kwok | 3645946a5d | 8 years ago |
hemanth.hm | 64d2b8cb85 | 8 years ago |
Rohit Jha | 7fb328e3e6 | 8 years ago |
bijection | 915fadb9bc | 8 years ago |
Hemanth.HM | 32565dc725 | 8 years ago |
Kevin Kwok | ba2841329d | 8 years ago |
Kevin Kwok | a5cde75980 | 8 years ago |
Kevin Kwok | 15058e036c | 8 years ago |
Kevin Kwok | c09d4655b5 | 8 years ago |
Kevin Kwok | 9d13b2339f | 8 years ago |
Kevin Kwok | 2b141f8aa5 | 8 years ago |
Kevin Kwok | acc2b835ce | 8 years ago |
Kevin Kwok | 2733c9a030 | 8 years ago |
Kevin Kwok | 9a9db10771 | 8 years ago |
Kevin Kwok | ea54eaa23f | 8 years ago |
Guillermo | 752038921e | 8 years ago |
Kevin Kwok | b20c4a8c6e | 8 years ago |
bijection | 46add665c0 | 8 years ago |
bijection | d8a30989c0 | 8 years ago |
Hector Leon Zarco Garcia | 122b74d7d7 | 8 years ago |
Your Name | b04e4ccf55 | 8 years ago |
Your Name | 91b326981f | 8 years ago |
Nick Heindl | 0e50365683 | 8 years ago |
bijection | 471ed0997a | 8 years ago |
bijection | b38b3a8c1f | 8 years ago |
bijection | e9735cdc9c | 8 years ago |
Hector Leon Zarco Garcia | 800c83440d | 8 years ago |
Hector Leon Zarco Garcia | 87643df023 | 8 years ago |
Andrew Han | 5a71378efc | 8 years ago |
Andrew Murray | 162deeebdb | 8 years ago |
bijection | b3cc0c0293 | 8 years ago |
Andre Bluehs | 7a47cdaa98 | 8 years ago |
Guillermo | 95769915be | 8 years ago |
Guillermo | 3fb3d2f45b | 8 years ago |
Guillermo | edfa1d8b4f | 8 years ago |
Guillermo | 7a872d0367 | 8 years ago |
Guillermo | c26cae7ee9 | 8 years ago |
bijection | 8434f59977 | 8 years ago |
Guillermo | 8585b7fa83 | 8 years ago |
Guillermo | 287e8289d4 | 8 years ago |
Kevin Kwok | 41a7b4d427 | 8 years ago |
Kevin Kwok | 048384951f | 8 years ago |
Kevin Kwok | ada7e8364c | 8 years ago |
Kevin Kwok | 5886ebba1f | 8 years ago |
Kevin Kwok | 681d08c1d5 | 8 years ago |
Kevin Kwok | 1ef222cca8 | 8 years ago |
Kevin Kwok | 16685bab65 | 8 years ago |
Kevin Kwok | 45aa21603c | 8 years ago |
Kevin Kwok | 872284b112 | 8 years ago |
Kevin Kwok | 8e88eaca5e | 8 years ago |
Kevin Kwok | 1a7429d022 | 8 years ago |
Kevin Kwok | 7bf3725fc9 | 8 years ago |
Kevin Kwok | 036f8debfc | 8 years ago |
Guillermo | ab13d7b891 | 8 years ago |
Guillermo | d8f3db3d91 | 8 years ago |
Guillermo | d296f16314 | 8 years ago |
Guillermo | 22b3ab8423 | 8 years ago |
Guillermo | 34d4fb799d | 8 years ago |
Guillermo | 0acbb71e66 | 8 years ago |
Guillermo | e8a286988f | 8 years ago |
Guillermo | e5c8df3aac | 8 years ago |
Guillermo | 274c54db1e | 8 years ago |
Guillermo | c61ee098d7 | 8 years ago |
Guillermo | c7f6364710 | 8 years ago |
Guillermo | 2fa9421ced | 8 years ago |
Guillermo | 4b43a0ef04 | 8 years ago |
Guillermo | 3042a8ca06 | 8 years ago |
Guillermo | f67374a03c | 8 years ago |
Guillermo | 7675a98673 | 8 years ago |
Kevin Kwok | dbf43bbe60 | 8 years ago |
Kevin Kwok | 090d0c786e | 8 years ago |
Kevin Kwok | 1331ef8f18 | 8 years ago |
Kevin Kwok | cb6419203f | 8 years ago |
Kevin Kwok | c2a06b4b12 | 8 years ago |
Kevin Kwok | 16d9e9d088 | 8 years ago |
Kevin Kwok | 8817639572 | 8 years ago |
Kevin Kwok | b7dd138ea4 | 8 years ago |
Guillermo | c5e3aeb5ef | 8 years ago |
Guillermo | e25f31aac5 | 8 years ago |
Kevin Kwok | 4d120482ba | 8 years ago |
Kevin Kwok | 973ed2fed5 | 8 years ago |
Kevin Kwok | 19ac89478a | 8 years ago |
Kevin Kwok | 7c66c3844c | 8 years ago |
Kevin Kwok | 699c029e89 | 8 years ago |
Kevin Kwok | c5a1726e1d | 8 years ago |
Guillermo | b5267918fd | 8 years ago |
Guillermo | d16101b686 | 8 years ago |
Guillermo | ca7f4966e4 | 8 years ago |
Guillermo | 9b1d09a72e | 8 years ago |
Guillermo | 5972594575 | 8 years ago |
Guillermo | 0c6f96de8d | 8 years ago |
Guillermo | 6a92dd3a94 | 8 years ago |
Guillermo | 5ee01d4265 | 8 years ago |
Guillermo | c88a769818 | 8 years ago |
Guillermo | 9644f87640 | 8 years ago |
Guillermo | 7d978ac813 | 8 years ago |
Guillermo | 26c0f4cc30 | 8 years ago |
Guillermo | f8b9d2e70e | 8 years ago |
Guillermo | b1aa51da67 | 8 years ago |
Guillermo | 87858dd8b6 | 8 years ago |
Guillermo | b09665cf22 | 8 years ago |
Guillermo | fb476b24b5 | 8 years ago |
Guillermo | 628febf0a6 | 8 years ago |
Guillermo | a770a67911 | 8 years ago |
Guillermo | 3501505423 | 8 years ago |
Kevin Kwok | 733328e99b | 8 years ago |
Guillermo | d31667831a | 8 years ago |
Guillermo | df676ff110 | 8 years ago |
Guillermo | 1594f7528d | 8 years ago |
Guillermo | 82ea60d2c3 | 8 years ago |
Guillermo | 3216553d62 | 8 years ago |
Guillermo | 8cdbecd085 | 8 years ago |
Guillermo | 549b2c1238 | 8 years ago |
Guillermo | 16b120b178 | 8 years ago |
Guillermo | d09e1930fc | 8 years ago |
Guillermo | 4eed6f5137 | 8 years ago |
Guillermo | 21b3136a83 | 8 years ago |
Guillermo | a91acffec6 | 8 years ago |
Guillermo | 2e6757da81 | 8 years ago |
Guillermo | 0e88969f0a | 8 years ago |
Guillermo | 905427fe95 | 8 years ago |
Guillermo | debcb1cd7a | 8 years ago |
Guillermo | 491cc2a312 | 8 years ago |
Guillermo | 0ee94b2169 | 8 years ago |
Guillermo | 4a8ee3729d | 8 years ago |
Guillermo | 2a98b0ac07 | 8 years ago |
Guillermo | 32f62de7ad | 8 years ago |
Guillermo | 886a3d6704 | 8 years ago |
Guillermo | 2f4e85237d | 8 years ago |
Guillermo | dac0fdea3e | 8 years ago |
Guillermo | 9aac85ac4f | 8 years ago |
Guillermo | dff5b07613 | 8 years ago |
Guillermo | 4eda874325 | 8 years ago |
Guillermo | a91dce8b16 | 8 years ago |
Guillermo | 2b679123fa | 8 years ago |
Guillermo | d23b6d8728 | 8 years ago |
Guillermo | 37e6de16bb | 8 years ago |
Guillermo | c80038b6cb | 8 years ago |
Guillermo | 4741312b3e | 8 years ago |
Guillermo | 53b6c3ab6e | 8 years ago |
Guillermo | c4ceacea5f | 8 years ago |
Guillermo | b7bec0f1b1 | 8 years ago |
Guillermo | 3dc7322e67 | 8 years ago |
Guillermo | fbb830e061 | 8 years ago |
Guillermo | 0e8094afa7 | 8 years ago |
Guillermo | f5fc87638c | 8 years ago |
Guillermo | 2e657b9f81 | 8 years ago |
Guillermo | 93e5b706b3 | 8 years ago |
Guillermo | 94ca863eb6 | 8 years ago |
Guillermo | 15820de3b2 | 8 years ago |
Guillermo | 6117345bec | 8 years ago |
Guillermo | 1a6c9236bc | 8 years ago |
Guillermo | 01238cb300 | 8 years ago |
Guillermo | 1e9b4748d4 | 8 years ago |
Guillermo | 28bf468cb4 | 8 years ago |
Guillermo | 20fea90b05 | 8 years ago |
Guillermo | 35f450efca | 8 years ago |
Guillermo | b130433437 | 8 years ago |
Guillermo | 1fabf60105 | 10 years ago |
Guillermo | b26adef7c6 | 10 years ago |
Guillermo | 52e64abcd9 | 10 years ago |
Guillermo | 6f219d7ed7 | 10 years ago |
Guillermo | 167746a418 | 10 years ago |
Guillermo | 6c539e5d60 | 10 years ago |
Guillermo | 4648367569 | 10 years ago |
Guillermo | 9b38d4f91b | 10 years ago |
Guillermo | 4738187d5b | 10 years ago |
Guillermo | e9423bb032 | 10 years ago |
Guillermo | d7c713386d | 10 years ago |
Guillermo | 27150fa4cd | 10 years ago |
Guillermo | 52e67dce57 | 10 years ago |
Guillermo | 1cba6a36be | 10 years ago |
Guillermo | 85304c54fb | 10 years ago |
Guillermo | f7f13245ba | 10 years ago |
Guillermo | 422e71dc4b | 10 years ago |
Guillermo | d85281eb3f | 10 years ago |
Guillermo | 71fbe3d8ea | 10 years ago |
Guillermo | 61f98fb036 | 10 years ago |
Guillermo | 7b239c67d0 | 10 years ago |
Guillermo | 1df4b5a752 | 10 years ago |
Kevin Kwok | 38ce3353ef | 10 years ago |
Kevin Kwok | 20e7916975 | 10 years ago |
Guillermo | c2df75e589 | 10 years ago |
Guillermo | f237763f29 | 10 years ago |
Guillermo | 3901492461 | 10 years ago |
Guillermo | c7771ed2cc | 10 years ago |
Guillermo | 7a207402bd | 10 years ago |
Guillermo | f056c1702b | 10 years ago |
Guillermo | 36bd7636e0 | 10 years ago |
Guillermo | 5fc117bb10 | 10 years ago |
Guillermo | b6b1b4a2c1 | 10 years ago |
Guillermo | 601605a0b9 | 10 years ago |
Guillermo | 6e89a557b4 | 10 years ago |
Guillermo | 127ade5d91 | 10 years ago |
Guillermo | 2c059becca | 10 years ago |
Guillermo | 8fe135af7f | 10 years ago |
Guillermo | 29d7e56d20 | 10 years ago |
Guillermo | 690dbab1d3 | 10 years ago |
Guillermo | 1636e1674e | 10 years ago |
Guillermo | b990b06c44 | 10 years ago |
Guillermo | 4b3b4c3f21 | 10 years ago |
@ -0,0 +1,17 @@
@@ -0,0 +1,17 @@
|
||||
{ |
||||
"extends": "airbnb-base", |
||||
"parser": "babel-eslint", |
||||
"env": { |
||||
"browser": true, |
||||
"node": true, |
||||
"mocha": true, |
||||
"worker": true |
||||
}, |
||||
"rules": { |
||||
"no-underscore-dangle": 0, |
||||
"no-console": 0, |
||||
"global-require": 0, |
||||
"camelcase": 0, |
||||
"no-control-regex": 0 |
||||
} |
||||
} |
@ -0,0 +1,9 @@
@@ -0,0 +1,9 @@
|
||||
# These are supported funding model platforms |
||||
|
||||
github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] |
||||
patreon: # Replace with a single Patreon username |
||||
open_collective: tesseractjs |
||||
ko_fi: # Replace with a single Ko-fi username |
||||
tidelift: npm/tesseract.js |
||||
community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry |
||||
custom: ["https://etherscan.io/address/0x74ace8c74535d6dac03ebdc708ca2fba54796ef2"] |
@ -0,0 +1,38 @@
@@ -0,0 +1,38 @@
|
||||
--- |
||||
name: Bug report |
||||
about: Create a report to help us improve |
||||
title: '' |
||||
labels: '' |
||||
assignees: '' |
||||
|
||||
--- |
||||
|
||||
**Describe the bug** |
||||
A clear and concise description of what the bug is. |
||||
|
||||
**To Reproduce** |
||||
Steps to reproduce the behavior: |
||||
1. Go to '...' |
||||
2. Click on '....' |
||||
3. Scroll down to '....' |
||||
4. See error |
||||
|
||||
**Expected behavior** |
||||
A clear and concise description of what you expected to happen. |
||||
|
||||
**Screenshots** |
||||
If applicable, add screenshots to help explain your problem. |
||||
|
||||
**Desktop (please complete the following information):** |
||||
- OS: [e.g. iOS] |
||||
- Browser [e.g. chrome, safari] |
||||
- Version [e.g. 22] |
||||
|
||||
**Smartphone (please complete the following information):** |
||||
- Device: [e.g. iPhone6] |
||||
- OS: [e.g. iOS8.1] |
||||
- Browser [e.g. stock browser, safari] |
||||
- Version [e.g. 22] |
||||
|
||||
**Additional context** |
||||
Add any other context about the problem here. |
@ -0,0 +1,20 @@
@@ -0,0 +1,20 @@
|
||||
--- |
||||
name: Feature request |
||||
about: Suggest an idea for this project |
||||
title: '' |
||||
labels: '' |
||||
assignees: '' |
||||
|
||||
--- |
||||
|
||||
**Is your feature request related to a problem? Please describe.** |
||||
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] |
||||
|
||||
**Describe the solution you'd like** |
||||
A clear and concise description of what you want to happen. |
||||
|
||||
**Describe alternatives you've considered** |
||||
A clear and concise description of any alternative solutions or features you've considered. |
||||
|
||||
**Additional context** |
||||
Add any other context or screenshots about the feature request here. |
@ -0,0 +1,5 @@
@@ -0,0 +1,5 @@
|
||||
## Security contact information |
||||
|
||||
To report a security vulnerability, please use the |
||||
[Tidelift security contact](https://tidelift.com/security). |
||||
Tidelift will coordinate the fix and disclosure. |
@ -0,0 +1,29 @@
@@ -0,0 +1,29 @@
|
||||
# This workflow will do a clean install of node dependencies, build the source code and run tests across different versions of node |
||||
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-nodejs-with-github-actions |
||||
|
||||
name: Node.js CI |
||||
|
||||
on: |
||||
push: |
||||
branches: [ master ] |
||||
pull_request: |
||||
branches: [ master ] |
||||
|
||||
jobs: |
||||
build: |
||||
|
||||
runs-on: ubuntu-latest |
||||
|
||||
strategy: |
||||
matrix: |
||||
node-version: [14.x, 16.x] |
||||
|
||||
steps: |
||||
- uses: actions/checkout@v2 |
||||
- name: Use Node.js ${{ matrix.node-version }} |
||||
uses: actions/setup-node@v1 |
||||
with: |
||||
node-version: ${{ matrix.node-version }} |
||||
- run: npm ci |
||||
- run: npm run lint |
||||
- run: npm test |
@ -1,2 +1,10 @@
@@ -1,2 +1,10 @@
|
||||
.DS_Store |
||||
node_modules/* |
||||
yarn.lock |
||||
tesseract.dev.js |
||||
worker.dev.js |
||||
/*.traineddata |
||||
/examples/**/*.traineddata |
||||
.nyc_output |
||||
dist/ |
||||
*.swp |
||||
|
@ -0,0 +1,2 @@
@@ -0,0 +1,2 @@
|
||||
FROM gitpod/workspace-full |
||||
RUN sudo apt-get update && sudo apt-get install -y libgtk-3-0 libx11-xcb1 libnss3 libxss1 libasound2 |
@ -0,0 +1,9 @@
@@ -0,0 +1,9 @@
|
||||
image: |
||||
file: .gitpod.Dockerfile |
||||
tasks: |
||||
- command: gp await-port 3000 && sleep 3 && gp preview $(gp url 3000)/examples/browser/demo.html |
||||
- init: npm install |
||||
command: npm start |
||||
ports: |
||||
- port: 3000 |
||||
onOpen: ignore |
@ -0,0 +1,201 @@
@@ -0,0 +1,201 @@
|
||||
Apache License |
||||
Version 2.0, January 2004 |
||||
http://www.apache.org/licenses/ |
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION |
||||
|
||||
1. Definitions. |
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction, |
||||
and distribution as defined by Sections 1 through 9 of this document. |
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by |
||||
the copyright owner that is granting the License. |
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all |
||||
other entities that control, are controlled by, or are under common |
||||
control with that entity. For the purposes of this definition, |
||||
"control" means (i) the power, direct or indirect, to cause the |
||||
direction or management of such entity, whether by contract or |
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the |
||||
outstanding shares, or (iii) beneficial ownership of such entity. |
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity |
||||
exercising permissions granted by this License. |
||||
|
||||
"Source" form shall mean the preferred form for making modifications, |
||||
including but not limited to software source code, documentation |
||||
source, and configuration files. |
||||
|
||||
"Object" form shall mean any form resulting from mechanical |
||||
transformation or translation of a Source form, including but |
||||
not limited to compiled object code, generated documentation, |
||||
and conversions to other media types. |
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or |
||||
Object form, made available under the License, as indicated by a |
||||
copyright notice that is included in or attached to the work |
||||
(an example is provided in the Appendix below). |
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object |
||||
form, that is based on (or derived from) the Work and for which the |
||||
editorial revisions, annotations, elaborations, or other modifications |
||||
represent, as a whole, an original work of authorship. For the purposes |
||||
of this License, Derivative Works shall not include works that remain |
||||
separable from, or merely link (or bind by name) to the interfaces of, |
||||
the Work and Derivative Works thereof. |
||||
|
||||
"Contribution" shall mean any work of authorship, including |
||||
the original version of the Work and any modifications or additions |
||||
to that Work or Derivative Works thereof, that is intentionally |
||||
submitted to Licensor for inclusion in the Work by the copyright owner |
||||
or by an individual or Legal Entity authorized to submit on behalf of |
||||
the copyright owner. For the purposes of this definition, "submitted" |
||||
means any form of electronic, verbal, or written communication sent |
||||
to the Licensor or its representatives, including but not limited to |
||||
communication on electronic mailing lists, source code control systems, |
||||
and issue tracking systems that are managed by, or on behalf of, the |
||||
Licensor for the purpose of discussing and improving the Work, but |
||||
excluding communication that is conspicuously marked or otherwise |
||||
designated in writing by the copyright owner as "Not a Contribution." |
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity |
||||
on behalf of whom a Contribution has been received by Licensor and |
||||
subsequently incorporated within the Work. |
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of |
||||
this License, each Contributor hereby grants to You a perpetual, |
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable |
||||
copyright license to reproduce, prepare Derivative Works of, |
||||
publicly display, publicly perform, sublicense, and distribute the |
||||
Work and such Derivative Works in Source or Object form. |
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of |
||||
this License, each Contributor hereby grants to You a perpetual, |
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable |
||||
(except as stated in this section) patent license to make, have made, |
||||
use, offer to sell, sell, import, and otherwise transfer the Work, |
||||
where such license applies only to those patent claims licensable |
||||
by such Contributor that are necessarily infringed by their |
||||
Contribution(s) alone or by combination of their Contribution(s) |
||||
with the Work to which such Contribution(s) was submitted. If You |
||||
institute patent litigation against any entity (including a |
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work |
||||
or a Contribution incorporated within the Work constitutes direct |
||||
or contributory patent infringement, then any patent licenses |
||||
granted to You under this License for that Work shall terminate |
||||
as of the date such litigation is filed. |
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the |
||||
Work or Derivative Works thereof in any medium, with or without |
||||
modifications, and in Source or Object form, provided that You |
||||
meet the following conditions: |
||||
|
||||
(a) You must give any other recipients of the Work or |
||||
Derivative Works a copy of this License; and |
||||
|
||||
(b) You must cause any modified files to carry prominent notices |
||||
stating that You changed the files; and |
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works |
||||
that You distribute, all copyright, patent, trademark, and |
||||
attribution notices from the Source form of the Work, |
||||
excluding those notices that do not pertain to any part of |
||||
the Derivative Works; and |
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its |
||||
distribution, then any Derivative Works that You distribute must |
||||
include a readable copy of the attribution notices contained |
||||
within such NOTICE file, excluding those notices that do not |
||||
pertain to any part of the Derivative Works, in at least one |
||||
of the following places: within a NOTICE text file distributed |
||||
as part of the Derivative Works; within the Source form or |
||||
documentation, if provided along with the Derivative Works; or, |
||||
within a display generated by the Derivative Works, if and |
||||
wherever such third-party notices normally appear. The contents |
||||
of the NOTICE file are for informational purposes only and |
||||
do not modify the License. You may add Your own attribution |
||||
notices within Derivative Works that You distribute, alongside |
||||
or as an addendum to the NOTICE text from the Work, provided |
||||
that such additional attribution notices cannot be construed |
||||
as modifying the License. |
||||
|
||||
You may add Your own copyright statement to Your modifications and |
||||
may provide additional or different license terms and conditions |
||||
for use, reproduction, or distribution of Your modifications, or |
||||
for any such Derivative Works as a whole, provided Your use, |
||||
reproduction, and distribution of the Work otherwise complies with |
||||
the conditions stated in this License. |
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise, |
||||
any Contribution intentionally submitted for inclusion in the Work |
||||
by You to the Licensor shall be under the terms and conditions of |
||||
this License, without any additional terms or conditions. |
||||
Notwithstanding the above, nothing herein shall supersede or modify |
||||
the terms of any separate license agreement you may have executed |
||||
with Licensor regarding such Contributions. |
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade |
||||
names, trademarks, service marks, or product names of the Licensor, |
||||
except as required for reasonable and customary use in describing the |
||||
origin of the Work and reproducing the content of the NOTICE file. |
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or |
||||
agreed to in writing, Licensor provides the Work (and each |
||||
Contributor provides its Contributions) on an "AS IS" BASIS, |
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or |
||||
implied, including, without limitation, any warranties or conditions |
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A |
||||
PARTICULAR PURPOSE. You are solely responsible for determining the |
||||
appropriateness of using or redistributing the Work and assume any |
||||
risks associated with Your exercise of permissions under this License. |
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory, |
||||
whether in tort (including negligence), contract, or otherwise, |
||||
unless required by applicable law (such as deliberate and grossly |
||||
negligent acts) or agreed to in writing, shall any Contributor be |
||||
liable to You for damages, including any direct, indirect, special, |
||||
incidental, or consequential damages of any character arising as a |
||||
result of this License or out of the use or inability to use the |
||||
Work (including but not limited to damages for loss of goodwill, |
||||
work stoppage, computer failure or malfunction, or any and all |
||||
other commercial damages or losses), even if such Contributor |
||||
has been advised of the possibility of such damages. |
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing |
||||
the Work or Derivative Works thereof, You may choose to offer, |
||||
and charge a fee for, acceptance of support, warranty, indemnity, |
||||
or other liability obligations and/or rights consistent with this |
||||
License. However, in accepting such obligations, You may act only |
||||
on Your own behalf and on Your sole responsibility, not on behalf |
||||
of any other Contributor, and only if You agree to indemnify, |
||||
defend, and hold each Contributor harmless for any liability |
||||
incurred by, or claims asserted against, such Contributor by reason |
||||
of your accepting any such warranty or additional liability. |
||||
|
||||
END OF TERMS AND CONDITIONS |
||||
|
||||
APPENDIX: How to apply the Apache License to your work. |
||||
|
||||
To apply the Apache License to your work, attach the following |
||||
boilerplate notice, with the fields enclosed by brackets "{}" |
||||
replaced with your own identifying information. (Don't include |
||||
the brackets!) The text should be enclosed in the appropriate |
||||
comment syntax for the file format. We also recommend that a |
||||
file or class name and description of purpose be included on the |
||||
same "printed page" as the copyright notice for easier |
||||
identification within third-party archives. |
||||
|
||||
Copyright {yyyy} {name of copyright owner} |
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); |
||||
you may not use this file except in compliance with the License. |
||||
You may obtain a copy of the License at |
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0 |
||||
|
||||
Unless required by applicable law or agreed to in writing, software |
||||
distributed under the License is distributed on an "AS IS" BASIS, |
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
||||
See the License for the specific language governing permissions and |
||||
limitations under the License. |
@ -1 +1,197 @@
@@ -1 +1,197 @@
|
||||
# tesseract.js |
||||
<p align="center"> |
||||
<a href="https://tesseract.projectnaptha.com/"><img width="256px" height="256px" alt="Tesseract.js" src="./docs/images/tesseract.png"></a> |
||||
</p> |
||||
|
||||
![Lint & Test](https://github.com/naptha/tesseract.js/workflows/Node.js%20CI/badge.svg) |
||||
![CodeQL](https://github.com/naptha/tesseract.js/workflows/CodeQL/badge.svg) |
||||
[![Gitpod Ready-to-Code](https://img.shields.io/badge/Gitpod-ready--to--code-blue?logo=gitpod)](https://github.com/naptha/tesseract.js) |
||||
[![Financial Contributors on Open Collective](https://opencollective.com/tesseractjs/all/badge.svg?label=financial+contributors)](https://opencollective.com/tesseractjs) [![npm version](https://badge.fury.io/js/tesseract.js.svg)](https://badge.fury.io/js/tesseract.js) |
||||
[![Maintenance](https://img.shields.io/badge/Maintained%3F-yes-green.svg)](https://github.com/naptha/tesseract.js/graphs/commit-activity) |
||||
[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) |
||||
[![Code Style](https://badgen.net/badge/code%20style/airbnb/ff5a5f?icon=airbnb)](https://github.com/airbnb/javascript) |
||||
[![Downloads Total](https://img.shields.io/npm/dt/tesseract.js.svg)](https://www.npmjs.com/package/tesseract.js) |
||||
[![Downloads Month](https://img.shields.io/npm/dm/tesseract.js.svg)](https://www.npmjs.com/package/tesseract.js) |
||||
|
||||
Tesseract.js is a javascript library that gets words in [almost any language](./docs/tesseract_lang_list.md) out of images. ([Demo](http://tesseract.projectnaptha.com/)) |
||||
|
||||
Image Recognition |
||||
|
||||
[![fancy demo gif](./docs/images/demo.gif)](http://tesseract.projectnaptha.com) |
||||
|
||||
Video Real-time Recognition |
||||
|
||||
<p align="center"> |
||||
<a href="https://github.com/jeromewu/tesseract.js-video"><img alt="Tesseract.js Video" src="./docs/images/video-demo.gif"></a> |
||||
</p> |
||||
|
||||
|
||||
Tesseract.js wraps an [emscripten](https://github.com/kripken/emscripten) [port](https://github.com/naptha/tesseract.js-core) of the [Tesseract](https://github.com/tesseract-ocr/tesseract) [OCR](https://en.wikipedia.org/wiki/Optical_character_recognition) Engine. |
||||
It works in the browser using [webpack](https://webpack.js.org/) or plain script tags with a [CDN](#CDN) and on the server with [Node.js](https://nodejs.org/en/). |
||||
After you [install it](#installation), using it is as simple as: |
||||
|
||||
```javascript |
||||
import Tesseract from 'tesseract.js'; |
||||
|
||||
Tesseract.recognize( |
||||
'https://tesseract.projectnaptha.com/img/eng_bw.png', |
||||
'eng', |
||||
{ logger: m => console.log(m) } |
||||
).then(({ data: { text } }) => { |
||||
console.log(text); |
||||
}) |
||||
``` |
||||
|
||||
Or, in a more imperative style: |
||||
|
||||
```javascript |
||||
import { createWorker } from 'tesseract.js'; |
||||
|
||||
const worker = createWorker({ |
||||
logger: m => console.log(m) |
||||
}); |
||||
|
||||
(async () => { |
||||
await worker.load(); |
||||
await worker.loadLanguage('eng'); |
||||
await worker.initialize('eng'); |
||||
const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png'); |
||||
console.log(text); |
||||
await worker.terminate(); |
||||
})(); |
||||
``` |
||||
|
||||
[Check out the docs](#documentation) for a full explanation of the API. |
||||
|
||||
## Major changes in v3 |
||||
- Significantly faster performance |
||||
- Runtime reduction of 84% for Browser and 96% for Node.js when recognizing the [example images](./examples/data) |
||||
- Upgrade to Tesseract v5.1.0 (using emscripten 3.1.18) |
||||
- Added SIMD-enabled build for supported devices |
||||
- Added support: |
||||
- Node.js version 18 |
||||
- Removed support: |
||||
- ASM.js version, any other old versions of Tesseract.js-core (<3.0.0) |
||||
- Node.js versions 10 and 12 |
||||
|
||||
## Major changes in v2 |
||||
- Upgrade to tesseract v4.1.1 (using emscripten 1.39.10 upstream) |
||||
- Support multiple languages at the same time, eg: eng+chi\_tra for English and Traditional Chinese |
||||
- Supported image formats: png, jpg, bmp, pbm |
||||
- Support WebAssembly (falls back to ASM.js when the browser doesn't support it) |
||||
- Support Typescript |
||||
|
||||
Read a story about v2: <a href="https://jeromewu.github.io/why-i-refactor-tesseract.js-v2/">Why I refactor tesseract.js v2?</a><br> |
||||
Check the <a href="https://github.com/naptha/tesseract.js/tree/support/1.x">support/1.x</a> branch for version 1 |
||||
## Installation |
||||
Tesseract.js works with a `<script>` tag via local copy or CDN, with webpack via `npm` and on Node.js with `npm/yarn`. |
||||
|
||||
|
||||
### CDN |
||||
```html |
||||
<!-- v2 --> |
||||
<script src='https://unpkg.com/tesseract.js@v2.1.0/dist/tesseract.min.js'></script> |
||||
|
||||
<!-- v1 --> |
||||
<script src='https://unpkg.com/tesseract.js@1.0.19/src/index.js'></script> |
||||
``` |
||||
After including the script the `Tesseract` variable will be globally available. |
||||
|
||||
|
||||
### Node.js |
||||
|
||||
**Tesseract.js v3 requires Node.js v14 or higher** |
||||
|
||||
```shell |
||||
# For v3 |
||||
npm install tesseract.js |
||||
yarn add tesseract.js |
||||
|
||||
# For v2 |
||||
npm install tesseract.js@2 |
||||
yarn add tesseract.js@2 |
||||
``` |
||||
|
||||
|
||||
## Documentation |
||||
|
||||
* [Examples](./docs/examples.md) |
||||
* [Image Format](./docs/image-format.md) |
||||
* [API](./docs/api.md) |
||||
* [Local Installation](./docs/local-installation.md) |
||||
* [FAQ](./docs/faq.md) |
||||
|
||||
## Use tesseract.js the way you like! |
||||
|
||||
- Offline Version: https://github.com/jeromewu/tesseract.js-offline |
||||
- Electron Version: https://github.com/jeromewu/tesseract.js-electron |
||||
- Custom Traineddata: https://github.com/jeromewu/tesseract.js-custom-traineddata |
||||
- Chrome Extension #1: https://github.com/jeromewu/tesseract.js-chrome-extension |
||||
- Chrome Extension #2: https://github.com/fxnoob/image-to-text |
||||
- Firefox Extension: https://github.com/gnonio/korporize |
||||
- With Vue: https://github.com/jeromewu/tesseract.js-vue-app |
||||
- With Angular: https://github.com/jeromewu/tesseract.js-angular-app |
||||
- With React: https://github.com/jeromewu/tesseract.js-react-app |
||||
- Typescript: https://github.com/jeromewu/tesseract.js-typescript |
||||
- Video Real-time Recognition: https://github.com/jeromewu/tesseract.js-video |
||||
|
||||
## Contributing |
||||
|
||||
### Development |
||||
To run a development copy of Tesseract.js do the following: |
||||
```shell |
||||
# First we clone the repository |
||||
git clone https://github.com/naptha/tesseract.js.git |
||||
cd tesseract.js |
||||
|
||||
# Then we install the dependencies |
||||
npm install |
||||
|
||||
# And finally we start the development server |
||||
npm start |
||||
``` |
||||
|
||||
The development server will be available at http://localhost:3000/examples/browser/demo.html in your favorite browser. |
||||
It will automatically rebuild `tesseract.dev.js` and `worker.dev.js` when you change files in the **src** folder. |
||||
|
||||
### Online Setup with a single Click |
||||
|
||||
You can use Gitpod (a free, online, VS Code-like IDE) for contributing. With a single click it launches a ready-to-code workspace with the build and start scripts already running, and within a few seconds it spins up the dev server so that you can start contributing straight away without wasting any time. |
||||
|
||||
[![Open in Gitpod](https://gitpod.io/button/open-in-gitpod.svg)](https://gitpod.io/#https://github.com/naptha/tesseract.js/blob/master/examples/browser/demo.html) |
||||
|
||||
### Building Static Files |
||||
To build the compiled static files just execute the following: |
||||
```shell |
||||
npm run build |
||||
``` |
||||
This will output the files into the `dist` directory. |
||||
|
||||
## Contributors |
||||
|
||||
### Code Contributors |
||||
|
||||
This project exists thanks to all the people who contribute. [[Contribute](CONTRIBUTING.md)]. |
||||
<a href="https://github.com/naptha/tesseract.js/graphs/contributors"><img src="https://opencollective.com/tesseractjs/contributors.svg?width=890&button=false" /></a> |
||||
|
||||
### Financial Contributors |
||||
|
||||
Become a financial contributor and help us sustain our community. [[Contribute](https://opencollective.com/tesseractjs/contribute)] |
||||
|
||||
#### Individuals |
||||
|
||||
<a href="https://opencollective.com/tesseractjs"><img src="https://opencollective.com/tesseractjs/individuals.svg?width=890"></a> |
||||
|
||||
#### Organizations |
||||
|
||||
Support this project with your organization. Your logo will show up here with a link to your website. [[Contribute](https://opencollective.com/tesseractjs/contribute)] |
||||
|
||||
<a href="https://opencollective.com/tesseractjs/organization/0/website"><img src="https://opencollective.com/tesseractjs/organization/0/avatar.svg"></a> |
||||
<a href="https://opencollective.com/tesseractjs/organization/1/website"><img src="https://opencollective.com/tesseractjs/organization/1/avatar.svg"></a> |
||||
<a href="https://opencollective.com/tesseractjs/organization/2/website"><img src="https://opencollective.com/tesseractjs/organization/2/avatar.svg"></a> |
||||
<a href="https://opencollective.com/tesseractjs/organization/3/website"><img src="https://opencollective.com/tesseractjs/organization/3/avatar.svg"></a> |
||||
<a href="https://opencollective.com/tesseractjs/organization/4/website"><img src="https://opencollective.com/tesseractjs/organization/4/avatar.svg"></a> |
||||
<a href="https://opencollective.com/tesseractjs/organization/5/website"><img src="https://opencollective.com/tesseractjs/organization/5/avatar.svg"></a> |
||||
<a href="https://opencollective.com/tesseractjs/organization/6/website"><img src="https://opencollective.com/tesseractjs/organization/6/avatar.svg"></a> |
||||
<a href="https://opencollective.com/tesseractjs/organization/7/website"><img src="https://opencollective.com/tesseractjs/organization/7/avatar.svg"></a> |
||||
<a href="https://opencollective.com/tesseractjs/organization/8/website"><img src="https://opencollective.com/tesseractjs/organization/8/avatar.svg"></a> |
||||
<a href="https://opencollective.com/tesseractjs/organization/9/website"><img src="https://opencollective.com/tesseractjs/organization/9/avatar.svg"></a> |
||||
|
@ -1,60 +0,0 @@
@@ -1,60 +0,0 @@
|
||||
var Tesseract = {};

/**
 * Recognize text in an image by handing it to a freshly created Web Worker.
 *
 * Accepted call shapes:
 *   recognize(image)
 *   recognize(image, 'eng')
 *   recognize(image, { lang: 'eng' })
 *   recognize(image, callback)
 *   recognize(image, langOrOptions, callback)
 *
 * @param {*} image - A canvas (anything exposing `getContext`), a 2D context
 *   (anything exposing `getImageData`), an IMG or VIDEO element, or any other
 *   value, which is posted to the worker unchanged.
 * @param {(string|Object|Function)} [options] - A language string, an options
 *   object (its `lang` property selects the language, default 'eng'), or the
 *   callback itself when no options are needed.
 * @param {Function} [callback] - Node-style `(err, result)` callback. When it
 *   is supplied, nothing is returned.
 * @returns {(Promise|undefined)} Without a callback, a Promise that resolves
 *   with `result` or rejects with `err` from the worker's reply message.
 */
Tesseract.recognize = function (image, options, callback) {
  // Normalize the overloaded second argument BEFORE touching its properties;
  // reading `options.lang` first used to throw when `options` was omitted.
  if (typeof options === 'function') {
    callback = options;
    options = {};
  }

  var lang;
  if (typeof options === 'string') {
    lang = options;
    options = {};
  } else if (options === undefined || options === null) {
    options = {};
  } else {
    lang = options.lang;
  }
  if (typeof lang === 'undefined') {
    lang = 'eng';
  }

  // Reduce canvas / <img> / <video> inputs to a 2D context ...
  if (image.getContext) {
    image = image.getContext('2d');
  } else if (image.tagName === 'IMG' || image.tagName === 'VIDEO') {
    var isImg = image.tagName === 'IMG';
    var canvas = document.createElement('canvas');
    canvas.width = isImg ? image.naturalWidth : image.videoWidth;
    canvas.height = isImg ? image.naturalHeight : image.videoHeight;
    var ctx = canvas.getContext('2d');
    ctx.drawImage(image, 0, 0);
    image = ctx;
  }
  // ... and a 2D context to the ImageData covering its whole canvas.
  if (image.getImageData) {
    image = image.getImageData(0, 0, image.canvas.width, image.canvas.height);
  }

  var worker = new Worker('./worker.js');
  // The same message shape is posted on both paths; the callback path used to
  // drop `options`.
  var message = { image: image, lang: lang, options: options };

  if (typeof callback === 'function') {
    worker.onmessage = function (e) {
      callback(e.data.err, e.data.result);
    };
    worker.postMessage(message);
    return undefined;
  }

  return new Promise(function (resolve, reject) {
    worker.onmessage = function (e) {
      if (e.data.err) {
        reject(e.data.err);
      } else {
        resolve(e.data.result);
      }
    };
    worker.postMessage(message);
  });
};
@ -0,0 +1,448 @@
@@ -0,0 +1,448 @@
|
||||
# API |
||||
|
||||
- [createWorker()](#create-worker) |
||||
- [Worker.load](#worker-load) |
||||
- [Worker.writeText](#worker-writeText) |
||||
- [Worker.readText](#worker-readText) |
||||
- [Worker.removeFile](#worker-removeFile) |
||||
- [Worker.FS](#worker-FS) |
||||
- [Worker.loadLanguage](#worker-load-language) |
||||
- [Worker.initialize](#worker-initialize) |
||||
- [Worker.setParameters](#worker-set-parameters) |
||||
- [Worker.recognize](#worker-recognize) |
||||
- [Worker.detect](#worker-detect) |
||||
- [Worker.terminate](#worker-terminate) |
||||
- [createScheduler()](#create-scheduler) |
||||
- [Scheduler.addWorker](#scheduler-add-worker) |
||||
- [Scheduler.addJob](#scheduler-add-job) |
||||
- [Scheduler.getQueueLen](#scheduler-get-queue-len) |
||||
- [Scheduler.getNumWorkers](#scheduler-get-num-workers) |
||||
- [setLogging()](#set-logging) |
||||
- [recognize()](#recognize) |
||||
- [detect()](#detect) |
||||
- [PSM](#psm) |
||||
- [OEM](#oem) |
||||
|
||||
--- |
||||
|
||||
<a name="create-worker"></a> |
||||
## createWorker(options): Worker |
||||
|
||||
createWorker is a factory function that creates a tesseract worker. A worker is basically a Web Worker in the browser and a Child Process in Node. |
||||
|
||||
**Arguments:** |
||||
|
||||
- `options` an object of customized options |
||||
- `corePath` path for tesseract-core.js script |
||||
- `langPath` path for downloading traineddata, do not include `/` at the end of the path |
||||
- `workerPath` path for downloading worker script |
||||
- `dataPath` path for saving traineddata in WebAssembly file system, not common to modify |
||||
- `cachePath` path for the cached traineddata, more useful for Node; in the browser it only changes the key in IndexedDB |
||||
- `cacheMethod` a string to indicate the method of cache management, should be one of the following options |
||||
- write: read cache and write back (default method) |
||||
- readOnly: read cache and not to write back |
||||
- refresh: not to read cache and write back |
||||
- none: not to read cache and not to write back |
||||
- `workerBlobURL` a boolean to define whether to use Blob URL for worker script, default: true |
||||
- `gzip` a boolean to define whether the traineddata from the remote is gzipped, default: true |
||||
- `logger` a function to log the progress, a quick example is `m => console.log(m)` |
||||
- `errorHandler` a function to handle worker errors, a quick example is `err => console.error(err)` |
||||
|
||||
|
||||
**Examples:** |
||||
|
||||
```javascript |
||||
const { createWorker } = Tesseract; |
||||
const worker = createWorker({ |
||||
langPath: '...', |
||||
logger: m => console.log(m), |
||||
}); |
||||
``` |
||||
|
||||
## Worker |
||||
|
||||
A Worker helps you to do the OCR related tasks. It takes a few steps to set up a Worker before it is fully functional. The full flow is: |
||||
|
||||
- load |
||||
- FS functions // optional |
||||
- loadLanguage |
||||
- initialize |
||||
- setParameters // optional |
||||
- recognize or detect |
||||
- terminate |
||||
|
||||
Each function is async, so using async/await or Promise is required. When it is resolved, you get an object: |
||||
|
||||
```json |
||||
{ |
||||
"jobId": "Job-1-123", |
||||
"data": { ... } |
||||
} |
||||
``` |
||||
|
||||
jobId is generated by Tesseract.js, but you can supply your own when calling any of the functions above. |
||||
|
||||
<a name="worker-load"></a> |
||||
### Worker.load(jobId): Promise |
||||
|
||||
Worker.load() loads the tesseract.js-core scripts (downloading them from remote if not present), which makes the Web Worker/Child Process ready for the next action. |
||||
|
||||
**Arguments:** |
||||
|
||||
- `jobId` Please see details above |
||||
|
||||
**Examples:** |
||||
|
||||
```javascript |
||||
(async () => { |
||||
await worker.load(); |
||||
})(); |
||||
``` |
||||
|
||||
<a name="worker-writeText"></a> |
||||
### Worker.writeText(path, text, jobId): Promise |
||||
|
||||
Worker.writeText() writes a text file to the path specified in MEMFS, it is useful when you want to use some features that requires tesseract.js |
||||
to read file from file system. |
||||
|
||||
**Arguments:** |
||||
|
||||
- `path` text file path |
||||
- `text` content of the text file |
||||
- `jobId` Please see details above |
||||
|
||||
**Examples:** |
||||
|
||||
```javascript |
||||
(async () => { |
||||
await worker.writeText('tmp.txt', 'Hi\nTesseract.js\n'); |
||||
})(); |
||||
``` |
||||
|
||||
<a name="worker-readText"></a> |
||||
### Worker.readText(path, jobId): Promise |
||||
|
||||
Worker.readText() reads a text file from the path specified in MEMFS; it is useful when you want to check the content. |
||||
|
||||
**Arguments:** |
||||
|
||||
- `path` text file path |
||||
- `jobId` Please see details above |
||||
|
||||
**Examples:** |
||||
|
||||
```javascript |
||||
(async () => { |
||||
const { data } = await worker.readText('tmp.txt'); |
||||
console.log(data); |
||||
})(); |
||||
``` |
||||
|
||||
<a name="worker-removeFile"></a> |
||||
### Worker.removeFile(path, jobId): Promise |
||||
|
||||
Worker.removeFile() removes a file from MEMFS; it is useful when you want to free memory. |
||||
|
||||
**Arguments:** |
||||
|
||||
- `path` file path |
||||
- `jobId` Please see details above |
||||
|
||||
**Examples:** |
||||
|
||||
```javascript |
||||
(async () => { |
||||
await worker.removeFile('tmp.txt'); |
||||
})(); |
||||
``` |
||||
|
||||
<a name="worker-FS"></a> |
||||
### Worker.FS(method, args, jobId): Promise |
||||
|
||||
Worker.FS() is a generic FS function to do anything you want; you can check [HERE](https://emscripten.org/docs/api_reference/Filesystem-API.html) for all functions. |
||||
|
||||
**Arguments:** |
||||
|
||||
- `method` method name |
||||
- `args` array of arguments to pass |
||||
- `jobId` Please see details above |
||||
|
||||
**Examples:** |
||||
|
||||
```javascript |
||||
(async () => { |
||||
await worker.FS('writeFile', ['tmp.txt', 'Hi\nTesseract.js\n']); |
||||
// equal to: |
||||
// await worker.writeText('tmp.txt', 'Hi\nTesseract.js\n'); |
||||
})(); |
||||
``` |
||||
|
||||
<a name="worker-load-language"></a> |
||||
### Worker.loadLanguage(langs, jobId): Promise |
||||
|
||||
Worker.loadLanguage() loads traineddata from cache or downloads it from remote, and puts the traineddata into the WebAssembly file system. |
||||
|
||||
**Arguments:** |
||||
|
||||
- `langs` a string to indicate the languages whose traineddata to download; multiple languages are concatenated with **+**, ex: **eng+chi\_tra** |
||||
- `jobId` Please see details above |
||||
|
||||
**Examples:** |
||||
|
||||
```javascript |
||||
(async () => { |
||||
await worker.loadLanguage('eng+chi_tra'); |
||||
})(); |
||||
``` |
||||
|
||||
<a name="worker-initialize"></a> |
||||
### Worker.initialize(langs, oem, jobId): Promise |
||||
|
||||
Worker.initialize() initializes the Tesseract API, making sure it is ready for doing OCR tasks. |
||||
|
||||
**Arguments:** |
||||
|
||||
- `langs` a string to indicate the languages loaded by the Tesseract API; it can be a subset of the language traineddata you loaded with Worker.loadLanguage. |
||||
- `oem` an enum to indicate the OCR Engine Mode you use |
||||
- `jobId` Please see details above |
||||
|
||||
**Examples:** |
||||
|
||||
```javascript |
||||
(async () => { |
||||
/** You can load more languages in advance, but use only part of them in Worker.initialize() */ |
||||
await worker.loadLanguage('eng+chi_tra'); |
||||
await worker.initialize('eng'); |
||||
})(); |
||||
``` |
||||
<a name="worker-set-parameters"></a> |
||||
### Worker.setParameters(params, jobId): Promise |
||||
|
||||
Worker.setParameters() sets parameters for the Tesseract API (using SetVariable()). It changes the behavior of Tesseract, and some parameters like tessedit\_char\_whitelist are very useful. |
||||
|
||||
**Arguments:** |
||||
|
||||
- `params` an object with key and value of the parameters |
||||
- `jobId` Please see details above |
||||
|
||||
**Supported Parameters:** |
||||
|
||||
| name | type | default value | description | |
||||
| --------------------------- | ------ | ----------------- | ------------------------------------------------------------------------------------------------------------------------------- | |
||||
| tessedit\_ocr\_engine\_mode | enum | OEM.DEFAULT | Check [HERE](https://github.com/tesseract-ocr/tesseract/blob/4.0.0/src/ccstruct/publictypes.h#L268) for definition of each mode | |
||||
| tessedit\_pageseg\_mode | enum | PSM.SINGLE\_BLOCK | Check [HERE](https://github.com/tesseract-ocr/tesseract/blob/4.0.0/src/ccstruct/publictypes.h#L163) for definition of each mode | |
||||
| tessedit\_char\_whitelist | string | '' | setting white list characters makes the result only contain these characters, useful when the content in the image is limited | |
||||
| preserve\_interword\_spaces | string | '0' | '0' or '1', keeps the space between words | |
||||
| user\_defined\_dpi | string | '' | Define custom dpi, use to fix **Warning: Invalid resolution 0 dpi. Using 70 instead.** | |
||||
| tessjs\_create\_hocr | string | '1' | only 2 values, '0' or '1', when the value is '1', tesseract.js includes hocr in the result | |
||||
| tessjs\_create\_tsv | string | '1' | only 2 values, '0' or '1', when the value is '1', tesseract.js includes tsv in the result | |
||||
| tessjs\_create\_box | string | '0' | only 2 values, '0' or '1', when the value is '1', tesseract.js includes box in the result | |
||||
| tessjs\_create\_unlv | string | '0' | only 2 values, '0' or '1', when the value is '1', tesseract.js includes unlv in the result | |
||||
| tessjs\_create\_osd | string | '0' | only 2 values, '0' or '1', when the value is '1', tesseract.js includes osd in the result | |
||||
|
||||
**Examples:** |
||||
|
||||
```javascript |
||||
(async () => { |
||||
await worker.setParameters({ |
||||
tessedit_char_whitelist: '0123456789', |
||||
}); |
||||
})(); |
||||
``` |
||||
|
||||
<a name="worker-recognize"></a> |
||||
### Worker.recognize(image, options, jobId): Promise |
||||
|
||||
Worker.recognize() provides core function of Tesseract.js as it executes OCR |
||||
|
||||
Figures out what words are in `image`, where the words are in `image`, etc. |
||||
> Note: `image` should be sufficiently high resolution. |
||||
> Often, the same image will get much better results if you upscale it before calling `recognize`. |
||||
|
||||
**Arguments:** |
||||
|
||||
- `image` see [Image Format](./image-format.md) for more details. |
||||
- `options` an object of customized options |
||||
- `rectangle` an object to specify the region you want to recognize in the image, should contain top, left, width and height, see example below. |
||||
- `jobId` Please see details above |
||||
|
||||
**Output:** |
||||
|
||||
**Examples:** |
||||
|
||||
```javascript |
||||
const { createWorker } = Tesseract; |
||||
(async () => { |
||||
const worker = createWorker(); |
||||
await worker.load(); |
||||
await worker.loadLanguage('eng'); |
||||
await worker.initialize('eng'); |
||||
const { data: { text } } = await worker.recognize(image); |
||||
console.log(text); |
||||
})(); |
||||
``` |
||||
|
||||
With rectangle |
||||
|
||||
```javascript |
||||
const { createWorker } = Tesseract; |
||||
(async () => { |
||||
const worker = createWorker(); |
||||
await worker.load(); |
||||
await worker.loadLanguage('eng'); |
||||
await worker.initialize('eng'); |
||||
const { data: { text } } = await worker.recognize(image, { |
||||
rectangle: { top: 0, left: 0, width: 100, height: 100 }, |
||||
}); |
||||
console.log(text); |
||||
})(); |
||||
``` |
||||
|
||||
<a name="worker-detect"></a> |
||||
### Worker.detect(image, jobId): Promise |
||||
|
||||
Worker.detect() performs OSD (Orientation and Script Detection) on the image instead of OCR. |
||||
|
||||
**Arguments:** |
||||
|
||||
- `image` see [Image Format](./image-format.md) for more details. |
||||
- `jobId` Please see details above |
||||
|
||||
**Examples:** |
||||
|
||||
```javascript |
||||
const { createWorker } = Tesseract; |
||||
(async () => { |
||||
const worker = createWorker(); |
||||
await worker.load(); |
||||
await worker.loadLanguage('eng'); |
||||
await worker.initialize('eng'); |
||||
const { data } = await worker.detect(image); |
||||
console.log(data); |
||||
})(); |
||||
``` |
||||
|
||||
<a name="worker-terminate"></a> |
||||
### Worker.terminate(jobId): Promise |
||||
|
||||
Worker.terminate() terminates the worker and cleans up |
||||
|
||||
```javascript |
||||
(async () => { |
||||
await worker.terminate(); |
||||
})(); |
||||
``` |
||||
|
||||
<a name="create-scheduler"></a> |
||||
## createScheduler(): Scheduler |
||||
|
||||
createScheduler() is a factory function that creates a scheduler. A scheduler manages a job queue and a pool of workers so that multiple workers can work together, which is useful when you want to speed up processing. |
||||
|
||||
**Examples:** |
||||
|
||||
```javascript |
||||
const { createScheduler } = Tesseract; |
||||
const scheduler = createScheduler(); |
||||
``` |
||||
|
||||
### Scheduler |
||||
|
||||
<a name="scheduler-add-worker"></a> |
||||
### Scheduler.addWorker(worker): string |
||||
|
||||
Scheduler.addWorker() adds a worker to the worker pool inside the scheduler. It is suggested to add each worker to only one scheduler. |
||||
|
||||
**Arguments:** |
||||
|
||||
- `worker` see Worker above |
||||
|
||||
**Examples:** |
||||
|
||||
```javascript |
||||
const { createWorker, createScheduler } = Tesseract; |
||||
const scheduler = createScheduler(); |
||||
const worker = createWorker(); |
||||
scheduler.addWorker(worker); |
||||
``` |
||||
|
||||
<a name="scheduler-add-job"></a> |
||||
### Scheduler.addJob(action, ...payload): Promise |
||||
|
||||
Scheduler.addJob() adds a job to the job queue and scheduler waits and finds an idle worker to take the job. |
||||
|
||||
**Arguments:** |
||||
|
||||
- `action` a string to indicate the action you want to do, right now only **recognize** and **detect** are supported |
||||
- `payload` an arbitrary number of args depending on the action you called. |
||||
|
||||
**Examples:** |
||||
|
||||
```javascript |
||||
(async () => { |
||||
const { data: { text } } = await scheduler.addJob('recognize', image, options); |
||||
const { data } = await scheduler.addJob('detect', image); |
||||
})(); |
||||
``` |
||||
|
||||
<a name="scheduler-get-queue-len"></a> |
||||
### Scheduler.getQueueLen(): number |
||||
|
||||
Scheduler.getQueueLen() returns the length of the job queue. |
||||
|
||||
<a name="scheduler-get-num-workers"></a> |
||||
### Scheduler.getNumWorkers(): number |
||||
|
||||
Scheduler.getNumWorkers() returns number of workers added into the scheduler |
||||
|
||||
<a name="scheduler-terminate"></a> |
||||
### Scheduler.terminate(): Promise |
||||
|
||||
Scheduler.terminate() terminates all workers added, useful to do quick clean up. |
||||
|
||||
**Examples:** |
||||
|
||||
```javascript |
||||
(async () => { |
||||
await scheduler.terminate(); |
||||
})(); |
||||
``` |
||||
|
||||
<a name="set-logging"></a> |
||||
## setLogging(logging: boolean) |
||||
|
||||
setLogging() sets the logging flag, you can `setLogging(true)` to see detailed information, useful for debugging. |
||||
|
||||
**Arguments:** |
||||
|
||||
- `logging` boolean to define whether to see detailed logs, default: false |
||||
|
||||
**Examples:** |
||||
|
||||
```javascript |
||||
const { setLogging } = Tesseract; |
||||
setLogging(true); |
||||
``` |
||||
|
||||
<a name="recognize"></a> |
||||
## recognize(image, langs, options): Promise |
||||
|
||||
recognize() is a shortcut for quickly running a single recognize() task. It is not recommended for use in real applications, but it is useful when you want to save some time. |
||||
|
||||
See [Tesseract.js](../src/Tesseract.js) |
||||
|
||||
<a name="detect"></a> |
||||
## detect(image, options): Promise |
||||
|
||||
Same background as recognize(), but it does detect instead. |
||||
|
||||
See [Tesseract.js](../src/Tesseract.js) |
||||
|
||||
<a name="psm"></a> |
||||
## PSM |
||||
|
||||
See [PSM.js](../src/constants/PSM.js) |
||||
|
||||
<a name="oem"></a> |
||||
## OEM |
||||
|
||||
See [OEM.js](../src/constants/OEM.js) |
@ -0,0 +1,226 @@
@@ -0,0 +1,226 @@
|
||||
# Tesseract.js Examples |
||||
|
||||
You can also check the [examples](../examples) folder. |
||||
|
||||
### basic |
||||
|
||||
```javascript |
||||
const { createWorker } = require('tesseract.js'); |
||||
|
||||
const worker = createWorker(); |
||||
|
||||
(async () => { |
||||
await worker.load(); |
||||
await worker.loadLanguage('eng'); |
||||
await worker.initialize('eng'); |
||||
const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png'); |
||||
console.log(text); |
||||
await worker.terminate(); |
||||
})(); |
||||
``` |
||||
|
||||
### with detailed progress |
||||
|
||||
```javascript |
||||
const { createWorker } = require('tesseract.js'); |
||||
|
||||
const worker = createWorker({ |
||||
logger: m => console.log(m), // Add logger here |
||||
}); |
||||
|
||||
(async () => { |
||||
await worker.load(); |
||||
await worker.loadLanguage('eng'); |
||||
await worker.initialize('eng'); |
||||
const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png'); |
||||
console.log(text); |
||||
await worker.terminate(); |
||||
})(); |
||||
``` |
||||
|
||||
### with multiple languages, separate by '+' |
||||
|
||||
```javascript |
||||
const { createWorker } = require('tesseract.js'); |
||||
|
||||
const worker = createWorker(); |
||||
|
||||
(async () => { |
||||
await worker.load(); |
||||
await worker.loadLanguage('eng+chi_tra'); |
||||
await worker.initialize('eng+chi_tra'); |
||||
const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png'); |
||||
console.log(text); |
||||
await worker.terminate(); |
||||
})(); |
||||
``` |
||||
### with whitelist char (^2.0.0-beta.1) |
||||
|
||||
```javascript |
||||
const { createWorker } = require('tesseract.js'); |
||||
|
||||
const worker = createWorker(); |
||||
|
||||
(async () => { |
||||
await worker.load(); |
||||
await worker.loadLanguage('eng'); |
||||
await worker.initialize('eng'); |
||||
await worker.setParameters({ |
||||
tessedit_char_whitelist: '0123456789', |
||||
}); |
||||
const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png'); |
||||
console.log(text); |
||||
await worker.terminate(); |
||||
})(); |
||||
``` |
||||
|
||||
### with different pageseg mode (^2.0.0-beta.1) |
||||
|
||||
Check here for more details of pageseg mode: https://github.com/tesseract-ocr/tesseract/blob/4.0.0/src/ccstruct/publictypes.h#L163 |
||||
|
||||
```javascript |
||||
const { createWorker, PSM } = require('tesseract.js'); |
||||
|
||||
const worker = createWorker(); |
||||
|
||||
(async () => { |
||||
await worker.load(); |
||||
await worker.loadLanguage('eng'); |
||||
await worker.initialize('eng'); |
||||
await worker.setParameters({ |
||||
tessedit_pageseg_mode: PSM.SINGLE_BLOCK, |
||||
}); |
||||
const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png'); |
||||
console.log(text); |
||||
await worker.terminate(); |
||||
})(); |
||||
``` |
||||
|
||||
### with pdf output (^2.0.0-beta.1) |
||||
|
||||
Please check **examples** folder for details. |
||||
|
||||
Browser: [download-pdf.html](../examples/browser/download-pdf.html) |
||||
Node: [download-pdf.js](../examples/node/download-pdf.js) |
||||
|
||||
### with only part of the image (^2.0.1) |
||||
|
||||
**One rectangle** |
||||
|
||||
```javascript |
||||
const { createWorker } = require('tesseract.js'); |
||||
|
||||
const worker = createWorker(); |
||||
const rectangle = { left: 0, top: 0, width: 500, height: 250 }; |
||||
|
||||
(async () => { |
||||
await worker.load(); |
||||
await worker.loadLanguage('eng'); |
||||
await worker.initialize('eng'); |
||||
const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png', { rectangle }); |
||||
console.log(text); |
||||
await worker.terminate(); |
||||
})(); |
||||
``` |
||||
|
||||
**Multiple Rectangles** |
||||
|
||||
```javascript |
||||
const { createWorker } = require('tesseract.js'); |
||||
|
||||
const worker = createWorker(); |
||||
const rectangles = [ |
||||
{ |
||||
left: 0, |
||||
top: 0, |
||||
width: 500, |
||||
height: 250, |
||||
}, |
||||
{ |
||||
left: 500, |
||||
top: 0, |
||||
width: 500, |
||||
height: 250, |
||||
}, |
||||
]; |
||||
|
||||
(async () => { |
||||
await worker.load(); |
||||
await worker.loadLanguage('eng'); |
||||
await worker.initialize('eng'); |
||||
const values = []; |
||||
for (let i = 0; i < rectangles.length; i++) { |
||||
const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png', { rectangle: rectangles[i] }); |
||||
values.push(text); |
||||
} |
||||
console.log(values); |
||||
await worker.terminate(); |
||||
})(); |
||||
``` |
||||
|
||||
**Multiple Rectangles (with scheduler to do recognition in parallel)** |
||||
|
||||
```javascript |
||||
const { createWorker, createScheduler } = require('tesseract.js'); |
||||
|
||||
const scheduler = createScheduler(); |
||||
const worker1 = createWorker(); |
||||
const worker2 = createWorker(); |
||||
const rectangles = [ |
||||
{ |
||||
left: 0, |
||||
top: 0, |
||||
width: 500, |
||||
height: 250, |
||||
}, |
||||
{ |
||||
left: 500, |
||||
top: 0, |
||||
width: 500, |
||||
height: 250, |
||||
}, |
||||
]; |
||||
|
||||
(async () => { |
||||
await worker1.load(); |
||||
await worker2.load(); |
||||
await worker1.loadLanguage('eng'); |
||||
await worker2.loadLanguage('eng'); |
||||
await worker1.initialize('eng'); |
||||
await worker2.initialize('eng'); |
||||
scheduler.addWorker(worker1); |
||||
scheduler.addWorker(worker2); |
||||
const results = await Promise.all(rectangles.map((rectangle) => ( |
||||
scheduler.addJob('recognize', 'https://tesseract.projectnaptha.com/img/eng_bw.png', { rectangle }) |
||||
))); |
||||
console.log(results.map(r => r.data.text)); |
||||
await scheduler.terminate(); |
||||
})(); |
||||
``` |
||||
|
||||
### with multiple workers to speed up (^2.0.0-beta.1) |
||||
|
||||
```javascript |
||||
const { createWorker, createScheduler } = require('tesseract.js'); |
||||
|
||||
const scheduler = createScheduler(); |
||||
const worker1 = createWorker(); |
||||
const worker2 = createWorker(); |
||||
|
||||
(async () => { |
||||
await worker1.load(); |
||||
await worker2.load(); |
||||
await worker1.loadLanguage('eng'); |
||||
await worker2.loadLanguage('eng'); |
||||
await worker1.initialize('eng'); |
||||
await worker2.initialize('eng'); |
||||
scheduler.addWorker(worker1); |
||||
scheduler.addWorker(worker2); |
||||
/** Add 10 recognition jobs */ |
||||
const results = await Promise.all(Array(10).fill(0).map(() => ( |
||||
scheduler.addJob('recognize', 'https://tesseract.projectnaptha.com/img/eng_bw.png') |
||||
))) |
||||
console.log(results); |
||||
await scheduler.terminate(); // It also terminates all workers. |
||||
})(); |
||||
``` |
@ -0,0 +1,42 @@
@@ -0,0 +1,42 @@
|
||||
FAQ |
||||
=== |
||||
|
||||
## How does tesseract.js download and keep \*.traineddata? |
||||
|
||||
The language model is downloaded by `worker.loadLanguage()` and you need to pass the langs to `worker.initialize()`. |
||||
|
||||
During the downloading of language model, Tesseract.js will first check if \*.traineddata already exists. (browser: [IndexedDB](https://developer.mozilla.org/en-US/docs/Web/API/IndexedDB_API), Node.js: fs, in the folder you execute the command) If the \*.traineddata doesn't exist, it will fetch \*.traineddata.gz from [tessdata](https://github.com/naptha/tessdata), ungzip and store in IndexedDB or fs, you can delete it manually and it will download again for you. |
||||
|
||||
## How can I train my own \*.traineddata? |
||||
|
||||
For tesseract.js v2, check [TrainingTesseract 4.00](https://tesseract-ocr.github.io/tessdoc/TrainingTesseract-4.00) |
||||
|
||||
For tesseract.js v1, check [Training Tesseract 3.03–3.05](https://tesseract-ocr.github.io/tessdoc/Training-Tesseract-3.03%E2%80%933.05) |
||||
|
||||
## How can I get HOCR, TSV, Box, UNLV, OSD? |
||||
|
||||
Starting from 2.0.0-beta.1, you can get all of this information in the final result. |
||||
|
||||
```javascript |
||||
import { createWorker } from 'tesseract.js'; |
||||
const worker = createWorker({ |
||||
logger: m => console.log(m) |
||||
}); |
||||
|
||||
(async () => { |
||||
await worker.load(); |
||||
await worker.loadLanguage('eng'); |
||||
await worker.initialize('eng'); |
||||
await worker.setParameters({ |
||||
tessjs_create_box: '1', |
||||
tessjs_create_unlv: '1', |
||||
tessjs_create_osd: '1', |
||||
}); |
||||
const { data: { text, hocr, tsv, box, unlv } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png'); |
||||
console.log(text); |
||||
console.log(hocr); |
||||
console.log(tsv); |
||||
console.log(box); |
||||
console.log(unlv); |
||||
})(); |
||||
``` |
@ -0,0 +1,18 @@
@@ -0,0 +1,18 @@
|
||||
# Image Format |
||||
|
||||
The main Tesseract.js functions (ex. recognize, detect) take an `image` parameter. The image formats and data types supported are listed below. |
||||
|
||||
Supported Image Formats: **bmp, jpg, png, pbm, webp** |
||||
|
||||
For browser and Node, supported data types are: |
||||
- string with base64 encoded image (fits `data:image\/([a-zA-Z]*);base64,([^"]*)` regexp) |
||||
- buffer |
||||
|
||||
For browser only, supported data types are: |
||||
- `File` or `Blob` object |
||||
- `img` or `canvas` element |
||||
|
||||
For Node only, supported data types are: |
||||
- string containing a path to local image |
||||
|
||||
Note: images must be a supported image format **and** a supported data type. For example, a buffer containing a png image is supported. A buffer containing raw pixel data is not supported. |
After Width: | Height: | Size: 97 KiB |
After Width: | Height: | Size: 105 KiB |
After Width: | Height: | Size: 237 KiB |
@ -0,0 +1,38 @@
@@ -0,0 +1,38 @@
|
||||
## Local Installation |
||||
|
||||
Check here for examples: https://github.com/naptha/tesseract.js/blob/master/docs/examples.md |
||||
|
||||
In browser environment, `tesseract.js` simply provides the API layer. Internally, it opens a WebWorker to handle requests. That worker itself loads code from the Emscripten-built `tesseract.js-core` which itself is hosted on a CDN. Then it dynamically loads language files hosted on another CDN. |
||||
|
||||
Because of this, we recommend loading `tesseract.js` from a CDN. But if you really need to have all your files local, you can pass extra options to `Tesseract.recognize` or `createWorker` to specify custom paths for workers, languages, and core. |
||||
|
||||
In Node.js environment, the only path you may want to customize is languages/langPath. |
||||
|
||||
```javascript |
||||
Tesseract.recognize(image, langs, { |
||||
workerPath: 'https://unpkg.com/tesseract.js@v2.0.0/dist/worker.min.js', |
||||
langPath: 'https://tessdata.projectnaptha.com/4.0.0', |
||||
corePath: 'https://unpkg.com/tesseract.js-core@v2.0.0/tesseract-core.wasm.js', |
||||
}) |
||||
``` |
||||
|
||||
Or |
||||
|
||||
```javascript |
||||
const worker = createWorker({ |
||||
workerPath: 'https://unpkg.com/tesseract.js@v2.0.0/dist/worker.min.js', |
||||
langPath: 'https://tessdata.projectnaptha.com/4.0.0', |
||||
corePath: 'https://unpkg.com/tesseract.js-core@v2.0.0/tesseract-core.wasm.js', |
||||
}); |
||||
``` |
||||
|
||||
### workerPath |
||||
A string specifying the location of the [worker.js](./dist/worker.min.js) file. |
||||
|
||||
### langPath |
||||
A string specifying the location of the tesseract language files, with default value 'https://tessdata.projectnaptha.com/4.0.0'. Language file URLs are calculated according to the formula `langPath + '/' + langCode + '.traineddata.gz'`. |
||||
|
||||
### corePath |
||||
A string specifying the location of the [tesseract.js-core library](https://github.com/naptha/tesseract.js-core), with default value 'https://unpkg.com/tesseract.js-core@v2.0.0/tesseract-core.wasm.js' (fallback to tesseract-core.asm.js when WebAssembly is not available). |
||||
|
||||
Another WASM option is 'https://unpkg.com/tesseract.js-core@v2.0.0/tesseract-core.js' which is a script that loads 'https://unpkg.com/tesseract.js-core@v2.0.0/tesseract-core.wasm'. But it fails to fetch at this moment. |
@ -0,0 +1,3 @@
@@ -0,0 +1,3 @@
|
||||
# Tesseract Languages |
||||
|
||||
Please check [HERE](https://tesseract-ocr.github.io/tessdoc/Data-Files#data-files-for-version-400-november-29-2016) for supported languages |
@ -1,17 +0,0 @@
@@ -1,17 +0,0 @@
|
||||
<canvas id="c"></canvas> |
||||
<script type="text/javascript" src="./Tesseract.js"></script> |
||||
<script type="text/javascript"> |
||||
var canvas = document.getElementById('c') |
||||
canvas.width = 400 |
||||
canvas.height = 400 |
||||
var ctx = canvas.getContext('2d'); |
||||
ctx.font = '30px "Arial Black"' |
||||
ctx.fillText('Hell0 World', 100, 40) |
||||
// ctx.fillText("囚犯離奇掙脫囚犯離奇掙脫", 100, 40) |
||||
ctx.font = '30px "Times New Roman"' |
||||
ctx.fillText('from beyond', 100, 80) |
||||
// ctx.fillText('2小時可換乘2次2小時可換乘2次', 100, 80) |
||||
ctx.font = '30px sans-serif' |
||||
ctx.fillText('the Cosmic Void', 100, 120) |
||||
Tesseract.recognize(canvas,{tessedit_char_blacklist:'e'}).then( function(d){ console.log(d) } ) |
||||
</script> |
@ -0,0 +1,37 @@
@@ -0,0 +1,37 @@
|
||||
<!DOCTYPE HTML> |
||||
<html> |
||||
<head> |
||||
<script src="/dist/tesseract.dev.js"></script> |
||||
</head> |
||||
<body> |
||||
<input type="file" id="uploader"> |
||||
<script> |
||||
const recognize = function(evt){ |
||||
const files = evt.target.files; |
||||
const worker = Tesseract.createWorker({ |
||||
/* |
||||
* As Edge doesn't support WebAssembly, |
||||
* here we force to use asm.js version. |
||||
*/ |
||||
corePath: '../../node_modules/tesseract.js-core/tesseract-core.asm.js', |
||||
logger: function(m){console.log(m);}, |
||||
/* |
||||
* As there is no indexedDB in earlier version |
||||
* of Edge, here we disable cache. |
||||
*/ |
||||
cacheMethod: 'none', |
||||
}); |
||||
Promise.resolve() |
||||
.then(() => worker.load()) |
||||
.then(() => worker.loadLanguage('eng')) |
||||
.then(() => worker.initialize('eng')) |
||||
.then(() => worker.recognize(files[0])) |
||||
.then((ret) => { |
||||
console.log(ret.data.text); |
||||
}); |
||||
} |
||||
const elm = document.getElementById('uploader'); |
||||
elm.addEventListener('change', recognize); |
||||
</script> |
||||
</body> |
||||
</html> |
@ -0,0 +1,19 @@
@@ -0,0 +1,19 @@
|
||||
<html> |
||||
<head> |
||||
<script src="/dist/tesseract.dev.js"></script> |
||||
</head> |
||||
<body> |
||||
<input type="file" id="uploader"> |
||||
<script> |
||||
const recognize = async ({ target: { files } }) => { |
||||
const { data: { text } } = await Tesseract.recognize(files[0], 'eng', { |
||||
corePath: '../../node_modules/tesseract.js-core/tesseract-core.wasm.js', |
||||
logger: m => console.log(m), |
||||
}); |
||||
console.log(text); |
||||
} |
||||
const elm = document.getElementById('uploader'); |
||||
elm.addEventListener('change', recognize); |
||||
</script> |
||||
</body> |
||||
</html> |
@ -0,0 +1,33 @@
@@ -0,0 +1,33 @@
|
||||
<html> |
||||
<head> |
||||
<script src="/dist/tesseract.dev.js"></script> |
||||
</head> |
||||
<body> |
||||
<textarea id="message">Working...</textarea> |
||||
|
||||
<script> |
||||
const { createWorker } = Tesseract; |
||||
const worker = createWorker(); |
||||
(async () => { |
||||
await worker.load(); |
||||
await worker.loadLanguage('eng'); |
||||
await worker.initialize('eng'); |
||||
|
||||
const fileArr = ["../data/meditations.jpg", "../data/tyger.jpg", "../data/testocr.png"]; |
||||
let timeTotal = 0; |
||||
for (let file of fileArr) { |
||||
let time1 = Date.now(); |
||||
for (let i=0; i < 10; i++) { |
||||
await worker.recognize(file); |
||||
} |
||||
let time2 = Date.now(); |
||||
const timeDif = (time2 - time1) / 1e3; |
||||
timeTotal += timeDif; |
||||
document.getElementById('message').innerHTML += "\n" + file + " [x10] runtime: " + timeDif + "s"; |
||||
} |
||||
document.getElementById('message').innerHTML += "\nTotal runtime: " + timeTotal + "s"; |
||||
|
||||
})(); |
||||
</script> |
||||
</body> |
||||
</html> |
@ -0,0 +1,162 @@
@@ -0,0 +1,162 @@
|
||||
<script src="/dist/tesseract.dev.js"></script> |
||||
<script> |
||||
|
||||
function progressUpdate(packet){ |
||||
var log = document.getElementById('log'); |
||||
|
||||
if(log.firstChild && log.firstChild.status === packet.status){ |
||||
if('progress' in packet){ |
||||
var progress = log.firstChild.querySelector('progress') |
||||
progress.value = packet.progress |
||||
} |
||||
}else{ |
||||
var line = document.createElement('div'); |
||||
line.status = packet.status; |
||||
var status = document.createElement('div') |
||||
status.className = 'status' |
||||
status.appendChild(document.createTextNode(packet.status)) |
||||
line.appendChild(status) |
||||
|
||||
if('progress' in packet){ |
||||
var progress = document.createElement('progress') |
||||
progress.value = packet.progress |
||||
progress.max = 1 |
||||
line.appendChild(progress) |
||||
} |
||||
|
||||
|
||||
if(packet.status == 'done'){ |
||||
var pre = document.createElement('pre') |
||||
pre.appendChild(document.createTextNode(packet.data.data.text)) |
||||
line.innerHTML = '' |
||||
line.appendChild(pre) |
||||
|
||||
} |
||||
|
||||
log.insertBefore(line, log.firstChild) |
||||
} |
||||
} |
||||
|
||||
async function recognizeFile(file) { |
||||
document.querySelector("#log").innerHTML = '' |
||||
const corePath = window.navigator.userAgent.indexOf("Edge") > -1 |
||||
? '../../node_modules/tesseract.js-core/tesseract-core.asm.js' |
||||
: '../../node_modules/tesseract.js-core/tesseract-core.wasm.js'; |
||||
|
||||
const lang = document.querySelector('#langsel').value |
||||
const data = await Tesseract.recognize(file, lang, { |
||||
corePath, |
||||
logger: progressUpdate, |
||||
}); |
||||
progressUpdate({ status: 'done', data }); |
||||
} |
||||
</script> |
||||
<select id="langsel" onchange="window.lastFile && recognizeFile(window.lastFile)"> |
||||
<option value='afr' > Afrikaans </option> |
||||
<option value='ara' > Arabic </option> |
||||
<option value='aze' > Azerbaijani </option> |
||||
<option value='bel' > Belarusian </option> |
||||
<option value='ben' > Bengali </option> |
||||
<option value='bul' > Bulgarian </option> |
||||
<option value='cat' > Catalan </option> |
||||
<option value='ces' > Czech </option> |
||||
<option value='chi_sim' > Chinese </option> |
||||
<option value='chi_tra' > Traditional Chinese </option> |
||||
<option value='chr' > Cherokee </option> |
||||
<option value='dan' > Danish </option> |
||||
<option value='deu' > German </option> |
||||
<option value='ell' > Greek </option> |
||||
<option value='eng' selected> English </option> |
||||
<option value='enm' > English (Old) </option> |
||||
<option value='meme' > Internet Meme </option> |
||||
<option value='epo' > Esperanto </option> |
||||
<option value='epo_alt' > Esperanto alternative </option> |
||||
<option value='est' > Estonian </option> |
||||
<option value='eus' > Basque </option> |
||||
<option value='fin' > Finnish </option> |
||||
<option value='fra' > French </option> |
||||
<option value='frk' > Frankish </option> |
||||
<option value='frm' > French (Old) </option> |
||||
<option value='glg' > Galician </option> |
||||
<option value='grc' > Ancient Greek </option> |
||||
<option value='heb' > Hebrew </option> |
||||
<option value='hin' > Hindi </option> |
||||
<option value='hrv' > Croatian </option> |
||||
<option value='hun' > Hungarian </option> |
||||
<option value='ind' > Indonesian </option> |
||||
<option value='isl' > Icelandic </option> |
||||
<option value='ita' > Italian </option> |
||||
<option value='ita_old' > Italian (Old) </option> |
||||
<option value='jpn' > Japanese </option> |
||||
<option value='kan' > Kannada </option> |
||||
<option value='kor' > Korean </option> |
||||
<option value='lav' > Latvian </option> |
||||
<option value='lit' > Lithuanian </option> |
||||
<option value='mal' > Malayalam </option> |
||||
<option value='mkd' > Macedonian </option> |
||||
<option value='mlt' > Maltese </option> |
||||
<option value='msa' > Malay </option> |
||||
<option value='nld' > Dutch </option> |
||||
<option value='nor' > Norwegian </option> |
||||
<option value='pol' > Polish </option> |
||||
<option value='por' > Portuguese </option> |
||||
<option value='ron' > Romanian </option> |
||||
<option value='rus' > Russian </option> |
||||
<option value='slk' > Slovakian </option> |
||||
<option value='slv' > Slovenian </option> |
||||
<option value='spa' > Spanish </option> |
||||
<option value='spa_old' > Old Spanish </option> |
||||
<option value='sqi' > Albanian </option> |
||||
<option value='srp' > Serbian (Latin) </option> |
||||
<option value='swa' > Swahili </option> |
||||
<option value='swe' > Swedish </option> |
||||
<option value='tam' > Tamil </option> |
||||
<option value='tel' > Telugu </option> |
||||
<option value='tgl' > Tagalog </option> |
||||
<option value='tha' > Thai </option> |
||||
<option value='tur' > Turkish </option> |
||||
<option value='ukr' > Ukrainian </option> |
||||
<option value='vie' > Vietnamese </option> |
||||
</select> |
||||
|
||||
<button onclick="recognizeFile('../../tests/assets/images/simple.png')">Sample Image</button> |
||||
<input type="file" onchange="recognizeFile(window.lastFile=this.files[0])"> |
||||
|
||||
<div id="log"></div> |
||||
|
||||
|
||||
<style> |
||||
#log > div { |
||||
color: #313131; |
||||
border-top: 1px solid #dadada; |
||||
padding: 9px; |
||||
display: flex; |
||||
} |
||||
#log > div:first-child { |
||||
border: 0; |
||||
} |
||||
|
||||
|
||||
.status { |
||||
min-width: 250px; |
||||
} |
||||
#log { |
||||
border: 1px solid #dadada; |
||||
padding: 10px; |
||||
margin-top: 20px; |
||||
min-height: 100px; |
||||
} |
||||
body { |
||||
font-family: sans-serif; |
||||
margin: 30px; |
||||
} |
||||
|
||||
progress { |
||||
display: block; |
||||
width: 100%; |
||||
transition: opacity 0.5s linear; |
||||
} |
||||
progress[value="1"] { |
||||
opacity: 0.5; |
||||
} |
||||
</style> |
@ -0,0 +1,52 @@
@@ -0,0 +1,52 @@
|
||||
<html> |
||||
<head> |
||||
<script src="/dist/tesseract.dev.js"></script> |
||||
</head> |
||||
<body> |
||||
<div> |
||||
<input type="file" id="uploader"> |
||||
<button id="download-pdf" disabled="true">Download PDF</button> |
||||
</div> |
||||
<textarea id="board" readonly rows="8" cols="80">Upload an image file</textarea> |
||||
<script> |
||||
const { createWorker } = Tesseract; |
||||
const worker = createWorker({ |
||||
corePath: '/node_modules/tesseract.js-core/tesseract-core.wasm.js', |
||||
logger: m => console.log(m), |
||||
}); |
||||
const uploader = document.getElementById('uploader'); |
||||
const dlBtn = document.getElementById('download-pdf'); |
||||
const recognize = async ({ target: { files } }) => { |
||||
await worker.load(); |
||||
await worker.loadLanguage('eng'); |
||||
await worker.initialize('eng'); |
||||
const { data: { text } } = await worker.recognize(files[0]); |
||||
const board = document.getElementById('board'); |
||||
board.value = text; |
||||
dlBtn.disabled = false; |
||||
}; |
||||
const downloadPDF = async () => { |
||||
const filename = 'tesseract-ocr-result.pdf'; |
||||
const { data } = await worker.getPDF('Tesseract OCR Result'); |
||||
const blob = new Blob([new Uint8Array(data)], { type: 'application/pdf' }); |
||||
if (navigator.msSaveBlob) { |
||||
// IE 10+ |
||||
navigator.msSaveBlob(blob, filename); |
||||
} else { |
||||
const link = document.createElement('a'); |
||||
if (link.download !== undefined) { |
||||
const url = URL.createObjectURL(blob); |
||||
link.setAttribute('href', url); |
||||
link.setAttribute('download', filename); |
||||
link.style.visibility = 'hidden'; |
||||
document.body.appendChild(link); |
||||
link.click(); |
||||
document.body.removeChild(link); |
||||
} |
||||
} |
||||
}; |
||||
uploader.addEventListener('change', recognize); |
||||
dlBtn.addEventListener('click', downloadPDF); |
||||
</script> |
||||
</body> |
||||
</html> |
After Width: | Height: | Size: 1011 KiB |
After Width: | Height: | Size: 23 KiB |
After Width: | Height: | Size: 408 KiB |
@ -0,0 +1,27 @@
@@ -0,0 +1,27 @@
|
||||
#!/usr/bin/env node
|
||||
const path = require('path'); |
||||
const { createWorker } = require('../../'); |
||||
|
||||
const worker = createWorker(); |
||||
|
||||
(async () => { |
||||
await worker.load(); |
||||
await worker.loadLanguage('eng'); |
||||
await worker.initialize('eng'); |
||||
const fileArr = ["../data/meditations.jpg", "../data/tyger.jpg", "../data/testocr.png"]; |
||||
let timeTotal = 0; |
||||
for (let file of fileArr) { |
||||
let time1 = Date.now(); |
||||
for (let i=0; i < 10; i++) { |
||||
await worker.recognize(file) |
||||
} |
||||
let time2 = Date.now(); |
||||
const timeDif = (time2 - time1) / 1e3; |
||||
timeTotal += timeDif; |
||||
|
||||
console.log(file + " [x10] runtime: " + timeDif + "s"); |
||||
} |
||||
console.log("Total runtime: " + timeTotal + "s"); |
||||
|
||||
await worker.terminate(); |
||||
})(); |
@ -0,0 +1,13 @@
@@ -0,0 +1,13 @@
|
||||
#!/usr/bin/env node
|
||||
const path = require('path'); |
||||
const Tesseract = require('../../'); |
||||
|
||||
const [,, imagePath] = process.argv; |
||||
const image = path.resolve(__dirname, (imagePath || '../../tests/assets/images/cosmic.png')); |
||||
|
||||
console.log(`Recognizing ${image}`); |
||||
|
||||
Tesseract.detect(image, { logger: m => console.log(m) }) |
||||
.then(({ data }) => { |
||||
console.log(data); |
||||
}); |
@ -0,0 +1,22 @@
@@ -0,0 +1,22 @@
|
||||
#!/usr/bin/env node
|
||||
const path = require('path'); |
||||
const fs = require('fs'); |
||||
const { createWorker } = require('../../'); |
||||
|
||||
const [,, imagePath] = process.argv; |
||||
const image = path.resolve(__dirname, (imagePath || '../../tests/assets/images/cosmic.png')); |
||||
|
||||
console.log(`Recognizing ${image}`); |
||||
|
||||
(async () => { |
||||
const worker = createWorker(); |
||||
await worker.load(); |
||||
await worker.loadLanguage('eng'); |
||||
await worker.initialize('eng'); |
||||
const { data: { text } } = await worker.recognize(image); |
||||
console.log(text); |
||||
const { data } = await worker.getPDF('Tesseract OCR Result'); |
||||
fs.writeFileSync('tesseract-ocr-result.pdf', Buffer.from(data)); |
||||
console.log('Generate PDF: tesseract-ocr-result.pdf'); |
||||
await worker.terminate(); |
||||
})(); |
@ -0,0 +1,20 @@
@@ -0,0 +1,20 @@
|
||||
#!/usr/bin/env node
/*
 * Example: recognize English text in an image with a single worker and print
 * it, logging every progress message the worker reports.
 * The image path may be passed as the first CLI argument; otherwise the
 * cosmic.png test asset is used.
 */
const path = require('path');
const { createWorker } = require('../../');

const [,, imagePath] = process.argv;
// A relative path argument is resolved against this script's directory.
const image = path.resolve(__dirname, (imagePath || '../../tests/assets/images/cosmic.png'));

console.log(`Recognizing ${image}`);
const worker = createWorker({
  logger: m => console.log(m),
});

(async () => {
  try {
    await worker.load();
    await worker.loadLanguage('eng');
    await worker.initialize('eng');
    const { data: { text } } = await worker.recognize(image);
    console.log(text);
  } finally {
    // Always release the worker, even when a step above fails.
    await worker.terminate();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
@ -1,23 +1,89 @@
@@ -1,23 +1,89 @@
|
||||
{ |
||||
"name": "tesseract.js", |
||||
"version": "1.0.0", |
||||
"description": "", |
||||
"main": "Tesseract.js", |
||||
"dependencies": { |
||||
"pako": "^0.2.7" |
||||
}, |
||||
"devDependencies": {}, |
||||
"version": "3.0.3", |
||||
"description": "Pure Javascript Multilingual OCR", |
||||
"main": "src/index.js", |
||||
"types": "src/index.d.ts", |
||||
"unpkg": "dist/tesseract.min.js", |
||||
"jsdelivr": "dist/tesseract.min.js", |
||||
"scripts": { |
||||
"test": "echo \"Error: no test specified\" && exit 1" |
||||
"start": "node scripts/server.js", |
||||
"build": "rimraf dist && webpack --config scripts/webpack.config.prod.js && rollup -c scripts/rollup.esm.js", |
||||
"profile:tesseract": "webpack-bundle-analyzer dist/tesseract-stats.json", |
||||
"profile:worker": "webpack-bundle-analyzer dist/worker-stats.json", |
||||
"prepublishOnly": "npm run build", |
||||
"wait": "rimraf dist && wait-on http://localhost:3000/dist/tesseract.dev.js", |
||||
"test": "npm-run-all -p -r start test:all", |
||||
"test:all": "npm-run-all wait test:browser:* test:node:all", |
||||
"test:node": "nyc mocha --exit --bail --require ./scripts/test-helper.js", |
||||
"test:node:all": "npm run test:node -- ./tests/*.test.js", |
||||
"test:browser-tpl": "mocha-headless-chrome -a incognito -a no-sandbox -a disable-setuid-sandbox -a disable-logging -t 300000", |
||||
"test:browser:detect": "npm run test:browser-tpl -- -f ./tests/detect.test.html", |
||||
"test:browser:recognize": "npm run test:browser-tpl -- -f ./tests/recognize.test.html", |
||||
"test:browser:scheduler": "npm run test:browser-tpl -- -f ./tests/scheduler.test.html", |
||||
"test:browser:FS": "npm run test:browser-tpl -- -f ./tests/FS.test.html", |
||||
"lint": "eslint src", |
||||
"lint:fix": "eslint --fix src", |
||||
"postinstall": "opencollective-postinstall || true" |
||||
}, |
||||
"browser": { |
||||
"./src/worker/node/index.js": "./src/worker/browser/index.js" |
||||
}, |
||||
"author": "", |
||||
"contributors": [ |
||||
"jeromewu" |
||||
], |
||||
"license": "Apache-2.0", |
||||
"devDependencies": { |
||||
"@babel/core": "^7.18.7", |
||||
"@babel/preset-env": "^7.18.7", |
||||
"@rollup/plugin-commonjs": "^22.0.2", |
||||
"acorn": "^6.4.0", |
||||
"babel-loader": "^8.2.0", |
||||
"buffer": "^6.0.3", |
||||
"cors": "^2.8.5", |
||||
"eslint": "^7.2.0", |
||||
"eslint-config-airbnb-base": "^14.2.0", |
||||
"eslint-plugin-import": "^2.22.1", |
||||
"expect.js": "^0.3.1", |
||||
"express": "^4.17.1", |
||||
"mocha": "^8.1.3", |
||||
"mocha-headless-chrome": "^2.0.3", |
||||
"npm-run-all": "^4.1.5", |
||||
"nyc": "^15.1.0", |
||||
"rimraf": "^2.7.1", |
||||
"rollup": "^2.79.0", |
||||
"wait-on": "^3.3.0", |
||||
"webpack": "^5.74.0", |
||||
"webpack-bundle-analyzer": "^4.6.0", |
||||
"webpack-cli": "^4.10.0", |
||||
"webpack-dev-middleware": "^5.3.3" |
||||
}, |
||||
"dependencies": { |
||||
"babel-eslint": "^10.1.0", |
||||
"bmp-js": "^0.1.0", |
||||
"file-type": "^12.4.1", |
||||
"idb-keyval": "^3.2.0", |
||||
"is-electron": "^2.2.0", |
||||
"is-url": "^1.2.4", |
||||
"node-fetch": "^2.6.0", |
||||
"opencollective-postinstall": "^2.0.2", |
||||
"regenerator-runtime": "^0.13.3", |
||||
"resolve-url": "^0.2.1", |
||||
"tesseract.js-core": "^3.0.2", |
||||
"wasm-feature-detect": "^1.2.11", |
||||
"zlibjs": "^0.3.1" |
||||
}, |
||||
"repository": { |
||||
"type": "git", |
||||
"url": "https://github.com/naptha/tesseract.js.git" |
||||
}, |
||||
"author": "", |
||||
"license": "ISC", |
||||
"bugs": { |
||||
"url": "https://github.com/naptha/tesseract.js/issues" |
||||
}, |
||||
"homepage": "https://github.com/naptha/tesseract.js" |
||||
"homepage": "https://github.com/naptha/tesseract.js", |
||||
"collective": { |
||||
"type": "opencollective", |
||||
"url": "https://opencollective.com/tesseractjs" |
||||
} |
||||
} |
||||
|
@ -0,0 +1,5 @@
@@ -0,0 +1,5 @@
|
||||
{ |
||||
"rules": { |
||||
"import/no-extraneous-dependencies": 0 |
||||
} |
||||
} |
@ -0,0 +1,13 @@
@@ -0,0 +1,13 @@
|
||||
import commonjs from "@rollup/plugin-commonjs"; |
||||
|
||||
export default [ |
||||
{ |
||||
input: "dist/tesseract.min.js", |
||||
output: { |
||||
file: "dist/tesseract.esm.min.js", |
||||
format: "esm", |
||||
banner: "/* eslint-disable */", |
||||
}, |
||||
plugins: [commonjs()], |
||||
}, |
||||
]; |
@ -0,0 +1,17 @@
@@ -0,0 +1,17 @@
|
||||
/*
 * Development server: serves the repository root as static files with CORS
 * enabled and mounts the webpack dev middleware for the dev configuration
 * under /dist (bundles are also written to disk).
 */
const webpack = require('webpack');
const middleware = require('webpack-dev-middleware');
const express = require('express');
const path = require('path');
const cors = require('cors');
const webpackConfig = require('./webpack.config.dev');

const PORT = 3000;
const repoRoot = path.resolve(__dirname, '..');
const compiler = webpack(webpackConfig);
const devMiddleware = middleware(compiler, { publicPath: '/dist', writeToDisk: true });

const app = express();
app.use(cors());
app.use('/', express.static(repoRoot));
app.use(devMiddleware);

// The listening server is exported so that whoever requires this module can reach it.
const server = app.listen(PORT, () => {
  console.log('Server is running on the port no. 3000');
});

module.exports = server;
@ -0,0 +1,9 @@
@@ -0,0 +1,9 @@
|
||||
/*
 * Test bootstrap: publishes the assertion library, fs, path, the library under
 * test and every exported test constant as globals.
 */
const constants = require('../tests/constants');

global.expect = require('expect.js');
global.fs = require('fs');
global.path = require('path');
global.Tesseract = require('../src');

// Copy each test constant onto the global object under its own name.
for (const name of Object.keys(constants)) {
  global[name] = constants[name];
}
@ -0,0 +1,28 @@
@@ -0,0 +1,28 @@
|
||||
module.exports = { |
||||
resolve: { |
||||
fallback: { |
||||
buffer: require.resolve('buffer/'), |
||||
}, |
||||
}, |
||||
module: { |
||||
rules: [ |
||||
{ |
||||
test: /\.m?js$/, |
||||
// exclude: /(node_modules|bower_components)/,
|
||||
use: { |
||||
loader: 'babel-loader', |
||||
options: { |
||||
presets: [ |
||||
[ |
||||
'@babel/preset-env', |
||||
{ |
||||
targets: 'last 2 versions', |
||||
}, |
||||
], |
||||
], |
||||
}, |
||||
}, |
||||
}, |
||||
], |
||||
}, |
||||
}; |
@ -0,0 +1,48 @@
@@ -0,0 +1,48 @@
|
||||
const path = require('path'); |
||||
const webpack = require('webpack'); |
||||
const { BundleAnalyzerPlugin } = require('webpack-bundle-analyzer'); |
||||
const common = require('./webpack.config.common'); |
||||
|
||||
/**
 * Builds one development webpack configuration on top of the shared settings.
 *
 * @param {object} target
 * @param {string} target.entry - entry module of the bundle
 * @param {string} target.filename - output file name; the part before the
 *   first dot also names the generated stats file (`<name>-stats.json`)
 * @param {string} [target.library] - output library name
 * @param {string} [target.libraryTarget] - output library target
 * @returns {object} webpack configuration object
 */
const genConfig = ({
  entry, filename, library, libraryTarget,
}) => ({
  ...common,
  mode: 'development',
  entry,
  output: {
    filename,
    library,
    libraryTarget,
  },
  plugins: [
    new webpack.ProvidePlugin({
      Buffer: ['buffer', 'Buffer'],
    }),
    new webpack.DefinePlugin({
      'process.env': {
        TESS_ENV: JSON.stringify('development'),
      },
    }),
    new BundleAnalyzerPlugin({
      // 'disabled' is the documented value for "only write the stats file";
      // the previous 'disable' was not a recognized mode.
      analyzerMode: 'disabled',
      statsFilename: `${filename.split('.')[0]}-stats.json`,
      generateStatsFile: true,
    }),
  ],
  devServer: {
    allowedHosts: ['localhost', '.gitpod.io'],
  },
});
||||
|
||||
module.exports = [ |
||||
genConfig({ |
||||
entry: path.resolve(__dirname, '..', 'src', 'index.js'), |
||||
filename: 'tesseract.dev.js', |
||||
library: 'Tesseract', |
||||
libraryTarget: 'umd', |
||||
}), |
||||
genConfig({ |
||||
entry: path.resolve(__dirname, '..', 'src', 'worker-script', 'browser', 'index.js'), |
||||
filename: 'worker.dev.js', |
||||
}), |
||||
]; |
@ -0,0 +1,36 @@
@@ -0,0 +1,36 @@
|
||||
const path = require('path'); |
||||
const common = require('./webpack.config.common'); |
||||
const webpack = require('webpack'); |
||||
|
||||
/**
 * Builds one production webpack configuration on top of the shared settings.
 * Bundles are emitted with source maps into the `dist` directory next to this
 * script's parent directory.
 *
 * @param {object} target
 * @param {string} target.entry - entry module of the bundle
 * @param {string} target.filename - output file name
 * @param {string} [target.library] - output library name
 * @param {string} [target.libraryTarget] - output library target
 * @returns {object} webpack configuration object
 */
const genConfig = (target) => {
  const {
    entry, filename, library, libraryTarget,
  } = target;

  const output = {
    path: path.resolve(__dirname, '..', 'dist'),
    filename,
    library,
    libraryTarget,
  };
  const provideBuffer = new webpack.ProvidePlugin({
    Buffer: ['buffer', 'Buffer'],
  });

  return {
    ...common,
    mode: 'production',
    devtool: 'source-map',
    entry,
    output,
    plugins: [provideBuffer],
  };
};
||||
|
||||
module.exports = [ |
||||
genConfig({ |
||||
entry: path.resolve(__dirname, '..', 'src', 'index.js'), |
||||
filename: 'tesseract.min.js', |
||||
library: 'Tesseract', |
||||
libraryTarget: 'umd', |
||||
}), |
||||
genConfig({ |
||||
entry: path.resolve(__dirname, '..', 'src', 'worker-script', 'browser', 'index.js'), |
||||
filename: 'worker.min.js', |
||||
}), |
||||
]; |
@ -0,0 +1,28 @@
@@ -0,0 +1,28 @@
|
||||
const createWorker = require('./createWorker'); |
||||
|
||||
/**
 * One-shot recognition: creates a worker, loads and initializes the given
 * language(s), recognizes the image and terminates the worker.
 *
 * The worker is terminated on every path, including when loading or
 * initialization fails, so a failed call does not leave a worker behind.
 *
 * @param {*} image - image input forwarded to `worker.recognize`
 * @param {*} langs - language(s) forwarded to `loadLanguage` and `initialize`
 * @param {object} [options] - options forwarded to `createWorker`
 * @returns {Promise<*>} the result of `worker.recognize(image)`
 */
const recognize = async (image, langs, options) => {
  const worker = createWorker(options);
  try {
    await worker.load();
    await worker.loadLanguage(langs);
    await worker.initialize(langs);
    return await worker.recognize(image);
  } finally {
    await worker.terminate();
  }
};
||||
|
||||
/**
 * One-shot detection: creates a worker, loads and initializes the `osd`
 * language data, runs `worker.detect` on the image and terminates the worker.
 *
 * The worker is terminated on every path, including when loading or
 * initialization fails, so a failed call does not leave a worker behind.
 *
 * @param {*} image - image input forwarded to `worker.detect`
 * @param {object} [options] - options forwarded to `createWorker`
 * @returns {Promise<*>} the result of `worker.detect(image)`
 */
const detect = async (image, options) => {
  const worker = createWorker(options);
  try {
    await worker.load();
    await worker.loadLanguage('osd');
    await worker.initialize('osd');
    return await worker.detect(image);
  } finally {
    await worker.terminate();
  }
};
||||
|
||||
module.exports = { |
||||
recognize, |
||||
detect, |
||||
}; |
@ -0,0 +1,12 @@
@@ -0,0 +1,12 @@
|
||||
/* |
||||
* OEM = OCR Engine Mode, and there are 4 possible modes. |
||||
* |
||||
* By default tesseract.js initializes with OEM.DEFAULT (the defaultOEM config constant). |
||||
* |
||||
*/ |
||||
module.exports = { |
||||
TESSERACT_ONLY: 0, |
||||
LSTM_ONLY: 1, |
||||
TESSERACT_LSTM_COMBINED: 2, |
||||
DEFAULT: 3, |
||||
}; |
@ -0,0 +1,19 @@
@@ -0,0 +1,19 @@
|
||||
/* |
||||
* PSM = Page Segmentation Mode |
||||
*/ |
||||
module.exports = { |
||||
OSD_ONLY: '0', |
||||
AUTO_OSD: '1', |
||||
AUTO_ONLY: '2', |
||||
AUTO: '3', |
||||
SINGLE_COLUMN: '4', |
||||
SINGLE_BLOCK_VERT_TEXT: '5', |
||||
SINGLE_BLOCK: '6', |
||||
SINGLE_LINE: '7', |
||||
SINGLE_WORD: '8', |
||||
CIRCLE_WORD: '9', |
||||
SINGLE_CHAR: '10', |
||||
SPARSE_TEXT: '11', |
||||
SPARSE_TEXT_OSD: '12', |
||||
RAW_LINE: '13', |
||||
}; |
@ -0,0 +1,5 @@
@@ -0,0 +1,5 @@
|
||||
const OEM = require('./OEM'); |
||||
|
||||
module.exports = { |
||||
defaultOEM: OEM.DEFAULT, |
||||
}; |
@ -0,0 +1,13 @@
@@ -0,0 +1,13 @@
|
||||
module.exports = { |
||||
/* |
||||
* default path for downloading *.traineddata |
||||
*/ |
||||
langPath: 'https://tessdata.projectnaptha.com/4.0.0', |
||||
/* |
||||
* Use BlobURL for worker script by default |
||||
* TODO: remove this option |
||||
* |
||||
*/ |
||||
workerBlobURL: true, |
||||
logger: () => {}, |
||||
}; |
@ -0,0 +1,218 @@
@@ -0,0 +1,218 @@
|
||||
/* |
||||
* languages with existing tesseract traineddata |
||||
* https://tesseract-ocr.github.io/tessdoc/Data-Files#data-files-for-version-400-november-29-2016
|
||||
*/ |
||||
|
||||
/** |
||||
* @typedef {object} Languages |
||||
* @property {string} AFR Afrikaans |
||||
* @property {string} AMH Amharic |
||||
* @property {string} ARA Arabic |
||||
* @property {string} ASM Assamese |
||||
* @property {string} AZE Azerbaijani |
||||
* @property {string} AZE_CYRL Azerbaijani - Cyrillic |
||||
* @property {string} BEL Belarusian |
||||
* @property {string} BEN Bengali |
||||
* @property {string} BOD Tibetan |
||||
* @property {string} BOS Bosnian |
||||
* @property {string} BUL Bulgarian |
||||
* @property {string} CAT Catalan; Valencian |
||||
* @property {string} CEB Cebuano |
||||
* @property {string} CES Czech |
||||
* @property {string} CHI_SIM Chinese - Simplified |
||||
* @property {string} CHI_TRA Chinese - Traditional |
||||
* @property {string} CHR Cherokee |
||||
* @property {string} CYM Welsh |
||||
* @property {string} DAN Danish |
||||
* @property {string} DEU German |
||||
* @property {string} DZO Dzongkha |
||||
* @property {string} ELL Greek, Modern (1453-) |
||||
* @property {string} ENG English |
||||
* @property {string} ENM English, Middle (1100-1500) |
||||
* @property {string} EPO Esperanto |
||||
* @property {string} EST Estonian |
||||
* @property {string} EUS Basque |
||||
* @property {string} FAS Persian |
||||
* @property {string} FIN Finnish |
||||
* @property {string} FRA French |
||||
* @property {string} FRK German Fraktur |
||||
* @property {string} FRM French, Middle (ca. 1400-1600) |
||||
* @property {string} GLE Irish |
||||
* @property {string} GLG Galician |
||||
* @property {string} GRC Greek, Ancient (-1453) |
||||
* @property {string} GUJ Gujarati |
||||
* @property {string} HAT Haitian; Haitian Creole |
||||
* @property {string} HEB Hebrew |
||||
* @property {string} HIN Hindi |
||||
* @property {string} HRV Croatian |
||||
* @property {string} HUN Hungarian |
||||
* @property {string} IKU Inuktitut |
||||
* @property {string} IND Indonesian |
||||
* @property {string} ISL Icelandic |
||||
* @property {string} ITA Italian |
||||
* @property {string} ITA_OLD Italian - Old |
||||
* @property {string} JAV Javanese |
||||
* @property {string} JPN Japanese |
||||
* @property {string} KAN Kannada |
||||
* @property {string} KAT Georgian |
||||
* @property {string} KAT_OLD Georgian - Old |
||||
* @property {string} KAZ Kazakh |
||||
* @property {string} KHM Central Khmer |
||||
* @property {string} KIR Kirghiz; Kyrgyz |
||||
* @property {string} KOR Korean |
||||
* @property {string} KUR Kurdish |
||||
* @property {string} LAO Lao |
||||
* @property {string} LAT Latin |
||||
* @property {string} LAV Latvian |
||||
* @property {string} LIT Lithuanian |
||||
* @property {string} MAL Malayalam |
||||
* @property {string} MAR Marathi |
||||
* @property {string} MKD Macedonian |
||||
* @property {string} MLT Maltese |
||||
* @property {string} MSA Malay |
||||
* @property {string} MYA Burmese |
||||
* @property {string} NEP Nepali |
||||
* @property {string} NLD Dutch; Flemish |
||||
* @property {string} NOR Norwegian |
||||
* @property {string} ORI Oriya |
||||
* @property {string} PAN Panjabi; Punjabi |
||||
* @property {string} POL Polish |
||||
* @property {string} POR Portuguese |
||||
* @property {string} PUS Pushto; Pashto |
||||
* @property {string} RON Romanian; Moldavian; Moldovan |
||||
* @property {string} RUS Russian |
||||
* @property {string} SAN Sanskrit |
||||
* @property {string} SIN Sinhala; Sinhalese |
||||
* @property {string} SLK Slovak |
||||
* @property {string} SLV Slovenian |
||||
* @property {string} SPA Spanish; Castilian |
||||
* @property {string} SPA_OLD Spanish; Castilian - Old |
||||
* @property {string} SQI Albanian |
||||
* @property {string} SRP Serbian |
||||
* @property {string} SRP_LATN Serbian - Latin |
||||
* @property {string} SWA Swahili |
||||
* @property {string} SWE Swedish |
||||
* @property {string} SYR Syriac |
||||
* @property {string} TAM Tamil |
||||
* @property {string} TEL Telugu |
||||
* @property {string} TGK Tajik |
||||
* @property {string} TGL Tagalog |
||||
* @property {string} THA Thai |
||||
* @property {string} TIR Tigrinya |
||||
* @property {string} TUR Turkish |
||||
* @property {string} UIG Uighur; Uyghur |
||||
* @property {string} UKR Ukrainian |
||||
* @property {string} URD Urdu |
||||
* @property {string} UZB Uzbek |
||||
* @property {string} UZB_CYRL Uzbek - Cyrillic |
||||
* @property {string} VIE Vietnamese |
||||
* @property {string} YID Yiddish |
||||
*/ |
||||
|
||||
/** |
||||
* @type {Languages} |
||||
*/ |
||||
module.exports = { |
||||
AFR: 'afr', |
||||
AMH: 'amh', |
||||
ARA: 'ara', |
||||
ASM: 'asm', |
||||
AZE: 'aze', |
||||
AZE_CYRL: 'aze_cyrl', |
||||
BEL: 'bel', |
||||
BEN: 'ben', |
||||
BOD: 'bod', |
||||
BOS: 'bos', |
||||
BUL: 'bul', |
||||
CAT: 'cat', |
||||
CEB: 'ceb', |
||||
CES: 'ces', |
||||
CHI_SIM: 'chi_sim', |
||||
CHI_TRA: 'chi_tra', |
||||
CHR: 'chr', |
||||
CYM: 'cym', |
||||
DAN: 'dan', |
||||
DEU: 'deu', |
||||
DZO: 'dzo', |
||||
ELL: 'ell', |
||||
ENG: 'eng', |
||||
ENM: 'enm', |
||||
EPO: 'epo', |
||||
EST: 'est', |
||||
EUS: 'eus', |
||||
FAS: 'fas', |
||||
FIN: 'fin', |
||||
FRA: 'fra', |
||||
FRK: 'frk', |
||||
FRM: 'frm', |
||||
GLE: 'gle', |
||||
GLG: 'glg', |
||||
GRC: 'grc', |
||||
GUJ: 'guj', |
||||
HAT: 'hat', |
||||
HEB: 'heb', |
||||
HIN: 'hin', |
||||
HRV: 'hrv', |
||||
HUN: 'hun', |
||||
IKU: 'iku', |
||||
IND: 'ind', |
||||
ISL: 'isl', |
||||
ITA: 'ita', |
||||
ITA_OLD: 'ita_old', |
||||
JAV: 'jav', |
||||
JPN: 'jpn', |
||||
KAN: 'kan', |
||||
KAT: 'kat', |
||||
KAT_OLD: 'kat_old', |
||||
KAZ: 'kaz', |
||||
KHM: 'khm', |
||||
KIR: 'kir', |
||||
KOR: 'kor', |
||||
KUR: 'kur', |
||||
LAO: 'lao', |
||||
LAT: 'lat', |
||||
LAV: 'lav', |
||||
LIT: 'lit', |
||||
MAL: 'mal', |
||||
MAR: 'mar', |
||||
MKD: 'mkd', |
||||
MLT: 'mlt', |
||||
MSA: 'msa', |
||||
MYA: 'mya', |
||||
NEP: 'nep', |
||||
NLD: 'nld', |
||||
NOR: 'nor', |
||||
ORI: 'ori', |
||||
PAN: 'pan', |
||||
POL: 'pol', |
||||
POR: 'por', |
||||
PUS: 'pus', |
||||
RON: 'ron', |
||||
RUS: 'rus', |
||||
SAN: 'san', |
||||
SIN: 'sin', |
||||
SLK: 'slk', |
||||
SLV: 'slv', |
||||
SPA: 'spa', |
||||
SPA_OLD: 'spa_old', |
||||
SQI: 'sqi', |
||||
SRP: 'srp', |
||||
SRP_LATN: 'srp_latn', |
||||
SWA: 'swa', |
||||
SWE: 'swe', |
||||
SYR: 'syr', |
||||
TAM: 'tam', |
||||
TEL: 'tel', |
||||
TGK: 'tgk', |
||||
TGL: 'tgl', |
||||
THA: 'tha', |
||||
TIR: 'tir', |
||||
TUR: 'tur', |
||||
UIG: 'uig', |
||||
UKR: 'ukr', |
||||
URD: 'urd', |
||||
UZB: 'uzb', |
||||
UZB_CYRL: 'uzb_cyrl', |
||||
VIE: 'vie', |
||||
YID: 'yid', |
||||
}; |
@ -0,0 +1,21 @@
@@ -0,0 +1,21 @@
|
||||
const getId = require('./utils/getId'); |
||||
|
||||
let jobCounter = 0; |
||||
|
||||
module.exports = ({ |
||||
id: _id, |
||||
action, |
||||
payload = {}, |
||||
}) => { |
||||
let id = _id; |
||||
if (typeof id === 'undefined') { |
||||
id = getId('Job', jobCounter); |
||||
jobCounter += 1; |
||||
} |
||||
|
||||
return { |
||||
id, |
||||
action, |
||||
payload, |
||||
}; |
||||
}; |
@ -0,0 +1,80 @@
@@ -0,0 +1,80 @@
|
||||
const createJob = require('./createJob'); |
||||
const { log } = require('./utils/log'); |
||||
const getId = require('./utils/getId'); |
||||
|
||||
let schedulerCounter = 0; |
||||
|
||||
module.exports = () => { |
||||
const id = getId('Scheduler', schedulerCounter); |
||||
const workers = {}; |
||||
const runningWorkers = {}; |
||||
let jobQueue = []; |
||||
|
||||
schedulerCounter += 1; |
||||
|
||||
const getQueueLen = () => jobQueue.length; |
||||
const getNumWorkers = () => Object.keys(workers).length; |
||||
|
||||
const dequeue = () => { |
||||
if (jobQueue.length !== 0) { |
||||
const wIds = Object.keys(workers); |
||||
for (let i = 0; i < wIds.length; i += 1) { |
||||
if (typeof runningWorkers[wIds[i]] === 'undefined') { |
||||
jobQueue[0](workers[wIds[i]]); |
||||
break; |
||||
} |
||||
} |
||||
} |
||||
}; |
||||
|
||||
const queue = (action, payload) => ( |
||||
new Promise((resolve, reject) => { |
||||
const job = createJob({ action, payload }); |
||||
jobQueue.push(async (w) => { |
||||
jobQueue.shift(); |
||||
runningWorkers[w.id] = job; |
||||
try { |
||||
resolve(await w[action].apply(this, [...payload, job.id])); |
||||
} catch (err) { |
||||
reject(err); |
||||
} finally { |
||||
delete runningWorkers[w.id]; |
||||
dequeue(); |
||||
} |
||||
}); |
||||
log(`[${id}]: Add ${job.id} to JobQueue`); |
||||
log(`[${id}]: JobQueue length=${jobQueue.length}`); |
||||
dequeue(); |
||||
}) |
||||
); |
||||
|
||||
const addWorker = (w) => { |
||||
workers[w.id] = w; |
||||
log(`[${id}]: Add ${w.id}`); |
||||
log(`[${id}]: Number of workers=${getNumWorkers()}`); |
||||
dequeue(); |
||||
return w.id; |
||||
}; |
||||
|
||||
const addJob = async (action, ...payload) => { |
||||
if (getNumWorkers() === 0) { |
||||
throw Error(`[${id}]: You need to have at least one worker before adding jobs`); |
||||
} |
||||
return queue(action, payload); |
||||
}; |
||||
|
||||
const terminate = async () => { |
||||
Object.keys(workers).forEach(async (wid) => { |
||||
await workers[wid].terminate(); |
||||
}); |
||||
jobQueue = []; |
||||
}; |
||||
|
||||
return { |
||||
addWorker, |
||||
addJob, |
||||
terminate, |
||||
getQueueLen, |
||||
getNumWorkers, |
||||
}; |
||||
}; |
@ -0,0 +1,198 @@
@@ -0,0 +1,198 @@
|
||||
const resolvePaths = require('./utils/resolvePaths'); |
||||
const circularize = require('./utils/circularize'); |
||||
const createJob = require('./createJob'); |
||||
const { log } = require('./utils/log'); |
||||
const getId = require('./utils/getId'); |
||||
const { defaultOEM } = require('./constants/config'); |
||||
const { |
||||
defaultOptions, |
||||
spawnWorker, |
||||
terminateWorker, |
||||
onMessage, |
||||
loadImage, |
||||
send, |
||||
} = require('./worker/node'); |
||||
|
||||
let workerCounter = 0; |
||||
|
||||
module.exports = (_options = {}) => { |
||||
const id = getId('Worker', workerCounter); |
||||
const { |
||||
logger, |
||||
errorHandler, |
||||
...options |
||||
} = resolvePaths({ |
||||
...defaultOptions, |
||||
..._options, |
||||
}); |
||||
const resolves = {}; |
||||
const rejects = {}; |
||||
let worker = spawnWorker(options); |
||||
|
||||
workerCounter += 1; |
||||
|
||||
const setResolve = (action, res) => { |
||||
resolves[action] = res; |
||||
}; |
||||
|
||||
const setReject = (action, rej) => { |
||||
rejects[action] = rej; |
||||
}; |
||||
|
||||
/*
 * Sends a job to the underlying worker and returns a promise for its outcome.
 * The promise's resolve/reject callbacks are stored under the job's action
 * name so that the message handler can settle it later.
 */
const startJob = ({ id: jobId, action, payload }) => {
  const dispatch = (resolve, reject) => {
    log(`[${id}]: Start ${jobId}, action=${action}`);
    setResolve(action, resolve);
    setReject(action, reject);

    const message = {
      workerId: id,
      jobId,
      action,
      payload,
    };
    send(worker, message);
  };

  return new Promise(dispatch);
};
||||
|
||||
const load = (jobId) => ( |
||||
startJob(createJob({ |
||||
id: jobId, action: 'load', payload: { options }, |
||||
})) |
||||
); |
||||
|
||||
const writeText = (path, text, jobId) => ( |
||||
startJob(createJob({ |
||||
id: jobId, |
||||
action: 'FS', |
||||
payload: { method: 'writeFile', args: [path, text] }, |
||||
})) |
||||
); |
||||
|
||||
const readText = (path, jobId) => ( |
||||
startJob(createJob({ |
||||
id: jobId, |
||||
action: 'FS', |
||||
payload: { method: 'readFile', args: [path, { encoding: 'utf8' }] }, |
||||
})) |
||||
); |
||||
|
||||
const removeFile = (path, jobId) => ( |
||||
startJob(createJob({ |
||||
id: jobId, |
||||
action: 'FS', |
||||
payload: { method: 'unlink', args: [path] }, |
||||
})) |
||||
); |
||||
|
||||
const FS = (method, args, jobId) => ( |
||||
startJob(createJob({ |
||||
id: jobId, |
||||
action: 'FS', |
||||
payload: { method, args }, |
||||
})) |
||||
); |
||||
|
||||
const loadLanguage = (langs = 'eng', jobId) => ( |
||||
startJob(createJob({ |
||||
id: jobId, |
||||
action: 'loadLanguage', |
||||
payload: { langs, options }, |
||||
})) |
||||
); |
||||
|
||||
const initialize = (langs = 'eng', oem = defaultOEM, jobId) => ( |
||||
startJob(createJob({ |
||||
id: jobId, |
||||
action: 'initialize', |
||||
payload: { langs, oem }, |
||||
})) |
||||
); |
||||
|
||||
const setParameters = (params = {}, jobId) => ( |
||||
startJob(createJob({ |
||||
id: jobId, |
||||
action: 'setParameters', |
||||
payload: { params }, |
||||
})) |
||||
); |
||||
|
||||
const recognize = async (image, opts = {}, jobId) => ( |
||||
startJob(createJob({ |
||||
id: jobId, |
||||
action: 'recognize', |
||||
payload: { image: await loadImage(image), options: opts }, |
||||
})) |
||||
); |
||||
|
||||
const getPDF = (title = 'Tesseract OCR Result', textonly = false, jobId) => ( |
||||
startJob(createJob({ |
||||
id: jobId, |
||||
action: 'getPDF', |
||||
payload: { title, textonly }, |
||||
})) |
||||
); |
||||
|
||||
const detect = async (image, jobId) => ( |
||||
startJob(createJob({ |
||||
id: jobId, |
||||
action: 'detect', |
||||
payload: { image: await loadImage(image) }, |
||||
})) |
||||
); |
||||
|
||||
/*
 * Terminates the underlying worker once; later calls are no-ops.
 * Always resolves with undefined.
 */
const terminate = async () => {
  if (worker === null) {
    return undefined;
  }
  terminateWorker(worker);
  // Forget the handle so that repeated calls do not terminate twice.
  worker = null;
  return undefined;
};
||||
|
||||
onMessage(worker, ({ |
||||
workerId, jobId, status, action, data, |
||||
}) => { |
||||
if (status === 'resolve') { |
||||
log(`[${workerId}]: Complete ${jobId}`); |
||||
let d = data; |
||||
if (action === 'recognize') { |
||||
d = circularize(data); |
||||
} else if (action === 'getPDF') { |
||||
d = Array.from({ ...data, length: Object.keys(data).length }); |
||||
} |
||||
resolves[action]({ jobId, data: d }); |
||||
} else if (status === 'reject') { |
||||
rejects[action](data); |
||||
if (errorHandler) { |
||||
errorHandler(data); |
||||
} else { |
||||
throw Error(data); |
||||
} |
||||
} else if (status === 'progress') { |
||||
logger({ ...data, userJobId: jobId }); |
||||
} |
||||
}); |
||||
|
||||
return { |
||||
id, |
||||
worker, |
||||
setResolve, |
||||
setReject, |
||||
load, |
||||
writeText, |
||||
readText, |
||||
removeFile, |
||||
FS, |
||||
loadLanguage, |
||||
initialize, |
||||
setParameters, |
||||
recognize, |
||||
getPDF, |
||||
detect, |
||||
terminate, |
||||
}; |
||||
}; |
@ -0,0 +1,231 @@
@@ -0,0 +1,231 @@
|
||||
declare namespace Tesseract { |
||||
/** Creates a scheduler that distributes jobs across the workers added to it. */
function createScheduler(): Scheduler
/** Creates a worker; it must be loaded and initialized before recognizing. */
function createWorker(options?: Partial<WorkerOptions>): Worker
function setLogging(logging: boolean): void
/** One-shot helper: creates a worker, recognizes the image and terminates the worker. */
function recognize(image: ImageLike, langs?: string, options?: Partial<WorkerOptions>): Promise<RecognizeResult>
/** One-shot helper: creates a worker, runs detection on the image and terminates the worker. */
function detect(image: ImageLike, options?: Partial<WorkerOptions>): Promise<DetectResult>
||||
|
||||
interface Scheduler { |
||||
addWorker(worker: Worker): string |
||||
addJob(action: string, ...args: any[]): Promise<ConfigResult | RecognizeResult | DetectResult> |
||||
terminate(): Promise<any> |
||||
getQueueLen(): number |
||||
getNumWorkers(): number |
||||
} |
||||
|
||||
/** Handle returned by `createWorker`; every method resolves once the worker reports the job's result. */
interface Worker {
load(jobId?: string): Promise<ConfigResult>
/** Writes `text` to `path` through the worker's FS `writeFile` method. */
writeText(path: string, text: string, jobId?: string): Promise<ConfigResult>
/** Reads `path` as UTF-8 text through the worker's FS `readFile` method. */
readText(path: string, jobId?: string): Promise<ConfigResult>
/** Removes `path` through the worker's FS `unlink` method. */
removeFile(path: string, jobId?: string): Promise<ConfigResult>
/** Calls the named FS method with the given arguments. */
FS(method: string, args: any[], jobId?: string): Promise<ConfigResult>
loadLanguage(langs?: string | Lang[], jobId?: string): Promise<ConfigResult>
initialize(langs?: string | Lang[], oem?: OEM, jobId?: string): Promise<ConfigResult>
setParameters(params: Partial<WorkerParams>, jobId?: string): Promise<ConfigResult>
recognize(image: ImageLike, options?: Partial<RecognizeOptions>, jobId?: string): Promise<RecognizeResult>
detect(image: ImageLike, jobId?: string): Promise<DetectResult>
terminate(jobId?: string): Promise<ConfigResult>
getPDF(title?: string, textonly?: boolean, jobId?: string):Promise<GetPDFResult>
}
||||
|
||||
interface Lang { |
||||
code: string; |
||||
data: unknown; |
||||
} |
||||
|
||||
interface WorkerOptions { |
||||
corePath: string |
||||
langPath: string |
||||
cachePath: string |
||||
dataPath: string |
||||
workerPath: string |
||||
cacheMethod: string |
||||
workerBlobURL: boolean |
||||
gzip: boolean |
||||
logger: (arg: any) => void, |
||||
errorHandler: (arg: any) => void |
||||
} |
||||
interface WorkerParams { |
||||
tessedit_ocr_engine_mode: OEM |
||||
tessedit_pageseg_mode: PSM |
||||
tessedit_char_whitelist: string |
||||
preserve_interword_spaces: string |
||||
user_defined_dpi: string |
||||
tessjs_create_hocr: string |
||||
tessjs_create_tsv: string |
||||
tessjs_create_box: string |
||||
tessjs_create_unlv: string |
||||
tessjs_create_osd: string |
||||
} |
||||
interface RecognizeOptions { |
||||
rectangle: Rectangle |
||||
} |
||||
interface ConfigResult { |
||||
jobId: string |
||||
data: any |
||||
} |
||||
interface RecognizeResult { |
||||
jobId: string |
||||
data: Page |
||||
} |
||||
interface GetPDFResult { |
||||
jobId: string |
||||
data: number[] |
||||
} |
||||
interface DetectResult { |
||||
jobId: string |
||||
data: DetectData |
||||
} |
||||
interface DetectData { |
||||
tesseract_script_id: number |
||||
script: string |
||||
script_confidence: number |
||||
orientation_degrees: number |
||||
orientation_confidence: number |
||||
} |
||||
interface Rectangle { |
||||
left: number |
||||
top: number |
||||
width: number |
||||
height: number |
||||
} |
||||
enum OEM { |
||||
TESSERACT_ONLY, |
||||
LSTM_ONLY, |
||||
TESSERACT_LSTM_COMBINED, |
||||
DEFAULT, |
||||
} |
||||
enum PSM { |
||||
OSD_ONLY = '0', |
||||
AUTO_OSD = '1', |
||||
AUTO_ONLY = '2', |
||||
AUTO = '3', |
||||
SINGLE_COLUMN = '4', |
||||
SINGLE_BLOCK_VERT_TEXT = '5', |
||||
SINGLE_BLOCK = '6', |
||||
SINGLE_LINE = '7', |
||||
SINGLE_WORD = '8', |
||||
CIRCLE_WORD = '9', |
||||
SINGLE_CHAR = '10', |
||||
SPARSE_TEXT = '11', |
||||
SPARSE_TEXT_OSD = '12', |
||||
RAW_LINE = '13' |
||||
} |
||||
type ImageLike = string | HTMLImageElement | HTMLCanvasElement | HTMLVideoElement |
||||
| CanvasRenderingContext2D | File | Blob | ImageData | Buffer; |
||||
interface Block { |
||||
paragraphs: Paragraph[]; |
||||
text: string; |
||||
confidence: number; |
||||
baseline: Baseline; |
||||
bbox: Bbox; |
||||
blocktype: string; |
||||
polygon: any; |
||||
page: Page; |
||||
lines: Line[]; |
||||
words: Word[]; |
||||
symbols: Symbol[]; |
||||
} |
||||
interface Baseline { |
||||
x0: number; |
||||
y0: number; |
||||
x1: number; |
||||
y1: number; |
||||
has_baseline: boolean; |
||||
} |
||||
interface Bbox { |
||||
x0: number; |
||||
y0: number; |
||||
x1: number; |
||||
y1: number; |
||||
} |
||||
interface Line { |
||||
words: Word[]; |
||||
text: string; |
||||
confidence: number; |
||||
baseline: Baseline; |
||||
bbox: Bbox; |
||||
paragraph: Paragraph; |
||||
block: Block; |
||||
page: Page; |
||||
symbols: Symbol[]; |
||||
} |
||||
interface Paragraph { |
||||
lines: Line[]; |
||||
text: string; |
||||
confidence: number; |
||||
baseline: Baseline; |
||||
bbox: Bbox; |
||||
is_ltr: boolean; |
||||
block: Block; |
||||
page: Page; |
||||
words: Word[]; |
||||
symbols: Symbol[]; |
||||
} |
||||
interface Symbol { |
||||
choices: Choice[]; |
||||
image: any; |
||||
text: string; |
||||
confidence: number; |
||||
baseline: Baseline; |
||||
bbox: Bbox; |
||||
is_superscript: boolean; |
||||
is_subscript: boolean; |
||||
is_dropcap: boolean; |
||||
word: Word; |
||||
line: Line; |
||||
paragraph: Paragraph; |
||||
block: Block; |
||||
page: Page; |
||||
} |
||||
interface Choice { |
||||
text: string; |
||||
confidence: number; |
||||
} |
||||
interface Word { |
||||
symbols: Symbol[]; |
||||
choices: Choice[]; |
||||
text: string; |
||||
confidence: number; |
||||
baseline: Baseline; |
||||
bbox: Bbox; |
||||
is_numeric: boolean; |
||||
in_dictionary: boolean; |
||||
direction: string; |
||||
language: string; |
||||
is_bold: boolean; |
||||
is_italic: boolean; |
||||
is_underlined: boolean; |
||||
is_monospace: boolean; |
||||
is_serif: boolean; |
||||
is_smallcaps: boolean; |
||||
font_size: number; |
||||
font_id: number; |
||||
font_name: string; |
||||
line: Line; |
||||
paragraph: Paragraph; |
||||
block: Block; |
||||
page: Page; |
||||
} |
||||
interface Page { |
||||
blocks: Block[]; |
||||
confidence: number; |
||||
lines: Line[]; |
||||
oem: string; |
||||
osd: string; |
||||
paragraphs: Paragraph[]; |
||||
psm: string; |
||||
symbols: Symbol[]; |
||||
text: string; |
||||
version: string; |
||||
words: Word[]; |
||||
hocr: string | null; |
||||
tsv: string | null; |
||||
box: string | null; |
||||
unlv: string | null; |
||||
sd: string | null; |
||||
} |
||||
} |
||||
|
||||
export = Tesseract; |
||||
export as namespace Tesseract; |
@ -0,0 +1,27 @@
@@ -0,0 +1,27 @@
|
||||
/** |
||||
* |
||||
* Entry point for tesseract.js, should be the entry when bundling. |
||||
* |
||||
* @fileoverview entry point for tesseract.js |
||||
* @author Kevin Kwok <antimatter15@gmail.com> |
||||
* @author Guillermo Webster <gui@mit.edu> |
||||
* @author Jerome Wu <jeromewus@gmail.com> |
||||
*/ |
||||
require('regenerator-runtime/runtime'); |
||||
const createScheduler = require('./createScheduler'); |
||||
const createWorker = require('./createWorker'); |
||||
const Tesseract = require('./Tesseract'); |
||||
const languages = require('./constants/languages'); |
||||
const OEM = require('./constants/OEM'); |
||||
const PSM = require('./constants/PSM'); |
||||
const { setLogging } = require('./utils/log'); |
||||
|
||||
module.exports = { |
||||
languages, |
||||
OEM, |
||||
PSM, |
||||
createScheduler, |
||||
createWorker, |
||||
setLogging, |
||||
...Tesseract, |
||||
}; |
@ -0,0 +1,54 @@
@@ -0,0 +1,54 @@
|
||||
/** |
||||
* In the recognition result of tesseract, there |
||||
* is a deep JSON object holding the details. |
||||
* |
||||
* The result of dump.js is a big JSON tree |
||||
* which can be easily serialized (for instance |
||||
* to be sent from a webworker to the main app |
||||
* or through Node's IPC), but we want |
||||
* a (circular) DOM-like interface for walking |
||||
* through the data. |
||||
* |
||||
* @fileoverview DOM-like interface for walking through data |
||||
* @author Kevin Kwok <antimatter15@gmail.com> |
||||
* @author Guillermo Webster <gui@mit.edu> |
||||
* @author Jerome Wu <jeromewus@gmail.com> |
||||
*/ |
||||
|
||||
module.exports = (page) => { |
||||
const blocks = []; |
||||
const paragraphs = []; |
||||
const lines = []; |
||||
const words = []; |
||||
const symbols = []; |
||||
|
||||
page.blocks.forEach((block) => { |
||||
block.paragraphs.forEach((paragraph) => { |
||||
paragraph.lines.forEach((line) => { |
||||
line.words.forEach((word) => { |
||||
word.symbols.forEach((sym) => { |
||||
symbols.push({ |
||||
...sym, page, block, paragraph, line, word, |
||||
}); |
||||
}); |
||||
words.push({ |
||||
...word, page, block, paragraph, line, |
||||
}); |
||||
}); |
||||
lines.push({ |
||||
...line, page, block, paragraph, |
||||
}); |
||||
}); |
||||
paragraphs.push({ |
||||
...paragraph, page, block, |
||||
}); |
||||
}); |
||||
blocks.push({ |
||||
...block, page, |
||||
}); |
||||
}); |
||||
|
||||
return { |
||||
...page, blocks, paragraphs, lines, words, symbols, |
||||
}; |
||||
}; |
@ -0,0 +1,21 @@
@@ -0,0 +1,21 @@
|
||||
const isElectron = require('is-electron'); |
||||
|
||||
module.exports = (key) => { |
||||
const env = {}; |
||||
|
||||
if (typeof WorkerGlobalScope !== 'undefined') { |
||||
env.type = 'webworker'; |
||||
} else if (isElectron()) { |
||||
env.type = 'electron'; |
||||
} else if (typeof window === 'object') { |
||||
env.type = 'browser'; |
||||
} else if (typeof process === 'object' && typeof require === 'function') { |
||||
env.type = 'node'; |
||||
} |
||||
|
||||
if (typeof key === 'undefined') { |
||||
return env; |
||||
} |
||||
|
||||
return env[key]; |
||||
}; |
@ -0,0 +1,3 @@
@@ -0,0 +1,3 @@
|
||||
module.exports = (prefix, cnt) => ( |
||||
`${prefix}-${cnt}-${Math.random().toString(16).slice(3, 8)}` |
||||
); |
@ -0,0 +1,9 @@
@@ -0,0 +1,9 @@
|
||||
let logging = false; |
||||
|
||||
exports.logging = logging; |
||||
|
||||
exports.setLogging = (_logging) => { |
||||
logging = _logging; |
||||
}; |
||||
|
||||
exports.log = (...args) => (logging ? console.log.apply(this, args) : null); |
@ -0,0 +1,12 @@
@@ -0,0 +1,12 @@
|
||||
const isBrowser = require('./getEnvironment')('type') === 'browser'; |
||||
const resolveURL = isBrowser ? require('resolve-url') : s => s; // eslint-disable-line
|
||||
|
||||
module.exports = (options) => { |
||||
const opts = { ...options }; |
||||
['corePath', 'workerPath', 'langPath'].forEach((key) => { |
||||
if (options[key]) { |
||||
opts[key] = resolveURL(opts[key]); |
||||
} |
||||
}); |
||||
return opts; |
||||
}; |
@ -0,0 +1,10 @@
@@ -0,0 +1,10 @@
|
||||
const { set, get, del } = require('idb-keyval'); |
||||
|
||||
module.exports = { |
||||
readCache: get, |
||||
writeCache: set, |
||||
deleteCache: del, |
||||
checkCache: (path) => ( |
||||
get(path).then((v) => typeof v !== 'undefined') |
||||
), |
||||
}; |
@ -0,0 +1,30 @@
@@ -0,0 +1,30 @@
|
||||
const { simd } = require('wasm-feature-detect'); |
||||
const { dependencies } = require('../../../package.json'); |
||||
|
||||
module.exports = async (corePath, res) => { |
||||
if (typeof global.TesseractCore === 'undefined') { |
||||
res.progress({ status: 'loading tesseract core', progress: 0 }); |
||||
|
||||
// If the user specifies a core path, we use that
|
||||
// Otherwise, we detect the correct core based on SIMD support
|
||||
let corePathImport = corePath; |
||||
if (!corePathImport) { |
||||
const simdSupport = await simd(); |
||||
if (simdSupport) { |
||||
corePathImport = `https://unpkg.com/tesseract.js-core@v${dependencies['tesseract.js-core'].substring(1)}/tesseract-core-simd.wasm.js`; |
||||
} else { |
||||
corePathImport = `https://unpkg.com/tesseract.js-core@v${dependencies['tesseract.js-core'].substring(1)}/tesseract-core.wasm.js`; |
||||
} |
||||
} |
||||
|
||||
global.importScripts(corePathImport); |
||||
|
||||
if (typeof global.TesseractCoreWASM !== 'undefined' && typeof WebAssembly === 'object') { |
||||
global.TesseractCore = global.TesseractCoreWASM; |
||||
} else { |
||||
throw Error('Failed to load TesseractCore'); |
||||
} |
||||
res.progress({ status: 'loading tesseract core', progress: 1 }); |
||||
} |
||||
return global.TesseractCore; |
||||
}; |
@ -0,0 +1 @@
@@ -0,0 +1 @@
|
||||
module.exports = require('zlibjs').gunzipSync; |
@ -0,0 +1,32 @@
@@ -0,0 +1,32 @@
|
||||
/** |
||||
* |
||||
* Browser worker scripts |
||||
* |
||||
* @fileoverview Browser worker implementation |
||||
* @author Kevin Kwok <antimatter15@gmail.com> |
||||
* @author Guillermo Webster <gui@mit.edu> |
||||
* @author Jerome Wu <jeromewus@gmail.com> |
||||
*/ |
||||
|
||||
const worker = require('..'); |
||||
const getCore = require('./getCore'); |
||||
const gunzip = require('./gunzip'); |
||||
const cache = require('./cache'); |
||||
|
||||
/*
 * Forward every message posted to this worker to the shared dispatcher;
 * the dispatcher answers through postMessage.
 */
global.addEventListener('message', (event) => {
  const reply = (obj) => postMessage(obj);
  worker.dispatchHandlers(event.data, reply);
});

/*
 * Install the browser adapter. getCore loads TesseractCore asynchronously;
 * fetch is a no-op placeholder here.
 */
worker.setAdapter({
  getCore,
  gunzip,
  fetch: () => {},
  ...cache,
});
@ -0,0 +1,14 @@
@@ -0,0 +1,14 @@
|
||||
/* |
||||
* default params for tesseract.js |
||||
*/ |
||||
const PSM = require('../../constants/PSM'); |
||||
|
||||
module.exports = { |
||||
tessedit_pageseg_mode: PSM.SINGLE_BLOCK, |
||||
tessedit_char_whitelist: '', |
||||
tessjs_create_hocr: '1', |
||||
tessjs_create_tsv: '1', |
||||
tessjs_create_box: '0', |
||||
tessjs_create_unlv: '0', |
||||
tessjs_create_osd: '0', |
||||
}; |
@ -0,0 +1,313 @@
@@ -0,0 +1,313 @@
|
||||
/** |
||||
* |
||||
* Worker script for browser and node |
||||
* |
||||
* @fileoverview Worker script for browser and node |
||||
* @author Kevin Kwok <antimatter15@gmail.com> |
||||
* @author Guillermo Webster <gui@mit.edu> |
||||
* @author Jerome Wu <jeromewus@gmail.com> |
||||
*/ |
||||
require('regenerator-runtime/runtime'); |
||||
const fileType = require('file-type'); |
||||
const isURL = require('is-url'); |
||||
const dump = require('./utils/dump'); |
||||
const isWebWorker = require('../utils/getEnvironment')('type') === 'webworker'; |
||||
const setImage = require('./utils/setImage'); |
||||
const defaultParams = require('./constants/defaultParams'); |
||||
const { log, setLogging } = require('../utils/log'); |
||||
|
||||
/* |
||||
* Tesseract Module returned by TesseractCore. |
||||
*/ |
||||
let TessModule; |
||||
/* |
||||
* TessBaseAPI instance |
||||
*/ |
||||
let api = null; |
||||
let latestJob; |
||||
let adapter = {}; |
||||
let params = defaultParams; |
||||
|
||||
/**
 * Load and instantiate the Tesseract core module (once per worker).
 *
 * Any failure while fetching or instantiating the core is reported through
 * `res.reject`, so the job is always settled instead of surfacing as an
 * unhandled promise rejection.
 *
 * @param {object} packet - job packet carrying `workerId`, `jobId` and
 *   `payload.options.{corePath, logging}`
 * @param {object} res - job handle exposing `progress`, `resolve`, `reject`
 * @returns {Promise<void>}
 */
const load = async ({ workerId, jobId, payload: { options: { corePath, logging } } }, res) => {
  setLogging(logging);

  if (TessModule) {
    res.resolve({ loaded: true });
    return;
  }

  const fail = (err) => res.reject(err.toString());

  try {
    const Core = await adapter.getCore(corePath, res);

    res.progress({ workerId, status: 'initializing tesseract', progress: 0 });

    Core({
      // Recognition progress is routed to whichever job is the latest one.
      TesseractProgress(percent) {
        latestJob.progress({
          workerId,
          jobId,
          status: 'recognizing text',
          progress: Math.max(0, (percent - 30) / 70),
        });
      },
    }).then((tessModule) => {
      TessModule = tessModule;
      res.progress({ workerId, status: 'initialized tesseract', progress: 1 });
      res.resolve({ loaded: true });
    }, fail);
  } catch (err) {
    fail(err);
  }
};
||||
|
||||
/*
 * Call a method of the module's FS object and resolve the job with
 * whatever that method returns.
 */
const FS = (packet, res) => {
  const { workerId, payload } = packet;
  const { method, args } = payload;
  log(`[${workerId}]: FS.${method} with args ${args}`);
  const result = TessModule.FS[method](...args);
  res.resolve(result);
};
||||
|
||||
/*
 * Make the traineddata of every requested language available to the
 * module's file system.
 *
 * For each language the data is taken from the cache when allowed, otherwise
 * fetched from langPath (URL-like paths), read through the adapter (other
 * paths) or taken from the supplied object. Gzipped data is decompressed,
 * written to the module FS and, depending on cacheMethod, written back to
 * the cache.
 */
const loadLanguage = async ({
  workerId,
  payload: {
    langs,
    options: {
      langPath,
      dataPath,
      cachePath,
      cacheMethod,
      gzip = true,
    },
  },
},
res) => {
  // Non-http schemes that are still fetched rather than read via the adapter.
  const fetchableSchemes = ['moz-extension://', 'chrome-extension://', 'file://'];

  const loadAndGunzipFile = async (_lang) => {
    const givenAsCode = typeof _lang === 'string';
    const lang = givenAsCode ? _lang : _lang.code;
    const bypassCacheRead = ['refresh', 'none'].includes(cacheMethod);
    const readCache = bypassCacheRead ? () => Promise.resolve() : adapter.readCache;
    let data = null;

    try {
      const cached = await readCache(`${cachePath || '.'}/${lang}.traineddata`);
      if (typeof cached === 'undefined') {
        // Handled by the catch below, which loads from the source instead.
        throw Error('Not found in cache');
      }
      log(`[${workerId}]: Load ${lang}.traineddata from cache`);
      res.progress({ workerId, status: 'loading language traineddata (from cache)', progress: 0.5 });
      data = cached;
    } catch (e) {
      log(`[${workerId}]: Load ${lang}.traineddata from ${langPath}`);
      if (!givenAsCode) {
        data = _lang.data; // eslint-disable-line
      } else {
        /** When langPath is an URL */
        const isRemote = isURL(langPath)
          || fetchableSchemes.some((scheme) => langPath.startsWith(scheme));

        if (isRemote) {
          const fetchUrl = `${langPath}/${lang}.traineddata${gzip ? '.gz' : ''}`;
          const resp = await (isWebWorker ? fetch : adapter.fetch)(fetchUrl);
          if (!resp.ok) {
            throw Error(`Network error while fetching ${fetchUrl}. Response code: ${resp.status}`);
          }
          data = await resp.arrayBuffer();
        } else {
          data = await adapter.readCache(`${langPath}/${lang}.traineddata${gzip ? '.gz' : ''}`);
        }
      }
    }

    data = new Uint8Array(data);

    // Decompress only when the bytes are actually gzip.
    const type = fileType(data);
    if (type !== undefined && type.mime === 'application/gzip') {
      data = adapter.gunzip(data);
    }

    if (TessModule) {
      if (dataPath) {
        try {
          TessModule.FS.mkdir(dataPath);
        } catch (err) {
          res.reject(err.toString());
        }
      }
      TessModule.FS.writeFile(`${dataPath || '.'}/${lang}.traineddata`, data);
    }

    const shouldWriteCache = ['write', 'refresh', undefined].includes(cacheMethod);
    if (shouldWriteCache) {
      await adapter.writeCache(`${cachePath || '.'}/${lang}.traineddata`, data);
    }

    return data;
  };

  res.progress({ workerId, status: 'loading language traineddata', progress: 0 });
  try {
    const requested = typeof langs === 'string' ? langs.split('+') : langs;
    await Promise.all(requested.map(loadAndGunzipFile));
    res.progress({ workerId, status: 'loaded language traineddata', progress: 1 });
    res.resolve(langs);
  } catch (err) {
    res.reject(err.toString());
  }
};
||||
|
||||
/*
 * Apply parameters to the current API instance and remember them.
 *
 * Keys starting with `tessjs_` are not passed to SetVariable; they are only
 * merged into the stored params. `res` is optional.
 */
const setParameters = ({ payload: { params: _params } }, res) => {
  for (const key of Object.keys(_params)) {
    if (!key.startsWith('tessjs_')) {
      api.SetVariable(key, _params[key]);
    }
  }
  params = { ...params, ..._params };

  if (res !== undefined) {
    res.resolve(params);
  }
};
||||
|
||||
/*
 * (Re)create the TessBaseAPI instance for the given languages and reset the
 * parameters to their defaults.
 *
 * `langs` is either a '+'-joined string or an array whose entries are
 * language codes or `{ code, data }` objects. Only the code is used here:
 * loadLanguage stores each traineddata file under its code, which is the
 * name Init looks up.
 *
 * The job is settled exactly once: rejected when Init reports -1 or
 * anything throws, resolved otherwise.
 */
const initialize = ({
  workerId,
  payload: { langs: _langs, oem },
}, res) => {
  const langs = (typeof _langs === 'string')
    ? _langs
    : _langs.map((l) => ((typeof l === 'string') ? l : l.code)).join('+');

  try {
    res.progress({
      workerId, status: 'initializing api', progress: 0,
    });
    // Release the previous instance before replacing it.
    if (api !== null) {
      api.End();
    }
    api = new TessModule.TessBaseAPI();
    const status = api.Init(null, langs, oem);
    if (status === -1) {
      res.reject('initialization failed');
      // Stop here so a failed job is not resolved afterwards.
      return;
    }
    params = defaultParams;
    setParameters({ payload: { params } });
    res.progress({
      workerId, status: 'initialized api', progress: 1,
    });
    res.resolve();
  } catch (err) {
    res.reject(err.toString());
  }
};
||||
|
||||
/*
 * Run recognition on an image and resolve the job with the dumped result.
 *
 * `options.rectangle`, when it is a non-null object, restricts recognition
 * to that region. The pointer returned by setImage is released whether
 * recognition succeeds or throws.
 */
const recognize = ({ payload: { image, options: { rectangle: rec } } }, res) => {
  let ptr = null;
  try {
    ptr = setImage(TessModule, api, image);
    // typeof null is 'object', so null has to be excluded explicitly.
    if (rec !== null && typeof rec === 'object') {
      api.SetRectangle(rec.left, rec.top, rec.width, rec.height);
    }
    api.Recognize(null);
    res.resolve(dump(TessModule, api, params));
  } catch (err) {
    res.reject(err.toString());
  } finally {
    if (ptr !== null) {
      TessModule._free(ptr);
    }
  }
};
||||
|
||||
/*
 * Render the current recognition result to '/tesseract-ocr.pdf' on the
 * module FS and resolve the job with the file's contents.
 */
const getPDF = ({ payload: { title, textonly } }, res) => {
  const renderer = new TessModule.TessPDFRenderer('tesseract-ocr', '/', textonly);
  renderer.BeginDocument(title);
  renderer.AddImage(api);
  renderer.EndDocument();
  // NOTE(review): _free receives the renderer wrapper object here, not a raw
  // pointer; confirm whether TessModule.destroy was intended.
  TessModule._free(renderer);

  const pdf = TessModule.FS.readFile('/tesseract-ocr.pdf');
  res.resolve(pdf);
};
||||
|
||||
/*
 * Detect orientation and script of an image.
 *
 * Resolves with the best result's script and orientation data, or rejects
 * when DetectOS fails or anything throws.
 */
const detect = ({ payload: { image } }, res) => {
  try {
    const ptr = setImage(TessModule, api, image);
    const results = new TessModule.OSResults();
    const detected = api.DetectOS(results);

    if (!detected) {
      api.End();
      TessModule._free(ptr);
      res.reject('Failed to detect OS');
      return;
    }

    const { best_result: best } = results;
    const { orientation_id: oid, script_id: sid } = best;

    TessModule._free(ptr);

    res.resolve({
      tesseract_script_id: sid,
      script: results.unicharset.get_script_from_script_id(sid),
      script_confidence: best.sconfidence,
      // Lookup table from orientation id to degrees.
      orientation_degrees: [0, 270, 180, 90][oid],
      orientation_confidence: best.oconfidence,
    });
  } catch (err) {
    res.reject(err.toString());
  }
};
||||
|
||||
/*
 * End the API instance, if one exists, and confirm termination.
 */
const terminate = (_, res) => {
  const hasApi = api !== null;
  try {
    if (hasApi) {
      api.End();
    }
    res.resolve({ terminated: true });
  } catch (err) {
    res.reject(err.toString());
  }
};
||||
|
||||
/** |
||||
* dispatchHandlers |
||||
* |
||||
* @name dispatchHandlers |
||||
* @function worker data handler |
||||
* @access public |
||||
* @param {object} data |
||||
* @param {string} data.jobId - unique job id |
||||
* @param {string} data.action - action of the job, only recognize and detect for now |
||||
* @param {object} data.payload - data for the job |
||||
* @param {function} send - trigger job to work |
||||
*/ |
||||
exports.dispatchHandlers = (packet, send) => { |
||||
const res = (status, data) => { |
||||
send({ |
||||
...packet, |
||||
status, |
||||
data, |
||||
}); |
||||
}; |
||||
res.resolve = res.bind(this, 'resolve'); |
||||
res.reject = res.bind(this, 'reject'); |
||||
res.progress = res.bind(this, 'progress'); |
||||
|
||||
latestJob = res; |
||||
|
||||
try { |
||||
({ |
||||
load, |
||||
FS, |
||||
loadLanguage, |
||||
initialize, |
||||
setParameters, |
||||
recognize, |
||||
getPDF, |
||||
detect, |
||||
terminate, |
||||
})[packet.action](packet, res); |
||||
} catch (err) { |
||||
/** Prepare exception to travel through postMessage */ |
||||
res.reject(err.toString()); |
||||
} |
||||
}; |
||||
|
||||
/** |
||||
* setAdapter |
||||
* |
||||
* @name setAdapter |
||||
* @function |
||||
* @access public |
||||
* @param {object} adapter - implementation of the worker, different in browser and node environment |
||||
*/ |
||||
exports.setAdapter = (_adapter) => { |
||||
adapter = _adapter; |
||||
}; |
@ -0,0 +1,16 @@
@@ -0,0 +1,16 @@
|
||||
const util = require('util'); |
||||
const fs = require('fs'); |
||||
|
||||
module.exports = { |
||||
readCache: util.promisify(fs.readFile), |
||||
writeCache: util.promisify(fs.writeFile), |
||||
deleteCache: (path) => ( |
||||
util.promisify(fs.unlink)(path) |
||||
.catch(() => {}) |
||||
), |
||||
checkCache: (path) => ( |
||||
util.promisify(fs.access)(path, fs.F_OK) |
||||
.then((err) => (err === null)) |
||||
.catch(() => false) |
||||
), |
||||
}; |
@ -0,0 +1,20 @@
@@ -0,0 +1,20 @@
|
||||
const { simd } = require('wasm-feature-detect'); |
||||
|
||||
let TesseractCore = null; |
||||
/* |
||||
* getCore is a sync function to load and return |
||||
* TesseractCore. |
||||
*/ |
||||
module.exports = async (_, res) => { |
||||
if (TesseractCore === null) { |
||||
const simdSupport = await simd(); |
||||
res.progress({ status: 'loading tesseract core', progress: 0 }); |
||||
if (simdSupport) { |
||||
TesseractCore = require('tesseract.js-core/tesseract-core-simd'); |
||||
} else { |
||||
TesseractCore = require('tesseract.js-core/tesseract-core'); |
||||
} |
||||
res.progress({ status: 'loaded tesseract core', progress: 1 }); |
||||
} |
||||
return TesseractCore; |
||||
}; |
@ -0,0 +1 @@
@@ -0,0 +1 @@
|
||||
module.exports = require('zlib').gunzipSync; |
@ -0,0 +1,30 @@
@@ -0,0 +1,30 @@
|
||||
/** |
||||
* |
||||
* Tesseract Worker Script for Node |
||||
* |
||||
* @fileoverview Node worker implementation |
||||
* @author Kevin Kwok <antimatter15@gmail.com> |
||||
* @author Guillermo Webster <gui@mit.edu> |
||||
* @author Jerome Wu <jeromewus@gmail.com> |
||||
*/ |
||||
|
||||
const fetch = require('node-fetch'); |
||||
const { parentPort } = require('worker_threads'); |
||||
const worker = require('..'); |
||||
const getCore = require('./getCore'); |
||||
const gunzip = require('./gunzip'); |
||||
const cache = require('./cache'); |
||||
|
||||
/*
 * Forward every packet received from the parent thread to the shared
 * dispatcher; the dispatcher answers through parentPort.
 */
parentPort.on('message', (packet) => {
  const reply = (obj) => parentPort.postMessage(obj);
  worker.dispatchHandlers(packet, reply);
});

/*
 * Install the node adapter.
 */
worker.setAdapter({
  getCore,
  gunzip,
  fetch,
  ...cache,
});
@ -0,0 +1,201 @@
@@ -0,0 +1,201 @@
|
||||
/** |
||||
* |
||||
* Dump data to a big JSON tree |
||||
* |
||||
* @fileoverview dump data to JSON tree |
||||
* @author Kevin Kwok <antimatter15@gmail.com> |
||||
* @author Guillermo Webster <gui@mit.edu> |
||||
* @author Jerome Wu <jeromewus@gmail.com> |
||||
*/ |
||||
|
||||
/**
 * deindent
 *
 * The generated HOCR is excessively indented, so
 * we get rid of that indentation.
 *
 * When the first line starts with two spaces, two leading spaces are removed
 * from every line that has them; otherwise the text is returned unchanged.
 *
 * @name deindent
 * @function deindent string
 * @access public
 * @param {string} html - text to de-indent
 * @returns {string} the de-indented text
 */
const deindent = (html) => {
  const INDENT = '  ';
  const lines = html.split('\n');
  // The first line decides whether the document is indented at all.
  if (!lines[0].startsWith(INDENT)) {
    return html;
  }
  return lines
    .map((line) => (line.startsWith(INDENT) ? line.slice(INDENT.length) : line))
    .join('\n');
};
||||
|
||||
/** |
||||
* dump |
||||
* |
||||
* @name dump |
||||
* @function dump recognition result to a JSON object |
||||
* @access public |
||||
*/ |
||||
module.exports = (TessModule, api, { |
||||
tessjs_create_hocr, |
||||
tessjs_create_tsv, |
||||
tessjs_create_box, |
||||
tessjs_create_unlv, |
||||
tessjs_create_osd, |
||||
}) => { |
||||
const ri = api.GetIterator(); |
||||
const { |
||||
RIL_BLOCK, |
||||
RIL_PARA, |
||||
RIL_TEXTLINE, |
||||
RIL_WORD, |
||||
RIL_SYMBOL, |
||||
} = TessModule; |
||||
const blocks = []; |
||||
let block; |
||||
let para; |
||||
let textline; |
||||
let word; |
||||
let symbol; |
||||
|
||||
const enumToString = (value, prefix) => ( |
||||
Object.keys(TessModule) |
||||
.filter((e) => (e.startsWith(`${prefix}_`) && TessModule[e] === value)) |
||||
.map((e) => e.slice(prefix.length + 1))[0] |
||||
); |
||||
|
||||
ri.Begin(); |
||||
do { |
||||
if (ri.IsAtBeginningOf(RIL_BLOCK)) { |
||||
const poly = ri.BlockPolygon(); |
||||
let polygon = null; |
||||
// BlockPolygon() returns null when automatic page segmentation is off
|
||||
if (TessModule.getPointer(poly) > 0) { |
||||
const n = poly.get_n(); |
||||
const px = poly.get_x(); |
||||
const py = poly.get_y(); |
||||
polygon = []; |
||||
for (let i = 0; i < n; i += 1) { |
||||
polygon.push([px.getValue(i), py.getValue(i)]); |
||||
} |
||||
/* |
||||
* TODO: find out why _ptaDestroy doesn't work |
||||
*/ |
||||
// TessModule._ptaDestroy(TessModule.getPointer(poly));
|
||||
} |
||||
|
||||
block = { |
||||
paragraphs: [], |
||||
text: ri.GetUTF8Text(RIL_BLOCK), |
||||
confidence: ri.Confidence(RIL_BLOCK), |
||||
baseline: ri.getBaseline(RIL_BLOCK), |
||||
bbox: ri.getBoundingBox(RIL_BLOCK), |
||||
blocktype: enumToString(ri.BlockType(), 'PT'), |
||||
polygon, |
||||
}; |
||||
blocks.push(block); |
||||
} |
||||
if (ri.IsAtBeginningOf(RIL_PARA)) { |
||||
para = { |
||||
lines: [], |
||||
text: ri.GetUTF8Text(RIL_PARA), |
||||
confidence: ri.Confidence(RIL_PARA), |
||||
baseline: ri.getBaseline(RIL_PARA), |
||||
bbox: ri.getBoundingBox(RIL_PARA), |
||||
is_ltr: !!ri.ParagraphIsLtr(), |
||||
}; |
||||
block.paragraphs.push(para); |
||||
} |
||||
if (ri.IsAtBeginningOf(RIL_TEXTLINE)) { |
||||
textline = { |
||||
words: [], |
||||
text: ri.GetUTF8Text(RIL_TEXTLINE), |
||||
confidence: ri.Confidence(RIL_TEXTLINE), |
||||
baseline: ri.getBaseline(RIL_TEXTLINE), |
||||
bbox: ri.getBoundingBox(RIL_TEXTLINE), |
||||
}; |
||||
para.lines.push(textline); |
||||
} |
||||
if (ri.IsAtBeginningOf(RIL_WORD)) { |
||||
const fontInfo = ri.getWordFontAttributes(); |
||||
const wordDir = ri.WordDirection(); |
||||
word = { |
||||
symbols: [], |
||||
choices: [], |
||||
|
||||
text: ri.GetUTF8Text(RIL_WORD), |
||||
confidence: ri.Confidence(RIL_WORD), |
||||
baseline: ri.getBaseline(RIL_WORD), |
||||
bbox: ri.getBoundingBox(RIL_WORD), |
||||
|
||||
is_numeric: !!ri.WordIsNumeric(), |
||||
in_dictionary: !!ri.WordIsFromDictionary(), |
||||
direction: enumToString(wordDir, 'DIR'), |
||||
language: ri.WordRecognitionLanguage(), |
||||
|
||||
is_bold: fontInfo.is_bold, |
||||
is_italic: fontInfo.is_italic, |
||||
is_underlined: fontInfo.is_underlined, |
||||
is_monospace: fontInfo.is_monospace, |
||||
is_serif: fontInfo.is_serif, |
||||
is_smallcaps: fontInfo.is_smallcaps, |
||||
font_size: fontInfo.pointsize, |
||||
font_id: fontInfo.font_id, |
||||
font_name: fontInfo.font_name, |
||||
}; |
||||
const wc = new TessModule.WordChoiceIterator(ri); |
||||
do { |
||||
word.choices.push({ |
||||
text: wc.GetUTF8Text(), |
||||
confidence: wc.Confidence(), |
||||
}); |
||||
} while (wc.Next()); |
||||
TessModule.destroy(wc); |
||||
textline.words.push(word); |
||||
} |
||||
|
||||
// let image = null;
|
||||
// var pix = ri.GetBinaryImage(TessModule.RIL_SYMBOL)
|
||||
// var image = pix2array(pix);
|
||||
// // for some reason it seems that things stop working if you destroy pics
|
||||
// TessModule._pixDestroy(TessModule.getPointer(pix));
|
||||
if (ri.IsAtBeginningOf(RIL_SYMBOL)) { |
||||
symbol = { |
||||
choices: [], |
||||
image: null, |
||||
text: ri.GetUTF8Text(RIL_SYMBOL), |
||||
confidence: ri.Confidence(RIL_SYMBOL), |
||||
baseline: ri.getBaseline(RIL_SYMBOL), |
||||
bbox: ri.getBoundingBox(RIL_SYMBOL), |
||||
is_superscript: !!ri.SymbolIsSuperscript(), |
||||
is_subscript: !!ri.SymbolIsSubscript(), |
||||
is_dropcap: !!ri.SymbolIsDropcap(), |
||||
}; |
||||
word.symbols.push(symbol); |
||||
const ci = new TessModule.ChoiceIterator(ri); |
||||
do { |
||||
symbol.choices.push({ |
||||
text: ci.GetUTF8Text(), |
||||
confidence: ci.Confidence(), |
||||
}); |
||||
} while (ci.Next()); |
||||
// TessModule.destroy(i);
|
||||
} |
||||
} while (ri.Next(RIL_SYMBOL)); |
||||
TessModule.destroy(ri); |
||||
|
||||
return { |
||||
text: api.GetUTF8Text(), |
||||
hocr: tessjs_create_hocr === '1' ? deindent(api.GetHOCRText()) : null, |
||||
tsv: tessjs_create_tsv === '1' ? api.GetTSVText() : null, |
||||
box: tessjs_create_box === '1' ? api.GetBoxText() : null, |
||||
unlv: tessjs_create_unlv === '1' ? api.GetUNLVText() : null, |
||||
osd: tessjs_create_osd === '1' ? api.GetOsdText() : null, |
||||
confidence: api.MeanTextConf(), |
||||
blocks, |
||||
psm: enumToString(api.GetPageSegMode(), 'PSM'), |
||||
oem: enumToString(api.oem(), 'OEM'), |
||||
version: api.Version(), |
||||
}; |
||||
}; |
@ -0,0 +1,63 @@
@@ -0,0 +1,63 @@
|
||||
const bmp = require('bmp-js'); |
||||
const fileType = require('file-type'); |
||||
|
||||
/** |
||||
* setImage |
||||
* |
||||
* @name setImage |
||||
* @function set image in tesseract for recognition |
||||
* @access public |
||||
*/ |
||||
module.exports = (TessModule, api, image) => { |
||||
const buf = Buffer.from(Array.from({ ...image, length: Object.keys(image).length })); |
||||
const type = fileType(buf); |
||||
let bytesPerPixel = 0; |
||||
let data = null; |
||||
let pix = null; |
||||
let w = 0; |
||||
let h = 0; |
||||
|
||||
const exif = buf.slice(0, 500).toString().match(/\x01\x12\x00\x03\x00\x00\x00\x01\x00(.)/)?.[1]?.charCodeAt(0) || 1; |
||||
|
||||
/* |
||||
* Leptonica supports uncompressed but not compressed bmp files |
||||
* @see https://github.com/DanBloomberg/leptonica/issues/607#issuecomment-1068802516
|
||||
* We therefore use bmp-js to process all bmp files |
||||
*/ |
||||
if (type && type.mime === 'image/bmp') { |
||||
const bmpBuf = bmp.decode(buf); |
||||
data = TessModule._malloc(bmpBuf.data.length * Uint8Array.BYTES_PER_ELEMENT); |
||||
TessModule.HEAPU8.set(bmpBuf.data, data); |
||||
w = bmpBuf.width; |
||||
h = bmpBuf.height; |
||||
bytesPerPixel = 4; |
||||
} else { |
||||
const ptr = TessModule._malloc(buf.length * Uint8Array.BYTES_PER_ELEMENT); |
||||
TessModule.HEAPU8.set(buf, ptr); |
||||
pix = TessModule._pixReadMem(ptr, buf.length); |
||||
if (TessModule.getValue(pix + (7 * 4), 'i32') === 0) { |
||||
/* |
||||
* Set a yres default value to prevent warning from tesseract |
||||
* See kMinCredibleResolution in tesseract/src/ccstruct/publictypes.h |
||||
*/ |
||||
TessModule.setValue(pix + (7 * 4), 300, 'i32'); |
||||
} |
||||
[w, h] = Array(2).fill(0) |
||||
.map((v, idx) => ( |
||||
TessModule.getValue(pix + (idx * 4), 'i32') |
||||
)); |
||||
} |
||||
|
||||
/* |
||||
* As some image format (ex. bmp) is not supported natiely by tesseract, |
||||
* sometimes it will not return pix directly, but data and bytesPerPixel |
||||
* for another SetImage usage. |
||||
* |
||||
*/ |
||||
if (data === null) { |
||||
api.SetImage(pix, undefined, undefined, undefined, undefined, exif); |
||||
} else { |
||||
api.SetImage(data, w, h, bytesPerPixel, w * bytesPerPixel, exif); |
||||
} |
||||
return data === null ? pix : data; |
||||
}; |
@ -0,0 +1,18 @@
@@ -0,0 +1,18 @@
|
||||
const resolveURL = require('resolve-url'); |
||||
const { version } = require('../../../package.json'); |
||||
const defaultOptions = require('../../constants/defaultOptions'); |
||||
|
||||
/* |
||||
* Default options for browser worker |
||||
*/ |
||||
module.exports = { |
||||
...defaultOptions, |
||||
workerPath: (typeof process !== 'undefined' && process.env.TESS_ENV === 'development') |
||||
? resolveURL(`/dist/worker.dev.js?nocache=${Math.random().toString(36).slice(3)}`) |
||||
: `https://unpkg.com/tesseract.js@v${version}/dist/worker.min.js`, |
||||
/* |
||||
* If browser doesn't support WebAssembly, |
||||
* load ASM version instead |
||||
*/ |
||||
corePath: null, |
||||
}; |
@ -0,0 +1,24 @@
@@ -0,0 +1,24 @@
|
||||
/** |
||||
* |
||||
* Tesseract Worker adapter for browser |
||||
* |
||||
* @fileoverview Tesseract Worker adapter for browser |
||||
* @author Kevin Kwok <antimatter15@gmail.com> |
||||
* @author Guillermo Webster <gui@mit.edu> |
||||
* @author Jerome Wu <jeromewus@gmail.com> |
||||
*/ |
||||
const defaultOptions = require('./defaultOptions'); |
||||
const spawnWorker = require('./spawnWorker'); |
||||
const terminateWorker = require('./terminateWorker'); |
||||
const onMessage = require('./onMessage'); |
||||
const send = require('./send'); |
||||
const loadImage = require('./loadImage'); |
||||
|
||||
module.exports = { |
||||
defaultOptions, |
||||
spawnWorker, |
||||
terminateWorker, |
||||
onMessage, |
||||
send, |
||||
loadImage, |
||||
}; |
@ -0,0 +1,68 @@
@@ -0,0 +1,68 @@
|
||||
const resolveURL = require('resolve-url'); |
||||
|
||||
/**
 * readFromBlobOrFile
 *
 * Read a Blob or File with a FileReader.
 *
 * @name readFromBlobOrFile
 * @function
 * @access private
 * @param {Blob|File} blob - object to read
 * @returns {Promise<ArrayBuffer>} fulfils with the reader's result, rejects
 *   with an Error carrying the reader's error code
 */
const readFromBlobOrFile = (blob) => (
  new Promise((resolve, reject) => {
    const reader = new FileReader();

    reader.onload = () => resolve(reader.result);
    reader.onerror = (event) => {
      const { code } = event.target.error;
      reject(Error(`File could not be read! Code=${code}`));
    };

    reader.readAsArrayBuffer(blob);
  })
);
||||
|
||||
/**
 * loadImage
 *
 * Turn any supported image source into a Uint8Array of file bytes.
 *
 * Supported sources: a base64 `data:image/...` string, any other string
 * (fetched as a URL), an IMG element (its `src`), a VIDEO element (its
 * `poster`), a CANVAS element (its blob), a File or a Blob. Anything else is
 * passed to the Uint8Array constructor as is.
 *
 * @name loadImage
 * @function load image from different source
 * @access private
 * @param {*} image - image source
 * @returns {Promise<Uint8Array|string>} the image bytes, or the string
 *   'undefined' when no image was given
 */
const loadImage = async (image) => {
  if (typeof image === 'undefined') {
    return 'undefined';
  }

  let data = image;

  if (typeof image === 'string') {
    // Base64 Image
    if (/data:image\/([a-zA-Z]*);base64,([^"]*)/.test(image)) {
      data = Uint8Array.from(atob(image.split(',')[1]), (c) => c.charCodeAt(0));
    } else {
      const resp = await fetch(resolveURL(image));
      data = await resp.arrayBuffer();
    }
  } else if (image instanceof HTMLElement) {
    if (image.tagName === 'IMG') {
      data = await loadImage(image.src);
    }
    if (image.tagName === 'VIDEO') {
      data = await loadImage(image.poster);
    }
    if (image.tagName === 'CANVAS') {
      // Only the callback is wrapped in a promise; the read is awaited here
      // so a failure rejects loadImage instead of leaving it pending.
      const blob = await new Promise((resolve) => {
        image.toBlob(resolve);
      });
      data = await readFromBlobOrFile(blob);
    }
  } else if (image instanceof File || image instanceof Blob) {
    data = await readFromBlobOrFile(image);
  }

  return new Uint8Array(data);
};
||||
|
||||
module.exports = loadImage; |
@ -0,0 +1,5 @@
@@ -0,0 +1,5 @@
|
||||
module.exports = (worker, handler) => { |
||||
worker.onmessage = ({ data }) => { // eslint-disable-line
|
||||
handler(data); |
||||
}; |
||||
}; |
@ -0,0 +1,10 @@
@@ -0,0 +1,10 @@
|
||||
/** |
||||
* send |
||||
* |
||||
* @name send |
||||
* @function send packet to worker and create a job |
||||
* @access public |
||||
*/ |
||||
module.exports = async (worker, packet) => { |
||||
worker.postMessage(packet); |
||||
}; |
@ -0,0 +1,20 @@
@@ -0,0 +1,20 @@
|
||||
/** |
||||
* spawnWorker |
||||
* |
||||
* @name spawnWorker |
||||
* @function create a new Worker in browser |
||||
* @access public |
||||
*/ |
||||
module.exports = ({ workerPath, workerBlobURL }) => { |
||||
let worker; |
||||
if (Blob && URL && workerBlobURL) { |
||||
const blob = new Blob([`importScripts("${workerPath}");`], { |
||||
type: 'application/javascript', |
||||
}); |
||||
worker = new Worker(URL.createObjectURL(blob)); |
||||
} else { |
||||
worker = new Worker(workerPath); |
||||
} |
||||
|
||||
return worker; |
||||
}; |
@ -0,0 +1,10 @@
@@ -0,0 +1,10 @@
|
||||
/** |
||||
* terminateWorker |
||||
* |
||||
* @name terminateWorker |
||||
* @function terminate worker |
||||
* @access public |
||||
*/ |
||||
module.exports = (worker) => { |
||||
worker.terminate(); |
||||
}; |
@ -0,0 +1,10 @@
@@ -0,0 +1,10 @@
|
||||
const path = require('path'); |
||||
const defaultOptions = require('../../constants/defaultOptions'); |
||||
|
||||
/* |
||||
* Default options for node worker |
||||
*/ |
||||
module.exports = { |
||||
...defaultOptions, |
||||
workerPath: path.join(__dirname, '..', '..', 'worker-script', 'node', 'index.js'), |
||||
}; |
@ -0,0 +1,24 @@
@@ -0,0 +1,24 @@
|
||||
/** |
||||
* |
||||
* Tesseract Worker impl. for node (using worker_threads) |
||||
* |
||||
* @fileoverview Tesseract Worker impl. for node |
||||
* @author Kevin Kwok <antimatter15@gmail.com> |
||||
* @author Guillermo Webster <gui@mit.edu> |
||||
* @author Jerome Wu <jeromewus@gmail.com> |
||||
*/ |
||||
const defaultOptions = require('./defaultOptions'); |
||||
const spawnWorker = require('./spawnWorker'); |
||||
const terminateWorker = require('./terminateWorker'); |
||||
const onMessage = require('./onMessage'); |
||||
const send = require('./send'); |
||||
const loadImage = require('./loadImage'); |
||||
|
||||
// Node adapter: bundles the node-specific implementations required above
// under the names the rest of the library looks them up by.
const nodeAdapter = {
  defaultOptions,
  spawnWorker,
  terminateWorker,
  onMessage,
  send,
  loadImage,
};

module.exports = nodeAdapter;
@ -0,0 +1,35 @@
@@ -0,0 +1,35 @@
|
||||
const util = require('util'); |
||||
const fs = require('fs'); |
||||
const fetch = require('node-fetch'); |
||||
const isURL = require('is-url'); |
||||
|
||||
const readFile = util.promisify(fs.readFile); |
||||
|
||||
/** |
||||
* loadImage |
||||
* |
||||
* @name loadImage |
||||
* @function load image from different source |
||||
* @access public |
||||
*/ |
||||
module.exports = async (image) => { |
||||
let data = image; |
||||
if (typeof image === 'undefined') { |
||||
return image; |
||||
} |
||||
|
||||
if (typeof image === 'string') { |
||||
if (isURL(image) || image.startsWith('moz-extension://') || image.startsWith('chrome-extension://') || image.startsWith('file://')) { |
||||
const resp = await fetch(image); |
||||
data = await resp.arrayBuffer(); |
||||
} else if (/data:image\/([a-zA-Z]*);base64,([^"]*)/.test(image)) { |
||||
data = Buffer.from(image.split(',')[1], 'base64'); |
||||
} else { |
||||
data = await readFile(image); |
||||
} |
||||
} else if (Buffer.isBuffer(image)) { |
||||
data = image; |
||||
} |
||||
|
||||
return new Uint8Array(data); |
||||
}; |
@ -0,0 +1,3 @@
@@ -0,0 +1,3 @@
|
||||
module.exports = (worker, handler) => { |
||||
worker.on('message', handler); |
||||
}; |
@ -0,0 +1,10 @@
@@ -0,0 +1,10 @@
|
||||
/** |
||||
* send |
||||
* |
||||
* @name send |
||||
* @function send packet to worker and create a job |
||||
* @access public |
||||
*/ |
||||
module.exports = async (worker, packet) => { |
||||
worker.postMessage(packet); |
||||
}; |
@ -0,0 +1,10 @@
@@ -0,0 +1,10 @@
|
||||
const { Worker } = require('worker_threads'); |
||||
|
||||
/** |
||||
* spawnWorker |
||||
* |
||||
* @name spawnWorker |
||||
* @function fork a new process in node |
||||
* @access public |
||||
*/ |
||||
module.exports = ({ workerPath }) => new Worker(workerPath); |
@ -0,0 +1,10 @@
@@ -0,0 +1,10 @@
|
||||
/** |
||||
* terminateWorker |
||||
* |
||||
* @name terminateWorker |
||||
* @function kill worker |
||||
* @access public |
||||
*/ |
||||
module.exports = (worker) => { |
||||
worker.terminate(); |
||||
}; |
@ -0,0 +1,6 @@
@@ -0,0 +1,6 @@
|
||||
{ |
||||
"rules": { |
||||
"no-undef": 0, |
||||
"camelcase": 0 |
||||
} |
||||
} |
@ -0,0 +1,18 @@
@@ -0,0 +1,18 @@
|
||||
<html> |
||||
<head> |
||||
<meta charset="utf-8"> |
||||
<link rel="stylesheet" href="../node_modules/mocha/mocha.css"> |
||||
</head> |
||||
<body> |
||||
<div id="mocha"></div> |
||||
<script src="../node_modules/mocha/mocha.js"></script> |
||||
<script src="../node_modules/expect.js/index.js"></script> |
||||
<script src="../dist/tesseract.dev.js"></script> |
||||
<script src="./constants.js"></script> |
||||
<script>mocha.setup('bdd');</script> |
||||
<script src="./FS.test.js"></script> |
||||
<script> |
||||
mocha.run(); |
||||
</script> |
||||
</body> |
||||
</html> |
@ -0,0 +1,37 @@
@@ -0,0 +1,37 @@
|
||||
// Tests for the worker's virtual file system helpers. Tesseract, OPTIONS,
// SIMPLE_TEXT, TIMEOUT and the mocha/expect globals are expected to be
// provided by the surrounding test harness.
const { createWorker } = Tesseract;
const FS_WAIT = 500;
const worker = createWorker(OPTIONS);

before(function cb() {
  // Loading the worker can be slow, so the mocha timeout is disabled here.
  this.timeout(0);
  return worker.load();
});

// describe() callbacks must be synchronous: mocha ignores a returned promise.
describe('FS', () => {
  // Promise-based pause, so the delay between write and read is part of the
  // promise chain that mocha waits on.
  const wait = (ms) => new Promise((resolve) => {
    setTimeout(resolve, ms);
  });

  // Each test returns a promise covering every assertion. Previously the
  // assertions ran inside forEach(async ...) + setTimeout after the test had
  // already finished, so a failing expectation could never fail the test.
  it('should write and read text from FS (using FS only)', () => (
    Promise.all([
      SIMPLE_TEXT,
    ].map(async (text) => {
      const path = 'tmp.txt';
      await worker.FS('writeFile', [path, text]);
      await wait(FS_WAIT);
      const { data } = await worker.FS('readFile', [path]);
      await worker.FS('unlink', [path]);
      expect(data.toString()).to.be(text);
    }))
  )).timeout(TIMEOUT);

  it('should write and read text from FS (using writeFile, readFile)', () => (
    Promise.all([
      SIMPLE_TEXT,
    ].map(async (text) => {
      const path = 'tmp2.txt';
      await worker.writeText(path, text);
      await wait(FS_WAIT);
      const { data } = await worker.readText(path);
      await worker.removeFile(path);
      expect(data.toString()).to.be(text);
    }))
  )).timeout(TIMEOUT);
});
After Width: | Height: | Size: 91 KiB |
After Width: | Height: | Size: 2.4 KiB |
After Width: | Height: | Size: 13 KiB |
After Width: | Height: | Size: 169 KiB |
After Width: | Height: | Size: 1011 B |
After Width: | Height: | Size: 4.9 KiB |
After Width: | Height: | Size: 1.3 KiB |
After Width: | Height: | Size: 3.7 KiB |
After Width: | Height: | Size: 69 KiB |