Compare commits

..

54 Commits

Author SHA1 Message Date
jeromewu d6e9177290
Merge pull request #389 from moebrowne/security/sri 5 years ago
jeromewu 3d1aa49c33
Merge pull request #390 from moebrowne/feature/update-image-preview-first 5 years ago
MoeBrowne a86cb05519
Updates the image preview to update immediately 5 years ago
MoeBrowne 95e74bdbed
Adds an SRI attribute to the tesseract.js dependency 5 years ago
Jerome Wu 363ecd7b22 Upgrade to v2.0.0 5 years ago
Jerome Wu 9863bd8f74 Upgrade to tesseract.js@2.0.0-beta.1 6 years ago
jeromewu 18dde5cbac
Upgrade to tesseract.js v2.0.0-alpha.11 6 years ago
jeromewu ebe263ad0f
Update index.html 6 years ago
Jerome Wu 91c39ceaa5 Upgrade to alpha.8 6 years ago
Jerome Wu 24ebdeacb8 Upgrade to latest version of tesseract.js 6 years ago
Jerome Wu d58b778d0b Add redirection from http to https 6 years ago
Jerome Wu 233cc8bdd3 Update font url to be https 6 years ago
Jerome Wu b2c819b219 Upgrade to tesseract.js@2.0.0-alpha.3 6 years ago
Jerome Wu 8ef238714b Replace jsdeliver with unpack 6 years ago
Jerome Wu 2ec4a0433f Upgrade to tesseract.js@2.0.0-alpha.2 6 years ago
jeromewu 1a4da18407
Revert changes as it doesn't work 6 years ago
jeromewu 232d8e854d
Fix unable to fetch chi_sim traineddata issue 6 years ago
jeromewu 7128fc5572
Add one more name 6 years ago
jeromewu ab5776f2a9
Fix typo 6 years ago
jeromewu 8978faa04a
Update usage from v1 to v2 6 years ago
jeromewu ef1ac48467
Add another GA tracker and upgrade tesseract.js 6 years ago
Guillermo 8e755745b7 add link 9 years ago
Kevin Kwok 0709222de3 new update 9 years ago
Kevin Kwok 2f3fd699d7 Merge branch 'gh-pages' of github.com:naptha/tesseract.js into gh-pages 9 years ago
Kevin Kwok d00d676fc9 updating index 9 years ago
bijection 7f23b4c073 Merge pull request #24 from pborreli/typos 9 years ago
Pascal Borreli 476fa56084 Fixed typos 9 years ago
Guillermo 1896dc0807 fix progress 9 years ago
Guillermo 325747e0a7 ad ga to demo page 9 years ago
Kevin Kwok be29f9ab92 words 9 years ago
Kevin Kwok 244d0b98d7 fixing demo 9 years ago
Guillermo 8a79897546 make image smaller 9 years ago
Guillermo cb9e2d5928 demo 9 years ago
Guillermo 200b770c97 interim 9 years ago
Guillermo 0243eb6bd0 walp 9 years ago
Guillermo 8527a70b36 simplify 9 years ago
Guillermo 923d75a047 nautical theme 9 years ago
Guillermo 26d7681e72 new progress bar, fixed onload issue, changed to tenso.rs 10 years ago
Guillermo 2d85c0fbe8 fixed some safari flex problems 10 years ago
Guillermo 5660a8c454 added lato 10 years ago
Guillermo 73f381f595 meme is still broken 10 years ago
Guillermo 5b02e236c1 wow it all works kinda 10 years ago
Guillermo 1270e278c9 geen 10 years ago
Guillermo 8d23fa0bdf oops fixed relative url problem 10 years ago
Guillermo abf7ae1190 push all the things 10 years ago
Guillermo 379781c8a5 merch 10 years ago
Guillermo e23d75f0a9 moved main login into separate file 10 years ago
bijection 1073a8ca61 woo i can type! 10 years ago
Guillermo 61415571bd yay new internet page\! 10 years ago
Guillermo 58b816a40b I like turtles 10 years ago
Guillermo 78d70d73d4 changed to remote webworkers 10 years ago
Guillermo 3a47aed70f added local language model caching 10 years ago
Guillermo b2ff251c7d added progress callback 10 years ago
Guillermo 0f0ed005e5 browserified worker.js 10 years ago
  1. 17
      .eslintrc
  2. 9
      .github/FUNDING.yml
  3. 38
      .github/ISSUE_TEMPLATE/bug_report.md
  4. 20
      .github/ISSUE_TEMPLATE/feature_request.md
  5. 5
      .github/SECURITY.md
  6. 71
      .github/workflows/codeql-analysis.yml
  7. 29
      .github/workflows/node.js.yml
  8. 9
      .gitignore
  9. 2
      .gitpod.Dockerfile
  10. 9
      .gitpod.yml
  11. 3
      .npmignore
  12. 1
      CNAME
  13. 201
      LICENSE.md
  14. 197
      README.md
  15. 70
      animation/animate.js
  16. 43
      animation/dimensions.js
  17. 79
      animation/hypercube.js
  18. 56
      animation/mouse.js
  19. 27
      animation/raf.js
  20. 329
      codemirror/codemirror.css
  21. 8735
      codemirror/codemirror.js
  22. 704
      codemirror/javascript.js
  23. 72
      codemirror/runmode.js
  24. 384
      codemirror/xml.js
  25. 84
      css/explorer.css
  26. 313
      css/main.css
  27. 435
      css/wau.css
  28. 229
      demo.js
  29. 448
      docs/api.md
  30. 226
      docs/examples.md
  31. 42
      docs/faq.md
  32. 18
      docs/image-format.md
  33. BIN
      docs/images/demo.gif
  34. BIN
      docs/images/tesseract.png
  35. BIN
      docs/images/video-demo.gif
  36. 38
      docs/local-installation.md
  37. 3
      docs/tesseract_lang_list.md
  38. 37
      examples/browser/basic-edge.html
  39. 19
      examples/browser/basic.html
  40. 33
      examples/browser/benchmark.html
  41. 162
      examples/browser/demo.html
  42. 52
      examples/browser/download-pdf.html
  43. BIN
      examples/data/meditations.jpg
  44. BIN
      examples/data/testocr.png
  45. BIN
      examples/data/tyger.jpg
  46. 27
      examples/node/benchmark.js
  47. 13
      examples/node/detect.js
  48. 22
      examples/node/download-pdf.js
  49. 20
      examples/node/recognize.js
  50. 392
      explorer/explorer.js
  51. 19602
      explorer/react.js
  52. 16
      explorer/react.min.js
  53. BIN
      img/bg.jpg
  54. BIN
      img/bg.png
  55. BIN
      img/chi_sim.png
  56. BIN
      img/deu.png
  57. BIN
      img/eng.jpg
  58. BIN
      img/eng.png
  59. BIN
      img/eng_bw.png
  60. BIN
      img/fork.png
  61. BIN
      img/hayden.jpg
  62. BIN
      img/keyboard.png
  63. BIN
      img/logo.png
  64. BIN
      img/logo.psd
  65. BIN
      img/logo_small.png
  66. BIN
      img/logoblue.png
  67. BIN
      img/logowhite.png
  68. BIN
      img/meme.png
  69. BIN
      img/redstars.png
  70. BIN
      img/run.png
  71. BIN
      img/rus.png
  72. BIN
      img/shake.png
  73. BIN
      img/splash.png
  74. BIN
      img/starblur.jpg
  75. BIN
      img/stars.jpeg
  76. 110
      index.html
  77. 163
      index_dev.html
  78. 162
      index_old.html
  79. 242
      main.js
  80. 227
      main_dev.js
  81. 16860
      package-lock.json
  82. 89
      package.json
  83. 61
      perlinish/perlinish.js
  84. 5
      scripts/.eslintrc
  85. 13
      scripts/rollup.esm.js
  86. 17
      scripts/server.js
  87. 9
      scripts/test-helper.js
  88. 28
      scripts/webpack.config.common.js
  89. 48
      scripts/webpack.config.dev.js
  90. 36
      scripts/webpack.config.prod.js
  91. 28
      src/Tesseract.js
  92. 12
      src/constants/OEM.js
  93. 19
      src/constants/PSM.js
  94. 5
      src/constants/config.js
  95. 13
      src/constants/defaultOptions.js
  96. 218
      src/constants/languages.js
  97. 21
      src/createJob.js
  98. 80
      src/createScheduler.js
  99. 198
      src/createWorker.js
  100. 191
      src/explorer.js
  101. Some files were not shown because too many files have changed in this diff Show More

17
.eslintrc

@ -1,17 +0,0 @@ @@ -1,17 +0,0 @@
{
"extends": "airbnb-base",
"parser": "babel-eslint",
"env": {
"browser": true,
"node": true,
"mocha": true,
"worker": true
},
"rules": {
"no-underscore-dangle": 0,
"no-console": 0,
"global-require": 0,
"camelcase": 0,
"no-control-regex": 0
}
}

9
.github/FUNDING.yml

@ -1,9 +0,0 @@ @@ -1,9 +0,0 @@
# These are supported funding model platforms
github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
patreon: # Replace with a single Patreon username
open_collective: tesseractjs
ko_fi: # Replace with a single Ko-fi username
tidelift: npm/tesseract.js
community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
custom: ["https://etherscan.io/address/0x74ace8c74535d6dac03ebdc708ca2fba54796ef2"]

38
.github/ISSUE_TEMPLATE/bug_report.md

@ -1,38 +0,0 @@ @@ -1,38 +0,0 @@
---
name: Bug report
about: Create a report to help us improve
title: ''
labels: ''
assignees: ''
---
**Describe the bug**
A clear and concise description of what the bug is.
**To Reproduce**
Steps to reproduce the behavior:
1. Go to '...'
2. Click on '....'
3. Scroll down to '....'
4. See error
**Expected behavior**
A clear and concise description of what you expected to happen.
**Screenshots**
If applicable, add screenshots to help explain your problem.
**Desktop (please complete the following information):**
- OS: [e.g. iOS]
- Browser [e.g. chrome, safari]
- Version [e.g. 22]
**Smartphone (please complete the following information):**
- Device: [e.g. iPhone6]
- OS: [e.g. iOS8.1]
- Browser [e.g. stock browser, safari]
- Version [e.g. 22]
**Additional context**
Add any other context about the problem here.

20
.github/ISSUE_TEMPLATE/feature_request.md

@ -1,20 +0,0 @@ @@ -1,20 +0,0 @@
---
name: Feature request
about: Suggest an idea for this project
title: ''
labels: ''
assignees: ''
---
**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
**Describe the solution you'd like**
A clear and concise description of what you want to happen.
**Describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.
**Additional context**
Add any other context or screenshots about the feature request here.

5
.github/SECURITY.md

@ -1,5 +0,0 @@ @@ -1,5 +0,0 @@
## Security contact information
To report a security vulnerability, please use the
[Tidelift security contact](https://tidelift.com/security).
Tidelift will coordinate the fix and disclosure.

71
.github/workflows/codeql-analysis.yml

@ -1,71 +0,0 @@ @@ -1,71 +0,0 @@
# For most projects, this workflow file will not need changing; you simply need
# to commit it to your repository.
#
# You may wish to alter this file to override the set of languages analyzed,
# or to provide custom queries or build logic.
name: "CodeQL"
on:
push:
branches: [master]
pull_request:
# The branches below must be a subset of the branches above
branches: [master]
schedule:
- cron: '0 17 * * 6'
jobs:
analyze:
name: Analyze
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
# Override automatic language detection by changing the below list
# Supported options are ['csharp', 'cpp', 'go', 'java', 'javascript', 'python']
language: ['javascript']
# Learn more...
# https://docs.github.com/en/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#overriding-automatic-language-detection
steps:
- name: Checkout repository
uses: actions/checkout@v2
with:
# We must fetch at least the immediate parents so that if this is
# a pull request then we can checkout the head.
fetch-depth: 2
# If this run was triggered by a pull request event, then checkout
# the head of the pull request instead of the merge commit.
- run: git checkout HEAD^2
if: ${{ github.event_name == 'pull_request' }}
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@v1
with:
languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file.
# By default, queries listed here will override any specified in a config file.
# Prefix the list here with "+" to use these queries and those in the config file.
# queries: ./path/to/local/query, your-org/your-repo/queries@main
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
# If this step fails, then you should remove it and run the build manually (see below)
- name: Autobuild
uses: github/codeql-action/autobuild@v1
# ℹ Command-line programs to run using the OS shell.
# 📚 https://git.io/JvXDl
# ✏ If the Autobuild fails above, remove it and uncomment the following three lines
# and modify them (or add more) to build your code if your project
# uses a compiled language
#- run: |
# make bootstrap
# make release
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v1

29
.github/workflows/node.js.yml

@ -1,29 +0,0 @@ @@ -1,29 +0,0 @@
# This workflow will do a clean install of node dependencies, build the source code and run tests across different versions of node
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-nodejs-with-github-actions
name: Node.js CI
on:
push:
branches: [ master ]
pull_request:
branches: [ master ]
jobs:
build:
runs-on: ubuntu-latest
strategy:
matrix:
node-version: [14.x, 16.x]
steps:
- uses: actions/checkout@v2
- name: Use Node.js ${{ matrix.node-version }}
uses: actions/setup-node@v1
with:
node-version: ${{ matrix.node-version }}
- run: npm ci
- run: npm run lint
- run: npm test

9
.gitignore vendored

@ -1,10 +1,3 @@ @@ -1,10 +1,3 @@
.DS_Store
node_modules/*
yarn.lock
tesseract.dev.js
worker.dev.js
/*.traineddata
/examples/**/*.traineddata
.nyc_output
dist/
*.swp
explorer/.module-cache/*

2
.gitpod.Dockerfile

@ -1,2 +0,0 @@ @@ -1,2 +0,0 @@
FROM gitpod/workspace-full
RUN sudo apt-get update && sudo apt-get install -y libgtk-3-0 libx11-xcb1 libnss3 libxss1 libasound2

9
.gitpod.yml

@ -1,9 +0,0 @@ @@ -1,9 +0,0 @@
image:
file: .gitpod.Dockerfile
tasks:
- command: gp await-port 3000 && sleep 3 && gp preview $(gp url 3000)/examples/browser/demo.html
- init: npm install
command: npm start
ports:
- port: 3000
onOpen: ignore

3
.npmignore

@ -1,3 +0,0 @@ @@ -1,3 +0,0 @@
tests
.nyc_output
.github

1
CNAME

@ -0,0 +1 @@ @@ -0,0 +1 @@
tesseract.projectnaptha.com

201
LICENSE.md

@ -1,201 +0,0 @@ @@ -1,201 +0,0 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "{}"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright {yyyy} {name of copyright owner}
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

197
README.md

@ -1,197 +0,0 @@ @@ -1,197 +0,0 @@
<p align="center">
<a href="https://tesseract.projectnaptha.com/"><img width="256px" height="256px" alt="Tesseract.js" src="./docs/images/tesseract.png"></a>
</p>
![Lint & Test](https://github.com/naptha/tesseract.js/workflows/Node.js%20CI/badge.svg)
![CodeQL](https://github.com/naptha/tesseract.js/workflows/CodeQL/badge.svg)
[![Gitpod Ready-to-Code](https://img.shields.io/badge/Gitpod-ready--to--code-blue?logo=gitpod)](https://github.com/naptha/tesseract.js)
[![Financial Contributors on Open Collective](https://opencollective.com/tesseractjs/all/badge.svg?label=financial+contributors)](https://opencollective.com/tesseractjs) [![npm version](https://badge.fury.io/js/tesseract.js.svg)](https://badge.fury.io/js/tesseract.js)
[![Maintenance](https://img.shields.io/badge/Maintained%3F-yes-green.svg)](https://github.com/naptha/tesseract.js/graphs/commit-activity)
[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
[![Code Style](https://badgen.net/badge/code%20style/airbnb/ff5a5f?icon=airbnb)](https://github.com/airbnb/javascript)
[![Downloads Total](https://img.shields.io/npm/dt/tesseract.js.svg)](https://www.npmjs.com/package/tesseract.js)
[![Downloads Month](https://img.shields.io/npm/dm/tesseract.js.svg)](https://www.npmjs.com/package/tesseract.js)
Tesseract.js is a javascript library that gets words in [almost any language](./docs/tesseract_lang_list.md) out of images. ([Demo](http://tesseract.projectnaptha.com/))
Image Recognition
[![fancy demo gif](./docs/images/demo.gif)](http://tesseract.projectnaptha.com)
Video Real-time Recognition
<p align="center">
<a href="https://github.com/jeromewu/tesseract.js-video"><img alt="Tesseract.js Video" src="./docs/images/video-demo.gif"></a>
</p>
Tesseract.js wraps an [emscripten](https://github.com/kripken/emscripten) [port](https://github.com/naptha/tesseract.js-core) of the [Tesseract](https://github.com/tesseract-ocr/tesseract) [OCR](https://en.wikipedia.org/wiki/Optical_character_recognition) Engine.
It works in the browser using [webpack](https://webpack.js.org/) or plain script tags with a [CDN](#CDN) and on the server with [Node.js](https://nodejs.org/en/).
After you [install it](#installation), using it is as simple as:
```javascript
import Tesseract from 'tesseract.js';
Tesseract.recognize(
'https://tesseract.projectnaptha.com/img/eng_bw.png',
'eng',
{ logger: m => console.log(m) }
).then(({ data: { text } }) => {
console.log(text);
})
```
Or, in a more imperative style:
```javascript
import { createWorker } from 'tesseract.js';
const worker = createWorker({
logger: m => console.log(m)
});
(async () => {
await worker.load();
await worker.loadLanguage('eng');
await worker.initialize('eng');
const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png');
console.log(text);
await worker.terminate();
})();
```
[Check out the docs](#documentation) for a full explanation of the API.
## Major changes in v3
- Significantly faster performance
- Runtime reduction of 84% for Browser and 96% for Node.js when recognizing the [example images](./examples/data)
- Upgrade to Tesseract v5.1.0 (using emscripten 3.1.18)
- Added SIMD-enabled build for supported devices
- Added support:
- Node.js version 18
- Removed support:
- ASM.js version, any other old versions of Tesseract.js-core (<3.0.0)
- Node.js versions 10 and 12
## Major changes in v2
- Upgrade to tesseract v4.1.1 (using emscripten 1.39.10 upstream)
- Support multiple languages at the same time, eg: eng+chi\_tra for English and Traditional Chinese
- Supported image formats: png, jpg, bmp, pbm
- Support WebAssembly (falls back to ASM.js when the browser doesn't support it)
- Support Typescript
Read a story about v2: <a href="https://jeromewu.github.io/why-i-refactor-tesseract.js-v2/">Why I refactor tesseract.js v2?</a><br>
Check the <a href="https://github.com/naptha/tesseract.js/tree/support/1.x">support/1.x</a> branch for version 1
## Installation
Tesseract.js works with a `<script>` tag via local copy or CDN, with webpack via `npm` and on Node.js with `npm/yarn`.
### CDN
```html
<!-- v2 -->
<script src='https://unpkg.com/tesseract.js@v2.1.0/dist/tesseract.min.js'></script>
<!-- v1 -->
<script src='https://unpkg.com/tesseract.js@1.0.19/src/index.js'></script>
```
After including the script the `Tesseract` variable will be globally available.
### Node.js
**Tesseract.js v3 requires Node.js v14 or higher**
```shell
# For v3
npm install tesseract.js
yarn add tesseract.js
# For v2
npm install tesseract.js@2
yarn add tesseract.js@2
```
## Documentation
* [Examples](./docs/examples.md)
* [Image Format](./docs/image-format.md)
* [API](./docs/api.md)
* [Local Installation](./docs/local-installation.md)
* [FAQ](./docs/faq.md)
## Use tesseract.js the way you like!
- Offline Version: https://github.com/jeromewu/tesseract.js-offline
- Electron Version: https://github.com/jeromewu/tesseract.js-electron
- Custom Traineddata: https://github.com/jeromewu/tesseract.js-custom-traineddata
- Chrome Extension #1: https://github.com/jeromewu/tesseract.js-chrome-extension
- Chrome Extension #2: https://github.com/fxnoob/image-to-text
- Firefox Extension: https://github.com/gnonio/korporize
- With Vue: https://github.com/jeromewu/tesseract.js-vue-app
- With Angular: https://github.com/jeromewu/tesseract.js-angular-app
- With React: https://github.com/jeromewu/tesseract.js-react-app
- Typescript: https://github.com/jeromewu/tesseract.js-typescript
- Video Real-time Recognition: https://github.com/jeromewu/tesseract.js-video
## Contributing
### Development
To run a development copy of Tesseract.js do the following:
```shell
# First we clone the repository
git clone https://github.com/naptha/tesseract.js.git
cd tesseract.js
# Then we install the dependencies
npm install
# And finally we start the development server
npm start
```
The development server will be available at http://localhost:3000/examples/browser/demo.html in your favorite browser.
It will automatically rebuild `tesseract.dev.js` and `worker.dev.js` when you change files in the **src** folder.
### Online Setup with a single Click
You can use Gitpod (a free, online, VS Code-like IDE) for contributing. With a single click it launches a ready-to-code workspace with the build and start scripts already running. Within a few seconds it spins up the dev server, so you can start contributing straight away without wasting any time.
[![Open in Gitpod](https://gitpod.io/button/open-in-gitpod.svg)](https://gitpod.io/#https://github.com/naptha/tesseract.js/blob/master/examples/browser/demo.html)
### Building Static Files
To build the compiled static files just execute the following:
```shell
npm run build
```
This will output the files into the `dist` directory.
## Contributors
### Code Contributors
This project exists thanks to all the people who contribute. [[Contribute](CONTRIBUTING.md)].
<a href="https://github.com/naptha/tesseract.js/graphs/contributors"><img src="https://opencollective.com/tesseractjs/contributors.svg?width=890&button=false" /></a>
### Financial Contributors
Become a financial contributor and help us sustain our community. [[Contribute](https://opencollective.com/tesseractjs/contribute)]
#### Individuals
<a href="https://opencollective.com/tesseractjs"><img src="https://opencollective.com/tesseractjs/individuals.svg?width=890"></a>
#### Organizations
Support this project with your organization. Your logo will show up here with a link to your website. [[Contribute](https://opencollective.com/tesseractjs/contribute)]
<a href="https://opencollective.com/tesseractjs/organization/0/website"><img src="https://opencollective.com/tesseractjs/organization/0/avatar.svg"></a>
<a href="https://opencollective.com/tesseractjs/organization/1/website"><img src="https://opencollective.com/tesseractjs/organization/1/avatar.svg"></a>
<a href="https://opencollective.com/tesseractjs/organization/2/website"><img src="https://opencollective.com/tesseractjs/organization/2/avatar.svg"></a>
<a href="https://opencollective.com/tesseractjs/organization/3/website"><img src="https://opencollective.com/tesseractjs/organization/3/avatar.svg"></a>
<a href="https://opencollective.com/tesseractjs/organization/4/website"><img src="https://opencollective.com/tesseractjs/organization/4/avatar.svg"></a>
<a href="https://opencollective.com/tesseractjs/organization/5/website"><img src="https://opencollective.com/tesseractjs/organization/5/avatar.svg"></a>
<a href="https://opencollective.com/tesseractjs/organization/6/website"><img src="https://opencollective.com/tesseractjs/organization/6/avatar.svg"></a>
<a href="https://opencollective.com/tesseractjs/organization/7/website"><img src="https://opencollective.com/tesseractjs/organization/7/avatar.svg"></a>
<a href="https://opencollective.com/tesseractjs/organization/8/website"><img src="https://opencollective.com/tesseractjs/organization/8/avatar.svg"></a>
<a href="https://opencollective.com/tesseractjs/organization/9/website"><img src="https://opencollective.com/tesseractjs/organization/9/avatar.svg"></a>

70
animation/animate.js

@ -0,0 +1,70 @@ @@ -0,0 +1,70 @@
// DOM handles and shared state for the splash-logo animation.
var canvas = document.getElementById('logo-canvas');
var ctx = canvas.getContext('2d');
var logo_wrap = document.getElementById('logo-wrap');
var splash = document.getElementById('splash');
var logo_img = document.getElementById('logo-img');
// Used as both stroke and fill style by main().
var color = "white";
// Timestamp of the most recently scheduled frame (written by init and main).
var lasttime;
// While truthy, fixdim() leaves the canvas dimensions untouched.
var freeze;
/**
 * Re-measures the viewport, then resizes and fades the splash logo to match.
 *
 * - The logo image width scales with the square root of the viewport width.
 * - The logo wrapper's opacity goes from 1 to 0 as the bottom edge of the
 *   splash section moves from its own height down to half that height
 *   (relative to the viewport top).
 * - Unless `freeze` is truthy, the square canvas is resized to 4/15 of the
 *   logo width, with its backing store scaled by the device pixel ratio.
 */
function fixdim() {
  dimensions.update();
  var displaywidth = Math.sqrt(dimensions.width) * 18;

  var rect = splash.getBoundingClientRect();
  var bottom = rect.top + rect.height;
  var fadestart = rect.height / 2;
  logo_wrap.style.opacity = Math.max(Math.min((bottom - fadestart) / fadestart, 1), 0);
  logo_img.style.width = displaywidth + 'px';

  if (!freeze) {
    var displayheight = displaywidth * 4 / 15;
    // devicePixelRatio is missing in older browsers; without a fallback the
    // canvas would be sized to NaN.
    var pixelRatio = window.devicePixelRatio || 1;
    canvas.width = displayheight * pixelRatio;
    canvas.style.width = displayheight + 'px';
    canvas.height = displayheight * pixelRatio;
    canvas.style.height = displayheight + 'px';
  }
}
// Re-run the sizing/fading logic whenever the page scrolls.
addEventListener('scroll', fixdim)
// Tesseract scale: main() passes gh * canvas.height as the `size` option.
var gh = .12;
/**
 * Per-frame render callback: refreshes layout, clears the canvas, draws the
 * tesseract for the given timestamp and schedules the next frame.
 *
 * Three rotation angles advance with time (t, 2t, 3t where t = time / 10000);
 * two more follow the pointer position (mouse.x / 100 and mouse.y / 100).
 *
 * @param {number} time - timestamp supplied by requestAnimationFrame.
 */
function main(time) {
  fixdim();
  ctx.clearRect(0, 0, canvas.width, canvas.height);

  var t = time / 10000;
  ctx.strokeStyle = ctx.fillStyle = color;

  var m = tesseractwithrotation(t, t * 2, t * 3, mouse.x / 100, mouse.y / 100, 0);
  drawtesseract(ctx, m, {
    x: canvas.width / 2,
    y: canvas.height / 2,
    size: gh * canvas.height,
    line_width: 2
  });

  lasttime = time;
  requestAnimationFrame(main);
}
// Bootstrap: on the first animation frame size everything once, remember the
// starting timestamp, then hand over to the main render loop.
requestAnimationFrame(function init(startTime) {
  fixdim();
  lasttime = startTime;
  requestAnimationFrame(main);
});

43
animation/dimensions.js

@ -0,0 +1,43 @@ @@ -0,0 +1,43 @@
/**
 * Cached viewport size with change detection.
 *
 * `width` / `height` hold the values seen by the last update() call.
 * getWidth() / getHeight() read the live value, preferring the window's inner
 * size, then the document element's client size, then the body's, else 0.
 * Each getter tests the same property it is about to return.
 */
var dimensions = {
  width: 0,
  height: 0,

  /** @returns {number} current viewport width, or 0 if it cannot be read. */
  getWidth: function () {
    if (window.innerWidth) {
      return window.innerWidth;
    }
    if (document.documentElement && document.documentElement.clientWidth) {
      return document.documentElement.clientWidth;
    }
    if (document.body) {
      return document.body.clientWidth;
    }
    return 0;
  },

  /** @returns {number} current viewport height, or 0 if it cannot be read. */
  getHeight: function () {
    if (window.innerHeight) {
      return window.innerHeight;
    }
    if (document.documentElement && document.documentElement.clientHeight) {
      return document.documentElement.clientHeight;
    }
    if (document.body) {
      return document.body.clientHeight;
    }
    return 0;
  },

  /**
   * Refreshes the cached size.
   * @returns {boolean} true when width or height changed since the last call.
   */
  update: function () {
    var curW = this.getWidth();
    var curH = this.getHeight();
    if (curW === this.width && curH === this.height) {
      return false;
    }
    this.width = curW;
    this.height = curH;
    return true;
  }
};

79
animation/hypercube.js

@ -0,0 +1,79 @@ @@ -0,0 +1,79 @@
/**
 * Rotates point `p` in place within the plane of coordinates c1/c2 using the
 * matrix [[cos a, sin a], [-sin a, cos a]].
 *
 * @param {Object} p  - point whose properties c1 and c2 are overwritten.
 * @param {number} a  - angle in radians.
 * @param {string} c1 - name of the first coordinate.
 * @param {string} c2 - name of the second coordinate.
 */
function app1(p, a, c1, c2) {
  var cosA = Math.cos(a);
  var sinA = Math.sin(a);
  var first = p[c1];
  var second = p[c2];
  p[c1] = cosA * first + sinA * second;
  p[c2] = -sinA * first + cosA * second;
}
/**
 * Rotates point `p` in place within the plane of coordinates c1/c2 using the
 * matrix [[cos a, -sin a], [sin a, cos a]] (the opposite sense to app1).
 *
 * @param {Object} p  - point whose properties c1 and c2 are overwritten.
 * @param {number} a  - angle in radians.
 * @param {string} c1 - name of the first coordinate.
 * @param {string} c2 - name of the second coordinate.
 */
function app2(p, a, c1, c2) {
  var cosA = Math.cos(a);
  var sinA = Math.sin(a);
  var first = p[c1];
  var second = p[c2];
  p[c1] = cosA * first - sinA * second;
  p[c2] = sinA * first + cosA * second;
}
// Cache for tesseractedges(); filled on first use.
var _edges
/**
 * Returns the edge list of the tesseract as pairs of vertex indices.
 *
 * Two vertices of the unrotated tesseract are joined by an edge when exactly
 * three of their four coordinates (x, y, z, w) are equal. The list is computed
 * once and the same array is returned on every later call.
 *
 * @returns {Array<Array<number>>} pairs [i, j] with i < j.
 */
function tesseractedges() {
  if (_edges) return _edges;

  var vertices = tesseractwithrotation(0, 0, 0, 0, 0, 0);
  var axes = ['x', 'y', 'z', 'w'];
  var found = [];
  for (var a = 0; a < vertices.length; a++) {
    for (var b = a + 1; b < vertices.length; b++) {
      var shared = 0;
      for (var n = 0; n < 4; n++) {
        if (vertices[a][axes[n]] === vertices[b][axes[n]]) shared++;
      }
      if (shared === 3) found.push([a, b]);
    }
  }
  _edges = found;
  return _edges;
}
/**
 * Builds the 16 vertices of a tesseract (every coordinate is -1 or 1) and
 * rotates each one through six planes, in this order:
 * app1 on x/y by a, app1 on y/z by b, app1 on x/w by c,
 * app2 on x/z by d, app2 on y/w by e, app2 on z/w by f.
 *
 * Vertex i takes its x, y, z, w signs from bits 0..3 of i.
 *
 * @returns {Array<{x:number,y:number,z:number,w:number}>} the rotated vertices.
 */
function tesseractwithrotation(a, b, c, d, e, f) {
  var vertices = [];
  for (var bits = 0; bits < 16; bits++) {
    var vertex = {
      x: (bits & 1) ? 1 : -1,
      y: (bits & 2) ? 1 : -1,
      z: (bits & 4) ? 1 : -1,
      w: (bits & 8) ? 1 : -1
    };
    app1(vertex, a, 'x', 'y');
    app1(vertex, b, 'y', 'z');
    app1(vertex, c, 'x', 'w');
    app2(vertex, d, 'x', 'z');
    app2(vertex, e, 'y', 'w');
    app2(vertex, f, 'z', 'w');
    vertices.push(vertex);
  }
  return vertices;
}
/**
 * Flattens a 4D point onto the plane: z is folded into x and w into y, each
 * weighted by sqrt(2), and the result is scaled by `size`.
 *
 * @param {{x:number,y:number,z:number,w:number}} point
 * @param {number} size - scale factor.
 * @returns {{x:number,y:number}}
 */
function project(point, size) {
  var flatX = point.x + Math.SQRT2 * point.z;
  var flatY = point.y + Math.SQRT2 * point.w;
  return { x: flatX * size, y: flatY * size };
}
/**
 * Draws a tesseract on a 2D canvas context: an optional filled disc at every
 * vertex, then all edges as a single stroked path.
 *
 * @param {Object} ctx       - 2D canvas context; the caller chooses fill/stroke styles.
 * @param {Array}  tesseract - vertices, e.g. the result of tesseractwithrotation().
 * @param {Object} opts
 * @param {number} opts.x             - horizontal offset added to every projected point.
 * @param {number} opts.y             - vertical offset added to every projected point.
 * @param {number} opts.size          - scale handed to project().
 * @param {number} [opts.line_width]  - stroke width; defaults to 1.
 * @param {number} [opts.corner_radius] - vertex disc radius. When missing or
 *   not positive no discs are drawn (a missing radius reached arc() as NaN
 *   before, which the canvas ignores).
 */
function drawtesseract(ctx, tesseract, opts) {
  var edges = tesseractedges();

  // Project every vertex once; both passes below reuse the result.
  var points = [];
  for (var i = 0; i < tesseract.length; i++) {
    var flat = project(tesseract[i], opts.size);
    points.push({ x: flat.x + opts.x, y: flat.y + opts.y });
  }

  var radius = opts.corner_radius;
  if (radius > 0) {
    for (var c = 0; c < points.length; c++) {
      ctx.beginPath();
      ctx.arc(points[c].x, points[c].y, radius, 0, 2 * Math.PI);
      ctx.fill();
    }
  }

  ctx.lineWidth = opts.line_width || 1;
  ctx.beginPath();
  for (var e = 0; e < edges.length; e++) {
    var from = points[edges[e][0]];
    var to = points[edges[e][1]];
    ctx.moveTo(from.x, from.y);
    ctx.lineTo(to.x, to.y);
  }
  ctx.stroke();
}

56
animation/mouse.js

@ -0,0 +1,56 @@ @@ -0,0 +1,56 @@
/**
 * Shared pointer state, updated by the mouse/touch handlers below.
 *
 *   x, y      - last recorded pointer position
 *   direction - angle (radians) from the drag start point to the pointer
 *   start     - position recorded by the most recent down()
 *   dragging  - true between down() and up()
 *
 * Every method refers to `mouse` explicitly rather than `this`, so the
 * methods keep working when passed unbound as event listeners.
 */
var mouse = {
  x: 0,
  y: 0,
  direction: 0,
  start: { x: 0, y: 0 },
  dragging: false,

  // Records a new position and recomputes the direction from the start point.
  set: function (x, y) {
    mouse.x = x;
    mouse.y = y;
    mouse.direction = Math.atan2(y - mouse.start.y, x - mouse.start.x);
  },

  // Extracts coordinates from an event, trying pageX, offsetX, layerX and
  // finally the first target touch; the first truthy source wins.
  coords: function (e) {
    if (e.pageX) {
      mouse.set(e.pageX, e.pageY);
      return;
    }
    if (e.offsetX) {
      mouse.set(e.offsetX, e.offsetY);
      return;
    }
    if (e.layerX) {
      mouse.set(e.layerX, e.layerY);
      return;
    }
    if (e.targetTouches && e.targetTouches.length > 0) {
      var touch = e.targetTouches[0];
      mouse.set(touch.pageX, touch.pageY);
    }
  },

  // Press: record the position, make it the drag origin and start dragging.
  down: function (e) {
    mouse.coords(e);
    mouse.start.x = mouse.x;
    mouse.start.y = mouse.y;
    mouse.dragging = true;
  },

  // Release: record the final position and stop dragging.
  up: function (e) {
    mouse.coords(e);
    mouse.dragging = false;
  }
};
// Feed both touch and mouse input into the shared `mouse` state. The third
// argument (true) registers each handler for the capture phase.
document.addEventListener("touchstart", mouse.down, true);
document.addEventListener("touchend", mouse.up, true);
document.addEventListener("touchmove", mouse.coords, true);
document.addEventListener("mousedown", mouse.down, true);
document.addEventListener("mouseup", mouse.up, true);
document.addEventListener("mousemove", mouse.coords, true);

27
animation/raf.js

@ -0,0 +1,27 @@ @@ -0,0 +1,27 @@
// http://paulirish.com/2011/requestanimationframe-for-smart-animating/
// http://my.opera.com/emoller/blog/2011/12/20/requestanimationframe-for-smart-er-animating
// requestAnimationFrame polyfill by Erik Möller. fixes from Paul Irish and Tino Zijdel
// MIT license
;(function () {
  // Nothing to install outside a browser-like environment.
  if (typeof window == "undefined") return;

  var previousTick = 0;
  var prefixes = ['ms', 'moz', 'webkit', 'o'];

  // Adopt the first vendor-prefixed implementation, if there is no native one.
  var i = 0;
  while (i < prefixes.length && !window.requestAnimationFrame) {
    var prefix = prefixes[i];
    window.requestAnimationFrame = window[prefix + 'RequestAnimationFrame'];
    window.cancelAnimationFrame = window[prefix + 'CancelAnimationFrame']
      || window[prefix + 'CancelRequestAnimationFrame'];
    ++i;
  }

  // Timer-based fallback aiming at one callback every 16 ms.
  if (!window.requestAnimationFrame) {
    window.requestAnimationFrame = function (callback, element) {
      var now = new Date().getTime();
      var delay = Math.max(0, 16 - (now - previousTick));
      var handle = window.setTimeout(function () { callback(now + delay); }, delay);
      previousTick = now + delay;
      return handle;
    };
  }

  if (!window.cancelAnimationFrame) {
    window.cancelAnimationFrame = function (id) { clearTimeout(id); };
  }
}());

329
codemirror/codemirror.css

@ -0,0 +1,329 @@ @@ -0,0 +1,329 @@
/* BASICS */
.CodeMirror {
/* Set height, width, borders, and global font properties here */
font-family: monospace;
height: auto;
color: rgb(156, 154, 193);
padding: 10px;
padding-top: 5px;
}
/* PADDING */
.CodeMirror-lines {
padding: 4px 0; /* Vertical padding around content */
}
.CodeMirror pre {
padding: 0 4px; /* Horizontal padding of content */
}
.CodeMirror-scrollbar-filler, .CodeMirror-gutter-filler {
background-color: white; /* The little square between H and V scrollbars */
}
/* GUTTER */
.CodeMirror-gutters {
border-right: 1px solid #ddd;
background-color: #f7f7f7;
white-space: nowrap;
}
.CodeMirror-linenumbers {}
.CodeMirror-linenumber {
padding: 0 3px 0 5px;
min-width: 20px;
text-align: right;
color: #999;
white-space: nowrap;
}
.CodeMirror-guttermarker { color: black; }
.CodeMirror-guttermarker-subtle { color: #999; }
/* CURSOR */
.CodeMirror div.CodeMirror-cursor {
border-left: 1px solid black;
}
/* Shown when moving in bi-directional text */
.CodeMirror div.CodeMirror-secondarycursor {
border-left: 1px solid silver;
}
.CodeMirror.cm-fat-cursor div.CodeMirror-cursor {
width: auto;
border: 0;
background: #7e7;
}
.CodeMirror.cm-fat-cursor div.CodeMirror-cursors {
z-index: 1;
}
.cm-animate-fat-cursor {
width: auto;
border: 0;
-webkit-animation: blink 1.06s steps(1) infinite;
-moz-animation: blink 1.06s steps(1) infinite;
animation: blink 1.06s steps(1) infinite;
}
@-moz-keyframes blink {
0% { background: #7e7; }
50% { background: none; }
100% { background: #7e7; }
}
@-webkit-keyframes blink {
0% { background: #7e7; }
50% { background: none; }
100% { background: #7e7; }
}
@keyframes blink {
0% { background: #7e7; }
50% { background: none; }
100% { background: #7e7; }
}
/* Can style cursor different in overwrite (non-insert) mode */
div.CodeMirror-overwrite div.CodeMirror-cursor {}
.cm-tab { display: inline-block; text-decoration: inherit; }
.CodeMirror-ruler {
border-left: 1px solid #ccc;
position: absolute;
}
/* DEFAULT THEME */
.cm-s-default .cm-header {color: blue;}
.cm-s-default .cm-quote {color: #090;}
.cm-negative {color: #d44;}
.cm-positive {color: #292;}
.cm-header, .cm-strong {font-weight: bold;}
.cm-em {font-style: italic;}
.cm-link {text-decoration: underline;}
.cm-strikethrough {text-decoration: line-through;}
.cm-s-default .cm-keyword {color: #0DAEFF;}
.cm-s-default .cm-atom {color: #219;}
.cm-s-default .cm-number {color: #164;}
.cm-s-default .cm-def {color: #00f;}
.cm-s-default .cm-variable,
.cm-s-default .cm-punctuation {
color: black;
}
.cm-s-default .cm-property {color: #4CA2F2;}
.cm-s-default .cm-operator {}
.cm-s-default .cm-variable-2 {color: #05a;}
.cm-s-default .cm-variable-3 {color: #085;}
.cm-s-default .cm-comment {color: #DB0000;}
.cm-s-default .cm-string {color: #5426C9;}
.cm-s-default .cm-string-2 {color: #f50;}
.cm-s-default .cm-meta {color: #555;}
.cm-s-default .cm-qualifier {color: #555;}
.cm-s-default .cm-builtin {color: #30a;}
.cm-s-default .cm-bracket {color: #997;}
.cm-s-default .cm-tag {color: #050216;}
.cm-s-default .cm-attribute {color: #00c;}
.cm-s-default .cm-hr {color: #999;}
.cm-s-default .cm-link {color: #00c;}
.cm-s-default .cm-error {color: #f00;}
.cm-invalidchar {color: #f00;}
.CodeMirror-composing { border-bottom: 2px solid; }
/* Default styles for common addons */
div.CodeMirror span.CodeMirror-matchingbracket {color: #0f0;}
div.CodeMirror span.CodeMirror-nonmatchingbracket {color: #f22;}
.CodeMirror-matchingtag { background: rgba(255, 150, 0, .3); }
.CodeMirror-activeline-background {background: #e8f2ff;}
/* STOP */
/* The rest of this file contains styles related to the mechanics of
the editor. You probably shouldn't touch them. */
.CodeMirror {
position: relative;
overflow: hidden;
background: transparent;
}
.CodeMirror-scroll {
overflow: scroll !important; /* Things will break if this is overridden */
/* 30px is the magic margin used to hide the element's real scrollbars */
/* See overflow: hidden in .CodeMirror */
margin-bottom: -30px; margin-right: -30px;
padding-bottom: 30px;
/*height: 100%;*/
outline: none; /* Prevent dragging from highlighting the element */
position: relative;
}
.CodeMirror-sizer {
position: relative;
border-right: 30px solid transparent;
}
/* The fake, visible scrollbars. Used to force redraw during scrolling
before actual scrolling happens, thus preventing shaking and
flickering artifacts. */
.CodeMirror-vscrollbar, .CodeMirror-hscrollbar, .CodeMirror-scrollbar-filler, .CodeMirror-gutter-filler {
position: absolute;
z-index: 6;
display: none;
}
.CodeMirror-vscrollbar {
right: 0; top: 0;
overflow-x: hidden;
overflow-y: scroll;
}
.CodeMirror-hscrollbar {
bottom: 0; left: 0;
overflow-y: hidden;
overflow-x: scroll;
}
.CodeMirror-scrollbar-filler {
right: 0; bottom: 0;
}
.CodeMirror-gutter-filler {
left: 0; bottom: 0;
}
.CodeMirror-gutters {
position: absolute; left: 0; top: 0;
z-index: 3;
}
.CodeMirror-gutter {
white-space: normal;
height: 100%;
display: inline-block;
margin-bottom: -30px;
/* Hack to make IE7 behave */
*zoom:1;
*display:inline;
}
.CodeMirror-gutter-wrapper {
position: absolute;
z-index: 4;
height: 100%;
}
.CodeMirror-gutter-elt {
position: absolute;
cursor: default;
z-index: 4;
}
.CodeMirror-gutter-wrapper {
-webkit-user-select: none;
-moz-user-select: none;
user-select: none;
}
.CodeMirror-lines {
cursor: text;
min-height: 1px; /* prevents collapsing before first draw */
}
.CodeMirror pre {
/* Reset some styles that the rest of the page might have set */
-moz-border-radius: 0; -webkit-border-radius: 0; border-radius: 0;
border-width: 0;
background: transparent;
font-family: inherit;
font-size: inherit;
margin: 0;
white-space: pre;
word-wrap: normal;
line-height: inherit;
color: inherit;
z-index: 2;
position: relative;
overflow: visible;
-webkit-tap-highlight-color: transparent;
}
.CodeMirror-wrap pre {
word-wrap: break-word;
white-space: pre-wrap;
word-break: normal;
}
.CodeMirror-linebackground {
position: absolute;
left: 0; right: 0; top: 0; bottom: 0;
z-index: 0;
}
.CodeMirror-linewidget {
position: relative;
z-index: 2;
overflow: auto;
}
.CodeMirror-widget {}
.CodeMirror-code {
outline: none;
}
/* Force content-box sizing for the elements where we expect it */
.CodeMirror-scroll,
.CodeMirror-sizer,
.CodeMirror-gutter,
.CodeMirror-gutters,
.CodeMirror-linenumber {
-moz-box-sizing: content-box;
box-sizing: content-box;
}
.CodeMirror-measure {
position: absolute;
width: 100%;
height: 0;
overflow: hidden;
visibility: hidden;
}
.CodeMirror-measure pre { position: static; }
.CodeMirror div.CodeMirror-cursor {
position: absolute;
border-right: none;
width: 0;
}
div.CodeMirror-cursors {
visibility: hidden;
position: relative;
z-index: 3;
}
.CodeMirror-focused div.CodeMirror-cursors {
visibility: visible;
}
.CodeMirror-selected { background: #d9d9d9; }
.CodeMirror-focused .CodeMirror-selected { background: #d7d4f0; }
.CodeMirror-crosshair { cursor: crosshair; }
.CodeMirror ::selection { background: #d7d4f0; }
.CodeMirror ::-moz-selection { background: #d7d4f0; }
/* Search-match highlight. The opaque colour is a fallback; the rgba()
declaration after it takes over wherever rgba colours are supported. */
.cm-searching {
background: #ffa;
background: rgba(255, 255, 0, .4);
}
/* IE7 hack to prevent it from returning funny offsetTops on the spans */
.CodeMirror span { *vertical-align: text-bottom; }
/* Used to force a border model for a node */
.cm-force-border { padding-right: .1px; }
@media print {
/* Hide the cursor when printing */
.CodeMirror div.CodeMirror-cursors {
visibility: hidden;
}
}
/* See issue #2901 */
.cm-tab-wrap-hack:after { content: ''; }
/* Help users use markselection to safely style text background */
span.CodeMirror-selectedtext { background: none; }

8735
codemirror/codemirror.js

File diff suppressed because it is too large Load Diff

704
codemirror/javascript.js

@ -0,0 +1,704 @@ @@ -0,0 +1,704 @@
// CodeMirror, copyright (c) by Marijn Haverbeke and others
// Distributed under an MIT license: http://codemirror.net/LICENSE
// TODO actually recognize syntax of TypeScript constructs
(function(mod) {
if (typeof exports == "object" && typeof module == "object") // CommonJS
mod(require("../../lib/codemirror"));
else if (typeof define == "function" && define.amd) // AMD
define(["../../lib/codemirror"], mod);
else // Plain browser env
mod(CodeMirror);
})(function(CodeMirror) {
"use strict";
CodeMirror.defineMode("javascript", function(config, parserConfig) {
var indentUnit = config.indentUnit;
var statementIndent = parserConfig.statementIndent;
var jsonldMode = parserConfig.jsonld;
var jsonMode = parserConfig.json || jsonldMode;
var isTS = parserConfig.typescript;
var wordRE = parserConfig.wordCharacters || /[\w$\xa1-\uffff]/;
// Tokenizer
// Keyword table, built once by this immediately-invoked function. Each word
// maps to a {type, style} descriptor: tokenBase() passes both to ret(), so
// `type` is what the parser combinators see and `style` is the token style.
var keywords = function(){
function kw(type) {return {type: type, style: "keyword"};}
// A, B and C are the shared "keyword a" / "keyword b" / "keyword c" classes
// that statement() and expressionInner() dispatch on.
var A = kw("keyword a"), B = kw("keyword b"), C = kw("keyword c");
var operator = kw("operator"), atom = {type: "atom", style: "atom"};
var jsKeywords = {
"if": kw("if"), "while": A, "with": A, "else": B, "do": B, "try": B, "finally": B,
"return": C, "break": C, "continue": C, "new": C, "delete": C, "throw": C, "debugger": C,
"var": kw("var"), "const": kw("var"), "let": kw("var"),
"function": kw("function"), "catch": kw("catch"),
"for": kw("for"), "switch": kw("switch"), "case": kw("case"), "default": kw("default"),
"in": operator, "typeof": operator, "instanceof": operator,
"true": atom, "false": atom, "null": atom, "undefined": atom, "NaN": atom, "Infinity": atom,
"this": kw("this"), "module": kw("module"), "class": kw("class"), "super": kw("atom"),
"yield": C, "export": kw("export"), "import": kw("import"), "extends": C
};
// Extend the 'normal' keywords with the TypeScript language extensions
if (isTS) {
var type = {type: "variable", style: "variable-3"};
var tsKeywords = {
// object-like things
"interface": kw("interface"),
"extends": kw("extends"),
"constructor": kw("constructor"),
// scope modifiers
"public": kw("public"),
"private": kw("private"),
"protected": kw("protected"),
"static": kw("static"),
// types
"string": type, "number": type, "bool": type, "any": type
};
// TypeScript entries override same-named JavaScript ones (e.g. "extends").
for (var attr in tsKeywords) {
jsKeywords[attr] = tsKeywords[attr];
}
}
return jsKeywords;
}();
// Characters that tokenBase() accumulates into a single operator token.
var isOperatorChar = /[+\-*&%=<>!?|~^]/;
// A JSON-LD keyword (such as @context) immediately followed by a closing
// double quote; tried by tokenString() when jsonldMode is on.
var isJsonldKeyword = /^@(context|id|value|language|type|container|list|set|reverse|index|base|vocab|graph)"/;
// Advances `stream` past the body of a regexp literal: stops right after the
// first "/" that is neither escaped by a backslash nor inside a [...] set, or
// at the end of the stream if there is none.
function readRegexp(stream) {
  var afterBackslash = false;
  var insideSet = false;
  for (var ch = stream.next(); ch != null; ch = stream.next()) {
    if (afterBackslash) {
      // The escaped character is skipped without being interpreted.
      afterBackslash = false;
      continue;
    }
    if (ch == "/" && !insideSet) return;
    if (ch == "[") insideSet = true;
    else if (insideSet && ch == "]") insideSet = false;
    afterBackslash = ch == "\\";
  }
}
// Scratch slots shared by the tokenizers: rather than allocating a result
// object per token, ret() stores the token's type and content here and
// returns only the style.
var type, content;
function ret(tp, style, cont) {
  content = cont;
  type = tp;
  return style;
}
// Default tokenizer: reads one token from `stream` and returns its style via
// ret(). For strings, block comments and template literals it installs the
// matching tokenizer on state.tokenize and delegates to it.
function tokenBase(stream, state) {
var ch = stream.next();
if (ch == '"' || ch == "'") {
state.tokenize = tokenString(ch);
return state.tokenize(stream, state);
} else if (ch == "." && stream.match(/^\d+(?:[eE][+\-]?\d+)?/)) {
// Number with a leading dot, e.g. .5 or .5e3
return ret("number", "number");
} else if (ch == "." && stream.match("..")) {
return ret("spread", "meta");
} else if (/[\[\]{}\(\),;\:\.]/.test(ch)) {
// Punctuation: the character itself is the token type; no style.
return ret(ch);
} else if (ch == "=" && stream.eat(">")) {
return ret("=>", "operator");
} else if (ch == "0" && stream.eat(/x/i)) {
stream.eatWhile(/[\da-f]/i);
return ret("number", "number");
} else if (/\d/.test(ch)) {
stream.match(/^\d*(?:\.\d*)?(?:[eE][+\-]?\d+)?/);
return ret("number", "number");
} else if (ch == "/") {
if (stream.eat("*")) {
state.tokenize = tokenComment;
return tokenComment(stream, state);
} else if (stream.eat("/")) {
stream.skipToEnd();
return ret("comment", "comment");
} else if (state.lastType == "operator" || state.lastType == "keyword c" ||
state.lastType == "sof" || /^[\[{}\(,;:]$/.test(state.lastType)) {
// After one of these token types the slash is read as the start of a
// regexp literal (body, then optional non-repeating flags).
readRegexp(stream);
stream.match(/^\b(([gimyu])(?![gimyu]*\2))+\b/);
return ret("regexp", "string-2");
} else {
// Otherwise the slash is an operator (possibly followed by more operator chars).
stream.eatWhile(isOperatorChar);
return ret("operator", "operator", stream.current());
}
} else if (ch == "`") {
state.tokenize = tokenQuasi;
return tokenQuasi(stream, state);
} else if (ch == "#") {
// The rest of the line after "#" is styled as an error.
stream.skipToEnd();
return ret("error", "error");
} else if (isOperatorChar.test(ch)) {
stream.eatWhile(isOperatorChar);
return ret("operator", "operator", stream.current());
} else if (wordRE.test(ch)) {
stream.eatWhile(wordRE);
// A keyword directly after "." is treated as an ordinary variable (property name).
var word = stream.current(), known = keywords.propertyIsEnumerable(word) && keywords[word];
return (known && state.lastType != ".") ? ret(known.type, known.style, word) :
ret("variable", "variable", word);
}
}
// Returns a tokenizer for a string literal delimited by `quote`.
// The tokenizer consumes up to and including the first unescaped closing
// quote, or to the end of the stream. It hands control back to tokenBase
// unless the consumed text ended on a pending backslash escape. In JSON-LD
// mode a leading @keyword (matched together with its closing quote) is
// reported as "jsonld-keyword" instead.
function tokenString(quote) {
  return function (stream, state) {
    if (jsonldMode && stream.peek() == "@" && stream.match(isJsonldKeyword)) {
      state.tokenize = tokenBase;
      return ret("jsonld-keyword", "meta");
    }
    var pendingEscape = false;
    for (var ch = stream.next(); ch != null; ch = stream.next()) {
      if (ch == quote && !pendingEscape) break;
      pendingEscape = !pendingEscape && ch == "\\";
    }
    if (!pendingEscape) state.tokenize = tokenBase;
    return ret("string", "string");
  };
}
// Tokenizer for the inside of a block comment. Consumes characters until the
// closing "*/" (handing control back to tokenBase) or until the stream runs
// out, in which case it stays installed.
function tokenComment(stream, state) {
  var prevWasStar = false;
  for (var ch = stream.next(); ch; ch = stream.next()) {
    if (prevWasStar && ch == "/") {
      state.tokenize = tokenBase;
      break;
    }
    prevWasStar = (ch == "*");
  }
  return ret("comment", "comment");
}
// Tokenizer for template-literal text. Consumes characters until an
// unescaped backtick or an unescaped "${" (both hand control back to
// tokenBase), or until the stream runs out. The consumed text is passed to
// ret() as the token content.
function tokenQuasi(stream, state) {
  var afterBackslash = false;
  for (var ch = stream.next(); ch != null; ch = stream.next()) {
    if (afterBackslash) {
      afterBackslash = false;
      continue;
    }
    var closes = ch == "`" || (ch == "$" && stream.eat("{"));
    if (closes) {
      state.tokenize = tokenBase;
      break;
    }
    afterBackslash = ch == "\\";
  }
  return ret("quasi", "string-2", stream.current());
}
// Opening brackets occupy indices 0-2 of this string and closing brackets
// indices 3-5; findFatArrow() relies on that split.
var brackets = "([{}])";
// This is a crude lookahead trick to try and notice that we're
// parsing the argument patterns for a fat-arrow function before we
// actually hit the arrow token. It only works if the arrow is on
// the same line as the arguments and there's no strange noise
// (comments) in between. Fallback is to only notice when we hit the
// arrow, and not declare the arguments as locals for the arrow
// body.
//
// Sets state.fatArrowAt to the position in the line where the arrow
// function's parameter list appears to start, or leaves it cleared.
function findFatArrow(stream, state) {
if (state.fatArrowAt) state.fatArrowAt = null;
var arrow = stream.string.indexOf("=>", stream.start);
if (arrow < 0) return;
var depth = 0, sawSomething = false;
// Scan backwards from just before the arrow.
for (var pos = arrow - 1; pos >= 0; --pos) {
var ch = stream.string.charAt(pos);
var bracket = brackets.indexOf(ch);
if (bracket >= 0 && bracket < 3) {
// Opening bracket: either closes a group entered from the right, or (at
// depth 0) marks the boundary just before the parameters.
if (!depth) { ++pos; break; }
if (--depth == 0) break;
} else if (bracket >= 3 && bracket < 6) {
++depth;
} else if (wordRE.test(ch)) {
sawSomething = true;
} else if (/["'\/]/.test(ch)) {
// Quotes or slashes: give up rather than guess.
return;
} else if (sawSomething && !depth) {
++pos;
break;
}
}
if (sawSomething && !depth) state.fatArrowAt = pos;
}
// Parser
// Token types that expressionInner() accepts as a complete operand by themselves.
var atomicTypes = {"atom": true, "number": true, "variable": true, "string": true, "regexp": true, "this": true, "jsonld-keyword": true};
// Lexical-context record; instances are linked through `prev`.
// `align` is only stored when it is not null/undefined, which lets parseJS()
// detect "not decided yet" with hasOwnProperty("align").
function JSLexical(indented, column, type, align, prev, info) {
this.indented = indented;
this.column = column;
this.type = type;
this.prev = prev;
this.info = info;
if (align != null) this.align = align;
}
// Reports whether `varname` is a known local: it is looked up in the current
// scope's variable list (state.localVars) and then in the `vars` list of each
// enclosing context. Returns true when found, undefined otherwise.
function inScope(state, varname) {
  function listHas(head) {
    for (var node = head; node; node = node.next)
      if (node.name == varname) return true;
    return false;
  }
  if (listHas(state.localVars)) return true;
  for (var scope = state.context; scope; scope = scope.prev) {
    if (listHas(scope.vars)) return true;
  }
}
// Feeds one token to the combinator stack (state.cc) and returns the style to
// use for it. Combinators are popped and called until one returns a truthy
// value (i.e. it consumed the token); a falsy return means "try the next one
// with the same token".
function parseJS(state, style, type, content, stream) {
var cc = state.cc;
// Communicate our context to the combinators.
// (Less wasteful than consing up a hundred closures on every call.)
cx.state = state; cx.stream = stream; cx.marked = null, cx.cc = cc; cx.style = style;
if (!state.lexical.hasOwnProperty("align"))
state.lexical.align = true;
while(true) {
// With an empty stack, start a fresh expression (JSON modes) or statement.
var combinator = cc.length ? cc.pop() : jsonMode ? expression : statement;
if (combinator(type, content)) {
// Run any pending lexical-context actions (functions flagged with .lex)
// sitting on top of the stack before returning.
while(cc.length && cc[cc.length - 1].lex)
cc.pop()();
// A style set by a combinator wins; otherwise known locals get "variable-2".
if (cx.marked) return cx.marked;
if (type == "variable" && inScope(state, content)) return "variable-2";
return style;
}
}
}
// Combinator utils
// Shared context for the combinators; parseJS() fills it in for every token.
var cx = {state: null, column: null, marked: null, cc: null};
// Queues the given combinators on cx.cc so that the first argument runs
// first (they are pushed in reverse because the stack is popped from the
// end). Returns undefined, which parseJS() treats as "token not consumed".
function pass() {
  var i = arguments.length;
  while (i-- > 0) cx.cc.push(arguments[i]);
}
// Like pass(), but returns true: parseJS() then treats the current token as
// consumed and the queued combinators receive the following tokens.
function cont() {
pass.apply(null, arguments);
return true;
}
// Records a newly declared variable name.
// Inside a function context the token is marked "def" and the name is
// prepended to state.localVars unless already listed. At top level the name
// is prepended to state.globalVars, but only when parserConfig.globalVars is
// enabled and the name is not already listed.
function register(varname) {
  function alreadyListed(head) {
    var node = head;
    while (node) {
      if (node.name == varname) return true;
      node = node.next;
    }
    return false;
  }

  var state = cx.state;
  if (!state.context) {
    if (!alreadyListed(state.globalVars) && parserConfig.globalVars)
      state.globalVars = {name: varname, next: state.globalVars};
    return;
  }

  cx.marked = "def";
  if (!alreadyListed(state.localVars))
    state.localVars = {name: varname, next: state.localVars};
}
// Combinators
// Variable list every new function scope starts with: "this" and "arguments".
var defaultVars = {name: "this", next: {name: "arguments"}};
// Enters a new variable scope: the current locals are saved on a new context
// record and localVars restarts from defaultVars.
function pushcontext() {
cx.state.context = {prev: cx.state.context, vars: cx.state.localVars};
cx.state.localVars = defaultVars;
}
// Leaves the current variable scope, restoring the locals saved by pushcontext().
function popcontext() {
cx.state.localVars = cx.state.context.vars;
cx.state.context = cx.state.context.prev;
}
// Returns an action that, when run, pushes a new JSLexical of the given type
// (and optional info) onto state.lexical. The action is flagged with
// .lex = true so parseJS() runs it eagerly once a token has been consumed.
function pushlex(type, info) {
var result = function() {
var state = cx.state, indent = state.indented;
// Inside a statement, reuse that statement's indentation; otherwise reuse
// the indentation of enclosing aligned ")" contexts.
if (state.lexical.type == "stat") indent = state.lexical.indented;
else for (var outer = state.lexical; outer && outer.type == ")" && outer.align; outer = outer.prev)
indent = outer.indented;
state.lexical = new JSLexical(indent, cx.stream.column(), type, null, state.lexical, info);
};
result.lex = true;
return result;
}
// Pops the innermost lexical context, if it has a parent. When leaving a ")"
// context the state's indentation is reset to that context's indentation.
function poplex() {
var state = cx.state;
if (state.lexical.prev) {
if (state.lexical.type == ")")
state.indented = state.lexical.indented;
state.lexical = state.lexical.prev;
}
}
// Flag so parseJS() treats poplex as a lexical action, like pushlex() results.
poplex.lex = true;
// Returns a combinator that waits for a token of type `wanted`.
// A matching token is consumed. If `wanted` is ";" a non-matching token is
// left for the next combinator (the semicolon is optional); for anything
// else, non-matching tokens are consumed until `wanted` turns up.
function expect(wanted) {
function exp(type) {
if (type == wanted) return cont();
else if (wanted == ";") return pass();
else return cont(exp);
};
return exp;
}
// Statement combinator: dispatches on the type of the statement's first token
// and queues the combinators for the rest of that statement, wrapping them in
// a pushlex(...)/poplex pair where a lexical context is needed.
function statement(type, value) {
if (type == "var") return cont(pushlex("vardef", value.length), vardef, expect(";"), poplex);
if (type == "keyword a") return cont(pushlex("form"), expression, statement, poplex);
if (type == "keyword b") return cont(pushlex("form"), statement, poplex);
if (type == "{") return cont(pushlex("}"), block, poplex);
if (type == ";") return cont();
if (type == "if") {
// "else if": run the pending poplex of the "else" form right away.
if (cx.state.lexical.info == "else" && cx.state.cc[cx.state.cc.length - 1] == poplex)
cx.state.cc.pop()();
return cont(pushlex("form"), expression, statement, poplex, maybeelse);
}
if (type == "function") return cont(functiondef);
if (type == "for") return cont(pushlex("form"), forspec, statement, poplex);
if (type == "variable") return cont(pushlex("stat"), maybelabel);
if (type == "switch") return cont(pushlex("form"), expression, pushlex("}", "switch"), expect("{"),
block, poplex, poplex);
if (type == "case") return cont(expression, expect(":"));
if (type == "default") return cont(expect(":"));
if (type == "catch") return cont(pushlex("form"), pushcontext, expect("("), funarg, expect(")"),
statement, poplex, popcontext);
if (type == "module") return cont(pushlex("form"), pushcontext, afterModule, popcontext, poplex);
if (type == "class") return cont(pushlex("form"), className, poplex);
if (type == "export") return cont(pushlex("form"), afterExport, poplex);
if (type == "import") return cont(pushlex("form"), afterImport, poplex);
// Anything else: an expression statement; the token is re-dispatched to expression.
return pass(pushlex("stat"), expression, expect(";"), poplex);
}
// Expression in which a top-level comma continues the expression.
function expression(type) {
return expressionInner(type, false);
}
// Expression that does not treat a top-level comma as a continuation.
function expressionNoComma(type) {
return expressionInner(type, true);
}
// Shared implementation of expression() / expressionNoComma(): dispatches on
// the first token of an operand and queues what may follow it. `noComma`
// selects the NoComma variants of the follow-up combinators.
function expressionInner(type, noComma) {
// findFatArrow() flagged this position as the start of arrow-function
// parameters: parse them as patterns inside a fresh variable scope.
if (cx.state.fatArrowAt == cx.stream.start) {
var body = noComma ? arrowBodyNoComma : arrowBody;
if (type == "(") return cont(pushcontext, pushlex(")"), commasep(pattern, ")"), poplex, expect("=>"), body, popcontext);
else if (type == "variable") return pass(pushcontext, pattern, expect("=>"), body, popcontext);
}
var maybeop = noComma ? maybeoperatorNoComma : maybeoperatorComma;
if (atomicTypes.hasOwnProperty(type)) return cont(maybeop);
if (type == "function") return cont(functiondef, maybeop);
if (type == "keyword c") return cont(noComma ? maybeexpressionNoComma : maybeexpression);
if (type == "(") return cont(pushlex(")"), maybeexpression, comprehension, expect(")"), poplex, maybeop);
if (type == "operator" || type == "spread") return cont(noComma ? expressionNoComma : expression);
if (type == "[") return cont(pushlex("]"), arrayLiteral, poplex, maybeop);
if (type == "{") return contCommasep(objprop, "}", null, maybeop);
if (type == "quasi") { return pass(quasi, maybeop); }
// Unrecognised token: consume it and queue nothing.
return cont();
}
// Optional expression: when the token type contains one of ; } ) ] , nothing
// is queued; otherwise the token is re-dispatched to expression.
function maybeexpression(type) {
if (type.match(/[;\}\)\],]/)) return pass();
return pass(expression);
}
// Same as maybeexpression(), but re-dispatches to expressionNoComma.
function maybeexpressionNoComma(type) {
if (type.match(/[;\}\)\],]/)) return pass();
return pass(expressionNoComma);
}
// What may follow an operand when commas are allowed: a comma continues with
// another expression; everything else is handled by maybeoperatorNoComma,
// called with an explicit noComma === false.
function maybeoperatorComma(type, value) {
if (type == ",") return cont(expression);
return maybeoperatorNoComma(type, value, false);
}
// What may follow an operand: arrow, operator, template literal, call,
// property access or subscript.
// When parseJS() calls this directly only (type, value) are supplied, so
// `noComma` is undefined and `noComma == false` is false: the NoComma
// variants are used. maybeoperatorComma() passes false to select the comma
// variants instead.
function maybeoperatorNoComma(type, value, noComma) {
var me = noComma == false ? maybeoperatorComma : maybeoperatorNoComma;
var expr = noComma == false ? expression : expressionNoComma;
if (type == "=>") return cont(pushcontext, noComma ? arrowBodyNoComma : arrowBody, popcontext);
if (type == "operator") {
// ++ / -- are postfix: stay in "after operand" position.
if (/\+\+|--/.test(value)) return cont(me);
if (value == "?") return cont(expression, expect(":"), expr);
return cont(expr);
}
if (type == "quasi") { return pass(quasi, me); }
if (type == ";") return;
if (type == "(") return contCommasep(expressionNoComma, ")", "call", me);
if (type == ".") return cont(property, me);
if (type == "[") return cont(pushlex("]"), maybeexpression, expect("]"), poplex, me);
}
// Template literal: consumes "quasi" tokens. A chunk ending in "${" is
// followed by an embedded expression and then continueQuasi; any other chunk
// is simply followed by more template text.
function quasi(type, value) {
if (type != "quasi") return pass();
if (value.slice(value.length - 2) != "${") return cont(quasi);
return cont(expression, continueQuasi);
}
// After an embedded ${...} expression: the closing "}" is styled like the
// template text and the tokenizer is switched back to tokenQuasi.
function continueQuasi(type) {
if (type == "}") {
cx.marked = "string-2";
cx.state.tokenize = tokenQuasi;
return cont(quasi);
}
}
// Body of an arrow function: a "{" starts a statement, anything else an
// expression. Re-runs the arrow lookahead first, for arrows inside the body.
function arrowBody(type) {
findFatArrow(cx.stream, cx.state);
return pass(type == "{" ? statement : expression);
}
// Same as arrowBody(), but an expression body is parsed with expressionNoComma.
function arrowBodyNoComma(type) {
findFatArrow(cx.stream, cx.state);
return pass(type == "{" ? statement : expressionNoComma);
}
// After a statement-initial variable: a ":" makes it a label followed by a
// statement; otherwise the token continues an expression statement.
function maybelabel(type) {
if (type == ":") return cont(poplex, statement);
return pass(maybeoperatorComma, expect(";"), poplex);
}
// Name after "." (queued by maybeoperatorNoComma): styled as a property.
function property(type) {
if (type == "variable") {cx.marked = "property"; return cont();}
}
// One entry of an object literal: identifier/keyword keys (with get/set
// handled by getterSetter), number or string keys, JSON-LD keywords, and
// computed [expr] keys.
function objprop(type, value) {
if (type == "variable" || cx.style == "keyword") {
cx.marked = "property";
if (value == "get" || value == "set") return cont(getterSetter);
return cont(afterprop);
} else if (type == "number" || type == "string") {
// Outside JSON-LD mode the token keeps its own style with " property" appended.
cx.marked = jsonldMode ? "property" : (cx.style + " property");
return cont(afterprop);
} else if (type == "jsonld-keyword") {
return cont(afterprop);
} else if (type == "[") {
return cont(expression, expect("]"), afterprop);
}
}
// After a "get"/"set" key: a following name is the accessor's property name
// and starts a function definition; otherwise "get"/"set" was itself the key.
function getterSetter(type) {
if (type != "variable") return pass(afterprop);
cx.marked = "property";
return cont(functiondef);
}
// After an object key: ":" introduces the value; "(" starts a method
// definition (the paren is left for functiondef).
function afterprop(type) {
if (type == ":") return cont(expressionNoComma);
if (type == "(") return pass(functiondef);
}
// Returns a combinator for a comma-separated sequence of `what` items that is
// closed by a token of type `end`.
function commasep(what, end) {
function proceed(type) {
if (type == ",") {
// In a call's argument list, count the commas seen so far on the lexical context.
var lex = cx.state.lexical;
if (lex.info == "call") lex.pos = (lex.pos || 0) + 1;
return cont(what, proceed);
}
if (type == end) return cont();
// Neither a comma nor the end token: consume it and wait for `end`.
return cont(expect(end));
}
return function(type) {
// Empty list.
if (type == end) return cont();
return pass(what, proceed);
};
}
// Consumes the opening token and queues a comma-separated list wrapped in a
// lexical context of type `end` (with optional `info`). Any arguments after
// `info` are combinators pushed first, so they run after the list.
function contCommasep(what, end, info) {
for (var i = 3; i < arguments.length; i++)
cx.cc.push(arguments[i]);
return cont(pushlex(end, info), commasep(what, end), poplex);
}
// Statements up to the closing "}".
function block(type) {
if (type == "}") return cont();
return pass(statement, block);
}
// Optional ": type" annotation; only recognised in TypeScript mode (isTS).
function maybetype(type) {
if (isTS && type == ":") return cont(typedef);
}
// Optional "= defaultValue" after a function argument.
function maybedefault(_, value) {
if (value == "=") return cont(expressionNoComma);
}
// Type name in an annotation: styled "variable-3".
function typedef(type) {
if (type == "variable") {cx.marked = "variable-3"; return cont();}
}
// One declarator: pattern, optional type, optional initialiser, then maybe
// a comma and another declarator (vardefCont).
function vardef() {
return pass(pattern, maybetype, maybeAssign, vardefCont);
}
// Binding pattern: a plain name (registered as a variable) or an array /
// object destructuring pattern.
function pattern(type, value) {
if (type == "variable") { register(value); return cont(); }
if (type == "[") return contCommasep(pattern, "]");
if (type == "{") return contCommasep(proppattern, "}");
}
// One entry of an object destructuring pattern. A name not followed by ":"
// is itself the binding; otherwise the name is a property key and the
// binding pattern follows the colon.
function proppattern(type, value) {
if (type == "variable" && !cx.stream.match(/^\s*:/, false)) {
register(value);
return cont(maybeAssign);
}
if (type == "variable") cx.marked = "property";
return cont(expect(":"), pattern, maybeAssign);
}
// Optional "= initialiser".
function maybeAssign(_type, value) {
if (value == "=") return cont(expressionNoComma);
}
// A comma after a declarator starts the next one.
function vardefCont(type) {
if (type == ",") return cont(vardef);
}
// Optional "else" branch after an if statement; its lexical context carries
// info "else" so statement() can recognise "else if".
function maybeelse(type, value) {
if (type == "keyword b" && value == "else") return cont(pushlex("form", "else"), statement, poplex);
}
// The parenthesised header of a for statement.
function forspec(type) {
if (type == "(") return cont(pushlex(")"), forspec1, expect(")"), poplex);
}
// First part of a for header: a declaration, nothing, a bare variable
// (possibly a for-in/for-of target) or an arbitrary expression.
function forspec1(type) {
if (type == "var") return cont(vardef, expect(";"), forspec2);
if (type == ";") return cont(forspec2);
if (type == "variable") return cont(formaybeinof);
return pass(expression, expect(";"), forspec2);
}
// After a bare variable in a for header: "in"/"of" is styled as a keyword
// and followed by the iterated expression; otherwise continue a normal header.
function formaybeinof(_type, value) {
if (value == "in" || value == "of") { cx.marked = "keyword"; return cont(expression); }
return cont(maybeoperatorComma, forspec2);
}
// Second part of a for header (the condition), or "in"/"of" after a declaration.
function forspec2(type, value) {
if (type == ";") return cont(forspec3);
if (value == "in" || value == "of") { cx.marked = "keyword"; return cont(expression); }
return pass(expression, expect(";"), forspec3);
}
// Third part of a for header (the update expression), if present.
// This deliberately returns nothing: parseJS() treats the falsy result as
// "token not consumed", so the current token is re-dispatched to
// `expression` rather than being skipped.
function forspec3(type) {
  if (type != ")") pass(expression);
}
// Function definition after the "function" keyword (also used for methods):
// optional "*" (styled as keyword), optional name (registered as a
// variable), then the argument list and body inside a fresh variable scope.
function functiondef(type, value) {
if (value == "*") {cx.marked = "keyword"; return cont(functiondef);}
if (type == "variable") {register(value); return cont(functiondef);}
if (type == "(") return cont(pushcontext, pushlex(")"), commasep(funarg, ")"), poplex, statement, popcontext);
}
// One function parameter: a spread token loops back into funarg, anything
// else is parsed as pattern, optional type, optional default.
function funarg(type) {
  return type == "spread"
    ? cont(funarg)
    : pass(pattern, maybetype, maybedefault);
}
// Name following `class`: a variable token is registered and followed by
// classNameAfter; other tokens yield undefined.
function className(type, value) {
  if (type != "variable") return;
  register(value);
  return cont(classNameAfter);
}
// After the class name: `extends` is followed by an expression and then this
// combinator again; "{" opens the class body inside a "}" lexical scope.
function classNameAfter(type, value) {
  if (value == "extends") return cont(expression, classNameAfter);
  if (type != "{") return;
  return cont(pushlex("}"), classBody, poplex);
}
// One token of a class body.
// - A member name (variable, or anything the tokenizer styled as keyword) is
//   marked "property" and followed by a function definition; `get` / `set`
//   additionally expect the accessor's own name first.
// - `static` and "*" are marked as keywords and the body continues.
// - ";" is skipped, "}" ends the body; other tokens yield undefined.
function classBody(type, value) {
  var isMemberName = type == "variable" || cx.style == "keyword";
  if (isMemberName && value != "static") {
    cx.marked = "property";
    var isAccessor = value == "get" || value == "set";
    return isAccessor ? cont(classGetterSetter, functiondef, classBody)
                      : cont(functiondef, classBody);
  }
  if (isMemberName || value == "*") {
    cx.marked = "keyword";
    return cont(classBody);
  }
  if (type == ";") return cont(classBody);
  if (type == "}") return cont();
}
// Name of a getter/setter: a variable token is marked "property" and handed
// to cont(); anything else is passed on untouched.
function classGetterSetter(type) {
  if (type == "variable") {
    cx.marked = "property";
    return cont();
  }
  return pass();
}
// After `module`: a string is followed by a statement; a variable is
// registered and may be followed by `from`. Other tokens yield undefined.
function afterModule(type, value) {
  switch (type) {
    case "string":
      return cont(statement);
    case "variable":
      register(value);
      return cont(maybeFrom);
  }
}
// After `export`: "*" and `default` are styled as keywords ("*" may be
// followed by `from`, `default` by an expression, both end with ";");
// everything else is parsed as a statement.
function afterExport(_type, value) {
  var isStar = value == "*";
  if (isStar || value == "default") {
    cx.marked = "keyword";
    return isStar ? cont(maybeFrom, expect(";")) : cont(expression, expect(";"));
  }
  return pass(statement);
}
// After `import`: a bare string goes straight to cont(); otherwise an import
// specifier optionally followed by `from` is parsed.
function afterImport(type) {
  return type == "string" ? cont() : pass(importSpec, maybeFrom);
}
// Import specifier: "{" opens a comma-separated specifier list. Otherwise a
// variable is registered, "*" is styled as keyword, and an optional `as`
// clause follows.
function importSpec(type, value) {
  var opensList = type == "{";
  if (!opensList) {
    if (type == "variable") register(value);
    if (value == "*") cx.marked = "keyword";
    return cont(maybeAs);
  }
  return contCommasep(importSpec, "}");
}
// Optional `as` clause: `as` is styled as keyword and followed by another
// import specifier; other tokens yield undefined.
function maybeAs(_type, value) {
  if (value != "as") return;
  cx.marked = "keyword";
  return cont(importSpec);
}
// Optional `from` clause: `from` is styled as keyword and followed by an
// expression; other tokens yield undefined.
function maybeFrom(_type, value) {
  if (value != "from") return;
  cx.marked = "keyword";
  return cont(expression);
}
// Inside "[": an immediate "]" closes an empty literal; otherwise the first
// element is parsed and maybeArrayComprehension decides what follows.
function arrayLiteral(type) {
  return type == "]"
    ? cont()
    : pass(expressionNoComma, maybeArrayComprehension);
}
// After the first array element: `for` starts a comprehension, "," continues
// a list whose elements may be empty, anything else continues a plain list.
function maybeArrayComprehension(type) {
  switch (type) {
    case "for":
      return pass(comprehension, expect("]"));
    case ",":
      return cont(commasep(maybeexpressionNoComma, "]"));
    default:
      return pass(commasep(expressionNoComma, "]"));
  }
}
// Comprehension clauses: `for` is followed by a for-header, `if` by an
// expression, and either one loops back into comprehension. Other tokens
// yield undefined.
function comprehension(type) {
  var clause = type == "for" ? forspec : type == "if" ? expression : null;
  if (clause) return cont(clause, comprehension);
}
// True when the upcoming line continues the current statement: either the
// previous token was an operator or a comma, or the text after the cursor
// starts with an operator character, a comma or a dot.
function isContinuedStatement(state, textAfter) {
  var previous = state.lastType;
  if (previous == "operator" || previous == ",") return true;
  var first = textAfter.charAt(0);
  return isOperatorChar.test(first) || /[,.]/.test(first);
}
// Interface
// The mode object handed back to CodeMirror.defineMode.
return {
// Builds the initial parser state. `basecolumn` (default 0) is the column the
// outermost "block" lexical scope is measured from.
startState: function(basecolumn) {
var state = {
tokenize: tokenBase,
lastType: "sof",
cc: [],
lexical: new JSLexical((basecolumn || 0) - indentUnit, 0, "block", false),
localVars: parserConfig.localVars,
context: parserConfig.localVars && {vars: parserConfig.localVars},
indented: 0
};
// globalVars is only copied over when it is a non-null object.
if (parserConfig.globalVars && typeof parserConfig.globalVars == "object")
state.globalVars = parserConfig.globalVars;
return state;
},
// Reads one token from `stream` and returns its style.
// `type` and `content` are not declared in this object; presumably they are
// closure variables filled in by the tokenizer call — verify against the
// tokenizer definitions earlier in the file.
token: function(stream, state) {
if (stream.sol()) {
// At the start of a line: default `align` to false if it was never set,
// remember the line's indentation, and run findFatArrow on the line.
if (!state.lexical.hasOwnProperty("align"))
state.lexical.align = false;
state.indented = stream.indentation();
findFatArrow(stream, state);
}
// Whitespace yields a null style, except while tokenComment is active.
if (state.tokenize != tokenComment && stream.eatSpace()) return null;
var style = state.tokenize(stream, state);
// Comments bypass the parser and leave lastType untouched.
if (type == "comment") return style;
// "++" / "--" are recorded as "incdec" rather than as a plain operator.
state.lastType = type == "operator" && (content == "++" || content == "--") ? "incdec" : type;
return parseJS(state, style, type, content, stream);
},
// Computes the indentation for a line whose text starts with `textAfter`.
indent: function(state, textAfter) {
// While tokenComment is active the decision is left to CodeMirror.Pass;
// any tokenizer other than tokenBase indents to column 0.
if (state.tokenize == tokenComment) return CodeMirror.Pass;
if (state.tokenize != tokenBase) return 0;
var firstChar = textAfter && textAfter.charAt(0), lexical = state.lexical;
// Kludge to prevent 'maybelse' from blocking lexical scope pops
if (!/^\s*else\b/.test(textAfter)) for (var i = state.cc.length - 1; i >= 0; --i) {
var c = state.cc[i];
if (c == poplex) lexical = lexical.prev;
else if (c != maybeelse) break;
}
// A closing brace ends a pending "stat" scope.
if (lexical.type == "stat" && firstChar == "}") lexical = lexical.prev;
// With statementIndent configured, a ")" scope nested in a statement
// indents relative to that statement.
if (statementIndent && lexical.type == ")" && lexical.prev.type == "stat")
lexical = lexical.prev;
// `closing` is true when the line starts with the character named by the
// scope type (e.g. "}" for a "}" scope).
var type = lexical.type, closing = firstChar == type;
if (type == "vardef") return lexical.indented + (state.lastType == "operator" || state.lastType == "," ? lexical.info + 1 : 0);
else if (type == "form" && firstChar == "{") return lexical.indented;
else if (type == "form") return lexical.indented + indentUnit;
else if (type == "stat")
return lexical.indented + (isContinuedStatement(state, textAfter) ? statementIndent || indentUnit : 0);
// Inside `switch` (unless doubleIndentSwitch is false): case/default labels
// get one indent unit, everything else two.
else if (lexical.info == "switch" && !closing && parserConfig.doubleIndentSwitch != false)
return lexical.indented + (/^(?:case|default)\b/.test(textAfter) ? indentUnit : 2 * indentUnit);
else if (lexical.align) return lexical.column + (closing ? 0 : 1);
else return lexical.indented + (closing ? 0 : indentUnit);
},
// Input matching this pattern (case labels, default:, braces) is "electric".
electricInput: /^\s*(?:case .*?:|default:|\{|\})$/,
// Comment delimiters are disabled in JSON mode.
blockCommentStart: jsonMode ? null : "/*",
blockCommentEnd: jsonMode ? null : "*/",
lineComment: jsonMode ? null : "//",
fold: "brace",
closeBrackets: "()[]{}''\"\"``",
helperType: jsonMode ? "json" : "javascript",
jsonldMode: jsonldMode,
jsonMode: jsonMode
};
});
// Characters that count as part of a word in JavaScript.
CodeMirror.registerHelper("wordChars", "javascript", /[\w$]/);

// Plain JavaScript MIME types map straight onto the mode name.
["text/javascript", "text/ecmascript", "application/javascript",
 "application/x-javascript", "application/ecmascript"].forEach(function(mime) {
  CodeMirror.defineMIME(mime, "javascript");
});

// JSON flavours (each registration gets its own spec object).
["application/json", "application/x-json"].forEach(function(mime) {
  CodeMirror.defineMIME(mime, {name: "javascript", json: true});
});
CodeMirror.defineMIME("application/ld+json", {name: "javascript", jsonld: true});

// TypeScript flavours.
["text/typescript", "application/typescript"].forEach(function(mime) {
  CodeMirror.defineMIME(mime, { name: "javascript", typescript: true });
});
});

72
codemirror/runmode.js

@ -0,0 +1,72 @@ @@ -0,0 +1,72 @@
// CodeMirror, copyright (c) by Marijn Haverbeke and others
// Distributed under an MIT license: http://codemirror.net/LICENSE
(function(mod) {
if (typeof exports == "object" && typeof module == "object") // CommonJS
mod(require("../../lib/codemirror"));
else if (typeof define == "function" && define.amd) // AMD
define(["../../lib/codemirror"], mod);
else // Plain browser env
mod(CodeMirror);
})(function(CodeMirror) {
"use strict";
// Tokenizes `string` with the mode described by `modespec` and reports every
// token.
//
// callback: either a DOM element (nodeType 1), which is emptied and filled
//           with styled <span> elements, or a function called as
//           callback(text, style, lineIndex, tokenStart, state). Line breaks
//           are reported as callback("\n").
// options:  optional; `tabSize` (used for tab expansion in DOM output) and
//           `state` (an existing mode state to start from) are read.
CodeMirror.runMode = function(string, modespec, callback, options) {
  var mode = CodeMirror.getMode(CodeMirror.defaults, modespec);

  if (callback.nodeType == 1) {
    // Browser sniffing is only needed for DOM output. Doing it inside this
    // branch keeps the function-callback form usable in environments where
    // `navigator` / `document` do not exist.
    var ie = /MSIE \d/.test(navigator.userAgent);
    var ie_lt9 = ie && (document.documentMode == null || document.documentMode < 9);
    var tabSize = (options && options.tabSize) || CodeMirror.defaults.tabSize;
    var node = callback, col = 0;
    node.innerHTML = "";
    callback = function(text, style) {
      if (text == "\n") {
        // Emitting LF or CRLF on IE8 or earlier results in an incorrect display.
        // Emitting a carriage return makes everything ok.
        node.appendChild(document.createTextNode(ie_lt9 ? '\r' : text));
        col = 0;
        return;
      }
      var content = "";
      // Replace tabs with spaces up to the next tab stop, tracking the
      // visual column across tokens of the same line.
      for (var pos = 0;;) {
        var idx = text.indexOf("\t", pos);
        if (idx == -1) {
          content += text.slice(pos);
          col += text.length - pos;
          break;
        } else {
          col += idx - pos;
          content += text.slice(pos, idx);
          var size = tabSize - col % tabSize;
          col += size;
          for (var i = 0; i < size; ++i) content += " ";
          pos = idx + 1;
        }
      }
      if (style) {
        // Every space-separated style name gets the "cm-" class prefix.
        var sp = node.appendChild(document.createElement("span"));
        sp.className = "cm-" + style.replace(/ +/g, " cm-");
        sp.appendChild(document.createTextNode(content));
      } else {
        node.appendChild(document.createTextNode(content));
      }
    };
  }

  var lines = CodeMirror.splitLines(string), state = (options && options.state) || CodeMirror.startState(mode);
  for (var i = 0, e = lines.length; i < e; ++i) {
    if (i) callback("\n");
    var stream = new CodeMirror.StringStream(lines[i]);
    // Empty lines never reach mode.token, so give the mode its blankLine hook.
    if (!stream.string && mode.blankLine) mode.blankLine(state);
    while (!stream.eol()) {
      var style = mode.token(stream, state);
      callback(stream.current(), style, i, stream.start, state);
      stream.start = stream.pos;
    }
  }
};
});

384
codemirror/xml.js

@ -0,0 +1,384 @@ @@ -0,0 +1,384 @@
// CodeMirror, copyright (c) by Marijn Haverbeke and others
// Distributed under an MIT license: http://codemirror.net/LICENSE
(function(mod) {
if (typeof exports == "object" && typeof module == "object") // CommonJS
mod(require("../../lib/codemirror"));
else if (typeof define == "function" && define.amd) // AMD
define(["../../lib/codemirror"], mod);
else // Plain browser env
mod(CodeMirror);
})(function(CodeMirror) {
"use strict";
CodeMirror.defineMode("xml", function(config, parserConfig) {
// Mode-wide settings read from the editor config and the mode options.
var indentUnit = config.indentUnit;
var multilineTagIndentFactor = parserConfig.multilineTagIndentFactor || 1;
var multilineTagIndentPastTag = parserConfig.multilineTagIndentPastTag;
// Defaults to true when the option is missing (null or undefined).
if (multilineTagIndentPastTag == null) multilineTagIndentPastTag = true;
// Per-dialect tables. In htmlMode the tables below are used; otherwise every
// table is empty and every flag is false.
var Kludges = parserConfig.htmlMode ? {
// Tags that never get a context pushed for them (see attrState).
autoSelfClosers: {'area': true, 'base': true, 'br': true, 'col': true, 'command': true,
'embed': true, 'frame': true, 'hr': true, 'img': true, 'input': true,
'keygen': true, 'link': true, 'meta': true, 'param': true, 'source': true,
'track': true, 'wbr': true, 'menuitem': true},
// Tags whose context may be popped when a different closing tag is seen.
implicitlyClosed: {'dd': true, 'li': true, 'optgroup': true, 'option': true, 'p': true,
'rp': true, 'rt': true, 'tbody': true, 'td': true, 'tfoot': true,
'th': true, 'tr': true},
// For an open tag (outer key): the tags whose opening pops it (inner keys).
contextGrabbers: {
'dd': {'dd': true, 'dt': true},
'dt': {'dd': true, 'dt': true},
'li': {'li': true},
'option': {'option': true, 'optgroup': true},
'optgroup': {'optgroup': true},
'p': {'address': true, 'article': true, 'aside': true, 'blockquote': true, 'dir': true,
'div': true, 'dl': true, 'fieldset': true, 'footer': true, 'form': true,
'h1': true, 'h2': true, 'h3': true, 'h4': true, 'h5': true, 'h6': true,
'header': true, 'hgroup': true, 'hr': true, 'menu': true, 'nav': true, 'ol': true,
'p': true, 'pre': true, 'section': true, 'table': true, 'ul': true},
'rp': {'rp': true, 'rt': true},
'rt': {'rp': true, 'rt': true},
'tbody': {'tbody': true, 'tfoot': true},
'td': {'td': true, 'th': true},
'tfoot': {'tbody': true},
'th': {'td': true, 'th': true},
'thead': {'tbody': true, 'tfoot': true},
'tr': {'tr': true}
},
// Tags whose contexts are flagged noIndent (see Context).
doNotIndent: {"pre": true},
// allowUnquoted: a bare word is accepted as an attribute value.
allowUnquoted: true,
// allowMissing: an attribute without "=value" is not flagged as an error.
allowMissing: true,
// NOTE(review): caseFold is not read anywhere in the visible code.
caseFold: true
} : {
autoSelfClosers: {},
implicitlyClosed: {},
contextGrabbers: {},
doNotIndent: {},
allowUnquoted: false,
allowMissing: false,
caseFold: false
};
var alignCDATA = parserConfig.alignCDATA;
// Return variables for tokenizers
var type, setStyle;
// Tokenizer for text between tags. Recognizes entity references, the start
// of tags, comments, CDATA sections, doctype declarations and processing
// instructions, and switches state.tokenize accordingly.
function inText(stream, state) {
  // Installs `parser` as the active tokenizer and runs it right away.
  function chain(parser) {
    state.tokenize = parser;
    return parser(stream, state);
  }

  var ch = stream.next();

  if (ch == "&") {
    // Entity reference: hex, decimal or named, always terminated by ";".
    var ok;
    if (!stream.eat("#")) ok = stream.eatWhile(/[\w\.\-:]/) && stream.eat(";");
    else if (stream.eat("x")) ok = stream.eatWhile(/[a-fA-F\d]/) && stream.eat(";");
    else ok = stream.eatWhile(/[\d]/) && stream.eat(";");
    return ok ? "atom" : "error";
  }

  if (ch != "<") {
    // Plain text runs up to the next "&" or "<".
    stream.eatWhile(/[^&<]/);
    return null;
  }

  if (stream.eat("!")) {
    if (stream.eat("[")) {
      return stream.match("CDATA[") ? chain(inBlock("atom", "]]>")) : null;
    }
    if (stream.match("--")) return chain(inBlock("comment", "-->"));
    if (stream.match("DOCTYPE", true, true)) {
      stream.eatWhile(/[\w\._\-]/);
      return chain(doctype(1));
    }
    return null;
  }

  if (stream.eat("?")) {
    stream.eatWhile(/[\w\._\-]/);
    state.tokenize = inBlock("meta", "?>");
    return "meta";
  }

  type = stream.eat("/") ? "closeTag" : "openTag";
  state.tokenize = inTag;
  return "tag bracket";
}
// Tokenizer used inside a tag (after "<name"). Classifies the closing
// bracket, "=", quoted attribute values and bare words; a stray "<" resets
// the tag state and is flagged as an error.
function inTag(stream, state) {
  var ch = stream.next();
  var selfClosing = ch == "/" && stream.eat(">");

  if (ch == ">" || selfClosing) {
    state.tokenize = inText;
    type = selfClosing ? "selfcloseTag" : "endTag";
    return "tag bracket";
  }

  if (ch == "=") {
    type = "equals";
    return null;
  }

  if (ch == "<") {
    // A new tag started before this one was closed: start over from text
    // state and mark whatever comes out as an error.
    state.tokenize = inText;
    state.state = baseState;
    state.tagName = state.tagStart = null;
    var next = state.tokenize(stream, state);
    return next ? next + " tag error" : "tag error";
  }

  if (/[\'\"]/.test(ch)) {
    state.tokenize = inAttribute(ch);
    state.stringStartCol = stream.column();
    return state.tokenize(stream, state);
  }

  stream.match(/^[^\s\u00a0=<>\"\']*[^\s\u00a0=<>\"\'\/]/);
  return "word";
}
// Builds the tokenizer for a quoted attribute value. It consumes characters
// up to and including the matching `quote` (then hands back to inTag) or to
// the end of the line, and always reports "string". The returned function is
// tagged with isInAttribute so that indent() can recognize it.
function inAttribute(quote) {
  var closure = function(stream, state) {
    for (;;) {
      if (stream.eol()) break;
      if (stream.next() == quote) {
        state.tokenize = inTag;
        break;
      }
    }
    return "string";
  };
  closure.isInAttribute = true;
  return closure;
}
// Builds a tokenizer for a delimited block (comment, CDATA, processing
// instruction). It reports `style` for everything up to and including
// `terminator`, after which inText takes over again; without a terminator
// on the current line the whole rest of the line is consumed.
function inBlock(style, terminator) {
  return function(stream, state) {
    for (; !stream.eol(); stream.next()) {
      if (stream.match(terminator)) {
        state.tokenize = inText;
        break;
      }
    }
    return style;
  };
}
// Builds the tokenizer for a doctype declaration at angle-bracket nesting
// level `depth`. "<" goes one level deeper, ">" one level up, and the ">"
// seen at depth 1 hands back to inText. Everything is styled "meta".
function doctype(depth) {
  return function(stream, state) {
    for (var ch = stream.next(); ch != null; ch = stream.next()) {
      if (ch != "<" && ch != ">") continue;
      if (ch == ">" && depth == 1) {
        state.tokenize = inText;
        break;
      }
      // Nesting level changed: continue with a tokenizer for the new depth.
      state.tokenize = doctype(ch == "<" ? depth + 1 : depth - 1);
      return state.tokenize(stream, state);
    }
    return "meta";
  };
}
// Constructor for one entry of the open-tag stack. Records the enclosing
// context, the tag name, the indentation of the line the tag is on and
// whether the tag started its line. `noIndent` is set when the tag is listed
// in Kludges.doNotIndent or the enclosing context already carries it.
function Context(state, tagName, startOfLine) {
  var enclosing = state.context;
  this.prev = enclosing;
  this.tagName = tagName;
  this.indent = state.indented;
  this.startOfLine = startOfLine;
  var inherited = enclosing && enclosing.noIndent;
  if (Kludges.doNotIndent.hasOwnProperty(tagName) || inherited)
    this.noIndent = true;
}
// Drops the innermost open-tag context, if there is one.
function popContext(state) {
  var current = state.context;
  if (current) state.context = current.prev;
}
// Pops open-tag contexts for as long as the innermost one is "grabbed" by
// the tag being opened, according to Kludges.contextGrabbers (for example a
// new <li> closes an open <li>).
function maybePopContext(state, nextTagName) {
  while (state.context) {
    var parentTagName = state.context.tagName;
    var grabbed = Kludges.contextGrabbers.hasOwnProperty(parentTagName) &&
                  Kludges.contextGrabbers[parentTagName].hasOwnProperty(nextTagName);
    if (!grabbed) return;
    popContext(state);
  }
}
// Parser state outside of any tag: an opening bracket records where the tag
// starts and expects a tag name, a closing bracket expects a closing tag
// name, and everything else stays in this state.
function baseState(type, stream, state) {
  switch (type) {
    case "openTag":
      state.tagStart = stream.column();
      return tagNameState;
    case "closeTag":
      return closeTagNameState;
    default:
      return baseState;
  }
}
// Expects the name of an opening tag. A word becomes the current tag name
// (styled "tag") and attributes follow; anything else is an error and the
// parser keeps waiting for a name.
function tagNameState(type, stream, state) {
  if (type != "word") {
    setStyle = "error";
    return tagNameState;
  }
  state.tagName = stream.current();
  setStyle = "tag";
  return attrState;
}
// Expects the name of a closing tag. If the innermost open tag has a
// different name but may be closed implicitly, it is popped first. The name
// is styled "tag" when it matches the (then) innermost open tag and
// "tag error" otherwise; a non-word token is an error.
function closeTagNameState(type, stream, state) {
  if (type != "word") {
    setStyle = "error";
    return closeStateErr;
  }
  var tagName = stream.current();
  var open = state.context;
  if (open && open.tagName != tagName &&
      Kludges.implicitlyClosed.hasOwnProperty(open.tagName))
    popContext(state);
  open = state.context;
  var matches = open && open.tagName == tagName;
  setStyle = matches ? "tag" : "tag error";
  return matches ? closeState : closeStateErr;
}
// After a closing tag's name: the closing bracket pops the innermost context
// and returns to baseState; any other token is an error and the parser keeps
// waiting for the bracket.
function closeState(type, _stream, state) {
  if (type == "endTag") {
    popContext(state);
    return baseState;
  }
  setStyle = "error";
  return closeState;
}
// Same as closeState, but the token is always styled as an error (used after
// a closing tag whose name was not accepted).
function closeStateErr(type, stream, state) {
  setStyle = "error";
  var following = closeState(type, stream, state);
  return following;
}
// Inside an opening tag, between attributes.
// - A word is an attribute name.
// - The closing bracket ends the tag: contexts grabbed by this tag are
//   popped, and unless the tag is self-closed (explicitly or via
//   Kludges.autoSelfClosers) a new context is pushed for it.
// - Anything else is an error.
function attrState(type, _stream, state) {
  if (type == "word") {
    setStyle = "attribute";
    return attrEqState;
  }
  var selfClosed = type == "selfcloseTag";
  if (type != "endTag" && !selfClosed) {
    setStyle = "error";
    return attrState;
  }
  var tagName = state.tagName, tagStart = state.tagStart;
  state.tagName = state.tagStart = null;
  maybePopContext(state, tagName);
  if (!selfClosed && !Kludges.autoSelfClosers.hasOwnProperty(tagName))
    state.context = new Context(state, tagName, tagStart == state.indented);
  return baseState;
}
// After an attribute name: "=" leads to the value. Otherwise the attribute
// has no value, which is an error unless Kludges.allowMissing is set, and
// the token is re-interpreted by attrState.
function attrEqState(type, stream, state) {
  if (type != "equals") {
    if (!Kludges.allowMissing) setStyle = "error";
    return attrState(type, stream, state);
  }
  return attrValueState;
}
// After "=": a quoted string starts the value. A bare word is accepted (and
// styled "string") only when Kludges.allowUnquoted is set; everything else
// is an error and is re-interpreted by attrState.
function attrValueState(type, stream, state) {
  if (type == "string") return attrContinuedState;
  var unquotedValue = type == "word" && Kludges.allowUnquoted;
  setStyle = unquotedValue ? "string" : "error";
  return unquotedValue ? attrState : attrState(type, stream, state);
}
// While a quoted value continues (e.g. over several lines) further string
// tokens stay in this state; any other token goes back through attrState.
function attrContinuedState(type, stream, state) {
  return type == "string" ? attrContinuedState : attrState(type, stream, state);
}
// The mode object handed back to CodeMirror.defineMode.
return {
// Initial state: text tokenizer, base parser state, no open tags.
startState: function() {
return {tokenize: inText,
state: baseState,
indented: 0,
tagName: null, tagStart: null,
context: null};
},
// Reads one token and returns its style. The tokenizer reports a token type
// through the closure variable `type`; the parser state function may
// override the style through `setStyle`.
token: function(stream, state) {
// The line's indentation is only recorded when no tag is currently open
// across lines.
if (!state.tagName && stream.sol())
state.indented = stream.indentation();
if (stream.eatSpace()) return null;
type = null;
var style = state.tokenize(stream, state);
// Comments (and tokens with neither style nor type) do not advance the
// parser state.
if ((style || type) && style != "comment") {
setStyle = null;
state.state = state.state(type || style, stream, state);
// "error" is appended to the tokenizer's style; any other setStyle value
// replaces it.
if (setStyle)
style = setStyle == "error" ? style + " error" : setStyle;
}
return style;
},
// Computes the indentation of a line that starts with `textAfter`.
// `fullLine` is the complete current line, when the caller supplies it.
indent: function(state, textAfter, fullLine) {
var context = state.context;
// Indent multi-line strings (e.g. css).
if (state.tokenize.isInAttribute) {
if (state.tagStart == state.indented)
return state.stringStartCol + 1;
else
return state.indented + indentUnit;
}
if (context && context.noIndent) return CodeMirror.Pass;
// Inside a block tokenizer (neither inTag nor inText) keep the line's
// existing leading whitespace.
if (state.tokenize != inTag && state.tokenize != inText)
return fullLine ? fullLine.match(/^(\s*)/)[0].length : 0;
// Indent the starts of attribute names.
if (state.tagName) {
if (multilineTagIndentPastTag)
return state.tagStart + state.tagName.length + 2;
else
return state.tagStart + indentUnit * multilineTagIndentFactor;
}
if (alignCDATA && /<!\[CDATA\[/.test(textAfter)) return 0;
// tagAfter[1] is "/" for a closing tag, tagAfter[2] the tag name.
var tagAfter = textAfter && /^<(\/)?([\w_:\.-]*)/.exec(textAfter);
if (tagAfter && tagAfter[1]) { // Closing tag spotted
// Walk outwards past implicitly closed tags, stopping after the matching
// open tag.
while (context) {
if (context.tagName == tagAfter[2]) {
context = context.prev;
break;
} else if (Kludges.implicitlyClosed.hasOwnProperty(context.tagName)) {
context = context.prev;
} else {
break;
}
}
} else if (tagAfter) { // Opening tag spotted
// Skip every context the new tag would grab (see contextGrabbers).
while (context) {
var grabbers = Kludges.contextGrabbers[context.tagName];
if (grabbers && grabbers.hasOwnProperty(tagAfter[2]))
context = context.prev;
else
break;
}
}
// Indent relative to the nearest enclosing tag that started its own line.
while (context && !context.startOfLine)
context = context.prev;
if (context) return context.indent + indentUnit;
else return 0;
},
electricInput: /<\/[\s\w:]+>$/,
blockCommentStart: "<!--",
blockCommentEnd: "-->",
configuration: parserConfig.htmlMode ? "html" : "xml",
helperType: parserConfig.htmlMode ? "html" : "xml"
};
});
// Both XML MIME types map straight onto the mode name.
["text/xml", "application/xml"].forEach(function(mime) {
  CodeMirror.defineMIME(mime, "xml");
});
// text/html is only claimed when nothing else has registered it already.
var htmlAlreadyMapped = CodeMirror.mimeModes.hasOwnProperty("text/html");
if (!htmlAlreadyMapped)
  CodeMirror.defineMIME("text/html", {name: "xml", htmlMode: true});
});

84
css/explorer.css

@ -0,0 +1,84 @@ @@ -0,0 +1,84 @@
#explorer {
max-height: 500px;
white-space: nowrap;
overflow: scroll;
padding: 10px;
/*border: 1px solid #eee;*/
border-radius: 5px;
font-family: monospace;
background-color: #fff;
}
/* String values in the explorer: a scrollable, width-limited inline chip. */
.textNode {
  /* layout */
  display: inline-block;
  max-width: 400px;
  vertical-align: middle;
  white-space: pre;
  overflow: scroll;
  margin-bottom: 1px;
  margin-top: 1px;
  /* appearance */
  background-color: rgb(255, 238, 174);
  color: rgb(48, 48, 97);
  padding: 2px;
  border-radius: 3px;
}
.html {
background-color: #F5F5F5;
}
.comma, .ellipsis {
color: grey;
}
.nullNode {
color: grey;
}
.numberNode {
color: rgb(203, 48, 48);
}
.booleanNode {
color: rgb(174, 44, 164);
}
.label {
color: rgb(76, 162, 242);
}
.clickable {
cursor: pointer;
}
.indent {
padding-left: 30px;
display: inline-block;
}
/*.collapsed, .expanded {
position: relative;
}
*/
/*.collapsed::before {
content: "▶";
position: absolute;
left: -16px;
}
.expanded::before {
content: "▼";
position: absolute;
left: -16px;
}
.swiper{
padding-left: 40px;
margin-top: 8px;
}
*/

313
css/main.css

@ -0,0 +1,313 @@ @@ -0,0 +1,313 @@
body, html{
position: absolute;
top: 0;
margin: 0;
width: 100%;
font-family: Lato;
font-weight: 300;
}
#slogan {
text-align: center;
margin-top: -26px;
color: #fff;
}
/* Hero area behind the logo. */
#splash {
  overflow: hidden;
  /* The colour is part of the shorthand on purpose: a separate
     `background-color` placed before `background: url(...)` is reset by the
     shorthand, leaving no fallback while the image loads or if it fails. */
  background: #425565 url(../img/bg.jpg);
  background-size: cover;
  background-position: center;
}
#logo-wrap {
margin-top: 50px;
text-align: center;
position: relative;
}
#demo-instructions {
display: none;
}
#logo-canvas {
margin-left: auto;
margin-right: auto;
pointer-events: none;
z-index: 0;
position: absolute;
}
#logo-img {
max-width: 100%;
}
.get-started-wrap {
text-align: center;
position: relative;
padding: 0 20px;
}
.get-started {
display: inline-block;
color: #714d26;
background: #fff;
padding: 17px 25px;
font-size: 30px;
margin: 50px 0;
border-radius: 7px;
box-shadow: 0 10px 30px rgba(0, 0, 0, 0.35);
}
/* Heading above the demo. */
#demo-title {
  padding: 40px 0;
  color: #714d26;
  font-size: 30px;
  /* text-align: center; */
}
#options {
display: flex;
justify-content: space-around;
font-size: 20px;
color: #3580c0;
padding: 0 15px;
max-width: 700px;
margin-left: auto;
margin-right: auto;
box-sizing: border-box;
}
.option {
padding: 10px;
border-radius: 5px;
cursor: pointer;
text-align: center;
}
.option.selected {
color: #3580c0;
background: #d8edff;
font-weight: 400;
}
#input {
max-width: 100%;
box-sizing: border-box;
padding: 20px;
margin-top: 30px;
border: 1px solid #ddd;
}
#input-overlay {
position: absolute;
max-width: 100%;
box-sizing: border-box;
margin-top: 30px;
padding: 20px;
}
#github {
height: 1em;
width: 1em;
margin-bottom: -3px;
margin-right: 10px;
}
#github path {
fill: #826f5c;
}
#demo-content > #arrow {
text-align: center;
font-size: 30px;
color: #826f5c;
flex: 0;
}
#arrow::before {
content: '\2193'
}
#output {
max-width: 100%;
box-sizing: border-box;
padding: 20px;
}
#output-overlay {
white-space: pre;
font-family: Monaco, monospace;
position: absolute;
height: 100%;
width: 100%;
overflow: scroll;
border: 20px solid transparent;
box-sizing: border-box;
}
#clickme {
color: #2374b7;
text-decoration: underline;
cursor: pointer;
}
#footer {
color: #826f5c;
background-color: #f6f6f6;
text-align: right;
padding: 0 20px;
}
#lengle {
margin-top: 8%;
margin-bottom: 8%;
line-height: normal;
display: inline-block;
vertical-align: middle;
}
#lengle img {
height: .7em;
}
#demo-content > div {
position: relative;
flex: 1;
border: 20px solid transparent;
}
#drop-instructions-main {
display: none;
}
/* Centered column of explanatory text. */
.commentary {
  position: relative;
  padding: 20px;
  max-width: 700px;
  margin-left: auto;
  margin-right: auto;
  line-height: 180%;
  /* Was `.5 px`: the space before the unit made the declaration invalid,
     so it was silently dropped. */
  letter-spacing: .5px;
}
#output-text {
margin: 0px;
width: 100%;
min-height: 200px;
box-sizing: border-box;
padding: 20px;
border: 1px solid #ddd;
}
#demo-instructions {
border: 1px solid #dadada;
padding: 20px;
}
#log > div {
color: #313131;
border-top: 1px solid #dadada;
padding: 10px 0;
display: flex;
}
#log > div:first-child {
border: 0;
}
#log .status {
min-width: 250px;
}
#log {
border: 1px solid #dadada;
padding: 20px;
overflow: auto;
}
#log progress {
display: block;
width: 100%;
transition: opacity 0.5s linear;
}
#log progress[value="1"] {
opacity: 0.5;
}
/* Footer with the same backdrop as the splash area. */
#footer {
  color: #ffffff;
  text-align: right;
  padding: 0 20px;
  /* Colour and image go in one shorthand: the earlier `background-color`
     declarations were reset by `background: url(...)`, leaving white text
     without a dark fallback when the image is missing. */
  background: #425565 url(../img/bg.jpg);
  background-size: cover;
  background-position: bottom;
}
#footer a {
color: white !important;
}
/* Wide screens: input and output sit side by side. */
@media (min-width: 900px) {
  #drop-instructions-main {
    display: block;
    text-align: center;
    color: #b1a79c;
  }
  #demo-content {
    display: flex;
    align-items: center;
  }
  #input, #input-overlay {
    margin-top: 0;
  }
  #arrow::before {
    content: '\2192';
  }
  #options {
    margin: 40px auto;
  }
  /* Disabled ribbon-style title and page frame. These were previously
     prefixed with `//`, which is not a CSS comment and only worked through
     parser error recovery.
  #demo-title {
    width: 250px;
    background: #fff;
    margin-top: -70px;
    margin-left: 40px;
    position: relative;
    padding: 20px 0;
  }
  #demo-title::after {
    border-right: 40px solid transparent;
    border-bottom: 70px solid white;
    content: ' ';
    position: absolute;
    top: 0;
    left: 100%;
  }
  #demo-title::before {
    border-left: 40px solid transparent;
    border-bottom: 70px solid white;
    content: ' ';
    position: absolute;
    top: 0;
    left: -40px;
  }
  body {
    box-sizing: border-box;
    border: 20px solid #425565;
  }
  */
}

435
css/wau.css

@ -0,0 +1,435 @@ @@ -0,0 +1,435 @@
@import 'explorer.css';
.fullscreen {
width: 100%;
min-height: 100%;
}
.stretch {
max-width: 100%
}
/* One language entry in the language list. */
.langpair {
  display: inline-block;
  /* `padding: -1px` was removed: negative padding is invalid CSS and the
     declaration was ignored by browsers. */
  font-family: monospace;
  text-align: center;
  /*border: 1px solid #F3F3F3;*/
  margin-bottom: 10px;
  margin-right: 5px;
  font-size: initial;
  line-height: 1.5em;
  flex-grow:1;
  -webkit-flex-grow:1;
}
.footer {
width: 100%;
color: #fff;
background-color: #6A6A6A;
text-align: right;
}
.lengle {
margin-top: 8%;
margin-bottom: 8%;
line-height: normal;
display: inline-block;
vertical-align: middle;
}
.lengle img {
height: .7em;
}
.shortlang {
/*background-color: #FFEEAE;*/
}
.longlang {
background-color: #59C6FF;
color:#fff;
padding-left: 5px;
padding-right: 5px;
border-radius: 3px;
}
.explorersection{
background-color: rgba(102, 142, 195, 0.19);
width: 100%;
padding-top: 8%;
padding-bottom: 8%;
}
.noheight{
height: 0;
}
/* Centered page column. */
.columnwrap {
  /* Was unitless `1100`, which is invalid for a length. The unit matches
     the 1100px limit used by `.two`. */
  max-width: 1100px;
  margin-left: auto;
  margin-right: auto;
}
.column {
padding-left: 8%;
padding-right: 8%;
}
.smalllogo {
height: 50px;
vertical-align: middle;
position: absolute;
left: 3%;
opacity: 0;
}
.stickyheader {
position: absolute;
width: 100%;
z-index: 3;
text-align: right;
line-height: 50px;
}
.headernav {
padding-right: 3%;
color: #fff;
}
.run,
.running {
width: 100%;
height: 47px;
text-align: center;
position: relative;
}
.running {
display: none
}
#wow .notrunning {
display: none
}
#wow .running {
display: block
}
.runbutton {
padding-top: 9px;
padding-bottom: 10px;
font-weight: 500;
text-align: center;
color: #FFFFFF;
cursor: pointer;
-webkit-transition: all 0.7s ease;
transition: all 0.7s ease;
width: 100px;
margin-left: auto;
margin-right: auto;
background-color: #6A6A6A;
border-radius: 5px;
margin-bottom: 9px;
}
.runbutton:hover {
text-shadow: 0px 0px 10px #27C6F9;
}
a {
text-decoration: none;
color: #5BA9F2;
/*font-weight: bold;*/
}
/* Currently carries no active declarations. */
.display {
  /* position: absolute; */
  /*background: rgba(0,0,255,.1);*/
}
svg {
height: 100%
}
.marterial {
/*position: absolute;*/
width: 100%;
z-index: -1;
overflow: hidden;
background-color: #668EC3;
/*background-image: url(../img/stars.jpeg);*/
background-size: cover;
background-position: bottom;
}
.out {
padding: 10px;
padding-bottom: 0px;
text-align: center;
font-family: monospace;
}
.hint {
text-align: left;
padding-left: 14px;
font-size: 10px;
color: #56607D;
}
.langlabel {
display: inline-block;
flex-grow: 1;
-webkit-flex-grow: 1;
height: 40px;
line-height: 40px;
cursor: pointer;
color: #fff;
background-color: #6A6A6A;
}
.selected {
background-color: #668EC3;
}
.bookend {
display: inline-block;
}
.prog {
height: 4px;
width: 100%;
position: absolute;
bottom:0;
background-color: #6a6a6a;
}
.prog div{
height: 100%;
background-color: #5BA9F2;
}
.demofooter {
padding: 10px;
/*padding-top: 0px;*/
text-align: center;
/*display: flex*/
}
#wow .demofooter {
display: flex;
display: -webkit-flex;
padding: 0px;
justify-content: space-between;
-webkit-justify-content: space-between;
align-items: center;
-webkit-align-items: center;
}
#logo {
text-align: center;
margin-top: 6%;
margin-bottom: 6%;
height: 182px;
position: relative;
/*z-index: 4;*/
}
.to_ocr {
width: 100%;
}
.demo {
border-radius: 5px;
width: 500px;
max-width: 100%;
text-align: left;
top:-50px;
overflow: hidden;
box-shadow: 0px 40px 60px 10px rgba(0, 0, 0, .2);
position: relative;
background-color: #fff;
opacity: 1;
-webkit-transition: .7s ease-out;
-moz-transition: .7s ease-out;
-ms-transition: .7s ease-out;
-o-transition: .7s ease-out;
transition: .7s ease-out;
}
/*.opaque {
top:-30;
box-shadow: 0px 1px 4px 0px rgba(0, 0, 0, .2);
}*/
.demoheader {
overflow: hidden;
border-radius: 5px 5px 0px 0px;
font-size: 8.3px;
}
#canvas {
margin-left: auto;
margin-right: auto;
pointer-events: none;
z-index: 0;
position: absolute;
/* background-image: url(../img/logowhite.png);
background-size: cover;
*/}
.desc {
text-align: left;
font-size: 21px;
color: #343E70;
}
.vr {
display: inline-block;
height: 40px;
border-left: 1px solid #5C5C5C;
}
/* Large call-to-action button. */
.getStarted {
  display: inline-block;
  background-color: #668EC3;
  color: white;
  font-size: 40px;
  padding: 20px;
  padding-left: 50px;
  border-radius: 5px;
  /* Was unitless `40`, which is invalid for a length. */
  margin-top: 40px;
  padding-right: 50px;
  cursor: pointer;
}
.randombold
{
font-size: 18px;
/*font-family: Georgia;*/
letter-spacing: .8px;
line-height: 37px;
}
.rant {
padding-left: 8%;
padding-right: 8%;
}
.npm {
margin-top: 20px;
padding-left: 15px;
background-color: #F3F3F3;
border-radius: 5px;
margin-bottom: 0px;
padding-top: 10px;
padding-bottom: 10px;
border-left: 6px solid #303061;
color: rgb(48, 48, 97);
font-size: 19px;
font-family: monospace;
}
.longasstag {
text-align: left;
white-space: normal;
word-break: break-all;
}
#code {
text-align: center;
width: 335px;
border-radius: 5px;
margin-top: 80px;
margin-bottom: 80px;
max-width: 100%;
}
.woloasdf {
padding-top: 8%;
}
.fork {
position: absolute;
top: 0;
right: 0;
border: 0;
z-index: 2;
}
.fork img {
width: 149px;
}
.two {
width: 100%;
display: -webkit-flex;
display: flex;
flex-wrap: wrap;
-webkit-flex-wrap: wrap;
justify-content: space-between;
-webkit-justify-content: space-between;
max-width: 1100px;
margin-left: auto;
margin-right: auto;
}
.demoheader .CodeMirror {
padding-bottom: 9px;
padding-top: 9px;
padding-left: 13px;
background-color: rgb(255, 238, 174);
}
hr {
border: none;
border-top: 1px solid #ddd;
margin: 0px;
}
.ocroutput {
position: relative;
z-index: 2;
}
.manylangs{
display: flex;
display: -webkit-flex;
flex-wrap: wrap;
-webkit-flex-wrap: wrap;
}
body {
padding: 0px;
margin: 0px;
font-family: Lato;
/*, Helvetica, Arial, sans-serif;*/
font-weight: 300;
background-color: white;
}
button {
height: 60px;
width: 240px;
margin-top: 10px;
font: inherit;
font-size: 30px;
-webkit-transition: all 0.7s ease;
transition: all 0.7s ease;
outline: 0;
color: #FFF;
background: #5A4E60;
border-radius: 5px;
border: solid 1px #fff;
}
button:hover {
cursor: pointer;
background: rgba(255, 255, 255, 1);
border: solid 1px #ddd;
}
@media (max-width: 1015px) {
button {
color: #fff;
background-color: rgba(0, 0, 0, 0)
}
/* .desc {
color: #FFFFFF;
text-shadow: 0px 0px 3px rgba(58, 58, 58, 1);
}
.desc a {
color: #FFEEAE;
}
*/
.getStarted {
font-size: 33px;
}
.noheight {
height: auto;
}
.demo {
top:0;
margin-bottom: 20px;
box-shadow: 0px 1px 4px 0px rgba(0, 0, 0, .2);
}
#logo {
height: auto;
}
.two {
justify-content: space-around;
-webkit-justify-content: space-around;
}
#code {
margin-top: 20px;
margin-bottom: 20px;
width: 400px;
}
#fork {
width: 28%;
}
}

229
demo.js

@ -0,0 +1,229 @@ @@ -0,0 +1,229 @@
// Page elements used by the demo. These stay `var` so they remain properties
// of the global object, as before.
var input = document.getElementById('input')
var input_overlay = document.getElementById('input-overlay')
// 2D context of the overlay canvas; word boxes are drawn on it.
var ioctx = input_overlay.getContext('2d')
// var output = document.getElementById('output')
// var output_overlay = document.getElementById('output-overlay')
// Note: despite its name this is the #log element.
var output_text = document.getElementById('log')
var demo_instructions = document.getElementById('demo-instructions')
var drop_instructions = [].slice.call(document.querySelectorAll('.drop-instructions'))
var options = [].slice.call(document.querySelectorAll('.option'))
// var octx = output.getContext('2d')
var language = 'eng'
var demoStarted = false
// Sample image per language code.
var lang_demo_images = {
  eng: 'img/eng_bw.png',
  chi_sim: 'img/chi_sim.png',
  rus: 'img/rus.png'
}
// Language name with its article, keyed by language code.
var lang_drop_instructions = {
  eng: 'an English',
  chi_sim: 'a Chinese',
  rus: 'a Russian'
}
// createWorker is a factory function, so it is called without `new`.
// Progress packets from the worker are rendered by progressUpdate.
var worker = Tesseract.createWorker({
  logger: progressUpdate,
});
// Sizes the overlay canvas to the image's intrinsic dimensions and makes the
// log panel as tall as the image is rendered.
function setUp(){
  const { naturalWidth, naturalHeight } = input
  input_overlay.width = naturalWidth
  input_overlay.height = naturalHeight
  output_text.style.height = `${input.height}px`
}
// Size the overlay right away (covers an image that is already loaded) and
// again every time the image element fires `load`.
setUp()
input.onload = setUp
// True when the top edge of the log panel lies above `dimensions.height`.
// NOTE(review): `dimensions` is not defined in the visible part of this
// file — presumably a global holding the viewport size; verify.
function isOutputVisible(){
  const { top } = output_text.getBoundingClientRect()
  return top < dimensions.height
}
// Scroll handler: starts the demo the first time the output is visible.
// The `demoStarted` flag is checked first so that, once the demo has
// started, scroll events no longer trigger a layout read through
// isOutputVisible(). The parameter (the scroll event when used as a
// listener) is unused.
function startDemoIfVisible(argument) {
  if (demoStarted) return;
  if (isOutputVisible()) startDemo();
}
// Marks the demo as started and runs English OCR on the input image, either
// immediately (image already complete) or once it has loaded.
function startDemo(){
  demoStarted = true
  // Loads and initializes the worker, recognizes the image and hands the
  // data to result(). Failures are logged instead of being left as an
  // unhandled promise rejection.
  async function start(){
    try {
      await worker.load();
      await worker.loadLanguage('eng');
      await worker.initialize('eng');
      const { data } = await worker.recognize(input);
      result(data);
    } catch (err) {
      console.error('Tesseract demo failed:', err)
    }
  }
  if(input.complete) start();
  // `once` removes the listener as soon as it fires, so a later load of the
  // same element cannot start a second, overlapping run.
  else input.addEventListener('load', start, { once: true })
}
// function progress(p){
// var text = JSON.stringify(p)
// // octx.clearRect(0, 0, output.width, output.height)
// // octx.textAlign = 'center'
// // octx.fillText(text, output.width/2, output.height/2)
// output_overlay.style.display = 'block'
// output_overlay.innerHTML += output_overlay.innerHTML.length ? "\n" + text : text
// output_overlay.scrollTop = output_overlay.scrollHeight;
// }
// Render one worker log packet into the #log element.
//
// packet.status   - label of the current step; also stored on the line element
// packet.progress - optional value shown in a <progress> bar (max is 1)
// packet.data     - for status 'done', the result whose `text` is displayed
//
// A packet whose status equals that of the newest line only updates that
// line's progress bar; any other packet inserts a new line at the top.
function progressUpdate(packet){
  var log = document.getElementById('log');
  var latest = log.firstChild

  if(latest && latest.status === packet.status){
    if('progress' in packet){
      var bar = latest.querySelector('progress')
      // The line may have been created from a packet without `progress`;
      // create the bar on demand instead of dereferencing null.
      if(!bar){
        bar = document.createElement('progress')
        bar.max = 1
        latest.appendChild(bar)
      }
      bar.value = packet.progress
    }
    return
  }

  var line = document.createElement('div');
  line.status = packet.status;

  if(packet.status === 'done'){
    // A finished job shows only the recognized text.
    var pre = document.createElement('pre')
    pre.appendChild(document.createTextNode(packet.data.text))
    line.appendChild(pre)
  }else{
    var status = document.createElement('div')
    status.className = 'status'
    status.appendChild(document.createTextNode(packet.status))
    line.appendChild(status)
    if('progress' in packet){
      var progress = document.createElement('progress')
      progress.value = packet.progress
      progress.max = 1
      line.appendChild(progress)
    }
  }

  log.insertBefore(line, log.firstChild)
}
// Display a finished recognition: log it, push a 'done' packet to the log
// panel, then outline every recognized word on the overlay canvas.
//
// res - recognition data; reads `res.words[]`, each with `bbox` {x0,y0,x1,y1}
//       and `baseline` {x0,y0,x1,y1}; the whole object is forwarded to
//       progressUpdate as the packet's `data`.
function result(res){
  console.log('result was:', res)
  progressUpdate({ status: 'done', data: res })

  // The canvas 2D context has no `strokeWidth` property; `lineWidth` is the
  // one that controls stroke thickness.
  ioctx.lineWidth = 2

  res.words.forEach(function(w){
    var b = w.bbox;

    // Bounding box in red.
    ioctx.strokeStyle = 'red'
    ioctx.strokeRect(b.x0, b.y0, b.x1-b.x0, b.y1-b.y0)

    // Baseline in green.
    ioctx.beginPath()
    ioctx.moveTo(w.baseline.x0, w.baseline.y0)
    ioctx.lineTo(w.baseline.x1, w.baseline.y1)
    ioctx.strokeStyle = 'green'
    ioctx.stroke()
  })
}
// Check on every scroll whether the demo should start, and check once
// immediately as well.
document.addEventListener('scroll', startDemoIfVisible)
startDemoIfVisible()
// Wipe everything drawn on the overlay canvas, hide the output panel and
// show the demo instructions again.
function clearOverLayAndOutput(){
  var overlayWidth = input_overlay.width
  var overlayHeight = input_overlay.height
  ioctx.clearRect(0, 0, overlayWidth, overlayHeight)

  demo_instructions.style.display = 'block'
  output_text.style.display = 'none'
}
// function displayPlayButtonFor(lang){
// output.addEventListener('click', function play(){
// output.removeEventListener('click', play)
// tessWorker.recognize(input, lang)
// .progress( progress )
// .then( result )
// })
// }
// Recognize the current image with the currently selected language:
// swap the instructions for an emptied output panel, prepare the worker,
// run OCR on `input` and hand the data to result().
async function play(){
  demo_instructions.style.display = 'none'
  output_text.style.display = 'block'
  output_text.innerHTML = ''

  await worker.load();
  await worker.loadLanguage(language);
  await worker.initialize(language);

  const recognition = await worker.recognize(input);
  result(recognition.data);
}
// Clicking a language option: reset the overlay/output, update the drop
// instructions, remember the language, mark the option as selected and, when
// a sample image exists for that language, load it into `input`.
options.forEach(function(option){
  option.addEventListener('click', function(){
    var chosen = option.lang

    clearOverLayAndOutput()

    for(var i = 0; i < drop_instructions.length; i++){
      drop_instructions[i].innerHTML = lang_drop_instructions[chosen]
    }

    language = chosen

    // Deselect every option, then highlight the clicked one.
    for(var j = 0; j < options.length; j++){
      options[j].className = 'option'
    }
    option.className = 'option selected'

    if(!(chosen in lang_demo_images)) return
    input.src = lang_demo_images[chosen]
  })
})
// Dropping an image file on the page: preview it in `input` and recognize it
// with the currently selected language.
document.body.addEventListener('drop', async function(e){
  e.stopPropagation();
  e.preventDefault();

  var file = e.dataTransfer.files[0]
  // A drop without files (e.g. dragged text or a link) has nothing to read;
  // FileReader.readAsDataURL would throw on `undefined`.
  if(!file) return

  // Show the dropped image right away; OCR below works on the file itself.
  var reader = new FileReader();
  reader.onload = function(loadEvent){
    input.src = loadEvent.target.result;
    input.onload = function(){
      setUp();
    }
  };
  reader.readAsDataURL(file);

  await worker.load();
  await worker.loadLanguage(language);
  await worker.initialize(language);
  const { data } = await worker.recognize(file);
  result(data);
})

448
docs/api.md

@ -1,448 +0,0 @@ @@ -1,448 +0,0 @@
# API
- [createWorker()](#create-worker)
- [Worker.load](#worker-load)
- [Worker.writeText](#worker-writeText)
- [Worker.readText](#worker-readText)
- [Worker.removeFile](#worker-removeFile)
- [Worker.FS](#worker-FS)
- [Worker.loadLanguage](#worker-load-language)
- [Worker.initialize](#worker-initialize)
- [Worker.setParameters](#worker-set-parameters)
- [Worker.recognize](#worker-recognize)
- [Worker.detect](#worker-detect)
- [Worker.terminate](#worker-terminate)
- [createScheduler()](#create-scheduler)
- [Scheduler.addWorker](#scheduler-add-worker)
- [Scheduler.addJob](#scheduler-add-job)
- [Scheduler.getQueueLen](#scheduler-get-queue-len)
- [Scheduler.getNumWorkers](#scheduler-get-num-workers)
- [setLogging()](#set-logging)
- [recognize()](#recognize)
- [detect()](#detect)
- [PSM](#psm)
- [OEM](#oem)
---
<a name="create-worker"></a>
## createWorker(options): Worker
createWorker is a factory function that creates a tesseract worker, a worker is basically a Web Worker in browser and Child Process in Node.
**Arguments:**
- `options` an object of customized options
- `corePath` path for tesseract-core.js script
- `langPath` path for downloading traineddata, do not include `/` at the end of the path
- `workerPath` path for downloading worker script
- `dataPath` path for saving traineddata in WebAssembly file system, not common to modify
- `cachePath` path for the cached traineddata, more useful for Node, for browser it only changes the key in IndexedDB
- `cacheMethod` a string to indicate the method of cache management, should be one of the following options
- write: read cache and write back (default method)
- readOnly: read cache and not to write back
- refresh: not to read cache and write back
- none: not to read cache and not to write back
- `workerBlobURL` a boolean to define whether to use Blob URL for worker script, default: true
- `gzip` a boolean to define whether the traineddata from the remote is gzipped, default: true
- `logger` a function to log the progress, a quick example is `m => console.log(m)`
- `errorHandler` a function to handle worker errors, a quick example is `err => console.error(err)`
**Examples:**
```javascript
const { createWorker } = Tesseract;
const worker = createWorker({
langPath: '...',
logger: m => console.log(m),
});
```
## Worker
A Worker helps you to do the OCR related tasks, it takes few steps to setup Worker before it is fully functional. The full flow is:
- load
- FS functions // optional
- loadLanguage
- initialize
- setParameters // optional
- recognize or detect
- terminate
Each function is async, so using async/await or Promise is required. When it is resolved, you get an object:
```json
{
"jobId": "Job-1-123",
"data": { ... }
}
```
jobId is generated by Tesseract.js, but you can put your own when calling any of the function above.
<a name="worker-load"></a>
### Worker.load(jobId): Promise
Worker.load() loads tesseract.js-core scripts (download from remote if not present), it makes Web Worker/Child Process ready for next action.
**Arguments:**
- `jobId` Please see details above
**Examples:**
```javascript
(async () => {
await worker.load();
})();
```
<a name="worker-writeText"></a>
### Worker.writeText(path, text, jobId): Promise
Worker.writeText() writes a text file to the path specified in MEMFS, it is useful when you want to use some features that requires tesseract.js
to read file from file system.
**Arguments:**
- `path` text file path
- `text` content of the text file
- `jobId` Please see details above
**Examples:**
```javascript
(async () => {
await worker.writeText('tmp.txt', 'Hi\nTesseract.js\n');
})();
```
<a name="worker-readText"></a>
### Worker.readText(path, jobId): Promise
Worker.readText() reads a text file from the path specified in MEMFS, it is useful when you want to check the content.
**Arguments:**
- `path` text file path
- `jobId` Please see details above
**Examples:**
```javascript
(async () => {
const { data } = await worker.readText('tmp.txt');
console.log(data);
})();
```
<a name="worker-removeFile"></a>
### Worker.removeFile(path, jobId): Promise
Worker.removeFile() removes a file in MEMFS, it is useful when you want to free the memory.
**Arguments:**
- `path` file path
- `jobId` Please see details above
**Examples:**
```javascript
(async () => {
await worker.removeFile('tmp.txt');
})();
```
<a name="worker-FS"></a>
### Worker.FS(method, args, jobId): Promise
Worker.FS() is a generic FS function to do anything you want, you can check [HERE](https://emscripten.org/docs/api_reference/Filesystem-API.html) for all functions.
**Arguments:**
- `method` method name
- `args` array of arguments to pass
- `jobId` Please see details above
**Examples:**
```javascript
(async () => {
await worker.FS('writeFile', ['tmp.txt', 'Hi\nTesseract.js\n']);
// equal to:
// await worker.writeText('tmp.txt', 'Hi\nTesseract.js\n');
})();
```
<a name="worker-load-language"></a>
### Worker.loadLanguage(langs, jobId): Promise
Worker.loadLanguage() loads traineddata from cache or download traineddata from remote, and put traineddata into the WebAssembly file system.
**Arguments:**
- `langs` a string to indicate the languages traineddata to download, multiple languages are concatenated with **+**, ex: **eng+chi\_tra**
- `jobId` Please see details above
**Examples:**
```javascript
(async () => {
await worker.loadLanguage('eng+chi_tra');
})();
```
<a name="worker-initialize"></a>
### Worker.initialize(langs, oem, jobId): Promise
Worker.initialize() initializes the Tesseract API, make sure it is ready for doing OCR tasks.
**Arguments:**
- `langs` a string to indicate the languages loaded by Tesseract API, it can be the subset of the language traineddata you loaded from Worker.loadLanguage.
- `oem` a enum to indicate the OCR Engine Mode you use
- `jobId` Please see details above
**Examples:**
```javascript
(async () => {
/** You can load more languages in advance, but use only part of them in Worker.initialize() */
await worker.loadLanguage('eng+chi_tra');
await worker.initialize('eng');
})();
```
<a name="worker-set-parameters"></a>
### Worker.setParameters(params, jobId): Promise
Worker.setParameters() set parameters for Tesseract API (using SetVariable()), it changes the behavior of Tesseract and some parameters like tessedit\_char\_whitelist is very useful.
**Arguments:**
- `params` an object with key and value of the parameters
- `jobId` Please see details above
**Supported Parameters:**
| name | type | default value | description |
| --------------------------- | ------ | ----------------- | ------------------------------------------------------------------------------------------------------------------------------- |
| tessedit\_ocr\_engine\_mode | enum | OEM.DEFAULT | Check [HERE](https://github.com/tesseract-ocr/tesseract/blob/4.0.0/src/ccstruct/publictypes.h#L268) for definition of each mode |
| tessedit\_pageseg\_mode | enum | PSM.SINGLE\_BLOCK | Check [HERE](https://github.com/tesseract-ocr/tesseract/blob/4.0.0/src/ccstruct/publictypes.h#L163) for definition of each mode |
| tessedit\_char\_whitelist | string | '' | setting white list characters makes the result contain only these characters, useful when the content in the image is limited |
| preserve\_interword\_spaces | string | '0' | '0' or '1', keeps the space between words |
| user\_defined\_dpi | string | '' | Define custom dpi, use to fix **Warning: Invalid resolution 0 dpi. Using 70 instead.** |
| tessjs\_create\_hocr | string | '1' | only 2 values, '0' or '1', when the value is '1', tesseract.js includes hocr in the result |
| tessjs\_create\_tsv | string | '1' | only 2 values, '0' or '1', when the value is '1', tesseract.js includes tsv in the result |
| tessjs\_create\_box | string | '0' | only 2 values, '0' or '1', when the value is '1', tesseract.js includes box in the result |
| tessjs\_create\_unlv | string | '0' | only 2 values, '0' or '1', when the value is '1', tesseract.js includes unlv in the result |
| tessjs\_create\_osd | string | '0' | only 2 values, '0' or '1', when the value is '1', tesseract.js includes osd in the result |
**Examples:**
```javascript
(async () => {
await worker.setParameters({
tessedit_char_whitelist: '0123456789',
});
})();
```
<a name="worker-recognize"></a>
### Worker.recognize(image, options, jobId): Promise
Worker.recognize() provides core function of Tesseract.js as it executes OCR
Figures out what words are in `image`, where the words are in `image`, etc.
> Note: `image` should be sufficiently high resolution.
> Often, the same image will get much better results if you upscale it before calling `recognize`.
**Arguments:**
- `image` see [Image Format](./image-format.md) for more details.
- `options` a object of customized options
- `rectangle` an object to specify the regions you want to recognized in the image, should contain top, left, width and height, see example below.
- `jobId` Please see details above
**Output:**
**Examples:**
```javascript
const { createWorker } = Tesseract;
(async () => {
const worker = createWorker();
await worker.load();
await worker.loadLanguage('eng');
await worker.initialize('eng');
const { data: { text } } = await worker.recognize(image);
console.log(text);
})();
```
With rectangle
```javascript
const { createWorker } = Tesseract;
(async () => {
const worker = createWorker();
await worker.load();
await worker.loadLanguage('eng');
await worker.initialize('eng');
const { data: { text } } = await worker.recognize(image, {
rectangle: { top: 0, left: 0, width: 100, height: 100 },
});
console.log(text);
})();
```
<a name="worker-detect"></a>
### Worker.detect(image, jobId): Promise
Worker.detect() does OSD (Orientation and Script Detection) to the image instead of OCR.
**Arguments:**
- `image` see [Image Format](./image-format.md) for more details.
- `jobId` Please see details above
**Examples:**
```javascript
const { createWorker } = Tesseract;
(async () => {
const worker = createWorker();
await worker.load();
await worker.loadLanguage('eng');
await worker.initialize('eng');
const { data } = await worker.detect(image);
console.log(data);
})();
```
<a name="worker-terminate"></a>
### Worker.terminate(jobId): Promise
Worker.terminate() terminates the worker and cleans up
```javascript
(async () => {
await worker.terminate();
})();
```
<a name="create-scheduler"></a>
## createScheduler(): Scheduler
createScheduler() is a factory function to create a scheduler, a scheduler manages a job queue and workers to enable multiple workers to work together, it is useful when you want to speed up your performance.
**Examples:**
```javascript
const { createScheduler } = Tesseract;
const scheduler = createScheduler();
```
### Scheduler
<a name="scheduler-add-worker"></a>
### Scheduler.addWorker(worker): string
Scheduler.addWorker() adds a worker into the worker pool inside scheduler, it is suggested to add one worker to only one scheduler.
**Arguments:**
- `worker` see Worker above
**Examples:**
```javascript
const { createWorker, createScheduler } = Tesseract;
const scheduler = createScheduler();
const worker = createWorker();
scheduler.addWorker(worker);
```
<a name="scheduler-add-job"></a>
### Scheduler.addJob(action, ...payload): Promise
Scheduler.addJob() adds a job to the job queue and scheduler waits and finds an idle worker to take the job.
**Arguments:**
- `action` a string to indicate the action you want to do, right now only **recognize** and **detect** are supported
- `payload` a arbitrary number of args depending on the action you called.
**Examples:**
```javascript
(async () => {
const { data: { text } } = await scheduler.addJob('recognize', image, options);
const { data } = await scheduler.addJob('detect', image);
})();
```
<a name="scheduler-get-queue-len"></a>
### Scheduler.getQueueLen(): number
Scheduler.getQueueLen() returns the length of job queue.
<a name="scheduler-get-num-workers"></a>
### Scheduler.getNumWorkers(): number
Scheduler.getNumWorkers() returns number of workers added into the scheduler
<a name="scheduler-terminate"></a>
### Scheduler.terminate(): Promise
Scheduler.terminate() terminates all workers added, useful to do quick clean up.
**Examples:**
```javascript
(async () => {
await scheduler.terminate();
})();
```
<a name="set-logging"></a>
## setLogging(logging: boolean)
setLogging() sets the logging flag, you can `setLogging(true)` to see detailed information, useful for debugging.
**Arguments:**
- `logging` boolean to define whether to see detailed logs, default: false
**Examples:**
```javascript
const { setLogging } = Tesseract;
setLogging(true);
```
<a name="recognize"></a>
## recognize(image, langs, options): Promise
recognize() is a function to quickly do recognize() task, it is not recommended to use in real application, but useful when you want to save some time.
See [Tesseract.js](../src/Tesseract.js)
<a name="detect"></a>
## detect(image, options): Promise
Same background as recognize(), but it does detect instead.
See [Tesseract.js](../src/Tesseract.js)
<a name="psm"></a>
## PSM
See [PSM.js](../src/constants/PSM.js)
<a name="oem"></a>
## OEM
See [OEM.js](../src/constants/OEM.js)

226
docs/examples.md

@ -1,226 +0,0 @@ @@ -1,226 +0,0 @@
# Tesseract.js Examples
You can also check [examples](../examples) folder.
### basic
```javascript
const { createWorker } = require('tesseract.js');
const worker = createWorker();
(async () => {
await worker.load();
await worker.loadLanguage('eng');
await worker.initialize('eng');
const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png');
console.log(text);
await worker.terminate();
})();
```
### with detailed progress
```javascript
const { createWorker } = require('tesseract.js');
const worker = createWorker({
logger: m => console.log(m), // Add logger here
});
(async () => {
await worker.load();
await worker.loadLanguage('eng');
await worker.initialize('eng');
const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png');
console.log(text);
await worker.terminate();
})();
```
### with multiple languages, separate by '+'
```javascript
const { createWorker } = require('tesseract.js');
const worker = createWorker();
(async () => {
await worker.load();
await worker.loadLanguage('eng+chi_tra');
await worker.initialize('eng+chi_tra');
const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png');
console.log(text);
await worker.terminate();
})();
```
### with whitelist char (^2.0.0-beta.1)
```javascript
const { createWorker } = require('tesseract.js');
const worker = createWorker();
(async () => {
await worker.load();
await worker.loadLanguage('eng');
await worker.initialize('eng');
await worker.setParameters({
tessedit_char_whitelist: '0123456789',
});
const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png');
console.log(text);
await worker.terminate();
})();
```
### with different pageseg mode (^2.0.0-beta.1)
Check here for more details of pageseg mode: https://github.com/tesseract-ocr/tesseract/blob/4.0.0/src/ccstruct/publictypes.h#L163
```javascript
const { createWorker, PSM } = require('tesseract.js');
const worker = createWorker();
(async () => {
await worker.load();
await worker.loadLanguage('eng');
await worker.initialize('eng');
await worker.setParameters({
tessedit_pageseg_mode: PSM.SINGLE_BLOCK,
});
const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png');
console.log(text);
await worker.terminate();
})();
```
### with pdf output (^2.0.0-beta.1)
Please check **examples** folder for details.
Browser: [download-pdf.html](../examples/browser/download-pdf.html)
Node: [download-pdf.js](../examples/node/download-pdf.js)
### with only part of the image (^2.0.1)
**One rectangle**
```javascript
const { createWorker } = require('tesseract.js');
const worker = createWorker();
const rectangle = { left: 0, top: 0, width: 500, height: 250 };
(async () => {
await worker.load();
await worker.loadLanguage('eng');
await worker.initialize('eng');
const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png', { rectangle });
console.log(text);
await worker.terminate();
})();
```
**Multiple Rectangles**
```javascript
const { createWorker } = require('tesseract.js');
const worker = createWorker();
const rectangles = [
{
left: 0,
top: 0,
width: 500,
height: 250,
},
{
left: 500,
top: 0,
width: 500,
height: 250,
},
];
(async () => {
await worker.load();
await worker.loadLanguage('eng');
await worker.initialize('eng');
const values = [];
for (let i = 0; i < rectangles.length; i++) {
const { data: { text } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png', { rectangle: rectangles[i] });
values.push(text);
}
console.log(values);
await worker.terminate();
})();
```
**Multiple Rectangles (with scheduler to do recognition in parallel)**
```javascript
const { createWorker, createScheduler } = require('tesseract.js');
const scheduler = createScheduler();
const worker1 = createWorker();
const worker2 = createWorker();
const rectangles = [
{
left: 0,
top: 0,
width: 500,
height: 250,
},
{
left: 500,
top: 0,
width: 500,
height: 250,
},
];
(async () => {
await worker1.load();
await worker2.load();
await worker1.loadLanguage('eng');
await worker2.loadLanguage('eng');
await worker1.initialize('eng');
await worker2.initialize('eng');
scheduler.addWorker(worker1);
scheduler.addWorker(worker2);
const results = await Promise.all(rectangles.map((rectangle) => (
scheduler.addJob('recognize', 'https://tesseract.projectnaptha.com/img/eng_bw.png', { rectangle })
)));
console.log(results.map(r => r.data.text));
await scheduler.terminate();
})();
```
### with multiple workers to speed up (^2.0.0-beta.1)
```javascript
const { createWorker, createScheduler } = require('tesseract.js');
const scheduler = createScheduler();
const worker1 = createWorker();
const worker2 = createWorker();
(async () => {
await worker1.load();
await worker2.load();
await worker1.loadLanguage('eng');
await worker2.loadLanguage('eng');
await worker1.initialize('eng');
await worker2.initialize('eng');
scheduler.addWorker(worker1);
scheduler.addWorker(worker2);
/** Add 10 recognition jobs */
const results = await Promise.all(Array(10).fill(0).map(() => (
scheduler.addJob('recognize', 'https://tesseract.projectnaptha.com/img/eng_bw.png')
)))
console.log(results);
await scheduler.terminate(); // It also terminates all workers.
})();
```

42
docs/faq.md

@ -1,42 +0,0 @@ @@ -1,42 +0,0 @@
FAQ
===
## How does tesseract.js download and keep \*.traineddata?
The language model is downloaded by `worker.loadLanguage()` and you need to pass the langs to `worker.initialize()`.
During the downloading of language model, Tesseract.js will first check if \*.traineddata already exists. (browser: [IndexedDB](https://developer.mozilla.org/en-US/docs/Web/API/IndexedDB_API), Node.js: fs, in the folder you execute the command) If the \*.traineddata doesn't exist, it will fetch \*.traineddata.gz from [tessdata](https://github.com/naptha/tessdata), ungzip and store in IndexedDB or fs, you can delete it manually and it will download again for you.
## How can I train my own \*.traineddata?
For tesseract.js v2, check [TrainingTesseract 4.00](https://tesseract-ocr.github.io/tessdoc/TrainingTesseract-4.00)
For tesseract.js v1, check [Training Tesseract 3.03–3.05](https://tesseract-ocr.github.io/tessdoc/Training-Tesseract-3.03%E2%80%933.05)
## How can I get HOCR, TSV, Box, UNLV, OSD?
Starting from 2.0.0-beta.1, you can get all these information in the final result.
```javascript
import { createWorker } from 'tesseract.js';
const worker = createWorker({
logger: m => console.log(m)
});
(async () => {
await worker.load();
await worker.loadLanguage('eng');
await worker.initialize('eng');
await worker.setParameters({
tessjs_create_box: '1',
tessjs_create_unlv: '1',
tessjs_create_osd: '1',
});
const { data: { text, hocr, tsv, box, unlv } } = await worker.recognize('https://tesseract.projectnaptha.com/img/eng_bw.png');
console.log(text);
console.log(hocr);
console.log(tsv);
console.log(box);
console.log(unlv);
})();
```

18
docs/image-format.md

@ -1,18 +0,0 @@ @@ -1,18 +0,0 @@
# Image Format
The main Tesseract.js functions (ex. recognize, detect) take an `image` parameter. The image formats and data types supported are listed below.
Support Image Formats: **bmp, jpg, png, pbm, webp**
For browser and Node, supported data types are:
- string with base64 encoded image (fits `data:image\/([a-zA-Z]*);base64,([^"]*)` regexp)
- buffer
For browser only, supported data types are:
- `File` or `Blob` object
- `img` or `canvas` element
For Node only, supported data types are:
- string containing a path to local image
Note: images must be a supported image format **and** a supported data type. For example, a buffer containing a png image is supported. A buffer containing raw pixel data is not supported.

BIN
docs/images/demo.gif

Binary file not shown.

Before

Width:  |  Height:  |  Size: 97 KiB

BIN
docs/images/tesseract.png

Binary file not shown.

Before

Width:  |  Height:  |  Size: 105 KiB

BIN
docs/images/video-demo.gif

Binary file not shown.

Before

Width:  |  Height:  |  Size: 237 KiB

38
docs/local-installation.md

@ -1,38 +0,0 @@ @@ -1,38 +0,0 @@
## Local Installation
Check here for examples: https://github.com/naptha/tesseract.js/blob/master/docs/examples.md
In browser environment, `tesseract.js` simply provides the API layer. Internally, it opens a WebWorker to handle requests. That worker itself loads code from the Emscripten-built `tesseract.js-core` which itself is hosted on a CDN. Then it dynamically loads language files hosted on another CDN.
Because of this we recommend loading `tesseract.js` from a CDN. But if you really need to have all your files local, you can pass extra options to `Tesseract.recognize` or `createWorker` to specify custom paths for workers, languages, and core.
In Node.js environment, the only path you may want to customize is languages/langPath.
```javascript
Tesseract.recognize(image, langs, {
workerPath: 'https://unpkg.com/tesseract.js@v2.0.0/dist/worker.min.js',
langPath: 'https://tessdata.projectnaptha.com/4.0.0',
corePath: 'https://unpkg.com/tesseract.js-core@v2.0.0/tesseract-core.wasm.js',
})
```
Or
```javascript
const worker = createWorker({
workerPath: 'https://unpkg.com/tesseract.js@v2.0.0/dist/worker.min.js',
langPath: 'https://tessdata.projectnaptha.com/4.0.0',
corePath: 'https://unpkg.com/tesseract.js-core@v2.0.0/tesseract-core.wasm.js',
});
```
### workerPath
A string specifying the location of the [worker.js](./dist/worker.min.js) file.
### langPath
A string specifying the location of the tesseract language files, with default value 'https://tessdata.projectnaptha.com/4.0.0'. Do not include a trailing `/`; language file URLs are calculated according to the formula `langPath + '/' + langCode + '.traineddata.gz'`.
### corePath
A string specifying the location of the [tesseract.js-core library](https://github.com/naptha/tesseract.js-core), with default value 'https://unpkg.com/tesseract.js-core@v2.0.0/tesseract-core.wasm.js' (fallback to tesseract-core.asm.js when WebAssembly is not available).
Another WASM option is 'https://unpkg.com/tesseract.js-core@v2.0.0/tesseract-core.js' which is a script that loads 'https://unpkg.com/tesseract.js-core@v2.0.0/tesseract-core.wasm'. But it fails to fetch at this moment.

3
docs/tesseract_lang_list.md

@ -1,3 +0,0 @@ @@ -1,3 +0,0 @@
# Tesseract Languages
Please check [HERE](https://tesseract-ocr.github.io/tessdoc/Data-Files#data-files-for-version-400-november-29-2016) for supported languages

37
examples/browser/basic-edge.html

@ -1,37 +0,0 @@ @@ -1,37 +0,0 @@
<!DOCTYPE HTML>
<html>
<head>
<script src="/dist/tesseract.dev.js"></script>
</head>
<body>
<input type="file" id="uploader">
<script>
/*
 * Change handler for the file input: creates a worker, recognizes the first
 * selected file as English text and logs the recognized text.
 * Errors are logged, and the worker is terminated afterwards in either case
 * so that repeated selections do not pile up workers.
 */
const recognize = function(evt){
  const files = evt.target.files;
  const worker = Tesseract.createWorker({
    /*
     * As Edge don't support webassembly,
     * here we force to use asm.js version.
     */
    corePath: '../../node_modules/tesseract.js-core/tesseract-core.asm.js',
    logger: function(m){console.log(m);},
    /*
     * As there is no indexedDB in earlier version
     * of Edge, here we disable cache.
     */
    cacheMethod: 'none',
  });
  Promise.resolve()
    .then(() => worker.load())
    .then(() => worker.loadLanguage('eng'))
    .then(() => worker.initialize('eng'))
    .then(() => worker.recognize(files[0]))
    .then((ret) => {
      console.log(ret.data.text);
    })
    .catch((err) => {
      console.error(err);
    })
    /* Runs after success and after a logged failure alike. */
    .then(() => worker.terminate());
}
const elm = document.getElementById('uploader');
elm.addEventListener('change', recognize);
</script>
</body>
</html>

19
examples/browser/basic.html

@ -1,19 +0,0 @@ @@ -1,19 +0,0 @@
<html>
<head>
<script src="/dist/tesseract.dev.js"></script>
</head>
<body>
<input type="file" id="uploader">
<script>
// Change handler for the file input: recognizes the first selected file as
// English text with the one-shot Tesseract.recognize helper, logging progress
// messages and finally the recognized text.
const recognize = async (evt) => {
  const file = evt.target.files[0];
  const options = {
    corePath: '../../node_modules/tesseract.js-core/tesseract-core.wasm.js',
    logger: (message) => { console.log(message); },
  };
  const outcome = await Tesseract.recognize(file, 'eng', options);
  console.log(outcome.data.text);
}
const elm = document.getElementById('uploader');
elm.addEventListener('change', recognize);
</script>
</body>
</html>

33
examples/browser/benchmark.html

@ -1,33 +0,0 @@ @@ -1,33 +0,0 @@
<html>
<head>
<script src="/dist/tesseract.dev.js"></script>
</head>
<body>
<textarea id="message">Working...</textarea>
<script>
const { createWorker } = Tesseract;
const worker = createWorker();
// Benchmark: recognize each sample image 10 times with a single English
// worker, append the per-image and total runtime (in seconds) to #message,
// then terminate the worker so it does not stay alive after the run.
(async () => {
  await worker.load();
  await worker.loadLanguage('eng');
  await worker.initialize('eng');
  const fileArr = ["../data/meditations.jpg", "../data/tyger.jpg", "../data/testocr.png"];
  const message = document.getElementById('message');
  let timeTotal = 0;
  for (const file of fileArr) {
    const time1 = Date.now();
    for (let i = 0; i < 10; i++) {
      await worker.recognize(file);
    }
    const time2 = Date.now();
    const timeDif = (time2 - time1) / 1e3;
    timeTotal += timeDif;
    message.innerHTML += "\n" + file + " [x10] runtime: " + timeDif + "s";
  }
  message.innerHTML += "\nTotal runtime: " + timeTotal + "s";
  await worker.terminate();
})();
</script>
</body>
</html>

162
examples/browser/demo.html

@ -1,162 +0,0 @@ @@ -1,162 +0,0 @@
<script src="/dist/tesseract.dev.js"></script>
<script>
// Render one log packet into the #log element.
//
// packet.status   - label of the current step; also stored on the line element
// packet.progress - optional value shown in a <progress> bar (max is 1)
// packet.data     - for status 'done', the value resolved by
//                   Tesseract.recognize; its `data.text` is displayed
//
// A packet whose status equals that of the newest line only updates that
// line's progress bar; any other packet inserts a new line at the top.
function progressUpdate(packet){
  var log = document.getElementById('log');
  var latest = log.firstChild

  if(latest && latest.status === packet.status){
    if('progress' in packet){
      var bar = latest.querySelector('progress')
      // The line may have been created from a packet without `progress`;
      // create the bar on demand instead of dereferencing null.
      if(!bar){
        bar = document.createElement('progress')
        bar.max = 1
        latest.appendChild(bar)
      }
      bar.value = packet.progress
    }
    return
  }

  var line = document.createElement('div');
  line.status = packet.status;

  if(packet.status === 'done'){
    // A finished job shows only the recognized text.
    var pre = document.createElement('pre')
    pre.appendChild(document.createTextNode(packet.data.data.text))
    line.appendChild(pre)
  }else{
    var status = document.createElement('div')
    status.className = 'status'
    status.appendChild(document.createTextNode(packet.status))
    line.appendChild(status)
    if('progress' in packet){
      var progress = document.createElement('progress')
      progress.value = packet.progress
      progress.max = 1
      line.appendChild(progress)
    }
  }

  log.insertBefore(line, log.firstChild)
}
// Recognize `file` in the language chosen in #langsel: clears the log, picks
// the asm.js core when the user agent string contains "Edge" (wasm otherwise),
// streams progress through progressUpdate and finally posts a 'done' packet
// carrying the value resolved by Tesseract.recognize.
async function recognizeFile(file) {
  document.querySelector("#log").innerHTML = ''

  let corePath
  if (window.navigator.userAgent.indexOf("Edge") > -1) {
    corePath = '../../node_modules/tesseract.js-core/tesseract-core.asm.js'
  } else {
    corePath = '../../node_modules/tesseract.js-core/tesseract-core.wasm.js'
  }

  const lang = document.querySelector('#langsel').value
  const recognizeOptions = { corePath: corePath, logger: progressUpdate }
  const data = await Tesseract.recognize(file, lang, recognizeOptions);

  progressUpdate({ status: 'done', data: data });
}
</script>
<select id="langsel" onchange="window.lastFile && recognizeFile(window.lastFile)">
<option value='afr' > Afrikaans </option>
<option value='ara' > Arabic </option>
<option value='aze' > Azerbaijani </option>
<option value='bel' > Belarusian </option>
<option value='ben' > Bengali </option>
<option value='bul' > Bulgarian </option>
<option value='cat' > Catalan </option>
<option value='ces' > Czech </option>
<option value='chi_sim' > Chinese </option>
<option value='chi_tra' > Traditional Chinese </option>
<option value='chr' > Cherokee </option>
<option value='dan' > Danish </option>
<option value='deu' > German </option>
<option value='ell' > Greek </option>
<option value='eng' selected> English </option>
<option value='enm' > English (Old) </option>
<option value='meme' > Internet Meme </option>
<option value='epo' > Esperanto </option>
<option value='epo_alt' > Esperanto alternative </option>
<option value='est' > Estonian </option>
<option value='eus' > Basque </option>
<option value='fin' > Finnish </option>
<option value='fra' > French </option>
<option value='frk' > Frankish </option>
<option value='frm' > French (Old) </option>
<option value='glg' > Galician </option>
<option value='grc' > Ancient Greek </option>
<option value='heb' > Hebrew </option>
<option value='hin' > Hindi </option>
<option value='hrv' > Croatian </option>
<option value='hun' > Hungarian </option>
<option value='ind' > Indonesian </option>
<option value='isl' > Icelandic </option>
<option value='ita' > Italian </option>
<option value='ita_old' > Italian (Old) </option>
<option value='jpn' > Japanese </option>
<option value='kan' > Kannada </option>
<option value='kor' > Korean </option>
<option value='lav' > Latvian </option>
<option value='lit' > Lithuanian </option>
<option value='mal' > Malayalam </option>
<option value='mkd' > Macedonian </option>
<option value='mlt' > Maltese </option>
<option value='msa' > Malay </option>
<option value='nld' > Dutch </option>
<option value='nor' > Norwegian </option>
<option value='pol' > Polish </option>
<option value='por' > Portuguese </option>
<option value='ron' > Romanian </option>
<option value='rus' > Russian </option>
<option value='slk' > Slovakian </option>
<option value='slv' > Slovenian </option>
<option value='spa' > Spanish </option>
<option value='spa_old' > Old Spanish </option>
<option value='sqi' > Albanian </option>
<option value='srp' > Serbian (Latin) </option>
<option value='swa' > Swahili </option>
<option value='swe' > Swedish </option>
<option value='tam' > Tamil </option>
<option value='tel' > Telugu </option>
<option value='tgl' > Tagalog </option>
<option value='tha' > Thai </option>
<option value='tur' > Turkish </option>
<option value='ukr' > Ukrainian </option>
<option value='vie' > Vietnamese </option>
</select>
<button onclick="recognizeFile('../../tests/assets/images/simple.png')">Sample Image</button>
<input type="file" onchange="recognizeFile(window.lastFile=this.files[0])">
<div id="log"></div>
<style>
/* One row per log entry: a direct child <div> of #log, laid out as a flex row. */
#log > div {
color: #313131;
border-top: 1px solid #dadada;
padding: 9px;
display: flex;
}
/* The first row has no separator line above it. */
#log > div:first-child {
border: 0;
}
/* Status label column; the minimum width keeps what follows it aligned across rows. */
.status {
min-width: 250px;
}
/* The log panel itself. */
#log {
border: 1px solid #dadada;
padding: 10px;
margin-top: 20px;
min-height: 100px;
}
body {
font-family: sans-serif;
margin: 30px;
}
/* Progress bars fill the remaining row width and fade via the opacity transition. */
progress {
display: block;
width: 100%;
transition: opacity 0.5s linear;
}
/* Dim a bar whose value attribute is exactly "1". */
progress[value="1"] {
opacity: 0.5;
}
</style>

52
examples/browser/download-pdf.html

@ -1,52 +0,0 @@ @@ -1,52 +0,0 @@
<html>
<head>
<script src="/dist/tesseract.dev.js"></script>
</head>
<body>
<div>
<input type="file" id="uploader">
<button id="download-pdf" disabled="true">Download PDF</button>
</div>
<textarea id="board" readonly rows="8" cols="80">Upload an image file</textarea>
<script>
const { createWorker } = Tesseract;
const worker = createWorker({
  corePath: '/node_modules/tesseract.js-core/tesseract-core.wasm.js',
  logger: m => console.log(m),
});
const uploader = document.getElementById('uploader');
const dlBtn = document.getElementById('download-pdf');

// Delay before the temporary blob URL is released, giving the browser time to
// start the download that link.click() triggers.
const OBJECT_URL_TTL_MS = 1000;

// Promise for the one-time worker initialisation, shared by all uploads.
let workerReady = null;

// Load and initialise the English worker once; a failed attempt can be retried.
const ensureWorkerReady = () => {
  if (!workerReady) {
    workerReady = (async () => {
      await worker.load();
      await worker.loadLanguage('eng');
      await worker.initialize('eng');
    })().catch((err) => {
      workerReady = null;
      throw err;
    });
  }
  return workerReady;
};

// 'change' handler of the file input: OCR the chosen image, show its text in
// #board and enable the PDF download button.
const recognize = async ({ target: { files } }) => {
  // The change event also fires with an empty list when the dialog is cancelled.
  if (!files || files.length === 0) {
    return;
  }
  // getPDF is requested from the same worker, so block downloads while a new
  // image is being processed.
  dlBtn.disabled = true;
  await ensureWorkerReady();
  const { data: { text } } = await worker.recognize(files[0]);
  const board = document.getElementById('board');
  board.value = text;
  dlBtn.disabled = false;
};

// 'click' handler of the download button: fetch the PDF from the worker and
// hand it to the browser as a file download.
const downloadPDF = async () => {
  const filename = 'tesseract-ocr-result.pdf';
  const { data } = await worker.getPDF('Tesseract OCR Result');
  const blob = new Blob([new Uint8Array(data)], { type: 'application/pdf' });
  if (navigator.msSaveBlob) {
    // IE 10+
    navigator.msSaveBlob(blob, filename);
  } else {
    const link = document.createElement('a');
    if (link.download !== undefined) {
      const url = URL.createObjectURL(blob);
      link.setAttribute('href', url);
      link.setAttribute('download', filename);
      link.style.visibility = 'hidden';
      document.body.appendChild(link);
      link.click();
      document.body.removeChild(link);
      // Release the blob URL so the PDF bytes are not kept alive for the
      // lifetime of the page.
      setTimeout(() => URL.revokeObjectURL(url), OBJECT_URL_TTL_MS);
    }
  }
};

uploader.addEventListener('change', recognize);
dlBtn.addEventListener('click', downloadPDF);
</script>
</body>
</html>

BIN
examples/data/meditations.jpg

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1011 KiB

BIN
examples/data/testocr.png

Binary file not shown.

Before

Width:  |  Height:  |  Size: 23 KiB

BIN
examples/data/tyger.jpg

Binary file not shown.

Before

Width:  |  Height:  |  Size: 408 KiB

27
examples/node/benchmark.js

@ -1,27 +0,0 @@ @@ -1,27 +0,0 @@
#!/usr/bin/env node
const path = require('path');
const { createWorker } = require('../../');
const worker = createWorker();

// Node benchmark: OCR every sample image 10 times with one shared English
// worker and print the per-image and total wall-clock time.
(async () => {
  try {
    await worker.load();
    await worker.loadLanguage('eng');
    await worker.initialize('eng');
    const fileArr = ["../data/meditations.jpg", "../data/tyger.jpg", "../data/testocr.png"];
    let timeTotal = 0;
    for (const file of fileArr) {
      // Resolve against this script's directory so the benchmark works from
      // any working directory.
      const imagePath = path.resolve(__dirname, file);
      const time1 = Date.now();
      for (let i = 0; i < 10; i++) {
        await worker.recognize(imagePath);
      }
      const time2 = Date.now();
      const timeDif = (time2 - time1) / 1e3; // milliseconds -> seconds
      timeTotal += timeDif;
      console.log(file + " [x10] runtime: " + timeDif + "s");
    }
    console.log("Total runtime: " + timeTotal + "s");
  } finally {
    // Always shut the worker down, including when a step above fails.
    await worker.terminate();
  }
})();

13
examples/node/detect.js

@ -1,13 +0,0 @@ @@ -1,13 +0,0 @@
#!/usr/bin/env node
const path = require('path');
const Tesseract = require('../../');
// Image to analyse: first CLI argument, or the bundled sample. Relative paths
// are resolved against this script's directory.
const [,, imagePath] = process.argv;
const image = path.resolve(__dirname, (imagePath || '../../tests/assets/images/cosmic.png'));
console.log(`Recognizing ${image}`);

Tesseract.detect(image, { logger: m => console.log(m) })
  .then(({ data }) => {
    console.log(data);
  })
  .catch((err) => {
    // Report the failure and exit non-zero instead of leaving the rejection
    // unhandled.
    console.error(err);
    process.exitCode = 1;
  });

22
examples/node/download-pdf.js

@ -1,22 +0,0 @@ @@ -1,22 +0,0 @@
#!/usr/bin/env node
const path = require('path');
const fs = require('fs');
const { createWorker } = require('../../');
// Image to recognise: first CLI argument, or the bundled sample. Relative
// paths are resolved against this script's directory.
const [,, imagePath] = process.argv;
const image = path.resolve(__dirname, (imagePath || '../../tests/assets/images/cosmic.png'));
console.log(`Recognizing ${image}`);

// OCR the image, print its text and write the PDF from the same worker to
// tesseract-ocr-result.pdf in the current working directory.
(async () => {
  const worker = createWorker();
  try {
    await worker.load();
    await worker.loadLanguage('eng');
    await worker.initialize('eng');
    const { data: { text } } = await worker.recognize(image);
    console.log(text);
    const { data } = await worker.getPDF('Tesseract OCR Result');
    fs.writeFileSync('tesseract-ocr-result.pdf', Buffer.from(data));
    console.log('Generate PDF: tesseract-ocr-result.pdf');
  } finally {
    // Always shut the worker down, including when a step above fails.
    await worker.terminate();
  }
})();

20
examples/node/recognize.js

@ -1,20 +0,0 @@ @@ -1,20 +0,0 @@
#!/usr/bin/env node
const path = require('path');
const { createWorker } = require('../../');
// Image to recognise: first CLI argument, or the bundled sample. Relative
// paths are resolved against this script's directory.
const imagePath = process.argv[2];
const image = path.resolve(__dirname, (imagePath || '../../tests/assets/images/cosmic.png'));
console.log(`Recognizing ${image}`);

// Every logger message from the worker is echoed to stdout.
const worker = createWorker({
  logger: (message) => console.log(message),
});

// Initialise the English worker, print the recognised text, then shut down.
const main = async () => {
  await worker.load();
  await worker.loadLanguage('eng');
  await worker.initialize('eng');
  const result = await worker.recognize(image);
  console.log(result.data.text);
  await worker.terminate();
};

main();

392
explorer/explorer.js

@ -0,0 +1,392 @@ @@ -0,0 +1,392 @@
"use strict";
// Compiled-class helper: installs the given property descriptors on a
// constructor's prototype (protoProps) and on the constructor itself
// (staticProps), then returns the constructor.
var _createClass = (function () {
  // Defines every descriptor in `props` on `target`. Each descriptor object is
  // normalised in place: non-enumerable unless it says otherwise, always
  // configurable, and writable when it carries a `value`.
  function defineProperties(target, props) {
    var index = 0;
    while (index < props.length) {
      var descriptor = props[index];
      descriptor.enumerable = descriptor.enumerable || false;
      descriptor.configurable = true;
      if ("value" in descriptor) {
        descriptor.writable = true;
      }
      Object.defineProperty(target, descriptor.key, descriptor);
      index += 1;
    }
  }

  return function (Constructor, protoProps, staticProps) {
    if (protoProps) {
      defineProperties(Constructor.prototype, protoProps);
    }
    if (staticProps) {
      defineProperties(Constructor, staticProps);
    }
    return Constructor;
  };
})();
// Compiled `super` property lookup. Starting at `object` (null is replaced by
// Function.prototype), walks up the prototype chain until an own descriptor for
// `property` is found. Returns the value of a data property, or the result of
// calling an accessor's getter with `receiver` as `this`. Returns undefined when
// the chain ends without a match or the accessor has no getter. The labelled
// while loop stands in for recursion: each miss re-runs the body with the parent.
var _get = function get(_x, _x2, _x3) { var _again = true; _function: while (_again) { var object = _x, property = _x2, receiver = _x3; desc = parent = getter = undefined; _again = false; if (object === null) object = Function.prototype; var desc = Object.getOwnPropertyDescriptor(object, property); if (desc === undefined) { var parent = Object.getPrototypeOf(object); if (parent === null) { return undefined; } else { _x = parent; _x2 = property; _x3 = receiver; _again = true; continue _function; } } else if ("value" in desc) { return desc.value; } else { var getter = desc.get; if (getter === undefined) { return undefined; } return getter.call(receiver); } } };
// Compiled-class guard: throws unless `instance` was created by `Constructor`,
// i.e. the constructor was invoked with `new`.
function _classCallCheck(instance, Constructor) {
  if (instance instanceof Constructor) {
    return;
  }
  throw new TypeError("Cannot call a class as a function");
}
// Compiled `extends`: gives subClass a fresh prototype that inherits from
// superClass.prototype (or from nothing when superClass is null) and, for a
// non-null superClass, makes subClass itself inherit from superClass.
function _inherits(subClass, superClass) {
  var superIsValid = typeof superClass === "function" || superClass === null;
  if (!superIsValid) {
    throw new TypeError("Super expression must either be null or a function, not " + typeof superClass);
  }

  var parentPrototype = superClass ? superClass.prototype : superClass;
  subClass.prototype = Object.create(parentPrototype, {
    constructor: {
      value: subClass,
      enumerable: false,
      writable: true,
      configurable: true
    }
  });

  if (superClass) {
    if (Object.setPrototypeOf) {
      Object.setPrototypeOf(subClass, superClass);
    } else {
      // Fallback for engines without Object.setPrototypeOf.
      subClass.__proto__ = superClass;
    }
  }
}
/**
 * Return a new array holding the items of `array` with `glue` inserted
 * between each pair of neighbours. The input array is not modified.
 *
 * @param {Array} array items to interleave
 * @param {*} glue value placed between consecutive items
 * @returns {Array} the interleaved copy
 */
function array_join(array, glue) {
  var joined = [];
  for (var idx = 0; idx < array.length; idx++) {
    if (idx > 0) {
      joined.push(glue);
    }
    joined.push(array[idx]);
  }
  return joined;
}
// Generic explorer node: picks the renderer matching the type of `props.node`
// and owns the expanded/collapsed state, which starts from `props.expanded`.
var Node = (function (_React$Component) {
  _inherits(Node, _React$Component);

  function Node(props) {
    var self = this;

    _classCallCheck(this, Node);
    _get(Object.getPrototypeOf(Node.prototype), "constructor", this).call(this, props);

    // Bound through the closure so it can be handed to children as a callback.
    this.toggleExpand = function (e) {
      self.setState({ expanded: !self.state.expanded });
    };
    this.state = { expanded: props.expanded };
  }

  _createClass(Node, [{
    key: "render",
    value: function render() {
      var node = this.props.node;
      var label = this.props.label;

      // Props shared by every renderer.
      var childProps = {
        node: node,
        className: "clickable",
        onClick: this.toggleExpand,
        toggleExpand: this.toggleExpand,
        expanded: this.state.expanded
      };

      var Renderer;
      switch (typeof node) {
        case "string":
          Renderer = TextNode;
          // A string stored under the key "html" is shown as markup.
          childProps.html = label === "html";
          break;
        case "boolean":
          Renderer = BooleanNode;
          break;
        case "number":
          Renderer = NumberNode;
          break;
        default:
          Renderer = Array.isArray(node) ? ListNode : ObjectNode;
      }

      var rep = React.createElement(Renderer, childProps);
      if (!label) {
        return rep;
      }

      var labelElement = React.createElement(
        "span",
        { className: "label clickable", onClick: this.toggleExpand },
        label
      );
      return React.createElement("span", null, labelElement, ": ", rep);
    }
  }]);

  return Node;
})(React.Component);
// Renders a string value. Collapsed: the first 30 characters, followed by an
// Ellipsis when the string is longer. Expanded: the full string, syntax
// highlighted through CodeMirror.runMode when the `html` prop is set.
var TextNode = (function (_React$Component2) {
  _inherits(TextNode, _React$Component2);

  function TextNode() {
    _classCallCheck(this, TextNode);
    _get(Object.getPrototypeOf(TextNode.prototype), "constructor", this).apply(this, arguments);
  }

  _createClass(TextNode, [{
    key: "render",
    value: function render() {
      var _props2 = this.props;
      var node = _props2.node;
      var expanded = _props2.expanded;
      var html = _props2.html;
      var toggleExpand = _props2.toggleExpand;

      if (expanded) {
        var content = node;
        if (html) {
          content = [];
          CodeMirror.runMode(node, { name: 'xml', htmlMode: true }, function (text, style) {
            content.push(React.createElement(
              "span",
              {
                // Array children need stable keys.
                key: content.length,
                // `style` is null for unstyled text and may hold several
                // space-separated names; prefix each one with "cm-" and emit
                // no class at all for unstyled text (previously "cm-null").
                className: style ? "cm-" + style.replace(/ +/g, " cm-") : undefined
              },
              text
            ));
          });
        }
        return React.createElement(
          "span",
          { className: (html ? "cm-s-default html " : "") + "textNode expanded clickable", onClick: toggleExpand },
          content
        );
      } else {
        return React.createElement(
          "span",
          null,
          React.createElement(
            "span",
            { className: (html ? "html " : "") + "textNode clickable", onClick: toggleExpand },
            node.substring(0, 30)
          ),
          node.length > 30 ? React.createElement(Ellipsis, null) : ''
        );
      }
    }
  }]);

  return TextNode;
})(React.Component);
// Renders a boolean value as its JSON text inside a "booleanNode" span.
var BooleanNode = (function (_React$Component3) {
  _inherits(BooleanNode, _React$Component3);

  function BooleanNode() {
    _classCallCheck(this, BooleanNode);
    var parentConstructor = _get(Object.getPrototypeOf(BooleanNode.prototype), "constructor", this);
    parentConstructor.apply(this, arguments);
  }

  _createClass(BooleanNode, [{
    key: "render",
    value: function render() {
      var literal = JSON.stringify(this.props.node);
      return React.createElement("span", { className: "booleanNode" }, literal);
    }
  }]);

  return BooleanNode;
})(React.Component);
// Renders a numeric value as its JSON text inside a "numberNode" span.
var NumberNode = (function (_React$Component4) {
  _inherits(NumberNode, _React$Component4);

  function NumberNode() {
    _classCallCheck(this, NumberNode);
    var parentConstructor = _get(Object.getPrototypeOf(NumberNode.prototype), "constructor", this);
    parentConstructor.apply(this, arguments);
  }

  _createClass(NumberNode, [{
    key: "render",
    value: function render() {
      var literal = JSON.stringify(this.props.node);
      return React.createElement("span", { className: "numberNode" }, literal);
    }
  }]);

  return NumberNode;
})(React.Component);
// Renders an array value. Collapsed: "[<length>]". Expanded: one Node per
// element, separated by Comma elements that carry a line break, between
// bracket spans. `expanded` and `toggleExpand` are read from props.
var ListNode = (function (_React$Component5) {
_inherits(ListNode, _React$Component5);
function ListNode() {
_classCallCheck(this, ListNode);
_get(Object.getPrototypeOf(ListNode.prototype), "constructor", this).apply(this, arguments);
}
_createClass(ListNode, [{
key: "render",
value: function render() {
var _props3 = this.props;
var node = _props3.node;
var expanded = _props3.expanded;
var toggleExpand = _props3.toggleExpand;
if (expanded) {
return React.createElement(
"span",
{ className: "listNode expanded" },
React.createElement(
"span",
{ className: "clickable", onClick: toggleExpand },
"["
),
React.createElement("br", null),
React.createElement(
"span",
{ className: "indent" },
// Elements are keyed by index; the same Comma element is reused as glue.
array_join(node.map(function (e, i) {
return React.createElement(Node, { node: e, key: i });
}), React.createElement(Comma, { br: true }))
),
React.createElement("br", null),
// The closing bracket also toggles, but has no "clickable" class.
React.createElement(
"span",
{ onClick: toggleExpand },
"]"
)
);
} else {
// Collapsed form shows only the element count.
return React.createElement(
"span",
{ className: "listNode clickable", onClick: toggleExpand },
"[",
node.length,
"]"
);
}
}
}]);
return ListNode;
})(React.Component);
// Renders an object value (and null, which also reaches this component).
// null: the text "null". Expanded: one labelled Node per key returned by
// Object.keys, separated by Comma elements with line breaks. Collapsed: up to
// four key names in braces, followed by an Ellipsis when there are more.
var ObjectNode = (function (_React$Component6) {
_inherits(ObjectNode, _React$Component6);
function ObjectNode() {
_classCallCheck(this, ObjectNode);
_get(Object.getPrototypeOf(ObjectNode.prototype), "constructor", this).apply(this, arguments);
}
_createClass(ObjectNode, [{
key: "render",
value: function render() {
var _props4 = this.props;
var node = _props4.node;
var expanded = _props4.expanded;
var toggleExpand = _props4.toggleExpand;
// null is checked first because Object.keys(null) below would throw.
if (null === node) {
return React.createElement(
"span",
{ className: "nullNode" },
"null"
);
} else if (expanded) {
return React.createElement(
"span",
{ className: "objectNode expanded" },
React.createElement(
"span",
{ className: "clickable", onClick: toggleExpand },
"{"
),
React.createElement("br", null),
React.createElement(
"span",
{ className: "indent" },
// Each property becomes a Node labelled and keyed with its property name.
array_join(Object.keys(node).map(function (key) {
return React.createElement(Node, { node: node[key], label: key, key: key });
}), React.createElement(Comma, { br: true }))
),
React.createElement("br", null),
// The closing brace also toggles, but has no "clickable" class.
React.createElement(
"span",
{ onClick: toggleExpand },
"}"
)
);
} else {
// Collapsed preview: at most the first four key names.
var keys = Object.keys(node),
toolong = false;
if (keys.length > 4) {
keys = keys.slice(0, 4);
toolong = true;
}
var contents = array_join(keys.map(function (k) {
return React.createElement(
"span",
{ className: "label" },
k
);
}), React.createElement(Comma, null));
return React.createElement(
"span",
{ className: "objectNode clickable", onClick: toggleExpand },
"{",
contents,
toolong ? React.createElement(Ellipsis, null) : '',
"}"
);
}
}
}]);
return ObjectNode;
})(React.Component);
// Separator between siblings: ", " in a "comma" span, followed by a line break
// when the `br` prop is truthy.
var Comma = (function (_React$Component7) {
  _inherits(Comma, _React$Component7);

  function Comma() {
    _classCallCheck(this, Comma);
    var parentConstructor = _get(Object.getPrototypeOf(Comma.prototype), "constructor", this);
    parentConstructor.apply(this, arguments);
  }

  _createClass(Comma, [{
    key: "render",
    value: function render() {
      var lineBreak = '';
      if (this.props.br) {
        lineBreak = React.createElement("br", null);
      }
      return React.createElement("span", { className: "comma" }, ", ", lineBreak);
    }
  }]);

  return Comma;
})(React.Component);
// Truncation marker: "..." in an "ellipsis" span.
var Ellipsis = (function (_React$Component8) {
  _inherits(Ellipsis, _React$Component8);

  function Ellipsis() {
    _classCallCheck(this, Ellipsis);
    var parentConstructor = _get(Object.getPrototypeOf(Ellipsis.prototype), "constructor", this);
    parentConstructor.apply(this, arguments);
  }

  _createClass(Ellipsis, [{
    key: "render",
    value: function render() {
      var attributes = { className: "ellipsis" };
      return React.createElement("span", attributes, "...");
    }
  }]);

  return Ellipsis;
})(React.Component);
// Sample value mixing numbers, strings, arrays, and nested and empty objects.
// Within this file it is referenced only by the commented-out React.render
// call that follows it.
var simplething = {
hello: 42,
derp: 324,
wumbo: [1, 2, 3, 4, "hello", {
blah: 32,
asdf: [],
walp: 32,
strings: "asdfsd"
}],
merp: {
blah: 32,
asdf: [],
walp: 32,
strings: "asdfsd"
},
strings: "asdfsd",
asdoijfo: {
strings: "asdfsd",
adfds: {
asdf: {
asdfadsf: {},
merp: 32
}
}
}
};
// React.render(<Node node={simplething} />, document.getElementById('explorer'))

19602
explorer/react.js vendored

File diff suppressed because it is too large Load Diff

16
explorer/react.min.js vendored

File diff suppressed because one or more lines are too long

BIN
img/bg.jpg

Binary file not shown.

After

Width:  |  Height:  |  Size: 136 KiB

BIN
img/bg.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 MiB

BIN
img/chi_sim.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 276 KiB

BIN
img/deu.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 261 KiB

BIN
img/eng.jpg

Binary file not shown.

After

Width:  |  Height:  |  Size: 79 KiB

BIN
img/eng.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 MiB

BIN
img/eng_bw.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

BIN
img/fork.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 32 KiB

BIN
img/hayden.jpg

Binary file not shown.

After

Width:  |  Height:  |  Size: 141 KiB

BIN
img/keyboard.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.4 KiB

BIN
img/logo.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 28 KiB

BIN
img/logo.psd

Binary file not shown.

BIN
img/logo_small.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

BIN
img/logoblue.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 25 KiB

BIN
img/logowhite.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 25 KiB

BIN
img/meme.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 628 KiB

BIN
img/redstars.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.6 MiB

BIN
img/run.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.8 KiB

BIN
img/rus.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 183 KiB

BIN
img/shake.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 953 KiB

BIN
img/splash.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 357 KiB

BIN
img/starblur.jpg

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.1 MiB

BIN
img/stars.jpeg

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 MiB

110
index.html

@ -0,0 +1,110 @@ @@ -0,0 +1,110 @@
<!DOCTYPE html>
<html>
<head>
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Tesseract.js | Pure Javascript OCR for 100 Languages!</title>
<link rel="stylesheet" type="text/css" href="css/main.css">
<link href='https://fonts.googleapis.com/css?family=Lato:300,400' rel='stylesheet' type='text/css'>
<script>
// Send plain-HTTP visitors to the HTTPS version of the same URL, except when
// the host starts with "localhost". replace() keeps the HTTP page out of the
// back-button history.
if (location.protocol === "http:") {
  if (!location.host.startsWith('localhost')) {
    // slice(4) drops the leading "http" so the rest of the URL is kept as is.
    location.replace("https" + window.location.href.slice(4));
  }
}
</script>
</head>
<body ondragover="return false">
<div id="splash">
<div id="logo-wrap" class="header">
<canvas id="logo-canvas"></canvas>
<img id="logo-img" src="img/logowhite.png">
<div id='slogan'>Pure Javascript Multilingual OCR</div>
</div>
<div class='get-started-wrap'>
<a href="https://github.com/naptha/tesseract.js#tesseractjs">
<div class='get-started'>
<svg id="github" viewBox="0 0 16 16" aria-hidden="true"><path d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.013 8.013 0 0 0 16 8c0-4.42-3.58-8-8-8z"></path></svg>
Get Started
</div>
</a>
</div>
</div>
<div id='demo'>
<!-- <div class="commentary">
<div id='demo-title'>Demo</div>
<p>Wherein we show you that Tesseract.js works, and let you try it with your own images.</p>
</div>
-->
<div class='commentary'>
<p><b>Tesseract.js</b> is a pure Javascript port of the popular <a href='https://github.com/tesseract-ocr/tesseract'>Tesseract OCR engine</a>. </p>
<p>This library supports <b>more than 100 languages</b>, automatic text <b>orientation and script detection</b>, and a simple interface for reading paragraph, word, and character <b>bounding boxes</b>. Tesseract.js can run either in a <b>browser</b> or on a server with <b>NodeJS</b>. </p>
<p>Check out the <a href='https://github.com/naptha/tesseract.js#tesseractjs'>Example code and API docs on GitHub</a>.</p>
</div>
<div id='demo-body'>
<div id='options'>
<div class="option selected" lang='eng'>English Demo</div>
<div class="option" lang='chi_sim'>Chinese Demo</div>
<div class="option" lang='rus'>Russian Demo</div>
</div>
<div id='drop-instructions-main'><p>Drop <span class='drop-instructions'>an English</span> image on this page to OCR it!</p></div>
<div id='demo-content'>
<div>
<canvas id='input-overlay'></canvas>
<img id='input' src="img/eng_bw.png"/>
</div>
<div id='arrow'></div>
<div>
<!-- <div id='output-overlay'></div> -->
<!-- <textarea id='output-text'></textarea> -->
<div id='log'></div>
<div id='demo-instructions'>
<span id='clickme' onclick='play()'>Click here to recognize text in the demo image</span>, or drop <span class='drop-instructions'>an English</span> image anywhere on this page.
</div>
<!-- <canvas id='output'></canvas> -->
</div>
</div>
<div class='get-started-wrap'>
<br/>
<a href="https://github.com/naptha/tesseract.js#tesseractjs">
<div class='get-started'>
<svg id="github" viewBox="0 0 16 16" aria-hidden="true"><path d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.013 8.013 0 0 0 16 8c0-4.42-3.58-8-8-8z"></path></svg>
Actually Get Started
</div>
</a>
</div>
</div>
</div>
<div id='footer'>
<span id="lengle">“Speaking of ways, pet, by the way, <b>there <i>is</i> such a thing as a tesseract</b>.”
<br>
<br>
<!-- Made with <img src="img/keyboard.png"> --> By <a href="https://twitter.com/biject">@biject</a>, <a href="https://twitter.com/antimatter15">@antimatter15</a> and <a href="https://github.com/jeromewu">@jeromewu</a></span>
</div>
<script src="animation/raf.js"></script>
<script src="animation/mouse.js"></script>
<script src="animation/dimensions.js"></script>
<script src="animation/hypercube.js"></script>
<script src="animation/animate.js"></script>
<script src="https://unpkg.com/tesseract.js@2.0.0/dist/tesseract.min.js" integrity="sha384-MTEb82ufpBJ2VCTmPZlD/+vgiT5z6zmOwqU/uDO9IobYm9xCOUEN0WH3czf6ppl8" crossorigin="anonymous"></script>
<script src="demo.js"></script>
<script>
// Google Analytics loader. The wrapper defines a global `ga` function that
// queues its calls, and inserts an async <script> for analytics.js before the
// first script element on the page.
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
// Two named trackers ('origin' and 'addon'), each sending a pageview.
ga('create', 'UA-83153710-3', 'auto', 'origin');
ga('origin.send', 'pageview');
ga('create', 'UA-139963961-1', 'auto', 'addon');
ga('addon.send', 'pageview');
</script>
</body>
</html>

163
index_dev.html

File diff suppressed because one or more lines are too long

162
index_old.html

File diff suppressed because one or more lines are too long

242
main.js

@ -0,0 +1,242 @@ @@ -0,0 +1,242 @@
'use strict';
/**
 * Wire up the interactive OCR demo inside the element with the given id.
 *
 * Creates a CodeMirror editor pre-filled with `val`, hooks up the run button
 * and returns the editor, extended with `clear()`, `run()` and `img`.
 *
 * The editor content is executed with a direct `eval` inside this function, so
 * it can refer to the locals declared here (`demo`, `show_progress`,
 * `display`, ...). Do not rename them without updating the demo snippets.
 *
 * @param {string} id id of the demo container element
 * @param {string} val initial source code shown in the editor
 * @returns {Object} the CodeMirror editor instance
 */
function builddemo(id, val) {
  var demo = document.getElementById(id);
  var prog = demo.querySelector('.prog');
  var out = demo.querySelector('.out');
  var disp = demo.querySelector('.display');
  var dctx = disp.getContext('2d');
  disp.width = 0;
  disp.height = 0;

  // CSS lengths need a unit: a bare number assigned to style.width/height/
  // top/left/font-size is ignored in standards mode.
  function px(value) {
    return value + 'px';
  }

  demo.querySelector('.runbutton').onclick = function () {
    setrunning(0);
    run(editor);
    editor.clear();
    prog.setValue(0);
  };

  // Progress bar: `v` is a fraction in 0..1, shown as the inner div's width.
  prog.setValue = function (v) {
    prog.querySelector('div').style.width = v * 100 + '%';
  };

  // Progress callback handed to Tesseract by the demo snippet.
  function show_progress(p) {
    setrunning(0);
    console.log(p);
    if (p.loaded_lang_model) prog.setValue(p.loaded_lang_model);
    if (p.recognized) prog.setValue(p.recognized);
    out.innerText = JSON.stringify(p);
  }

  // Toggle between the two status panels: 1 shows '.notrunning', anything
  // else shows '.running'.
  function setrunning(v) {
    if (v == 1) {
      demo.querySelector('.running').style.display = 'none';
      demo.querySelector('.notrunning').style.display = 'block';
    } else {
      demo.querySelector('.running').style.display = 'block';
      demo.querySelector('.notrunning').style.display = 'none';
    }
  }

  // Result callback: shows the raw result in the explorer, animates the
  // recognised words onto the canvas and lays invisible text over the image.
  function display(result) {
    React.render(React.createElement(Node, { node: result, expanded: true, label: "output_of_above_demo_plz_click_stuff" }), document.getElementById("explorer"));
    setrunning(1);
    out.innerText = "Lightning Speeeeeeed";
    // NOTE(review): this sets a plain `value` property; the bar itself is
    // driven through prog.setValue. Confirm whether a reset was intended.
    prog.value = 0;
    console.log(result);

    // The canvas uses the image's natural resolution and is scaled by CSS to
    // the size the image is displayed at.
    var source = demo.querySelector('.to_ocr');
    disp.width = source.naturalWidth;
    disp.height = source.naturalHeight;
    disp.style.width = px(source.offsetWidth);
    disp.style.height = px(source.offsetHeight);

    // One painter per word. The font size is chosen so that the word, measured
    // at 20px, fills the width of its bounding box. Each painter draws the word
    // and returns the font string it used.
    var m = result.words.map(function (w) {
      var b = w.bbox;
      dctx.font = '20px Times';
      var font = 20 * (b.x1 - b.x0) / dctx.measureText(w.text).width + "px Times";
      return function k() {
        dctx.font = font;
        dctx.fillText(w.text, b.x0, w.baseline.y0);
        return font;
      };
    });

    var maxtimes = m.length + 100;

    // Animation frame `i`: redraws the first `i` words, each fading in over
    // 100 frames, and reschedules itself every 10 ms until frame `maxtimes`.
    function draw(i) {
      // Kept although the loop overrides it: this is the fill style left on
      // the context when no word is drawn in this frame.
      dctx.fillStyle = "rgba(0, 219, 157, " + Math.min(i / 100, 1) + ")";
      dctx.clearRect(0, 0, disp.width, disp.height);
      for (var j = 0; j < Math.min(i, m.length); j++) {
        var asdf = Math.min(Math.max(i - j, 0), 100);
        dctx.fillStyle = "rgba(0,0,0," + asdf * .01 + ")";
        m[j]();
      }
      if (i < maxtimes) {
        setTimeout(function () {
          draw(i + 1);
        }, 10);
      } else {
        console.log('done');
      }
    }
    draw(0);

    // Transparent, absolutely positioned text over the image, scaled from
    // image pixels to displayed pixels.
    var to_ocr = document.querySelector('.to_ocr');
    var scale = to_ocr.offsetHeight / to_ocr.naturalHeight;
    result.words.forEach(function (word, index) {
      var wdiv = document.createElement('div');
      wdiv.innerText = word.text + ' ';
      wdiv.style['font-family'] = "Times";
      wdiv.style.position = 'absolute';
      // m[index]() also paints the word on the canvas with the current fill
      // style; the call is kept for that side effect as well as for its result.
      wdiv.style['font-size'] = px(parseFloat(m[index]().split('px')[0]) * scale);
      wdiv.style.color = "rgba(0,0,0,0)";
      wdiv.style.top = px(word.bbox.y0 * scale);
      wdiv.style.left = px(word.bbox.x0 * scale);
      wdiv.style.height = px((word.bbox.y1 - word.bbox.y0) * scale);
      wdiv.style.width = px((word.bbox.x1 - word.bbox.x0) * scale);
      document.querySelector('.ocroutput').appendChild(wdiv);
    });
  }

  // Keep the canvas overlay the same displayed size as the image.
  window.addEventListener('resize', function () {
    disp.style.width = px(demo.querySelector('.to_ocr').offsetWidth);
    disp.style.height = px(demo.querySelector('.to_ocr').offsetHeight);
  });

  // Executes the editor content.
  // SECURITY: this is a direct eval of whatever is in the editor, on purpose
  // (the snippet is meant to be edited by the visitor). Never fill the editor
  // from an untrusted source such as a URL parameter.
  function run(c) {
    eval(c.getValue());
  }

  var editor = CodeMirror(demo.querySelector('.editor'), {
    viewportMargin: Infinity,
    value: val
  });

  // Not used by the code below; kept because eval'd editor code can see the
  // locals of this function.
  var sc = demo.querySelector('.demoheader');

  // Wipe the canvas and the text overlay from the previous run.
  editor.clear = function () {
    dctx.clearRect(0, 0, disp.width, disp.height);
    disp.style.height = 0;
    document.querySelector('.ocroutput').innerHTML = '';
  };

  editor.img = demo.querySelector('.to_ocr');

  // Run the snippet now if the image is loaded, otherwise once after it loads.
  editor.run = function () {
    if (editor.img.complete) {
      run(editor);
    } else {
      editor.img.onload = function () {
        run(editor);
        editor.img.onload = function () {};
      };
    }
  };

  return editor;
}
// Add the 'opaque' class to the #wow demo 100 ms after this script runs.
setTimeout(function () {
document.getElementById('wow').className += ' opaque';
}, 100);
// Build the main demo. The snippet below is the initial editor content; it is
// run by builddemo through eval, which is how it can use `demo`,
// `show_progress` and `display` (locals of builddemo).
var wow = builddemo('wow',
["var img = demo.querySelector('img.to_ocr')",
"Tesseract",
" .recognize( img )",
" .progress( show_progress )",
" .then( display ) // scroll down for full output",
" // you can edit this code"].join('\n'));
// Start the first recognition right away.
wow.run();
// Language tab elements, copied from NodeLists into real arrays. Both lists are
// indexed in parallel with `langs`: position i selects langs[i].
var tabs = Array.prototype.slice.call(document.querySelectorAll('.langlabel'));
var ltabs = Array.prototype.slice.call(document.querySelectorAll('.ltab'));
// Language codes offered by the demo; each has a sample image at img/<code>.png.
var langs = ['eng', 'chi_sim', 'rus', 'meme'];
// Create an Image for every sample up front so the browser starts fetching them.
var langims = langs.map(function (lang) {
var limg = new Image();
limg.src = 'img/' + lang + '.png';
return limg;
});
/**
 * Switch the demo to the language at index `i` of `langs`: highlight its tab,
 * load the matching snippet and sample image into the editor, then rerun it.
 *
 * @param {number} i index into `langs` / `tabs`
 */
function setlang(i) {
  // Deselect every tab, then mark the chosen one.
  for (var t = 0; t < tabs.length; t++) {
    tabs[t].className = 'langlabel';
  }
  var chosenTab = tabs[i];
  chosenTab.className = 'langlabel selected';
  console.log(chosenTab);

  var snippetLines = [
    "var img = demo.querySelector('img.to_ocr')",
    "Tesseract",
    " .recognize( img, '"+langs[i]+"' )",
    " .progress( show_progress )",
    " .then( display ) // scroll down for full output",
    " // you can edit this code"
  ];
  wow.setValue(snippetLines.join('\n'));

  wow.img.src = 'img/' + langs[i] + '.png';
  wow.clear();
  wow.run();
}
// Clicking the element at position i of either tab list selects langs[i].
[ltabs, tabs].forEach(function (group) {
  group.forEach(function (element, i) {
    element.onclick = function () {
      setlang(i);
    };
  });
});
// document.querySelector('.getStarted')[0].onclick = function(){
// location.href = '#'
// location.href = '#get_started'
// }
// builddemo('demo2',
// "var img = demo.querySelector('img.to_ocr')\n\n\
// Tesseract\n\
// .recognize( img, {progress: show_progress, lang:'chi_sim'} )\n\
// .then( display )")

227
main_dev.js

@ -0,0 +1,227 @@ @@ -0,0 +1,227 @@
'use strict';
/**
 * Wire up the interactive OCR demo inside the element with id `id`.
 *
 * Creates a CodeMirror editor seeded with `val`, a run button that evaluates
 * the editor contents, a progress bar, and a canvas/DOM overlay that shows
 * the recognized words on top of the demo image.
 *
 * @param {string} id  id of the demo's container element
 * @param {string} val initial source code shown in the editor
 * @returns {Object} the CodeMirror editor, extended with `clear()`, `run()`
 *   and `img` (the demo's `.to_ocr` image element)
 */
function builddemo(id, val) {
  // NOTE: `demo`, `show_progress` and `display` are referenced by name from
  // the snippet that run() evaluates, so these locals must not be renamed.
  var demo = document.getElementById(id);
  var prog = demo.querySelector('.prog');
  var out = demo.querySelector('.out');
  var disp = demo.querySelector('.display');
  var dctx = disp.getContext('2d');
  disp.width = 0;
  disp.height = 0;

  demo.querySelector('.runbutton').onclick = function () {
    setrunning(0);
    run(editor);
    editor.clear();
    prog.setValue(0);
  };

  // `v` is a fraction in [0, 1]; it becomes the width of the inner bar.
  prog.setValue = function (v) {
    prog.querySelector('div').style.width = v * 100 + '%';
  };

  // CSS lengths need a unit: a bare number assigned to style.width & co. is
  // only honoured in quirks mode and is silently dropped in standards mode.
  function px(n) {
    return n + 'px';
  }

  // Keep the overlay canvas the same on-screen size as the source image.
  function fitOverlayToImage() {
    var source = demo.querySelector('.to_ocr');
    disp.style.width = px(source.offsetWidth);
    disp.style.height = px(source.offsetHeight);
  }

  /** Progress callback handed to Tesseract by the demo snippet. */
  function show_progress(p) {
    setrunning(0);
    console.log(p);
    if (p.loaded_lang_model) prog.setValue(p.loaded_lang_model);
    if (p.recognized) prog.setValue(p.recognized);
    out.innerText = JSON.stringify(p);
  }

  /** Toggle the status indicators: 1 means finished, anything else means running. */
  function setrunning(v) {
    var finished = v == 1;
    demo.querySelector('.running').style.display = finished ? 'none' : 'block';
    demo.querySelector('.notrunning').style.display = finished ? 'block' : 'none';
  }

  /**
   * Result callback handed to Tesseract by the demo snippet: renders the
   * result tree, animates the recognized words onto the canvas and lays
   * transparent text boxes over the image.
   */
  function display(result) {
    React.render(React.createElement(Node, { node: result, expanded: true, label: "output_of_above_demo_plz_click_stuff" }), document.getElementById("explorer"));
    setrunning(1);
    out.innerText = "Lightning Speeeeeeed";
    // NOTE(review): `prog` is driven through setValue() everywhere else; this
    // plain property write looks like a leftover — confirm before removing.
    prog.value = 0;
    console.log(result);

    var source = demo.querySelector('.to_ocr');
    disp.width = source.naturalWidth;
    disp.height = source.naturalHeight;
    fitOverlayToImage();

    dctx.shadowColor = "rgba(255,255,255,.1)";
    dctx.shadowOffsetX = 0;
    dctx.shadowOffsetY = 0;
    dctx.shadowBlur = 10;
    dctx.fillRect(0, 0, disp.width, disp.height);

    // One painter per word. The font is scaled so the word's rendered width
    // matches its bounding-box width; each painter returns that font string.
    var m = result.words.map(function (w) {
      var b = w.bbox;
      dctx.font = '20px Times';
      var font = 20 * (b.x1 - b.x0) / dctx.measureText(w.text).width + "px Times";
      return function k() {
        dctx.font = font;
        dctx.fillText(w.text, b.x0, w.baseline.y0);
        return font;
      };
    });

    var times = 0;
    var maxtimes = m.length + 100;

    // Frame `i` of the reveal animation: the background fades in over the
    // first 100 frames and word j starts fading in at frame j.
    function draw(i) {
      times++;
      dctx.fillStyle = "rgba(0, 219, 157, " + Math.min(i / 100, 1) + ")";
      dctx.clearRect(0, 0, disp.width, disp.height);
      dctx.fillRect(0, 0, disp.width, disp.height);
      for (var j = 0; j < Math.min(i, m.length); j++) {
        var asdf = Math.min(Math.max(i - j, 0), 100);
        dctx.fillStyle = "rgba(255,255,255," + asdf * .01 + ")";
        m[j]();
      }
      if (i < maxtimes) {
        setTimeout(function () {
          draw(i + 1);
        }, 10);
      } else {
        console.log('done');
      }
    }
    draw(0);

    // Transparent, absolutely positioned text boxes over the image.
    var to_ocr = document.querySelector('.to_ocr');
    var scale = to_ocr.offsetHeight / to_ocr.naturalHeight;
    var overlay = document.querySelector('.ocroutput');
    result.words.forEach(function (word, index) {
      var wdiv = document.createElement('div');
      wdiv.innerText = word.text + ' ';
      wdiv.style.fontFamily = "Times";
      wdiv.style.position = 'absolute';
      // The painter is called only for the font string it returns.
      wdiv.style.fontSize = px(parseFloat(m[index]().split('px')[0]) * scale);
      wdiv.style.color = "rgba(0,0,0,0)";
      wdiv.style.top = px(word.bbox.y0 * scale);
      wdiv.style.left = px(word.bbox.x0 * scale);
      wdiv.style.height = px((word.bbox.y1 - word.bbox.y0) * scale);
      wdiv.style.width = px((word.bbox.x1 - word.bbox.x0) * scale);
      overlay.appendChild(wdiv);
    });
  }

  window.addEventListener('resize', fitOverlayToImage);

  // SECURITY NOTE: this evaluates whatever is in the editor. It must stay a
  // *direct* eval so the snippet can see `demo`, `show_progress` and
  // `display`; never feed it anything but the visitor's own input.
  function run(c) {
    eval(c.getValue());
  }

  var editor = CodeMirror(demo.querySelector('.editor'), {
    viewportMargin: Infinity,
    value: val
  });

  // Static, syntax-highlighted <script> tag shown in the demo header.
  var sc = demo.querySelector('.demoheader');
  var scdiv = document.createElement('div');
  sc.appendChild(scdiv);
  scdiv.className = 'CodeMirror cm-s-default';
  CodeMirror.runMode('<script src="http://localhost:1234/master/lib/Tesseract_dev.js"></script>', {
    name: 'xml',
    htmlMode: true
  }, scdiv);

  // Wipe the canvas and remove the word boxes of the previous run.
  editor.clear = function () {
    dctx.clearRect(0, 0, disp.width, disp.height);
    document.querySelector('.ocroutput').innerHTML = '';
  };

  editor.img = demo.querySelector('.to_ocr');

  // Run the snippet now if the image is loaded, otherwise once on its next load.
  editor.run = function () {
    if (editor.img.complete) {
      run(editor);
    } else {
      editor.img.onload = function () {
        run(editor);
        editor.img.onload = function () {};
      };
    }
  };

  return editor;
}
// Fade the demo container in shortly after the script starts.
setTimeout(function () {
  var container = document.getElementById('wow');
  container.className = container.className + ' opaque';
}, 100);

// Build the main demo with its initial (user-editable) snippet and run it once.
var wow = builddemo('wow', [
  "var img = demo.querySelector('img.to_ocr')\n\n",
  "Tesseract\n",
  " .recognize( img, {\n",
  " progress: show_progress} )\n",
  " .then( display ) // scroll down for full output\n",
  " // you can edit this code"
].join(''));
wow.run();

// Language selector elements, copied from NodeLists into real arrays.
var tabs = [].slice.call(document.querySelectorAll('.langlabel'));
var ltabs = [].slice.call(document.querySelectorAll('.ltab'));

// Language codes; the index lines up with the tab index passed to setlang().
var langs = ['eng', 'chi_sim', 'rus', 'meme'];

// One Image per language sample (presumably to warm the browser cache).
var langims = [];
langs.forEach(function (code) {
  var sample = new Image();
  sample.src = 'img/' + code + '.png';
  langims.push(sample);
});
/**
 * Switch the demo to the language at index `i` of `langs`: highlight its tab,
 * rewrite the editor snippet, swap the sample image and clear the previous
 * output. The snippet is not re-run here.
 */
function setlang(i) {
  var lang = langs[i];

  for (var t = 0; t < tabs.length; t++) {
    tabs[t].className = 'langlabel';
  }
  var active = tabs[i];
  active.className = 'langlabel selected';
  console.log(active);

  var snippet = [
    "var img = demo.querySelector('img.to_ocr')\n\n",
    "Tesseract\n",
    " .recognize( img, {\n",
    " progress: show_progress, lang: '", lang, "'} )\n",
    " .then( display ) // scroll down for full output\n",
    " // you can edit this code"
  ].join('');
  wow.setValue(snippet);

  wow.img.src = 'img/' + lang + '.png';
  wow.clear();
}
// Clicking the n-th element of either selector group selects the n-th language.
[ltabs, tabs].forEach(function (group) {
  group.forEach(function (el, position) {
    el.onclick = function () {
      setlang(position);
    };
  });
});
// document.querySelector('.getStarted')[0].onclick = function(){
// location.href = '#'
// location.href = '#get_started'
// }
// builddemo('demo2',
// "var img = demo.querySelector('img.to_ocr')\n\n\
// Tesseract\n\
// .recognize( img, {progress: show_progress, lang:'chi_sim'} )\n\
// .then( display )")

16860
package-lock.json generated

File diff suppressed because it is too large Load Diff

89
package.json

@ -1,89 +0,0 @@ @@ -1,89 +0,0 @@
{
"name": "tesseract.js",
"version": "3.0.3",
"description": "Pure Javascript Multilingual OCR",
"main": "src/index.js",
"types": "src/index.d.ts",
"unpkg": "dist/tesseract.min.js",
"jsdelivr": "dist/tesseract.min.js",
"scripts": {
"start": "node scripts/server.js",
"build": "rimraf dist && webpack --config scripts/webpack.config.prod.js && rollup -c scripts/rollup.esm.js",
"profile:tesseract": "webpack-bundle-analyzer dist/tesseract-stats.json",
"profile:worker": "webpack-bundle-analyzer dist/worker-stats.json",
"prepublishOnly": "npm run build",
"wait": "rimraf dist && wait-on http://localhost:3000/dist/tesseract.dev.js",
"test": "npm-run-all -p -r start test:all",
"test:all": "npm-run-all wait test:browser:* test:node:all",
"test:node": "nyc mocha --exit --bail --require ./scripts/test-helper.js",
"test:node:all": "npm run test:node -- ./tests/*.test.js",
"test:browser-tpl": "mocha-headless-chrome -a incognito -a no-sandbox -a disable-setuid-sandbox -a disable-logging -t 300000",
"test:browser:detect": "npm run test:browser-tpl -- -f ./tests/detect.test.html",
"test:browser:recognize": "npm run test:browser-tpl -- -f ./tests/recognize.test.html",
"test:browser:scheduler": "npm run test:browser-tpl -- -f ./tests/scheduler.test.html",
"test:browser:FS": "npm run test:browser-tpl -- -f ./tests/FS.test.html",
"lint": "eslint src",
"lint:fix": "eslint --fix src",
"postinstall": "opencollective-postinstall || true"
},
"browser": {
"./src/worker/node/index.js": "./src/worker/browser/index.js"
},
"author": "",
"contributors": [
"jeromewu"
],
"license": "Apache-2.0",
"devDependencies": {
"@babel/core": "^7.18.7",
"@babel/preset-env": "^7.18.7",
"@rollup/plugin-commonjs": "^22.0.2",
"acorn": "^6.4.0",
"babel-loader": "^8.2.0",
"buffer": "^6.0.3",
"cors": "^2.8.5",
"eslint": "^7.2.0",
"eslint-config-airbnb-base": "^14.2.0",
"eslint-plugin-import": "^2.22.1",
"expect.js": "^0.3.1",
"express": "^4.17.1",
"mocha": "^8.1.3",
"mocha-headless-chrome": "^2.0.3",
"npm-run-all": "^4.1.5",
"nyc": "^15.1.0",
"rimraf": "^2.7.1",
"rollup": "^2.79.0",
"wait-on": "^3.3.0",
"webpack": "^5.74.0",
"webpack-bundle-analyzer": "^4.6.0",
"webpack-cli": "^4.10.0",
"webpack-dev-middleware": "^5.3.3"
},
"dependencies": {
"babel-eslint": "^10.1.0",
"bmp-js": "^0.1.0",
"file-type": "^12.4.1",
"idb-keyval": "^3.2.0",
"is-electron": "^2.2.0",
"is-url": "^1.2.4",
"node-fetch": "^2.6.0",
"opencollective-postinstall": "^2.0.2",
"regenerator-runtime": "^0.13.3",
"resolve-url": "^0.2.1",
"tesseract.js-core": "^3.0.2",
"wasm-feature-detect": "^1.2.11",
"zlibjs": "^0.3.1"
},
"repository": {
"type": "git",
"url": "https://github.com/naptha/tesseract.js.git"
},
"bugs": {
"url": "https://github.com/naptha/tesseract.js/issues"
},
"homepage": "https://github.com/naptha/tesseract.js",
"collective": {
"type": "opencollective",
"url": "https://opencollective.com/tesseractjs"
}
}

61
perlinish/perlinish.js

@ -0,0 +1,61 @@ @@ -0,0 +1,61 @@
/*
 * Builds an SVG made of 1.5x1.5 rects, one per pixel of an upscaled random
 * noise image, with every pixel snapped to the nearest colour of `colors`
 * (an array of [r, g, b] triples). The result is appended to #marterial.
 */
var svg = (function(colors){
  var upscale = 10

  // Low-resolution random RGB noise, fully opaque.
  var noise = document.createElement('canvas')
  noise.width = 10
  noise.height = 2
  var noiseCtx = noise.getContext('2d')
  var im = noiseCtx.getImageData(0,0,noise.width,noise.height)
  for (var i = 0; i < im.data.length; i+=4) {
    im.data[i] = Math.round(Math.random()*255)
    im.data[i+1] = Math.round(Math.random()*255)
    im.data[i+2] = Math.round(Math.random()*255)
    im.data[i+3] = 255
  }
  noiseCtx.putImageData(im,0,0)

  // Upscale canvas-to-canvas. The previous round trip through
  // `new Image()` + data URL drew the image before it had loaded, so the
  // big canvas stayed blank and every pixel mapped to the same colour.
  // Drawing a canvas source is synchronous and still smooths the noise.
  var canvas = document.createElement('canvas')
  canvas.width = noise.width * upscale
  canvas.height = noise.height * upscale
  var ctx = canvas.getContext('2d')
  ctx.drawImage(noise,0,0,canvas.width,canvas.height)
  im = ctx.getImageData(0,0,canvas.width,canvas.height)

  var xmlns = "http://www.w3.org/2000/svg";
  var svg = document.createElementNS(xmlns, 'svg')
  svg.setAttribute('viewBox',"0 0 "+im.width+" "+im.height)

  for (var p = 0; p < im.data.length; p+=4) {
    // Nearest palette colour by Manhattan distance in RGB.
    var mindist = Infinity
    var mincolor = [0,0,0]
    for (var j = 0; j < colors.length; j++) {
      var color = colors[j]
      var d = Math.abs(color[0] - im.data[p])
            + Math.abs(color[1] - im.data[p+1])
            + Math.abs(color[2] - im.data[p+2])
      if (d<mindist) {
        mindist = d
        mincolor = color
      }
    }

    var n = p/4
    var elem = document.createElementNS(xmlns, "rect");
    elem.setAttributeNS(null,"x",n%im.width);
    elem.setAttributeNS(null,"y",Math.floor(n/im.width));
    // Slightly larger than one unit so neighbouring rects overlap.
    elem.setAttributeNS(null,"width",1.5);
    elem.setAttributeNS(null,"height",1.5);
    // The original added `0%2*20` (always 0) to the red channel; dropped.
    elem.setAttributeNS(null,"fill", 'rgba('+Math.min(mincolor[0],255)+', '+mincolor[1]+', '+mincolor[2]+',1)');
    svg.appendChild(elem);
  }
  return svg
})([
  [39, 198, 249],
  [6, 188, 249],
  // [154, 218, 198],
  [116, 218, 251],
  [91, 211, 251]])
document.getElementById('marterial').appendChild(svg)

5
scripts/.eslintrc

@ -1,5 +0,0 @@ @@ -1,5 +0,0 @@
{
"rules": {
"import/no-extraneous-dependencies": 0
}
}

13
scripts/rollup.esm.js

@ -1,13 +0,0 @@ @@ -1,13 +0,0 @@
import commonjs from "@rollup/plugin-commonjs";
// Re-bundles the already built UMD file as an ES module.
const esmBundle = {
  input: "dist/tesseract.min.js",
  output: {
    file: "dist/tesseract.esm.min.js",
    format: "esm",
    // The generated file is not meant to be linted.
    banner: "/* eslint-disable */",
  },
  plugins: [commonjs()],
};

export default [esmBundle];

17
scripts/server.js

@ -1,17 +0,0 @@ @@ -1,17 +0,0 @@
const webpack = require('webpack');
const middleware = require('webpack-dev-middleware');
const express = require('express');
const path = require('path');
const cors = require('cors');
const webpackConfig = require('./webpack.config.dev');
// Development server: serves the repository root statically and the webpack
// dev bundles under /dist (also written to disk), with CORS enabled.
const repoRoot = path.resolve(__dirname, '..');
const devBundles = middleware(webpack(webpackConfig), {
  publicPath: '/dist',
  writeToDisk: true,
});

const app = express();
app.use(cors());
app.use('/', express.static(repoRoot));
app.use(devBundles);

// The listening http.Server is exported so callers can close it.
const server = app.listen(3000, function onListening() {
  console.log('Server is running on the port no. 3000');
});

module.exports = server;

9
scripts/test-helper.js

@ -1,9 +0,0 @@ @@ -1,9 +0,0 @@
const constants = require('../tests/constants');
// Expose the helpers the node test files use as globals, in a fixed order.
[
  ['expect', 'expect.js'],
  ['fs', 'fs'],
  ['path', 'path'],
  ['Tesseract', '../src'],
].forEach(([name, moduleId]) => {
  global[name] = require(moduleId);
});

// Every exported test constant becomes a global of the same name.
for (const key of Object.keys(constants)) {
  global[key] = constants[key];
}

28
scripts/webpack.config.common.js

@ -1,28 +0,0 @@ @@ -1,28 +0,0 @@
module.exports = {
resolve: {
fallback: {
buffer: require.resolve('buffer/'),
},
},
module: {
rules: [
{
test: /\.m?js$/,
// exclude: /(node_modules|bower_components)/,
use: {
loader: 'babel-loader',
options: {
presets: [
[
'@babel/preset-env',
{
targets: 'last 2 versions',
},
],
],
},
},
},
],
},
};

48
scripts/webpack.config.dev.js

@ -1,48 +0,0 @@ @@ -1,48 +0,0 @@
const path = require('path');
const webpack = require('webpack');
const { BundleAnalyzerPlugin } = require('webpack-bundle-analyzer');
const common = require('./webpack.config.common');
/**
 * Build one development webpack config on top of the shared settings.
 *
 * @param {object} args
 * @param {string} args.entry           absolute path of the entry module
 * @param {string} args.filename        output bundle name; the part before
 *   the first "." also names the stats file (`<name>-stats.json`)
 * @param {string} [args.library]       global/UMD library name
 * @param {string} [args.libraryTarget] webpack library target
 * @returns {object} webpack configuration object
 */
const genConfig = ({
  entry, filename, library, libraryTarget,
}) => ({
  ...common,
  mode: 'development',
  entry,
  output: {
    filename,
    library,
    libraryTarget,
  },
  plugins: [
    new webpack.ProvidePlugin({
      Buffer: ['buffer', 'Buffer'],
    }),
    new webpack.DefinePlugin({
      'process.env': {
        TESS_ENV: JSON.stringify('development'),
      },
    }),
    // Only the stats file is wanted here, no report or server. The valid
    // value for that is 'disabled'; the previous 'disable' is not a
    // documented analyzerMode.
    new BundleAnalyzerPlugin({
      analyzerMode: 'disabled',
      statsFilename: `${filename.split('.')[0]}-stats.json`,
      generateStatsFile: true,
    }),
  ],
  devServer: {
    allowedHosts: ['localhost', '.gitpod.io'],
  },
});
// Absolute path of a file under <repo>/src.
const fromSrc = (...segments) => path.resolve(__dirname, '..', 'src', ...segments);

// Two dev bundles: the UMD library and the browser worker script.
const libraryBundle = genConfig({
  entry: fromSrc('index.js'),
  filename: 'tesseract.dev.js',
  library: 'Tesseract',
  libraryTarget: 'umd',
});
const workerBundle = genConfig({
  entry: fromSrc('worker-script', 'browser', 'index.js'),
  filename: 'worker.dev.js',
});

module.exports = [libraryBundle, workerBundle];

36
scripts/webpack.config.prod.js

@ -1,36 +0,0 @@ @@ -1,36 +0,0 @@
const path = require('path');
const common = require('./webpack.config.common');
const webpack = require('webpack');
/**
 * Build one production webpack config on top of the shared settings.
 * Bundles are written to <repo>/dist together with source maps.
 *
 * @param {object} args
 * @param {string} args.entry           absolute path of the entry module
 * @param {string} args.filename        output bundle name
 * @param {string} [args.library]       global/UMD library name
 * @param {string} [args.libraryTarget] webpack library target
 * @returns {object} webpack configuration object
 */
const genConfig = ({
  entry, filename, library, libraryTarget,
}) => {
  const output = {
    path: path.resolve(__dirname, '..', 'dist'),
    filename,
    library,
    libraryTarget,
  };
  // Make `Buffer` available as a free variable in the bundled code.
  const provideBuffer = new webpack.ProvidePlugin({
    Buffer: ['buffer', 'Buffer'],
  });
  return {
    ...common,
    mode: 'production',
    devtool: 'source-map',
    entry,
    output,
    plugins: [provideBuffer],
  };
};
// Absolute path of a file under <repo>/src.
const fromSrc = (...segments) => path.resolve(__dirname, '..', 'src', ...segments);

// Two production bundles: the UMD library and the browser worker script.
const libraryBundle = genConfig({
  entry: fromSrc('index.js'),
  filename: 'tesseract.min.js',
  library: 'Tesseract',
  libraryTarget: 'umd',
});
const workerBundle = genConfig({
  entry: fromSrc('worker-script', 'browser', 'index.js'),
  filename: 'worker.min.js',
});

module.exports = [libraryBundle, workerBundle];

28
src/Tesseract.js

@ -1,28 +0,0 @@ @@ -1,28 +0,0 @@
const createWorker = require('./createWorker');
const recognize = async (image, langs, options) => {
const worker = createWorker(options);
await worker.load();
await worker.loadLanguage(langs);
await worker.initialize(langs);
return worker.recognize(image)
.finally(async () => {
await worker.terminate();
});
};
const detect = async (image, options) => {
const worker = createWorker(options);
await worker.load();
await worker.loadLanguage('osd');
await worker.initialize('osd');
return worker.detect(image)
.finally(async () => {
await worker.terminate();
});
};
module.exports = {
recognize,
detect,
};

12
src/constants/OEM.js

@ -1,12 +0,0 @@ @@ -1,12 +0,0 @@
/*
* OEM = OCR Engine Mode, and there are 4 possible modes.
*
* The numeric value is what createWorker's `initialize` sends as the `oem`
* field of its job payload.
*
* NOTE(review): this header used to state that LSTM_ONLY is the default, but
* src/constants/config.js sets `defaultOEM` to DEFAULT (3). Confirm which
* engine DEFAULT resolves to before relying on either statement.
*
*/
module.exports = {
TESSERACT_ONLY: 0,
LSTM_ONLY: 1,
TESSERACT_LSTM_COMBINED: 2,
DEFAULT: 3,
};

19
src/constants/PSM.js

@ -1,19 +0,0 @@ @@ -1,19 +0,0 @@
/*
* PSM = Page Segmentation Mode
*
* Each value is the mode's numeric identifier written as a string, not a
* number.
* NOTE(review): presumably passed through `setParameters` as tesseract's
* page segmentation mode parameter — confirm against the worker script.
*/
module.exports = {
OSD_ONLY: '0',
AUTO_OSD: '1',
AUTO_ONLY: '2',
AUTO: '3',
SINGLE_COLUMN: '4',
SINGLE_BLOCK_VERT_TEXT: '5',
SINGLE_BLOCK: '6',
SINGLE_LINE: '7',
SINGLE_WORD: '8',
CIRCLE_WORD: '9',
SINGLE_CHAR: '10',
SPARSE_TEXT: '11',
SPARSE_TEXT_OSD: '12',
RAW_LINE: '13',
};

5
src/constants/config.js

@ -1,5 +0,0 @@ @@ -1,5 +0,0 @@
const OEM = require('./OEM');
module.exports = {
defaultOEM: OEM.DEFAULT,
};

13
src/constants/defaultOptions.js

@ -1,13 +0,0 @@ @@ -1,13 +0,0 @@
module.exports = {
/*
* default path for downloading *.traineddata
*/
langPath: 'https://tessdata.projectnaptha.com/4.0.0',
/*
* Use BlobURL for worker script by default
* TODO: remove this option
*
*/
workerBlobURL: true,
logger: () => {},
};

218
src/constants/languages.js

@ -1,218 +0,0 @@ @@ -1,218 +0,0 @@
/*
* languages with existing tesseract traineddata
* https://tesseract-ocr.github.io/tessdoc/Data-Files#data-files-for-version-400-november-29-2016
*/
/**
* @typedef {object} Languages
* @property {string} AFR Afrikaans
* @property {string} AMH Amharic
* @property {string} ARA Arabic
* @property {string} ASM Assamese
* @property {string} AZE Azerbaijani
* @property {string} AZE_CYRL Azerbaijani - Cyrillic
* @property {string} BEL Belarusian
* @property {string} BEN Bengali
* @property {string} BOD Tibetan
* @property {string} BOS Bosnian
* @property {string} BUL Bulgarian
* @property {string} CAT Catalan; Valencian
* @property {string} CEB Cebuano
* @property {string} CES Czech
* @property {string} CHI_SIM Chinese - Simplified
* @property {string} CHI_TRA Chinese - Traditional
* @property {string} CHR Cherokee
* @property {string} CYM Welsh
* @property {string} DAN Danish
* @property {string} DEU German
* @property {string} DZO Dzongkha
* @property {string} ELL Greek, Modern (1453-)
* @property {string} ENG English
* @property {string} ENM English, Middle (1100-1500)
* @property {string} EPO Esperanto
* @property {string} EST Estonian
* @property {string} EUS Basque
* @property {string} FAS Persian
* @property {string} FIN Finnish
* @property {string} FRA French
* @property {string} FRK German Fraktur
* @property {string} FRM French, Middle (ca. 1400-1600)
* @property {string} GLE Irish
* @property {string} GLG Galician
* @property {string} GRC Greek, Ancient (-1453)
* @property {string} GUJ Gujarati
* @property {string} HAT Haitian; Haitian Creole
* @property {string} HEB Hebrew
* @property {string} HIN Hindi
* @property {string} HRV Croatian
* @property {string} HUN Hungarian
* @property {string} IKU Inuktitut
* @property {string} IND Indonesian
* @property {string} ISL Icelandic
* @property {string} ITA Italian
* @property {string} ITA_OLD Italian - Old
* @property {string} JAV Javanese
* @property {string} JPN Japanese
* @property {string} KAN Kannada
* @property {string} KAT Georgian
* @property {string} KAT_OLD Georgian - Old
* @property {string} KAZ Kazakh
* @property {string} KHM Central Khmer
* @property {string} KIR Kirghiz; Kyrgyz
* @property {string} KOR Korean
* @property {string} KUR Kurdish
* @property {string} LAO Lao
* @property {string} LAT Latin
* @property {string} LAV Latvian
* @property {string} LIT Lithuanian
* @property {string} MAL Malayalam
* @property {string} MAR Marathi
* @property {string} MKD Macedonian
* @property {string} MLT Maltese
* @property {string} MSA Malay
* @property {string} MYA Burmese
* @property {string} NEP Nepali
* @property {string} NLD Dutch; Flemish
* @property {string} NOR Norwegian
* @property {string} ORI Oriya
* @property {string} PAN Panjabi; Punjabi
* @property {string} POL Polish
* @property {string} POR Portuguese
* @property {string} PUS Pushto; Pashto
* @property {string} RON Romanian; Moldavian; Moldovan
* @property {string} RUS Russian
* @property {string} SAN Sanskrit
* @property {string} SIN Sinhala; Sinhalese
* @property {string} SLK Slovak
* @property {string} SLV Slovenian
* @property {string} SPA Spanish; Castilian
* @property {string} SPA_OLD Spanish; Castilian - Old
* @property {string} SQI Albanian
* @property {string} SRP Serbian
* @property {string} SRP_LATN Serbian - Latin
* @property {string} SWA Swahili
* @property {string} SWE Swedish
* @property {string} SYR Syriac
* @property {string} TAM Tamil
* @property {string} TEL Telugu
* @property {string} TGK Tajik
* @property {string} TGL Tagalog
* @property {string} THA Thai
* @property {string} TIR Tigrinya
* @property {string} TUR Turkish
* @property {string} UIG Uighur; Uyghur
* @property {string} UKR Ukrainian
* @property {string} URD Urdu
* @property {string} UZB Uzbek
* @property {string} UZB_CYRL Uzbek - Cyrillic
* @property {string} VIE Vietnamese
* @property {string} YID Yiddish
*/
/**
* Lookup table from an upper-case constant name to the lower-case language
* code string, e.g. `CHI_SIM` -> 'chi_sim'.
* NOTE(review): presumably each code is the basename of a `*.traineddata`
* file served under `langPath` — confirm against the language loader.
*
* @type {Languages}
*/
module.exports = {
AFR: 'afr',
AMH: 'amh',
ARA: 'ara',
ASM: 'asm',
AZE: 'aze',
AZE_CYRL: 'aze_cyrl',
BEL: 'bel',
BEN: 'ben',
BOD: 'bod',
BOS: 'bos',
BUL: 'bul',
CAT: 'cat',
CEB: 'ceb',
CES: 'ces',
CHI_SIM: 'chi_sim',
CHI_TRA: 'chi_tra',
CHR: 'chr',
CYM: 'cym',
DAN: 'dan',
DEU: 'deu',
DZO: 'dzo',
ELL: 'ell',
ENG: 'eng',
ENM: 'enm',
EPO: 'epo',
EST: 'est',
EUS: 'eus',
FAS: 'fas',
FIN: 'fin',
FRA: 'fra',
FRK: 'frk',
FRM: 'frm',
GLE: 'gle',
GLG: 'glg',
GRC: 'grc',
GUJ: 'guj',
HAT: 'hat',
HEB: 'heb',
HIN: 'hin',
HRV: 'hrv',
HUN: 'hun',
IKU: 'iku',
IND: 'ind',
ISL: 'isl',
ITA: 'ita',
ITA_OLD: 'ita_old',
JAV: 'jav',
JPN: 'jpn',
KAN: 'kan',
KAT: 'kat',
KAT_OLD: 'kat_old',
KAZ: 'kaz',
KHM: 'khm',
KIR: 'kir',
KOR: 'kor',
KUR: 'kur',
LAO: 'lao',
LAT: 'lat',
LAV: 'lav',
LIT: 'lit',
MAL: 'mal',
MAR: 'mar',
MKD: 'mkd',
MLT: 'mlt',
MSA: 'msa',
MYA: 'mya',
NEP: 'nep',
NLD: 'nld',
NOR: 'nor',
ORI: 'ori',
PAN: 'pan',
POL: 'pol',
POR: 'por',
PUS: 'pus',
RON: 'ron',
RUS: 'rus',
SAN: 'san',
SIN: 'sin',
SLK: 'slk',
SLV: 'slv',
SPA: 'spa',
SPA_OLD: 'spa_old',
SQI: 'sqi',
SRP: 'srp',
SRP_LATN: 'srp_latn',
SWA: 'swa',
SWE: 'swe',
SYR: 'syr',
TAM: 'tam',
TEL: 'tel',
TGK: 'tgk',
TGL: 'tgl',
THA: 'tha',
TIR: 'tir',
TUR: 'tur',
UIG: 'uig',
UKR: 'ukr',
URD: 'urd',
UZB: 'uzb',
UZB_CYRL: 'uzb_cyrl',
VIE: 'vie',
YID: 'yid',
};

21
src/createJob.js

@ -1,21 +0,0 @@ @@ -1,21 +0,0 @@
const getId = require('./utils/getId');
let jobCounter = 0;
module.exports = ({
id: _id,
action,
payload = {},
}) => {
let id = _id;
if (typeof id === 'undefined') {
id = getId('Job', jobCounter);
jobCounter += 1;
}
return {
id,
action,
payload,
};
};

80
src/createScheduler.js

@ -1,80 +0,0 @@ @@ -1,80 +0,0 @@
const createJob = require('./createJob');
const { log } = require('./utils/log');
const getId = require('./utils/getId');
let schedulerCounter = 0;
module.exports = () => {
const id = getId('Scheduler', schedulerCounter);
const workers = {};
const runningWorkers = {};
let jobQueue = [];
schedulerCounter += 1;
const getQueueLen = () => jobQueue.length;
const getNumWorkers = () => Object.keys(workers).length;
const dequeue = () => {
if (jobQueue.length !== 0) {
const wIds = Object.keys(workers);
for (let i = 0; i < wIds.length; i += 1) {
if (typeof runningWorkers[wIds[i]] === 'undefined') {
jobQueue[0](workers[wIds[i]]);
break;
}
}
}
};
const queue = (action, payload) => (
new Promise((resolve, reject) => {
const job = createJob({ action, payload });
jobQueue.push(async (w) => {
jobQueue.shift();
runningWorkers[w.id] = job;
try {
resolve(await w[action].apply(this, [...payload, job.id]));
} catch (err) {
reject(err);
} finally {
delete runningWorkers[w.id];
dequeue();
}
});
log(`[${id}]: Add ${job.id} to JobQueue`);
log(`[${id}]: JobQueue length=${jobQueue.length}`);
dequeue();
})
);
const addWorker = (w) => {
workers[w.id] = w;
log(`[${id}]: Add ${w.id}`);
log(`[${id}]: Number of workers=${getNumWorkers()}`);
dequeue();
return w.id;
};
const addJob = async (action, ...payload) => {
if (getNumWorkers() === 0) {
throw Error(`[${id}]: You need to have at least one worker before adding jobs`);
}
return queue(action, payload);
};
const terminate = async () => {
Object.keys(workers).forEach(async (wid) => {
await workers[wid].terminate();
});
jobQueue = [];
};
return {
addWorker,
addJob,
terminate,
getQueueLen,
getNumWorkers,
};
};

198
src/createWorker.js

@ -1,198 +0,0 @@ @@ -1,198 +0,0 @@
const resolvePaths = require('./utils/resolvePaths');
const circularize = require('./utils/circularize');
const createJob = require('./createJob');
const { log } = require('./utils/log');
const getId = require('./utils/getId');
const { defaultOEM } = require('./constants/config');
const {
defaultOptions,
spawnWorker,
terminateWorker,
onMessage,
loadImage,
send,
} = require('./worker/node');
let workerCounter = 0;
module.exports = (_options = {}) => {
const id = getId('Worker', workerCounter);
const {
logger,
errorHandler,
...options
} = resolvePaths({
...defaultOptions,
..._options,
});
const resolves = {};
const rejects = {};
let worker = spawnWorker(options);
workerCounter += 1;
const setResolve = (action, res) => {
resolves[action] = res;
};
const setReject = (action, rej) => {
rejects[action] = rej;
};
const startJob = ({ id: jobId, action, payload }) => (
new Promise((resolve, reject) => {
log(`[${id}]: Start ${jobId}, action=${action}`);
setResolve(action, resolve);
setReject(action, reject);
send(worker, {
workerId: id,
jobId,
action,
payload,
});
})
);
const load = (jobId) => (
startJob(createJob({
id: jobId, action: 'load', payload: { options },
}))
);
const writeText = (path, text, jobId) => (
startJob(createJob({
id: jobId,
action: 'FS',
payload: { method: 'writeFile', args: [path, text] },
}))
);
const readText = (path, jobId) => (
startJob(createJob({
id: jobId,
action: 'FS',
payload: { method: 'readFile', args: [path, { encoding: 'utf8' }] },
}))
);
const removeFile = (path, jobId) => (
startJob(createJob({
id: jobId,
action: 'FS',
payload: { method: 'unlink', args: [path] },
}))
);
const FS = (method, args, jobId) => (
startJob(createJob({
id: jobId,
action: 'FS',
payload: { method, args },
}))
);
const loadLanguage = (langs = 'eng', jobId) => (
startJob(createJob({
id: jobId,
action: 'loadLanguage',
payload: { langs, options },
}))
);
const initialize = (langs = 'eng', oem = defaultOEM, jobId) => (
startJob(createJob({
id: jobId,
action: 'initialize',
payload: { langs, oem },
}))
);
const setParameters = (params = {}, jobId) => (
startJob(createJob({
id: jobId,
action: 'setParameters',
payload: { params },
}))
);
const recognize = async (image, opts = {}, jobId) => (
startJob(createJob({
id: jobId,
action: 'recognize',
payload: { image: await loadImage(image), options: opts },
}))
);
const getPDF = (title = 'Tesseract OCR Result', textonly = false, jobId) => (
startJob(createJob({
id: jobId,
action: 'getPDF',
payload: { title, textonly },
}))
);
const detect = async (image, jobId) => (
startJob(createJob({
id: jobId,
action: 'detect',
payload: { image: await loadImage(image) },
}))
);
const terminate = async () => {
if (worker !== null) {
/*
await startJob(createJob({
id: jobId,
action: 'terminate',
}));
*/
terminateWorker(worker);
worker = null;
}
return Promise.resolve();
};
onMessage(worker, ({
workerId, jobId, status, action, data,
}) => {
if (status === 'resolve') {
log(`[${workerId}]: Complete ${jobId}`);
let d = data;
if (action === 'recognize') {
d = circularize(data);
} else if (action === 'getPDF') {
d = Array.from({ ...data, length: Object.keys(data).length });
}
resolves[action]({ jobId, data: d });
} else if (status === 'reject') {
rejects[action](data);
if (errorHandler) {
errorHandler(data);
} else {
throw Error(data);
}
} else if (status === 'progress') {
logger({ ...data, userJobId: jobId });
}
});
return {
id,
worker,
setResolve,
setReject,
load,
writeText,
readText,
removeFile,
FS,
loadLanguage,
initialize,
setParameters,
recognize,
getPDF,
detect,
terminate,
};
};

191
src/explorer.js

@ -0,0 +1,191 @@ @@ -0,0 +1,191 @@
// Like Array.prototype.join, but returns a new array with `glue` placed
// between every two neighbouring items instead of building a string.
function array_join(array, glue){
  var joined = []
  for(var idx = 0; idx < array.length; idx++){
    if(idx > 0) joined.push(glue)
    joined.push(array[idx])
  }
  return joined
}
class Node extends React.Component {
constructor(props){
super(props)
this.state = {
expanded: props.expanded
}
}
toggleExpand = e => {
this.setState({expanded: !this.state.expanded})
}
render(){
var {node, label} = this.props
var {expanded} = this.state
var rep
if(typeof node === "string"){
rep = <TextNode html={label === "html"} node={node} className="clickable" onClick={this.toggleExpand} toggleExpand={this.toggleExpand} expanded={expanded}/>
}
else if(typeof node === "boolean"){
rep = <BooleanNode node={node} className="clickable" onClick={this.toggleExpand} toggleExpand={this.toggleExpand} expanded={expanded}/>
}
else if(typeof node === "number"){
rep = <NumberNode node={node} className="clickable" onClick={this.toggleExpand} toggleExpand={this.toggleExpand} expanded={expanded}/>
}
else if(Array.isArray(node)){
rep = <ListNode node={node} className="clickable" onClick={this.toggleExpand} toggleExpand={this.toggleExpand} expanded={expanded}/>
}
else {
rep = <ObjectNode node={node} className="clickable" onClick={this.toggleExpand} toggleExpand={this.toggleExpand} expanded={expanded}/>
}
if(!label){
return rep
}
return <span><span className="label clickable" onClick={this.toggleExpand}>{label}</span>: {rep}</span>
}
}
class TextNode extends React.Component {
render(){
var {node, expanded, html, toggleExpand} = this.props
if(expanded){
var content = node
if (html) {
var content = []
CodeMirror.runMode(node, {name: 'xml', htmlMode: true}, (text, className) => {
content.push(<span className={"cm-"+className}>{text}</span>)
})
}
return <span className={(html ? "cm-s-default html ":"") + "textNode expanded clickable"} onClick={toggleExpand} >{content}</span>
}
else{
return <span>
<span className={(html? "html " : "")+"textNode clickable"} onClick={toggleExpand} >{node.substring(0,30)}</span>
{node.length > 30 ? <Ellipsis /> : ''}
</span>
}
}
}
class BooleanNode extends React.Component {
render(){
var {node} = this.props
return <span className="booleanNode">{JSON.stringify(node)}</span>
}
}
class NumberNode extends React.Component {
render(){
var {node} = this.props
return <span className="numberNode">{JSON.stringify(node)}</span>
}
}
class ListNode extends React.Component {
render(){
var {node, expanded, toggleExpand} = this.props
if(expanded){
return <span className="listNode expanded">
<span className="clickable" onClick={toggleExpand}>[</span>
<br />
<span className="indent">
{array_join(node.map((e, i) =>
<Node node={e} key={i}/>
),<Comma br/>)}
</span>
<br />
<span onClick={toggleExpand}>]</span>
</span>
}
else{
return <span className="listNode clickable" onClick={toggleExpand}>[{node.length}]</span>
}
}
}
class ObjectNode extends React.Component {
render(){
var {node, expanded, toggleExpand} = this.props
if(null === node){
return <span className="nullNode">null</span>
}
else if(expanded){
return <span className="objectNode expanded">
<span className="clickable" onClick={toggleExpand}>{"{"}</span>
<br />
<span className="indent">
{array_join(Object.keys(node).map(
key => <Node node={node[key]} label={key} key={key}/>
),<Comma br/>)}
</span>
<br />
<span onClick={toggleExpand}>{"}"}</span>
</span>
}
else{
var keys = Object.keys(node), toolong = false
if (keys.length > 4) {
keys = keys.slice(0,4)
toolong = true
}
var contents = array_join(keys.map(k => <span className="label">{k}</span>), <Comma />)
return <span className="objectNode clickable" onClick={toggleExpand} >{"{"}{contents}{toolong?<Ellipsis /> : ''}{"}"}</span>
}
}
}
class Comma extends React.Component {
render(){
var {br} = this.props
return <span className="comma">, {br?<br />:''}</span>
}
}
class Ellipsis extends React.Component {
render(){
return <span className="ellipsis">...</span>
}
}
// Sample nested value (numbers, strings, arrays, empty and deeply nested
// objects) for trying the explorer by hand. It is only referenced by the
// commented-out React.render call that follows it.
var simplething = {
hello: 42,
derp: 324,
wumbo: [
1,
2,
3,
4,
"hello",
{
blah: 32,
asdf: [],
walp: 32,
strings: "asdfsd",
}
],
merp: {
blah: 32,
asdf: [],
walp: 32,
strings: "asdfsd",
},
strings: "asdfsd",
asdoijfo: {
strings: "asdfsd",
adfds: {
asdf: {
asdfadsf: {},
merp: 32
}
}
}
}
// React.render(<Node node={simplething} />, document.getElementById('explorer'))

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save