Compare commits

...

14 Commits

Author SHA1 Message Date
Jerome Wu aba237af2e Fix image url in README.md and release 1.0.19 6 years ago
Jerome Wu a26566be04 Update README.md 6 years ago
Jerome Wu 55e355eff5 Release 1.0.17 6 years ago
Jerome Wu 1f497271b5 Update README.md and add error handler in loadImage 6 years ago
jeromewu 1a12ead46f Merge pull request #280 from uwolfer/fix-call-from-worker 6 years ago
Urs Wolfer 5c930514f5 Fix initialization when calling from other Worker 6 years ago
jeromewu 9268572644 Merge pull request #267 from HoldYourWaffle/pr-load-error-handling 6 years ago
HoldYourWaffle 7911518b39 Add stubs for error handling 6 years ago
Jerome Wu 613a19c7e1 Force upgrade to 1.0.16 due to npm issue 6 years ago
Jerome Wu 07ea31a9cd Update dist/tesseract.js to 1.0.15 6 years ago
Jerome Wu 741ff413b3 1.0.15 6 years ago
Jerome Wu cdb86c694a Fix module is not defined issue 6 years ago
Jerome Wu 06d32c6804 1.0.14 6 years ago
Jerome Wu 8e1b21cd2c Replace langPath from jsDelivr to Github Page as there is 20MB limitation 6 years ago
  1. 10
      README.md
  2. 35
      dist/tesseract.js
  3. 2
      dist/tesseract.min.js
  4. 2
      dist/tesseract.min.js.map
  5. 5
      dist/worker.js
  6. 2
      dist/worker.min.js
  7. 2
      dist/worker.min.js.map
  8. 0
      docs/images/demo.gif
  9. 2
      package-lock.json
  10. 4
      package.json
  11. 29
      src/browser/index.js
  12. 2
      src/browser/worker.js
  13. 2
      src/node/index.js

10
README.md

@ -1,13 +1,16 @@
# [Tesseract.js](http://tesseract.projectnaptha.com/) # [Tesseract.js](http://tesseract.projectnaptha.com/)
[![NPM version][tesseractjs-npm-image]][tesseractjs-npm-url] [![NPM version][tesseractjs-npm-image]][tesseractjs-npm-url]
[tesseractjs-npm-image]: https://img.shields.io/npm/v/tesseract.js.svg [tesseractjs-npm-image]: https://img.shields.io/npm/v/tesseract.js.svg
[tesseractjs-npm-url]: https://npmjs.org/package/tesseract.js [tesseractjs-npm-url]: https://npmjs.org/package/tesseract.js
**Tesseract.js v2 alpha is now available!! Check [HERE](https://github.com/naptha/tesseract.js) for more information.**
Tesseract.js is a javascript library that gets words in [almost any language](./docs/tesseract_lang_list.md) out of images. ([Demo](http://tesseract.projectnaptha.com/)) Tesseract.js is a javascript library that gets words in [almost any language](./docs/tesseract_lang_list.md) out of images. ([Demo](http://tesseract.projectnaptha.com/))
[![fancy demo gif](https://github.com/naptha/tesseract.js/blob/master/demo.gif)](http://tesseract.projectnaptha.com) [![fancy demo gif](https://raw.githubusercontent.com/naptha/tesseract.js/support/1.x/docs/images/demo.gif)](http://tesseract.projectnaptha.com)
Tesseract.js works with script tags, [webpack](https://webpack.js.org/)/[Browserify](http://browserify.org/), and [Node.js](https://nodejs.org/en/). [After you install it](#installation), using it is as simple as Tesseract.js works with script tags, [webpack](https://webpack.js.org/)/[Browserify](http://browserify.org/), and [Node.js](https://nodejs.org/en/). [After you install it](#installation), using it is as simple as
@ -24,13 +27,14 @@ Tesseract.js wraps an [emscripten](https://github.com/kripken/emscripten) [port]
# Installation # Installation
Tesseract.js works with a `<script>` tag via local copy or CDN, with webpack and Browserify via `npm`, and on Node.js via `npm`. [Check out the docs](#docs) for a full treatment of the API. Tesseract.js works with a `<script>` tag via local copy or CDN, with webpack and Browserify via `npm`, and on Node.js via `npm`. [Check out the docs](#docs) for a full treatment of the API.
## &lt;script /> ## &lt;script />
You can simply include Tesseract.js with a CDN like this: You can simply include Tesseract.js with a CDN like this:
```html ```html
<script src='https://cdnjs.cloudflare.com/ajax/libs/tesseract.js/1.0.13/tesseract.min.js'></script> <script src='https://cdn.jsdelivr.net/gh/naptha/tesseract.js@v1.0.14/dist/tesseract.min.js'></script>
``` ```
After including your scripts, the `Tesseract` variable will be defined globally! After including your scripts, the `Tesseract` variable will be defined globally!
@ -251,7 +255,7 @@ Because of this we recommend loading `tesseract.js` from a CDN. But if you reall
```javascript ```javascript
window.Tesseract = Tesseract.create({ window.Tesseract = Tesseract.create({
workerPath: '/path/to/worker.js', workerPath: '/path/to/worker.js',
langPath: 'https://cdn.jsdelivr.net/gh/naptha/tessdata@gh-pages/3.02/', langPath: 'https://tessdata.projectnaptha.com/3.02/',
corePath: 'https://cdn.jsdelivr.net/gh/naptha/tesseract.js-core@0.1.0/index.js', corePath: 'https://cdn.jsdelivr.net/gh/naptha/tesseract.js-core@0.1.0/index.js',
}) })
``` ```

35
dist/tesseract.js vendored

@ -187,11 +187,11 @@ process.umask = function() { return 0; };
},{}],2:[function(require,module,exports){ },{}],2:[function(require,module,exports){
module.exports={ module.exports={
"name": "tesseract.js", "name": "tesseract.js",
"version": "1.0.13", "version": "1.0.19",
"description": "Pure Javascript Multilingual OCR", "description": "Pure Javascript Multilingual OCR",
"main": "src/index.js", "main": "src/index.js",
"scripts": { "scripts": {
"start": "concurrently --kill-others \"watchify src/index.js -t [ envify --NODE_ENV development ] -t [ babelify --presets [ es2015 ] ] -o dist/tesseract.dev.js --standalone Tesseract\" \"watchify src/browser/worker.js -t [ envify --NODE_ENV development ] -t [ babelify --presets [ es2015 ] ] -o dist/worker.dev.js\" \"http-server -p 7355\"", "start": "concurrently --kill-others \"watchify src/index.js -t [ envify --TESS_ENV development ] -t [ babelify --presets [ es2015 ] ] -o dist/tesseract.dev.js --standalone Tesseract\" \"watchify src/browser/worker.js -t [ envify --TESS_ENV development ] -t [ babelify --presets [ es2015 ] ] -o dist/worker.dev.js\" \"http-server -p 7355\"",
"build": "browserify src/index.js -t [ babelify --presets [ es2015 ] ] -o dist/tesseract.js --standalone Tesseract && browserify src/browser/worker.js -t [ babelify --presets [ es2015 ] ] -o dist/worker.js && uglifyjs dist/tesseract.js --source-map -o dist/tesseract.min.js && uglifyjs dist/worker.js --source-map -o dist/worker.min.js", "build": "browserify src/index.js -t [ babelify --presets [ es2015 ] ] -o dist/tesseract.js --standalone Tesseract && browserify src/browser/worker.js -t [ babelify --presets [ es2015 ] ] -o dist/worker.js && uglifyjs dist/tesseract.js --source-map -o dist/tesseract.min.js && uglifyjs dist/worker.js --source-map -o dist/worker.min.js",
"release": "npm run build && git commit -am 'new release' && git push && git tag `jq -r '.version' package.json` && git push origin --tags && npm publish" "release": "npm run build && git commit -am 'new release' && git push && git tag `jq -r '.version' package.json` && git push origin --tags && npm publish"
}, },
@ -239,10 +239,10 @@ module.exports={
var defaultOptions = { var defaultOptions = {
// workerPath: 'https://cdn.jsdelivr.net/gh/naptha/tesseract.js@0.2.0/dist/worker.js', // workerPath: 'https://cdn.jsdelivr.net/gh/naptha/tesseract.js@0.2.0/dist/worker.js',
corePath: 'https://cdn.jsdelivr.net/gh/naptha/tesseract.js-core@0.1.0/index.js', corePath: 'https://cdn.jsdelivr.net/gh/naptha/tesseract.js-core@0.1.0/index.js',
langPath: 'https://cdn.jsdelivr.net/gh/naptha/tessdata@gh-pages/3.02/' langPath: 'https://tessdata.projectnaptha.com/3.02/'
}; };
if (process.env.NODE_ENV === "development") { if (process.env.TESS_ENV === "development") {
console.debug('Using Development Configuration'); console.debug('Using Development Configuration');
defaultOptions.workerPath = location.protocol + '//' + location.host + '/dist/worker.dev.js?nocache=' + Math.random().toString(36).slice(3); defaultOptions.workerPath = location.protocol + '//' + location.host + '/dist/worker.dev.js?nocache=' + Math.random().toString(36).slice(3);
} else { } else {
@ -253,9 +253,11 @@ if (process.env.NODE_ENV === "development") {
exports.defaultOptions = defaultOptions; exports.defaultOptions = defaultOptions;
exports.spawnWorker = function spawnWorker(instance, workerOptions) { exports.spawnWorker = function spawnWorker(instance, workerOptions) {
if (window.Blob && window.URL) { if (Blob && URL) {
var blob = new Blob(['importScripts("' + workerOptions.workerPath + '");']); var blob = new Blob(['importScripts("' + workerOptions.workerPath + '");'], {
var worker = new Worker(window.URL.createObjectURL(blob)); type: 'application/javascript'
});
var worker = new Worker(URL.createObjectURL(blob));
} else { } else {
var worker = new Worker(workerOptions.workerPath); var worker = new Worker(workerOptions.workerPath);
} }
@ -290,20 +292,26 @@ function loadImage(image, cb) {
im.onload = function (e) { im.onload = function (e) {
return loadImage(im, cb); return loadImage(im, cb);
}; };
im.onerror = function (e) {
throw e;
};
return; return;
} else { } else {
var xhr = new XMLHttpRequest(); var xhr = new XMLHttpRequest();
xhr.open('GET', image, true); xhr.open('GET', image, true);
xhr.responseType = "blob"; xhr.responseType = "blob";
xhr.onload = function (e) { xhr.onload = function (e) {
return loadImage(xhr.response, cb); if (xhr.status >= 400) {
throw new Error('Fail to get image as Blob');
} else {
loadImage(xhr.response, cb);
}
}; };
xhr.onerror = function (e) { xhr.onerror = function (e) {
if (/^https?:\/\//.test(image) && !/^https:\/\/crossorigin.me/.test(image)) { throw e;
console.debug('Attempting to load image with CORS proxy');
loadImage('https://crossorigin.me/' + image, cb);
}
}; };
xhr.send(null); xhr.send(null);
return; return;
} }
@ -313,6 +321,9 @@ function loadImage(image, cb) {
fr.onload = function (e) { fr.onload = function (e) {
return loadImage(fr.result, cb); return loadImage(fr.result, cb);
}; };
fr.onerror = function (e) {
throw e;
};
fr.readAsDataURL(image); fr.readAsDataURL(image);
return; return;
} else if (image instanceof Blob) { } else if (image instanceof Blob) {

2
dist/tesseract.min.js vendored

File diff suppressed because one or more lines are too long

2
dist/tesseract.min.js.map vendored

File diff suppressed because one or more lines are too long

5
dist/worker.js vendored

@ -8654,7 +8654,7 @@ function fetchLanguageData(req, res, cb) {
var workerUtils = require('../common/worker.js'); var workerUtils = require('../common/worker.js');
if (process.env.NODE_ENV === "development") { if (process.env.TESS_ENV === "development") {
console.debug('Using Development Worker'); console.debug('Using Development Worker');
} }
@ -8915,6 +8915,9 @@ function dispatchHandlers(packet, send) {
handleDetect(packet.payload, respond); handleDetect(packet.payload, respond);
} }
} catch (err) { } catch (err) {
// Prepare exception to travel through postMessage
err = err.toString();
respond.reject(err); respond.reject(err);
} }
} }

2
dist/worker.min.js vendored

File diff suppressed because one or more lines are too long

2
dist/worker.min.js.map vendored

File diff suppressed because one or more lines are too long

0
demo.gif → docs/images/demo.gif

After

Width:  |  Height:  |  Size: 97 KiB

2
package-lock.json generated

@ -1,6 +1,6 @@
{ {
"name": "tesseract.js", "name": "tesseract.js",
"version": "1.0.13", "version": "1.0.19",
"lockfileVersion": 1, "lockfileVersion": 1,
"requires": true, "requires": true,
"dependencies": { "dependencies": {

4
package.json

@ -1,10 +1,10 @@
{ {
"name": "tesseract.js", "name": "tesseract.js",
"version": "1.0.13", "version": "1.0.19",
"description": "Pure Javascript Multilingual OCR", "description": "Pure Javascript Multilingual OCR",
"main": "src/index.js", "main": "src/index.js",
"scripts": { "scripts": {
"start": "concurrently --kill-others \"watchify src/index.js -t [ envify --NODE_ENV development ] -t [ babelify --presets [ es2015 ] ] -o dist/tesseract.dev.js --standalone Tesseract\" \"watchify src/browser/worker.js -t [ envify --NODE_ENV development ] -t [ babelify --presets [ es2015 ] ] -o dist/worker.dev.js\" \"http-server -p 7355\"", "start": "concurrently --kill-others \"watchify src/index.js -t [ envify --TESS_ENV development ] -t [ babelify --presets [ es2015 ] ] -o dist/tesseract.dev.js --standalone Tesseract\" \"watchify src/browser/worker.js -t [ envify --TESS_ENV development ] -t [ babelify --presets [ es2015 ] ] -o dist/worker.dev.js\" \"http-server -p 7355\"",
"build": "browserify src/index.js -t [ babelify --presets [ es2015 ] ] -o dist/tesseract.js --standalone Tesseract && browserify src/browser/worker.js -t [ babelify --presets [ es2015 ] ] -o dist/worker.js && uglifyjs dist/tesseract.js --source-map -o dist/tesseract.min.js && uglifyjs dist/worker.js --source-map -o dist/worker.min.js", "build": "browserify src/index.js -t [ babelify --presets [ es2015 ] ] -o dist/tesseract.js --standalone Tesseract && browserify src/browser/worker.js -t [ babelify --presets [ es2015 ] ] -o dist/worker.js && uglifyjs dist/tesseract.js --source-map -o dist/tesseract.min.js && uglifyjs dist/worker.js --source-map -o dist/worker.min.js",
"release": "npm run build && git commit -am 'new release' && git push && git tag `jq -r '.version' package.json` && git push origin --tags && npm publish" "release": "npm run build && git commit -am 'new release' && git push && git tag `jq -r '.version' package.json` && git push origin --tags && npm publish"
}, },

29
src/browser/index.js

@ -1,10 +1,10 @@
var defaultOptions = { var defaultOptions = {
// workerPath: 'https://cdn.jsdelivr.net/gh/naptha/tesseract.js@0.2.0/dist/worker.js', // workerPath: 'https://cdn.jsdelivr.net/gh/naptha/tesseract.js@0.2.0/dist/worker.js',
corePath: 'https://cdn.jsdelivr.net/gh/naptha/tesseract.js-core@0.1.0/index.js', corePath: 'https://cdn.jsdelivr.net/gh/naptha/tesseract.js-core@0.1.0/index.js',
langPath: 'https://cdn.jsdelivr.net/gh/naptha/tessdata@gh-pages/3.02/', langPath: 'https://tessdata.projectnaptha.com/3.02/',
} }
if (process.env.NODE_ENV === "development") { if (process.env.TESS_ENV === "development") {
console.debug('Using Development Configuration') console.debug('Using Development Configuration')
defaultOptions.workerPath = location.protocol + '//' + location.host + '/dist/worker.dev.js?nocache=' + Math.random().toString(36).slice(3) defaultOptions.workerPath = location.protocol + '//' + location.host + '/dist/worker.dev.js?nocache=' + Math.random().toString(36).slice(3)
}else{ }else{
@ -16,9 +16,11 @@ exports.defaultOptions = defaultOptions;
exports.spawnWorker = function spawnWorker(instance, workerOptions){ exports.spawnWorker = function spawnWorker(instance, workerOptions){
if(window.Blob && window.URL){ if(Blob && URL){
var blob = new Blob(['importScripts("' + workerOptions.workerPath + '");']) var blob = new Blob(['importScripts("' + workerOptions.workerPath + '");'], {
var worker = new Worker(window.URL.createObjectURL(blob)); type: 'application/javascript'
});
var worker = new Worker(URL.createObjectURL(blob));
}else{ }else{
var worker = new Worker(workerOptions.workerPath) var worker = new Worker(workerOptions.workerPath)
} }
@ -52,18 +54,22 @@ function loadImage(image, cb){
var im = new Image var im = new Image
im.src = image; im.src = image;
im.onload = e => loadImage(im, cb); im.onload = e => loadImage(im, cb);
im.onerror = e => { throw e; };
return return
}else{ }else{
var xhr = new XMLHttpRequest(); var xhr = new XMLHttpRequest();
xhr.open('GET', image, true) xhr.open('GET', image, true)
xhr.responseType = "blob"; xhr.responseType = "blob";
xhr.onload = e => loadImage(xhr.response, cb);
xhr.onerror = function(e){ xhr.onload = e => {
if(/^https?:\/\//.test(image) && !/^https:\/\/crossorigin.me/.test(image)){ if (xhr.status >= 400){
console.debug('Attempting to load image with CORS proxy') throw new Error('Fail to get image as Blob');
loadImage('https://crossorigin.me/' + image, cb) }else{
loadImage(xhr.response, cb);
} }
} };
xhr.onerror = e => { throw e; };
xhr.send(null) xhr.send(null)
return return
} }
@ -71,6 +77,7 @@ function loadImage(image, cb){
// files // files
var fr = new FileReader() var fr = new FileReader()
fr.onload = e => loadImage(fr.result, cb); fr.onload = e => loadImage(fr.result, cb);
fr.onerror = e => { throw e; };
fr.readAsDataURL(image) fr.readAsDataURL(image)
return return
}else if(image instanceof Blob){ }else if(image instanceof Blob){

2
src/browser/worker.js

@ -1,6 +1,6 @@
const workerUtils = require('../common/worker.js') const workerUtils = require('../common/worker.js')
if (process.env.NODE_ENV === "development") { if (process.env.TESS_ENV === "development") {
console.debug('Using Development Worker') console.debug('Using Development Worker')
} }

2
src/node/index.js

@ -5,7 +5,7 @@ const fetch = require('isomorphic-fetch'),
exports.defaultOptions = { exports.defaultOptions = {
workerPath: require('path').join(__dirname, 'worker.js'), workerPath: require('path').join(__dirname, 'worker.js'),
langPath: 'https://cdn.jsdelivr.net/gh/naptha/tessdata@gh-pages/3.02/', langPath: 'https://tessdata.projectnaptha.com/3.02/',
} }
exports.spawnWorker = function spawnWorker(instance, workerOptions){ exports.spawnWorker = function spawnWorker(instance, workerOptions){

Loading…
Cancel
Save