Browse Source

stuff

pull/12/head
Kevin Kwok 8 years ago
parent
commit
7bf3725fc9
  1. 2
      .gitignore
  2. 7
      README.md
  3. 307
      dist/tesseract.js
  4. 12
      dist/worker.js
  5. 2
      examples/file-input/demo.html
  6. 4
      package.json
  7. 21
      src/browser/index.js
  8. 4
      src/browser/lang.js
  9. 6
      src/browser/worker.js
  10. 16
      src/index.js
  11. 7
      src/node/index.js
  12. 8
      src/node/lang.js
  13. 4
      src/node/worker.js

2
.gitignore vendored

@ -1,3 +1,5 @@ @@ -1,3 +1,5 @@
.DS_Store
node_modules/*
yarn.lock
tesseract.dev.js
worker.dev.js

7
README.md

@ -34,12 +34,11 @@ Or you can grab copies of `tesseract.js` and `worker.js` from the [dist folder]( @@ -34,12 +34,11 @@ Or you can grab copies of `tesseract.js` and `worker.js` from the [dist folder](
```html
<script src='/path/to/tesseract.js'></script>
<script>
var LocalTesseract = Tesseract.create({
window.Tesseract = Tesseract.create({
workerPath: '/path/to/worker.js',
langPath: 'https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/',
tesseractPath: 'https://cdn.rawgit.com/naptha/tesseract.js-core/0.1.0/index.js',
})
// from now on use LocalTesseract instead of Tesseract
</script>
```
@ -156,8 +155,8 @@ In NodeJS, an image can be @@ -156,8 +155,8 @@ In NodeJS, an image can be
## TesseractJob
A TesseractJob is an object returned by a call to recognize or detect.
All methods of a given TesseractJob return that TesseractJob to enable chaining.
A TesseractJob is an object returned by a call to `recognize` or `detect`. It's inspired by the ES6 Promise interface and provides `then` and `catch` methods. One important difference is that these methods return the job itself (to enable chaining) rather than a new object.
Typical use is:
```javascript

307
dist/tesseract.js vendored

@ -1,5 +1,273 @@ @@ -1,5 +1,273 @@
(function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.Tesseract = f()}})(function(){var define,module,exports;return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s})({1:[function(require,module,exports){
'use strict';
/* eslint-disable no-unused-vars */
// Cached references to the Object.prototype methods. The polyfill below
// invokes them through .call() on the source object rather than looking the
// methods up on that object.
var hasOwnProperty = Object.prototype.hasOwnProperty;
var propIsEnumerable = Object.prototype.propertyIsEnumerable;
/**
 * Coerce a value to an object, refusing null and undefined.
 *
 * @param {*} val value to convert
 * @returns {Object} the result of Object(val)
 * @throws {TypeError} if val is null or undefined
 */
function toObject(val) {
	var isMissing = val === undefined || val === null;
	if (!isMissing) {
		return Object(val);
	}
	throw new TypeError('Object.assign cannot be called with null or undefined');
}
/**
 * Decide whether the engine's built-in Object.assign can be used as-is.
 *
 * Runs three property-order probes; any mismatch, a missing Object.assign,
 * or an unexpected exception yields false.
 *
 * @returns {boolean} true when the native implementation passed every probe
 */
function shouldUseNative() {
	try {
		if (!Object.assign) {
			return false;
		}

		// Detect buggy property enumeration order in older V8 versions.

		// Probe 1: https://bugs.chromium.org/p/v8/issues/detail?id=4118
		var probeString = new String('abc'); // eslint-disable-line
		probeString[5] = 'de';
		var firstOwnName = Object.getOwnPropertyNames(probeString)[0];
		if (firstOwnName === '5') {
			return false;
		}

		// Probe 2: https://bugs.chromium.org/p/v8/issues/detail?id=3056
		var numbered = {};
		var code = 0;
		while (code < 10) {
			numbered['_' + String.fromCharCode(code)] = code;
			code++;
		}
		var observedDigits = Object.getOwnPropertyNames(numbered)
			.map(function (name) {
				return numbered[name];
			})
			.join('');
		if (observedDigits !== '0123456789') {
			return false;
		}

		// Probe 3: https://bugs.chromium.org/p/v8/issues/detail?id=3056
		var alphabet = 'abcdefghijklmnopqrst';
		var lettered = {};
		alphabet.split('').forEach(function (letter) {
			lettered[letter] = letter;
		});
		var copiedKeys = Object.keys(Object.assign({}, lettered)).join('');

		return copiedKeys === alphabet;
	} catch (e) {
		// We don't expect any of the above to throw, but better to be safe.
		return false;
	}
}
module.exports = shouldUseNative() ? Object.assign : function (target, source) {
var from;
var to = toObject(target);
var symbols;
for (var s = 1; s < arguments.length; s++) {
from = Object(arguments[s]);
for (var key in from) {
if (hasOwnProperty.call(from, key)) {
to[key] = from[key];
}
}
if (Object.getOwnPropertySymbols) {
symbols = Object.getOwnPropertySymbols(from);
for (var i = 0; i < symbols.length; i++) {
if (propIsEnumerable.call(from, symbols[i])) {
to[symbols[i]] = from[symbols[i]];
}
}
}
}
return to;
};
},{}],2:[function(require,module,exports){
// shim for using process in browser
var process = module.exports = {};
// The timer functions are captured once, from whatever global is present at
// load time, so that test runners which stub them later don't break things.
// Each lookup sits in its own try/catch in case this runs inside strict mode
// code that doesn't define the globals, and the whole thing lives in a
// function because try/catch deoptimizes in certain engines.
var cachedSetTimeout;
var cachedClearTimeout;

// Placeholder used when no setTimeout could be found.
function defaultSetTimout() {
    throw new Error('setTimeout has not been defined');
}

// Placeholder used when no clearTimeout could be found.
function defaultClearTimeout() {
    throw new Error('clearTimeout has not been defined');
}

(function () {
    try {
        cachedSetTimeout = typeof setTimeout === 'function'
            ? setTimeout
            : defaultSetTimout;
    } catch (e) {
        cachedSetTimeout = defaultSetTimout;
    }

    try {
        cachedClearTimeout = typeof clearTimeout === 'function'
            ? clearTimeout
            : defaultClearTimeout;
    } catch (e) {
        cachedClearTimeout = defaultClearTimeout;
    }
}());
/**
 * Schedule `fun` on a zero-delay timeout, trying progressively more
 * defensive ways of calling the timer function.
 *
 * @param {Function} fun callback handed to the timer
 * @returns {*} whatever the underlying setTimeout call returns
 */
function runTimeout(fun) {
if (cachedSetTimeout === setTimeout) {
// normal environments in sane situations
return setTimeout(fun, 0);
}
// if setTimeout wasn't available at load time but was defined later
if ((cachedSetTimeout === defaultSetTimout || !cachedSetTimeout) && setTimeout) {
cachedSetTimeout = setTimeout;
return setTimeout(fun, 0);
}
try {
// when somebody has replaced setTimeout, but there is no I.E. madness
return cachedSetTimeout(fun, 0);
} catch(e){
try {
// When we are in I.E. but the script has been evaled, so I.E. doesn't trust the global object when called normally
return cachedSetTimeout.call(null, fun, 0);
} catch(e){
// same as above, but for a version of I.E. that must have the global object for 'this'; hopefully our context is correct, otherwise it will throw a global error
return cachedSetTimeout.call(this, fun, 0);
}
}
}
/**
 * Cancel a timeout previously created through runTimeout, trying
 * progressively more defensive ways of calling the clear function.
 *
 * @param {*} marker the value returned by the matching setTimeout call
 * @returns {*} whatever the underlying clearTimeout call returns
 */
function runClearTimeout(marker) {
if (cachedClearTimeout === clearTimeout) {
// normal environments in sane situations
return clearTimeout(marker);
}
// if clearTimeout wasn't available at load time but was defined later
if ((cachedClearTimeout === defaultClearTimeout || !cachedClearTimeout) && clearTimeout) {
cachedClearTimeout = clearTimeout;
return clearTimeout(marker);
}
try {
// when somebody has replaced clearTimeout, but there is no I.E. madness
return cachedClearTimeout(marker);
} catch (e){
try {
// When we are in I.E. but the script has been evaled, so I.E. doesn't trust the global object when called normally
return cachedClearTimeout.call(null, marker);
} catch (e){
// same as above, but for a version of I.E. that must have the global object for 'this'; hopefully our context is correct, otherwise it will throw a global error.
// Some versions of I.E. have different rules for clearTimeout vs setTimeout
return cachedClearTimeout.call(this, marker);
}
}
}
// Callbacks added by process.nextTick that have not been picked up by a drain yet.
var queue = [];
// True while drainQueue is running callbacks.
var draining = false;
// The batch drainQueue is currently executing; `queue` is swapped into this.
var currentQueue;
// Index into currentQueue of the callback being run; reset to -1 between batches.
var queueIndex = -1;
/**
 * Recover after a drain that never finished.
 *
 * drainQueue schedules this through runTimeout before it starts and cancels
 * it once it completes, so this is expected to run only when a drain stopped
 * part-way. It clears the draining flag, puts the unfinished batch back in
 * front of anything queued since, and restarts draining if work remains.
 */
function cleanUpNextTick() {
    if (draining && currentQueue) {
        draining = false;

        if (currentQueue.length === 0) {
            queueIndex = -1;
        } else {
            // queueIndex is deliberately left alone here so the restarted
            // drain carries on after the entry that was running.
            queue = currentQueue.concat(queue);
        }

        if (queue.length > 0) {
            drainQueue();
        }
    }
}
/**
 * Run every queued callback, including ones queued while draining.
 *
 * cleanUpNextTick is scheduled before the first callback runs and cancelled
 * after the last, so it is expected to fire only when this function does not
 * reach its end (for example because a callback threw).
 */
function drainQueue() {
if (draining) {
// a drain is already in progress; it will pick up newly queued items
return;
}
var timeout = runTimeout(cleanUpNextTick);
draining = true;
var len = queue.length;
while(len) {
// swap the pending queue out so callbacks enqueue into a fresh array
currentQueue = queue;
queue = [];
while (++queueIndex < len) {
if (currentQueue) {
currentQueue[queueIndex].run();
}
}
// batch finished: reset the cursor and check for items queued meanwhile
queueIndex = -1;
len = queue.length;
}
currentQueue = null;
draining = false;
runClearTimeout(timeout);
}
/**
 * Queue `fun` to be run later by drainQueue, with any extra arguments
 * forwarded to it.
 *
 * @param {Function} fun callback to run; remaining arguments are passed to it
 */
process.nextTick = function (fun) {
// Copy arguments[1..] into a real array sized to fit.
// NOTE(review): called with zero arguments the requested length is -1, which makes `new Array` throw a RangeError.
var args = new Array(arguments.length - 1);
if (arguments.length > 1) {
for (var i = 1; i < arguments.length; i++) {
args[i - 1] = arguments[i];
}
}
queue.push(new Item(fun, args));
// Only start a drain when this is the first pending item and none is running.
if (queue.length === 1 && !draining) {
runTimeout(drainQueue);
}
};
// v8 likes predictable objects: every queue entry gets the same two fields.
/**
 * One queued nextTick entry.
 *
 * @param {Function} fun callback to invoke
 * @param {Array} array arguments to invoke it with
 */
function Item(fun, array) {
    this.fun = fun;
    this.array = array;
}

/**
 * Invoke the stored callback with the stored arguments and a null `this`.
 */
Item.prototype.run = function () {
    var callback = this.fun;
    var callArgs = this.array;
    callback.apply(null, callArgs);
};
// Static fields describing the browser stand-in for Node's `process`.
process.title = 'browser';
process.browser = true;
process.env = {};
process.argv = [];
process.version = ''; // empty string to avoid regexp issues
process.versions = {};

function noop() {}

// The event-emitter surface exists but does nothing.
[
    'on',
    'addListener',
    'once',
    'off',
    'removeListener',
    'removeAllListeners',
    'emit'
].forEach(function (method) {
    process[method] = noop;
});

// Operations with no browser equivalent either throw or return a fixed value.
process.binding = function (name) {
    throw new Error('process.binding is not supported');
};

process.cwd = function () {
    return '/';
};

process.chdir = function (dir) {
    throw new Error('process.chdir is not supported');
};

process.umask = function () {
    return 0;
};
},{}],3:[function(require,module,exports){
(function (process){
'use strict';
var defaultOptions = {
workerPath: 'https://cdn.rawgit.com/naptha/tesseract.js/0.1.3/dist/worker.js',
@ -7,9 +275,9 @@ var defaultOptions = { @@ -7,9 +275,9 @@ var defaultOptions = {
langPath: 'https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/'
};
if (location.hostname === '127.0.0.1' && location.port == '7355') {
if (process.env.NODE_ENV === "development") {
console.debug('Using Development Configuration');
defaultOptions.workerPath = location.protocol + '//' + location.host + '/dist/worker.js';
defaultOptions.workerPath = location.protocol + '//' + location.host + '/dist/worker.dev.js';
}
exports.defaultOptions = defaultOptions;
@ -45,14 +313,23 @@ function loadImage(image, cb) { @@ -45,14 +313,23 @@ function loadImage(image, cb) {
if (/^\#/.test(image)) {
// element css selector
return loadImage(document.querySelector(image), cb);
} else {
// url or path
} else if (/(blob|data)\:/.test(image)) {
// data url
var im = new Image();
im.src = image;
im.onload = function (e) {
return loadImage(im, cb);
};
return;
} else {
var xhr = new XMLHttpRequest();
xhr.open('GET', image, true);
xhr.responseType = "blob";
xhr.onload = function (e) {
return loadImage(xhr.response, cb);
};
xhr.send(null);
return;
}
} else if (image instanceof File) {
// files
@ -79,11 +356,14 @@ function loadImage(image, cb) { @@ -79,11 +356,14 @@ function loadImage(image, cb) {
// canvas context
var data = image.getImageData(0, 0, image.canvas.width, image.canvas.height);
return loadImage(data, cb);
} else {
return cb(image);
}
cb(image);
throw new Error('Missing return in loadImage cascade');
}
},{}],2:[function(require,module,exports){
}).call(this,require('_process'))
},{"_process":2}],4:[function(require,module,exports){
"use strict";
// The result of dump.js is a big JSON tree
@ -150,8 +430,8 @@ module.exports = function circularize(page) { @@ -150,8 +430,8 @@ module.exports = function circularize(page) {
return page;
};
},{}],3:[function(require,module,exports){
"use strict";
},{}],5:[function(require,module,exports){
'use strict';
var _createClass = function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; }();
@ -159,9 +439,13 @@ function _classCallCheck(instance, Constructor) { if (!(instance instanceof Cons @@ -159,9 +439,13 @@ function _classCallCheck(instance, Constructor) { if (!(instance instanceof Cons
var adapter = require('./node/index.js');
var circularize = require('./common/circularize.js');
var objectAssign = require('object-assign');
function create(workerOptions) {
return new TesseractWorker(workerOptions);
workerOptions = workerOptions || {};
var worker = new TesseractWorker(objectAssign({}, adapter.defaultOptions, workerOptions));
worker.create = create;
return worker;
}
var TesseractWorker = function () {
@ -334,10 +618,9 @@ var TesseractJob = function () { @@ -334,10 +618,9 @@ var TesseractJob = function () {
return TesseractJob;
}();
var DefaultTesseract = create(adapter.defaultOptions);
DefaultTesseract.create = create;
var DefaultTesseract = create();
module.exports = DefaultTesseract;
},{"./common/circularize.js":2,"./node/index.js":1}]},{},[3])(3)
},{"./common/circularize.js":4,"./node/index.js":3,"object-assign":1}]},{},[5])(5)
});

12
dist/worker.js vendored

@ -11925,11 +11925,15 @@ function fetchLanguageData(req, res, cb) { @@ -11925,11 +11925,15 @@ function fetchLanguageData(req, res, cb) {
}
},{"../common/langdata.json":46,"level-js":12,"pako":21}],43:[function(require,module,exports){
(function (global){
"use strict";
(function (process,global){
'use strict';
var workerUtils = require('../common/worker.js');
if (process.env.NODE_ENV === "development") {
console.debug('Using Development Worker');
}
global.addEventListener('message', function (e) {
var packet = e.data;
workerUtils.dispatchHandlers(packet, function (obj) {
@ -11950,8 +11954,8 @@ exports.getLanguageData = require('./lang.js'); @@ -11950,8 +11954,8 @@ exports.getLanguageData = require('./lang.js');
workerUtils.setAdapter(module.exports);
}).call(this,typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? window : {})
},{"../common/worker.js":47,"./lang.js":42}],44:[function(require,module,exports){
}).call(this,require('_process'),typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? window : {})
},{"../common/worker.js":47,"./lang.js":42,"_process":37}],44:[function(require,module,exports){
'use strict';
// This converts an image to grayscale

2
examples/file-input/demo.html

@ -1,2 +1,2 @@ @@ -1,2 +1,2 @@
<script src="/dist/tesseract.js"></script>
<script src="/dist/tesseract.dev.js"></script>
<input type="file" onchange="Tesseract.recognize(this.files[0]).progress(console.log)">

4
package.json

@ -5,7 +5,7 @@ @@ -5,7 +5,7 @@
"main": "src/index.js",
"scripts": {
"test": "echo \"Error: no test specified\" & exit 1",
"start": "watchify src/index.js -t [ babelify --presets [ es2015 ] ] -o dist/tesseract.js --standalone Tesseract & watchify src/browser/worker.js -t [ babelify --presets [ es2015 ] ] -o dist/worker.js & http-server -p 7355",
"start": "watchify src/index.js -t [ envify --NODE_ENV development ] -t [ babelify --presets [ es2015 ] ] -o dist/tesseract.dev.js --standalone Tesseract & watchify src/browser/worker.js -t [ envify --NODE_ENV development ] -t [ babelify --presets [ es2015 ] ] -o dist/worker.dev.js & http-server -p 7355",
"build": "browserify src/index.js -t [ babelify --presets [ es2015 ] ] -o dist/tesseract.js --standalone Tesseract && browserify src/browser/worker.js -t [ babelify --presets [ es2015 ] ] -o dist/worker.js"
},
"browser": {
@ -17,6 +17,7 @@ @@ -17,6 +17,7 @@
"babel-preset-es2015": "^6.16.0",
"babelify": "^7.3.0",
"browserify": "^13.1.0",
"envify": "^3.4.1",
"http-server": "^0.9.0",
"watchify": "^3.7.0"
},
@ -24,6 +25,7 @@ @@ -24,6 +25,7 @@
"file-type": "^3.8.0",
"jpeg-js": "^0.2.0",
"level-js": "^2.2.4",
"object-assign": "^4.1.0",
"pako": "^1.0.3",
"png.js": "^0.2.1",
"tesseract.js-core": "^1.0.2"

21
src/browser/index.js

@ -4,16 +4,15 @@ var defaultOptions = { @@ -4,16 +4,15 @@ var defaultOptions = {
langPath: 'https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/',
}
if(location.hostname === '127.0.0.1' && location.port == '7355'){
if (process.env.NODE_ENV === "development") {
console.debug('Using Development Configuration')
defaultOptions.workerPath = location.protocol + '//' + location.host + '/dist/worker.js'
defaultOptions.workerPath = location.protocol + '//' + location.host + '/dist/worker.dev.js'
}
exports.defaultOptions = defaultOptions;
exports.spawnWorker = function spawnWorker(instance, workerOptions){
if(window.Blob && window.URL){
var blob = new Blob(['importScripts("' + workerOptions.workerPath + '");'])
@ -46,12 +45,19 @@ function loadImage(image, cb){ @@ -46,12 +45,19 @@ function loadImage(image, cb){
if(/^\#/.test(image)){
// element css selector
return loadImage(document.querySelector(image), cb)
}else{
// url or path
}else if(/(blob|data)\:/.test(image)){
// data url
var im = new Image
im.src = image;
im.onload = e => loadImage(im, cb);
return
}else{
var xhr = new XMLHttpRequest();
xhr.open('GET', image, true)
xhr.responseType = "blob";
xhr.onload = e => loadImage(xhr.response, cb);
xhr.send(null)
return
}
}else if(image instanceof File){
// files
@ -76,6 +82,9 @@ function loadImage(image, cb){ @@ -76,6 +82,9 @@ function loadImage(image, cb){
// canvas context
var data = image.getImageData(0, 0, image.canvas.width, image.canvas.height);
return loadImage(data, cb)
}else{
return cb(image)
}
cb(image)
throw new Error('Missing return in loadImage cascade')
}

4
src/browser/lang.js

@ -1,4 +1,4 @@ @@ -1,4 +1,4 @@
var leveljs = require('level-js')
const leveljs = require('level-js')
var db = typeof indexedDB === 'undefined' ? { open: (_, cb) => cb(true) } : leveljs('./tessdata2')
var langdata = require('../common/langdata.json')
@ -22,7 +22,7 @@ module.exports = function getLanguageData(req, res, cb){ @@ -22,7 +22,7 @@ module.exports = function getLanguageData(req, res, cb){
}
var ungzip = require('pako').ungzip;
const ungzip = require('pako').ungzip;
function fetchLanguageData(req, res, cb){
var lang = req.options.lang;

6
src/browser/worker.js

@ -1,6 +1,8 @@ @@ -1,6 +1,8 @@
"use strict";
const workerUtils = require('../common/worker.js')
var workerUtils = require('../common/worker.js')
if (process.env.NODE_ENV === "development") {
console.debug('Using Development Worker')
}
global.addEventListener('message', function(e){
var packet = e.data;

16
src/index.js

@ -1,11 +1,12 @@ @@ -1,11 +1,12 @@
"use strict";
var adapter = require('./node/index.js')
var circularize = require('./common/circularize.js')
const adapter = require('./node/index.js')
const circularize = require('./common/circularize.js')
const objectAssign = require('object-assign');
function create(workerOptions){
return new TesseractWorker(workerOptions)
workerOptions = workerOptions || {};
var worker = new TesseractWorker(objectAssign({}, adapter.defaultOptions, workerOptions))
worker.create = create;
return worker;
}
class TesseractWorker {
@ -142,8 +143,7 @@ class TesseractJob { @@ -142,8 +143,7 @@ class TesseractJob {
}
var DefaultTesseract = create(adapter.defaultOptions)
DefaultTesseract.create = create;
var DefaultTesseract = create()
module.exports = DefaultTesseract

7
src/node/index.js

@ -1,12 +1,12 @@ @@ -1,12 +1,12 @@
var path = require('path')
const path = require('path')
exports.defaultOptions = {
workerPath: path.join(__dirname, 'worker.js'),
langPath: 'http://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/',
}
var fork = require('child_process').fork;
var fs = require('fs')
const fork = require('child_process').fork;
const fs = require('fs')
exports.spawnWorker = function spawnWorker(instance, workerOptions){
var cp = fork(workerOptions.workerPath);
@ -39,7 +39,6 @@ function loadImage(image, cb){ @@ -39,7 +39,6 @@ function loadImage(image, cb){
var fileType = require('file-type');
var mime = fileType(image).mime
if(mime === 'image/png'){
var PNGReader = require('png.js');
var reader = new PNGReader(image);

8
src/node/lang.js

@ -1,7 +1,7 @@ @@ -1,7 +1,7 @@
var http = require("http"),
zlib = require("zlib"),
fs = require("fs"),
path = require("path");
const http = require("http"),
zlib = require("zlib"),
fs = require("fs"),
path = require("path");
var langdata = require('../common/langdata.json')

4
src/node/worker.js

@ -1,6 +1,4 @@ @@ -1,6 +1,4 @@
"use strict";
var workerUtils = require('../common/worker.js')
const workerUtils = require('../common/worker.js')
process.on('message', function(packet){
workerUtils.dispatchHandlers(packet, obj => process.send(obj))

Loading…
Cancel
Save