From 7bf3725fc99c3536eee21c649196c451bb8aed02 Mon Sep 17 00:00:00 2001 From: Kevin Kwok Date: Wed, 12 Oct 2016 01:23:10 -0400 Subject: [PATCH] stuff --- .gitignore | 4 +- README.md | 7 +- dist/tesseract.js | 307 ++++++++++++++++++++++++++++++++-- dist/worker.js | 12 +- examples/file-input/demo.html | 2 +- package.json | 4 +- src/browser/index.js | 21 ++- src/browser/lang.js | 4 +- src/browser/worker.js | 6 +- src/index.js | 16 +- src/node/index.js | 7 +- src/node/lang.js | 8 +- src/node/worker.js | 4 +- 13 files changed, 350 insertions(+), 52 deletions(-) diff --git a/.gitignore b/.gitignore index a76066d..dd6d7b2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ .DS_Store node_modules/* -yarn.lock \ No newline at end of file +yarn.lock +tesseract.dev.js +worker.dev.js \ No newline at end of file diff --git a/README.md b/README.md index 107f33d..a025a1e 100644 --- a/README.md +++ b/README.md @@ -34,12 +34,11 @@ Or you can grab copies of `tesseract.js` and `worker.js` from the [dist folder]( ```html ``` @@ -156,8 +155,8 @@ In NodeJS, an image can be ## TesseractJob -A TesseractJob is an an object returned by a call to recognize or detect. -All methods of a given TesseractJob return that TesseractJob to enable chaining. + +A TesseractJob is an object returned by a call to `recognize` or `detect`. It's inspired by the ES6 Promise interface and provides `then` and `catch` methods. One important difference is that these methods return the job itself (to enable chaining) rather than a new one. 
Typical use is: ```javascript diff --git a/dist/tesseract.js b/dist/tesseract.js index 2323311..a4c6aaf 100644 --- a/dist/tesseract.js +++ b/dist/tesseract.js @@ -1,5 +1,273 @@ (function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.Tesseract = f()}})(function(){var define,module,exports;return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o 1) { + for (var i = 1; i < arguments.length; i++) { + args[i - 1] = arguments[i]; + } + } + queue.push(new Item(fun, args)); + if (queue.length === 1 && !draining) { + runTimeout(drainQueue); + } +}; + +// v8 likes predictible objects +function Item(fun, array) { + this.fun = fun; + this.array = array; +} +Item.prototype.run = function () { + this.fun.apply(null, this.array); +}; +process.title = 'browser'; +process.browser = true; +process.env = {}; +process.argv = []; +process.version = ''; // empty string to avoid regexp issues +process.versions = {}; + +function noop() {} + +process.on = noop; +process.addListener = noop; +process.once = noop; +process.off = noop; +process.removeListener = noop; +process.removeAllListeners = noop; +process.emit = noop; + +process.binding = function (name) { + throw new Error('process.binding is not supported'); +}; + +process.cwd = function () { return '/' }; +process.chdir = function (dir) { + throw new Error('process.chdir is not supported'); +}; +process.umask = function() { return 0; 
}; + +},{}],3:[function(require,module,exports){ +(function (process){ +'use strict'; var defaultOptions = { workerPath: 'https://cdn.rawgit.com/naptha/tesseract.js/0.1.3/dist/worker.js', @@ -7,9 +275,9 @@ var defaultOptions = { langPath: 'https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/' }; -if (location.hostname === '127.0.0.1' && location.port == '7355') { +if (process.env.NODE_ENV === "development") { console.debug('Using Development Configuration'); - defaultOptions.workerPath = location.protocol + '//' + location.host + '/dist/worker.js'; + defaultOptions.workerPath = location.protocol + '//' + location.host + '/dist/worker.dev.js'; } exports.defaultOptions = defaultOptions; @@ -45,14 +313,23 @@ function loadImage(image, cb) { if (/^\#/.test(image)) { // element css selector return loadImage(document.querySelector(image), cb); - } else { - // url or path + } else if (/(blob|data)\:/.test(image)) { + // data url var im = new Image(); im.src = image; im.onload = function (e) { return loadImage(im, cb); }; return; + } else { + var xhr = new XMLHttpRequest(); + xhr.open('GET', image, true); + xhr.responseType = "blob"; + xhr.onload = function (e) { + return loadImage(xhr.response, cb); + }; + xhr.send(null); + return; } } else if (image instanceof File) { // files @@ -79,11 +356,14 @@ function loadImage(image, cb) { // canvas context var data = image.getImageData(0, 0, image.canvas.width, image.canvas.height); return loadImage(data, cb); + } else { + return cb(image); } - cb(image); + throw new Error('Missing return in loadImage cascade'); } -},{}],2:[function(require,module,exports){ +}).call(this,require('_process')) +},{"_process":2}],4:[function(require,module,exports){ "use strict"; // The result of dump.js is a big JSON tree @@ -150,8 +430,8 @@ module.exports = function circularize(page) { return page; }; -},{}],3:[function(require,module,exports){ -"use strict"; +},{}],5:[function(require,module,exports){ +'use strict'; var _createClass = function () 
{ function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; }(); @@ -159,9 +439,13 @@ function _classCallCheck(instance, Constructor) { if (!(instance instanceof Cons var adapter = require('./node/index.js'); var circularize = require('./common/circularize.js'); +var objectAssign = require('object-assign'); function create(workerOptions) { - return new TesseractWorker(workerOptions); + workerOptions = workerOptions || {}; + var worker = new TesseractWorker(objectAssign({}, adapter.defaultOptions, workerOptions)); + worker.create = create; + return worker; } var TesseractWorker = function () { @@ -334,10 +618,9 @@ var TesseractJob = function () { return TesseractJob; }(); -var DefaultTesseract = create(adapter.defaultOptions); -DefaultTesseract.create = create; +var DefaultTesseract = create(); module.exports = DefaultTesseract; -},{"./common/circularize.js":2,"./node/index.js":1}]},{},[3])(3) +},{"./common/circularize.js":4,"./node/index.js":3,"object-assign":1}]},{},[5])(5) }); \ No newline at end of file diff --git a/dist/worker.js b/dist/worker.js index 72624e0..81960af 100644 --- a/dist/worker.js +++ b/dist/worker.js @@ -11925,11 +11925,15 @@ function fetchLanguageData(req, res, cb) { } },{"../common/langdata.json":46,"level-js":12,"pako":21}],43:[function(require,module,exports){ -(function (global){ -"use strict"; +(function (process,global){ +'use strict'; var workerUtils = require('../common/worker.js'); +if (process.env.NODE_ENV === "development") { + console.debug('Using Development Worker'); +} 
+ global.addEventListener('message', function (e) { var packet = e.data; workerUtils.dispatchHandlers(packet, function (obj) { @@ -11950,8 +11954,8 @@ exports.getLanguageData = require('./lang.js'); workerUtils.setAdapter(module.exports); -}).call(this,typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? window : {}) -},{"../common/worker.js":47,"./lang.js":42}],44:[function(require,module,exports){ +}).call(this,require('_process'),typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? window : {}) +},{"../common/worker.js":47,"./lang.js":42,"_process":37}],44:[function(require,module,exports){ 'use strict'; // This converts an image to grayscale diff --git a/examples/file-input/demo.html b/examples/file-input/demo.html index 08ceb38..a8644e8 100644 --- a/examples/file-input/demo.html +++ b/examples/file-input/demo.html @@ -1,2 +1,2 @@ - + \ No newline at end of file diff --git a/package.json b/package.json index 5f92626..c663903 100644 --- a/package.json +++ b/package.json @@ -5,7 +5,7 @@ "main": "src/index.js", "scripts": { "test": "echo \"Error: no test specified\" & exit 1", - "start": "watchify src/index.js -t [ babelify --presets [ es2015 ] ] -o dist/tesseract.js --standalone Tesseract & watchify src/browser/worker.js -t [ babelify --presets [ es2015 ] ] -o dist/worker.js & http-server -p 7355", + "start": "watchify src/index.js -t [ envify --NODE_ENV development ] -t [ babelify --presets [ es2015 ] ] -o dist/tesseract.dev.js --standalone Tesseract & watchify src/browser/worker.js -t [ envify --NODE_ENV development ] -t [ babelify --presets [ es2015 ] ] -o dist/worker.dev.js & http-server -p 7355", "build": "browserify src/index.js -t [ babelify --presets [ es2015 ] ] -o dist/tesseract.js --standalone Tesseract && browserify src/browser/worker.js -t [ babelify --presets [ es2015 ] ] -o dist/worker.js" }, "browser": { @@ -17,6 +17,7 @@ 
"babel-preset-es2015": "^6.16.0", "babelify": "^7.3.0", "browserify": "^13.1.0", + "envify": "^3.4.1", "http-server": "^0.9.0", "watchify": "^3.7.0" }, @@ -24,6 +25,7 @@ "file-type": "^3.8.0", "jpeg-js": "^0.2.0", "level-js": "^2.2.4", + "object-assign": "^4.1.0", "pako": "^1.0.3", "png.js": "^0.2.1", "tesseract.js-core": "^1.0.2" diff --git a/src/browser/index.js b/src/browser/index.js index 980b48c..656f4b0 100644 --- a/src/browser/index.js +++ b/src/browser/index.js @@ -4,16 +4,15 @@ var defaultOptions = { langPath: 'https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/', } -if(location.hostname === '127.0.0.1' && location.port == '7355'){ +if (process.env.NODE_ENV === "development") { console.debug('Using Development Configuration') - defaultOptions.workerPath = location.protocol + '//' + location.host + '/dist/worker.js' + defaultOptions.workerPath = location.protocol + '//' + location.host + '/dist/worker.dev.js' } exports.defaultOptions = defaultOptions; - exports.spawnWorker = function spawnWorker(instance, workerOptions){ if(window.Blob && window.URL){ var blob = new Blob(['importScripts("' + workerOptions.workerPath + '");']) @@ -46,12 +45,19 @@ function loadImage(image, cb){ if(/^\#/.test(image)){ // element css selector return loadImage(document.querySelector(image), cb) - }else{ - // url or path + }else if(/(blob|data)\:/.test(image)){ + // data url var im = new Image im.src = image; im.onload = e => loadImage(im, cb); return + }else{ + var xhr = new XMLHttpRequest(); + xhr.open('GET', image, true) + xhr.responseType = "blob"; + xhr.onload = e => loadImage(xhr.response, cb); + xhr.send(null) + return } }else if(image instanceof File){ // files @@ -76,6 +82,9 @@ function loadImage(image, cb){ // canvas context var data = image.getImageData(0, 0, image.canvas.width, image.canvas.height); return loadImage(data, cb) + }else{ + return cb(image) } - cb(image) + throw new Error('Missing return in loadImage cascade') + } diff --git a/src/browser/lang.js 
b/src/browser/lang.js index d47191a..9fc638e 100644 --- a/src/browser/lang.js +++ b/src/browser/lang.js @@ -1,4 +1,4 @@ -var leveljs = require('level-js') +const leveljs = require('level-js') var db = typeof indexedDB === 'undefined' ? { open: (_, cb) => cb(true) } : leveljs('./tessdata2') var langdata = require('../common/langdata.json') @@ -22,7 +22,7 @@ module.exports = function getLanguageData(req, res, cb){ } -var ungzip = require('pako').ungzip; +const ungzip = require('pako').ungzip; function fetchLanguageData(req, res, cb){ var lang = req.options.lang; diff --git a/src/browser/worker.js b/src/browser/worker.js index 33e8ff9..d392851 100644 --- a/src/browser/worker.js +++ b/src/browser/worker.js @@ -1,6 +1,8 @@ -"use strict"; +const workerUtils = require('../common/worker.js') -var workerUtils = require('../common/worker.js') +if (process.env.NODE_ENV === "development") { + console.debug('Using Development Worker') +} global.addEventListener('message', function(e){ var packet = e.data; diff --git a/src/index.js b/src/index.js index b6c3aa0..57b00f7 100644 --- a/src/index.js +++ b/src/index.js @@ -1,11 +1,12 @@ -"use strict"; - -var adapter = require('./node/index.js') -var circularize = require('./common/circularize.js') - +const adapter = require('./node/index.js') +const circularize = require('./common/circularize.js') +const objectAssign = require('object-assign'); function create(workerOptions){ - return new TesseractWorker(workerOptions) + workerOptions = workerOptions || {}; + var worker = new TesseractWorker(objectAssign({}, adapter.defaultOptions, workerOptions)) + worker.create = create; + return worker; } class TesseractWorker { @@ -142,8 +143,7 @@ class TesseractJob { } -var DefaultTesseract = create(adapter.defaultOptions) -DefaultTesseract.create = create; +var DefaultTesseract = create() module.exports = DefaultTesseract diff --git a/src/node/index.js b/src/node/index.js index 4ba50e0..a5e2603 100644 --- a/src/node/index.js +++ 
b/src/node/index.js @@ -1,12 +1,12 @@ -var path = require('path') +const path = require('path') exports.defaultOptions = { workerPath: path.join(__dirname, 'worker.js'), langPath: 'http://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/', } -var fork = require('child_process').fork; -var fs = require('fs') +const fork = require('child_process').fork; +const fs = require('fs') exports.spawnWorker = function spawnWorker(instance, workerOptions){ var cp = fork(workerOptions.workerPath); @@ -39,7 +39,6 @@ function loadImage(image, cb){ var fileType = require('file-type'); var mime = fileType(image).mime - if(mime === 'image/png'){ var PNGReader = require('png.js'); var reader = new PNGReader(image); diff --git a/src/node/lang.js b/src/node/lang.js index 3046990..2b9eb96 100644 --- a/src/node/lang.js +++ b/src/node/lang.js @@ -1,7 +1,7 @@ -var http = require("http"), - zlib = require("zlib"), - fs = require("fs"), - path = require("path"); +const http = require("http"), + zlib = require("zlib"), + fs = require("fs"), + path = require("path"); var langdata = require('../common/langdata.json') diff --git a/src/node/worker.js b/src/node/worker.js index 56d0eab..bea5d5c 100644 --- a/src/node/worker.js +++ b/src/node/worker.js @@ -1,6 +1,4 @@ -"use strict"; - -var workerUtils = require('../common/worker.js') +const workerUtils = require('../common/worker.js') process.on('message', function(packet){ workerUtils.dispatchHandlers(packet, obj => process.send(obj))