diff --git a/README.md b/README.md index f1e8687..c149f27 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![npm version](https://badge.fury.io/js/tesseract.js.svg)](https://badge.fury.io/js/tesseract.js) [![Beerpay](https://img.shields.io/badge/Click%20Here%20-to%20Buy%20Us%20A%20Beer-red.svg)](https://beerpay.io/naptha/tesseract.js) -Tesseract.js is a javascript library that gets words in [almost any language](./docs/tesseract_lang_list.md) out of images through [Optical Character Recognition(OCR)](https://en.wikipedia.org/wiki/Optical_character_recognition). ([Demo]https://en.wikipedia.org/wiki/Optical_character_recognition(http://tesseract.projectnaptha.com/)) +Tesseract.js is a javascript library that gets words in [almost any language](./docs/tesseract_lang_list.md) out of images, using [OCR](https://en.wikipedia.org/wiki/Optical_character_recognition). ([Demo](http://tesseract.projectnaptha.com/)) [![fancy demo gif](./demo.gif "Demo")](http://tesseract.projectnaptha.com) diff --git a/dist/tesseract.js b/dist/tesseract.js index 5b928cc..f5feb64 100644 --- a/dist/tesseract.js +++ b/dist/tesseract.js @@ -1,89 +1,4 @@ (function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.Tesseract = f()}})(function(){var define,module,exports;return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o 0 && arguments[0] !== undefined ? arguments[0] : {}; + + return new TesseractWorker(Object.assign({}, adapter.defaultOptions, workerOptions, { create: create, version: version })); } var TesseractWorker = function () { @@ -629,26 +540,25 @@ var TesseractWorker = function () { _createClass(TesseractWorker, [{ key: 'recognize', - value: function recognize(image, options) { + value: function recognize(image) { var _this = this; + var options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {}; + return this._delay(function (job) { - if (typeof options === 'string') { - options = { lang: options }; - } else { - options = options || {}; - options.lang = options.lang || 'eng'; - } + if (typeof options === 'string') options = { lang: options }; + options.lang = options.lang || 'eng'; job._send('recognize', { image: image, options: options, workerOptions: _this.workerOptions }); }); } }, { key: 'detect', - value: function detect(image, options) { + value: function detect(image) { var _this2 = this; - options = options || {}; + var options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {}; + return this._delay(function (job) { job._send('detect', { image: image, options: options, workerOptions: _this2.workerOptions }); }); @@ -679,14 +589,13 @@ var TesseractWorker = function () { key: '_dequeue', value: function _dequeue() { this._currentJob = null; - if (this._queue.length > 0) { + if (this._queue.length) { this._queue[0](); } } }, { key: '_recv', value: function _recv(packet) { - if (packet.status === 'resolve' && packet.action === 'recognize') { packet.data = circularize(packet.data); } @@ -702,9 +611,7 @@ var TesseractWorker = function () { return TesseractWorker; }(); -var DefaultTesseract = create(); - -module.exports = DefaultTesseract; +module.exports = create(); -},{"../package.json":3,"./common/circularize.js":5,"./common/job":6,"./node/index.js":4,"object-assign":1}]},{},[7])(7) +},{"../package.json":2,"./common/circularize.js":4,"./common/job":5,"./node/index.js":3}]},{},[6])(6) }); \ No newline at end of file diff --git a/dist/worker.js b/dist/worker.js index 172d81e..cdcd7f6 100644 --- a/dist/worker.js +++ b/dist/worker.js @@ -82,7 +82,7 @@ AbstractChainedBatch.prototype.write = function (options, callback) { module.exports = AbstractChainedBatch }).call(this,require('_process')) -},{"_process":33}],2:[function(require,module,exports){ +},{"_process":32}],2:[function(require,module,exports){ (function (process){ /* Copyright (c) 2013 Rod Vagg, MIT License */ @@ -135,7 +135,7 @@ AbstractIterator.prototype.end = function (callback) { module.exports = AbstractIterator }).call(this,require('_process')) -},{"_process":33}],3:[function(require,module,exports){ +},{"_process":32}],3:[function(require,module,exports){ (function (Buffer,process){ /* Copyright (c) 2013 Rod Vagg, MIT License */ @@ -395,7 +395,7 @@ module.exports.AbstractIterator = AbstractIterator module.exports.AbstractChainedBatch = AbstractChainedBatch }).call(this,{"isBuffer":require("../is-buffer/index.js")},require('_process')) -},{"../is-buffer/index.js":9,"./abstract-chained-batch":1,"./abstract-iterator":2,"_process":33,"xtend":4}],4:[function(require,module,exports){ +},{"../is-buffer/index.js":10,"./abstract-chained-batch":1,"./abstract-iterator":2,"_process":32,"xtend":4}],4:[function(require,module,exports){ module.exports = extend function extend() { @@ -2323,7 +2323,7 @@ function isnan (val) { } }).call(this,typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? window : {}) -},{"base64-js":5,"ieee754":8,"isarray":10}],7:[function(require,module,exports){ +},{"base64-js":5,"ieee754":8,"isarray":11}],7:[function(require,module,exports){ /*global window:false, self:false, define:false, module:false */ /** @@ -3817,6 +3817,31 @@ exports.write = function (buffer, value, offset, isLE, mLen, nBytes) { } },{}],9:[function(require,module,exports){ +if (typeof Object.create === 'function') { + // implementation from standard node.js 'util' module + module.exports = function inherits(ctor, superCtor) { + ctor.super_ = superCtor + ctor.prototype = Object.create(superCtor.prototype, { + constructor: { + value: ctor, + enumerable: false, + writable: true, + configurable: true + } + }); + }; +} else { + // old school shim for old browsers + module.exports = function inherits(ctor, superCtor) { + ctor.super_ = superCtor + var TempCtor = function () {} + TempCtor.prototype = superCtor.prototype + ctor.prototype = new TempCtor() + ctor.prototype.constructor = ctor + } +} + +},{}],10:[function(require,module,exports){ /*! * Determine if an object is a Buffer * @@ -3839,14 +3864,14 @@ function isSlowBuffer (obj) { return typeof obj.readFloatLE === 'function' && typeof obj.slice === 'function' && isBuffer(obj.slice(0, 0)) } -},{}],10:[function(require,module,exports){ +},{}],11:[function(require,module,exports){ var toString = {}.toString; module.exports = Array.isArray || function (arr) { return toString.call(arr) == '[object Array]'; }; -},{}],11:[function(require,module,exports){ +},{}],12:[function(require,module,exports){ var Buffer = require('buffer').Buffer; module.exports = isBuffer; @@ -3856,7 +3881,7 @@ function isBuffer (o) { || /\[object (.+Array|Array.+)\]/.test(Object.prototype.toString.call(o)); } -},{"buffer":6}],12:[function(require,module,exports){ +},{"buffer":6}],13:[function(require,module,exports){ (function (Buffer){ module.exports = Level @@ -4034,7 +4059,7 @@ var checkKeyValue = Level.prototype._checkKeyValue = function (obj, type) { } }).call(this,require("buffer").Buffer) -},{"./iterator":13,"abstract-leveldown":3,"buffer":6,"idb-wrapper":7,"isbuffer":11,"typedarray-to-buffer":34,"util":37,"xtend":15}],13:[function(require,module,exports){ +},{"./iterator":14,"abstract-leveldown":3,"buffer":6,"idb-wrapper":7,"isbuffer":12,"typedarray-to-buffer":33,"util":35,"xtend":37}],14:[function(require,module,exports){ var util = require('util') var AbstractIterator = require('abstract-leveldown').AbstractIterator var ltgt = require('ltgt') @@ -4108,43 +4133,7 @@ Iterator.prototype._next = function (callback) { this.callback = callback } -},{"abstract-leveldown":3,"ltgt":16,"util":37}],14:[function(require,module,exports){ -module.exports = hasKeys - -function hasKeys(source) { - return source !== null && - (typeof source === "object" || - typeof source === "function") -} - -},{}],15:[function(require,module,exports){ -var Keys = require("object-keys") -var hasKeys = require("./has-keys") - -module.exports = extend - -function extend() { - var target = {} - - for (var i = 0; i < arguments.length; i++) { - var source = arguments[i] - - if (!hasKeys(source)) { - continue - } - - var keys = Keys(source) - - for (var j = 0; j < keys.length; j++) { - var name = keys[j] - target[name] = source[name] - } - } - - return target -} - -},{"./has-keys":14,"object-keys":18}],16:[function(require,module,exports){ +},{"abstract-leveldown":3,"ltgt":15,"util":35}],15:[function(require,module,exports){ (function (Buffer){ exports.compare = function (a, b) { @@ -4294,7 +4283,7 @@ exports.filter = function (range, compare) { } }).call(this,{"isBuffer":require("../is-buffer/index.js")}) -},{"../is-buffer/index.js":9}],17:[function(require,module,exports){ +},{"../is-buffer/index.js":10}],16:[function(require,module,exports){ var hasOwn = Object.prototype.hasOwnProperty; var toString = Object.prototype.toString; @@ -4336,11 +4325,11 @@ module.exports = function forEach(obj, fn) { }; -},{}],18:[function(require,module,exports){ +},{}],17:[function(require,module,exports){ module.exports = Object.keys || require('./shim'); -},{"./shim":20}],19:[function(require,module,exports){ +},{"./shim":19}],18:[function(require,module,exports){ var toString = Object.prototype.toString; module.exports = function isArguments(value) { @@ -4358,7 +4347,7 @@ module.exports = function isArguments(value) { }; -},{}],20:[function(require,module,exports){ +},{}],19:[function(require,module,exports){ (function () { "use strict"; @@ -4422,7 +4411,7 @@ module.exports = function isArguments(value) { }()); -},{"./foreach":17,"./isArguments":19}],21:[function(require,module,exports){ +},{"./foreach":16,"./isArguments":18}],20:[function(require,module,exports){ 'use strict'; @@ -4842,7 +4831,7 @@ exports.inflate = inflate; exports.inflateRaw = inflateRaw; exports.ungzip = inflate; -},{"./utils/common":22,"./utils/strings":23,"./zlib/constants":25,"./zlib/gzheader":27,"./zlib/inflate":29,"./zlib/messages":31,"./zlib/zstream":32}],22:[function(require,module,exports){ +},{"./utils/common":21,"./utils/strings":22,"./zlib/constants":24,"./zlib/gzheader":26,"./zlib/inflate":28,"./zlib/messages":30,"./zlib/zstream":31}],21:[function(require,module,exports){ 'use strict'; @@ -4946,7 +4935,7 @@ exports.setTyped = function (on) { exports.setTyped(TYPED_OK); -},{}],23:[function(require,module,exports){ +},{}],22:[function(require,module,exports){ // String encode/decode helpers 'use strict'; @@ -5133,7 +5122,7 @@ exports.utf8border = function (buf, max) { return (pos + _utf8len[buf[pos]] > max) ? pos : max; }; -},{"./common":22}],24:[function(require,module,exports){ +},{"./common":21}],23:[function(require,module,exports){ 'use strict'; // Note: adler32 takes 12% for level 0 and 2% for level 6. @@ -5167,7 +5156,7 @@ function adler32(adler, buf, len, pos) { module.exports = adler32; -},{}],25:[function(require,module,exports){ +},{}],24:[function(require,module,exports){ 'use strict'; @@ -5219,7 +5208,7 @@ module.exports = { //Z_NULL: null // Use -1 or null inline, depending on var type }; -},{}],26:[function(require,module,exports){ +},{}],25:[function(require,module,exports){ 'use strict'; // Note: we can't get significant speed boost here. @@ -5262,7 +5251,7 @@ function crc32(crc, buf, len, pos) { module.exports = crc32; -},{}],27:[function(require,module,exports){ +},{}],26:[function(require,module,exports){ 'use strict'; @@ -5304,7 +5293,7 @@ function GZheader() { module.exports = GZheader; -},{}],28:[function(require,module,exports){ +},{}],27:[function(require,module,exports){ 'use strict'; // See state defs from inflate.js @@ -5632,7 +5621,7 @@ module.exports = function inflate_fast(strm, start) { return; }; -},{}],29:[function(require,module,exports){ +},{}],28:[function(require,module,exports){ 'use strict'; @@ -7172,7 +7161,7 @@ exports.inflateSyncPoint = inflateSyncPoint; exports.inflateUndermine = inflateUndermine; */ -},{"../utils/common":22,"./adler32":24,"./crc32":26,"./inffast":28,"./inftrees":30}],30:[function(require,module,exports){ +},{"../utils/common":21,"./adler32":23,"./crc32":25,"./inffast":27,"./inftrees":29}],29:[function(require,module,exports){ 'use strict'; @@ -7501,7 +7490,7 @@ module.exports = function inflate_table(type, lens, lens_index, codes, table, ta return 0; }; -},{"../utils/common":22}],31:[function(require,module,exports){ +},{"../utils/common":21}],30:[function(require,module,exports){ 'use strict'; module.exports = { @@ -7516,7 +7505,7 @@ module.exports = { '-6': 'incompatible version' /* Z_VERSION_ERROR (-6) */ }; -},{}],32:[function(require,module,exports){ +},{}],31:[function(require,module,exports){ 'use strict'; @@ -7547,7 +7536,7 @@ function ZStream() { module.exports = ZStream; -},{}],33:[function(require,module,exports){ +},{}],32:[function(require,module,exports){ // shim for using process in browser var process = module.exports = {}; @@ -7729,7 +7718,7 @@ process.chdir = function (dir) { }; process.umask = function() { return 0; }; -},{}],34:[function(require,module,exports){ +},{}],33:[function(require,module,exports){ (function (Buffer){ /** * Convert a typed array to a Buffer without a copy @@ -7752,39 +7741,14 @@ module.exports = function (arr) { } }).call(this,require("buffer").Buffer) -},{"buffer":6}],35:[function(require,module,exports){ -if (typeof Object.create === 'function') { - // implementation from standard node.js 'util' module - module.exports = function inherits(ctor, superCtor) { - ctor.super_ = superCtor - ctor.prototype = Object.create(superCtor.prototype, { - constructor: { - value: ctor, - enumerable: false, - writable: true, - configurable: true - } - }); - }; -} else { - // old school shim for old browsers - module.exports = function inherits(ctor, superCtor) { - ctor.super_ = superCtor - var TempCtor = function () {} - TempCtor.prototype = superCtor.prototype - ctor.prototype = new TempCtor() - ctor.prototype.constructor = ctor - } -} - -},{}],36:[function(require,module,exports){ +},{"buffer":6}],34:[function(require,module,exports){ module.exports = function isBuffer(arg) { return arg && typeof arg === 'object' && typeof arg.copy === 'function' && typeof arg.fill === 'function' && typeof arg.readUInt8 === 'function'; } -},{}],37:[function(require,module,exports){ +},{}],35:[function(require,module,exports){ (function (process,global){ // Copyright Joyent, Inc. and other Node contributors. // @@ -8374,7 +8338,43 @@ function hasOwnProperty(obj, prop) { } }).call(this,require('_process'),typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? window : {}) -},{"./support/isBuffer":36,"_process":33,"inherits":35}],38:[function(require,module,exports){ +},{"./support/isBuffer":34,"_process":32,"inherits":9}],36:[function(require,module,exports){ +module.exports = hasKeys + +function hasKeys(source) { + return source !== null && + (typeof source === "object" || + typeof source === "function") +} + +},{}],37:[function(require,module,exports){ +var Keys = require("object-keys") +var hasKeys = require("./has-keys") + +module.exports = extend + +function extend() { + var target = {} + + for (var i = 0; i < arguments.length; i++) { + var source = arguments[i] + + if (!hasKeys(source)) { + continue + } + + var keys = Keys(source) + + for (var j = 0; j < keys.length; j++) { + var name = keys[j] + target[name] = source[name] + } + } + + return target +} + +},{"./has-keys":36,"object-keys":17}],38:[function(require,module,exports){ 'use strict'; var leveljs = require('level-js'); @@ -8458,7 +8458,7 @@ function fetchLanguageData(req, res, cb) { xhr.send(); } -},{"../common/langdata.json":42,"level-js":12,"pako/lib/inflate.js":21}],39:[function(require,module,exports){ +},{"../common/langdata.json":42,"level-js":13,"pako/lib/inflate.js":20}],39:[function(require,module,exports){ (function (process,global){ 'use strict'; @@ -8489,7 +8489,7 @@ exports.getLanguageData = require('./lang.js'); workerUtils.setAdapter(module.exports); }).call(this,require('_process'),typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? window : {}) -},{"../common/worker.js":43,"./lang.js":38,"_process":33}],40:[function(require,module,exports){ +},{"../common/worker.js":43,"./lang.js":38,"_process":32}],40:[function(require,module,exports){ 'use strict'; // This converts an image to grayscale @@ -8696,10 +8696,12 @@ module.exports={"afr": 1079573, "ara": 1701536, "aze": 1420865, "bel": 1276820, },{}],43:[function(require,module,exports){ 'use strict'; -var latestJob; -var Module; -var base; -var adapter = {}; +var latestJob, + Module, + base, + adapter = {}, + dump = require('./dump.js'), + desaturate = require('./desaturate.js'); function dispatchHandlers(packet, send) { function respond(status, data) { @@ -8735,7 +8737,7 @@ exports.setAdapter = function setAdapter(impl) { function handleInit(req, res) { var MIN_MEMORY = 100663296; - if (['chi_sim', 'chi_tra', 'jpn'].indexOf(req.options.lang) != -1) { + if (['chi_sim', 'chi_tra', 'jpn'].includes(req.options.lang)) { MIN_MEMORY = 167772160; } @@ -8758,9 +8760,6 @@ function handleInit(req, res) { } } -var dump = require('./dump.js'); -var desaturate = require('./desaturate.js'); - function setImage(Module, base, image) { var imgbin = desaturate(image), width = image.width, @@ -8773,16 +8772,17 @@ function setImage(Module, base, image) { } function loadLanguage(req, res, cb) { - var lang = req.options.lang; + var lang = req.options.lang, + langFile = lang + '.traineddata'; if (!Module._loadedLanguages) Module._loadedLanguages = {}; if (lang in Module._loadedLanguages) return cb(); adapter.getLanguageData(req, res, function (data) { - res.progress({ status: 'loading ' + lang + '.traineddata', progress: 0 }); - Module.FS_createDataFile('tessdata', lang + ".traineddata", data, true, false); + res.progress({ status: 'loading ' + langFile, progress: 0 }); + Module.FS_createDataFile('tessdata', langFile, data, true, false); Module._loadedLanguages[lang] = true; - res.progress({ status: 'loading ' + lang + '.traineddata', progress: 1 }); + res.progress({ status: 'loading ' + langFile, progress: 1 }); cb(); }); } @@ -8791,22 +8791,25 @@ function handleRecognize(req, res) { handleInit(req, res); loadLanguage(req, res, function () { - var lang = req.options.lang; + var options = req.options; + + function progressUpdate(progress) { + res.progress({ status: 'initializing api', progress: progress }); + } - res.progress({ status: 'initializing api', progress: 0 }); - base.Init(null, lang); - res.progress({ status: 'initializing api', progress: 0.3 }); + progressUpdate(0); + base.Init(null, req.options.lang); + progressUpdate(.3); - var options = req.options; for (var option in options) { if (options.hasOwnProperty(option)) { base.SetVariable(option, options[option]); } } - res.progress({ status: 'initializing api', progress: 0.6 }); + progressUpdate(.6); var ptr = setImage(Module, base, req.image); - res.progress({ status: 'initializing api', progress: 1 }); + progressUpdate(1); base.Recognize(null); @@ -8823,37 +8826,31 @@ function handleDetect(req, res) { handleInit(req, res); req.options.lang = 'osd'; loadLanguage(req, res, function () { - base.Init(null, 'osd'); base.SetPageSegMode(Module.PSM_OSD_ONLY); - var ptr = setImage(Module, base, req.image); + var ptr = setImage(Module, base, req.image), + results = new Module.OSResults(); - var results = new Module.OSResults(); - var success = base.DetectOS(results); - if (!success) { + if (!base.DetectOS(results)) { base.End(); Module._free(ptr); - res.reject("failed to detect os"); + res.reject("Failed to detect OS"); } else { - var charset = results.get_unicharset(); - - var best = results.get_best_result(); - var oid = best.get_orientation_id(), + var best = results.get_best_result(), + oid = best.get_orientation_id(), sid = best.get_script_id(); - var result = { + base.End(); + Module._free(ptr); + + res.resolve({ tesseract_script_id: sid, - script: charset.get_script_from_script_id(sid), + script: results.get_unicharset().get_script_from_script_id(sid), script_confidence: best.get_sconfidence(), orientation_degrees: [0, 270, 180, 90][oid], orientation_confidence: best.get_oconfidence() - }; - - base.End(); - Module._free(ptr); - - res.resolve(result); + }); } }); }