diff --git a/dist/tesseract.js b/dist/tesseract.js index 4bc3575..117dbd4 100644 --- a/dist/tesseract.js +++ b/dist/tesseract.js @@ -3,7 +3,8 @@ exports.defaultOptions = { langPath: 'https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/', - workerPath: 'dist/worker.js', + // workerPath: 'dist/worker.js', + workerPath: 'https://cdn.rawgit.com/naptha/tesseract.js/0.1.0/dist/worker.js', tesseractPath: 'https://cdn.rawgit.com/naptha/tesseract.js-core/0.1.0/index.js' }; @@ -73,6 +74,13 @@ function loadImage(image, cb) { },{}],2:[function(require,module,exports){ "use strict"; +// The result of dump.js is a big JSON tree +// which can be easily serialized (for instance +// to be sent from a webworker to the main app +// or through Node's IPC), but we want +// a (circular) DOM-like interface for walking +// through the data. + module.exports = function circularize(page) { page.paragraphs = []; page.lines = []; diff --git a/dist/worker.js b/dist/worker.js index d68d7d1..dd3f60b 100644 --- a/dist/worker.js +++ b/dist/worker.js @@ -11937,8 +11937,6 @@ global.addEventListener('message', function (e) { }); }); -exports.getLanguageData = require('./lang.js'); - exports.getCore = function (req, res) { if (!global.TesseractCore) { res.progress({ status: 'loading tesseract core' }); @@ -11948,12 +11946,16 @@ exports.getCore = function (req, res) { return TesseractCore; }; +exports.getLanguageData = require('./lang.js'); + workerUtils.setAdapter(module.exports); }).call(this,typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? window : {}) },{"../common/worker.js":47,"./lang.js":42}],44:[function(require,module,exports){ 'use strict'; +// This converts an image to grayscale + module.exports = function desaturate(image) { var width, height; if (image.data) { @@ -11975,25 +11977,13 @@ module.exports = function desaturate(image) { dst[j] = (src[i] * 77 + src[i + 1] * 151 + src[i + 2] * 28) * src[i + 3] + (255 - src[i + 3] << 15) + 32768 >> 16; }image = dst; } else { - throw 'Expected ImageData'; + throw 'Invalid ImageData'; } return image; }; },{}],45:[function(require,module,exports){ -"use strict"; - -function deindent(html) { - var lines = html.split('\n'); - if (lines[0].substring(0, 2) === " ") { - for (var i = 0; i < lines.length; i++) { - if (lines[i].substring(0, 2) === " ") { - lines[i] = lines[i].slice(2); - } - }; - } - return lines.join('\n'); -} +'use strict'; module.exports = function DumpLiterallyEverything(Module, base) { var ri = base.GetIterator(); @@ -12148,6 +12138,21 @@ module.exports = function DumpLiterallyEverything(Module, base) { }; }; +// the generated HOCR is excessively indented, so +// we get rid of that indentation + +function deindent(html) { + var lines = html.split('\n'); + if (lines[0].substring(0, 2) === " ") { + for (var i = 0; i < lines.length; i++) { + if (lines[i].substring(0, 2) === " ") { + lines[i] = lines[i].slice(2); + } + }; + } + return lines.join('\n'); +} + },{}],46:[function(require,module,exports){ module.exports={"afr": 1079573, "ara": 1701536, "aze": 1420865, "bel": 1276820, "ben": 6772012, "bul": 1605615, "cat": 1652368, "ces": 1035441, "chi_sim": 17710414, "chi_tra": 24717749, "chr": 320649, "dan-frak": 677656, "dan": 1972936, "deu-frak": 822644, "deu": 991656, "ell": 859719, "eng": 9453554, "enm": 619254, "epo": 1241212, "equ": 821130, "est": 1905040, "eus": 1641190, "fin": 979418, "fra": 1376221, "frk": 5912963, "frm": 5147082, "glg": 1674938, "grc": 3012615, "heb": 1051501, "hin": 6590065, "hrv": 1926995, "hun": 3074473, "ind": 1874776, "isl": 1634041, "ita": 948593, "ita_old": 3436571, "jpn": 13507168, "kan": 4390317, "kor": 5353098, "lav": 1843944, "lit": 1779240, "mal": 5966263, "meme": 88453, "mkd": 1163087, "mlt": 1463001, "msa": 1665427, "nld": 1134708, "nor": 2191610, "osd": 4274649, "pol": 7024662, "por": 909359, "ron": 915680, "rus": 5969957, "slk-frak": 289885, "slk": 2217342, "slv": 1611338, "spa": 883170, "spa_old": 5647453, "sqi": 1667041, "srp": 1770244, "swa": 757916, "swe": 2451917, "tam": 3498763, "tel": 5795246, "tgl": 1496256, "tha": 3811136, "tur": 3563264, "ukr": 937566, "vie": 2195922} },{}],47:[function(require,module,exports){ diff --git a/src/browser/index.js b/src/browser/index.js index 7ac97fa..1518353 100644 --- a/src/browser/index.js +++ b/src/browser/index.js @@ -1,6 +1,7 @@ exports.defaultOptions = { langPath: 'https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/', - workerPath: 'dist/worker.js', + // workerPath: 'dist/worker.js', + workerPath: 'https://cdn.rawgit.com/naptha/tesseract.js/0.1.0/dist/worker.js', tesseractPath: 'https://cdn.rawgit.com/naptha/tesseract.js-core/0.1.0/index.js', }