diff --git a/src/browser/worker.js b/src/browser/worker.js index 58d8bb6..33e8ff9 100644 --- a/src/browser/worker.js +++ b/src/browser/worker.js @@ -7,8 +7,6 @@ global.addEventListener('message', function(e){ workerUtils.dispatchHandlers(packet, obj => postMessage(obj)) }) -exports.getLanguageData = require('./lang.js') - exports.getCore = function(req, res){ if(!global.TesseractCore){ res.progress({ status: 'loading tesseract core' }) @@ -18,4 +16,6 @@ exports.getCore = function(req, res){ return TesseractCore } +exports.getLanguageData = require('./lang.js') + workerUtils.setAdapter(module.exports); diff --git a/src/common/circularize.js b/src/common/circularize.js index 91d3ab2..804bdc2 100644 --- a/src/common/circularize.js +++ b/src/common/circularize.js @@ -1,3 +1,10 @@ +// The result of dump.js is a big JSON tree +// which can be easily serialized (for instance +// to be sent from a webworker to the main app +// or through Node's IPC), but we want +// a (circular) DOM-like interface for walking +// through the data. + module.exports = function circularize(page){ page.paragraphs = [] page.lines = [] diff --git a/src/common/desaturate.js b/src/common/desaturate.js index 8b7a909..ab83054 100644 --- a/src/common/desaturate.js +++ b/src/common/desaturate.js @@ -1,8 +1,11 @@ +// This converts an image to grayscale + module.exports = function desaturate(image){ var width, height; if(image.data){ var src = image.data; - width = image.width, height = image.height; + width = image.width, + height = image.height; var dst = new Uint8Array(width * height); var srcLength = src.length | 0, srcLength_16 = (srcLength - 16) | 0; @@ -16,8 +19,6 @@ module.exports = function desaturate(image){ for (; i < srcLength; i += 4, ++j) //finish up dst[j] = (((src[i] * 77 + src[i+1] * 151 + src[i+2] * 28) * src[i+3]) + ((255-src[i+3]) << 15) + 32768) >> 16 image = dst; - } else { - throw 'Expected ImageData' - } + } else { throw 'Invalid ImageData' } return image } \ No newline at end of file diff --git a/src/common/dump.js b/src/common/dump.js index b04ec09..9019077 100644 --- a/src/common/dump.js +++ b/src/common/dump.js @@ -1,15 +1,3 @@ -function deindent(html){ - var lines = html.split('\n') - if(lines[0].substring(0,2) === " "){ - for (var i = 0; i < lines.length; i++) { - if (lines[i].substring(0,2) === " ") { - lines[i] = lines[i].slice(2) - } - }; - } - return lines.join('\n') -} - module.exports = function DumpLiterallyEverything(Module, base){ var ri = base.GetIterator(); var blocks = []; @@ -158,4 +146,19 @@ module.exports = function DumpLiterallyEverything(Module, base){ oem: enumToString(base.oem(), 'OEM'), version: base.Version(), } -} \ No newline at end of file +} + +// the generated HOCR is excessively indented, so +// we get rid of that indentation + +function deindent(html){ + var lines = html.split('\n') + if(lines[0].substring(0, 2) === " "){ + for (var i = 0; i < lines.length; i++) { + if (lines[i].substring(0,2) === " ") { + lines[i] = lines[i].slice(2) + } + }; + } + return lines.join('\n') +} diff --git a/src/node/index.js b/src/node/index.js index 17e4db8..a894df9 100644 --- a/src/node/index.js +++ b/src/node/index.js @@ -1,4 +1,5 @@ var path = require('path') + exports.defaultOptions = { workerPath: path.join(__dirname, 'worker.js'), langPath: 'http://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/', diff --git a/src/node/worker.js b/src/node/worker.js index e05be34..56d0eab 100644 --- a/src/node/worker.js +++ b/src/node/worker.js @@ -6,9 +6,6 @@ process.on('message', function(packet){ workerUtils.dispatchHandlers(packet, obj => process.send(obj)) }) -exports.getLanguageData = require('./lang.js') - - var TesseractCore; exports.getCore = function(req, res){ if(!TesseractCore){ @@ -19,4 +16,6 @@ exports.getCore = function(req, res){ return TesseractCore } +exports.getLanguageData = require('./lang.js') + workerUtils.setAdapter(module.exports);