Browse Source

started working oden

pull/12/head
Guillermo 10 years ago
parent
commit
61f98fb036
  1. 17
      lib/Tesseract.js
  2. 11688
      worker/worker.js
  3. 36
      worker_src/worker.js

17
lib/Tesseract.js

@ -2,11 +2,14 @@ var Tesseract = (function(){
var Tesseract = {} var Tesseract = {}
var blob = new Blob(["importScripts('http://localhost:1234/master/worker/worker.js');"]); //https://cdn.rawgit.com/naptha/tesseract.js/master/worker/worker.js
//https://rawgit.com/naptha/tesseract.js/master/worker/worker.js for testing
var blob = new Blob(["importScripts('https://cdn.rawgit.com/naptha/tessdata/worker/worker.js');"]);
console.log('localhost') console.log('localhost')
var worker = new Worker(window.URL.createObjectURL(blob)); var worker = new Worker(window.URL.createObjectURL(blob));
worker.postMessage({init: {mem: 16777216*6}})
console.log(worker) var bigworker = false
var index = 0 var index = 0
var handlers = [] var handlers = []
@ -77,10 +80,16 @@ var Tesseract = (function(){
Tesseract.recognize = function(image, options, callback){ Tesseract.recognize = function(image, options, callback){
var lang = options.lang var lang = options.lang
if(typeof lang === "undefined"){ if (typeof lang === "undefined"){
lang = 'eng' lang = 'eng'
} }
if (!bigworker && ['chi_sim', 'chi_tra', 'jpn'].indexOf(lang) != -1){
worker.postMessage({init: {mem: 16777216*10}})
bigworker = true
console.log('started big worker')
}
if (typeof options === 'string') { if (typeof options === 'string') {
lang = options lang = options
options = {} options = {}

11688
worker/worker.js

File diff suppressed because it is too large Load Diff

36
worker_src/worker.js

@ -1,3 +1,4 @@
var Tesseract304 = require('tesseract')
var leveljs = require('level-js') var leveljs = require('level-js')
var db; var db;
if (typeof indexedDB === 'undefined'){ if (typeof indexedDB === 'undefined'){
@ -13,12 +14,14 @@ var filesizes = {"afr": 1079573, "ara": 1701536, "aze": 1420865, "bel": 1276820,
var pako = require('pako') var pako = require('pako')
var T = (function createTesseractInstance(){ var T;
var tesseractinit = (function createTesseractInstance(memory){
curindex = 0 curindex = 0
var Module = Tesseract304({ var Module = Tesseract304({
TOTAL_MEMORY: 6*16777216, //must be a multiple of 10 megabytes TOTAL_MEMORY: memory, //must be a multiple of 10 megabytes
TesseractProgress: function(percent){ TesseractProgress: function(percent){
postMessage({ postMessage({
index: curindex, index: curindex,
@ -341,7 +344,7 @@ var T = (function createTesseractInstance(){
return { return {
text: base.GetUTF8Text(), text: base.GetUTF8Text(),
html: base.GetHOCRText(), html: deindent(base.GetHOCRText()),
confidence: base.MeanTextConf(), confidence: base.MeanTextConf(),
@ -353,6 +356,18 @@ var T = (function createTesseractInstance(){
} }
} }
// Strips one level of indentation (two leading spaces) from every line of
// `html`, but only when the first line itself begins with two spaces.
// Lines that do not start with two spaces are left untouched.
//
// html:    newline-separated markup string (called with the HOCR text).
// returns: the same markup with the common two-space indent removed, or the
//          input unchanged when the first line is not indented.
function deindent(html){
	// Checked character by character so the test cannot silently degrade if
	// whitespace inside a string literal gets collapsed; the previous
	// `substring(0,2) === " "` compared two characters against one and so
	// never matched an indented line.
	function isIndented(line){
		return line.charAt(0) === ' ' && line.charAt(1) === ' '
	}

	var lines = html.split('\n')
	if (!isIndented(lines[0])) {
		return html
	}
	for (var i = 0; i < lines.length; i++) {
		if (isIndented(lines[i])) {
			lines[i] = lines[i].slice(2)
		}
	}
	return lines.join('\n')
}
function desaturate(image){ function desaturate(image){
var width, height; var width, height;
if(image.data){ if(image.data){
@ -413,8 +428,9 @@ var T = (function createTesseractInstance(){
if (options.hasOwnProperty(option)) { if (options.hasOwnProperty(option)) {
base.SetVariable(option, options[option]); base.SetVariable(option, options[option]);
postMessage({ postMessage({
progress: { index: index,
set_variable: { 'progress': {
'set_variable': {
variable: option, variable: option,
value: options[option] value: options[option]
} }
@ -485,7 +501,7 @@ var T = (function createTesseractInstance(){
orientation_degrees: [0, 270, 180, 90][oid], orientation_degrees: [0, 270, 180, 90][oid],
orientation_confidence: best.get_oconfidence() orientation_confidence: best.get_oconfidence()
}) })
base.End(); base.End();
Module._free(ptr); Module._free(ptr);
} }
@ -497,10 +513,14 @@ var T = (function createTesseractInstance(){
recognize: recognize, recognize: recognize,
detect: detect detect: detect
} }
})() })
onmessage = function(e) { onmessage = function(e) {
if(e.data.fun === 'recognize'){
if(e.data.init){
T = tesseractinit(e.data.init.mem)
}
else if(e.data.fun === 'recognize'){
T.recognize(e.data.index, e.data.image, e.data.lang, e.data.options, function(err, result){ T.recognize(e.data.index, e.data.image, e.data.lang, e.data.options, function(err, result){
postMessage({index: e.data.index, err:err, result: result}) postMessage({index: e.data.index, err:err, result: result})
}) })

Loading…
Cancel
Save