Browse Source

started working oden

pull/12/head
Guillermo 10 years ago
parent
commit
71fbe3d8ea
  1. 17
      lib/Tesseract.js
  2. 36
      worker_src/worker.js

17
lib/Tesseract.js

@ -2,11 +2,14 @@ var Tesseract = (function(){ @@ -2,11 +2,14 @@ var Tesseract = (function(){
var Tesseract = {}
var blob = new Blob(["importScripts('http://localhost:1234/master/worker/worker.js');"]);
//https://cdn.rawgit.com/naptha/tesseract.js/master/worker/worker.js
//https://rawgit.com/naptha/tesseract.js/master/worker/worker.js for testing
var blob = new Blob(["importScripts('https://cdn.rawgit.com/naptha/tessdata/worker/worker.js');"]);
console.log('localhost')
var worker = new Worker(window.URL.createObjectURL(blob));
console.log(worker)
worker.postMessage({init: {mem: 16777216*6}})
var bigworker = false
var index = 0
var handlers = []
@ -77,10 +80,16 @@ var Tesseract = (function(){ @@ -77,10 +80,16 @@ var Tesseract = (function(){
Tesseract.recognize = function(image, options, callback){
var lang = options.lang
if(typeof lang === "undefined"){
if (typeof lang === "undefined"){
lang = 'eng'
}
if (!bigworker && ['chi_sim', 'chi_tra', 'jpn'].indexOf(lang) != -1){
worker.postMessage({init: {mem: 16777216*10}})
bigworker = true
console.log('started big worker')
}
if (typeof options === 'string') {
lang = options
options = {}

36
worker_src/worker.js

@ -1,3 +1,4 @@ @@ -1,3 +1,4 @@
var Tesseract304 = require('tesseract')
var leveljs = require('level-js')
var db;
if (typeof indexedDB === 'undefined'){
@ -13,12 +14,14 @@ var filesizes = {"afr": 1079573, "ara": 1701536, "aze": 1420865, "bel": 1276820, @@ -13,12 +14,14 @@ var filesizes = {"afr": 1079573, "ara": 1701536, "aze": 1420865, "bel": 1276820,
var pako = require('pako')
var T = (function createTesseractInstance(){
var T;
var tesseractinit = (function createTesseractInstance(memory){
curindex = 0
var Module = Tesseract304({
TOTAL_MEMORY: 6*16777216, //must be a multiple of 10 megabytes
TOTAL_MEMORY: memory, //must be a multiple of 10 megabytes
TesseractProgress: function(percent){
postMessage({
index: curindex,
@ -341,7 +344,7 @@ var T = (function createTesseractInstance(){ @@ -341,7 +344,7 @@ var T = (function createTesseractInstance(){
return {
text: base.GetUTF8Text(),
html: base.GetHOCRText(),
html: deindent(base.GetHOCRText()),
confidence: base.MeanTextConf(),
@ -353,6 +356,18 @@ var T = (function createTesseractInstance(){ @@ -353,6 +356,18 @@ var T = (function createTesseractInstance(){
}
}
/**
 * Strips one level of two-space indentation from a block of text.
 *
 * The text is split on '\n'. When the first line begins with two spaces,
 * every line that begins with two spaces has those two characters removed;
 * lines without that prefix are left untouched. When the first line is not
 * indented, the text comes back unchanged.
 *
 * @param {string} html - Text to de-indent (called on the hOCR output).
 * @returns {string} The de-indented text, lines joined with '\n'.
 */
function deindent(html){
    // Two spaces: must be the same width that slice() removes below, otherwise
    // the prefix comparison can never match an indented line.
    var INDENT = '  '
    var lines = html.split('\n')

    // The first line decides whether the whole text is treated as indented.
    if (lines[0].substring(0, INDENT.length) === INDENT) {
        for (var i = 0; i < lines.length; i++) {
            if (lines[i].substring(0, INDENT.length) === INDENT) {
                lines[i] = lines[i].slice(INDENT.length)
            }
        }
    }

    return lines.join('\n')
}
function desaturate(image){
var width, height;
if(image.data){
@ -413,8 +428,9 @@ var T = (function createTesseractInstance(){ @@ -413,8 +428,9 @@ var T = (function createTesseractInstance(){
if (options.hasOwnProperty(option)) {
base.SetVariable(option, options[option]);
postMessage({
progress: {
set_variable: {
index: index,
'progress': {
'set_variable': {
variable: option,
value: options[option]
}
@ -485,7 +501,7 @@ var T = (function createTesseractInstance(){ @@ -485,7 +501,7 @@ var T = (function createTesseractInstance(){
orientation_degrees: [0, 270, 180, 90][oid],
orientation_confidence: best.get_oconfidence()
})
base.End();
Module._free(ptr);
}
@ -497,10 +513,14 @@ var T = (function createTesseractInstance(){ @@ -497,10 +513,14 @@ var T = (function createTesseractInstance(){
recognize: recognize,
detect: detect
}
})()
})
onmessage = function(e) {
if(e.data.fun === 'recognize'){
if(e.data.init){
T = tesseractinit(e.data.init.mem)
}
else if(e.data.fun === 'recognize'){
T.recognize(e.data.index, e.data.image, e.data.lang, e.data.options, function(err, result){
postMessage({index: e.data.index, err:err, result: result})
})

Loading…
Cancel
Save