Browse Source

add worker sauce

pull/12/head
Guillermo 8 years ago
parent
commit
37e6de16bb
  1. 28
      src/worker/detect.js
  2. 5
      src/worker/dump.js
  3. 14
      src/worker/index.js
  4. 24
      src/worker/loadLanguage.js
  5. 26
      src/worker/recognize.js

28
src/worker/detect.js

@ -1,29 +1,29 @@
import desaturate from './desaturate' import desaturate from './desaturate'
import loadLanguage from './loadLanguage' import loadLanguage from './loadLanguage'
export default function detect(jobId, module, base, image, cb){ export default function detect(jobId, image, cb){
var width = image.width, height = image.height; var width = image.width, height = image.height;
image = desaturate(image) image = desaturate(image)
var ptr = module.allocate(image, 'i8', module.ALLOC_NORMAL); var ptr = self.module.allocate(image, 'i8', self.module.ALLOC_NORMAL);
// console.log('allocated image') // console.log('allocated image')
loadLanguage(jobId, module, 'osd', err => { loadLanguage(jobId, 'osd', err => {
module._free(ptr); self.module._free(ptr);
cb(err) cb(err)
}, success => { }, success => {
base.Init(null, 'osd') self.base.Init(null, 'osd')
base.SetPageSegMode(module.PSM_OSD_ONLY) self.base.SetPageSegMode(self.module.PSM_OSD_ONLY)
// console.log('loaded language') // console.log('loaded language')
base.SetImage(module.wrapPointer(ptr), width, height, 1, width) self.base.SetImage(self.module.wrapPointer(ptr), width, height, 1, width)
base.SetRectangle(0, 0, width, height) self.base.SetRectangle(0, 0, width, height)
var results = new module.OSResults(); var results = new self.module.OSResults();
var success = base.DetectOS(results); var success = self.base.DetectOS(results);
if(!success){ if(!success){
base.End(); self.base.End();
module._free(ptr); self.module._free(ptr);
cb("failed to detect os") cb("failed to detect os")
} }
else { else {
@ -46,8 +46,8 @@ export default function detect(jobId, module, base, image, cb){
orientation_confidence: best.get_oconfidence() orientation_confidence: best.get_oconfidence()
}) })
base.End(); self.base.End();
module._free(ptr); self.module._free(ptr);
} }
}) })
} }

5
src/worker/dump.js

@ -10,7 +10,10 @@ function deindent(html){
return lines.join('\n') return lines.join('\n')
} }
export default function DumpLiterallyEverything(module, base){ export default function DumpLiterallyEverything(){
var {module, base} = self
var ri = base.GetIterator(); var ri = base.GetIterator();
var blocks = []; var blocks = [];
var block, para, textline, word, symbol; var block, para, textline, word, symbol;

14
src/worker/index.js

@ -1,6 +1,3 @@
import TesseractCore from 'tesseract.js-core'
import pako from 'pako'
import recognize from './recognize' import recognize from './recognize'
import detect from './detect' import detect from './detect'
@ -14,7 +11,8 @@ onmessage = function(e) {
if(action == 'init'){ if(action == 'init'){
module = TesseractCore({ self.langUrl = args.langUrl
self.module = TesseractCore({
TOTAL_MEMORY: args.mem, //must be a multiple of 10 megabytes TOTAL_MEMORY: args.mem, //must be a multiple of 10 megabytes
TesseractProgress(percent){ TesseractProgress(percent){
postMessage({ jobId, postMessage({ jobId,
@ -25,15 +23,15 @@ onmessage = function(e) {
}, },
onRuntimeInitialized() {} onRuntimeInitialized() {}
}) })
module.FS_createPath("/","tessdata",true,true) self.module.FS_createPath("/","tessdata",true,true)
base = new module.TessBaseAPI() self.base = new self.module.TessBaseAPI()
} else if(action === 'recognize'){ } else if(action === 'recognize'){
var {image, options} = args var {image, options} = args
recognize(jobId, module, base, image, options, recognize(jobId, image, options,
(error, result) => postMessage({jobId, error, result})) (error, result) => postMessage({jobId, error, result}))
} else if(action === 'detect'){ } else if(action === 'detect'){
detect(jobId, module, base, args.image, detect(jobId, args.image,
(error, result) => postMessage({jobId, error, result})) (error, result) => postMessage({jobId, error, result}))
} }
} }

24
src/worker/loadLanguage.js

@ -1,12 +1,12 @@
import pako from 'pako' import {ungzip} from 'pako'
import db from './db' import db from './db'
import fileSizes from './fileSizes' import fileSizes from './fileSizes'
function getLanguageData(lang, progress, cb, url='https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/'+lang+'.traineddata.gz'){ function getLanguageData(lang, progress, cb){
var xhr = new XMLHttpRequest(); var xhr = new XMLHttpRequest();
xhr.responseType = 'arraybuffer'; xhr.responseType = 'arraybuffer';
xhr.open('GET', url, true); xhr.open('GET', self.LANG_URL + lang + '.traineddata.gz', true);
xhr.onerror = e => { xhr.onerror = e => {
xhr.onprogress = xhr.onload = null xhr.onprogress = xhr.onload = null
cb(xhr, null) cb(xhr, null)
} }
@ -20,7 +20,7 @@ function getLanguageData(lang, progress, cb, url='https://cdn.rawgit.com/naptha/
progress({'unzipping_lang_model': true}) progress({'unzipping_lang_model': true})
var response = new Uint8Array(xhr.response) var response = new Uint8Array(xhr.response)
while(response[0] == 0x1f && response[1] == 0x8b) response = pako.ungzip(response); while(response[0] == 0x1f && response[1] == 0x8b) response = ungzip(response);
progress({ progress({
'unzipped_lang_model': true, 'unzipped_lang_model': true,
@ -40,7 +40,7 @@ function getLanguageData(lang, progress, cb, url='https://cdn.rawgit.com/naptha/
} }
function load(lang, jobId, cb, url){ function load(lang, jobId, cb){
console.log('loadLanguage jobId', jobId) console.log('loadLanguage jobId', jobId)
@ -69,13 +69,13 @@ function load(lang, jobId, cb, url){
db.open({compression: false}, err => { db.open({compression: false}, err => {
if (err) return getLanguageData(lang, progressMessage, createDataFile, url); if (err) return getLanguageData(lang, progressMessage, createDataFile);
db.get(lang, (err, data) => { db.get(lang, (err, data) => {
if (err) return getLanguageData(lang, progressMessage, createDataFileCached, url) if (err) return getLanguageData(lang, progressMessage, createDataFileCached)
while(data[0] == 0x1f && data[1] == 0x8b) data = pako.ungzip(data); while(data[0] == 0x1f && data[1] == 0x8b) data = ungzip(data);
progressMessage({ loaded_lang_model: lang, from_cache: true }) progressMessage({ loaded_lang_model: lang, from_cache: true })
@ -86,14 +86,14 @@ function load(lang, jobId, cb, url){
var loaded_langs = [] var loaded_langs = []
export default function loadLanguage(jobId, module, lang, error, success){ export default function loadLanguage(jobId, lang, error, success){
if(loaded_langs.indexOf(lang) == -1) load(lang, jobId, function(err, result){ if(loaded_langs.indexOf(lang) == -1) load(lang, jobId, function(err, result){
if(err) return error(err) if(err) return error(err)
loaded_langs.push(lang) loaded_langs.push(lang)
module.FS_createDataFile('tessdata', lang +".traineddata", result, true, false); self.module.FS_createDataFile('tessdata', lang +".traineddata", result, true, false);
success() success()
}) })
else run(); else success();
} }

26
src/worker/recognize.js

@ -5,7 +5,7 @@ import dump from './dump'
var loaded_langs = [] var loaded_langs = []
export default function recognize(jobId, module, base, image, options, cb){ export default function recognize(jobId, image, options, cb){
console.log('recognize id', jobId) console.log('recognize id', jobId)
var {lang} = options var {lang} = options
@ -13,13 +13,13 @@ export default function recognize(jobId, module, base, image, options, cb){
image = desaturate(image) image = desaturate(image)
var ptr = module.allocate(image, 'i8', module.ALLOC_NORMAL); var ptr = self.module.allocate(image, 'i8', self.module.ALLOC_NORMAL);
loadLanguage(jobId, module, lang, err => { loadLanguage(jobId, lang, err => {
module._free(ptr) self.module._free(ptr)
cb(err) cb(err)
}, success => { }, success => {
base.Init(null, lang) self.base.Init(null, lang)
postMessage({ postMessage({
jobId, jobId,
@ -30,7 +30,7 @@ export default function recognize(jobId, module, base, image, options, cb){
for (var option in options) { for (var option in options) {
if (options.hasOwnProperty(option)) { if (options.hasOwnProperty(option)) {
base.SetVariable(option, options[option]); self.base.SetVariable(option, options[option]);
postMessage({ postMessage({
jobId: jobId, jobId: jobId,
'progress': { 'progress': {
@ -44,13 +44,13 @@ export default function recognize(jobId, module, base, image, options, cb){
} }
base.SetImage(module.wrapPointer(ptr), width, height, 1, width) self.base.SetImage(self.module.wrapPointer(ptr), width, height, 1, width)
base.SetRectangle(0, 0, width, height) self.base.SetRectangle(0, 0, width, height)
// base.GetUTF8Text() // self.base.GetUTF8Text()
base.Recognize(null) self.base.Recognize(null)
var everything = circularize(dump(module, base)) var everything = circularize(dump())
base.End(); self.base.End();
module._free(ptr); self.module._free(ptr);
cb(null, everything) cb(null, everything)
}) })
} }
Loading…
Cancel
Save