diff --git a/README.md b/README.md index 84e5145..b555448 100644 --- a/README.md +++ b/README.md @@ -44,12 +44,115 @@ worker.recognize('#my-image') ## npm -###TODO +### TODO +# Docs +## Tesseract.recognize(image) -> [TesseractJob](#tesseractjob) +Returns a TesseractJob whose `then` method can be used to act on the result of the OCR. + +For example: + +`image` can be + - an `img` element or querySelector that matches an `img` element + - a `video` element or querySelector that matches a `video` element + - a `canvas` element or querySelector that matches a `canvas` element + - a CanvasRenderingContext2D (returned by `canvas.getContext('2d')`) + - the absolute `url` of an image from the same website that is running your script. Browser security policies don't allow access to the content of images from other websites :( + - + +## Tesseract.detect(image) -> [TesseractJob](#tesseractjob) +Returns a TesseractJob whose `then` method can be used to act on the result of the script and orientation detection. + +For example: + +`image` can be + - an `img` element or querySelector that matches an `img` element + - a `video` element or querySelector that matches a `video` element + - a `canvas` element or querySelector that matches a `canvas` element + - a CanvasRenderingContext2D (returned by `canvas.getContext('2d')`) + - the absolute `url` of an image from the same website that is running your script. Browser security policies don't allow access to the content of images from other websites :( + +## TesseractJob +A TesseractJob is an object returned by a call to recognize or detect. +All methods of a TesseractJob return the job itself to enable chaining. 
+ +Typical use is: +```javascript +var job1 = Tesseract.recognize('#my-image') + +job1.progress(function(message){console.log(message)}) + .error(function(err){console.error(err)}) + .then(function(result){console.log(result)}) +``` + +or more concisely: +```javascript +Tesseract.recognize('#my-image') + .progress(function(message){console.log(message)}) + .error(function(err){console.error(err)}) + .then(function(result){console.log(result)}) +``` + + +### TesseractJob.progress(callback: function) -> TesseractJob +Sets `callback` as the function that will be called every time the job progresses. + +`callback` is a function with the signature `callback(progress)` where `progress` is a JSON object. + +For example: +```javascript +Tesseract.recognize('#my-image') + .progress(function(message){console.log('progress is: ', message)}) +``` + +The console will show something like: +```javascript +progress is: {loaded_lang_model: "eng", from_cache: true} +progress is: {initialized_with_lang: "eng"} +progress is: {set_variable: Object} +progress is: {set_variable: Object} +progress is: {recognized: 0} +progress is: {recognized: 0.3} +progress is: {recognized: 0.6} +progress is: {recognized: 0.9} +progress is: {recognized: 1} +``` + + +### TesseractJob.then(callback: function) -> TesseractJob +Sets `callback` as the function that will be called if and when the job successfully completes. + +For example: +```javascript +Tesseract.recognize('#my-image') + .then(function(result){console.log('result is: ', result)}) +``` + +The console will show something like: +```javascript +result is: { + blocks: Array[1] + confidence: 87 + html: "
TesseractJob +Sets `callback` as the function that will be called if and when the job successfully completes. + + # Contributing ## Development To run a development copy of tesseract.js, first clone this repo. diff --git a/index.html b/index.html index 63a7d64..6a1cc93 100644 --- a/index.html +++ b/index.html @@ -16,15 +16,15 @@ var tesseract = createTesseractWorker(); + tesseract.detect(canvas) // tesseract.recognize('http://localhost:7355/westmorland.jpg') - tesseract.recognize(canvas, { - tessedit_char_blacklist: 'e' - }) + // tesseract.recognize(canvas, { + // tessedit_char_blacklist: 'e' + // }) .progress(function(e){ console.log('progress', e) }) .then(function(e){ console.log('result', e) }) - \ No newline at end of file diff --git a/src/worker/detect.js b/src/worker/detect.js index 1b980a2..0bbc4e0 100644 --- a/src/worker/detect.js +++ b/src/worker/detect.js @@ -1,54 +1,53 @@ +import desaturate from './desaturate' +import loadLanguage from './loadLanguage' + export default function detect(jobId, module, base, image, cb){ var width = image.width, height = image.height; image = desaturate(image) var ptr = module.allocate(image, 'i8', module.ALLOC_NORMAL); - console.log('allocated image') - // base = new module.TessBaseAPI() + // console.log('allocated image') + + loadLanguage(jobId, module, 'osd', err => { + module._free(ptr); + cb(err) + }, success => { + base.Init(null, 'osd') + base.SetPageSegMode(module.PSM_OSD_ONLY) + // console.log('loaded language') + + base.SetImage(module.wrapPointer(ptr), width, height, 1, width) + base.SetRectangle(0, 0, width, height) - loadLanguage('osd', jobId, function(err, result){ - if(err){ + var results = new module.OSResults(); + var success = base.DetectOS(results); + if(!success){ + base.End(); module._free(ptr); - cb(err) + cb("failed to detect os") } else { - base.Init(null, 'osd') - base.SetPageSegMode(module.PSM_OSD_ONLY) - console.log('loaded language') - - base.SetImage(module.wrapPointer(ptr), width, height, 1, 
width) - base.SetRectangle(0, 0, width, height) + var charset = results.get_unicharset() + // console.log(charset) + // results.print_scores() - var results = new module.OSResults(); - var success = base.DetectOS(results); - if(!success){ - base.End(); - module._free(ptr); - cb("failed to detect os") - } - else { - var charset = results.get_unicharset() - console.log(charset) - // results.print_scores() + var best = results.get_best_result() + var oid = best.get_orientation_id(), + sid = best.get_script_id(); + // console.log('orientation id', oid, [0, 270, 180, 90][oid], best.get_oconfidence()) + // console.log('script id', sid, charset.get_script_from_script_id(sid), best.get_sconfidence()) + // console.log(best) - var best = results.get_best_result() - var oid = best.get_orientation_id(), - sid = best.get_script_id(); - // console.log('orientation id', oid, [0, 270, 180, 90][oid], best.get_oconfidence()) - // console.log('script id', sid, charset.get_script_from_script_id(sid), best.get_sconfidence()) - // console.log(best) + cb(null, { + tesseract_script_id: sid, + script: charset.get_script_from_script_id(sid), + script_confidence: best.get_sconfidence(), + orientation_degrees: [0, 270, 180, 90][oid], + orientation_confidence: best.get_oconfidence() + }) - cb(null, { - tesseract_script_id: sid, - script: charset.get_script_from_script_id(sid), - script_confidence: best.get_sconfidence(), - orientation_degrees: [0, 270, 180, 90][oid], - orientation_confidence: best.get_oconfidence() - }) - - base.End(); - module._free(ptr); - } + base.End(); + module._free(ptr); } }) } \ No newline at end of file diff --git a/src/worker/loadLanguage.js b/src/worker/loadLanguage.js index 386b08e..f79080e 100644 --- a/src/worker/loadLanguage.js +++ b/src/worker/loadLanguage.js @@ -39,14 +39,11 @@ function getLanguageData(lang, progress, cb, url='https://cdn.rawgit.com/naptha/ xhr.send() } -// var loaded_langs = [] -export default function loadLanguage(lang, jobId, cb, url){ 
+function load(lang, jobId, cb, url){ console.log('loadLanguage jobId', jobId) - // if(loaded_langs.indexOf(lang) != -1) return cb(null, lang); - function progressMessage(progress){ postMessage({ jobId, progress }) } @@ -85,4 +82,18 @@ export default function loadLanguage(lang, jobId, cb, url){ cb(null, data) }) }) -} \ No newline at end of file +} + +var loaded_langs = [] + +export default function loadLanguage(jobId, module, lang, error, success){ + if(loaded_langs.indexOf(lang) == -1) load(lang, jobId, function(err, result){ + if(err) return error(err) + + loaded_langs.push(lang) + module.FS_createDataFile('tessdata', lang +".traineddata", result, true, false); + + success() + }) + else run(); +} diff --git a/src/worker/recognize.js b/src/worker/recognize.js index f1107b9..238bfba 100644 --- a/src/worker/recognize.js +++ b/src/worker/recognize.js @@ -15,8 +15,10 @@ export default function recognize(jobId, module, base, image, options, cb){ var ptr = module.allocate(image, 'i8', module.ALLOC_NORMAL); - - function run() { + loadLanguage(jobId, module, lang, err => { + module._free(ptr) + cb(err) + }, success => { base.Init(null, lang) postMessage({ @@ -50,22 +52,5 @@ export default function recognize(jobId, module, base, image, options, cb){ base.End(); module._free(ptr); cb(null, everything) - } - - - - if(loaded_langs.indexOf(lang) == -1) loadLanguage(lang, jobId, function(err, result){ - - if(err){ - console.error("error loading", lang); - module._free(ptr); - return cb(err, null); - } - - loaded_langs.push(lang) - module.FS_createDataFile('tessdata', lang +".traineddata", result, true, false); - run() - - }) - else run(); + }) } \ No newline at end of file