diff --git a/README.md b/README.md
index 84e5145..b555448 100644
--- a/README.md
+++ b/README.md
@@ -44,12 +44,115 @@ worker.recognize('#my-image')
## npm
-###TODO
+### TODO
+# Docs
+## Tesseract.recognize(image) -> [TesseractJob](#tesseractjob)
+Returns a TesseractJob whose `then` method can be used to act on the result of the OCR.
+
+For example:
+
+`image` can be
+ - an `img` element or querySelector that matches an `img` element
+ - a `video` element or querySelector that matches a `video` element
+ - a `canvas` element or querySelector that matches a `canvas` element
+ - a CanvasRenderingContext2D (returned by `canvas.getContext('2d')`)
+ - the absolute `url` of an image from the same website that is running your script. Browser security policies don't allow access to the content of images from other websites :(
+ -
+
+## Tesseract.detect(image) -> [TesseractJob](#tesseractjob)
+Returns a TesseractJob whose `then` method can be used to act on the result of the script and orientation detection.
+
+For example:
+
+`image` can be
+ - an `img` element or querySelector that matches an `img` element
+ - a `video` element or querySelector that matches a `video` element
+ - a `canvas` element or querySelector that matches a `canvas` element
+ - a CanvasRenderingContext2D (returned by `canvas.getContext('2d')`)
+ - the absolute `url` of an image from the same website that is running your script. Browser security policies don't allow access to the content of images from other websites :(
+
+## TesseractJob
+A TesseractJob is an object returned by a call to `recognize` or `detect`.
+All methods of a TesseractJob return itself to enable chaining.
+
+Typical use is:
+```javascript
+var job1 = Tesseract.recognize('#my-image')
+
+job1.progress(function(message){console.log(message)})
+ .error(function(err){console.error(err)})
+ .then(function(result){console.log(result)})
+```
+
+or more concisely:
+```javascript
+Tesseract.recognize('#my-image')
+ .progress(function(message){console.log(message)})
+ .error(function(err){console.error(err)})
+ .then(function(result){console.log(result)})
+```
+
+
+### TesseractJob.progress(callback: function) -> TesseractJob
+Sets `callback` as the function that will be called every time the job progresses.
+
+`callback` is a function with the signature `callback(progress)`, where `progress` is a JSON object.
+
+For example:
+```javascript
+Tesseract.recognize('#my-image')
+ .progress(function(message){console.log('progress is:', message)})
+```
+
+The console will show something like:
+```javascript
+progress is: {loaded_lang_model: "eng", from_cache: true}
+progress is: {initialized_with_lang: "eng"}
+progress is: {set_variable: Object}
+progress is: {set_variable: Object}
+progress is: {recognized: 0}
+progress is: {recognized: 0.3}
+progress is: {recognized: 0.6}
+progress is: {recognized: 0.9}
+progress is: {recognized: 1}
+```
+
+
+### TesseractJob.then(callback: function) -> TesseractJob
+Sets `callback` as the function that will be called if and when the job successfully completes.
+
+For example:
+```javascript
+Tesseract.recognize('#my-image')
+ .then(function(result){console.log('result is:', result)})
+```
+
+The console will show something like:
+```javascript
+result is: {
+ blocks: Array[1]
+ confidence: 87
+ html: "
TesseractJob
+Sets `callback` as the function that will be called if and when the job successfully completes.
+
+
# Contributing
## Development
To run a development copy of tesseract.js, first clone this repo.
diff --git a/index.html b/index.html
index 63a7d64..6a1cc93 100644
--- a/index.html
+++ b/index.html
@@ -16,15 +16,15 @@
var tesseract = createTesseractWorker();
+ tesseract.detect(canvas)
// tesseract.recognize('http://localhost:7355/westmorland.jpg')
- tesseract.recognize(canvas, {
- tessedit_char_blacklist: 'e'
- })
+ // tesseract.recognize(canvas, {
+ // tessedit_char_blacklist: 'e'
+ // })
.progress(function(e){
console.log('progress', e)
})
.then(function(e){
console.log('result', e)
})
-
\ No newline at end of file
diff --git a/src/worker/detect.js b/src/worker/detect.js
index 1b980a2..0bbc4e0 100644
--- a/src/worker/detect.js
+++ b/src/worker/detect.js
@@ -1,54 +1,53 @@
+import desaturate from './desaturate'
+import loadLanguage from './loadLanguage'
+
export default function detect(jobId, module, base, image, cb){
var width = image.width, height = image.height;
image = desaturate(image)
var ptr = module.allocate(image, 'i8', module.ALLOC_NORMAL);
- console.log('allocated image')
- // base = new module.TessBaseAPI()
+ // console.log('allocated image')
+
+ loadLanguage(jobId, module, 'osd', err => {
+ module._free(ptr);
+ cb(err)
+ }, success => {
+ base.Init(null, 'osd')
+ base.SetPageSegMode(module.PSM_OSD_ONLY)
+ // console.log('loaded language')
+
+ base.SetImage(module.wrapPointer(ptr), width, height, 1, width)
+ base.SetRectangle(0, 0, width, height)
- loadLanguage('osd', jobId, function(err, result){
- if(err){
+ var results = new module.OSResults();
+ var success = base.DetectOS(results);
+ if(!success){
+ base.End();
module._free(ptr);
- cb(err)
+ cb("failed to detect os")
}
else {
- base.Init(null, 'osd')
- base.SetPageSegMode(module.PSM_OSD_ONLY)
- console.log('loaded language')
-
- base.SetImage(module.wrapPointer(ptr), width, height, 1, width)
- base.SetRectangle(0, 0, width, height)
+ var charset = results.get_unicharset()
+ // console.log(charset)
+ // results.print_scores()
- var results = new module.OSResults();
- var success = base.DetectOS(results);
- if(!success){
- base.End();
- module._free(ptr);
- cb("failed to detect os")
- }
- else {
- var charset = results.get_unicharset()
- console.log(charset)
- // results.print_scores()
+ var best = results.get_best_result()
+ var oid = best.get_orientation_id(),
+ sid = best.get_script_id();
+ // console.log('orientation id', oid, [0, 270, 180, 90][oid], best.get_oconfidence())
+ // console.log('script id', sid, charset.get_script_from_script_id(sid), best.get_sconfidence())
+ // console.log(best)
- var best = results.get_best_result()
- var oid = best.get_orientation_id(),
- sid = best.get_script_id();
- // console.log('orientation id', oid, [0, 270, 180, 90][oid], best.get_oconfidence())
- // console.log('script id', sid, charset.get_script_from_script_id(sid), best.get_sconfidence())
- // console.log(best)
+ cb(null, {
+ tesseract_script_id: sid,
+ script: charset.get_script_from_script_id(sid),
+ script_confidence: best.get_sconfidence(),
+ orientation_degrees: [0, 270, 180, 90][oid],
+ orientation_confidence: best.get_oconfidence()
+ })
- cb(null, {
- tesseract_script_id: sid,
- script: charset.get_script_from_script_id(sid),
- script_confidence: best.get_sconfidence(),
- orientation_degrees: [0, 270, 180, 90][oid],
- orientation_confidence: best.get_oconfidence()
- })
-
- base.End();
- module._free(ptr);
- }
+ base.End();
+ module._free(ptr);
}
})
}
\ No newline at end of file
diff --git a/src/worker/loadLanguage.js b/src/worker/loadLanguage.js
index 386b08e..f79080e 100644
--- a/src/worker/loadLanguage.js
+++ b/src/worker/loadLanguage.js
@@ -39,14 +39,11 @@ function getLanguageData(lang, progress, cb, url='https://cdn.rawgit.com/naptha/
xhr.send()
}
-// var loaded_langs = []
-export default function loadLanguage(lang, jobId, cb, url){
+function load(lang, jobId, cb, url){
console.log('loadLanguage jobId', jobId)
- // if(loaded_langs.indexOf(lang) != -1) return cb(null, lang);
-
function progressMessage(progress){
postMessage({ jobId, progress })
}
@@ -85,4 +82,18 @@ export default function loadLanguage(lang, jobId, cb, url){
cb(null, data)
})
})
-}
\ No newline at end of file
+}
+
+var loaded_langs = []
+// loadLanguage(jobId, module, lang, error, success): if `lang` is not yet in loaded_langs, load() it and register the data as tessdata/<lang>.traineddata via module.FS_createDataFile; calls error(err) if load() reports an error, otherwise success() with no arguments.
+export default function loadLanguage(jobId, module, lang, error, success){
+ if(loaded_langs.indexOf(lang) == -1) load(lang, jobId, function(err, result){
+ if(err) return error(err)
+
+ loaded_langs.push(lang) // remember the language so later calls skip load()
+ module.FS_createDataFile('tessdata', lang +".traineddata", result, true, false);
+
+ success()
+ })
+ else success(); // already loaded: continue immediately (previously called run(), which is not defined in this module)
+}
diff --git a/src/worker/recognize.js b/src/worker/recognize.js
index f1107b9..238bfba 100644
--- a/src/worker/recognize.js
+++ b/src/worker/recognize.js
@@ -15,8 +15,10 @@ export default function recognize(jobId, module, base, image, options, cb){
var ptr = module.allocate(image, 'i8', module.ALLOC_NORMAL);
-
- function run() {
+ loadLanguage(jobId, module, lang, err => {
+ module._free(ptr)
+ cb(err)
+ }, success => {
base.Init(null, lang)
postMessage({
@@ -50,22 +52,5 @@ export default function recognize(jobId, module, base, image, options, cb){
base.End();
module._free(ptr);
cb(null, everything)
- }
-
-
-
- if(loaded_langs.indexOf(lang) == -1) loadLanguage(lang, jobId, function(err, result){
-
- if(err){
- console.error("error loading", lang);
- module._free(ptr);
- return cb(err, null);
- }
-
- loaded_langs.push(lang)
- module.FS_createDataFile('tessdata', lang +".traineddata", result, true, false);
- run()
-
- })
- else run();
+ })
}
\ No newline at end of file