Browse Source

change loadlanguage api

pull/12/head
Guillermo 8 years ago
parent
commit
c80038b6cb
  1. 105
      README.md
  2. 8
      index.html
  3. 83
      src/worker/detect.js
  4. 19
      src/worker/loadLanguage.js
  5. 23
      src/worker/recognize.js

105
README.md

@ -44,12 +44,115 @@ worker.recognize('#my-image')
## npm ## npm
###TODO ### TODO
<!-- ```shell <!-- ```shell
> npm install tesseract-js ? > npm install tesseract-js ?
```--> ```-->
# Docs
## Tesseract.recognize(image) -> [TesseractJob](#tesseractjob)
Returns a TesseractJob whose `then` method can be used to act on the result of the OCR.
For example:
`image` can be
- an `img` element or querySelector that matches an `img` element
- a `video` element or querySelector that matches a `video` element
- a `canvas` element or querySelector that matches a `canvas` element
- a CanvasRenderingContext2D (returned by `canvas.getContext('2d')`)
- the absolute `url` of an image from the same website that is running your script. Browser security policies don't allow access to the content of images from other websites :(
-
## Tesseract.detect(image) -> [TesseractJob](#tesseractjob)
Returns a TesseractJob whose `then` method can be used to act on the result of the script and orientation detection.
For example:
`image` can be
- an `img` element or querySelector that matches an `img` element
- a `video` element or querySelector that matches a `video` element
- a `canvas` element or querySelector that matches a `canvas` element
- a CanvasRenderingContext2D (returned by `canvas.getContext('2d')`)
- the absolute `url` of an image from the same website that is running your script. Browser security policies don't allow access to the content of images from other websites :(
## TesseractJob
A TesseractJob is an object returned by a call to `recognize` or `detect`.
All methods of a TesseractJob return itself to enable chaining.
Typical use is:
```javascript
var job1 = Tesseract.recognize('#my-image')
job1.progress(function(message){console.log(message)})
.error(function(err){console.error(err)})
.then(function(result){console.log(result)})
```
or more concisely:
```javascript
Tesseract.recognize('#my-image')
.progress(function(message){console.log(message)})
.error(function(err){console.error(err)})
.then(function(result){console.log(result)})
```
### TesseractJob.progress(callback: function) -> TesseractJob
Sets `callback` as the function that will be called every time the job progresses.
`callback` is a function with the signature `callback(progress)` where `progress` is a JSON object.
For example:
```javascript
Tesseract.recognize('#my-image')
.progress(function(message){console.log('progress is:', message)})
```
The console will show something like:
```javascript
progress is: {loaded_lang_model: "eng", from_cache: true}
progress is: {initialized_with_lang: "eng"}
progress is: {set_variable: Object}
progress is: {set_variable: Object}
progress is: {recognized: 0}
progress is: {recognized: 0.3}
progress is: {recognized: 0.6}
progress is: {recognized: 0.9}
progress is: {recognized: 1}
```
### TesseractJob.then(callback: function) -> TesseractJob
Sets `callback` as the function that will be called if and when the job successfully completes.
For example:
```javascript
Tesseract.recognize('#my-image')
.then(function(result){console.log('result is:', result)})
```
The console will show something like:
```javascript
result is: {
blocks: Array[1]
confidence: 87
html: "<div class='ocr_page' id='page_1' ..."
lines: Array[3]
oem: "DEFAULT"
paragraphs: Array[1]
psm: "SINGLE_BLOCK"
symbols: Array[33]
text: "Hello World↵from beyond↵the Cosmic Void↵↵"
version: "3.04.00"
words: Array[7]
}
```
### TesseractJob.error(callback: function) -> TesseractJob
Sets `callback` as the function that will be called if and when the job fails with an error.
# Contributing # Contributing
## Development ## Development
To run a development copy of tesseract.js, first clone this repo. To run a development copy of tesseract.js, first clone this repo.

8
index.html

@ -16,15 +16,15 @@
var tesseract = createTesseractWorker(); var tesseract = createTesseractWorker();
tesseract.detect(canvas)
// tesseract.recognize('http://localhost:7355/westmorland.jpg') // tesseract.recognize('http://localhost:7355/westmorland.jpg')
tesseract.recognize(canvas, { // tesseract.recognize(canvas, {
tessedit_char_blacklist: 'e' // tessedit_char_blacklist: 'e'
}) // })
.progress(function(e){ .progress(function(e){
console.log('progress', e) console.log('progress', e)
}) })
.then(function(e){ .then(function(e){
console.log('result', e) console.log('result', e)
}) })
</script> </script>

83
src/worker/detect.js

@ -1,54 +1,53 @@
import desaturate from './desaturate'
import loadLanguage from './loadLanguage'
export default function detect(jobId, module, base, image, cb){ export default function detect(jobId, module, base, image, cb){
var width = image.width, height = image.height; var width = image.width, height = image.height;
image = desaturate(image) image = desaturate(image)
var ptr = module.allocate(image, 'i8', module.ALLOC_NORMAL); var ptr = module.allocate(image, 'i8', module.ALLOC_NORMAL);
console.log('allocated image') // console.log('allocated image')
// base = new module.TessBaseAPI()
loadLanguage(jobId, module, 'osd', err => {
module._free(ptr);
cb(err)
}, success => {
base.Init(null, 'osd')
base.SetPageSegMode(module.PSM_OSD_ONLY)
// console.log('loaded language')
loadLanguage('osd', jobId, function(err, result){ base.SetImage(module.wrapPointer(ptr), width, height, 1, width)
if(err){ base.SetRectangle(0, 0, width, height)
var results = new module.OSResults();
var success = base.DetectOS(results);
if(!success){
base.End();
module._free(ptr); module._free(ptr);
cb(err) cb("failed to detect os")
} }
else { else {
base.Init(null, 'osd') var charset = results.get_unicharset()
base.SetPageSegMode(module.PSM_OSD_ONLY) // console.log(charset)
console.log('loaded language') // results.print_scores()
base.SetImage(module.wrapPointer(ptr), width, height, 1, width) var best = results.get_best_result()
base.SetRectangle(0, 0, width, height) var oid = best.get_orientation_id(),
sid = best.get_script_id();
var results = new module.OSResults(); // console.log('orientation id', oid, [0, 270, 180, 90][oid], best.get_oconfidence())
var success = base.DetectOS(results); // console.log('script id', sid, charset.get_script_from_script_id(sid), best.get_sconfidence())
if(!success){ // console.log(best)
base.End();
module._free(ptr); cb(null, {
cb("failed to detect os") tesseract_script_id: sid,
} script: charset.get_script_from_script_id(sid),
else { script_confidence: best.get_sconfidence(),
var charset = results.get_unicharset() orientation_degrees: [0, 270, 180, 90][oid],
console.log(charset) orientation_confidence: best.get_oconfidence()
// results.print_scores() })
var best = results.get_best_result() base.End();
var oid = best.get_orientation_id(), module._free(ptr);
sid = best.get_script_id();
// console.log('orientation id', oid, [0, 270, 180, 90][oid], best.get_oconfidence())
// console.log('script id', sid, charset.get_script_from_script_id(sid), best.get_sconfidence())
// console.log(best)
cb(null, {
tesseract_script_id: sid,
script: charset.get_script_from_script_id(sid),
script_confidence: best.get_sconfidence(),
orientation_degrees: [0, 270, 180, 90][oid],
orientation_confidence: best.get_oconfidence()
})
base.End();
module._free(ptr);
}
} }
}) })
} }

19
src/worker/loadLanguage.js

@ -39,14 +39,11 @@ function getLanguageData(lang, progress, cb, url='https://cdn.rawgit.com/naptha/
xhr.send() xhr.send()
} }
// var loaded_langs = []
export default function loadLanguage(lang, jobId, cb, url){ function load(lang, jobId, cb, url){
console.log('loadLanguage jobId', jobId) console.log('loadLanguage jobId', jobId)
// if(loaded_langs.indexOf(lang) != -1) return cb(null, lang);
function progressMessage(progress){ function progressMessage(progress){
postMessage({ jobId, progress }) postMessage({ jobId, progress })
} }
@ -86,3 +83,17 @@ export default function loadLanguage(lang, jobId, cb, url){
}) })
}) })
} }
// Languages whose traineddata has already been handed to
// module.FS_createDataFile, so each language is loaded and written only once.
var loaded_langs = []

/**
 * Ensures the traineddata for `lang` is available to `module`, then continues.
 *
 * On the first request for a language, the data is obtained through `load`
 * and written with `module.FS_createDataFile` as `tessdata/<lang>.traineddata`.
 * Later requests for the same language skip both steps.
 *
 * @param {*} jobId - forwarded to `load`, which uses it for progress messages.
 * @param {Object} module - object exposing `FS_createDataFile`.
 * @param {string} lang - language code, e.g. 'eng' or 'osd'.
 * @param {function(*)} error - called with the error if loading fails.
 * @param {function()} success - called with no arguments once the language is ready.
 */
export default function loadLanguage(jobId, module, lang, error, success){
    // Already written to the filesystem: continue straight away. The previous
    // code called an undefined `run()` here, which threw a ReferenceError.
    if(loaded_langs.indexOf(lang) != -1){
        success()
        return
    }

    load(lang, jobId, function(err, result){
        if(err) return error(err)
        // Only mark the language as loaded after a successful load, so a
        // failed attempt is retried on the next request.
        loaded_langs.push(lang)
        module.FS_createDataFile('tessdata', lang +".traineddata", result, true, false);
        success()
    })
}

23
src/worker/recognize.js

@ -15,8 +15,10 @@ export default function recognize(jobId, module, base, image, options, cb){
var ptr = module.allocate(image, 'i8', module.ALLOC_NORMAL); var ptr = module.allocate(image, 'i8', module.ALLOC_NORMAL);
loadLanguage(jobId, module, lang, err => {
function run() { module._free(ptr)
cb(err)
}, success => {
base.Init(null, lang) base.Init(null, lang)
postMessage({ postMessage({
@ -50,22 +52,5 @@ export default function recognize(jobId, module, base, image, options, cb){
base.End(); base.End();
module._free(ptr); module._free(ptr);
cb(null, everything) cb(null, everything)
}
if(loaded_langs.indexOf(lang) == -1) loadLanguage(lang, jobId, function(err, result){
if(err){
console.error("error loading", lang);
module._free(ptr);
return cb(err, null);
}
loaded_langs.push(lang)
module.FS_createDataFile('tessdata', lang +".traineddata", result, true, false);
run()
}) })
else run();
} }
Loading…
Cancel
Save