Browse Source

change loadlanguage api

pull/12/head
Guillermo 8 years ago
parent
commit
c80038b6cb
  1. 105
      README.md
  2. 8
      index.html
  3. 77
      src/worker/detect.js
  4. 21
      src/worker/loadLanguage.js
  5. 25
      src/worker/recognize.js

105
README.md

@ -44,12 +44,115 @@ worker.recognize('#my-image') @@ -44,12 +44,115 @@ worker.recognize('#my-image')
## npm
###TODO
### TODO
<!-- ```shell
> npm install tesseract-js ?
```-->
# Docs
## Tesseract.recognize(image) -> [TesseractJob](#tesseractjob)
Returns a TesseractJob whose `then` method can be used to act on the result of the OCR.
For example:
`image` can be
- an `img` element or querySelector that matches an `img` element
- a `video` element or querySelector that matches a `video` element
- a `canvas` element or querySelector that matches a `canvas` element
- a CanvasRenderingContext2D (returned by `canvas.getContext('2d')`)
- the absolute `url` of an image from the same website that is running your script. Browser security policies don't allow access to the content of images from other websites :(
-
## Tesseract.detect(image) -> [TesseractJob](#tesseractjob)
Returns a TesseractJob whose `then` method can be used to act on the result of the script and orientation detection.
For example:
`image` can be
- an `img` element or querySelector that matches an `img` element
- a `video` element or querySelector that matches a `video` element
- a `canvas` element or querySelector that matches a `canvas` element
- a CanvasRenderingContext2D (returned by `canvas.getContext('2d')`)
- the absolute `url` of an image from the same website that is running your script. Browser security policies don't allow access to the content of images from other websites :(
## TesseractJob
A TesseractJob is an object returned by a call to `recognize` or `detect`.
All methods of a TesseractJob return itself to enable chaining.
Typical use is:
```javascript
var job1 = Tesseract.recognize('#my-image')
job1.progress(function(message){console.log(message)})
.error(function(err){console.error(err)})
.then(function(result){console.log(result)})
```
or more concisely:
```javascript
Tesseract.recognize('#my-image')
.progress(function(message){console.log(message)})
.error(function(err){console.error(err)})
.then(function(result){console.log(result)})
```
### TesseractJob.progress(callback: function) -> TesseractJob
Sets `callback` as the function that will be called every time the job progresses.
`callback` is a function with the signature `callback(progress)` where `progress` is a JSON object.
For example:
```javascript
Tesseract.recognize('#my-image')
.progress(function(message){console.log('progress is: ', message)})
```
The console will show something like:
```javascript
progress is: {loaded_lang_model: "eng", from_cache: true}
progress is: {initialized_with_lang: "eng"}
progress is: {set_variable: Object}
progress is: {set_variable: Object}
progress is: {recognized: 0}
progress is: {recognized: 0.3}
progress is: {recognized: 0.6}
progress is: {recognized: 0.9}
progress is: {recognized: 1}
```
### TesseractJob.then(callback: function) -> TesseractJob
Sets `callback` as the function that will be called if and when the job successfully completes.
For example:
```javascript
Tesseract.recognize('#my-image')
.then(function(result){console.log('result is: ', result)})
```
The console will show something like:
```javascript
result is: {
blocks: Array[1]
confidence: 87
html: "<div class='ocr_page' id='page_1' ..."
lines: Array[3]
oem: "DEFAULT"
paragraphs: Array[1]
psm: "SINGLE_BLOCK"
symbols: Array[33]
text: "Hello World↵from beyond↵the Cosmic Void↵↵"
version: "3.04.00"
words: Array[7]
}
```
### TesseractJob.error(callback: function) -> TesseractJob
Sets `callback` as the function that will be called if and when the job fails. `callback` receives the error as its argument.
# Contributing
## Development
To run a development copy of tesseract.js, first clone this repo.

8
index.html

@ -16,15 +16,15 @@ @@ -16,15 +16,15 @@
var tesseract = createTesseractWorker();
tesseract.detect(canvas)
// tesseract.recognize('http://localhost:7355/westmorland.jpg')
tesseract.recognize(canvas, {
tessedit_char_blacklist: 'e'
})
// tesseract.recognize(canvas, {
// tessedit_char_blacklist: 'e'
// })
.progress(function(e){
console.log('progress', e)
})
.then(function(e){
console.log('result', e)
})
</script>

77
src/worker/detect.js

@ -1,54 +1,53 @@ @@ -1,54 +1,53 @@
import desaturate from './desaturate'
import loadLanguage from './loadLanguage'
export default function detect(jobId, module, base, image, cb){
var width = image.width, height = image.height;
image = desaturate(image)
var ptr = module.allocate(image, 'i8', module.ALLOC_NORMAL);
console.log('allocated image')
// base = new module.TessBaseAPI()
// console.log('allocated image')
loadLanguage(jobId, module, 'osd', err => {
module._free(ptr);
cb(err)
}, success => {
base.Init(null, 'osd')
base.SetPageSegMode(module.PSM_OSD_ONLY)
// console.log('loaded language')
base.SetImage(module.wrapPointer(ptr), width, height, 1, width)
base.SetRectangle(0, 0, width, height)
loadLanguage('osd', jobId, function(err, result){
if(err){
var results = new module.OSResults();
var success = base.DetectOS(results);
if(!success){
base.End();
module._free(ptr);
cb(err)
cb("failed to detect os")
}
else {
base.Init(null, 'osd')
base.SetPageSegMode(module.PSM_OSD_ONLY)
console.log('loaded language')
base.SetImage(module.wrapPointer(ptr), width, height, 1, width)
base.SetRectangle(0, 0, width, height)
var charset = results.get_unicharset()
// console.log(charset)
// results.print_scores()
var results = new module.OSResults();
var success = base.DetectOS(results);
if(!success){
base.End();
module._free(ptr);
cb("failed to detect os")
}
else {
var charset = results.get_unicharset()
console.log(charset)
// results.print_scores()
var best = results.get_best_result()
var oid = best.get_orientation_id(),
sid = best.get_script_id();
// console.log('orientation id', oid, [0, 270, 180, 90][oid], best.get_oconfidence())
// console.log('script id', sid, charset.get_script_from_script_id(sid), best.get_sconfidence())
// console.log(best)
var best = results.get_best_result()
var oid = best.get_orientation_id(),
sid = best.get_script_id();
// console.log('orientation id', oid, [0, 270, 180, 90][oid], best.get_oconfidence())
// console.log('script id', sid, charset.get_script_from_script_id(sid), best.get_sconfidence())
// console.log(best)
cb(null, {
tesseract_script_id: sid,
script: charset.get_script_from_script_id(sid),
script_confidence: best.get_sconfidence(),
orientation_degrees: [0, 270, 180, 90][oid],
orientation_confidence: best.get_oconfidence()
})
cb(null, {
tesseract_script_id: sid,
script: charset.get_script_from_script_id(sid),
script_confidence: best.get_sconfidence(),
orientation_degrees: [0, 270, 180, 90][oid],
orientation_confidence: best.get_oconfidence()
})
base.End();
module._free(ptr);
}
base.End();
module._free(ptr);
}
})
}

21
src/worker/loadLanguage.js

@ -39,14 +39,11 @@ function getLanguageData(lang, progress, cb, url='https://cdn.rawgit.com/naptha/ @@ -39,14 +39,11 @@ function getLanguageData(lang, progress, cb, url='https://cdn.rawgit.com/naptha/
xhr.send()
}
// var loaded_langs = []
export default function loadLanguage(lang, jobId, cb, url){
function load(lang, jobId, cb, url){
console.log('loadLanguage jobId', jobId)
// if(loaded_langs.indexOf(lang) != -1) return cb(null, lang);
function progressMessage(progress){
postMessage({ jobId, progress })
}
@ -85,4 +82,18 @@ export default function loadLanguage(lang, jobId, cb, url){ @@ -85,4 +82,18 @@ export default function loadLanguage(lang, jobId, cb, url){
cb(null, data)
})
})
}
}
// Languages whose trained data has already been written into the module's
// 'tessdata' directory, so later jobs can skip fetching it again.
var loaded_langs = []

/**
 * Makes sure the trained data for `lang` is available to `module`, then
 * hands control back to the caller through one of two callbacks.
 *
 * On the first request for a language the data is obtained through `load`
 * and stored as `tessdata/<lang>.traineddata` via `module.FS_createDataFile`.
 * Later requests for the same language call `success` straight away.
 *
 * @param {*} jobId - Job identifier, forwarded to `load`.
 * @param {Object} module - Object exposing `FS_createDataFile`.
 * @param {string} lang - Language code, e.g. 'eng' or 'osd'.
 * @param {function(*)} error - Called with the error if loading fails.
 * @param {function()} success - Called with no arguments once the data is in place.
 */
export default function loadLanguage(jobId, module, lang, error, success){
    if(loaded_langs.indexOf(lang) === -1){
        load(lang, jobId, function(err, result){
            if(err) return error(err)
            // Record the language only after a successful fetch so that a
            // failed attempt is retried on the next request.
            loaded_langs.push(lang)
            module.FS_createDataFile('tessdata', lang + ".traineddata", result, true, false);
            success()
        })
    }
    else {
        // Already cached: continue immediately. This branch used to call
        // `run()`, which is not defined in this module and threw a
        // ReferenceError on every repeat request.
        success()
    }
}

25
src/worker/recognize.js

@ -15,8 +15,10 @@ export default function recognize(jobId, module, base, image, options, cb){ @@ -15,8 +15,10 @@ export default function recognize(jobId, module, base, image, options, cb){
var ptr = module.allocate(image, 'i8', module.ALLOC_NORMAL);
function run() {
loadLanguage(jobId, module, lang, err => {
module._free(ptr)
cb(err)
}, success => {
base.Init(null, lang)
postMessage({
@ -50,22 +52,5 @@ export default function recognize(jobId, module, base, image, options, cb){ @@ -50,22 +52,5 @@ export default function recognize(jobId, module, base, image, options, cb){
base.End();
module._free(ptr);
cb(null, everything)
}
if(loaded_langs.indexOf(lang) == -1) loadLanguage(lang, jobId, function(err, result){
if(err){
console.error("error loading", lang);
module._free(ptr);
return cb(err, null);
}
loaded_langs.push(lang)
module.FS_createDataFile('tessdata', lang +".traineddata", result, true, false);
run()
})
else run();
})
}
Loading…
Cancel
Save