Pure JavaScript OCR for more than 100 languages 📖🎉🖥
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

129 lines
3.1 KiB

var Tesseract = (function(){
// Namespace object that the enclosing function returns; the public API
// functions are attached to it.
var Tesseract = {};

// The worker is created from a Blob whose only statement is an
// importScripts() call for the real worker script (presumably so that the
// Worker can be started from a script hosted on another origin — confirm).
// Script locations noted by the original author:
//   https://cdn.rawgit.com/naptha/tesseract.js/master/worker/worker.js
//   https://rawgit.com/naptha/tesseract.js/master/worker/worker.js (for testing)
var blob = new Blob(["importScripts('https://cdn.rawgit.com/naptha/tessdata/worker/worker.js');"]);
var worker = new Worker(window.URL.createObjectURL(blob));

// Initial `mem` setting sent to the worker (6 * 16777216; presumably a size
// in bytes — confirm against the worker script).
worker.postMessage({init: {mem: 16777216*6}});

// Becomes true once the worker has been sent the larger `mem` init message.
var bigworker = false;

// `index` is the id given to the next request; `handlers[id]` holds the
// callbacks registered for that request.
var index = 0;
var handlers = [];
/**
 * Dispatches a message coming back from the worker to the handler that was
 * registered under `e.data.index`.
 *
 * - `e.data.progress` truthy: forwarded to the handler's `progress` function.
 * - `e.data.err` truthy: the promise is rejected and the node-style callback
 *   is invoked with the error as its first argument.
 * - otherwise: the promise is resolved with `e.data.result` and the callback
 *   is invoked as `callback(null, result)`.
 *
 * @param {MessageEvent} e - Event delivered by the worker.
 */
worker.onmessage = function(e){
    var data = e.data;
    var handler = handlers[data.index];

    // A message that carries no known index (or belongs to a request that has
    // already finished) has nobody to notify; ignore it instead of throwing.
    if(!handler){
        return;
    }

    if(data.progress){
        handler.progress(data.progress);
        return;
    }

    // An error or a result is treated as the final message for a request, so
    // the entry is released before the callbacks run; otherwise `handlers`
    // keeps every callback ever registered alive.
    delete handlers[data.index];

    if(data.err){
        handler.reject(data.err);
        handler.callback(data.err);
    }else{
        handler.resolve(data.result);
        handler.callback(null, data.result);
    }
};
/**
 * Normalises the accepted image inputs to the result of `getImageData()`.
 *
 * - An object with `getContext` (e.g. a canvas): its '2d' context is read.
 * - An element whose tagName is "IMG" or "VIDEO": it is drawn onto a new
 *   canvas sized to its natural / video dimensions, and that canvas is read.
 * - An object with `getImageData` (e.g. a 2d context): read directly over the
 *   full size of its `canvas`.
 * - Anything else is returned unchanged (assumed to already be usable by the
 *   worker — verify against callers).
 *
 * @param {*} image - Canvas, 2d context, IMG/VIDEO element, or pixel data.
 * @returns {*} The pixel data read from the source, or `image` itself.
 */
function convertToImageData(image){
    var source = image;

    if(image.getContext){
        source = image.getContext('2d');
    }else if(image.tagName === 'IMG' || image.tagName === 'VIDEO'){
        var isImg = image.tagName === 'IMG';
        var canvas = document.createElement('canvas');
        // Size the scratch canvas to the intrinsic size of the media so the
        // whole frame is captured by drawImage().
        canvas.width = isImg ? image.naturalWidth : image.videoWidth;
        canvas.height = isImg ? image.naturalHeight : image.videoHeight;
        var ctx = canvas.getContext('2d');
        ctx.drawImage(image, 0, 0);
        source = ctx;
    }

    if(source.getImageData){
        return source.getImageData(0, 0, source.canvas.width, source.canvas.height);
    }
    return source;
}
/**
 * Sends a 'detect' request for `image` to the worker.
 *
 * Accepted call forms:
 *   detect(image)
 *   detect(image, callback)
 *   detect(image, progress, callback)
 *
 * @param {*} image - Any input accepted by convertToImageData().
 * @param {Function} [progress] - Called with each progress message; when it
 *   is the only function supplied it is treated as `callback` instead.
 * @param {Function} [callback] - Node-style `(err, result)` callback.
 * @returns {Promise} Settled by the worker's message handler: resolved with
 *   the result, or rejected with the error reported by the worker.
 */
Tesseract.detect = function(image, progress, callback){
    // Plain function literal instead of `new Function()`: the constructor is
    // refused by a Content-Security-Policy that lacks 'unsafe-eval'.
    var noop = function(){};
    var imageData = convertToImageData(image);

    if(typeof callback === 'undefined'){
        // Two-argument form: the lone function is the completion callback.
        callback = typeof progress === 'undefined' ? noop : progress;
        progress = noop;
    }else if(typeof progress === 'undefined'){
        // detect(image, undefined, callback): keep the callback and only
        // default the missing progress function.
        progress = noop;
    }

    var i = index++;
    var handler = handlers[i] = {
        resolve: noop,
        reject: noop,
        callback: callback,
        progress: progress
    };

    return new Promise(function(resolve, reject){
        handler.resolve = resolve;
        handler.reject = reject;
        worker.postMessage({index: i, fun: 'detect', image: imageData});
    });
};
/**
 * Sends a 'recognize' request for `image` to the worker.
 *
 * Accepted call forms:
 *   recognize(image)
 *   recognize(image, callback)
 *   recognize(image, 'lang', callback)
 *   recognize(image, {lang: ..., progress: fn, ...}, callback)
 *
 * @param {*} image - Any input accepted by convertToImageData().
 * @param {Object|string|Function} [options] - Options object, a language
 *   code, or the callback. `options.lang` defaults to 'eng'. A function in
 *   `options.progress` receives progress messages and is not sent to the
 *   worker. The caller's object is not modified.
 * @param {Function} [callback] - Node-style `(err, result)` callback.
 * @returns {Promise} Settled by the worker's message handler: resolved with
 *   the result, or rejected with the error reported by the worker.
 */
Tesseract.recognize = function(image, options, callback){
    // Plain function literal instead of `new Function()`: the constructor is
    // refused by a Content-Security-Policy that lacks 'unsafe-eval'.
    var noop = function(){};
    var lang;

    // Normalise the arguments BEFORE reading options.lang, so the shorthand
    // forms (string / function / omitted) neither throw nor bypass the
    // big-worker check below.
    if(typeof options === 'function'){
        callback = options;
        options = {};
    }else if(typeof options === 'string'){
        lang = options;
        options = {};
    }else if(options === undefined || options === null){
        options = {};
    }
    if(typeof lang === 'undefined'){
        lang = typeof options.lang === 'undefined' ? 'eng' : options.lang;
    }

    // These languages get a one-time re-init of the worker with a larger
    // `mem` value.
    if(!bigworker && ['chi_sim', 'chi_tra', 'jpn'].indexOf(lang) !== -1){
        worker.postMessage({init: {mem: 16777216*10}});
        bigworker = true;
        console.log('started big worker');
    }

    var imageData = convertToImageData(image);

    // A progress function must not travel with the message (functions cannot
    // be structured-cloned by postMessage), so build a shallow copy without
    // it rather than deleting it from the caller's object.
    var progress = typeof options.progress === 'function' ? options.progress : noop;
    var workerOptions = {};
    for(var key in options){
        if(!Object.prototype.hasOwnProperty.call(options, key)){
            continue;
        }
        if(key === 'progress' && typeof options[key] === 'function'){
            continue;
        }
        workerOptions[key] = options[key];
    }

    var i = index++;
    var handler = handlers[i] = {
        resolve: noop,
        reject: noop,
        callback: callback || noop,
        progress: progress
    };

    return new Promise(function(resolve, reject){
        handler.resolve = resolve;
        handler.reject = reject;
        worker.postMessage({index: i, fun: 'recognize', image: imageData, lang: lang, options: workerOptions});
    });
};
return Tesseract
})()