From 35f450efca60537f4202595c37be07ce0ac5bd91 Mon Sep 17 00:00:00 2001 From: Guillermo Date: Sun, 2 Oct 2016 14:26:55 -0400 Subject: [PATCH] rewrite --- README.md | 17 ++ bify | 1 - devServer.js | 26 ++ dist/tesseract.js | 1 + example.htm => index.html | 19 +- lib/Tesseract.2015.07.26.js | 130 --------- lib/Tesseract_dev.js | 130 --------- package.json | 15 +- src/Tesseract.js | 130 --------- src/browser/index.js | 65 +++++ src/worker.js | 533 ------------------------------------ src/worker/circularize.js | 56 ++++ src/worker/db.js | 3 + src/worker/desaturate.js | 26 ++ src/worker/detect.js | 54 ++++ src/worker/dump.js | 163 +++++++++++ src/worker/fileSizes.js | 2 + src/worker/index.js | 39 +++ src/worker/loadLanguage.js | 88 ++++++ src/worker/recognize.js | 71 +++++ webpack.config.dev.js | 42 +++ webpack.config.prod.js | 46 ++++ 22 files changed, 725 insertions(+), 932 deletions(-) delete mode 100644 bify create mode 100644 devServer.js create mode 100644 dist/tesseract.js rename example.htm => index.html (65%) delete mode 100644 lib/Tesseract.2015.07.26.js delete mode 100644 lib/Tesseract_dev.js delete mode 100644 src/Tesseract.js create mode 100644 src/browser/index.js delete mode 100644 src/worker.js create mode 100644 src/worker/circularize.js create mode 100644 src/worker/db.js create mode 100644 src/worker/desaturate.js create mode 100644 src/worker/detect.js create mode 100644 src/worker/dump.js create mode 100644 src/worker/fileSizes.js create mode 100644 src/worker/index.js create mode 100644 src/worker/loadLanguage.js create mode 100644 src/worker/recognize.js create mode 100644 webpack.config.dev.js create mode 100644 webpack.config.prod.js diff --git a/README.md b/README.md index a6c797c..8fb89ba 100644 --- a/README.md +++ b/README.md @@ -1 +1,18 @@ # tesseract.js +Tesseract.js is a pure javascript version of the Tesseract OCR Engine that can recognize English, Chinese, Russian, and 60 other languages. + + + +# Installation +Tesseract.js works with a ` + \ No newline at end of file diff --git a/lib/Tesseract.2015.07.26.js b/lib/Tesseract.2015.07.26.js deleted file mode 100644 index e066d85..0000000 --- a/lib/Tesseract.2015.07.26.js +++ /dev/null @@ -1,130 +0,0 @@ -var Tesseract = (function(){ - - var Tesseract = {} - - //https://cdn.rawgit.com/naptha/tesseract.js/master/worker/worker.js - //https://rawgit.com/naptha/tesseract.js/master/worker/worker.js for testing - //https://cdn.rawgit.com/naptha/tesseract.js/master/worker/worker.js - - var blob = new Blob(["importScripts('https://cdn.rawgit.com/naptha/tesseract.js/master/lib/worker.2015.07.26.js');"]); // changed on build - // console.log('localhost') - var worker = new Worker(window.URL.createObjectURL(blob)); - worker.postMessage({init: {mem: 16777216*6}}) - var bigworker = false - - var index = 0 - var handlers = [] - - worker.onmessage = function(e){ - var handler = handlers[e.data.index] - if(e.data.progress){ - handler.progress(e.data.progress) - } - else if(e.data.err){ - handler.reject(e.data.err) - handler.callback(e.data.err) - } - else { - handler.resolve(e.data.result) - handler.callback(null,e.data.result) - } - } - - function convertToImageData(image){ - if(image.getContext){ - image = image.getContext('2d'); - }else if(image.tagName == "IMG" || image.tagName == "VIDEO"){ - var c = document.createElement('canvas'); - if(image.tagName == "IMG"){ - c.width = image.naturalWidth; - c.height = image.naturalHeight; - }else if(image.tagName == "VIDEO"){ - c.width = image.videoWidth; - c.height = image.videoHeight; - } - var ctx = c.getContext('2d'); - ctx.drawImage(image, 0, 0); - image = ctx; - } - if(image.getImageData) image = image.getImageData(0, 0, image.canvas.width, image.canvas.height); - return image - } - - Tesseract.detect = function(image, progress, callback){ - image = convertToImageData(image) - - if(typeof progress === "undefined"){ - progress = callback = new Function() - } - - if (typeof callback === "undefined"){ - callback = progress - progress = new Function() - } - - var i = index++ - - handlers[i] = { - resolve: new Function(), - reject: new Function() - } - handlers[i].callback = callback - handlers[i].progress = progress - - return new Promise(function(resolve, reject){ - handlers[i].resolve = resolve - handlers[i].reject = reject - worker.postMessage({index: i, fun: 'detect', image: image}) - }) - - } - - Tesseract.recognize = function(image, options, callback){ - var lang = options.lang - if (typeof lang === "undefined"){ - lang = 'eng' - } - - if (!bigworker && ['chi_sim', 'chi_tra', 'jpn'].indexOf(lang) != -1){ - worker.postMessage({init: {mem: 16777216*10}}) - bigworker = true - console.log('started big worker') - } - - if (typeof options === 'string') { - lang = options - options = {} - } - - if (typeof options === "function") { - callback = options - options = {} - } - - image = convertToImageData(image) - - var i = index++ - - handlers[i] = { - resolve: new Function(), - reject: new Function() - } - handlers[i].callback = callback || new Function() - handlers[i].progress = (function(){ - if(typeof options.progress === 'function'){ - var p = options.progress - delete options.progress - return p - } - return function(){} - })() - - return new Promise(function(resolve, reject){ - handlers[i].resolve = resolve - handlers[i].reject = reject - worker.postMessage({index: i, fun: 'recognize', image: image, lang: lang, options: options}) - }) - - } - return Tesseract -})() diff --git a/lib/Tesseract_dev.js b/lib/Tesseract_dev.js deleted file mode 100644 index 01b11e4..0000000 --- a/lib/Tesseract_dev.js +++ /dev/null @@ -1,130 +0,0 @@ -var Tesseract = (function(){ - - var Tesseract = {} - - //https://cdn.rawgit.com/naptha/tesseract.js/master/worker/worker.js - //https://rawgit.com/naptha/tesseract.js/master/worker/worker.js for testing - //https://cdn.rawgit.com/naptha/tesseract.js/master/worker/worker.js - - var blob = new Blob(["importScripts('http://localhost:1234/master/lib/worker.2015.07.26.js');"]); // changed on build - // console.log('localhost') - var worker = new Worker(window.URL.createObjectURL(blob)); - worker.postMessage({init: {mem: 16777216*6}}) - var bigworker = false - - var index = 0 - var handlers = [] - - worker.onmessage = function(e){ - var handler = handlers[e.data.index] - if(e.data.progress){ - handler.progress(e.data.progress) - } - else if(e.data.err){ - handler.reject(e.data.err) - handler.callback(e.data.err) - } - else { - handler.resolve(e.data.result) - handler.callback(null,e.data.result) - } - } - - function convertToImageData(image){ - if(image.getContext){ - image = image.getContext('2d'); - }else if(image.tagName == "IMG" || image.tagName == "VIDEO"){ - var c = document.createElement('canvas'); - if(image.tagName == "IMG"){ - c.width = image.naturalWidth; - c.height = image.naturalHeight; - }else if(image.tagName == "VIDEO"){ - c.width = image.videoWidth; - c.height = image.videoHeight; - } - var ctx = c.getContext('2d'); - ctx.drawImage(image, 0, 0); - image = ctx; - } - if(image.getImageData) image = image.getImageData(0, 0, image.canvas.width, image.canvas.height); - return image - } - - Tesseract.detect = function(image, progress, callback){ - image = convertToImageData(image) - - if(typeof progress === "undefined"){ - progress = callback = new Function() - } - - if (typeof callback === "undefined"){ - callback = progress - progress = new Function() - } - - var i = index++ - - handlers[i] = { - resolve: new Function(), - reject: new Function() - } - handlers[i].callback = callback - handlers[i].progress = progress - - return new Promise(function(resolve, reject){ - handlers[i].resolve = resolve - handlers[i].reject = reject - worker.postMessage({index: i, fun: 'detect', image: image}) - }) - - } - - Tesseract.recognize = function(image, options, callback){ - var lang = options.lang - if (typeof lang === "undefined"){ - lang = 'eng' - } - - if (!bigworker && ['chi_sim', 'chi_tra', 'jpn'].indexOf(lang) != -1){ - worker.postMessage({init: {mem: 16777216*10}}) - bigworker = true - console.log('started big worker') - } - - if (typeof options === 'string') { - lang = options - options = {} - } - - if (typeof options === "function") { - callback = options - options = {} - } - - image = convertToImageData(image) - - var i = index++ - - handlers[i] = { - resolve: new Function(), - reject: new Function() - } - handlers[i].callback = callback || new Function() - handlers[i].progress = (function(){ - if(typeof options.progress === 'function'){ - var p = options.progress - delete options.progress - return p - } - return function(){} - })() - - return new Promise(function(resolve, reject){ - handlers[i].resolve = resolve - handlers[i].reject = reject - worker.postMessage({index: i, fun: 'recognize', image: image, lang: lang, options: options}) - }) - - } - return Tesseract -})() diff --git a/package.json b/package.json index ee76085..def39d0 100644 --- a/package.json +++ b/package.json @@ -3,12 +3,25 @@ "version": "1.0.0", "description": "", "main": "Tesseract.js", + "scripts": { + "start": "node devServer.js", + "build": "webpack --config webpack.config.prod.js" + }, "dependencies": { "level-js": "^2.1.6", "pako": "^0.2.7", "tesseract.js-core": "^1.0.0" }, - "devDependencies": {}, + "devDependencies": { + "babel": "^6.5.2", + "babel-core": "^6.7.0", + "babel-loader": "^6.2.4", + "express": "^4.13.4", + "webpack": "^1.13.0", + "webpack-dev-middleware": "^1.5.1", + "babel-preset-stage-1": "^6.5.0", + "babel-preset-es2015": "^6.6.0" + }, "repository": { "type": "git", "url": "https://github.com/naptha/tesseract.js.git" diff --git a/src/Tesseract.js b/src/Tesseract.js deleted file mode 100644 index 47972e5..0000000 --- a/src/Tesseract.js +++ /dev/null @@ -1,130 +0,0 @@ -var Tesseract = (function(){ - - var Tesseract = {} - - //https://cdn.rawgit.com/naptha/tesseract.js/master/worker/worker.js - //https://rawgit.com/naptha/tesseract.js/master/worker/worker.js for testing - //https://cdn.rawgit.com/naptha/tesseract.js/master/worker/worker.js - - var blob = new Blob(["importScripts('__worker__');"]); // changed on build - // console.log('localhost') - var worker = new Worker(window.URL.createObjectURL(blob)); - worker.postMessage({init: {mem: 16777216*6}}) - var bigworker = false - - var index = 0 - var handlers = [] - - worker.onmessage = function(e){ - var handler = handlers[e.data.index] - if(e.data.progress){ - handler.progress(e.data.progress) - } - else if(e.data.err){ - handler.reject(e.data.err) - handler.callback(e.data.err) - } - else { - handler.resolve(e.data.result) - handler.callback(null,e.data.result) - } - } - - function convertToImageData(image){ - if(image.getContext){ - image = image.getContext('2d'); - }else if(image.tagName == "IMG" || image.tagName == "VIDEO"){ - var c = document.createElement('canvas'); - if(image.tagName == "IMG"){ - c.width = image.naturalWidth; - c.height = image.naturalHeight; - }else if(image.tagName == "VIDEO"){ - c.width = image.videoWidth; - c.height = image.videoHeight; - } - var ctx = c.getContext('2d'); - ctx.drawImage(image, 0, 0); - image = ctx; - } - if(image.getImageData) image = image.getImageData(0, 0, image.canvas.width, image.canvas.height); - return image - } - - Tesseract.detect = function(image, progress, callback){ - image = convertToImageData(image) - - if(typeof progress === "undefined"){ - progress = callback = new Function() - } - - if (typeof callback === "undefined"){ - callback = progress - progress = new Function() - } - - var i = index++ - - handlers[i] = { - resolve: new Function(), - reject: new Function() - } - handlers[i].callback = callback - handlers[i].progress = progress - - return new Promise(function(resolve, reject){ - handlers[i].resolve = resolve - handlers[i].reject = reject - worker.postMessage({index: i, fun: 'detect', image: image}) - }) - - } - - Tesseract.recognize = function(image, options, callback){ - var lang = options.lang - if (typeof lang === "undefined"){ - lang = 'eng' - } - - if (!bigworker && ['chi_sim', 'chi_tra', 'jpn'].indexOf(lang) != -1){ - worker.postMessage({init: {mem: 16777216*10}}) - bigworker = true - console.log('started big worker') - } - - if (typeof options === 'string') { - lang = options - options = {} - } - - if (typeof options === "function") { - callback = options - options = {} - } - - image = convertToImageData(image) - - var i = index++ - - handlers[i] = { - resolve: new Function(), - reject: new Function() - } - handlers[i].callback = callback || new Function() - handlers[i].progress = (function(){ - if(typeof options.progress === 'function'){ - var p = options.progress - delete options.progress - return p - } - return function(){} - })() - - return new Promise(function(resolve, reject){ - handlers[i].resolve = resolve - handlers[i].reject = reject - worker.postMessage({index: i, fun: 'recognize', image: image, lang: lang, options: options}) - }) - - } - return Tesseract -})() \ No newline at end of file diff --git a/src/browser/index.js b/src/browser/index.js new file mode 100644 index 0000000..3bad2a0 --- /dev/null +++ b/src/browser/index.js @@ -0,0 +1,65 @@ +//TODO: replace with cdn url +module.exports = function Tesseract(url=location.href+'build/tesseract.worker.js'){ + var blob = new Blob(["importScripts('"+url+"');"]) + var worker = new Worker(window.URL.createObjectURL(blob)); + + var bigworker = false + var jobCounter = 0 + var handlers = {} + + function runAsync(action, args){ + var jobId = jobCounter++ + handlers[jobId] = {} + var message = {jobId, action, args} + console.log(message) + worker.postMessage(message) + return { + then (f){ handlers[jobId].result = f; return this}, + error (f){ handlers[jobId].error = f; return this}, + progress(f){ handlers[jobId].progress = f; return this} + } + } + + worker.onmessage = function(e){ + var {jobId, progress, error, result} = e.data + var handler = handlers[jobId] + if(progress && handler.progress) handler.progress(progress); + if(error && handler.error) handler.error(error); + if(result && handler.result) handler.result(result); + } + + function convertToImageData(image){ + if(image.getContext) image = image.getContext('2d'); + else if(image.tagName == "IMG" || image.tagName == "VIDEO"){ + var c = document.createElement('canvas'); + c.width = image.naturalWidth || image.videoWidth; + c.height = image.naturalHeight || image.videoHeight; + var ctx = c.getContext('2d'); + ctx.drawImage(image, 0, 0); + image = ctx; + } + if(image.getImageData) image = image.getImageData(0, 0, image.canvas.width, image.canvas.height); + return image + } + + runAsync('init', {mem: (1<<24) * 6}) + + return { + detect(image){ + return runAsync('detect', {image: convertToImageData(image)}) + }, + + recognize(image, options='eng'){ + + if (typeof options === 'string') options = {lang: options}; + else options.lang = options.lang || 'eng'; + + if (!bigworker && ['chi_sim', 'chi_tra', 'jpn'].indexOf(options.lang) != -1){ + runAsync('init', {mem: (1<<24) * 10}) + bigworker = true + } + + return runAsync('recognize', {options, image: convertToImageData(image)}) + } + } +} \ No newline at end of file diff --git a/src/worker.js b/src/worker.js deleted file mode 100644 index 9b77e27..0000000 --- a/src/worker.js +++ /dev/null @@ -1,533 +0,0 @@ -var Tesseract304 = require('tesseract.js-core') -var leveljs = require('level-js') -var db; -if (typeof indexedDB === 'undefined'){ - db = { open: function(opts, cb){ cb(true) /*err = true*/ } } -} -else { - db = leveljs('./tessdata') -} - -console.log('hallo') - -var filesizes = {"afr": 1079573, "ara": 1701536, "aze": 1420865, "bel": 1276820, "ben": 6772012, "bul": 1605615, "cat": 1652368, "ces": 1035441, "chi_sim": 17710414, "chi_tra": 24717749, "chr": 320649, "dan-frak": 677656, "dan": 1972936, "deu-frak": 822644, "deu": 991656, "ell": 859719, "eng": 9453554, "enm": 619254, "epo": 1241212, "equ": 821130, "est": 1905040, "eus": 1641190, "fin": 979418, "fra": 1376221, "frk": 5912963, "frm": 5147082, "glg": 1674938, "grc": 3012615, "heb": 1051501, "hin": 6590065, "hrv": 1926995, "hun": 3074473, "ind": 1874776, "isl": 1634041, "ita": 948593, "ita_old": 3436571, "jpn": 13507168, "kan": 4390317, "kor": 5353098, "lav": 1843944, "lit": 1779240, "mal": 5966263, "meme": 88453, "mkd": 1163087, "mlt": 1463001, "msa": 1665427, "nld": 1134708, "nor": 2191610, "osd": 4274649, "pol": 7024662, "por": 909359, "ron": 915680, "rus": 5969957, "slk-frak": 289885, "slk": 2217342, "slv": 1611338, "spa": 883170, "spa_old": 5647453, "sqi": 1667041, "srp": 1770244, "swa": 757916, "swe": 2451917, "tam": 3498763, "tel": 5795246, "tgl": 1496256, "tha": 3811136, "tur": 3563264, "ukr": 937566, "vie": 2195922} - -var pako = require('pako') - -var T; - -var tesseractinit = (function createTesseractInstance(memory){ - - curindex = 0 - - var Module = Tesseract304({ - TOTAL_MEMORY: memory, //must be a multiple of 10 megabytes - TesseractProgress: function(percent){ - postMessage({ - index: curindex, - 'progress': { - 'recognized': Math.max(0,(percent-30)/70) - } - }) - }//, - // onRuntimeInitialized: function(){ - // console.log('wau') - // } - }) - - var base = new Module.TessBaseAPI() - var loaded_langs = [] - var loadLanguage = function(lang, index, cb){ // NodeJS style callback - if(loaded_langs.indexOf(lang) != -1){ - cb(null, lang) - } - else{ - Module.FS_createPath("/","tessdata",true,true) - - var downloadlang = function(shouldcache){ - postMessage({ - index: index, - 'progress': { - 'loaded_lang_model': 0, - cached: false, - requesting: true - } - }) - var xhr = new XMLHttpRequest(); - xhr.open('GET', 'https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/'+lang+'.traineddata.gz', true); - xhr.responseType = 'arraybuffer'; - xhr.onerror = function(){ cb(xhr, null) } - xhr.onprogress = function(e){ - postMessage({ - index: index, - 'progress': { - 'loaded_lang_model': e.loaded/filesizes[lang], //this is kinda wrong on safari - cached: false - } - }) - } - xhr.onload = function(){ - if (xhr.status == 200 || (xhr.status == 0 && xhr.response)) { - postMessage({ - index: index, - 'progress': 'unzipping_lang_model' - }) - - var response = new Uint8Array(xhr.response) - - while(response[0] == 0x1f && response[1] == 0x8b){ - response = pako.ungzip(response) - } - console.log('asdf') - - postMessage({ - index: index, - 'progress': { - 'unzipped_lang_model': true, - 'lang_model_size': response.length - } - }) - - Module.FS_createDataFile('tessdata', lang +".traineddata", response, true, false); - - if(shouldcache){ - db.put(lang, response, function(err){ - console.log('cached lang') - }) - } - - postMessage({ - index: index, - 'progress': { - 'created_virtual_datafile': true, - 'cached_file': shouldcache - } - }) - - loaded_langs.push(lang) - - cb(null, lang) - } else cb(xhr, null); - } - xhr.send(null) - } - - db.open({compression: false},function(err){ - // err = true - if (err) { - downloadlang(false) - } - else { - db.get(lang, function (err, value) { - - // err = true - - if (err) { - downloadlang(true) - } - else { - - while(value[0] == 0x1f && value[1] == 0x8b){ - value = pako.ungzip(value) - } - - postMessage({ - index: index, - 'progress': { - loaded_lang_model:1, - cached: true - } - }) - - Module.FS_createDataFile('tessdata', lang +".traineddata", value, true, false); - loaded_langs.push(lang) - cb(null, lang) - } - }) - } - }) - } - } - - function circularize(page){ - page.paragraphs = [] - page.lines = [] - page.words = [] - page.symbols = [] - - page.blocks.forEach(function(block){ - block.page = page; - - block.lines = [] - block.words = [] - block.symbols = [] - - block.paragraphs.forEach(function(para){ - para.block = block; - para.page = page; - - para.words = [] - para.symbols = [] - - para.lines.forEach(function(line){ - line.paragraph = para; - line.block = block; - line.page = page; - - line.symbols = [] - - line.words.forEach(function(word){ - word.line = line; - word.paragraph = para; - word.block = block; - word.page = page; - word.symbols.forEach(function(sym){ - sym.word = word; - sym.line = line; - sym.paragraph = para; - sym.block = block; - sym.page = page; - - sym.line.symbols.push(sym) - sym.paragraph.symbols.push(sym) - sym.block.symbols.push(sym) - sym.page.symbols.push(sym) - }) - word.paragraph.words.push(word) - word.block.words.push(word) - word.page.words.push(word) - }) - line.block.lines.push(line) - line.page.lines.push(line) - }) - para.page.paragraphs.push(para) - }) - }) - return page - } - - function DumpLiterallyEverything(){ - var ri = base.GetIterator(); - var blocks = []; - var block, para, textline, word, symbol; - - function enumToString(value, prefix){ - return (Object.keys(Module) - .filter(function(e){ return e.substr(0, prefix.length + 1) == prefix + '_' }) - .filter(function(e){ return Module[e] === value }) - .map(function(e){ return e.slice(prefix.length + 1) })[0]) - } - - ri.Begin() - do { - if(ri.IsAtBeginningOf(Module.RIL_BLOCK)){ - var poly = ri.BlockPolygon(); - var polygon = null; - // BlockPolygon() returns null when automatic page segmentation is off - if(Module.getPointer(poly) > 0){ - var n = poly.get_n(), - px = poly.get_x(), - py = poly.get_y(), - polygon = []; - for(var i = 0; i < n; i++){ - polygon.push([px.getValue(i), py.getValue(i)]); - } - Module._ptaDestroy(Module.getPointer(poly)); - } - - block = { - paragraphs: [], - - text: ri.GetUTF8Text(Module.RIL_BLOCK), - confidence: ri.Confidence(Module.RIL_BLOCK), - baseline: ri.getBaseline(Module.RIL_BLOCK), - bbox: ri.getBoundingBox(Module.RIL_BLOCK), - - blocktype: enumToString(ri.BlockType(), 'PT'), - polygon: polygon - } - blocks.push(block) - } - if(ri.IsAtBeginningOf(Module.RIL_PARA)){ - para = { - lines: [], - - text: ri.GetUTF8Text(Module.RIL_PARA), - confidence: ri.Confidence(Module.RIL_PARA), - baseline: ri.getBaseline(Module.RIL_PARA), - bbox: ri.getBoundingBox(Module.RIL_PARA), - - is_ltr: !!ri.ParagraphIsLtr() - } - block.paragraphs.push(para) - } - if(ri.IsAtBeginningOf(Module.RIL_TEXTLINE)){ - textline = { - words: [], - - text: ri.GetUTF8Text(Module.RIL_TEXTLINE), - confidence: ri.Confidence(Module.RIL_TEXTLINE), - baseline: ri.getBaseline(Module.RIL_TEXTLINE), - bbox: ri.getBoundingBox(Module.RIL_TEXTLINE) - } - para.lines.push(textline) - } - if(ri.IsAtBeginningOf(Module.RIL_WORD)){ - var fontInfo = ri.getWordFontAttributes(), - wordDir = ri.WordDirection(); - word = { - symbols: [], - choices: [], - - text: ri.GetUTF8Text(Module.RIL_WORD), - confidence: ri.Confidence(Module.RIL_WORD), - baseline: ri.getBaseline(Module.RIL_WORD), - bbox: ri.getBoundingBox(Module.RIL_WORD), - - is_numeric: !!ri.WordIsNumeric(), - in_dictionary: !!ri.WordIsFromDictionary(), - direction: enumToString(wordDir, 'DIR'), - language: ri.WordRecognitionLanguage(), - - is_bold: fontInfo.is_bold, - is_italic: fontInfo.is_italic, - is_underlined: fontInfo.is_underlined, - is_monospace: fontInfo.is_monospace, - is_serif: fontInfo.is_serif, - is_smallcaps: fontInfo.is_smallcaps, - font_size: fontInfo.pointsize, - font_id: fontInfo.font_id, - font_name: fontInfo.font_name, - } - var wc = new Module.WordChoiceIterator(ri); - do { - word.choices.push({ - text: wc.GetUTF8Text(), - confidence: wc.Confidence() - }) - } while (wc.Next()); - Module.destroy(wc) - textline.words.push(word) - } - - var image = null; - // var pix = ri.GetBinaryImage(Module.RIL_SYMBOL) - // var image = pix2array(pix); - // // for some reason it seems that things stop working if you destroy pics - // Module._pixDestroy(Module.getPointer(pix)); - if(ri.IsAtBeginningOf(Module.RIL_SYMBOL)){ - symbol = { - choices: [], - image: image, - - text: ri.GetUTF8Text(Module.RIL_SYMBOL), - confidence: ri.Confidence(Module.RIL_SYMBOL), - baseline: ri.getBaseline(Module.RIL_SYMBOL), - bbox: ri.getBoundingBox(Module.RIL_SYMBOL), - - is_superscript: !!ri.SymbolIsSuperscript(), - is_subscript: !!ri.SymbolIsSubscript(), - is_dropcap: !!ri.SymbolIsDropcap(), - } - word.symbols.push(symbol) - var ci = new Module.ChoiceIterator(ri); - do { - symbol.choices.push({ - text: ci.GetUTF8Text(), - confidence: ci.Confidence() - }) - } while (ci.Next()); - Module.destroy(ci) - } - } while (ri.Next(Module.RIL_SYMBOL)); - Module.destroy(ri) - - return { - text: base.GetUTF8Text(), - html: deindent(base.GetHOCRText()), - - confidence: base.MeanTextConf(), - - blocks: blocks, - - psm: enumToString(base.GetPageSegMode(), 'PSM'), - oem: enumToString(base.oem(), 'OEM'), - version: base.Version(), - } - } - - function deindent(html){ - var lines = html.split('\n') - if(lines[0].substring(0,2) === " "){ - for (var i = 0; i < lines.length; i++) { - if (lines[i].substring(0,2) === " ") { - lines[i] = lines[i].slice(2) - } - }; - } - return lines.join('\n') - } - - function desaturate(image){ - var width, height; - if(image.data){ - var src = image.data; - width = image.width, height = image.height; - var dst = new Uint8Array(width * height); - var srcLength = src.length | 0, srcLength_16 = (srcLength - 16) | 0; - - for (var i = 0, j = 0; i <= srcLength_16; i += 16, j += 4) { - // convert to grayscale 4 pixels at a time; eveything with alpha get put in front of 50% gray - dst[j] = (((src[i] * 77 + src[i+1] * 151 + src[i+2] * 28) * src[i+3]) + ((255-src[i+3]) << 15) + 32768) >> 16 - dst[j+1] = (((src[i+4] * 77 + src[i+5] * 151 + src[i+6] * 28) * src[i+7]) + ((255-src[i+7]) << 15) + 32768) >> 16 - dst[j+2] = (((src[i+8] * 77 + src[i+9] * 151 + src[i+10] * 28) * src[i+11]) + ((255-src[i+11]) << 15) + 32768) >> 16 - dst[j+3] = (((src[i+12] * 77 + src[i+13] * 151 + src[i+14] * 28) * src[i+15]) + ((255-src[i+15]) << 15) + 32768) >> 16 - - } - for (; i < srcLength; i += 4, ++j) //finish up - dst[j] = (((src[i] * 77 + src[i+1] * 151 + src[i+2] * 28) * src[i+3]) + ((255-src[i+3]) << 15) + 32768) >> 16 - - image = dst; - } - else { - throw 'Expected ImageData' - } - return image - } - - function recognize(index, image, lang, options, cb){ - - - var width = image.width, height = image.height; - - image = desaturate(image) - - var ptr = Module.allocate(image, 'i8', Module.ALLOC_NORMAL); - - loadLanguage(lang, index, function(err, result){ - - if(err){ - console.error("error loading", lang); - Module._free(ptr); - cb(err, null) - } - else { - curindex = index - - base.Init(null, lang) - - postMessage({ - index: index, - 'progress': { - 'initialized_with_lang': true, - 'lang': lang - } - }) - - for (var option in options) { - if (options.hasOwnProperty(option)) { - base.SetVariable(option, options[option]); - postMessage({ - index: index, - 'progress': { - 'set_variable': { - variable: option, - value: options[option] - } - } - }) - } - } - - - base.SetImage(Module.wrapPointer(ptr), width, height, 1, width) - base.SetRectangle(0, 0, width, height) - // base.GetUTF8Text() - base.Recognize(null) - var everything = circularize(DumpLiterallyEverything()) - base.End(); - Module._free(ptr); - cb(null, everything) - - } - }) - } - - function detect(index, image, cb){ - var width = image.width, height = image.height; - image = desaturate(image) - - var ptr = Module.allocate(image, 'i8', Module.ALLOC_NORMAL); - console.log('allocated image') - // base = new Module.TessBaseAPI() - - loadLanguage('osd', index, function(err, result){ - if(err){ - Module._free(ptr); - cb(err) - } - else { - curindex = index - base.Init(null, 'osd') - base.SetPageSegMode(Module.PSM_OSD_ONLY) - console.log('loaded language') - - base.SetImage(Module.wrapPointer(ptr), width, height, 1, width) - base.SetRectangle(0, 0, width, height) - - var results = new Module.OSResults(); - var success = base.DetectOS(results); - if(!success){ - base.End(); - Module._free(ptr); - cb("failed to detect os") - } - else { - var charset = results.get_unicharset() - console.log(charset) - // results.print_scores() - - var best = results.get_best_result() - var oid = best.get_orientation_id(), - sid = best.get_script_id(); - // console.log('orientation id', oid, [0, 270, 180, 90][oid], best.get_oconfidence()) - // console.log('script id', sid, charset.get_script_from_script_id(sid), best.get_sconfidence()) - // console.log(best) - - cb(null, { - tesseract_script_id: sid, - script: charset.get_script_from_script_id(sid), - script_confidence: best.get_sconfidence(), - orientation_degrees: [0, 270, 180, 90][oid], - orientation_confidence: best.get_oconfidence() - }) - - base.End(); - Module._free(ptr); - } - } - }) - } - - return { - recognize: recognize, - detect: detect - } -}) - -onmessage = function(e) { - - if(e.data.init){ - T = tesseractinit(e.data.init.mem) - } - else if(e.data.fun === 'recognize'){ - T.recognize(e.data.index, e.data.image, e.data.lang, e.data.options, function(err, result){ - postMessage({index: e.data.index, err:err, result: result}) - }) - } - else if(e.data.fun === 'detect'){ - T.detect(e.data.index, e.data.image, function(err, result){ - postMessage({index: e.data.index, err:err, result: result}) - }) - } -} \ No newline at end of file diff --git a/src/worker/circularize.js b/src/worker/circularize.js new file mode 100644 index 0000000..4ad180f --- /dev/null +++ b/src/worker/circularize.js @@ -0,0 +1,56 @@ +export default function circularize(page){ + page.paragraphs = [] + page.lines = [] + page.words = [] + page.symbols = [] + + page.blocks.forEach(function(block){ + block.page = page; + + block.lines = [] + block.words = [] + block.symbols = [] + + block.paragraphs.forEach(function(para){ + para.block = block; + para.page = page; + + para.words = [] + para.symbols = [] + + para.lines.forEach(function(line){ + line.paragraph = para; + line.block = block; + line.page = page; + + line.symbols = [] + + line.words.forEach(function(word){ + word.line = line; + word.paragraph = para; + word.block = block; + word.page = page; + word.symbols.forEach(function(sym){ + sym.word = word; + sym.line = line; + sym.paragraph = para; + sym.block = block; + sym.page = page; + + sym.line.symbols.push(sym) + sym.paragraph.symbols.push(sym) + sym.block.symbols.push(sym) + sym.page.symbols.push(sym) + }) + word.paragraph.words.push(word) + word.block.words.push(word) + word.page.words.push(word) + }) + line.block.lines.push(line) + line.page.lines.push(line) + }) + para.page.paragraphs.push(para) + }) + }) + return page +} \ No newline at end of file diff --git a/src/worker/db.js b/src/worker/db.js new file mode 100644 index 0000000..a33c458 --- /dev/null +++ b/src/worker/db.js @@ -0,0 +1,3 @@ +import leveljs from 'level-js' +var db = typeof indexedDB === 'undefined' ? { open: (_, cb) => cb(true) } : leveljs('./tessdata') +export default db \ No newline at end of file diff --git a/src/worker/desaturate.js b/src/worker/desaturate.js new file mode 100644 index 0000000..d027e68 --- /dev/null +++ b/src/worker/desaturate.js @@ -0,0 +1,26 @@ +export default function desaturate(image){ + var width, height; + if(image.data){ + var src = image.data; + width = image.width, height = image.height; + var dst = new Uint8Array(width * height); + var srcLength = src.length | 0, srcLength_16 = (srcLength - 16) | 0; + + for (var i = 0, j = 0; i <= srcLength_16; i += 16, j += 4) { + // convert to grayscale 4 pixels at a time; eveything with alpha gets put in front of 50% gray + dst[j] = (((src[i] * 77 + src[i+1] * 151 + src[i+2] * 28) * src[i+3]) + ((255-src[i+3]) << 15) + 32768) >> 16 + dst[j+1] = (((src[i+4] * 77 + src[i+5] * 151 + src[i+6] * 28) * src[i+7]) + ((255-src[i+7]) << 15) + 32768) >> 16 + dst[j+2] = (((src[i+8] * 77 + src[i+9] * 151 + src[i+10] * 28) * src[i+11]) + ((255-src[i+11]) << 15) + 32768) >> 16 + dst[j+3] = (((src[i+12] * 77 + src[i+13] * 151 + src[i+14] * 28) * src[i+15]) + ((255-src[i+15]) << 15) + 32768) >> 16 + + } + for (; i < srcLength; i += 4, ++j) //finish up + dst[j] = (((src[i] * 77 + src[i+1] * 151 + src[i+2] * 28) * src[i+3]) + ((255-src[i+3]) << 15) + 32768) >> 16 + + image = dst; + } + else { + throw 'Expected ImageData' + } + return image +} \ No newline at end of file diff --git a/src/worker/detect.js b/src/worker/detect.js new file mode 100644 index 0000000..1b980a2 --- /dev/null +++ b/src/worker/detect.js @@ -0,0 +1,54 @@ +export default function detect(jobId, module, base, image, cb){ + var width = image.width, height = image.height; + image = desaturate(image) + + var ptr = module.allocate(image, 'i8', module.ALLOC_NORMAL); + console.log('allocated image') + // base = new module.TessBaseAPI() + + loadLanguage('osd', jobId, function(err, result){ + if(err){ + module._free(ptr); + cb(err) + } + else { + base.Init(null, 'osd') + base.SetPageSegMode(module.PSM_OSD_ONLY) + console.log('loaded language') + + base.SetImage(module.wrapPointer(ptr), width, height, 1, width) + base.SetRectangle(0, 0, width, height) + + var results = new module.OSResults(); + var success = base.DetectOS(results); + if(!success){ + base.End(); + module._free(ptr); + cb("failed to detect os") + } + else { + var charset = results.get_unicharset() + console.log(charset) + // results.print_scores() + + var best = results.get_best_result() + var oid = best.get_orientation_id(), + sid = best.get_script_id(); + // console.log('orientation id', oid, [0, 270, 180, 90][oid], best.get_oconfidence()) + // console.log('script id', sid, charset.get_script_from_script_id(sid), best.get_sconfidence()) + // console.log(best) + + cb(null, { + tesseract_script_id: sid, + script: charset.get_script_from_script_id(sid), + script_confidence: best.get_sconfidence(), + orientation_degrees: [0, 270, 180, 90][oid], + orientation_confidence: best.get_oconfidence() + }) + + base.End(); + module._free(ptr); + } + } + }) +} \ No newline at end of file diff --git a/src/worker/dump.js b/src/worker/dump.js new file mode 100644 index 0000000..203e7e0 --- /dev/null +++ b/src/worker/dump.js @@ -0,0 +1,163 @@ +function deindent(html){ + var lines = html.split('\n') + if(lines[0].substring(0,2) === " "){ + for (var i = 0; i < lines.length; i++) { + if (lines[i].substring(0,2) === " ") { + lines[i] = lines[i].slice(2) + } + }; + } + return lines.join('\n') +} + +export default function DumpLiterallyEverything(module, base){ + var ri = base.GetIterator(); + var blocks = []; + var block, para, textline, word, symbol; + + function enumToString(value, prefix){ + return (Object.keys(module) + .filter(function(e){ return e.substr(0, prefix.length + 1) == prefix + '_' }) + .filter(function(e){ return module[e] === value }) + .map(function(e){ return e.slice(prefix.length + 1) })[0]) + } + + const {RIL_BLOCK, RIL_PARA, RIL_TEXTLINE, RIL_WORD, RIL_SYMBOL} = module + + ri.Begin() + do { + if(ri.IsAtBeginningOf(RIL_BLOCK)){ + var poly = ri.BlockPolygon(); + var polygon = null; + // BlockPolygon() returns null when automatic page segmentation is off + if(module.getPointer(poly) > 0){ + var n = poly.get_n(), + px = poly.get_x(), + py = poly.get_y(), + polygon = []; + for(var i = 0; i < n; i++){ + polygon.push([px.getValue(i), py.getValue(i)]); + } + module._ptaDestroy(module.getPointer(poly)); + } + + block = { + paragraphs: [], + + text: ri.GetUTF8Text(RIL_BLOCK), + confidence: ri.Confidence(RIL_BLOCK), + baseline: ri.getBaseline(RIL_BLOCK), + bbox: ri.getBoundingBox(RIL_BLOCK), + + blocktype: enumToString(ri.BlockType(), 'PT'), + polygon: polygon + } + blocks.push(block) + } + if(ri.IsAtBeginningOf(RIL_PARA)){ + para = { + lines: [], + + text: ri.GetUTF8Text(RIL_PARA), + confidence: ri.Confidence(RIL_PARA), + baseline: ri.getBaseline(RIL_PARA), + bbox: ri.getBoundingBox(RIL_PARA), + + is_ltr: !!ri.ParagraphIsLtr() + } + block.paragraphs.push(para) + } + if(ri.IsAtBeginningOf(RIL_TEXTLINE)){ + textline = { + words: [], + + text: ri.GetUTF8Text(RIL_TEXTLINE), + confidence: ri.Confidence(RIL_TEXTLINE), + baseline: ri.getBaseline(RIL_TEXTLINE), + bbox: ri.getBoundingBox(RIL_TEXTLINE) + } + para.lines.push(textline) + } + if(ri.IsAtBeginningOf(RIL_WORD)){ + var fontInfo = ri.getWordFontAttributes(), + wordDir = ri.WordDirection(); + word = { + symbols: [], + choices: [], + + text: ri.GetUTF8Text(RIL_WORD), + confidence: ri.Confidence(RIL_WORD), + baseline: ri.getBaseline(RIL_WORD), + bbox: ri.getBoundingBox(RIL_WORD), + + is_numeric: !!ri.WordIsNumeric(), + in_dictionary: !!ri.WordIsFromDictionary(), + direction: enumToString(wordDir, 'DIR'), + language: ri.WordRecognitionLanguage(), + + is_bold: fontInfo.is_bold, + is_italic: fontInfo.is_italic, + is_underlined: fontInfo.is_underlined, + is_monospace: fontInfo.is_monospace, + is_serif: fontInfo.is_serif, + is_smallcaps: fontInfo.is_smallcaps, + font_size: fontInfo.pointsize, + font_id: fontInfo.font_id, + font_name: fontInfo.font_name, + } + var wc = new module.WordChoiceIterator(ri); + do { + word.choices.push({ + text: wc.GetUTF8Text(), + confidence: wc.Confidence() + }) + } while (wc.Next()); + module.destroy(wc) + textline.words.push(word) + } + + var image = null; + // var pix = ri.GetBinaryImage(RIL_SYMBOL) + // var image = pix2array(pix); + // // for some reason it seems that things stop working if you destroy pics + // module._pixDestroy(module.getPointer(pix)); + if(ri.IsAtBeginningOf(RIL_SYMBOL)){ + symbol = { + choices: [], + image: image, + + text: ri.GetUTF8Text(RIL_SYMBOL), + confidence: ri.Confidence(RIL_SYMBOL), + baseline: ri.getBaseline(RIL_SYMBOL), + bbox: ri.getBoundingBox(RIL_SYMBOL), + + is_superscript: !!ri.SymbolIsSuperscript(), + is_subscript: !!ri.SymbolIsSubscript(), + is_dropcap: !!ri.SymbolIsDropcap(), + } + word.symbols.push(symbol) + var ci = new module.ChoiceIterator(ri); + do { + symbol.choices.push({ + text: ci.GetUTF8Text(), + confidence: ci.Confidence() + }) + } while (ci.Next()); + module.destroy(ci) + } + } while (ri.Next(RIL_SYMBOL)); + module.destroy(ri) + + return { + text: base.GetUTF8Text(), + html: deindent(base.GetHOCRText()), + + confidence: base.MeanTextConf(), + + blocks: blocks, + + psm: enumToString(base.GetPageSegMode(), 'PSM'), + oem: enumToString(base.oem(), 'OEM'), + version: base.Version(), + } +} \ No newline at end of file diff --git a/src/worker/fileSizes.js b/src/worker/fileSizes.js new file mode 100644 index 0000000..7efb8f8 --- /dev/null +++ b/src/worker/fileSizes.js @@ -0,0 +1,2 @@ +const fileSizes = {"afr": 1079573, "ara": 1701536, "aze": 1420865, "bel": 1276820, "ben": 6772012, "bul": 1605615, "cat": 1652368, "ces": 1035441, "chi_sim": 17710414, "chi_tra": 24717749, "chr": 320649, "dan-frak": 677656, "dan": 1972936, "deu-frak": 822644, "deu": 991656, "ell": 859719, "eng": 9453554, "enm": 619254, "epo": 1241212, "equ": 821130, "est": 1905040, "eus": 1641190, "fin": 979418, "fra": 1376221, "frk": 5912963, "frm": 5147082, "glg": 1674938, "grc": 3012615, "heb": 1051501, "hin": 6590065, "hrv": 1926995, "hun": 3074473, "ind": 1874776, "isl": 1634041, "ita": 948593, "ita_old": 3436571, "jpn": 13507168, "kan": 4390317, "kor": 5353098, "lav": 1843944, "lit": 1779240, "mal": 5966263, "meme": 88453, "mkd": 1163087, "mlt": 1463001, "msa": 1665427, "nld": 1134708, "nor": 2191610, "osd": 4274649, "pol": 7024662, "por": 909359, "ron": 915680, "rus": 5969957, "slk-frak": 289885, "slk": 2217342, "slv": 1611338, "spa": 883170, "spa_old": 5647453, "sqi": 1667041, "srp": 1770244, "swa": 757916, "swe": 2451917, "tam": 3498763, "tel": 5795246, "tgl": 1496256, "tha": 3811136, "tur": 3563264, "ukr": 937566, "vie": 2195922} +export default fileSizes; \ No newline at end of file diff --git a/src/worker/index.js b/src/worker/index.js new file mode 100644 index 0000000..3172df8 --- /dev/null +++ b/src/worker/index.js @@ -0,0 +1,39 @@ +import TesseractCore from 'tesseract.js-core' +import pako from 'pako' + +import recognize from './recognize' +import detect from './detect' + +var module, base, jobId + +onmessage = function(e) { + var {action, args} = e.data; + jobId = e.data.jobId + + console.log('worker got action', action) + + if(action == 'init'){ + + module = TesseractCore({ + TOTAL_MEMORY: args.mem, //must be a multiple of 10 megabytes + TesseractProgress(percent){ + postMessage({ jobId, + 'progress': { + 'recognized': Math.max(0,(percent-30)/70) + } + }) + }, + onRuntimeInitialized() {} + }) + module.FS_createPath("/","tessdata",true,true) + base = new module.TessBaseAPI() + + } else if(action === 'recognize'){ + var {image, options} = args + recognize(jobId, module, base, image, options, + (error, result) => postMessage({jobId, error, result})) + } else if(action === 'detect'){ + detect(jobId, module, base, args.image, + (error, result) => postMessage({jobId, error, result})) + } +} \ No newline at end of file diff --git a/src/worker/loadLanguage.js b/src/worker/loadLanguage.js new file mode 100644 index 0000000..386b08e --- /dev/null +++ b/src/worker/loadLanguage.js @@ -0,0 +1,88 @@ +import pako from 'pako' +import db from './db' +import fileSizes from './fileSizes' + +function getLanguageData(lang, progress, cb, url='https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/'+lang+'.traineddata.gz'){ + var xhr = new XMLHttpRequest(); + xhr.responseType = 'arraybuffer'; + xhr.open('GET', url, true); + xhr.onerror = e => { + xhr.onprogress = xhr.onload = null + cb(xhr, null) + } + xhr.onprogress = e => progress({ + 'loaded_lang_model': e.loaded/fileSizes[lang], //this is kinda wrong on safari + cached: false + }) + xhr.onload = e => { + if (!(xhr.status == 200 || (xhr.status == 0 && xhr.response))) return cb(xhr, null); + + progress({'unzipping_lang_model': true}) + + var response = new Uint8Array(xhr.response) + while(response[0] == 0x1f && response[1] == 0x8b) response = pako.ungzip(response); + + progress({ + 'unzipped_lang_model': true, + 'lang_model_size': response.length + }) + + cb(null, response) + } + + progress({ + 'loaded_lang_model': 0, + cached: false, + requesting: true + }) + + xhr.send() +} + +// var loaded_langs = [] + +export default function loadLanguage(lang, jobId, cb, url){ + + console.log('loadLanguage jobId', jobId) + + // if(loaded_langs.indexOf(lang) != -1) return cb(null, lang); + + function progressMessage(progress){ + postMessage({ jobId, progress }) + } + + function finish(err, data) { + if(err) return cb(err); + // loaded_langs.push(lang) + cb(null, data) + } + + function createDataFile(err, data){ + progressMessage({ created_virtual_datafile: true}) + finish(err, data) + } + + function createDataFileCached(err, data) { + if(err) return createDataFile(err); + + db.put(lang, data, err => console.log('cached', lang, err)) + progressMessage({cached_lang: lang}) + createDataFile(null, data) + } + + + db.open({compression: false}, err => { + if (err) return getLanguageData(lang, progressMessage, createDataFile, url); + + db.get(lang, (err, data) => { + + if (err) return getLanguageData(lang, progressMessage, createDataFileCached, url) + + while(data[0] == 0x1f && data[1] == 0x8b) data = pako.ungzip(data); + + progressMessage({ loaded_lang_model: lang, from_cache: true }) + + cb(null, data) + }) + }) +} \ No newline at end of file diff --git a/src/worker/recognize.js b/src/worker/recognize.js new file mode 100644 index 0000000..f1107b9 --- /dev/null +++ b/src/worker/recognize.js @@ -0,0 +1,71 @@ +import desaturate from './desaturate' +import loadLanguage from './loadLanguage' +import circularize from './circularize' +import dump from './dump' + +var loaded_langs = [] + +export default function recognize(jobId, module, base, image, options, cb){ + + console.log('recognize id', jobId) + var {lang} = options + var width = image.width, height = image.height; + + image = desaturate(image) + + var ptr = module.allocate(image, 'i8', module.ALLOC_NORMAL); + + + function run() { + base.Init(null, lang) + + postMessage({ + jobId, + 'progress': { + 'initialized_with_lang': lang + } + }) + + for (var option in options) { + if (options.hasOwnProperty(option)) { + base.SetVariable(option, options[option]); + postMessage({ + jobId: jobId, + 'progress': { + 'set_variable': { + variable: option, + value: options[option] + } + } + }) + } + } + + + base.SetImage(module.wrapPointer(ptr), width, height, 1, width) + base.SetRectangle(0, 0, width, height) + // base.GetUTF8Text() + base.Recognize(null) + var everything = circularize(dump(module, base)) + base.End(); + module._free(ptr); + cb(null, everything) + } + + + + if(loaded_langs.indexOf(lang) == -1) loadLanguage(lang, jobId, function(err, result){ + + if(err){ + console.error("error loading", lang); + module._free(ptr); + return cb(err, null); + } + + loaded_langs.push(lang) + module.FS_createDataFile('tessdata', lang +".traineddata", result, true, false); + run() + + }) + else run(); +} \ No newline at end of file diff --git a/webpack.config.dev.js b/webpack.config.dev.js new file mode 100644 index 0000000..25316a0 --- /dev/null +++ b/webpack.config.dev.js @@ -0,0 +1,42 @@ +var path = require('path'); +var webpack = require('webpack'); + +function config({entry, output, include}) { + return { + devtool: 'cheap-module-eval-source-map', + entry, + output: Object.assign({}, output, { + path: path.join(__dirname, 'build'), + publicPath: '/build/', + }), + plugins: [ + new webpack.NoErrorsPlugin() + ], + module: { + loaders: [{ + test: /\.js$/, + loaders: ['babel'], + include + }] + }, + node: { + fs: "empty" + } + } +} + +module.exports = [{ + entry: './src/browser/index.js', + output: { + filename: 'tesseract.js', + library: "Tesseract", + libraryTarget: "umd" + }, + include: [path.join(__dirname, 'src/browser')] +}, { + entry: './src/worker/index.js', + output: { + filename: 'tesseract.worker.js', + }, + include: [path.join(__dirname, 'src/worker')] +}].map(config); \ No newline at end of file diff --git a/webpack.config.prod.js b/webpack.config.prod.js new file mode 100644 index 0000000..302e5eb --- /dev/null +++ b/webpack.config.prod.js @@ -0,0 +1,46 @@ +var path = require('path'); +var webpack = require('webpack'); + +function config({entry, output, include}) { + return { + entry, + output: Object.assign({}, output, { + path: path.join(__dirname, 'dist') + }), + plugins: [ + new webpack.optimize.OccurenceOrderPlugin(), + new webpack.optimize.DedupePlugin(), + new webpack.optimize.UglifyJsPlugin({ + compressor: { + warnings: false + } + }) + ], + module: { + loaders: [{ + test: /\.js$/, + loaders: ['babel'], + include + }] + }, + node: { + fs: "empty" + } + } +} + +module.exports = [{ + entry: './src/browser/index.js', + output: { + filename: 'tesseract.js', + library: "Tesseract", + libraryTarget: "umd" + }, + include: [path.join(__dirname, 'src/browser')] +}, { + entry: './src/worker/index.js', + output: { + filename: 'tesseract.worker.js', + }, + include: [path.join(__dirname, 'src/worker')] +}].map(config); \ No newline at end of file