Browse Source

rewrite

pull/12/head
Guillermo 8 years ago
parent
commit
b130433437
  1. 17
      README.md
  2. 1
      bify
  3. 26
      devServer.js
  4. 1
      dist/tesseract.js
  5. 19
      index.html
  6. 130
      lib/Tesseract.2015.07.26.js
  7. 130
      lib/Tesseract_dev.js
  8. 12281
      lib/worker.2015.07.26.js
  9. 15
      package.json
  10. 130
      src/Tesseract.js
  11. 65
      src/browser/index.js
  12. 533
      src/worker.js
  13. 56
      src/worker/circularize.js
  14. 3
      src/worker/db.js
  15. 26
      src/worker/desaturate.js
  16. 54
      src/worker/detect.js
  17. 163
      src/worker/dump.js
  18. 2
      src/worker/fileSizes.js
  19. 39
      src/worker/index.js
  20. 88
      src/worker/loadLanguage.js
  21. 71
      src/worker/recognize.js
  22. 42
      webpack.config.dev.js
  23. 46
      webpack.config.prod.js

17
README.md

@ -1 +1,18 @@
# tesseract.js # tesseract.js
Tesseract.js is a pure JavaScript port of the Tesseract OCR engine that can recognize English, Chinese, Russian, and 60 other languages.
<!-- ![alt text]( "Logo Title Text 1") -->
# Installation
Tesseract.js works with a `<script>` tag or via `npm` (if you're using webpack or browserify).
## `<script/>`
```html
<script src='' />
```
## npm
```shell
npm install tesseract
```

1
bify

@ -1 +0,0 @@
browserify src/worker.js -o inter/worker.js

26
devServer.js

@ -0,0 +1,26 @@
var path = require('path');
var express = require('express');
var webpack = require('webpack');
var config = require('./webpack.config.dev');
// Development server: webpack output is served through webpack-dev-middleware,
// and the repository root is exposed as static files next to it.
var port = 7355
var app = express();
var compiler = webpack(config);

// The public path is taken from the first entry of the webpack config array.
var devMiddleware = require('webpack-dev-middleware')(compiler, {
  publicPath: config[0].output.publicPath,
  noInfo: true
});

app.use(devMiddleware);
// app.use(require('webpack-hot-middleware')(compiler));
app.use('/', express.static('./'));

app.listen(port, 'localhost', function onListening(err) {
  if (!err) {
    console.log('Listening at http://localhost:' + port);
    return;
  }
  console.log(err);
});

1
dist/tesseract.js vendored

@ -0,0 +1 @@
!function(e,t){"object"==typeof exports&&"object"==typeof module?module.exports=t():"function"==typeof define&&define.amd?define([],t):"object"==typeof exports?exports.Tesseract=t():e.Tesseract=t()}(this,function(){return function(e){function t(n){if(r[n])return r[n].exports;var o=r[n]={exports:{},id:n,loaded:!1};return e[n].call(o.exports,o,o.exports,t),o.loaded=!0,o.exports}var r={};return t.m=e,t.c=r,t.p="",t(0)}([function(e,t){"use strict";e.exports=function(){function e(e,t){var r=a++;s[r]={};var n={jobId:r,action:e,args:t};return console.log(n),o.postMessage(n),{then:function(e){return s[r].result=e,this},error:function(e){return s[r].error=e,this},progress:function(e){return s[r].progress=e,this}}}function t(e){if(e.getContext)e=e.getContext("2d");else if("IMG"==e.tagName||"VIDEO"==e.tagName){var t=document.createElement("canvas");t.width=e.naturalWidth||e.videoWidth,t.height=e.naturalHeight||e.videoHeight;var r=t.getContext("2d");r.drawImage(e,0,0),e=r}return e.getImageData&&(e=e.getImageData(0,0,e.canvas.width,e.canvas.height)),e}var r=arguments.length>0&&void 0!==arguments[0]?arguments[0]:location.href+"build/tesseract.worker.js",n=new Blob(["importScripts('"+r+"');"]),o=new Worker(window.URL.createObjectURL(n)),i=!1,a=0,s={};return o.onmessage=function(e){var t=e.data,r=t.jobId,n=t.progress,o=t.error,i=t.result,a=s[r];n&&a.progress&&a.progress(n),o&&a.error&&a.error(o),i&&a.result&&a.result(i)},e("init",{mem:100663296}),{detect:function(r){return e("detect",{image:t(r)})},recognize:function(r){var n=arguments.length>1&&void 0!==arguments[1]?arguments[1]:"eng";return"string"==typeof n?n={lang:n}:n.lang=n.lang||"eng",i||["chi_sim","chi_tra","jpn"].indexOf(n.lang)==-1||(e("init",{mem:167772160}),i=!0),e("recognize",{options:n,image:t(r)})}}}}])});

19
example.htm → index.html

@ -1,5 +1,5 @@
<canvas id="c"></canvas> <canvas id="c"></canvas>
<script type="text/javascript" src="./lib/Tesseract.js"></script> <script type="text/javascript" src="./build/tesseract.js"></script>
<script type="text/javascript"> <script type="text/javascript">
var canvas = document.getElementById('c') var canvas = document.getElementById('c')
canvas.width = 400 canvas.width = 400
@ -14,11 +14,16 @@
ctx.font = '30px sans-serif' ctx.font = '30px sans-serif'
ctx.fillText('the Cosmic Void', 100, 120) ctx.fillText('the Cosmic Void', 100, 120)
Tesseract.recognize(canvas,{ var tesseract = Tesseract();
tessedit_char_blacklist:'e',
progress: function(e){ tesseract.recognize(canvas, {
console.log(e) tessedit_char_blacklist: 'e'
} })
}).then( function(d){ console.log(d) } ) .progress(function(e){
console.log('progress', e)
})
.then(function(e){
console.log('result', e)
})
</script> </script>

130
lib/Tesseract.2015.07.26.js

@ -1,130 +0,0 @@
var Tesseract = (function(){
var Tesseract = {}

// Worker script locations noted by the original author:
//   https://cdn.rawgit.com/naptha/tesseract.js/master/worker/worker.js
//   https://rawgit.com/naptha/tesseract.js/master/worker/worker.js (for testing)
// The worker is started from an in-memory Blob whose only statement is an
// importScripts() call for the real worker script.
var blob = new Blob(["importScripts('https://cdn.rawgit.com/naptha/tesseract.js/master/lib/worker.2015.07.26.js');"]); // changed on build
var worker = new Worker(window.URL.createObjectURL(blob));

// Initial memory request for the worker: 6 * 16777216 bytes.
worker.postMessage({init: {mem: 16777216*6}})

var bigworker = false // becomes true once recognize() has sent the larger memory init
var index = 0         // index handed to the next job
var handlers = []     // per-job {callback, progress, resolve, reject}, keyed by job index

// Routes worker replies to the handlers registered by detect()/recognize().
worker.onmessage = function(e){
  var data = e.data
  var handler = handlers[data.index]
  // A reply for an unknown or already finished job must not throw inside the message handler.
  if(!handler) return

  if(data.progress){
    handler.progress(data.progress)
    return
  }

  // Every non-progress message is the final one for its job: drop the
  // callbacks so finished jobs do not accumulate in `handlers`.
  delete handlers[data.index]
  if(data.err){
    handler.reject(data.err)
    handler.callback(data.err)
  }
  else {
    handler.resolve(data.result)
    handler.callback(null, data.result)
  }
}
/**
 * Normalises an image source before it is posted to the worker.
 *
 * - objects with getContext(): their '2d' context is read
 * - elements whose tagName is IMG or VIDEO: drawn onto a scratch canvas first
 * - objects with getImageData(): converted to pixel data covering their canvas
 * - anything else: returned unchanged
 *
 * @param {*} image source to convert
 * @returns {*} result of getImageData(), or the input when it has no such method
 */
function convertToImageData(image){
  var source = image

  if(source.getContext){
    source = source.getContext('2d');
  }else if(source.tagName == "IMG" || source.tagName == "VIDEO"){
    var scratch = document.createElement('canvas');
    // Images expose natural*, videos expose video* dimensions.
    var isImg = source.tagName == "IMG"
    scratch.width = isImg ? source.naturalWidth : source.videoWidth;
    scratch.height = isImg ? source.naturalHeight : source.videoHeight;
    var scratchCtx = scratch.getContext('2d');
    scratchCtx.drawImage(source, 0, 0);
    source = scratchCtx;
  }

  if(!source.getImageData) return source
  return source.getImageData(0, 0, source.canvas.width, source.canvas.height);
}
/**
 * Queues a 'detect' job for the image on the worker.
 *
 * Accepted call shapes: detect(image), detect(image, callback),
 * detect(image, progress, callback).
 *
 * @param {*} image anything convertToImageData() accepts
 * @param {function} [progress] called with each progress message of this job
 * @param {function} [callback] node-style callback: (err) or (null, result)
 * @returns {Promise} resolved with the worker result, rejected with its error
 */
Tesseract.detect = function(image, progress, callback){
  // Plain function literal instead of `new Function()`, which is eval-like
  // and refused under a Content-Security-Policy without 'unsafe-eval'.
  var noop = function(){}

  image = convertToImageData(image)

  // With a single function argument, that function is the completion callback.
  // Checking `callback` first also keeps a callback passed after an undefined `progress`.
  if(typeof callback !== "function"){
    callback = progress
    progress = undefined
  }
  if(typeof callback !== "function") callback = noop
  if(typeof progress !== "function") progress = noop

  var i = index++
  return new Promise(function(resolve, reject){
    handlers[i] = {
      callback: callback,
      progress: progress,
      resolve: resolve,
      reject: reject
    }
    worker.postMessage({index: i, fun: 'detect', image: image})
  })
}
/**
 * Queues a 'recognize' job for the image on the worker.
 *
 * Accepted call shapes: recognize(image), recognize(image, 'lang'),
 * recognize(image, callback), recognize(image, options[, callback]).
 *
 * @param {*} image anything convertToImageData() accepts
 * @param {string|Object|function} [options] language code, options object
 *        (`lang` selects the language, a function-valued `progress` receives
 *        progress messages, everything is forwarded to the worker except that
 *        function), or the callback
 * @param {function} [callback] node-style callback: (err) or (null, result)
 * @returns {Promise} resolved with the worker result, rejected with its error
 */
Tesseract.recognize = function(image, options, callback){
  var noop = function(){}
  var lang

  // Normalise the overloaded second argument BEFORE reading from it. The
  // previous order threw when `options` was omitted and picked the worker
  // size from 'eng' even when a language string such as 'chi_sim' was passed.
  if (typeof options === "function") {
    callback = options
    options = {}
  }
  else if (typeof options === 'string') {
    lang = options
    options = {}
  }
  else if (options === undefined || options === null) {
    options = {}
  }
  if (typeof lang === "undefined") lang = options.lang
  if (typeof lang === "undefined") lang = 'eng'

  // These languages get a one-time re-init of the worker with more memory.
  if (!bigworker && ['chi_sim', 'chi_tra', 'jpn'].indexOf(lang) != -1){
    worker.postMessage({init: {mem: 16777216*10}})
    bigworker = true
    console.log('started big worker')
  }

  // Split the progress handler off into a copy so the caller's options
  // object is left untouched (functions cannot be posted to a worker).
  var progress = noop
  var workerOptions = {}
  for (var key in options) {
    if (!Object.prototype.hasOwnProperty.call(options, key)) continue
    if (key === 'progress' && typeof options[key] === 'function') progress = options[key]
    else workerOptions[key] = options[key]
  }

  image = convertToImageData(image)

  var i = index++
  return new Promise(function(resolve, reject){
    handlers[i] = {
      callback: typeof callback === 'function' ? callback : noop,
      progress: progress,
      resolve: resolve,
      reject: reject
    }
    worker.postMessage({index: i, fun: 'recognize', image: image, lang: lang, options: workerOptions})
  })
}
return Tesseract
})()

130
lib/Tesseract_dev.js

@ -1,130 +0,0 @@
var Tesseract = (function(){
var Tesseract = {}

// Worker script locations noted by the original author:
//   https://cdn.rawgit.com/naptha/tesseract.js/master/worker/worker.js
//   https://rawgit.com/naptha/tesseract.js/master/worker/worker.js (for testing)
// The worker is started from an in-memory Blob whose only statement is an
// importScripts() call for the real worker script.
var blob = new Blob(["importScripts('http://localhost:1234/master/lib/worker.2015.07.26.js');"]); // changed on build
var worker = new Worker(window.URL.createObjectURL(blob));

// Initial memory request for the worker: 6 * 16777216 bytes.
worker.postMessage({init: {mem: 16777216*6}})

var bigworker = false // becomes true once recognize() has sent the larger memory init
var index = 0         // index handed to the next job
var handlers = []     // per-job {callback, progress, resolve, reject}, keyed by job index

// Routes worker replies to the handlers registered by detect()/recognize().
worker.onmessage = function(e){
  var data = e.data
  var handler = handlers[data.index]
  // A reply for an unknown or already finished job must not throw inside the message handler.
  if(!handler) return

  if(data.progress){
    handler.progress(data.progress)
    return
  }

  // Every non-progress message is the final one for its job: drop the
  // callbacks so finished jobs do not accumulate in `handlers`.
  delete handlers[data.index]
  if(data.err){
    handler.reject(data.err)
    handler.callback(data.err)
  }
  else {
    handler.resolve(data.result)
    handler.callback(null, data.result)
  }
}
/**
 * Normalises an image source before it is posted to the worker.
 *
 * - objects with getContext(): their '2d' context is read
 * - elements whose tagName is IMG or VIDEO: drawn onto a scratch canvas first
 * - objects with getImageData(): converted to pixel data covering their canvas
 * - anything else: returned unchanged
 *
 * @param {*} image source to convert
 * @returns {*} result of getImageData(), or the input when it has no such method
 */
function convertToImageData(image){
  var source = image

  if(source.getContext){
    source = source.getContext('2d');
  }else if(source.tagName == "IMG" || source.tagName == "VIDEO"){
    var scratch = document.createElement('canvas');
    // Images expose natural*, videos expose video* dimensions.
    var isImg = source.tagName == "IMG"
    scratch.width = isImg ? source.naturalWidth : source.videoWidth;
    scratch.height = isImg ? source.naturalHeight : source.videoHeight;
    var scratchCtx = scratch.getContext('2d');
    scratchCtx.drawImage(source, 0, 0);
    source = scratchCtx;
  }

  if(!source.getImageData) return source
  return source.getImageData(0, 0, source.canvas.width, source.canvas.height);
}
/**
 * Queues a 'detect' job for the image on the worker.
 *
 * Accepted call shapes: detect(image), detect(image, callback),
 * detect(image, progress, callback).
 *
 * @param {*} image anything convertToImageData() accepts
 * @param {function} [progress] called with each progress message of this job
 * @param {function} [callback] node-style callback: (err) or (null, result)
 * @returns {Promise} resolved with the worker result, rejected with its error
 */
Tesseract.detect = function(image, progress, callback){
  // Plain function literal instead of `new Function()`, which is eval-like
  // and refused under a Content-Security-Policy without 'unsafe-eval'.
  var noop = function(){}

  image = convertToImageData(image)

  // With a single function argument, that function is the completion callback.
  // Checking `callback` first also keeps a callback passed after an undefined `progress`.
  if(typeof callback !== "function"){
    callback = progress
    progress = undefined
  }
  if(typeof callback !== "function") callback = noop
  if(typeof progress !== "function") progress = noop

  var i = index++
  return new Promise(function(resolve, reject){
    handlers[i] = {
      callback: callback,
      progress: progress,
      resolve: resolve,
      reject: reject
    }
    worker.postMessage({index: i, fun: 'detect', image: image})
  })
}
/**
 * Queues a 'recognize' job for the image on the worker.
 *
 * Accepted call shapes: recognize(image), recognize(image, 'lang'),
 * recognize(image, callback), recognize(image, options[, callback]).
 *
 * @param {*} image anything convertToImageData() accepts
 * @param {string|Object|function} [options] language code, options object
 *        (`lang` selects the language, a function-valued `progress` receives
 *        progress messages, everything is forwarded to the worker except that
 *        function), or the callback
 * @param {function} [callback] node-style callback: (err) or (null, result)
 * @returns {Promise} resolved with the worker result, rejected with its error
 */
Tesseract.recognize = function(image, options, callback){
  var noop = function(){}
  var lang

  // Normalise the overloaded second argument BEFORE reading from it. The
  // previous order threw when `options` was omitted and picked the worker
  // size from 'eng' even when a language string such as 'chi_sim' was passed.
  if (typeof options === "function") {
    callback = options
    options = {}
  }
  else if (typeof options === 'string') {
    lang = options
    options = {}
  }
  else if (options === undefined || options === null) {
    options = {}
  }
  if (typeof lang === "undefined") lang = options.lang
  if (typeof lang === "undefined") lang = 'eng'

  // These languages get a one-time re-init of the worker with more memory.
  if (!bigworker && ['chi_sim', 'chi_tra', 'jpn'].indexOf(lang) != -1){
    worker.postMessage({init: {mem: 16777216*10}})
    bigworker = true
    console.log('started big worker')
  }

  // Split the progress handler off into a copy so the caller's options
  // object is left untouched (functions cannot be posted to a worker).
  var progress = noop
  var workerOptions = {}
  for (var key in options) {
    if (!Object.prototype.hasOwnProperty.call(options, key)) continue
    if (key === 'progress' && typeof options[key] === 'function') progress = options[key]
    else workerOptions[key] = options[key]
  }

  image = convertToImageData(image)

  var i = index++
  return new Promise(function(resolve, reject){
    handlers[i] = {
      callback: typeof callback === 'function' ? callback : noop,
      progress: progress,
      resolve: resolve,
      reject: reject
    }
    worker.postMessage({index: i, fun: 'recognize', image: image, lang: lang, options: workerOptions})
  })
}
return Tesseract
})()

12281
lib/worker.2015.07.26.js

File diff suppressed because one or more lines are too long

15
package.json

@ -3,12 +3,25 @@
"version": "1.0.0", "version": "1.0.0",
"description": "", "description": "",
"main": "Tesseract.js", "main": "Tesseract.js",
"scripts": {
"start": "node devServer.js",
"build": "webpack --config webpack.config.prod.js"
},
"dependencies": { "dependencies": {
"level-js": "^2.1.6", "level-js": "^2.1.6",
"pako": "^0.2.7", "pako": "^0.2.7",
"tesseract.js-core": "^1.0.0" "tesseract.js-core": "^1.0.0"
}, },
"devDependencies": {}, "devDependencies": {
"babel": "^6.5.2",
"babel-core": "^6.7.0",
"babel-loader": "^6.2.4",
"express": "^4.13.4",
"webpack": "^1.13.0",
"webpack-dev-middleware": "^1.5.1",
"babel-preset-stage-1": "^6.5.0",
"babel-preset-es2015": "^6.6.0"
},
"repository": { "repository": {
"type": "git", "type": "git",
"url": "https://github.com/naptha/tesseract.js.git" "url": "https://github.com/naptha/tesseract.js.git"

130
src/Tesseract.js

@ -1,130 +0,0 @@
var Tesseract = (function(){
var Tesseract = {}

// Worker script locations noted by the original author:
//   https://cdn.rawgit.com/naptha/tesseract.js/master/worker/worker.js
//   https://rawgit.com/naptha/tesseract.js/master/worker/worker.js (for testing)
// The worker is started from an in-memory Blob whose only statement is an
// importScripts() call; the '__worker__' placeholder is kept verbatim because
// the line is marked as rewritten by the build.
var blob = new Blob(["importScripts('__worker__');"]); // changed on build
var worker = new Worker(window.URL.createObjectURL(blob));

// Initial memory request for the worker: 6 * 16777216 bytes.
worker.postMessage({init: {mem: 16777216*6}})

var bigworker = false // becomes true once recognize() has sent the larger memory init
var index = 0         // index handed to the next job
var handlers = []     // per-job {callback, progress, resolve, reject}, keyed by job index

// Routes worker replies to the handlers registered by detect()/recognize().
worker.onmessage = function(e){
  var data = e.data
  var handler = handlers[data.index]
  // A reply for an unknown or already finished job must not throw inside the message handler.
  if(!handler) return

  if(data.progress){
    handler.progress(data.progress)
    return
  }

  // Every non-progress message is the final one for its job: drop the
  // callbacks so finished jobs do not accumulate in `handlers`.
  delete handlers[data.index]
  if(data.err){
    handler.reject(data.err)
    handler.callback(data.err)
  }
  else {
    handler.resolve(data.result)
    handler.callback(null, data.result)
  }
}
/**
 * Normalises an image source before it is posted to the worker.
 *
 * - objects with getContext(): their '2d' context is read
 * - elements whose tagName is IMG or VIDEO: drawn onto a scratch canvas first
 * - objects with getImageData(): converted to pixel data covering their canvas
 * - anything else: returned unchanged
 *
 * @param {*} image source to convert
 * @returns {*} result of getImageData(), or the input when it has no such method
 */
function convertToImageData(image){
  var source = image

  if(source.getContext){
    source = source.getContext('2d');
  }else if(source.tagName == "IMG" || source.tagName == "VIDEO"){
    var scratch = document.createElement('canvas');
    // Images expose natural*, videos expose video* dimensions.
    var isImg = source.tagName == "IMG"
    scratch.width = isImg ? source.naturalWidth : source.videoWidth;
    scratch.height = isImg ? source.naturalHeight : source.videoHeight;
    var scratchCtx = scratch.getContext('2d');
    scratchCtx.drawImage(source, 0, 0);
    source = scratchCtx;
  }

  if(!source.getImageData) return source
  return source.getImageData(0, 0, source.canvas.width, source.canvas.height);
}
/**
 * Queues a 'detect' job for the image on the worker.
 *
 * Accepted call shapes: detect(image), detect(image, callback),
 * detect(image, progress, callback).
 *
 * @param {*} image anything convertToImageData() accepts
 * @param {function} [progress] called with each progress message of this job
 * @param {function} [callback] node-style callback: (err) or (null, result)
 * @returns {Promise} resolved with the worker result, rejected with its error
 */
Tesseract.detect = function(image, progress, callback){
  // Plain function literal instead of `new Function()`, which is eval-like
  // and refused under a Content-Security-Policy without 'unsafe-eval'.
  var noop = function(){}

  image = convertToImageData(image)

  // With a single function argument, that function is the completion callback.
  // Checking `callback` first also keeps a callback passed after an undefined `progress`.
  if(typeof callback !== "function"){
    callback = progress
    progress = undefined
  }
  if(typeof callback !== "function") callback = noop
  if(typeof progress !== "function") progress = noop

  var i = index++
  return new Promise(function(resolve, reject){
    handlers[i] = {
      callback: callback,
      progress: progress,
      resolve: resolve,
      reject: reject
    }
    worker.postMessage({index: i, fun: 'detect', image: image})
  })
}
/**
 * Queues a 'recognize' job for the image on the worker.
 *
 * Accepted call shapes: recognize(image), recognize(image, 'lang'),
 * recognize(image, callback), recognize(image, options[, callback]).
 *
 * @param {*} image anything convertToImageData() accepts
 * @param {string|Object|function} [options] language code, options object
 *        (`lang` selects the language, a function-valued `progress` receives
 *        progress messages, everything is forwarded to the worker except that
 *        function), or the callback
 * @param {function} [callback] node-style callback: (err) or (null, result)
 * @returns {Promise} resolved with the worker result, rejected with its error
 */
Tesseract.recognize = function(image, options, callback){
  var noop = function(){}
  var lang

  // Normalise the overloaded second argument BEFORE reading from it. The
  // previous order threw when `options` was omitted and picked the worker
  // size from 'eng' even when a language string such as 'chi_sim' was passed.
  if (typeof options === "function") {
    callback = options
    options = {}
  }
  else if (typeof options === 'string') {
    lang = options
    options = {}
  }
  else if (options === undefined || options === null) {
    options = {}
  }
  if (typeof lang === "undefined") lang = options.lang
  if (typeof lang === "undefined") lang = 'eng'

  // These languages get a one-time re-init of the worker with more memory.
  if (!bigworker && ['chi_sim', 'chi_tra', 'jpn'].indexOf(lang) != -1){
    worker.postMessage({init: {mem: 16777216*10}})
    bigworker = true
    console.log('started big worker')
  }

  // Split the progress handler off into a copy so the caller's options
  // object is left untouched (functions cannot be posted to a worker).
  var progress = noop
  var workerOptions = {}
  for (var key in options) {
    if (!Object.prototype.hasOwnProperty.call(options, key)) continue
    if (key === 'progress' && typeof options[key] === 'function') progress = options[key]
    else workerOptions[key] = options[key]
  }

  image = convertToImageData(image)

  var i = index++
  return new Promise(function(resolve, reject){
    handlers[i] = {
      callback: typeof callback === 'function' ? callback : noop,
      progress: progress,
      resolve: resolve,
      reject: reject
    }
    worker.postMessage({index: i, fun: 'recognize', image: image, lang: lang, options: workerOptions})
  })
}
return Tesseract
})()

65
src/browser/index.js

@ -0,0 +1,65 @@
//TODO: replace with cdn url
module.exports = function Tesseract(url=location.href+'build/tesseract.worker.js'){
var blob = new Blob(["importScripts('"+url+"');"])
var worker = new Worker(window.URL.createObjectURL(blob));
var bigworker = false
var jobCounter = 0
var handlers = {}
function runAsync(action, args){
var jobId = jobCounter++
handlers[jobId] = {}
var message = {jobId, action, args}
console.log(message)
worker.postMessage(message)
return {
then (f){ handlers[jobId].result = f; return this},
error (f){ handlers[jobId].error = f; return this},
progress(f){ handlers[jobId].progress = f; return this}
}
}
worker.onmessage = function(e){
var {jobId, progress, error, result} = e.data
var handler = handlers[jobId]
if(progress && handler.progress) handler.progress(progress);
if(error && handler.error) handler.error(error);
if(result && handler.result) handler.result(result);
}
function convertToImageData(image){
if(image.getContext) image = image.getContext('2d');
else if(image.tagName == "IMG" || image.tagName == "VIDEO"){
var c = document.createElement('canvas');
c.width = image.naturalWidth || image.videoWidth;
c.height = image.naturalHeight || image.videoHeight;
var ctx = c.getContext('2d');
ctx.drawImage(image, 0, 0);
image = ctx;
}
if(image.getImageData) image = image.getImageData(0, 0, image.canvas.width, image.canvas.height);
return image
}
runAsync('init', {mem: (1<<24) * 6})
return {
detect(image){
return runAsync('detect', {image: convertToImageData(image)})
},
recognize(image, options='eng'){
if (typeof options === 'string') options = {lang: options};
else options.lang = options.lang || 'eng';
if (!bigworker && ['chi_sim', 'chi_tra', 'jpn'].indexOf(options.lang) != -1){
runAsync('init', {mem: (1<<24) * 10})
bigworker = true
}
return runAsync('recognize', {options, image: convertToImageData(image)})
}
}
}

533
src/worker.js

@ -1,533 +0,0 @@
var Tesseract304 = require('tesseract.js-core')
var leveljs = require('level-js')
// Store used by the language loader. Without indexedDB a stand-in is used
// whose open() always reports an error to its callback.
var db = typeof indexedDB !== 'undefined'
  ? leveljs('./tessdata')
  : { open: function(options, done){ done(true) /*err = true*/ } }
console.log('hallo')
var filesizes = {"afr": 1079573, "ara": 1701536, "aze": 1420865, "bel": 1276820, "ben": 6772012, "bul": 1605615, "cat": 1652368, "ces": 1035441, "chi_sim": 17710414, "chi_tra": 24717749, "chr": 320649, "dan-frak": 677656, "dan": 1972936, "deu-frak": 822644, "deu": 991656, "ell": 859719, "eng": 9453554, "enm": 619254, "epo": 1241212, "equ": 821130, "est": 1905040, "eus": 1641190, "fin": 979418, "fra": 1376221, "frk": 5912963, "frm": 5147082, "glg": 1674938, "grc": 3012615, "heb": 1051501, "hin": 6590065, "hrv": 1926995, "hun": 3074473, "ind": 1874776, "isl": 1634041, "ita": 948593, "ita_old": 3436571, "jpn": 13507168, "kan": 4390317, "kor": 5353098, "lav": 1843944, "lit": 1779240, "mal": 5966263, "meme": 88453, "mkd": 1163087, "mlt": 1463001, "msa": 1665427, "nld": 1134708, "nor": 2191610, "osd": 4274649, "pol": 7024662, "por": 909359, "ron": 915680, "rus": 5969957, "slk-frak": 289885, "slk": 2217342, "slv": 1611338, "spa": 883170, "spa_old": 5647453, "sqi": 1667041, "srp": 1770244, "swa": 757916, "swe": 2451917, "tam": 3498763, "tel": 5795246, "tgl": 1496256, "tha": 3811136, "tur": 3563264, "ukr": 937566, "vie": 2195922}
var pako = require('pako')
var T;
var tesseractinit = (function createTesseractInstance(memory){
// Job index that TesseractProgress events are reported under; recognize()
// and detect() set it before they start working. Declared with `var` so it
// is local to this instance instead of an implicit global (an assignment to
// an undeclared name throws in strict mode).
var curindex = 0

var Module = Tesseract304({
  TOTAL_MEMORY: memory, //must be a multiple of 10 megabytes (original author's note)
  // Maps the reported percentage onto 0..1: anything up to 30 becomes 0, 100 becomes 1.
  TesseractProgress: function(percent){
    postMessage({
      index: curindex,
      'progress': {
        'recognized': Math.max(0,(percent-30)/70)
      }
    })
  }
})

var base = new Module.TessBaseAPI()

// Languages whose traineddata file has already been created in this instance.
var loaded_langs = []
// Makes sure "<lang>.traineddata" exists under tessdata in the module's
// virtual file system, then calls cb(null, lang).
// Sources, in order: already loaded in this instance -> cached copy in `db`
// -> download of the gzipped file. Progress messages are posted under `index`.
// On a failed download cb receives the XMLHttpRequest object as the error.
var loadLanguage = function(lang, index, cb){ // NodeJS style callback
if(loaded_langs.indexOf(lang) != -1){
cb(null, lang)
}
else{
Module.FS_createPath("/","tessdata",true,true)
// Downloads the language file; `shouldcache` says whether to store the
// unzipped data in `db` afterwards.
var downloadlang = function(shouldcache){
postMessage({
index: index,
'progress': {
'loaded_lang_model': 0,
cached: false,
requesting: true
}
})
var xhr = new XMLHttpRequest();
xhr.open('GET', 'https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/'+lang+'.traineddata.gz', true);
xhr.responseType = 'arraybuffer';
xhr.onerror = function(){ cb(xhr, null) }
xhr.onprogress = function(e){
postMessage({
index: index,
'progress': {
// Fraction is computed against the hard-coded size table, not the response headers.
'loaded_lang_model': e.loaded/filesizes[lang], //this is kinda wrong on safari
cached: false
}
})
}
xhr.onload = function(){
// NOTE(review): status 0 with a response body is accepted as success —
// presumably for non-HTTP origins; confirm.
if (xhr.status == 200 || (xhr.status == 0 && xhr.response)) {
postMessage({
index: index,
'progress': 'unzipping_lang_model'
})
var response = new Uint8Array(xhr.response)
// 0x1f 0x8b is the gzip magic number: keep unzipping while the data still starts with it.
while(response[0] == 0x1f && response[1] == 0x8b){
response = pako.ungzip(response)
}
console.log('asdf')
postMessage({
index: index,
'progress': {
'unzipped_lang_model': true,
'lang_model_size': response.length
}
})
Module.FS_createDataFile('tessdata', lang +".traineddata", response, true, false);
if(shouldcache){
// NOTE(review): the put() error is ignored; 'cached lang' is logged either way.
db.put(lang, response, function(err){
console.log('cached lang')
})
}
postMessage({
index: index,
'progress': {
'created_virtual_datafile': true,
'cached_file': shouldcache
}
})
loaded_langs.push(lang)
cb(null, lang)
} else cb(xhr, null);
}
xhr.send(null)
}
db.open({compression: false},function(err){
// err = true
// Store could not be opened: download without caching.
if (err) {
downloadlang(false)
}
else {
db.get(lang, function (err, value) {
// err = true
// Not in the store: download and cache.
if (err) {
downloadlang(true)
}
else {
// Stored data may still be gzipped (same magic-number check as above).
while(value[0] == 0x1f && value[1] == 0x8b){
value = pako.ungzip(value)
}
postMessage({
index: index,
'progress': {
loaded_lang_model:1,
cached: true
}
})
Module.FS_createDataFile('tessdata', lang +".traineddata", value, true, false);
loaded_langs.push(lang)
cb(null, lang)
}
})
}
})
}
}
// Adds parent back-references (page/block/paragraph/line/word) to every node
// of a recognition result and fills the flattened paragraphs/lines/words/
// symbols lists on each ancestor level. Mutates and returns `page`.
function circularize(page){
  page.paragraphs = []
  page.lines = []
  page.words = []
  page.symbols = []

  page.blocks.forEach(function(block){
    block.page = page
    block.lines = []
    block.words = []
    block.symbols = []

    block.paragraphs.forEach(function(paragraph){
      paragraph.block = block
      paragraph.page = page
      paragraph.words = []
      paragraph.symbols = []

      paragraph.lines.forEach(function(line){
        line.paragraph = paragraph
        line.block = block
        line.page = page
        line.symbols = []

        line.words.forEach(function(word){
          word.line = line
          word.paragraph = paragraph
          word.block = block
          word.page = page

          word.symbols.forEach(function(symbol){
            symbol.word = word
            symbol.line = line
            symbol.paragraph = paragraph
            symbol.block = block
            symbol.page = page

            // A word already owns its symbols; register with every level above it.
            line.symbols.push(symbol)
            paragraph.symbols.push(symbol)
            block.symbols.push(symbol)
            page.symbols.push(symbol)
          })

          // A line already owns its words.
          paragraph.words.push(word)
          block.words.push(word)
          page.words.push(word)
        })

        // A paragraph already owns its lines.
        block.lines.push(line)
        page.lines.push(line)
      })

      page.paragraphs.push(paragraph)
    })
  })

  return page
}
// Collects the full recognition result from `base` into plain objects.
// Walks the iterator returned by base.GetIterator() one symbol at a time and
// opens a new block / paragraph / text line / word record whenever the
// iterator is at the beginning of that level, nesting each record in its
// parent (blocks -> paragraphs -> lines -> words -> symbols).
// Returns { text, html, confidence, blocks, psm, oem, version }.
function DumpLiterallyEverything(){
var ri = base.GetIterator();
var blocks = [];
// Records currently being filled at each level.
var block, para, textline, word, symbol;
// Reverse lookup of an enum value: returns the part after "<prefix>_" of the
// first Module key with that prefix whose value equals `value`
// (undefined when none matches).
function enumToString(value, prefix){
return (Object.keys(Module)
.filter(function(e){ return e.substr(0, prefix.length + 1) == prefix + '_' })
.filter(function(e){ return Module[e] === value })
.map(function(e){ return e.slice(prefix.length + 1) })[0])
}
ri.Begin()
do {
if(ri.IsAtBeginningOf(Module.RIL_BLOCK)){
var poly = ri.BlockPolygon();
var polygon = null;
// BlockPolygon() returns null when automatic page segmentation is off
if(Module.getPointer(poly) > 0){
var n = poly.get_n(),
px = poly.get_x(),
py = poly.get_y(),
polygon = [];
// Copy the outline into [x, y] pairs before the native polygon is destroyed.
for(var i = 0; i < n; i++){
polygon.push([px.getValue(i), py.getValue(i)]);
}
Module._ptaDestroy(Module.getPointer(poly));
}
block = {
paragraphs: [],
text: ri.GetUTF8Text(Module.RIL_BLOCK),
confidence: ri.Confidence(Module.RIL_BLOCK),
baseline: ri.getBaseline(Module.RIL_BLOCK),
bbox: ri.getBoundingBox(Module.RIL_BLOCK),
blocktype: enumToString(ri.BlockType(), 'PT'),
polygon: polygon
}
blocks.push(block)
}
if(ri.IsAtBeginningOf(Module.RIL_PARA)){
para = {
lines: [],
text: ri.GetUTF8Text(Module.RIL_PARA),
confidence: ri.Confidence(Module.RIL_PARA),
baseline: ri.getBaseline(Module.RIL_PARA),
bbox: ri.getBoundingBox(Module.RIL_PARA),
is_ltr: !!ri.ParagraphIsLtr()
}
block.paragraphs.push(para)
}
if(ri.IsAtBeginningOf(Module.RIL_TEXTLINE)){
textline = {
words: [],
text: ri.GetUTF8Text(Module.RIL_TEXTLINE),
confidence: ri.Confidence(Module.RIL_TEXTLINE),
baseline: ri.getBaseline(Module.RIL_TEXTLINE),
bbox: ri.getBoundingBox(Module.RIL_TEXTLINE)
}
para.lines.push(textline)
}
if(ri.IsAtBeginningOf(Module.RIL_WORD)){
var fontInfo = ri.getWordFontAttributes(),
wordDir = ri.WordDirection();
word = {
symbols: [],
choices: [],
text: ri.GetUTF8Text(Module.RIL_WORD),
confidence: ri.Confidence(Module.RIL_WORD),
baseline: ri.getBaseline(Module.RIL_WORD),
bbox: ri.getBoundingBox(Module.RIL_WORD),
is_numeric: !!ri.WordIsNumeric(),
in_dictionary: !!ri.WordIsFromDictionary(),
direction: enumToString(wordDir, 'DIR'),
language: ri.WordRecognitionLanguage(),
is_bold: fontInfo.is_bold,
is_italic: fontInfo.is_italic,
is_underlined: fontInfo.is_underlined,
is_monospace: fontInfo.is_monospace,
is_serif: fontInfo.is_serif,
is_smallcaps: fontInfo.is_smallcaps,
font_size: fontInfo.pointsize,
font_id: fontInfo.font_id,
font_name: fontInfo.font_name,
}
// Record every choice the word-choice iterator yields, then release it.
var wc = new Module.WordChoiceIterator(ri);
do {
word.choices.push({
text: wc.GetUTF8Text(),
confidence: wc.Confidence()
})
} while (wc.Next());
Module.destroy(wc)
textline.words.push(word)
}
// Symbol images are currently always null: the extraction below is commented out.
var image = null;
// var pix = ri.GetBinaryImage(Module.RIL_SYMBOL)
// var image = pix2array(pix);
// // for some reason it seems that things stop working if you destroy pics
// Module._pixDestroy(Module.getPointer(pix));
if(ri.IsAtBeginningOf(Module.RIL_SYMBOL)){
symbol = {
choices: [],
image: image,
text: ri.GetUTF8Text(Module.RIL_SYMBOL),
confidence: ri.Confidence(Module.RIL_SYMBOL),
baseline: ri.getBaseline(Module.RIL_SYMBOL),
bbox: ri.getBoundingBox(Module.RIL_SYMBOL),
is_superscript: !!ri.SymbolIsSuperscript(),
is_subscript: !!ri.SymbolIsSubscript(),
is_dropcap: !!ri.SymbolIsDropcap(),
}
word.symbols.push(symbol)
// Record every choice the symbol-choice iterator yields, then release it.
var ci = new Module.ChoiceIterator(ri);
do {
symbol.choices.push({
text: ci.GetUTF8Text(),
confidence: ci.Confidence()
})
} while (ci.Next());
Module.destroy(ci)
}
} while (ri.Next(Module.RIL_SYMBOL));
Module.destroy(ri)
// Page-level summary; `html` is the hOCR text passed through deindent().
return {
text: base.GetUTF8Text(),
html: deindent(base.GetHOCRText()),
confidence: base.MeanTextConf(),
blocks: blocks,
psm: enumToString(base.GetPageSegMode(), 'PSM'),
oem: enumToString(base.oem(), 'OEM'),
version: base.Version(),
}
}
// Removes one level of indentation (two leading spaces) from every line of
// `html`, but only when the first line itself starts with two spaces.
// Lines that are not indented are left as they are.
//
// The prefix is matched with /^ {2}/ instead of comparing a 2-character
// substring against a string literal: that comparison could never succeed
// against a one-space literal, so nothing was ever de-indented.
function deindent(html){
  var INDENT = /^ {2}/
  var lines = html.split('\n')
  if(INDENT.test(lines[0])){
    for (var i = 0; i < lines.length; i++) {
      lines[i] = lines[i].replace(INDENT, '')
    }
  }
  return lines.join('\n')
}
// Converts RGBA pixel data ({data, width, height}) into a Uint8Array of
// width*height gray values. Weights are 77/151/28 for R/G/B; the result is
// blended by alpha over 50% gray. Throws the string 'Expected ImageData'
// when `image.data` is missing.
function desaturate(image){
  if(!image.data){
    throw 'Expected ImageData'
  }

  var rgba = image.data
  var gray = new Uint8Array(image.width * image.height)
  var total = rgba.length | 0
  var unrolledLimit = (total - 16) | 0

  // Gray value of the pixel whose red byte sits at offset `o`.
  function luma(o){
    return (((rgba[o] * 77 + rgba[o+1] * 151 + rgba[o+2] * 28) * rgba[o+3]) + ((255-rgba[o+3]) << 15) + 32768) >> 16
  }

  var src = 0, dst = 0
  // Four pixels (16 bytes) per iteration ...
  for (; src <= unrolledLimit; src += 16, dst += 4) {
    gray[dst]   = luma(src)
    gray[dst+1] = luma(src + 4)
    gray[dst+2] = luma(src + 8)
    gray[dst+3] = luma(src + 12)
  }
  // ... then whatever pixels are left over.
  for (; src < total; src += 4, ++dst) {
    gray[dst] = luma(src)
  }

  return gray
}
// Runs OCR on `image` with language `lang` and reports through the
// node-style callback `cb(err, result)`.
//   index   - job index used for every progress message of this job
//   image   - {data, width, height} pixel data (see desaturate)
//   options - own properties are applied with base.SetVariable(name, value)
// The grayscale copy handed to Tesseract is allocated with Module.allocate();
// it is freed (and the API ended) in a `finally`, so an exception from any
// Tesseract call no longer leaks the buffer. Such an exception still propagates.
function recognize(index, image, lang, options, cb){
  var width = image.width, height = image.height;
  image = desaturate(image)
  var ptr = Module.allocate(image, 'i8', Module.ALLOC_NORMAL);

  loadLanguage(lang, index, function(err, result){
    if(err){
      console.error("error loading", lang);
      Module._free(ptr);
      cb(err, null)
      return
    }

    var everything
    try {
      // Progress events raised during recognition are attributed to this job.
      curindex = index
      base.Init(null, lang)
      postMessage({
        index: index,
        'progress': {
          'initialized_with_lang': true,
          'lang': lang
        }
      })

      for (var option in options) {
        if (options.hasOwnProperty(option)) {
          base.SetVariable(option, options[option]);
          postMessage({
            index: index,
            'progress': {
              'set_variable': {
                variable: option,
                value: options[option]
              }
            }
          })
        }
      }

      // One byte per pixel, `width` bytes per row.
      base.SetImage(Module.wrapPointer(ptr), width, height, 1, width)
      base.SetRectangle(0, 0, width, height)
      base.Recognize(null)
      everything = circularize(DumpLiterallyEverything())
    } finally {
      base.End();
      Module._free(ptr);
    }

    cb(null, everything)
  })
}
// Runs orientation/script detection on `image` using the 'osd' language data
// and reports through the node-style callback `cb(err, result)`.
// `result` is { tesseract_script_id, script, script_confidence,
// orientation_degrees, orientation_confidence }.
// Cleanup (base.End() and freeing the image buffer) now happens in a
// `finally` and before `cb` is invoked, so neither an exception from a
// Tesseract call nor a throwing callback can leak the buffer.
function detect(index, image, cb){
  var width = image.width, height = image.height;
  image = desaturate(image)
  var ptr = Module.allocate(image, 'i8', Module.ALLOC_NORMAL);

  loadLanguage('osd', index, function(err, result){
    if(err){
      Module._free(ptr);
      cb(err)
      return
    }

    var detected = null
    try {
      curindex = index
      base.Init(null, 'osd')
      base.SetPageSegMode(Module.PSM_OSD_ONLY)
      // One byte per pixel, `width` bytes per row.
      base.SetImage(Module.wrapPointer(ptr), width, height, 1, width)
      base.SetRectangle(0, 0, width, height)

      var results = new Module.OSResults();
      if(base.DetectOS(results)){
        var charset = results.get_unicharset()
        var best = results.get_best_result()
        var oid = best.get_orientation_id(),
            sid = best.get_script_id();
        // Everything is read out here, before the API is ended below.
        detected = {
          tesseract_script_id: sid,
          script: charset.get_script_from_script_id(sid),
          script_confidence: best.get_sconfidence(),
          // orientation id -> degrees
          orientation_degrees: [0, 270, 180, 90][oid],
          orientation_confidence: best.get_oconfidence()
        }
      }
    } finally {
      base.End();
      Module._free(ptr);
    }

    if(detected) cb(null, detected)
    else cb("failed to detect os")
  })
}
return {
recognize: recognize,
detect: detect
}
})
// Worker entry point. A message carrying `init` (re)creates the Tesseract
// instance with the requested memory size; otherwise `fun` selects the job
// to run, and its outcome is posted back as {index, err, result}.
onmessage = function(e) {
  var msg = e.data

  if(msg.init){
    T = tesseractinit(msg.init.mem)
    return
  }

  var reply = function(err, result){
    postMessage({index: msg.index, err:err, result: result})
  }

  switch(msg.fun){
    case 'recognize':
      T.recognize(msg.index, msg.image, msg.lang, msg.options, reply)
      break
    case 'detect':
      T.detect(msg.index, msg.image, reply)
      break
  }
}

56
src/worker/circularize.js

@ -0,0 +1,56 @@
/**
 * Adds parent back-references (page/block/paragraph/line/word) to every node
 * of a recognition result and fills the flattened paragraphs / lines / words /
 * symbols lists on each ancestor level.
 *
 * @param {Object} page result tree: blocks -> paragraphs -> lines -> words -> symbols
 * @returns {Object} the same `page` object, mutated in place
 */
export default function circularize(page){
    page.paragraphs = []
    page.lines = []
    page.words = []
    page.symbols = []

    page.blocks.forEach(block => {
        block.page = page
        block.lines = []
        block.words = []
        block.symbols = []

        block.paragraphs.forEach(paragraph => {
            paragraph.block = block
            paragraph.page = page
            paragraph.words = []
            paragraph.symbols = []

            paragraph.lines.forEach(line => {
                line.paragraph = paragraph
                line.block = block
                line.page = page
                line.symbols = []

                line.words.forEach(word => {
                    word.line = line
                    word.paragraph = paragraph
                    word.block = block
                    word.page = page

                    word.symbols.forEach(symbol => {
                        symbol.word = word
                        symbol.line = line
                        symbol.paragraph = paragraph
                        symbol.block = block
                        symbol.page = page

                        // A word already owns its symbols; register with every level above it.
                        line.symbols.push(symbol)
                        paragraph.symbols.push(symbol)
                        block.symbols.push(symbol)
                        page.symbols.push(symbol)
                    })

                    // A line already owns its words.
                    paragraph.words.push(word)
                    block.words.push(word)
                    page.words.push(word)
                })

                // A paragraph already owns its lines.
                block.lines.push(line)
                page.lines.push(line)
            })

            page.paragraphs.push(paragraph)
        })
    })

    return page
}

3
src/worker/db.js

@ -0,0 +1,3 @@
import leveljs from 'level-js'
// Stand-in used when indexedDB is unavailable: open() always reports an error
// to its callback. NOTE(review): presumably this makes callers skip the
// cache — confirm against the language loader.
const unavailableStore = { open: (_, cb) => cb(true) }

var db = typeof indexedDB !== 'undefined' ? leveljs('./tessdata') : unavailableStore

export default db

26
src/worker/desaturate.js

@ -0,0 +1,26 @@
/**
 * Converts RGBA pixel data into one gray byte per pixel.
 * Weights are 77/151/28 for R/G/B; the result is blended by alpha over 50% gray.
 *
 * @param {{data: ArrayLike<number>, width: number, height: number}} image RGBA pixel data
 * @returns {Uint8Array} width*height gray values
 * @throws {string} 'Expected ImageData' when `image.data` is missing
 */
export default function desaturate(image){
    if(!image.data){
        throw 'Expected ImageData'
    }

    const rgba = image.data
    const gray = new Uint8Array(image.width * image.height)
    const total = rgba.length | 0
    const unrolledLimit = (total - 16) | 0

    // Gray value of the pixel whose red byte sits at offset `o`.
    const luma = o =>
        (((rgba[o] * 77 + rgba[o+1] * 151 + rgba[o+2] * 28) * rgba[o+3]) + ((255-rgba[o+3]) << 15) + 32768) >> 16

    let src = 0, dst = 0
    // Four pixels (16 bytes) per iteration ...
    for (; src <= unrolledLimit; src += 16, dst += 4) {
        gray[dst]   = luma(src)
        gray[dst+1] = luma(src + 4)
        gray[dst+2] = luma(src + 8)
        gray[dst+3] = luma(src + 12)
    }
    // ... then whatever pixels are left over.
    for (; src < total; src += 4, ++dst) {
        gray[dst] = luma(src)
    }

    return gray
}

54
src/worker/detect.js

@ -0,0 +1,54 @@
/**
 * Runs orientation/script detection on an image using the 'osd' language data.
 *
 * Cleanup (base.End() and freeing the image buffer) happens in a `finally`
 * and before `cb` is invoked, so neither an exception from a Tesseract call
 * nor a throwing callback can leak the buffer.
 *
 * NOTE(review): `desaturate` and `loadLanguage` are used as free identifiers
 * and no import for them is visible in this file — confirm they are brought
 * into scope, otherwise the first call throws a ReferenceError.
 *
 * @param {*} jobId job identifier, passed on to loadLanguage
 * @param {Object} module Tesseract module (allocate, _free, wrapPointer, OSResults, ...)
 * @param {Object} base TessBaseAPI instance created from `module`
 * @param {{data: *, width: number, height: number}} image pixel data
 * @param {function} cb node-style callback: cb(err) or cb(null, result), where
 *        result is { tesseract_script_id, script, script_confidence,
 *        orientation_degrees, orientation_confidence }
 */
export default function detect(jobId, module, base, image, cb){
    var width = image.width, height = image.height;
    image = desaturate(image)
    var ptr = module.allocate(image, 'i8', module.ALLOC_NORMAL);

    loadLanguage('osd', jobId, function(err, result){
        if(err){
            module._free(ptr);
            cb(err)
            return
        }

        var detected = null
        try {
            base.Init(null, 'osd')
            base.SetPageSegMode(module.PSM_OSD_ONLY)
            // One byte per pixel, `width` bytes per row.
            base.SetImage(module.wrapPointer(ptr), width, height, 1, width)
            base.SetRectangle(0, 0, width, height)

            var results = new module.OSResults();
            if(base.DetectOS(results)){
                var charset = results.get_unicharset()
                var best = results.get_best_result()
                var oid = best.get_orientation_id(),
                    sid = best.get_script_id();
                // Everything is read out here, before the API is ended below.
                detected = {
                    tesseract_script_id: sid,
                    script: charset.get_script_from_script_id(sid),
                    script_confidence: best.get_sconfidence(),
                    // orientation id -> degrees
                    orientation_degrees: [0, 270, 180, 90][oid],
                    orientation_confidence: best.get_oconfidence()
                }
            }
        } finally {
            base.End();
            module._free(ptr);
        }

        if(detected) cb(null, detected)
        else cb("failed to detect os")
    })
}

163
src/worker/dump.js

@ -0,0 +1,163 @@
/**
 * Remove one level of indentation (two leading spaces) from every line of
 * `html`, but only when the first line itself is indented that way.
 *
 * The previous version compared a two-character prefix against a
 * one-character string, so the strip only fired on lines consisting of a
 * single space.
 *
 * @param {string} html  newline-separated markup.
 * @returns {string} the markup with the leading two-space indent removed
 *          from each line that has it; unchanged when the first line is
 *          not indented.
 */
function deindent(html){
    var INDENT = '  ';

    function isIndented(line){
        return line.slice(0, INDENT.length) === INDENT;
    }

    var lines = html.split('\n');
    if(!isIndented(lines[0])) return html;

    return lines
        .map(function(line){ return isIndented(line) ? line.slice(INDENT.length) : line })
        .join('\n');
}
/**
 * Walk the result iterator obtained from `base` and copy what it exposes
 * into plain objects nested as blocks -> paragraphs -> lines -> words -> symbols.
 *
 * @param module  engine module; the RIL_* constants, the PT_* / DIR_* / PSM_* / OEM_*
 *                enum values, getPointer, destroy, _ptaDestroy and the
 *                WordChoiceIterator / ChoiceIterator constructors are used.
 * @param base    engine API instance; GetIterator, GetUTF8Text, GetHOCRText,
 *                MeanTextConf, GetPageSegMode, oem and Version are called on it.
 *                NOTE(review): presumably recognition must already have run on
 *                `base` - confirm against the caller.
 * @returns {{text, html, confidence, blocks, psm, oem, version}} page-level
 *          values plus the `blocks` tree built below.
 */
export default function DumpLiterallyEverything(module, base){
var ri = base.GetIterator();
var blocks = [];
// the most recently opened container at each level; children are pushed into these
var block, para, textline, word, symbol;
// Reverse enum lookup: take the first key of `module` that starts with
// `prefix + '_'` and whose value equals `value`, and return it without that
// prefix. Yields undefined when no key matches.
function enumToString(value, prefix){
return (Object.keys(module)
.filter(function(e){ return e.substr(0, prefix.length + 1) == prefix + '_' })
.filter(function(e){ return module[e] === value })
.map(function(e){ return e.slice(prefix.length + 1) })[0])
}
const {RIL_BLOCK, RIL_PARA, RIL_TEXTLINE, RIL_WORD, RIL_SYMBOL} = module
ri.Begin()
// The iterator is advanced one symbol at a time (see the loop condition);
// whenever the current position is the start of a larger unit, a new
// container for that unit is opened before the symbol itself is recorded.
do {
if(ri.IsAtBeginningOf(RIL_BLOCK)){
var poly = ri.BlockPolygon();
var polygon = null;
// BlockPolygon() returns null when automatic page segmentation is off
if(module.getPointer(poly) > 0){
var n = poly.get_n(),
px = poly.get_x(),
py = poly.get_y(),
polygon = [];
// copy the outline into an array of [x, y] pairs
for(var i = 0; i < n; i++){
polygon.push([px.getValue(i), py.getValue(i)]);
}
// the points have been copied out, so the native polygon is handed to _ptaDestroy
module._ptaDestroy(module.getPointer(poly));
}
block = {
paragraphs: [],
text: ri.GetUTF8Text(RIL_BLOCK),
confidence: ri.Confidence(RIL_BLOCK),
baseline: ri.getBaseline(RIL_BLOCK),
bbox: ri.getBoundingBox(RIL_BLOCK),
blocktype: enumToString(ri.BlockType(), 'PT'),
polygon: polygon
}
blocks.push(block)
}
if(ri.IsAtBeginningOf(RIL_PARA)){
para = {
lines: [],
text: ri.GetUTF8Text(RIL_PARA),
confidence: ri.Confidence(RIL_PARA),
baseline: ri.getBaseline(RIL_PARA),
bbox: ri.getBoundingBox(RIL_PARA),
is_ltr: !!ri.ParagraphIsLtr()
}
block.paragraphs.push(para)
}
if(ri.IsAtBeginningOf(RIL_TEXTLINE)){
textline = {
words: [],
text: ri.GetUTF8Text(RIL_TEXTLINE),
confidence: ri.Confidence(RIL_TEXTLINE),
baseline: ri.getBaseline(RIL_TEXTLINE),
bbox: ri.getBoundingBox(RIL_TEXTLINE)
}
para.lines.push(textline)
}
if(ri.IsAtBeginningOf(RIL_WORD)){
var fontInfo = ri.getWordFontAttributes(),
wordDir = ri.WordDirection();
word = {
symbols: [],
choices: [],
text: ri.GetUTF8Text(RIL_WORD),
confidence: ri.Confidence(RIL_WORD),
baseline: ri.getBaseline(RIL_WORD),
bbox: ri.getBoundingBox(RIL_WORD),
is_numeric: !!ri.WordIsNumeric(),
in_dictionary: !!ri.WordIsFromDictionary(),
direction: enumToString(wordDir, 'DIR'),
language: ri.WordRecognitionLanguage(),
is_bold: fontInfo.is_bold,
is_italic: fontInfo.is_italic,
is_underlined: fontInfo.is_underlined,
is_monospace: fontInfo.is_monospace,
is_serif: fontInfo.is_serif,
is_smallcaps: fontInfo.is_smallcaps,
font_size: fontInfo.pointsize,
font_id: fontInfo.font_id,
font_name: fontInfo.font_name,
}
// record every choice the word-choice iterator yields; the first one is
// read before Next() is called for the first time
var wc = new module.WordChoiceIterator(ri);
do {
word.choices.push({
text: wc.GetUTF8Text(),
confidence: wc.Confidence()
})
} while (wc.Next());
module.destroy(wc)
textline.words.push(word)
}
// symbol images are currently disabled, so every symbol gets image: null
var image = null;
// var pix = ri.GetBinaryImage(RIL_SYMBOL)
// var image = pix2array(pix);
// // for some reason it seems that things stop working if you destroy pics
// module._pixDestroy(module.getPointer(pix));
if(ri.IsAtBeginningOf(RIL_SYMBOL)){
symbol = {
choices: [],
image: image,
text: ri.GetUTF8Text(RIL_SYMBOL),
confidence: ri.Confidence(RIL_SYMBOL),
baseline: ri.getBaseline(RIL_SYMBOL),
bbox: ri.getBoundingBox(RIL_SYMBOL),
is_superscript: !!ri.SymbolIsSuperscript(),
is_subscript: !!ri.SymbolIsSubscript(),
is_dropcap: !!ri.SymbolIsDropcap(),
}
word.symbols.push(symbol)
// same pattern as the word choices above, at symbol level
var ci = new module.ChoiceIterator(ri);
do {
symbol.choices.push({
text: ci.GetUTF8Text(),
confidence: ci.Confidence()
})
} while (ci.Next());
module.destroy(ci)
}
} while (ri.Next(RIL_SYMBOL));
// done walking; the iterator is passed to module.destroy
module.destroy(ri)
return {
text: base.GetUTF8Text(),
// the hOCR markup goes through deindent() before being returned
html: deindent(base.GetHOCRText()),
confidence: base.MeanTextConf(),
blocks: blocks,
psm: enumToString(base.GetPageSegMode(), 'PSM'),
oem: enumToString(base.oem(), 'OEM'),
version: base.Version(),
}
}

2
src/worker/fileSizes.js

@ -0,0 +1,2 @@
// Expected size of each language's data file, keyed by language code.
// The download progress handler divides the number of bytes loaded so far
// by the matching entry to report a completion fraction.
// NOTE(review): presumably these are byte counts of the files as served - confirm.
const fileSizes = {
    afr: 1079573,
    ara: 1701536,
    aze: 1420865,
    bel: 1276820,
    ben: 6772012,
    bul: 1605615,
    cat: 1652368,
    ces: 1035441,
    chi_sim: 17710414,
    chi_tra: 24717749,
    chr: 320649,
    'dan-frak': 677656,
    dan: 1972936,
    'deu-frak': 822644,
    deu: 991656,
    ell: 859719,
    eng: 9453554,
    enm: 619254,
    epo: 1241212,
    equ: 821130,
    est: 1905040,
    eus: 1641190,
    fin: 979418,
    fra: 1376221,
    frk: 5912963,
    frm: 5147082,
    glg: 1674938,
    grc: 3012615,
    heb: 1051501,
    hin: 6590065,
    hrv: 1926995,
    hun: 3074473,
    ind: 1874776,
    isl: 1634041,
    ita: 948593,
    ita_old: 3436571,
    jpn: 13507168,
    kan: 4390317,
    kor: 5353098,
    lav: 1843944,
    lit: 1779240,
    mal: 5966263,
    meme: 88453,
    mkd: 1163087,
    mlt: 1463001,
    msa: 1665427,
    nld: 1134708,
    nor: 2191610,
    osd: 4274649,
    pol: 7024662,
    por: 909359,
    ron: 915680,
    rus: 5969957,
    'slk-frak': 289885,
    slk: 2217342,
    slv: 1611338,
    spa: 883170,
    spa_old: 5647453,
    sqi: 1667041,
    srp: 1770244,
    swa: 757916,
    swe: 2451917,
    tam: 3498763,
    tel: 5795246,
    tgl: 1496256,
    tha: 3811136,
    tur: 3563264,
    ukr: 937566,
    vie: 2195922
}
export default fileSizes;

39
src/worker/index.js

@ -0,0 +1,39 @@
import TesseractCore from 'tesseract.js-core'
import pako from 'pako'
import recognize from './recognize'
import detect from './detect'
// module / base: the engine and its API object, created by the 'init' action.
// jobId: id of the most recently received message; read by the engine's
// progress hook, which has no other way to learn which job is running.
var module, base, jobId

/**
 * Worker message handler. `e.data` is `{action, args, jobId}`:
 *
 *  - 'init'      creates the engine with `args.mem` bytes of memory, makes the
 *                /tessdata directory and instantiates the API object;
 *  - 'recognize' runs recognize() on `args.image` with `args.options`;
 *  - 'detect'    runs detect() on `args.image`.
 *
 * Results are posted back as `{jobId, error, result}`. Other actions are ignored.
 */
onmessage = function(e) {
    var {action, args} = e.data;

    // Replies must carry the id of the request they answer. recognize/detect
    // finish asynchronously (language data may have to be downloaded), and the
    // shared `jobId` can be overwritten by a later message in the meantime, so
    // the id is captured per message instead of being read at reply time.
    var requestId = e.data.jobId
    jobId = requestId

    console.log('worker got action', action)

    var reply = (error, result) => postMessage({jobId: requestId, error, result})

    if(action === 'init'){
        module = TesseractCore({
            TOTAL_MEMORY: args.mem, //must be a multiple of 10 megabytes
            TesseractProgress(percent){
                postMessage({ jobId,
                    'progress': {
                        'recognized': Math.max(0,(percent-30)/70)
                    }
                })
            },
            onRuntimeInitialized() {}
        })
        module.FS_createPath("/","tessdata",true,true)
        base = new module.TessBaseAPI()
        return
    }

    if(action !== 'recognize' && action !== 'detect') return

    // Without a prior 'init' there is no engine to run on; tell the caller
    // instead of dying on an undefined `module` and leaving the job unanswered.
    if(!module || !base){
        reply('Tesseract worker received "' + action + '" before "init"')
        return
    }

    if(action === 'recognize'){
        var {image, options} = args
        recognize(requestId, module, base, image, options, reply)
    } else {
        detect(requestId, module, base, args.image, reply)
    }
}

88
src/worker/loadLanguage.js

@ -0,0 +1,88 @@
import pako from 'pako'
import db from './db'
import fileSizes from './fileSizes'
/**
 * Download the trained data for `lang` and hand it over decompressed.
 *
 * @param {string}   lang      language code; also the key into `fileSizes`
 *                             used to compute the download fraction.
 * @param {function} progress  called with small status objects
 *                             (requesting / loaded_lang_model /
 *                             unzipping_lang_model / unzipped_lang_model).
 * @param {function} cb        cb(null, Uint8Array) on success,
 *                             cb(message, null) on failure. The error is a
 *                             plain string: callers forward it through
 *                             postMessage, which cannot clone an
 *                             XMLHttpRequest object.
 * @param {string}   [url]     where to fetch from; defaults to the hosted
 *                             gzipped file for `lang`.
 */
function getLanguageData(lang, progress, cb, url='https://cdn.rawgit.com/naptha/tessdata/gh-pages/3.02/'+lang+'.traineddata.gz'){
    var xhr = new XMLHttpRequest();

    // Report a failure exactly once and make sure no later event fires the callback again.
    function fail(reason){
        xhr.onprogress = xhr.onload = xhr.onerror = null
        cb('Failed to load language data for "' + lang + '" from ' + url + ': ' + reason, null)
    }

    xhr.open('GET', url, true);
    // set after open(): some engines reject responseType on an unopened request
    xhr.responseType = 'arraybuffer';

    xhr.onerror = e => fail('network error')

    xhr.onprogress = e => progress({
        'loaded_lang_model': e.loaded/fileSizes[lang], //this is kinda wrong on safari
        cached: false
    })

    xhr.onload = e => {
        // status 0 with a body is accepted as success as well
        if (!(xhr.status == 200 || (xhr.status == 0 && xhr.response))) return fail('HTTP status ' + xhr.status);

        progress({'unzipping_lang_model': true})

        var response
        try {
            response = new Uint8Array(xhr.response)
            // keep inflating while the payload still starts with the gzip magic bytes
            while(response[0] == 0x1f && response[1] == 0x8b) response = pako.ungzip(response);
        } catch (err) {
            return fail('could not decompress (' + ((err && err.message) || err) + ')')
        }

        progress({
            'unzipped_lang_model': true,
            'lang_model_size': response.length
        })
        cb(null, response)
    }

    progress({
        'loaded_lang_model': 0,
        cached: false,
        requesting: true
    })
    xhr.send()
}
// var loaded_langs = []
/**
 * Obtain the trained data for `lang`, preferring the local cache.
 *
 * Flow:
 *  - the cache cannot be opened  -> download, do not store;
 *  - the cache has no entry      -> download, store, then deliver;
 *  - the cache has an entry      -> deliver it (inflating it first if it
 *                                   still starts with the gzip magic bytes).
 *
 * Progress is posted to the main thread as `{jobId, progress}` messages.
 * `created_virtual_datafile` is only announced after a successful download;
 * it used to be posted for failed downloads too.
 *
 * @param {string}   lang   language code, also used as the cache key.
 * @param            jobId  id attached to every progress message.
 * @param {function} cb     cb(err) on failure, cb(null, data) on success.
 * @param {string}   [url]  optional download location passed to getLanguageData.
 */
export default function loadLanguage(lang, jobId, cb, url){
    console.log('loadLanguage jobId', jobId)

    function progressMessage(progress){
        postMessage({ jobId, progress })
    }

    // Download finished (cache unavailable, or entry already handled).
    function onDownloaded(err, data){
        if(err) return cb(err);
        progressMessage({ created_virtual_datafile: true})
        cb(null, data)
    }

    // Download finished and the cache is usable: store the data before delivering it.
    function onDownloadedForCache(err, data) {
        if(err) return cb(err);
        // best effort: a failed write is only logged, the data is still delivered
        db.put(lang, data, err => console.log('cached', lang, err))
        progressMessage({cached_lang: lang})
        onDownloaded(null, data)
    }

    db.open({compression: false}, err => {
        if (err) return getLanguageData(lang, progressMessage, onDownloaded, url);

        db.get(lang, (err, data) => {
            if (err) return getLanguageData(lang, progressMessage, onDownloadedForCache, url)

            while(data[0] == 0x1f && data[1] == 0x8b) data = pako.ungzip(data);
            progressMessage({ loaded_lang_model: lang, from_cache: true })
            cb(null, data)
        })
    })
}

71
src/worker/recognize.js

@ -0,0 +1,71 @@
import desaturate from './desaturate'
import loadLanguage from './loadLanguage'
import circularize from './circularize'
import dump from './dump'
// languages whose traineddata file has already been written to the engine's filesystem
var loaded_langs = []

/**
 * Recognize the text in `image` and report the full result tree.
 *
 * The image is converted to grayscale and copied into the engine heap; the
 * language data for `options.lang` is loaded first if this worker has not
 * written it to the engine's filesystem yet. Every own property of `options`
 * is passed to `base.SetVariable` and announced with a `set_variable`
 * progress message.
 *
 * The image buffer is always released and `base.End()` always called, even
 * when the engine throws; in that case the failure is reported through `cb`
 * rather than escaping as an uncaught exception that leaves the job unanswered.
 *
 * @param jobId    id attached to progress messages.
 * @param module   engine module (allocate, _free, wrapPointer, FS_createDataFile).
 * @param base     engine API instance.
 * @param image    object with `data`, `width`, `height` (see desaturate).
 * @param options  `{lang, ...engine variables}`.
 * @param cb       cb(err, null) on failure, cb(null, result) on success.
 */
export default function recognize(jobId, module, base, image, options, cb){
    console.log('recognize id', jobId)
    var {lang} = options
    var width = image.width, height = image.height;
    image = desaturate(image)
    var ptr = module.allocate(image, 'i8', module.ALLOC_NORMAL);

    // Errors are posted to the main thread, so reduce them to plain strings.
    function describe(failure){
        return failure && failure.message ? failure.message : String(failure)
    }

    function run() {
        var everything = null, failure = null, failed = false;
        try {
            base.Init(null, lang)
            postMessage({
                jobId,
                'progress': {
                    'initialized_with_lang': lang
                }
            })

            Object.keys(options).forEach(function(option){
                base.SetVariable(option, options[option]);
                postMessage({
                    jobId: jobId,
                    'progress': {
                        'set_variable': {
                            variable: option,
                            value: options[option]
                        }
                    }
                })
            })

            // one byte per pixel, so the row stride equals the width
            base.SetImage(module.wrapPointer(ptr), width, height, 1, width)
            base.SetRectangle(0, 0, width, height)
            base.Recognize(null)
            everything = circularize(dump(module, base))
        } catch (e) {
            failed = true
            failure = e
        } finally {
            // release engine state and the image buffer on every path
            base.End();
            module._free(ptr);
        }

        if(failed) return cb(describe(failure), null)
        cb(null, everything)
    }

    if(loaded_langs.indexOf(lang) != -1) return run();

    loadLanguage(lang, jobId, function(err, result){
        if(err){
            console.error("error loading", lang);
            module._free(ptr);
            return cb(err, null);
        }

        try {
            // Another request for the same language may have finished loading
            // while this one was in flight; the file must only be created once.
            if(loaded_langs.indexOf(lang) == -1){
                module.FS_createDataFile('tessdata', lang +".traineddata", result, true, false);
                // mark as loaded only once the file really exists
                loaded_langs.push(lang)
            }
        } catch (e) {
            module._free(ptr);
            return cb(describe(e), null);
        }

        run()
    })
}

42
webpack.config.dev.js

@ -0,0 +1,42 @@
var path = require('path');
var webpack = require('webpack');
/**
 * Expand a bundle description into a complete development webpack config.
 *
 * @param {{entry: string, output: object, include: string[]}} bundle
 *        `output` is extended with the build directory and public path;
 *        `include` limits which files go through the babel loader.
 * @returns {object} webpack configuration object.
 */
function config({entry, output, include}) {
    // bundles are emitted to ./build and served under /build/
    var bundleOutput = Object.assign({}, output, {
        path: path.join(__dirname, 'build'),
        publicPath: '/build/',
    });

    var babelRule = {
        test: /\.js$/,
        loaders: ['babel'],
        include: include
    };

    return {
        devtool: 'cheap-module-eval-source-map',
        entry: entry,
        output: bundleOutput,
        plugins: [new webpack.NoErrorsPlugin()],
        module: { loaders: [babelRule] },
        node: { fs: "empty" }
    };
}
// Page-side library, exposed as a UMD module named "Tesseract".
var browserBundle = {
    entry: './src/browser/index.js',
    output: {
        filename: 'tesseract.js',
        library: "Tesseract",
        libraryTarget: "umd"
    },
    include: [path.join(__dirname, 'src/browser')]
};

// Web worker script.
var workerBundle = {
    entry: './src/worker/index.js',
    output: {
        filename: 'tesseract.worker.js',
    },
    include: [path.join(__dirname, 'src/worker')]
};

// One webpack config per bundle, in this order.
module.exports = [browserBundle, workerBundle].map(config);

46
webpack.config.prod.js

@ -0,0 +1,46 @@
var path = require('path');
var webpack = require('webpack');
/**
 * Expand a bundle description into a complete production webpack config.
 *
 * @param {{entry: string, output: object, include: string[]}} bundle
 *        `output` is extended with the dist directory; `include` limits
 *        which files go through the babel loader.
 * @returns {object} webpack configuration object.
 */
function config({entry, output, include}) {
    // production bundles are emitted to ./dist
    var bundleOutput = Object.assign({}, output, {
        path: path.join(__dirname, 'dist')
    });

    var optimize = webpack.optimize;
    var plugins = [
        new optimize.OccurenceOrderPlugin(),
        new optimize.DedupePlugin(),
        // minify, with the minifier's warnings turned off
        new optimize.UglifyJsPlugin({
            compressor: {
                warnings: false
            }
        })
    ];

    var babelRule = {
        test: /\.js$/,
        loaders: ['babel'],
        include: include
    };

    return {
        entry: entry,
        output: bundleOutput,
        plugins: plugins,
        module: { loaders: [babelRule] },
        node: { fs: "empty" }
    };
}
// Page-side library, exposed as a UMD module named "Tesseract".
var browserBundle = {
    entry: './src/browser/index.js',
    output: {
        filename: 'tesseract.js',
        library: "Tesseract",
        libraryTarget: "umd"
    },
    include: [path.join(__dirname, 'src/browser')]
};

// Web worker script.
var workerBundle = {
    entry: './src/worker/index.js',
    output: {
        filename: 'tesseract.worker.js',
    },
    include: [path.join(__dirname, 'src/worker')]
};

// One webpack config per bundle, in this order.
module.exports = [browserBundle, workerBundle].map(config);
Loading…
Cancel
Save