Jerome Wu
5 years ago
27 changed files with 300 additions and 344 deletions
@ -1,2 +1,19 @@
@@ -1,2 +1,19 @@
|
||||
<script src="/dist/tesseract.dev.js"></script> |
||||
<input type="file" onchange="const worker = new Tesseract.TesseractWorker({ corePath: '../../node_modules/tesseract.js-core/tesseract-core.wasm.js' });worker.recognize(this.files[0]).progress(function(data){console.log(data)}).then(function(data){console.log(data)})"> |
||||
<html>
  <head>
    <script src="/dist/tesseract.dev.js"></script>
  </head>
  <body>
    <input type="file" id="uploader">
    <script>
      /*
       * Run English OCR on the first file of the uploader's selection and
       * print the recognized text. Progress messages go to the console
       * through the `logger` option.
       */
      const recognize = async ({ target: { files } }) => {
        // Nothing selected (e.g. the selection was cleared): nothing to recognize.
        if (!files || files.length === 0) {
          return;
        }
        try {
          const { text } = await Tesseract.recognize(files[0], 'eng', {
            corePath: '../../node_modules/tesseract.js-core/tesseract-core.wasm.js',
            logger: m => console.log(m),
          });
          console.log(text);
        } catch (err) {
          // Report the failure instead of leaving an unhandled rejection.
          console.error(err);
        }
      };
      const elm = document.getElementById('uploader');
      elm.addEventListener('change', recognize);
    </script>
  </body>
</html>
||||
|
@ -1,18 +1,13 @@
@@ -1,18 +1,13 @@
|
||||
#!/usr/bin/env node
/*
 * Example script: run Tesseract.detect on an image and print the result.
 *
 * The image path is taken from the first command line argument and is
 * resolved relative to this script's directory; without an argument the
 * cosmic.png test asset is used.
 */
const path = require('path');
const Tesseract = require('../../');

const [,, imagePath] = process.argv;
const image = path.resolve(__dirname, (imagePath || '../../tests/assets/images/cosmic.png'));

console.log(`Detecting ${image}`);

// Progress messages are printed through the `logger` option.
Tesseract.detect(image, { logger: m => console.log(m) })
  .then((data) => {
    console.log(data);
  })
  .catch((err) => {
    // Surface the failure and signal it through the exit status.
    console.error(err);
    process.exitCode = 1;
  });
||||
|
@ -0,0 +1,2 @@
@@ -0,0 +1,2 @@
|
||||
module.exports = (typeof process.env !== 'undefined' && process.env.NODE_ENV === 'development') |
||||
? console.log : () => {}; |
@ -0,0 +1,10 @@
@@ -0,0 +1,10 @@
|
||||
const { set, get, del } = require('idb-keyval'); |
||||
|
||||
module.exports = { |
||||
readCache: get, |
||||
writeCache: set, |
||||
deleteCache: del, |
||||
checkCache: path => ( |
||||
get(path).then(v => typeof v !== 'undefined') |
||||
), |
||||
}; |
@ -0,0 +1,19 @@
@@ -0,0 +1,19 @@
|
||||
module.exports = (corePath, res) => { |
||||
if (typeof global.TesseractCore === 'undefined') { |
||||
res.progress({ status: 'loading tesseract core', progress: 0 }); |
||||
global.importScripts(corePath); |
||||
/* |
||||
* Depending on whether the browser supports WebAssembly, |
||||
* the version of the TesseractCore will be different. |
||||
*/ |
||||
if (typeof global.TesseractCoreWASM !== 'undefined' && typeof WebAssembly === 'object') { |
||||
global.TesseractCore = global.TesseractCoreWASM; |
||||
} else if (typeof global.TesseractCoreASM !== 'undefined') { |
||||
global.TesseractCore = global.TesseractCoreASM; |
||||
} else { |
||||
throw Error('Failed to load TesseractCore'); |
||||
} |
||||
res.progress({ status: 'loading tesseract core', progress: 1 }); |
||||
} |
||||
return global.TesseractCore; |
||||
}; |
@ -0,0 +1 @@
@@ -0,0 +1 @@
|
||||
// gunzip adapter: re-export zlibjs' gunzipSync.
const { gunzipSync } = require('zlibjs');

module.exports = gunzipSync;
@ -0,0 +1,33 @@
@@ -0,0 +1,33 @@
|
||||
/** |
||||
* |
||||
* Browser worker scripts |
||||
* |
||||
* @fileoverview Browser worker implementation |
||||
* @author Kevin Kwok <antimatter15@gmail.com> |
||||
* @author Guillermo Webster <gui@mit.edu> |
||||
* @author Jerome Wu <jeromewus@gmail.com> |
||||
*/ |
||||
|
||||
const worker = require('../'); |
||||
const getCore = require('./getCore'); |
||||
const gunzip = require('./gunzip'); |
||||
const resolveURL = require('./resolveURL'); |
||||
const cache = require('./cache'); |
||||
|
||||
/*
 * register message handler: the data of every incoming message is handed to
 * the dispatcher together with a callback that posts results back
 */
global.addEventListener('message', (event) => {
  const reply = obj => postMessage(obj);
  worker.dispatchHandlers(event.data, reply);
});
||||
|
||||
/*
 * Plug the browser implementations into the worker: the core loader,
 * gunzip, the URL resolver and every helper exported by the cache module.
 */
const browserAdapter = Object.assign({ getCore, gunzip, resolveURL }, cache);

worker.setAdapter(browserAdapter);
@ -1 +0,0 @@
@@ -1 +0,0 @@
|
||||
module.exports = s => new Uint8Array(atob(s).split('').map(c => c.charCodeAt(0))); |
@ -0,0 +1,18 @@
@@ -0,0 +1,18 @@
|
||||
const resolveURL = require('resolve-url'); |
||||
const { version } = require('../../../package.json'); |
||||
const defaultOptions = require('../../constants/defaultOptions'); |
||||
|
||||
/*
 * Default options for browser worker
 *
 * workerPath: the locally served dev bundle when TESS_ENV is 'development',
 *             otherwise the minified worker of this package version on unpkg.
 * corePath:   the tesseract.js-core script on unpkg.
 */
const useDevWorker = typeof process !== 'undefined' && process.env.TESS_ENV === 'development';

// If browser doesn't support WebAssembly, load ASM version instead
const coreBuild = typeof WebAssembly === 'object' ? 'wasm' : 'asm';

const browserDefaults = { ...defaultOptions };

if (useDevWorker) {
  // A random `nocache` query value is appended to the dev worker URL.
  browserDefaults.workerPath = resolveURL(`/dist/worker.dev.js?nocache=${Math.random().toString(36).slice(3)}`);
} else {
  browserDefaults.workerPath = `https://unpkg.com/tesseract.js@v${version}/dist/worker.min.js`;
}

browserDefaults.corePath = `https://unpkg.com/tesseract.js-core@v2.0.0-beta.10/tesseract-core.${coreBuild}.js`;

module.exports = browserDefaults;
@ -0,0 +1,5 @@
@@ -0,0 +1,5 @@
|
||||
module.exports = (worker, handler) => { |
||||
worker.onmessage = ({ data }) => { // eslint-disable-line
|
||||
handler(data); |
||||
}; |
||||
}; |
@ -0,0 +1,92 @@
@@ -0,0 +1,92 @@
|
||||
const axios = require('axios'); |
||||
const resolveURL = require('resolve-url'); |
||||
|
||||
/**
 * readFromBlobOrFile
 *
 * @name readFromBlobOrFile
 * @function read a blob or file through a FileReader
 * @access private
 * @param {object} blob A blob or file object to read
 * @returns {Promise} resolves with the FileReader result (requested via
 *   readAsArrayBuffer); rejects with an Error carrying the reader's error
 *   code when reading fails
 */
const readFromBlobOrFile = (blob) => {
  const startRead = (resolve, reject) => {
    const reader = new FileReader();
    reader.onerror = (event) => {
      const { code } = event.target.error;
      reject(Error(`File could not be read! Code=${code}`));
    };
    reader.onload = () => {
      resolve(reader.result);
    };
    reader.readAsArrayBuffer(blob);
  };

  return new Promise(startRead);
};
||||
|
||||
/**
 * loadImage
 *
 * @name loadImage
 * @function load image from different source
 * @access private
 * @param {string, object} image - image source, supported formats:
 *   string: URL string, can be relative path
 *   string: base64 image (data URI)
 *   img HTMLElement: extract image source from src attribute
 *   video HTMLElement: extract image source from poster attribute
 *   canvas HTMLElement: extract image data by converting to Blob
 *   File or Blob instance: e.g. data from <input type="file" />
 * @returns {Promise} resolves with the binary image as a Uint8Array, or with
 *   the string 'undefined' when `image` is undefined
 */
const loadImage = async (image) => {
  if (typeof image === 'undefined') {
    return 'undefined';
  }

  let data = image;

  if (typeof image === 'string') {
    if (/data:image\/([a-zA-Z]*);base64,([^"]*)/.test(image)) {
      // Base64 Image: decode the part after the comma byte by byte
      data = atob(image.split(',')[1])
        .split('')
        .map(c => c.charCodeAt(0));
    } else {
      // Anything else is treated as a URL and downloaded as binary
      const response = await axios.get(resolveURL(image), { responseType: 'arraybuffer' });
      data = response.data;
    }
  } else if (image instanceof HTMLElement) {
    /*
     * The recursive calls must be awaited: wrapping the pending promise in
     * a Uint8Array would silently produce an empty image.
     */
    if (image.tagName === 'IMG') {
      data = await loadImage(image.src);
    }
    if (image.tagName === 'VIDEO') {
      data = await loadImage(image.poster);
    }
    if (image.tagName === 'CANVAS') {
      /*
       * Only the callback-style toBlob is wrapped in a promise; the read
       * itself is awaited here so that a failure rejects this call instead
       * of leaving it pending.
       */
      const blob = await new Promise((resolve) => {
        image.toBlob(resolve);
      });
      data = await readFromBlobOrFile(blob);
    }
  } else if (image instanceof File || image instanceof Blob) {
    data = await readFromBlobOrFile(image);
  }

  return new Uint8Array(data);
};
||||
|
||||
/** |
||||
* sendPacket |
||||
* |
||||
* @name sendPacket |
||||
* @function send packet to worker and create a job |
||||
* @access public |
||||
* @param {object} instance TesseractWorker instance |
||||
* @param {object} iPacket data for worker |
||||
*/ |
||||
module.exports = async (worker, _packet) => { |
||||
const packet = { ..._packet }; |
||||
packet.payload.image = await loadImage(packet.payload.image); |
||||
worker.postMessage(packet); |
||||
}; |
@ -0,0 +1,23 @@
@@ -0,0 +1,23 @@
|
||||
/** |
||||
* spawnWorker |
||||
* |
||||
* @name spawnWorker |
||||
* @function create a new Worker in browser |
||||
* @access public |
||||
* @param {object} options |
||||
* @param {string} options.workerPath - worker script path |
||||
* @param {boolean} options.workerBlobURL - Use a blob:// URL for the worker script
|
||||
*/ |
||||
module.exports = ({ workerPath, workerBlobURL }) => { |
||||
let worker; |
||||
if (Blob && URL && workerBlobURL) { |
||||
const blob = new Blob([`importScripts("${workerPath}");`], { |
||||
type: 'application/javascript', |
||||
}); |
||||
worker = new Worker(URL.createObjectURL(blob)); |
||||
} else { |
||||
worker = new Worker(workerPath); |
||||
} |
||||
|
||||
return worker; |
||||
}; |
@ -0,0 +1,11 @@
@@ -0,0 +1,11 @@
|
||||
/** |
||||
* terminateWorker |
||||
* |
||||
* @name terminateWorker |
||||
* @function terminate worker |
||||
* @access public |
||||
* @param {object} instance TesseractWorker instance |
||||
*/ |
||||
module.exports = (worker) => { |
||||
worker.terminate(); |
||||
}; |
@ -1,55 +0,0 @@
@@ -1,55 +0,0 @@
|
||||
/** |
||||
* |
||||
* Browser worker scripts |
||||
* |
||||
* @fileoverview Browser worker implementation |
||||
* @author Kevin Kwok <antimatter15@gmail.com> |
||||
* @author Guillermo Webster <gui@mit.edu> |
||||
* @author Jerome Wu <jeromewus@gmail.com> |
||||
*/ |
||||
|
||||
const check = require('check-types'); |
||||
const workerWrapper = require('../../workerWrapper'); |
||||
const b64toU8Array = require('./b64toU8Array'); |
||||
|
||||
/*
 * register message handler: the data of every incoming message is handed to
 * the wrapper's dispatcher together with a callback that posts results back
 */
global.addEventListener('message', (event) => {
  const reply = obj => postMessage(obj);
  workerWrapper.dispatchHandlers(event.data, reply);
});
||||
|
||||
/*
 * loadCore is a sync function to load and return TesseractCore.
 * The loaded core is kept on global.TesseractCore and reused by later calls;
 * progress 0 / 1 is reported through res.progress around the import.
 */
const loadCore = (corePath, res) => {
  if (!check.undefined(global.TesseractCore)) {
    return global.TesseractCore;
  }

  const status = 'loading tesseract core';
  res.progress({ status, progress: 0 });
  global.importScripts(corePath);

  /*
   * The imported script exposes a WASM and/or an ASM build; the WASM build
   * is only picked when WebAssembly is available.
   */
  const wasmBuildDefined = check.not.undefined(global.TesseractCoreWASM);
  if (wasmBuildDefined && typeof WebAssembly === 'object') {
    global.TesseractCore = global.TesseractCoreWASM;
  } else if (check.not.undefined(global.TesseractCoreASM)) {
    global.TesseractCore = global.TesseractCoreASM;
  } else {
    throw Error('Failed to load TesseractCore');
  }

  res.progress({ status, progress: 1 });
  return global.TesseractCore;
};

/*
 * Files are not written from inside the worker: a 'Download' message
 * carrying path, data and type is posted instead.
 */
const postDownload = (path, data, type) => {
  postMessage({
    jobId: 'Download',
    path,
    data,
    type,
  });
};

workerWrapper.setAdapter({
  getCore: loadCore,
  b64toU8Array,
  writeFile: postDownload,
});
Loading…
Reference in new issue