Jerome Wu
5 years ago
27 changed files with 300 additions and 344 deletions
@ -1,2 +1,19 @@ |
|||||||
<script src="/dist/tesseract.dev.js"></script> |
<html> |
||||||
<input type="file" onchange="const worker = new Tesseract.TesseractWorker({ corePath: '../../node_modules/tesseract.js-core/tesseract-core.wasm.js' });worker.recognize(this.files[0]).progress(function(data){console.log(data)}).then(function(data){console.log(data)})"> |
<head> |
||||||
|
<script src="/dist/tesseract.dev.js"></script> |
||||||
|
</head> |
||||||
|
<body> |
||||||
|
<input type="file" id="uploader"> |
||||||
|
<script> |
||||||
|
/*
 * Change handler for the file input: runs Tesseract.recognize on the first
 * selected file with the 'eng' language and logs the recognized text.
 * Messages passed to the logger option are logged as they arrive.
 */
const recognize = async (event) => {
  const selectedFile = event.target.files[0];
  const options = {
    corePath: '../../node_modules/tesseract.js-core/tesseract-core.wasm.js',
    logger: (message) => console.log(message),
  };
  const result = await Tesseract.recognize(selectedFile, 'eng', options);
  console.log(result.text);
};
||||||
|
const elm = document.getElementById('uploader'); |
||||||
|
elm.addEventListener('change', recognize); |
||||||
|
</script> |
||||||
|
</body> |
||||||
|
</html> |
||||||
|
@ -1,18 +1,13 @@ |
|||||||
#!/usr/bin/env node
|
#!/usr/bin/env node
|
||||||
const path = require('path'); |
const path = require('path'); |
||||||
const { TesseractWorker } = require('../../'); |
const Tesseract = require('../../'); |
||||||
|
|
||||||
const [,, imagePath] = process.argv; |
const [,, imagePath] = process.argv; |
||||||
const image = path.resolve(__dirname, (imagePath || '../../tests/assets/images/cosmic.png')); |
const image = path.resolve(__dirname, (imagePath || '../../tests/assets/images/cosmic.png')); |
||||||
const tessWorker = new TesseractWorker(); |
|
||||||
|
|
||||||
console.log(`Detecting ${image}`); |
console.log(`Recognizing ${image}`); |
||||||
|
|
||||||
tessWorker.detect(image) |
Tesseract.detect(image, { logger: m => console.log(m) }) |
||||||
.progress((info) => { |
|
||||||
console.log(info); |
|
||||||
}) |
|
||||||
.then((data) => { |
.then((data) => { |
||||||
console.log('done', data); |
console.log(data); |
||||||
process.exit(); |
|
||||||
}); |
}); |
||||||
|
@ -0,0 +1,2 @@ |
|||||||
|
module.exports = (typeof process.env !== 'undefined' && process.env.NODE_ENV === 'development') |
||||||
|
? console.log : () => {}; |
@ -0,0 +1,10 @@ |
|||||||
|
const { set, get, del } = require('idb-keyval'); |
||||||
|
|
||||||
|
module.exports = { |
||||||
|
readCache: get, |
||||||
|
writeCache: set, |
||||||
|
deleteCache: del, |
||||||
|
checkCache: path => ( |
||||||
|
get(path).then(v => typeof v !== 'undefined') |
||||||
|
), |
||||||
|
}; |
@ -0,0 +1,19 @@ |
|||||||
|
module.exports = (corePath, res) => { |
||||||
|
if (typeof global.TesseractCore === 'undefined') { |
||||||
|
res.progress({ status: 'loading tesseract core', progress: 0 }); |
||||||
|
global.importScripts(corePath); |
||||||
|
/* |
||||||
|
* Depending on whether the browser supports WebAssembly, |
||||||
|
* the version of the TesseractCore will be different. |
||||||
|
*/ |
||||||
|
if (typeof global.TesseractCoreWASM !== 'undefined' && typeof WebAssembly === 'object') { |
||||||
|
global.TesseractCore = global.TesseractCoreWASM; |
||||||
|
} else if (typeof global.TesseractCoreASM !== 'undefined') { |
||||||
|
global.TesseractCore = global.TesseractCoreASM; |
||||||
|
} else { |
||||||
|
throw Error('Failed to load TesseractCore'); |
||||||
|
} |
||||||
|
res.progress({ status: 'loading tesseract core', progress: 1 }); |
||||||
|
} |
||||||
|
return global.TesseractCore; |
||||||
|
}; |
@ -0,0 +1 @@ |
|||||||
|
module.exports = require('zlibjs').gunzipSync; |
@ -0,0 +1,33 @@ |
|||||||
|
/** |
||||||
|
* |
||||||
|
* Browser worker scripts |
||||||
|
* |
||||||
|
* @fileoverview Browser worker implementation |
||||||
|
* @author Kevin Kwok <antimatter15@gmail.com> |
||||||
|
* @author Guillermo Webster <gui@mit.edu> |
||||||
|
* @author Jerome Wu <jeromewus@gmail.com> |
||||||
|
*/ |
||||||
|
|
||||||
|
const worker = require('../'); |
||||||
|
const getCore = require('./getCore'); |
||||||
|
const gunzip = require('./gunzip'); |
||||||
|
const resolveURL = require('./resolveURL'); |
||||||
|
const cache = require('./cache'); |
||||||
|
|
||||||
|
/*
 * register message handler: every message event's data is handed to the
 * dispatcher together with a callback that posts the reply via postMessage.
 */
const reply = obj => postMessage(obj);
global.addEventListener('message', (event) => {
  worker.dispatchHandlers(event.data, reply);
});

/*
 * Install the browser adapter: getCore (a sync function to load and return
 * TesseractCore), gunzip, resolveURL and every function exported by ./cache.
 */
worker.setAdapter(Object.assign({ getCore, gunzip, resolveURL }, cache));
@ -1 +0,0 @@ |
|||||||
module.exports = s => new Uint8Array(atob(s).split('').map(c => c.charCodeAt(0))); |
|
@ -0,0 +1,18 @@ |
|||||||
|
const resolveURL = require('resolve-url'); |
||||||
|
const { version } = require('../../../package.json'); |
||||||
|
const defaultOptions = require('../../constants/defaultOptions'); |
||||||
|
|
||||||
|
/* |
||||||
|
* Default options for browser worker |
||||||
|
*/ |
||||||
|
module.exports = { |
||||||
|
...defaultOptions, |
||||||
|
workerPath: (typeof process !== 'undefined' && process.env.TESS_ENV === 'development') |
||||||
|
? resolveURL(`/dist/worker.dev.js?nocache=${Math.random().toString(36).slice(3)}`) |
||||||
|
: `https://unpkg.com/tesseract.js@v${version}/dist/worker.min.js`, |
||||||
|
/* |
||||||
|
* If browser doesn't support WebAssembly, |
||||||
|
* load ASM version instead |
||||||
|
*/ |
||||||
|
corePath: `https://unpkg.com/tesseract.js-core@v2.0.0-beta.10/tesseract-core.${typeof WebAssembly === 'object' ? 'wasm' : 'asm'}.js`, |
||||||
|
}; |
@ -0,0 +1,5 @@ |
|||||||
|
module.exports = (worker, handler) => { |
||||||
|
worker.onmessage = ({ data }) => { // eslint-disable-line
|
||||||
|
handler(data); |
||||||
|
}; |
||||||
|
}; |
@ -0,0 +1,92 @@ |
|||||||
|
const axios = require('axios'); |
||||||
|
const resolveURL = require('resolve-url'); |
||||||
|
|
||||||
|
/**
 * readFromBlobOrFile
 *
 * @name readFromBlobOrFile
 * @function read a Blob or File with a FileReader
 * @access private
 * @param {object} blob A blob or file object to read
 * @returns {Promise} resolves with the FileReader result of
 *   readAsArrayBuffer; rejects with an Error that carries the reader's
 *   error code
 */
const readFromBlobOrFile = blob => new Promise((resolve, reject) => {
  const reader = new FileReader();

  // Handlers are attached before the read starts.
  reader.onerror = (event) => {
    const { code } = event.target.error;
    reject(Error(`File could not be read! Code=${code}`));
  };
  reader.onload = () => {
    resolve(reader.result);
  };

  reader.readAsArrayBuffer(blob);
});
||||||
|
|
||||||
|
/**
 * loadImage
 *
 * @name loadImage
 * @function load image from different source
 * @access private
 * @param {string, object} image - image source, supported formats:
 *   string: URL string, can be relative path
 *   string: base64 image
 *   img HTMLElement: extract image source from src attribute
 *   video HTMLElement: extract image source from poster attribute
 *   canvas HTMLElement: extract image data by converting to Blob
 *   File or Blob instance: data from <input type="file" />
 * @returns {Promise} resolves with a Uint8Array of the image bytes, or with
 *   the string 'undefined' when no image is given
 */
const loadImage = async (image) => {
  if (typeof image === 'undefined') {
    return 'undefined';
  }

  let data = image;

  if (typeof image === 'string') {
    if (/data:image\/([a-zA-Z]*);base64,([^"]*)/.test(image)) {
      // Base64 image: decode the part after the comma into raw bytes.
      data = Uint8Array.from(atob(image.split(',')[1]), c => c.charCodeAt(0));
    } else {
      const response = await axios.get(resolveURL(image), { responseType: 'arraybuffer' });
      data = response.data;
    }
  } else if (image instanceof HTMLElement) {
    if (image.tagName === 'IMG') {
      // The recursive call returns a promise; without await the promise
      // object itself would be converted below, giving an empty array.
      data = await loadImage(image.src);
    } else if (image.tagName === 'VIDEO') {
      data = await loadImage(image.poster);
    } else if (image.tagName === 'CANVAS') {
      // Only toBlob is wrapped in the promise, so a failed read rejects
      // this function instead of leaving it pending forever.
      const blob = await new Promise((resolve) => {
        image.toBlob(resolve);
      });
      data = await readFromBlobOrFile(blob);
    }
  } else if (image instanceof File || image instanceof Blob) {
    data = await readFromBlobOrFile(image);
  }

  return new Uint8Array(data);
};
||||||
|
|
||||||
|
/** |
||||||
|
* sendPacket |
||||||
|
* |
||||||
|
* @name sendPacket |
||||||
|
* @function send packet to worker and create a job |
||||||
|
* @access public |
||||||
|
* @param {object} instance TesseractWorker instance |
||||||
|
* @param {object} iPacket data for worker |
||||||
|
*/ |
||||||
|
module.exports = async (worker, _packet) => { |
||||||
|
const packet = { ..._packet }; |
||||||
|
packet.payload.image = await loadImage(packet.payload.image); |
||||||
|
worker.postMessage(packet); |
||||||
|
}; |
@ -0,0 +1,23 @@ |
|||||||
|
/** |
||||||
|
* spawnWorker |
||||||
|
* |
||||||
|
* @name spawnWorker |
||||||
|
* @function create a new Worker in browser |
||||||
|
* @access public |
||||||
|
* @param {object} options |
||||||
|
* @param {string} options.workerPath - worker script path |
||||||
|
* @param {boolean} options.workerBlobURL - Use a blob:// URL for the worker script
|
||||||
|
*/ |
||||||
|
module.exports = ({ workerPath, workerBlobURL }) => { |
||||||
|
let worker; |
||||||
|
if (Blob && URL && workerBlobURL) { |
||||||
|
const blob = new Blob([`importScripts("${workerPath}");`], { |
||||||
|
type: 'application/javascript', |
||||||
|
}); |
||||||
|
worker = new Worker(URL.createObjectURL(blob)); |
||||||
|
} else { |
||||||
|
worker = new Worker(workerPath); |
||||||
|
} |
||||||
|
|
||||||
|
return worker; |
||||||
|
}; |
@ -0,0 +1,11 @@ |
|||||||
|
/** |
||||||
|
* terminateWorker |
||||||
|
* |
||||||
|
* @name terminateWorker |
||||||
|
* @function terminate worker |
||||||
|
* @access public |
||||||
|
* @param {object} instance TesseractWorker instance |
||||||
|
*/ |
||||||
|
module.exports = (worker) => { |
||||||
|
worker.terminate(); |
||||||
|
}; |
@ -1,55 +0,0 @@ |
|||||||
/** |
|
||||||
* |
|
||||||
* Browser worker scripts |
|
||||||
* |
|
||||||
* @fileoverview Browser worker implementation |
|
||||||
* @author Kevin Kwok <antimatter15@gmail.com> |
|
||||||
* @author Guillermo Webster <gui@mit.edu> |
|
||||||
* @author Jerome Wu <jeromewus@gmail.com> |
|
||||||
*/ |
|
||||||
|
|
||||||
const check = require('check-types'); |
|
||||||
const workerWrapper = require('../../workerWrapper'); |
|
||||||
const b64toU8Array = require('./b64toU8Array'); |
|
||||||
|
|
||||||
/* |
|
||||||
* register message handler |
|
||||||
*/ |
|
||||||
global.addEventListener('message', ({ data }) => { |
|
||||||
workerWrapper.dispatchHandlers(data, obj => postMessage(obj)); |
|
||||||
}); |
|
||||||
|
|
||||||
/* |
|
||||||
* getCore is a sync function to load and return |
|
||||||
* TesseractCore. |
|
||||||
*/ |
|
||||||
workerWrapper.setAdapter({ |
|
||||||
getCore: (corePath, res) => { |
|
||||||
if (check.undefined(global.TesseractCore)) { |
|
||||||
res.progress({ status: 'loading tesseract core', progress: 0 }); |
|
||||||
global.importScripts(corePath); |
|
||||||
/* |
|
||||||
* Depending on whether the browser supports WebAssembly, |
|
||||||
* the version of the TesseractCore will be different. |
|
||||||
*/ |
|
||||||
if (check.not.undefined(global.TesseractCoreWASM) && typeof WebAssembly === 'object') { |
|
||||||
global.TesseractCore = global.TesseractCoreWASM; |
|
||||||
} else if (check.not.undefined(global.TesseractCoreASM)) { |
|
||||||
global.TesseractCore = global.TesseractCoreASM; |
|
||||||
} else { |
|
||||||
throw Error('Failed to load TesseractCore'); |
|
||||||
} |
|
||||||
res.progress({ status: 'loading tesseract core', progress: 1 }); |
|
||||||
} |
|
||||||
return global.TesseractCore; |
|
||||||
}, |
|
||||||
b64toU8Array, |
|
||||||
writeFile: (path, data, type) => { |
|
||||||
postMessage({ |
|
||||||
jobId: 'Download', |
|
||||||
path, |
|
||||||
data, |
|
||||||
type, |
|
||||||
}); |
|
||||||
}, |
|
||||||
}); |
|
Loading…
Reference in new issue