Jerome Wu
5 years ago
13 changed files with 117 additions and 45 deletions
@ -0,0 +1,52 @@ |
|||||||
|
<html> |
||||||
|
<head> |
||||||
|
<script src="/dist/tesseract.dev.js"></script> |
||||||
|
</head> |
||||||
|
<body> |
||||||
|
<div> |
||||||
|
<input type="file" id="uploader"> |
||||||
|
<button id="download-pdf" disabled="true">Download PDF</button> |
||||||
|
</div> |
||||||
|
<textarea id="board" readonly rows="8" cols="80">Upload an image file</textarea> |
||||||
|
<script> |
||||||
|
// DOM controls wired up at the bottom of this script.
const uploader = document.getElementById('uploader');
const dlBtn = document.getElementById('download-pdf');

// `Tesseract` is expected as a global (presumably provided by the
// tesseract.dev.js script included in <head>).
const { createWorker } = Tesseract;

// Single worker shared by the upload and download handlers; every message
// passed to `logger` is echoed to the console.
const worker = createWorker({
  corePath: '/node_modules/tesseract.js-core/tesseract-core.wasm.js',
  logger: (message) => console.log(message),
});
||||||
|
/**
 * `change` handler for the file input: runs OCR on the first selected file,
 * writes the recognized text into the #board textarea and enables the
 * "Download PDF" button.
 *
 * @param {Event} event - Only `event.target.files` is read.
 * @returns {Promise<void>} Resolves once the text has been displayed, or
 *   immediately when no file is selected.
 */
const recognize = async ({ target: { files } }) => {
  // The file list can be empty (e.g. the picker was cancelled); without this
  // guard `undefined` would be handed to worker.recognize().
  if (!files || files.length === 0) {
    return;
  }

  // NOTE(review): load/loadLanguage/initialize run again on every upload —
  // confirm whether the worker needs re-initialising each time.
  await worker.load();
  await worker.loadLanguage('eng');
  await worker.initialize('eng');

  const { data: { text } } = await worker.recognize(files[0]);

  const board = document.getElementById('board');
  board.value = text;

  // A result now exists, so the PDF download can be offered.
  dlBtn.disabled = false;
};
||||||
|
/**
 * `click` handler for the "Download PDF" button: requests a PDF from the
 * worker, wraps the returned bytes in a Blob and triggers a browser download
 * named "tesseract-ocr-result.pdf".
 *
 * @returns {Promise<void>}
 */
const downloadPDF = async () => {
  const filename = 'tesseract-ocr-result.pdf';
  const { data } = await worker.getPDF('Tesseract OCR Result');
  const blob = new Blob([new Uint8Array(data)], { type: 'application/pdf' });

  if (navigator.msSaveBlob) {
    // IE 10+
    navigator.msSaveBlob(blob, filename);
    return;
  }

  const link = document.createElement('a');
  if (link.download === undefined) {
    // No support for the `download` attribute: as before, nothing is saved.
    return;
  }

  const url = URL.createObjectURL(blob);
  link.setAttribute('href', url);
  link.setAttribute('download', filename);
  link.style.visibility = 'hidden';
  document.body.appendChild(link);
  try {
    link.click();
  } finally {
    document.body.removeChild(link);
    // Release the blob URL so repeated downloads do not accumulate them;
    // deferred so the download triggered by click() can start first.
    setTimeout(() => URL.revokeObjectURL(url), 0);
  }
};
||||||
|
uploader.addEventListener('change', recognize); |
||||||
|
dlBtn.addEventListener('click', downloadPDF); |
||||||
|
</script> |
||||||
|
</body> |
||||||
|
</html> |
@ -0,0 +1,22 @@ |
|||||||
|
#!/usr/bin/env node
|
||||||
|
const path = require('path'); |
||||||
|
const fs = require('fs'); |
||||||
|
const { createWorker } = require('../../'); |
||||||
|
|
||||||
|
// The first CLI argument after the script name is an optional image path.
const imagePath = process.argv[2];

// Both the argument and the fallback sample image are resolved against this
// script's directory (not the current working directory).
const image = path.resolve(
  __dirname,
  imagePath || '../../tests/assets/images/cosmic.png',
);

console.log(`Recognizing ${image}`);
||||||
|
|
||||||
|
// Recognize `image`, print the text, and write the result as a PDF into the
// current working directory.
(async () => {
  const worker = createWorker();
  try {
    await worker.load();
    await worker.loadLanguage('eng');
    await worker.initialize('eng');

    const { data: { text } } = await worker.recognize(image);
    console.log(text);

    const { data } = await worker.getPDF('Tesseract OCR Result');
    fs.writeFileSync('tesseract-ocr-result.pdf', Buffer.from(data));
    console.log('Generate PDF: tesseract-ocr-result.pdf');
  } finally {
    // Always shut the worker down, including when a step above fails.
    await worker.terminate();
  }
})().catch((err) => {
  // Report the failure and signal it through the exit code instead of
  // leaving the rejection unhandled.
  console.error(err);
  process.exitCode = 1;
});
@ -1,2 +1,9 @@ |
|||||||
module.exports = (typeof process.env !== 'undefined' && process.env.NODE_ENV === 'development') |
let logging = false; |
||||||
? console.log : () => {}; |
|
||||||
|
exports.logging = logging; |
||||||
|
|
||||||
|
exports.setLogging = (_logging) => { |
||||||
|
logging = _logging; |
||||||
|
}; |
||||||
|
|
||||||
|
/**
 * Forwards its arguments to `console.log` when logging is enabled.
 *
 * @param {...*} args - Values to print.
 * @returns {null|undefined} `null` when logging is disabled, otherwise the
 *   result of `console.log`.
 */
// Called as a plain method so it no longer depends on the module-level `this`,
// which an arrow function would otherwise pass through to `apply`.
exports.log = (...args) => (logging ? console.log(...args) : null);
||||||
|
Loading…
Reference in new issue