You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
52 lines
1.9 KiB
52 lines
1.9 KiB
<!doctype html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>Tesseract OCR - Download PDF</title>
  <!-- Tesseract.js development build; the inline script further down reads the global `Tesseract` from it. -->
  <script src="/dist/tesseract.dev.js"></script>
</head>
<body>
  <div>
    <!-- Choosing a file triggers recognition; the button stays disabled until a result exists. -->
    <label for="uploader">Image to recognize</label>
    <input type="file" id="uploader">
    <button type="button" id="download-pdf" disabled>Download PDF</button>
  </div>
  <!-- Read-only output area that the script fills with the recognized text. -->
  <label for="board">Recognized text</label>
  <textarea id="board" readonly rows="8" cols="80">Upload an image file</textarea>
<script>
// Worker factory taken from the global that tesseract.dev.js provides.
const createWorker = Tesseract.createWorker;

// Page controls used by the handlers defined further down.
const uploader = document.querySelector('#uploader');
const dlBtn = document.querySelector('#download-pdf');

// Single OCR worker shared by recognition and PDF export.
const worker = createWorker({
  // Core script location, resolved against the site root.
  corePath: '/node_modules/tesseract.js-core/tesseract-core.wasm.js',
  // Print whatever the library hands to the logger callback.
  logger(message) {
    console.log(message);
  },
});
// Promise for the one-time worker start-up (load + English language data).
// Cached so later uploads reuse the initialised worker instead of repeating
// the whole start-up sequence on every file selection.
let workerReady = null;

/**
 * Start the shared worker on first use and return the cached start-up promise.
 * A failed start-up clears the cache so the next upload can retry.
 *
 * @returns {Promise<void>} Settles when the worker is ready for `recognize`.
 */
const ensureWorkerReady = () => {
  if (workerReady === null) {
    workerReady = (async () => {
      await worker.load();
      await worker.loadLanguage('eng');
      await worker.initialize('eng');
    })().catch((error) => {
      workerReady = null;
      throw error;
    });
  }
  return workerReady;
};

/**
 * `change` handler for the file input: runs OCR on the first selected file,
 * writes the recognized text into the `#board` textarea, and enables the
 * download button once a result is available.
 *
 * @param {Event} event - Only `event.target.files` is read.
 * @returns {Promise<void>}
 */
const recognize = async ({ target: { files } }) => {
  // A cancelled file dialog can fire `change` with an empty list; nothing to do.
  const file = files && files[0];
  if (!file) {
    return;
  }

  const board = document.getElementById('board');
  // Block PDF export while a new recognition is in flight.
  dlBtn.disabled = true;

  try {
    await ensureWorkerReady();
    const { data: { text } } = await worker.recognize(file);
    board.value = text;
    dlBtn.disabled = false;
  } catch (error) {
    // Surface the failure instead of leaving an unhandled rejection.
    console.error(error);
    board.value = 'Recognition failed; see the console for details.';
  }
};
/**
 * `click` handler for the download button: asks the worker for a PDF titled
 * "Tesseract OCR Result" and saves it as `tesseract-ocr-result.pdf`.
 *
 * Uses `navigator.msSaveBlob` when present; otherwise clicks a temporary
 * hidden `<a download>` link. Does nothing if neither mechanism is available.
 *
 * @returns {Promise<void>}
 */
const downloadPDF = async () => {
  const filename = 'tesseract-ocr-result.pdf';
  const { data } = await worker.getPDF('Tesseract OCR Result');
  const blob = new Blob([new Uint8Array(data)], { type: 'application/pdf' });

  if (navigator.msSaveBlob) {
    // IE 10+
    navigator.msSaveBlob(blob, filename);
    return;
  }

  const link = document.createElement('a');
  if (link.download === undefined) {
    // No `download` attribute support: the original had no fallback either.
    return;
  }

  const url = URL.createObjectURL(blob);
  link.setAttribute('href', url);
  link.setAttribute('download', filename);
  link.style.visibility = 'hidden';
  document.body.appendChild(link);
  link.click();
  document.body.removeChild(link);

  // Release the blob URL so repeated downloads do not accumulate blobs in
  // memory. Deferred one tick so the click-triggered download can start first.
  setTimeout(() => URL.revokeObjectURL(url), 0);
};
// Hook up the controls: the button exports the PDF, and choosing a file starts OCR.
dlBtn.addEventListener('click', downloadPDF);
uploader.addEventListener('change', recognize);
</script>
</body>
</html>
|
|
|