Jerome Wu
5 years ago
13 changed files with 117 additions and 45 deletions
@ -0,0 +1,52 @@
@@ -0,0 +1,52 @@
|
||||
<html> |
||||
<head> |
||||
<script src="/dist/tesseract.dev.js"></script> |
||||
</head> |
||||
<body> |
||||
<div> |
||||
<input type="file" id="uploader"> |
||||
<button id="download-pdf" disabled="true">Download PDF</button> |
||||
</div> |
||||
<textarea id="board" readonly rows="8" cols="80">Upload an image file</textarea> |
||||
<script> |
||||
// Worker factory taken from the `Tesseract` global (presumably provided by
// the tesseract.dev.js script included in <head> — confirm).
const { createWorker } = Tesseract;

// Options for the OCR worker: the path of the core script and a logger that
// echoes every message it receives to the console.
const workerOptions = {
  corePath: '/node_modules/tesseract.js-core/tesseract-core.wasm.js',
  logger: (message) => console.log(message),
};
const worker = createWorker(workerOptions);

// Page controls: the "Download PDF" button and the file input.
const dlBtn = document.getElementById('download-pdf');
const uploader = document.getElementById('uploader');
||||
/**
 * Handler for the file input's `change` event: runs OCR on the first selected
 * file, writes the recognized text into the `board` textarea and enables the
 * download button.
 *
 * Does nothing when the selection is empty.
 *
 * @param {Event} event - Event whose `target.files` holds the selected files.
 * @returns {Promise<void>} Settles once the text has been displayed.
 */
const recognize = async ({ target: { files } }) => {
  const file = files && files[0];
  // The selection can be empty (for example when the file dialog is
  // cancelled); without this guard `undefined` would be handed to the worker.
  if (!file) {
    return;
  }

  await worker.load();
  await worker.loadLanguage('eng');
  await worker.initialize('eng');
  const { data: { text } } = await worker.recognize(file);

  const board = document.getElementById('board');
  board.value = text;
  // A result now exists, so the download button can be used.
  dlBtn.disabled = false;
};
||||
/**
 * Handler for the download button: asks the worker for a PDF of the OCR
 * result and saves it as `tesseract-ocr-result.pdf`.
 *
 * Uses `navigator.msSaveBlob` when it exists; otherwise clicks a temporary,
 * hidden `<a download>` link that points at an object URL for the PDF. If the
 * anchor element has no `download` property, nothing is saved.
 *
 * @returns {Promise<void>}
 */
const downloadPDF = async () => {
  const filename = 'tesseract-ocr-result.pdf';
  const { data } = await worker.getPDF('Tesseract OCR Result');
  const blob = new Blob([new Uint8Array(data)], { type: 'application/pdf' });

  if (navigator.msSaveBlob) {
    // IE 10+
    navigator.msSaveBlob(blob, filename);
    return;
  }

  const link = document.createElement('a');
  if (link.download === undefined) {
    return;
  }

  const url = URL.createObjectURL(blob);
  link.setAttribute('href', url);
  link.setAttribute('download', filename);
  link.style.visibility = 'hidden';
  // The link is only in the document for the duration of the click.
  document.body.appendChild(link);
  link.click();
  document.body.removeChild(link);
  // Release the object URL so the blob is not kept alive for the lifetime of
  // the page; deferred so the download triggered by click() has started.
  setTimeout(() => URL.revokeObjectURL(url), 0);
};
||||
// Hook the handlers up to the page controls: clicking the download button
// saves the PDF, and choosing a file starts recognition.
dlBtn.addEventListener('click', downloadPDF);
uploader.addEventListener('change', recognize);
||||
</script> |
||||
</body> |
||||
</html> |
@ -0,0 +1,22 @@
@@ -0,0 +1,22 @@
|
||||
#!/usr/bin/env node
|
||||
const path = require('path'); |
||||
const fs = require('fs'); |
||||
const { createWorker } = require('../../'); |
||||
|
||||
// Image used when no path is given on the command line.
const DEFAULT_IMAGE = '../../tests/assets/images/cosmic.png';

// First command-line argument after the script name, if any.
const imagePath = process.argv[2];

// Both the argument and the default are resolved against this script's
// directory.
const image = path.resolve(__dirname, imagePath || DEFAULT_IMAGE);

console.log(`Recognizing ${image}`);
||||
|
||||
/**
 * Recognizes the text in `image`, prints it, then writes the worker's PDF
 * output to `tesseract-ocr-result.pdf` (a relative path, so it lands in the
 * current working directory).
 *
 * The worker is terminated whether or not the steps succeed, and any failure
 * is reported on stderr with a non-zero exit code instead of being left as an
 * unhandled rejection.
 */
(async () => {
  const worker = createWorker();
  try {
    await worker.load();
    await worker.loadLanguage('eng');
    await worker.initialize('eng');

    const { data: { text } } = await worker.recognize(image);
    console.log(text);

    const { data } = await worker.getPDF('Tesseract OCR Result');
    fs.writeFileSync('tesseract-ocr-result.pdf', Buffer.from(data));
    console.log('Generate PDF: tesseract-ocr-result.pdf');
  } finally {
    // Shut the worker down even when a step above throws, so it is not left
    // running after a failure.
    await worker.terminate();
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
@ -1,2 +1,9 @@
@@ -1,2 +1,9 @@
|
||||
// The module's export is a logger function: `console.log` itself when
// NODE_ENV is 'development', and a function that does nothing otherwise.
const isDevelopment = typeof process.env !== 'undefined'
  && process.env.NODE_ENV === 'development';
const noop = () => {};

module.exports = isDevelopment ? console.log : noop;
||||
let logging = false; |
||||
|
||||
exports.logging = logging; |
||||
|
||||
exports.setLogging = (_logging) => { |
||||
logging = _logging; |
||||
}; |
||||
|
||||
exports.log = (...args) => (logging ? console.log.apply(this, args) : null); |
||||
|
Loading…
Reference in new issue