diff --git a/package-lock.json b/package-lock.json index 667a512..e791c84 100644 --- a/package-lock.json +++ b/package-lock.json @@ -4822,9 +4822,19 @@ } }, "tesseract.js-core": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/tesseract.js-core/-/tesseract.js-core-1.0.2.tgz", - "integrity": "sha1-HOM6KdyBI1e4NWow7e34sYgEOzc=" + "version": "2.0.0-beta.2", + "resolved": "https://registry.npmjs.org/tesseract.js-core/-/tesseract.js-core-2.0.0-beta.2.tgz", + "integrity": "sha512-aExaof0ZxYc2q0T7HN26wxcARN/N+0q9z9gjkyaEFsuUj82VtUZr0A6yDQ0rpQyIdk22K15fASUX5XcwFIb0rw==", + "requires": { + "node-fetch": "^2.3.0" + }, + "dependencies": { + "node-fetch": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.3.0.tgz", + "integrity": "sha512-MOd8pV3fxENbryESLgVIeaGKrdl+uaYhCSSVkjeOb/31/njTpcis5aWfdqgNlHIrKOLRbMnfPINPOML2CIFeXA==" + } + } }, "text-table": { "version": "0.2.0", diff --git a/package.json b/package.json index 8c3ce5f..7f4b85b 100644 --- a/package.json +++ b/package.json @@ -42,7 +42,7 @@ "node-fetch": "^1.6.3", "object-assign": "^4.1.0", "png.js": "^0.2.1", - "tesseract.js-core": "^1.0.2" + "tesseract.js-core": "^2.0.0-beta.2" }, "repository": { "type": "git", diff --git a/src/common/worker.js b/src/common/worker.js index 83eda82..d97d21c 100644 --- a/src/common/worker.js +++ b/src/common/worker.js @@ -50,31 +50,28 @@ function handleInit(req, res){ if(!Module || Module.TOTAL_MEMORY < MIN_MEMORY){ var Core = adapter.getCore(req, res); - res.progress({ status: 'initializing tesseract', progress: 0 }) + res.progress({ status: 'initializing tesseract', progress: 0 }) Module = Core({ TOTAL_MEMORY: MIN_MEMORY, TesseractProgress(percent){ latestJob.progress({ status: 'recognizing text', progress: Math.max(0, (percent-30)/70) }); }, - onRuntimeInitialized() {} }); - - Module.FS_createPath("/", "tessdata", true, true); - base = new Module.TessBaseAPI(); - res.progress({ status: 'initializing tesseract', progress: 1 }); + base = new Module.TessBaseAPI(); + res.progress({ status: 'initializing tesseract', progress: 1 }); } } -function setImage(Module, base, image){ - var imgbin = desaturate(image), - width = image.width, - height = image.height; +function setImage(Module, base, image) { + const imgbin = desaturate(image); + const { width, height } = image; - var ptr = Module.allocate(imgbin, 'i8', Module.ALLOC_NORMAL); - base.SetImage(Module.wrapPointer(ptr), width, height, 1, width); - base.SetRectangle(0, 0, width, height); - return ptr; + const ptr = Module._malloc(imgbin.length, Uint8Array.BYTES_PER_ELEMENT); + Module.HEAPU8.set(imgbin, ptr); + base.SetImage(ptr, width, height, Uint8Array.BYTES_PER_ELEMENT, width); + base.SetRectangle(0, 0, width, height); + return ptr; } function loadLanguage(req, res, cb){ @@ -85,8 +82,8 @@ function loadLanguage(req, res, cb){ if(lang in Module._loadedLanguages) return cb(); adapter.getLanguageData(req, res, function(data){ - res.progress({ status: 'loading ' + langFile, progress: 0 }); - Module.FS_createDataFile('tessdata', langFile, data, true, false); + res.progress({ status: 'loading ' + langFile, progress: 0 }); + Module.FS.writeFile(langFile, data); Module._loadedLanguages[lang] = true; res.progress({ status: 'loading ' + langFile, progress: 1 }); cb(); diff --git a/src/node/index.js b/src/node/index.js index 7661cd3..33ead58 100644 --- a/src/node/index.js +++ b/src/node/index.js @@ -5,7 +5,7 @@ const fetch = require('isomorphic-fetch'), exports.defaultOptions = { workerPath: require('path').join(__dirname, 'worker.js'), - langPath: 'https://cdn.jsdelivr.net/gh/naptha/tessdata@gh-pages/3.02/', + langPath: 'https://cdn.jsdelivr.net/gh/naptha/tessdata@gh-pages/4.0.0/', } exports.spawnWorker = function spawnWorker(instance, workerOptions){ diff --git a/src/node/worker.js b/src/node/worker.js index 1531d34..129c58a 100644 --- a/src/node/worker.js +++ b/src/node/worker.js @@ -11,7 +11,7 @@ workerUtils.setAdapter({ getCore: (req, res) => { if (check.null(TesseractCore)) { res.progress({ status: 'loading tesseract core' }); - TesseractCore = require('tesseract.js-core'); + TesseractCore = require('tesseract.js-core/src/tesseract-core.asm'); res.progress({ status: 'loaded tesseract core' }); } return TesseractCore;