Add preserve_interword_spaces example and test.

Adds a Node example that runs OCR with the `preserve_interword_spaces`
parameter enabled, a mocha test plus expected text for the new bill.png
asset, raises the test TIMEOUT to 30000, and splits the node test script
into a template (`test:node`) and a runner (`test:node:all`).

Review fixes applied to this patch: `test:node:all` now invokes the
existing `test:node` script (it pointed at an undefined `test:node:one`),
and the example drops an unused `fs` require and terminates the worker in
a `finally` block. The post-image blob ids on the `index` lines of those
two files are carried over from the original patch and are informational.

diff --git a/examples/node/preserve-interword-spaces.js b/examples/node/preserve-interword-spaces.js
new file mode 100755
index 0000000..8cf26b6
--- /dev/null
+++ b/examples/node/preserve-interword-spaces.js
@@ -0,0 +1,29 @@
+#!/usr/bin/env node
+// Runs OCR on an image with the `preserve_interword_spaces` parameter
+// set to '1' and prints the recognized text as a JSON object.
+// Usage: preserve-interword-spaces.js [imagePath]
+const path = require('path');
+const { createWorker } = require('../../');
+
+// Optional first CLI argument; relative paths are resolved against this script's directory.
+const [,, imagePath] = process.argv;
+const image = path.resolve(__dirname, (imagePath || '../../tests/assets/images/bill.png'));
+
+console.log(`Recognizing ${image}`);
+
+(async () => {
+  const worker = createWorker();
+  try {
+    await worker.load();
+    await worker.loadLanguage('eng');
+    await worker.initialize('eng');
+    await worker.setParameters({
+      preserve_interword_spaces: '1',
+    });
+    const { data: { text } } = await worker.recognize(image);
+    console.log(JSON.stringify({ text }));
+  } finally {
+    // Always shut the worker down, even when one of the steps above rejects.
+    await worker.terminate();
+  }
+})();
diff --git a/package.json b/package.json
index 359f16f..477f039 100644
--- a/package.json
+++ b/package.json
@@ -12,8 +12,9 @@
     "prepublishOnly": "npm run build",
     "wait": "rimraf dist && wait-on http://localhost:3000/dist/tesseract.dev.js",
     "test": "npm-run-all -p -r start test:all",
-    "test:all": "npm-run-all wait test:browser:* test:node",
-    "test:node": "nyc mocha --exit --bail --require ./scripts/test-helper.js ./tests/*.test.js",
+    "test:all": "npm-run-all wait test:browser:* test:node:all",
+    "test:node": "nyc mocha --exit --bail --require ./scripts/test-helper.js",
+    "test:node:all": "npm run test:node -- ./tests/*.test.js",
     "test:browser-tpl": "mocha-headless-chrome -a incognito -a no-sandbox -a disable-setuid-sandbox -a disable-logging -t 300000",
     "test:browser:detect": "npm run test:browser-tpl -- -f ./tests/detect.test.html",
     "test:browser:recognize": "npm run test:browser-tpl -- -f ./tests/recognize.test.html",
diff --git a/tests/assets/images/bill.png b/tests/assets/images/bill.png
new file mode 100644
index 0000000..b18e870
Binary files /dev/null and b/tests/assets/images/bill.png differ
diff --git a/tests/constants.js b/tests/constants.js
index cb3c07b..50dbb1a 100644
--- a/tests/constants.js
+++ b/tests/constants.js
@@ -1,4 +1,4 @@
-const TIMEOUT = 10000;
+const TIMEOUT = 30000;
 const IMAGE_PATH = 'http://localhost:3000/tests/assets/images';
 const IS_BROWSER = typeof window !== 'undefined' && typeof window.document !== 'undefined';
 const OPTIONS = {
@@ -12,6 +12,7 @@ const SIMPLE_TEXT_HALF = 'Tesse\n';
 const COMSIC_TEXT = 'HellO World\nfrom beyond\nthe Cosmic Void\n';
 const TESTOCR_TEXT = 'This is a lot of 12 point text to test the\nocr code and see if it works on all types\nof file format.\n\nThe quick brown dog jumped over the\nlazy fox. The quick brown dog jumped\nover the lazy fox. The quick brown dog\njumped over the lazy fox. The quick\nbrown dog jumped over the lazy fox.\n';
 const CHINESE_TEXT = '繁 體 中 文 測 試\n';
+const BILL_SPACED_TEXT = 'FIRST CHEQUING\n\nLine of Credit 100,000.00 Rate 4.2000\n\nDate Description Number Debits Credits Balance\n31Jul2018 Balance Forward 99,878.08 -\n01Aug2018 Clearing Cheque 4987 36.07 99,914.15 -\n01Aug2018 Clearing Cheque 4986 60.93 99,975.08 -\n01Aug2018 Clearing Cheque 4982 800.04 100,775.12 EX\n01Aug2018 Clearing Cheque 4981 82334 101,598.46 EX\n01Aug2018 Incoming Interac e-Transfer 1454 101,583.92 EX\n01Aug2018 Incoming Interac e-Transfer 400.00 101,183.92 EX\n01Aug2018 Assisted Deposit 3241450 68,769.42 -\n01Aug2018 Transfer out to loan 7 1,500.00 70,269.42 -\n02Aug2018 Clearing Cheque 4984 48.08 70,317.50 -\n02Aug2018 Clearing Cheque 4985 7051 70,388.01 -\n02Aug2018 Clearing Cheque 4992 500.00 70.888.01 -\n';
 const FORMATS = ['png', 'jpg', 'bmp', 'pbm'];
 const SIMPLE_PNG_BASE64 = '';
 const SIMPLE_JPG_BASE64 = '';
@@ -28,6 +29,7 @@ if (typeof module !== 'undefined') {
     SIMPLE_TEXT_HALF,
     COMSIC_TEXT,
     TESTOCR_TEXT,
+    BILL_SPACED_TEXT,
     FORMATS,
     OPTIONS,
   };
diff --git a/tests/recognize.test.js b/tests/recognize.test.js
index 46d4298..874ef26 100644
--- a/tests/recognize.test.js
+++ b/tests/recognize.test.js
@@ -81,6 +81,17 @@ describe('recognize()', () => {
     ));
   });
 
+  describe('should work with selected parameters', () => {
+    it('support preserve_interword_spaces', async () => {
+      await worker.initialize('eng');
+      await worker.setParameters({
+        preserve_interword_spaces: '1',
+      });
+      const { data: { text } } = await worker.recognize(`${IMAGE_PATH}/bill.png`);
+      expect(text).to.be(BILL_SPACED_TEXT);
+    }).timeout(TIMEOUT);
+  });
+
   describe('should support all page seg modes', () => {
     Object
       .keys(PSM)