/** * In the recognition result of tesseract, there * is a deep JSON object for details, it has around * * The result of dump.js is a big JSON tree * which can be easily serialized (for instance * to be sent from a webworker to the main app * or through Node's IPC), but we want * a (circular) DOM-like interface for walking * through the data. * * @fileoverview DOM-like interface for walking through data * @author Kevin Kwok * @author Guillermo Webster * @author Jerome Wu */ module.exports = (page) => { const blocks = []; const paragraphs = []; const lines = []; const words = []; const symbols = []; page.blocks.forEach((block) => { block.paragraphs.forEach((paragraph) => { paragraph.lines.forEach((line) => { line.words.forEach((word) => { word.symbols.forEach((sym) => { symbols.push({ ...sym, page, block, paragraph, line, word, }); }); words.push({ ...word, page, block, paragraph, line, }); }); lines.push({ ...line, page, block, paragraph, }); }); paragraphs.push({ ...paragraph, page, block, }); }); blocks.push({ ...block, page, }); }); return { ...page, blocks, paragraphs, lines, words, symbols, }; };