You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
54 lines
1.4 KiB
54 lines
1.4 KiB
/**
 * In the recognition result of tesseract, there
 * is a deeply nested JSON object holding the details.
 *
 * The result of dump.js is a big JSON tree
 * which can be easily serialized (for instance
 * to be sent from a webworker to the main app
 * or through Node's IPC), but we want
 * a (circular) DOM-like interface for walking
 * through the data.
 *
 * @fileoverview DOM-like interface for walking through data
 * @author Kevin Kwok <antimatter15@gmail.com>
 * @author Guillermo Webster <gui@mit.edu>
 * @author Jerome Wu <jeromewus@gmail.com>
 */
|
|
|
module.exports = (page) => { |
|
const blocks = []; |
|
const paragraphs = []; |
|
const lines = []; |
|
const words = []; |
|
const symbols = []; |
|
|
|
page.blocks.forEach((block) => { |
|
block.paragraphs.forEach((paragraph) => { |
|
paragraph.lines.forEach((line) => { |
|
line.words.forEach((word) => { |
|
word.symbols.forEach((sym) => { |
|
symbols.push({ |
|
...sym, page, block, paragraph, line, word, |
|
}); |
|
}); |
|
words.push({ |
|
...word, page, block, paragraph, line, |
|
}); |
|
}); |
|
lines.push({ |
|
...line, page, block, paragraph, |
|
}); |
|
}); |
|
paragraphs.push({ |
|
...paragraph, page, block, |
|
}); |
|
}); |
|
blocks.push({ |
|
...block, page, |
|
}); |
|
}); |
|
|
|
return { |
|
...page, blocks, paragraphs, lines, words, symbols, |
|
}; |
|
};
|
|
|