You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
55 lines
1.4 KiB
55 lines
1.4 KiB
5 years ago
|
/**
 * The recognition result of tesseract contains a deeply
 * nested JSON object with the details of the page.
 *
 * The result of dump.js is a big JSON tree
 * which can be easily serialized (for instance
 * to be sent from a webworker to the main app
 * or through Node's IPC), but we want
 * a (circular) DOM-like interface for walking
 * through the data.
 *
 * @fileoverview DOM-like interface for walking through data
 * @author Kevin Kwok <antimatter15@gmail.com>
 * @author Guillermo Webster <gui@mit.edu>
 * @author Jerome Wu <jeromewus@gmail.com>
 */
|
||
|
|
||
|
module.exports = (page) => {
|
||
|
const blocks = [];
|
||
|
const paragraphs = [];
|
||
|
const lines = [];
|
||
|
const words = [];
|
||
|
const symbols = [];
|
||
|
|
||
|
page.blocks.forEach((block) => {
|
||
|
block.paragraphs.forEach((paragraph) => {
|
||
|
paragraph.lines.forEach((line) => {
|
||
|
line.words.forEach((word) => {
|
||
|
word.symbols.forEach((sym) => {
|
||
|
symbols.push({
|
||
|
...sym, page, block, paragraph, line, word,
|
||
|
});
|
||
|
});
|
||
|
words.push({
|
||
|
...word, page, block, paragraph, line,
|
||
|
});
|
||
|
});
|
||
|
lines.push({
|
||
|
...line, page, block, paragraph,
|
||
|
});
|
||
|
});
|
||
|
paragraphs.push({
|
||
|
...paragraph, page, block,
|
||
|
});
|
||
|
});
|
||
|
blocks.push({
|
||
|
...block, page,
|
||
|
});
|
||
|
});
|
||
|
|
||
|
return {
|
||
|
...page, blocks, paragraphs, lines, words, symbols,
|
||
|
};
|
||
|
};
|