Browse Source

Make getTextContent return an offset array and improve the algorithm. Make parts in viewer.js work again.
Julian Viereck 13 years ago
parent
commit
a38c4bc729
  1. 67
      src/evaluator.js
  2. 2
      web/viewer.html
  3. 2
      web/viewer.js

67
src/evaluator.js

@ -505,7 +505,13 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
return queue; return queue;
}, },
getTextContent: function partialEvaluatorGetIRQueue(stream, resources) { getTextContent: function partialEvaluatorGetIRQueue(stream, resources, state) {
if (!state) {
state = {
text: '',
mapping: []
};
}
var self = this; var self = this;
var xref = this.xref; var xref = this.xref;
@ -515,18 +521,22 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
} }
resources = xref.fetchIfRef(resources) || new Dict(); resources = xref.fetchIfRef(resources) || new Dict();
// The xobj is parsed iff it's needed, e.g. if there is a `DO` cmd.
var xobjs = null;
var parser = new Parser(new Lexer(stream), false); var parser = new Parser(new Lexer(stream), false);
var res = resources; var res = resources;
var args = [], obj; var args = [], obj;
var text = ''; var text = state.text;
var chunk = ''; var chunk = '';
var commandOffset = state.mapping;
var font = null; var font = null;
while (!isEOF(obj = parser.getObj())) { while (!isEOF(obj = parser.getObj())) {
if (isCmd(obj)) { if (isCmd(obj)) {
var cmd = obj.cmd; var cmd = obj.cmd;
switch (cmd) { switch (cmd) {
// TODO: Add support for SAVE/RESTORE and XFORM here.
case 'Tf': case 'Tf':
font = handleSetFont(args[0].name).translated; font = handleSetFont(args[0].name).translated;
break; break;
@ -536,9 +546,12 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
if (typeof items[j] === 'string') { if (typeof items[j] === 'string') {
chunk += fontCharsToUnicode(items[j], font); chunk += fontCharsToUnicode(items[j], font);
} else if (items[j] < 0) { } else if (items[j] < 0) {
// making all negative offsets a space - better to have
// a space in incorrect place than not have them at all
chunk += ' '; chunk += ' ';
} else if (items[j] < 0 && font.spacedWidth > 0) {
var numFakeSpaces = Math.round(-e / font.spacedWidth);
if (numFakeSpaces > 0) {
chunk += ' ';
}
} }
} }
break; break;
@ -551,8 +564,49 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
case '"': case '"':
chunk += fontCharsToUnicode(args[2], font) + ' '; chunk += fontCharsToUnicode(args[2], font) + ' ';
break; break;
case 'Do':
// Set the chunk such that the following if won't add something
// to the state.
chunk = '';
if (args[0].code) {
break;
}
if (!xobjs) {
xobjs = resources.get('XObject') || new Dict();
}
var name = args[0].name;
var xobj = xobjs.get(name);
if (!xobj)
break;
assertWellFormed(isStream(xobj), 'XObject should be a stream');
var type = xobj.dict.get('Subtype');
assertWellFormed(
isName(type),
'XObject should have a Name subtype'
);
if ('Form' !== type.name)
break;
// Add some spacing between the text here and the text of the
// xForm.
text = text + ' ';
state.text = text;
state = this.getTextContent(
xobj,
xobj.dict.get('Resources') || resources,
state
);
text = state.text;
break;
} // switch } // switch
if (chunk !== '') { if (chunk !== '') {
commandOffset.push(text.length);
text += chunk; text += chunk;
chunk = ''; chunk = '';
} }
@ -564,7 +618,10 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
} }
} }
return text; return {
text: text,
mapping: commandOffset
};
}, },
extractDataStructures: function extractDataStructures: function

2
web/viewer.html

@ -88,7 +88,7 @@ limitations under the License.
<button id="viewOutline" class="toolbarButton group" title="Show Document Outline" tabindex="2" data-l10n-id="outline"> <button id="viewOutline" class="toolbarButton group" title="Show Document Outline" tabindex="2" data-l10n-id="outline">
<span data-l10n-id="outline_label">Document Outline</span> <span data-l10n-id="outline_label">Document Outline</span>
</button> </button>
<button id="viewSearch" class="toolbarButton group hidden" title="Search Document" tabindex="3" data-l10n-id="search_panel"> <button id="viewSearch" class="toolbarButton group" title="Search Document" tabindex="3" data-l10n-id="search_panel">
<span data-l10n-id="search_panel_label">Search Document</span> <span data-l10n-id="search_panel_label">Search Document</span>
</button> </button>
</div> </div>

2
web/viewer.js

@ -1043,7 +1043,7 @@ var PDFView = {
function extractPageText(pageIndex) { function extractPageText(pageIndex) {
self.pages[pageIndex].pdfPage.getTextContent().then( self.pages[pageIndex].pdfPage.getTextContent().then(
function textContentResolved(textContent) { function textContentResolved(textContent) {
self.pageText[pageIndex] = textContent; self.pageText[pageIndex] = textContent.text;
self.search(); self.search();
if ((pageIndex + 1) < self.pages.length) if ((pageIndex + 1) < self.pages.length)
extractPageText(pageIndex + 1); extractPageText(pageIndex + 1);

Loading…
Cancel
Save