Merge pull request #4654 from yurydelendik/nodeexampe

Basic Node.js example that demonstrates use of the pdf.combined.js file
11 years ago · e9cdf6bb01
2 changed files with 183 additions and 0 deletions
--- a/examples/node/domparsermock.js
+++ b/examples/node/domparsermock.js
@ -0,0 +1,107 @@
				@@ -0,0 +1,107 @@
+/* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */
+/* Any copyright is dedicated to the Public Domain.
+ * http://creativecommons.org/publicdomain/zero/1.0/ */
+
+// Dummy XML Parser
+
+/**
+ * Minimal DOM node used by the dummy XML parser.
+ *
+ * Element nodes get a `childNodes` array assigned by the parser; text nodes
+ * ('#text') never do, so every accessor below has to cope with a missing
+ * `childNodes`.
+ *
+ * @param {string} nodeName - Tag name, or '#text' for text nodes.
+ * @param {string} [nodeValue] - Text of a text node; omitted for elements.
+ */
+function DOMNodeMock(nodeName, nodeValue) {
+  this.nodeName = nodeName;
+  this.nodeValue = nodeValue;
+  // defineProperty defaults to non-enumerable, so enumerating or serializing
+  // a node does not walk back up through its parent.
+  Object.defineProperty(this, 'parentNode', {value: null, writable: true});
+}
+DOMNodeMock.prototype = {
+  /**
+   * First child, or undefined when the node has no children (including text
+   * nodes, which have no `childNodes` at all).
+   */
+  get firstChild() {
+    return this.childNodes ? this.childNodes[0] : undefined;
+  },
+  /**
+   * Node following this one in its parent's `childNodes`, or undefined when
+   * this node is the last child, has no parent, or is not actually listed
+   * in the parent's children.
+   */
+  get nextSibling() {
+    var parent = this.parentNode;
+    if (!parent || !parent.childNodes) {
+      return undefined;
+    }
+    var index = parent.childNodes.indexOf(this);
+    if (index < 0) {
+      // Without this check `index + 1` would be 0 and the parent's first
+      // child would be reported as the sibling.
+      return undefined;
+    }
+    return parent.childNodes[index + 1];
+  },
+  /**
+   * Concatenated text of this node and all of its descendants.
+   */
+  get textContent() {
+    if (!this.childNodes) {
+      return this.nodeValue || '';
+    }
+    return this.childNodes.map(function (child) {
+      return child.textContent;
+    }).join('');
+  },
+  /**
+   * @returns {boolean} true when the node has at least one child.
+   */
+  hasChildNodes: function () {
+    return !!this.childNodes && this.childNodes.length > 0;
+  }
+};
+
+/**
+ * Replaces XML character references and the five predefined entities
+ * (amp, lt, gt, quot, apos) in `text` with the characters they stand for.
+ * Unknown named entities and out-of-range numeric references are left
+ * exactly as written.
+ *
+ * @param {string} text - Raw text taken from the XML source.
+ * @returns {string} The decoded text.
+ */
+function decodeXML(text) {
+  if (text.indexOf('&') < 0) {
+    return text;
+  }
+  return text.replace(/&(#(x[0-9a-f]+|\d+)|\w+);/gi,
+                      function (all, entityName, number) {
+    if (number) {
+      // The regexp is case-insensitive, so the hex marker may arrive as 'X';
+      // XML itself only specifies lowercase 'x', but decoding it is more
+      // useful than emitting a NUL character.
+      var isHex = number[0] === 'x' || number[0] === 'X';
+      var code = isHex ? parseInt(number.substring(1), 16) :
+                         parseInt(number, 10);
+      if (code > 0x10FFFF) {
+        return all; // not a Unicode code point
+      }
+      if (code > 0xFFFF) {
+        // String.fromCharCode works on UTF-16 code units, so code points
+        // outside the BMP have to be emitted as a surrogate pair.
+        code -= 0x10000;
+        return String.fromCharCode(0xD800 + (code >> 10),
+                                   0xDC00 + (code & 0x3FF));
+      }
+      return String.fromCharCode(code);
+    }
+    switch (entityName) {
+      case 'amp':
+        return '&';
+      case 'lt':
+        return '<';
+      case 'gt':
+        return '>';
+      case 'quot':
+        return '\"';
+      case 'apos':
+        return '\'';
+    }
+    return '&' + entityName + ';';
+  });
+}
+
+/**
+ * Dummy replacement for the browser's DOMParser. It understands just
+ * enough XML (elements, text, CDATA, comments, processing instructions)
+ * to build a tree of DOMNodeMock objects; attributes are matched but
+ * discarded.
+ */
+function DOMParserMock() {}
+DOMParserMock.prototype = {
+  /**
+   * Parses an XML string by repeatedly rewriting it: every recognized piece
+   * is stored in `nodes` and replaced in the string by "<index>,", until
+   * the innermost elements, then their parents, and finally the root have
+   * been collapsed.
+   *
+   * @param {string} content - XML source text.
+   * @returns {{documentElement: DOMNodeMock}} Object holding the last node
+   *   created, which is the root element for well-formed input.
+   */
+  parseFromString: function (content) {
+    // Processing instructions and comments carry nothing we need.
+    content = content.replace(/<\?[\s\S]*?\?>|<!--[\s\S]*?-->/g, '').trim();
+    var nodes = [];
+    // Text runs between a '>' and the next '<'. The run must not start with
+    // '<': otherwise directly adjacent tags ("<r><a>") or a CDATA section
+    // right after a tag would be captured as text.
+    content = content.replace(/>([^<][\s\S]*?)</g, function (all, text) {
+      var i = nodes.length;
+      var node = new DOMNodeMock('#text', decodeXML(text));
+      nodes.push(node);
+      if (node.textContent.trim().length === 0) {
+        return '><'; // ignoring whitespaces
+      }
+      return '>' + i + ',<';
+    });
+    // CDATA content is taken verbatim, without entity decoding.
+    content = content.replace(/<!\[CDATA\[([\s\S]*?)\]\]>/g,
+                              function (all, text) {
+      var i = nodes.length;
+      var node = new DOMNodeMock('#text', text);
+      nodes.push(node);
+      return i + ',';
+    });
+    var lastLength;
+    do {
+      lastLength = nodes.length;
+      // Matches an element that is either self-closing or whose body is
+      // already reduced to a list of node indices ("3,7,").
+      content = content.replace(/<([\w\:]+)((?:[\s\w:=]|'[^']*'|"[^"]*")*)(?:\/>|>([\d,]*)<\/[^>]+>)/g,
+        function (all, name, attrs, childList) {
+        var i = nodes.length;
+        var node = new DOMNodeMock(name);
+        var children = [];
+        if (childList) {
+          var indices = childList.split(',');
+          indices.pop(); // the list ends with ',', drop the empty tail
+          indices.forEach(function (child) {
+            var childNode = nodes[+child];
+            childNode.parentNode = node;
+            children.push(childNode);
+          });
+        }
+        node.childNodes = children;
+        nodes.push(node);
+        return i + ',';
+      });
+      // Keep going while a pass still managed to collapse something.
+    } while (lastLength < nodes.length);
+    return {
+      documentElement: nodes.pop()
+    };
+  }
+};
+
+// Export the mock parser constructor as this module's public API.
+exports.DOMParserMock = DOMParserMock;
--- a/examples/node/getinfo.js
+++ b/examples/node/getinfo.js
@ -0,0 +1,76 @@
				@@ -0,0 +1,76 @@
+/* -*- Mode: Java; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set shiftwidth=2 tabstop=2 autoindent cindent expandtab: */
+/* Any copyright is dedicated to the Public Domain.
+ * http://creativecommons.org/publicdomain/zero/1.0/ */
+
+//
+// Basic node example that prints document metadata and text content.
+// Requires single file built version of PDF.js -- please run
+// `node make singlefile` before running the example.
+//
+
+var fs = require('fs');
+var path = require('path');
+
+// HACK: PDF.js is loaded here as a plain script rather than as a module, so
+// the globals it looks for are provided before the library is required.
+global.window = global;
+global.navigator = { userAgent: "node" };
+global.PDFJS = {};
+global.DOMParser = require('./domparsermock.js').DOMParserMock;
+
+require('../../build/singlefile/build/pdf.combined.js');
+
+// Loading file from file system into typed array.
+// The bundled sample is resolved against this script's directory (like the
+// require() calls above) so the example also works when it is started from
+// another working directory; a path given on the command line is used as is.
+var pdfPath = process.argv[2] ||
+  path.join(__dirname, '../../web/compressed.tracemonkey-pldi-09.pdf');
+var data = new Uint8Array(fs.readFileSync(pdfPath));
+
+// Promises (rather than callbacks) are used to load the document, its pages
+// and the remaining data.
+PDFJS.getDocument(data).then(function (pdfDocument) {
+  var pageCount = pdfDocument.numPages;
+  console.log('# Document Loaded');
+  console.log('Number of Pages: ' + pageCount);
+  console.log();
+
+  // Prints the info dictionary and, when the document has one, the
+  // metadata object.
+  function printMetadata(result) {
+    console.log('# Metadata Is Loaded');
+    console.log('## Info');
+    console.log(JSON.stringify(result.info, null, 2));
+    console.log();
+    if (!result.metadata) {
+      return;
+    }
+    console.log('## Metadata');
+    console.log(JSON.stringify(result.metadata.metadata, null, 2));
+    console.log();
+  }
+
+  // The text content carries layout and style details as well; only the
+  // plain strings are printed here.
+  function printText(textContent) {
+    var pieces = textContent.items.map(function (item) {
+      return item.str;
+    });
+    console.log('## Text Content');
+    console.log(pieces.join(' '));
+  }
+
+  // Prints the size and the text of a single page; resolves once both have
+  // been written out.
+  function printPage(pageNumber) {
+    return pdfDocument.getPage(pageNumber).then(function (page) {
+      console.log('# Page ' + pageNumber);
+      var viewport = page.getViewport(1.0 /* scale */);
+      console.log('Size: ' + viewport.width + 'x' + viewport.height);
+      console.log();
+      return page.getTextContent().then(printText).then(function () {
+        console.log();
+      });
+    });
+  }
+
+  // Everything is chained so the output stays in order: metadata first,
+  // then each page only after the previous one has finished.
+  var chain = pdfDocument.getMetadata().then(printMetadata);
+  for (var pageNumber = 1; pageNumber <= pageCount; pageNumber++) {
+    chain = chain.then(printPage.bind(null, pageNumber));
+  }
+  return chain;
+}).then(function () {
+  console.log('# End of Document');
+}, function (err) {
+  console.error('Error: ' + err);
+});