From 01f03fe393adf98d044d859e5338b8f04fe65212 Mon Sep 17 00:00:00 2001
From: Rob Wu <rob@robwu.nl>
Date: Thu, 6 Jul 2017 15:08:37 +0200
Subject: [PATCH] Optimize PNG compression in SVG backend on Node.js

Use the environment's zlib implementation if available to get
reasonably-sized SVG files when an XObject image is converted to PNG.
The generated PNG is not optimal because we do not use a PNG predictor.
Further, when our SVG backend is run in a browser, the generated PNG
images will still be unnecessarily large (though the use of blob:-URLs
when available should reduce the impact on memory usage). If we want to
optimize PNG images in browsers too, we can either try to use a DEFLATE
library such as pako, or re-use our XObject image painting logic in
src/display/canvas.js. This potential improvement is not implemented by
this commit.

Tested with:

- Node.js 8.1.3 (uses zlib)
- Node.js 0.11.12 (uses zlib)
- Node.js 0.10.48 (falls back to inferior existing implementation).
- Chrome 59.0.3071.86
- Firefox 54.0

Tests:

Unit test on Node.js:

```
$ gulp lib
$ JASMINE_CONFIG_PATH=test/unit/clitests.json node ./node_modules/.bin/jasmine --filter=SVG
```

Unit test in browser: Run `gulp server` and open
http://localhost:8888/test/unit/unit_test.html?spec=SVGGraphics

To verify that the patch works as desired,

```
$ node examples/node/pdf2svg.js test/pdfs/xobject-image.pdf
$ du -b svgdump/xobject-image-1.svg
 # ^ Calculates the file size. Confirm that the size is small
 #   (784 instead of 80664 bytes).
```
---
 gulpfile.js                   |   1 +
 src/display/svg.js            |  34 ++++++++-
 test/pdfs/.gitignore          |   1 +
 test/pdfs/xobject-image.pdf   |  61 +++++++++++++++
 test/unit/clitests.json       |   1 +
 test/unit/display_svg_spec.js | 135 ++++++++++++++++++++++++++++++++++
 test/unit/jasmine-boot.js     |   1 +
 7 files changed, 233 insertions(+), 1 deletion(-)
 create mode 100644 test/pdfs/xobject-image.pdf
 create mode 100644 test/unit/display_svg_spec.js

diff --git a/gulpfile.js b/gulpfile.js
index 36d31664a..0408d07c1 100644
--- a/gulpfile.js
+++ b/gulpfile.js
@@ -1044,6 +1044,7 @@ gulp.task('lib', ['buildnumber'], function () {
       'src/{pdf,pdf.worker}.js',
     ], { base: 'src/', }),
     gulp.src([
+      'examples/node/domstubs.js',
       'web/*.js',
       '!web/pdfjs.js',
       '!web/viewer.js',
diff --git a/src/display/svg.js b/src/display/svg.js
index 26909fe60..e0f65c3ac 100644
--- a/src/display/svg.js
+++ b/src/display/svg.js
@@ -12,10 +12,11 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+/* globals __non_webpack_require__ */
 
 import {
   createObjectURL, FONT_IDENTITY_MATRIX, IDENTITY_MATRIX, ImageKind, isArray,
-  isNum, OPS, Util, warn
+  isNodeJS, isNum, OPS, Util, warn
 } from '../shared/util';
 
 var SVGGraphics = function() {
@@ -104,6 +105,37 @@ var convertImgDataToPng = (function convertImgDataToPngClosure() {
    *   http://www.libpng.org/pub/png/spec/1.2/PNG-Compression.html
    */
   function deflateSync(literals) {
+    if (!isNodeJS()) {
+      // zlib is certainly not available outside of Node.js. We can either use
+      // the pako library for client-side DEFLATE compression, or use the canvas
+      // API of the browser to obtain a more optimal PNG file.
+      return deflateSyncUncompressed(literals);
+    }
+    try {
+      // NOTE: This implementation is far from perfect, but already way better
+      // than not applying any compression.
+      //
+      // A better algorithm will try to choose a good predictor/filter and
+      // then choose a suitable zlib compression strategy (e.g. 3,Z_RLE).
+      //
+      // Node v0.11.12 zlib.deflateSync is introduced (and returns a Buffer).
+      // Node v3.0.0   Buffer inherits from Uint8Array.
+      // Node v8.0.0   zlib.deflateSync accepts Uint8Array as input.
+      var input;
+      // eslint-disable-next-line no-undef
+      if (parseInt(process.versions.node, 10) >= 8) {
+        input = literals;
+      } else {
+        // eslint-disable-next-line no-undef
+        input = new Buffer(literals);
+      }
+      var output = __non_webpack_require__('zlib')
+        .deflateSync(input, { level: 9, });
+      return output instanceof Uint8Array ? output : new Uint8Array(output);
+    } catch (e) {
+      warn('Not compressing PNG because zlib.deflateSync is unavailable: ' + e);
+    }
+
     return deflateSyncUncompressed(literals);
   }
 
diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore
index d08deacc2..f884b62c5 100644
--- a/test/pdfs/.gitignore
+++ b/test/pdfs/.gitignore
@@ -288,3 +288,4 @@
 !font_ascent_descent.pdf
 !issue8097_reduced.pdf
 !transparent.pdf
+!xobject-image.pdf
diff --git a/test/pdfs/xobject-image.pdf b/test/pdfs/xobject-image.pdf
new file mode 100644
index 000000000..2d4468c4d
--- /dev/null
+++ b/test/pdfs/xobject-image.pdf
@@ -0,0 +1,61 @@
+%PDF-1.1
+1 0 obj
+<</Type/Catalog/Pages 2 0 R>>
+endobj
+2 0 obj
+<</Type/Pages/Count 1/Kids[3 0 R]/MediaBox [0 0 200 100]>>
+endobj
+3 0 obj
+<<
+ /Type/Page
+ /Parent 2 0 R
+ /Resources <<
+  /XObject << /SomeImage 4 0 R >>
+ >>
+ /Contents 5 0 R
+>>
+endobj
+4 0 obj
+<<
+ /Type/XObject
+ /Subtype/Image
+ /Width 200 % The width or height directly affects the image's file size.
+ /Height 100
+ /ColorSpace/DeviceRGB
+ /DecodeParms [] % Forces NativeImageDecoder.isSupported to return false.
+ /BitsPerComponent 8
+ /Length 580
+ /Filter [ /ASCIIHexDecode /DCTDecode ]
+>>
+% convert -size 1x1 xc:red jpeg:- | xxd -p -c40
+stream
+ffd8ffe000104a46494600010100000100010000ffdb004300030202020202030202020303030304
+060404040404080606050609080a0a090809090a0c0f0c0a0b0e0b09090d110d0e0f101011100a0c
+12131210130f101010ffdb00430103030304030408040408100b090b101010101010101010101010
+1010101010101010101010101010101010101010101010101010101010101010101010101010ffc0
+0011080001000103011100021101031101ffc40014000100000000000000000000000000000008ff
+c40014100100000000000000000000000000000000ffc40015010101000000000000000000000000
+00000709ffc40014110100000000000000000000000000000000ffda000c03010002110311003f00
+3a03154dffd9
+endstream
+endobj
+5 0 obj
+<</Length 14>>
+stream
+500 0 0 400 0 0 cm
+/SomeImage Do
+endstream
+endobj
+xref
+0 6
+0000000000 65535 f 
+0000000008 00000 n 
+0000000054 00000 n 
+0000000128 00000 n 
+0000000246 00000 n 
+0000001201 00000 n 
+trailer
+<</Root 1 0 R/Size 6>>
+startxref
+1281
+%%EOF
diff --git a/test/unit/clitests.json b/test/unit/clitests.json
index 38a018da1..59751ddb2 100644
--- a/test/unit/clitests.json
+++ b/test/unit/clitests.json
@@ -7,6 +7,7 @@
     "cff_parser_spec.js",
     "cmap_spec.js",
     "crypto_spec.js",
+    "display_svg_spec.js",
     "document_spec.js",
     "dom_utils_spec.js",
     "evaluator_spec.js",
diff --git a/test/unit/display_svg_spec.js b/test/unit/display_svg_spec.js
new file mode 100644
index 000000000..515e59fe0
--- /dev/null
+++ b/test/unit/display_svg_spec.js
@@ -0,0 +1,135 @@
+/* Copyright 2017 Mozilla Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* globals __non_webpack_require__ */
+
+import { isNodeJS, NativeImageDecoding } from '../../src/shared/util';
+import { setStubs, unsetStubs } from '../../examples/node/domstubs';
+import { buildGetDocumentParams } from './test_utils';
+import { getDocument } from '../../src/display/api';
+import { SVGGraphics } from '../../src/display/svg';
+
+// withZlib(true, callback); = run test with require('zlib') if possible.
+// withZlib(false, callback); = run test without require('zlib').deflateSync.
+// callback must return a promise, which is returned as-is. deflateSync stays
+// disabled until that promise settles, as callback uses zlib asynchronously.
+function withZlib(isZlibRequired, callback) {
+  // zlib is only supported on Node.js (no browser polyfill such as pako yet).
+  if (isZlibRequired && !isNodeJS()) {
+    throw new Error('zlib test can only be run in Node.js');
+  }
+  if (isZlibRequired || !isNodeJS()) {
+    // Nothing to disable: zlib is wanted, or assumed unavailable (non-Node).
+    return callback();
+  }
+  var zlib = __non_webpack_require__('zlib');
+  var deflateSync = zlib.deflateSync;
+  zlib.deflateSync = function() {
+    throw new Error('zlib.deflateSync is explicitly disabled for testing.');
+  };
+  function restoreDeflateSync() {
+    zlib.deflateSync = deflateSync;
+  }
+  try {
+    var promise = callback();
+    promise.then(restoreDeflateSync, restoreDeflateSync);
+    return promise;
+  } catch (e) {
+    restoreDeflateSync(); // Synchronous failure: do not leak the stub.
+    throw e;
+  }
+}
+
+describe('SVGGraphics', function () {
+  var loadingTask;
+  var page;
+  beforeAll(function(done) {
+    loadingTask = getDocument(buildGetDocumentParams('xobject-image.pdf', {
+      nativeImageDecoderSupport: NativeImageDecoding.DISPLAY,
+    }));
+    loadingTask.promise.then(function(doc) {
+      return doc.getPage(1);
+    }).then(function(firstPage) {
+      page = firstPage;
+    }).then(done, done.fail);
+  });
+  afterAll(function(done) {
+    loadingTask.destroy().then(done, done.fail);
+  });
+
+  describe('paintImageXObject', function() {
+    function getSVGImage() {
+      var svgGfx;
+      return page.getOperatorList().then(function(opList) {
+        var forceDataSchema = true;
+        svgGfx = new SVGGraphics(page.commonObjs, page.objs, forceDataSchema);
+        return svgGfx.loadDependencies(opList);
+      }).then(function() {
+        var svgImg;
+        // A mock to steal the svg:image element from paintInlineImageXObject.
+        var elementContainer = {
+          appendChild(element) {
+            svgImg = element;
+          },
+        };
+
+        // This points to the XObject image in xobject-image.pdf.
+        var xobjectObjId = { ref: 4, gen: 0, };
+        if (isNodeJS()) {
+          setStubs(global);
+        }
+        try {
+          svgGfx.paintImageXObject(xobjectObjId, elementContainer);
+        } finally {
+          if (isNodeJS()) {
+            unsetStubs(global);
+          }
+        }
+        return svgImg;
+      });
+    }
+
+    it('should produce a reasonably small svg:image', function(done) {
+      if (!isNodeJS()) {
+        pending('zlib.deflateSync is not supported in non-Node environments.');
+      }
+      withZlib(true, getSVGImage).then(function(svgImg) {
+        expect(svgImg.nodeName).toBe('svg:image');
+        expect(svgImg.getAttribute('width')).toBe('200px');
+        expect(svgImg.getAttribute('height')).toBe('100px');
+        var imgUrl = svgImg.getAttribute('xlink:href');
+        // forceDataSchema = true, so the generated URL should be a data:-URL.
+        expect(imgUrl).toMatch(/^data:image\/png;base64,/);
+        // Test whether the generated image has a reasonable file size.
+        // I obtained a data URL of size 366 with Node 8.1.3 and zlib 1.2.11.
+        // Without zlib (uncompressed), the size of the data URL was excessive
+        // (80247).
+        expect(imgUrl.length).toBeLessThan(367);
+      }).then(done, done.fail);
+    });
+
+    it('should produce a svg:image even if zlib is unavailable',
+       function(done) {
+      withZlib(false, getSVGImage).then(function(svgImg) {
+        expect(svgImg.nodeName).toBe('svg:image');
+        expect(svgImg.getAttribute('width')).toBe('200px');
+        expect(svgImg.getAttribute('height')).toBe('100px');
+        var imgUrl = svgImg.getAttribute('xlink:href');
+        expect(imgUrl).toMatch(/^data:image\/png;base64,/);
+        // The size of our naively generated PNG file is excessive :(
+        expect(imgUrl.length).toBe(80247);
+      }).then(done, done.fail);
+    });
+  });
+});
diff --git a/test/unit/jasmine-boot.js b/test/unit/jasmine-boot.js
index 6e63cc1ec..2bd0149b2 100644
--- a/test/unit/jasmine-boot.js
+++ b/test/unit/jasmine-boot.js
@@ -50,6 +50,7 @@ function initializePDFJS(callback) {
     'pdfjs-test/unit/cmap_spec',
     'pdfjs-test/unit/crypto_spec',
     'pdfjs-test/unit/custom_spec',
+    'pdfjs-test/unit/display_svg_spec',
     'pdfjs-test/unit/document_spec',
     'pdfjs-test/unit/dom_utils_spec',
     'pdfjs-test/unit/evaluator_spec',