From a61a4b18cc4b195c7109e9f4673c840f3852e47a Mon Sep 17 00:00:00 2001
From: Martin Heller <hellemar@fel.cvut.cz>
Date: Mon, 11 May 2015 00:46:59 +0200
Subject: [PATCH] URL annotations handled as UTF-8 to accommodate some bad
 PDFs. For proper 7-bit ASCII this makes no difference. Fixes Bug 1122280.

---
 src/core/annotation.js | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/core/annotation.js b/src/core/annotation.js
index bd4fed891..32974b568 100644
--- a/src/core/annotation.js
+++ b/src/core/annotation.js
@@ -16,7 +16,8 @@
  */
 /* globals PDFJS, Util, isDict, isName, stringToPDFString, warn, Dict, Stream,
            stringToBytes, assert, Promise, isArray, ObjectLoader, OperatorList,
-           isValidUrl, OPS, createPromiseCapability, AnnotationType */
+           isValidUrl, OPS, createPromiseCapability, AnnotationType, 
+           stringToUTF8String */
 
 'use strict';
 
@@ -500,7 +501,15 @@ var LinkAnnotation = (function LinkAnnotationClosure() {
         if (!isValidUrl(url, false)) {
           url = '';
         }
-        data.url = url;
+        // According to ISO 32000-1:2008, section 12.6.4.7,
+        // a URI should be encoded in 7-bit ASCII.
+        // Some bad PDFs may have URIs in UTF-8 encoding, see Bugzilla 1122280.
+        try {
+          data.url = stringToUTF8String(url);
+        } catch (e) {
+          // Fall back to a simple copy.
+          data.url = url;
+        }
       } else if (linkType === 'GoTo') {
         data.dest = action.get('D');
       } else if (linkType === 'GoToR') {