From d52495a9c83c266a34d0aad77a7bb699ea47720f Mon Sep 17 00:00:00 2001
From: Jonas Jenwald <jonas.jenwald@gmail.com>
Date: Sat, 9 Jan 2016 11:50:48 +0100
Subject: [PATCH] [TrueType] Recover from a missing "glyf" table by replacing
 it with dummy data, utilizing the existing code in `sanitizeGlyphLocations`

It seems to be fairly common for OCR software to include incomplete TrueType fonts, notably missing the "glyf" table, in PDF files. Since we currently reject such fonts, the result is that text-selection/copying is broken.

This patch contains a suggested approach to try and use these kinds of broken fonts, by using existing code in `sanitizeGlyphLocations` to replace a missing "glyf" table with dummy data.

Fixes 4684.
Fixes 6007.
Fixes 6829.
---
 src/core/fonts.js       |  12 ++++++++++--
 test/pdfs/.gitignore    |   1 +
 test/pdfs/issue4684.pdf | Bin 0 -> 3714 bytes
 test/test_manifest.json |   8 ++++++++
 4 files changed, 19 insertions(+), 2 deletions(-)
 create mode 100644 test/pdfs/issue4684.pdf

diff --git a/src/core/fonts.js b/src/core/fonts.js
index cdb08c74b..e20dad92c 100644
--- a/src/core/fonts.js
+++ b/src/core/fonts.js
@@ -4154,8 +4154,16 @@ var Font = (function FontClosure() {
         delete tables['cvt '];
         this.isOpenType = true;
       } else {
-        if (!tables.glyf || !tables.loca) {
-          error('Required "glyf" or "loca" tables are not found');
+        if (!tables.loca) {
+          error('Required "loca" table is not found');
+        }
+        if (!tables.glyf) {
+          warn('Required "glyf" table is not found -- trying to recover.');
+          // Note: We use `sanitizeGlyphLocations` to add dummy glyf data below.
+          tables.glyf = {
+            tag: 'glyf',
+            data: new Uint8Array(0),
+          };
         }
         this.isOpenType = false;
       }
diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore
index fe8e207ee..d1d8cf0bc 100644
--- a/test/pdfs/.gitignore
+++ b/test/pdfs/.gitignore
@@ -10,6 +10,7 @@
 !issue2391-1.pdf
 !issue2391-2.pdf
 !issue4665.pdf
+!issue4684.pdf
 !issue5801.pdf
 !issue5946.pdf
 !issue5972.pdf
diff --git a/test/pdfs/issue4684.pdf b/test/pdfs/issue4684.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..b2c15f09bda9ae2bf89ee612bbdcd2e206ff8a46
GIT binary patch
literal 3714
zcmeHKc~}$I7I&!&BNl2E7q+W5r8Po!AVrb=a|OhJwFtZfhA<(EBm)Fw(We3~6{sy}
zad~L1ARq!Nf@lzgii&^`0<uT|fg*@v3|OLh6Hp$szVCbd_x|W4-<+8<=bn4+nK{4v
zJ2+PdCsUG{1%zwwsH%lXfCvOE4uICIfe20lKI+Wm!GK^pdf*X$2qHpQTo@v3Lt&l=
zu(TMnazeN;8IZBTZV<s%<O}mrU_rJtBa$sBm<5-~76B?maDc}inUYCFGb)u%h6o!_
zCKvIw<+HgkAdWe+vlEDcClPao{k$LoBWxEIf+iTvcp<V26$l}MJ#*K37-0uYn8?~1
zg86=<;cG(q&pHq;3JZa!CJ+>MfPDpiFy?nN%x9wkfJ7vbKHQK1nQTslFh?R(2s3$*
z_+)=ll|bKJ_cuw^jgV7ySKRk(biG<+@3^h=y7T5$aW<TTj1_Uii4L|qL11@dYp(LU
z8zIY#ZJO_<E?65BmvHZ@+Z&vj>t{i2{|umm{hy?{-Jg{nw=!++FEIo9f>-l0RWpmE
zO>Z9*g;6@f@~)*$({A5lSzT3idWq-mw^_#N1rO*xh}g6!>*(}*Pb04Dp&1E@_nLOM
zh!5vWHcC$3HF?I+Hget(mN-}we(vVU{>JPVU9*{Go0Mnrg{7YSf_Y2MN9@hzhtGfY
zwBlLtJ>%&oxcDPY&*Iw*!AQDSteYfb-ujg(-dmE7*QB=AzR{D`>4wUmi*?d<e2NF3
zgg#A2vVu5~hU=q4yB*zyD?-&X>dN%<Kc6`ZS7ku%wM|{5S{L%EGzEWtDEn5S`rx|D
z`JsifF23G1>tl&6mb%ZE4$HHXi6>fmk(6#8J|*L8gJP{UW&Rbso){@XUoXC$TLR4-
z{zAS`*_N{3(177wVm6zxTuwd2p~<yhTP!fTlU^6te12zalra5%36ip*_s}jUg>3&&
z+&dF_<_kf|Oa&vkCuvd4E)wR0Y08`v7IEhC;pHLT@wMN&#^X+=zT=ccxJXUqwiWWD
zFTC|-6<Xbyt@IT|E|!Fd;_~1x_YdH7=v8x{n3!kJeL{$uA>Vg4+J1O?<8JTagoYw~
z#J5~)-Kh1$BKbL%`s$0g%XKDrx|IYPQmGNTNw|kJx3em<tJ-niY<F?mi}av_qT`Qs
z4IMh6bX$F$Eu8psHZ4mI6_p=wS=n};G}D-N+EG7eeT<8}&ZeSQo}5cfWcO^A0=Kq#
zzE$F-{og0a`iuxYIjo?ff4R)rd-`SI>yS+LX6{*Bsh9OWvzJDqq^0Gq(w&385(8ac
z!m?WJ<2xN{>b}{#_^s@c)`LxCkI#kfukK$4#;n>WcZv?O)^U4l>~gMq_C!6W`WHN6
zEz-{L(8J&Ojx~p0Uzz)Qp!Okq@LJSoJl;Z=-k7Y4o*lKE6sJaq@GCzihd;ggMA39>
zanIdP$`4oawW?kQM9_b}_)GJbr;hzY8MjiaIqBS^%z)LcoKA{<{V{!y)QGg8H+SoO
zzA!yY`sqsX++Wkvw_dFNRc3QMV<hKNOyxD}?y|xKOS5@;!6wQST4eb}ldM%o6ynsh
zjI9mT+xlV>f4|(d9pdV>(Y2Zr!uRfU+TK$7<3sXo&pK*;Ruy?`Q{Lf627NfT^i9JK
z(XBSsgM#)3pVodO3mkqVQWo9HG(F#{M>yCZiv|Oajs_3KS@l{!v+lR980oNTr{SEU
zqdf-2Dx;3a-|QKH?_{iu?d~tkmvkzkrq#zcs^_SU)YDRTtf|#t$XNA*aX6v;?{H#4
zrA@*Qp7cJa$#vlYdBGXG(xgxOBDaGjLImvd4)|ln(a0a5WsP^8mkWFd*$H-O#a+qq
zM-^db*GdX+-ZG4}w@_R2WTE}lYqdN4IV1hAuJBtTcN>(S2*_VrA1lkN9r^>gno=2`
zGT^O@RiakbTy}SNi|V(apy=b?g*uBbbkq{xEmfAj9Z&|DtIc_jsYl|~UPZfG`=jzN
zm>U(AaSQs)`{l!X2pKIh>9%KG1EDHU^_$$lQ>Pyuxbr%sR(V5OxFzFdPUXYJhvCpx
znXtaR@&+xZ^YlpU>ujkayuV$%I^L9Pi}$Q4H@}i+XxL|CVpUUShE_eVVfRrKvqok_
zR@`#%$;qyk^#v-}O15JDNLPcMq?n+Xr<$_ZC@INS;8jx=PLkUKg}!Ra!dX%}ps-R+
zS=dGw8!P6hrYtr@mz+_UsHQ9&qi_2t7OSQ#)~YO$FN8+vNCv%Rc>Ra1;`#434X6x<
z2KIeFGE@<hbwMo*`q?J?*zCNy_6pvetTPvWw|wh-Oi67ZK8ne?IlrSNB-U$KH47}t
z&HGr@P$yQ6E0Yb201Z=N6k%*jw`XFTzksbNmk;~4=K1)(y#@9|nB!!CN+bfBh7W8)
z{6xUhT;mD5vlj$oWCPp5$9#@XGtr~QZrr0QQ>dXP@1yuH{@q}aAXw;&MLW)d-d~Y`
z#c2Fkz}O#~nvY`cM8I-XJ0XIJ0ceJd2jGMVMW}rMQ>Y<341qfz(U1|K!51tolQ|C1
zkTFa<M!}rOcx;FrQv_?QK1#`gS?DNLVhDp_O*SxA!+bv*i9&-2+YvuB081T-h^6xH
z)89Gx|NagpQU8e*r75@Ziu%CfC((9021Ef2;b=R9y=LrV2V@#x@E|(H%mTXz$7+p)
zl`X>`B9YC^0b?`tl`UWi1%WUh8#h)yz#l8L)qsbHL<pY^&;S^$U~|L#R{)fW^+2n?
z1`H0wSOt9*EQ0+2D-5{Uy8)5F9}UG&2A~1}^EGw=g~h0;2<F1RC?epSjSh2UKp#O&
zu@Lr$h=2}ZFXsIL%xN?V4fum`9SK7(%>nr1I`Ut2Bnk;j(_}j`iHdbpf7a14nE8_q
z>)QUJqY|;6YO)=LOq-xXg-omq6hfn*=_U}MK!g7ner-d-VQfvKO%+p!jxLfyHK#*3
JoTG~q^lx<nqcH#g

literal 0
HcmV?d00001

diff --git a/test/test_manifest.json b/test/test_manifest.json
index c62dda6d9..00460e3ee 100644
--- a/test/test_manifest.json
+++ b/test/test_manifest.json
@@ -1789,6 +1789,14 @@
       "type": "eq",
       "about": "Has a multi-byte char codes."
     },
+    {  "id": "issue4684-text",
+       "file": "pdfs/issue4684.pdf",
+       "md5": "af5056fcdfb08bd7adc1710d36e4b5b5",
+       "link": false,
+       "rounds": 1,
+       "type": "text",
+       "about": "Invisible (and broken) TrueType font used for text-selection."
+    },
     {  "id": "issue1912",
       "file": "pdfs/issue1912.pdf",
       "md5": "15305b7c2cba971e7423de3f6ad38fef",