From c22368ff30d522aad55f30b77deafe93d8bdce03 Mon Sep 17 00:00:00 2001
From: Calixte Denizet <calixte.denizet@gmail.com>
Date: Sun, 18 Apr 2021 23:37:22 +0200
Subject: [PATCH] Support search with or without diacritics   - get original
 index using a dichotomic search instead of a linear one;   - remove
 diacritics from text using NFD decomposition and unicode regex;   - convert
 the query string into a RegExp;   - replace whitespaces in the query with
 \s+;   - remove pdf_find_utils.js.

---
 l10n/en-US/viewer.properties          |   1 +
 test/pdfs/.gitignore                  |   1 +
 test/pdfs/french_diacritics.pdf       | Bin 0 -> 10500 bytes
 test/unit/pdf_find_controller_spec.js | 125 +++++++++
 web/app.js                            |   3 +
 web/firefoxcom.js                     |   2 +
 web/pdf_find_bar.js                   |   6 +
 web/pdf_find_controller.js            | 356 ++++++++++++++------------
 web/ui_utils.js                       |   4 +-
 web/viewer.html                       |   7 +-
 web/viewer.js                         |   1 +
 11 files changed, 333 insertions(+), 173 deletions(-)
 create mode 100644 test/pdfs/french_diacritics.pdf
diff --git a/l10n/en-US/viewer.properties b/l10n/en-US/viewer.properties
index 5fe094b769b286..e02857c1157980 100644
--- a/l10n/en-US/viewer.properties
+++ b/l10n/en-US/viewer.properties
@@ -168,6 +168,7 @@ find_next.title=Find the next occurrence of the phrase
 find_next_label=Next
 find_highlight=Highlight all
 find_match_case_label=Match case
+find_match_diacritics_label=Match Diacritics
 find_entire_word_label=Whole words
 find_reached_top=Reached top of document, continued from bottom
 find_reached_bottom=Reached end of document, continued from top
diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore
index 1abd53fd2fc180..bc44edac2f7948 100644
--- a/test/pdfs/.gitignore
+++ b/test/pdfs/.gitignore
@@ -327,6 +327,7 @@
 !issue4650.pdf
 !issue6721_reduced.pdf
 !issue3025.pdf
+!french_diacritics.pdf
 !issue2099-1.pdf
 !issue3371.pdf
 !issue2956.pdf
diff --git a/test/pdfs/french_diacritics.pdf b/test/pdfs/french_diacritics.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..ba5b5cb07e2bde6e804ea005722ffa01b4f851c5
GIT binary patch
literal 10500
zcmbuFcT`jB(*LOeL+?Ra=shHaDj=a(siGhq0s_(r9RxxV=~5IBF;qd6CLq1{B1J$!
zng}8YNRux7@Z59m@4oNJT9>olto)I^v$OUypUgA!o!P^!r>rUrgNOsUN5;E80^wjN
z*v-ZXC@TvT(YABFi*f+N#3aD~dI3dL9i35jo?sDGXKR$5vYo9P(hhk2I?xN{X=m*M
z^qZDTf|29FNC{3F+qrL>^>+INIX=Q*bcj&V=kj4PdNro;X)H2N9=&@zO~`#P#{Pw~
zJX6U$7u+Gi=;oi5?PO<SRPka*>!+1>p~1>{++Z(Sh{sDC!1DqhCG}L{sbEI3_`dkH
zZ-h<pCq|nLGYExldplsWo~gwb$X2!~%4GI!Biq5b5c3-p%PNU=r@B4R&K3FR;x0b@
zHcayG28v3-E^VOq=#XY7l#*!c3L!(3UEqd-Lw0LnZ_OP2B;C4#yDqD;v9VkX+qXJG
zisoBxtignAIX%pe6Exsyh2w~$N7{<#gUBO|=2Xq>OcB%mQ|-AIhiDV0{eD*$>+|UE
zcqvR71?gs)hT1WLNBhTTp?$~4W|{Bf8^v`u%Rj1z64uhaDarh0E&jpuE+VL6DVE)m
zmW+W`yL3uED$UJI_k_U#*>@g%^=PQAC8%qq?dUU8w9gnPc$iR+0YzT00+ZxgB;OYx
ze+k~7QJ9fZ8op{Dps2546%ORMhOz}aJ;*fpUN2A-O9Dl?zRp^1jpk+C{ua1F=`u36
z5+>pBWj%`3iMR@0#du4YGGj(LK*)l&w##xOGc@e)oTadEmWE%NbMV+-oN)y^Y)S)-
zowHkOdVS=CZvG2&Af<v{;vx|+@LpSZS=v)I%bdg!TZ*xCWc`TH1mX||=Q_QjaaZk4
zaolpn)auP_uAS>`4B9k?JvCL~4Qggfpx1VpRw(Ow(DIX=ji0}8^SUN7#C{rBJmr*3
z7vaWn$Izwb>miLF?Pkt6_XccQP2{8qj<UQUiC8t}#o0s$hZ-Kb5r*(-&g+pqHR_-@
z-+aN+!n8B+B={_etvRbYd^2oc7H0#UfZ57Id`2g*@5VJ?JPt-o-Pm7gd@cq%0Fd17
zOsS5qU(bBp7tecak><$2fcepTCs}2<!swC7aP0}B>GQy8jCfxb*;W*-gwiB>lIQK@
zP^n+`7hni(y&;<R@64sKFqag+H1o(&d96+;2(TqdE7ZvR<R>8Kh(JQ6)LSr%_oKoV
z4-pmlxc+JQJ2I)m>U!y8x?`$qK1tn@54)C%%rK|wa{$)tr7rynS_9_yPefEaI5tIt
zx@Gg4#7yC}!L3G-t7*M(?cr70_iKuc&5g6R_4TrJsCxPjsAWJ&HO>O1AjU{w?#|QB
zJCYib%TjFTY(IXOZ_9hqWs1Fjyiw)75;U`2;(I@;GSO`%(4jL*g91VHPT>;<UW89k
z%>D&^btkPwftxGL)IHcWj7Wm=fJT(?(UtzH$?j<8AV;y<TNyPTxIuSSmtV;->^nXy
zcp*0QHsrd0Cgxd+NnmsUuOwqXZdKvTor8u&lSrmhTq|hQ`QHh{pjZ$~OJ5ps^`jQe
zP6XwH5Ox)-EBVz@gN>m)(#h!@lv29TNK9HJG5QWr07xIUp0}*OL(E@h>^eL9&^_$l
zjYkwdJ}bkLb{@^mm{j1k0RaI+0k43*&If#+UYQ2f*-FE1Ht%DWvSZA+K72Lf+8_%K
zzTXmq^a|9-P_Nlb(m&>#7RGhpi3lw_FvhKz9K*2X<p9JQi^I!8pOd!{j03lcm4QPz
z1i+?<_<P2Ff%v-qU$0|=RLkFdvsX@0k?Z)1cMG1$*n!)A#+OgHCvT-Q6j?6xN$B%5
z83R0P%PM3cM^Lq2an58oB_(pSco#s=Z&HHnbMM!EW;I@;()x9yr<MLq37!afqw0xD
z2dox=)BG@sYCuWIQpmRH<Ydzy<4a0IefxDSP0J!x#55N4Ff8arFQ1y?M-}e1LurY&
zQrm$P)*o0Rnsu!VYNP_GU$zHO?ky<*+P~_%+oC0ZIowaZ9}$F8QF!EQl~2odMc!hE
zTG2v&wW;9M;e^h!os-ZOnq&6YE~PK4rVpp}9;cX<Pjbnx3-kr`hqN?^_c==}Ml7X2
zxl*Fr-y@`ayB0LbtbiNGd0i%##7T3!v*=2g@k6y-l*bO04}lNsdKb(O!knXqqs&=C
ze>2iaWv2WmGyz9Bq0$*McT)Chc-Fjd)6Xe4{Px>XLJ?N{#HI&r6lh9iu!KXXf((!?
zb4l!l;F@kO7z;7c|0}V!-?<L35P^<V_%;zf;*Xb7$>gG|_}YrReX}oINT^6nL8a~K
z^*d|@i6ePL?1e{y=>{=M?P|#xTwE`@`s!_l1m|yb-Q}KV*wgMltYD4mXrJYrcsx$V
zq_oyQoLu?g^mu0<@p8`0UE$^DPw1Zs&K>P)0*Xt|!@L&osPiF%cJsa8lENxQMq)xD
zNk+Sg=ktHCos}~q&A|_o5QCML2#Rkioov}0$Dss{t#ypu)$@ZDu9X?t?$91~*QQRG
zR16>I9A`iySRDb}VPzZnB2y2luc|6w+qj*_frS_r`6DWJd3BIBI^={0qr_P-p*I8N
zTgXMfqp3A<LPlnv;vRrM8d4O1ZziH+(7dMe+FL67llSJVp7t3-D}_+%J1`BNi2UcM
zKzxHe=LS~2%n{KxVJ8=_E`E`qQjc`&Q`I}Rvej=ucj41N$OYpb>{3kyY5%xVOflF^
zfOErrn<FctJ-%&NlEy+r8}%*&04gL?<M)c>uSzGAg^?7s$>+LzY~y~y`_!^dcTJJg
zH-a?`r$9(RFYYD5=0`t}8dy0Ym+y1U{@dMK2kz+sm|LpmuI4IQ4gvYBY80Fzd=ZLV
zsJNieM?ZRl-IG5Hp5f8g=fbc66vN_ju*=8Cv{5Z!JCqV<grATkd|@(jA*(gt;WV`5
zr4<9Z)GQiYq;=tN=5m>8+r)CA#_x3q3^m)>Kf^Lk)$8w<z`+_WvlF2K!P7^6iUlah
zdof{W3Qz>!<9ynfVpbKZci%jlH}wg2sgC%%t9CR>VaznGWKlQ^jbKfGrCbm31KC1*
z`%Le1)#R=Y0LlxMxW|8^aDUKUm>B?^N(CDt`L{YoIo`R0L^w5o(CQ!BZy;|o2KD)K
z``<$H+Qx;C=_O;+Et5PL6CJxxabY1n?*Y8*kS+X*{-QTnP>KH?6-@l{z6O^VgTrq@
z^>A$Q2}zV9X_ZP9A)Ih9+aRF^*NUgKzvt=$K~RRrY9NkE-JV=9oo0-tVcp&{yezuV
zAcjZ>CBkrHh%N&mfE54$NP}yL7LNgds9_=yi0!(S)%C5gu%;Yz*z-03pD+>E5hD<k
z&O{3WF}jDHpP#dZT6dpQfWML{k?t^6TGpA#)#26Q2-Dr8$6}@g7Bk_{OJ~ggG}DXZ
zpO{Hnw`We9OGZa(QzvUq>kK+%Tw`2kI)i^?JDk@`Y(-pGXXZ+zIK8k3hZKGm(%Pyw
z4;6y-y7k^)_WZd7T*AW<DtAA@ViA@S0uB|sv_;j)ZJHej$|zm}v-I$~07@AVjE2A{
zeFWn52s44)Pd(XUHHsLtMP_i-bxl_G9%jwO&B0B2p~b$)Wi3$|`1x7Nz`C<+K^*WJ
z%X<}tl&oyepsx1&gASgMM?DN#M(L2^benqUbJ%Ow)n6N57~fCLiZ7?Xb4)Yb51q)u
zS-?SZCvXZ2Nw$}%eyoWrIzo0o6)YU%g_r}G(Kf6~gOLefPr`HK@IC#G$?7ucvIm`j
zTyh&hL<U137`8U4<8q~PV|ThMU;w}oc&nPy_m(F^k+U$(-V!U1V<)o@lE&$->g$k7
z`fIP3S&6K7ElGbhCRl<{15JB#ax66ac@#I-KlrS$olFs0HVt=kSiR8{{xsax9Uwg*
zeLvxTX`7(D?-9jTVSC^yK=W(P78d^4N{fFW1U8akz;*<siex@9sZGLy8A4J&*_c@I
z=6?z>McLmWhTkyyyS6CW_+d5vfT{iYJKFOH`kcU#4*JQbAONV^W4VJ|@p&d|&9Yrx
zm=vyD*9ehZzQi|V#DiZRWY<^^lInxAMxRS_1>fv%F3Qciry)u6wf#XO6V+9!No9vv
zy=RehKZ6;Sz8Iv0RDQH!DlnJ}V91V`d)`txFhf1n)kWmCB#@#LzeztD3=_;-q-61!
z<-W6YBkfs!Ix5}KnuemPQdCzKJ(P-*(PMhwDyfwq+nOsR6bmtIg@O3x_u$4@o6`j`
zz=8>f&|q6*SO$Yb3ST!f9@@x(94+VnyA)G2z0)h~FUHoT);k(r`)>VX?A!gB$gM~d
z`+aDLL;x!pe-5tPPkttY>@i3e#h?L%CzWE)KCo5e;dXK*?U&mSTeEko-**EOw~^b5
zX3xfA0w;}iuHb0npAo1{WRI5{1*YAhb(%2zG-9h$GwQ=)#q@;;g*(?pj?-3_=(R~)
zEh7&kB_PK<Rul0pY3OSS@fO-?@2>0|3U3@yUlLgi>4=i|)WqAYew7Bws^h7sJtW34
zla{vG_`rpKqhuQxtb?~A7P|SeBys8LUGC=3WW)ZURDUNaicSBEN?v;JZzRk7_Pr=I
z1A;=RTCW)*h0mWBy@0RH?qJ7`#yq|d2hUiAFI?|2{o?Tt9!Ip16T@Q)dmsDQABl@T
z%rPS7j^12AZXG1k)}uTe70cw@H&eS{J2!Uy);R*Z{juUK-6t%8;Zu5gEPd2)K^-;4
zrKDiqtg~5vkbUWAVXx;c#`^;Y%Rx<fsr}yyEsUrUkK5k{95pOdW6tlI?~7C1;jAsC
zR5ZU~CR~SmjhlVG{T11?pG{L$z}`Y(@s0b@0X)2_DHb20{vO%>Izk0ISw*^~bDa-+
z_=k^3$aEv>u^_{y+r=*Ld@k=4G`oJQ&{SOfH&6yjdkDsnn~<<#D+p~)Je)@2Q2;UL
zt9J>+OWgaW?-?P~!1o&dv(C=}kBuwkh&M-8dN~W9je%JcGJIIqDoHB+9Wopupj%e^
za&+FScXYm2{P_5>{G7byNSVZKGmPb~Qri*d<UI*B)yV<_VU(tb9<3R*)kuGl7?A%K
z8`_3`pWgA45DO*u1mlMRW~Ks~<hWh{3vBZ}XpP`4(+-80#)8lYxTofxwuELIk4x-T
z41Zc-6^T8Oc1ZgDJ|2e~HFQ=aMt4_3kCl!+SKH&Eyd{>A(I3R;K1=Fq8u?=3hfT$c
zU%r##@=o_c#nbj|TSgoE#jX+pb|DxS?no+Nqz6F*Xk<bNaC^-Fq|!Da#=E7il#tAc
zl8zlaUVI)ILVbv^=za2WXDQXICdjN@xGrnkTp<zZmCzu2(pVRa@mHFPanYG7h~vNa
z`1EwoQ+MigEsLa|z7ZnTl4IYdM;^MbNF7l{lVB0~lG@(T8cwB%$5+NuWytDDoU{!f
z!ufI_S|PemO<pbbC_vkG9^qT`Y&viMTS#T3hawR7+y-zZ0X;QP;cZ%E0ZE2jCxKB4
zdcUe-TOg4;wLVlOtB;(%67b^QlXRsoykCVkURHguebQOBhF~Bwe=l>wb{l{NCN_Z&
zzr5&58~)Q7TL^>(+szOcJNUh(0I}l25kUiGkRZdNra4gHe>!8zjv1eu?H}HlQM<O3
zO5zh_R#{SeZRJJsTrXYL{#5DCp<+qqI!gn3z1l40wcFvBU%MIWYx~JsmG_!O>ijSP
zL&ScOS1YNGiVB;ACYEQ<A3tmtdM)6GP&$xOWVOO@1U(_!lIxxqgFeHsTZz~wx0*o1
znLm|=X@3AG77OQxrY;(4HiWY5q_k+uBU79nxe!(xH4hAhtx^m<Nni-L={5MiNMOY1
z2GcP>xzaoX3pQ-B{vU+BXNfS^-zoRJ-$3X_2ibZZ!nkZ*z3@&Sj2gnBpb}P;oXStl
z&C-AD5J<}wmfRkbA)bTK8*Q9;cY3t@qPMVcDbDMDs-#FA^wj=a%8ueoM){#L(=xW3
z;`tx481fkmcxM~84n7RatU4VHMxln7I;I|HX668JQgZNx$Xm&;jN`&`<qSGmoMpD%
zdGj|IC3*?3WW_z-V%qX$o8j(coU6f`Dhyi7QA8J|A#AH2P>SLPq~_i_aYqx4J50{E
zg6-9pH9I*}xn2Nb1>@rNZB*9V(^J@nnC^^H#Sa*eV?l;R-i!W&u;)Ee@w-!Wk#lQ^
zwLdq9Ci&ye3iT;C$7T6i0Tl#EDh#wpRAl-#@;35jIP<kV%r>7BP|z~B-(J_TKg$<*
zrz9Mi>Pesf%1(bkYrz*H$V^{Zo9>o9?>AK$l2mCE5^lS{@jB>!P*bJloVk`&OL7mm
zi{`Ch4vpe6jlM=deJv{zA1c$dnX2kB?NB|+hf)a{SYtYhZiaNcYTJvr3vMWI>!&tP
z)=H0zKT>vf=<XV#D3+;|7tOWbuV1k&EPp!hus;qqY}ZUlYE`!4<giBuo&w<`{G$m@
zC{Ab{6`KR}(FfuAU0g(v!Cfr;u+$Czq!jla`9o!2@be@d%<;!%-^KHO!xgz_lYh~@
z_$^v=*Yrnn7Zk>VtAbOnD<(*%7M_cND8|^2$e&L2wU{EGl%{HXW?bvj%1GN;JPhB(
zi_IAP+VNyX(qi11jZ5U?oK1#HFz@rHWw&c$PYR@J1pQITavn|8xmR)w3JhvA6F)E`
zs)#&Dk})o<W{y2?cS_CPpI>v)3Pee6CltYjKd<slk5Y8uazabTJXWt*jv^M{wnW%3
zLC5j{Vl2-J7!!9Mu!`L5rQOn^Now_@^jD7Kck-Yq%p=K_OZ%p3rFjfBBbXm={K_f5
zrf-J@AeJ&kR7~p9Qsv(OLx!xa$>S;XGA}$ZBA-%$bR75<{H-tC&md8eu1TXlu_iHD
z?HlaD*kR8I8r`we_O9{`+%*4%NpbPlfMiFDwZ`!k?Kv)ybZzhZ64|m%{kinMW&Z4&
znT{Sm<L@_yG)J4xSUB;x7E@#tCK`y)5#wlU<&ssfJRI(GRxPA4xKkM;JCcc0N<Z_e
z74?vF0TjYHot(cC-(88iE3vRfS$&8mt3@#Dv2Uy2-=b5sX_=$R#Bactvqefa?urbx
z9!m-(>Q%gG<-K7=)Sc4%Mny(jK5P|ASi?+0$D}&5>q{!wZzSxyLsnp6Sc-)swsPa2
zq>My4or`iWa^5L6t+16LdZ`Y_q--7h-in{#3WK_a3RQt7A3tJ*{cd87+BTJ0efgrc
zbF`vDo5P;Q?m3TEA!H@Ou-PZ;t8Byavph*nxCT$quK85+SR>cTprh)({upwA_ss7+
zh+W2V?hWUKlKX;GT1I>8iEF=RQjT|2>x^H8wC7yg`!$T>jSi%^scLUJfoO~@-6o`Q
zf53-9tW!~J5vnTRKM&V?<{_2Gz~mt{dqgvF<zznTCvl_$(9?iWAJd2keB=O5v{0x{
z$6@hYrEiNq#^IY%r{I45jo4z~PB9j2SZWTKxag(b4=y?Z7c+qBK?!U)X$TGfs{E7E
zaid9+)!!|9?ktT_+=hxSjve5{c&-K>3#8Jf6?(495K0MxJXag|l<|$aJ^kzXpdcvp
zCKP&VZFSny8k8N@)D-rtE-Z07l(Yk%BnpiOh$SKwB?9?f1sQ{q+SmYrAREBiFMR48
zD!gm2&d!O57ys-Ar1z^o{=1zQt&G1vF%`Xh#|tho0vFT2uoKT(i_lQ(-wlp=oX-E+
z$oZcg4kYLh^hbw7a-0p(1NKOg9c|4H3p<y`gNM4yDF-3kgYm;p&*j|*m)H8}dI@pf
z)QgW}F$kMphhIK!#3w0ecIZ(?om1vOW541>4ic_wWdgF9NUG5$#Pw^rs0knbB0iM9
zm`qt3lV2n=`+yMo*jI?yM7r^yZP51YXywN4wldAop6tY`vRB!si)VMcLOhw4r_Ii+
zNQ-Dwr>|YJ<wB*k6Hit?JgYsLO)Ffauh$ee(W=k`<JyKO_b>%feddK7@;yuf0qCHL
zWPY;fp71_mA&M~SyWbuZE6+PWjU;nN#A4dG+pod^bh%@cY1%fq3DQ;BRPmqVsa+^i
zN$byZKb+`{?(5E}PH#m^2<+NS4sxC6$S}U8OBm?&W<S`$7pu85gM}V8wGO>}wKFtQ
zaZw<W;*2npr;#4xqmOW#ELIiFX`KjVh_6XUr(C<n2P8-T5|b}@K2o7n@ijL#x`A;n
zse#M4;=Vj|$q*r@WUVZb`??K3>T$~TskZOPH$74+P5C3a*`8OuujRepZd76!?m_DD
zRxPUD)26}mCQ49+(A)6e()sDGD4ZaA*AytruV;Y60Hc1o1*erP#Xvsq7vHXTl)Yv2
zLnb8mc+6#)Ti(=pJF0Z#GZ`8C;yLOK`sZ5Ka+6VL3N6DS0dm4y0Sgyw!W(*duNoTu
zqi_Z+$md6EQ;ASs2ye+NCedJy>#eltTl6*szf0#xtAwxmo&*E5ZBiu?9Q!)k!h^9U
zEMj>{_zAUCo>}EOesp6AHo|?O-rtQ(T#KG0XL`KSi08T?z_M!{VwSRY<Cr=5>XsS3
zFdqxlsv{zW@%!z(=S-g@zpNr>c)Dk3UX=(=XV^w95t{Yk8l+TQ9qB472V#_s7Iuma
zTOrmA<^8_H&mDsKeHRO|evLQb-}R##QQ$b=e)II(&AMpKs}AX7sjRv+s9Y?3uqbSp
z)a4rsL&Ft7v>q*B!IC)wzC=Qh%}`X5YpoT=xI{TVPI+VEqux0Ex4a|1>*y~mTr&Bw
zVl}8Jx5FR9pJzr`PFNe(cYD8p7v1}L6H93$sOx1{EfN@F3=L$+6%7O*Y82dSo@c#l
zux;PQS{!ju`3>RNUT(eID*INe!oBmA`^#pRIiKk>3R?$^YTl!t)nbEJ3#0UXpUICN
z8f+?;Wc~8k!pjCpw30qL?dcO7b<(7Yr*+5}dp!3f*MAoa3M|SQ_7D1daM%R=B5{=C
z*w~T~!vE@wYk~h)L^Zic8vms=9)(XzI*1q~@7zI{g@lIvI>(JV2g{!0bWr#g9qM)e
zWD7kzI>(Rtv;XatXw>s}Yc3@J|IZJ4o^D8Q+lv7jK6j-3O*`X@!5c9sL=?sk6j8dE
zph3A^d~<R7uR}*SSLKU28!(^pHBqQ23@Qeb5{1Jg#6<<6qP$Qj@5OaD+>rn6lAE5^
z?(TNTi=iEBXD>UTh~7;VV~DD^v$Ku0tLyKz+;njC0$;rUwI(6(Ejv%Ii(7-?5SSQD
zTv}8@Tv!w$@z+}aSRq*3(Z>#au_&mBvYm~iwJSgPp1U>5!OPAX?DOZ(B_ME!DF5&A
zpg%`%{_}sm>F;g_7SXkFGDNumMRdSYzh{d)y-;9r(SHvfNl0EkZf+Q9xqd>B&VHwD
zM2Kg$0HBrb2KXvZIn?n`)_ytNnjDx4pcd}frvne<d(E*K3{?A(o756b-zey%x*>2$
zy+K(bDV{atr6HGTN=+!<YEc-EY_Jf0tm5q1(QI<P6YI3%vxA0;*?4>tf(FRAx6Iz~
zNrMk!iPWs-Q>n?mNL4A$;gS{&{)EmG?`hiT${ru2^~#$!%psBKRmMqoG6-Ca*<Tfv
z1c&&(PpDGejHN67v~QO`<ndlP<}Ll&*Q1s68vvJ_W=UO_1c|O@=kDO1h<;7D)@j1D
z{f@DHyI-Vkec?j!q`Fw3B+tR)cTad}ks412y(cyvY@b$4r6`TL4Yfh%XxG%<vak_B
zd?6z%v*s-uJ``q$q~*LsY_&uWDqpdzN$CGNKUop!r0zI5yArRKfQG=n{NN5WHUtDF
z_4ri?+&Bgw^ZTeg=x)4^DAtU7r<N=iCsNH$@<}7}w)wslW?8y#$ayj`+^{Hi^Q^f`
zdU4dOA^&RQld_w2-+NpFb+0i_AP^Ps5m<EI^HWk%(#63SQus9Cl!Y&i9q6MXxecW~
z#Eh(<vBV|UUpx5pe81{)*9u9Q9kAKwUkNRGH2Q+>3kaomO1k9CgLnupw=a)M*1_0K
zr&x(jRxTt>+QJ5z{SUaP8GT>od8}`?whn>Zo7hK&GhVF7ra*TdF4h>!v+T?y^3~_v
z*+Xccg1k$@!xmE}Z5+l_NvkGftoe1vk(e}#IQ47HWb2dqq_4(pz^jiGT+z^lMUrqc
z6im<A+DiN-2OtwdDg;mk87tEOW(cf9e`Heu9ME(CzGgarnU3{>xQX?GNBPXDrnI>U
z{`|$Xjuk4W*~gSQoq2Ic&T}qqsO&5!y(X*lMGKE+Y6+I0@Shs%Ut7W0z|I~hqT!0P
z^8=eg!D6E3KoLX70K1D%K#|*EQy5qj47)gI;O2$`!+(owgWs3iyMh0RFuxC!!KSj3
zii)bDib@KS(ozzVa8YTvC=3o4lY~pbpfFWsDaGsH|9;8EUH(1#cX=U87cWDOYYkEv
zezrZ_X{!y`j$ZLoeo>BhG1p_jKMoWeDXG$Y?fo&-VZw$e$S^BwX6dWN!pdhP*k&-h
z`(!Zeki#whl*kiKJEM1~ctlM1xX4erfGp-~QIOg;6Zd3^v;NN*C$^i<F;OmJB1J!y
zH)y?ud>M+KV9tPKK74XHpQTTJ#%Fzw)8qW~ob?c2(2N@Mcko^)YfqG)=LPBu9#9yN
Ln_ER!75M)E&lq1~

literal 0
HcmV?d00001

diff --git a/test/unit/pdf_find_controller_spec.js b/test/unit/pdf_find_controller_spec.js
index 1b97f47e48eda9..814e4c9ba2005d 100644
--- a/test/unit/pdf_find_controller_spec.js
+++ b/test/unit/pdf_find_controller_spec.js
@@ -271,5 +271,130 @@ describe("pdf_find_controller", function () {
       pageMatches: [[19, 48, 66]],
       pageMatchesLength: [[8, 8, 8]],
     });
+
+    await testSearch({
+      eventBus,
+      pdfFindController,
+      parameters: {
+        query: "1/2",
+        caseSensitive: false,
+        entireWord: false,
+        phraseSearch: true,
+        findPrevious: false,
+      },
+      matchesPerPage: [2],
+      selectedMatch: {
+        pageIndex: 0,
+        matchIndex: 0,
+      },
+      pageMatches: [[28, 57]],
+      pageMatchesLength: [[1, 1]],
+    });
+
+    await testSearch({
+      eventBus,
+      pdfFindController,
+      parameters: {
+        query: "½",
+        caseSensitive: false,
+        entireWord: false,
+        phraseSearch: true,
+        findPrevious: false,
+      },
+      matchesPerPage: [2],
+      selectedMatch: {
+        pageIndex: 0,
+        matchIndex: 0,
+      },
+      pageMatches: [[28, 57]],
+      pageMatchesLength: [[1, 1]],
+    });
+  });
+
+  it("performs a normal search, where the text with diacritics is normalized", async function () {
+    const { eventBus, pdfFindController } = await initPdfFindController(
+      "french_diacritics.pdf"
+    );
+
+    await testSearch({
+      eventBus,
+      pdfFindController,
+      parameters: {
+        query: "a",
+        caseSensitive: false,
+        entireWord: false,
+        phraseSearch: true,
+        findPrevious: false,
+        matchDiacritics: false,
+      },
+      matchesPerPage: [6],
+      selectedMatch: {
+        pageIndex: 0,
+        matchIndex: 0,
+      },
+      pageMatches: [[0, 2, 4, 6, 8, 10]],
+      pageMatchesLength: [[1, 1, 1, 1, 1, 1]],
+    });
+
+    await testSearch({
+      eventBus,
+      pdfFindController,
+      parameters: {
+        query: "u",
+        caseSensitive: false,
+        entireWord: false,
+        phraseSearch: true,
+        findPrevious: false,
+        matchDiacritics: false,
+      },
+      matchesPerPage: [6],
+      selectedMatch: {
+        pageIndex: 0,
+        matchIndex: 0,
+      },
+      pageMatches: [[44, 46, 48, 50, 52, 54]],
+      pageMatchesLength: [[1, 1, 1, 1, 1, 1]],
+    });
+
+    await testSearch({
+      eventBus,
+      pdfFindController,
+      parameters: {
+        query: "ë",
+        caseSensitive: false,
+        entireWord: false,
+        phraseSearch: true,
+        findPrevious: false,
+        matchDiacritics: true,
+      },
+      matchesPerPage: [2],
+      selectedMatch: {
+        pageIndex: 0,
+        matchIndex: 0,
+      },
+      pageMatches: [[28, 30]],
+      pageMatchesLength: [[1, 1]],
+    });
+  });
+
+  it("performs a search where one of the results contains an hyphen", async function () {
+    const { eventBus, pdfFindController } = await initPdfFindController();
+
+    await testSearch({
+      eventBus,
+      pdfFindController,
+      parameters: {
+        query: "optimiz",
+        caseSensitive: false,
+        entireWord: false,
+        phraseSearch: true,
+        findPrevious: false,
+      },
+      matchesPerPage: [1, 4, 2, 3, 3, 0, 2, 9, 1, 0, 0, 6, 3, 4],
+      selectedMatch: {
+        pageIndex: 0,
+        matchIndex: 0,
+      },
+    });
   });
 });
diff --git a/web/app.js b/web/app.js
index 213b8324a28b69..6903a8a1eb152c 100644
--- a/web/app.js
+++ b/web/app.js
@@ -2603,6 +2603,7 @@ function webViewerFind(evt) {
     entireWord: evt.entireWord,
     highlightAll: evt.highlightAll,
     findPrevious: evt.findPrevious,
+    matchDiacritics: evt.matchDiacritics,
   });
 }
 
@@ -2614,6 +2615,7 @@ function webViewerFindFromUrlHash(evt) {
     entireWord: false,
     highlightAll: true,
     findPrevious: false,
+    matchDiacritics: true,
   });
 }
 
@@ -2820,6 +2822,7 @@ function webViewerKeyDown(evt) {
               entireWord: findState.entireWord,
               highlightAll: findState.highlightAll,
               findPrevious: cmd === 5 || cmd === 12,
+              matchDiacritics: findState.matchDiacritics,
             });
           }
           handled = true;
diff --git a/web/firefoxcom.js b/web/firefoxcom.js
index 129190a747851c..a98d093177b65f 100644
--- a/web/firefoxcom.js
+++ b/web/firefoxcom.js
@@ -218,6 +218,7 @@ class MozL10n {
     "findcasesensitivitychange",
     "findentirewordchange",
     "findbarclose",
+    "finddiacriticmatchingchange",
   ];
   const handleEvent = function ({ type, detail }) {
     if (!PDFViewerApplication.initialized) {
@@ -236,6 +237,7 @@ class MozL10n {
       entireWord: !!detail.entireWord,
       highlightAll: !!detail.highlightAll,
       findPrevious: !!detail.findPrevious,
+      matchDiacritics: !!detail.matchDiacritics,
     });
   };
 
diff --git a/web/pdf_find_bar.js b/web/pdf_find_bar.js
index 3388711d935fd7..cd00f8d438b245 100644
--- a/web/pdf_find_bar.js
+++ b/web/pdf_find_bar.js
@@ -33,6 +33,7 @@ class PDFFindBar {
     this.highlightAll = options.highlightAllCheckbox;
     this.caseSensitive = options.caseSensitiveCheckbox;
     this.entireWord = options.entireWordCheckbox;
+    this.matchDiacritics = options.matchDiacriticsCheckbox;
     this.findMsg = options.findMsg;
     this.findResultsCount = options.findResultsCount;
     this.findPreviousButton = options.findPreviousButton;
@@ -82,6 +83,10 @@ class PDFFindBar {
       this.dispatchEvent("entirewordchange");
     });
 
+    this.matchDiacritics.addEventListener("click", () => {
+      this.dispatchEvent("diacriticmatchingchange");
+    });
+
     this.eventBus._on("resize", this._adjustWidth.bind(this));
   }
 
@@ -99,6 +104,7 @@ class PDFFindBar {
       entireWord: this.entireWord.checked,
       highlightAll: this.highlightAll.checked,
       findPrevious: findPrev,
+      matchDiacritics: this.matchDiacritics.checked,
     });
   }
 
diff --git a/web/pdf_find_controller.js b/web/pdf_find_controller.js
index 7eeb01e65e8658..e1c8fdb317643a 100644
--- a/web/pdf_find_controller.js
+++ b/web/pdf_find_controller.js
@@ -13,9 +13,9 @@
  * limitations under the License.
  */
 
+import { binarySearchFirstItem, scrollIntoView } from "./ui_utils.js";
 import { createPromiseCapability } from "pdfjs-lib";
 import { getCharacterType } from "./pdf_find_utils.js";
-import { scrollIntoView } from "./ui_utils.js";
 
 const FindState = {
   FOUND: 0,
@@ -42,47 +42,141 @@ const CHARACTERS_TO_NORMALIZE = {
   "\u00BE": "3/4", // Vulgar fraction three quarters
 };
 
+const diacriticsRegExp = /\p{Mn}+/gu;
+const escapeRegExp = /[.*+\-?^${}()|[\]\\]/g;
+const prepareNoDiacriticsRegExp = /(\p{Mn})|(\p{L})/gu;
+const whitespacesRegExp = /\s+/g;
+const notDiacriticFromEndRegExp = /([^\p{Mn}])\p{Mn}*$/u;
+const notDiacriticFromStartRegExp = /^\p{Mn}*([^\p{Mn}])/u;
+
 let normalizationRegex = null;
 function normalize(text) {
+  // The diacritics in the text or in the query can be composed or not.
+  // So we use a decomposed text using NFD (and the same for the query)
+  // in order to be sure that diacritics are in the same order.
+
   if (!normalizationRegex) {
     // Compile the regular expression for text normalization once.
     const replace = Object.keys(CHARACTERS_TO_NORMALIZE).join("");
-    normalizationRegex = new RegExp(`[${replace}]`, "g");
+    normalizationRegex = new RegExp(
+      `([${replace}])|(-\\n)|(\\n)|(\\p{Mn}+)`,
+      "gum"
+    );
   }
-  let diffs = null;
-  const normalizedText = text.replace(normalizationRegex, function (ch, index) {
-    const normalizedCh = CHARACTERS_TO_NORMALIZE[ch],
-      diff = normalizedCh.length - ch.length;
-    if (diff !== 0) {
-      (diffs ||= []).push([index, diff]);
+
+  // The goal of this function is to normalize the string and
+  // be able to get from an index in the new string the
+  // corresponding index in the old string.
+  // For example if we have: abCd12ef456gh where C is replaced by ccc
+  // and numbers replaced by nothing (it's the case for diacritics), then
+  // we'll obtain the normalized string: abcccdefgh.
+  // So here the reverse map is: [0,1,2,2,2,3,6,7,11,12].
+
+  // The goal is to obtain the array: [[0, 0], [3, -1], [4, -2],
+  // [6, 0], [8, 3]].
+  // which can be used like this:
+  //  - let's say that i is the index in the new string and j the index
+  //    in the old string.
+  //  - if i is in [0; 3[ then j = i + 0
+  //  - if i is in [3; 4[ then j = i - 1
+  //  - if i is in [4; 6[ then j = i - 2
+  //  ...
+  // Thanks to a binary search it's easy to know where is i and what's the
+  // shift.
+  // Let's say that the last entry in the array is [x, s] and we have a
+  // substitution at index y (old string) which will replace o chars by n chars.
+  // Firstly, if o === n, then no need to add a new entry: the shift is
+  // the same.
+  // Secondly, if o < n, then we push the n - o elements:
+  // [y - (s - 1), s - 1], [y - (s - 2), s - 2], ...
+  // Thirdly, if o > n, then we push the element: [y - (s - n), o + s - n]
+
+  // Collect diacritics length and positions.
+  const rawDiacriticsPositions = [];
+  let m;
+  while ((m = diacriticsRegExp.exec(text)) !== null) {
+    rawDiacriticsPositions.push([m[0].length, m.index]);
+  }
+
+  let normalized = text.normalize("NFD");
+  const positions = [[0, 0]];
+  let k = 0;
+  let shift = 0;
+  let shiftOrigin = 0;
+  let eol = 0;
+  normalized = normalized.replace(
+    normalizationRegex,
+    (match, p1, p2, p3, p4, i) => {
+      i -= shiftOrigin;
+      if (p1) {
+        // Fractions...
+        const replacement = CHARACTERS_TO_NORMALIZE[match];
+        const jj = replacement.length;
+        for (let j = 1; j < jj; j++) {
+          positions.push([i - shift + j, shift - j]);
+        }
+        shift -= jj - 1;
+        return replacement;
+      }
+
+      if (p2) {
+        // "-\n" is removed because a hyphen at the end of a line
+        // is likely here to mark a break in a word.
+        positions.push([i - shift, 1 + shift]);
+        shift += 1;
+        shiftOrigin += 1;
+        eol += 1;
+        return "";
+      }
+
+      if (p3) {
+        // eol is replaced by space: "foo\nbar" is likely equivalent to
+        // "foo bar".
+        positions.push([i - shift + 1, shift - 1]);
+        shift -= 1;
+        shiftOrigin += 1;
+        eol += 1;
+        return " ";
+      }
+
+      // Diacritics.
+      let jj = match.length;
+      if (i + eol === rawDiacriticsPositions?.[k]?.[1]) {
+        jj -= rawDiacriticsPositions[k][0];
+        ++k;
+      }
+
+      for (let j = 1; j < jj + 1; j++) {
+        // i is the position of the first diacritic
+        // so (i - 1) is the position for the letter before.
+        positions.push([i - 1 - shift + j, shift - j]);
+      }
+      shift -= jj;
+      shiftOrigin += jj;
+
+      return match;
     }
-    return normalizedCh;
-  });
+  );
 
-  return [normalizedText, diffs];
+  positions.push([normalized.length, shift]);
+
+  return [normalized, positions];
 }
 
-// Determine the original, non-normalized, match index such that highlighting of
-// search results is correct in the `textLayer` for strings containing e.g. "½"
-// characters; essentially "inverting" the result of the `normalize` function.
-function getOriginalIndex(matchIndex, diffs = null) {
-  if (!diffs) {
-    return matchIndex;
+function getOriginalIndex(positions, pos, len) {
+  const start = pos;
+  const end = pos + len - 1;
+  let i = binarySearchFirstItem(positions, x => x[0] >= start);
+  if (positions[i][0] > start) {
+    --i;
   }
-  let totalDiff = 0;
-  for (const [index, diff] of diffs) {
-    const currentIndex = index + totalDiff;
 
-    if (currentIndex >= matchIndex) {
-      break;
-    }
-    if (currentIndex + diff > matchIndex) {
-      totalDiff += matchIndex - currentIndex;
-      break;
-    }
-    totalDiff += diff;
+  let j = binarySearchFirstItem(positions, x => x[0] >= end, i);
+  if (positions[j][0] > end) {
+    --j;
   }
-  return matchIndex - totalDiff;
+
+  return [start + positions[i][1], len + positions[j][1] - positions[i][1]];
 }
 
 /**
@@ -302,192 +396,111 @@ class PDFFindController {
     return true;
   }
 
-  /**
-   * Helper for multi-term search that fills the `matchesWithLength` array
-   * and handles cases where one search term includes another search term (for
-   * example, "tamed tame" or "this is"). It looks for intersecting terms in
-   * the `matches` and keeps elements with a longer match length.
-   */
-  _prepareMatches(matchesWithLength, matches, matchesLength) {
-    function isSubTerm(currentIndex) {
-      const currentElem = matchesWithLength[currentIndex];
-      const nextElem = matchesWithLength[currentIndex + 1];
-
-      // Check for cases like "TAMEd TAME".
-      if (
-        currentIndex < matchesWithLength.length - 1 &&
-        currentElem.match === nextElem.match
-      ) {
-        currentElem.skipped = true;
-        return true;
-      }
-
-      // Check for cases like "thIS IS".
-      for (let i = currentIndex - 1; i >= 0; i--) {
-        const prevElem = matchesWithLength[i];
-        if (prevElem.skipped) {
-          continue;
-        }
-        if (prevElem.match + prevElem.matchLength < currentElem.match) {
-          break;
-        }
-        if (
-          prevElem.match + prevElem.matchLength >=
-          currentElem.match + currentElem.matchLength
-        ) {
-          currentElem.skipped = true;
-          return true;
-        }
-      }
-      return false;
-    }
-
-    // Sort the array of `{ match: <match>, matchLength: <matchLength> }`
-    // objects on increasing index first and on the length otherwise.
-    matchesWithLength.sort(function (a, b) {
-      return a.match === b.match
-        ? a.matchLength - b.matchLength
-        : a.match - b.match;
-    });
-    for (let i = 0, len = matchesWithLength.length; i < len; i++) {
-      if (isSubTerm(i)) {
-        continue;
-      }
-      matches.push(matchesWithLength[i].match);
-      matchesLength.push(matchesWithLength[i].matchLength);
-    }
-  }
-
   /**
    * Determine if the search query constitutes a "whole word", by comparing the
    * first/last character type with the preceding/following character type.
    */
   _isEntireWord(content, startIdx, length) {
-    if (startIdx > 0) {
+    let match = content.slice(0, startIdx).match(notDiacriticFromEndRegExp);
+    if (match) {
       const first = content.charCodeAt(startIdx);
-      const limit = content.charCodeAt(startIdx - 1);
-      if (getCharacterType(first) === getCharacterType(limit)) {
+      if (getCharacterType(first) === getCharacterType(match[1].charCodeAt(0))) {
         return false;
       }
     }
-    const endIdx = startIdx + length - 1;
-    if (endIdx < content.length - 1) {
-      const last = content.charCodeAt(endIdx);
-      const limit = content.charCodeAt(endIdx + 1);
-      if (getCharacterType(last) === getCharacterType(limit)) {
+
+    match = content.slice(startIdx + length).match(notDiacriticFromStartRegExp);
+    if (match) {
+      const last = content.charCodeAt(startIdx + length - 1);
+      if (getCharacterType(last) === getCharacterType(match[1].charCodeAt(0))) {
         return false;
       }
     }
+
     return true;
   }
 
-  _calculatePhraseMatch(query, pageIndex, pageContent, pageDiffs, entireWord) {
+  _calculateRegExpMatch(query, entireWord, pageIndex, pageContent) {
     const matches = [],
       matchesLength = [];
-    const queryLen = query.length;
 
-    let matchIdx = -queryLen;
-    while (true) {
-      matchIdx = pageContent.indexOf(query, matchIdx + queryLen);
-      if (matchIdx === -1) {
-        break;
-      }
-      if (entireWord && !this._isEntireWord(pageContent, matchIdx, queryLen)) {
+    const diffs = this._pageDiffs[pageIndex];
+    let match;
+    while ((match = query.exec(pageContent)) !== null) {
+      if (
+        entireWord &&
+        !this._isEntireWord(pageContent, match.index, match[0].length)
+      ) {
         continue;
       }
-      const originalMatchIdx = getOriginalIndex(matchIdx, pageDiffs),
-        matchEnd = matchIdx + queryLen - 1,
-        originalQueryLen =
-          getOriginalIndex(matchEnd, pageDiffs) - originalMatchIdx + 1;
 
-      matches.push(originalMatchIdx);
-      matchesLength.push(originalQueryLen);
+      const [matchPos, matchLen] = getOriginalIndex(
+        diffs,
+        match.index,
+        match[0].length
+      );
+      matches.push(matchPos);
+      matchesLength.push(matchLen);
     }
     this._pageMatches[pageIndex] = matches;
     this._pageMatchesLength[pageIndex] = matchesLength;
   }
 
-  _calculateWordMatch(query, pageIndex, pageContent, pageDiffs, entireWord) {
-    const matchesWithLength = [];
+  _convertToRegExpString(query) {
+    const { matchDiacritics } = this._state;
 
-    // Divide the query into pieces and search for text in each piece.
-    const queryArray = query.match(/\S+/g);
-    for (let i = 0, len = queryArray.length; i < len; i++) {
-      const subquery = queryArray[i];
-      const subqueryLen = subquery.length;
+    // Escape characters like *+?... to not interfere with regexp syntax.
+    query = query.replace(escapeRegExp, "\\$&");
 
-      let matchIdx = -subqueryLen;
-      while (true) {
-        matchIdx = pageContent.indexOf(subquery, matchIdx + subqueryLen);
-        if (matchIdx === -1) {
-          break;
-        }
-        if (
-          entireWord &&
-          !this._isEntireWord(pageContent, matchIdx, subqueryLen)
-        ) {
-          continue;
+    if (matchDiacritics) {
+      // aX mustn't match aXY.
+      query = `${query}(?=[^\\p{Mn}])`;
+    } else {
+      query = query.replace(prepareNoDiacriticsRegExp, (match, p1) => {
+        if (p1) {
+          // Diacritics are removed.
+          return "";
         }
-        const originalMatchIdx = getOriginalIndex(matchIdx, pageDiffs),
-          matchEnd = matchIdx + subqueryLen - 1,
-          originalQueryLen =
-            getOriginalIndex(matchEnd, pageDiffs) - originalMatchIdx + 1;
-
-        // Other searches do not, so we store the length.
-        matchesWithLength.push({
-          match: originalMatchIdx,
-          matchLength: originalQueryLen,
-          skipped: false,
-        });
-      }
+        // A letter has been matched and it can be followed by any diacritics
+        // in normalized text.
+        return `${match}\\p{Mn}*`;
+      });
     }
 
-    // Prepare arrays for storing the matches.
-    this._pageMatchesLength[pageIndex] = [];
-    this._pageMatches[pageIndex] = [];
+    // Replace spaces by \s+ to be sure to match any spaces.
+    // We must do it after the if (matchDiacritics) block to avoid
+    // wrong things with the "s".
+    query = query.replace(whitespacesRegExp, "\\s+");
 
-    // Sort `matchesWithLength`, remove intersecting terms and put the result
-    // into the two arrays.
-    this._prepareMatches(
-      matchesWithLength,
-      this._pageMatches[pageIndex],
-      this._pageMatchesLength[pageIndex]
-    );
+    return query;
   }
 
   _calculateMatch(pageIndex) {
-    let pageContent = this._pageContents[pageIndex];
-    const pageDiffs = this._pageDiffs[pageIndex];
     let query = this._query;
-    const { caseSensitive, entireWord, phraseSearch } = this._state;
-
     if (query.length === 0) {
       // Do nothing: the matches should be wiped out already.
       return;
     }
 
-    if (!caseSensitive) {
-      pageContent = pageContent.toLowerCase();
-      query = query.toLowerCase();
-    }
+    const { caseSensitive, entireWord, phraseSearch } = this._state;
+    const pageContent = this._pageContents[pageIndex];
 
+    const flags = caseSensitive ? "gu" : "gui";
     if (phraseSearch) {
-      this._calculatePhraseMatch(
-        query,
-        pageIndex,
-        pageContent,
-        pageDiffs,
-        entireWord
-      );
+      query = this._convertToRegExpString(query);
     } else {
-      this._calculateWordMatch(
-        query,
-        pageIndex,
-        pageContent,
-        pageDiffs,
-        entireWord
-      );
+      // Words are sorted in reverse order to be sure that "foobar" is matched
+      // before "foo" in case the query is "foobar foo".
+      query = query
+        .match(/\S+/g)
+        .sort()
+        .reverse()
+        .map(q => `(${this._convertToRegExpString(q)})`)
+        .join("|");
     }
+    query = new RegExp(query, flags);
+
+    this._calculateRegExpMatch(query, entireWord, pageIndex, pageContent);
 
     // When `highlightAll` is set, ensure that the matches on previously
     // rendered (and still active) pages are correctly highlighted.
@@ -533,6 +546,9 @@ class PDFFindController {
 
               for (let j = 0, jj = textItems.length; j < jj; j++) {
                 strBuf.push(textItems[j].str);
+                if (textItems[j].hasEOL) {
+                  strBuf.push("\n");
+                }
               }
 
               // Store the normalized page content (text items) as one string.
diff --git a/web/ui_utils.js b/web/ui_utils.js
index d9af9235b76461..dd26ddbba572c4 100644
--- a/web/ui_utils.js
+++ b/web/ui_utils.js
@@ -204,8 +204,8 @@ function parseQueryString(query) {
  * @returns {number} Index of the first array element to pass the test,
  *                   or |items.length| if no such element exists.
  */
-function binarySearchFirstItem(items, condition) {
-  let minIndex = 0;
+function binarySearchFirstItem(items, condition, start = 0) {
+  let minIndex = start;
   let maxIndex = items.length - 1;
 
   if (maxIndex < 0 || !condition(items[maxIndex])) {
diff --git a/web/viewer.html b/web/viewer.html
index c390ba62e88693..57f4e1021caf97 100644
--- a/web/viewer.html
+++ b/web/viewer.html
@@ -138,8 +138,13 @@
             <label for="findMatchCase" class="toolbarLabel" data-l10n-id="find_match_case_label">Match case</label>
           </div>
           <div id="findbarOptionsTwoContainer">
-            <input type="checkbox" id="findEntireWord" class="toolbarField" tabindex="96">
+            <input type="checkbox" id="findMatchDiacritics" class="toolbarField" tabindex="96">
+            <label for="findMatchDiacritics" class="toolbarLabel" data-l10n-id="find_match_diacritics_label">Match Diacritics</label>
+            <input type="checkbox" id="findEntireWord" class="toolbarField" tabindex="97">
             <label for="findEntireWord" class="toolbarLabel" data-l10n-id="find_entire_word_label">Whole words</label>
+          </div>
+
+          <div id="findbarOptionsThreeContainer">
             <span id="findResultsCount" class="toolbarLabel hidden"></span>
           </div>
 
diff --git a/web/viewer.js b/web/viewer.js
index 8c843566010575..fdf6ecbfb249b7 100644
--- a/web/viewer.js
+++ b/web/viewer.js
@@ -155,6 +155,7 @@ function getViewerConfiguration() {
       highlightAllCheckbox: document.getElementById("findHighlightAll"),
       caseSensitiveCheckbox: document.getElementById("findMatchCase"),
       entireWordCheckbox: document.getElementById("findEntireWord"),
+      matchDiacriticsCheckbox: document.getElementById("findMatchDiacritics"),
       findMsg: document.getElementById("findMsg"),
       findResultsCount: document.getElementById("findResultsCount"),
       findPreviousButton: document.getElementById("findPrevious"),