From c670bbd5b099da2e24acb04a0a57704e158b5f96 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Tue, 26 Sep 2023 16:07:04 -0700 Subject: [PATCH] ICU-22420 GB18030 change 3 mappings for GBK/web compat --- icu4c/source/data/mappings/gb18030-2022.ucm | 23 ++++++++++++++++-- icu4c/source/test/testdata/conversion.txt | 8 ++++++ .../icu/impl/data/icudt74b/gb18030-2022.cnv | Bin 232816 -> 233024 bytes .../ibm/icu/dev/data/testdata/conversion.res | Bin 37280 -> 37360 bytes 4 files changed, 29 insertions(+), 2 deletions(-) diff --git a/icu4c/source/data/mappings/gb18030-2022.ucm b/icu4c/source/data/mappings/gb18030-2022.ucm index c85538940a55..7f02d06856bf 100644 --- a/icu4c/source/data/mappings/gb18030-2022.ucm +++ b/icu4c/source/data/mappings/gb18030-2022.ucm @@ -5,6 +5,12 @@ # ICU codepage data for GB 18030-2022 +# This data file was originally generated from the mapping tables +# published with the original (year 2000) GB18030 standard. +# It has been updated for the 2005 version of GB18030 (ICU-8274 & ICU-8427) +# and for the 2022 version (ICU-22357). +# ICU-22420 then made minor mapping changes for GBK and web data/WHATWG compatibility. + "gb18030-2022" "AXXXX" 4 @@ -23,7 +29,8 @@ # The second line is commented out (and does not count) # because the state table is hand-optimized and does not use what would be # the natural path for the encoding scheme. - 0-7f, 81:6, 82:7, 83:8, 84:9, 85-fe:3 +# ICU-22420 makes 0x80 valid for the GBK encoding of the Euro sign. + 0-80, 81:6, 82:7, 83:8, 84:9, 85-fe:3 # 30-39:2, 40-7e, 80-fe 81-fe:2 30-39 @@ -56,6 +63,18 @@ CHARMAP +# ICU-22420 reverse fallbacks for compatibility with GBK and other web data as in WHATWG. +# U+20AC = EURO SIGN (normally \xA2\xE3) +# U+3000 = IDEOGRAPHIC SPACE (normally \xA1\xA1) +# +# PUA U+E5E5 used to round-trip to \xA3\xA0, as specified in GB18030. +# Now that \xA3\xA0 maps to U+3000 (“reverse fallback” mapping), +# we use a “good one-way” mapping from U+E5E5 to \xA3\xA0 +# for maximum compatibility with previous behavior. + \x80 |3 + \xA3\xA0 |3 + \xA3\xA0 |4 + \x00 |0 \x01 |0 \x02 |0 @@ -29602,7 +29621,7 @@ CHARMAP \xA3\x9D |0 \xA3\x9E |0 \xA3\x9F |0 - \xA3\xA0 |0 +# \xA3\xA0 |0 \xA4\x40 |0 \xA4\x41 |0 \xA4\x42 |0 diff --git a/icu4c/source/test/testdata/conversion.txt b/icu4c/source/test/testdata/conversion.txt index 290925ef31fa..8b430a53f89a 100644 --- a/icu4c/source/test/testdata/conversion.txt +++ b/icu4c/source/test/testdata/conversion.txt @@ -115,6 +115,14 @@ conversion:table(nofallback) { :intvector{ 0,1,5,5,5,5,6,7,9,9,9,9,10,11,12,13,13,13,13,14,15,17,18,20 }, :int{1}, :int{0}, "", "&C", :bin{""} } + // GB18030: ICU-22420 adds two reverse fallbacks + { + "gb18030", + :bin{ 80a1a1a2e3a3a0 }, + "\u20AC\u3000\u20AC\u3000", + :intvector{ 0,1,3,5 }, + :int{1}, :int{0}, "", "&C", :bin{""} + } { "UTF-8", :bin{ 61f1808182f180813cf18081fff180ff3cf1ff3c3e7a }, diff --git a/icu4j/main/charset/src/main/resources/com/ibm/icu/impl/data/icudt74b/gb18030-2022.cnv b/icu4j/main/charset/src/main/resources/com/ibm/icu/impl/data/icudt74b/gb18030-2022.cnv index 4731af8d1f1b31eca610e45e67e544e147805bc1..fdc914760004c0644381babc4dfebdc53bb5126c 100644 GIT binary patch delta 293 zcmew`i|@c3z6qjCj0_XSUkC^@FzD=HU@Tn1z&JyKxogH|envJXW(9>cnY>q(UscE^#9-H1FtGqGv05vn#Q=@Y8umn8%zug+hy{Zr^`rNF)%RPVPIhL z0n$1^dI^wb0n$tijMD{+8BLg(wlGdyC{ka=U`1y*2j~(wm@Zq)SUP=n4|4+}JIGzh Z)2(}%l^B^PE|i#_(96uh;>56c0RXqjJR1N2 delta 159 zcmX>whwsBIz6qjC42%=SUkC^>Fz9SxU@V-&z&JyIxog5^envJX=7RtKH%l=0Xl-7j zE47#D>C??8URADU{NL_0jd8ovG^PhPm=-VCu9D9@T}GObfq|isfq`)vkX8iJH-I!V zkY;3HoUT~RXu{0+gmL0R5k^N4uW`C)FLT4h4GPnL_A)C81C=%al>)Uhtbzgn4zMxl diff --git a/icu4j/main/core/src/test/resources/com/ibm/icu/dev/data/testdata/conversion.res b/icu4j/main/core/src/test/resources/com/ibm/icu/dev/data/testdata/conversion.res index a61b4b8d676cfba5822470e352c0fc30e40332a6..8d66e56168803045f3591b90c6d98d7a4c8657db 100644 GIT binary patch delta 7695 zcmaKw4^&iT`p4fpe}n->uYU?~=FT66ZNQ-hB^=wa2E{}J1s7XZQD^$0p`i|j2?;f0 z&|*SKzV5;~(rSyH3_VF}yVAyHXLFS{H`}#S?N4@hT5a|BlU6#pKdmT#-{;N@quk`4 z^SyKL^M0P^ectDN-+M3Y>rxGMsoHE(HDfFeGBTFFIV`5ey}(%fme=xjtKuz(vKi^f zUsM-Nx$TKxFrDg{>bkH-bycX+)M+-VJIDQ#X;jD9i>md)VQNL&^&46r;n%GB+IAlk zN_k#dvi1bxGd^tfL~Q%gl7$nzH_a^heT=1vEdJ#Xi+=^F36vfk@IAq=q&b8yxW#6` z^=z9%s`G{E_dx$iK{weP;Cf{Glr2XXzbl;_jp;dpiI>RN>(c|;Rv%N<@S*gU)L)BC z=MFL5oJc5?Aky*fAH0+L7FxY%U8iWRzQZA`;hlF-^xivcLNOnZudm(VlFAU>0nelG z{H-FzB72uGlMmTx`0*Jn+E;u`+s`{P$^;`H$}k8Qcv5DmWc0CdGewr*39*EHMT~|_ zI}t1iL^+6+3gYcdC&kM&@MD>Jo-)lKeFlCK_yG8A3ch4oh4wHC{sQlv<`MSuYtssZ z9-il@!TEJZxp0(=S(IB=mJ^m0S$5&?{7{xbFHb8iH;CU zyeraMthUBG^QE2$kq=_Nf@rw2OzMje7-6DUK@8nlDp9Z0OD%|c1>w7^MceCRYC9jg zORl!lEp_1j|KRZ^Jl=|UApOXk`<#WeHZ-$&k2AVt*E>C0c)=!SQ&K(E^yxi`+Pr7PiU-Z8r?(s$my&9n}L zC-|v*m#b6)&$#a$A)XK3=hdD?xPR~x4+UB4aY@(cUSt!_gxG|S6bbZs9LtVk=y4z} zf%tqhK_%_Rfgj;|^cAl51f%iJ>Q)d;=)K$YXeJu@)oOWbl%Fxz721dKhUk;DI`ef4$j5mB+8-JETF( z#8wbjL40*Hfy`&mtEFDGkBDp{YBh0UgouspgL(P1*|lB}sL#Y`ePTrR{azx*9RyJT zqDT?r(EW{b>X7L$h)NI}MiX){d2ZF<7#{kJg`g;hx?Jbg^8<7Iktpqm@X8~ruH z72uq`xESZ);xe2+k*_m~E40fo63*L-lkprrP+S(d*Od6B+mYXDkxfCLQ=(7KSl(?V zm0FqL!zHCcKQCEQpzX&AJu7W54vG8UB%cc537cCcII*2k5%SRJOjz4%=2<%~uX)rbIP#KbP0;3~h z(}sNbR3z81O>mbJkg_O8+qU_odig zAI5C#_pot1kM4`v$TuMs{^wy6&_9)szBS^S@tYF^nH|&d7mWJnF`H1J9Ek=SJz)R= zt74oMgAm#v`htp7Mtv0b&)jTN-4CN`6;vDpHCQ%@%MkG0m`#o6ZsJ|2!q%8gg`^XH zgT#~gm1Ql$OFX!oJ`fKqcjJ71xd-R;a<9%JvfJOoF!u4*a+hG?$ICr9YgXh7e!gIZ z2j}hbxgY0W@%WW)eEH8?>D7H9vgA_uz0ME8R>v={^x&Maiq30s{sTX-%8iA7ew9}@ z4eXm>yLpkez3!y(u$}Ya?}iXd=BLZ=hFRka=yr-MMUA8fd8@BczW{eO4C}iP zOTic8awDJhP>Y@zR9e>q>ta~V)8m3KzWB>?UHiHjV;YDK+7fC=3t^F{$g2kRMTaR{ zj(qTtP0+hxYDVRb!t~s(iduau3}?`L2MnJeatiOPsM70T{SsNU!upkL&01Zh4~i@` zIc!apWNRO+9k5~(#_tWW)H$-XXpK`}3#%7>kB43^* ze`-7WH^AD-+a9iv<6#kK7sArFtFm6-3xrbqBMg)xLRD7i%V0%{ac5zr6cKLiB^}aB zY!0!hX4x`pomWB!Q_Hano`*I1S&I75!xD4#`Z|YX0#WZ{>^~r$s74t{BgF7Je<~i1 zN<+-CK3m zlmYr1j6)M*hLM+5G#`;ahV;yQ{9<)#h~L9YHafL* z^)jxEeBVYptvv(gU-dVP?YF#jV+tSK7|!A37}4M3S$>1;`>~I)1o-~9?7QAyEL$&N z3iDwdZiHvULXy=YT71@9QaD|qWBgY4_~$-_22D*K+=R4iKq;b9w_c*djo67{hM z>#!@tjC-TgP0?Y)A8?O`3xl;x&nV-OD1p|~uwg+MkMaS(nUFV z#+-{)Z#**w^)cGihV;4g2*ZLHNBlWnUE{XW)#WifzvQQC4D?(u(n6gQVv zvPu(>aU-l#c+uukogGyba5s$~*j%W?wo%37&clZ{m(kvpO0nQwuqd}k^T68Z;Mk)S8vK2X=saGPj`6F(MFQqm+LdTHk`xRF{1y2H`E$r-%F@sJ$(Ny`}Wm3 zt$5~2crwWmzf;!2q~=L=PHTtAOqcYJhD_Ktb@Ic>WJBc(KxIZL$_u^X zPwBdRISyuvDsQ5%JNdbc*2r2k6>#I@;W(v;;{mlSN{z&+dpsN`GJ~c~QR0R;wS3@l zhlEdCQzNYHi1X7ZH4-O%tIJA6dnBJdQR0R;dwIjwGC9scyuJnzXJiMB#2MOJF2}*V z2$Mma`JOmu`TBahbQ!%}0F?sjf1}h$oSu58bQA<;gS`P_WHv^K;re`OhRDn`6Wc?~ z9GwXQ!ZlvekT1Oog7(KY5ROPMF`}=*C2a%YK^`+f%vBKL6J>HQrP#kBh@U8krYHPz z3_sqG--6hpAjBt&=~ZRscqt5k=ui+1PnOBVVbt|Q5Yf8EG93a@hZv}{cQ!kW&bX5?DL*6v4_gU*d-XW|_JQ;8xT(d@xWb`9)@l3-@NxM`Mu}YITBBZ$#K` z9mitlok6oq<%~hO_(0HUbt6{3k1?!r%lv3>vCJ)nJZqaxBB~73BcK$2Uh(zYD)a>) zuovPMfbjEyZ8fr`1>@+4Wf$++VdQmBd8M`B4q{GM!m6zHm8abLK@h06x)Q|uysgp5 zf70mCPk{9tGFS!c1z5}F%&%gF;p@oqH8Q8Vj~pVi;+fIpAZ5_R7gbB)NzFD%0(%<0 zWWlOjfkn*@+6mTk=y@rK&lI`sZgxpKL41iA5{PRG;$m|R5ouU?aoG21qJpT~9wr>f z?iV0tDTw%I3gsBZs9YR~=zEXRA=jw-nNr%-X;tXuKnPYvjL8?SA#rCvbJpNG(Rb4K`z-rwTTVx4K8LR5S*woFyo;*i|!+|)XKda{qP z*KTyfg2Y78rneaoO{E)#Er=_vcKQRt zR)}aXfLH`VP7XieV$OQjmP-VdmMS|GX(b9Vy}ob`Ufik$wWb@t(HyFH+q223Cq$TV zs1g!KGQ`%i)l%W5XvtJbBl&BnuSV*NUwPK9rP1udJ5VPp>66vp)Z2W;Di=J4?&xtZP4@33s5)I2mJ{efW8Vc zqb|q-##CHnLb=df2%Q_(K${^r8()InfZm1v3VjCsC&(lXG!>c-&4;|uW6;yk&mj0o zuR(7?A3&c%LqTTJLg>>3KhwfsfSHi3>2atPdJ#GX{SCStWM&nFPRui)0%$eV4E+)U zXU0g(pF`gUnI#3nKrHt_i=b7|lh7}qx1o=qOF?Eu0#*sS6Pg{s2_?0nRMuCZ;}AeA zro)P$X;$cd2%}5GRHb1`(hxB12n06m&k&ML8-&2y5+MY)ArV_1v;@KcZ5tpA()Kj; zJcNPTehuXwXDU|07Ib5NZ11-*v74#ckJ*2*`z-9XPc7{B-4>Q!3C6;5ySq!6_;cJ> F{Xfi92tWV; delta 7453 zcmaJ_4OmpynLc-ZfB|kYiVEY*Wth1r=wQGf9OD=V#RMxTC|VjpnS?3{5I`|v17^lx z3K3!mU$$lwvf3qS4Z2TFur)EJYOoD$%(_{#(Nw$MY<_ejo6wqUjAiS-=gtg>SDEK| z@7(*I_xsNGedp(%nSn0do-SRxEEO}xA^{s?$)!OsJ)Q-|qBgyj^_;HfKXsocdOKqJ znMrq8cTMo={wl0D)EOSu?~3>{Gw2SppXmK^hRY$h5!^ebWl;w;! z9!Cd^1|8lIx5pVT9H%#&cCo_8nA6OneyOmiS7~rc0u4H4p`VguE5%G5NBMGsSQpZ~ z2hIDcrcIV%e;}l}ASVf?j^v<0dUBFrp@PtpKe@`->|?rW8c1%n956GJM`5M~waAwP zQ85I0oSvK1Yk3o`rD%Ov(^@;(CHSawvXuutSr(SidG-G4WVg5i-Y3KI5G)TPIGIXQ z+J)&fkit`pN^Ld1>|@4WdOo#Ch^K*6tMFeGJEc&J_pylSW)|&LSade*lBs=49A{Z9 z*kZ72z}o2K6q(yiu?j~hc4~^KKrt2yKNQhyRp5B=16?W6rsX4+v%5v4= zdeBuYyiVpc-fLP~I+&8Q6yamqpJo-+K|ujb$iU7S#M_tVmUiR$pUo`BrLdU0G_|lz z%a-rP4F13#79+B|; z7sT}zEPktIUX||Uqib-}f%Nbs;~bRW%o*G=H6sT#sm-toE^5mt=Gh4=%`Dcguvl5s zuVH$#QB_d;^k!5mVTK>q=Vv^FYwFBXmSk9B8peL8u{6)>Qd_SpOpMgDo}ASsBvWlB zkEkVcx24w1MASws(X$sxd-d(Sb&z-yl_QDoAdCzK{Fi5HM zdj&o9&o4E81b2U;0xx&+kk>6;;m>Av=V^uA`H?1oBVO12uOszHC@w?sKQ}9QN%@wA zFX4LVn{)VJQ3!mgaofMa_Wy3SrL3$JX)F}W%*-64FteypY|nBFU(%5*J`88GJQ9!P z6R6Xno}p1^EGQDanBq@Dkpo38rg#GF2RkjqmVtdpo%m@BbH!IfY!ldKRol5RTWSOw z4G+7)c4_(;T$pb}RtA|0veg=9*GTF!EKo!9$0IFt9{MKRC0#_7ABEyOD83)5KxXq6 z)r;uWxX#SRpblfks1crDC4IGwU6vVT$n%n4TcSQ-r`DQE^^pN`xn1wT!0>%dm4WbDe!$me}P(-92Iw< zIoHg_b}4M^J{ToXRjym&d$bcQ3OzPFS>coiXK>N6+-$C315lu|v0sf0ABv}4BCNph{E=$&TtWhM=6Qv8=)8KhE%kya!Sx;LT$(G)rb|n6aLrhjk85CA5w2&| z`_%k0<0?eIliKs+u}|*JFA6;y3TmV=h_BSl#-Yz~;hk+b?)HLmqpG06f zdy#1sbuRZ<{vCGZ3e@3PaU0BcV0pfTItjmlq8^^Z?*J|cDoR!yH9`@2kdg{{RC5Y< zaf^uWVeu|3PHBAh7w+QlJVlS?Sl5C-Z7MFN&ZBeg%xc&=8t?}S2 zy=YCTso%`v3t{&w+7GUdF0Jw6n!1)>AHww*?Op4^p7+_>Qqv@8`J{P;@=85|iCRiM zu_kbz`fyfPSUjC7o(0z>cH-b>*f~=$3=|F&G%t^j1ZFQ4lsAYy(C`?42ZqN8U*%=u3b2SV@*}W3M)-T^vI+6U zJfX0OF)GvYaH)hICZY@xZD8|jnfe2S@eiUqPKFAXWPzgG$Jn<}Xn%y{Rn%Cpq9Xig zZ3@}*eI#iAVMUGF!;6Sy78EaQ6u!!Q2^ma82Es-t`bNfp7tF~@SvW@nm3*^}syde_ zLwymm@;cOi9jO+Mo9Th{B@QHIMbQmzg;`xUcWFj&g!8zBp|N^Hs_dIp)c1(&z(8Az zcrpqQR$#?m^Px~^E2h|LyQ(UO;C8A?%B!|#a$ohMs2b2_uqIE*#y5_62Ci?WJ=N(( zen+^o(Li+ypM2IXEatPw?H6>uI)UcZ1Y_tK#rq*@tg))Ly*|dGVf%a4_DD^>%AUp$ z&Ifyz5^85UFqN%ekK*kLRn^*6)%Dw}Yaux)Y1`(jp?7XjL$KwJ;yjHY>D^L zjCuG^3bVC_hnss>*TUJ2YGG`c%Y47F?G7u@`bTh>5VkH#Lt!xeY`@}m3NWk+Pa9hg zRn^%uG3#wdM^W{nO>a$J*o|*YhzPDfMaSwq4t{rT6w3=_+hpbYg6*=8v2lp?D>dtq zO*2*YZ;1GDg-MJ$Hx-)V@HB|033PQ+t_jOVr^l0v@*XYXYgfV&(a!**T_VRG_3&J! zBJ`37Me%U1;fhG5WZP_a;4cNK^7au+HB=H(@nvwEx=DWO+-%R}zFJ37J%ctrp%&m9 zb&KZu7inN~x{=@gQk_v*^(i8MIux8y(qXkeZS}!O-yOyKdvvnis@mdpENn*Be^PB@ zx1>9;=c>O-ur#GFZ&8b{sRSy53M0T5uoOND3Rv(s2JktQh-I@y-JBBdMCBJMEOC6; z7mo|Q;t!mkWOF}>7%g7;#A#s#T0?U&aVAy$B^QC4GY4YQT2D&&IH7$j_$7+zzg23dPeLR@~sHl}h zRX-RPAr6Y8xA5~W9cf6BzC>@Qp-O-%{Ig}~z0=W{F1-%HMTGMv1Ve){)KOkzwlob2 zJ`hb%ga?8Pf+gM$#j9p!kAtF7VfOe?D8q`_t!}9i3Kznd4h28g3Ba(zx3!4(V#feP z#6Yn~qxf)Zjq0NqFUYGw4I?rzg<;*3c>AE7inG~SE_*iN~pK+ zk8^GmtS#>iilm$03uq1ZmJ~<*0lTWQb2|ml#*`#e+O~8D4sJ>5KE}KXOPX;Dm75y3 z$r4v(LsbFQePI>c24gz1txQ}9MG;0K8;W8|d!kNd>XAn;n8!O>ZPfQfsZ<8pE)409 zKx&J|JjJ08)Yvh$ zdsP)Cs(vC=f7hxUk#uCc-Lc2a9C1Fz%nEaacZ?ef>WHVo?N*7aWT;S;E&!W@<1!A>jul|XQQlAeBK}TrpgbnL4;_A$?f+@6%I-$d zu7iEKqZPmQn`J2h(zmcuQb20wUuUz6uLB1!&q^ph(1i9`vs>B$#Rd3~p!iIqnA1|H zDz0Dz4?;1hQS=2B&Ug&oKPt?btWo^vsa)>E=|Q=%Qk>zJ9^)VMe4LIwRmj)0vk1L( zDPT1|GIrRFy-?JJ4y{!?W{Q{b+~i{n$9re9I*)d^L@wF|@(joqI$GT(PV7T3Rg~50 zQhmHdwXN|Mml^%wWB5BO{5`($S`)bTUE0&ikHv4a%9hiJ_>zLgk@4UF{SAD5irr%? zyyCRjHc{uig3u;Vb=&mmdLLu2DQwsbvoZ7rkt0Ftj1lTTk-W>V%uJ4>e{19aLS1U} zN?ttM(e0B8ljV`!Lb2%S6#m;mcB6-vpiqAwa}~S>OWE?YoXHiKS2~vhHtYjFSNotu z8$aFY1-+pg-{BaFNO?Nma@-6GA6<0JP=r`_av3kX6fT)Ab|`)W@2iIQMT>T(7wq`c&UA4xRD86)KrWpW^-TICZinHD#3~<-9k*(Bq3Cu| z$unjCo&Zbu5V#s(78{TStO7Ozy8yIX{sZ{m0Gl`$C;_U0b^vV?{}uRifLX0TCa@G( z13Ut31)c(a2J8ji1p0u_fo}uMW(+XDZ9Hxqz+C`3w=D;J0E}(V07NWcsr7kB{J0Bi$bC%pu814n@Of%Cv$0xS`oCc-We!6q&Z_*vpQ+-w1!2KE7e z0L}ql1(+S3*b{-7Kmkw(>;#~*BNO|_z<&o=k^w*>Nt1xPfh9l%umji+yaSv85P-u3 zSb$Vut{)ea%JC1t0RTb=hQk4;P7#<6AUkI{fFW_hp>sC?t@CXF!8*?ZT>o8w$#5|&p+h<|NlQ;ruN1F