From dc88dcbffcd1183076cff4dcff6bc652c84fe676 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 27 Feb 2024 09:11:26 -0600 Subject: [PATCH] Bump to nvcomp 3.0.6. (#15128) This PR bumps nvcomp to 3.0.6. This is needed as a hotfix for https://github.com/rapidsai/cudf/issues/15096. Depends on: - https://github.com/conda-forge/nvcomp-feedstock/pull/14 - https://github.com/rapidsai/rapids-cmake/pull/542 - https://github.com/rapidsai/kvikio/pull/346 Authors: - Bradley Dice (https://github.com/bdice) Approvers: - Lawrence Mitchell (https://github.com/wence-) - Ray Douglass (https://github.com/raydouglass) --- conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +- conda/environments/all_cuda-120_arch-x86_64.yaml | 2 +- conda/recipes/libcudf/conda_build_config.yaml | 2 +- dependencies.yaml | 2 +- .../data/parquet/zstd_huff_tables_bug.parquet | Bin 0 -> 2759 bytes python/cudf/cudf/tests/test_parquet.py | 11 +++++++++++ 6 files changed, 15 insertions(+), 4 deletions(-) create mode 100644 python/cudf/cudf/tests/data/parquet/zstd_huff_tables_bug.parquet diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 956c685f7de..f123e7c7bbb 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -62,7 +62,7 @@ dependencies: - numpy>=1.21,<1.25 - numpydoc - nvcc_linux-64=11.8 -- nvcomp==3.0.5 +- nvcomp==3.0.6 - nvtx>=0.2.1 - packaging - pandas>=1.3,<1.6.0dev0 diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml index cd2c70577f9..9db43a2b938 100644 --- a/conda/environments/all_cuda-120_arch-x86_64.yaml +++ b/conda/environments/all_cuda-120_arch-x86_64.yaml @@ -60,7 +60,7 @@ dependencies: - numba>=0.57 - numpy>=1.21,<1.25 - numpydoc -- nvcomp==3.0.5 +- nvcomp==3.0.6 - nvtx>=0.2.1 - packaging - pandas>=1.3,<1.6.0dev0 diff --git a/conda/recipes/libcudf/conda_build_config.yaml b/conda/recipes/libcudf/conda_build_config.yaml index 9ed8c94f2bb..084f4651450 100644 --- a/conda/recipes/libcudf/conda_build_config.yaml +++ b/conda/recipes/libcudf/conda_build_config.yaml @@ -38,7 +38,7 @@ spdlog_version: - ">=1.12.0,<1.13" nvcomp_version: - - "=3.0.5" + - "=3.0.6" zlib_version: - ">=1.2.13" diff --git a/dependencies.yaml b/dependencies.yaml index 9a1d11af02d..efd42c838bb 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -251,7 +251,7 @@ dependencies: - libkvikio==24.2.* - librdkafka>=1.9.0,<1.10.0a0 # Align nvcomp version with rapids-cmake - - nvcomp==3.0.5 + - nvcomp==3.0.6 - spdlog>=1.12.0,<1.13 build_wheels: common: diff --git a/python/cudf/cudf/tests/data/parquet/zstd_huff_tables_bug.parquet b/python/cudf/cudf/tests/data/parquet/zstd_huff_tables_bug.parquet new file mode 100644 index 0000000000000000000000000000000000000000..4fb66fd86fc6c689ea522032d5ded66d64a30167 GIT binary patch literal 2759 zcmZuz3p`Y58$WX~#{D*Dnqk^Sa?3L07S_m8W7qw*qA)ZeA`DWWCd3d1iQJW3%AF;` zu3VFAF1h9sYEijgHjKVg?e5op?K$uBJHO|D-t&K+_j#W8Jg@0-b43sUmw3P=4zPt$ z6v;~fYY-5E0?_~eM`nKsa8a5sR0`&l!N5rD~ z&A}=Tam^@xG2Cc>awZ#-Lu^UJ%b-kIZAwgDpCl-cJs$kj`2#5zWths8>X z8(_UyYB{ovls&aKy9MV!sN_9UiZg z8CNE3UJ3R)h^-2=-c35ETaL)5ni3M7vtlSFnf=~gt;9TPv>`^s^@)Y}eo*m1+DX@Z z?)8$%muoyR)GA6y-dO6QntYq4M80A#V48?=kxEYF3>Vlh|B~x zo84&c*;LLBb!*;&vB*~5ypl#mzOBCK*fI|xt%p|0`JJkz0vq<`N<5JaC*JC6ErTcmDKd#jDADtugs(>yms^E+wNL&#doX8Uri;Nhtq!y?_Vow zR{VKauFKLn>-i6$y$&U%$O{Go`nD<(QJ;(KAtyK!eZ3p~b&(}v0RQTH@{T_5P@{Qu z`O|#AmHG2OON|-ZAZhEZv_xKZ_4kz4pU*MrUcn5cHwP*GAYq@E0VG0qLnViA42u${ zI5puQ!Z@?QqOPoNTdZySxyT3Y4^8gpVMZ`hQw;t5r)!L}tyw7(q}o>EnY@?P^-m}F z`uJBE){7Q!ZLTM?!*S}jG^^awU(wVk#rK$Z$FbT(i49C)@9?^#6ecjBUWqN z(bR|QLt1BhvqtYURQ^g*s8}zHZhNC}EjR7fdlqWbjrC%u@+uc3+DuXLq{{QeY7&q7 zzLw<<;@&RT(TfPn!niuF-1jx#G;iS^HsBchT|Lq4t-;ZI$6V_?-$?{o*m$$mkZMVE z6;E`#0}S5V7{i!VHKo(UhJ5A}!jhK9_IU7h@Oz2TJBXrQei@^a*a-X4e!C^lp(knQ4!X1` z4DQ8{S2FJ%-VEf-cDuw{uo;)@bikPS>eh9~Hi){=eb2kc`$p=~DMuu|sTTiXCwlnVHg1X@vlwqU{j@=h?x8Kh~yrzAoA`d_apZ0sw#-1ndJ0h(`hf zY(Q~+7spRr@A2k`Y$WZaW@L|`lzLP$)sJNy4>k=u6>VEX`2afYd9Yo|*1a+k3=7?1 z#>Ok&G)^kKU7jYN;h)ucN9rE$17e|kDMtloM@bX9!|eI|==JgJiRrg9xaE$Dm88|Y zwI`e8ohbmO3a>`xa6jeko#SgFpbnjqs3jID_UT}bd*sp$o*Rxd8DBC9w_g8Q-Ol^8 z;F(6pLoPvXO};ZUa_AqCD5hF$Sc2c}-DLT+-R-QI?4{h52l{2D<@joeC9k&C!j76z z?a9E(DdOVfGW`#+z4{MpVkZLD~Ugo`adE2PD+P_`$-eQLfjxght$Lqo&7!Y@S(560NXQIgrxG}1q22>`B* z-RGB{@Sh&$7aV^l)ms!SDm0Z{RHLo4nJb|FA91+JNl$ec9aX3BRoeG z-85jAh?~v@k>E5MoP&Xn<0gktT3|E`ME=zyjSzr12M!z85%AHFfRTs0Q&5`q0q+@c zCk?g_VGj`cLQTx@rLnE6_KSkb(nSBnO{}Ag>pKOB2Zb_C-4<43AxZb^nXxI+k4iI& zf61(1M;L!IbN+;?toVvPIU8r@kouNz00D!lvIqhGh5$dz#WW#4RVDJveqD|N zgdis*VTgTMNHCBr!agbNYS#cAq9}%&T`&6%8H0`1IrQ?94knJ=B92Wqs4#VB~>=g@!)Ov zD&f0`Onj z0J)*}*lb>G2ZtnD3x||G0RT8;JQQ>+ApL~~;0aK&Adfz#qo3n{T>t0Z-+YUl&@_lJ zHXH^KhQT00l=K(cKM3nF1Q7YBb6+j(Gi3+?n~s*O$7tcue{BIA8v0Xxx*%Ww1^tJK z5yEE^NEH6x!t~k5TW)L!N^*n%4FV4B@R=OIq5m`kVA0b5f&zFsXhR{P*gygL_<+`0 xp%2AX5{>N_$3PoP$o#mefu4n^#m9t;ntJ%TxOw6Y2>|f-00g1WOray}e*rj$Zdd>S literal 0 HcmV?d00001 diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py index 007349ab551..2424b33a5dc 100644 --- a/python/cudf/cudf/tests/test_parquet.py +++ b/python/cudf/cudf/tests/test_parquet.py @@ -3040,3 +3040,14 @@ def test_parquet_reader_multiindex(): def test_parquet_reader_engine_error(): with pytest.raises(ValueError): cudf.read_parquet(BytesIO(), engine="abc") + + +def test_parquet_reader_zstd_huff_tables(datadir): + # Ensure that this zstd-compressed file does not overrun buffers. The + # problem was fixed in nvcomp 3.0.6. + # See https://github.com/rapidsai/cudf/issues/15096 + fname = datadir / "zstd_huff_tables_bug.parquet" + + expected = pa.parquet.read_table(fname).to_pandas() + actual = cudf.read_parquet(fname) + assert_eq(actual, expected)