From 899fe80c04e3768377f04898d3a381d47c60e97e Mon Sep 17 00:00:00 2001 From: vuule Date: Mon, 23 May 2022 17:25:36 -0700 Subject: [PATCH 1/3] enable zstd only when all nvcomp integrations are enabled --- cpp/src/io/comp/nvcomp_adapter.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/cpp/src/io/comp/nvcomp_adapter.cpp b/cpp/src/io/comp/nvcomp_adapter.cpp index 5804ef3cc9b..e1f2ade14f8 100644 --- a/cpp/src/io/comp/nvcomp_adapter.cpp +++ b/cpp/src/io/comp/nvcomp_adapter.cpp @@ -17,6 +17,7 @@ #include "nvcomp_adapter.cuh" #include +#include #include @@ -30,6 +31,8 @@ namespace cudf::io::nvcomp { +[[noreturn]] void fail_unsupported() { CUDF_FAIL("Unsupported compression type"); } + template auto batched_decompress_get_temp_size(compression_type compression, Args&&... args) { @@ -40,7 +43,7 @@ auto batched_decompress_get_temp_size(compression_type compression, Args&&... ar case compression_type::ZSTD: return nvcompBatchedZstdDecompressGetTempSize(std::forward(args)...); #endif - default: CUDF_FAIL("Unsupported compression type"); + default: fail_unsupported(); } }; @@ -54,7 +57,7 @@ auto batched_decompress_async(compression_type compression, Args&&... args) case compression_type::ZSTD: return nvcompBatchedZstdDecompressAsync(std::forward(args)...); #endif - default: CUDF_FAIL("Unsupported compression type"); + default: fail_unsupported(); } }; @@ -76,6 +79,11 @@ void batched_decompress(compression_type compression, size_t max_uncomp_chunk_size, rmm::cuda_stream_view stream) { + // TODO Consolidate config use to a common location + if (compression == compression_type::ZSTD and + not cudf::io::detail::nvcomp_integration::is_all_enabled()) { + fail_unsupported(); + } auto const num_chunks = inputs.size(); // cuDF inflate inputs converted to nvcomp inputs From 4a893b22a68a57873e93d26761e73a46c009b110 Mon Sep 17 00:00:00 2001 From: vuule Date: Tue, 24 May 2022 15:53:50 -0700 Subject: [PATCH 2/3] improve error messages; stop checking the error message in tests --- cpp/src/io/comp/nvcomp_adapter.cpp | 18 +++++++++++------- python/cudf/cudf/tests/test_orc.py | 5 +---- python/cudf/cudf/tests/test_parquet.py | 5 +---- 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/cpp/src/io/comp/nvcomp_adapter.cpp b/cpp/src/io/comp/nvcomp_adapter.cpp index e1f2ade14f8..b7b003e0af9 100644 --- a/cpp/src/io/comp/nvcomp_adapter.cpp +++ b/cpp/src/io/comp/nvcomp_adapter.cpp @@ -31,8 +31,6 @@ namespace cudf::io::nvcomp { -[[noreturn]] void fail_unsupported() { CUDF_FAIL("Unsupported compression type"); } - template auto batched_decompress_get_temp_size(compression_type compression, Args&&... args) { @@ -43,7 +41,7 @@ auto batched_decompress_get_temp_size(compression_type compression, Args&&... ar case compression_type::ZSTD: return nvcompBatchedZstdDecompressGetTempSize(std::forward(args)...); #endif - default: fail_unsupported(); + default: CUDF_FAIL("Unsupported compression type"); } }; @@ -57,7 +55,7 @@ auto batched_decompress_async(compression_type compression, Args&&... args) case compression_type::ZSTD: return nvcompBatchedZstdDecompressAsync(std::forward(args)...); #endif - default: fail_unsupported(); + default: CUDF_FAIL("Unsupported compression type"); } }; @@ -80,10 +78,16 @@ void batched_decompress(compression_type compression, rmm::cuda_stream_view stream) { // TODO Consolidate config use to a common location - if (compression == compression_type::ZSTD and - not cudf::io::detail::nvcomp_integration::is_all_enabled()) { - fail_unsupported(); + if (compression == compression_type::ZSTD) { +#if NVCOMP_HAS_ZSTD + CUDF_EXPECTS(cudf::io::detail::nvcomp_integration::is_all_enabled(), + "Zstandard compression is experimental, you can enable it through " + "`LIBCUDF_NVCOMP_POLICY` environment variable."); +#else + CUDF_FAIL("nvCOMP 2.3 or newer is required for Zstandard compression"); +#endif } + auto const num_chunks = inputs.size(); // cuDF inflate inputs converted to nvcomp inputs diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py index 8de680fd706..9992a0b2c1a 100644 --- a/python/cudf/cudf/tests/test_orc.py +++ b/python/cudf/cudf/tests/test_orc.py @@ -1728,7 +1728,4 @@ def test_orc_reader_zstd_compression(list_struct_buff): got = cudf.read_orc(buffer) assert_eq(expected, got) except RuntimeError as e: - if "Unsupported compression type" in str(e): - pytest.mark.xfail(reason="nvcomp build doesn't have zstd") - else: - raise e + pytest.mark.xfail(reason="zstd support is not enabled") diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py index 58fa69c59d0..6b7e853ae8e 100644 --- a/python/cudf/cudf/tests/test_parquet.py +++ b/python/cudf/cudf/tests/test_parquet.py @@ -2516,7 +2516,4 @@ def test_parquet_reader_zstd_compression(datadir): pdf = pd.read_parquet(fname) assert_eq(df, pdf) except RuntimeError as e: - if "Unsupported compression type" in str(e): - pytest.mark.xfail(reason="nvcomp build doesn't have zstd") - else: - raise e + pytest.mark.xfail(reason="zstd support is not enabled") From af354712cb849f6d6b961ce3b01e6b22f0eb3da8 Mon Sep 17 00:00:00 2001 From: vuule Date: Tue, 24 May 2022 16:05:39 -0700 Subject: [PATCH 3/3] style --- python/cudf/cudf/tests/test_orc.py | 2 +- python/cudf/cudf/tests/test_parquet.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py index 9992a0b2c1a..c5b6395394b 100644 --- a/python/cudf/cudf/tests/test_orc.py +++ b/python/cudf/cudf/tests/test_orc.py @@ -1727,5 +1727,5 @@ def test_orc_reader_zstd_compression(list_struct_buff): try: got = cudf.read_orc(buffer) assert_eq(expected, got) - except RuntimeError as e: + except RuntimeError: pytest.mark.xfail(reason="zstd support is not enabled") diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py index 6b7e853ae8e..32619e37a3c 100644 --- a/python/cudf/cudf/tests/test_parquet.py +++ b/python/cudf/cudf/tests/test_parquet.py @@ -2515,5 +2515,5 @@ def test_parquet_reader_zstd_compression(datadir): df = cudf.read_parquet(fname) pdf = pd.read_parquet(fname) assert_eq(df, pdf) - except RuntimeError as e: + except RuntimeError: pytest.mark.xfail(reason="zstd support is not enabled")