diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu
index 56eb34bbe2f..46b3206f731 100644
--- a/cpp/src/io/parquet/reader_impl.cu
+++ b/cpp/src/io/parquet/reader_impl.cu
@@ -1179,6 +1179,24 @@ rmm::device_buffer reader::impl::decompress_page_data(
                                     codec_stats{parquet::SNAPPY, 0, 0},
                                     codec_stats{parquet::BROTLI, 0, 0}};
 
+  // A codec is supported if it is UNCOMPRESSED or has an entry in `codecs`.
+  // NOTE(review): presumably pages using any other codec would never be visited by the
+  // per-codec loop below and would be left undecoded -- confirm.
+  auto is_codec_supported = [&codecs](int8_t codec) {
+    // UNCOMPRESSED is accepted explicitly (presumably it has no entry in `codecs`).
+    if (codec == parquet::UNCOMPRESSED) return true;
+    return std::any_of(codecs.begin(), codecs.end(), [codec](auto const& cstats) {
+      return codec == cstats.compression_type;
+    });
+  };
+  // Check every chunk's codec before the per-codec loop below runs.
+  CUDF_EXPECTS(std::all_of(chunks.begin(),
+                           chunks.end(),
+                           [&is_codec_supported](auto const& chunk) {
+                             return is_codec_supported(chunk.codec);
+                           }),
+               "Unsupported compression type");
+
   for (auto& codec : codecs) {
     for_each_codec_page(codec.compression_type, [&](size_t page) {
       auto page_uncomp_size = pages[page].uncompressed_page_size;
diff --git a/python/cudf/cudf/tests/data/parquet/spark_zstd.parquet b/python/cudf/cudf/tests/data/parquet/spark_zstd.parquet
new file mode 100644
index 00000000000..99b584aa557
Binary files /dev/null and b/python/cudf/cudf/tests/data/parquet/spark_zstd.parquet differ
diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py
index 58ba77d0b0e..727200293f7 100644
--- a/python/cudf/cudf/tests/test_parquet.py
+++ b/python/cudf/cudf/tests/test_parquet.py
@@ -2420,3 +2420,12 @@ def test_parquet_reader_decimal_columns():
     expected = pd.read_parquet(buffer, columns=["col3", "col2", "col1"])
 
     assert_eq(actual, expected)
+
+
+def test_parquet_reader_unsupported_compression(datadir):
+    """Reading a file with an unsupported codec raises an error."""
+    # Presumably a zstd-compressed file written by Spark (going by its name).
+    fname = datadir / "spark_zstd.parquet"
+
+    with pytest.raises(RuntimeError, match="Unsupported compression type"):
+        cudf.read_parquet(fname)