diff --git a/python/cudf/cudf/tests/dataframe/test_io_serialization.py b/python/cudf/cudf/tests/dataframe/test_io_serialization.py
index 06777c8e6af..911a7f9e865 100644
--- a/python/cudf/cudf/tests/dataframe/test_io_serialization.py
+++ b/python/cudf/cudf/tests/dataframe/test_io_serialization.py
@@ -1 +1,43 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+from io import BytesIO
+
+import pandas as pd
+import pyarrow.parquet as pq
+import pytest
+
+import cudf
+from cudf.testing._utils import assert_eq
+
+
+@pytest.mark.parametrize(
+    "index",
+    [range(1, 11), list(range(1, 11)), range(1, 11)[::2]],
+    ids=["RangeIndex", "IntIndex", "StridedRange"],
+)
+@pytest.mark.parametrize("write_index", [False, True, None])
+@pytest.mark.parametrize("empty", [False, True], ids=["nonempty", "empty"])
+def test_dataframe_parquet_roundtrip(index, write_index, empty):
+    """Check that cudf and pandas write matching parquet output.
+
+    The same frame is written by both libraries into in-memory buffers.
+    The pandas metadata found on each file's schema must be equal, and
+    both buffers must read back through cudf to equal frames.
+    """
+    # The index is passed to both constructors even when there are no columns.
+    columns = {} if empty else {"a": [2 * i for i in index]}
+    gdf = cudf.DataFrame(data=columns, index=index)
+    pdf = pd.DataFrame(data=columns, index=index)
+
+    gpu_file, cpu_file = BytesIO(), BytesIO()
+    gdf.to_parquet(gpu_file, index=write_index)
+    pdf.to_parquet(cpu_file, index=write_index)
+
+    # Compare the pandas metadata exposed by each file's Arrow schema.
+    gpu_meta = pq.read_table(gpu_file).schema.pandas_metadata
+    cpu_meta = pq.read_table(cpu_file).schema.pandas_metadata
+    assert gpu_meta == cpu_meta
+
+    # Both buffers are read with cudf, so only the writers differ.
+    from_gpu = cudf.read_parquet(gpu_file)
+    from_cpu = cudf.read_parquet(cpu_file)
+    assert_eq(from_gpu, from_cpu)