From 70612e6139ee4c70bfb8399dafea6cdd031f41c3 Mon Sep 17 00:00:00 2001
From: Devavret Makkar
Date: Thu, 13 Jan 2022 02:09:30 +0530
Subject: [PATCH] Remove destructor in favour of contextlib

---
 python/cudf/cudf/_lib/parquet.pyx      |  5 +++-
 python/cudf/cudf/io/parquet.py         |  5 +++-
 python/cudf/cudf/tests/test_parquet.py | 36 ++++++++++++++++++++++++++
 3 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/_lib/parquet.pyx b/python/cudf/cudf/_lib/parquet.pyx
index a76b773f4b0..16873435e1d 100644
--- a/python/cudf/cudf/_lib/parquet.pyx
+++ b/python/cudf/cudf/_lib/parquet.pyx
@@ -484,7 +484,10 @@ cdef class ParquetWriter:
             return np.asarray(out_metadata_py)
         return None
 
-    def __dealloc__(self):
+    def __enter__(self):
+        return self
+
+    def __exit__(self, *args):
         self.close()
 
     def _initialize_chunked_state(self, table, num_partitions=1):
diff --git a/python/cudf/cudf/io/parquet.py b/python/cudf/cudf/io/parquet.py
index ebe45c6bc04..bd4de908f49 100644
--- a/python/cudf/cudf/io/parquet.py
+++ b/python/cudf/cudf/io/parquet.py
@@ -950,7 +950,10 @@ def close(self, return_metadata=False):
                 else metadata[0]
             )
 
-    def __del__(self):
+    def __enter__(self):
+        return self
+
+    def __exit__(self, *args):
         self.close()
 
 
diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py
index 076a0a817b8..016ed1229f1 100644
--- a/python/cudf/cudf/tests/test_parquet.py
+++ b/python/cudf/cudf/tests/test_parquet.py
@@ -1577,6 +1577,16 @@ def test_parquet_writer_gpu_chunked(tmpdir, simple_pdf, simple_gdf):
     assert_eq(pd.read_parquet(gdf_fname), pd.concat([simple_pdf, simple_pdf]))
 
 
+def test_parquet_writer_gpu_chunked_context(tmpdir, simple_pdf, simple_gdf):
+    gdf_fname = tmpdir.join("gdf.parquet")
+
+    with ParquetWriter(gdf_fname) as writer:
+        writer.write_table(simple_gdf)
+        writer.write_table(simple_gdf)
+
+    assert_eq(pd.read_parquet(gdf_fname), pd.concat([simple_pdf, simple_pdf]))
+
+
 def test_parquet_write_bytes_io(simple_gdf):
     output = BytesIO()
     simple_gdf.to_parquet(output)
@@ -1672,6 +1682,32 @@ def test_parquet_writer_chunked_partitioned(tmpdir_factory, return_meta):
     assert_eq(got_pd, got_cudf)
 
 
+def test_parquet_writer_chunked_partitioned_context(tmpdir_factory):
+    pdf_dir = str(tmpdir_factory.mktemp("pdf_dir"))
+    gdf_dir = str(tmpdir_factory.mktemp("gdf_dir"))
+
+    df1 = cudf.DataFrame({"a": [1, 1, 2, 2, 1], "b": [9, 8, 7, 6, 5]})
+    df2 = cudf.DataFrame({"a": [1, 3, 3, 1, 3], "b": [4, 3, 2, 1, 0]})
+
+    with ParquetDatasetWriter(
+        gdf_dir, partition_cols=["a"], index=False
+    ) as cw:
+        cw.write_table(df1)
+        cw.write_table(df2)
+
+    pdf = cudf.concat([df1, df2]).to_pandas()
+    pdf.to_parquet(pdf_dir, index=False, partition_cols=["a"])
+
+    # Read back with pandas to compare
+    expect_pd = pd.read_parquet(pdf_dir)
+    got_pd = pd.read_parquet(gdf_dir)
+    assert_eq(expect_pd, got_pd)
+
+    # Check that cudf and pd return the same read
+    got_cudf = cudf.read_parquet(gdf_dir)
+    assert_eq(got_pd, got_cudf)
+
+
 @pytest.mark.parametrize("cols", [None, ["b"]])
 def test_parquet_write_to_dataset(tmpdir_factory, cols):
     dir1 = tmpdir_factory.mktemp("dir1")