Remove destructor in favour of contextlib
devavret committed Jan 12, 2022
1 parent 64aae8d commit 70612e6
Showing 3 changed files with 44 additions and 2 deletions.
python/cudf/cudf/_lib/parquet.pyx (4 additions & 1 deletion)
@@ -484,7 +484,10 @@ cdef class ParquetWriter:
             return np.asarray(out_metadata_py)
         return None

-    def __dealloc__(self):
+    def __enter__(self):
+        return self
+
+    def __exit__(self, *args):
         self.close()

     def _initialize_chunked_state(self, table, num_partitions=1):
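For readers unfamiliar with the protocol: defining __enter__/__exit__ lets the writer drive a with statement, which guarantees close() runs when the block exits (normally or via an exception), instead of whenever garbage collection happens to trigger __dealloc__. A rough desugaring, as a sketch only; path and table are placeholder names, not identifiers from the commit:

    # Approximately what `with ParquetWriter(path) as writer:` expands to:
    writer = ParquetWriter(path)
    writer = writer.__enter__()            # the new method returns self
    try:
        writer.write_table(table)          # the body of the with-block
    finally:
        writer.__exit__(None, None, None)  # the new method calls self.close()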
python/cudf/cudf/io/parquet.py (4 additions & 1 deletion)
@@ -950,7 +950,10 @@ def close(self, return_metadata=False):
                 else metadata[0]
             )

-    def __del__(self):
+    def __enter__(self):
+        return self
+
+    def __exit__(self, *args):
         self.close()
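Since the writers now expose close() plus the two protocol methods, callers can also lean on the stdlib contextlib module the commit title alludes to: contextlib.closing() adapts any object with a close() method into a context manager. A minimal sketch, assuming the import path below is correct for this version of cudf:

    from contextlib import closing

    import cudf
    from cudf.io.parquet import ParquetDatasetWriter  # import path assumed

    df = cudf.DataFrame({"a": [1, 1, 2], "b": [9, 8, 7]})

    # closing() calls cw.close() on exit, mirroring the new __exit__.
    with closing(
        ParquetDatasetWriter("out_dir", partition_cols=["a"], index=False)
    ) as cw:
        cw.write_table(df)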
python/cudf/cudf/tests/test_parquet.py (36 additions & 0 deletions)
@@ -1577,6 +1577,16 @@ def test_parquet_writer_gpu_chunked(tmpdir, simple_pdf, simple_gdf):
     assert_eq(pd.read_parquet(gdf_fname), pd.concat([simple_pdf, simple_pdf]))


+def test_parquet_writer_gpu_chunked_context(tmpdir, simple_pdf, simple_gdf):
+    gdf_fname = tmpdir.join("gdf.parquet")
+
+    with ParquetWriter(gdf_fname) as writer:
+        writer.write_table(simple_gdf)
+        writer.write_table(simple_gdf)
+
+    assert_eq(pd.read_parquet(gdf_fname), pd.concat([simple_pdf, simple_pdf]))
+
+
 def test_parquet_write_bytes_io(simple_gdf):
     output = BytesIO()
     simple_gdf.to_parquet(output)
@@ -1672,6 +1682,32 @@ def test_parquet_writer_chunked_partitioned(tmpdir_factory, return_meta):
     assert_eq(got_pd, got_cudf)


+def test_parquet_writer_chunked_partitioned_context(tmpdir_factory):
+    pdf_dir = str(tmpdir_factory.mktemp("pdf_dir"))
+    gdf_dir = str(tmpdir_factory.mktemp("gdf_dir"))
+
+    df1 = cudf.DataFrame({"a": [1, 1, 2, 2, 1], "b": [9, 8, 7, 6, 5]})
+    df2 = cudf.DataFrame({"a": [1, 3, 3, 1, 3], "b": [4, 3, 2, 1, 0]})
+
+    with ParquetDatasetWriter(
+        gdf_dir, partition_cols=["a"], index=False
+    ) as cw:
+        cw.write_table(df1)
+        cw.write_table(df2)
+
+    pdf = cudf.concat([df1, df2]).to_pandas()
+    pdf.to_parquet(pdf_dir, index=False, partition_cols=["a"])
+
+    # Read back with pandas to compare
+    expect_pd = pd.read_parquet(pdf_dir)
+    got_pd = pd.read_parquet(gdf_dir)
+    assert_eq(expect_pd, got_pd)
+
+    # Check that cudf and pd return the same read
+    got_cudf = cudf.read_parquet(gdf_dir)
+    assert_eq(got_pd, got_cudf)
+
+
 @pytest.mark.parametrize("cols", [None, ["b"]])
 def test_parquet_write_to_dataset(tmpdir_factory, cols):
     dir1 = tmpdir_factory.mktemp("dir1")
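One behavioural guarantee the new tests do not cover is cleanup on failure: __exit__ runs even when the with-body raises, which __del__/__dealloc__ never promised. A possible follow-up test, sketched with a stand-in class so it asserts only core-Python semantics rather than cudf behaviour:

    import pytest


    def test_context_manager_closes_on_error():
        class Probe:
            closed = False

            def close(self):
                self.closed = True

            def __enter__(self):
                return self

            def __exit__(self, *args):
                self.close()

        probe = Probe()
        with pytest.raises(RuntimeError):
            with probe:
                raise RuntimeError("simulated write failure")
        assert probe.closed  # close() ran despite the exception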
