Remove destructor in favour of contextlib
devavret committed Jan 12, 2022
1 parent 64aae8d commit 70612e6
Showing 3 changed files with 44 additions and 2 deletions.
python/cudf/cudf/_lib/parquet.pyx (4 additions & 1 deletion)
@@ -484,7 +484,10 @@ cdef class ParquetWriter:
             return np.asarray(out_metadata_py)
         return None

-    def __dealloc__(self):
+    def __enter__(self):
+        return self
+
+    def __exit__(self, *args):
         self.close()

     def _initialize_chunked_state(self, table, num_partitions=1):
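For readers unfamiliar with the protocol: defining __enter__/__exit__ lets the writer drive a with statement, which guarantees close() runs when the block exits (normally or via an exception), instead of whenever garbage collection happens to trigger __dealloc__. A rough desugaring, as a sketch only; path and table are placeholder names, not identifiers from the commit:

    # Approximately what `with ParquetWriter(path) as writer:` expands to:
    writer = ParquetWriter(path)
    writer = writer.__enter__()            # the new method returns self
    try:
        writer.write_table(table)          # the body of the with-block
    finally:
        writer.__exit__(None, None, None)  # the new method calls self.close()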
python/cudf/cudf/io/parquet.py (4 additions & 1 deletion)
@@ -950,7 +950,10 @@ def close(self, return_metadata=False):
                 else metadata[0]
             )

-    def __del__(self):
+    def __enter__(self):
+        return self
+
+    def __exit__(self, *args):
         self.close()
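Since the writers now expose close() plus the two protocol methods, callers can also lean on the stdlib contextlib module the commit title alludes to: contextlib.closing() adapts any object with a close() method into a context manager. A minimal sketch, assuming the import path below is correct for this version of cudf:

    from contextlib import closing

    import cudf
    from cudf.io.parquet import ParquetDatasetWriter  # import path assumed

    df = cudf.DataFrame({"a": [1, 1, 2], "b": [9, 8, 7]})

    # closing() calls cw.close() on exit, mirroring the new __exit__.
    with closing(
        ParquetDatasetWriter("out_dir", partition_cols=["a"], index=False)
    ) as cw:
        cw.write_table(df)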
python/cudf/cudf/tests/test_parquet.py (36 additions & 0 deletions)
@@ -1577,6 +1577,16 @@ def test_parquet_writer_gpu_chunked(tmpdir, simple_pdf, simple_gdf):
     assert_eq(pd.read_parquet(gdf_fname), pd.concat([simple_pdf, simple_pdf]))


+def test_parquet_writer_gpu_chunked_context(tmpdir, simple_pdf, simple_gdf):
+    gdf_fname = tmpdir.join("gdf.parquet")
+
+    with ParquetWriter(gdf_fname) as writer:
+        writer.write_table(simple_gdf)
+        writer.write_table(simple_gdf)
+
+    assert_eq(pd.read_parquet(gdf_fname), pd.concat([simple_pdf, simple_pdf]))
+
+
 def test_parquet_write_bytes_io(simple_gdf):
     output = BytesIO()
     simple_gdf.to_parquet(output)
@@ -1672,6 +1682,32 @@ def test_parquet_writer_chunked_partitioned(tmpdir_factory, return_meta):
     assert_eq(got_pd, got_cudf)


+def test_parquet_writer_chunked_partitioned_context(tmpdir_factory):
+    pdf_dir = str(tmpdir_factory.mktemp("pdf_dir"))
+    gdf_dir = str(tmpdir_factory.mktemp("gdf_dir"))
+
+    df1 = cudf.DataFrame({"a": [1, 1, 2, 2, 1], "b": [9, 8, 7, 6, 5]})
+    df2 = cudf.DataFrame({"a": [1, 3, 3, 1, 3], "b": [4, 3, 2, 1, 0]})
+
+    with ParquetDatasetWriter(
+        gdf_dir, partition_cols=["a"], index=False
+    ) as cw:
+        cw.write_table(df1)
+        cw.write_table(df2)
+
+    pdf = cudf.concat([df1, df2]).to_pandas()
+    pdf.to_parquet(pdf_dir, index=False, partition_cols=["a"])
+
+    # Read back with pandas to compare
+    expect_pd = pd.read_parquet(pdf_dir)
+    got_pd = pd.read_parquet(gdf_dir)
+    assert_eq(expect_pd, got_pd)
+
+    # Check that cudf and pd return the same read
+    got_cudf = cudf.read_parquet(gdf_dir)
+    assert_eq(got_pd, got_cudf)
+
+
 @pytest.mark.parametrize("cols", [None, ["b"]])
 def test_parquet_write_to_dataset(tmpdir_factory, cols):
     dir1 = tmpdir_factory.mktemp("dir1")
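One behavioural guarantee the new tests do not cover is cleanup on failure: __exit__ runs even when the with-body raises, which __del__/__dealloc__ never promised. A possible follow-up test, sketched with a stand-in class so it asserts only core-Python semantics rather than cudf behaviour:

    import pytest


    def test_context_manager_closes_on_error():
        class Probe:
            closed = False

            def close(self):
                self.closed = True

            def __enter__(self):
                return self

            def __exit__(self, *args):
                self.close()

        probe = Probe()
        with pytest.raises(RuntimeError):
            with probe:
                raise RuntimeError("simulated write failure")
        assert probe.closed  # close() ran despite the exception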
