Skip to content

Commit

Permalink
Enable FutureWarnings/DeprecationWarnings as errors for dask_cudf (#1…
Browse files Browse the repository at this point in the history
  • Loading branch information
mroeschke authored May 3, 2024
1 parent 2ff60d6 commit bee2a38
Show file tree
Hide file tree
Showing 7 changed files with 55 additions and 22 deletions.
18 changes: 15 additions & 3 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1119,14 +1119,26 @@ def _concat(cls, objs):
assert (
PANDAS_LT_300
), "Need to drop after pandas-3.0 support is added."
warnings.warn(
warning_msg = (
"The behavior of array concatenation with empty entries is "
"deprecated. In a future version, this will no longer exclude "
"empty items when determining the result dtype. "
"To retain the old behavior, exclude the empty entries before "
"the concat operation.",
FutureWarning,
"the concat operation."
)
# Warn only if the type might _actually_ change
if len(non_empties) == 0:
if not all(objs[0].dtype == index.dtype for index in objs[1:]):
warnings.warn(warning_msg, FutureWarning)
else:
common_all_type = find_common_type(
[index.dtype for index in objs]
)
common_non_empty_type = find_common_type(
[index.dtype for index in non_empties]
)
if common_all_type != common_non_empty_type:
warnings.warn(warning_msg, FutureWarning)
if all(isinstance(obj, RangeIndex) for obj in non_empties):
result = _concat_range_index(non_empties)
else:
Expand Down
12 changes: 8 additions & 4 deletions python/cudf/cudf/tests/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1039,7 +1039,9 @@ def test_index_append(data, other):
(len(data) == 0 or len(other) == 0) and pd_data.dtype != pd_other.dtype
):
expected = pd_data.append(pd_other)
with expect_warning_if(len(data) == 0 or len(other) == 0):
with expect_warning_if(
(len(data) == 0 or len(other) == 0) and gd_data.dtype != gd_other.dtype
):
actual = gd_data.append(gd_other)
if len(data) == 0 and len(other) == 0:
# Pandas default dtype to "object" for empty list
Expand Down Expand Up @@ -1237,7 +1239,10 @@ def test_index_append_list(data, other):
and (any(d.dtype != data.dtype for d in other))
):
expected = pd_data.append(pd_other)
with expect_warning_if(len(data) == 0 or any(len(d) == 0 for d in other)):
with expect_warning_if(
(len(data) == 0 or any(len(d) == 0 for d in other))
and (any(d.dtype != data.dtype for d in other))
):
actual = gd_data.append(gd_other)

assert_eq(expected, actual)
Expand Down Expand Up @@ -2817,8 +2822,7 @@ def test_index_methods(index, func):

if func == "append":
expected = pidx.append(other=pidx)
with expect_warning_if(len(gidx) == 0):
actual = gidx.append(other=gidx)
actual = gidx.append(other=gidx)
else:
expected = getattr(pidx, func)()
actual = getattr(gidx, func)()
Expand Down
5 changes: 2 additions & 3 deletions python/dask_cudf/dask_cudf/io/tests/test_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,8 @@ def test_read_json_nested(tmp_path):
}
)
kwargs = dict(orient="records", lines=True)
with tmp_path / "data.json" as f, dask.config.set(
{"dataframe.convert-string": False}
):
f = tmp_path / "data.json"
with dask.config.set({"dataframe.convert-string": False}):
df.to_json(f, **kwargs)
# Ensure engine='cudf' is tested.
actual = dask_cudf.read_json(f, engine="cudf", **kwargs)
Expand Down
6 changes: 3 additions & 3 deletions python/dask_cudf/dask_cudf/tests/test_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -543,7 +543,7 @@ def test_struct_explode(data):


def test_tz_localize():
data = Series(date_range("2000-04-01", "2000-04-03", freq="H"))
data = Series(date_range("2000-04-01", "2000-04-03", freq="h"))
expect = data.dt.tz_localize(
"US/Eastern", ambiguous="NaT", nonexistent="NaT"
)
Expand All @@ -560,8 +560,8 @@ def test_tz_localize():
@pytest.mark.parametrize(
"data",
[
date_range("2000-04-01", "2000-04-03", freq="H").tz_localize("UTC"),
date_range("2000-04-01", "2000-04-03", freq="H").tz_localize(
date_range("2000-04-01", "2000-04-03", freq="h").tz_localize("UTC"),
date_range("2000-04-01", "2000-04-03", freq="h").tz_localize(
"US/Eastern"
),
],
Expand Down
10 changes: 5 additions & 5 deletions python/dask_cudf/dask_cudf/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ def test_groupby_split_out(split_out, column):
gddf = dask_cudf.from_cudf(gdf, npartitions=3)

ddf_result = (
ddf.groupby(column)
ddf.groupby(column, observed=True)
.a.mean(split_out=split_out)
.compute()
.sort_values()
Expand Down Expand Up @@ -368,10 +368,10 @@ def test_groupby_dropna_dask(dropna, by):

if dropna is None:
dask_cudf_result = gddf.groupby(by).e.sum()
dask_result = ddf.groupby(by).e.sum()
dask_result = ddf.groupby(by, observed=True).e.sum()
else:
dask_cudf_result = gddf.groupby(by, dropna=dropna).e.sum()
dask_result = ddf.groupby(by, dropna=dropna).e.sum()
dask_result = ddf.groupby(by, dropna=dropna, observed=True).e.sum()

dd.assert_eq(dask_cudf_result, dask_result)

Expand Down Expand Up @@ -505,7 +505,7 @@ def test_groupby_reset_index_dtype():
a = df.groupby("a").agg({"b": ["count"]})

assert a.index.dtype == "int8"
assert a.reset_index().dtypes[0] == "int8"
assert a.reset_index().dtypes.iloc[0] == "int8"


def test_groupby_reset_index_names():
Expand Down Expand Up @@ -563,7 +563,7 @@ def test_groupby_categorical_key():
# (See: https://github.com/dask/dask/issues/9515)
expect = (
ddf.compute()
.groupby("name", sort=True)
.groupby("name", sort=True, observed=True)
.agg({"x": ["mean", "max"], "y": ["mean", "count"]})
)
dd.assert_eq(expect, got)
Expand Down
16 changes: 12 additions & 4 deletions python/dask_cudf/dask_cudf/tests/test_join.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,12 @@ def test_join_inner(left_nrows, right_nrows, left_nkeys, right_nkeys):
def gather(df, grows):
grows[df["x"].values[0]] = (set(df.al), set(df.ar))

expect.reset_index().groupby("x").apply(partial(gather, grows=expect_rows))
expect.reset_index().groupby("x").apply(partial(gather, grows=got_rows))
expect.reset_index().groupby("x")[["x", "al", "ar"]].apply(
partial(gather, grows=expect_rows)
)
expect.reset_index().groupby("x")[["x", "al", "ar"]].apply(
partial(gather, grows=got_rows)
)

assert got_rows == expect_rows

Expand Down Expand Up @@ -127,9 +131,13 @@ def gather(df, grows):

grows[df["x"].values[0]] = (cola, colb)

expect.reset_index().groupby("x").apply(partial(gather, grows=expect_rows))
expect.reset_index().groupby("x")[["x", "al", "ar"]].apply(
partial(gather, grows=expect_rows)
)

expect.reset_index().groupby("x").apply(partial(gather, grows=got_rows))
expect.reset_index().groupby("x")[["x", "al", "ar"]].apply(
partial(gather, grows=got_rows)
)

for k in expect_rows:
np.testing.assert_array_equal(expect_rows[k][0], got_rows[k][0])
Expand Down
10 changes: 10 additions & 0 deletions python/dask_cudf/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -107,3 +107,13 @@ skip = [
"build",
"dist",
]

# Pytest settings for the dask_cudf test suite.
[tool.pytest.ini_options]
# Each entry is a warnings filter of the form "action:message:category:module".
# The message field is matched against the start of the warning text.
# When a warning matches several entries, the last matching entry wins, so the
# "ignore" entries below carve out exceptions to the two "error" entries.
filterwarnings = [
# Raise every FutureWarning and DeprecationWarning as an exception.
"error::FutureWarning",
"error::DeprecationWarning",
# Specific DeprecationWarnings that are tolerated rather than raised.
"ignore:create_block_manager_from_blocks is deprecated and will be removed in a future version. Use public APIs instead.:DeprecationWarning",
# https://github.com/dask/partd/blob/main/partd/pandas.py#L198
"ignore:Passing a BlockManager to DataFrame is deprecated and will raise in a future version. Use public APIs instead.:DeprecationWarning",
# Tolerated FutureWarning; the trailing `dask` field is the module pattern,
# so this exception only applies to warnings attributed to matching modules.
"ignore:String support for `aggregate_files` is experimental. Behavior may change in the future.:FutureWarning:dask",
]

0 comments on commit bee2a38

Please sign in to comment.