Skip to content

Commit

Permalink
Fix warnings in test_joining.py (#12304)
Browse files: browse the repository at this point in the history
Contributes to #9999 and #10363.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - https://github.com/brandon-b-miller
  - Matthew Roeschke (https://github.com/mroeschke)

URL: #12304
  • Loading branch information
vyasr authored Dec 6, 2022
1 parent f60877c commit 9557fd9
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 14 deletions.
11 changes: 2 additions & 9 deletions python/cudf/cudf/core/join/_join_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

from __future__ import annotations

import warnings
from collections import abc
from typing import TYPE_CHECKING, Any, Tuple, cast

Expand Down Expand Up @@ -98,14 +97,8 @@ def _match_join_keys(
):
common_type = max(ltype, rtype)

if how == "left":
if rcol.fillna(0).can_cast_safely(ltype):
return lcol, rcol.astype(ltype)
else:
warnings.warn(
f"Can't safely cast column from {rtype} to {ltype}, "
f"upcasting to {common_type}."
)
if how == "left" and rcol.fillna(0).can_cast_safely(ltype):
return lcol, rcol.astype(ltype)

return lcol.astype(common_type), rcol.astype(common_type)

Expand Down
13 changes: 13 additions & 0 deletions python/cudf/cudf/core/join/join.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Copyright (c) 2020-2022, NVIDIA CORPORATION.
from __future__ import annotations

import warnings
from typing import Any, ClassVar, List, Optional

import cudf
Expand Down Expand Up @@ -411,6 +412,18 @@ def _validate_merge_params(
"lsuffix and rsuffix are not defined"
)

if (
isinstance(lhs, cudf.DataFrame)
and isinstance(rhs, cudf.DataFrame)
and lhs._data.nlevels != rhs._data.nlevels
):
warnings.warn(
"merging between different levels is deprecated and will be "
f"removed in a future version. ({lhs._data.nlevels} levels on "
f"the left, {rhs._data.nlevels} on the right)",
FutureWarning,
)


class MergeSemi(Merge):
_joiner: ClassVar[staticmethod] = staticmethod(libcudf.join.semi_join)
Expand Down
23 changes: 18 additions & 5 deletions python/cudf/cudf/tests/test_joining.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
NUMERIC_TYPES,
assert_eq,
assert_exceptions_equal,
expect_warning_if,
)

_JOIN_TYPES = ("left", "inner", "outer", "right", "leftanti", "leftsemi")
Expand Down Expand Up @@ -1123,11 +1124,21 @@ def test_typecast_on_join_overflow_unsafe(dtypes):
lhs = cudf.DataFrame({"a": [1, 2, 3, 4, 5]}, dtype=dtype_l)
rhs = cudf.DataFrame({"a": [1, 2, 3, 4, dtype_l_max + 1]}, dtype=dtype_r)

with pytest.warns(
p_lhs = lhs.to_pandas()
p_rhs = rhs.to_pandas()

with expect_warning_if(
(dtype_l.kind == "f" and dtype_r.kind in {"i", "u"})
or (dtype_l.kind in {"i", "u"} and dtype_r.kind == "f"),
UserWarning,
match=(f"Can't safely cast column" f" from {dtype_r} to {dtype_l}"),
):
merged = lhs.merge(rhs, on="a", how="left") # noqa: F841
expect = p_lhs.merge(p_rhs, on="a", how="left")
got = lhs.merge(rhs, on="a", how="left")

# The dtypes won't match exactly here: pandas performs some unsafe
# conversions (emitting the warning that we catch above) that we do not
# want to replicate, so the comparison below skips the dtype check.
assert_join_results_equal(expect, got, how="left", check_dtype=False)


@pytest.mark.parametrize(
Expand Down Expand Up @@ -2148,11 +2159,13 @@ def test_join_multiindex_empty():
lhs = pd.DataFrame({"a": [1, 2, 3], "b": [2, 3, 4]}, index=["a", "b", "c"])
lhs.columns = pd.MultiIndex.from_tuples([("a", "x"), ("a", "y")])
rhs = pd.DataFrame(index=["a", "c", "d"])
expect = lhs.join(rhs, how="inner")
with pytest.warns(FutureWarning):
expect = lhs.join(rhs, how="inner")

lhs = cudf.from_pandas(lhs)
rhs = cudf.from_pandas(rhs)
got = lhs.join(rhs, how="inner")
with pytest.warns(FutureWarning):
got = lhs.join(rhs, how="inner")

assert_join_results_equal(expect, got, how="inner")

Expand Down

0 comments on commit 9557fd9

Please sign in to comment.