Skip to content

Commit

Permalink
Fix index alignment for Series objects with repeated index (rapidsai#…
Browse files Browse the repository at this point in the history
…11103)

The logic for aligning the indexes of two Series objects is broken when an index contains repeated values. We previously introduced an optimization that avoids a join operation, but it does not work in every situation, as seen in rapidsai#11094. This PR removes that special-casing.

Closes rapidsai#11094

Authors:
  - Ashwin Srinath (https://github.com/shwina)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: rapidsai#11103
  • Loading branch information
shwina authored Jul 1, 2022
1 parent ff63c0a commit 544f039
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 16 deletions.
26 changes: 10 additions & 16 deletions python/cudf/cudf/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4712,22 +4712,16 @@ def _align_indices(series_list, how="outer", allow_non_unique=False):
if all_index_equal:
return series_list

if how == "outer":
combined_index = cudf.core.reshape.concat(
[sr.index for sr in series_list]
).unique()
combined_index.names = new_index_names
else:
combined_index = series_list[0].index
for sr in series_list[1:]:
combined_index = (
cudf.DataFrame(index=sr.index).join(
cudf.DataFrame(index=combined_index),
sort=True,
how="inner",
)
).index
combined_index.names = new_index_names
combined_index = series_list[0].index
for sr in series_list[1:]:
combined_index = (
cudf.DataFrame(index=sr.index).join(
cudf.DataFrame(index=combined_index),
sort=True,
how=how,
)
).index
combined_index.names = new_index_names

# align all Series to the combined index
result = [
Expand Down
19 changes: 19 additions & 0 deletions python/cudf/cudf/tests/test_binops.py
Original file line number Diff line number Diff line change
Expand Up @@ -593,6 +593,14 @@ def test_different_shapes_and_columns_with_unaligned_indices(binop):
cd_frame["y"] = cd_frame["y"].astype(np.float64)
utils.assert_eq(cd_frame, pd_frame)

pdf1 = pd.DataFrame({"x": [1, 1]}, index=["a", "a"])
pdf2 = pd.DataFrame({"x": [2]}, index=["a"])
gdf1 = cudf.DataFrame.from_pandas(pdf1)
gdf2 = cudf.DataFrame.from_pandas(pdf2)
pd_frame = binop(pdf1, pdf2)
cd_frame = binop(gdf1, gdf2)
utils.assert_eq(pd_frame, cd_frame)


@pytest.mark.parametrize(
"df2",
Expand Down Expand Up @@ -2962,3 +2970,14 @@ def test_binops_dot(df, other):
got = df @ other

utils.assert_eq(expected, got)


def test_binop_series_with_repeated_index():
    """Check Series subtraction against pandas when an index label repeats.

    The left operand carries the label ``"a"`` twice and the right operand
    carries it once; the cudf result must equal the pandas result.
    Regression test for GH #11094.
    """
    pd_lhs = pd.Series([1, 1], index=["a", "a"])
    pd_rhs = pd.Series([1], index=["a"])

    # Mirror both pandas operands as cudf Series before applying the binop.
    cu_lhs, cu_rhs = cudf.from_pandas(pd_lhs), cudf.from_pandas(pd_rhs)

    utils.assert_eq(pd_lhs - pd_rhs, cu_lhs - cu_rhs)

0 comments on commit 544f039

Please sign in to comment.