From 8dc15590b5f529b878871070aed0e9917b5bde7f Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Tue, 13 Apr 2021 17:45:39 -0500 Subject: [PATCH] Reindex in `DataFrame.__setitem__` (#7948) This PR fixes missing reindexing in `DataFrame.__setitem__` when the `value` argument is a `DataFrame`. We currently align the index if `value` is a Series and `arg` is already a column name. This change is necessary to continue with the upgrade to pandas `1.2.4`. However, pandas has confirmed this as a regression only in `1.2.0` and `1.2.2`, so the pytest has been corrected to xfail only in those versions of pandas. Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Ashwin Srinath (https://github.com/shwina) URL: https://github.com/rapidsai/cudf/pull/7948 --- python/cudf/cudf/core/_compat.py | 2 +- python/cudf/cudf/core/dataframe.py | 10 +++++++++- python/cudf/cudf/tests/test_setitem.py | 4 ++-- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/python/cudf/cudf/core/_compat.py b/python/cudf/cudf/core/_compat.py index 807e96f2c38..24b25b6eec0 100644 --- a/python/cudf/cudf/core/_compat.py +++ b/python/cudf/cudf/core/_compat.py @@ -7,4 +7,4 @@ PANDAS_GE_100 = PANDAS_VERSION >= version.parse("1.0") PANDAS_GE_110 = PANDAS_VERSION >= version.parse("1.1") PANDAS_GE_120 = PANDAS_VERSION >= version.parse("1.2") -PANDAS_EQ_123 = PANDAS_VERSION == version.parse("1.2.3") +PANDAS_LE_122 = PANDAS_VERSION <= version.parse("1.2.2") diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index d5393a724ec..08097d01da7 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -7930,7 +7930,12 @@ def _align_indices(lhs, rhs): return lhs_out, rhs_out -def _setitem_with_dataframe(input_df, replace_df, input_cols=None, mask=None): +def _setitem_with_dataframe( + input_df: DataFrame, + replace_df: DataFrame, + input_cols: Any = None, + mask: Optional[cudf.core.column.ColumnBase] = None, +): """ This function sets 
item dataframes relevant columns with replacement df :param input_df: Dataframe to be modified inplace @@ -7947,6 +7952,9 @@ def _setitem_with_dataframe(input_df, replace_df, input_cols=None, mask=None): "Number of Input Columns must be same replacement Dataframe" ) + if not input_df.index.equals(replace_df.index): + replace_df = replace_df.reindex(input_df.index) + for col_1, col_2 in zip(input_cols, replace_df.columns): if col_1 in input_df.columns: if mask is not None: diff --git a/python/cudf/cudf/tests/test_setitem.py b/python/cudf/cudf/tests/test_setitem.py index 1005efec3ee..28cb2568908 100644 --- a/python/cudf/cudf/tests/test_setitem.py +++ b/python/cudf/cudf/tests/test_setitem.py @@ -5,7 +5,7 @@ import pytest import cudf -from cudf.core._compat import PANDAS_EQ_123, PANDAS_GE_120 +from cudf.core._compat import PANDAS_GE_120, PANDAS_LE_122 from cudf.tests.utils import assert_eq, assert_exceptions_equal @@ -21,7 +21,7 @@ def test_dataframe_setitem_bool_mask_scaler(df, arg, value): @pytest.mark.xfail( - condition=PANDAS_EQ_123 or not PANDAS_GE_120, + condition=PANDAS_GE_120 and PANDAS_LE_122, reason="https://github.com/pandas-dev/pandas/issues/40204", ) def test_dataframe_setitem_scaler_bool():