From cdbce51ed4c67a2c0c733a55245cb39213478610 Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Tue, 11 Oct 2022 10:13:57 -0700 Subject: [PATCH] Fix type casting in Series.__setitem__ To mimic pandas, we must upcast a column to the numpy result_type of the column itself and the input value dtype. This previously occurred in all relevant cases except when the index provided to __setitem__ was a single integer (originally introduced in #2442). Closes #11901. --- python/cudf/cudf/core/series.py | 11 ++++++----- python/cudf/cudf/tests/test_setitem.py | 16 ++++++++++++++++ 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index f11052096e3..7b146f5eea8 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -222,11 +222,12 @@ def __setitem__(self, key, value): and _is_non_decimal_numeric_dtype(value.dtype) ): # normalize types if necessary: - if not is_integer(key): - to_dtype = np.result_type( - value.dtype, self._frame._column.dtype - ) - value = value.astype(to_dtype) + # In contrast to Column.__setitem__ (which downcasts the value to + # the dtype of the column) here we upcast the series to the + # larger data type mimicking pandas + to_dtype = np.result_type(value.dtype, self._frame._column.dtype) + value = value.astype(to_dtype) + if to_dtype != self._frame._column.dtype: self._frame._column._mimic_inplace( self._frame._column.astype(to_dtype), inplace=True ) diff --git a/python/cudf/cudf/tests/test_setitem.py b/python/cudf/cudf/tests/test_setitem.py index 13b342e6c3b..57417e287d6 100644 --- a/python/cudf/cudf/tests/test_setitem.py +++ b/python/cudf/cudf/tests/test_setitem.py @@ -297,3 +297,19 @@ def test_series_slice_setitem_struct(): actual[0:3] = cudf.Scalar({"a": {"b": 5050}, "b": 101}) assert_eq(actual, expected) + + +@pytest.mark.parametrize("dtype", [np.int32, np.int64, np.float32, np.float64]) +@pytest.mark.parametrize("indices", [0, [1, 2]]) 
+def test_series_setitem_upcasting(dtype, indices): + sr = pd.Series([0, 0, 0], dtype=dtype) + cr = cudf.from_pandas(sr) + assert_eq(sr.values, cr.values) + new_value = np.float64(10.5) + col_ref = cr._column + sr[indices] = new_value + cr[indices] = new_value + assert_eq(sr.values, cr.values) + if dtype == np.float64: + # no-op type cast should not modify backing column + assert col_ref == cr._column