From 0e2736a11c4d970fadae22f4a36092642374f691 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Wed, 10 Mar 2021 12:03:46 -0800 Subject: [PATCH] Support `Series.__setitem__` with key to a new row (#7443) Closes #7290 Supports assigning to a new row (specified by a new label) in a series. Authors: - Michael Wang (@isVoid) Approvers: - @brandon-b-miller - GALI PREM SAGAR (@galipremsagar) URL: https://github.com/rapidsai/cudf/pull/7443 --- python/cudf/cudf/core/indexing.py | 30 +++++++++++++++++++++++-- python/cudf/cudf/tests/test_indexing.py | 25 +++++++++++++++++++++ 2 files changed, 53 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/indexing.py b/python/cudf/cudf/core/indexing.py index 653004eaee4..aec931fefbf 100755 --- a/python/cudf/cudf/core/indexing.py +++ b/python/cudf/cudf/core/indexing.py @@ -7,9 +7,12 @@ from nvtx import annotate import cudf +from cudf._lib.concat import concat_columns from cudf._lib.scalar import _is_null_host_scalar -from cudf._typing import DataFrameOrSeries, ScalarLike +from cudf._typing import ColumnLike, DataFrameOrSeries, ScalarLike +from cudf.core.column.column import as_column from cudf.utils.dtypes import ( + find_common_type, is_categorical_dtype, is_column_like, is_list_like, @@ -142,7 +145,19 @@ def __getitem__(self, arg: Any) -> Union[ScalarLike, DataFrameOrSeries]: return self._sr.iloc[arg] def __setitem__(self, key, value): - key = self._loc_to_iloc(key) + try: + key = self._loc_to_iloc(key) + except KeyError as e: + if ( + is_scalar(key) + and not isinstance(self._sr.index, cudf.MultiIndex) + and is_scalar(value) + ): + _append_new_row_inplace(self._sr.index._values, key) + _append_new_row_inplace(self._sr._column, value) + return + else: + raise e if isinstance(value, (pd.Series, cudf.Series)): value = cudf.Series(value) value = value._align_to_index(self._sr.index, how="right") @@ -481,3 +496,14 @@ def _normalize_dtypes(df): for name, col in df._data.items(): df[name] = col.astype(normalized_dtype) return 
df + + +def _append_new_row_inplace(col: ColumnLike, value: ScalarLike): + """Append a scalar `value` to the end of `col` in place. + Both are first cast to their common type. + """ + to_type = find_common_type([type(value), col.dtype]) + val_col = as_column(value, dtype=to_type) + old_col = col.astype(to_type) + + col._mimic_inplace(concat_columns([old_col, val_col]), inplace=True) diff --git a/python/cudf/cudf/tests/test_indexing.py b/python/cudf/cudf/tests/test_indexing.py index 15d504799e4..558700f1f89 100644 --- a/python/cudf/cudf/tests/test_indexing.py +++ b/python/cudf/cudf/tests/test_indexing.py @@ -1043,6 +1043,10 @@ def test_series_setitem_string(key, value): [ ("a", 4), ("b", 4), + ("b", np.int8(8)), + ("d", 4), + ("d", np.int8(16)), + ("d", np.float32(16)), (["a", "b"], 4), (["a", "b"], [4, 5]), ([True, False, True], 4), @@ -1058,6 +1062,27 @@ def test_series_setitem_loc(key, value): assert_eq(psr, gsr) +@pytest.mark.parametrize( + "key, value", + [ + (1, "d"), + (2, "e"), + (4, "f"), + ([1, 3], "g"), + ([1, 3], ["g", "h"]), + ([True, False, True], "i"), + ([False, False, False], "j"), + ([True, False, True], ["k", "l"]), + ], +) +def test_series_setitem_loc_numeric_index(key, value): + psr = pd.Series(["a", "b", "c"], [1, 2, 3]) + gsr = cudf.from_pandas(psr) + psr.loc[key] = value + gsr.loc[key] = value + assert_eq(psr, gsr) + + @pytest.mark.parametrize( "key, value", [