Skip to content

Commit

Permalink
Fix issue with set-item incase of list and struct types (#11760)
Browse files Browse the repository at this point in the history
Fixes: #11721 
This PR:
- [x] Fixes: #11721, by not going through the fill & fill_inplace APIs which don't support `struct` and `list` columns.
- [x] Fixes an issue in caching while constructing a `struct` or `list` scalar as `list` & `dict` objects are not hashable and we were running into the following errors:
```python
In [9]: i = cudf.Scalar([10, 11])
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
File /nvme/0/pgali/envs/cudfdev/lib/python3.9/site-packages/cudf/core/scalar.py:51, in CachedScalarInstanceMeta.__call__(self, value, dtype)
     49 try:
     50     # try retrieving an instance from the cache:
---> 51     self.__instances.move_to_end(cache_key)
     52     return self.__instances[cache_key]

KeyError: ([10, 11], <class 'list'>, None, <class 'NoneType'>)

During handling of the above exception, another exception occurred:

TypeError                                 Traceback (most recent call last)
Cell In [9], line 1
----> 1 i = cudf.Scalar([10, 11])

File /nvme/0/pgali/envs/cudfdev/lib/python3.9/site-packages/cudf/core/scalar.py:57, in CachedScalarInstanceMeta.__call__(self, value, dtype)
     53 except KeyError:
     54     # if an instance couldn't be found in the cache,
     55     # construct it and add to cache:
     56     obj = super().__call__(value, dtype=dtype)
---> 57     self.__instances[cache_key] = obj
     58     if len(self.__instances) > self.__maxsize:
     59         self.__instances.popitem(last=False)

TypeError: unhashable type: 'list'
```

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Matthew Roeschke (https://github.com/mroeschke)

URL: #11760
  • Loading branch information
galipremsagar authored Sep 26, 2022
1 parent a945377 commit 11156cc
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 2 deletions.
7 changes: 6 additions & 1 deletion python/cudf/cudf/core/column/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -518,7 +518,12 @@ def _scatter_by_slice(

self._check_scatter_key_length(num_keys, value)

if step == 1:
if step == 1 and not isinstance(
self, (cudf.core.column.StructColumn, cudf.core.column.ListColumn)
):
# NOTE: List & Struct dtypes aren't supported by both
# inplace & out-of-place fill. Hence we need to use scatter for
# these two types.
if isinstance(value, cudf.core.scalar.Scalar):
return self._fill(value, start, stop, inplace=True)
else:
Expand Down
6 changes: 5 additions & 1 deletion python/cudf/cudf/core/scalar.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,11 @@ def __call__(self, value, dtype=None):
# if an instance couldn't be found in the cache,
# construct it and add to cache:
obj = super().__call__(value, dtype=dtype)
self.__instances[cache_key] = obj
try:
self.__instances[cache_key] = obj
except TypeError:
# couldn't hash the arguments, don't cache:
return obj
if len(self.__instances) > self.__maxsize:
self.__instances.popitem(last=False)
return obj
Expand Down
55 changes: 55 additions & 0 deletions python/cudf/cudf/tests/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,3 +242,58 @@ def test_categorical_setitem_invalid():
"the categories first",
):
gs[0] = 5


def test_series_slice_setitem_list():
actual = cudf.Series([[[1, 2], [2, 3]], [[3, 4]], [[4, 5]], [[6, 7]]])
actual[slice(0, 3, 1)] = [[10, 11], [12, 23]]
expected = cudf.Series(
[
[[10, 11], [12, 23]],
[[10, 11], [12, 23]],
[[10, 11], [12, 23]],
[[6, 7]],
]
)
assert_eq(actual, expected)

actual = cudf.Series([[[1, 2], [2, 3]], [[3, 4]], [[4, 5]], [[6, 7]]])
actual[0:3] = cudf.Scalar([[10, 11], [12, 23]])

assert_eq(actual, expected)


def test_series_slice_setitem_struct():
actual = cudf.Series(
[
{"a": {"b": 10}, "b": 11},
{"a": {"b": 100}, "b": 5},
{"a": {"b": 50}, "b": 2},
{"a": {"b": 1000}, "b": 67},
{"a": {"b": 4000}, "b": 1090},
]
)
actual[slice(0, 3, 1)] = {"a": {"b": 5050}, "b": 101}
expected = cudf.Series(
[
{"a": {"b": 5050}, "b": 101},
{"a": {"b": 5050}, "b": 101},
{"a": {"b": 5050}, "b": 101},
{"a": {"b": 1000}, "b": 67},
{"a": {"b": 4000}, "b": 1090},
]
)
assert_eq(actual, expected)

actual = cudf.Series(
[
{"a": {"b": 10}, "b": 11},
{"a": {"b": 100}, "b": 5},
{"a": {"b": 50}, "b": 2},
{"a": {"b": 1000}, "b": 67},
{"a": {"b": 4000}, "b": 1090},
]
)
actual[0:3] = cudf.Scalar({"a": {"b": 5050}, "b": 101})

assert_eq(actual, expected)

0 comments on commit 11156cc

Please sign in to comment.