Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

REF: implement putmask for CI/DTI/TDI/PI #36400

Merged
merged 1 commit into from
Sep 17, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -1171,6 +1171,11 @@ def map(self, mapper):
# -------------------------------------------------------------
# Validators; ideally these can be de-duplicated

def _validate_where_value(self, value):
    """
    Validate ``value`` before it is used for where/putmask-style setting.

    Scalars are handed to ``_validate_fill_value``; anything else is handed
    to ``_validate_listlike``. The chosen validator's result is returned
    unchanged, and any exception it raises propagates to the caller.
    """
    if not is_scalar(value):
        return self._validate_listlike(value)
    return self._validate_fill_value(value)

def _validate_insert_value(self, value) -> int:
code = self.categories.get_indexer([value])
if (code == -1) and not (is_scalar(value) and isna(value)):
Expand Down
3 changes: 0 additions & 3 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4232,9 +4232,6 @@ def putmask(self, mask, value):
try:
converted = self._validate_fill_value(value)
np.putmask(values, mask, converted)
if is_period_dtype(self.dtype):
# .values cast to object, so we need to cast back
values = type(self)(values)._data
return self._shallow_copy(values)
except (ValueError, TypeError) as err:
if is_object_dtype(self):
Expand Down
11 changes: 11 additions & 0 deletions pandas/core/indexes/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,17 @@ def where(self, cond, other=None):
cat = Categorical(values, dtype=self.dtype)
return type(self)._simple_new(cat, name=self.name)

def putmask(self, mask, value):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not necessarily for today, but is there any value in pushing this down to the array and having a putmask_compat until NEP 18 can be supported?

putmask on the Index returns a copy, whereas putmask_compat on the array would be expected to operate in place. This may not be so easy for Categorical, but for other NumPy-backed arrays it could be more trivial.

Also, is the goal of extension-array-backed indexes to allow third-party EAs in the Index? If so, putmask on the array would need to be added to the EA interface?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[...] putmask on the array would need to be added to the EA interface?

I would be in favor of this

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, I am also +1 on this, as this is a 'standard' array method. Can you create an issue (we might already have one)?

try:
code_value = self._data._validate_where_value(value)
except (TypeError, ValueError):
return self.astype(object).putmask(mask, value)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this hit in tests?


codes = self._data._ndarray.copy()
np.putmask(codes, mask, code_value)
cat = self._data._from_backing_data(codes)
return type(self)._simple_new(cat, name=self.name)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this hit in tests?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like we have coverage for all of the datetimelike cases but none of the categorical ones; will update.


def reindex(self, target, method=None, level=None, limit=None, tolerance=None):
"""
Create index with target's values (move/add/delete values as necessary)
Expand Down
13 changes: 12 additions & 1 deletion pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -484,7 +484,18 @@ def where(self, cond, other=None):
raise TypeError(f"Where requires matching dtype, not {oth}") from err

result = np.where(cond, values, other).astype("i8")
arr = type(self._data)._simple_new(result, dtype=self.dtype)
arr = self._data._from_backing_data(result)
return type(self)._simple_new(arr, name=self.name)

def putmask(self, mask, value):
    """
    Return a new index with ``value`` written where ``mask`` selects.

    ``value`` is first validated by the backing array's
    ``_validate_where_value``. If that raises TypeError or ValueError, the
    operation is delegated to ``putmask`` on an object-dtype cast of this
    index, using the original ``value``.
    """
    try:
        validated = self._data._validate_where_value(value)
    except (TypeError, ValueError):
        # The backing array rejected the value; retry on an object cast.
        return self.astype(object).putmask(mask, value)

    # Write into a copy of the backing ndarray, not the original.
    backing = self._data._ndarray.copy()
    np.putmask(backing, mask, validated)
    new_data = self._data._from_backing_data(backing)
    return type(self)._simple_new(new_data, name=self.name)

def _summary(self, name=None) -> str:
Expand Down
7 changes: 4 additions & 3 deletions pandas/tests/indexes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -846,16 +846,17 @@ def test_map_str(self):
def test_putmask_with_wrong_mask(self):
# GH18368
index = self.create_index()
fill = index[0]

msg = "putmask: mask and data must be the same size"
with pytest.raises(ValueError, match=msg):
index.putmask(np.ones(len(index) + 1, np.bool_), 1)
index.putmask(np.ones(len(index) + 1, np.bool_), fill)

with pytest.raises(ValueError, match=msg):
index.putmask(np.ones(len(index) - 1, np.bool_), 1)
index.putmask(np.ones(len(index) - 1, np.bool_), fill)

with pytest.raises(ValueError, match=msg):
index.putmask("foo", 1)
index.putmask("foo", fill)

@pytest.mark.parametrize("copy", [True, False])
@pytest.mark.parametrize("name", [None, "foo"])
Expand Down