Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CLN: use _values_for_argsort for join_non_unique, join_monotonic #32467

Merged
merged 2 commits into from
Mar 11, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -1494,7 +1494,7 @@ def check_for_ordered(self, op):
)

     def _values_for_argsort(self):
-        return self._codes.copy()
+        return self._codes

def argsort(self, ascending=True, kind="quicksort", **kwargs):
"""
Expand Down
30 changes: 23 additions & 7 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3385,6 +3385,7 @@ def join(self, other, how="left", level=None, return_indexers=False, sort=False)
-------
join_index, (left_indexer, right_indexer)
"""
+        other = ensure_index(other)
         self_is_mi = isinstance(self, ABCMultiIndex)
         other_is_mi = isinstance(other, ABCMultiIndex)

Expand All @@ -3404,8 +3405,6 @@ def join(self, other, how="left", level=None, return_indexers=False, sort=False)
other, level, how=how, return_indexers=return_indexers
)

-        other = ensure_index(other)
-
         if len(other) == 0 and how in ("left", "outer"):
             join_index = self._shallow_copy()
             if return_indexers:
Expand Down Expand Up @@ -3567,16 +3566,26 @@ def _join_multi(self, other, how, return_indexers=True):
     def _join_non_unique(self, other, how="left", return_indexers=False):
         from pandas.core.reshape.merge import _get_join_indexers

+        # We only get here if dtypes match
+        assert self.dtype == other.dtype
+
+        if is_extension_array_dtype(self.dtype):
jreback marked this conversation as resolved.
Show resolved Hide resolved
+            lvalues = self._data._values_for_argsort()
+            rvalues = other._data._values_for_argsort()
+        else:
+            lvalues = self._values
+            rvalues = other._values
+
         left_idx, right_idx = _get_join_indexers(
-            [self._ndarray_values], [other._ndarray_values], how=how, sort=True
+            [lvalues], [rvalues], how=how, sort=True
         )

         left_idx = ensure_platform_int(left_idx)
         right_idx = ensure_platform_int(right_idx)

-        join_index = np.asarray(self._ndarray_values.take(left_idx))
+        join_index = np.asarray(lvalues.take(left_idx))
         mask = left_idx == -1
-        np.putmask(join_index, mask, other._ndarray_values.take(right_idx))
+        np.putmask(join_index, mask, rvalues.take(right_idx))

         join_index = self._wrap_joined_index(join_index, other)

Expand Down Expand Up @@ -3727,15 +3736,22 @@ def _get_leaf_sorter(labels):
return join_index

     def _join_monotonic(self, other, how="left", return_indexers=False):
+        # We only get here with matching dtypes
+        assert other.dtype == self.dtype
+
         if self.equals(other):
             ret_index = other if how == "right" else self
             if return_indexers:
                 return ret_index, None, None
             else:
                 return ret_index

-        sv = self._ndarray_values
-        ov = other._ndarray_values
+        if is_extension_array_dtype(self.dtype):
+            sv = self._data._values_for_argsort()
+            ov = other._data._values_for_argsort()
+        else:
+            sv = self._values
+            ov = other._values

         if self.is_unique and other.is_unique:
             # We can perform much better than the general case
Expand Down