Skip to content

Commit

Permalink
CLN: use _values_for_argsort for join_non_unique, join_monotonic (pan…
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and SeeminSyed committed Mar 22, 2020
1 parent 59b2238 commit 04b025f
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 8 deletions.
2 changes: 1 addition & 1 deletion pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -1488,7 +1488,7 @@ def check_for_ordered(self, op):
)

def _values_for_argsort(self):
-return self._codes.copy()
+return self._codes

def argsort(self, ascending=True, kind="quicksort", **kwargs):
"""
Expand Down
30 changes: 23 additions & 7 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3395,6 +3395,7 @@ def join(self, other, how="left", level=None, return_indexers=False, sort=False)
-------
join_index, (left_indexer, right_indexer)
"""
+other = ensure_index(other)
self_is_mi = isinstance(self, ABCMultiIndex)
other_is_mi = isinstance(other, ABCMultiIndex)

Expand All @@ -3414,8 +3415,6 @@ def join(self, other, how="left", level=None, return_indexers=False, sort=False)
other, level, how=how, return_indexers=return_indexers
)

-other = ensure_index(other)
-
if len(other) == 0 and how in ("left", "outer"):
join_index = self._shallow_copy()
if return_indexers:
Expand Down Expand Up @@ -3577,16 +3576,26 @@ def _join_multi(self, other, how, return_indexers=True):
def _join_non_unique(self, other, how="left", return_indexers=False):
from pandas.core.reshape.merge import _get_join_indexers

+# We only get here if dtypes match
+assert self.dtype == other.dtype
+
+if is_extension_array_dtype(self.dtype):
+lvalues = self._data._values_for_argsort()
+rvalues = other._data._values_for_argsort()
+else:
+lvalues = self._values
+rvalues = other._values
+
left_idx, right_idx = _get_join_indexers(
-[self._ndarray_values], [other._ndarray_values], how=how, sort=True
+[lvalues], [rvalues], how=how, sort=True
)

left_idx = ensure_platform_int(left_idx)
right_idx = ensure_platform_int(right_idx)

-join_index = np.asarray(self._ndarray_values.take(left_idx))
+join_index = np.asarray(lvalues.take(left_idx))
mask = left_idx == -1
-np.putmask(join_index, mask, other._ndarray_values.take(right_idx))
+np.putmask(join_index, mask, rvalues.take(right_idx))

join_index = self._wrap_joined_index(join_index, other)

Expand Down Expand Up @@ -3737,15 +3746,22 @@ def _get_leaf_sorter(labels):
return join_index

def _join_monotonic(self, other, how="left", return_indexers=False):
+# We only get here with matching dtypes
+assert other.dtype == self.dtype
+
if self.equals(other):
ret_index = other if how == "right" else self
if return_indexers:
return ret_index, None, None
else:
return ret_index

-sv = self._ndarray_values
-ov = other._ndarray_values
+if is_extension_array_dtype(self.dtype):
+sv = self._data._values_for_argsort()
+ov = other._data._values_for_argsort()
+else:
+sv = self._values
+ov = other._values

if self.is_unique and other.is_unique:
# We can perform much better than the general case
Expand Down

0 comments on commit 04b025f

Please sign in to comment.