searchsorted, repeat broken off from pandas-dev#24024 (pandas-dev#24461)

Pingviinituutti · Feb 28, 2019 · e239ab3 · e239ab3
1 parent 89cd33c
commit e239ab3
Show file tree

Hide file tree

Showing 6 changed files with 249 additions and 0 deletions.
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
@@ -12,6 +12,7 @@
 from pandas._libs.tslibs.timestamps import (
     RoundTo, maybe_integer_op_deprecated, round_nsint64)
 import pandas.compat as compat
+from pandas.compat.numpy import function as nv
 from pandas.errors import (
     AbstractMethodError, NullFrequencyError, PerformanceWarning)
 from pandas.util._decorators import Appender, Substitution, deprecate_kwarg
@@ -82,6 +83,79 @@ def _get_attributes_dict(self):
         """
         return {k: getattr(self, k, None) for k in self._attributes}
 
+    @property
+    def _scalar_type(self):
+        # type: () -> Union[type, Tuple[type]]
+        """The scalar associated with this datelike
+
+        * PeriodArray : Period
+        * DatetimeArray : Timestamp
+        * TimedeltaArray : Timedelta
+        """
+        raise AbstractMethodError(self)
+
+    def _scalar_from_string(self, value):
+        # type: (str) -> Union[Period, Timestamp, Timedelta, NaTType]
+        """
+        Construct a scalar type from a string.
+
+        Parameters
+        ----------
+        value : str
+
+        Returns
+        -------
+        Period, Timestamp, or Timedelta, or NaT
+            Whatever the type of ``self._scalar_type`` is.
+
+        Notes
+        -----
+        This should call ``self._check_compatible_with`` before
+        unboxing the result.
+        """
+        raise AbstractMethodError(self)
+
+    def _unbox_scalar(self, value):
+        # type: (Union[Period, Timestamp, Timedelta, NaTType]) -> int
+        """
+        Unbox the integer value of a scalar `value`.
+
+        Parameters
+        ----------
+        value : Union[Period, Timestamp, Timedelta]
+
+        Returns
+        -------
+        int
+
+        Examples
+        --------
+        >>> self._unbox_scalar(Timedelta('10s'))  # DOCTEST: +SKIP
+        10000000000
+        """
+        raise AbstractMethodError(self)
+
+    def _check_compatible_with(self, other):
+        # type: (Union[Period, Timestamp, Timedelta, NaTType]) -> None
+        """
+        Verify that `self` and `other` are compatible.
+
+        * DatetimeArray verifies that the timezones (if any) match
+        * PeriodArray verifies that the freq matches
+        * Timedelta has no verification
+
+        In each case, NaT is considered compatible.
+
+        Parameters
+        ----------
+        other
+
+        Raises
+        ------
+        Exception
+        """
+        raise AbstractMethodError(self)
+
 
 class DatelikeOps(object):
     """
@@ -515,6 +589,67 @@ def _values_for_factorize(self):
     def _from_factorized(cls, values, original):
         return cls(values, dtype=original.dtype)
 
+    def _values_for_argsort(self):
+        return self._data
+
+    # ------------------------------------------------------------------
+    # Additional array methods
+    #  These are not part of the EA API, but we implement them because
+    #  pandas assumes they're there.
+
+    def searchsorted(self, value, side='left', sorter=None):
+        """
+        Find indices where elements should be inserted to maintain order.
+
+        Find the indices into a sorted array `self` such that, if the
+        corresponding elements in `value` were inserted before the indices,
+        the order of `self` would be preserved.
+
+        Parameters
+        ----------
+        value : array_like
+            Values to insert into `self`.
+        side : {'left', 'right'}, optional
+            If 'left', the index of the first suitable location found is given.
+            If 'right', return the last such index.  If there is no suitable
+            index, return either 0 or N (where N is the length of `self`).
+        sorter : 1-D array_like, optional
+            Optional array of integer indices that sort `self` into ascending
+            order. They are typically the result of ``np.argsort``.
+
+        Returns
+        -------
+        indices : array of ints
+            Array of insertion points with the same shape as `value`.
+        """
+        if isinstance(value, compat.string_types):
+            value = self._scalar_from_string(value)
+
+        if not (isinstance(value, (self._scalar_type, type(self)))
+                or isna(value)):
+            raise ValueError("Unexpected type for 'value': {valtype}"
+                             .format(valtype=type(value)))
+
+        self._check_compatible_with(value)
+        if isinstance(value, type(self)):
+            value = value.asi8
+        else:
+            value = self._unbox_scalar(value)
+
+        return self.asi8.searchsorted(value, side=side, sorter=sorter)
+
+    def repeat(self, repeats, *args, **kwargs):
+        """
+        Repeat elements of an array.
+
+        See Also
+        --------
+        numpy.ndarray.repeat
+        """
+        nv.validate_repeat(args, kwargs)
+        values = self._data.repeat(repeats)
+        return type(self)(values, dtype=self.dtype)
+
     # ------------------------------------------------------------------
     # Null Handling
 

diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
@@ -171,6 +171,7 @@ class DatetimeArrayMixin(dtl.DatetimeLikeArrayMixin,
         _data
     """
     _typ = "datetimearray"
+    _scalar_type = Timestamp
 
     # define my properties & methods for delegation
     _bool_ops = ['is_month_start', 'is_month_end',
@@ -347,6 +348,26 @@ def _generate_range(cls, start, end, periods, freq, tz=None,
 
         return cls._simple_new(index.asi8, freq=freq, tz=tz)
 
+    # -----------------------------------------------------------------
+    # DatetimeLike Interface
+
+    def _unbox_scalar(self, value):
+        if not isinstance(value, self._scalar_type) and value is not NaT:
+            raise ValueError("'value' should be a Timestamp.")
+        if not isna(value):
+            self._check_compatible_with(value)
+        return value.value
+
+    def _scalar_from_string(self, value):
+        return Timestamp(value, tz=self.tz)
+
+    def _check_compatible_with(self, other):
+        if other is NaT:
+            return
+        if not timezones.tz_compare(self.tz, other.tz):
+            raise ValueError("Timezones don't match. '{own} != {other}'"
+                             .format(own=self.tz, other=other.tz))
+
     # -----------------------------------------------------------------
     # Descriptive Properties
 

diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py
@@ -137,6 +137,7 @@ class PeriodArray(dtl.DatetimeLikeArrayMixin,
     __array_priority__ = 1000
     _attributes = ["freq"]
     _typ = "periodarray"  # ABCPeriodArray
+    _scalar_type = Period
 
     # Names others delegate to us
     _other_ops = []
@@ -240,7 +241,28 @@ def _generate_range(cls, start, end, periods, freq, fields):
 
         return subarr, freq
 
+    # -----------------------------------------------------------------
+    # DatetimeLike Interface
+
+    def _unbox_scalar(self, value):
+        # type: (Union[Period, NaTType]) -> int
+        if value is NaT:
+            return value.value
+        elif isinstance(value, self._scalar_type):
+            if not isna(value):
+                self._check_compatible_with(value)
+            return value.ordinal
+        else:
+            raise ValueError("'value' should be a Period. Got '{val}' instead."
+                             .format(val=value))
+
+    def _scalar_from_string(self, value):
+        # type: (str) -> Period
+        return Period(value, freq=self.freq)
+
     def _check_compatible_with(self, other):
+        if other is NaT:
+            return
         if self.freqstr != other.freqstr:
             _raise_on_incompatible(self, other)
 

diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py
@@ -116,6 +116,7 @@ def wrapper(self, other):
 
 class TimedeltaArrayMixin(dtl.DatetimeLikeArrayMixin, dtl.TimelikeOps):
     _typ = "timedeltaarray"
+    _scalar_type = Timedelta
     __array_priority__ = 1000
     # define my properties & methods for delegation
     _other_ops = []
@@ -221,6 +222,22 @@ def _generate_range(cls, start, end, periods, freq, closed=None):
 
         return cls._simple_new(index, freq=freq)
 
+    # ----------------------------------------------------------------
+    # DatetimeLike Interface
+
+    def _unbox_scalar(self, value):
+        if not isinstance(value, self._scalar_type) and value is not NaT:
+            raise ValueError("'value' should be a Timedelta.")
+        self._check_compatible_with(value)
+        return value.value
+
+    def _scalar_from_string(self, value):
+        return Timedelta(value)
+
+    def _check_compatible_with(self, other):
+        # we don't have anything to validate.
+        pass
+
     # ----------------------------------------------------------------
     # Array-Like / EA-Interface Methods
 

diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
@@ -477,6 +477,7 @@ def repeat(self, repeats, axis=None):
         nv.validate_repeat(tuple(), dict(axis=axis))
         freq = self.freq if is_period_dtype(self) else None
         return self._shallow_copy(self.asi8.repeat(repeats), freq=freq)
+        # TODO: dispatch to _eadata
 
     @Appender(_index_shared_docs['where'] % _index_doc_kwargs)
     def where(self, cond, other=None):

diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py
@@ -2,6 +2,8 @@
 import numpy as np
 import pytest
 
+import pandas.compat as compat
+
 import pandas as pd
 from pandas.core.arrays import (
     DatetimeArrayMixin as DatetimeArray, PeriodArray,
@@ -129,6 +131,57 @@ def test_concat_same_type(self):
 
         tm.assert_index_equal(self.index_cls(result), expected)
 
+    def test_unbox_scalar(self):
+        data = np.arange(10, dtype='i8') * 24 * 3600 * 10**9
+        arr = self.array_cls(data, freq='D')
+        result = arr._unbox_scalar(arr[0])
+        assert isinstance(result, (int, compat.long))
+
+        result = arr._unbox_scalar(pd.NaT)
+        assert isinstance(result, (int, compat.long))
+
+        with pytest.raises(ValueError):
+            arr._unbox_scalar('foo')
+
+    def test_check_compatible_with(self):
+        data = np.arange(10, dtype='i8') * 24 * 3600 * 10**9
+        arr = self.array_cls(data, freq='D')
+
+        arr._check_compatible_with(arr[0])
+        arr._check_compatible_with(arr[:1])
+        arr._check_compatible_with(pd.NaT)
+
+    def test_scalar_from_string(self):
+        data = np.arange(10, dtype='i8') * 24 * 3600 * 10**9
+        arr = self.array_cls(data, freq='D')
+        result = arr._scalar_from_string(str(arr[0]))
+        assert result == arr[0]
+
+    def test_searchsorted(self):
+        data = np.arange(10, dtype='i8') * 24 * 3600 * 10**9
+        arr = self.array_cls(data, freq='D')
+
+        # scalar
+        result = arr.searchsorted(arr[1])
+        assert result == 1
+
+        result = arr.searchsorted(arr[2], side="right")
+        assert result == 3
+
+        # own-type
+        result = arr.searchsorted(arr[1:3])
+        expected = np.array([1, 2], dtype=np.int64)
+        tm.assert_numpy_array_equal(result, expected)
+
+        result = arr.searchsorted(arr[1:3], side="right")
+        expected = np.array([2, 3], dtype=np.int64)
+        tm.assert_numpy_array_equal(result, expected)
+
+        # Following numpy convention, NaT goes at the beginning
+        #  (unlike NaN which goes at the end)
+        result = arr.searchsorted(pd.NaT)
+        assert result == 0
+
 
 class TestDatetimeArray(SharedTests):
     index_cls = pd.DatetimeIndex