diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index a064aec492df20..b3c519ab99b6e8 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -369,31 +369,6 @@ ctypedef fused algos_t: uint8_t -# TODO: unused; needed? -@cython.wraparound(False) -@cython.boundscheck(False) -cpdef map_indices(ndarray[algos_t] index): - """ - Produce a dict mapping the values of the input array to their respective - locations. - - Example: - array(['hi', 'there']) --> {'hi' : 0 , 'there' : 1} - - Better to do this with Cython because of the enormous speed boost. - """ - cdef: - Py_ssize_t i, length - dict result = {} - - length = len(index) - - for i in range(length): - result[index[i]] = i - - return result - - @cython.boundscheck(False) @cython.wraparound(False) def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None): @@ -458,20 +433,6 @@ def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None): return indexer -pad_float64 = pad["float64_t"] -pad_float32 = pad["float32_t"] -pad_object = pad["object"] -pad_int64 = pad["int64_t"] -pad_int32 = pad["int32_t"] -pad_int16 = pad["int16_t"] -pad_int8 = pad["int8_t"] -pad_uint64 = pad["uint64_t"] -pad_uint32 = pad["uint32_t"] -pad_uint16 = pad["uint16_t"] -pad_uint8 = pad["uint8_t"] -pad_bool = pad["uint8_t"] - - @cython.boundscheck(False) @cython.wraparound(False) def pad_inplace(ndarray[algos_t] values, @@ -509,15 +470,6 @@ def pad_inplace(ndarray[algos_t] values, val = values[i] -pad_inplace_float64 = pad_inplace["float64_t"] -pad_inplace_float32 = pad_inplace["float32_t"] -pad_inplace_object = pad_inplace["object"] -pad_inplace_int64 = pad_inplace["int64_t"] -pad_inplace_int32 = pad_inplace["int32_t"] -pad_inplace_uint64 = pad_inplace["uint64_t"] -pad_inplace_bool = pad_inplace["uint8_t"] - - @cython.boundscheck(False) @cython.wraparound(False) def pad_2d_inplace(ndarray[algos_t, ndim=2] values, @@ -557,15 +509,6 @@ def pad_2d_inplace(ndarray[algos_t, ndim=2] values, val = values[j, i] -pad_2d_inplace_float64 = pad_2d_inplace["float64_t"] -pad_2d_inplace_float32 = pad_2d_inplace["float32_t"] -pad_2d_inplace_object = pad_2d_inplace["object"] -pad_2d_inplace_int64 = pad_2d_inplace["int64_t"] -pad_2d_inplace_int32 = pad_2d_inplace["int32_t"] -pad_2d_inplace_uint64 = pad_2d_inplace["uint64_t"] -pad_2d_inplace_bool = pad_2d_inplace["uint8_t"] - - """ Backfilling logic for generating fill vector @@ -657,20 +600,6 @@ def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None): return indexer -backfill_float64 = backfill["float64_t"] -backfill_float32 = backfill["float32_t"] -backfill_object = backfill["object"] -backfill_int64 = backfill["int64_t"] -backfill_int32 = backfill["int32_t"] -backfill_int16 = backfill["int16_t"] -backfill_int8 = backfill["int8_t"] -backfill_uint64 = backfill["uint64_t"] -backfill_uint32 = backfill["uint32_t"] -backfill_uint16 = backfill["uint16_t"] -backfill_uint8 = backfill["uint8_t"] -backfill_bool = backfill["uint8_t"] - - @cython.boundscheck(False) @cython.wraparound(False) def backfill_inplace(ndarray[algos_t] values, @@ -708,15 +637,6 @@ def backfill_inplace(ndarray[algos_t] values, val = values[i] -backfill_inplace_float64 = backfill_inplace["float64_t"] -backfill_inplace_float32 = backfill_inplace["float32_t"] -backfill_inplace_object = backfill_inplace["object"] -backfill_inplace_int64 = backfill_inplace["int64_t"] -backfill_inplace_int32 = backfill_inplace["int32_t"] -backfill_inplace_uint64 = backfill_inplace["uint64_t"] -backfill_inplace_bool = backfill_inplace["uint8_t"] - - @cython.boundscheck(False) @cython.wraparound(False) def backfill_2d_inplace(ndarray[algos_t, ndim=2] values, @@ -756,15 +676,6 @@ def backfill_2d_inplace(ndarray[algos_t, ndim=2] values, val = values[j, i] -backfill_2d_inplace_float64 = backfill_2d_inplace["float64_t"] -backfill_2d_inplace_float32 = backfill_2d_inplace["float32_t"] -backfill_2d_inplace_object = backfill_2d_inplace["object"] -backfill_2d_inplace_int64 = backfill_2d_inplace["int64_t"] -backfill_2d_inplace_int32 = backfill_2d_inplace["int32_t"] -backfill_2d_inplace_uint64 = backfill_2d_inplace["uint64_t"] -backfill_2d_inplace_bool = backfill_2d_inplace["uint8_t"] - - @cython.wraparound(False) @cython.boundscheck(False) def arrmap(ndarray[algos_t] index, object func): @@ -875,20 +786,6 @@ def is_monotonic(ndarray[algos_t, ndim=1] arr, bint timelike): return is_monotonic_inc, is_monotonic_dec, is_strict_monotonic -is_monotonic_float64 = is_monotonic["float64_t"] -is_monotonic_float32 = is_monotonic["float32_t"] -is_monotonic_object = is_monotonic["object"] -is_monotonic_int64 = is_monotonic["int64_t"] -is_monotonic_int32 = is_monotonic["int32_t"] -is_monotonic_int16 = is_monotonic["int16_t"] -is_monotonic_int8 = is_monotonic["int8_t"] -is_monotonic_uint64 = is_monotonic["uint64_t"] -is_monotonic_uint32 = is_monotonic["uint32_t"] -is_monotonic_uint16 = is_monotonic["uint16_t"] -is_monotonic_uint8 = is_monotonic["uint8_t"] -is_monotonic_bool = is_monotonic["uint8_t"] - - # generated from template include "algos_common_helper.pxi" include "algos_rank_helper.pxi" diff --git a/pandas/_libs/algos_common_helper.pxi.in b/pandas/_libs/algos_common_helper.pxi.in index 7d9ba420525c80..91599fa223b578 100644 --- a/pandas/_libs/algos_common_helper.pxi.in +++ b/pandas/_libs/algos_common_helper.pxi.in @@ -70,18 +70,6 @@ def diff_2d_{{name}}(ndarray[{{c_type}}, ndim=2] arr, for j in range(start, stop): out[i, j] = arr[i, j] - arr[i, j - periods] - -def put2d_{{name}}_{{dest_name}}(ndarray[{{c_type}}, ndim=2, cast=True] values, - ndarray[int64_t] indexer, Py_ssize_t loc, - ndarray[{{dest_type}}] out): - cdef: - Py_ssize_t i, j, k - - k = len(values) - for j in range(k): - i = indexer[j] - out[i] = values[j, loc] - {{endfor}} # ---------------------------------------------------------------------- diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 365713d579d606..c9190867015366 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -392,7 +392,7 @@ cdef class DatetimeEngine(Int64Engine): return self.vgetter().view('i8') def _call_monotonic(self, values): - return algos.is_monotonic_int64(values, timelike=True) + return algos.is_monotonic(values, timelike=True) cpdef get_loc(self, object val): if is_definitely_invalid_key(val): @@ -451,14 +451,13 @@ cdef class DatetimeEngine(Int64Engine): if other.dtype != self._get_box_dtype(): return np.repeat(-1, len(other)).astype('i4') other = np.asarray(other).view('i8') - return algos.pad_int64(self._get_index_values(), other, limit=limit) + return algos.pad(self._get_index_values(), other, limit=limit) def get_backfill_indexer(self, other, limit=None): if other.dtype != self._get_box_dtype(): return np.repeat(-1, len(other)).astype('i4') other = np.asarray(other).view('i8') - return algos.backfill_int64(self._get_index_values(), other, - limit=limit) + return algos.backfill(self._get_index_values(), other, limit=limit) cdef class TimedeltaEngine(DatetimeEngine): @@ -492,15 +491,15 @@ cdef class PeriodEngine(Int64Engine): freq = super(PeriodEngine, self).vgetter().freq ordinal = periodlib.extract_ordinals(other, freq) - return algos.pad_int64(self._get_index_values(), - np.asarray(ordinal), limit=limit) + return algos.pad(self._get_index_values(), + np.asarray(ordinal), limit=limit) def get_backfill_indexer(self, other, limit=None): freq = super(PeriodEngine, self).vgetter().freq ordinal = periodlib.extract_ordinals(other, freq) - return algos.backfill_int64(self._get_index_values(), - np.asarray(ordinal), limit=limit) + return algos.backfill(self._get_index_values(), + np.asarray(ordinal), limit=limit) def get_indexer_non_unique(self, targets): freq = super(PeriodEngine, self).vgetter().freq diff --git a/pandas/_libs/index_class_helper.pxi.in b/pandas/_libs/index_class_helper.pxi.in index b393283bfd4cad..6383c1534fb44f 100644 --- a/pandas/_libs/index_class_helper.pxi.in +++ b/pandas/_libs/index_class_helper.pxi.in @@ -31,15 +31,13 @@ dtypes = [('Float64', 'float64', 'float64_t', 'Float64', 'float64'), cdef class {{name}}Engine(IndexEngine): def _call_monotonic(self, values): - return algos.is_monotonic_{{dtype}}(values, timelike=False) + return algos.is_monotonic(values, timelike=False) def get_backfill_indexer(self, other, limit=None): - return algos.backfill_{{dtype}}(self._get_index_values(), - other, limit=limit) + return algos.backfill(self._get_index_values(), other, limit=limit) def get_pad_indexer(self, other, limit=None): - return algos.pad_{{dtype}}(self._get_index_values(), - other, limit=limit) + return algos.pad(self._get_index_values(), other, limit=limit) cdef _make_hash_table(self, n): return _hash.{{hashtable_name}}HashTable(n) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index ee9aa9e2291260..15538b8196684e 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -452,107 +452,56 @@ def interpolate_2d(values, method='pad', axis=0, limit=None, fill_value=None, return values -def _interp_wrapper(f, wrap_dtype, na_override=None): - def wrapper(arr, mask, limit=None): - view = arr.view(wrap_dtype) - f(view, mask, limit=limit) - - return wrapper - - -_pad_1d_datetime = _interp_wrapper(algos.pad_inplace_int64, np.int64) -_pad_2d_datetime = _interp_wrapper(algos.pad_2d_inplace_int64, np.int64) -_backfill_1d_datetime = _interp_wrapper(algos.backfill_inplace_int64, np.int64) -_backfill_2d_datetime = _interp_wrapper(algos.backfill_2d_inplace_int64, - np.int64) +def _cast_values_for_fillna(values, dtype): + """ + Cast values to a dtype that algos.pad and algos.backfill can handle. + """ + # TODO: for int-dtypes we make a copy, but for everything else this + # alters the values in-place. Is this intentional? + if (is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype) or + is_timedelta64_dtype(dtype)): + values = values.view(np.int64) -def pad_1d(values, limit=None, mask=None, dtype=None): - if dtype is None: - dtype = values.dtype - _method = None - if is_float_dtype(values): - name = 'pad_inplace_{name}'.format(name=dtype.name) - _method = getattr(algos, name, None) - elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype): - _method = _pad_1d_datetime elif is_integer_dtype(values): + # NB: this check needs to come after the datetime64 check above values = ensure_float64(values) - _method = algos.pad_inplace_float64 - elif values.dtype == np.object_: - _method = algos.pad_inplace_object - elif is_timedelta64_dtype(values): - # NaTs are treated identically to datetime64, so we can dispatch - # to that implementation - _method = _pad_1d_datetime - - if _method is None: - raise ValueError('Invalid dtype for pad_1d [{name}]' - .format(name=dtype.name)) - if mask is None: - mask = isna(values) - mask = mask.view(np.uint8) - _method(values, mask, limit=limit) return values -def backfill_1d(values, limit=None, mask=None, dtype=None): +def _fillna_prep(values, mask=None, dtype=None): + # boilerplate for pad_1d, backfill_1d, pad_2d, backfill_2d if dtype is None: dtype = values.dtype - _method = None - if is_float_dtype(values): - name = 'backfill_inplace_{name}'.format(name=dtype.name) - _method = getattr(algos, name, None) - elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype): - _method = _backfill_1d_datetime - elif is_integer_dtype(values): - values = ensure_float64(values) - _method = algos.backfill_inplace_float64 - elif values.dtype == np.object_: - _method = algos.backfill_inplace_object - elif is_timedelta64_dtype(values): - # NaTs are treated identically to datetime64, so we can dispatch - # to that implementation - _method = _backfill_1d_datetime - - if _method is None: - raise ValueError('Invalid dtype for backfill_1d [{name}]' - .format(name=dtype.name)) if mask is None: + # This needs to occur before datetime/timedeltas are cast to int64 mask = isna(values) + + values = _cast_values_for_fillna(values, dtype) + mask = mask.view(np.uint8) + return values, mask + - _method(values, mask, limit=limit) +def pad_1d(values, limit=None, mask=None, dtype=None): + values, mask = _fillna_prep(values, mask, dtype) + algos.pad_inplace(values, mask, limit=limit) return values -def pad_2d(values, limit=None, mask=None, dtype=None): - if dtype is None: - dtype = values.dtype - _method = None - if is_float_dtype(values): - name = 'pad_2d_inplace_{name}'.format(name=dtype.name) - _method = getattr(algos, name, None) - elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype): - _method = _pad_2d_datetime - elif is_integer_dtype(values): - values = ensure_float64(values) - _method = algos.pad_2d_inplace_float64 - elif values.dtype == np.object_: - _method = algos.pad_2d_inplace_object +def backfill_1d(values, limit=None, mask=None, dtype=None): + values, mask = _fillna_prep(values, mask, dtype) + algos.backfill_inplace(values, mask, limit=limit) + return values - if _method is None: - raise ValueError('Invalid dtype for pad_2d [{name}]' - .format(name=dtype.name)) - if mask is None: - mask = isna(values) - mask = mask.view(np.uint8) +def pad_2d(values, limit=None, mask=None, dtype=None): + values, mask = _fillna_prep(values, mask, dtype) if np.all(values.shape): - _method(values, mask, limit=limit) + algos.pad_2d_inplace(values, mask, limit=limit) else: # for test coverage pass @@ -560,30 +509,10 @@ def pad_2d(values, limit=None, mask=None, dtype=None): def backfill_2d(values, limit=None, mask=None, dtype=None): - if dtype is None: - dtype = values.dtype - _method = None - if is_float_dtype(values): - name = 'backfill_2d_inplace_{name}'.format(name=dtype.name) - _method = getattr(algos, name, None) - elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype): - _method = _backfill_2d_datetime - elif is_integer_dtype(values): - values = ensure_float64(values) - _method = algos.backfill_2d_inplace_float64 - elif values.dtype == np.object_: - _method = algos.backfill_2d_inplace_object - - if _method is None: - raise ValueError('Invalid dtype for backfill_2d [{name}]' - .format(name=dtype.name)) - - if mask is None: - mask = isna(values) - mask = mask.view(np.uint8) + values, mask = _fillna_prep(values, mask, dtype) if np.all(values.shape): - _method(values, mask, limit=limit) + algos.backfill_2d_inplace(values, mask, limit=limit) else: # for test coverage pass diff --git a/pandas/tests/indexing/test_indexing_engines.py b/pandas/tests/indexing/test_indexing_engines.py index dcdfbcb7fbea2d..57b85fd46a44e3 100644 --- a/pandas/tests/indexing/test_indexing_engines.py +++ b/pandas/tests/indexing/test_indexing_engines.py @@ -155,7 +155,7 @@ def test_get_backfill_indexer(self): new = np.array(list('abcdefghij'), dtype=self.dtype) result = engine.get_backfill_indexer(new) - expected = libalgos.backfill_object(arr, new) + expected = libalgos.backfill["object"](arr, new) tm.assert_numpy_array_equal(result, expected) def test_get_pad_indexer(self): @@ -165,5 +165,5 @@ def test_get_pad_indexer(self): new = np.array(list('abcdefghij'), dtype=self.dtype) result = engine.get_pad_indexer(new) - expected = libalgos.pad_object(arr, new) + expected = libalgos.pad["object"](arr, new) tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 42e9b1f5af8ad9..5951f5802f50e2 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1491,19 +1491,19 @@ def test_pad_backfill_object_segfault(): old = np.array([], dtype='O') new = np.array([datetime(2010, 12, 31)], dtype='O') - result = libalgos.pad_object(old, new) + result = libalgos.pad["object"](old, new) expected = np.array([-1], dtype=np.int64) tm.assert_numpy_array_equal(result, expected) - result = libalgos.pad_object(new, old) + result = libalgos.pad["object"](new, old) expected = np.array([], dtype=np.int64) tm.assert_numpy_array_equal(result, expected) - result = libalgos.backfill_object(old, new) + result = libalgos.backfill["object"](old, new) expected = np.array([-1], dtype=np.int64) tm.assert_numpy_array_equal(result, expected) - result = libalgos.backfill_object(new, old) + result = libalgos.backfill["object"](new, old) expected = np.array([], dtype=np.int64) tm.assert_numpy_array_equal(result, expected) @@ -1535,7 +1535,7 @@ def test_backfill(self): old = Index([1, 5, 10]) new = Index(lrange(12)) - filler = libalgos.backfill_int64(old.values, new.values) + filler = libalgos.backfill["int64_t"](old.values, new.values) expect_filler = np.array([0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, -1], dtype=np.int64) @@ -1544,7 +1544,7 @@ def test_backfill(self): # corner case old = Index([1, 4]) new = Index(lrange(5, 10)) - filler = libalgos.backfill_int64(old.values, new.values) + filler = libalgos.backfill["int64_t"](old.values, new.values) expect_filler = np.array([-1, -1, -1, -1, -1], dtype=np.int64) tm.assert_numpy_array_equal(filler, expect_filler) @@ -1553,7 +1553,7 @@ def test_pad(self): old = Index([1, 5, 10]) new = Index(lrange(12)) - filler = libalgos.pad_int64(old.values, new.values) + filler = libalgos.pad["int64_t"](old.values, new.values) expect_filler = np.array([-1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2], dtype=np.int64) @@ -1562,7 +1562,7 @@ def test_pad(self): # corner case old = Index([5, 10]) new = Index(lrange(5)) - filler = libalgos.pad_int64(old.values, new.values) + filler = libalgos.pad["int64_t"](old.values, new.values) expect_filler = np.array([-1, -1, -1, -1, -1], dtype=np.int64) tm.assert_numpy_array_equal(filler, expect_filler)