From 95f8dca47723192ddeed5a2f198d0521c687c9aa Mon Sep 17 00:00:00 2001 From: Noam Hershtig Date: Sat, 26 Jan 2019 19:48:41 +0200 Subject: [PATCH] CLN: Refactor cython to use memory views (#24932) --- pandas/_libs/algos.pyx | 26 +++--- pandas/_libs/groupby_helper.pxi.in | 92 +++++++++++----------- pandas/_libs/hashtable.pyx | 18 +++-- pandas/_libs/hashtable_class_helper.pxi.in | 2 +- pandas/_libs/internals.pyx | 7 +- pandas/_libs/join.pyx | 46 +++++------ pandas/_libs/lib.pyx | 47 +++++------ pandas/_libs/missing.pyx | 7 +- pandas/_libs/parsers.pyx | 23 +++--- pandas/_libs/reduction.pyx | 2 +- pandas/_libs/skiplist.pyx | 19 +++-- pandas/_libs/sparse_op_helper.pxi.in | 20 ++--- pandas/_libs/tslibs/conversion.pyx | 2 +- pandas/_libs/tslibs/fields.pyx | 2 +- pandas/_libs/tslibs/parsing.pyx | 5 +- pandas/_libs/tslibs/period.pyx | 7 +- pandas/_libs/tslibs/resolution.pyx | 19 ++--- pandas/_libs/window.pyx | 13 +-- pandas/io/msgpack/_packer.pyx | 38 ++++----- pandas/io/msgpack/_unpacker.pyx | 48 +++++------ pandas/io/sas/sas.pyx | 11 +-- 21 files changed, 240 insertions(+), 214 deletions(-) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index b3c519ab99b6e..663411ad984c2 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -76,7 +76,7 @@ class NegInfinity(object): @cython.wraparound(False) @cython.boundscheck(False) -cpdef ndarray[int64_t, ndim=1] unique_deltas(ndarray[int64_t] arr): +cpdef ndarray[int64_t, ndim=1] unique_deltas(const int64_t[:] arr): """ Efficiently find the unique first-differences of the given array. @@ -150,7 +150,7 @@ def is_lexsorted(list_of_arrays: list) -> bint: @cython.boundscheck(False) @cython.wraparound(False) -def groupsort_indexer(ndarray[int64_t] index, Py_ssize_t ngroups): +def groupsort_indexer(const int64_t[:] index, Py_ssize_t ngroups): """ compute a 1-d indexer that is an ordering of the passed index, ordered by the groups. This is a reverse of the label @@ -230,7 +230,7 @@ def kth_smallest(numeric[:] a, Py_ssize_t k) -> numeric: @cython.boundscheck(False) @cython.wraparound(False) -def nancorr(ndarray[float64_t, ndim=2] mat, bint cov=0, minp=None): +def nancorr(const float64_t[:, :] mat, bint cov=0, minp=None): cdef: Py_ssize_t i, j, xi, yi, N, K bint minpv @@ -294,7 +294,7 @@ def nancorr(ndarray[float64_t, ndim=2] mat, bint cov=0, minp=None): @cython.boundscheck(False) @cython.wraparound(False) -def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1): +def nancorr_spearman(const float64_t[:, :] mat, Py_ssize_t minp=1): cdef: Py_ssize_t i, j, xi, yi, N, K ndarray[float64_t, ndim=2] result @@ -435,8 +435,8 @@ def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None): @cython.boundscheck(False) @cython.wraparound(False) -def pad_inplace(ndarray[algos_t] values, - ndarray[uint8_t, cast=True] mask, +def pad_inplace(algos_t[:] values, + const uint8_t[:] mask, limit=None): cdef: Py_ssize_t i, N @@ -472,8 +472,8 @@ def pad_inplace(ndarray[algos_t] values, @cython.boundscheck(False) @cython.wraparound(False) -def pad_2d_inplace(ndarray[algos_t, ndim=2] values, - ndarray[uint8_t, ndim=2] mask, +def pad_2d_inplace(algos_t[:, :] values, + const uint8_t[:, :] mask, limit=None): cdef: Py_ssize_t i, j, N, K @@ -602,8 +602,8 @@ def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None): @cython.boundscheck(False) @cython.wraparound(False) -def backfill_inplace(ndarray[algos_t] values, - ndarray[uint8_t, cast=True] mask, +def backfill_inplace(algos_t[:] values, + const uint8_t[:] mask, limit=None): cdef: Py_ssize_t i, N @@ -639,8 +639,8 @@ def backfill_inplace(ndarray[algos_t] values, @cython.boundscheck(False) @cython.wraparound(False) -def backfill_2d_inplace(ndarray[algos_t, ndim=2] values, - ndarray[uint8_t, ndim=2] mask, +def backfill_2d_inplace(algos_t[:, :] values, + const uint8_t[:, :] mask, limit=None): cdef: Py_ssize_t i, j, N, K @@ -678,7 +678,7 @@ def backfill_2d_inplace(ndarray[algos_t, ndim=2] values, @cython.wraparound(False) @cython.boundscheck(False) -def arrmap(ndarray[algos_t] index, object func): +def arrmap(algos_t[:] index, object func): cdef: Py_ssize_t length = index.shape[0] Py_ssize_t i = 0 diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in index abac9f147848e..858039f038d02 100644 --- a/pandas/_libs/groupby_helper.pxi.in +++ b/pandas/_libs/groupby_helper.pxi.in @@ -29,10 +29,10 @@ def get_dispatch(dtypes): @cython.wraparound(False) @cython.boundscheck(False) -def group_add_{{name}}(ndarray[{{c_type}}, ndim=2] out, - ndarray[int64_t] counts, - ndarray[{{c_type}}, ndim=2] values, - ndarray[int64_t] labels, +def group_add_{{name}}({{c_type}}[:, :] out, + int64_t[:] counts, + {{c_type}}[:, :] values, + const int64_t[:] labels, Py_ssize_t min_count=0): """ Only aggregates on axis=0 @@ -76,10 +76,10 @@ def group_add_{{name}}(ndarray[{{c_type}}, ndim=2] out, @cython.wraparound(False) @cython.boundscheck(False) -def group_prod_{{name}}(ndarray[{{c_type}}, ndim=2] out, - ndarray[int64_t] counts, - ndarray[{{c_type}}, ndim=2] values, - ndarray[int64_t] labels, +def group_prod_{{name}}({{c_type}}[:, :] out, + int64_t[:] counts, + {{c_type}}[:, :] values, + const int64_t[:] labels, Py_ssize_t min_count=0): """ Only aggregates on axis=0 @@ -123,10 +123,10 @@ def group_prod_{{name}}(ndarray[{{c_type}}, ndim=2] out, @cython.wraparound(False) @cython.boundscheck(False) @cython.cdivision(True) -def group_var_{{name}}(ndarray[{{c_type}}, ndim=2] out, - ndarray[int64_t] counts, - ndarray[{{c_type}}, ndim=2] values, - ndarray[int64_t] labels, +def group_var_{{name}}({{c_type}}[:, :] out, + int64_t[:] counts, + {{c_type}}[:, :] values, + const int64_t[:] labels, Py_ssize_t min_count=-1): cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) @@ -175,10 +175,10 @@ def group_var_{{name}}(ndarray[{{c_type}}, ndim=2] out, @cython.wraparound(False) @cython.boundscheck(False) -def group_mean_{{name}}(ndarray[{{c_type}}, ndim=2] out, - ndarray[int64_t] counts, - ndarray[{{c_type}}, ndim=2] values, - ndarray[int64_t] labels, +def group_mean_{{name}}({{c_type}}[:, :] out, + int64_t[:] counts, + {{c_type}}[:, :] values, + const int64_t[:] labels, Py_ssize_t min_count=-1): cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) @@ -220,11 +220,11 @@ def group_mean_{{name}}(ndarray[{{c_type}}, ndim=2] out, @cython.wraparound(False) @cython.boundscheck(False) -def group_ohlc_{{name}}(ndarray[{{c_type}}, ndim=2] out, - ndarray[int64_t] counts, - ndarray[{{c_type}}, ndim=2] values, - ndarray[int64_t] labels, - Py_ssize_t min_count=-1): +def group_ohlc_{{name}}({{c_type}}[:, :] out, + int64_t[:] counts, + {{c_type}}[:, :] values, + const int64_t[:] labels, + Py_ssize_t min_count=-1): """ Only aggregates on axis=0 """ @@ -293,10 +293,10 @@ def get_dispatch(dtypes): @cython.wraparound(False) @cython.boundscheck(False) -def group_last_{{name}}(ndarray[{{c_type}}, ndim=2] out, - ndarray[int64_t] counts, - ndarray[{{c_type}}, ndim=2] values, - ndarray[int64_t] labels, +def group_last_{{name}}({{c_type}}[:, :] out, + int64_t[:] counts, + {{c_type}}[:, :] values, + const int64_t[:] labels, Py_ssize_t min_count=-1): """ Only aggregates on axis=0 @@ -350,10 +350,10 @@ def group_last_{{name}}(ndarray[{{c_type}}, ndim=2] out, @cython.wraparound(False) @cython.boundscheck(False) -def group_nth_{{name}}(ndarray[{{c_type}}, ndim=2] out, - ndarray[int64_t] counts, - ndarray[{{c_type}}, ndim=2] values, - ndarray[int64_t] labels, int64_t rank, +def group_nth_{{name}}({{c_type}}[:, :] out, + int64_t[:] counts, + {{c_type}}[:, :] values, + const int64_t[:] labels, int64_t rank, Py_ssize_t min_count=-1): """ Only aggregates on axis=0 @@ -411,9 +411,9 @@ def group_nth_{{name}}(ndarray[{{c_type}}, ndim=2] out, @cython.boundscheck(False) @cython.wraparound(False) -def group_rank_{{name}}(ndarray[float64_t, ndim=2] out, - ndarray[{{c_type}}, ndim=2] values, - ndarray[int64_t] labels, +def group_rank_{{name}}(float64_t[:, :] out, + {{c_type}}[:, :] values, + const int64_t[:] labels, bint is_datetimelike, object ties_method, bint ascending, bint pct, object na_option): """ @@ -606,10 +606,10 @@ ctypedef fused groupby_t: @cython.wraparound(False) @cython.boundscheck(False) -def group_max(ndarray[groupby_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[groupby_t, ndim=2] values, - ndarray[int64_t] labels, +def group_max(groupby_t[:, :] out, + int64_t[:] counts, + groupby_t[:, :] values, + const int64_t[:] labels, Py_ssize_t min_count=-1): """ Only aggregates on axis=0 @@ -669,10 +669,10 @@ def group_max(ndarray[groupby_t, ndim=2] out, @cython.wraparound(False) @cython.boundscheck(False) -def group_min(ndarray[groupby_t, ndim=2] out, - ndarray[int64_t] counts, - ndarray[groupby_t, ndim=2] values, - ndarray[int64_t] labels, +def group_min(groupby_t[:, :] out, + int64_t[:] counts, + groupby_t[:, :] values, + const int64_t[:] labels, Py_ssize_t min_count=-1): """ Only aggregates on axis=0 @@ -731,9 +731,9 @@ def group_min(ndarray[groupby_t, ndim=2] out, @cython.boundscheck(False) @cython.wraparound(False) -def group_cummin(ndarray[groupby_t, ndim=2] out, - ndarray[groupby_t, ndim=2] values, - ndarray[int64_t] labels, +def group_cummin(groupby_t[:, :] out, + groupby_t[:, :] values, + const int64_t[:] labels, bint is_datetimelike): """ Only transforms on axis=0 @@ -779,9 +779,9 @@ def group_cummin(ndarray[groupby_t, ndim=2] out, @cython.boundscheck(False) @cython.wraparound(False) -def group_cummax(ndarray[groupby_t, ndim=2] out, - ndarray[groupby_t, ndim=2] values, - ndarray[int64_t] labels, +def group_cummax(groupby_t[:, :] out, + groupby_t[:, :] values, + const int64_t[:] labels, bint is_datetimelike): """ Only transforms on axis=0 diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx index 47fa5932290af..8d0c451ad0ab8 100644 --- a/pandas/_libs/hashtable.pyx +++ b/pandas/_libs/hashtable.pyx @@ -52,9 +52,10 @@ include "hashtable_class_helper.pxi" include "hashtable_func_helper.pxi" cdef class Factorizer: - cdef public PyObjectHashTable table - cdef public ObjectVector uniques - cdef public Py_ssize_t count + cdef public: + PyObjectHashTable table + ObjectVector uniques + Py_ssize_t count def __init__(self, size_hint): self.table = PyObjectHashTable(size_hint) @@ -96,9 +97,10 @@ cdef class Factorizer: cdef class Int64Factorizer: - cdef public Int64HashTable table - cdef public Int64Vector uniques - cdef public Py_ssize_t count + cdef public: + Int64HashTable table + Int64Vector uniques + Py_ssize_t count def __init__(self, size_hint): self.table = Int64HashTable(size_hint) @@ -140,7 +142,7 @@ cdef class Int64Factorizer: @cython.wraparound(False) @cython.boundscheck(False) -def unique_label_indices(ndarray[int64_t, ndim=1] labels): +def unique_label_indices(const int64_t[:] labels): """ indices of the first occurrences of the unique labels *excluding* -1. equivalent to: @@ -168,6 +170,6 @@ def unique_label_indices(ndarray[int64_t, ndim=1] labels): kh_destroy_int64(table) arr = idx.to_array() - arr = arr[labels[arr].argsort()] + arr = arr[np.asarray(labels)[arr].argsort()] return arr[1:] if arr.size != 0 and labels[arr[0]] == -1 else arr diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index eac35588b6fc3..3644928d8dedc 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -322,7 +322,7 @@ cdef class {{name}}HashTable(HashTable): self.table.vals[k] = values[i] @cython.boundscheck(False) - def map_locations(self, ndarray[{{dtype}}_t, ndim=1] values): + def map_locations(self, const {{dtype}}_t[:] values): cdef: Py_ssize_t i, n = len(values) int ret = 0 diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 72a1cf16f96b6..f23d2666b4bf4 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -23,10 +23,11 @@ from pandas._libs.algos import ensure_int64 cdef class BlockPlacement: # __slots__ = '_as_slice', '_as_array', '_len' - cdef slice _as_slice - cdef object _as_array + cdef: + slice _as_slice + object _as_array - cdef bint _has_slice, _has_array, _is_known_slice_like + bint _has_slice, _has_array, _is_known_slice_like def __init__(self, val): cdef: diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx index e4440ac3d9fd8..503867058b3c8 100644 --- a/pandas/_libs/join.pyx +++ b/pandas/_libs/join.pyx @@ -14,7 +14,7 @@ from pandas._libs.algos import groupsort_indexer, ensure_platform_int from pandas.core.algorithms import take_nd -def inner_join(ndarray[int64_t] left, ndarray[int64_t] right, +def inner_join(const int64_t[:] left, const int64_t[:] right, Py_ssize_t max_groups): cdef: Py_ssize_t i, j, k, count = 0 @@ -65,7 +65,7 @@ def inner_join(ndarray[int64_t] left, ndarray[int64_t] right, _get_result_indexer(right_sorter, right_indexer)) -def left_outer_join(ndarray[int64_t] left, ndarray[int64_t] right, +def left_outer_join(const int64_t[:] left, const int64_t[:] right, Py_ssize_t max_groups, sort=True): cdef: Py_ssize_t i, j, k, count = 0 @@ -139,7 +139,7 @@ def left_outer_join(ndarray[int64_t] left, ndarray[int64_t] right, return left_indexer, right_indexer -def full_outer_join(ndarray[int64_t] left, ndarray[int64_t] right, +def full_outer_join(const int64_t[:] left, const int64_t[:] right, Py_ssize_t max_groups): cdef: Py_ssize_t i, j, k, count = 0 @@ -213,7 +213,7 @@ def _get_result_indexer(sorter, indexer): return res -def ffill_indexer(ndarray[int64_t] indexer): +def ffill_indexer(const int64_t[:] indexer): cdef: Py_ssize_t i, n = len(indexer) ndarray[int64_t] result @@ -252,7 +252,7 @@ ctypedef fused join_t: @cython.wraparound(False) @cython.boundscheck(False) -def left_join_indexer_unique(ndarray[join_t] left, ndarray[join_t] right): +def left_join_indexer_unique(join_t[:] left, join_t[:] right): cdef: Py_ssize_t i, j, nleft, nright ndarray[int64_t] indexer @@ -677,10 +677,10 @@ ctypedef fused by_t: uint64_t -def asof_join_backward_on_X_by_Y(ndarray[asof_t] left_values, - ndarray[asof_t] right_values, - ndarray[by_t] left_by_values, - ndarray[by_t] right_by_values, +def asof_join_backward_on_X_by_Y(asof_t[:] left_values, + asof_t[:] right_values, + by_t[:] left_by_values, + by_t[:] right_by_values, bint allow_exact_matches=1, tolerance=None): @@ -746,10 +746,10 @@ def asof_join_backward_on_X_by_Y(ndarray[asof_t] left_values, return left_indexer, right_indexer -def asof_join_forward_on_X_by_Y(ndarray[asof_t] left_values, - ndarray[asof_t] right_values, - ndarray[by_t] left_by_values, - ndarray[by_t] right_by_values, +def asof_join_forward_on_X_by_Y(asof_t[:] left_values, + asof_t[:] right_values, + by_t[:] left_by_values, + by_t[:] right_by_values, bint allow_exact_matches=1, tolerance=None): @@ -815,10 +815,10 @@ def asof_join_forward_on_X_by_Y(ndarray[asof_t] left_values, return left_indexer, right_indexer -def asof_join_nearest_on_X_by_Y(ndarray[asof_t] left_values, - ndarray[asof_t] right_values, - ndarray[by_t] left_by_values, - ndarray[by_t] right_by_values, +def asof_join_nearest_on_X_by_Y(asof_t[:] left_values, + asof_t[:] right_values, + by_t[:] left_by_values, + by_t[:] right_by_values, bint allow_exact_matches=1, tolerance=None): @@ -864,8 +864,8 @@ def asof_join_nearest_on_X_by_Y(ndarray[asof_t] left_values, # asof_join # ---------------------------------------------------------------------- -def asof_join_backward(ndarray[asof_t] left_values, - ndarray[asof_t] right_values, +def asof_join_backward(asof_t[:] left_values, + asof_t[:] right_values, bint allow_exact_matches=1, tolerance=None): @@ -917,8 +917,8 @@ def asof_join_backward(ndarray[asof_t] left_values, return left_indexer, right_indexer -def asof_join_forward(ndarray[asof_t] left_values, - ndarray[asof_t] right_values, +def asof_join_forward(asof_t[:] left_values, + asof_t[:] right_values, bint allow_exact_matches=1, tolerance=None): @@ -971,8 +971,8 @@ def asof_join_forward(ndarray[asof_t] left_values, return left_indexer, right_indexer -def asof_join_nearest(ndarray[asof_t] left_values, - ndarray[asof_t] right_values, +def asof_join_nearest(asof_t[:] left_values, + asof_t[:] right_values, bint allow_exact_matches=1, tolerance=None): diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index f845a5437ded4..4745916eb0ce2 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -40,11 +40,12 @@ cdef extern from "numpy/arrayobject.h": # Use PyDataType_* macros when possible, however there are no macros # for accessing some of the fields, so some are defined. Please # ask on cython-dev if you need more. - cdef int type_num - cdef int itemsize "elsize" - cdef char byteorder - cdef object fields - cdef tuple names + cdef: + int type_num + int itemsize "elsize" + char byteorder + object fields + tuple names cdef extern from "src/parse_helper.h": @@ -67,12 +68,13 @@ from pandas._libs.missing cimport ( # constants that will be compared to potentially arbitrarily large # python int -cdef object oINT64_MAX = INT64_MAX -cdef object oINT64_MIN = INT64_MIN -cdef object oUINT64_MAX = UINT64_MAX +cdef: + object oINT64_MAX = INT64_MAX + object oINT64_MIN = INT64_MIN + object oUINT64_MAX = UINT64_MAX -cdef bint PY2 = sys.version_info[0] == 2 -cdef float64_t NaN = np.NaN + bint PY2 = sys.version_info[0] == 2 + float64_t NaN = np.NaN def values_from_object(obj: object): @@ -376,7 +378,7 @@ def fast_zip(list ndarrays): return result -def get_reverse_indexer(ndarray[int64_t] indexer, Py_ssize_t length): +def get_reverse_indexer(const int64_t[:] indexer, Py_ssize_t length): """ Reverse indexing operation. @@ -405,7 +407,7 @@ def get_reverse_indexer(ndarray[int64_t] indexer, Py_ssize_t length): @cython.wraparound(False) @cython.boundscheck(False) -def has_infs_f4(ndarray[float32_t] arr) -> bool: +def has_infs_f4(const float32_t[:] arr) -> bool: cdef: Py_ssize_t i, n = len(arr) float32_t inf, neginf, val @@ -422,7 +424,7 @@ def has_infs_f4(ndarray[float32_t] arr) -> bool: @cython.wraparound(False) @cython.boundscheck(False) -def has_infs_f8(ndarray[float64_t] arr) -> bool: +def has_infs_f8(const float64_t[:] arr) -> bool: cdef: Py_ssize_t i, n = len(arr) float64_t inf, neginf, val @@ -660,7 +662,7 @@ def clean_index_list(obj: list): # is a general, O(max(len(values), len(binner))) method. @cython.boundscheck(False) @cython.wraparound(False) -def generate_bins_dt64(ndarray[int64_t] values, ndarray[int64_t] binner, +def generate_bins_dt64(ndarray[int64_t] values, const int64_t[:] binner, object closed='left', bint hasnans=0): """ Int64 (datetime64) version of generic python version in groupby.py @@ -723,7 +725,7 @@ def generate_bins_dt64(ndarray[int64_t] values, ndarray[int64_t] binner, @cython.boundscheck(False) @cython.wraparound(False) -def row_bool_subset(ndarray[float64_t, ndim=2] values, +def row_bool_subset(const float64_t[:, :] values, ndarray[uint8_t, cast=True] mask): cdef: Py_ssize_t i, j, n, k, pos = 0 @@ -767,8 +769,8 @@ def row_bool_subset_object(ndarray[object, ndim=2] values, @cython.boundscheck(False) @cython.wraparound(False) -def get_level_sorter(ndarray[int64_t, ndim=1] label, - ndarray[int64_t, ndim=1] starts): +def get_level_sorter(const int64_t[:] label, + const int64_t[:] starts): """ argsort for a single level of a multi-index, keeping the order of higher levels unchanged. `starts` points to starts of same-key indices w.r.t @@ -780,10 +782,11 @@ def get_level_sorter(ndarray[int64_t, ndim=1] label, int64_t l, r Py_ssize_t i ndarray[int64_t, ndim=1] out = np.empty(len(label), dtype=np.int64) + ndarray[int64_t, ndim=1] label_arr = np.asarray(label) for i in range(len(starts) - 1): l, r = starts[i], starts[i + 1] - out[l:r] = l + label[l:r].argsort(kind='mergesort') + out[l:r] = l + label_arr[l:r].argsort(kind='mergesort') return out @@ -791,7 +794,7 @@ def get_level_sorter(ndarray[int64_t, ndim=1] label, @cython.boundscheck(False) @cython.wraparound(False) def count_level_2d(ndarray[uint8_t, ndim=2, cast=True] mask, - ndarray[int64_t, ndim=1] labels, + const int64_t[:] labels, Py_ssize_t max_bin, int axis): cdef: @@ -818,7 +821,7 @@ def count_level_2d(ndarray[uint8_t, ndim=2, cast=True] mask, return counts -def generate_slices(ndarray[int64_t] labels, Py_ssize_t ngroups): +def generate_slices(const int64_t[:] labels, Py_ssize_t ngroups): cdef: Py_ssize_t i, group_size, n, start int64_t lab @@ -847,7 +850,7 @@ def generate_slices(ndarray[int64_t] labels, Py_ssize_t ngroups): return starts, ends -def indices_fast(object index, ndarray[int64_t] labels, list keys, +def indices_fast(object index, const int64_t[:] labels, list keys, list sorted_labels): cdef: Py_ssize_t i, j, k, lab, cur, start, n = len(labels) @@ -2146,7 +2149,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, @cython.boundscheck(False) @cython.wraparound(False) -def map_infer_mask(ndarray arr, object f, ndarray[uint8_t] mask, +def map_infer_mask(ndarray arr, object f, const uint8_t[:] mask, bint convert=1): """ Substitute for np.vectorize with pandas-friendly dtype inference diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index 229edbac4992d..ab0e4cd6cc765 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -16,10 +16,11 @@ from pandas._libs.tslibs.nattype cimport ( checknull_with_nat, c_NaT as NaT, is_null_datetimelike) -cdef float64_t INF = np.inf -cdef float64_t NEGINF = -INF +cdef: + float64_t INF = np.inf + float64_t NEGINF = -INF -cdef int64_t NPY_NAT = util.get_nat() + int64_t NPY_NAT = util.get_nat() cpdef bint checknull(object val): diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 6cb6ed749f87b..f679746643643 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -64,10 +64,11 @@ from pandas.errors import (ParserError, DtypeWarning, CParserError = ParserError -cdef bint PY3 = (sys.version_info[0] >= 3) +cdef: + bint PY3 = (sys.version_info[0] >= 3) -cdef float64_t INF = np.inf -cdef float64_t NEGINF = -INF + float64_t INF = np.inf + float64_t NEGINF = -INF cdef extern from "errno.h": @@ -735,7 +736,7 @@ cdef class TextReader: int status int64_t hr, data_line char *errors = "strict" - cdef StringPath path = _string_path(self.c_encoding) + StringPath path = _string_path(self.c_encoding) header = [] unnamed_cols = set() @@ -1389,8 +1390,9 @@ cdef class TextReader: return None -cdef object _true_values = [b'True', b'TRUE', b'true'] -cdef object _false_values = [b'False', b'FALSE', b'false'] +cdef: + object _true_values = [b'True', b'TRUE', b'true'] + object _false_values = [b'False', b'FALSE', b'false'] def _ensure_encoded(list lst): @@ -1637,7 +1639,7 @@ cdef _categorical_convert(parser_t *parser, int64_t col, int64_t current_category = 0 char *errors = "strict" - cdef StringPath path = _string_path(encoding) + StringPath path = _string_path(encoding) int ret = 0 kh_str_t *table @@ -1727,9 +1729,10 @@ cdef inline void _to_fw_string_nogil(parser_t *parser, int64_t col, data += width -cdef char* cinf = b'inf' -cdef char* cposinf = b'+inf' -cdef char* cneginf = b'-inf' +cdef: + char* cinf = b'inf' + char* cposinf = b'+inf' + char* cneginf = b'-inf' cdef _try_double(parser_t *parser, int64_t col, diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index ca39c4de4d309..507567cf480d7 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -494,7 +494,7 @@ class InvalidApply(Exception): def apply_frame_axis0(object frame, object f, object names, - ndarray[int64_t] starts, ndarray[int64_t] ends): + const int64_t[:] starts, const int64_t[:] ends): cdef: BlockSlider slider Py_ssize_t i, n = len(starts) diff --git a/pandas/_libs/skiplist.pyx b/pandas/_libs/skiplist.pyx index 6698fcb767d7c..2fdee72f9d588 100644 --- a/pandas/_libs/skiplist.pyx +++ b/pandas/_libs/skiplist.pyx @@ -57,8 +57,9 @@ cdef class IndexableSkiplist: return self.get(i) cpdef get(self, Py_ssize_t i): - cdef Py_ssize_t level - cdef Node node + cdef: + Py_ssize_t level + Node node node = self.head i += 1 @@ -71,9 +72,10 @@ cdef class IndexableSkiplist: return node.value cpdef insert(self, double value): - cdef Py_ssize_t level, steps, d - cdef Node node, prevnode, newnode, next_at_level, tmp - cdef list chain, steps_at_level + cdef: + Py_ssize_t level, steps, d + Node node, prevnode, newnode, next_at_level, tmp + list chain, steps_at_level # find first node on each level where node.next[levels].value > value chain = [None] * self.maxlevels @@ -110,9 +112,10 @@ cdef class IndexableSkiplist: self.size += 1 cpdef remove(self, double value): - cdef Py_ssize_t level, d - cdef Node node, prevnode, tmpnode, next_at_level - cdef list chain + cdef: + Py_ssize_t level, d + Node node, prevnode, tmpnode, next_at_level + list chain # find first node on each level where node.next[levels].value >= value chain = [None] * self.maxlevels diff --git a/pandas/_libs/sparse_op_helper.pxi.in b/pandas/_libs/sparse_op_helper.pxi.in index c6621ab5977ca..5949a3fd0ed81 100644 --- a/pandas/_libs/sparse_op_helper.pxi.in +++ b/pandas/_libs/sparse_op_helper.pxi.in @@ -125,10 +125,10 @@ def get_dispatch(dtypes): @cython.wraparound(False) @cython.boundscheck(False) -cdef inline tuple block_op_{{opname}}_{{dtype}}(ndarray x_, +cdef inline tuple block_op_{{opname}}_{{dtype}}({{dtype}}_t[:] x_, BlockIndex xindex, {{dtype}}_t xfill, - ndarray y_, + {{dtype}}_t[:] y_, BlockIndex yindex, {{dtype}}_t yfill): ''' @@ -142,7 +142,7 @@ cdef inline tuple block_op_{{opname}}_{{dtype}}(ndarray x_, int32_t xloc, yloc Py_ssize_t xblock = 0, yblock = 0 # block numbers - ndarray[{{dtype}}_t, ndim=1] x, y + {{dtype}}_t[:] x, y ndarray[{{rdtype}}_t, ndim=1] out # to suppress Cython warning @@ -226,16 +226,18 @@ cdef inline tuple block_op_{{opname}}_{{dtype}}(ndarray x_, @cython.wraparound(False) @cython.boundscheck(False) -cdef inline tuple int_op_{{opname}}_{{dtype}}(ndarray x_, IntIndex xindex, +cdef inline tuple int_op_{{opname}}_{{dtype}}({{dtype}}_t[:] x_, + IntIndex xindex, {{dtype}}_t xfill, - ndarray y_, IntIndex yindex, + {{dtype}}_t[:] y_, + IntIndex yindex, {{dtype}}_t yfill): cdef: IntIndex out_index Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices int32_t xloc, yloc - ndarray[int32_t, ndim=1] xindices, yindices, out_indices - ndarray[{{dtype}}_t, ndim=1] x, y + int32_t[:] xindices, yindices, out_indices + {{dtype}}_t[:] x, y ndarray[{{rdtype}}_t, ndim=1] out # suppress Cython compiler warnings due to inlining @@ -284,9 +286,9 @@ cdef inline tuple int_op_{{opname}}_{{dtype}}(ndarray x_, IntIndex xindex, return out, out_index, {{(opname, 'xfill', 'yfill', dtype) | get_op}} -cpdef sparse_{{opname}}_{{dtype}}(ndarray[{{dtype}}_t, ndim=1] x, +cpdef sparse_{{opname}}_{{dtype}}({{dtype}}_t[:] x, SparseIndex xindex, {{dtype}}_t xfill, - ndarray[{{dtype}}_t, ndim=1] y, + {{dtype}}_t[:] y, SparseIndex yindex, {{dtype}}_t yfill): if isinstance(xindex, BlockIndex): diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 6c8b732928bc3..1c0adaaa288a9 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -147,7 +147,7 @@ def ensure_timedelta64ns(arr: ndarray, copy: bool=True): @cython.boundscheck(False) @cython.wraparound(False) -def datetime_to_datetime64(values: object[:]): +def datetime_to_datetime64(object[:] values): """ Convert ndarray of datetime-like objects to int64 array representing nanosecond timestamps. diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 5cda7992369fc..240f008394099 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -381,7 +381,7 @@ def get_start_end_field(int64_t[:] dtindex, object field, @cython.wraparound(False) @cython.boundscheck(False) -def get_date_field(ndarray[int64_t] dtindex, object field): +def get_date_field(int64_t[:] dtindex, object field): """ Given a int64-based datetime index, extract the year, month, etc., field and return an array of these values. diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 82719de2dbdbd..7759e165b7193 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -44,9 +44,10 @@ class DateParseError(ValueError): _DEFAULT_DATETIME = datetime(1, 1, 1).replace(hour=0, minute=0, second=0, microsecond=0) -cdef object _TIMEPAT = re.compile(r'^([01]?[0-9]|2[0-3]):([0-5][0-9])') +cdef: + object _TIMEPAT = re.compile(r'^([01]?[0-9]|2[0-3]):([0-5][0-9])') -cdef set _not_datelike_strings = {'a', 'A', 'm', 'M', 'p', 'P', 't', 'T'} + set _not_datelike_strings = {'a', 'A', 'm', 'M', 'p', 'P', 't', 'T'} # ---------------------------------------------------------------------- diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 2f4edb7de8f95..e38e9a1ca5df6 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -52,9 +52,10 @@ from pandas._libs.tslibs.nattype cimport ( from pandas._libs.tslibs.offsets cimport to_offset from pandas._libs.tslibs.offsets import _Tick -cdef bint PY2 = str == bytes -cdef enum: - INT32_MIN = -2147483648 +cdef: + bint PY2 = str == bytes + enum: + INT32_MIN = -2147483648 ctypedef struct asfreq_info: diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index f80c1e9841abe..13a4f5ba48557 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -16,15 +16,16 @@ from pandas._libs.tslibs.ccalendar cimport get_days_in_month # ---------------------------------------------------------------------- # Constants -cdef int64_t NPY_NAT = get_nat() - -cdef int RESO_NS = 0 -cdef int RESO_US = 1 -cdef int RESO_MS = 2 -cdef int RESO_SEC = 3 -cdef int RESO_MIN = 4 -cdef int RESO_HR = 5 -cdef int RESO_DAY = 6 +cdef: + int64_t NPY_NAT = get_nat() + + int RESO_NS = 0 + int RESO_US = 1 + int RESO_MS = 2 + int RESO_SEC = 3 + int RESO_MIN = 4 + int RESO_HR = 5 + int RESO_DAY = 6 # ---------------------------------------------------------------------- diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index e8f3de64c3823..cc5b3b63f5b04 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -26,13 +26,14 @@ from pandas._libs.skiplist cimport ( skiplist_t, skiplist_init, skiplist_destroy, skiplist_get, skiplist_insert, skiplist_remove) -cdef float32_t MINfloat32 = np.NINF -cdef float64_t MINfloat64 = np.NINF +cdef: + float32_t MINfloat32 = np.NINF + float64_t MINfloat64 = np.NINF -cdef float32_t MAXfloat32 = np.inf -cdef float64_t MAXfloat64 = np.inf + float32_t MAXfloat32 = np.inf + float64_t MAXfloat64 = np.inf -cdef float64_t NaN = np.NaN + float64_t NaN = np.NaN cdef inline int int_max(int a, int b): return a if a >= b else b cdef inline int int_min(int a, int b): return a if a <= b else b @@ -242,7 +243,7 @@ cdef class VariableWindowIndexer(WindowIndexer): # max window size self.win = (self.end - self.start).max() - def build(self, ndarray[int64_t] index, int64_t win, bint left_closed, + def build(self, const int64_t[:] index, int64_t win, bint left_closed, bint right_closed): cdef: diff --git a/pandas/io/msgpack/_packer.pyx b/pandas/io/msgpack/_packer.pyx index d67c632188e62..8e2d943d8ddb1 100644 --- a/pandas/io/msgpack/_packer.pyx +++ b/pandas/io/msgpack/_packer.pyx @@ -74,14 +74,15 @@ cdef class Packer(object): Use bin type introduced in msgpack spec 2.0 for bytes. It also enable str8 type for unicode. """ - cdef msgpack_packer pk - cdef object _default - cdef object _bencoding - cdef object _berrors - cdef char *encoding - cdef char *unicode_errors - cdef bint use_float - cdef bint autoreset + cdef: + msgpack_packer pk + object _default + object _bencoding + object _berrors + char *encoding + char *unicode_errors + bint use_float + bint autoreset def __cinit__(self): cdef int buf_size = 1024 * 1024 @@ -123,16 +124,17 @@ cdef class Packer(object): cdef int _pack(self, object o, int nest_limit=DEFAULT_RECURSE_LIMIT) except -1: - cdef long long llval - cdef unsigned long long ullval - cdef long longval - cdef float fval - cdef double dval - cdef char* rawval - cdef int ret - cdef dict d - cdef size_t L - cdef int default_used = 0 + cdef: + long long llval + unsigned long long ullval + long longval + float fval + double dval + char* rawval + int ret + dict d + size_t L + int default_used = 0 if nest_limit < 0: raise PackValueError("recursion limit exceeded.") diff --git a/pandas/io/msgpack/_unpacker.pyx b/pandas/io/msgpack/_unpacker.pyx index 0c50aa5e68103..9bbfe749ef9ba 100644 --- a/pandas/io/msgpack/_unpacker.pyx +++ b/pandas/io/msgpack/_unpacker.pyx @@ -120,14 +120,15 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, See :class:`Unpacker` for options. """ - cdef unpack_context ctx - cdef size_t off = 0 - cdef int ret + cdef: + unpack_context ctx + size_t off = 0 + int ret - cdef char* buf - cdef Py_ssize_t buf_len - cdef char* cenc = NULL - cdef char* cerr = NULL + char* buf + Py_ssize_t buf_len + char* cenc = NULL + char* cerr = NULL PyObject_AsReadBuffer(packed, &buf, &buf_len) @@ -243,16 +244,17 @@ cdef class Unpacker(object): for o in unpacker: process(o) """ - cdef unpack_context ctx - cdef char* buf - cdef size_t buf_size, buf_head, buf_tail - cdef object file_like - cdef object file_like_read - cdef Py_ssize_t read_size - # To maintain refcnt. - cdef object object_hook, object_pairs_hook, list_hook, ext_hook - cdef object encoding, unicode_errors - cdef size_t max_buffer_size + cdef: + unpack_context ctx + char* buf + size_t buf_size, buf_head, buf_tail + object file_like + object file_like_read + Py_ssize_t read_size + # To maintain refcnt. + object object_hook, object_pairs_hook, list_hook, ext_hook + object encoding, unicode_errors + size_t max_buffer_size def __cinit__(self): self.buf = NULL @@ -270,8 +272,9 @@ cdef class Unpacker(object): Py_ssize_t max_array_len=2147483647, Py_ssize_t max_map_len=2147483647, Py_ssize_t max_ext_len=2147483647): - cdef char *cenc=NULL, - cdef char *cerr=NULL + cdef: + char *cenc=NULL, + char *cerr=NULL self.object_hook = object_hook self.object_pairs_hook = object_pairs_hook @@ -388,9 +391,10 @@ cdef class Unpacker(object): cdef object _unpack(self, execute_fn execute, object write_bytes, bint iter=0): - cdef int ret - cdef object obj - cdef size_t prev_head + cdef: + int ret + object obj + size_t prev_head if self.buf_head >= self.buf_tail and self.file_like is not None: self.read_from_file() diff --git a/pandas/io/sas/sas.pyx b/pandas/io/sas/sas.pyx index a5bfd5866a261..9b8fba16741f6 100644 --- a/pandas/io/sas/sas.pyx +++ b/pandas/io/sas/sas.pyx @@ -203,11 +203,12 @@ cdef enum ColumnTypes: # type the page_data types -cdef int page_meta_type = const.page_meta_type -cdef int page_mix_types_0 = const.page_mix_types[0] -cdef int page_mix_types_1 = const.page_mix_types[1] -cdef int page_data_type = const.page_data_type -cdef int subheader_pointers_offset = const.subheader_pointers_offset +cdef: + int page_meta_type = const.page_meta_type + int page_mix_types_0 = const.page_mix_types[0] + int page_mix_types_1 = const.page_mix_types[1] + int page_data_type = const.page_data_type + int subheader_pointers_offset = const.subheader_pointers_offset cdef class Parser(object):