Skip to content

Commit

Permalink
CLN: Refactor cython to use memory views (#24932)
Browse files: browse the repository at this point in the history
  • Loading branch information
noamher authored and jreback committed Jan 26, 2019
1 parent 602eda4 commit 95f8dca
Show file tree
Hide file tree
Showing 21 changed files with 240 additions and 214 deletions.
26 changes: 13 additions & 13 deletions pandas/_libs/algos.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ class NegInfinity(object):

@cython.wraparound(False)
@cython.boundscheck(False)
cpdef ndarray[int64_t, ndim=1] unique_deltas(ndarray[int64_t] arr):
cpdef ndarray[int64_t, ndim=1] unique_deltas(const int64_t[:] arr):
"""
Efficiently find the unique first-differences of the given array.
Expand Down Expand Up @@ -150,7 +150,7 @@ def is_lexsorted(list_of_arrays: list) -> bint:

@cython.boundscheck(False)
@cython.wraparound(False)
def groupsort_indexer(ndarray[int64_t] index, Py_ssize_t ngroups):
def groupsort_indexer(const int64_t[:] index, Py_ssize_t ngroups):
"""
compute a 1-d indexer that is an ordering of the passed index,
ordered by the groups. This is a reverse of the label
Expand Down Expand Up @@ -230,7 +230,7 @@ def kth_smallest(numeric[:] a, Py_ssize_t k) -> numeric:

@cython.boundscheck(False)
@cython.wraparound(False)
def nancorr(ndarray[float64_t, ndim=2] mat, bint cov=0, minp=None):
def nancorr(const float64_t[:, :] mat, bint cov=0, minp=None):
cdef:
Py_ssize_t i, j, xi, yi, N, K
bint minpv
Expand Down Expand Up @@ -294,7 +294,7 @@ def nancorr(ndarray[float64_t, ndim=2] mat, bint cov=0, minp=None):

@cython.boundscheck(False)
@cython.wraparound(False)
def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1):
def nancorr_spearman(const float64_t[:, :] mat, Py_ssize_t minp=1):
cdef:
Py_ssize_t i, j, xi, yi, N, K
ndarray[float64_t, ndim=2] result
Expand Down Expand Up @@ -435,8 +435,8 @@ def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None):

@cython.boundscheck(False)
@cython.wraparound(False)
def pad_inplace(ndarray[algos_t] values,
ndarray[uint8_t, cast=True] mask,
def pad_inplace(algos_t[:] values,
const uint8_t[:] mask,
limit=None):
cdef:
Py_ssize_t i, N
Expand Down Expand Up @@ -472,8 +472,8 @@ def pad_inplace(ndarray[algos_t] values,

@cython.boundscheck(False)
@cython.wraparound(False)
def pad_2d_inplace(ndarray[algos_t, ndim=2] values,
ndarray[uint8_t, ndim=2] mask,
def pad_2d_inplace(algos_t[:, :] values,
const uint8_t[:, :] mask,
limit=None):
cdef:
Py_ssize_t i, j, N, K
Expand Down Expand Up @@ -602,8 +602,8 @@ def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None):

@cython.boundscheck(False)
@cython.wraparound(False)
def backfill_inplace(ndarray[algos_t] values,
ndarray[uint8_t, cast=True] mask,
def backfill_inplace(algos_t[:] values,
const uint8_t[:] mask,
limit=None):
cdef:
Py_ssize_t i, N
Expand Down Expand Up @@ -639,8 +639,8 @@ def backfill_inplace(ndarray[algos_t] values,

@cython.boundscheck(False)
@cython.wraparound(False)
def backfill_2d_inplace(ndarray[algos_t, ndim=2] values,
ndarray[uint8_t, ndim=2] mask,
def backfill_2d_inplace(algos_t[:, :] values,
const uint8_t[:, :] mask,
limit=None):
cdef:
Py_ssize_t i, j, N, K
Expand Down Expand Up @@ -678,7 +678,7 @@ def backfill_2d_inplace(ndarray[algos_t, ndim=2] values,

@cython.wraparound(False)
@cython.boundscheck(False)
def arrmap(ndarray[algos_t] index, object func):
def arrmap(algos_t[:] index, object func):
cdef:
Py_ssize_t length = index.shape[0]
Py_ssize_t i = 0
Expand Down
92 changes: 46 additions & 46 deletions pandas/_libs/groupby_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@ def get_dispatch(dtypes):

@cython.wraparound(False)
@cython.boundscheck(False)
def group_add_{{name}}(ndarray[{{c_type}}, ndim=2] out,
ndarray[int64_t] counts,
ndarray[{{c_type}}, ndim=2] values,
ndarray[int64_t] labels,
def group_add_{{name}}({{c_type}}[:, :] out,
int64_t[:] counts,
{{c_type}}[:, :] values,
const int64_t[:] labels,
Py_ssize_t min_count=0):
"""
Only aggregates on axis=0
Expand Down Expand Up @@ -76,10 +76,10 @@ def group_add_{{name}}(ndarray[{{c_type}}, ndim=2] out,

@cython.wraparound(False)
@cython.boundscheck(False)
def group_prod_{{name}}(ndarray[{{c_type}}, ndim=2] out,
ndarray[int64_t] counts,
ndarray[{{c_type}}, ndim=2] values,
ndarray[int64_t] labels,
def group_prod_{{name}}({{c_type}}[:, :] out,
int64_t[:] counts,
{{c_type}}[:, :] values,
const int64_t[:] labels,
Py_ssize_t min_count=0):
"""
Only aggregates on axis=0
Expand Down Expand Up @@ -123,10 +123,10 @@ def group_prod_{{name}}(ndarray[{{c_type}}, ndim=2] out,
@cython.wraparound(False)
@cython.boundscheck(False)
@cython.cdivision(True)
def group_var_{{name}}(ndarray[{{c_type}}, ndim=2] out,
ndarray[int64_t] counts,
ndarray[{{c_type}}, ndim=2] values,
ndarray[int64_t] labels,
def group_var_{{name}}({{c_type}}[:, :] out,
int64_t[:] counts,
{{c_type}}[:, :] values,
const int64_t[:] labels,
Py_ssize_t min_count=-1):
cdef:
Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
Expand Down Expand Up @@ -175,10 +175,10 @@ def group_var_{{name}}(ndarray[{{c_type}}, ndim=2] out,

@cython.wraparound(False)
@cython.boundscheck(False)
def group_mean_{{name}}(ndarray[{{c_type}}, ndim=2] out,
ndarray[int64_t] counts,
ndarray[{{c_type}}, ndim=2] values,
ndarray[int64_t] labels,
def group_mean_{{name}}({{c_type}}[:, :] out,
int64_t[:] counts,
{{c_type}}[:, :] values,
const int64_t[:] labels,
Py_ssize_t min_count=-1):
cdef:
Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
Expand Down Expand Up @@ -220,11 +220,11 @@ def group_mean_{{name}}(ndarray[{{c_type}}, ndim=2] out,

@cython.wraparound(False)
@cython.boundscheck(False)
def group_ohlc_{{name}}(ndarray[{{c_type}}, ndim=2] out,
ndarray[int64_t] counts,
ndarray[{{c_type}}, ndim=2] values,
ndarray[int64_t] labels,
Py_ssize_t min_count=-1):
def group_ohlc_{{name}}({{c_type}}[:, :] out,
int64_t[:] counts,
{{c_type}}[:, :] values,
const int64_t[:] labels,
Py_ssize_t min_count=-1):
"""
Only aggregates on axis=0
"""
Expand Down Expand Up @@ -293,10 +293,10 @@ def get_dispatch(dtypes):

@cython.wraparound(False)
@cython.boundscheck(False)
def group_last_{{name}}(ndarray[{{c_type}}, ndim=2] out,
ndarray[int64_t] counts,
ndarray[{{c_type}}, ndim=2] values,
ndarray[int64_t] labels,
def group_last_{{name}}({{c_type}}[:, :] out,
int64_t[:] counts,
{{c_type}}[:, :] values,
const int64_t[:] labels,
Py_ssize_t min_count=-1):
"""
Only aggregates on axis=0
Expand Down Expand Up @@ -350,10 +350,10 @@ def group_last_{{name}}(ndarray[{{c_type}}, ndim=2] out,

@cython.wraparound(False)
@cython.boundscheck(False)
def group_nth_{{name}}(ndarray[{{c_type}}, ndim=2] out,
ndarray[int64_t] counts,
ndarray[{{c_type}}, ndim=2] values,
ndarray[int64_t] labels, int64_t rank,
def group_nth_{{name}}({{c_type}}[:, :] out,
int64_t[:] counts,
{{c_type}}[:, :] values,
const int64_t[:] labels, int64_t rank,
Py_ssize_t min_count=-1):
"""
Only aggregates on axis=0
Expand Down Expand Up @@ -411,9 +411,9 @@ def group_nth_{{name}}(ndarray[{{c_type}}, ndim=2] out,

@cython.boundscheck(False)
@cython.wraparound(False)
def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
ndarray[{{c_type}}, ndim=2] values,
ndarray[int64_t] labels,
def group_rank_{{name}}(float64_t[:, :] out,
{{c_type}}[:, :] values,
const int64_t[:] labels,
bint is_datetimelike, object ties_method,
bint ascending, bint pct, object na_option):
"""
Expand Down Expand Up @@ -606,10 +606,10 @@ ctypedef fused groupby_t:

@cython.wraparound(False)
@cython.boundscheck(False)
def group_max(ndarray[groupby_t, ndim=2] out,
ndarray[int64_t] counts,
ndarray[groupby_t, ndim=2] values,
ndarray[int64_t] labels,
def group_max(groupby_t[:, :] out,
int64_t[:] counts,
groupby_t[:, :] values,
const int64_t[:] labels,
Py_ssize_t min_count=-1):
"""
Only aggregates on axis=0
Expand Down Expand Up @@ -669,10 +669,10 @@ def group_max(ndarray[groupby_t, ndim=2] out,

@cython.wraparound(False)
@cython.boundscheck(False)
def group_min(ndarray[groupby_t, ndim=2] out,
ndarray[int64_t] counts,
ndarray[groupby_t, ndim=2] values,
ndarray[int64_t] labels,
def group_min(groupby_t[:, :] out,
int64_t[:] counts,
groupby_t[:, :] values,
const int64_t[:] labels,
Py_ssize_t min_count=-1):
"""
Only aggregates on axis=0
Expand Down Expand Up @@ -731,9 +731,9 @@ def group_min(ndarray[groupby_t, ndim=2] out,

@cython.boundscheck(False)
@cython.wraparound(False)
def group_cummin(ndarray[groupby_t, ndim=2] out,
ndarray[groupby_t, ndim=2] values,
ndarray[int64_t] labels,
def group_cummin(groupby_t[:, :] out,
groupby_t[:, :] values,
const int64_t[:] labels,
bint is_datetimelike):
"""
Only transforms on axis=0
Expand Down Expand Up @@ -779,9 +779,9 @@ def group_cummin(ndarray[groupby_t, ndim=2] out,

@cython.boundscheck(False)
@cython.wraparound(False)
def group_cummax(ndarray[groupby_t, ndim=2] out,
ndarray[groupby_t, ndim=2] values,
ndarray[int64_t] labels,
def group_cummax(groupby_t[:, :] out,
groupby_t[:, :] values,
const int64_t[:] labels,
bint is_datetimelike):
"""
Only transforms on axis=0
Expand Down
18 changes: 10 additions & 8 deletions pandas/_libs/hashtable.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,10 @@ include "hashtable_class_helper.pxi"
include "hashtable_func_helper.pxi"

cdef class Factorizer:
cdef public PyObjectHashTable table
cdef public ObjectVector uniques
cdef public Py_ssize_t count
cdef public:
PyObjectHashTable table
ObjectVector uniques
Py_ssize_t count

def __init__(self, size_hint):
self.table = PyObjectHashTable(size_hint)
Expand Down Expand Up @@ -96,9 +97,10 @@ cdef class Factorizer:


cdef class Int64Factorizer:
cdef public Int64HashTable table
cdef public Int64Vector uniques
cdef public Py_ssize_t count
cdef public:
Int64HashTable table
Int64Vector uniques
Py_ssize_t count

def __init__(self, size_hint):
self.table = Int64HashTable(size_hint)
Expand Down Expand Up @@ -140,7 +142,7 @@ cdef class Int64Factorizer:

@cython.wraparound(False)
@cython.boundscheck(False)
def unique_label_indices(ndarray[int64_t, ndim=1] labels):
def unique_label_indices(const int64_t[:] labels):
"""
indices of the first occurrences of the unique labels
*excluding* -1. equivalent to:
Expand Down Expand Up @@ -168,6 +170,6 @@ def unique_label_indices(ndarray[int64_t, ndim=1] labels):
kh_destroy_int64(table)

arr = idx.to_array()
arr = arr[labels[arr].argsort()]
arr = arr[np.asarray(labels)[arr].argsort()]

return arr[1:] if arr.size != 0 and labels[arr[0]] == -1 else arr
2 changes: 1 addition & 1 deletion pandas/_libs/hashtable_class_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ cdef class {{name}}HashTable(HashTable):
self.table.vals[k] = <Py_ssize_t>values[i]

@cython.boundscheck(False)
def map_locations(self, ndarray[{{dtype}}_t, ndim=1] values):
def map_locations(self, const {{dtype}}_t[:] values):
cdef:
Py_ssize_t i, n = len(values)
int ret = 0
Expand Down
7 changes: 4 additions & 3 deletions pandas/_libs/internals.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,11 @@ from pandas._libs.algos import ensure_int64

cdef class BlockPlacement:
# __slots__ = '_as_slice', '_as_array', '_len'
cdef slice _as_slice
cdef object _as_array
cdef:
slice _as_slice
object _as_array

cdef bint _has_slice, _has_array, _is_known_slice_like
bint _has_slice, _has_array, _is_known_slice_like

def __init__(self, val):
cdef:
Expand Down
Loading

0 comments on commit 95f8dca

Please sign in to comment.