Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor filling.repeat API #10371

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 13 additions & 17 deletions python/cudf/cudf/_lib/filling.pyx
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

import numpy as np

Expand All @@ -15,7 +15,11 @@ from cudf._lib.cpp.table.table cimport table
from cudf._lib.cpp.table.table_view cimport table_view
from cudf._lib.cpp.types cimport size_type
from cudf._lib.scalar cimport DeviceScalar
from cudf._lib.utils cimport data_from_unique_ptr, table_view_from_table
from cudf._lib.utils cimport (
columns_from_unique_ptr,
data_from_unique_ptr,
table_view_from_columns,
)


def fill_in_place(Column destination, int begin, int end, DeviceScalar value):
Expand Down Expand Up @@ -50,15 +54,15 @@ def fill(Column destination, int begin, int end, DeviceScalar value):
return Column.from_unique_ptr(move(c_result))


# Repeat the input according to `count`.
# Dispatches on the type of `count`: a Column is forwarded (together with
# `check_count`) to `_repeat_via_column`; any other value is forwarded to
# `_repeat_via_size_type`, whose `count` parameter is typed `size_type`.
# NOTE(review): two `def` lines appear back to back because this text is a
# diff whose +/- markers were lost -- presumably the `list inp` signature is
# the post-change one; confirm against the repository.
def repeat(inp, object count, bool check_count=False):
def repeat(list inp, object count, bool check_count=False):
if isinstance(count, Column):
return _repeat_via_column(inp, count, check_count)
else:
return _repeat_via_size_type(inp, count)


def _repeat_via_column(inp, Column count, bool check_count):
cdef table_view c_inp = table_view_from_table(inp)
def _repeat_via_column(list inp, Column count, bool check_count):
cdef table_view c_inp = table_view_from_columns(inp)
cdef column_view c_count = count.view()
cdef bool c_check_count = check_count
cdef unique_ptr[table] c_result
Expand All @@ -70,15 +74,11 @@ def _repeat_via_column(inp, Column count, bool check_count):
c_check_count
))

return data_from_unique_ptr(
move(c_result),
column_names=inp._column_names,
index_names=inp._index_names
)
return columns_from_unique_ptr(move(c_result))


def _repeat_via_size_type(inp, size_type count):
cdef table_view c_inp = table_view_from_table(inp)
def _repeat_via_size_type(list inp, size_type count):
cdef table_view c_inp = table_view_from_columns(inp)
cdef unique_ptr[table] c_result

with nogil:
Expand All @@ -87,11 +87,7 @@ def _repeat_via_size_type(inp, size_type count):
count
))

return data_from_unique_ptr(
move(c_result),
column_names=inp._column_names,
index_names=inp._index_names
)
return columns_from_unique_ptr(move(c_result))


def sequence(int size, DeviceScalar init, DeviceScalar step):
Expand Down
31 changes: 31 additions & 0 deletions python/cudf/cudf/core/_base_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1512,6 +1512,37 @@ def _apply_boolean_mask(self, boolean_mask):
column_names=self._column_names,
)

def repeat(self, repeats, axis=None):
    """Repeat each element of an Index consecutively.

    Describes the contract of building a new Index in which every
    element of this Index appears ``repeats`` times in a row. This
    implementation is a placeholder: it performs no work and always
    raises ``NotImplementedError``.

    Parameters
    ----------
    repeats : int, or array of ints
        How many times each element is repeated. Must be
        non-negative; a value of 0 produces an empty object.
    axis : None
        Accepted by the signature but not used by this placeholder.

    Returns
    -------
    Index
        A new object of the caller's type holding the repeated
        elements.

    Raises
    ------
    NotImplementedError
        Always raised by this placeholder implementation.

    Examples
    --------
    >>> index = cudf.Index([10, 22, 33, 55])
    >>> index
    Int64Index([10, 22, 33, 55], dtype='int64')
    >>> index.repeat(5)
    Int64Index([10, 10, 10, 10, 10, 22, 22, 22, 22, 22, 33,
                33, 33, 33, 33, 55, 55, 55, 55, 55],
               dtype='int64')
    """
    raise NotImplementedError

def _split_columns_by_levels(self, levels):
if isinstance(levels, int) and levels > 0:
raise ValueError(f"Out of bound level: {levels}")
Expand Down
104 changes: 14 additions & 90 deletions python/cudf/cudf/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1548,96 +1548,6 @@ def rank(

return self._from_data(data, index).astype(np.float64)

@_cudf_nvtx_annotate
def repeat(self, repeats, axis=None):
    """Repeat every element of this object consecutively.

    Produces a new object of the caller's type (DataFrame/Series/Index)
    in which each element of the current object shows up ``repeats``
    times in a row.

    Parameters
    ----------
    repeats : int, or array of ints
        How many times each element is repeated. Must be
        non-negative; a value of 0 produces an empty object.
    axis : None
        Only ``None`` is accepted; any other value raises.

    Returns
    -------
    Series/DataFrame/Index
        A new object of the same type as the caller containing the
        repeated elements.

    Raises
    ------
    NotImplementedError
        If ``axis`` is not ``None``.

    Examples
    --------
    >>> import cudf
    >>> df = cudf.DataFrame({'a': [1, 2, 3], 'b': [10, 20, 30]})
    >>> df
       a   b
    0  1  10
    1  2  20
    2  3  30
    >>> df.repeat(3)
       a   b
    0  1  10
    0  1  10
    0  1  10
    1  2  20
    1  2  20
    1  2  20
    2  3  30
    2  3  30
    2  3  30

    Repeat on Series

    >>> s = cudf.Series([0, 2])
    >>> s
    0    0
    1    2
    dtype: int64
    >>> s.repeat([3, 4])
    0    0
    0    0
    0    0
    1    2
    1    2
    1    2
    1    2
    dtype: int64
    >>> s.repeat(2)
    0    0
    0    0
    1    2
    1    2
    dtype: int64

    Repeat on Index

    >>> index = cudf.Index([10, 22, 33, 55])
    >>> index
    Int64Index([10, 22, 33, 55], dtype='int64')
    >>> index.repeat(5)
    Int64Index([10, 10, 10, 10, 10, 22, 22, 22, 22, 22, 33,
                33, 33, 33, 33, 55, 55, 55, 55, 55],
               dtype='int64')
    """
    if axis is not None:
        raise NotImplementedError(
            "Only axis=`None` supported at this time."
        )

    # Scalars are handed over untouched; anything else becomes a column.
    counts = repeats if is_scalar(repeats) else as_column(repeats)

    repeated = libcudf.filling.repeat(self, counts)
    out = self.__class__._from_data(*repeated)

    # Carry the caller's type metadata over to the freshly built result.
    out._copy_type_metadata(self)
    return out

@_cudf_nvtx_annotate
def shift(self, periods=1, freq=None, axis=0, fill_value=None):
"""Shift values by `periods` positions."""
Expand Down Expand Up @@ -6347,6 +6257,20 @@ def nunique(self, dropna: bool = True):
for name, col in self._data.items()
}

@staticmethod
def _repeat(
    columns: List[ColumnBase], repeats, axis=None
) -> List[ColumnBase]:
    """Repeat ``columns`` by delegating to ``libcudf.filling.repeat``.

    Parameters
    ----------
    columns : list of ColumnBase
        The columns handed to ``libcudf.filling.repeat``.
    repeats : scalar or array-like
        Forwarded unchanged when ``is_scalar(repeats)`` is true;
        otherwise converted with ``as_column`` before forwarding.
    axis : None
        Only ``None`` is accepted; any other value raises.

    Returns
    -------
    list of ColumnBase
        The result of ``libcudf.filling.repeat``.

    Raises
    ------
    NotImplementedError
        If ``axis`` is not ``None``.
    """
    if axis is not None:
        raise NotImplementedError(
            "Only axis=`None` supported at this time."
        )

    counts = repeats if is_scalar(repeats) else as_column(repeats)
    return libcudf.filling.repeat(columns, counts)


@_cudf_nvtx_annotate
def _get_replacement_values_for_columns(
Expand Down
8 changes: 8 additions & 0 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -743,6 +743,9 @@ def _apply_boolean_mask(self, boolean_mask):
[self._values.apply_boolean_mask(boolean_mask)], [self.name]
)

def repeat(self, repeats, axis=None):
    """Repeat elements by delegating to the ``_as_int64()`` form.

    The object returned by ``self._as_int64()`` performs the actual
    repetition; ``repeats`` and ``axis`` are forwarded to its
    ``repeat`` method unchanged.
    """
    int64_form = self._as_int64()
    return int64_form.repeat(repeats, axis)

def _split(self, splits):
return Int64Index._from_columns(
[self._values.columns_split(splits)], [self.name]
Expand Down Expand Up @@ -1264,6 +1267,11 @@ def argsort(
na_position=na_position,
)

def repeat(self, repeats, axis=None):
    """Return a new object like ``self`` built from repeated columns.

    A list copy of ``self._columns`` is passed to ``Frame._repeat``
    together with ``repeats`` and ``axis``; the columns it returns are
    rewrapped via ``_from_columns_like_self`` under the current column
    names.
    """
    repeated_columns = Frame._repeat(list(self._columns), repeats, axis)
    return self._from_columns_like_self(
        repeated_columns, self._column_names
    )


class NumericIndex(GenericIndex):
"""Immutable, ordered and sliceable sequence of labels.
Expand Down
73 changes: 73 additions & 0 deletions python/cudf/cudf/core/indexed_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2027,6 +2027,79 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):

return NotImplemented

@_cudf_nvtx_annotate
def repeat(self, repeats, axis=None):
    """Repeat every element of this object consecutively.

    Produces a new object of the caller's type (DataFrame/Series) in
    which each element of the current object shows up ``repeats``
    times in a row. The index columns are repeated in the same call
    as the data columns.

    Parameters
    ----------
    repeats : int, or array of ints
        How many times each element is repeated. Must be
        non-negative; a value of 0 produces an empty object.
    axis : None
        Forwarded to ``Frame._repeat`` unchanged.

    Returns
    -------
    Series/DataFrame
        A new object of the same type as the caller containing the
        repeated elements.

    Examples
    --------
    >>> import cudf
    >>> df = cudf.DataFrame({'a': [1, 2, 3], 'b': [10, 20, 30]})
    >>> df
       a   b
    0  1  10
    1  2  20
    2  3  30
    >>> df.repeat(3)
       a   b
    0  1  10
    0  1  10
    0  1  10
    1  2  20
    1  2  20
    1  2  20
    2  3  30
    2  3  30
    2  3  30

    Repeat on Series

    >>> s = cudf.Series([0, 2])
    >>> s
    0    0
    1    2
    dtype: int64
    >>> s.repeat([3, 4])
    0    0
    0    0
    0    0
    1    2
    1    2
    1    2
    1    2
    dtype: int64
    >>> s.repeat(2)
    0    0
    0    0
    1    2
    1    2
    dtype: int64
    """
    # Index columns come first, followed by the data columns.
    source_columns = [*self._index._data.columns, *self._columns]
    repeated_columns = Frame._repeat(source_columns, repeats, axis)
    return self._from_columns_like_self(
        repeated_columns, self._column_names, self._index_names
    )

def _append(
self, other, ignore_index=False, verify_integrity=False, sort=None
):
Expand Down
5 changes: 5 additions & 0 deletions python/cudf/cudf/core/multiindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -1830,3 +1830,8 @@ def _split_columns_by_levels(self, levels):
index_columns.append(col)
index_names.append(name)
return data_columns, index_columns, data_names, index_names

def repeat(self, repeats, axis=None):
    """Return a new object like ``self`` built from repeated columns.

    A list copy of ``self._columns`` is passed to ``Frame._repeat``
    together with ``repeats`` and ``axis``; the columns it returns are
    rewrapped via ``_from_columns_like_self`` under the current column
    names.
    """
    repeated_columns = Frame._repeat(list(self._columns), repeats, axis)
    return self._from_columns_like_self(
        repeated_columns, self._column_names
    )