Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor filling.repeat API #10371

Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 13 additions & 17 deletions python/cudf/cudf/_lib/filling.pyx
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.

import numpy as np

Expand All @@ -15,7 +15,11 @@ from cudf._lib.cpp.table.table cimport table
from cudf._lib.cpp.table.table_view cimport table_view
from cudf._lib.cpp.types cimport size_type
from cudf._lib.scalar cimport DeviceScalar
from cudf._lib.utils cimport data_from_unique_ptr, table_view_from_table
from cudf._lib.utils cimport (
columns_from_unique_ptr,
data_from_unique_ptr,
table_view_from_columns,
)


def fill_in_place(Column destination, int begin, int end, DeviceScalar value):
Expand Down Expand Up @@ -50,15 +54,15 @@ def fill(Column destination, int begin, int end, DeviceScalar value):
return Column.from_unique_ptr(move(c_result))


def repeat(inp, object count, bool check_count=False):
def repeat(list inp, object count, bool check_count=False):
if isinstance(count, Column):
return _repeat_via_column(inp, count, check_count)
else:
return _repeat_via_size_type(inp, count)


def _repeat_via_column(inp, Column count, bool check_count):
cdef table_view c_inp = table_view_from_table(inp)
def _repeat_via_column(list inp, Column count, bool check_count):
cdef table_view c_inp = table_view_from_columns(inp)
cdef column_view c_count = count.view()
cdef bool c_check_count = check_count
cdef unique_ptr[table] c_result
Expand All @@ -70,15 +74,11 @@ def _repeat_via_column(inp, Column count, bool check_count):
c_check_count
))

return data_from_unique_ptr(
move(c_result),
column_names=inp._column_names,
index_names=inp._index_names
)
return columns_from_unique_ptr(move(c_result))


def _repeat_via_size_type(inp, size_type count):
cdef table_view c_inp = table_view_from_table(inp)
def _repeat_via_size_type(list inp, size_type count):
cdef table_view c_inp = table_view_from_columns(inp)
cdef unique_ptr[table] c_result

with nogil:
Expand All @@ -87,11 +87,7 @@ def _repeat_via_size_type(inp, size_type count):
count
))

return data_from_unique_ptr(
move(c_result),
column_names=inp._column_names,
index_names=inp._index_names
)
return columns_from_unique_ptr(move(c_result))


def sequence(int size, DeviceScalar init, DeviceScalar step):
Expand Down
31 changes: 31 additions & 0 deletions python/cudf/cudf/core/_base_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1518,6 +1518,37 @@ def _apply_boolean_mask(self, boolean_mask):
column_names=self._column_names,
)

def repeat(self, repeats, axis=None):
    """Repeat elements of an Index.

    Produce a new Index in which every element of this Index appears
    consecutively the requested number of times.

    Parameters
    ----------
    repeats : int, or array of ints
        The number of repetitions for each element. This should
        be a non-negative integer. Repeating 0 times will return
        an empty object.
    axis : None
        Not used by this base implementation, which always raises.

    Returns
    -------
    Index
        A newly created object of same type as caller with repeated
        elements.

    Raises
    ------
    NotImplementedError
        Always; this base method carries no implementation of its own.

    Examples
    --------
    >>> index = cudf.Index([10, 22, 33, 55])
    >>> index
    Int64Index([10, 22, 33, 55], dtype='int64')
    >>> index.repeat(5)
    Int64Index([10, 10, 10, 10, 10, 22, 22, 22, 22, 22, 33,
                33, 33, 33, 33, 55, 55, 55, 55, 55],
                dtype='int64')
    """
    # Interface placeholder only: nothing is computed at this level.
    raise NotImplementedError
isVoid marked this conversation as resolved.
Show resolved Hide resolved

def _split_columns_by_levels(self, levels):
if isinstance(levels, int) and levels > 0:
raise ValueError(f"Out of bound level: {levels}")
Expand Down
85 changes: 4 additions & 81 deletions python/cudf/cudf/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1550,81 +1550,9 @@ def rank(

return self._from_data(data, index).astype(np.float64)

@annotate("FRAME_REPEAT", color="green", domain="cudf_python")
def repeat(self, repeats, axis=None):
"""Repeats elements consecutively.

Returns a new object of caller type(DataFrame/Series/Index) where each
element of the current object is repeated consecutively a given
number of times.

Parameters
----------
repeats : int, or array of ints
The number of repetitions for each element. This should
be a non-negative integer. Repeating 0 times will return
an empty object.

Returns
-------
Series/DataFrame/Index
A newly created object of same type as caller
with repeated elements.

Examples
--------
>>> import cudf
>>> df = cudf.DataFrame({'a': [1, 2, 3], 'b': [10, 20, 30]})
>>> df
a b
0 1 10
1 2 20
2 3 30
>>> df.repeat(3)
a b
0 1 10
0 1 10
0 1 10
1 2 20
1 2 20
1 2 20
2 3 30
2 3 30
2 3 30

Repeat on Series

>>> s = cudf.Series([0, 2])
>>> s
0 0
1 2
dtype: int64
>>> s.repeat([3, 4])
0 0
0 0
0 0
1 2
1 2
1 2
1 2
dtype: int64
>>> s.repeat(2)
0 0
0 0
1 2
1 2
dtype: int64

Repeat on Index

>>> index = cudf.Index([10, 22, 33, 55])
>>> index
Int64Index([10, 22, 33, 55], dtype='int64')
>>> index.repeat(5)
Int64Index([10, 10, 10, 10, 10, 22, 22, 22, 22, 22, 33,
33, 33, 33, 33, 55, 55, 55, 55, 55],
dtype='int64')
"""
def _repeat(
self, columns: List[ColumnBase], repeats, axis=None
) -> List[ColumnBase]:
if axis is not None:
raise NotImplementedError(
"Only axis=`None` supported at this time."
Expand All @@ -1633,12 +1561,7 @@ def repeat(self, repeats, axis=None):
if not is_scalar(repeats):
repeats = as_column(repeats)

result = self.__class__._from_data(
*libcudf.filling.repeat(self, repeats)
)

result._copy_type_metadata(self)
return result
return libcudf.filling.repeat(columns, repeats)

@annotate("FRAME_SHIFT", color="green", domain="cudf_python")
def shift(self, periods=1, freq=None, axis=0, fill_value=None):
Expand Down
8 changes: 8 additions & 0 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -698,6 +698,9 @@ def _apply_boolean_mask(self, boolean_mask):
[self._values.apply_boolean_mask(boolean_mask)], [self.name]
)

def repeat(self, repeats, axis=None):
    """Repeat the elements of this index.

    The work is delegated to the ``repeat`` method of ``self._values``;
    whatever that call returns is returned unchanged.

    Parameters
    ----------
    repeats : int, or array of ints
        The number of repetitions for each element.
    axis : optional
        Forwarded positionally to ``self._values.repeat``.
    """
    values = self._values
    return values.repeat(repeats, axis)


# Patch in all binops and unary ops, which bypass __getattr__ on the instance
# and prevent the above overload from working.
Expand Down Expand Up @@ -1225,6 +1228,11 @@ def argsort(
na_position=na_position,
)

def repeat(self, repeats, axis=None):
    """Repeat the elements of this index.

    The index's columns are copied into a fresh list and handed to
    ``self._repeat`` together with ``repeats`` and ``axis``; the returned
    columns are then wrapped via ``self._from_columns_like_self`` using
    this index's column names.

    Parameters
    ----------
    repeats : int, or array of ints
        The number of repetitions for each element.
    axis : optional
        Forwarded to ``self._repeat``.
    """
    repeated_columns = self._repeat(list(self._columns), repeats, axis)
    return self._from_columns_like_self(repeated_columns, self._column_names)


class NumericIndex(GenericIndex):
"""Immutable, ordered and sliceable sequence of labels.
Expand Down
73 changes: 73 additions & 0 deletions python/cudf/cudf/core/indexed_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1778,6 +1778,79 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):

return NotImplemented

@annotate("FRAME_REPEAT", color="green", domain="cudf_python")
def repeat(self, repeats, axis=None):
    """Repeats elements consecutively.

    Returns a new object of caller type (DataFrame/Series) where each
    element of the current object is repeated consecutively a given
    number of times.

    Parameters
    ----------
    repeats : int, or array of ints
        The number of repetitions for each element. This should
        be a non-negative integer. Repeating 0 times will return
        an empty object.
    axis : None
        Forwarded to ``_repeat``, which only supports ``None``.

    Returns
    -------
    Series/DataFrame
        A newly created object of same type as caller
        with repeated elements.

    Raises
    ------
    NotImplementedError
        If ``axis`` is not ``None`` (raised by ``_repeat``).

    Examples
    --------
    >>> import cudf
    >>> df = cudf.DataFrame({'a': [1, 2, 3], 'b': [10, 20, 30]})
    >>> df
       a   b
    0  1  10
    1  2  20
    2  3  30
    >>> df.repeat(3)
       a   b
    0  1  10
    0  1  10
    0  1  10
    1  2  20
    1  2  20
    1  2  20
    2  3  30
    2  3  30
    2  3  30

    Repeat on Series

    >>> s = cudf.Series([0, 2])
    >>> s
    0    0
    1    2
    dtype: int64
    >>> s.repeat([3, 4])
    0    0
    0    0
    0    0
    1    2
    1    2
    1    2
    1    2
    dtype: int64
    >>> s.repeat(2)
    0    0
    0    0
    1    2
    1    2
    dtype: int64
    """
    # Index columns are repeated together with the data columns, so the
    # row labels are repeated alongside the values (see examples above).
    columns = [*self._index._data.columns, *self._columns]
    # Pass ``axis`` through: previously it was dropped here, so an
    # unsupported axis was silently ignored instead of being rejected.
    return self._from_columns_like_self(
        self._repeat(columns, repeats, axis),
        self._column_names,
        self._index_names,
    )


def _check_duplicate_level_names(specified, level_names):
"""Raise if any of `specified` has duplicates in `level_names`."""
Expand Down
5 changes: 5 additions & 0 deletions python/cudf/cudf/core/multiindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -1756,3 +1756,8 @@ def _split_columns_by_levels(self, levels):
index_columns.append(col)
index_names.append(name)
return data_columns, index_columns, data_names, index_names

def repeat(self, repeats, axis=None):
    """Repeat the entries of this index.

    Every column of the index is passed (as a new list) to
    ``self._repeat`` along with ``repeats`` and ``axis``. The result is
    rebuilt into an index through ``self._from_columns_like_self`` with
    the existing column names.

    Parameters
    ----------
    repeats : int, or array of ints
        The number of repetitions for each element.
    axis : optional
        Forwarded to ``self._repeat``.
    """
    source_columns = list(self._columns)
    repeated = self._repeat(source_columns, repeats, axis)
    names = self._column_names
    return self._from_columns_like_self(repeated, names)