diff --git a/python/cudf/cudf/_lib/filling.pyx b/python/cudf/cudf/_lib/filling.pyx index fdc7a9c39ff..7de63def6a6 100644 --- a/python/cudf/cudf/_lib/filling.pyx +++ b/python/cudf/cudf/_lib/filling.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. import numpy as np @@ -15,7 +15,11 @@ from cudf._lib.cpp.table.table cimport table from cudf._lib.cpp.table.table_view cimport table_view from cudf._lib.cpp.types cimport size_type from cudf._lib.scalar cimport DeviceScalar -from cudf._lib.utils cimport data_from_unique_ptr, table_view_from_table +from cudf._lib.utils cimport ( + columns_from_unique_ptr, + data_from_unique_ptr, + table_view_from_columns, +) def fill_in_place(Column destination, int begin, int end, DeviceScalar value): @@ -50,15 +54,15 @@ def fill(Column destination, int begin, int end, DeviceScalar value): return Column.from_unique_ptr(move(c_result)) -def repeat(inp, object count, bool check_count=False): +def repeat(list inp, object count, bool check_count=False): if isinstance(count, Column): return _repeat_via_column(inp, count, check_count) else: return _repeat_via_size_type(inp, count) -def _repeat_via_column(inp, Column count, bool check_count): - cdef table_view c_inp = table_view_from_table(inp) +def _repeat_via_column(list inp, Column count, bool check_count): + cdef table_view c_inp = table_view_from_columns(inp) cdef column_view c_count = count.view() cdef bool c_check_count = check_count cdef unique_ptr[table] c_result @@ -70,15 +74,11 @@ def _repeat_via_column(inp, Column count, bool check_count): c_check_count )) - return data_from_unique_ptr( - move(c_result), - column_names=inp._column_names, - index_names=inp._index_names - ) + return columns_from_unique_ptr(move(c_result)) -def _repeat_via_size_type(inp, size_type count): - cdef table_view c_inp = table_view_from_table(inp) +def _repeat_via_size_type(list inp, size_type count): + cdef table_view c_inp = 
table_view_from_columns(inp) cdef unique_ptr[table] c_result with nogil: @@ -87,11 +87,7 @@ def _repeat_via_size_type(inp, size_type count): count )) - return data_from_unique_ptr( - move(c_result), - column_names=inp._column_names, - index_names=inp._index_names - ) + return columns_from_unique_ptr(move(c_result)) def sequence(int size, DeviceScalar init, DeviceScalar step): diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py index 60f739cff8b..4e09d3868f5 100644 --- a/python/cudf/cudf/core/_base_index.py +++ b/python/cudf/cudf/core/_base_index.py @@ -1512,6 +1512,37 @@ def _apply_boolean_mask(self, boolean_mask): column_names=self._column_names, ) + def repeat(self, repeats, axis=None): + """Repeat elements of an Index. + + Returns a new Index where each element of the current Index is repeated + consecutively a given number of times. + + Parameters + ---------- + repeats : int, or array of ints + The number of repetitions for each element. This should + be a non-negative integer. Repeating 0 times will return + an empty object. + + Returns + ------- + Index + A newly created object of same type as caller with repeated + elements. + + Examples + -------- + >>> index = cudf.Index([10, 22, 33, 55]) + >>> index + Int64Index([10, 22, 33, 55], dtype='int64') + >>> index.repeat(5) + Int64Index([10, 10, 10, 10, 10, 22, 22, 22, 22, 22, 33, + 33, 33, 33, 33, 55, 55, 55, 55, 55], + dtype='int64') + """ + raise NotImplementedError + def _split_columns_by_levels(self, levels): if isinstance(levels, int) and levels > 0: raise ValueError(f"Out of bound level: {levels}") diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 84b3bc03fbf..a847c0b5d3b 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -1547,96 +1547,6 @@ def rank( return self._from_data(data, index).astype(np.float64) - @_cudf_nvtx_annotate - def repeat(self, repeats, axis=None): - """Repeats elements consecutively. 
- - Returns a new object of caller type(DataFrame/Series/Index) where each - element of the current object is repeated consecutively a given - number of times. - - Parameters - ---------- - repeats : int, or array of ints - The number of repetitions for each element. This should - be a non-negative integer. Repeating 0 times will return - an empty object. - - Returns - ------- - Series/DataFrame/Index - A newly created object of same type as caller - with repeated elements. - - Examples - -------- - >>> import cudf - >>> df = cudf.DataFrame({'a': [1, 2, 3], 'b': [10, 20, 30]}) - >>> df - a b - 0 1 10 - 1 2 20 - 2 3 30 - >>> df.repeat(3) - a b - 0 1 10 - 0 1 10 - 0 1 10 - 1 2 20 - 1 2 20 - 1 2 20 - 2 3 30 - 2 3 30 - 2 3 30 - - Repeat on Series - - >>> s = cudf.Series([0, 2]) - >>> s - 0 0 - 1 2 - dtype: int64 - >>> s.repeat([3, 4]) - 0 0 - 0 0 - 0 0 - 1 2 - 1 2 - 1 2 - 1 2 - dtype: int64 - >>> s.repeat(2) - 0 0 - 0 0 - 1 2 - 1 2 - dtype: int64 - - Repeat on Index - - >>> index = cudf.Index([10, 22, 33, 55]) - >>> index - Int64Index([10, 22, 33, 55], dtype='int64') - >>> index.repeat(5) - Int64Index([10, 10, 10, 10, 10, 22, 22, 22, 22, 22, 33, - 33, 33, 33, 33, 55, 55, 55, 55, 55], - dtype='int64') - """ - if axis is not None: - raise NotImplementedError( - "Only axis=`None` supported at this time." - ) - - if not is_scalar(repeats): - repeats = as_column(repeats) - - result = self.__class__._from_data( - *libcudf.filling.repeat(self, repeats) - ) - - result._copy_type_metadata(self) - return result - @_cudf_nvtx_annotate def shift(self, periods=1, freq=None, axis=0, fill_value=None): """Shift values by `periods` positions.""" @@ -6260,6 +6170,20 @@ def nunique(self, dropna: bool = True): for name, col in self._data.items() } + @staticmethod + def _repeat( + columns: List[ColumnBase], repeats, axis=None + ) -> List[ColumnBase]: + if axis is not None: + raise NotImplementedError( + "Only axis=`None` supported at this time." 
+ ) + + if not is_scalar(repeats): + repeats = as_column(repeats) + + return libcudf.filling.repeat(columns, repeats) + @_cudf_nvtx_annotate def _get_replacement_values_for_columns( diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 1c68289898f..586401de150 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -743,6 +743,9 @@ def _apply_boolean_mask(self, boolean_mask): [self._values.apply_boolean_mask(boolean_mask)], [self.name] ) + def repeat(self, repeats, axis=None): + return self._as_int64().repeat(repeats, axis) + def _split(self, splits): return Int64Index._from_columns( [self._values.columns_split(splits)], [self.name] @@ -1264,6 +1267,11 @@ def argsort( na_position=na_position, ) + def repeat(self, repeats, axis=None): + return self._from_columns_like_self( + Frame._repeat([*self._columns], repeats, axis), self._column_names + ) + class NumericIndex(GenericIndex): """Immutable, ordered and sliceable sequence of labels. diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 3fa951241f7..f527f7a1e1f 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -2027,6 +2027,79 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): return NotImplemented + @_cudf_nvtx_annotate + def repeat(self, repeats, axis=None): + """Repeats elements consecutively. + + Returns a new object of caller type (DataFrame/Series) where each + element of the current object is repeated consecutively a given + number of times. + + Parameters + ---------- + repeats : int, or array of ints + The number of repetitions for each element. This should + be a non-negative integer. Repeating 0 times will return + an empty object. + + Returns + ------- + Series/DataFrame + A newly created object of same type as caller + with repeated elements. 
+ + Examples + -------- + >>> import cudf + >>> df = cudf.DataFrame({'a': [1, 2, 3], 'b': [10, 20, 30]}) + >>> df + a b + 0 1 10 + 1 2 20 + 2 3 30 + >>> df.repeat(3) + a b + 0 1 10 + 0 1 10 + 0 1 10 + 1 2 20 + 1 2 20 + 1 2 20 + 2 3 30 + 2 3 30 + 2 3 30 + + Repeat on Series + + >>> s = cudf.Series([0, 2]) + >>> s + 0 0 + 1 2 + dtype: int64 + >>> s.repeat([3, 4]) + 0 0 + 0 0 + 0 0 + 1 2 + 1 2 + 1 2 + 1 2 + dtype: int64 + >>> s.repeat(2) + 0 0 + 0 0 + 1 2 + 1 2 + dtype: int64 + """ + return self._from_columns_like_self( + Frame._repeat( + [*self._index._data.columns, *self._columns], repeats, axis + ), + self._column_names, + self._index_names, + ) + def _append( self, other, ignore_index=False, verify_integrity=False, sort=None ): diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index c9036db05fa..d94c2ae3e93 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -1830,3 +1830,8 @@ def _split_columns_by_levels(self, levels): index_columns.append(col) index_names.append(name) return data_columns, index_columns, data_names, index_names + + def repeat(self, repeats, axis=None): + return self._from_columns_like_self( + Frame._repeat([*self._columns], repeats, axis), self._column_names + )