From 5e4064c7d732703a27f9dc1bc8f207f91747c9a1 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 5 May 2021 11:48:49 -0700 Subject: [PATCH 1/9] Move monotonicity methods to SingleColumnFrame. --- python/cudf/cudf/core/frame.py | 32 ++++++++++++++++++++++++++++++++ python/cudf/cudf/core/index.py | 25 +------------------------ python/cudf/cudf/core/series.py | 33 --------------------------------- 3 files changed, 33 insertions(+), 57 deletions(-) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 25552009444..86e331048dd 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -3529,6 +3529,38 @@ def to_arrow(self): """ return self._column.to_arrow() + @property + def is_monotonic(self): + """Return boolean if values in the object are monotonic_increasing. + + This property is an alias for :attr:`is_monotonic_increasing`. + + Returns + ------- + bool + """ + return self.is_monotonic_increasing + + @property + def is_monotonic_increasing(self): + """Return boolean if values in the object are monotonic_increasing. + + Returns + ------- + bool + """ + return self._column.is_monotonic_increasing + + @property + def is_monotonic_decreasing(self): + """Return boolean if values in the object are monotonic_decreasing. + + Returns + ------- + bool + """ + return self._column.is_monotonic_decreasing + @property def _copy_construct_defaults(self): """A default dictionary of kwargs to be used for copy construction.""" diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 2846dc241db..db595dfe63c 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -1148,30 +1148,7 @@ def is_unique(self): """ Return if the index has unique values. """ - raise (NotImplementedError) - - @property - def is_monotonic(self): - """ - Alias for is_monotonic_increasing. - """ - return self.is_monotonic_increasing - - @property - def is_monotonic_increasing(self): - """ - Return if the index is monotonic increasing - (only equal or increasing) values. - """ - return self._values.is_monotonic_increasing - - @property - def is_monotonic_decreasing(self): - """ - Return if the index is monotonic decreasing - (only equal or decreasing) values. - """ - return self._values.is_monotonic_decreasing + raise NotImplementedError @property def empty(self): diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 7b1e6454394..385f50c75fa 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -5945,39 +5945,6 @@ def is_unique(self): """ return self._column.is_unique - @property - def is_monotonic(self): - """ - Return boolean if values in the object are monotonic_increasing. - - Returns - ------- - out : bool - """ - return self._column.is_monotonic_increasing - - @property - def is_monotonic_increasing(self): - """ - Return boolean if values in the object are monotonic_increasing. - - Returns - ------- - out : bool - """ - return self._column.is_monotonic_increasing - - @property - def is_monotonic_decreasing(self): - """ - Return boolean if values in the object are monotonic_decreasing. - - Returns - ------- - out : bool - """ - return self._column.is_monotonic_decreasing - @property def __cuda_array_interface__(self): return self._column.__cuda_array_interface__ From b75842f60052b0b15f52ae627c5f731523240d08 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 5 May 2021 11:53:22 -0700 Subject: [PATCH 2/9] Move is_unique to SingleColumnFrame. --- python/cudf/cudf/core/frame.py | 10 ++++++++++ python/cudf/cudf/core/index.py | 14 -------------- python/cudf/cudf/core/series.py | 11 ----------- 3 files changed, 10 insertions(+), 25 deletions(-) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 86e331048dd..a098525000c 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -3529,6 +3529,16 @@ def to_arrow(self): """ return self._column.to_arrow() + @property + def is_unique(self): + """Return boolean if values in the object are unique. + + Returns + ------- + bool + """ + return self._column.is_unique + @property def is_monotonic(self): """Return boolean if values in the object are monotonic_increasing. diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index db595dfe63c..b0193308c22 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -1143,13 +1143,6 @@ def to_series(self, index=None, name=None): name=self.name if name is None else name, ) - @property - def is_unique(self): - """ - Return if the index has unique values. - """ - raise NotImplementedError - @property def empty(self): """ @@ -1966,13 +1959,6 @@ def find_label_range(self, first, last): end += 1 return begin, end - @property - def is_unique(self): - """ - Return if the index has unique values. - """ - return self._values.is_unique - def get_slice_bound(self, label, side, kind): return self._values.get_slice_bound(label, side, kind) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 385f50c75fa..aaac970820b 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -5934,17 +5934,6 @@ def rename(self, index=None, copy=True): return out.copy(deep=copy) - @property - def is_unique(self): - """ - Return boolean if values in the object are unique. - - Returns - ------- - out : bool - """ - return self._column.is_unique - @property def __cuda_array_interface__(self): return self._column.__cuda_array_interface__ From 7d7aac969e780cbdd2f61feb04183ed6dd9378ce Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 5 May 2021 11:57:19 -0700 Subject: [PATCH 3/9] Move __cuda_array_interface__ to SingleColumnFrame. --- python/cudf/cudf/core/frame.py | 4 ++++ python/cudf/cudf/core/index.py | 14 +------------- python/cudf/cudf/core/series.py | 4 ---- 3 files changed, 5 insertions(+), 17 deletions(-) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index a098525000c..5a92fe7e597 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -3571,6 +3571,10 @@ def is_monotonic_decreasing(self): """ return self._column.is_monotonic_decreasing + @property + def __cuda_array_interface__(self): + return self._column.__cuda_array_interface__ + @property def _copy_construct_defaults(self): """A default dictionary of kwargs to be used for copy construction.""" diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index b0193308c22..ac8b5a9ca9e 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -1281,10 +1281,6 @@ def where(self, cond, other=None): """ return super().where(cond=cond, other=other) - @property - def __cuda_array_interface__(self): - raise (NotImplementedError) - def memory_usage(self, deep=False): """ Memory usage of the values. @@ -1742,10 +1738,6 @@ def get_slice_bound(self, label, side, kind=None): pos = search_range(start, stop, label, step, side=side) return pos - @property - def __cuda_array_interface__(self): - return self._values.__cuda_array_interface__ - def memory_usage(self, **kwargs): return 0 @@ -1807,7 +1799,7 @@ def _initialize(self, values, **kwargs): @property def _values(self): - return next(iter(self._data.columns)) + return self._column def copy(self, name=None, deep=False, dtype=None, names=None): """ @@ -1962,10 +1954,6 @@ def find_label_range(self, first, last): def get_slice_bound(self, label, side, kind): return self._values.get_slice_bound(label, side, kind) - @property - def __cuda_array_interface__(self): - return self._values.__cuda_array_interface__ - class NumericIndex(GenericIndex): """Immutable, ordered and sliceable sequence of labels. diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index aaac970820b..16cb8512365 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -5934,10 +5934,6 @@ def rename(self, index=None, copy=True): return out.copy(deep=copy) - @property - def __cuda_array_interface__(self): - return self._column.__cuda_array_interface__ - def _align_to_index( self, index, how="outer", sort=True, allow_non_unique=False ): From f3bcb1062a3d875d5cbb62b1227c0c2fec483344 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 5 May 2021 12:01:22 -0700 Subject: [PATCH 4/9] Move factorize to SingleColumnFrame. --- python/cudf/cudf/core/frame.py | 27 +++++++++++++++++++++++++++ python/cudf/cudf/core/index.py | 11 ----------- python/cudf/cudf/core/series.py | 26 -------------------------- 3 files changed, 27 insertions(+), 37 deletions(-) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 5a92fe7e597..f092a6f05c0 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -3575,6 +3575,33 @@ def is_monotonic_decreasing(self): def __cuda_array_interface__(self): return self._column.__cuda_array_interface__ + def factorize(self, na_sentinel=-1): + """Encode the input values as integer labels + + Parameters + ---------- + na_sentinel : number + Value to indicate missing category. + + Returns + -------- + (labels, cats) : (cupy.ndarray, cupy.ndarray or Index) + - *labels* contains the encoded values + - *cats* contains the categories in order that the N-th + item corresponds to the (N-1) code. + + Examples + -------- + >>> import cudf + >>> s = cudf.Series(['a', 'a', 'c']) + >>> codes, uniques = s.factorize() + >>> codes + array([0, 0, 1], dtype=int8) + >>> uniques + StringIndex(['a' 'c'], dtype='object') + """ + return cudf.core.algorithms.factorize(self, na_sentinel=na_sentinel) + @property def _copy_construct_defaults(self): """A default dictionary of kwargs to be used for copy construction.""" diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index ac8b5a9ca9e..3972238e898 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -335,17 +335,6 @@ def _clean_nulls_from_index(self): else: return self - def factorize(self, na_sentinel=-1): - """ - Encode the input values as integer labels - - See Also - -------- - cudf.core.series.Series.factorize : Encode the input values of Series. - - """ - return cudf.core.algorithms.factorize(self, na_sentinel=na_sentinel) - @property def nlevels(self): """ diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 16cb8512365..0ca6454f49c 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -3860,32 +3860,6 @@ def _return_sentinel_series(): return codes._copy_construct(name=None, index=self.index) - def factorize(self, na_sentinel=-1): - """Encode the input values as integer labels - - Parameters - ---------- - na_sentinel : number - Value to indicate missing category. - - Returns - -------- - (labels, cats) : (cupy.ndarray, cupy.ndarray or Index) - - *labels* contains the encoded values - - *cats* contains the categories in order that the N-th - item corresponds to the (N-1) code. - - Examples - -------- - >>> import cudf - >>> s = cudf.Series(['a', 'a', 'c']) - >>> codes - array([0, 0, 1], dtype=int8) - >>> uniques - StringIndex(['a' 'c'], dtype='object') - """ - return cudf.core.algorithms.factorize(self, na_sentinel=na_sentinel) - # UDF related def applymap(self, udf, out_dtype=None): From 6baa63e227bb5b554a0f9639aa6706f15132ee77 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 14 May 2021 12:06:44 -0700 Subject: [PATCH 5/9] Alias product as prod rather than redefining. --- python/cudf/cudf/core/series.py | 55 +-------------------------------- 1 file changed, 1 insertion(+), 54 deletions(-) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 0ca6454f49c..d9685521c63 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -4227,60 +4227,7 @@ def product( skipna=skipna, dtype=dtype, min_count=min_count ) - def prod( - self, - axis=None, - skipna=None, - dtype=None, - level=None, - numeric_only=None, - min_count=0, - **kwargs, - ): - """ - Return product of the values in the series - - Parameters - ---------- - - skipna : bool, default True - Exclude NA/null values when computing the result. - - dtype : data type - Data type to cast the result to. - - min_count : int, default 0 - The required number of valid values to perform the operation. - If fewer than min_count non-NA values are present the result - will be NA. - - The default being 0. This means the sum of an all-NA or empty - Series is 0, and the product of an all-NA or empty Series is 1. - - Returns - ------- - scalar - - Notes - ----- - Parameters currently not supported are `axis`, `level`, `numeric_only`. - - Examples - -------- - >>> import cudf - >>> ser = cudf.Series([1, 5, 2, 4, 3]) - >>> ser.prod() - 120 - """ - return self.product( - axis=axis, - skipna=skipna, - dtype=dtype, - level=level, - numeric_only=numeric_only, - min_count=min_count, - **kwargs, - ) + prod = product def cummin(self, axis=None, skipna=True, *args, **kwargs): """ From c1ff6ae51949f84d8879b517d2844756eedf4f47 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 14 May 2021 12:26:42 -0700 Subject: [PATCH 6/9] Delete more redundant properties and make _num_columns a constant time op for SingleColumnFrame. --- python/cudf/cudf/core/frame.py | 6 +++- python/cudf/cudf/core/index.py | 57 ---------------------------------- 2 files changed, 5 insertions(+), 58 deletions(-) diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index f092a6f05c0..f59954aaf08 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -28,8 +28,8 @@ from cudf.utils.dtypes import ( is_categorical_dtype, is_column_like, - is_numerical_dtype, is_decimal_dtype, + is_numerical_dtype, is_scalar, min_scalar_type, ) @@ -3364,6 +3364,10 @@ def __bool__(self): "a.empty, a.bool(), a.item(), a.any() or a.all()." ) + @property + def _num_columns(self): + return 1 + @property def _column(self): return self._data[self.name] diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 3972238e898..a7d5013c70f 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -1132,29 +1132,6 @@ def to_series(self, index=None, name=None): name=self.name if name is None else name, ) - @property - def empty(self): - """ - Indicator whether Index is empty. - - True if Index is entirely empty (no items). - - Returns - ------- - out : bool - If Index is empty, return True, if not return False. - - Examples - -------- - >>> import cudf - >>> index = cudf.Index([]) - >>> index - Float64Index([], dtype='float64') - >>> index.empty - True - """ - return not self.size - def get_slice_bound(self, label, side, kind): """ Calculate slice bound that corresponds to given label. @@ -1240,36 +1217,6 @@ def isin(self, values): return result - def where(self, cond, other=None): - """ - Replace values where the condition is False. - - Parameters - ---------- - cond : bool array-like with the same length as self - Where cond is True, keep the original value. - Where False, replace with corresponding value from other. - Callables are not supported. - other: scalar, or array-like - Entries where cond is False are replaced with - corresponding value from other. Callables are not - supported. Default is None. - - Returns - ------- - Same type as caller - - Examples - -------- - >>> import cudf - >>> index = cudf.Index([4, 3, 2, 1, 0]) - >>> index - Int64Index([4, 3, 2, 1, 0], dtype='int64') - >>> index.where(index > 2, 15) - Int64Index([4, 3, 15, 15, 15], dtype='int64') - """ - return super().where(cond=cond, other=other) - def memory_usage(self, deep=False): """ Memory usage of the values. @@ -1462,10 +1409,6 @@ def step(self): """ return self._step - @property - def _num_columns(self): - return 1 - @property def _num_rows(self): return len(self) From 7aa9c4c689eba5bb8034bdb7b5ceea9ccebadc93 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 14 May 2021 12:32:09 -0700 Subject: [PATCH 7/9] Simplify implementation of isin for Index. --- python/cudf/cudf/core/index.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index a7d5013c70f..bc5900999ae 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -1213,9 +1213,7 @@ def isin(self, values): array([ True, False, False]) """ - result = self.to_series().isin(values).values - - return result + return self._values.isin(values).values def memory_usage(self, deep=False): """ From 2e847c7f5039a258d077c5ea5ef3611ad4b33c1b Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 14 May 2021 12:47:01 -0700 Subject: [PATCH 8/9] Fix _num_columns for MultiIndex. --- python/cudf/cudf/core/multiindex.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index ca029198e52..e0a895f28cc 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -204,6 +204,11 @@ def names(self, value): ) self._names = pd.core.indexes.frozen.FrozenList(value) + @property + def _num_columns(self): + # MultiIndex is not a single-columned frame. + return super(SingleColumnFrame, self)._num_columns + def rename(self, names, inplace=False): """ Alter MultiIndex level names From ea9331f4def10b5394cb60bbbfc28da7e7fb00de Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 14 May 2021 12:58:52 -0700 Subject: [PATCH 9/9] Put back where since it's there to remove the inplace parameter. --- python/cudf/cudf/core/index.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index bc5900999ae..94ddeb72028 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -1215,6 +1215,36 @@ def isin(self, values): return self._values.isin(values).values + def where(self, cond, other=None): + """ + Replace values where the condition is False. + + Parameters + ---------- + cond : bool array-like with the same length as self + Where cond is True, keep the original value. + Where False, replace with corresponding value from other. + Callables are not supported. + other: scalar, or array-like + Entries where cond is False are replaced with + corresponding value from other. Callables are not + supported. Default is None. + + Returns + ------- + Same type as caller + + Examples + -------- + >>> import cudf + >>> index = cudf.Index([4, 3, 2, 1, 0]) + >>> index + Int64Index([4, 3, 2, 1, 0], dtype='int64') + >>> index.where(index > 2, 15) + Int64Index([4, 3, 15, 15, 15], dtype='int64') + """ + return super().where(cond=cond, other=other) + def memory_usage(self, deep=False): """ Memory usage of the values.