diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py index 05ff4acde09..5e290e21a88 100644 --- a/python/cudf/cudf/core/column/lists.py +++ b/python/cudf/cudf/core/column/lists.py @@ -642,11 +642,6 @@ def sort_values( ------- Series or Index with each list sorted - Notes - ----- - Difference from pandas: - * Not supporting: `inplace`, `kind` - Examples -------- >>> s = cudf.Series([[4, 2, None, 9], [8, 8, 2], [2, 1]]) @@ -655,6 +650,11 @@ def sort_values( 1 [2.0, 8.0, 8.0] 2 [1.0, 2.0] dtype: list + + .. pandas-compat:: + **list.ListMethods.sort_values** + + The ``inplace`` and ``kind`` arguments are currently not supported. """ if inplace: raise NotImplementedError("`inplace` not currently implemented.") diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 9c30585a541..79082779e0b 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -596,11 +596,6 @@ def extract( for each group. If `expand=False` and `pat` has only one capture group, then return a Series/Index. - Notes - ----- - The `flags` parameter currently only supports re.DOTALL and - re.MULTILINE. - Examples -------- >>> import cudf @@ -627,6 +622,12 @@ def extract( 1 2 2 dtype: object + + .. pandas-compat:: + **StringMethods.extract** + + The `flags` parameter currently only supports re.DOTALL and + re.MULTILINE. """ # noqa W605 if not _is_supported_regex_flags(flags): raise NotImplementedError( @@ -674,14 +675,6 @@ def contains( pattern is contained within the string of each element of the Series/Index. - Notes - ----- - The parameters `case` and `na` are not yet supported and will - raise a NotImplementedError if anything other than the default - value is set. - The `flags` parameter currently only supports re.DOTALL and - re.MULTILINE. - Examples -------- >>> import cudf @@ -755,6 +748,15 @@ def contains( 3 True 4 dtype: bool + + .. 
pandas-compat:: + **StringMethods.contains** + + The parameters `case` and `na` are not yet supported and will + raise a NotImplementedError if anything other than the default + value is set. + The `flags` parameter currently only supports re.DOTALL and + re.MULTILINE. """ # noqa W605 if case is not True: raise NotImplementedError("`case` parameter is not yet supported") @@ -946,12 +948,6 @@ def replace( A copy of the object with all matching occurrences of pat replaced by repl. - Notes - ----- - The parameters `case` and `flags` are not yet supported and will raise - a `NotImplementedError` if anything other than the default value - is set. - Examples -------- >>> import cudf @@ -981,6 +977,13 @@ def replace( 1 fuz 2 dtype: object + + .. pandas-compat:: + **StringMethods.replace** + + The parameters `case` and `flags` are not yet supported and will raise + a `NotImplementedError` if anything other than the default value + is set. """ if case is not None: raise NotImplementedError("`case` parameter is not yet supported") @@ -2767,11 +2770,6 @@ def partition(self, sep: str = " ", expand: bool = True) -> SeriesOrIndex: DataFrame or MultiIndex Returns a DataFrame / MultiIndex - Notes - ----- - The parameter `expand` is not yet supported and will raise a - `NotImplementedError` if anything other than the default value is set. - See Also -------- rpartition @@ -2813,6 +2811,12 @@ def partition(self, sep: str = " ", expand: bool = True) -> SeriesOrIndex: MultiIndex([('X', ' ', '123'), ('Y', ' ', '999')], ) + + .. pandas-compat:: + **StringMethods.partition** + + The parameter `expand` is not yet supported and will raise a + `NotImplementedError` if anything other than the default value is set. """ if expand is not True: raise NotImplementedError( @@ -3500,11 +3504,11 @@ def count(self, pat: str, flags: int = 0) -> SeriesOrIndex: Notes ----- - - `flags` parameter currently only supports re.DOTALL - and re.MULTILINE. 
- - Some characters need to be escaped when passing - in pat. e.g. ``'$'`` has a special meaning in regex - and must be escaped when finding this literal character. + - `flags` parameter currently only supports re.DOTALL + and re.MULTILINE. + - Some characters need to be escaped when passing + in pat. e.g. ``'$'`` has a special meaning in regex + and must be escaped when finding this literal character. Examples -------- @@ -3568,11 +3572,6 @@ def findall(self, pat: str, flags: int = 0) -> SeriesOrIndex: All non-overlapping matches of pattern or regular expression in each string of this Series/Index. - Notes - ----- - The `flags` parameter currently only supports re.DOTALL and - re.MULTILINE. - Examples -------- >>> import cudf @@ -3613,6 +3612,12 @@ def findall(self, pat: str, flags: int = 0) -> SeriesOrIndex: 1 [] 2 [b, b] dtype: list + + .. pandas-compat:: + **StringMethods.findall** + + The `flags` parameter currently only supports re.DOTALL and + re.MULTILINE. """ if isinstance(pat, re.Pattern): flags = pat.flags & ~re.U @@ -3795,11 +3800,6 @@ def endswith(self, pat: str) -> SeriesOrIndex: A Series of booleans indicating whether the given pattern matches the end of each string element. - Notes - ----- - `na` parameter is not yet supported, as cudf uses - native strings instead of Python objects. - Examples -------- >>> import cudf @@ -3816,6 +3816,12 @@ def endswith(self, pat: str) -> SeriesOrIndex: 2 False 3 dtype: bool + + .. pandas-compat:: + **StringMethods.endswith** + + `na` parameter is not yet supported, as cudf uses + native strings instead of Python objects. """ if pat is None: raise TypeError( @@ -4241,13 +4247,6 @@ def match( ------- Series or Index of boolean values. - Notes - ----- - Parameters `case` and `na` are currently not supported. - The `flags` parameter currently only supports re.DOTALL and - re.MULTILINE. - - Examples -------- >>> import cudf @@ -4268,6 +4267,13 @@ def match( 1 True 2 True dtype: bool + + .. 
pandas-compat:: + **StringMethods.match** + + Parameters `case` and `na` are currently not supported. + The `flags` parameter currently only supports re.DOTALL and + re.MULTILINE. """ if case is not True: raise NotImplementedError("`case` parameter is not yet supported") diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 1ebf59ba6e4..ab795f3a833 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -3118,10 +3118,6 @@ def diff(self, periods=1, axis=0): DataFrame First differences of the DataFrame. - Notes - ----- - Diff currently only supports numeric dtype columns. - Examples -------- >>> import cudf @@ -3145,6 +3141,10 @@ def diff(self, periods=1, axis=0): 4 2 3 16 5 2 5 20 + .. pandas-compat:: + **DataFrame.diff** + + Diff currently only supports numeric dtype columns. """ if not is_integer(periods): if not (is_float(periods) and periods.is_integer()): @@ -3310,14 +3310,6 @@ def rename( ------- DataFrame - Notes - ----- - Difference from pandas: - * Not supporting: level - - Rename will not overwrite column names. If a list with duplicates is - passed, column names will be postfixed with a number. - Examples -------- >>> import cudf @@ -3343,6 +3335,14 @@ def rename( 10 1 4 20 2 5 30 3 6 + + .. pandas-compat:: + **DataFrame.rename** + + * Not Supporting: level + + Rename will not overwrite column names. If a list with duplicates is + passed, column names will be postfixed with a number. """ if errors != "ignore": raise NotImplementedError( @@ -3442,10 +3442,10 @@ def agg(self, aggs, axis=None): When ``DataFrame.agg`` is called with several aggs, ``DataFrame`` is returned. - Notes - ----- - Difference from pandas: - * Not supporting: ``axis``, ``*args``, ``**kwargs`` + .. 
pandas-compat:: + **DataFrame.agg** + + * Not supporting: ``axis``, ``*args``, ``**kwargs`` """ # TODO: Remove the typecasting below once issue #6846 is fixed @@ -3578,11 +3578,6 @@ def nlargest(self, n, columns, keep="first"): The first `n` rows ordered by the given columns in descending order. - Notes - ----- - Difference from pandas: - - Only a single column is supported in *columns* - Examples -------- >>> import cudf @@ -3617,6 +3612,11 @@ def nlargest(self, n, columns, keep="first"): France 65000000 2583560 FR Italy 59000000 1937894 IT Brunei 434000 12128 BN + + .. pandas-compat:: + **DataFrame.nlargest** + + - Only a single column is supported in *columns* """ return self._n_largest_or_smallest(True, n, columns, keep) @@ -3643,11 +3643,6 @@ def nsmallest(self, n, columns, keep="first"): ------- DataFrame - Notes - ----- - Difference from pandas: - - Only a single column is supported in *columns* - Examples -------- >>> import cudf @@ -3689,6 +3684,11 @@ def nsmallest(self, n, columns, keep="first"): Anguilla 11300 311 AI Tuvalu 11300 38 TV Nauru 337000 182 NR + + .. pandas-compat:: + **DataFrame.nsmallest** + + - Only a single column is supported in *columns* """ return self._n_largest_or_smallest(False, n, columns, keep) @@ -3766,10 +3766,10 @@ def transpose(self): ------- a new (ncol x nrow) dataframe. self is (nrow x ncol) - Notes - ----- - Difference from pandas: - Not supporting *copy* because default and only behavior is copy=True + .. pandas-compat:: + **DataFrame.transpose, DataFrame.T** + + Not supporting *copy* because default and only behavior is copy=True """ index = self._data.to_pandas_index() @@ -3921,10 +3921,6 @@ def merge( ------- merged : DataFrame - Notes - ----- - **DataFrames merges in cuDF result in non-deterministic row ordering.** - Examples -------- >>> import cudf @@ -3960,6 +3956,11 @@ def merge( right dtype respectively. This extends to semi and anti joins. 
- For outer joins, the result will be the union of categories from both sides. + + .. pandas-compat:: + **DataFrame.merge** + + **DataFrames merges in cuDF result in non-deterministic row ordering.** """ if indicator: raise NotImplementedError( @@ -4030,12 +4031,11 @@ def join( ------- joined : DataFrame - Notes - ----- - Difference from pandas: + .. pandas-compat:: + **DataFrame.join** - - *other* must be a single DataFrame for now. - - *on* is not supported yet due to lack of multi-index support. + - *other* must be a single DataFrame for now. + - *on* is not supported yet due to lack of multi-index support. """ if on is not None: raise NotImplementedError("The on parameter is not yet supported") @@ -5147,11 +5147,6 @@ def from_arrow(cls, table): ------- cudf DataFrame - Notes - ----- - - Does not support automatically setting index column(s) similar - to how ``to_pandas`` works for PyArrow Tables. - Examples -------- >>> import cudf @@ -5162,6 +5157,12 @@ def from_arrow(cls, table): 0 1 4 1 2 5 2 3 6 + + .. pandas-compat:: + **DataFrame.from_arrow** + + - Does not support automatically setting index column(s) similar + to how ``to_pandas`` works for PyArrow Tables. """ index_col = None if isinstance(table, pa.Table) and isinstance( @@ -5501,14 +5502,6 @@ def quantile( If q is a float, a Series will be returned where the index is the columns of self and the values are the quantiles. - .. pandas-compat:: - **DataFrame.quantile** - - One notable difference from Pandas is when DataFrame is of - non-numeric types and result is expected to be a Series in case of - Pandas. cuDF will return a DataFrame as it doesn't support mixed - types under Series. - Examples -------- >>> import cupy as cp @@ -5529,6 +5522,14 @@ def quantile( a b 0.1 1.3 3.7 0.5 2.5 55.0 + + .. pandas-compat:: + **DataFrame.quantile** + + One notable difference from Pandas is when DataFrame is of + non-numeric types and result is expected to be a Series in case of + Pandas. 
cuDF will return a DataFrame as it doesn't support mixed + types under Series. """ # noqa: E501 if axis not in (0, None): raise NotImplementedError("axis is not implemented yet") @@ -5794,10 +5795,6 @@ def count(self, axis=0, level=None, numeric_only=False, **kwargs): Series For each column/row the number of non-NA/null entries. - Notes - ----- - Parameters currently not supported are `axis`, `level`, `numeric_only`. - Examples -------- >>> import cudf @@ -5811,6 +5808,11 @@ def count(self, axis=0, level=None, numeric_only=False, **kwargs): Age 4 Single 5 dtype: int64 + + .. pandas-compat:: + **DataFrame.count** + + Parameters currently not supported are `axis`, `level`, `numeric_only`. """ axis = self._get_axis_from_axis_arg(axis) if axis != 0: @@ -5958,10 +5960,6 @@ def mode(self, axis=0, numeric_only=False, dropna=True): cudf.Series.value_counts : Return the counts of values in a Series. - Notes - ----- - ``axis`` parameter is currently not supported. - Examples -------- >>> import cudf @@ -6000,6 +5998,11 @@ def mode(self, axis=0, numeric_only=False, dropna=True): legs wings 0 2 0.0 1 2.0 + + .. pandas-compat:: + **DataFrame.mode** + + ``axis`` parameter is currently not supported. """ if axis not in (0, "index"): raise NotImplementedError("Only axis=0 is currently supported") @@ -6538,7 +6541,7 @@ def to_struct(self, name=None): Notes ----- - Note that a copy of the columns is made. + Note: a copy of the columns is made. """ if not all(isinstance(name, str) for name in self._data.names): warnings.warn( @@ -6643,11 +6646,6 @@ def append( ------- DataFrame - See Also - -------- - cudf.concat : General function to concatenate DataFrame or - objects. - Notes ----- If a list of dict/series is passed and the keys are all contained in @@ -6657,7 +6655,12 @@ def append( computationally intensive than a single concatenate. A better solution is to append those rows to a list and then concatenate the list with the original DataFrame all at once. 
- `verify_integrity` parameter is not supported yet. + `verify_integrity` parameter is not supported yet. + + See Also + -------- + cudf.concat : General function to concatenate DataFrame or + objects. Examples -------- @@ -7035,22 +7038,6 @@ def eval(self, expr: str, inplace: bool = False, **kwargs): DataFrame if any assignment statements are included in ``expr``, or None if ``inplace=True``. - Notes - ----- - Difference from pandas: - * Additional kwargs are not supported. - * Bitwise and logical operators are not dtype-dependent. - Specifically, `&` must be used for bitwise operators on integers, - not `and`, which is specifically for the logical and between - booleans. - * Only numerical types are currently supported. - * Operators generally will not cast automatically. Users are - responsible for casting columns to suitable types before - evaluating a function. - * Multiple assignments to the same name (i.e. a sequence of - assignment statements where later statements are conditioned upon - the output of earlier statements) is not supported. - Examples -------- >>> df = cudf.DataFrame({'A': range(1, 6), 'B': range(10, 0, -2)}) @@ -7112,6 +7099,22 @@ def eval(self, expr: str, inplace: bool = False, **kwargs): 2 3 6 9 -3 3 4 4 8 0 4 5 2 7 3 + + .. pandas-compat:: + **DataFrame.eval** + + * Additional kwargs are not supported. + * Bitwise and logical operators are not dtype-dependent. + Specifically, `&` must be used for bitwise operators on integers, + not `and`, which is specifically for the logical and between + booleans. + * Only numerical types are currently supported. + * Operators generally will not cast automatically. Users are + responsible for casting columns to suitable types before + evaluating a function. + * Multiple assignments to the same name (i.e. a sequence of + assignment statements where later statements are conditioned upon + the output of earlier statements) are not supported. 
""" if kwargs: raise ValueError( diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 8b508eac324..35002863065 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -253,11 +253,6 @@ def empty(self): out : bool If DataFrame/Series is empty, return True, if not return False. - Notes - ----- - If DataFrame/Series contains only `null` values, it is still not - considered empty. See the example below. - Examples -------- >>> import cudf @@ -298,6 +293,12 @@ def empty(self): Series([], dtype: float64) >>> s.empty True + + .. pandas-compat:: + **DataFrame.empty, Series.empty, Frame.empty** + + If DataFrame/Series contains only `null` values, it is still not + considered empty. See the example above. """ return self.size == 0 @@ -628,6 +629,8 @@ def where(self, cond, other=None, inplace=False): dtype: int64 .. pandas-compat:: + **DataFrame.where, Series.where, Frame.where** + Note that ``where`` treats missing values as falsy, in parallel with pandas treatment of nullable data: @@ -1908,10 +1911,6 @@ def min( ------- Series - Notes - ----- - Parameters currently not supported are `level`, `numeric_only`. - Examples -------- >>> import cudf @@ -1920,6 +1919,11 @@ def min( a 1 b 7 dtype: int64 + + .. pandas-compat:: + **DataFrame.min, Series.min, Frame.min** + + Parameters currently not supported are `level`, `numeric_only`. """ return self._reduce( "min", @@ -1959,10 +1963,6 @@ def max( ------- Series - Notes - ----- - Parameters currently not supported are `level`, `numeric_only`. - Examples -------- >>> import cudf @@ -1971,6 +1971,11 @@ def max( a 4 b 10 dtype: int64 + + .. pandas-compat:: + **DataFrame.max, Series.max, Frame.max** + + Parameters currently not supported are `level`, `numeric_only`. """ return self._reduce( "max", @@ -2015,10 +2020,6 @@ def sum( ------- Series - Notes - ----- - Parameters currently not supported are `level`, `numeric_only`. 
- Examples -------- >>> import cudf @@ -2027,6 +2028,11 @@ def sum( a 10 b 34 dtype: int64 + + .. pandas-compat:: + **DataFrame.sum, Series.sum, Frame.sum** + + Parameters currently not supported are `level`, `numeric_only`. """ return self._reduce( "sum", @@ -2073,10 +2079,6 @@ def product( ------- Series - Notes - ----- - Parameters currently not supported are level`, `numeric_only`. - Examples -------- >>> import cudf @@ -2085,6 +2087,11 @@ def product( a 24 b 5040 dtype: int64 + + .. pandas-compat:: + **DataFrame.product, Series.product, Frame.product** + + Parameters currently not supported are `level`, `numeric_only`. """ axis = self._get_axis_from_axis_arg(axis) return self._reduce( @@ -2179,11 +2186,6 @@ def std( ------- Series - Notes - ----- - Parameters currently not supported are `level` and - `numeric_only` - Examples -------- >>> import cudf @@ -2192,6 +2194,12 @@ def std( a 1.290994 b 1.290994 dtype: float64 + + .. pandas-compat:: + **DataFrame.std, Series.std, Frame.std** + + Parameters currently not supported are `level` and + `numeric_only` """ return self._reduce( @@ -2235,11 +2243,6 @@ def var( ------- scalar - Notes - ----- - Parameters currently not supported are `level` and - `numeric_only` - Examples -------- >>> import cudf @@ -2248,6 +2251,12 @@ def var( a 1.666667 b 1.666667 dtype: float64 + + .. pandas-compat:: + **DataFrame.var, Series.var, Frame.var** + + Parameters currently not supported are `level` and + `numeric_only` """ return self._reduce( "var", @@ -2280,10 +2289,6 @@ def kurtosis( ------- Series or scalar - Notes - ----- - Parameters currently not supported are `level` and `numeric_only` - Examples -------- **Series** @@ -2301,6 +2306,11 @@ def kurtosis( a -1.2 b -1.2 dtype: float64 + + .. 
pandas-compat:: + **DataFrame.kurtosis, Frame.kurtosis** + + Parameters currently not supported are `level` and `numeric_only` """ if axis not in (0, "index", None): raise NotImplementedError("Only axis=0 is currently supported.") @@ -2333,11 +2343,6 @@ def skew( ------- Series - Notes - ----- - Parameters currently not supported are `axis`, `level` and - `numeric_only` - Examples -------- **Series** @@ -2362,6 +2367,12 @@ def skew( a 0.00000 b -0.37037 dtype: float64 + + .. pandas-compat:: + **DataFrame.skew, Series.skew, Frame.skew** + + Parameters currently not supported are `axis`, `level` and + `numeric_only` """ if axis not in (0, "index", None): raise NotImplementedError("Only axis=0 is currently supported.") @@ -2393,10 +2404,6 @@ def all(self, axis=0, skipna=True, level=None, **kwargs): ------- Series - Notes - ----- - Parameters currently not supported are `axis`, `bool_only`, `level`. - Examples -------- >>> import cudf @@ -2405,6 +2412,11 @@ def all(self, axis=0, skipna=True, level=None, **kwargs): a True b False dtype: bool + + .. pandas-compat:: + **DataFrame.all, Series.all, Frame.all** + + Parameters currently not supported are `axis`, `bool_only`, `level`. """ return self._reduce( "all", @@ -2432,10 +2444,6 @@ def any(self, axis=0, skipna=True, level=None, **kwargs): ------- Series - Notes - ----- - Parameters currently not supported are `axis`, `bool_only`, `level`. - Examples -------- >>> import cudf @@ -2444,6 +2452,11 @@ def any(self, axis=0, skipna=True, level=None, **kwargs): a True b True dtype: bool + + .. pandas-compat:: + **DataFrame.any, Series.any, Frame.any** + + Parameters currently not supported are `axis`, `bool_only`, `level`. """ return self._reduce( "any", @@ -2498,10 +2511,6 @@ def median( ------- scalar - Notes - ----- - Parameters currently not supported are `level` and `numeric_only`. - Examples -------- >>> import cudf @@ -2516,6 +2525,11 @@ def median( dtype: int64 >>> ser.median() 17.0 + + .. 
pandas-compat:: + **DataFrame.median, Series.median, Frame.median** + + Parameters currently not supported are `level` and `numeric_only`. """ return self._reduce( "median", diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index 91e00eb43f3..2dd2167b32d 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -535,10 +535,10 @@ def _reduce( Series or DataFrame Computed {op} of values within each group. - Notes - ----- - Difference from pandas: - * Not supporting: numeric_only, min_count + .. pandas-compat:: + **{cls}.{op}** + + The numeric_only, min_count parameters are not supported. """ if numeric_only: raise NotImplementedError( @@ -897,7 +897,7 @@ def mult(df): 6 2 6 12 .. pandas-compat:: - **groupby.apply** + **GroupBy.apply** cuDF's ``groupby.apply`` is limited compared to pandas. In some situations, Pandas returns the grouped keys as part of @@ -1654,7 +1654,7 @@ def fillna( DataFrame or Series .. pandas-compat:: - **groupby.fillna** + **GroupBy.fillna** This function may return result in different format to the method Pandas supports. For example: @@ -1732,9 +1732,10 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None): Series or DataFrame Object shifted within each group. - Notes - ----- - Parameter ``freq`` is unsupported. + .. pandas-compat:: + **GroupBy.shift** + + Parameter ``freq`` is unsupported. """ if freq is not None: diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 25c89c34040..4d8b1b39258 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -585,11 +585,6 @@ def replace( result : Series Series after replacement. The mask and index are preserved. - Notes - ----- - Parameters that are currently not supported are: `limit`, `regex`, - `method` - Examples -------- **Series** @@ -732,6 +727,12 @@ def replace( 2 2 7 c 3 3 8 d 4 4 9 e + + .. 
pandas-compat:: + **DataFrame.replace, Series.replace, IndexedFrame.replace** + + Parameters that are currently not supported are: `limit`, `regex`, + `method` """ if limit is not None: raise NotImplementedError("limit parameter is not implemented yet") @@ -1063,13 +1064,6 @@ def truncate(self, before=None, after=None, axis=0, copy=True): `before` and `after` may be specified as strings instead of Timestamps. - .. pandas-compat:: - **DataFrame.truncate, Series.truncate** - - The ``copy`` parameter is only present for API compatibility, but - ``copy=False`` is not supported. This method always generates a - copy. - Examples -------- **Series** @@ -1211,6 +1205,13 @@ def truncate(self, before=None, after=None, axis=0, copy=True): 2021-01-01 23:45:25 1 2 2021-01-01 23:45:26 1 2 2021-01-01 23:45:27 1 2 + + .. pandas-compat:: + **DataFrame.truncate, Series.truncate, IndexedFrame.truncate** + + The ``copy`` parameter is only present for API compatibility, but + ``copy=False`` is not supported. This method always generates a + copy. """ if not copy: raise ValueError("Truncating with copy=False is not supported.") @@ -1463,11 +1464,6 @@ def sort_index( ------- Frame or None - Notes - ----- - Difference from pandas: - * Not supporting: kind, sort_remaining=False - Examples -------- **Series** @@ -1508,6 +1504,11 @@ def sort_index( 1 2 3 3 1 2 2 3 1 + + .. pandas-compat:: + **DataFrame.sort_index, Series.sort_index, IndexedFrame.sort_index** + + * Not supporting: kind, sort_remaining=False """ if kind is not None: raise NotImplementedError("kind is not yet supported") @@ -2172,12 +2173,6 @@ def sort_values( ------- Frame : Frame with sorted values. - Notes - ----- - Difference from pandas: - * Support axis='index' only. - * Not supporting: inplace, kind - Examples -------- >>> import cudf @@ -2189,6 +2184,12 @@ def sort_values( 0 0 -3 2 2 0 1 1 2 + + .. pandas-compat:: + **DataFrame.sort_values, Series.sort_values, IndexedFrame.sort_values** + + * Support axis='index' only. 
+ * Not supporting: inplace, kind """ if na_position not in {"first", "last"}: raise ValueError(f"invalid na_position: {na_position}") @@ -2648,13 +2649,14 @@ def resample( 2018-02-28 18.0 63.333333 - Notes - ----- - Note that the dtype of the index (or the 'on' column if using - 'on=') in the result will be of a frequency closest to the - resampled frequency. For example, if resampling from - nanoseconds to milliseconds, the index will be of dtype - 'datetime64[ms]'. + .. pandas-compat:: + **DataFrame.resample, Series.resample, IndexedFrame.resample** + + Note that the dtype of the index (or the 'on' column if using + 'on=') in the result will be of a frequency closest to the + resampled frequency. For example, if resampling from + nanoseconds to milliseconds, the index will be of dtype + 'datetime64[ms]'. """ import cudf.core.resample @@ -3087,18 +3089,6 @@ def sample( provided via the `random_state` parameter. This function will always produce the same sample given an identical `random_state`. - Notes - ----- - When sampling from ``axis=0/'index'``, ``random_state`` can be either - a numpy random state (``numpy.random.RandomState``) or a cupy random - state (``cupy.random.RandomState``). When a numpy random state is - used, the output is guaranteed to match the output of the corresponding - pandas method call, but generating the sample may be slow. If exact - pandas equivalence is not required, using a cupy random state will - achieve better performance, especially when sampling large number of - items. It's advised to use the matching `ndarray` type to the random - state for the `weights` array. - Parameters ---------- n : int, optional @@ -3166,6 +3156,19 @@ def sample( a c 0 1 3 1 2 4 + + .. pandas-compat:: + **DataFrame.sample, Series.sample, IndexedFrame.sample** + + When sampling from ``axis=0/'index'``, ``random_state`` can be either + a numpy random state (``numpy.random.RandomState``) or a cupy random + state (``cupy.random.RandomState``). 
When a numpy random state is + used, the output is guaranteed to match the output of the corresponding + pandas method call, but generating the sample may be slow. If exact + pandas equivalence is not required, using a cupy random state will + achieve better performance, especially when sampling large number of + items. It's advised to use the matching `ndarray` type to the random + state for the `weights` array. """ axis = self._get_axis_from_axis_arg(axis) size = self.shape[axis] diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 1c697a2d824..4b6fde74d06 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -1203,10 +1203,11 @@ def map(self, arg, na_action=None) -> "Series": 4 dtype: int64 - Notes - ----- - Please note map currently only supports fixed-width numeric - type functions. + .. pandas-compat:: + **Series.map** + + Please note map currently only supports fixed-width numeric + type functions. """ if isinstance(arg, dict): if hasattr(arg, "__missing__"): @@ -1979,12 +1980,6 @@ def sort_values( ------- Series : Series with sorted values. - Notes - ----- - Difference from pandas: - * Support axis='index' only. - * Not supporting: inplace, kind - Examples -------- >>> import cudf @@ -1996,6 +1991,12 @@ def sort_values( 3 4 1 5 dtype: int64 + + .. pandas-compat:: + **Series.sort_values** + + * Support axis='index' only. + * The inplace and kind arguments are currently unsupported """ return super().sort_values( by=self.name, @@ -2443,16 +2444,17 @@ def count(self, level=None, **kwargs): int Number of non-null values in the Series. - Notes - ----- - Parameters currently not supported is `level`. - Examples -------- >>> import cudf >>> ser = cudf.Series([1, 5, 2, 4, 3]) >>> ser.count() 5 + + .. pandas-compat:: + **Series.count** + + The `level` parameter is currently not supported. 
""" if level is not None: @@ -2554,10 +2556,6 @@ def cov(self, other, min_periods=None): Covariance between Series and other normalized by N-1 (unbiased estimator). - Notes - ----- - `min_periods` parameter is not yet supported. - Examples -------- >>> import cudf @@ -2565,6 +2563,11 @@ def cov(self, other, min_periods=None): >>> ser2 = cudf.Series([0.12, 0.26, 0.51]) >>> ser1.cov(ser2) -0.015750000000000004 + + .. pandas-compat:: + **Series.cov** + + `min_periods` parameter is not yet supported. """ if min_periods is not None: @@ -3285,12 +3288,6 @@ def rename(self, index=None, copy=True): ------- Series - Notes - ----- - Difference from pandas: - - Supports scalar values only for changing name attribute - - Not supporting : inplace, level - Examples -------- >>> import cudf @@ -3309,6 +3306,12 @@ def rename(self, index=None, copy=True): Name: numeric_series, dtype: int64 >>> renamed_series.name 'numeric_series' + + .. pandas-compat:: + **Series.rename** + + - Supports scalar values only for changing name attribute + - The ``inplace`` and ``level`` parameters are not supported """ out_data = self._data.copy(deep=copy) return Series._from_data(out_data, self.index, name=index) @@ -4488,11 +4491,6 @@ def strftime(self, date_format, *args, **kwargs): Series Series of formatted strings. - Notes - ----- - The following date format identifiers are not yet - supported: ``%c``, ``%x``,``%X`` - Examples -------- >>> import cudf @@ -4519,6 +4517,12 @@ def strftime(self, date_format, *args, **kwargs): 1 2000 / 30 / 06 2 2000 / 30 / 09 dtype: object + + .. 
pandas-compat:: + **series.DatetimeProperties.strftime** + + The following date format identifiers are not yet + supported: ``%c``, ``%x``,``%X`` """ if not isinstance(date_format, str): diff --git a/python/cudf/cudf/core/tools/datetimes.py b/python/cudf/cudf/core/tools/datetimes.py index 92ef49e92d9..472aee9c516 100644 --- a/python/cudf/cudf/core/tools/datetimes.py +++ b/python/cudf/cudf/core/tools/datetimes.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# Copyright (c) 2019-2023, NVIDIA CORPORATION. import math import re @@ -808,7 +808,6 @@ def date_range( '2023-12-23 08:00:00', '2025-02-23 08:00:00', '2026-04-23 08:00:00'], dtype='datetime64[ns]') - """ if tz is not None: raise NotImplementedError("tz is currently unsupported.") diff --git a/python/cudf/cudf/core/tools/numeric.py b/python/cudf/cudf/core/tools/numeric.py index 0273227010b..609a5503040 100644 --- a/python/cudf/cudf/core/tools/numeric.py +++ b/python/cudf/cudf/core/tools/numeric.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2022, NVIDIA CORPORATION. +# Copyright (c) 2018-2023, NVIDIA CORPORATION. import warnings @@ -56,13 +56,6 @@ def to_numeric(arg, errors="raise", downcast=None): Depending on the input, if series is passed in, series is returned, otherwise ndarray - Notes - ----- - An important difference from pandas is that this function does not accept - mixed numeric/non-numeric type sequences. For example ``[1, 'a']``. - A ``TypeError`` will be raised when such input is received, regardless of - ``errors`` parameter. - Examples -------- >>> s = cudf.Series(['1', '2.0', '3e3']) @@ -92,6 +85,15 @@ def to_numeric(arg, errors="raise", downcast=None): 1 1.0 2 3000.0 dtype: float64 + + .. pandas-compat:: + **cudf.to_numeric** + + An important difference from pandas is that this function does not accept + mixed numeric/non-numeric type sequences. For example ``[1, 'a']``. + A ``TypeError`` will be raised when such input is received, regardless of + ``errors`` parameter. 
+ """ if errors not in {"raise", "ignore", "coerce"}: diff --git a/python/dask_cudf/dask_cudf/accessors.py b/python/dask_cudf/dask_cudf/accessors.py index 1c21fca51c8..873981b24f9 100644 --- a/python/dask_cudf/dask_cudf/accessors.py +++ b/python/dask_cudf/dask_cudf/accessors.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. +# Copyright (c) 2021-2023, NVIDIA CORPORATION. class StructMethods: @@ -263,11 +263,6 @@ def sort_values( ------- ListColumn with each list sorted - Notes - ----- - Difference from pandas: - * Not supporting: `inplace`, `kind` - Examples -------- >>> s = cudf.Series([[4, 2, None, 9], [8, 8, 2], [2, 1]]) @@ -277,6 +272,11 @@ def sort_values( 1 [2.0, 8.0, 8.0] 2 [1.0, 2.0] dtype: list + + .. pandas-compat:: + **ListMethods.sort_values** + + The `inplace` and `kind` arguments are currently unsupported. """ return self.d_series.map_partitions( lambda s: s.list.sort_values(