diff --git a/docs/cudf/source/api_docs/dataframe.rst b/docs/cudf/source/api_docs/dataframe.rst index e9a79701d5b..ca1dc03b987 100644 --- a/docs/cudf/source/api_docs/dataframe.rst +++ b/docs/cudf/source/api_docs/dataframe.rst @@ -192,6 +192,7 @@ Reshaping, sorting, transposing DataFrame.unstack DataFrame.melt DataFrame.explode + DataFrame.to_struct DataFrame.T DataFrame.transpose diff --git a/docs/cudf/source/api_docs/index_objects.rst b/docs/cudf/source/api_docs/index_objects.rst index c23c9a3f6c1..30269bb2a72 100644 --- a/docs/cudf/source/api_docs/index_objects.rst +++ b/docs/cudf/source/api_docs/index_objects.rst @@ -23,6 +23,7 @@ Properties Index.empty Index.gpu_values + Index.has_duplicates Index.is_monotonic Index.is_monotonic_increasing Index.is_monotonic_decreasing @@ -46,6 +47,14 @@ Modifying and computations Index.drop_duplicates Index.equals Index.factorize + Index.is_boolean + Index.is_categorical + Index.is_floating + Index.is_integer + Index.is_interval + Index.is_mixed + Index.is_numeric + Index.is_object Index.min Index.max Index.rename @@ -84,9 +93,15 @@ Conversion :toctree: api/ Index.astype + Index.to_array + Index.to_arrow Index.to_list Index.to_series Index.to_frame + Index.to_pandas + Index.to_dlpack + Index.from_pandas + Index.from_arrow Sorting ~~~~~~~ @@ -110,6 +125,8 @@ Combining / joining / set operations :toctree: api/ Index.append + Index.union + Index.intersection Index.join Index.difference @@ -249,7 +266,13 @@ Time/date components DatetimeIndex.minute DatetimeIndex.second DatetimeIndex.dayofweek + DatetimeIndex.dayofyear + DatetimeIndex.day_of_year DatetimeIndex.weekday + DatetimeIndex.is_leap_year + DatetimeIndex.quarter + DatetimeIndex.isocalendar + Time-specific operations ~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/cudf/source/api_docs/series.rst b/docs/cudf/source/api_docs/series.rst index ffa809268f3..95cf58adf0e 100644 --- a/docs/cudf/source/api_docs/series.rst +++ b/docs/cudf/source/api_docs/series.rst @@ -44,7 +44,6 @@ 
Conversion Series.copy Series.to_list Series.__array__ - Series.as_index Series.as_mask Series.scale @@ -99,6 +98,7 @@ Function application, GroupBy & window .. autosummary:: :toctree: api/ + Series.apply Series.applymap Series.map Series.groupby @@ -250,6 +250,7 @@ Datetime, Timedelta :ref:`dt <api.series.dt>` String :ref:`str <api.series.str>` Categorical :ref:`cat <api.series.cat>` List :ref:`list <api.series.list>` +Struct :ref:`struct <api.series.struct>` =========================== ================================= .. _api.series.dt: @@ -270,12 +271,23 @@ Datetime properties day dayofweek + dayofyear + days_in_month + day_of_year hour minute month second weekday year + is_leap_year + is_month_start + is_month_end + is_quarter_start + is_quarter_end + is_year_start + is_year_end + quarter Datetime methods ^^^^^^^^^^^^^^^^ @@ -284,6 +296,7 @@ Datetime methods :toctree: api/ strftime + isocalendar Timedelta properties @@ -324,6 +337,7 @@ strings and apply several methods to it. These can be accessed like count detokenize edit_distance + edit_distance_matrix endswith extract filter_alphanum @@ -454,6 +468,23 @@ lists and apply list methods to it. These can be accessed like unique +.. _api.series.struct: + +Struct handling +~~~~~~~~~~~~~~~ + +``Series.struct`` can be used to access the values of the series as +Structs and apply struct methods to it. These can be accessed like +``Series.struct.<function/property>``. + +.. currentmodule:: cudf.core.column.struct.StructMethods +.. autosummary:: + :toctree: api/ + + field + explode + + Serialization / IO / conversion ------------------------------- .. currentmodule:: cudf diff --git a/docs/cudf/source/basics/basics.rst b/docs/cudf/source/basics/basics.rst index ee63f67daa2..cae7d017291 100644 --- a/docs/cudf/source/basics/basics.rst +++ b/docs/cudf/source/basics/basics.rst @@ -36,7 +36,8 @@ The following table lists all of cudf types. 
For methods requiring dtype argumen +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ | Boolean | | np.bool_ | ``'bool'`` | +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ - | Decimal | Decimal64Dtype | (none) | (none) | + | Decimal | Decimal32Dtype, | (none) | (none) | + | | Decimal64Dtype | | | +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ **Note: All dtypes above are Nullable** diff --git a/docs/cudf/source/basics/io-supported-types.rst b/docs/cudf/source/basics/io-supported-types.rst index 78c1bfb6554..544acb9c683 100644 --- a/docs/cudf/source/basics/io-supported-types.rst +++ b/docs/cudf/source/basics/io-supported-types.rst @@ -58,7 +58,9 @@ The following table lists are compatible cudf types for each supported IO format +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ | struct | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ - | decimal64 | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | + | decimal32 | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | + +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ + | decimal64 | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | 
+-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ **Notes:** diff --git a/docs/cudf/source/user_guide/guide-to-udfs.ipynb b/docs/cudf/source/user_guide/guide-to-udfs.ipynb index 67cc90f9236..215d11cdbb8 100644 --- a/docs/cudf/source/user_guide/guide-to-udfs.ipynb +++ b/docs/cudf/source/user_guide/guide-to-udfs.ipynb @@ -2161,7 +2161,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# `cudf.Series.apply`" + "## `cudf.Series.apply`" ] }, { @@ -2246,7 +2246,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Caveats" + "## Caveats" ] }, { diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 3f6a18c2ea0..81d4c9adfa1 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -4675,7 +4675,7 @@ def subword_tokenize( Examples -------- >>> import cudf - >>> from cudf.utils.hash_vocab_utils import hash_vocab + >>> from cudf.utils.hash_vocab_utils import hash_vocab >>> hash_vocab('bert-base-uncased-vocab.txt', 'voc_hash.txt') >>> ser = cudf.Series(['this is the', 'best book']) >>> stride, max_length = 8, 8 diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 5d3b05e3f85..0baa4012570 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -978,7 +978,7 @@ def __getitem__(self, arg): 2 2 2 2 3 3 3 3 >>> df[-5:] # get last 5 rows of all columns - a b c + a b c 15 15 15 15 16 16 16 16 17 17 17 17 @@ -1233,7 +1233,7 @@ def memory_usage(self, index=True, deep=False): ... 
for t in dtypes]) >>> df = cudf.DataFrame(data) >>> df.head() - int64 float64 object bool + int64 float64 object bool 0 1 1.0 1.0 True 1 1 1.0 1.0 True 2 1 1.0 1.0 True @@ -1611,7 +1611,7 @@ def astype(self, dtype, copy=False, errors="raise", **kwargs): b int64 dtype: object >>> df.astype({'a': 'float32'}) - a b + a b 0 10.0 1 1 20.0 2 2 30.0 3 @@ -4900,7 +4900,7 @@ def to_pandas(self, nullable=False, **kwargs): dtype: object >>> pdf = df.to_pandas(nullable=False) >>> pdf - a b + a b 0 0.0 True 1 NaN False 2 2.0 None @@ -5384,7 +5384,7 @@ def quantile( b 3.7 Name: 0.1, dtype: float64 >>> df.quantile([.1, .5]) - a b + a b 0.1 1.3 3.7 0.5 2.5 55.0 """ # noqa: E501 @@ -6348,7 +6348,7 @@ def append( See Also -------- - cudf.core.reshape.concat : General function to concatenate DataFrame or + cudf.concat : General function to concatenate DataFrame or objects. Notes diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 789f5117d9c..934682a1996 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -1823,7 +1823,7 @@ def round(self, decimals=0, how="half_even"): ... columns=['dogs', 'cats'] ... 
) >>> df - dogs cats + dogs cats 0 0.21 0.32 1 0.01 0.67 2 0.66 0.03 @@ -1833,7 +1833,7 @@ def round(self, decimals=0, how="half_even"): of decimal places >>> df.round(1) - dogs cats + dogs cats 0 0.2 0.3 1 0.0 0.7 2 0.7 0.0 @@ -1844,7 +1844,7 @@ def round(self, decimals=0, how="half_even"): places as value >>> df.round({'dogs': 1, 'cats': 0}) - dogs cats + dogs cats 0 0.2 0.0 1 0.0 1.0 2 0.7 0.0 @@ -1856,7 +1856,7 @@ def round(self, decimals=0, how="half_even"): >>> decimals = cudf.Series([0, 1], index=['cats', 'dogs']) >>> df.round(decimals) - dogs cats + dogs cats 0 0.2 0.0 1 0.0 1.0 2 0.7 0.0 @@ -2634,7 +2634,7 @@ def isnull(self): 1 6 1939-05-27 00:00:00.000000 Batman Batmobile 2 1940-04-25 00:00:00.000000 Joker >>> df.isnull() - age born name toy + age born name toy 0 False True False True 1 False False False False 2 True False False False @@ -5080,12 +5080,12 @@ def nans_to_nulls(self): >>> df['a'] = cudf.Series([1, None, np.nan], nan_as_null=False) >>> df['b'] = cudf.Series([None, 3.14, np.nan], nan_as_null=False) >>> df - a b + a b 0 1.0 1 3.14 2 NaN NaN >>> df.nans_to_nulls() - a b + a b 0 1.0 1 3.14 2 diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index bcc97ae82ce..7743fecad49 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -728,7 +728,7 @@ def append(self, to_append, ignore_index=False, verify_integrity=False): See Also -------- - cudf.core.reshape.concat : General function to concatenate DataFrame or + cudf.concat : General function to concatenate DataFrame or Series objects. Examples @@ -2879,6 +2879,7 @@ def apply(self, func, convert_dtype=True, args=(), **kwargs): dtype: int64 Apply a basic function to a series with nulls + >>> sr = cudf.Series([1,cudf.NA,3]) >>> def f(x): ... 
return x + 1 @@ -2890,6 +2891,7 @@ def apply(self, func, convert_dtype=True, args=(), **kwargs): Use a function that does something conditionally, based on if the value is or is not null + >>> sr = cudf.Series([1,cudf.NA,3]) >>> def f(x): ... if x is cudf.NA: diff --git a/python/cudf/cudf/core/subword_tokenizer.py b/python/cudf/cudf/core/subword_tokenizer.py index 60139f7d7af..3502fc9acae 100644 --- a/python/cudf/cudf/core/subword_tokenizer.py +++ b/python/cudf/cudf/core/subword_tokenizer.py @@ -134,12 +134,12 @@ def __call__( Examples -------- >>> import cudf - >>> from cudf.utils.hash_vocab_utils import hash_vocab + >>> from cudf.utils.hash_vocab_utils import hash_vocab >>> hash_vocab('bert-base-cased-vocab.txt', 'voc_hash.txt') >>> from cudf.core.subword_tokenizer import SubwordTokenizer - >>> cudf_tokenizer = SubwordTokenizer('voc_hash.txt', + >>> cudf_tokenizer = SubwordTokenizer('voc_hash.txt', ... do_lower_case=True) >>> str_series = cudf.Series(['This is the', 'best book']) >>> tokenizer_output = cudf_tokenizer(str_series,