Miscellaneous documentation fixes to cudf (#9471)

This PR:

- [x] Exposes `.struct` accessor
- [x] Exposes `Decimal32Dtype`
- [x] Adds newly introduced Datetime related APIs
- [x] Fixes some examples.

Authors:
- GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
- Vukasin Milovanovic (https://github.com/vuule)
- Michael Wang (https://github.com/isVoid)

URL: #9471
rapidsai · Oct 20, 2021 · 7e84aa1 · 7e84aa1
1 parent 2144034
commit 7e84aa1
Show file tree

Hide file tree

Showing 11 changed files with 82 additions and 22 deletions.
diff --git a/docs/cudf/source/api_docs/dataframe.rst b/docs/cudf/source/api_docs/dataframe.rst
@@ -192,6 +192,7 @@ Reshaping, sorting, transposing
    DataFrame.unstack
    DataFrame.melt
    DataFrame.explode
+   DataFrame.to_struct
    DataFrame.T
    DataFrame.transpose
 

diff --git a/docs/cudf/source/api_docs/index_objects.rst b/docs/cudf/source/api_docs/index_objects.rst
@@ -23,6 +23,7 @@ Properties
 
    Index.empty
    Index.gpu_values
+   Index.has_duplicates
    Index.is_monotonic
    Index.is_monotonic_increasing
    Index.is_monotonic_decreasing
@@ -46,6 +47,14 @@ Modifying and computations
    Index.drop_duplicates
    Index.equals
    Index.factorize
+   Index.is_boolean
+   Index.is_categorical
+   Index.is_floating
+   Index.is_integer
+   Index.is_interval
+   Index.is_mixed
+   Index.is_numeric
+   Index.is_object
    Index.min
    Index.max
    Index.rename
@@ -84,9 +93,15 @@ Conversion
    :toctree: api/
 
    Index.astype
+   Index.to_array
+   Index.to_arrow
    Index.to_list
    Index.to_series
    Index.to_frame
+   Index.to_pandas
+   Index.to_dlpack
+   Index.from_pandas
+   Index.from_arrow
 
 Sorting
 ~~~~~~~
@@ -110,6 +125,8 @@ Combining / joining / set operations
    :toctree: api/
 
    Index.append
+   Index.union
+   Index.intersection
    Index.join
    Index.difference
 
@@ -249,7 +266,13 @@ Time/date components
    DatetimeIndex.minute
    DatetimeIndex.second
    DatetimeIndex.dayofweek
+   DatetimeIndex.dayofyear
+   DatetimeIndex.day_of_year
    DatetimeIndex.weekday
+   DatetimeIndex.is_leap_year
+   DatetimeIndex.quarter
+   DatetimeIndex.isocalendar
+
 
 Time-specific operations
 ~~~~~~~~~~~~~~~~~~~~~~~~

diff --git a/docs/cudf/source/api_docs/series.rst b/docs/cudf/source/api_docs/series.rst
@@ -44,7 +44,6 @@ Conversion
    Series.copy
    Series.to_list
    Series.__array__
-   Series.as_index
    Series.as_mask
    Series.scale
 
@@ -99,6 +98,7 @@ Function application, GroupBy & window
 .. autosummary::
    :toctree: api/
 
+   Series.apply
    Series.applymap
    Series.map
    Series.groupby
@@ -250,6 +250,7 @@ Datetime, Timedelta         :ref:`dt <api.series.dt>`
 String                      :ref:`str <api.series.str>`
 Categorical                 :ref:`cat <api.series.cat>`
 List                        :ref:`list <api.series.list>`
+Struct                      :ref:`struct <api.series.struct>`
 =========================== =================================
 
 .. _api.series.dt:
@@ -270,12 +271,23 @@ Datetime properties
 
    day
    dayofweek
+   dayofyear
+   days_in_month
+   day_of_year
    hour
    minute
    month
    second
    weekday
    year
+   is_leap_year
+   is_month_start
+   is_month_end
+   is_quarter_start
+   is_quarter_end
+   is_year_start
+   is_year_end
+   quarter
 
 Datetime methods
 ^^^^^^^^^^^^^^^^
@@ -284,6 +296,7 @@ Datetime methods
    :toctree: api/
 
    strftime
+   isocalendar
 
 
 Timedelta properties
@@ -324,6 +337,7 @@ strings and apply several methods to it. These can be accessed like
    count
    detokenize
    edit_distance
+   edit_distance_matrix
    endswith
    extract
    filter_alphanum
@@ -454,6 +468,23 @@ lists and apply list methods to it. These can be accessed like
    unique
 
 
+.. _api.series.struct:
+
+Struct handling
+~~~~~~~~~~~~~~~
+
+``Series.struct`` can be used to access the values of the series as
+Structs and apply struct methods to it. These can be accessed like
+``Series.struct.<function/property>``.
+
+.. currentmodule:: cudf.core.column.struct.StructMethods
+.. autosummary::
+   :toctree: api/
+
+   field
+   explode
+
+
 Serialization / IO / conversion
 -------------------------------
 .. currentmodule:: cudf

diff --git a/docs/cudf/source/basics/basics.rst b/docs/cudf/source/basics/basics.rst
@@ -36,7 +36,8 @@ The following table lists all of cudf types. For methods requiring dtype argumen
     +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+
     | Boolean                |                  | np.bool_                                                                            | ``'bool'``                                  |
     +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+
-    | Decimal                | Decimal64Dtype   | (none)                                                                              | (none)                                      |
+    | Decimal                | Decimal32Dtype,  | (none)                                                                              | (none)                                      |
+    |                        | Decimal64Dtype   |                                                                                     |                                             |
     +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+
 
 **Note: All dtypes above are Nullable**

diff --git a/docs/cudf/source/basics/io-supported-types.rst b/docs/cudf/source/basics/io-supported-types.rst
@@ -58,7 +58,9 @@ The following table lists are compatible cudf types for each supported IO format
     +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+
     | struct                | ❌     | ❌     | ✅     | ✅     | ❌      | ❌     | ❌     | ✅     | ❌     | ✅      | ✅      | ❌     | ❌     | ✅      | ✅      |
     +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+
-    | decimal64             | ❌     | ❌     | ✅     | ✅     | ❌      | ❌     | ❌     | ✅     | ❌     | ❌      | ❌      | ❌     | ❌     | ❌      | ❌      |
+    | decimal32             | ✅     | ✅     | ✅     | ✅     | ❌      | ❌     | ✅     | ✅     | ❌     | ❌      | ❌      | ❌     | ❌     | ❌      | ❌      |
+    +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+
+    | decimal64             | ✅     | ✅     | ✅     | ✅     | ❌      | ❌     | ✅     | ✅     | ❌     | ❌      | ❌      | ❌     | ❌     | ❌      | ❌      |
     +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+
 
 **Notes:**

diff --git a/docs/cudf/source/user_guide/guide-to-udfs.ipynb b/docs/cudf/source/user_guide/guide-to-udfs.ipynb
@@ -2161,7 +2161,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# `cudf.Series.apply`"
+    "## `cudf.Series.apply`"
    ]
   },
   {
@@ -2246,7 +2246,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Caveats"
+    "## Caveats"
    ]
   },
   {

diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
@@ -4675,7 +4675,7 @@ def subword_tokenize(
         Examples
         --------
         >>> import cudf
-        >>> from cudf.utils.hash_vocab_utils  import hash_vocab
+        >>> from cudf.utils.hash_vocab_utils import hash_vocab
         >>> hash_vocab('bert-base-uncased-vocab.txt', 'voc_hash.txt')
         >>> ser = cudf.Series(['this is the', 'best book'])
         >>> stride, max_length = 8, 8

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
@@ -978,7 +978,7 @@ def __getitem__(self, arg):
         2  2  2  2
         3  3  3  3
         >>> df[-5:]  # get last 5 rows of all columns
-            a   b   c
+             a   b   c
         15  15  15  15
         16  16  16  16
         17  17  17  17
@@ -1233,7 +1233,7 @@ def memory_usage(self, index=True, deep=False):
         ...              for t in dtypes])
         >>> df = cudf.DataFrame(data)
         >>> df.head()
-            int64  float64  object  bool
+           int64  float64  object  bool
         0      1      1.0     1.0  True
         1      1      1.0     1.0  True
         2      1      1.0     1.0  True
@@ -1611,7 +1611,7 @@ def astype(self, dtype, copy=False, errors="raise", **kwargs):
         b      int64
         dtype: object
         >>> df.astype({'a': 'float32'})
-            a  b
+              a  b
         0  10.0  1
         1  20.0  2
         2  30.0  3
@@ -4900,7 +4900,7 @@ def to_pandas(self, nullable=False, **kwargs):
         dtype: object
         >>> pdf = df.to_pandas(nullable=False)
         >>> pdf
-            a      b
+             a      b
         0  0.0   True
         1  NaN  False
         2  2.0   None
@@ -5384,7 +5384,7 @@ def quantile(
         b    3.7
         Name: 0.1, dtype: float64
         >>> df.quantile([.1, .5])
-            a     b
+               a     b
         0.1  1.3   3.7
         0.5  2.5  55.0
         """  # noqa: E501
@@ -6348,7 +6348,7 @@ def append(
 
         See Also
         --------
-        cudf.core.reshape.concat : General function to concatenate DataFrame or
+        cudf.concat : General function to concatenate DataFrame or Series
             objects.
 
         Notes

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
@@ -1823,7 +1823,7 @@ def round(self, decimals=0, how="half_even"):
         ...     columns=['dogs', 'cats']
         ... )
         >>> df
-            dogs  cats
+           dogs  cats
         0  0.21  0.32
         1  0.01  0.67
         2  0.66  0.03
@@ -1833,7 +1833,7 @@ def round(self, decimals=0, how="half_even"):
         of decimal places
 
         >>> df.round(1)
-            dogs  cats
+           dogs  cats
         0   0.2   0.3
         1   0.0   0.7
         2   0.7   0.0
@@ -1844,7 +1844,7 @@ def round(self, decimals=0, how="half_even"):
         places as value
 
         >>> df.round({'dogs': 1, 'cats': 0})
-            dogs  cats
+           dogs  cats
         0   0.2   0.0
         1   0.0   1.0
         2   0.7   0.0
@@ -1856,7 +1856,7 @@ def round(self, decimals=0, how="half_even"):
 
         >>> decimals = cudf.Series([0, 1], index=['cats', 'dogs'])
         >>> df.round(decimals)
-            dogs  cats
+           dogs  cats
         0   0.2   0.0
         1   0.0   1.0
         2   0.7   0.0
@@ -2634,7 +2634,7 @@ def isnull(self):
         1     6  1939-05-27 00:00:00.000000  Batman  Batmobile
         2  <NA>  1940-04-25 00:00:00.000000              Joker
         >>> df.isnull()
-            age   born   name    toy
+             age   born   name    toy
         0  False   True  False   True
         1  False  False  False  False
         2   True  False  False  False
@@ -5080,12 +5080,12 @@ def nans_to_nulls(self):
         >>> df['a'] = cudf.Series([1, None, np.nan], nan_as_null=False)
         >>> df['b'] = cudf.Series([None, 3.14, np.nan], nan_as_null=False)
         >>> df
-            a     b
+              a     b
         0   1.0  <NA>
         1  <NA>  3.14
         2   NaN   NaN
         >>> df.nans_to_nulls()
-            a     b
+              a     b
         0   1.0  <NA>
         1  <NA>  3.14
         2  <NA>  <NA>

diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
@@ -728,7 +728,7 @@ def append(self, to_append, ignore_index=False, verify_integrity=False):
 
         See Also
         --------
-        cudf.core.reshape.concat : General function to concatenate DataFrame or
+        cudf.concat : General function to concatenate DataFrame or
             Series objects.
 
         Examples
@@ -2879,6 +2879,7 @@ def apply(self, func, convert_dtype=True, args=(), **kwargs):
         dtype: int64
 
         Apply a basic function to a series with nulls
+
         >>> sr = cudf.Series([1,cudf.NA,3])
         >>> def f(x):
         ...     return x + 1
@@ -2890,6 +2891,7 @@ def apply(self, func, convert_dtype=True, args=(), **kwargs):
 
         Use a function that does something conditionally,
         based on if the value is or is not null
+
         >>> sr = cudf.Series([1,cudf.NA,3])
         >>> def f(x):
         ...     if x is cudf.NA:

diff --git a/python/cudf/cudf/core/subword_tokenizer.py b/python/cudf/cudf/core/subword_tokenizer.py
@@ -134,12 +134,12 @@ def __call__(
         Examples
         --------
         >>> import cudf
-        >>> from cudf.utils.hash_vocab_utils  import hash_vocab
+        >>> from cudf.utils.hash_vocab_utils import hash_vocab
         >>> hash_vocab('bert-base-cased-vocab.txt', 'voc_hash.txt')
 
 
         >>> from cudf.core.subword_tokenizer import SubwordTokenizer
-        >>> cudf_tokenizer  = SubwordTokenizer('voc_hash.txt',
+        >>> cudf_tokenizer = SubwordTokenizer('voc_hash.txt',
         ...                                    do_lower_case=True)
         >>> str_series = cudf.Series(['This is the', 'best book'])
         >>> tokenizer_output = cudf_tokenizer(str_series,