diff --git a/.gitignore b/.gitignore
index b398cfc4f88..aee3d072de2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -159,3 +159,7 @@ dask-worker-space/
# protobuf
**/*_pb2.py
+
+# Sphinx docs & build artifacts
+docs/cudf/source/api_docs/generated/*
+docs/cudf/source/api_docs/api/*
\ No newline at end of file
diff --git a/conda/environments/cudf_dev_cuda11.0.yml b/conda/environments/cudf_dev_cuda11.0.yml
index 70bbe88a00c..692ebe71794 100644
--- a/conda/environments/cudf_dev_cuda11.0.yml
+++ b/conda/environments/cudf_dev_cuda11.0.yml
@@ -26,7 +26,6 @@ dependencies:
- pytest-benchmark
- pytest-xdist
- sphinx
- - sphinx_rtd_theme
- sphinxcontrib-websupport
- nbsphinx
- numpydoc
@@ -57,6 +56,7 @@ dependencies:
- nvtx>=0.2.1
- cachetools
- transformers
+ - pydata-sphinx-theme
- pip:
- git+https://github.com/dask/dask.git@main
- git+https://github.com/dask/distributed.git@main
diff --git a/conda/environments/cudf_dev_cuda11.2.yml b/conda/environments/cudf_dev_cuda11.2.yml
index 6d2abdda449..ce82b870e16 100644
--- a/conda/environments/cudf_dev_cuda11.2.yml
+++ b/conda/environments/cudf_dev_cuda11.2.yml
@@ -26,7 +26,6 @@ dependencies:
- pytest-benchmark
- pytest-xdist
- sphinx
- - sphinx_rtd_theme
- sphinxcontrib-websupport
- nbsphinx
- numpydoc
@@ -57,6 +56,7 @@ dependencies:
- nvtx>=0.2.1
- cachetools
- transformers
+ - pydata-sphinx-theme
- pip:
- git+https://github.com/dask/dask.git@main
- git+https://github.com/dask/distributed.git@main
diff --git a/docs/cudf/source/_static/RAPIDS-logo-purple.png b/docs/cudf/source/_static/RAPIDS-logo-purple.png
new file mode 100644
index 00000000000..d884e01374d
Binary files /dev/null and b/docs/cudf/source/_static/RAPIDS-logo-purple.png differ
diff --git a/docs/cudf/source/_static/copybutton_pydocs.js b/docs/cudf/source/_static/copybutton_pydocs.js
deleted file mode 100644
index cec05777e6b..00000000000
--- a/docs/cudf/source/_static/copybutton_pydocs.js
+++ /dev/null
@@ -1,65 +0,0 @@
-$(document).ready(function() {
- /* Add a [>>>] button on the top-right corner of code samples to hide
- * the >>> and ... prompts and the output and thus make the code
- * copyable. */
- var div = $('.highlight-python .highlight,' +
- '.highlight-python3 .highlight,' +
- '.highlight-pycon .highlight,' +
- '.highlight-default .highlight');
- var pre = div.find('pre');
-
- // get the styles from the current theme
- pre.parent().parent().css('position', 'relative');
- var hide_text = 'Hide the prompts and output';
- var show_text = 'Show the prompts and output';
- var border_width = pre.css('border-top-width');
- var border_style = pre.css('border-top-style');
- var border_color = pre.css('border-top-color');
- var button_styles = {
- 'cursor':'pointer', 'position': 'absolute', 'top': '0', 'right': '0',
- 'border-color': border_color, 'border-style': border_style,
- 'border-width': border_width, 'text-size': '75%',
- 'font-family': 'monospace', 'padding-left': '0.2em', 'padding-right': '1.5em',
- 'border-radius': '0 3px 0 0',
- 'transition': "0.5s"
- }
-
- // create and add the button to all the code blocks that contain >>>
- div.each(function(index) {
- var jthis = $(this);
- if (jthis.find('.gp').length > 0) {
- var button = $('>>>');
- button.css(button_styles)
- button.attr('title', hide_text);
- button.data('hidden', 'false');
- jthis.prepend(button);
- }
- // tracebacks (.gt) contain bare text elements that need to be
- // wrapped in a span to work with .nextUntil() (see later)
- jthis.find('pre:has(.gt)').contents().filter(function() {
- return ((this.nodeType == 3) && (this.data.trim().length > 0));
- }).wrap('');
- });
-
- // define the behavior of the button when it's clicked
- $('.copybutton').click(function(e){
- e.preventDefault();
- var button = $(this);
- if (button.data('hidden') === 'false') {
- // hide the code output
- button.parent().find('.go, .gp, .gt').hide();
- button.next('pre').find('.gt').nextUntil('.gp, .go').css('visibility', 'hidden');
- button.css('text-decoration', 'line-through');
- button.attr('title', show_text);
- button.data('hidden', 'true');
- } else {
- // show the code output
- button.parent().find('.go, .gp, .gt').show();
- button.next('pre').find('.gt').nextUntil('.gp, .go').css('visibility', 'visible');
- button.css('text-decoration', 'none');
- button.attr('title', hide_text);
- button.data('hidden', 'false');
- }
- });
-});
-
diff --git a/docs/cudf/source/_static/params.css b/docs/cudf/source/_static/params.css
index 475b9dfb4ec..2bdd6f5a299 100644
--- a/docs/cudf/source/_static/params.css
+++ b/docs/cudf/source/_static/params.css
@@ -8,14 +8,6 @@
content: ":";
}
-.highlight:hover span#strike_button {
- color:#767676;
-}
-
-span#strike_button {
- color :#d0ced7;
-}
-
/* Fix for text wrap in sphinx tables:
* https://rackerlabs.github.io/docs-rackspace/tools/rtd-tables.html
*/
@@ -40,3 +32,24 @@ table.io-supported-types-table {
table.io-supported-types-table thead{
text-align: center !important;
}
+
+:root {
+
+ --pst-color-active-navigation: 114, 83, 237;
+ --pst-color-navbar-link: 77, 77, 77;
+ --pst-color-navbar-link-hover: var(--pst-color-active-navigation);
+ --pst-color-navbar-link-active: var(--pst-color-active-navigation);
+ --pst-color-sidebar-link: 77, 77, 77;
+ --pst-color-sidebar-link-hover: var(--pst-color-active-navigation);
+ --pst-color-sidebar-link-active: var(--pst-color-active-navigation);
+ --pst-color-sidebar-expander-background-hover: 244, 244, 244;
+ --pst-color-sidebar-caption: 77, 77, 77;
+ --pst-color-toc-link: 119, 117, 122;
+ --pst-color-toc-link-hover: var(--pst-color-active-navigation);
+ --pst-color-toc-link-active: var(--pst-color-active-navigation);
+
+}
+
+.special-table td, .special-table th {
+ border: 1px solid #dee2e6;
+}
\ No newline at end of file
diff --git a/docs/cudf/source/_templates/autosummary/class_with_autosummary.rst b/docs/cudf/source/_templates/autosummary/class_with_autosummary.rst
new file mode 100644
index 00000000000..f86822bc567
--- /dev/null
+++ b/docs/cudf/source/_templates/autosummary/class_with_autosummary.rst
@@ -0,0 +1,33 @@
+{% extends "!autosummary/class.rst" %}
+
+{% block methods %}
+{% if methods %}
+
+..
+ HACK -- the point here is that we don't want this to appear in the output, but the autosummary should still generate the pages.
+ .. autosummary::
+ :toctree:
+ {% for item in all_methods %}
+ {%- if not item.startswith('_') or item in ['__call__'] %}
+ {{ name }}.{{ item }}
+ {%- endif -%}
+ {%- endfor %}
+
+{% endif %}
+{% endblock %}
+
+{% block attributes %}
+{% if attributes %}
+
+..
+ HACK -- the point here is that we don't want this to appear in the output, but the autosummary should still generate the pages.
+ .. autosummary::
+ :toctree:
+ {% for item in all_attributes %}
+ {%- if not item.startswith('_') %}
+ {{ name }}.{{ item }}
+ {%- endif -%}
+ {%- endfor %}
+
+{% endif %}
+{% endblock %}
\ No newline at end of file
diff --git a/docs/cudf/source/_templates/autosummary/class_without_autosummary.rst b/docs/cudf/source/_templates/autosummary/class_without_autosummary.rst
new file mode 100644
index 00000000000..b57a7ceebb0
--- /dev/null
+++ b/docs/cudf/source/_templates/autosummary/class_without_autosummary.rst
@@ -0,0 +1,6 @@
+{{ fullname }}
+{{ underline }}
+
+.. currentmodule:: {{ module }}
+
+.. autoclass:: {{ objname }}
\ No newline at end of file
diff --git a/docs/cudf/source/api.rst b/docs/cudf/source/api.rst
deleted file mode 100644
index d3042be2129..00000000000
--- a/docs/cudf/source/api.rst
+++ /dev/null
@@ -1,270 +0,0 @@
-~~~~~~~~~~~~~~~~~~~
-cuDF API Reference
-~~~~~~~~~~~~~~~~~~~
-
-.. currentmodule:: cudf.core.dataframe
-
-DataFrame
----------
-.. autoclass:: DataFrame
- :members:
- :inherited-members:
- :exclude-members: serialize, deserialize, device_deserialize, device_serialize, host_deserialize, host_serialize, to_dict, itertuples, iterrows
-
-Series
-------
-.. currentmodule:: cudf.core.series
-
-.. autoclass:: Series
- :members:
- :inherited-members:
- :exclude-members: serialize, deserialize, logical_not, logical_or, logical_and, remainder, sum_of_squares, fill, merge, iteritems, items, device_deserialize, device_serialize, host_deserialize, host_serialize, to_dict, tolist, to_list
-
-Lists
------
-.. currentmodule:: cudf.core.column.lists
-
-.. autoclass:: ListMethods
- :members:
-
-Strings
--------
-.. currentmodule:: cudf.core.column.string
-
-.. autoclass:: StringMethods
- :members:
-
-General Functions
------------------
-.. automodule:: cudf.core.reshape
- :members:
-.. autofunction:: cudf.to_datetime
-.. autofunction:: cudf.to_numeric
-
-Index
------
-.. currentmodule:: cudf.core.index
-.. autoclass:: Index
- :members:
- :inherited-members:
- :exclude-members: serialize, deserialize, device_deserialize, device_serialize, host_deserialize, host_serialize, tolist, to_list
-
-RangeIndex
-----------
-.. currentmodule:: cudf.core.index
-.. autoclass:: RangeIndex
- :members:
- :inherited-members:
- :exclude-members: deserialize, serialize, device_deserialize, device_serialize, host_deserialize, host_serialize, tolist, to_list
-
-GenericIndex
-------------
-.. currentmodule:: cudf.core.index
-.. autoclass:: GenericIndex
- :members:
- :inherited-members:
- :exclude-members: deserialize, serialize, device_deserialize, device_serialize, host_deserialize, host_serialize, tolist, to_list
-
-MultiIndex
-----------
-.. currentmodule:: cudf.core.multiindex
-.. autoclass:: MultiIndex
- :members:
- :inherited-members:
- :exclude-members: deserialize, serialize, device_deserialize, device_serialize, host_deserialize, host_serialize, tolist, to_list
-
-Int8Index
----------
-.. currentmodule:: cudf.core.index
-.. autoclass:: Int8Index
- :members:
- :inherited-members:
- :exclude-members: deserialize, serialize, device_deserialize, device_serialize, host_deserialize, host_serialize, tolist, to_list
-
-Int16Index
-----------
-.. currentmodule:: cudf.core.index
-.. autoclass:: Int16Index
- :members:
- :inherited-members:
- :exclude-members: deserialize, serialize, device_deserialize, device_serialize, host_deserialize, host_serialize, tolist, to_list
-
-Int32Index
-----------
-.. currentmodule:: cudf.core.index
-.. autoclass:: Int32Index
- :members:
- :inherited-members:
- :exclude-members: deserialize, serialize, device_deserialize, device_serialize, host_deserialize, host_serialize, tolist, to_list
-
-Int64Index
-----------
-.. currentmodule:: cudf.core.index
-.. autoclass:: Int64Index
- :members:
- :inherited-members:
- :exclude-members: deserialize, serialize, device_deserialize, device_serialize, host_deserialize, host_serialize, tolist, to_list
-
-UInt8Index
-----------
-.. currentmodule:: cudf.core.index
-.. autoclass:: UInt8Index
- :inherited-members:
- :members:
- :exclude-members: deserialize, serialize, device_deserialize, device_serialize, host_deserialize, host_serialize, tolist, to_list
-
-UInt16Index
------------
-.. currentmodule:: cudf.core.index
-.. autoclass:: UInt16Index
- :members:
- :inherited-members:
- :exclude-members: deserialize, serialize, device_deserialize, device_serialize, host_deserialize, host_serialize, tolist, to_list
-
-UInt32Index
------------
-.. currentmodule:: cudf.core.index
-.. autoclass:: UInt32Index
- :members:
- :inherited-members:
- :exclude-members: deserialize, serialize, device_deserialize, device_serialize, host_deserialize, host_serialize, tolist, to_list
-
-UInt64Index
------------
-.. currentmodule:: cudf.core.index
-.. autoclass:: UInt64Index
- :members:
- :inherited-members:
- :exclude-members: deserialize, serialize, device_deserialize, device_serialize, host_deserialize, host_serialize, tolist, to_list
-
-Float32Index
-------------
-.. currentmodule:: cudf.core.index
-.. autoclass:: Float32Index
- :members:
- :inherited-members:
- :exclude-members: deserialize, serialize, device_deserialize, device_serialize, host_deserialize, host_serialize, tolist, to_list
-
-Float64Index
-------------
-.. currentmodule:: cudf.core.index
-.. autoclass:: Float64Index
- :members:
- :inherited-members:
- :exclude-members: deserialize, serialize, device_deserialize, device_serialize, host_deserialize, host_serialize, tolist, to_list
-
-CategoricalIndex
-----------------
-.. currentmodule:: cudf.core.index
-.. autoclass:: CategoricalIndex
- :members:
- :inherited-members:
- :exclude-members: deserialize, serialize, device_deserialize, device_serialize, host_deserialize, host_serialize, tolist, to_list
-
-StringIndex
------------
-.. currentmodule:: cudf.core.index
-.. autoclass:: StringIndex
- :members:
- :inherited-members:
- :exclude-members: deserialize, serialize, device_deserialize, device_serialize, host_deserialize, host_serialize, tolist, to_list
-
-DatetimeIndex
--------------
-.. currentmodule:: cudf.core.index
-.. autoclass:: DatetimeIndex
- :members:
- :inherited-members:
- :exclude-members: deserialize, serialize, device_deserialize, device_serialize, host_deserialize, host_serialize, tolist, to_list
-
-TimedeltaIndex
---------------
-.. currentmodule:: cudf.core.index
-.. autoclass:: TimedeltaIndex
- :members:
- :inherited-members:
- :exclude-members: deserialize, serialize, device_deserialize, device_serialize, host_deserialize, host_serialize, tolist, to_list
-
-Categories
-----------
-.. currentmodule:: cudf.core.column.categorical
-
-.. autoclass:: CategoricalAccessor
- :members:
-
-GroupBy
--------
-.. currentmodule:: cudf.core.groupby.groupby
-
-.. autoclass:: GroupBy
- :members:
- :exclude-members: deserialize, serialize, device_deserialize, device_serialize, host_deserialize, host_serialize
-
-Window
-------
-.. currentmodule:: cudf.core.window
-.. autoclass:: Rolling
- :members:
-
-SubwordTokenizer
-----------------
-.. currentmodule:: cudf.core.subword_tokenizer
-
-.. autoclass:: SubwordTokenizer
- :members:
- :special-members: __call__
-
-General utility functions
--------------------------
-.. currentmodule:: cudf.testing
-
-.. automodule:: cudf.testing.testing
- :members:
-
-
-Timedelta Properties
---------------------
-.. currentmodule:: cudf.core.series
-.. autoclass:: TimedeltaProperties
- :members:
-
-Datetime Properties
--------------------
-.. currentmodule:: cudf.core.series
-.. autoclass:: DatetimeProperties
- :members:
-
-IO
---
-.. currentmodule:: cudf.io
-
-.. automodule:: cudf.io.csv
- :members:
-.. automodule:: cudf.io.parquet
- :members:
-.. automodule:: cudf.io.orc
- :members:
-.. automodule:: cudf.io.json
- :members:
-.. automodule:: cudf.io.avro
- :members:
-.. automodule:: cudf.io.dlpack
- :members:
-.. automodule:: cudf.io.feather
- :members:
-.. automodule:: cudf.io.hdf
- :members:
-
-Extending cuDF
-----------------
-.. currentmodule:: cudf.api.extensions
-
-.. automodule:: cudf.api.extensions.accessor
- :members:
-
-GpuArrowReader
---------------
-.. currentmodule:: cudf.comm.gpuarrow
-.. autoclass:: GpuArrowReader
- :members:
- :exclude-members: count, index
diff --git a/docs/cudf/source/api_docs/dataframe.rst b/docs/cudf/source/api_docs/dataframe.rst
new file mode 100644
index 00000000000..12ff1f13bc4
--- /dev/null
+++ b/docs/cudf/source/api_docs/dataframe.rst
@@ -0,0 +1,254 @@
+=========
+DataFrame
+=========
+.. currentmodule:: cudf
+
+Constructor
+~~~~~~~~~~~
+.. autosummary::
+ :toctree: api/
+ :template: autosummary/class_with_autosummary.rst
+
+ DataFrame
+
+Attributes and underlying data
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+**Axes**
+
+.. autosummary::
+ :toctree: api/
+
+ DataFrame.index
+ DataFrame.columns
+
+.. autosummary::
+ :toctree: api/
+
+ DataFrame.dtypes
+ DataFrame.info
+ DataFrame.select_dtypes
+ DataFrame.values
+ DataFrame.ndim
+ DataFrame.size
+ DataFrame.shape
+ DataFrame.memory_usage
+ DataFrame.empty
+
+Conversion
+~~~~~~~~~~
+.. autosummary::
+ :toctree: api/
+
+ DataFrame.astype
+ DataFrame.copy
+
+Indexing, iteration
+~~~~~~~~~~~~~~~~~~~
+.. autosummary::
+ :toctree: api/
+
+ DataFrame.head
+ DataFrame.at
+ DataFrame.iat
+ DataFrame.loc
+ DataFrame.iloc
+ DataFrame.insert
+ DataFrame.__iter__
+ DataFrame.iteritems
+ DataFrame.keys
+ DataFrame.iterrows
+ DataFrame.itertuples
+ DataFrame.pop
+ DataFrame.tail
+ DataFrame.isin
+ DataFrame.where
+ DataFrame.mask
+ DataFrame.query
+
+For more information on ``.at``, ``.iat``, ``.loc``, and
+``.iloc``, see the :ref:`indexing documentation <indexing>`.
+
+Binary operator functions
+~~~~~~~~~~~~~~~~~~~~~~~~~
+.. autosummary::
+ :toctree: api/
+
+ DataFrame.add
+ DataFrame.sub
+ DataFrame.mul
+ DataFrame.div
+ DataFrame.truediv
+ DataFrame.floordiv
+ DataFrame.mod
+ DataFrame.pow
+ DataFrame.radd
+ DataFrame.rsub
+ DataFrame.rmul
+ DataFrame.rdiv
+ DataFrame.rtruediv
+ DataFrame.rfloordiv
+ DataFrame.rmod
+ DataFrame.rpow
+
+Function application, GroupBy & window
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. autosummary::
+ :toctree: api/
+
+ DataFrame.apply
+ DataFrame.apply_chunks
+ DataFrame.apply_rows
+ DataFrame.pipe
+ DataFrame.agg
+ DataFrame.groupby
+ DataFrame.rolling
+
+.. _api.dataframe.stats:
+
+Computations / descriptive stats
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. autosummary::
+ :toctree: api/
+
+ DataFrame.all
+ DataFrame.any
+ DataFrame.clip
+ DataFrame.corr
+ DataFrame.count
+ DataFrame.cov
+ DataFrame.cummax
+ DataFrame.cummin
+ DataFrame.cumprod
+ DataFrame.cumsum
+ DataFrame.describe
+ DataFrame.kurt
+ DataFrame.kurtosis
+ DataFrame.max
+ DataFrame.mean
+ DataFrame.min
+ DataFrame.mode
+ DataFrame.prod
+ DataFrame.product
+ DataFrame.quantile
+ DataFrame.quantiles
+ DataFrame.rank
+ DataFrame.round
+ DataFrame.skew
+ DataFrame.sum
+ DataFrame.std
+ DataFrame.var
+
+Reindexing / selection / label manipulation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. autosummary::
+ :toctree: api/
+
+ DataFrame.drop
+ DataFrame.drop_duplicates
+ DataFrame.equals
+ DataFrame.head
+ DataFrame.reindex
+ DataFrame.rename
+ DataFrame.reset_index
+ DataFrame.sample
+ DataFrame.searchsorted
+ DataFrame.set_index
+ DataFrame.repeat
+ DataFrame.tail
+ DataFrame.take
+ DataFrame.tile
+
+.. _api.dataframe.missing:
+
+Missing data handling
+~~~~~~~~~~~~~~~~~~~~~
+.. autosummary::
+ :toctree: api/
+
+ DataFrame.dropna
+ DataFrame.fillna
+ DataFrame.isna
+ DataFrame.isnull
+ DataFrame.nans_to_nulls
+ DataFrame.notna
+ DataFrame.notnull
+ DataFrame.replace
+
+Reshaping, sorting, transposing
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. autosummary::
+ :toctree: api/
+
+ DataFrame.argsort
+ DataFrame.interleave_columns
+ DataFrame.partition_by_hash
+ DataFrame.pivot
+ DataFrame.scatter_by_map
+ DataFrame.sort_values
+ DataFrame.sort_index
+ DataFrame.nlargest
+ DataFrame.nsmallest
+ DataFrame.stack
+ DataFrame.unstack
+ DataFrame.melt
+ DataFrame.explode
+ DataFrame.T
+ DataFrame.transpose
+
+Combining / comparing / joining / merging / encoding
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. autosummary::
+ :toctree: api/
+
+ DataFrame.append
+ DataFrame.assign
+ DataFrame.join
+ DataFrame.merge
+ DataFrame.update
+ DataFrame.label_encoding
+ DataFrame.one_hot_encoding
+
+Numerical operations
+~~~~~~~~~~~~~~~~~~~~
+.. autosummary::
+ :toctree: api/
+
+ DataFrame.acos
+ DataFrame.asin
+ DataFrame.atan
+ DataFrame.cos
+ DataFrame.exp
+ DataFrame.log
+ DataFrame.sin
+ DataFrame.sqrt
+ DataFrame.tan
+
+Time Series-related
+~~~~~~~~~~~~~~~~~~~
+.. autosummary::
+ :toctree: api/
+
+ DataFrame.shift
+
+Serialization / IO / conversion
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. autosummary::
+ :toctree: api/
+
+ DataFrame.as_gpu_matrix
+ DataFrame.as_matrix
+ DataFrame.from_arrow
+ DataFrame.from_pandas
+ DataFrame.from_records
+ DataFrame.hash_columns
+ DataFrame.to_arrow
+ DataFrame.to_dlpack
+ DataFrame.to_parquet
+ DataFrame.to_csv
+ DataFrame.to_hdf
+ DataFrame.to_dict
+ DataFrame.to_json
+ DataFrame.to_pandas
+ DataFrame.to_feather
+ DataFrame.to_records
+ DataFrame.to_string
diff --git a/docs/cudf/source/api_docs/general_functions.rst b/docs/cudf/source/api_docs/general_functions.rst
new file mode 100644
index 00000000000..226ae8acd32
--- /dev/null
+++ b/docs/cudf/source/api_docs/general_functions.rst
@@ -0,0 +1,32 @@
+=================
+General Functions
+=================
+.. currentmodule:: cudf
+
+Data manipulations
+------------------
+
+.. autosummary::
+ :toctree: api/
+
+ cudf.concat
+ cudf.melt
+ cudf.get_dummies
+ cudf.merge_sorted
+ cudf.pivot
+ cudf.unstack
+
+Top-level conversions
+---------------------
+.. autosummary::
+ :toctree: api/
+
+ cudf.to_numeric
+
+Top-level dealing with datetimelike
+-----------------------------------
+
+.. autosummary::
+ :toctree: api/
+
+ cudf.to_datetime
diff --git a/docs/cudf/source/api_docs/general_utilities.rst b/docs/cudf/source/api_docs/general_utilities.rst
new file mode 100644
index 00000000000..d9c53c3fbbd
--- /dev/null
+++ b/docs/cudf/source/api_docs/general_utilities.rst
@@ -0,0 +1,13 @@
+=================
+General Utilities
+=================
+
+Testing functions
+-----------------
+.. autosummary::
+ :toctree: api/
+
+ cudf.testing.testing.assert_column_equal
+ cudf.testing.testing.assert_frame_equal
+ cudf.testing.testing.assert_index_equal
+ cudf.testing.testing.assert_series_equal
diff --git a/docs/cudf/source/api_docs/groupby.rst b/docs/cudf/source/api_docs/groupby.rst
new file mode 100644
index 00000000000..27a314fa425
--- /dev/null
+++ b/docs/cudf/source/api_docs/groupby.rst
@@ -0,0 +1,96 @@
+.. _api.groupby:
+
+=======
+GroupBy
+=======
+.. currentmodule:: cudf.core.groupby
+
+GroupBy objects are returned by groupby calls: :func:`cudf.DataFrame.groupby`, :func:`cudf.Series.groupby`, etc.
+
+Indexing, iteration
+-------------------
+.. autosummary::
+ :toctree: api/
+
+ GroupBy.__iter__
+ GroupBy.groups
+
+.. currentmodule:: cudf
+
+.. autosummary::
+ :toctree: api/
+
+ Grouper
+
+.. currentmodule:: cudf.core.groupby.groupby
+
+Function application
+--------------------
+.. autosummary::
+ :toctree: api/
+
+ GroupBy.apply
+ GroupBy.agg
+ SeriesGroupBy.aggregate
+ DataFrameGroupBy.aggregate
+ GroupBy.pipe
+
+Computations / descriptive stats
+--------------------------------
+.. autosummary::
+ :toctree: api/
+
+ GroupBy.bfill
+ GroupBy.backfill
+ GroupBy.count
+ GroupBy.cumcount
+ GroupBy.cummax
+ GroupBy.cummin
+ GroupBy.cumsum
+ GroupBy.ffill
+ GroupBy.max
+ GroupBy.mean
+ GroupBy.median
+ GroupBy.min
+ GroupBy.nth
+ GroupBy.pad
+ GroupBy.prod
+ GroupBy.size
+ GroupBy.std
+ GroupBy.sum
+ GroupBy.var
+
+The following methods are available in both ``SeriesGroupBy`` and
+``DataFrameGroupBy`` objects, but may differ slightly: the
+``DataFrameGroupBy`` version usually permits the specification of an
+axis argument, and often an argument indicating whether to restrict
+application to columns of a specific data type.
+
+.. autosummary::
+ :toctree: api/
+
+ DataFrameGroupBy.backfill
+ DataFrameGroupBy.bfill
+ DataFrameGroupBy.count
+ DataFrameGroupBy.cumcount
+ DataFrameGroupBy.cummax
+ DataFrameGroupBy.cummin
+ DataFrameGroupBy.cumsum
+ DataFrameGroupBy.describe
+ DataFrameGroupBy.ffill
+ DataFrameGroupBy.fillna
+ DataFrameGroupBy.idxmax
+ DataFrameGroupBy.idxmin
+ DataFrameGroupBy.nunique
+ DataFrameGroupBy.pad
+ DataFrameGroupBy.quantile
+ DataFrameGroupBy.shift
+ DataFrameGroupBy.size
+
+The following methods are available only for ``SeriesGroupBy`` objects.
+
+.. autosummary::
+ :toctree: api/
+
+ SeriesGroupBy.nunique
+ SeriesGroupBy.unique
diff --git a/docs/cudf/source/api_docs/index.rst b/docs/cudf/source/api_docs/index.rst
new file mode 100644
index 00000000000..70b9563fc1d
--- /dev/null
+++ b/docs/cudf/source/api_docs/index.rst
@@ -0,0 +1,19 @@
+=============
+API reference
+=============
+
+This page lists all publicly accessible modules, methods and classes available through the
+``cudf.*`` namespace.
+
+.. toctree::
+ :maxdepth: 2
+ :caption: API Documentation
+
+ series
+ dataframe
+ index_objects
+ groupby
+ general_functions
+ general_utilities
+ window
+
diff --git a/docs/cudf/source/api_docs/index_objects.rst b/docs/cudf/source/api_docs/index_objects.rst
new file mode 100644
index 00000000000..c23c9a3f6c1
--- /dev/null
+++ b/docs/cudf/source/api_docs/index_objects.rst
@@ -0,0 +1,296 @@
+=============
+Index objects
+=============
+
+Index
+-----
+.. currentmodule:: cudf
+
+**Many of these methods or variants thereof are available on the objects
+that contain an index (Series/DataFrame) and those should most likely be
+used before calling these methods directly.**
+
+.. autosummary::
+ :toctree: api/
+ :template: autosummary/class_with_autosummary.rst
+
+ Index
+
+Properties
+~~~~~~~~~~
+.. autosummary::
+ :toctree: api/
+
+ Index.empty
+ Index.gpu_values
+ Index.is_monotonic
+ Index.is_monotonic_increasing
+ Index.is_monotonic_decreasing
+ Index.is_unique
+ Index.name
+ Index.names
+ Index.ndim
+ Index.nlevels
+ Index.shape
+ Index.size
+ Index.values
+
+
+Modifying and computations
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. autosummary::
+ :toctree: api/
+
+ Index.any
+ Index.copy
+ Index.drop_duplicates
+ Index.equals
+ Index.factorize
+ Index.min
+ Index.max
+ Index.rename
+ Index.repeat
+ Index.where
+ Index.take
+ Index.unique
+
+Compatibility with MultiIndex
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. autosummary::
+ :toctree: api/
+
+ Index.set_names
+
+Missing values
+~~~~~~~~~~~~~~
+.. autosummary::
+ :toctree: api/
+
+ Index.fillna
+ Index.dropna
+ Index.isna
+ Index.notna
+
+Memory usage
+~~~~~~~~~~~~
+.. autosummary::
+ :toctree: api/
+
+ Index.memory_usage
+
+Conversion
+~~~~~~~~~~
+.. autosummary::
+ :toctree: api/
+
+ Index.astype
+ Index.to_list
+ Index.to_series
+ Index.to_frame
+
+Sorting
+~~~~~~~
+.. autosummary::
+ :toctree: api/
+
+ Index.argsort
+ Index.searchsorted
+ Index.sort_values
+
+Time-specific operations
+~~~~~~~~~~~~~~~~~~~~~~~~
+.. autosummary::
+ :toctree: api/
+
+ Index.shift
+
+Combining / joining / set operations
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. autosummary::
+ :toctree: api/
+
+ Index.append
+ Index.join
+ Index.difference
+
+Selecting
+~~~~~~~~~
+.. autosummary::
+ :toctree: api/
+
+ Index.get_level_values
+ Index.get_loc
+ Index.get_slice_bound
+ Index.isin
+
+.. _api.numericindex:
+
+Numeric Index
+-------------
+.. autosummary::
+ :toctree: api/
+ :template: autosummary/class_without_autosummary.rst
+
+ RangeIndex
+ Int64Index
+ UInt64Index
+ Float64Index
+
+
+.. _api.categoricalindex:
+
+CategoricalIndex
+----------------
+.. autosummary::
+ :toctree: api/
+ :template: autosummary/class_without_autosummary.rst
+
+ CategoricalIndex
+
+Categorical components
+~~~~~~~~~~~~~~~~~~~~~~
+.. autosummary::
+ :toctree: api/
+
+ CategoricalIndex.codes
+ CategoricalIndex.categories
+
+Modifying and computations
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. autosummary::
+ :toctree: api/
+
+ CategoricalIndex.equals
+
+.. _api.intervalindex:
+
+IntervalIndex
+-------------
+.. autosummary::
+ :toctree: api/
+
+ IntervalIndex
+
+IntervalIndex components
+~~~~~~~~~~~~~~~~~~~~~~~~
+.. autosummary::
+ :toctree: api/
+
+ IntervalIndex.from_breaks
+ IntervalIndex.values
+ IntervalIndex.get_loc
+
+.. _api.multiindex:
+
+MultiIndex
+----------
+.. autosummary::
+ :toctree: api/
+ :template: autosummary/class_without_autosummary.rst
+
+ MultiIndex
+
+
+MultiIndex constructors
+~~~~~~~~~~~~~~~~~~~~~~~
+.. autosummary::
+ :toctree: api/
+
+ MultiIndex.from_tuples
+ MultiIndex.from_product
+ MultiIndex.from_frame
+ MultiIndex.from_arrow
+
+MultiIndex properties
+~~~~~~~~~~~~~~~~~~~~~
+.. autosummary::
+ :toctree: api/
+
+ MultiIndex.names
+ MultiIndex.levels
+ MultiIndex.codes
+ MultiIndex.nlevels
+
+MultiIndex components
+~~~~~~~~~~~~~~~~~~~~~
+.. autosummary::
+ :toctree: api/
+
+ MultiIndex.to_frame
+ MultiIndex.droplevel
+
+MultiIndex selecting
+~~~~~~~~~~~~~~~~~~~~
+.. autosummary::
+ :toctree: api/
+
+ MultiIndex.get_loc
+ MultiIndex.get_level_values
+
+.. _api.datetimeindex:
+
+DatetimeIndex
+-------------
+.. autosummary::
+ :toctree: api/
+ :template: autosummary/class_without_autosummary.rst
+
+ DatetimeIndex
+
+Time/date components
+~~~~~~~~~~~~~~~~~~~~
+.. autosummary::
+ :toctree: api/
+
+ DatetimeIndex.year
+ DatetimeIndex.month
+ DatetimeIndex.day
+ DatetimeIndex.hour
+ DatetimeIndex.minute
+ DatetimeIndex.second
+ DatetimeIndex.dayofweek
+ DatetimeIndex.weekday
+
+Time-specific operations
+~~~~~~~~~~~~~~~~~~~~~~~~
+.. autosummary::
+ :toctree: api/
+
+ DatetimeIndex.round
+
+Conversion
+~~~~~~~~~~
+.. autosummary::
+ :toctree: api/
+
+ DatetimeIndex.to_series
+ DatetimeIndex.to_frame
+
+TimedeltaIndex
+--------------
+.. autosummary::
+ :toctree: api/
+ :template: autosummary/class_without_autosummary.rst
+
+ TimedeltaIndex
+
+Components
+~~~~~~~~~~
+.. autosummary::
+ :toctree: api/
+
+ TimedeltaIndex.days
+ TimedeltaIndex.seconds
+ TimedeltaIndex.microseconds
+ TimedeltaIndex.nanoseconds
+ TimedeltaIndex.components
+ TimedeltaIndex.inferred_freq
+
+Conversion
+~~~~~~~~~~
+.. autosummary::
+ :toctree: api/
+
+ TimedeltaIndex.to_series
+ TimedeltaIndex.round
+ TimedeltaIndex.to_frame
diff --git a/docs/cudf/source/api_docs/series.rst b/docs/cudf/source/api_docs/series.rst
new file mode 100644
index 00000000000..ffa809268f3
--- /dev/null
+++ b/docs/cudf/source/api_docs/series.rst
@@ -0,0 +1,478 @@
+======
+Series
+======
+.. currentmodule:: cudf
+
+Constructor
+-----------
+.. autosummary::
+ :toctree: api/
+ :template: autosummary/class_with_autosummary.rst
+
+ Series
+
+Attributes
+----------
+**Axes**
+
+.. autosummary::
+ :toctree: api/
+
+ Series.index
+ Series.values
+ Series.data
+ Series.dtype
+ Series.shape
+ Series.ndim
+ Series.nullable
+ Series.nullmask
+ Series.null_count
+ Series.size
+ Series.memory_usage
+ Series.has_nulls
+ Series.empty
+ Series.name
+ Series.valid_count
+ Series.values_host
+
+Conversion
+----------
+.. autosummary::
+ :toctree: api/
+
+ Series.astype
+ Series.copy
+ Series.to_list
+ Series.__array__
+ Series.as_index
+ Series.as_mask
+ Series.scale
+
+
+Indexing, iteration
+-------------------
+.. autosummary::
+ :toctree: api/
+
+ Series.loc
+ Series.iloc
+ Series.__iter__
+ Series.items
+ Series.iteritems
+ Series.keys
+
+For more information on ``.at``, ``.iat``, ``.loc``, and
+``.iloc``, see the :ref:`indexing documentation <indexing>`.
+
+Binary operator functions
+-------------------------
+.. autosummary::
+ :toctree: api/
+
+ Series.add
+ Series.sub
+ Series.subtract
+ Series.mul
+ Series.multiply
+ Series.truediv
+ Series.floordiv
+ Series.mod
+ Series.pow
+ Series.radd
+ Series.rsub
+ Series.rmul
+ Series.rtruediv
+ Series.rfloordiv
+ Series.rmod
+ Series.rpow
+ Series.round
+ Series.lt
+ Series.gt
+ Series.le
+ Series.ge
+ Series.ne
+ Series.eq
+ Series.product
+
+Function application, GroupBy & window
+--------------------------------------
+.. autosummary::
+ :toctree: api/
+
+ Series.applymap
+ Series.map
+ Series.groupby
+ Series.rolling
+ Series.pipe
+
+.. _api.series.stats:
+
+Computations / descriptive stats
+--------------------------------
+.. autosummary::
+ :toctree: api/
+
+ Series.abs
+ Series.all
+ Series.any
+ Series.ceil
+ Series.clip
+ Series.corr
+ Series.count
+ Series.cov
+ Series.cummax
+ Series.cummin
+ Series.cumprod
+ Series.cumsum
+ Series.describe
+ Series.diff
+ Series.digitize
+ Series.factorize
+ Series.floor
+ Series.kurt
+ Series.max
+ Series.mean
+ Series.median
+ Series.min
+ Series.mode
+ Series.nlargest
+ Series.nsmallest
+ Series.prod
+ Series.quantile
+ Series.rank
+ Series.skew
+ Series.std
+ Series.sum
+ Series.var
+ Series.kurtosis
+ Series.unique
+ Series.nunique
+ Series.is_unique
+ Series.is_monotonic
+ Series.is_monotonic_increasing
+ Series.is_monotonic_decreasing
+ Series.value_counts
+
+Reindexing / selection / label manipulation
+-------------------------------------------
+.. autosummary::
+ :toctree: api/
+
+ Series.drop
+ Series.drop_duplicates
+ Series.equals
+ Series.head
+ Series.isin
+ Series.reindex
+ Series.rename
+ Series.reset_index
+ Series.reverse
+ Series.sample
+ Series.set_index
+ Series.set_mask
+ Series.take
+ Series.tail
+ Series.tile
+ Series.where
+ Series.mask
+
+Missing data handling
+---------------------
+.. autosummary::
+ :toctree: api/
+
+ Series.dropna
+ Series.fillna
+ Series.isna
+ Series.isnull
+ Series.nans_to_nulls
+ Series.notna
+ Series.notnull
+ Series.replace
+
+Reshaping, sorting
+------------------
+.. autosummary::
+ :toctree: api/
+
+ Series.argsort
+ Series.interleave_columns
+ Series.sort_values
+ Series.sort_index
+ Series.explode
+ Series.scatter_by_map
+ Series.searchsorted
+ Series.repeat
+
+Combining / comparing / joining / merging / encoding
+----------------------------------------------------
+.. autosummary::
+ :toctree: api/
+
+ Series.append
+ Series.update
+ Series.label_encoding
+ Series.one_hot_encoding
+
+Numerical operations
+~~~~~~~~~~~~~~~~~~~~
+.. autosummary::
+ :toctree: api/
+
+ Series.acos
+ Series.asin
+ Series.atan
+ Series.cos
+ Series.exp
+ Series.log
+ Series.sin
+ Series.sqrt
+ Series.tan
+
+Time Series-related
+-------------------
+.. autosummary::
+ :toctree: api/
+
+ Series.shift
+
+Accessors
+---------
+
+cuDF provides dtype-specific methods under various accessors.
+These are separate namespaces within :class:`Series` that only apply
+to specific data types.
+
+=========================== =================================
+Data Type                   Accessor
+=========================== =================================
+Datetime, Timedelta         :ref:`dt <api.series.dt>`
+String                      :ref:`str <api.series.str>`
+Categorical                 :ref:`cat <api.series.cat>`
+List                        :ref:`list <api.series.list>`
+=========================== =================================
+
+.. _api.series.dt:
+
+Datetimelike properties
+~~~~~~~~~~~~~~~~~~~~~~~
+
+``Series.dt`` can be used to access the values of the series as
+datetimelike and return several properties.
+These can be accessed like ``Series.dt.<property>``.
+
+Datetime properties
+^^^^^^^^^^^^^^^^^^^
+.. currentmodule:: cudf.core.series.DatetimeProperties
+
+.. autosummary::
+ :toctree: api/
+
+ day
+ dayofweek
+ hour
+ minute
+ month
+ second
+ weekday
+ year
+
+Datetime methods
+^^^^^^^^^^^^^^^^
+
+.. autosummary::
+ :toctree: api/
+
+ strftime
+
+
+Timedelta properties
+^^^^^^^^^^^^^^^^^^^^
+
+.. currentmodule:: cudf.core.series.TimedeltaProperties
+.. autosummary::
+ :toctree: api/
+
+ components
+ days
+ microseconds
+ nanoseconds
+ seconds
+
+
+.. _api.series.str:
+
+String handling
+~~~~~~~~~~~~~~~
+
+``Series.str`` can be used to access the values of the series as
+strings and apply several methods to it. These can be accessed like
+``Series.str.<function/property>``.
+
+.. currentmodule:: cudf.core.column.string.StringMethods
+.. autosummary::
+ :toctree: api/
+
+ byte_count
+ capitalize
+ cat
+ center
+ character_ngrams
+ character_tokenize
+ code_points
+ contains
+ count
+ detokenize
+ edit_distance
+ endswith
+ extract
+ filter_alphanum
+ filter_characters
+ filter_tokens
+ find
+ findall
+ get
+ get_json_object
+ htoi
+ index
+ insert
+ ip2int
+ is_consonant
+ is_vowel
+ isalnum
+ isalpha
+ isdecimal
+ isdigit
+ isempty
+ isfloat
+ ishex
+ isinteger
+ isipv4
+ isspace
+ islower
+ isnumeric
+ isupper
+ istimestamp
+ join
+ len
+ ljust
+ lower
+ lstrip
+ match
+ ngrams
+ ngrams_tokenize
+ normalize_characters
+ pad
+ partition
+ porter_stemmer_measure
+ replace
+ replace_tokens
+ replace_with_backrefs
+ rfind
+ rindex
+ rjust
+ rpartition
+ rstrip
+ slice
+ slice_from
+ slice_replace
+ split
+ rsplit
+ startswith
+ strip
+ subword_tokenize
+ swapcase
+ title
+ token_count
+ tokenize
+ translate
+ upper
+ url_decode
+ url_encode
+ wrap
+ zfill
+
+
+
+..
+ The following is needed to ensure the generated pages are created with the
+ correct template (otherwise they would be created in the Series/Index class page)
+
+..
+ .. currentmodule:: cudf
+ .. autosummary::
+ :toctree: api/
+ :template: autosummary/accessor.rst
+
+ Series.str
+ Series.cat
+ Series.dt
+ Index.str
+
+.. _api.series.cat:
+
+Categorical accessor
+~~~~~~~~~~~~~~~~~~~~
+
+Categorical-dtype specific methods and attributes are available under
+the ``Series.cat`` accessor.
+
+.. currentmodule:: cudf.core.column.categorical.CategoricalAccessor
+.. autosummary::
+ :toctree: api/
+
+ categories
+ ordered
+ codes
+ reorder_categories
+ add_categories
+ remove_categories
+ set_categories
+ as_ordered
+ as_unordered
+
+
+.. _api.series.list:
+
+List handling
+~~~~~~~~~~~~~
+
+``Series.list`` can be used to access the values of the series as
+lists and apply list methods to it. These can be accessed like
+``Series.list.<function/property>``.
+
+.. currentmodule:: cudf.core.column.lists.ListMethods
+.. autosummary::
+ :toctree: api/
+
+ concat
+ contains
+ get
+ len
+ sort_values
+ take
+ unique
+
+
+Serialization / IO / conversion
+-------------------------------
+.. currentmodule:: cudf
+.. autosummary::
+ :toctree: api/
+
+ Series.to_array
+ Series.to_arrow
+ Series.to_dlpack
+ Series.to_frame
+ Series.to_gpu_array
+ Series.to_hdf
+ Series.to_json
+ Series.to_pandas
+ Series.to_string
+ Series.from_arrow
+ Series.from_categorical
+ Series.from_masked_array
+ Series.from_pandas
+ Series.hash_encode
+ Series.hash_values
+
\ No newline at end of file
diff --git a/docs/cudf/source/api_docs/window.rst b/docs/cudf/source/api_docs/window.rst
new file mode 100644
index 00000000000..9f94f620949
--- /dev/null
+++ b/docs/cudf/source/api_docs/window.rst
@@ -0,0 +1,24 @@
+.. _api.window:
+
+======
+Window
+======
+
+Rolling objects are returned by ``.rolling`` calls: :func:`cudf.DataFrame.rolling`, :func:`cudf.Series.rolling`, etc.
+
+.. _api.functions_rolling:
+
+Rolling window functions
+------------------------
+.. currentmodule:: cudf.core.window.rolling
+
+.. autosummary::
+ :toctree: api/
+
+ Rolling.count
+ Rolling.sum
+ Rolling.mean
+ Rolling.min
+ Rolling.max
+ Rolling.apply
+
diff --git a/docs/cudf/source/basics.rst b/docs/cudf/source/basics.rst
deleted file mode 100644
index 15b4b43662b..00000000000
--- a/docs/cudf/source/basics.rst
+++ /dev/null
@@ -1,54 +0,0 @@
-Basics
-======
-
-
-Supported Dtypes
-----------------
-
-cuDF uses dtypes for Series or individual columns of a DataFrame. cuDF uses NumPy dtypes, NumPy provides support for ``float``, ``int``, ``bool``,
-``'timedelta64[s]'``, ``'timedelta64[ms]'``, ``'timedelta64[us]'``, ``'timedelta64[ns]'``, ``'datetime64[s]'``, ``'datetime64[ms]'``,
-``'datetime64[us]'``, ``'datetime64[ns]'`` (note that NumPy does not support timezone-aware datetimes).
-
-
-The following table lists all of cudf types. For methods requiring dtype arguments, strings can be specified as indicated. See the respective documentation sections for more on each type.
-
-
-+------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+
-| Kind of Data | Data Type | Scalar | String Aliases |
-+========================+==================+=====================================================================================+=============================================+
-| Integer | | np.int8_, np.int16_, np.int32_, np.int64_, np.uint8_, np.uint16_, | ``'int8'``, ``'int16'``, ``'int32'``, |
-| | | np.uint32_, np.uint64_ | ``'int64'``, ``'uint8'``, ``'uint16'``, |
-| | | | ``'uint32'``, ``'uint64'`` |
-+------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+
-| Float | | np.float32_, np.float64_ | ``'float32'``, ``'float64'`` |
-+------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+
-| Strings | | `str `_ | ``'string'``, ``'object'`` |
-+------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+
-| Datetime | | np.datetime64_ | ``'datetime64[s]'``, ``'datetime64[ms]'``, |
-| | | | ``'datetime64[us]'``, ``'datetime64[ns]'`` |
-+------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+
-| Timedelta | | np.timedelta64_ | ``'timedelta64[s]'``, ``'timedelta64[ms]'``,|
-| (duration type) | | | ``'timedelta64[us]'``, ``'timedelta64[ns]'``|
-+------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+
-| Categorical | CategoricalDtype | (none) | ``'category'`` |
-+------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+
-| Boolean | | np.bool_ | ``'bool'`` |
-+------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+
-| Decimal | Decimal64Dtype | (none) | (none) |
-+------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+
-
-**Note: All dtypes above are Nullable**
-
-.. _np.int8:
-.. _np.int16:
-.. _np.int32:
-.. _np.int64:
-.. _np.uint8:
-.. _np.uint16:
-.. _np.uint32:
-.. _np.uint64:
-.. _np.float32:
-.. _np.float64:
-.. _np.bool: https://numpy.org/doc/stable/user/basics.types.html
-.. _np.datetime64: https://numpy.org/doc/stable/reference/arrays.datetime.html#basic-datetimes
-.. _np.timedelta64: https://numpy.org/doc/stable/reference/arrays.datetime.html#datetime-and-timedelta-arithmetic
diff --git a/docs/cudf/source/PandasCompat.rst b/docs/cudf/source/basics/PandasCompat.rst
similarity index 100%
rename from docs/cudf/source/PandasCompat.rst
rename to docs/cudf/source/basics/PandasCompat.rst
diff --git a/docs/cudf/source/basics/basics.rst b/docs/cudf/source/basics/basics.rst
new file mode 100644
index 00000000000..ee63f67daa2
--- /dev/null
+++ b/docs/cudf/source/basics/basics.rst
@@ -0,0 +1,56 @@
+Basics
+======
+
+
+Supported Dtypes
+----------------
+
+cuDF uses dtypes for Series or individual columns of a DataFrame. cuDF uses NumPy dtypes; NumPy provides support for ``float``, ``int``, ``bool``,
+``'timedelta64[s]'``, ``'timedelta64[ms]'``, ``'timedelta64[us]'``, ``'timedelta64[ns]'``, ``'datetime64[s]'``, ``'datetime64[ms]'``,
+``'datetime64[us]'``, ``'datetime64[ns]'`` (note that NumPy does not support timezone-aware datetimes).
+
+
+The following table lists all of the cuDF types. For methods requiring dtype arguments, strings can be specified as indicated. See the respective documentation sections for more on each type.
+
+.. rst-class:: special-table
+.. table::
+
+ +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+
+ | Kind of Data | Data Type | Scalar | String Aliases |
+ +========================+==================+=====================================================================================+=============================================+
+ | Integer | | np.int8_, np.int16_, np.int32_, np.int64_, np.uint8_, np.uint16_, | ``'int8'``, ``'int16'``, ``'int32'``, |
+ | | | np.uint32_, np.uint64_ | ``'int64'``, ``'uint8'``, ``'uint16'``, |
+ | | | | ``'uint32'``, ``'uint64'`` |
+ +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+
+ | Float | | np.float32_, np.float64_ | ``'float32'``, ``'float64'`` |
+ +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+
+ | Strings | | `str `_ | ``'string'``, ``'object'`` |
+ +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+
+ | Datetime | | np.datetime64_ | ``'datetime64[s]'``, ``'datetime64[ms]'``, |
+ | | | | ``'datetime64[us]'``, ``'datetime64[ns]'`` |
+ +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+
+ | Timedelta | | np.timedelta64_ | ``'timedelta64[s]'``, ``'timedelta64[ms]'``,|
+ | (duration type) | | | ``'timedelta64[us]'``, ``'timedelta64[ns]'``|
+ +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+
+ | Categorical | CategoricalDtype | (none) | ``'category'`` |
+ +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+
+ | Boolean | | np.bool_ | ``'bool'`` |
+ +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+
+ | Decimal | Decimal64Dtype | (none) | (none) |
+ +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+
+
+**Note: All dtypes above are Nullable**
+
+.. _np.int8:
+.. _np.int16:
+.. _np.int32:
+.. _np.int64:
+.. _np.uint8:
+.. _np.uint16:
+.. _np.uint32:
+.. _np.uint64:
+.. _np.float32:
+.. _np.float64:
+.. _np.bool: https://numpy.org/doc/stable/user/basics.types.html
+.. _np.datetime64: https://numpy.org/doc/stable/reference/arrays.datetime.html#basic-datetimes
+.. _np.timedelta64: https://numpy.org/doc/stable/reference/arrays.datetime.html#datetime-and-timedelta-arithmetic
diff --git a/docs/cudf/source/dask-cudf.rst b/docs/cudf/source/basics/dask-cudf.rst
similarity index 100%
rename from docs/cudf/source/dask-cudf.rst
rename to docs/cudf/source/basics/dask-cudf.rst
diff --git a/docs/cudf/source/groupby.rst b/docs/cudf/source/basics/groupby.rst
similarity index 51%
rename from docs/cudf/source/groupby.rst
rename to docs/cudf/source/basics/groupby.rst
index a6ce9db6817..04c4d42fa2a 100644
--- a/docs/cudf/source/groupby.rst
+++ b/docs/cudf/source/basics/groupby.rst
@@ -131,41 +131,44 @@ Aggregations on groups is supported via the ``agg`` method:
The following table summarizes the available aggregations and the types
that support them:
-+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
-| Aggregations / dtypes | Numeric | Datetime | String | Categorical | List | Struct | Interval | Decimal |
-+====================================+===========+============+==========+===============+========+==========+============+===========+
-| count | ✅ | ✅ | ✅ | ✅ | | | | ✅ |
-+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
-| size | ✅ | ✅ | ✅ | ✅ | | | | ✅ |
-+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
-| sum | ✅ | ✅ | | | | | | ✅ |
-+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
-| idxmin | ✅ | ✅ | | | | | | ✅ |
-+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
-| idxmax | ✅ | ✅ | | | | | | ✅ |
-+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
-| min | ✅ | ✅ | ✅ | | | | | ✅ |
-+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
-| max | ✅ | ✅ | ✅ | | | | | ✅ |
-+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
-| mean | ✅ | ✅ | | | | | | |
-+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
-| var | ✅ | ✅ | | | | | | |
-+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
-| std | ✅ | ✅ | | | | | | |
-+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
-| quantile | ✅ | ✅ | | | | | | |
-+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
-| median | ✅ | ✅ | | | | | | |
-+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
-| nunique | ✅ | ✅ | ✅ | ✅ | | | | ✅ |
-+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
-| nth | ✅ | ✅ | ✅ | | | | | ✅ |
-+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
-| collect | ✅ | ✅ | ✅ | | ✅ | | | ✅ |
-+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
-| unique | ✅ | ✅ | ✅ | ✅ | | | | |
-+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
+.. rst-class:: special-table
+.. table::
+
+ +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
+ | Aggregations / dtypes | Numeric | Datetime | String | Categorical | List | Struct | Interval | Decimal |
+ +====================================+===========+============+==========+===============+========+==========+============+===========+
+ | count | ✅ | ✅ | ✅ | ✅ | | | | ✅ |
+ +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
+ | size | ✅ | ✅ | ✅ | ✅ | | | | ✅ |
+ +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
+ | sum | ✅ | ✅ | | | | | | ✅ |
+ +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
+ | idxmin | ✅ | ✅ | | | | | | ✅ |
+ +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
+ | idxmax | ✅ | ✅ | | | | | | ✅ |
+ +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
+ | min | ✅ | ✅ | ✅ | | | | | ✅ |
+ +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
+ | max | ✅ | ✅ | ✅ | | | | | ✅ |
+ +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
+ | mean | ✅ | ✅ | | | | | | |
+ +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
+ | var | ✅ | ✅ | | | | | | |
+ +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
+ | std | ✅ | ✅ | | | | | | |
+ +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
+ | quantile | ✅ | ✅ | | | | | | |
+ +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
+ | median | ✅ | ✅ | | | | | | |
+ +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
+ | nunique | ✅ | ✅ | ✅ | ✅ | | | | ✅ |
+ +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
+ | nth | ✅ | ✅ | ✅ | | | | | ✅ |
+ +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
+ | collect | ✅ | ✅ | ✅ | | ✅ | | | ✅ |
+ +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
+ | unique | ✅ | ✅ | ✅ | ✅ | | | | |
+ +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+
GroupBy apply
-------------
diff --git a/docs/cudf/source/basics/index.rst b/docs/cudf/source/basics/index.rst
new file mode 100644
index 00000000000..a29866d7e32
--- /dev/null
+++ b/docs/cudf/source/basics/index.rst
@@ -0,0 +1,15 @@
+======
+Basics
+======
+
+
+.. toctree::
+ :maxdepth: 2
+
+ basics
+ io.rst
+ groupby.rst
+ PandasCompat.rst
+ dask-cudf.rst
+ internals.rst
+
\ No newline at end of file
diff --git a/docs/cudf/source/internals.rst b/docs/cudf/source/basics/internals.rst
similarity index 100%
rename from docs/cudf/source/internals.rst
rename to docs/cudf/source/basics/internals.rst
diff --git a/docs/cudf/source/io-gds-integration.rst b/docs/cudf/source/basics/io-gds-integration.rst
similarity index 100%
rename from docs/cudf/source/io-gds-integration.rst
rename to docs/cudf/source/basics/io-gds-integration.rst
diff --git a/docs/cudf/source/io-supported-types.rst b/docs/cudf/source/basics/io-supported-types.rst
similarity index 99%
rename from docs/cudf/source/io-supported-types.rst
rename to docs/cudf/source/basics/io-supported-types.rst
index 739c1634ca7..78c1bfb6554 100644
--- a/docs/cudf/source/io-supported-types.rst
+++ b/docs/cudf/source/basics/io-supported-types.rst
@@ -3,7 +3,7 @@ I/O Supported dtypes
The following table lists are compatible cudf types for each supported IO format.
-.. rst-class:: io-supported-types-table
+.. rst-class:: io-supported-types-table special-table
.. table::
:widths: 15 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10
diff --git a/docs/cudf/source/io.rst b/docs/cudf/source/basics/io.rst
similarity index 100%
rename from docs/cudf/source/io.rst
rename to docs/cudf/source/basics/io.rst
diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py
index c764b64da60..c5f1233d022 100644
--- a/docs/cudf/source/conf.py
+++ b/docs/cudf/source/conf.py
@@ -24,7 +24,10 @@
from docutils.nodes import Text
from sphinx.addnodes import pending_xref
+import cudf
+sys.path.insert(0, os.path.abspath(cudf.__path__[0]))
+sys.path.insert(0, os.path.abspath("."))
sys.path.insert(0, os.path.abspath("../.."))
sys.path.append(os.path.abspath("./_ext"))
@@ -43,7 +46,6 @@
"sphinx.ext.autosummary",
"sphinx_copybutton",
"numpydoc",
- "sphinx_markdown_tables",
"IPython.sphinxext.ipython_console_highlighting",
"IPython.sphinxext.ipython_directive",
"nbsphinx",
@@ -51,9 +53,11 @@
]
copybutton_prompt_text = ">>> "
-
+autosummary_generate = True
ipython_mplbackend = "str"
+html_use_modindex = True
+
# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]
@@ -61,7 +65,7 @@
# You can specify multiple suffix as a list of string:
#
# source_suffix = ['.rst', '.md']
-source_suffix = {".rst": "restructuredtext", ".md": "markdown"}
+source_suffix = {".rst": "restructuredtext"}
# The master toctree document.
master_doc = "index"
@@ -90,21 +94,30 @@
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This patterns also effect to html_static_path and html_extra_path
-exclude_patterns = []
+exclude_patterns = ['venv', "**/includes/**",]
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = "sphinx"
+# If true, `todo` and `todoList` produce output, else they produce nothing.
+todo_include_todos = False
+
+html_theme_options = {
+ "external_links": [],
+ "github_url": "https://github.com/rapidsai/cudf",
+ "twitter_url": "https://twitter.com/rapidsai",
+ "show_toc_level": 1,
+ "navbar_align": "right",
+}
include_pandas_compat = True
-# -- Options for HTML output ----------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
-html_theme = "sphinx_rtd_theme"
-
+html_theme = "pydata_sphinx_theme"
+html_logo = "_static/RAPIDS-logo-purple.png"
# on_rtd is whether we are on readthedocs.org
on_rtd = os.environ.get("READTHEDOCS", None) == "True"
@@ -112,10 +125,10 @@
# only import and set the theme if we're building docs locally
# otherwise, readthedocs.org uses their theme by default,
# so no need to specify it
- import sphinx_rtd_theme
+ import pydata_sphinx_theme
- html_theme = "sphinx_rtd_theme"
- html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
+ html_theme = "pydata_sphinx_theme"
+ html_theme_path = pydata_sphinx_theme.get_html_theme_path()
# Theme options are theme-specific and customize the look and feel of a theme
@@ -201,8 +214,9 @@
# Config numpydoc
numpydoc_show_inherited_class_members = True
numpydoc_class_members_toctree = False
+numpydoc_attributes_as_param_list = False
-autoclass_content = "init"
+autoclass_content = "class"
# Replace API shorthands with fullname
_reftarget_aliases = {
@@ -234,10 +248,27 @@ def ignore_internal_references(app, env, node, contnode):
node["reftarget"] = ""
return contnode
+def process_class_docstrings(app, what, name, obj, options, lines):
+ """
+ For those classes for which we use ::
+ :template: autosummary/class_without_autosummary.rst
+ the documented attributes/methods have to be listed in the class
+ docstring. However, if one of those lists is empty, we use 'None',
+ which then generates warnings in sphinx / ugly html output.
+ This "autodoc-process-docstring" event connector removes that part
+ from the processed docstring.
+ """
+ if what == "class":
+ if name in {"cudf.RangeIndex", "cudf.Int64Index", "cudf.UInt64Index", "cudf.Float64Index", "cudf.CategoricalIndex", "cudf.IntervalIndex", "cudf.MultiIndex", "cudf.DatetimeIndex", "cudf.TimedeltaIndex", "cudf.TimedeltaIndex"}:
+
+ cut_index = lines.index('.. rubric:: Attributes')
+ lines[:] = lines[:cut_index]
+
+
+
def setup(app):
- app.add_js_file("copybutton_pydocs.js")
app.add_css_file("params.css")
- app.add_css_file("https://docs.rapids.ai/assets/css/custom.css")
app.connect("doctree-read", resolve_aliases)
app.connect("missing-reference", ignore_internal_references)
+ app.connect("autodoc-process-docstring", process_class_docstrings)
diff --git a/docs/cudf/source/index.rst b/docs/cudf/source/index.rst
index 5a6d9a2617d..90b287bd1b6 100644
--- a/docs/cudf/source/index.rst
+++ b/docs/cudf/source/index.rst
@@ -1,25 +1,25 @@
Welcome to cuDF's documentation!
=================================
+cuDF is a Python GPU DataFrame library (built on the `Apache Arrow
+<https://arrow.apache.org/>`_ columnar memory format) for loading, joining,
+aggregating, filtering, and otherwise manipulating data. cuDF also provides a
+pandas-like API that will be familiar to data engineers & data scientists, so
+they can use it to easily accelerate their workflows without going into
+the details of CUDA programming.
+
+
.. toctree::
:maxdepth: 2
:caption: Contents:
- api.rst
- 10min.ipynb
- basics.rst
- io.rst
- groupby.rst
- dask-cudf.rst
- 10min-cudf-cupy.ipynb
- guide-to-udfs.ipynb
- internals.rst
- Working-with-missing-data.ipynb
- PandasCompat.rst
+ user_guide/index
+ basics/index
+ api_docs/index
+
Indices and tables
==================
* :ref:`genindex`
-* :ref:`modindex`
* :ref:`search`
diff --git a/docs/cudf/source/10min-cudf-cupy.ipynb b/docs/cudf/source/user_guide/10min-cudf-cupy.ipynb
similarity index 100%
rename from docs/cudf/source/10min-cudf-cupy.ipynb
rename to docs/cudf/source/user_guide/10min-cudf-cupy.ipynb
diff --git a/docs/cudf/source/10min.ipynb b/docs/cudf/source/user_guide/10min.ipynb
similarity index 100%
rename from docs/cudf/source/10min.ipynb
rename to docs/cudf/source/user_guide/10min.ipynb
diff --git a/docs/cudf/source/Working-with-missing-data.ipynb b/docs/cudf/source/user_guide/Working-with-missing-data.ipynb
similarity index 100%
rename from docs/cudf/source/Working-with-missing-data.ipynb
rename to docs/cudf/source/user_guide/Working-with-missing-data.ipynb
diff --git a/docs/cudf/source/guide-to-udfs.ipynb b/docs/cudf/source/user_guide/guide-to-udfs.ipynb
similarity index 100%
rename from docs/cudf/source/guide-to-udfs.ipynb
rename to docs/cudf/source/user_guide/guide-to-udfs.ipynb
diff --git a/docs/cudf/source/user_guide/index.rst b/docs/cudf/source/user_guide/index.rst
new file mode 100644
index 00000000000..1061008eb3c
--- /dev/null
+++ b/docs/cudf/source/user_guide/index.rst
@@ -0,0 +1,12 @@
+==========
+User Guide
+==========
+
+
+.. toctree::
+ :maxdepth: 2
+
+ 10min.ipynb
+ 10min-cudf-cupy.ipynb
+ guide-to-udfs.ipynb
+ Working-with-missing-data.ipynb
diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py
index 2d52b517242..13c20d8bcd4 100644
--- a/python/cudf/cudf/__init__.py
+++ b/python/cudf/cudf/__init__.py
@@ -31,6 +31,7 @@
IntervalIndex,
MultiIndex,
RangeIndex,
+ StringIndex,
Scalar,
Series,
TimedeltaIndex,
@@ -73,7 +74,14 @@
tan,
true_divide,
)
-from cudf.core.reshape import concat, get_dummies, melt, merge_sorted
+from cudf.core.reshape import (
+ concat,
+ get_dummies,
+ melt,
+ merge_sorted,
+ pivot,
+ unstack,
+)
from cudf.core.series import isclose
from cudf.core.tools.datetimes import DateOffset, to_datetime
from cudf.core.tools.numeric import to_numeric
diff --git a/python/cudf/cudf/_lib/csv.pyx b/python/cudf/cudf/_lib/csv.pyx
index 1d9c8fa58e6..a15a180d466 100644
--- a/python/cudf/cudf/_lib/csv.pyx
+++ b/python/cudf/cudf/_lib/csv.pyx
@@ -359,7 +359,7 @@ def read_csv(
See Also
--------
- cudf.io.csv.read_csv
+ cudf.read_csv
"""
if not isinstance(datasource, (BytesIO, StringIO, bytes,
@@ -429,7 +429,7 @@ cpdef write_csv(
See Also
--------
- cudf.io.csv.to_csv
+ cudf.DataFrame.to_csv
"""
cdef table_view input_table_view = \
table.view() if index is True else table.data_view()
diff --git a/python/cudf/cudf/_lib/orc.pyx b/python/cudf/cudf/_lib/orc.pyx
index e15b569ed85..b888f213921 100644
--- a/python/cudf/cudf/_lib/orc.pyx
+++ b/python/cudf/cudf/_lib/orc.pyx
@@ -84,7 +84,7 @@ cpdef read_orc(object filepaths_or_buffers,
See Also
--------
- cudf.io.orc.read_orc
+ cudf.read_orc
"""
cdef orc_reader_options c_orc_reader_options = make_orc_reader_options(
filepaths_or_buffers,
@@ -142,7 +142,7 @@ cpdef write_orc(Table table,
See Also
--------
- cudf.io.orc.read_orc
+ cudf.read_orc
"""
cdef compression_type compression_ = _get_comp_type(compression)
cdef table_metadata metadata_ = table_metadata()
diff --git a/python/cudf/cudf/core/__init__.py b/python/cudf/cudf/core/__init__.py
index 5eaa5b52fd4..016aba2edb3 100644
--- a/python/cudf/cudf/core/__init__.py
+++ b/python/cudf/cudf/core/__init__.py
@@ -17,6 +17,7 @@
Int64Index,
IntervalIndex,
RangeIndex,
+ StringIndex,
TimedeltaIndex,
UInt8Index,
UInt16Index,
diff --git a/python/cudf/cudf/core/algorithms.py b/python/cudf/cudf/core/algorithms.py
index 9f26ac8ee78..38b6f8789bb 100644
--- a/python/cudf/cudf/core/algorithms.py
+++ b/python/cudf/cudf/core/algorithms.py
@@ -35,7 +35,7 @@ def factorize(values, sort=False, na_sentinel=-1, size_hint=None):
See Also
--------
- cudf.core.series.Series.factorize : Encode the input values of Series.
+ cudf.Series.factorize : Encode the input values of Series.
"""
if sort:
diff --git a/python/cudf/cudf/core/buffer.py b/python/cudf/cudf/core/buffer.py
index c6875052685..8d80e488e2e 100644
--- a/python/cudf/cudf/core/buffer.py
+++ b/python/cudf/cudf/core/buffer.py
@@ -15,6 +15,23 @@
class Buffer(Serializable):
+ """
+ A Buffer represents a device memory allocation.
+
+ Parameters
+ ----------
+ data : Buffer, array_like, int
+ An array-like object or integer representing a
+ device or host pointer to pre-allocated memory.
+ size : int, optional
+ Size of memory allocation. Required if a pointer
+ is passed for `data`.
+ owner : object, optional
+ Python object to which the lifetime of the memory
+ allocation is tied. If provided, a reference to this
+ object is kept in this Buffer.
+ """
+
ptr: int
size: int
_owner: Any
@@ -22,22 +39,7 @@ class Buffer(Serializable):
def __init__(
self, data: Any = None, size: Optional[int] = None, owner: Any = None
):
- """
- A Buffer represents a device memory allocation.
-
- Parameters
- ----------
- data : Buffer, array_like, int
- An array-like object or integer representing a
- device or host pointer to pre-allocated memory.
- size : int, optional
- Size of memory allocation. Required if a pointer
- is passed for `data`.
- owner : object, optional
- Python object to which the lifetime of the memory
- allocation is tied. If provided, a reference to this
- object is kept in this Buffer.
- """
+
if isinstance(data, Buffer):
self.ptr = data.ptr
self.size = data.size
diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py
index 48398e03b2d..f435e0fa88c 100644
--- a/python/cudf/cudf/core/column/categorical.py
+++ b/python/cudf/cudf/core/column/categorical.py
@@ -49,62 +49,63 @@
class CategoricalAccessor(ColumnMethods):
+ """
+ Accessor object for categorical properties of the Series values.
+ Be aware that assigning to `categories` is an in-place operation,
+ while all methods return new categorical data by default.
+
+ Parameters
+ ----------
+ column : Column
+ parent : Series or CategoricalIndex
+
+ Examples
+ --------
+ >>> s = cudf.Series([1,2,3], dtype='category')
+ >>> s
+ >>> s
+ 0 1
+ 1 2
+ 2 3
+ dtype: category
+ Categories (3, int64): [1, 2, 3]
+ >>> s.cat.categories
+ Int64Index([1, 2, 3], dtype='int64')
+ >>> s.cat.reorder_categories([3,2,1])
+ 0 1
+ 1 2
+ 2 3
+ dtype: category
+ Categories (3, int64): [3, 2, 1]
+ >>> s.cat.remove_categories([1])
+ 0
+ 1 2
+ 2 3
+ dtype: category
+ Categories (2, int64): [2, 3]
+ >>> s.cat.set_categories(list('abcde'))
+ 0
+ 1
+ 2
+ dtype: category
+ Categories (5, object): ['a', 'b', 'c', 'd', 'e']
+ >>> s.cat.as_ordered()
+ 0 1
+ 1 2
+ 2 3
+ dtype: category
+ Categories (3, int64): [1 < 2 < 3]
+ >>> s.cat.as_unordered()
+ 0 1
+ 1 2
+ 2 3
+ dtype: category
+ Categories (3, int64): [1, 2, 3]
+ """
+
_column: CategoricalColumn
def __init__(self, parent: SeriesOrIndex):
- """
- Accessor object for categorical properties of the Series values.
- Be aware that assigning to `categories` is a inplace operation,
- while all methods return new categorical data per default.
-
- Parameters
- ----------
- column : Column
- parent : Series or CategoricalIndex
-
- Examples
- --------
- >>> s = cudf.Series([1,2,3], dtype='category')
- >>> s
- >>> s
- 0 1
- 1 2
- 2 3
- dtype: category
- Categories (3, int64): [1, 2, 3]
- >>> s.cat.categories
- Int64Index([1, 2, 3], dtype='int64')
- >>> s.cat.reorder_categories([3,2,1])
- 0 1
- 1 2
- 2 3
- dtype: category
- Categories (3, int64): [3, 2, 1]
- >>> s.cat.remove_categories([1])
- 0
- 1 2
- 2 3
- dtype: category
- Categories (2, int64): [2, 3]
- >>> s.cat.set_categories(list('abcde'))
- 0
- 1
- 2
- dtype: category
- Categories (5, object): ['a', 'b', 'c', 'd', 'e']
- >>> s.cat.as_ordered()
- 0 1
- 1 2
- 2 3
- dtype: category
- Categories (3, int64): [1 < 2 < 3]
- >>> s.cat.as_unordered()
- 0 1
- 1 2
- 2 3
- dtype: category
- Categories (3, int64): [1, 2, 3]
- """
if not is_categorical_dtype(parent.dtype):
raise AttributeError(
"Can only use .cat accessor with a 'category' dtype"
@@ -648,7 +649,19 @@ def reorder_categories(
class CategoricalColumn(column.ColumnBase):
- """Implements operations for Columns of Categorical type
+ """
+ Implements operations for Columns of Categorical type
+
+ Parameters
+ ----------
+ dtype : CategoricalDtype
+ mask : Buffer
+ The validity mask
+ offset : int
+ Data offset
+ children : Tuple[ColumnBase]
+ Two non-null columns containing the categories and codes
+ respectively
"""
dtype: cudf.core.dtypes.CategoricalDtype
@@ -664,18 +677,7 @@ def __init__(
null_count: int = None,
children: Tuple["column.ColumnBase", ...] = (),
):
- """
- Parameters
- ----------
- dtype : CategoricalDtype
- mask : Buffer
- The validity mask
- offset : int
- Data offset
- children : Tuple[ColumnBase]
- Two non-null columns containing the categories and codes
- respectively
- """
+
if size is None:
for child in children:
assert child.offset == 0
diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py
index f3d1880b290..623d0e43f5d 100644
--- a/python/cudf/cudf/core/column/datetime.py
+++ b/python/cudf/cudf/core/column/datetime.py
@@ -52,6 +52,19 @@
class DatetimeColumn(column.ColumnBase):
+ """
+ A Column implementation for Date-time types.
+
+ Parameters
+ ----------
+ data : Buffer
+ The datetime values
+ dtype : np.dtype
+ The data type
+ mask : Buffer; optional
+ The validity mask
+ """
+
def __init__(
self,
data: Buffer,
@@ -61,16 +74,7 @@ def __init__(
offset: int = 0,
null_count: int = None,
):
- """
- Parameters
- ----------
- data : Buffer
- The datetime values
- dtype : np.dtype
- The data type
- mask : Buffer; optional
- The validity mask
- """
+
dtype = np.dtype(dtype)
if data.size % dtype.itemsize:
raise ValueError("Buffer size must be divisible by element size")
diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py
index a3f4a82a7dc..29211b0f855 100644
--- a/python/cudf/cudf/core/column/numerical.py
+++ b/python/cudf/cudf/core/column/numerical.py
@@ -36,6 +36,17 @@
class NumericalColumn(NumericalBaseColumn):
+ """
+ A Column object for Numeric types.
+
+ Parameters
+ ----------
+ data : Buffer
+ dtype : np.dtype
+ The dtype associated with the data Buffer
+ mask : Buffer, optional
+ """
+
def __init__(
self,
data: Buffer,
@@ -45,14 +56,6 @@ def __init__(
offset: int = 0,
null_count: int = None,
):
- """
- Parameters
- ----------
- data : Buffer
- dtype : np.dtype
- The dtype associated with the data Buffer
- mask : Buffer, optional
- """
dtype = np.dtype(dtype)
if data.size % dtype.itemsize:
raise ValueError("Buffer size must be divisible by element size")
diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index 7d6afbb4056..50cd6c764cd 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -98,17 +98,18 @@ def str_to_boolean(column: StringColumn):
class StringMethods(ColumnMethods):
+ """
+ Vectorized string functions for Series and Index.
+
+ This mimics pandas ``df.str`` interface. Nulls stay null
+ unless handled otherwise by a particular method.
+ Patterned after Python’s string methods, with some
+ inspiration from R’s stringr package.
+ """
+
_column: StringColumn
def __init__(self, parent):
- """
- Vectorized string functions for Series and Index.
-
- This mimics pandas ``df.str`` interface. nulls stay null
- unless handled otherwise by a particular method.
- Patterned after Python’s string methods, with some
- inspiration from R’s stringr package.
- """
value_type = (
parent.dtype.leaf_type
if is_list_dtype(parent.dtype)
@@ -2555,7 +2556,7 @@ def partition(self, sep: str = " ", expand: bool = True) -> SeriesOrIndex:
Also available on indices:
- >>> idx = cudf.core.index.StringIndex(['X 123', 'Y 999'])
+ >>> idx = cudf.Index(['X 123', 'Y 999'])
>>> idx
StringIndex(['X 123' 'Y 999'], dtype='object')
@@ -2622,7 +2623,7 @@ def rpartition(self, sep: str = " ", expand: bool = True) -> SeriesOrIndex:
Also available on indices:
- >>> idx = cudf.core.index.StringIndex(['X 123', 'Y 999'])
+ >>> idx = cudf.Index(['X 123', 'Y 999'])
>>> idx
StringIndex(['X 123' 'Y 999'], dtype='object')
@@ -3294,7 +3295,7 @@ def count(self, pat: str, flags: int = 0) -> SeriesOrIndex:
This is also available on Index.
- >>> index = cudf.core.index.StringIndex(['A', 'A', 'Aaba', 'cat'])
+ >>> index = cudf.Index(['A', 'A', 'Aaba', 'cat'])
>>> index.str.count('a')
Int64Index([0, 0, 2, 1], dtype='int64')
""" # noqa W605
@@ -4922,7 +4923,18 @@ def _expected_types_format(types):
class StringColumn(column.ColumnBase):
- """Implements operations for Columns of String type
+ """
+ Implements operations for Columns of String type
+
+ Parameters
+ ----------
+ mask : Buffer
+ The validity mask
+ offset : int
+ Data offset
+ children : Tuple[Column]
+ Two non-null columns containing the string data and offsets
+ respectively
"""
_start_offset: Optional[int]
@@ -4937,17 +4949,6 @@ def __init__(
null_count: int = None,
children: Tuple["column.ColumnBase", ...] = (),
):
- """
- Parameters
- ----------
- mask : Buffer
- The validity mask
- offset : int
- Data offset
- children : Tuple[Column]
- Two non-null columns containing the string data and offsets
- respectively
- """
dtype = np.dtype("object")
if size is None:
diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py
index a27c20cc50c..b73353dd720 100644
--- a/python/cudf/cudf/core/column/timedelta.py
+++ b/python/cudf/cudf/core/column/timedelta.py
@@ -34,6 +34,24 @@
class TimeDeltaColumn(column.ColumnBase):
+ """
+ Parameters
+ ----------
+ data : Buffer
+ The Timedelta values
+ dtype : np.dtype
+ The data type
+ size : int
+ Size of memory allocation.
+ mask : Buffer; optional
+ The validity mask
+ offset : int
+ Data offset
+ null_count : int, optional
+ The number of null values.
+ If None, it is calculated automatically.
+ """
+
def __init__(
self,
data: Buffer,
@@ -43,23 +61,6 @@ def __init__(
offset: int = 0,
null_count: int = None,
):
- """
- Parameters
- ----------
- data : Buffer
- The Timedelta values
- dtype : np.dtype
- The data type
- size : int
- Size of memory allocation.
- mask : Buffer; optional
- The validity mask
- offset : int
- Data offset
- null_count : int, optional
- The number of null values.
- If None, it is calculated automatically.
- """
dtype = np.dtype(dtype)
if data.size % dtype.itemsize:
raise ValueError("Buffer size must be divisible by element size")
diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py
index 607b8ac307b..56882f89af8 100644
--- a/python/cudf/cudf/core/column_accessor.py
+++ b/python/cudf/cudf/core/column_accessor.py
@@ -80,6 +80,19 @@ def _to_flat_dict(d):
class ColumnAccessor(MutableMapping):
+ """
+ Parameters
+ ----------
+ data : mapping
+ Mapping of keys to column values.
+ multiindex : bool, optional
+ Whether tuple keys represent a hierarchical
+ index with multiple "levels" (default=False).
+ level_names : tuple, optional
+ Tuple containing names for each of the levels.
+ For a non-hierarchical index, a tuple of size 1
+ may be passed.
+ """
_data: "Dict[Any, ColumnBase]"
multiindex: bool
@@ -91,19 +104,6 @@ def __init__(
multiindex: bool = False,
level_names=None,
):
- """
- Parameters
- ----------
- data : mapping
- Mapping of keys to column values.
- multiindex : bool, optional
- Whether tuple keys represent a hierarchical
- index with multiple "levels" (default=False).
- level_names : tuple, optional
- Tuple containing names for each of the levels.
- For a non-hierarchical index, a tuple of size 1
- may be passe.
- """
if data is None:
data = {}
# TODO: we should validate the keys of `data`
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 8cdc6eebaee..6c5932e600b 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -69,100 +69,101 @@
class DataFrame(Frame, Serializable, GetAttrGetItemMixin):
+ """
+ A GPU Dataframe object.
- _PROTECTED_KEYS = frozenset(("_data", "_index"))
-
- @annotate("DATAFRAME_INIT", color="blue", domain="cudf_python")
- def __init__(self, data=None, index=None, columns=None, dtype=None):
- """
- A GPU Dataframe object.
+ Parameters
+ ----------
+ data : array-like, Iterable, dict, or DataFrame.
+ Dict can contain Series, arrays, constants, or list-like objects.
- Parameters
- ----------
- data : array-like, Iterable, dict, or DataFrame.
- Dict can contain Series, arrays, constants, or list-like objects.
+ index : Index or array-like
+ Index to use for resulting frame. Will default to
+ RangeIndex if no indexing information part of input data and
+ no index provided.
- index : Index or array-like
- Index to use for resulting frame. Will default to
- RangeIndex if no indexing information part of input data and
- no index provided.
+ columns : Index or array-like
+ Column labels to use for resulting frame.
+ Will default to RangeIndex (0, 1, 2, …, n) if no column
+ labels are provided.
- columns : Index or array-like
- Column labels to use for resulting frame.
- Will default to RangeIndex (0, 1, 2, …, n) if no column
- labels are provided.
+ dtype : dtype, default None
+ Data type to force. Only a single dtype is allowed.
+ If None, infer.
- dtype : dtype, default None
- Data type to force. Only a single dtype is allowed.
- If None, infer.
-
- Examples
- --------
+ Examples
+ --------
- Build dataframe with ``__setitem__``:
+ Build dataframe with ``__setitem__``:
- >>> import cudf
- >>> df = cudf.DataFrame()
- >>> df['key'] = [0, 1, 2, 3, 4]
- >>> df['val'] = [float(i + 10) for i in range(5)] # insert column
- >>> df
- key val
- 0 0 10.0
- 1 1 11.0
- 2 2 12.0
- 3 3 13.0
- 4 4 14.0
+ >>> import cudf
+ >>> df = cudf.DataFrame()
+ >>> df['key'] = [0, 1, 2, 3, 4]
+ >>> df['val'] = [float(i + 10) for i in range(5)] # insert column
+ >>> df
+ key val
+ 0 0 10.0
+ 1 1 11.0
+ 2 2 12.0
+ 3 3 13.0
+ 4 4 14.0
+
+ Build DataFrame via dict of columns:
+
+ >>> import numpy as np
+ >>> from datetime import datetime, timedelta
+ >>> t0 = datetime.strptime('2018-10-07 12:00:00', '%Y-%m-%d %H:%M:%S')
+ >>> n = 5
+ >>> df = cudf.DataFrame({
+ ... 'id': np.arange(n),
+ ... 'datetimes': np.array(
+ ... [(t0+ timedelta(seconds=x)) for x in range(n)])
+ ... })
+ >>> df
+ id datetimes
+ 0 0 2018-10-07T12:00:00.000
+ 1 1 2018-10-07T12:00:01.000
+ 2 2 2018-10-07T12:00:02.000
+ 3 3 2018-10-07T12:00:03.000
+ 4 4 2018-10-07T12:00:04.000
+
+ Build DataFrame via list of rows as tuples:
+
+ >>> df = cudf.DataFrame([
+ ... (5, "cats", "jump", np.nan),
+ ... (2, "dogs", "dig", 7.5),
+ ... (3, "cows", "moo", -2.1, "occasionally"),
+ ... ])
+ >>> df
+ 0 1 2 3 4
+ 0 5 cats jump
+ 1 2 dogs dig 7.5
+ 2 3 cows moo -2.1 occasionally
+
+ Convert from a Pandas DataFrame:
- Build DataFrame via dict of columns:
+ >>> import pandas as pd
+ >>> pdf = pd.DataFrame({'a': [0, 1, 2, 3],'b': [0.1, 0.2, None, 0.3]})
+ >>> pdf
+ a b
+ 0 0 0.1
+ 1 1 0.2
+ 2 2 NaN
+ 3 3 0.3
+ >>> df = cudf.from_pandas(pdf)
+ >>> df
+ a b
+ 0 0 0.1
+ 1 1 0.2
+ 2 2
+ 3 3 0.3
+ """
- >>> import numpy as np
- >>> from datetime import datetime, timedelta
- >>> t0 = datetime.strptime('2018-10-07 12:00:00', '%Y-%m-%d %H:%M:%S')
- >>> n = 5
- >>> df = cudf.DataFrame({
- ... 'id': np.arange(n),
- ... 'datetimes': np.array(
- ... [(t0+ timedelta(seconds=x)) for x in range(n)])
- ... })
- >>> df
- id datetimes
- 0 0 2018-10-07T12:00:00.000
- 1 1 2018-10-07T12:00:01.000
- 2 2 2018-10-07T12:00:02.000
- 3 3 2018-10-07T12:00:03.000
- 4 4 2018-10-07T12:00:04.000
-
- Build DataFrame via list of rows as tuples:
-
- >>> df = cudf.DataFrame([
- ... (5, "cats", "jump", np.nan),
- ... (2, "dogs", "dig", 7.5),
- ... (3, "cows", "moo", -2.1, "occasionally"),
- ... ])
- >>> df
- 0 1 2 3 4
- 0 5 cats jump
- 1 2 dogs dig 7.5
- 2 3 cows moo -2.1 occasionally
+ _PROTECTED_KEYS = frozenset(("_data", "_index"))
- Convert from a Pandas DataFrame:
+ @annotate("DATAFRAME_INIT", color="blue", domain="cudf_python")
+ def __init__(self, data=None, index=None, columns=None, dtype=None):
- >>> import pandas as pd
- >>> pdf = pd.DataFrame({'a': [0, 1, 2, 3],'b': [0.1, 0.2, None, 0.3]})
- >>> pdf
- a b
- 0 0 0.1
- 1 1 0.2
- 2 2 NaN
- 3 3 0.3
- >>> df = cudf.from_pandas(pdf)
- >>> df
- a b
- 0 0 0.1
- 1 1 0.2
- 2 2
- 3 3 0.3
- """
super().__init__()
if isinstance(columns, (Series, cudf.BaseIndex)):
@@ -3462,7 +3463,7 @@ def rename(
if index:
if (
any(type(item) == str for item in index.values())
- and type(self.index) != cudf.core.index.StringIndex
+ and type(self.index) != cudf.StringIndex
):
raise NotImplementedError(
"Implicit conversion of index to "
@@ -4455,6 +4456,7 @@ def join(
)
return df
+ @copy_docstring(DataFrameGroupBy)
def groupby(
self,
by=None,
@@ -4499,6 +4501,7 @@ def groupby(
sort=sort,
)
+ @copy_docstring(Rolling)
def rolling(
self, window, min_periods=None, center=False, axis=0, win_type=None
):
@@ -6482,9 +6485,9 @@ def mode(self, axis=0, numeric_only=False, dropna=True):
See Also
--------
- cudf.core.series.Series.mode : Return the highest frequency value
+ cudf.Series.mode : Return the highest frequency value
in a Series.
- cudf.core.series.Series.value_counts : Return the counts of values
+ cudf.Series.value_counts : Return the counts of values
in a Series.
Notes
diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py
index 6dbe55d0bb8..2d8bf9e5a2c 100644
--- a/python/cudf/cudf/core/dtypes.py
+++ b/python/cudf/cudf/core/dtypes.py
@@ -27,14 +27,14 @@ class _BaseDtype(ExtensionDtype, Serializable):
class CategoricalDtype(_BaseDtype):
+ """
+ dtype similar to pd.CategoricalDtype with the categories
+ stored on the GPU.
+ """
ordered: Optional[bool]
def __init__(self, categories=None, ordered: bool = None) -> None:
- """
- dtype similar to pd.CategoricalDtype with the categories
- stored on the GPU.
- """
self._categories = self._init_categories(categories)
self.ordered = ordered
@@ -223,14 +223,14 @@ def deserialize(cls, header: dict, frames: list):
class StructDtype(_BaseDtype):
+ """
+ fields : dict
+ A mapping of field names to dtypes
+ """
name = "struct"
def __init__(self, fields):
- """
- fields : dict
- A mapping of field names to dtypes
- """
pa_fields = {
k: cudf.utils.dtypes.cudf_dtype_to_pa_type(v)
for k, v in fields.items()
@@ -309,34 +309,34 @@ def deserialize(cls, header: dict, frames: list):
class Decimal32Dtype(_BaseDtype):
+ """
+ Parameters
+ ----------
+ precision : int
+ The total number of digits in each value of this dtype
+ scale : int, optional
+ The scale of the Decimal32Dtype. See Notes below.
+
+ Notes
+ -----
+ When the scale is positive:
+ - numbers with fractional parts (e.g., 0.0042) can be represented
+ - the scale is the total number of digits to the right of the
+ decimal point
+ When the scale is negative:
+ - only multiples of powers of 10 (including 10**0) can be
+ represented (e.g., 1729, 4200, 1000000)
+ - the scale represents the number of trailing zeros in the value.
+ For example, 42 is representable with precision=2 and scale=0.
+ 13.0051 is representable with precision=6 and scale=4,
+ and *not* representable with precision<6 or scale<4.
+ """
name = "decimal32"
_metadata = ("precision", "scale")
MAX_PRECISION = np.floor(np.log10(np.iinfo("int32").max))
def __init__(self, precision, scale=0):
- """
- Parameters
- ----------
- precision : int
- The total number of digits in each value of this dtype
- scale : int, optional
- The scale of the Decimal32Dtype. See Notes below.
-
- Notes
- -----
- When the scale is positive:
- - numbers with fractional parts (e.g., 0.0042) can be represented
- - the scale is the total number of digits to the right of the
- decimal point
- When the scale is negative:
- - only multiples of powers of 10 (including 10**0) can be
- represented (e.g., 1729, 4200, 1000000)
- - the scale represents the number of trailing zeros in the value.
- For example, 42 is representable with precision=2 and scale=0.
- 13.0051 is representable with precision=6 and scale=4,
- and *not* representable with precision<6 or scale<4.
- """
self._validate(precision, scale)
self._typ = pa.decimal128(precision, scale)
@@ -417,34 +417,34 @@ def deserialize(cls, header: dict, frames: list):
class Decimal64Dtype(_BaseDtype):
+ """
+ Parameters
+ ----------
+ precision : int
+ The total number of digits in each value of this dtype
+ scale : int, optional
+ The scale of the Decimal64Dtype. See Notes below.
+
+ Notes
+ -----
+ When the scale is positive:
+ - numbers with fractional parts (e.g., 0.0042) can be represented
+ - the scale is the total number of digits to the right of the
+ decimal point
+ When the scale is negative:
+ - only multiples of powers of 10 (including 10**0) can be
+ represented (e.g., 1729, 4200, 1000000)
+ - the scale represents the number of trailing zeros in the value.
+ For example, 42 is representable with precision=2 and scale=0.
+ 13.0051 is representable with precision=6 and scale=4,
+ and *not* representable with precision<6 or scale<4.
+ """
name = "decimal64"
_metadata = ("precision", "scale")
MAX_PRECISION = np.floor(np.log10(np.iinfo("int64").max))
def __init__(self, precision, scale=0):
- """
- Parameters
- ----------
- precision : int
- The total number of digits in each value of this dtype
- scale : int, optional
- The scale of the Decimal64Dtype. See Notes below.
-
- Notes
- -----
- When the scale is positive:
- - numbers with fractional parts (e.g., 0.0042) can be represented
- - the scale is the total number of digits to the right of the
- decimal point
- When the scale is negative:
- - only multiples of powers of 10 (including 10**0) can be
- represented (e.g., 1729, 4200, 1000000)
- - the scale represents the number of trailing zeros in the value.
- For example, 42 is representable with precision=2 and scale=0.
- 13.0051 is representable with precision=6 and scale=4,
- and *not* representable with precision<6 or scale<4.
- """
self._validate(precision, scale)
self._typ = pa.decimal128(precision, scale)
@@ -525,16 +525,17 @@ def deserialize(cls, header: dict, frames: list):
class IntervalDtype(StructDtype):
+ """
+ subtype: str, np.dtype
+ The dtype of the Interval bounds.
+ closed: {‘right’, ‘left’, ‘both’, ‘neither’}, default ‘right’
+ Whether the interval is closed on the left-side, right-side,
+ both or neither. See the Notes for more detailed explanation.
+ """
+
name = "interval"
def __init__(self, subtype, closed="right"):
- """
- subtype: str, np.dtype
- The dtype of the Interval bounds.
- closed: {‘right’, ‘left’, ‘both’, ‘neither’}, default ‘right’
- Whether the interval is closed on the left-side, right-side,
- both or neither. See the Notes for more detailed explanation.
- """
super().__init__(fields={"left": subtype, "right": subtype})
if closed in ["left", "right", "neither", "both"]:
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 6a976f54c2b..daa42d994ca 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -1116,19 +1116,19 @@ def dropna(
See also
--------
- cudf.core.dataframe.DataFrame.isna
+ cudf.DataFrame.isna
Indicate null values.
- cudf.core.dataframe.DataFrame.notna
+ cudf.DataFrame.notna
Indicate non-null values.
- cudf.core.dataframe.DataFrame.fillna
+ cudf.DataFrame.fillna
Replace null values.
- cudf.core.series.Series.dropna
+ cudf.Series.dropna
Drop null values.
- cudf.core.index.Index.dropna
+ cudf.Index.dropna
Drop null indices.
Examples
@@ -4191,6 +4191,12 @@ def shape(self):
return (len(self),)
def __iter__(self):
+ """
+ Iterating over a GPU object is not efficient and hence not supported.
+
+ Consider using ``.to_arrow()``, ``.to_pandas()`` or ``.values_host``
+ if you wish to iterate over the values.
+ """
cudf.utils.utils.raise_iteration_error(obj=self)
def __len__(self):
diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index 4b063e7e57c..5b009984cf7 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -352,10 +352,10 @@ def pipe(self, func, *args, **kwargs):
See also
--------
- cudf.core.series.Series.pipe
+ cudf.Series.pipe
Apply a function with arguments to a series.
- cudf.core.dataframe.DataFrame.pipe
+ cudf.DataFrame.pipe
Apply a function with arguments to a dataframe.
apply
@@ -1017,93 +1017,93 @@ def _mimic_pandas_order(
class DataFrameGroupBy(GroupBy, GetAttrGetItemMixin):
+ """
+ Group DataFrame using a mapper or by a Series of columns.
+
+ A groupby operation involves some combination of splitting the object,
+ applying a function, and combining the results. This can be used to
+ group large amounts of data and compute operations on these groups.
+
+ Parameters
+ ----------
+ by : mapping, function, label, or list of labels
+ Used to determine the groups for the groupby. If by is a
+ function, it’s called on each value of the object’s index.
+ If a dict or Series is passed, the Series or dict VALUES will
+ be used to determine the groups (the Series’ values are first
+ aligned; see .align() method). If a cupy array is passed, the
+ values are used as-is to determine the groups. A label or list
+ of labels may be passed to group by the columns in self.
+ Notice that a tuple is interpreted as a (single) key.
+ level : int, level name, or sequence of such, default None
+ If the axis is a MultiIndex (hierarchical), group by a particular
+ level or levels.
+ as_index : bool, default True
+ For aggregated output, return object with group labels as
+ the index. Only relevant for DataFrame input.
+ as_index=False is effectively “SQL-style” grouped output.
+ sort : bool, default False
+ Sort result by group key. Differ from Pandas, cudf defaults to
+ ``False`` for better performance. Note this does not influence
+ the order of observations within each group. Groupby preserves
+ the order of rows within each group.
+ dropna : bool, optional
+ If True (default), do not include the "null" group.
+
+ Returns
+ -------
+ DataFrameGroupBy
+ Returns a groupby object that contains information
+ about the groups.
+
+ Examples
+ --------
+ >>> import cudf
+ >>> import pandas as pd
+ >>> df = cudf.DataFrame({'Animal': ['Falcon', 'Falcon',
+ ... 'Parrot', 'Parrot'],
+ ... 'Max Speed': [380., 370., 24., 26.]})
+ >>> df
+ Animal Max Speed
+ 0 Falcon 380.0
+ 1 Falcon 370.0
+ 2 Parrot 24.0
+ 3 Parrot 26.0
+ >>> df.groupby(['Animal']).mean()
+ Max Speed
+ Animal
+ Falcon 375.0
+ Parrot 25.0
+
+ >>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
+ ... ['Captive', 'Wild', 'Captive', 'Wild']]
+ >>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
+ >>> df = cudf.DataFrame({'Max Speed': [390., 350., 30., 20.]},
+ ... index=index)
+ >>> df
+ Max Speed
+ Animal Type
+ Falcon Captive 390.0
+ Wild 350.0
+ Parrot Captive 30.0
+ Wild 20.0
+ >>> df.groupby(level=0).mean()
+ Max Speed
+ Animal
+ Falcon 370.0
+ Parrot 25.0
+ >>> df.groupby(level="Type").mean()
+ Max Speed
+ Type
+ Wild 185.0
+ Captive 210.0
+ """
+
_PROTECTED_KEYS = frozenset(("obj",))
def __init__(
self, obj, by=None, level=None, sort=False, as_index=True, dropna=True
):
- """
- Group DataFrame using a mapper or by a Series of columns.
-
- A groupby operation involves some combination of splitting the object,
- applying a function, and combining the results. This can be used to
- group large amounts of data and compute operations on these groups.
-
- Parameters
- ----------
- by : mapping, function, label, or list of labels
- Used to determine the groups for the groupby. If by is a
- function, it’s called on each value of the object’s index.
- If a dict or Series is passed, the Series or dict VALUES will
- be used to determine the groups (the Series’ values are first
- aligned; see .align() method). If a cupy array is passed, the
- values are used as-is determine the groups. A label or list
- of labels may be passed to group by the columns in self.
- Notice that a tuple is interpreted as a (single) key.
- level : int, level name, or sequence of such, default None
- If the axis is a MultiIndex (hierarchical), group by a particular
- level or levels.
- as_index : bool, default True
- For aggregated output, return object with group labels as
- the index. Only relevant for DataFrame input.
- as_index=False is effectively “SQL-style” grouped output.
- sort : bool, default False
- Sort result by group key. Differ from Pandas, cudf defaults to
- ``False`` for better performance. Note this does not influence
- the order of observations within each group. Groupby preserves
- the order of rows within each group.
- dropna : bool, optional
- If True (default), do not include the "null" group.
-
- Returns
- -------
- DataFrameGroupBy
- Returns a groupby object that contains information
- about the groups.
-
- Examples
- --------
- >>> import cudf
- >>> import pandas as pd
- >>> df = cudf.DataFrame({'Animal': ['Falcon', 'Falcon',
- ... 'Parrot', 'Parrot'],
- ... 'Max Speed': [380., 370., 24., 26.]})
- >>> df
- Animal Max Speed
- 0 Falcon 380.0
- 1 Falcon 370.0
- 2 Parrot 24.0
- 3 Parrot 26.0
- >>> df.groupby(['Animal']).mean()
- Max Speed
- Animal
- Falcon 375.0
- Parrot 25.0
-
- >>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
- ... ['Captive', 'Wild', 'Captive', 'Wild']]
- >>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
- >>> df = cudf.DataFrame({'Max Speed': [390., 350., 30., 20.]},
- index=index)
- >>> df
- Max Speed
- Animal Type
- Falcon Captive 390.0
- Wild 350.0
- Parrot Captive 30.0
- Wild 20.0
- >>> df.groupby(level=0).mean()
- Max Speed
- Animal
- Falcon 370.0
- Parrot 25.0
- >>> df.groupby(level="Type").mean()
- Max Speed
- Type
- Wild 185.0
- Captive 210.0
-
- """
super().__init__(
obj=obj,
by=by,
@@ -1126,68 +1126,68 @@ def nunique(self):
class SeriesGroupBy(GroupBy):
+ """
+ Group Series using a mapper or by a Series of columns.
+
+ A groupby operation involves some combination of splitting the object,
+ applying a function, and combining the results. This can be used to
+ group large amounts of data and compute operations on these groups.
+
+ Parameters
+ ----------
+ by : mapping, function, label, or list of labels
+ Used to determine the groups for the groupby. If by is a
+ function, it’s called on each value of the object’s index.
+ If a dict or Series is passed, the Series or dict VALUES will
+ be used to determine the groups (the Series’ values are first
+ aligned; see .align() method). If a cupy array is passed, the
+ values are used as-is to determine the groups. A label or list
+ of labels may be passed to group by the columns in self.
+ Notice that a tuple is interpreted as a (single) key.
+ level : int, level name, or sequence of such, default None
+ If the axis is a MultiIndex (hierarchical), group by a particular
+ level or levels.
+ as_index : bool, default True
+ For aggregated output, return object with group labels as
+ the index. Only relevant for DataFrame input.
+ as_index=False is effectively “SQL-style” grouped output.
+ sort : bool, default False
+ Sort result by group key. Unlike pandas, cudf defaults to
+ ``False`` for better performance. Note this does not influence
+ the order of observations within each group. Groupby preserves
+ the order of rows within each group.
+
+ Returns
+ -------
+ SeriesGroupBy
+ Returns a groupby object that contains information
+ about the groups.
+
+ Examples
+ --------
+ >>> ser = cudf.Series([390., 350., 30., 20.],
+ ... index=['Falcon', 'Falcon', 'Parrot', 'Parrot'],
+ ... name="Max Speed")
+ >>> ser
+ Falcon 390.0
+ Falcon 350.0
+ Parrot 30.0
+ Parrot 20.0
+ Name: Max Speed, dtype: float64
+ >>> ser.groupby(level=0).mean()
+ Falcon 370.0
+ Parrot 25.0
+ Name: Max Speed, dtype: float64
+ >>> ser.groupby(ser > 100).mean()
+ Max Speed
+ False 25.0
+ True 370.0
+ Name: Max Speed, dtype: float64
+ """
+
def __init__(
self, obj, by=None, level=None, sort=False, as_index=True, dropna=True
):
- """
- Group Series using a mapper or by a Series of columns.
-
- A groupby operation involves some combination of splitting the object,
- applying a function, and combining the results. This can be used to
- group large amounts of data and compute operations on these groups.
-
- Parameters
- ----------
- by : mapping, function, label, or list of labels
- Used to determine the groups for the groupby. If by is a
- function, it’s called on each value of the object’s index.
- If a dict or Series is passed, the Series or dict VALUES will
- be used to determine the groups (the Series’ values are first
- aligned; see .align() method). If an cupy array is passed, the
- values are used as-is determine the groups. A label or list
- of labels may be passed to group by the columns in self.
- Notice that a tuple is interpreted as a (single) key.
- level : int, level name, or sequence of such, default None
- If the axis is a MultiIndex (hierarchical), group by a particular
- level or levels.
- as_index : bool, default True
- For aggregated output, return object with group labels as
- the index. Only relevant for DataFrame input.
- as_index=False is effectively “SQL-style” grouped output.
- sort : bool, default False
- Sort result by group key. Differ from Pandas, cudf defaults to
- ``False`` for better performance. Note this does not influence
- the order of observations within each group. Groupby preserves
- the order of rows within each group.
-
- Returns
- -------
- SeriesGroupBy
- Returns a groupby object that contains information
- about the groups.
-
- Examples
- --------
- >>> ser = cudf.Series([390., 350., 30., 20.],
- ... index=['Falcon', 'Falcon', 'Parrot', 'Parrot'],
- ... name="Max Speed")
- >>> ser
- Falcon 390.0
- Falcon 350.0
- Parrot 30.0
- Parrot 20.0
- Name: Max Speed, dtype: float64
- >>> ser.groupby(level=0).mean()
- Falcon 370.0
- Parrot 25.0
- Name: Max Speed, dtype: float64
- >>> ser.groupby(ser > 100).mean()
- Max Speed
- False 25.0
- True 370.0
- Name: Max Speed, dtype: float64
-
- """
super().__init__(
obj=obj,
by=by,
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index a2f13daf44c..b3ca6f7973b 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -326,7 +326,7 @@ def set_names(self, names, level=None, inplace=False):
See Also
--------
- cudf.core.index.Index.rename : Able to set new names without level.
+ cudf.Index.rename : Able to set new names without level.
Examples
--------
@@ -717,8 +717,8 @@ def sort_values(self, return_indexer=False, ascending=True, key=None):
See Also
--------
- cudf.core.series.Series.min : Sort values of a Series.
- cudf.core.dataframe.DataFrame.sort_values : Sort values in a DataFrame.
+ cudf.Series.sort_values : Sort values of a Series.
+ cudf.DataFrame.sort_values : Sort values in a DataFrame.
Examples
--------
@@ -1287,9 +1287,9 @@ def from_pandas(cls, index, nan_as_null=None):
>>> import numpy as np
>>> data = [10, 20, 30, np.nan]
>>> pdi = pd.Index(data)
- >>> cudf.core.index.Index.from_pandas(pdi)
+ >>> cudf.Index.from_pandas(pdi)
Float64Index([10.0, 20.0, 30.0, ], dtype='float64')
- >>> cudf.core.index.Index.from_pandas(pdi, nan_as_null=False)
+ >>> cudf.Index.from_pandas(pdi, nan_as_null=False)
Float64Index([10.0, 20.0, 30.0, nan], dtype='float64')
"""
if not isinstance(index, pd.Index):
@@ -1709,25 +1709,25 @@ def __mul__(self, other):
class GenericIndex(BaseIndex):
- """An array of orderable values that represent the indices of another Column
+ """
+ An array of orderable values that represent the indices of another Column
Attributes
----------
_values: A Column object
name: A string
+
+ Parameters
+ ----------
+ data : Column
+ The Column of data for this index
+ name : str, optional
+ The name of the Index. If not provided, the Index adopts the value
+ Column's name. Otherwise if this name is different from the value
+ Column's, the data Column will be cloned to adopt this name.
"""
def __init__(self, data, **kwargs):
- """
- Parameters
- ----------
- data : Column
- The Column of data for this index
- name : str optional
- The name of the Index. If not provided, the Index adopts the value
- Column's name. Otherwise if this name is different from the value
- Column's, the data Column will be cloned to adopt this name.
- """
kwargs = _setdefault_name(data, **kwargs)
# normalize the input
@@ -1933,42 +1933,252 @@ def __init__(self, data=None, dtype=None, copy=False, name=None):
class Int8Index(NumericIndex):
+ """
+ Immutable, ordered and sliceable sequence of labels.
+ The basic object storing row labels for all cuDF objects.
+ Int8Index is a special case of Index with purely
+ integer(``int8``) labels.
+
+ Parameters
+ ----------
+ data : array-like (1-dimensional)
+ dtype : NumPy dtype,
+ but not used.
+ copy : bool
+ Make a copy of input data.
+ name : object
+ Name to be stored in the index.
+
+ Returns
+ -------
+ Int8Index
+ """
+
_dtype = np.int8
class Int16Index(NumericIndex):
+ """
+ Immutable, ordered and sliceable sequence of labels.
+ The basic object storing row labels for all cuDF objects.
+ Int16Index is a special case of Index with purely
+ integer(``int16``) labels.
+
+ Parameters
+ ----------
+ data : array-like (1-dimensional)
+ dtype : NumPy dtype,
+ but not used.
+ copy : bool
+ Make a copy of input data.
+ name : object
+ Name to be stored in the index.
+
+ Returns
+ -------
+ Int16Index
+ """
+
_dtype = np.int16
class Int32Index(NumericIndex):
+ """
+ Immutable, ordered and sliceable sequence of labels.
+ The basic object storing row labels for all cuDF objects.
+ Int32Index is a special case of Index with purely
+ integer(``int32``) labels.
+
+ Parameters
+ ----------
+ data : array-like (1-dimensional)
+ dtype : NumPy dtype,
+ but not used.
+ copy : bool
+ Make a copy of input data.
+ name : object
+ Name to be stored in the index.
+
+ Returns
+ -------
+ Int32Index
+ """
+
_dtype = np.int32
class Int64Index(NumericIndex):
+ """
+ Immutable, ordered and sliceable sequence of labels.
+ The basic object storing row labels for all cuDF objects.
+ Int64Index is a special case of Index with purely
+ integer(``int64``) labels.
+
+ Parameters
+ ----------
+ data : array-like (1-dimensional)
+ dtype : NumPy dtype,
+ but not used.
+ copy : bool
+ Make a copy of input data.
+ name : object
+ Name to be stored in the index.
+
+ Returns
+ -------
+ Int64Index
+ """
+
_dtype = np.int64
class UInt8Index(NumericIndex):
+ """
+ Immutable, ordered and sliceable sequence of labels.
+ The basic object storing row labels for all cuDF objects.
+ UInt8Index is a special case of Index with purely
+ integer(``uint8``) labels.
+
+ Parameters
+ ----------
+ data : array-like (1-dimensional)
+ dtype : NumPy dtype,
+ but not used.
+ copy : bool
+ Make a copy of input data.
+ name : object
+ Name to be stored in the index.
+
+ Returns
+ -------
+ UInt8Index
+ """
+
_dtype = np.uint8
class UInt16Index(NumericIndex):
+ """
+ Immutable, ordered and sliceable sequence of labels.
+ The basic object storing row labels for all cuDF objects.
+ UInt16Index is a special case of Index with purely
+ integer(``uint16``) labels.
+
+ Parameters
+ ----------
+ data : array-like (1-dimensional)
+ dtype : NumPy dtype,
+ but not used.
+ copy : bool
+ Make a copy of input data.
+ name : object
+ Name to be stored in the index.
+
+ Returns
+ -------
+ UInt16Index
+ """
+
_dtype = np.uint16
class UInt32Index(NumericIndex):
+ """
+ Immutable, ordered and sliceable sequence of labels.
+ The basic object storing row labels for all cuDF objects.
+ UInt32Index is a special case of Index with purely
+ integer(``uint32``) labels.
+
+ Parameters
+ ----------
+ data : array-like (1-dimensional)
+ dtype : NumPy dtype,
+ but not used.
+ copy : bool
+ Make a copy of input data.
+ name : object
+ Name to be stored in the index.
+
+ Returns
+ -------
+ UInt32Index
+ """
+
_dtype = np.uint32
class UInt64Index(NumericIndex):
+ """
+ Immutable, ordered and sliceable sequence of labels.
+ The basic object storing row labels for all cuDF objects.
+ UInt64Index is a special case of Index with purely
+ integer(``uint64``) labels.
+
+ Parameters
+ ----------
+ data : array-like (1-dimensional)
+ dtype : NumPy dtype,
+ but not used.
+ copy : bool
+ Make a copy of input data.
+ name : object
+ Name to be stored in the index.
+
+ Returns
+ -------
+ UInt64Index
+ """
+
_dtype = np.uint64
class Float32Index(NumericIndex):
+ """
+ Immutable, ordered and sliceable sequence of labels.
+ The basic object storing row labels for all cuDF objects.
+ Float32Index is a special case of Index with purely
+ float(``float32``) labels.
+
+ Parameters
+ ----------
+ data : array-like (1-dimensional)
+ dtype : NumPy dtype,
+ but not used.
+ copy : bool
+ Make a copy of input data.
+ name : object
+ Name to be stored in the index.
+
+ Returns
+ -------
+ Float32Index
+ """
+
_dtype = np.float32
class Float64Index(NumericIndex):
+ """
+ Immutable, ordered and sliceable sequence of labels.
+ The basic object storing row labels for all cuDF objects.
+ Float64Index is a special case of Index with purely
+ float(``float64``) labels.
+
+ Parameters
+ ----------
+ data : array-like (1-dimensional)
+ dtype : NumPy dtype,
+ but not used.
+ copy : bool
+ Make a copy of input data.
+ name : object
+ Name to be stored in the index.
+
+ Returns
+ -------
+ Float64Index
+ """
+
_dtype = np.float64
@@ -2419,6 +2629,13 @@ def components(self):
@property
def inferred_freq(self):
+ """
+ Infers frequency of TimedeltaIndex.
+
+ Notes
+ -----
+ This property is currently not supported.
+ """
raise NotImplementedError("inferred_freq is not yet supported")
@@ -2724,7 +2941,7 @@ def from_breaks(breaks, closed="right", name=None, copy=False, dtype=None):
Construct an IntervalIndex from an array of splits.
Parameters
- ---------
+ ----------
breaks : array-like (1-dimensional)
Left and right bounds for each interval.
closed : {"left", "right", "both", "neither"}, default "right"
@@ -2804,7 +3021,7 @@ def __repr__(self):
+ ")"
)
- @copy_docstring(StringMethods.__init__) # type: ignore
+ @copy_docstring(StringMethods) # type: ignore
@property
def str(self):
return StringMethods(parent=self)
diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py
index 26a893a4676..cdc80b6ef32 100644
--- a/python/cudf/cudf/core/multiindex.py
+++ b/python/cudf/cudf/core/multiindex.py
@@ -609,6 +609,30 @@ def to_arrow(self):
@property
def codes(self):
+ """
+ Returns the codes of the underlying MultiIndex.
+
+ Examples
+ --------
+ >>> import cudf
+ >>> df = cudf.DataFrame({'a':[1, 2, 3], 'b':[10, 11, 12]})
+ >>> cudf.MultiIndex.from_frame(df)
+ MultiIndex([(1, 10),
+ (2, 11),
+ (3, 12)],
+ names=['a', 'b'])
+ >>> midx = cudf.MultiIndex.from_frame(df)
+ >>> midx
+ MultiIndex([(1, 10),
+ (2, 11),
+ (3, 12)],
+ names=['a', 'b'])
+ >>> midx.codes
+ a b
+ 0 0 0
+ 1 1 1
+ 2 2 2
+ """
if self._codes is None:
self._compute_levels_and_codes()
return self._codes
@@ -622,6 +646,37 @@ def nlevels(self):
@property
def levels(self):
+ """
+ Returns list of levels in the MultiIndex
+
+ Returns
+ -------
+ List of Series objects
+
+ Examples
+ --------
+ >>> import cudf
+ >>> df = cudf.DataFrame({'a':[1, 2, 3], 'b':[10, 11, 12]})
+ >>> cudf.MultiIndex.from_frame(df)
+ MultiIndex([(1, 10),
+ (2, 11),
+ (3, 12)],
+ names=['a', 'b'])
+ >>> midx = cudf.MultiIndex.from_frame(df)
+ >>> midx
+ MultiIndex([(1, 10),
+ (2, 11),
+ (3, 12)],
+ names=['a', 'b'])
+ >>> midx.levels
+ [0 1
+ 1 2
+ 2 3
+ dtype: int64, 0 10
+ 1 11
+ 2 12
+ dtype: int64]
+ """
if self._levels is None:
self._compute_levels_and_codes()
return self._levels
@@ -1123,6 +1178,37 @@ def _concat(cls, objs):
@classmethod
def from_tuples(cls, tuples, names=None):
+ """
+ Convert list of tuples to MultiIndex.
+
+ Parameters
+ ----------
+ tuples : list / sequence of tuple-likes
+ Each tuple is the index of one row/column.
+ names : list / sequence of str, optional
+ Names for the levels in the index.
+
+ Returns
+ -------
+ MultiIndex
+
+ See Also
+ --------
+ MultiIndex.from_product : Make a MultiIndex from cartesian product
+ of iterables.
+ MultiIndex.from_frame : Make a MultiIndex from a DataFrame.
+
+ Examples
+ --------
+ >>> tuples = [(1, 'red'), (1, 'blue'),
+ ... (2, 'red'), (2, 'blue')]
+ >>> cudf.MultiIndex.from_tuples(tuples, names=('number', 'color'))
+ MultiIndex([(1, 'red'),
+ (1, 'blue'),
+ (2, 'red'),
+ (2, 'blue')],
+ names=['number', 'color'])
+ """
# Use Pandas for handling Python host objects
pdi = pd.MultiIndex.from_tuples(tuples, names=names)
result = cls.from_pandas(pdi)
@@ -1187,11 +1273,97 @@ def values(self):
return self._source_data.values
@classmethod
- def from_frame(cls, dataframe, names=None):
- return cls(source_data=dataframe, names=names)
+ def from_frame(cls, df, names=None):
+ """
+ Make a MultiIndex from a DataFrame.
+
+ Parameters
+ ----------
+ df : DataFrame
+ DataFrame to be converted to MultiIndex.
+ names : list-like, optional
+ If no names are provided, use the column names, or tuple of column
+ names if the columns are a MultiIndex. If a sequence, overwrite
+ names with the given sequence.
+
+ Returns
+ -------
+ MultiIndex
+ The MultiIndex representation of the given DataFrame.
+
+ See Also
+ --------
+ MultiIndex.from_tuples : Convert list of tuples to MultiIndex.
+ MultiIndex.from_product : Make a MultiIndex from cartesian product
+ of iterables.
+
+ Examples
+ --------
+ >>> import cudf
+ >>> df = cudf.DataFrame([['HI', 'Temp'], ['HI', 'Precip'],
+ ... ['NJ', 'Temp'], ['NJ', 'Precip']],
+ ... columns=['a', 'b'])
+ >>> df
+ a b
+ 0 HI Temp
+ 1 HI Precip
+ 2 NJ Temp
+ 3 NJ Precip
+ >>> cudf.MultiIndex.from_frame(df)
+ MultiIndex([('HI', 'Temp'),
+ ('HI', 'Precip'),
+ ('NJ', 'Temp'),
+ ('NJ', 'Precip')],
+ names=['a', 'b'])
+
+ Using explicit names, instead of the column names
+
+ >>> cudf.MultiIndex.from_frame(df, names=['state', 'observation'])
+ MultiIndex([('HI', 'Temp'),
+ ('HI', 'Precip'),
+ ('NJ', 'Temp'),
+ ('NJ', 'Precip')],
+ names=['state', 'observation'])
+ """
+ return cls(source_data=df, names=names)
@classmethod
def from_product(cls, arrays, names=None):
+ """
+ Make a MultiIndex from the cartesian product of multiple iterables.
+
+ Parameters
+ ----------
+ arrays : list / sequence of iterables
+ Each iterable has unique labels for each level of the index.
+ names : list / sequence of str, optional
+ Names for the levels in the index.
+ If not explicitly provided, names will be inferred from the
+ elements of ``arrays`` if an element has a name attribute.
+
+ Returns
+ -------
+ MultiIndex
+
+ See Also
+ --------
+ MultiIndex.from_tuples : Convert list of tuples to MultiIndex.
+ MultiIndex.from_frame : Make a MultiIndex from a DataFrame.
+
+ Examples
+ --------
+ >>> numbers = [0, 1, 2]
+ >>> colors = ['green', 'purple']
+ >>> cudf.MultiIndex.from_product([numbers, colors],
+ ... names=['number', 'color'])
+ MultiIndex([(0, 'green'),
+ (0, 'purple'),
+ (1, 'green'),
+ (1, 'purple'),
+ (2, 'green'),
+ (2, 'purple')],
+ names=['number', 'color'])
+ """
# Use Pandas for handling Python host objects
pdi = pd.MultiIndex.from_product(arrays, names=names)
result = cls.from_pandas(pdi)
diff --git a/python/cudf/cudf/core/reshape.py b/python/cudf/cudf/core/reshape.py
index 54571ebb31d..1b8405af1a4 100644
--- a/python/cudf/cudf/core/reshape.py
+++ b/python/cudf/cudf/core/reshape.py
@@ -803,9 +803,9 @@ def _pivot(df, index, columns):
Parameters
----------
df : DataFrame
- index : cudf.core.index.Index
+ index : cudf.Index
Index labels of the result
- columns : cudf.core.index.Index
+ columns : cudf.Index
Column labels of the result
"""
columns_labels, columns_idx = columns._encode()
diff --git a/python/cudf/cudf/core/scalar.py b/python/cudf/cudf/core/scalar.py
index c6663a25684..4403a58dd30 100644
--- a/python/cudf/cudf/core/scalar.py
+++ b/python/cudf/cudf/core/scalar.py
@@ -17,45 +17,46 @@
class Scalar(object):
+ """
+ A GPU-backed scalar object with NumPy-scalar-like properties.
+ May be used in binary operations against other scalars, cuDF
+ Series, DataFrame, and Index objects.
+
+ Examples
+ --------
+ >>> import cudf
+ >>> cudf.Scalar(42, dtype='int64')
+ Scalar(42, dtype=int64)
+ >>> cudf.Scalar(42, dtype='int32') + cudf.Scalar(42, dtype='float64')
+ Scalar(84.0, dtype=float64)
+ >>> cudf.Scalar(42, dtype='int64') + np.int8(21)
+ Scalar(63, dtype=int64)
+ >>> x = cudf.Scalar(42, dtype='datetime64[s]')
+ >>> y = cudf.Scalar(21, dtype='timedelta64[ns]')
+ >>> x - y
+ Scalar(1970-01-01T00:00:41.999999979, dtype=datetime64[ns])
+ >>> cudf.Series([1,2,3]) + cudf.Scalar(1)
+ 0 2
+ 1 3
+ 2 4
+ dtype: int64
+ >>> df = cudf.DataFrame({'a':[1,2,3], 'b':[4.5, 5.5, 6.5]})
+ >>> slr = cudf.Scalar(10, dtype='uint8')
+ >>> df - slr
+ a b
+ 0 -9 -5.5
+ 1 -8 -4.5
+ 2 -7 -3.5
+
+ Parameters
+ ----------
+ value : Python Scalar, NumPy Scalar, or cuDF Scalar
+ The scalar value to be converted to a GPU backed scalar object
+ dtype : np.dtype or string specifier
+ The data type
+ """
+
def __init__(self, value, dtype=None):
- """
- A GPU-backed scalar object with NumPy scalar like properties
- May be used in binary operations against other scalars, cuDF
- Series, DataFrame, and Index objects.
-
- Examples
- --------
- >>> import cudf
- >>> cudf.Scalar(42, dtype='int64')
- Scalar(42, dtype=int64)
- >>> cudf.Scalar(42, dtype='int32') + cudf.Scalar(42, dtype='float64')
- Scalar(84.0, dtype=float64)
- >>> cudf.Scalar(42, dtype='int64') + np.int8(21)
- Scalar(63, dtype=int64)
- >>> x = cudf.Scalar(42, dtype='datetime64[s]')
- >>> y = cudf.Scalar(21, dtype='timedelta64[ns])
- >>> x - y
- Scalar(1970-01-01T00:00:41.999999979, dtype=datetime64[ns])
- >>> cudf.Series([1,2,3]) + cudf.Scalar(1)
- 0 2
- 1 3
- 2 4
- dtype: int64
- >>> df = cudf.DataFrame({'a':[1,2,3], 'b':[4.5, 5.5, 6.5]})
- >>> slr = cudf.Scalar(10, dtype='uint8')
- >>> df - slr
- a b
- 0 -9 -5.5
- 1 -8 -4.5
- 2 -7 -3.5
-
- Parameters
- ----------
- value : Python Scalar, NumPy Scalar, or cuDF Scalar
- The scalar value to be converted to a GPU backed scalar object
- dtype : np.dtype or string specifier
- The data type
- """
self._host_value = None
self._host_dtype = None
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index cb7a82bd4c8..e7a58be62b5 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -64,6 +64,48 @@
class Series(SingleColumnFrame, Serializable):
+ """
+ One-dimensional GPU array (including time series).
+
+ Labels need not be unique but must be a hashable type. The object
+ supports both integer- and label-based indexing and provides a
+ host of methods for performing operations involving the index.
+ Statistical methods from ndarray have been overridden to
+ automatically exclude missing data (currently represented
+ as null/NaN).
+
+ Operations between Series (`+`, `-`, `/`, `*`, `**`) align
+ values based on their associated index values -- they need
+ not be the same length. The result index will be the
+ sorted union of the two indexes.
+
+ ``Series`` objects are used as columns of ``DataFrame``.
+
+ Parameters
+ ----------
+ data : array-like, Iterable, dict, or scalar value
+ Contains data stored in Series.
+
+ index : array-like or Index (1d)
+ Values must be hashable and have the same length
+ as data. Non-unique index values are allowed. Will
+ default to RangeIndex (0, 1, 2, …, n) if not provided.
+ If both a dict and index sequence are used, the index will
+ override the keys found in the dict.
+
+ dtype : str, numpy.dtype, or ExtensionDtype, optional
+ Data type for the output Series. If not specified,
+ this will be inferred from data.
+
+ name : str, optional
+ The name to give to the Series.
+
+ nan_as_null : bool, default True
+ If ``None``/``True``, converts ``np.nan`` values to
+ ``null`` values.
+ If ``False``, leaves ``np.nan`` values as is.
+ """
+
# The `constructor*` properties are used by `dask` (and `dask_cudf`)
@property
def _constructor(self):
@@ -171,47 +213,6 @@ def from_masked_array(cls, data, mask, null_count=None):
def __init__(
self, data=None, index=None, dtype=None, name=None, nan_as_null=True,
):
- """
- One-dimensional GPU array (including time series).
-
- Labels need not be unique but must be a hashable type. The object
- supports both integer- and label-based indexing and provides a
- host of methods for performing operations involving the index.
- Statistical methods from ndarray have been overridden to
- automatically exclude missing data (currently represented
- as null/NaN).
-
- Operations between Series (`+`, `-`, `/`, `*`, `**`) align
- values based on their associated index values-– they need
- not be the same length. The result index will be the
- sorted union of the two indexes.
-
- ``Series`` objects are used as columns of ``DataFrame``.
-
- Parameters
- ----------
- data : array-like, Iterable, dict, or scalar value
- Contains data stored in Series.
-
- index : array-like or Index (1d)
- Values must be hashable and have the same length
- as data. Non-unique index values are allowed. Will
- default to RangeIndex (0, 1, 2, …, n) if not provided.
- If both a dict and index sequence are used, the index will
- override the keys found in the dict.
-
- dtype : str, numpy.dtype, or ExtensionDtype, optional
- Data type for the output Series. If not specified,
- this will be inferred from data.
-
- name : str, optional
- The name to give to the Series.
-
- nan_as_null : bool, Default True
- If ``None``/``True``, converts ``np.nan`` values to
- ``null`` values.
- If ``False``, leaves ``np.nan`` values as is.
- """
if isinstance(data, pd.Series):
if name is None:
name = data.name
@@ -457,7 +458,7 @@ def drop(
Return series without null values
Series.drop_duplicates
Return series with duplicate values removed
- cudf.core.dataframe.DataFrame.drop
+ cudf.DataFrame.drop
Drop specified labels from rows or columns in dataframe
Examples
@@ -879,7 +880,7 @@ def memory_usage(self, index=True, deep=False):
See Also
--------
- cudf.core.dataframe.DataFrame.memory_usage : Bytes consumed by
+ cudf.DataFrame.memory_usage : Bytes consumed by
a DataFrame.
Examples
@@ -2344,22 +2345,22 @@ def __invert__(self):
f"Operation `~` not supported on {self.dtype.type.__name__}"
)
- @copy_docstring(CategoricalAccessor.__init__) # type: ignore
+ @copy_docstring(CategoricalAccessor) # type: ignore
@property
def cat(self):
return CategoricalAccessor(parent=self)
- @copy_docstring(StringMethods.__init__) # type: ignore
+ @copy_docstring(StringMethods) # type: ignore
@property
def str(self):
return StringMethods(parent=self)
- @copy_docstring(ListMethods.__init__) # type: ignore
+ @copy_docstring(ListMethods) # type: ignore
@property
def list(self):
return ListMethods(parent=self)
- @copy_docstring(StructMethods.__init__) # type: ignore
+ @copy_docstring(StructMethods) # type: ignore
@property
def struct(self):
return StructMethods(parent=self)
@@ -2503,10 +2504,10 @@ def dropna(self, axis=0, inplace=False, how=None):
Series.fillna : Replace null values.
- cudf.core.dataframe.DataFrame.dropna : Drop rows or columns which
+ cudf.DataFrame.dropna : Drop rows or columns which
contain null values.
- cudf.core.index.Index.dropna : Drop null indices.
+ cudf.Index.dropna : Drop null indices.
Examples
--------
@@ -2845,7 +2846,7 @@ def loc(self):
See also
--------
- cudf.core.dataframe.DataFrame.loc
+ cudf.DataFrame.loc
Examples
--------
@@ -2868,7 +2869,7 @@ def iloc(self):
See also
--------
- cudf.core.dataframe.DataFrame.iloc
+ cudf.DataFrame.iloc
Examples
--------
@@ -4609,7 +4610,7 @@ def value_counts(
Series.count
Number of non-NA elements in a Series.
- cudf.core.dataframe.DataFrame.count
+ cudf.DataFrame.count
Number of non-NA elements in a DataFrame.
Examples
@@ -5216,7 +5217,7 @@ def diff(self, periods=1):
return Series(output_col, name=self.name, index=self.index)
- @copy_docstring(SeriesGroupBy.__init__)
+ @copy_docstring(SeriesGroupBy)
def groupby(
self,
by=None,
diff --git a/python/cudf/cudf/core/tools/datetimes.py b/python/cudf/cudf/core/tools/datetimes.py
index 00f60cfc8b5..181fa64240e 100644
--- a/python/cudf/cudf/core/tools/datetimes.py
+++ b/python/cudf/cudf/core/tools/datetimes.py
@@ -345,6 +345,66 @@ def get_units(value):
class DateOffset:
+ """
+ An object used for binary ops where calendrical arithmetic
+ is desired rather than absolute time arithmetic. Used to
+ add or subtract a whole number of periods, such as several
+ months or years, to a series or index of datetime dtype.
+ Works similarly to pd.DateOffset, but stores the offset
+ on the device (GPU).
+
+ Parameters
+ ----------
+ n : int, default 1
+ The number of time periods the offset represents.
+ **kwds
+ Temporal parameters that add to or replace the offset value.
+ Parameters that **add** to the offset (like Timedelta):
+ - months
+
+ See Also
+ --------
+ pandas.DateOffset : The equivalent Pandas object that this
+ object replicates
+
+ Examples
+ --------
+ >>> from cudf import DateOffset
+ >>> ts = cudf.Series([
+ ... "2000-01-01 00:00:00.012345678",
+ ... "2000-01-31 00:00:00.012345678",
+ ... "2000-02-29 00:00:00.012345678",
+ ... ], dtype='datetime64[ns]')
+ >>> ts + DateOffset(months=3)
+ 0 2000-04-01 00:00:00.012345678
+ 1 2000-04-30 00:00:00.012345678
+ 2 2000-05-29 00:00:00.012345678
+ dtype: datetime64[ns]
+ >>> ts - DateOffset(months=12)
+ 0 1999-01-01 00:00:00.012345678
+ 1 1999-01-31 00:00:00.012345678
+ 2 1999-02-28 00:00:00.012345678
+ dtype: datetime64[ns]
+
+ Notes
+ -----
+ Note that cuDF does not yet support DateOffset arguments
+ that 'replace' units in the datetime data being operated on
+ such as
+ - year
+ - month
+ - week
+ - day
+ - hour
+ - minute
+ - second
+ - microsecond
+ - millisecond
+ - nanosecond
+
+ cuDF does not yet support rounding via a `normalize`
+ keyword argument.
+ """
_UNITS_TO_CODES = {
"nanoseconds": "ns",
@@ -362,66 +422,6 @@ class DateOffset:
_CODES_TO_UNITS = {v: k for k, v in _UNITS_TO_CODES.items()}
def __init__(self, n=1, normalize=False, **kwds):
- """
- An object used for binary ops where calendrical arithmetic
- is desired rather than absolute time arithmetic. Used to
- add or subtract a whole number of periods, such as several
- months or years, to a series or index of datetime dtype.
- Works similarly to pd.DateOffset, but stores the offset
- on the device (GPU).
-
- Parameters
- ----------
- n : int, default 1
- The number of time periods the offset represents.
- **kwds
- Temporal parameter that add to or replace the offset value.
- Parameters that **add** to the offset (like Timedelta):
- - months
-
- See Also
- --------
- pandas.DateOffset : The equivalent Pandas object that this
- object replicates
-
- Examples
- --------
- >>> from cudf import DateOffset
- >>> ts = cudf.Series([
- "2000-01-01 00:00:00.012345678",
- "2000-01-31 00:00:00.012345678",
- "2000-02-29 00:00:00.012345678",
- ], dtype='datetime64[ns])
- >>> ts + DateOffset(months=3)
- 0 2000-04-01 00:00:00.012345678
- 1 2000-04-30 00:00:00.012345678
- 2 2000-05-29 00:00:00.012345678
- dtype: datetime64[ns]
- >>> ts - DateOffset(months=12)
- 0 1999-01-01 00:00:00.012345678
- 1 1999-01-31 00:00:00.012345678
- 2 1999-02-28 00:00:00.012345678
- dtype: datetime64[ns]
-
- Notes
- -----
- Note that cuDF does not yet support DateOffset arguments
- that 'replace' units in the datetime data being operated on
- such as
- - year
- - month
- - week
- - day
- - hour
- - minute
- - second
- - microsecond
- - millisecond
- - nanosecond
-
- cuDF does not yet support rounding via a `normalize`
- keyword argument.
- """
if normalize:
raise NotImplementedError(
"normalize not yet supported for DateOffset"
diff --git a/python/cudf/cudf/core/window/rolling.py b/python/cudf/cudf/core/window/rolling.py
index d9a2fd89165..d2f120a7bb9 100644
--- a/python/cudf/cudf/core/window/rolling.py
+++ b/python/cudf/cudf/core/window/rolling.py
@@ -258,12 +258,12 @@ def apply(self, func, *args, **kwargs):
See also
--------
- cudf.core.series.Series.applymap : Apply an elementwise function to
+ cudf.Series.applymap : Apply an elementwise function to
transform the values in the Column.
Notes
-----
- See notes of the :meth:`cudf.core.series.Series.applymap`
+ See notes of the :meth:`cudf.Series.applymap`
"""
has_nulls = False
@@ -353,14 +353,15 @@ def __repr__(self):
class RollingGroupby(Rolling):
- def __init__(self, groupby, window, min_periods=None, center=False):
- """
- Grouped rolling window calculation.
+ """
+ Grouped rolling window calculation.
- See also
- --------
- cudf.core.window.Rolling
- """
+ See also
+ --------
+ cudf.core.window.Rolling
+ """
+
+ def __init__(self, groupby, window, min_periods=None, center=False):
sort_order = groupby.grouping.keys.argsort()
# TODO: there may be overlap between the columns
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 76d24dcd5d2..8744238a062 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -3613,9 +3613,7 @@ def test_as_column_types():
assert_eq(pds, gds)
pds = pd.Series(pd.Index(["1", "18", "9"]), dtype="int")
- gds = cudf.Series(
- cudf.core.index.StringIndex(["1", "18", "9"]), dtype="int"
- )
+ gds = cudf.Series(cudf.StringIndex(["1", "18", "9"]), dtype="int")
assert_eq(pds, gds)
diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py
index de7d8e35bce..7f402762730 100644
--- a/python/cudf/cudf/tests/test_groupby.py
+++ b/python/cudf/cudf/tests/test_groupby.py
@@ -985,7 +985,7 @@ def test_groupby_index_type():
df["string_col"] = ["a", "b", "c"]
df["counts"] = [1, 2, 3]
res = df.groupby(by="string_col").counts.sum()
- assert isinstance(res.index, cudf.core.index.StringIndex)
+ assert isinstance(res.index, cudf.StringIndex)
@pytest.mark.parametrize(
diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py
index 38b924006bf..f80bdec0ab5 100644
--- a/python/cudf/cudf/tests/test_index.py
+++ b/python/cudf/cudf/tests/test_index.py
@@ -332,7 +332,7 @@ def test_index_copy_datetime(name, dtype, deep=True):
@pytest.mark.parametrize("name", ["x"])
@pytest.mark.parametrize("dtype", ["category", "object"])
def test_index_copy_string(name, dtype, deep=True):
- cidx = cudf.core.index.StringIndex(["a", "b", "c"])
+ cidx = cudf.StringIndex(["a", "b", "c"])
pidx = cidx.to_pandas()
pidx_copy = pidx.copy(name=name, deep=deep, dtype=dtype)
@@ -389,7 +389,7 @@ def test_index_copy_category(name, dtype, deep=True):
"idx",
[
cudf.DatetimeIndex(["2001", "2002", "2003"]),
- cudf.core.index.StringIndex(["a", "b", "c"]),
+ cudf.StringIndex(["a", "b", "c"]),
cudf.Int64Index([1, 2, 3]),
cudf.Float64Index([1.0, 2.0, 3.0]),
cudf.CategoricalIndex([1, 2, 3]),
@@ -434,7 +434,7 @@ def test_index_copy_deep(idx, deep):
idx._values.categories.base_data.ptr
== idx_copy._values.categories.base_data.ptr
) == same_ref
- elif isinstance(idx, cudf.core.index.StringIndex):
+ elif isinstance(idx, cudf.StringIndex):
children = idx._values._base_children
copy_children = idx_copy._values._base_children
assert all(
@@ -479,7 +479,7 @@ def test_rangeindex_slice_attr_name():
def test_from_pandas_str():
idx = ["a", "b", "c"]
pidx = pd.Index(idx, name="idx")
- gidx_1 = cudf.core.index.StringIndex(idx, name="idx")
+ gidx_1 = cudf.StringIndex(idx, name="idx")
gidx_2 = cudf.from_pandas(pidx)
assert_eq(gidx_1, gidx_2)
diff --git a/python/cudf/cudf/tests/test_text.py b/python/cudf/cudf/tests/test_text.py
index 6c3fdd4640a..d0b1ba0758e 100644
--- a/python/cudf/cudf/tests/test_text.py
+++ b/python/cudf/cudf/tests/test_text.py
@@ -507,8 +507,8 @@ def test_character_tokenize_index():
actual = sr.str.character_tokenize()
assert_eq(expected, actual)
- sr = cudf.core.index.as_index([""])
- expected = cudf.core.index.StringIndex([], dtype="object")
+ sr = cudf.Index([""])
+ expected = cudf.StringIndex([], dtype="object")
actual = sr.str.character_tokenize()
assert_eq(expected, actual)
diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py
index 97a472fc132..1927ef96e6f 100644
--- a/python/cudf/cudf/utils/ioutils.py
+++ b/python/cudf/cudf/utils/ioutils.py
@@ -67,8 +67,8 @@
See Also
--------
-cudf.io.csv.read_csv
-cudf.io.json.read_json
+cudf.read_csv
+cudf.read_json
""".format(
remote_data_sources=_docstring_remote_sources
)
@@ -175,7 +175,7 @@
--------
cudf.io.parquet.read_parquet_metadata
cudf.io.parquet.to_parquet
-cudf.io.orc.read_orc
+cudf.read_orc
""".format(
remote_data_sources=_docstring_remote_sources
)
@@ -217,7 +217,7 @@
See Also
--------
cudf.io.parquet.read_parquet
-cudf.io.orc.read_orc
+cudf.read_orc
"""
doc_to_parquet = docfmt_partial(docstring=_docstring_to_parquet)
@@ -276,7 +276,7 @@
See Also
--------
-cudf.io.orc.read_orc
+cudf.read_orc
"""
doc_read_orc_metadata = docfmt_partial(docstring=_docstring_read_orc_metadata)
@@ -302,7 +302,7 @@
See Also
--------
-cudf.io.orc.read_orc
+cudf.read_orc
"""
doc_read_orc_statistics = docfmt_partial(
docstring=_docstring_read_orc_statistics
@@ -391,7 +391,7 @@
See Also
--------
-cudf.io.orc.read_orc
+cudf.read_orc
"""
doc_to_orc = docfmt_partial(docstring=_docstring_to_orc)
@@ -693,7 +693,7 @@
See Also
--------
-cudf.io.hdf.read_hdf : Read from HDF file.
+cudf.read_hdf : Read from HDF file.
cudf.io.parquet.to_parquet : Write a DataFrame to the binary parquet format.
cudf.io.feather.to_feather : Write out feather-format for DataFrames.
"""
@@ -904,7 +904,7 @@
See Also
--------
-cudf.io.csv.to_csv
+cudf.DataFrame.to_csv
""".format(
remote_data_sources=_docstring_remote_sources
)
@@ -969,7 +969,7 @@
See Also
--------
-cudf.io.csv.read_csv
+cudf.read_csv
"""
doc_to_csv = docfmt_partial(
docstring=_docstring_to_csv.format(
diff --git a/python/cudf/requirements/cuda-11.0/dev_requirements.txt b/python/cudf/requirements/cuda-11.0/dev_requirements.txt
index efb22ddd5a4..f69c246832b 100644
--- a/python/cudf/requirements/cuda-11.0/dev_requirements.txt
+++ b/python/cudf/requirements/cuda-11.0/dev_requirements.txt
@@ -23,6 +23,7 @@ packaging
pandas>=1.0,<1.3.0dev0
pandoc==2.0a4
protobuf
+pydata-sphinx-theme
pyorc
pytest
pytest-benchmark
@@ -33,7 +34,6 @@ setuptools
sphinx
sphinx-copybutton
sphinx-markdown-tables
-sphinx_rtd_theme
sphinxcontrib-websupport
transformers
typing_extensions
diff --git a/python/cudf/requirements/cuda-11.2/dev_requirements.txt b/python/cudf/requirements/cuda-11.2/dev_requirements.txt
index cb88f74399f..e55dc2f921a 100644
--- a/python/cudf/requirements/cuda-11.2/dev_requirements.txt
+++ b/python/cudf/requirements/cuda-11.2/dev_requirements.txt
@@ -23,6 +23,7 @@ packaging
pandas>=1.0,<1.3.0dev0
pandoc==2.0a4
protobuf
+pydata-sphinx-theme
pyorc
pytest
pytest-benchmark
@@ -33,7 +34,6 @@ setuptools
sphinx
sphinx-copybutton
sphinx-markdown-tables
-sphinx_rtd_theme
sphinxcontrib-websupport
transformers
typing_extensions
diff --git a/python/dask_cudf/dask_cudf/backends.py b/python/dask_cudf/dask_cudf/backends.py
index 53543b9e886..6fb5efbdf0f 100644
--- a/python/dask_cudf/dask_cudf/backends.py
+++ b/python/dask_cudf/dask_cudf/backends.py
@@ -51,8 +51,8 @@ def _nonempty_index(idx):
data = np.array([start, "1970-01-02"], dtype=idx.dtype)
values = cudf.core.column.as_column(data)
return cudf.core.index.DatetimeIndex(values, name=idx.name)
- elif isinstance(idx, cudf.core.index.StringIndex):
- return cudf.core.index.StringIndex(["cat", "dog"], name=idx.name)
+ elif isinstance(idx, cudf.StringIndex):
+ return cudf.StringIndex(["cat", "dog"], name=idx.name)
elif isinstance(idx, cudf.core.index.CategoricalIndex):
key = tuple(idx._data.keys())
assert len(key) == 1