From 054aa6b3c7715af48eb55a3341c48985f261015e Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Thu, 14 Apr 2022 08:52:53 -0700 Subject: [PATCH 1/6] misc docstring formatting fixes --- python/cudf/cudf/core/cut.py | 5 +++++ python/cudf/cudf/core/indexed_frame.py | 2 ++ python/cudf/cudf/core/tools/numeric.py | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/cut.py b/python/cudf/cudf/core/cut.py index 7c585602c23..77af1267861 100644 --- a/python/cudf/cudf/core/cut.py +++ b/python/cudf/cudf/core/cut.py @@ -1,3 +1,5 @@ +# Copyright (c) 2021-2022, NVIDIA CORPORATION. + from collections.abc import Sequence import cupy @@ -27,6 +29,7 @@ def cut( Use cut when you need to segment and sort data values into bins. This function is also useful for going from a continuous variable to a categorical variable. + Parameters ---------- x : array-like @@ -56,6 +59,7 @@ def cut( Categorical and Series (with Categorical dtype). If True, the resulting categorical will be ordered. If False, the resulting categorical will be unordered (labels must be provided). + Returns ------- out : CategoricalIndex @@ -66,6 +70,7 @@ def cut( For scalar or sequence bins, this is an ndarray with the computed bins. If set duplicates=drop, bins will drop non-unique bin. For an IntervalIndex bins, this is equal to bins. + Examples -------- Discretize into three equal-sized bins. 
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 10736948b57..ea722ec3968 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -991,6 +991,7 @@ def add_prefix(self, prefix): Examples -------- **Series** + >>> s = cudf.Series([1, 2, 3, 4]) >>> s 0 1 @@ -1006,6 +1007,7 @@ def add_prefix(self, prefix): dtype: int64 **DataFrame** + >>> df = cudf.DataFrame({'A': [1, 2, 3, 4], 'B': [3, 4, 5, 6]}) >>> df A B diff --git a/python/cudf/cudf/core/tools/numeric.py b/python/cudf/cudf/core/tools/numeric.py index 7eea7cedaad..0273227010b 100644 --- a/python/cudf/cudf/core/tools/numeric.py +++ b/python/cudf/cudf/core/tools/numeric.py @@ -57,7 +57,7 @@ def to_numeric(arg, errors="raise", downcast=None): otherwise ndarray Notes - ------- + ----- An important difference from pandas is that this function does not accept mixed numeric/non-numeric type sequences. For example ``[1, 'a']``. A ``TypeError`` will be raised when such input is received, regardless of From 4f3b09a9ba7f570c1d8e72f434b4a26d0581636e Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Thu, 14 Apr 2022 11:54:58 -0700 Subject: [PATCH 2/6] more fixes --- docs/cudf/source/api_docs/dataframe.rst | 3 +++ docs/cudf/source/api_docs/index_objects.rst | 2 ++ docs/cudf/source/api_docs/series.rst | 2 ++ docs/cudf/source/api_docs/string_handling.rst | 1 - 4 files changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/cudf/source/api_docs/dataframe.rst b/docs/cudf/source/api_docs/dataframe.rst index 1d600acfef1..e0ef3cb2ff0 100644 --- a/docs/cudf/source/api_docs/dataframe.rst +++ b/docs/cudf/source/api_docs/dataframe.rst @@ -149,6 +149,7 @@ Computations / descriptive stats DataFrame.round DataFrame.skew DataFrame.sum + DataFrame.sum_of_squares DataFrame.std DataFrame.var DataFrame.nunique @@ -248,9 +249,11 @@ Serialization / IO / conversion DataFrame.to_dlpack DataFrame.to_parquet DataFrame.to_csv + DataFrame.to_cupy 
DataFrame.to_hdf DataFrame.to_dict DataFrame.to_json + DataFrame.to_numpy DataFrame.to_pandas DataFrame.to_feather DataFrame.to_records diff --git a/docs/cudf/source/api_docs/index_objects.rst b/docs/cudf/source/api_docs/index_objects.rst index 6f5affd0ecd..8e0e3bbd411 100644 --- a/docs/cudf/source/api_docs/index_objects.rst +++ b/docs/cudf/source/api_docs/index_objects.rst @@ -92,7 +92,9 @@ Conversion Index.astype Index.to_arrow + Index.to_cupy Index.to_list + Index.to_numpy Index.to_series Index.to_frame Index.to_pandas diff --git a/docs/cudf/source/api_docs/series.rst b/docs/cudf/source/api_docs/series.rst index 95aa71919e4..d7015c9348d 100644 --- a/docs/cudf/source/api_docs/series.rst +++ b/docs/cudf/source/api_docs/series.rst @@ -390,10 +390,12 @@ Serialization / IO / conversion :toctree: api/ Series.to_arrow + Series.to_cupy Series.to_dlpack Series.to_frame Series.to_hdf Series.to_json + Series.to_numpy Series.to_pandas Series.to_string Series.from_arrow diff --git a/docs/cudf/source/api_docs/string_handling.rst b/docs/cudf/source/api_docs/string_handling.rst index 3087bcaa826..8d4646c47a7 100644 --- a/docs/cudf/source/api_docs/string_handling.rst +++ b/docs/cudf/source/api_docs/string_handling.rst @@ -83,7 +83,6 @@ strings and apply several methods to it. 
These can be accessed like rsplit startswith strip - subword_tokenize swapcase title token_count From 176568bf546be20f3dc09c3cc26901cc4c985fbc Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Thu, 14 Apr 2022 14:43:00 -0700 Subject: [PATCH 3/6] more fixes --- docs/cudf/source/conf.py | 1 + python/cudf/cudf/core/_base_index.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py index dbdf8e59e6a..d65b77ef74b 100644 --- a/docs/cudf/source/conf.py +++ b/docs/cudf/source/conf.py @@ -252,6 +252,7 @@ def process_class_docstrings(app, what, name, obj, options, lines): lines[:] = lines[:cut_index] +nitpick_ignore = [("py:class", "SeriesOrIndex"),] def setup(app): diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py index 259a7f711c3..6fed6510484 100644 --- a/python/cudf/cudf/core/_base_index.py +++ b/python/cudf/cudf/core/_base_index.py @@ -118,7 +118,7 @@ def get_level_values(self, level): See Also -------- - cudf.core.multiindex.MultiIndex.get_level_values : Get values for + cudf.MultiIndex.get_level_values : Get values for a level of a MultiIndex. 
Notes From 3dd05dd4172eeae3d6293432139369da1b9e091e Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Fri, 15 Apr 2022 15:45:20 -0700 Subject: [PATCH 4/6] more fixes --- docs/cudf/source/conf.py | 12 ++++++++++-- docs/cudf/x.txt | 16 ++++++++++++++++ python/cudf/cudf/core/column/string.py | 6 +++--- python/cudf/cudf/core/frame.py | 4 ++-- python/cudf/cudf/core/groupby/groupby.py | 2 +- 5 files changed, 32 insertions(+), 8 deletions(-) create mode 100644 docs/cudf/x.txt diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py index d65b77ef74b..3de344b26b8 100644 --- a/docs/cudf/source/conf.py +++ b/docs/cudf/source/conf.py @@ -22,6 +22,7 @@ from docutils.nodes import Text from sphinx.addnodes import pending_xref + import cudf sys.path.insert(0, os.path.abspath(cudf.__path__[0])) @@ -231,10 +232,14 @@ def resolve_aliases(app, doctree): def ignore_internal_references(app, env, node, contnode): name = node.get("reftarget", None) - if name is not None and name in _internal_names_to_ignore: + if name == "cudf.core.index.GenericIndex": + node["reftarget"] = "cudf.Index" + return contnode + elif name is not None and name in _internal_names_to_ignore: node["reftarget"] = "" return contnode + def process_class_docstrings(app, what, name, obj, options, lines): """ For those classes for which we use :: @@ -253,7 +258,10 @@ def process_class_docstrings(app, what, name, obj, options, lines): nitpick_ignore = [("py:class", "SeriesOrIndex"),] - +intersphinx_mapping = { + "numpy": ("https://numpy.org/doc/stable/", None), + "pyarrow": ("https://arrow.apache.org/docs/", None), + } def setup(app): app.add_css_file("params.css") diff --git a/docs/cudf/x.txt b/docs/cudf/x.txt new file mode 100644 index 00000000000..4b80058bb68 --- /dev/null +++ b/docs/cudf/x.txt @@ -0,0 +1,16 @@ +Running Sphinx v4.5.0 +Adding copy buttons to code blocks... +making output directory... done +loading intersphinx inventory from https://docs.python.org/objects.inv... 
+loading intersphinx inventory from https://docs.cupy.dev/en/stable/objects.inv... +intersphinx inventory has moved: https://docs.python.org/objects.inv -> https://docs.python.org/3/objects.inv +[autosummary] generating autosummary for: api_docs/api/cudf.CategoricalIndex.categories.rst, api_docs/api/cudf.CategoricalIndex.codes.rst, api_docs/api/cudf.CategoricalIndex.equals.rst, api_docs/api/cudf.CategoricalIndex.rst, api_docs/api/cudf.DataFrame.T.rst, api_docs/api/cudf.DataFrame.__iter__.rst, api_docs/api/cudf.DataFrame.abs.rst, api_docs/api/cudf.DataFrame.add.rst, api_docs/api/cudf.DataFrame.add_prefix.rst, api_docs/api/cudf.DataFrame.add_suffix.rst, ..., basics/io-gds-integration.rst, basics/io-nvcomp-integration.rst, basics/io-supported-types.rst, basics/io.rst, index.rst, user_guide/10min-cudf-cupy.ipynb, user_guide/10min.ipynb, user_guide/Working-with-missing-data.ipynb, user_guide/guide-to-udfs.ipynb, user_guide/index.rst +building [mo]: targets for 0 po files that are out of date +building [html]: targets for 681 source files that are out of date +updating environment: [new config] 681 added, 0 changed, 0 removed +reading sources... [ 0%] api_docs/api/cudf.CategoricalIndex +reading sources... [ 0%] api_docs/api/cudf.CategoricalIndex.categories +reading sources... [ 0%] api_docs/api/cudf.CategoricalIndex.codes +reading sources... [ 0%] api_docs/api/cudf.CategoricalIndex.equals +reading sources... 
[ 0%] api_docs/api/cudf.DataFrame +Makefile:20: recipe for target 'html' failed diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index d5d45c341d5..bf50cc578f6 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -36,7 +36,7 @@ ) from cudf.core.buffer import Buffer from cudf.core.column import column, datetime -from cudf.core.column.methods import ColumnMethods, ParentType +from cudf.core.column.methods import ColumnMethods from cudf.utils import utils from cudf.utils.docutils import copy_docstring from cudf.utils.dtypes import can_convert_to_column @@ -162,7 +162,7 @@ def htoi(self) -> SeriesOrIndex: hex_to_int = htoi - def ip2int(self) -> ParentType: + def ip2int(self) -> SeriesOrIndex: """ This converts ip strings to integers @@ -4956,7 +4956,7 @@ def edit_distance(self, targets) -> SeriesOrIndex: libstrings.edit_distance(self._column, targets_column) ) - def edit_distance_matrix(self) -> ParentType: + def edit_distance_matrix(self) -> SeriesOrIndex: """Computes the edit distance between strings in the series. The series to compute the matrix should have more than 2 strings and diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 806cdf14c71..c877a011a1b 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -571,8 +571,8 @@ def to_numpy( Parameters ---------- - dtype : str or numpy.dtype, optional - The dtype to pass to :meth:`numpy.asarray`. + dtype : str or :external+numpy:py:class:`numpy.dtype`, optional + The dtype to pass to :external+numpy:py:func:`numpy.asarray`. copy : bool, default True Whether to ensure that the returned value is not a view on another array. 
This parameter must be ``True`` since cuDF must copy diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index 40f8eda0e4f..ff75e4e7ae8 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -783,7 +783,7 @@ def transform(self, function): See also -------- - cudf.core.groupby.GroupBy.agg + agg """ try: result = self.agg(function) From 0c27781097a9f9bea5a294bc90d54fb94d6f52f9 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Fri, 15 Apr 2022 15:47:57 -0700 Subject: [PATCH 5/6] remove file --- docs/cudf/x.txt | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100644 docs/cudf/x.txt diff --git a/docs/cudf/x.txt b/docs/cudf/x.txt deleted file mode 100644 index 4b80058bb68..00000000000 --- a/docs/cudf/x.txt +++ /dev/null @@ -1,16 +0,0 @@ -Running Sphinx v4.5.0 -Adding copy buttons to code blocks... -making output directory... done -loading intersphinx inventory from https://docs.python.org/objects.inv... -loading intersphinx inventory from https://docs.cupy.dev/en/stable/objects.inv... 
-intersphinx inventory has moved: https://docs.python.org/objects.inv -> https://docs.python.org/3/objects.inv -[autosummary] generating autosummary for: api_docs/api/cudf.CategoricalIndex.categories.rst, api_docs/api/cudf.CategoricalIndex.codes.rst, api_docs/api/cudf.CategoricalIndex.equals.rst, api_docs/api/cudf.CategoricalIndex.rst, api_docs/api/cudf.DataFrame.T.rst, api_docs/api/cudf.DataFrame.__iter__.rst, api_docs/api/cudf.DataFrame.abs.rst, api_docs/api/cudf.DataFrame.add.rst, api_docs/api/cudf.DataFrame.add_prefix.rst, api_docs/api/cudf.DataFrame.add_suffix.rst, ..., basics/io-gds-integration.rst, basics/io-nvcomp-integration.rst, basics/io-supported-types.rst, basics/io.rst, index.rst, user_guide/10min-cudf-cupy.ipynb, user_guide/10min.ipynb, user_guide/Working-with-missing-data.ipynb, user_guide/guide-to-udfs.ipynb, user_guide/index.rst -building [mo]: targets for 0 po files that are out of date -building [html]: targets for 681 source files that are out of date -updating environment: [new config] 681 added, 0 changed, 0 removed -reading sources... [ 0%] api_docs/api/cudf.CategoricalIndex -reading sources... [ 0%] api_docs/api/cudf.CategoricalIndex.categories -reading sources... [ 0%] api_docs/api/cudf.CategoricalIndex.codes -reading sources... [ 0%] api_docs/api/cudf.CategoricalIndex.equals -reading sources... 
[ 0%] api_docs/api/cudf.DataFrame -Makefile:20: recipe for target 'html' failed From 58a77d838c3807611b768a2158f6cfaec4f82f19 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Tue, 17 May 2022 13:01:52 -0500 Subject: [PATCH 6/6] Update conf.py --- docs/cudf/source/conf.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py index c2dbee29203..e9fd29ce859 100644 --- a/docs/cudf/source/conf.py +++ b/docs/cudf/source/conf.py @@ -238,6 +238,9 @@ def resolve_aliases(app, doctree): def ignore_internal_references(app, env, node, contnode): name = node.get("reftarget", None) if name == "cudf.core.index.GenericIndex": + # We don't expose docs for `cudf.core.index.GenericIndex` + # hence we would want the docstring & mypy references to + # use `cudf.Index` node["reftarget"] = "cudf.Index" return contnode elif name is not None and name in _internal_names_to_ignore: