From c11b9a4ea2fa72dc0868830dab337ae6851284a8 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Fri, 29 Oct 2021 15:01:18 -0700
Subject: [PATCH 01/58] Add doctests script.

---
 python/cudf/cudf/tests/test_doctests.py | 28 +++++++++++++++++++++++++
 1 file changed, 28 insertions(+)
 create mode 100644 python/cudf/cudf/tests/test_doctests.py

diff --git a/python/cudf/cudf/tests/test_doctests.py b/python/cudf/cudf/tests/test_doctests.py
new file mode 100644
index 00000000000..f01a79a0d53
--- /dev/null
+++ b/python/cudf/cudf/tests/test_doctests.py
@@ -0,0 +1,28 @@
+import doctest
+import inspect
+
+import pytest
+
+import cudf
+
+
+def fetch_doctests():
+    finder = doctest.DocTestFinder()
+    for name, member in inspect.getmembers(cudf):
+        if inspect.ismodule(member):
+            for docstring in finder.find(member):
+                if docstring.examples:
+                    yield docstring
+
+
+class TestDoctests:
+    @pytest.mark.parametrize(
+        "docstring", fetch_doctests(), ids=lambda docstring: docstring.name
+    )
+    def test_docstring(self, docstring):
+        optionflags = doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE
+        runner = doctest.DocTestRunner(optionflags=optionflags)
+        runner.run(docstring)
+        results = runner.summarize()
+        if results.failed:
+            raise AssertionError(results)

From 5e88c6755b06ee85a99a3e82cd7c65bce9060feb Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Mon, 1 Nov 2021 20:46:01 -0500
Subject: [PATCH 02/58] Define __all__ and use it to find doctests.

---
 python/cudf/cudf/__init__.py                | 83 +++++++++++++++++++++
 python/cudf/cudf/api/__init__.py            |  4 +-
 python/cudf/cudf/api/extensions/__init__.py |  6 ++
 python/cudf/cudf/api/extensions/accessor.py |  7 ++
 python/cudf/cudf/tests/test_doctests.py     | 30 ++++++--
 5 files changed, 124 insertions(+), 6 deletions(-)

diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py
index df09a72ce25..8155d067ebb 100644
--- a/python/cudf/cudf/__init__.py
+++ b/python/cudf/cudf/__init__.py
@@ -114,3 +114,86 @@
 
 __version__ = get_versions()["version"]
 del get_versions
+
+__all__ = [
+    "dtype",
+    "api",
+    "core",
+    "datasets",
+    "testing",
+    "NA",
+    "Scalar",
+    "BaseIndex",
+    "CategoricalIndex",
+    "DatetimeIndex",
+    "Float32Index",
+    "Float64Index",
+    "Index",
+    "GenericIndex",
+    "Int8Index",
+    "Int16Index",
+    "Int32Index",
+    "Int64Index",
+    "IntervalIndex",
+    "RangeIndex",
+    "StringIndex",
+    "TimedeltaIndex",
+    "UInt8Index",
+    "UInt16Index",
+    "UInt32Index",
+    "UInt64Index",
+    "interval_range",
+    "DataFrame",
+    "from_pandas",
+    "merge",
+    "Series",
+    "MultiIndex",
+    "cut",
+    "factorize",
+    "CategoricalDtype",
+    "Decimal64Dtype",
+    "Decimal32Dtype",
+    "IntervalDtype",
+    "ListDtype",
+    "StructDtype",
+    "Grouper",
+    "add",
+    "arccos",
+    "arcsin",
+    "arctan",
+    "cos",
+    "exp",
+    "floor_divide",
+    "log",
+    "logical_and",
+    "logical_not",
+    "logical_or",
+    "multiply",
+    "remainder",
+    "sin",
+    "sqrt",
+    "subtract",
+    "tan",
+    "true_divide",
+    "concat",
+    "get_dummies",
+    "melt",
+    "merge_sorted",
+    "pivot",
+    "unstack",
+    "isclose",
+    "DateOffset",
+    "to_datetime",
+    "to_numeric",
+    "from_dlpack",
+    "read_avro",
+    "read_csv",
+    "read_feather",
+    "read_hdf",
+    "read_json",
+    "read_orc",
+    "read_parquet",
+    "read_text",
+    "set_allocator",
+    "__version__",
+]
diff --git a/python/cudf/cudf/api/__init__.py b/python/cudf/cudf/api/__init__.py
index 21c24015e41..c66bfb4efeb 100644
--- a/python/cudf/cudf/api/__init__.py
+++ b/python/cudf/cudf/api/__init__.py
@@ -1,3 +1,5 @@
 # Copyright (c) 2021, NVIDIA CORPORATION.
 
-from cudf.api import types
+from cudf.api import extensions, types
+
+__all__ = ["extensions", "types"]
diff --git a/python/cudf/cudf/api/extensions/__init__.py b/python/cudf/cudf/api/extensions/__init__.py
index c971e6f7731..eeb5dcdb32a 100644
--- a/python/cudf/cudf/api/extensions/__init__.py
+++ b/python/cudf/cudf/api/extensions/__init__.py
@@ -5,3 +5,9 @@
     register_index_accessor,
     register_series_accessor,
 )
+
+__all__ = [
+    "register_dataframe_accessor",
+    "register_index_accessor",
+    "register_series_accessor",
+]
diff --git a/python/cudf/cudf/api/extensions/accessor.py b/python/cudf/cudf/api/extensions/accessor.py
index a27ffa90cfc..524c11f048d 100644
--- a/python/cudf/cudf/api/extensions/accessor.py
+++ b/python/cudf/cudf/api/extensions/accessor.py
@@ -159,3 +159,10 @@ def register_index_accessor(name):
 def register_series_accessor(name):
     """{docstring}"""
     return _register_accessor(name, cudf.Series)
+
+
+__all__ = [
+    "register_dataframe_accessor",
+    "register_index_accessor",
+    "register_series_accessor",
+]
diff --git a/python/cudf/cudf/tests/test_doctests.py b/python/cudf/cudf/tests/test_doctests.py
index f01a79a0d53..c1cf62de007 100644
--- a/python/cudf/cudf/tests/test_doctests.py
+++ b/python/cudf/cudf/tests/test_doctests.py
@@ -5,14 +5,34 @@
 
 import cudf
 
+# These classes and all subclasses will be doctested
+doctested_classes = [
+    "Frame",
+    "BaseIndex",
+]
+
+
+def find_docstrings_in_module(finder, module):
+    print("Finding in module", module.__name__)
+    for docstring in finder.find(module):
+        print("Finding in docstring", docstring.name, docstring.filename)
+        if docstring.examples:
+            yield docstring
+    for name, member in inspect.getmembers(module):
+        if name not in getattr(module, "__all__", []):
+            if inspect.ismodule(member):
+                print("SKIPPING MODULE", module.__name__, name)
+            else:
+                print("Skipping member", module.__name__, name)
+            continue
+        # print("Investigating", name)
+        if inspect.ismodule(member):
+            yield from find_docstrings_in_module(finder, member)
+
 
 def fetch_doctests():
     finder = doctest.DocTestFinder()
-    for name, member in inspect.getmembers(cudf):
-        if inspect.ismodule(member):
-            for docstring in finder.find(member):
-                if docstring.examples:
-                    yield docstring
+    yield from find_docstrings_in_module(finder, cudf)
 
 
 class TestDoctests:

From bb37a38064ec58acc4d8eeb9a34b93acccc9118e Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Wed, 1 Dec 2021 14:39:14 -0600
Subject: [PATCH 03/58] Update __all__ in cudf/__init__.py.

---
 python/cudf/cudf/__init__.py | 81 ++++++++++++++----------------------
 1 file changed, 32 insertions(+), 49 deletions(-)

diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py
index b528c2e16ad..961438e22bc 100644
--- a/python/cudf/cudf/__init__.py
+++ b/python/cudf/cudf/__init__.py
@@ -56,6 +56,8 @@
     StructDtype,
 )
 from cudf.core.groupby import Grouper
+
+# TODO: Math operations like add, arccos, etc. are not exposed in pandas' root namespace.
 from cudf.core.ops import (
     add,
     arccos,
@@ -84,6 +86,8 @@
     pivot,
     unstack,
 )
+
+# TODO: Pandas does not expose isclose in the root namespace.
 from cudf.core.series import isclose
 from cudf.core.tools.datetimes import DateOffset, to_datetime
 from cudf.core.tools.numeric import to_numeric
@@ -128,76 +132,52 @@
 del get_versions
 
 __all__ = [
-    "dtype",
-    "api",
-    "core",
-    "datasets",
-    "testing",
-    "NA",
-    "Scalar",
     "BaseIndex",
+    "CategoricalDtype",
     "CategoricalIndex",
+    "DataFrame",
+    "DateOffset",
     "DatetimeIndex",
+    "Decimal32Dtype",
+    "Decimal64Dtype",
     "Float32Index",
     "Float64Index",
-    "Index",
     "GenericIndex",
-    "Int8Index",
+    "Grouper",
+    "Index",
     "Int16Index",
     "Int32Index",
     "Int64Index",
+    "Int8Index",
+    "IntervalDtype",
     "IntervalIndex",
+    "ListDtype",
+    "MultiIndex",
+    "NA",
     "RangeIndex",
+    "Scalar",
+    "Series",
     "StringIndex",
+    "StructDtype",
     "TimedeltaIndex",
-    "UInt8Index",
     "UInt16Index",
     "UInt32Index",
     "UInt64Index",
-    "interval_range",
-    "DataFrame",
-    "from_pandas",
-    "merge",
-    "Series",
-    "MultiIndex",
+    "UInt8Index",
+    "api",
+    "concat",
     "cut",
+    "date_range",
     "factorize",
-    "CategoricalDtype",
-    "Decimal64Dtype",
-    "Decimal32Dtype",
-    "IntervalDtype",
-    "ListDtype",
-    "StructDtype",
-    "Grouper",
-    "add",
-    "arccos",
-    "arcsin",
-    "arctan",
-    "cos",
-    "exp",
-    "floor_divide",
-    "log",
-    "logical_and",
-    "logical_not",
-    "logical_or",
-    "multiply",
-    "remainder",
-    "sin",
-    "sqrt",
-    "subtract",
-    "tan",
-    "true_divide",
-    "concat",
+    "from_dataframe",
+    "from_dlpack",
+    "from_pandas",
     "get_dummies",
+    "interval_range",
     "melt",
+    "merge",
     "merge_sorted",
     "pivot",
-    "unstack",
-    "isclose",
-    "DateOffset",
-    "to_datetime",
-    "to_numeric",
-    "from_dlpack",
     "read_avro",
     "read_csv",
     "read_feather",
@@ -207,5 +187,8 @@
     "read_parquet",
     "read_text",
     "set_allocator",
-    "__version__",
+    "testing",
+    "to_datetime",
+    "to_numeric",
+    "unstack",
 ]

From e4330af46865043a00d011c05964e7d60c87792b Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Wed, 1 Dec 2021 14:46:07 -0600
Subject: [PATCH 04/58] Fix recursion logic for modules and classes.

---
 python/cudf/cudf/tests/test_doctests.py | 41 ++++++++++++++-----------
 1 file changed, 23 insertions(+), 18 deletions(-)

diff --git a/python/cudf/cudf/tests/test_doctests.py b/python/cudf/cudf/tests/test_doctests.py
index c1cf62de007..f840f44e5f6 100644
--- a/python/cudf/cudf/tests/test_doctests.py
+++ b/python/cudf/cudf/tests/test_doctests.py
@@ -5,34 +5,39 @@
 
 import cudf
 
-# These classes and all subclasses will be doctested
-doctested_classes = [
-    "Frame",
-    "BaseIndex",
-]
 
+def _name_in_all(parent, name, member):
+    return name in getattr(parent, "__all__", [])
 
-def find_docstrings_in_module(finder, module):
-    print("Finding in module", module.__name__)
-    for docstring in finder.find(module):
-        print("Finding in docstring", docstring.name, docstring.filename)
+
+def _is_public_name(parent, name, member):
+    return not name.startswith("_")
+
+
+def find_docstrings_in_obj(finder, obj, criteria=None):
+    for docstring in finder.find(obj):
         if docstring.examples:
             yield docstring
-    for name, member in inspect.getmembers(module):
-        if name not in getattr(module, "__all__", []):
-            if inspect.ismodule(member):
-                print("SKIPPING MODULE", module.__name__, name)
-            else:
-                print("Skipping member", module.__name__, name)
+    for name, member in inspect.getmembers(obj):
+        # Filter out non-matching objects with criteria
+        if criteria is not None and not criteria(obj, name, member):
             continue
-        # print("Investigating", name)
+        # Recurse over the public API of modules (objects defined in __all__)
         if inspect.ismodule(member):
-            yield from find_docstrings_in_module(finder, member)
+            yield from find_docstrings_in_obj(
+                finder, member, criteria=_name_in_all
+            )
+        # Recurse over the public API of classes (attributes not prefixed with
+        # an underscore)
+        if inspect.isclass(member):
+            yield from find_docstrings_in_obj(
+                finder, member, criteria=_is_public_name
+            )
 
 
 def fetch_doctests():
     finder = doctest.DocTestFinder()
-    yield from find_docstrings_in_module(finder, cudf)
+    yield from find_docstrings_in_obj(finder, cudf, criteria=_name_in_all)
 
 
 class TestDoctests:

From 1ed143a4155b07f3c28f5e57fd88d9f3699d3861 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Wed, 1 Dec 2021 15:06:42 -0600
Subject: [PATCH 05/58] Make doctest helper functions private.

---
 python/cudf/cudf/tests/test_doctests.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/python/cudf/cudf/tests/test_doctests.py b/python/cudf/cudf/tests/test_doctests.py
index f840f44e5f6..b405fdb530c 100644
--- a/python/cudf/cudf/tests/test_doctests.py
+++ b/python/cudf/cudf/tests/test_doctests.py
@@ -14,7 +14,7 @@ def _is_public_name(parent, name, member):
     return not name.startswith("_")
 
 
-def find_docstrings_in_obj(finder, obj, criteria=None):
+def _find_docstrings_in_obj(finder, obj, criteria=None):
     for docstring in finder.find(obj):
         if docstring.examples:
             yield docstring
@@ -24,25 +24,25 @@ def find_docstrings_in_obj(finder, obj, criteria=None):
             continue
         # Recurse over the public API of modules (objects defined in __all__)
         if inspect.ismodule(member):
-            yield from find_docstrings_in_obj(
+            yield from _find_docstrings_in_obj(
                 finder, member, criteria=_name_in_all
             )
         # Recurse over the public API of classes (attributes not prefixed with
         # an underscore)
         if inspect.isclass(member):
-            yield from find_docstrings_in_obj(
+            yield from _find_docstrings_in_obj(
                 finder, member, criteria=_is_public_name
             )
 
 
-def fetch_doctests():
+def _fetch_doctests():
     finder = doctest.DocTestFinder()
-    yield from find_docstrings_in_obj(finder, cudf, criteria=_name_in_all)
+    yield from _find_docstrings_in_obj(finder, cudf, criteria=_name_in_all)
 
 
 class TestDoctests:
     @pytest.mark.parametrize(
-        "docstring", fetch_doctests(), ids=lambda docstring: docstring.name
+        "docstring", _fetch_doctests(), ids=lambda docstring: docstring.name
     )
     def test_docstring(self, docstring):
         optionflags = doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE

From 7155cf4be5a6550f02fbd9fec8bdba95edac50d5 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Wed, 1 Dec 2021 20:29:57 -0600
Subject: [PATCH 06/58] Use <NA> instead of null.

---
 python/cudf/cudf/core/_base_index.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py
index d688b75ed14..32dacb14e9c 100644
--- a/python/cudf/cudf/core/_base_index.py
+++ b/python/cudf/cudf/core/_base_index.py
@@ -488,7 +488,7 @@ def fillna(self, value, downcast=None):
         >>> import cudf
         >>> index = cudf.Index([1, 2, None, 4])
         >>> index
-        Int64Index([1, 2, null, 4], dtype='int64')
+        Int64Index([1, 2, <NA>, 4], dtype='int64')
         >>> index.fillna(3)
         Int64Index([1, 2, 3, 4], dtype='int64')
         """

From 36c819f9dcf77e77554cef8d1b50bfcf8d80ed4d Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Wed, 1 Dec 2021 20:30:23 -0600
Subject: [PATCH 07/58] Inject globals into doctests.

---
 python/cudf/cudf/tests/test_doctests.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/python/cudf/cudf/tests/test_doctests.py b/python/cudf/cudf/tests/test_doctests.py
index b405fdb530c..154859e5050 100644
--- a/python/cudf/cudf/tests/test_doctests.py
+++ b/python/cudf/cudf/tests/test_doctests.py
@@ -1,6 +1,7 @@
 import doctest
 import inspect
 
+import numpy as np
 import pytest
 
 import cudf
@@ -47,6 +48,8 @@ class TestDoctests:
     def test_docstring(self, docstring):
         optionflags = doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE
         runner = doctest.DocTestRunner(optionflags=optionflags)
+        globs = dict(np=np,)
+        docstring.globs = globs
         runner.run(docstring)
         results = runner.summarize()
         if results.failed:

From 4f18028c811ce7bba89c6e8b98bf1c720f49e768 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Wed, 1 Dec 2021 20:41:33 -0600
Subject: [PATCH 08/58] Add cudf to globals.

---
 python/cudf/cudf/tests/test_doctests.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/tests/test_doctests.py b/python/cudf/cudf/tests/test_doctests.py
index 154859e5050..8c318bd19b0 100644
--- a/python/cudf/cudf/tests/test_doctests.py
+++ b/python/cudf/cudf/tests/test_doctests.py
@@ -48,7 +48,7 @@ class TestDoctests:
     def test_docstring(self, docstring):
         optionflags = doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE
         runner = doctest.DocTestRunner(optionflags=optionflags)
-        globs = dict(np=np,)
+        globs = dict(cudf=cudf, np=np,)
         docstring.globs = globs
         runner.run(docstring)
         results = runner.summarize()

From 427a72426c0c98cc4ae9d97fa6f063703bd5bfed Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Wed, 1 Dec 2021 20:44:13 -0600
Subject: [PATCH 09/58] Fix Series.dt.

---
 python/cudf/cudf/core/series.py | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index cf035ef457d..28daf23d60a 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -512,13 +512,26 @@ def from_pandas(cls, s, nan_as_null=None):
     @property
     def dt(self):
         """
-        Accessor object for datetimelike properties of the Series values.
+        Accessor object for datetime-like properties of the Series values.
 
         Examples
         --------
+        >>> s = cudf.Series(cudf.date_range(
+        ...   start='2001-02-03 12:00:00',
+        ...   end='2001-02-03 14:00:00',
+        ...   freq='1H'))
         >>> s.dt.hour
+        0    12
+        1    13
+        dtype: int16
         >>> s.dt.second
+        0    0
+        1    0
+        dtype: int16
         >>> s.dt.day
+        0    3
+        1    3
+        dtype: int16
 
         Returns
         -------

From 46c6435fc2c05bf28ddbbb71917bce1ee7ec5670 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Wed, 1 Dec 2021 20:45:05 -0600
Subject: [PATCH 10/58] Fix Series.memory_usage.

---
 python/cudf/cudf/core/series.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 28daf23d60a..a7dda1f2c3b 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -1021,7 +1021,7 @@ def memory_usage(self, index=True, deep=False):
         --------
         >>> s = cudf.Series(range(3), index=['a','b','c'])
         >>> s.memory_usage()
-        48
+        43
 
         Not including the index gives the size of the rest of the data, which
         is necessarily smaller:

From 38b2fd8fb29299ba4858f7b294a7d5e9c152f2ae Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Wed, 1 Dec 2021 20:45:50 -0600
Subject: [PATCH 11/58] Fix Series.hash_encode(..., use_name=True).

---
 python/cudf/cudf/core/series.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index a7dda1f2c3b..5ab1e7c63fe 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -3159,9 +3159,9 @@ def hash_encode(self, stop, use_name=False):
         encoding by specifying `use_name=True`
 
         >>> series.hash_encode(stop=200, use_name=True)
-        0    131
-        1     29
-        2     76
+        0     57
+        1     23
+        2    104
         dtype: int32
         """
         warnings.warn(

From ac0e1746406950331d2cbda183bf8ecab19af709 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Wed, 1 Dec 2021 20:46:59 -0600
Subject: [PATCH 12/58] Fix Series.keys.

---
 python/cudf/cudf/core/series.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 5ab1e7c63fe..e899144cdf0 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -3697,7 +3697,7 @@ def keys(self):
         dtype: int64
 
         >>> sr.keys()
-        RangeIndex(start=0, stop=6)
+        RangeIndex(start=0, stop=6, step=1)
         >>> sr = cudf.Series(['a', 'b', 'c'])
         >>> sr
         0    a
@@ -3705,7 +3705,7 @@ def keys(self):
         2    c
         dtype: object
         >>> sr.keys()
-        RangeIndex(start=0, stop=3)
+        RangeIndex(start=0, stop=3, step=1)
         >>> sr = cudf.Series([1, 2, 3], index=['a', 'b', 'c'])
         >>> sr
         a    1

From b380afe5ec25cc9110a8b6bd42f8f5de6127862e Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Wed, 1 Dec 2021 20:48:23 -0600
Subject: [PATCH 13/58] Fix Series.drop.

---
 python/cudf/cudf/core/series.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index e899144cdf0..b72f510944f 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -687,10 +687,12 @@ def drop(
            y    3
         2  x    4
            y    5
+        dtype: int64
         >>> s.drop(labels='y', level=1)
         0  x    0
         1  x    2
         2  x    4
+        Name: 2, dtype: int64
         """
         if labels is not None:
             if index is not None or columns is not None:

From 318a0b793dfd9037336cc4211386dc464ff56e41 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Wed, 1 Dec 2021 20:48:51 -0600
Subject: [PATCH 14/58] Fix Series.dropna.

---
 python/cudf/cudf/core/series.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index b72f510944f..d000039c1c9 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -1529,7 +1529,7 @@ def dropna(self, axis=0, inplace=False, how=None):
         >>> ser
         0       1
         1       2
-        2    null
+        2    <NA>
         dtype: int64
 
         Drop null values from a Series.

From 1e4f183eb780ce308b1a5bbeeade8be246000412 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Wed, 1 Dec 2021 20:52:50 -0600
Subject: [PATCH 15/58] Fix Series.data, Series.as_mask.

---
 python/cudf/cudf/core/series.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index d000039c1c9..920bd8239f6 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -1774,7 +1774,7 @@ def data(self):
         3    4
         dtype: int64
         >>> series.data
-        <cudf.core.buffer.Buffer object at 0x7f23c192d110>
+        <cudf.core.buffer.Buffer object at 0x...>
         >>> series.data.to_host_array()
         array([1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0,
                0, 0, 4, 0, 0, 0, 0, 0, 0, 0], dtype=uint8)
@@ -1798,7 +1798,7 @@ def as_mask(self):
         >>> import cudf
         >>> s = cudf.Series([True, False, True])
         >>> s.as_mask()
-        <cudf.core.buffer.Buffer object at 0x7f23c3eed0d0>
+        <cudf.core.buffer.Buffer object at 0x...>
         >>> s.as_mask().to_host_array()
         array([  5,   0,   0,   0,   0,   0,   0,   0,   1,   0,   0,   0,   0,
                  0,   0,   0,   2,   0,   0,   0,   0,   0,   0,   0, 181, 164,

From 2da598d558721a1c740050837c7de91afda5353a Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 2 Dec 2021 08:57:56 -0600
Subject: [PATCH 16/58] Fix Series.cat.

---
 python/cudf/cudf/core/column/categorical.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py
index a2c1f04b2f2..baf477554c3 100644
--- a/python/cudf/cudf/core/column/categorical.py
+++ b/python/cudf/cudf/core/column/categorical.py
@@ -61,7 +61,6 @@ class CategoricalAccessor(ColumnMethods):
     --------
     >>> s = cudf.Series([1,2,3], dtype='category')
     >>> s
-    >>> s
     0    1
     1    2
     2    3

From e60e90961467579e69c591bbbb2053b7158d44e0 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 2 Dec 2021 09:07:17 -0600
Subject: [PATCH 17/58] Fix Scalar.

---
 python/cudf/cudf/core/scalar.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/scalar.py b/python/cudf/cudf/core/scalar.py
index 787b28e213c..37bb8e32c5a 100644
--- a/python/cudf/cudf/core/scalar.py
+++ b/python/cudf/cudf/core/scalar.py
@@ -32,7 +32,7 @@ class Scalar(object):
     >>> cudf.Scalar(42, dtype='int64') + np.int8(21)
     Scalar(63, dtype=int64)
     >>> x = cudf.Scalar(42, dtype='datetime64[s]')
-    >>> y = cudf.Scalar(21, dtype='timedelta64[ns])
+    >>> y = cudf.Scalar(21, dtype='timedelta64[ns]')
     >>> x - y
     Scalar(1970-01-01T00:00:41.999999979, dtype=datetime64[ns])
     >>> cudf.Series([1,2,3]) + cudf.Scalar(1)

From b7443d3a6404e6ef37cd04f7b40f94e35ccc1e3a Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 2 Dec 2021 09:10:51 -0600
Subject: [PATCH 18/58] Fix MultiIndex.

---
 python/cudf/cudf/core/multiindex.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py
index e0c68e56f63..28bf11dcf37 100644
--- a/python/cudf/cudf/core/multiindex.py
+++ b/python/cudf/cudf/core/multiindex.py
@@ -190,7 +190,7 @@ def rename(self, names, inplace=False):
         Renaming each levels of a MultiIndex to specified name:
 
         >>> midx = cudf.MultiIndex.from_product(
-                [('A', 'B'), (2020, 2021)], names=['c1', 'c2'])
+        ...     [('A', 'B'), (2020, 2021)], names=['c1', 'c2'])
         >>> midx.rename(['lv1', 'lv2'])
         MultiIndex([('A', 2020),
                     ('A', 2021),
@@ -1076,7 +1076,7 @@ def values(self):
             [4, 2],
             [5, 1]])
         >>> type(midx.values)
-        <class 'cupy.core.core.ndarray'>
+        <class 'cupy._core.core.ndarray'>
         """
         return self.to_frame(index=False).values
 
@@ -1577,13 +1577,13 @@ def get_loc(self, key, method=None, tolerance=None):
         --------
         >>> import cudf
         >>> mi = cudf.MultiIndex.from_tuples(
-            [('a', 'd'), ('b', 'e'), ('b', 'f')])
+        ...     [('a', 'd'), ('b', 'e'), ('b', 'f')])
         >>> mi.get_loc('b')
         slice(1, 3, None)
         >>> mi.get_loc(('b', 'e'))
         1
         >>> non_monotonic_non_unique_idx = cudf.MultiIndex.from_tuples(
-            [('c', 'd'), ('b', 'e'), ('a', 'f'), ('b', 'e')])
+        ...     [('c', 'd'), ('b', 'e'), ('a', 'f'), ('b', 'e')])
         >>> non_monotonic_non_unique_idx.get_loc('b') # differ from pandas
         slice(1, 4, 2)
 
@@ -1599,10 +1599,10 @@ def get_loc(self, key, method=None, tolerance=None):
 
                 >>> import pandas as pd
                 >>> import cudf
-                >>> x = pd.MultiIndex.from_tuples(
-                            [(2, 1, 1), (1, 2, 3), (1, 2, 1),
-                                (1, 1, 1), (1, 1, 1), (2, 2, 1)]
-                        )
+                >>> x = pd.MultiIndex.from_tuples([
+                ...     (2, 1, 1), (1, 2, 3), (1, 2, 1),
+                ...     (1, 1, 1), (1, 1, 1), (2, 2, 1),
+                ... ])
                 >>> x.get_loc(1)
                 array([False,  True,  True,  True,  True, False])
                 >>> cudf.from_pandas(x).get_loc(1)

From 9d389b55d661d2e4e1964066f6f5bcba039b96d3 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 2 Dec 2021 09:11:48 -0600
Subject: [PATCH 19/58] Fix IntervalIndex.from_breaks.

---
 python/cudf/cudf/core/index.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 8f905ee6d49..059f012dd16 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -2396,9 +2396,7 @@ def from_breaks(breaks, closed="right", name=None, copy=False, dtype=None):
         >>> import cudf
         >>> import pandas as pd
         >>> cudf.IntervalIndex.from_breaks([0, 1, 2, 3])
-        IntervalIndex([(0, 1], (1, 2], (2, 3]],
-                    closed='right',
-                    dtype='interval[int64]')
+        IntervalIndex([(0, 1], (1, 2], (2, 3]], dtype='interval')
         """
         if copy:
             breaks = column.as_column(breaks, dtype=dtype).copy()

From 992558a7f37b8990a67b4c1f3b345e5343db013d Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 2 Dec 2021 09:14:34 -0600
Subject: [PATCH 20/58] Fix DatetimeIndex.floor.

---
 python/cudf/cudf/core/index.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 059f012dd16..a259b659666 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -1923,12 +1923,13 @@ def floor(self, field):
         Examples
         --------
         >>> import cudf
-        >>> gIndex = cudf.DatetimeIndex(["2020-05-31 08:59:59"
-        ... ,"1999-12-31 18:44:59"])
+        >>> gIndex = cudf.DatetimeIndex([
+        ...     "2020-05-31 08:59:59",
+        ...     "1999-12-31 18:44:59",
+        ... ])
         >>> gIndex.floor("T")
-        DatetimeIndex(['2020-05-31 08:59:00', '1999-12-31 18:44:00'],
-        dtype='datetime64[ns]', freq=None)
-        """
+        DatetimeIndex(['2020-05-31 08:59:00', '1999-12-31 18:44:00'], dtype='datetime64[ns]')
+        """  # noqa: E501
         out_column = self._values.floor(field)
 
         return self.__class__._from_data({self.name: out_column})

From 3864f2685dee791a7af1d737a6d14f044480d2d3 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 2 Dec 2021 09:16:09 -0600
Subject: [PATCH 21/58] Fix DatetimeIndex.ceil.

---
 python/cudf/cudf/core/index.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index a259b659666..e8fb0c23aa1 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -1892,12 +1892,13 @@ def ceil(self, field):
         Examples
         --------
         >>> import cudf
-        >>> gIndex = cudf.DatetimeIndex(["2020-05-31 08:00:00",
-        ... "1999-12-31 18:40:00"])
+        >>> gIndex = cudf.DatetimeIndex([
+        ...     "2020-05-31 08:05:42",
+        ...     "1999-12-31 18:40:30",
+        ... ])
         >>> gIndex.ceil("T")
-        DatetimeIndex(['2020-05-31 08:00:00', '1999-12-31 18:40:00'],
-        dtype='datetime64[ns]', freq=None)
-        """
+        DatetimeIndex(['2020-05-31 08:06:00', '1999-12-31 18:41:00'], dtype='datetime64[ns]')
+        """  # noqa: E501
         out_column = self._values.ceil(field)
 
         return self.__class__._from_data({self.name: out_column})

From 6460542b24686b36a6429b72ce4bcd248957697c Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 2 Dec 2021 09:18:42 -0600
Subject: [PATCH 22/58] Fix DatetimeIndex.

---
 python/cudf/cudf/core/index.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index e8fb0c23aa1..5b1fc13089c 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -1528,9 +1528,11 @@ class DatetimeIndex(GenericIndex):
     --------
     >>> import cudf
     >>> cudf.DatetimeIndex([1, 2, 3, 4], name="a")
-    DatetimeIndex(['1970-01-01 00:00:00.001000', '1970-01-01 00:00:00.002000',
-                   '1970-01-01 00:00:00.003000', '1970-01-01 00:00:00.004000'],
-                  dtype='datetime64[ms]', name='a')
+    DatetimeIndex(['1970-01-01 00:00:00.000000001',
+                   '1970-01-01 00:00:00.000000002',
+                   '1970-01-01 00:00:00.000000003',
+                   '1970-01-01 00:00:00.000000004'],
+                  dtype='datetime64[ns]', name='a')
     """
 
     def __init__(

From aba3bdf02166db80bf4d0e358fe8c6120deb41a2 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 2 Dec 2021 09:20:16 -0600
Subject: [PATCH 23/58] Fix DateOffset.

---
 python/cudf/cudf/core/tools/datetimes.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/python/cudf/cudf/core/tools/datetimes.py b/python/cudf/cudf/core/tools/datetimes.py
index 34d62ffc048..0d60ac2b94a 100644
--- a/python/cudf/cudf/core/tools/datetimes.py
+++ b/python/cudf/cudf/core/tools/datetimes.py
@@ -396,10 +396,10 @@ class DateOffset:
     --------
     >>> from cudf import DateOffset
     >>> ts = cudf.Series([
-        "2000-01-01 00:00:00.012345678",
-        "2000-01-31 00:00:00.012345678",
-        "2000-02-29 00:00:00.012345678",
-    ], dtype='datetime64[ns])
+    ...     "2000-01-01 00:00:00.012345678",
+    ...     "2000-01-31 00:00:00.012345678",
+    ...     "2000-02-29 00:00:00.012345678",
+    ... ], dtype='datetime64[ns]')
     >>> ts + DateOffset(months=3)
     0   2000-04-01 00:00:00.012345678
     1   2000-04-30 00:00:00.012345678

From 9e7627d77406c01fd57213d60f0e6f170c9043f1 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 2 Dec 2021 09:23:13 -0600
Subject: [PATCH 24/58] Fix DataFrame.unstack.

---
 python/cudf/cudf/core/reshape.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/cudf/cudf/core/reshape.py b/python/cudf/cudf/core/reshape.py
index b2fac7a6140..fcf8cebe887 100644
--- a/python/cudf/cudf/core/reshape.py
+++ b/python/cudf/cudf/core/reshape.py
@@ -973,6 +973,7 @@ def unstack(df, level, fill_value=None):
 
     Examples
     --------
+    >>> df = cudf.DataFrame()
     >>> df['a'] = [1, 1, 1, 2, 2]
     >>> df['b'] = [1, 2, 3, 1, 2]
     >>> df['c'] = [5, 6, 7, 8, 9]

From 30b6d75a5534eb0932734b40cdc8c7d9fc19ff78 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 2 Dec 2021 09:23:55 -0600
Subject: [PATCH 25/58] Fix DataFrame.explode.

---
 python/cudf/cudf/core/dataframe.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index c0cb6f1917f..1b9b818b6f5 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -6302,8 +6302,11 @@ def explode(self, column, ignore_index=False):
         Examples
         --------
         >>> import cudf
-        >>> cudf.DataFrame(
-                {"a": [[1, 2, 3], [], None, [4, 5]], "b": [11, 22, 33, 44]})
+        >>> df = cudf.DataFrame({
+        ...     "a": [[1, 2, 3], [], None, [4, 5]],
+        ...     "b": [11, 22, 33, 44],
+        ... })
+        >>> df
                    a   b
         0  [1, 2, 3]  11
         1         []  22

From 06248a3e5c2bfa1bd0aa1d785bcc8e81af00157d Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 2 Dec 2021 10:39:48 -0600
Subject: [PATCH 26/58] Fix formatting in DataFrame.stack.

---
 python/cudf/cudf/core/dataframe.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 1b9b818b6f5..d0ebcc2848a 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -5979,7 +5979,7 @@ def stack(self, level=-1, dropna=True):
         Examples
         --------
         >>> import cudf
-        >>> df = cudf.DataFrame({'a':[0,1,3], 'b':[1,2,4]})
+        >>> df = cudf.DataFrame({'a': [0, 1, 3], 'b': [1, 2, 4]})
         >>> df.stack()
         0  a    0
            b    1

From ff3a713247172086232fb69272485a3d073d242b Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 2 Dec 2021 10:40:08 -0600
Subject: [PATCH 27/58] Fix DataFrame.to_csv.

---
 python/cudf/cudf/utils/ioutils.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py
index 0f9d9d53b23..b1ecbe32b5e 100644
--- a/python/cudf/cudf/utils/ioutils.py
+++ b/python/cudf/cudf/utils/ioutils.py
@@ -955,9 +955,9 @@
 >>> import cudf
 >>> filename = 'foo.csv'
 >>> df = cudf.DataFrame({'x': [0, 1, 2, 3],
-                         'y': [1.0, 3.3, 2.2, 4.4],
-                         'z': ['a', 'b', 'c', 'd']})
->>> df = df.set_index([3, 2, 1, 0])
+...                      'y': [1.0, 3.3, 2.2, 4.4],
+...                      'z': ['a', 'b', 'c', 'd']})
+>>> df = df.set_index(cudf.Series([3, 2, 1, 0]))
 >>> df.to_csv(filename)
 
 """

From dcf2a68ffda3365e9a9c278c10a4229ba678f240 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 2 Dec 2021 10:50:01 -0600
Subject: [PATCH 28/58] Fix DataFrame.query.

---
 python/cudf/cudf/core/dataframe.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index d0ebcc2848a..d62a6193626 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -3854,10 +3854,10 @@ def query(self, expr, local_dict=None):
 
         Examples
         --------
-        >>> import cudf
-        >>> a = ('a', [1, 2, 2])
-        >>> b = ('b', [3, 4, 5])
-        >>> df = cudf.DataFrame([a, b])
+        >>> df = cudf.DataFrame({
+        ...     "a": [1, 2, 2],
+        ...     "b": [3, 4, 5],
+        ... })
         >>> expr = "(a == 2 and b == 4) or (b == 3)"
         >>> df.query(expr)
            a  b
@@ -3873,8 +3873,8 @@ def query(self, expr, local_dict=None):
         >>> df['datetimes'] = data
         >>> search_date = datetime.datetime.strptime('2018-10-08', '%Y-%m-%d')
         >>> df.query('datetimes==@search_date')
-                        datetimes
-        1 2018-10-08T00:00:00.000
+           datetimes
+        1 2018-10-08
 
         Using local_dict:
 
@@ -3885,9 +3885,9 @@ def query(self, expr, local_dict=None):
         >>> df['datetimes'] = data
         >>> search_date2 = datetime.datetime.strptime('2018-10-08', '%Y-%m-%d')
         >>> df.query('datetimes==@search_date',
-        ...         local_dict={'search_date':search_date2})
-                        datetimes
-        1 2018-10-08T00:00:00.000
+        ...          local_dict={'search_date': search_date2})
+           datetimes
+        1 2018-10-08
         """
         # can't use `annotate` decorator here as we inspect the calling
         # environment.

From b6ebe3d95cb197783b849d51b63e5bf9667a22da Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 2 Dec 2021 10:51:40 -0600
Subject: [PATCH 29/58] Fix DataFrame.pivot.

---
 python/cudf/cudf/core/reshape.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/reshape.py b/python/cudf/cudf/core/reshape.py
index fcf8cebe887..78376e55068 100644
--- a/python/cudf/cudf/core/reshape.py
+++ b/python/cudf/cudf/core/reshape.py
@@ -891,7 +891,7 @@ def pivot(data, index=None, columns=None, values=None):
     Examples
     --------
     >>> a = cudf.DataFrame()
-    >>> a['a'] = [1, 1, 2, 2],
+    >>> a['a'] = [1, 1, 2, 2]
     >>> a['b'] = ['a', 'b', 'a', 'b']
     >>> a['c'] = [1, 2, 3, 4]
     >>> a.pivot(index='a', columns='b')

From c7c240b1f391f1d3035ffe39ec46fe42c28b6010 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 2 Dec 2021 10:52:41 -0600
Subject: [PATCH 30/58] Fix DataFrame.memory_usage.

---
 python/cudf/cudf/core/dataframe.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index d62a6193626..94be6105b6e 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -1247,10 +1247,12 @@ def memory_usage(self, index=True, deep=False):
         object     40000
         bool        5000
         dtype: int64
+
         Use a Categorical for efficient storage of an object-dtype column with
         many repeated values.
+
         >>> df['object'].astype('category').memory_usage(deep=True)
-        5048
+        5008
         """
         if deep:
             warnings.warn(

From b5ecb98f5f574a7c7888506f099e4fa460e39f20 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 2 Dec 2021 10:55:40 -0600
Subject: [PATCH 31/58] Fix DataFrame.info.

---
 python/cudf/cudf/core/dataframe.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 94be6105b6e..31d2192b497 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -4416,11 +4416,13 @@ def info(
         >>> buffer = io.StringIO()
         >>> df.info(buf=buffer)
         >>> s = buffer.getvalue()
+        >>> # TODO Can we remove this example? It writes a text file every time
+        >>> # tests run and it does not seem particularly helpful...
         >>> with open("df_info.txt", "w",
         ...           encoding="utf-8") as f:
         ...     f.write(s)
         ...
-        369
+        362
 
         The `memory_usage` parameter allows deep introspection mode, specially
         useful for big DataFrames and fine-tune memory optimization:

From 0a467810ef9bbe0096a02c7ab486720822919d9a Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 2 Dec 2021 10:58:22 -0600
Subject: [PATCH 32/58] Fix DataFrame.groupby.

---
 python/cudf/cudf/core/groupby/groupby.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index f1d622362e2..c46e58f802e 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -1256,9 +1256,10 @@ class DataFrameGroupBy(GroupBy, GetAttrGetItemMixin):
     --------
     >>> import cudf
     >>> import pandas as pd
-    >>> df = cudf.DataFrame({'Animal': ['Falcon', 'Falcon',
-    ...                               'Parrot', 'Parrot'],
-    ...                    'Max Speed': [380., 370., 24., 26.]})
+    >>> df = cudf.DataFrame({
+    ...     'Animal': ['Falcon', 'Falcon', 'Parrot', 'Parrot'],
+    ...     'Max Speed': [380., 370., 24., 26.],
+    ... })
     >>> df
        Animal  Max Speed
     0  Falcon      380.0
@@ -1272,10 +1273,10 @@ class DataFrameGroupBy(GroupBy, GetAttrGetItemMixin):
     Parrot       25.0
 
     >>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
-    ... ['Captive', 'Wild', 'Captive', 'Wild']]
+    ...           ['Captive', 'Wild', 'Captive', 'Wild']]
     >>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
     >>> df = cudf.DataFrame({'Max Speed': [390., 350., 30., 20.]},
-            index=index)
+    ...     index=index)
     >>> df
                     Max Speed
     Animal Type

From ac41f9741199c014c4a3c794f8fef9ed23391eec Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 2 Dec 2021 11:17:18 -0600
Subject: [PATCH 33/58] Fix DataFrame.__getitem__.

---
 python/cudf/cudf/core/dataframe.py | 48 +++++++++++++++++++++---------
 1 file changed, 34 insertions(+), 14 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 31d2192b497..6f323ea87be 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -976,23 +976,34 @@ def __getitem__(self, arg):
 
         Examples
         --------
-        >>> df = DataFrame([('a', list(range(20))),
-        ...                 ('b', list(range(20))),
-        ...                 ('c', list(range(20)))])
-        >>> df[:4]    # get first 4 rows of all columns
+        >>> df = cudf.DataFrame({
+        ...     'a': list(range(10)),
+        ...     'b': list(range(10)),
+        ...     'c': list(range(10)),
+        ... })
+
+        Get first 4 rows of all columns.
+
+        >>> df[:4]
            a  b  c
         0  0  0  0
         1  1  1  1
         2  2  2  2
         3  3  3  3
-        >>> df[-5:]  # get last 5 rows of all columns
-             a   b   c
-        15  15  15  15
-        16  16  16  16
-        17  17  17  17
-        18  18  18  18
-        19  19  19  19
-        >>> df[['a', 'c']] # get columns a and c
+
+        Get last 5 rows of all columns.
+
+        >>> df[-5:]
+           a  b  c
+        5  5  5  5
+        6  6  6  6
+        7  7  7  7
+        8  8  8  8
+        9  9  9  9
+
+        Get columns a and c.
+
+        >>> df[['a', 'c']]
            a  c
         0  0  0
         1  1  1
@@ -1004,8 +1015,17 @@ def __getitem__(self, arg):
         7  7  7
         8  8  8
         9  9  9
-        >>> df[[True, False, True, False]] # mask the entire dataframe,
-        # returning the rows specified in the boolean mask
+
+        Return the rows specified in the boolean mask.
+
+        >>> df[[True, False, True, False, True,
+        ...     False, True, False, True, False]]
+           a  b  c
+        0  0  0  0
+        2  2  2  2
+        4  4  4  4
+        6  6  6  6
+        8  8  8  8
         """
         if _is_scalar_or_zero_d_array(arg) or isinstance(arg, tuple):
             return self._get_columns_by_label(arg, downcast=True)

From 39a3050a200b7728def8bd2cc14bf27e8dbc9279 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 2 Dec 2021 11:19:54 -0600
Subject: [PATCH 34/58] Fix DataFrame.

---
 python/cudf/cudf/core/dataframe.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 6f323ea87be..c1e98c7dfa5 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -469,12 +469,12 @@ class DataFrame(IndexedFrame, Serializable, GetAttrGetItemMixin):
     ...     [(t0+ timedelta(seconds=x)) for x in range(n)])
     ... })
     >>> df
-        id                datetimes
-    0    0  2018-10-07T12:00:00.000
-    1    1  2018-10-07T12:00:01.000
-    2    2  2018-10-07T12:00:02.000
-    3    3  2018-10-07T12:00:03.000
-    4    4  2018-10-07T12:00:04.000
+        id            datetimes
+    0    0  2018-10-07 12:00:00
+    1    1  2018-10-07 12:00:01
+    2    2  2018-10-07 12:00:02
+    3    3  2018-10-07 12:00:03
+    4    4  2018-10-07 12:00:04
 
     Build DataFrame via list of rows as tuples:
 

From 72f661d0406d1d5bd36e584310f792c1c50625ce Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 2 Dec 2021 11:21:34 -0600
Subject: [PATCH 35/58] Fix CategoricalIndex.

---
 python/cudf/cudf/core/index.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 5b1fc13089c..68827d359f6 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -2105,11 +2105,11 @@ class CategoricalIndex(GenericIndex):
     >>> import pandas as pd
     >>> cudf.CategoricalIndex(
     ... data=[1, 2, 3, 4], categories=[1, 2], ordered=False, name="a")
-    CategoricalIndex([1, 2, <NA>, <NA>], categories=[1, 2], ordered=False, name='a', dtype='category', name='a')
+    CategoricalIndex([1, 2, <NA>, <NA>], categories=[1, 2], ordered=False, dtype='category', name='a')
 
     >>> cudf.CategoricalIndex(
     ... data=[1, 2, 3, 4], dtype=pd.CategoricalDtype([1, 2, 3]), name="a")
-    CategoricalIndex([1, 2, 3, <NA>], categories=[1, 2, 3], ordered=False, name='a', dtype='category', name='a')
+    CategoricalIndex([1, 2, 3, <NA>], categories=[1, 2, 3], ordered=False, dtype='category', name='a')
     """  # noqa: E501
 
     def __init__(

From 6769688cc0cea9671a5c8f0286120c76f0d91831 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 2 Dec 2021 11:29:37 -0600
Subject: [PATCH 36/58] Fix BaseIndex.to_pandas.

---
 python/cudf/cudf/core/_base_index.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py
index 32dacb14e9c..043d05c66eb 100644
--- a/python/cudf/cudf/core/_base_index.py
+++ b/python/cudf/cudf/core/_base_index.py
@@ -546,7 +546,7 @@ def to_pandas(self):
         >>> type(idx.to_pandas())
         <class 'pandas.core.indexes.numeric.Int64Index'>
         >>> type(idx)
-        <class 'cudf.core.index.GenericIndex'>
+        <class 'cudf.core.index.Int64Index'>
         """
         return pd.Index(self._values.to_pandas(), name=self.name)
 

From 28dbf05ea0cd0ae2110862bcc8dba6c786f3ee26 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 2 Dec 2021 11:30:16 -0600
Subject: [PATCH 37/58] Work on BaseIndex.join - possibly an issue.

---
 python/cudf/cudf/core/_base_index.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py
index 043d05c66eb..fd6198d783e 100644
--- a/python/cudf/cudf/core/_base_index.py
+++ b/python/cudf/cudf/core/_base_index.py
@@ -935,6 +935,7 @@ def is_interval(self):
         Examples
         --------
         >>> import cudf
+        >>> import pandas as pd
         >>> idx = cudf.from_pandas(
         ...     pd.Index([pd.Interval(left=0, right=5),
         ...               pd.Interval(left=5, right=10)])
@@ -1098,15 +1099,16 @@ def join(
         Examples
         --------
         >>> import cudf
-        >>> lhs = cudf.DataFrame(
-        ...     {"a":[2, 3, 1], "b":[3, 4, 2]}).set_index(['a', 'b']
-        ... ).index
+        >>> lhs = cudf.DataFrame({
+        ...     "a": [2, 3, 1],
+        ...     "b": [3, 4, 2],
+        ... }).set_index(['a', 'b']).index
         >>> lhs
         MultiIndex([(2, 3),
                     (3, 4),
                     (1, 2)],
                    names=['a', 'b'])
-        >>> rhs = cudf.DataFrame({"a":[1, 4, 3]}).set_index('a').index
+        >>> rhs = cudf.DataFrame({"a": [1, 4, 3]}).set_index('a').index
         >>> rhs
         Int64Index([1, 4, 3], dtype='int64', name='a')
         >>> lhs.join(rhs, how='inner')

From f881890a2da5e139358e3d2f531d663853b53748 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 2 Dec 2021 15:34:04 -0600
Subject: [PATCH 38/58] Remove to_host_array from Series.as_mask doctests.

---
 python/cudf/cudf/core/series.py | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 920bd8239f6..d5daaebecb7 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -1799,13 +1799,6 @@ def as_mask(self):
         >>> s = cudf.Series([True, False, True])
         >>> s.as_mask()
         <cudf.core.buffer.Buffer object at 0x...>
-        >>> s.as_mask().to_host_array()
-        array([  5,   0,   0,   0,   0,   0,   0,   0,   1,   0,   0,   0,   0,
-                 0,   0,   0,   2,   0,   0,   0,   0,   0,   0,   0, 181, 164,
-               188,   1,   0,   0,   0,   0, 255, 255, 255, 255, 255, 255, 255,
-               127, 253, 214,  62, 241,   1,   0,   0,   0,   0,   0,   0,   0,
-                 0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
-             dtype=uint8)
         """
         if not is_bool_dtype(self.dtype):
             raise TypeError(

From 5111a7143446f779c356ca94703dd69280335a3d Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 2 Dec 2021 15:34:40 -0600
Subject: [PATCH 39/58] Match current implementation of DataFrame.describe for
 datetime values.

---
 python/cudf/cudf/utils/docutils.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/python/cudf/cudf/utils/docutils.py b/python/cudf/cudf/utils/docutils.py
index 57ad612846d..8894120529d 100644
--- a/python/cudf/cudf/utils/docutils.py
+++ b/python/cudf/cudf/utils/docutils.py
@@ -216,12 +216,12 @@ def wrapper(func):
         dtype: datetime64[s]
         >>> s.describe()
         count                                3
-        mean     2006-09-01 08:00:00.000000000
-        min      2000-01-01 00:00:00.000000000
-        25%      2004-12-31 12:00:00.000000000
-        50%      2010-01-01 00:00:00.000000000
-        75%      2010-01-01 00:00:00.000000000
-        max      2010-01-01 00:00:00.000000000
+        mean     2006-09-01T08:00:00.000000000
+        min                2000-01-01 00:00:00
+        25%                2004-12-31 12:00:00
+        50%                2010-01-01 00:00:00
+        75%                2010-01-01 00:00:00
+        max                2010-01-01 00:00:00
         dtype: object
 
         Describing a ``DataFrame``. By default only numeric fields are

From 699c21aafc1f604a41dac954bbbe87ddefe8a5f5 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Fri, 3 Dec 2021 16:13:14 -0600
Subject: [PATCH 40/58] Fix DataFrame.reindex. Resolves #9827.

---
 python/cudf/cudf/core/dataframe.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index c1e98c7dfa5..d898c2068b4 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -2287,11 +2287,11 @@ def reindex(
         3    3  13.0
         4    4  14.0
         >>> df_new
-           key   val  sum
-        0    0  10.0  NaN
-        3    3  13.0  NaN
-        4    4  14.0  NaN
-        5   -1   NaN  NaN
+           key   val   sum
+        0     0  10.0  <NA>
+        3     3  13.0  <NA>
+        4     4  14.0  <NA>
+        5  <NA>  <NA>  <NA>
         """
 
         if labels is None and index is None and columns is None:

From 3a0cb9fba35a702f5c945b9abe75571a21dddef1 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Tue, 4 Jan 2022 15:18:15 -0600
Subject: [PATCH 41/58] Remove TODOs, add isclose to __all__.

---
 python/cudf/cudf/__init__.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py
index 961438e22bc..4dadf6a1869 100644
--- a/python/cudf/cudf/__init__.py
+++ b/python/cudf/cudf/__init__.py
@@ -56,8 +56,6 @@
     StructDtype,
 )
 from cudf.core.groupby import Grouper
-
-# TODO: Math operations like add, arccos, etc. are not exposed in pandas' root namespace.
 from cudf.core.ops import (
     add,
     arccos,
@@ -86,8 +84,6 @@
     pivot,
     unstack,
 )
-
-# TODO: Pandas does not expose isclose in the root namespace.
 from cudf.core.series import isclose
 from cudf.core.tools.datetimes import DateOffset, to_datetime
 from cudf.core.tools.numeric import to_numeric
@@ -174,6 +170,7 @@
     "from_pandas",
     "get_dummies",
     "interval_range",
+    "isclose",
     "melt",
     "merge",
     "merge_sorted",

From 421fe473fe90c4c9a8e5c2575ecedcd1837206c9 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Tue, 4 Jan 2022 15:43:08 -0600
Subject: [PATCH 42/58] Print buffer instead of writing a file.

---
 python/cudf/cudf/core/dataframe.py | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index f79afa7e485..ee3f4b65485 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -4337,20 +4337,23 @@ def info(
         dtypes: float64(1), int64(1), object(1)
         memory usage: 130.0+ bytes
 
-        Pipe output of DataFrame.info to buffer instead of sys.stdout,
-        get buffer content and writes to a text file:
+        Pipe output of DataFrame.info to a buffer instead of sys.stdout and
+        print buffer contents:
 
         >>> import io
         >>> buffer = io.StringIO()
         >>> df.info(buf=buffer)
-        >>> s = buffer.getvalue()
-        >>> # TODO Can we remove this example? It writes a text file every time
-        >>> # tests run and it does not seem particularly helpful...
-        >>> with open("df_info.txt", "w",
-        ...           encoding="utf-8") as f:
-        ...     f.write(s)
-        ...
-        362
+        >>> print(buffer.getvalue())
+        <class 'cudf.core.dataframe.DataFrame'>
+        RangeIndex: 5 entries, 0 to 4
+        Data columns (total 3 columns):
+         #   Column     Non-Null Count  Dtype
+        ---  ------     --------------  -----
+         0   int_col    5 non-null      int64
+         1   text_col   5 non-null      object
+         2   float_col  5 non-null      float64
+        dtypes: float64(1), int64(1), object(1)
+        memory usage: 130.0+ bytes
 
         The `memory_usage` parameter allows deep introspection mode, specially
         useful for big DataFrames and fine-tune memory optimization:

From dd90c8d963fbab4e26330472c93bd622ec8f00c2 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Tue, 4 Jan 2022 16:24:43 -0600
Subject: [PATCH 43/58] Run doctests in a temporary path to avoid file I/O in
 the test directory.

---
 python/cudf/cudf/tests/test_doctests.py | 27 +++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/tests/test_doctests.py b/python/cudf/cudf/tests/test_doctests.py
index 8c318bd19b0..8e08971d8ac 100644
--- a/python/cudf/cudf/tests/test_doctests.py
+++ b/python/cudf/cudf/tests/test_doctests.py
@@ -1,5 +1,7 @@
 import doctest
 import inspect
+import os
+from contextlib import AbstractContextManager
 
 import numpy as np
 import pytest
@@ -41,16 +43,37 @@ def _fetch_doctests():
     yield from _find_docstrings_in_obj(finder, cudf, criteria=_name_in_all)
 
 
+class _chdir(AbstractContextManager):
+    """Non thread-safe context manager to change the current working directory.
+
+    Implementation copied from Python's contextlib.chdir, implemented in
+    October 2021. This is not yet released but can be replaced with
+    contextlib.chdir in the future.
+    """
+
+    def __init__(self, path):
+        self.path = path
+        self._old_cwd = []
+
+    def __enter__(self):
+        self._old_cwd.append(os.getcwd())
+        os.chdir(self.path)
+
+    def __exit__(self, *excinfo):
+        os.chdir(self._old_cwd.pop())
+
+
 class TestDoctests:
     @pytest.mark.parametrize(
         "docstring", _fetch_doctests(), ids=lambda docstring: docstring.name
     )
-    def test_docstring(self, docstring):
+    def test_docstring(self, docstring, tmp_path):
         optionflags = doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE
         runner = doctest.DocTestRunner(optionflags=optionflags)
         globs = dict(cudf=cudf, np=np,)
         docstring.globs = globs
-        runner.run(docstring)
+        with _chdir(tmp_path):
+            runner.run(docstring)
         results = runner.summarize()
         if results.failed:
             raise AssertionError(results)

From 4ed7c0f080a78154fbd22a6b932cc9e710f179bd Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Tue, 4 Jan 2022 16:51:50 -0600
Subject: [PATCH 44/58] Use a class-scoped autouse fixture for temporary
 directories.

---
 python/cudf/cudf/tests/test_doctests.py | 35 ++++++++-----------------
 1 file changed, 11 insertions(+), 24 deletions(-)

diff --git a/python/cudf/cudf/tests/test_doctests.py b/python/cudf/cudf/tests/test_doctests.py
index 8e08971d8ac..8cecad8520a 100644
--- a/python/cudf/cudf/tests/test_doctests.py
+++ b/python/cudf/cudf/tests/test_doctests.py
@@ -1,7 +1,6 @@
 import doctest
 import inspect
 import os
-from contextlib import AbstractContextManager
 
 import numpy as np
 import pytest
@@ -43,37 +42,25 @@ def _fetch_doctests():
     yield from _find_docstrings_in_obj(finder, cudf, criteria=_name_in_all)
 
 
-class _chdir(AbstractContextManager):
-    """Non thread-safe context manager to change the current working directory.
-
-    Implementation copied from Python's contextlib.chdir, implemented in
-    October 2021. This is not yet released but can be replaced with
-    contextlib.chdir in the future.
-    """
-
-    def __init__(self, path):
-        self.path = path
-        self._old_cwd = []
-
-    def __enter__(self):
-        self._old_cwd.append(os.getcwd())
-        os.chdir(self.path)
-
-    def __exit__(self, *excinfo):
-        os.chdir(self._old_cwd.pop())
-
-
 class TestDoctests:
+    @pytest.fixture(autouse=True)
+    def chdir_to_tmp_path(tmp_path):
+        original_directory = os.getcwd()
+        try:
+            os.chdir(tmp_path)
+            yield
+        finally:
+            os.chdir(original_directory)
+
     @pytest.mark.parametrize(
         "docstring", _fetch_doctests(), ids=lambda docstring: docstring.name
     )
-    def test_docstring(self, docstring, tmp_path):
+    def test_docstring(self, docstring):
         optionflags = doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE
         runner = doctest.DocTestRunner(optionflags=optionflags)
         globs = dict(cudf=cudf, np=np,)
         docstring.globs = globs
-        with _chdir(tmp_path):
-            runner.run(docstring)
+        runner.run(docstring)
         results = runner.summarize()
         if results.failed:
             raise AssertionError(results)

From c624a84700cb2285907af6e8bb472631d1e16671 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Tue, 4 Jan 2022 17:08:07 -0600
Subject: [PATCH 45/58] Fix fixture.

---
 python/cudf/cudf/tests/test_doctests.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/tests/test_doctests.py b/python/cudf/cudf/tests/test_doctests.py
index 8cecad8520a..fdc11571de3 100644
--- a/python/cudf/cudf/tests/test_doctests.py
+++ b/python/cudf/cudf/tests/test_doctests.py
@@ -44,7 +44,7 @@ def _fetch_doctests():
 
 class TestDoctests:
     @pytest.fixture(autouse=True)
-    def chdir_to_tmp_path(tmp_path):
+    def chdir_to_tmp_path(cls, tmp_path):
         original_directory = os.getcwd()
         try:
             os.chdir(tmp_path)

From 21d6cadbfe4a6da6501ef0844c555735f7ba7451 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 13 Jan 2022 11:08:42 -0800
Subject: [PATCH 46/58] Clean up doctests, add comments.

---
 python/cudf/cudf/tests/test_doctests.py | 53 +++++++++++++++++--------
 1 file changed, 37 insertions(+), 16 deletions(-)

diff --git a/python/cudf/cudf/tests/test_doctests.py b/python/cudf/cudf/tests/test_doctests.py
index fdc11571de3..4f2de193731 100644
--- a/python/cudf/cudf/tests/test_doctests.py
+++ b/python/cudf/cudf/tests/test_doctests.py
@@ -8,43 +8,52 @@
 import cudf
 
 
-def _name_in_all(parent, name, member):
+def _name_in_all(parent, name):
     return name in getattr(parent, "__all__", [])
 
 
-def _is_public_name(parent, name, member):
+def _is_public_name(parent, name):
     return not name.startswith("_")
 
 
-def _find_docstrings_in_obj(finder, obj, criteria=None):
+def _find_doctests_in_obj(finder, obj, criteria):
+    """Find all doctests in an object.
+
+    Args:
+        finder (doctest.DocTestFinder): The DocTestFinder object to use.
+        obj (module or class): The object to search for docstring examples.
+        criteria (callable): Callable indicating whether to recurse over
+        members of the provided object.
+
+    Yields:
+        doctest.DocTest: The next doctest found in the object.
+    """
     for docstring in finder.find(obj):
         if docstring.examples:
             yield docstring
     for name, member in inspect.getmembers(obj):
-        # Filter out non-matching objects with criteria
-        if criteria is not None and not criteria(obj, name, member):
+        # Only recurse over members matching the criteria
+        if not criteria(obj, name):
             continue
-        # Recurse over the public API of modules (objects defined in __all__)
+        # Recurse over the public API of modules (objects defined in the
+        # module's __all__)
         if inspect.ismodule(member):
-            yield from _find_docstrings_in_obj(
+            yield from _find_doctests_in_obj(
                 finder, member, criteria=_name_in_all
             )
         # Recurse over the public API of classes (attributes not prefixed with
         # an underscore)
         if inspect.isclass(member):
-            yield from _find_docstrings_in_obj(
+            yield from _find_doctests_in_obj(
                 finder, member, criteria=_is_public_name
             )
 
 
-def _fetch_doctests():
-    finder = doctest.DocTestFinder()
-    yield from _find_docstrings_in_obj(finder, cudf, criteria=_name_in_all)
-
-
 class TestDoctests:
     @pytest.fixture(autouse=True)
     def chdir_to_tmp_path(cls, tmp_path):
+        # Some doctests generate files, so this fixture runs the tests in a
+        # temporary directory.
         original_directory = os.getcwd()
         try:
             os.chdir(tmp_path)
@@ -53,13 +62,25 @@ def chdir_to_tmp_path(cls, tmp_path):
             os.chdir(original_directory)
 
     @pytest.mark.parametrize(
-        "docstring", _fetch_doctests(), ids=lambda docstring: docstring.name
+        "docstring",
+        _find_doctests_in_obj(
+            finder=doctest.DocTestFinder(), obj=cudf, criteria=_name_in_all
+        ),
+        ids=lambda docstring: docstring.name,
     )
     def test_docstring(self, docstring):
+        # We ignore differences in whitespace in the doctest output, and enable
+        # the use of an ellipsis "..." to match any string in the doctest
+        # output. An ellipsis is useful for, e.g., memory addresses or
+        # imprecise floating point values.
         optionflags = doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE
         runner = doctest.DocTestRunner(optionflags=optionflags)
-        globs = dict(cudf=cudf, np=np,)
-        docstring.globs = globs
+
+        # These global names are pre-defined and can be used in doctests
+        # without first importing them.
+        globals = dict(cudf=cudf, np=np,)
+        docstring.globs = globals
+
         runner.run(docstring)
         results = runner.summarize()
         if results.failed:

From a625070a0f8ba2aff7fc3b2356bbf02366ddbd58 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 13 Jan 2022 11:15:28 -0800
Subject: [PATCH 47/58] Fix TimedeltaIndex doctest.

---
 python/cudf/cudf/core/index.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 768ea6aa638..0bd9b0a5ea6 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -2013,14 +2013,15 @@ class TimedeltaIndex(GenericIndex):
     --------
     >>> import cudf
     >>> cudf.TimedeltaIndex([1132223, 2023232, 342234324, 4234324],
-    ...     dtype='timedelta64[ns]')
-    TimedeltaIndex(['00:00:00.001132', '00:00:00.002023', '00:00:00.342234',
-                    '00:00:00.004234'],
-                dtype='timedelta64[ns]')
-    >>> cudf.TimedeltaIndex([1, 2, 3, 4], dtype='timedelta64[s]',
+    ...     dtype="timedelta64[ns]")
+    TimedeltaIndex(['0 days 00:00:00.001132223', '0 days 00:00:00.002023232',
+                    '0 days 00:00:00.342234324', '0 days 00:00:00.004234324'],
+                  dtype='timedelta64[ns]')
+    >>> cudf.TimedeltaIndex([1, 2, 3, 4], dtype="timedelta64[s]",
     ...     name="delta-index")
-    TimedeltaIndex(['00:00:01', '00:00:02', '00:00:03', '00:00:04'],
-                dtype='timedelta64[s]', name='delta-index')
+    TimedeltaIndex(['0 days 00:00:01', '0 days 00:00:02', '0 days 00:00:03',
+                    '0 days 00:00:04'],
+                  dtype='timedelta64[s]', name='delta-index')
     """
 
     def __init__(

From 93ad86455593e7e20956d59b510fa0ef29574df9 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 13 Jan 2022 11:17:48 -0800
Subject: [PATCH 48/58] Update formatting of doctest to match current cuDF
 implementation.

---
 python/cudf/cudf/utils/docutils.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/python/cudf/cudf/utils/docutils.py b/python/cudf/cudf/utils/docutils.py
index cc24bbb8346..2fcf996b641 100644
--- a/python/cudf/cudf/utils/docutils.py
+++ b/python/cudf/cudf/utils/docutils.py
@@ -225,13 +225,13 @@ def wrapper(func):
         2   2010-01-01
         dtype: datetime64[s]
         >>> s.describe()
-        count                                3
-        mean     2006-09-01T08:00:00.000000000
-        min                2000-01-01 00:00:00
-        25%                2004-12-31 12:00:00
-        50%                2010-01-01 00:00:00
-        75%                2010-01-01 00:00:00
-        max                2010-01-01 00:00:00
+        count                     3
+        mean    2006-09-01 08:00:00
+        min     2000-01-01 00:00:00
+        25%     2004-12-31 12:00:00
+        50%     2010-01-01 00:00:00
+        75%     2010-01-01 00:00:00
+        max     2010-01-01 00:00:00
         dtype: object
 
         Describing a ``DataFrame``. By default only numeric fields are

From 86bdcfa467ae5a15d1571d8d1368c50f6e7f4875 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 13 Jan 2022 11:23:45 -0800
Subject: [PATCH 49/58] Avoid -0.99999999... in autocorrelation to ensure
 passing doctest if perturbed by numerical error.

---
 python/cudf/cudf/core/series.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index c7f7131ca10..c176b5f5bf9 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -2812,11 +2812,11 @@ def autocorr(self, lag=1):
         Examples
         --------
         >>> import cudf
-        >>> s = cudf.Series([0.25, 0.5, 0.2, -0.05])
+        >>> s = cudf.Series([0.25, 0.5, 0.2, -0.05, 0.17])
         >>> s.autocorr()
-        0.10355263309024071
+        0.1438853844...
         >>> s.autocorr(lag=2)
-        -0.9999999999999999
+        -0.9647548490...
         """
         return self.corr(self.shift(lag))
 

From 1ec00877a77c5fd9c558098f012be653e8fdbcdf Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 13 Jan 2022 11:25:51 -0800
Subject: [PATCH 50/58] Fix misordered values in DatetimeIndex.round doctest.

---
 python/cudf/cudf/core/index.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 0bd9b0a5ea6..1e493708415 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -1971,7 +1971,9 @@ def round(self, freq):
         ...     "2001-01-01 00:05:04",
         ... ], dtype="datetime64[ns]")
         >>> dt_idx
-        DatetimeIndex(['2001-01-01 00:04:45', '2001-01-01 00:05:04', '2001-01-01 00:04:58'], dtype='datetime64[ns]')
+        DatetimeIndex(['2001-01-01 00:04:45', '2001-01-01 00:04:58',
+                       '2001-01-01 00:05:04'],
+                      dtype='datetime64[ns]')
         >>> dt_idx.round('H')
         DatetimeIndex(['2001-01-01', '2001-01-01', '2001-01-01'], dtype='datetime64[ns]')
         >>> dt_idx.round('T')

From d6553db05889e9e10ef6e0ded2db88c038903ae5 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 13 Jan 2022 14:21:48 -0800
Subject: [PATCH 51/58] Remove try/finally.

---
 python/cudf/cudf/tests/test_doctests.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/python/cudf/cudf/tests/test_doctests.py b/python/cudf/cudf/tests/test_doctests.py
index 4f2de193731..ee6c0bc3896 100644
--- a/python/cudf/cudf/tests/test_doctests.py
+++ b/python/cudf/cudf/tests/test_doctests.py
@@ -55,11 +55,9 @@ def chdir_to_tmp_path(cls, tmp_path):
         # Some doctests generate files, so this fixture runs the tests in a
         # temporary directory.
         original_directory = os.getcwd()
-        try:
-            os.chdir(tmp_path)
-            yield
-        finally:
-            os.chdir(original_directory)
+        os.chdir(tmp_path)
+        yield
+        os.chdir(original_directory)
 
     @pytest.mark.parametrize(
         "docstring",

From 8eea41a180bc69c59bffccad57db6ccca8f4f2b3 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 13 Jan 2022 14:27:39 -0800
Subject: [PATCH 52/58] Use assert not...

---
 python/cudf/cudf/tests/test_doctests.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/python/cudf/cudf/tests/test_doctests.py b/python/cudf/cudf/tests/test_doctests.py
index ee6c0bc3896..bc7d1bb1d55 100644
--- a/python/cudf/cudf/tests/test_doctests.py
+++ b/python/cudf/cudf/tests/test_doctests.py
@@ -81,5 +81,4 @@ def test_docstring(self, docstring):
 
         runner.run(docstring)
         results = runner.summarize()
-        if results.failed:
-            raise AssertionError(results)
+        assert not results.failed, results

From 95303a3f952cd6c86be6b15050582004997e7696 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 13 Jan 2022 14:29:55 -0800
Subject: [PATCH 53/58] Use NumPy-style docstring.

---
 python/cudf/cudf/tests/test_doctests.py | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/python/cudf/cudf/tests/test_doctests.py b/python/cudf/cudf/tests/test_doctests.py
index bc7d1bb1d55..c78c4753f6d 100644
--- a/python/cudf/cudf/tests/test_doctests.py
+++ b/python/cudf/cudf/tests/test_doctests.py
@@ -19,14 +19,20 @@ def _is_public_name(parent, name):
 def _find_doctests_in_obj(finder, obj, criteria):
     """Find all doctests in an object.
 
-    Args:
-        finder (doctest.DocTestFinder): The DocTestFinder object to use.
-        obj (module or class): The object to search for docstring examples.
-        criteria (callable): Callable indicating whether to recurse over
-        members of the provided object.
+    Parameters
+    ----------
+    finder : doctest.DocTestFinder
+        The DocTestFinder object to use.
+    obj : module or class
+        The object to search for docstring examples.
+    criteria : callable
+        Callable indicating whether to recurse over members of the provided
+        object.
 
-    Yields:
-        doctest.DocTest: The next doctest found in the object.
+    Yields
+    ------
+    doctest.DocTest
+        The next doctest found in the object.
     """
     for docstring in finder.find(obj):
         if docstring.examples:

From f4254fdb77663cf945e31fdf785bca393768fc68 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 13 Jan 2022 14:34:36 -0800
Subject: [PATCH 54/58] Improve defaults in doctest finder.

---
 python/cudf/cudf/tests/test_doctests.py | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/python/cudf/cudf/tests/test_doctests.py b/python/cudf/cudf/tests/test_doctests.py
index c78c4753f6d..c4e4094ad03 100644
--- a/python/cudf/cudf/tests/test_doctests.py
+++ b/python/cudf/cudf/tests/test_doctests.py
@@ -16,24 +16,30 @@ def _is_public_name(parent, name):
     return not name.startswith("_")
 
 
-def _find_doctests_in_obj(finder, obj, criteria):
+def _find_doctests_in_obj(obj, finder=None, criteria=None):
     """Find all doctests in an object.
 
     Parameters
     ----------
-    finder : doctest.DocTestFinder
-        The DocTestFinder object to use.
     obj : module or class
         The object to search for docstring examples.
-    criteria : callable
+    finder : doctest.DocTestFinder, optional
+        The DocTestFinder object to use. If not provided, a DocTestFinder is
+        constructed.
+    criteria : callable, optional
         Callable indicating whether to recurse over members of the provided
-        object.
+        object. If not provided, names not defined in the object's ``__all__``
+        attribute are ignored.
 
     Yields
     ------
     doctest.DocTest
         The next doctest found in the object.
     """
+    if finder is None:
+        finder = doctest.DocTestFinder()
+    if criteria is None:
+        criteria = _name_in_all
     for docstring in finder.find(obj):
         if docstring.examples:
             yield docstring
@@ -45,13 +51,13 @@ def _find_doctests_in_obj(finder, obj, criteria):
         # module's __all__)
         if inspect.ismodule(member):
             yield from _find_doctests_in_obj(
-                finder, member, criteria=_name_in_all
+                member, finder, criteria=_name_in_all
             )
         # Recurse over the public API of classes (attributes not prefixed with
         # an underscore)
         if inspect.isclass(member):
             yield from _find_doctests_in_obj(
-                finder, member, criteria=_is_public_name
+                member, finder, criteria=_is_public_name
             )
 
 
@@ -67,9 +73,7 @@ def chdir_to_tmp_path(cls, tmp_path):
 
     @pytest.mark.parametrize(
         "docstring",
-        _find_doctests_in_obj(
-            finder=doctest.DocTestFinder(), obj=cudf, criteria=_name_in_all
-        ),
+        _find_doctests_in_obj(cudf),
         ids=lambda docstring: docstring.name,
     )
     def test_docstring(self, docstring):

From 3110606f9c219b65cbd0bbf7091090a086f7435a Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 13 Jan 2022 14:37:46 -0800
Subject: [PATCH 55/58] Remove __all__ from accessor.

---
 python/cudf/cudf/api/extensions/accessor.py | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/python/cudf/cudf/api/extensions/accessor.py b/python/cudf/cudf/api/extensions/accessor.py
index 524c11f048d..a27ffa90cfc 100644
--- a/python/cudf/cudf/api/extensions/accessor.py
+++ b/python/cudf/cudf/api/extensions/accessor.py
@@ -159,10 +159,3 @@ def register_index_accessor(name):
 def register_series_accessor(name):
     """{docstring}"""
     return _register_accessor(name, cudf.Series)
-
-
-__all__ = [
-    "register_dataframe_accessor",
-    "register_index_accessor",
-    "register_series_accessor",
-]

From f9512ad78ddad8b38eca00897aa62a8014645cba Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Fri, 14 Jan 2022 13:12:23 -0800
Subject: [PATCH 56/58] Show doctest failures in the traceback.

---
 python/cudf/cudf/tests/test_doctests.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/tests/test_doctests.py b/python/cudf/cudf/tests/test_doctests.py
index c4e4094ad03..05d6886c297 100644
--- a/python/cudf/cudf/tests/test_doctests.py
+++ b/python/cudf/cudf/tests/test_doctests.py
@@ -1,5 +1,7 @@
+import contextlib
 import doctest
 import inspect
+import io
 import os
 
 import numpy as np
@@ -89,6 +91,12 @@ def test_docstring(self, docstring):
         globals = dict(cudf=cudf, np=np,)
         docstring.globs = globals
 
-        runner.run(docstring)
-        results = runner.summarize()
-        assert not results.failed, results
+        # Capture stdout and include failing outputs in the traceback.
+        doctest_stdout = io.StringIO()
+        with contextlib.redirect_stdout(doctest_stdout):
+            runner.run(docstring)
+            results = runner.summarize()
+        assert not results.failed, (
+            f"{results.failed} of {results.attempted} doctests failed for "
+            f"{docstring.name}:\n{doctest_stdout.getvalue()}"
+        )

From 64a17c78d0e0e8343515136792fc064673c91b01 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Fri, 14 Jan 2022 13:35:23 -0800
Subject: [PATCH 57/58] Prevent test_dataframe_to_string from leaking state
 into the pandas options.

---
 python/cudf/cudf/tests/test_dataframe.py | 116 +++++++++++------------
 1 file changed, 57 insertions(+), 59 deletions(-)

diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index e5b298a8448..f71e857918d 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -747,70 +747,68 @@ def test_index_astype(nelem):
 
 
 def test_dataframe_to_string():
-    pd.options.display.max_rows = 5
-    pd.options.display.max_columns = 8
-    # Test basic
-    df = cudf.DataFrame(
-        {"a": [1, 2, 3, 4, 5, 6], "b": [11, 12, 13, 14, 15, 16]}
-    )
-    string = str(df)
-
-    assert string.splitlines()[-1] == "[6 rows x 2 columns]"
-
-    # Test skipped columns
-    df = cudf.DataFrame(
-        {
-            "a": [1, 2, 3, 4, 5, 6],
-            "b": [11, 12, 13, 14, 15, 16],
-            "c": [11, 12, 13, 14, 15, 16],
-            "d": [11, 12, 13, 14, 15, 16],
-        }
-    )
-    string = df.to_string()
-
-    assert string.splitlines()[-1] == "[6 rows x 4 columns]"
-
-    # Test masked
-    df = cudf.DataFrame(
-        {"a": [1, 2, 3, 4, 5, 6], "b": [11, 12, 13, 14, 15, 16]}
-    )
-
-    data = np.arange(6)
-    mask = np.zeros(1, dtype=cudf.utils.utils.mask_dtype)
-    mask[0] = 0b00101101
+    with pd.option_context("display.max_rows", 5, "display.max_columns", 8):
+        # Test basic
+        df = cudf.DataFrame(
+            {"a": [1, 2, 3, 4, 5, 6], "b": [11, 12, 13, 14, 15, 16]}
+        )
+        string = str(df)
 
-    masked = cudf.Series.from_masked_array(data, mask)
-    assert masked.null_count == 2
-    df["c"] = masked
+        assert string.splitlines()[-1] == "[6 rows x 2 columns]"
 
-    # check data
-    values = masked.copy()
-    validids = [0, 2, 3, 5]
-    densearray = masked.dropna().to_numpy()
-    np.testing.assert_equal(data[validids], densearray)
-    # valid position is correct
+        # Test skipped columns
+        df = cudf.DataFrame(
+            {
+                "a": [1, 2, 3, 4, 5, 6],
+                "b": [11, 12, 13, 14, 15, 16],
+                "c": [11, 12, 13, 14, 15, 16],
+                "d": [11, 12, 13, 14, 15, 16],
+            }
+        )
+        string = df.to_string()
 
-    for i in validids:
-        assert data[i] == values[i]
-    # null position is correct
-    for i in range(len(values)):
-        if i not in validids:
-            assert values[i] is cudf.NA
+        assert string.splitlines()[-1] == "[6 rows x 4 columns]"
 
-    pd.options.display.max_rows = 10
-    got = df.to_string()
+        # Test masked
+        df = cudf.DataFrame(
+            {"a": [1, 2, 3, 4, 5, 6], "b": [11, 12, 13, 14, 15, 16]}
+        )
 
-    expect = """
-a b  c
-0 1 11 0
-1 2 12 <NA>
-2 3 13 2
-3 4 14 3
-4 5 15 <NA>
-5 6 16 5
-"""
-    # values should match despite whitespace difference
-    assert got.split() == expect.split()
+        data = np.arange(6)
+        mask = np.zeros(1, dtype=cudf.utils.utils.mask_dtype)
+        mask[0] = 0b00101101
+
+        masked = cudf.Series.from_masked_array(data, mask)
+        assert masked.null_count == 2
+        df["c"] = masked
+
+        # check data
+        values = masked.copy()
+        validids = [0, 2, 3, 5]
+        densearray = masked.dropna().to_numpy()
+        np.testing.assert_equal(data[validids], densearray)
+        # valid position is correct
+
+        for i in validids:
+            assert data[i] == values[i]
+        # null position is correct
+        for i in range(len(values)):
+            if i not in validids:
+                assert values[i] is cudf.NA
+
+    with pd.option_context("display.max_rows", 10):
+        got = df.to_string()
+        expect = textwrap.dedent(
+            """\
+               a   b     c
+            0  1  11     0
+            1  2  12  <NA>
+            2  3  13     2
+            3  4  14     3
+            4  5  15  <NA>
+            5  6  16     5"""
+        )
+        assert got == expect
 
 
 def test_dataframe_to_string_wide(monkeypatch):

From e1a19bcfdd619d2014831a229a85ab60756b2edd Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Fri, 14 Jan 2022 13:57:13 -0800
Subject: [PATCH 58/58] Split test_dataframe_to_string into multiple tests.

---
 python/cudf/cudf/tests/test_dataframe.py | 133 ++++++++++++++---------
 1 file changed, 81 insertions(+), 52 deletions(-)

diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index f71e857918d..40d0d0f4fcc 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -746,69 +746,98 @@ def test_index_astype(nelem):
     np.testing.assert_equal(df.index.to_numpy(), df["a"].to_numpy())
 
 
-def test_dataframe_to_string():
-    with pd.option_context("display.max_rows", 5, "display.max_columns", 8):
-        # Test basic
-        df = cudf.DataFrame(
-            {"a": [1, 2, 3, 4, 5, 6], "b": [11, 12, 13, 14, 15, 16]}
-        )
-        string = str(df)
+def test_dataframe_to_string_with_skipped_rows():
+    # Test skipped rows
+    df = cudf.DataFrame(
+        {"a": [1, 2, 3, 4, 5, 6], "b": [11, 12, 13, 14, 15, 16]}
+    )
 
-        assert string.splitlines()[-1] == "[6 rows x 2 columns]"
+    with pd.option_context("display.max_rows", 5):
+        got = df.to_string()
 
-        # Test skipped columns
-        df = cudf.DataFrame(
-            {
-                "a": [1, 2, 3, 4, 5, 6],
-                "b": [11, 12, 13, 14, 15, 16],
-                "c": [11, 12, 13, 14, 15, 16],
-                "d": [11, 12, 13, 14, 15, 16],
-            }
-        )
-        string = df.to_string()
+    expect = textwrap.dedent(
+        """\
+            a   b
+        0   1  11
+        1   2  12
+        .. ..  ..
+        4   5  15
+        5   6  16
 
-        assert string.splitlines()[-1] == "[6 rows x 4 columns]"
+        [6 rows x 2 columns]"""
+    )
+    assert got == expect
 
-        # Test masked
-        df = cudf.DataFrame(
-            {"a": [1, 2, 3, 4, 5, 6], "b": [11, 12, 13, 14, 15, 16]}
-        )
 
-        data = np.arange(6)
-        mask = np.zeros(1, dtype=cudf.utils.utils.mask_dtype)
-        mask[0] = 0b00101101
+def test_dataframe_to_string_with_skipped_rows_and_columns():
+    # Test skipped rows and skipped columns
+    df = cudf.DataFrame(
+        {
+            "a": [1, 2, 3, 4, 5, 6],
+            "b": [11, 12, 13, 14, 15, 16],
+            "c": [11, 12, 13, 14, 15, 16],
+            "d": [11, 12, 13, 14, 15, 16],
+        }
+    )
+
+    with pd.option_context("display.max_rows", 5, "display.max_columns", 3):
+        got = df.to_string()
 
-        masked = cudf.Series.from_masked_array(data, mask)
-        assert masked.null_count == 2
-        df["c"] = masked
+    expect = textwrap.dedent(
+        """\
+            a  ...   d
+        0   1  ...  11
+        1   2  ...  12
+        .. ..  ...  ..
+        4   5  ...  15
+        5   6  ...  16
 
-        # check data
-        values = masked.copy()
-        validids = [0, 2, 3, 5]
-        densearray = masked.dropna().to_numpy()
-        np.testing.assert_equal(data[validids], densearray)
-        # valid position is correct
+        [6 rows x 4 columns]"""
+    )
+    assert got == expect
 
-        for i in validids:
-            assert data[i] == values[i]
-        # null position is correct
-        for i in range(len(values)):
-            if i not in validids:
-                assert values[i] is cudf.NA
+
+def test_dataframe_to_string_with_masked_data():
+    # Test masked data
+    df = cudf.DataFrame(
+        {"a": [1, 2, 3, 4, 5, 6], "b": [11, 12, 13, 14, 15, 16]}
+    )
+
+    data = np.arange(6)
+    mask = np.zeros(1, dtype=cudf.utils.utils.mask_dtype)
+    mask[0] = 0b00101101
+
+    masked = cudf.Series.from_masked_array(data, mask)
+    assert masked.null_count == 2
+    df["c"] = masked
+
+    # Check data
+    values = masked.copy()
+    validids = [0, 2, 3, 5]
+    densearray = masked.dropna().to_numpy()
+    np.testing.assert_equal(data[validids], densearray)
+    # Valid position is correct
+    for i in validids:
+        assert data[i] == values[i]
+    # Null position is correct
+    for i in range(len(values)):
+        if i not in validids:
+            assert values[i] is cudf.NA
 
     with pd.option_context("display.max_rows", 10):
         got = df.to_string()
-        expect = textwrap.dedent(
-            """\
-               a   b     c
-            0  1  11     0
-            1  2  12  <NA>
-            2  3  13     2
-            3  4  14     3
-            4  5  15  <NA>
-            5  6  16     5"""
-        )
-        assert got == expect
+
+    expect = textwrap.dedent(
+        """\
+           a   b     c
+        0  1  11     0
+        1  2  12  <NA>
+        2  3  13     2
+        3  4  14     3
+        4  5  15  <NA>
+        5  6  16     5"""
+    )
+    assert got == expect
 
 
 def test_dataframe_to_string_wide(monkeypatch):