From f7c35d56cdfb7af842b54255029b7481ca9b6d94 Mon Sep 17 00:00:00 2001
From: martinfalisse <45781926+martinfalisse@users.noreply.github.com>
Date: Thu, 14 Apr 2022 20:27:51 +0200
Subject: [PATCH 1/7] Add support for numeric_only in DataFrame._reduce
 (#10629)

Add support for `numeric_only` in `DataFrame._reduce`, so that reductions such as `df.mean(numeric_only=True)` operate only on the numeric columns. Resolves https://github.com/rapidsai/cudf/issues/2067. Also partially addresses https://github.com/rapidsai/cudf/issues/9009.

Authors:
  - https://github.com/martinfalisse

Approvers:
  - Michael Wang (https://github.com/isVoid)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/10629
---
 python/cudf/cudf/core/dataframe.py           | 25 +++---
 python/cudf/cudf/core/single_column_frame.py |  4 +-
 python/cudf/cudf/tests/test_dataframe.py     | 54 +++++++++++++
 python/cudf/cudf/tests/test_stats.py         | 83 +++++++++++++++++---
 4 files changed, 145 insertions(+), 21 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 2b2c09fa2a0..ae60cd91fac 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -5180,26 +5180,33 @@ def _reduce(
         if level is not None:
             raise NotImplementedError("level parameter is not implemented yet")
 
-        if numeric_only not in (None, True):
-            raise NotImplementedError(
-                "numeric_only parameter is not implemented yet"
+        source = self
+        if numeric_only:
+            numeric_cols = (
+                name
+                for name in self._data.names
+                if is_numeric_dtype(self._data[name])
             )
-        axis = self._get_axis_from_axis_arg(axis)
+            source = self._get_columns_by_label(numeric_cols)
+            if source.empty:
+                return Series(index=cudf.StringIndex([]))
+
+        axis = source._get_axis_from_axis_arg(axis)
 
         if axis == 0:
             try:
                 result = [
-                    getattr(self._data[col], op)(**kwargs)
-                    for col in self._data.names
+                    getattr(source._data[col], op)(**kwargs)
+                    for col in source._data.names
                 ]
             except AttributeError:
-                raise TypeError(f"cannot perform {op} with type {self.dtype}")
+                raise TypeError(f"Not all column dtypes support op {op}")
 
             return Series._from_data(
-                {None: result}, as_index(self._data.names)
+                {None: result}, as_index(source._data.names)
             )
         elif axis == 1:
-            return self._apply_cupy_method_axis_1(op, **kwargs)
+            return source._apply_cupy_method_axis_1(op, **kwargs)
 
     @_cudf_nvtx_annotate
     def _scan(
diff --git a/python/cudf/cudf/core/single_column_frame.py b/python/cudf/cudf/core/single_column_frame.py
index 003f8ea7fdb..addc823e7f1 100644
--- a/python/cudf/cudf/core/single_column_frame.py
+++ b/python/cudf/cudf/core/single_column_frame.py
@@ -52,9 +52,9 @@ def _reduce(
         if level is not None:
             raise NotImplementedError("level parameter is not implemented yet")
 
-        if numeric_only not in (None, True):
+        if numeric_only:
             raise NotImplementedError(
-                "numeric_only parameter is not implemented yet"
+                f"Series.{op} does not implement numeric_only"
             )
         try:
             return getattr(self._column, op)(**kwargs)
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index a7fad792bd0..13ab0b35822 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -9230,3 +9230,57 @@ def test_dataframe_pct_change(data, periods, fill_method):
     expected = pdf.pct_change(periods=periods, fill_method=fill_method)
 
     assert_eq(expected, actual)
+
+
+def test_mean_timeseries():
+    gdf = cudf.datasets.timeseries()
+    pdf = gdf.to_pandas()
+
+    expected = pdf.mean(numeric_only=True)
+    actual = gdf.mean(numeric_only=True)
+
+    assert_eq(expected, actual)
+
+    with pytest.raises(TypeError):
+        gdf.mean()
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        {
+            "a": [1, 2, 3, 4, 5],
+            "b": ["a", "b", "c", "d", "e"],
+            "c": [1.0, 2.0, 3.0, 4.0, 5.0],
+        }
+    ],
+)
+def test_std_different_dtypes(data):
+    gdf = cudf.DataFrame(data)
+    pdf = gdf.to_pandas()
+
+    expected = pdf.std(numeric_only=True)
+    actual = gdf.std(numeric_only=True)
+
+    assert_eq(expected, actual)
+
+    with pytest.raises(TypeError):
+        gdf.std()
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        {
+            "id": ["a", "a", "a", "b", "b", "b", "c", "c", "c"],
+            "val1": ["v", "n", "k", "l", "m", "i", "y", "r", "w"],
+            "val2": ["d", "d", "d", "e", "e", "e", "f", "f", "f"],
+        }
+    ],
+)
+def test_empty_numeric_only(data):
+    gdf = cudf.DataFrame(data)
+    pdf = gdf.to_pandas()
+    expected = pdf.prod(numeric_only=True)
+    actual = gdf.prod(numeric_only=True)
+    assert_eq(expected, actual)
diff --git a/python/cudf/cudf/tests/test_stats.py b/python/cudf/cudf/tests/test_stats.py
index 977a01952db..08f662f0ba7 100644
--- a/python/cudf/cudf/tests/test_stats.py
+++ b/python/cudf/cudf/tests/test_stats.py
@@ -239,13 +239,10 @@ def test_misc_quantiles(data, q):
         cudf.Series([1.1032, 2.32, 43.4, 13, -312.0], index=[0, 4, 3, 19, 6]),
         cudf.Series([]),
         cudf.Series([-3]),
-        randomdata(
-            nrows=1000, dtypes={"a": float, "b": int, "c": float, "d": str}
-        ),
     ],
 )
 @pytest.mark.parametrize("null_flag", [False, True])
-def test_kurtosis(data, null_flag):
+def test_kurtosis_series(data, null_flag):
     pdata = data.to_pandas()
 
     if null_flag and len(data) > 2:
@@ -262,8 +259,13 @@ def test_kurtosis(data, null_flag):
     expected = pdata.kurt()
     np.testing.assert_array_almost_equal(got, expected)
 
+    got = data.kurt(numeric_only=False)
+    got = got if np.isscalar(got) else got.to_numpy()
+    expected = pdata.kurt(numeric_only=False)
+    np.testing.assert_array_almost_equal(got, expected)
+
     with pytest.raises(NotImplementedError):
-        data.kurt(numeric_only=False)
+        data.kurt(numeric_only=True)
 
 
 @pytest.mark.parametrize(
@@ -280,13 +282,10 @@ def test_kurtosis(data, null_flag):
         cudf.Series([1.1032, 2.32, 43.4, 13, -312.0], index=[0, 4, 3, 19, 6]),
         cudf.Series([]),
         cudf.Series([-3]),
-        randomdata(
-            nrows=1000, dtypes={"a": float, "b": int, "c": float, "d": str}
-        ),
     ],
 )
 @pytest.mark.parametrize("null_flag", [False, True])
-def test_skew(data, null_flag):
+def test_skew_series(data, null_flag):
     pdata = data.to_pandas()
 
     if null_flag and len(data) > 2:
@@ -298,8 +297,13 @@ def test_skew(data, null_flag):
     got = got if np.isscalar(got) else got.to_numpy()
     np.testing.assert_array_almost_equal(got, expected)
 
+    got = data.skew(numeric_only=False)
+    expected = pdata.skew(numeric_only=False)
+    got = got if np.isscalar(got) else got.to_numpy()
+    np.testing.assert_array_almost_equal(got, expected)
+
     with pytest.raises(NotImplementedError):
-        data.skew(numeric_only=False)
+        data.skew(numeric_only=True)
 
 
 @pytest.mark.parametrize("dtype", params_dtypes)
@@ -541,3 +545,62 @@ def test_cov_corr_invalid_dtypes(gsr):
         rfunc_args_and_kwargs=([gsr],),
         compare_error_message=False,
     )
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        randomdata(
+            nrows=1000, dtypes={"a": float, "b": int, "c": float, "d": str}
+        ),
+    ],
+)
+@pytest.mark.parametrize("null_flag", [False, True])
+def test_kurtosis_df(data, null_flag):
+    pdata = data.to_pandas()
+
+    if null_flag and len(data) > 2:
+        data.iloc[[0, 2]] = None
+        pdata.iloc[[0, 2]] = None
+
+    got = data.kurtosis()
+    got = got if np.isscalar(got) else got.to_numpy()
+    expected = pdata.kurtosis()
+    np.testing.assert_array_almost_equal(got, expected)
+
+    got = data.kurt()
+    got = got if np.isscalar(got) else got.to_numpy()
+    expected = pdata.kurt()
+    np.testing.assert_array_almost_equal(got, expected)
+
+    got = data.kurt(numeric_only=True)
+    got = got if np.isscalar(got) else got.to_numpy()
+    expected = pdata.kurt(numeric_only=True)
+    np.testing.assert_array_almost_equal(got, expected)
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        randomdata(
+            nrows=1000, dtypes={"a": float, "b": int, "c": float, "d": str}
+        ),
+    ],
+)
+@pytest.mark.parametrize("null_flag", [False, True])
+def test_skew_df(data, null_flag):
+    pdata = data.to_pandas()
+
+    if null_flag and len(data) > 2:
+        data.iloc[[0, 2]] = None
+        pdata.iloc[[0, 2]] = None
+
+    got = data.skew()
+    expected = pdata.skew()
+    got = got if np.isscalar(got) else got.to_numpy()
+    np.testing.assert_array_almost_equal(got, expected)
+
+    got = data.skew(numeric_only=True)
+    expected = pdata.skew(numeric_only=True)
+    got = got if np.isscalar(got) else got.to_numpy()
+    np.testing.assert_array_almost_equal(got, expected)

From 77fa49eddf1c961277ec5e0fb3616433f2a46ea4 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 14 Apr 2022 14:13:06 -0700
Subject: [PATCH 2/7] Clean up C++ includes to use <> instead of "". (#10658)

This PR cleans up some C++ includes to use `#include <...>` instead of `#include "..."` where appropriate.

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Karthikeyan (https://github.com/karthikeyann)
  - Yunsong Wang (https://github.com/PointKernel)

URL: https://github.com/rapidsai/cudf/pull/10658
---
 cpp/benchmarks/io/orc/orc_writer.cpp              |  2 +-
 cpp/benchmarks/sort/rank.cpp                      |  2 +-
 cpp/benchmarks/string/convert_durations.cpp       | 15 +++++++--------
 cpp/include/cudf/detail/reduction_functions.hpp   |  2 +-
 cpp/libcudf_kafka/src/kafka_callback.cpp          |  2 +-
 cpp/libcudf_kafka/src/kafka_consumer.cpp          |  2 +-
 cpp/src/merge/merge.cu                            |  2 +-
 cpp/src/structs/structs_column_view.cpp           |  4 ++--
 .../binaryop/binop-compiled-fixed_point-test.cpp  |  2 +-
 cpp/tests/hash_map/map_test.cu                    |  2 +-
 cpp/tests/iterator/value_iterator_test_strings.cu | 10 ++++++----
 cpp/tests/partitioning/partition_test.cpp         | 10 +++++-----
 12 files changed, 28 insertions(+), 27 deletions(-)

diff --git a/cpp/benchmarks/io/orc/orc_writer.cpp b/cpp/benchmarks/io/orc/orc_writer.cpp
index 525c13af5c0..f61dac7677b 100644
--- a/cpp/benchmarks/io/orc/orc_writer.cpp
+++ b/cpp/benchmarks/io/orc/orc_writer.cpp
@@ -14,7 +14,6 @@
  * limitations under the License.
  */
 
-#include "cudf/io/types.hpp"
 #include <benchmark/benchmark.h>
 
 #include <benchmarks/common/generate_input.hpp>
@@ -23,6 +22,7 @@
 #include <benchmarks/synchronization/synchronization.hpp>
 
 #include <cudf/io/orc.hpp>
+#include <cudf/io/types.hpp>
 
 // to enable, run cmake with -DBUILD_BENCHMARKS=ON
 
diff --git a/cpp/benchmarks/sort/rank.cpp b/cpp/benchmarks/sort/rank.cpp
index 22acb241f0b..c3c77ebd52f 100644
--- a/cpp/benchmarks/sort/rank.cpp
+++ b/cpp/benchmarks/sort/rank.cpp
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "cudf/column/column_view.hpp"
+#include <cudf/column/column_view.hpp>
 #include <cudf/sorting.hpp>
 
 #include <cudf_test/base_fixture.hpp>
diff --git a/cpp/benchmarks/string/convert_durations.cpp b/cpp/benchmarks/string/convert_durations.cpp
index dc9a1e991b2..8af111d9a63 100644
--- a/cpp/benchmarks/string/convert_durations.cpp
+++ b/cpp/benchmarks/string/convert_durations.cpp
@@ -13,25 +13,24 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
-#include <benchmark/benchmark.h>
-
+#include <cudf/column/column_view.hpp>
 #include <cudf/strings/convert/convert_durations.hpp>
 #include <cudf/types.hpp>
+#include <cudf/wrappers/durations.hpp>
 
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/column_utilities.hpp>
 #include <cudf_test/column_wrapper.hpp>
 #include <cudf_test/cudf_gtest.hpp>
 
+#include <benchmark/benchmark.h>
+
+#include <benchmarks/fixture/benchmark_fixture.hpp>
+#include <benchmarks/synchronization/synchronization.hpp>
+
 #include <algorithm>
 #include <random>
 
-#include "../fixture/benchmark_fixture.hpp"
-#include "../synchronization/synchronization.hpp"
-#include "cudf/column/column_view.hpp"
-#include "cudf/wrappers/durations.hpp"
-
 class DurationsToString : public cudf::benchmark {
 };
 template <class TypeParam>
diff --git a/cpp/include/cudf/detail/reduction_functions.hpp b/cpp/include/cudf/detail/reduction_functions.hpp
index 3a6113e66ce..317e4d0cf47 100644
--- a/cpp/include/cudf/detail/reduction_functions.hpp
+++ b/cpp/include/cudf/detail/reduction_functions.hpp
@@ -17,9 +17,9 @@
 #pragma once
 
 #include <cudf/column/column_view.hpp>
+#include <cudf/lists/lists_column_view.hpp>
 #include <cudf/scalar/scalar.hpp>
 
-#include "cudf/lists/lists_column_view.hpp"
 #include <rmm/cuda_stream_view.hpp>
 
 namespace cudf {
diff --git a/cpp/libcudf_kafka/src/kafka_callback.cpp b/cpp/libcudf_kafka/src/kafka_callback.cpp
index 6b98747c145..79a40640627 100644
--- a/cpp/libcudf_kafka/src/kafka_callback.cpp
+++ b/cpp/libcudf_kafka/src/kafka_callback.cpp
@@ -13,7 +13,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "cudf_kafka/kafka_callback.hpp"
+#include <cudf_kafka/kafka_callback.hpp>
 
 #include <librdkafka/rdkafkacpp.h>
 
diff --git a/cpp/libcudf_kafka/src/kafka_consumer.cpp b/cpp/libcudf_kafka/src/kafka_consumer.cpp
index 49e89a56e60..2ddaa9892da 100644
--- a/cpp/libcudf_kafka/src/kafka_consumer.cpp
+++ b/cpp/libcudf_kafka/src/kafka_consumer.cpp
@@ -13,7 +13,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "cudf_kafka/kafka_consumer.hpp"
+#include <cudf_kafka/kafka_consumer.hpp>
 
 #include <librdkafka/rdkafkacpp.h>
 
diff --git a/cpp/src/merge/merge.cu b/cpp/src/merge/merge.cu
index 01a94457b69..9c94a6220d6 100644
--- a/cpp/src/merge/merge.cu
+++ b/cpp/src/merge/merge.cu
@@ -26,6 +26,7 @@
 #include <cudf/structs/structs_column_view.hpp>
 #include <cudf/table/table.hpp>
 #include <cudf/table/table_device_view.cuh>
+#include <cudf/utilities/traits.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/device_uvector.hpp>
@@ -38,7 +39,6 @@
 #include <thrust/transform.h>
 #include <thrust/tuple.h>
 
-#include "cudf/utilities/traits.hpp"
 #include <queue>
 #include <vector>
 
diff --git a/cpp/src/structs/structs_column_view.cpp b/cpp/src/structs/structs_column_view.cpp
index db9496f18be..681f13386ff 100644
--- a/cpp/src/structs/structs_column_view.cpp
+++ b/cpp/src/structs/structs_column_view.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,9 +14,9 @@
  * limitations under the License.
  */
 
-#include "cudf/utilities/error.hpp"
 #include <cudf/column/column.hpp>
 #include <cudf/structs/structs_column_view.hpp>
+#include <cudf/utilities/error.hpp>
 
 namespace cudf {
 
diff --git a/cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp b/cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp
index 64462669f90..28df893aff1 100644
--- a/cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp
+++ b/cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp
@@ -20,13 +20,13 @@
 #include <cudf/scalar/scalar_factories.hpp>
 #include <cudf/types.hpp>
 #include <cudf/unary.hpp>
+#include <cudf/utilities/error.hpp>
 #include <cudf/utilities/type_dispatcher.hpp>
 
 #include <cudf_test/column_utilities.hpp>
 #include <cudf_test/column_wrapper.hpp>
 #include <cudf_test/type_lists.hpp>
 
-#include "cudf/utilities/error.hpp"
 #include <tests/binaryop/assert-binops.h>
 #include <tests/binaryop/binop-fixture.hpp>
 
diff --git a/cpp/tests/hash_map/map_test.cu b/cpp/tests/hash_map/map_test.cu
index d69aee57756..f42549514e6 100644
--- a/cpp/tests/hash_map/map_test.cu
+++ b/cpp/tests/hash_map/map_test.cu
@@ -23,12 +23,12 @@
 
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/device_uvector.hpp>
+#include <rmm/exec_policy.hpp>
 
 #include <thrust/logical.h>
 #include <thrust/pair.h>
 #include <thrust/tabulate.h>
 
-#include "rmm/exec_policy.hpp"
 #include <cstdlib>
 #include <iostream>
 #include <limits>
diff --git a/cpp/tests/iterator/value_iterator_test_strings.cu b/cpp/tests/iterator/value_iterator_test_strings.cu
index 5bddbfbd4aa..9aa18eb844f 100644
--- a/cpp/tests/iterator/value_iterator_test_strings.cu
+++ b/cpp/tests/iterator/value_iterator_test_strings.cu
@@ -12,10 +12,12 @@
  * or implied. See the License for the specific language governing permissions and limitations under
  * the License.
  */
-#include "cudf/detail/utilities/vector_factories.hpp"
-#include "rmm/cuda_stream_view.hpp"
-#include "rmm/device_uvector.hpp"
-#include <tests/iterator/iterator_tests.cuh>
+#include "iterator_tests.cuh"
+
+#include <cudf/detail/utilities/vector_factories.hpp>
+
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/device_uvector.hpp>
 
 #include <thrust/host_vector.h>
 #include <thrust/iterator/counting_iterator.h>
diff --git a/cpp/tests/partitioning/partition_test.cpp b/cpp/tests/partitioning/partition_test.cpp
index 785af409c4c..014a19e93a9 100644
--- a/cpp/tests/partitioning/partition_test.cpp
+++ b/cpp/tests/partitioning/partition_test.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -13,16 +13,16 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include <cudf/copying.hpp>
-#include <cudf/partitioning.hpp>
-#include <cudf/table/table.hpp>
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/column_utilities.hpp>
 #include <cudf_test/column_wrapper.hpp>
 #include <cudf_test/table_utilities.hpp>
 #include <cudf_test/type_lists.hpp>
 
-#include "cudf/sorting.hpp"
+#include <cudf/copying.hpp>
+#include <cudf/partitioning.hpp>
+#include <cudf/sorting.hpp>
+#include <cudf/table/table.hpp>
 
 template <typename T>
 class PartitionTest : public cudf::test::BaseFixture {

From 14a32619a5b1c0eff49588b141f8ef2eb754cadf Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 14 Apr 2022 14:40:20 -0700
Subject: [PATCH 3/7] Improve User Guide docs (#10663)

This PR makes some minor improvements to the cuDF user guide and some docstrings.

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/10663
---
 docs/cudf/source/basics/basics.rst            | 58 ++++++++++---------
 docs/cudf/source/basics/internals.rst         |  4 +-
 .../cudf/source/basics/io-gds-integration.rst | 24 ++++----
 .../source/basics/io-nvcomp-integration.rst   |  4 +-
 python/cudf/cudf/core/cut.py                  | 46 ++++++++++-----
 python/cudf/cudf/core/groupby/groupby.py      | 21 +++----
 python/cudf/cudf/core/single_column_frame.py  |  4 +-
 7 files changed, 91 insertions(+), 70 deletions(-)

diff --git a/docs/cudf/source/basics/basics.rst b/docs/cudf/source/basics/basics.rst
index 60a65558033..9b8983fba49 100644
--- a/docs/cudf/source/basics/basics.rst
+++ b/docs/cudf/source/basics/basics.rst
@@ -15,36 +15,40 @@ The following table lists all of cudf types. For methods requiring dtype argumen
 .. rst-class:: special-table
 .. table::
 
-    +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+
-    | Kind of Data           | Data Type        | Scalar                                                                              | String Aliases                              |
-    +========================+==================+=====================================================================================+=============================================+
-    | Integer                |                  | np.int8_, np.int16_, np.int32_, np.int64_, np.uint8_, np.uint16_,                   | ``'int8'``, ``'int16'``, ``'int32'``,       |
-    |                        |                  | np.uint32_, np.uint64_                                                              | ``'int64'``, ``'uint8'``, ``'uint16'``,     |
-    |                        |                  |                                                                                     | ``'uint32'``, ``'uint64'``                  |
-    +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+
-    | Float                  |                  | np.float32_, np.float64_                                                            | ``'float32'``, ``'float64'``                |
-    +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+
-    | Strings                |                  | `str <https://docs.python.org/3/library/stdtypes.html#str>`_                        | ``'string'``, ``'object'``                  |
-    +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+
-    | Datetime               |                  | np.datetime64_                                                                      | ``'datetime64[s]'``, ``'datetime64[ms]'``,  |
-    |                        |                  |                                                                                     | ``'datetime64[us]'``, ``'datetime64[ns]'``  |
-    +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+
-    | Timedelta              |                  | np.timedelta64_                                                                     | ``'timedelta64[s]'``, ``'timedelta64[ms]'``,|
-    | (duration type)        |                  |                                                                                     | ``'timedelta64[us]'``, ``'timedelta64[ns]'``|
-    +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+
-    | Categorical            | CategoricalDtype | (none)                                                                              | ``'category'``                              |
-    +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+
-    | Boolean                |                  | np.bool_                                                                            | ``'bool'``                                  |
-    +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+
-    | Decimal                | Decimal32Dtype,  | (none)                                                                              | (none)                                      |
-    |                        | Decimal64Dtype,  |                                                                                     |                                             |
-    |                        | Decimal128Dtype  |                                                                                     |                                             |
-    +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+
+    +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+
+    | Kind of Data    | Data Type        | Scalar                                                       | String Aliases                               |
+    +=================+==================+==============================================================+==============================================+
+    | Integer         |                  | np.int8_, np.int16_, np.int32_, np.int64_, np.uint8_,        | ``'int8'``, ``'int16'``, ``'int32'``,        |
+    |                 |                  | np.uint16_, np.uint32_, np.uint64_                           | ``'int64'``, ``'uint8'``, ``'uint16'``,      |
+    |                 |                  |                                                              | ``'uint32'``, ``'uint64'``                   |
+    +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+
+    | Float           |                  | np.float32_, np.float64_                                     | ``'float32'``, ``'float64'``                 |
+    +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+
+    | Strings         |                  | `str <https://docs.python.org/3/library/stdtypes.html#str>`_ | ``'string'``, ``'object'``                   |
+    +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+
+    | Datetime        |                  | np.datetime64_                                               | ``'datetime64[s]'``, ``'datetime64[ms]'``,   |
+    |                 |                  |                                                              | ``'datetime64[us]'``, ``'datetime64[ns]'``   |
+    +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+
+    | Timedelta       |                  | np.timedelta64_                                              | ``'timedelta64[s]'``, ``'timedelta64[ms]'``, |
+    | (duration type) |                  |                                                              | ``'timedelta64[us]'``, ``'timedelta64[ns]'`` |
+    +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+
+    | Categorical     | CategoricalDtype | (none)                                                       | ``'category'``                               |
+    +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+
+    | Boolean         |                  | np.bool_                                                     | ``'bool'``                                   |
+    +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+
+    | Decimal         | Decimal32Dtype,  | (none)                                                       | (none)                                       |
+    |                 | Decimal64Dtype,  |                                                              |                                              |
+    |                 | Decimal128Dtype  |                                                              |                                              |
+    +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+
+    | Lists           | ListDtype        | list                                                         | ``'list'``                                   |
+    +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+
+    | Structs         | StructDtype      | dict                                                         | ``'struct'``                                 |
+    +-----------------+------------------+--------------------------------------------------------------+----------------------------------------------+
 
 **Note: All dtypes above are Nullable**
 
-.. _np.int8: 
-.. _np.int16: 
+.. _np.int8:
+.. _np.int16:
 .. _np.int32:
 .. _np.int64:
 .. _np.uint8:
diff --git a/docs/cudf/source/basics/internals.rst b/docs/cudf/source/basics/internals.rst
index 60b63c6fab8..96ef40d51e6 100644
--- a/docs/cudf/source/basics/internals.rst
+++ b/docs/cudf/source/basics/internals.rst
@@ -54,7 +54,7 @@ As another example, the ``StringColumn`` backing the Series
 2. No mask buffer as there are no nulls in the Series
 3. Two children columns:
 
-    -  A column of 8-bit characters
+    -  A column of UTF-8 characters
        ``['d', 'o', 'y', 'o', 'u', h' ... '?']``
     -  A column of "offsets" to the characters column (in this case,
        ``[0, 2, 5, 9, 12, 19]``)
@@ -172,7 +172,7 @@ Selecting columns by index:
     >>> ca.select_by_index(1)
     ColumnAccessor(OrderedColumnDict([('y', <cudf.core.column.string.StringColumn object at 0x7f5a7d578830>)]), multiindex=False, level_names=(None,))
     >>> ca.select_by_index([0, 1])
-    ColumnAccessor(OrderedColumnDict([('x', <cudf.core.column.numerical.NumericalColumn object at 0x7f5a7d5789e0>), ('y', <cudf.core.column.string.StringColumn object at 0x7f5a7d578830>)]), multiindex=False, level_names=(None,))    
+    ColumnAccessor(OrderedColumnDict([('x', <cudf.core.column.numerical.NumericalColumn object at 0x7f5a7d5789e0>), ('y', <cudf.core.column.string.StringColumn object at 0x7f5a7d578830>)]), multiindex=False, level_names=(None,))
     >>> ca.select_by_index(slice(1, 3))
     ColumnAccessor(OrderedColumnDict([('y', <cudf.core.column.string.StringColumn object at 0x7f5a7d578830>), ('z', <cudf.core.column.numerical.NumericalColumn object at 0x7f5a7d5788c0>)]), multiindex=False, level_names=(None,))
 
diff --git a/docs/cudf/source/basics/io-gds-integration.rst b/docs/cudf/source/basics/io-gds-integration.rst
index 71c114e9149..5ff07ac29c5 100644
--- a/docs/cudf/source/basics/io-gds-integration.rst
+++ b/docs/cudf/source/basics/io-gds-integration.rst
@@ -1,14 +1,14 @@
 GPUDirect Storage Integration
 =============================
 
-Many IO APIs can use GPUDirect Storage (GDS) library to optimize IO operations. 
-GDS enables a direct data path for direct memory access (DMA) transfers between GPU memory and storage, which avoids a bounce buffer through the CPU. 
-GDS also has a compatibility mode that allows the library to fall back to copying through a CPU bounce buffer. 
+Many IO APIs can use GPUDirect Storage (GDS) library to optimize IO operations.
+GDS enables a direct data path for direct memory access (DMA) transfers between GPU memory and storage, which avoids a bounce buffer through the CPU.
+GDS also has a compatibility mode that allows the library to fall back to copying through a CPU bounce buffer.
 The SDK is available for download `here <https://developer.nvidia.com/gpudirect-storage>`_.
 GDS is also included in CUDA Toolkit 11.4 and higher.
 
-Use of GPUDirect Storage in cuDF is enabled by default, but can be disabled through the environment variable ``LIBCUDF_CUFILE_POLICY``. 
-This variable also controls the GDS compatibility mode. 
+Use of GPUDirect Storage in cuDF is enabled by default, but can be disabled through the environment variable ``LIBCUDF_CUFILE_POLICY``.
+This variable also controls the GDS compatibility mode.
 
 There are three valid values for the environment variable:
 
@@ -20,17 +20,17 @@ If no value is set, behavior will be the same as the "GDS" option.
 
 This environment variable also affects how cuDF treats GDS errors.
 When ``LIBCUDF_CUFILE_POLICY`` is set to "GDS" and a GDS API call fails for any reason, cuDF falls back to the internal implementation with bounce buffers.
-When ``LIBCUDF_CUFILE_POLICY`` is set to "ALWAYS" and a GDS API call fails for any reason (unlikely, given that the compatibility mode is on), 
+When ``LIBCUDF_CUFILE_POLICY`` is set to "ALWAYS" and a GDS API call fails for any reason (unlikely, given that the compatibility mode is on),
 cuDF throws an exception to propagate the error to te user.
 
 Operations that support the use of GPUDirect Storage:
 
-- `read_avro`
-- `read_parquet`
-- `read_orc`
-- `to_csv`
-- `to_parquet`
-- `to_orc`
+- :py:func:`cudf.read_avro`
+- :py:func:`cudf.read_parquet`
+- :py:func:`cudf.read_orc`
+- :py:meth:`cudf.DataFrame.to_csv`
+- :py:meth:`cudf.DataFrame.to_parquet`
+- :py:meth:`cudf.DataFrame.to_orc`
 
 Several parameters that can be used to tune the performance of GDS-enabled I/O are exposed through environment variables:
 
diff --git a/docs/cudf/source/basics/io-nvcomp-integration.rst b/docs/cudf/source/basics/io-nvcomp-integration.rst
index 521833e2afd..fc24e0c15f4 100644
--- a/docs/cudf/source/basics/io-nvcomp-integration.rst
+++ b/docs/cudf/source/basics/io-nvcomp-integration.rst
@@ -1,14 +1,14 @@
 nvCOMP Integration
 =============================
 
-Some types of compression/decompression can be performed using either `nvCOMP library <https://github.com/NVIDIA/nvcomp>`_ or the internal implementation. 
+Some types of compression/decompression can be performed using either the `nvCOMP library <https://github.com/NVIDIA/nvcomp>`_ or the internal implementation.
 
 Which implementation is used by default depends on the data format and the compression type.
 Behavior can be influenced through environment variable ``LIBCUDF_NVCOMP_POLICY``.
 
 There are three valid values for the environment variable:
 
-- "STABLE": Only enable the nvCOMP in places where it has been deemed stable for production use. 
+- "STABLE": Only enable the nvCOMP in places where it has been deemed stable for production use.
 - "ALWAYS": Enable all available uses of nvCOMP, including new, experimental combinations.
 - "OFF": Disable nvCOMP use whenever possible and use the internal implementations instead.
 
diff --git a/python/cudf/cudf/core/cut.py b/python/cudf/cudf/core/cut.py
index 7c585602c23..915383e4852 100644
--- a/python/cudf/cudf/core/cut.py
+++ b/python/cudf/cudf/core/cut.py
@@ -1,3 +1,5 @@
+# Copyright (c) 2021-2022, NVIDIA CORPORATION.
+
 from collections.abc import Sequence
 
 import cupy
@@ -21,21 +23,27 @@ def cut(
     duplicates: str = "raise",
     ordered: bool = True,
 ):
+    """Bin values into discrete intervals.
 
-    """
-    Bin values into discrete intervals.
     Use cut when you need to segment and sort data values into bins. This
     function is also useful for going from a continuous variable to a
     categorical variable.
+
     Parameters
     ----------
     x : array-like
         The input array to be binned. Must be 1-dimensional.
     bins : int, sequence of scalars, or IntervalIndex
         The criteria to bin by.
-        * int : Defines the number of equal-width bins in the
-        range of x. The range of x is extended by .1% on each
-        side to include the minimum and maximum values of x.
+
+        * int : Defines the number of equal-width bins in the range of `x`. The
+          range of `x` is extended by .1% on each side to include the minimum
+          and maximum values of `x`.
+        * sequence of scalars : Defines the bin edges allowing for non-uniform
+          width. No extension of the range of `x` is done.
+        * IntervalIndex : Defines the exact bins to be used. Note that
+          IntervalIndex for `bins` must be non-overlapping.
+
     right : bool, default True
         Indicates whether bins includes the rightmost edge or not.
     labels : array or False, default None
@@ -66,30 +74,38 @@ def cut(
         For scalar or sequence bins, this is an ndarray with the computed
         bins. If set duplicates=drop, bins will drop non-unique bin. For
         an IntervalIndex bins, this is equal to bins.
+
     Examples
     --------
     Discretize into three equal-sized bins.
+
     >>> cudf.cut(np.array([1, 7, 5, 4, 6, 3]), 3)
     CategoricalIndex([(0.994, 3.0], (5.0, 7.0], (3.0, 5.0], (3.0, 5.0],
-    ...         (5.0, 7.0],(0.994, 3.0]], categories=[(0.994, 3.0],
-    ...         (3.0, 5.0], (5.0, 7.0]], ordered=True, dtype='category')
+                (5.0, 7.0], (0.994, 3.0]], categories=[(0.994, 3.0],
+                (3.0, 5.0], (5.0, 7.0]], ordered=True, dtype='category')
+
     >>> cudf.cut(np.array([1, 7, 5, 4, 6, 3]), 3, retbins=True)
     (CategoricalIndex([(0.994, 3.0], (5.0, 7.0], (3.0, 5.0], (3.0, 5.0],
-    ...         (5.0, 7.0],(0.994, 3.0]],categories=[(0.994, 3.0],
-    ...         (3.0, 5.0], (5.0, 7.0]],ordered=True, dtype='category'),
-    array([0.994, 3.   , 5.   , 7.   ]))
+                (5.0, 7.0], (0.994, 3.0]], categories=[(0.994, 3.0],
+                (3.0, 5.0], (5.0, 7.0]], ordered=True, dtype='category'),
+     array([0.994, 3.   , 5.   , 7.   ]))
+
     >>> cudf.cut(np.array([1, 7, 5, 4, 6, 3]),
-    ...        3, labels=["bad", "medium", "good"])
+    ...          3, labels=["bad", "medium", "good"])
     CategoricalIndex(['bad', 'good', 'medium', 'medium', 'good', 'bad'],
-    ...       categories=['bad', 'medium', 'good'],ordered=True,
-    ...       dtype='category')
+                     categories=['bad', 'medium', 'good'],ordered=True,
+                     dtype='category')
+
     >>> cudf.cut(np.array([1, 7, 5, 4, 6, 3]), 3,
-    ...       labels=["B", "A", "B"], ordered=False)
+    ...          labels=["B", "A", "B"], ordered=False)
     CategoricalIndex(['B', 'B', 'A', 'A', 'B', 'B'], categories=['A', 'B'],
-    ...        ordered=False, dtype='category')
+               ordered=False, dtype='category')
+
     >>> cudf.cut([0, 1, 1, 2], bins=4, labels=False)
     array([0, 1, 1, 3], dtype=int32)
+
     Passing a Series as an input returns a Series with categorical dtype:
+
     >>> s = cudf.Series(np.array([2, 4, 6, 8, 10]),
     ...        index=['a', 'b', 'c', 'd', 'e'])
     >>> cudf.cut(s, 3)
diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index 6b98e82d553..40f8eda0e4f 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -566,19 +566,20 @@ def mult(df):
             .. code-block::
 
                 >>> df = pd.DataFrame({
-                    'a': [1, 1, 2, 2],
-                    'b': [1, 2, 1, 2],
-                    'c': [1, 2, 3, 4]})
+                ...     'a': [1, 1, 2, 2],
+                ...     'b': [1, 2, 1, 2],
+                ...     'c': [1, 2, 3, 4],
+                ... })
                 >>> gdf = cudf.from_pandas(df)
                 >>> df.groupby('a').apply(lambda x: x.iloc[[0]])
-                        a  b  c
-                    a
-                    1 0  1  1  1
-                    2 2  2  1  3
+                     a  b  c
+                a
+                1 0  1  1  1
+                2 2  2  1  3
                 >>> gdf.groupby('a').apply(lambda x: x.iloc[[0]])
-                        a  b  c
-                    0  1  1  1
-                    2  2  1  3
+                   a  b  c
+                0  1  1  1
+                2  2  1  3
         """
         if not callable(function):
             raise TypeError(f"type {type(function)} is not callable")
diff --git a/python/cudf/cudf/core/single_column_frame.py b/python/cudf/cudf/core/single_column_frame.py
index addc823e7f1..7fa66bd831d 100644
--- a/python/cudf/cudf/core/single_column_frame.py
+++ b/python/cudf/cudf/core/single_column_frame.py
@@ -81,8 +81,8 @@ def name(self, value):
 
     @property  # type: ignore
     @_cudf_nvtx_annotate
-    def ndim(self):
-        """Get the dimensionality (always 1 for single-columned frames)."""
+    def ndim(self):  # noqa: D401
+        """Number of dimensions of the underlying data, by definition 1."""
         return 1
 
     @property  # type: ignore

From 6e6c325e7cb99baeecaec65aff8c97aa2450ff51 Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Thu, 14 Apr 2022 18:58:48 -0500
Subject: [PATCH 4/7] Fix some docstrings formatting (#10660)

This PR fixes some of the broken docstring formatting in the codebase.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/10660
---
 docs/cudf/source/api_docs/dataframe.rst       | 3 +++
 docs/cudf/source/api_docs/index_objects.rst   | 2 ++
 docs/cudf/source/api_docs/series.rst          | 2 ++
 docs/cudf/source/api_docs/string_handling.rst | 1 -
 docs/cudf/source/conf.py                      | 1 +
 python/cudf/cudf/core/_base_index.py          | 2 +-
 python/cudf/cudf/core/cut.py                  | 1 +
 python/cudf/cudf/core/indexed_frame.py        | 2 ++
 python/cudf/cudf/core/tools/numeric.py        | 2 +-
 9 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/docs/cudf/source/api_docs/dataframe.rst b/docs/cudf/source/api_docs/dataframe.rst
index 1d600acfef1..e0ef3cb2ff0 100644
--- a/docs/cudf/source/api_docs/dataframe.rst
+++ b/docs/cudf/source/api_docs/dataframe.rst
@@ -149,6 +149,7 @@ Computations / descriptive stats
    DataFrame.round
    DataFrame.skew
    DataFrame.sum
+   DataFrame.sum_of_squares
    DataFrame.std
    DataFrame.var
    DataFrame.nunique
@@ -248,9 +249,11 @@ Serialization / IO / conversion
    DataFrame.to_dlpack
    DataFrame.to_parquet
    DataFrame.to_csv
+   DataFrame.to_cupy
    DataFrame.to_hdf
    DataFrame.to_dict
    DataFrame.to_json
+   DataFrame.to_numpy
    DataFrame.to_pandas
    DataFrame.to_feather
    DataFrame.to_records
diff --git a/docs/cudf/source/api_docs/index_objects.rst b/docs/cudf/source/api_docs/index_objects.rst
index 6f5affd0ecd..8e0e3bbd411 100644
--- a/docs/cudf/source/api_docs/index_objects.rst
+++ b/docs/cudf/source/api_docs/index_objects.rst
@@ -92,7 +92,9 @@ Conversion
 
    Index.astype
    Index.to_arrow
+   Index.to_cupy
    Index.to_list
+   Index.to_numpy
    Index.to_series
    Index.to_frame
    Index.to_pandas
diff --git a/docs/cudf/source/api_docs/series.rst b/docs/cudf/source/api_docs/series.rst
index 95aa71919e4..d7015c9348d 100644
--- a/docs/cudf/source/api_docs/series.rst
+++ b/docs/cudf/source/api_docs/series.rst
@@ -390,10 +390,12 @@ Serialization / IO / conversion
    :toctree: api/
 
    Series.to_arrow
+   Series.to_cupy
    Series.to_dlpack
    Series.to_frame
    Series.to_hdf
    Series.to_json
+   Series.to_numpy
    Series.to_pandas
    Series.to_string
    Series.from_arrow
diff --git a/docs/cudf/source/api_docs/string_handling.rst b/docs/cudf/source/api_docs/string_handling.rst
index 3087bcaa826..8d4646c47a7 100644
--- a/docs/cudf/source/api_docs/string_handling.rst
+++ b/docs/cudf/source/api_docs/string_handling.rst
@@ -83,7 +83,6 @@ strings and apply several methods to it. These can be accessed like
    rsplit
    startswith
    strip
-   subword_tokenize
    swapcase
    title
    token_count
diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py
index dbdf8e59e6a..d65b77ef74b 100644
--- a/docs/cudf/source/conf.py
+++ b/docs/cudf/source/conf.py
@@ -252,6 +252,7 @@ def process_class_docstrings(app, what, name, obj, options, lines):
             lines[:] = lines[:cut_index]
 
 
+nitpick_ignore = [("py:class", "SeriesOrIndex"),]
 
 
 def setup(app):
diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py
index 259a7f711c3..6fed6510484 100644
--- a/python/cudf/cudf/core/_base_index.py
+++ b/python/cudf/cudf/core/_base_index.py
@@ -118,7 +118,7 @@ def get_level_values(self, level):
 
         See Also
         --------
-        cudf.core.multiindex.MultiIndex.get_level_values : Get values for
+        cudf.MultiIndex.get_level_values : Get values for
             a level of a MultiIndex.
 
         Notes
diff --git a/python/cudf/cudf/core/cut.py b/python/cudf/cudf/core/cut.py
index 915383e4852..0fef6630248 100644
--- a/python/cudf/cudf/core/cut.py
+++ b/python/cudf/cudf/core/cut.py
@@ -64,6 +64,7 @@ def cut(
         Categorical and Series (with Categorical dtype). If True,
         the resulting categorical will be ordered. If False, the resulting
         categorical will be unordered (labels must be provided).
+
     Returns
     -------
     out : CategoricalIndex
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index 10736948b57..ea722ec3968 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -991,6 +991,7 @@ def add_prefix(self, prefix):
         Examples
         --------
         **Series**
+
         >>> s = cudf.Series([1, 2, 3, 4])
         >>> s
         0    1
@@ -1006,6 +1007,7 @@ def add_prefix(self, prefix):
         dtype: int64
 
         **DataFrame**
+
         >>> df = cudf.DataFrame({'A': [1, 2, 3, 4], 'B': [3, 4, 5, 6]})
         >>> df
            A  B
diff --git a/python/cudf/cudf/core/tools/numeric.py b/python/cudf/cudf/core/tools/numeric.py
index 7eea7cedaad..0273227010b 100644
--- a/python/cudf/cudf/core/tools/numeric.py
+++ b/python/cudf/cudf/core/tools/numeric.py
@@ -57,7 +57,7 @@ def to_numeric(arg, errors="raise", downcast=None):
         otherwise ndarray
 
     Notes
-    -------
+    -----
     An important difference from pandas is that this function does not accept
     mixed numeric/non-numeric type sequences. For example ``[1, 'a']``.
     A ``TypeError`` will be raised when such input is received, regardless of

From 8f5a04451f8f61015d08c5699f0427b550afb53b Mon Sep 17 00:00:00 2001
From: Vukasin Milovanovic <vmilovanovic@nvidia.com>
Date: Thu, 14 Apr 2022 17:24:37 -0700
Subject: [PATCH 5/7] Add option to drop cache in cuIO benchmarks (#10488)

Dropping cache allows us to benchmark I/O times in a realistic/fair way.
Cache is dropped before each iteration if `CUDF_BENCHMARK_DROP_CACHE` environment variable is set.

Authors:
  - Vukasin Milovanovic (https://github.com/vuule)

Approvers:
  - Mike Wilson (https://github.com/hyperbolic2346)
  - MithunR (https://github.com/mythrocks)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/10488
---
 cpp/benchmarks/io/csv/csv_reader.cpp         |  2 ++
 cpp/benchmarks/io/cuio_common.cpp            | 28 ++++++++++++++++++++
 cpp/benchmarks/io/cuio_common.hpp            | 10 +++++++
 cpp/benchmarks/io/orc/orc_reader.cpp         |  2 ++
 cpp/benchmarks/io/parquet/parquet_reader.cpp |  2 ++
 cpp/benchmarks/io/text/multibyte_split.cpp   |  1 +
 6 files changed, 45 insertions(+)

diff --git a/cpp/benchmarks/io/csv/csv_reader.cpp b/cpp/benchmarks/io/csv/csv_reader.cpp
index c50f5220200..6f5e7160cd3 100644
--- a/cpp/benchmarks/io/csv/csv_reader.cpp
+++ b/cpp/benchmarks/io/csv/csv_reader.cpp
@@ -52,6 +52,7 @@ void BM_csv_read_varying_input(benchmark::State& state)
 
   auto mem_stats_logger = cudf::memory_stats_logger();
   for (auto _ : state) {
+    try_drop_l3_cache();
     cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
     cudf_io::read_csv(read_options);
   }
@@ -98,6 +99,7 @@ void BM_csv_read_varying_options(benchmark::State& state)
   cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks;
   auto mem_stats_logger               = cudf::memory_stats_logger();
   for (auto _ : state) {
+    try_drop_l3_cache();
     cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
     for (int32_t chunk = 0; chunk < num_chunks; ++chunk) {
       // only read the header in the first chunk
diff --git a/cpp/benchmarks/io/cuio_common.cpp b/cpp/benchmarks/io/cuio_common.cpp
index afe0cc77a4c..7d356263220 100644
--- a/cpp/benchmarks/io/cuio_common.cpp
+++ b/cpp/benchmarks/io/cuio_common.cpp
@@ -141,3 +141,31 @@ std::vector<cudf::size_type> segments_in_chunk(int num_segments, int num_chunks,
 
   return selected_segments;
 }
+
+// Executes the command and returns stderr output
+std::string exec_cmd(std::string_view cmd)
+{
+  // Switch stderr and stdout to only capture stderr
+  auto const redirected_cmd = std::string{"( "}.append(cmd).append(" 3>&2 2>&1 1>&3) 2>/dev/null");
+  std::unique_ptr<FILE, decltype(&pclose)> pipe(popen(redirected_cmd.c_str(), "r"), pclose);
+  CUDF_EXPECTS(pipe != nullptr, "popen() failed");
+
+  std::array<char, 128> buffer;
+  std::string error_out;
+  while (fgets(buffer.data(), buffer.size(), pipe.get()) != nullptr) {
+    error_out += buffer.data();
+  }
+  return error_out;
+}
+
+void try_drop_l3_cache()
+{
+  static bool is_drop_cache_enabled = std::getenv("CUDF_BENCHMARK_DROP_CACHE") != nullptr;
+  if (not is_drop_cache_enabled) { return; }
+
+  std::array drop_cache_cmds{"/sbin/sysctl vm.drop_caches=3", "sudo /sbin/sysctl vm.drop_caches=3"};
+  CUDF_EXPECTS(std::any_of(drop_cache_cmds.cbegin(),
+                           drop_cache_cmds.cend(),
+                           [](auto& cmd) { return exec_cmd(cmd).empty(); }),
+               "Failed to execute the drop cache command");
+}
diff --git a/cpp/benchmarks/io/cuio_common.hpp b/cpp/benchmarks/io/cuio_common.hpp
index 2ed534d5333..ff900d20e6f 100644
--- a/cpp/benchmarks/io/cuio_common.hpp
+++ b/cpp/benchmarks/io/cuio_common.hpp
@@ -132,3 +132,13 @@ std::vector<std::string> select_column_names(std::vector<std::string> const& col
  * The segments could be Parquet row groups or ORC stripes.
  */
 std::vector<cudf::size_type> segments_in_chunk(int num_segments, int num_chunks, int chunk);
+
+/**
+ * @brief Drops the OS file caches (`vm.drop_caches=3`) if `CUDF_BENCHMARK_DROP_CACHE` is set.
+ *
+ * Has no effect if the environment variable is not set.
+ * May require sudo access to run successfully.
+ *
+ * @throw cudf::logic_error if the environment variable is set and the command fails
+ */
+void try_drop_l3_cache();
diff --git a/cpp/benchmarks/io/orc/orc_reader.cpp b/cpp/benchmarks/io/orc/orc_reader.cpp
index 0fc2238a272..fc76fbe7603 100644
--- a/cpp/benchmarks/io/orc/orc_reader.cpp
+++ b/cpp/benchmarks/io/orc/orc_reader.cpp
@@ -60,6 +60,7 @@ void BM_orc_read_varying_input(benchmark::State& state)
 
   auto mem_stats_logger = cudf::memory_stats_logger();
   for (auto _ : state) {
+    try_drop_l3_cache();
     cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
     cudf_io::read_orc(read_opts);
   }
@@ -117,6 +118,7 @@ void BM_orc_read_varying_options(benchmark::State& state)
   cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks;
   auto mem_stats_logger               = cudf::memory_stats_logger();
   for (auto _ : state) {
+    try_drop_l3_cache();
     cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
 
     cudf::size_type rows_read = 0;
diff --git a/cpp/benchmarks/io/parquet/parquet_reader.cpp b/cpp/benchmarks/io/parquet/parquet_reader.cpp
index 8a97fd35c31..b20534e8ac0 100644
--- a/cpp/benchmarks/io/parquet/parquet_reader.cpp
+++ b/cpp/benchmarks/io/parquet/parquet_reader.cpp
@@ -60,6 +60,7 @@ void BM_parq_read_varying_input(benchmark::State& state)
 
   auto mem_stats_logger = cudf::memory_stats_logger();
   for (auto _ : state) {
+    try_drop_l3_cache();
     cuda_event_timer const raii(state, true);  // flush_l2_cache = true, stream = 0
     cudf_io::read_parquet(read_opts);
   }
@@ -117,6 +118,7 @@ void BM_parq_read_varying_options(benchmark::State& state)
   cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks;
   auto mem_stats_logger               = cudf::memory_stats_logger();
   for (auto _ : state) {
+    try_drop_l3_cache();
     cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
 
     cudf::size_type rows_read = 0;
diff --git a/cpp/benchmarks/io/text/multibyte_split.cpp b/cpp/benchmarks/io/text/multibyte_split.cpp
index ada8856e8e5..af6c2c5e030 100644
--- a/cpp/benchmarks/io/text/multibyte_split.cpp
+++ b/cpp/benchmarks/io/text/multibyte_split.cpp
@@ -137,6 +137,7 @@ static void BM_multibyte_split(benchmark::State& state)
 
   auto mem_stats_logger = cudf::memory_stats_logger();
   for (auto _ : state) {
+    try_drop_l3_cache();
     cuda_event_timer raii(state, true);
     auto output = cudf::io::text::multibyte_split(*source, delim);
   }

From b542678fda6ea40544d42e759caf3a6f8ad2b44d Mon Sep 17 00:00:00 2001
From: Robert Maynard <rmaynard@nvidia.com>
Date: Fri, 15 Apr 2022 09:59:51 -0400
Subject: [PATCH 6/7] cuco isn't a cudf dependency when we are built shared
 (#10662)

With the corrections in https://github.com/rapidsai/cudf/pull/10545 we didn't install the cuco headers / cmake files as they aren't needed for shared builds. But we forgot to remove the `find_package(cuco)` call from the generated cudf-config.cmake.

Authors:
  - Robert Maynard (https://github.com/robertmaynard)

Approvers:
  - Paul Taylor (https://github.com/trxcllnt)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/10662
---
 cpp/cmake/thirdparty/get_cucollections.cmake | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/cpp/cmake/thirdparty/get_cucollections.cmake b/cpp/cmake/thirdparty/get_cucollections.cmake
index 1639655d1e9..5232821d113 100644
--- a/cpp/cmake/thirdparty/get_cucollections.cmake
+++ b/cpp/cmake/thirdparty/get_cucollections.cmake
@@ -21,12 +21,14 @@ function(find_and_configure_cucollections)
     cuco 0.0.1
     GLOBAL_TARGETS cuco::cuco
     BUILD_EXPORT_SET cudf-exports
-    INSTALL_EXPORT_SET cudf-exports
     CPM_ARGS GITHUB_REPOSITORY NVIDIA/cuCollections
     GIT_TAG fb58a38701f1c24ecfe07d8f1f208bbe80930da5
     EXCLUDE_FROM_ALL ${BUILD_SHARED_LIBS}
     OPTIONS "BUILD_TESTS OFF" "BUILD_BENCHMARKS OFF" "BUILD_EXAMPLES OFF"
   )
+  if(NOT BUILD_SHARED_LIBS)
+    rapids_export_package(INSTALL cuco cudf-exports)
+  endif()
 
 endfunction()
 

From 4e668f27ba741ec1065b6ae6f99c0a4608df4336 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Fri, 15 Apr 2022 09:40:49 -0500
Subject: [PATCH 7/7] Update UDF notebook in User Guide. (#10668)

I noticed a couple lines I didn't expect in the UDF notebook in the User Guide while working on #10663. I didn't get these changes into that PR (had to wait for a local build to verify some things). The two changes are:
- We don't require `method="cudf"` in groupby statements.
- We don't need to execute `from cudf.utils import cudautils` to run this notebook.

(The cell execution counts also changed. There were some cells executed multiple times the last time this notebook was executed so they got out of order - this fixes it.)

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - https://github.com/brandon-b-miller

URL: https://github.com/rapidsai/cudf/pull/10668
---
 .../source/user_guide/guide-to-udfs.ipynb     | 152 +++++++++---------
 1 file changed, 75 insertions(+), 77 deletions(-)

diff --git a/docs/cudf/source/user_guide/guide-to-udfs.ipynb b/docs/cudf/source/user_guide/guide-to-udfs.ipynb
index 41bce8b865e..0d05ddb00b4 100644
--- a/docs/cudf/source/user_guide/guide-to-udfs.ipynb
+++ b/docs/cudf/source/user_guide/guide-to-udfs.ipynb
@@ -138,7 +138,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -148,7 +148,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
@@ -160,7 +160,7 @@
        "dtype: int64"
       ]
      },
-     "execution_count": 7,
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -193,7 +193,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
@@ -205,7 +205,7 @@
        "dtype: int64"
       ]
      },
-     "execution_count": 9,
+     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -218,7 +218,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -229,7 +229,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
@@ -241,7 +241,7 @@
        "dtype: int64"
       ]
      },
-     "execution_count": 11,
+     "execution_count": 9,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -260,7 +260,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -274,7 +274,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [
     {
@@ -286,7 +286,7 @@
        "dtype: int64"
       ]
      },
-     "execution_count": 14,
+     "execution_count": 11,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -322,7 +322,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -331,7 +331,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -355,7 +355,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -373,7 +373,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 15,
    "metadata": {},
    "outputs": [
     {
@@ -452,7 +452,7 @@
        "4   979   982  1011   9790.0"
       ]
      },
-     "execution_count": 19,
+     "execution_count": 15,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -497,7 +497,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -514,7 +514,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [
     {
@@ -569,7 +569,7 @@
        "2  3     6"
       ]
      },
-     "execution_count": 21,
+     "execution_count": 17,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -591,7 +591,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 18,
    "metadata": {},
    "outputs": [
     {
@@ -603,7 +603,7 @@
        "dtype: int64"
       ]
      },
-     "execution_count": 22,
+     "execution_count": 18,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -621,7 +621,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 19,
    "metadata": {},
    "outputs": [
     {
@@ -633,7 +633,7 @@
        "dtype: object"
       ]
      },
-     "execution_count": 23,
+     "execution_count": 19,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -658,7 +658,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 20,
    "metadata": {},
    "outputs": [
     {
@@ -709,7 +709,7 @@
        "2     3"
       ]
      },
-     "execution_count": 24,
+     "execution_count": 20,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -728,7 +728,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": 21,
    "metadata": {},
    "outputs": [
     {
@@ -740,7 +740,7 @@
        "dtype: int64"
       ]
      },
-     "execution_count": 25,
+     "execution_count": 21,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -758,7 +758,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 22,
    "metadata": {},
    "outputs": [
     {
@@ -813,7 +813,7 @@
        "2  3  1"
       ]
      },
-     "execution_count": 26,
+     "execution_count": 22,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -836,7 +836,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 23,
    "metadata": {},
    "outputs": [
     {
@@ -848,7 +848,7 @@
        "dtype: int64"
       ]
      },
-     "execution_count": 27,
+     "execution_count": 23,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -866,7 +866,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 24,
    "metadata": {},
    "outputs": [
     {
@@ -921,7 +921,7 @@
        "2  3  3.14"
       ]
      },
-     "execution_count": 28,
+     "execution_count": 24,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -939,7 +939,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 25,
    "metadata": {},
    "outputs": [
     {
@@ -951,7 +951,7 @@
        "dtype: float64"
       ]
      },
-     "execution_count": 29,
+     "execution_count": 25,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -982,7 +982,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 26,
    "metadata": {},
    "outputs": [
     {
@@ -1033,7 +1033,7 @@
        "2  5"
       ]
      },
-     "execution_count": 30,
+     "execution_count": 26,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1054,7 +1054,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 27,
    "metadata": {},
    "outputs": [
     {
@@ -1066,7 +1066,7 @@
        "dtype: float64"
       ]
      },
-     "execution_count": 31,
+     "execution_count": 27,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1084,7 +1084,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": 28,
    "metadata": {},
    "outputs": [
     {
@@ -1151,7 +1151,7 @@
        "2  3  6     4  8  6"
       ]
      },
-     "execution_count": 32,
+     "execution_count": 28,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1172,7 +1172,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 33,
+   "execution_count": 29,
    "metadata": {},
    "outputs": [
     {
@@ -1184,7 +1184,7 @@
        "dtype: float64"
       ]
      },
-     "execution_count": 33,
+     "execution_count": 29,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1212,7 +1212,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 34,
+   "execution_count": 30,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1241,7 +1241,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 35,
+   "execution_count": 31,
    "metadata": {},
    "outputs": [
     {
@@ -1312,7 +1312,7 @@
        "2  3  6     4  8  6  9.0"
       ]
      },
-     "execution_count": 35,
+     "execution_count": 31,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1344,7 +1344,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": 32,
    "metadata": {},
    "outputs": [
     {
@@ -1417,7 +1417,7 @@
        "4   979   982  1011"
       ]
      },
-     "execution_count": 36,
+     "execution_count": 32,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1443,7 +1443,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 37,
+   "execution_count": 33,
    "metadata": {},
    "outputs": [
     {
@@ -1522,7 +1522,7 @@
        "4   979   982  1011  1961.0"
       ]
      },
-     "execution_count": 37,
+     "execution_count": 33,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1555,7 +1555,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 38,
+   "execution_count": 34,
    "metadata": {},
    "outputs": [
     {
@@ -1570,7 +1570,7 @@
        "dtype: float64"
       ]
      },
-     "execution_count": 38,
+     "execution_count": 34,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1582,7 +1582,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 39,
+   "execution_count": 35,
    "metadata": {},
    "outputs": [
     {
@@ -1591,7 +1591,7 @@
        "Rolling [window=3,min_periods=3,center=False]"
       ]
      },
-     "execution_count": 39,
+     "execution_count": 35,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1610,7 +1610,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 40,
+   "execution_count": 36,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1634,7 +1634,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 41,
+   "execution_count": 37,
    "metadata": {},
    "outputs": [
     {
@@ -1649,7 +1649,7 @@
        "dtype: float64"
       ]
      },
-     "execution_count": 41,
+     "execution_count": 37,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1667,7 +1667,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 42,
+   "execution_count": 38,
    "metadata": {},
    "outputs": [
     {
@@ -1734,7 +1734,7 @@
        "4  59.0  59.0"
       ]
      },
-     "execution_count": 42,
+     "execution_count": 38,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1748,7 +1748,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 43,
+   "execution_count": 39,
    "metadata": {},
    "outputs": [
     {
@@ -1845,7 +1845,7 @@
        "9        100.0        100.0"
       ]
      },
-     "execution_count": 43,
+     "execution_count": 39,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1863,12 +1863,12 @@
     "\n",
     "We can also apply UDFs to grouped DataFrames using `apply_grouped`. This example is also drawn and adapted from the RAPIDS [API documentation]().\n",
     "\n",
-    "First, we'll group our DataFrame based on column `b`, which is either True or False. Note that we currently need to pass `method=\"cudf\"` to use UDFs with GroupBy objects."
+    "First, we'll group our DataFrame based on column `b`, which is either True or False."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 44,
+   "execution_count": 40,
    "metadata": {},
    "outputs": [
     {
@@ -1947,7 +1947,7 @@
        "4 -0.970850  False   Sarah  0.342905"
       ]
      },
-     "execution_count": 44,
+     "execution_count": 40,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1959,7 +1959,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 45,
+   "execution_count": 41,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1975,7 +1975,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 46,
+   "execution_count": 42,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2002,7 +2002,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 47,
+   "execution_count": 43,
    "metadata": {},
    "outputs": [
     {
@@ -2132,7 +2132,7 @@
        "9 -0.725581   True  George  0.405245       0.271319"
       ]
      },
-     "execution_count": 47,
+     "execution_count": 43,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -2162,7 +2162,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 48,
+   "execution_count": 44,
    "metadata": {},
    "outputs": [
     {
@@ -2171,7 +2171,7 @@
        "array([ 1.,  2.,  3.,  4., 10.])"
       ]
      },
-     "execution_count": 48,
+     "execution_count": 44,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -2193,7 +2193,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 49,
+   "execution_count": 45,
    "metadata": {},
    "outputs": [
     {
@@ -2207,14 +2207,12 @@
        "dtype: int32"
       ]
      },
-     "execution_count": 49,
+     "execution_count": 45,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "from cudf.utils import cudautils\n",
-    "\n",
     "@cuda.jit\n",
     "def multiply_by_5(x, out):\n",
     "    i = cuda.grid(1)\n",
@@ -2235,7 +2233,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 50,
+   "execution_count": 46,
    "metadata": {},
    "outputs": [
     {
@@ -2244,7 +2242,7 @@
        "array([ 5., 10., 15., 20., 50.])"
       ]
      },
-     "execution_count": 50,
+     "execution_count": 46,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -2307,7 +2305,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.13"
+   "version": "3.9.12"
   }
  },
  "nbformat": 4,