From 466a90d5c6febff34872395cfcdecceff8513a7f Mon Sep 17 00:00:00 2001
From: Robert Maynard <rmaynard@nvidia.com>
Date: Tue, 27 Sep 2022 12:21:50 -0400
Subject: [PATCH 1/7] Document that minimum required CMake version is now
 3.23.1 (#11751)

With rapids-cmake now requiring CMake 3.23.1 update consumers to correctly express this requirement

Authors:
  - Robert Maynard (https://github.com/robertmaynard)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Ray Douglass (https://github.com/raydouglass)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/11751
---
 CONTRIBUTING.md                                   | 2 +-
 conda/environments/cudf_dev_cuda11.5.yml          | 2 +-
 conda/recipes/cudf/conda_build_config.yaml        | 2 +-
 conda/recipes/cudf_kafka/meta.yaml                | 2 +-
 conda/recipes/libcudf/conda_build_config.yaml     | 2 +-
 conda/recipes/strings_udf/conda_build_config.yaml | 2 +-
 cpp/CMakeLists.txt                                | 2 +-
 cpp/libcudf_kafka/CMakeLists.txt                  | 2 +-
 java/src/main/native/CMakeLists.txt               | 2 +-
 python/cudf/CMakeLists.txt                        | 2 +-
 python/cudf/pyproject.toml                        | 2 +-
 python/strings_udf/CMakeLists.txt                 | 2 +-
 python/strings_udf/cpp/CMakeLists.txt             | 2 +-
 13 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 38a57caa5f7..6eb621abcc3 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -73,7 +73,7 @@ Compilers:
 
 * `gcc` version 9.3+
 * `nvcc` version 11.5+
-* `cmake` version 3.20.1+
+* `cmake` version 3.23.1+
 
 CUDA/GPU:
 
diff --git a/conda/environments/cudf_dev_cuda11.5.yml b/conda/environments/cudf_dev_cuda11.5.yml
index 973ca731853..2b4cbdec97b 100644
--- a/conda/environments/cudf_dev_cuda11.5.yml
+++ b/conda/environments/cudf_dev_cuda11.5.yml
@@ -14,7 +14,7 @@ dependencies:
   - clang-tools=11.1.0
   - cupy>=9.5.0,<12.0.0a0
   - rmm=22.10.*
-  - cmake>=3.20.1,!=3.23.0
+  - cmake>=3.23.1
   - cmake_setuptools>=0.1.3
   - scikit-build>=0.13.1
   - python>=3.8,<3.10
diff --git a/conda/recipes/cudf/conda_build_config.yaml b/conda/recipes/cudf/conda_build_config.yaml
index d9c3f21448f..0027a80f1ec 100644
--- a/conda/recipes/cudf/conda_build_config.yaml
+++ b/conda/recipes/cudf/conda_build_config.yaml
@@ -8,7 +8,7 @@ sysroot_version:
   - "2.17"
 
 cmake_version:
-  - ">=3.20.1,!=3.23.0"
+  - ">=3.23.1"
 
 cuda_compiler:
   - nvcc
diff --git a/conda/recipes/cudf_kafka/meta.yaml b/conda/recipes/cudf_kafka/meta.yaml
index df9cfa7c3c5..a65373efec3 100644
--- a/conda/recipes/cudf_kafka/meta.yaml
+++ b/conda/recipes/cudf_kafka/meta.yaml
@@ -22,7 +22,7 @@ build:
 
 requirements:
   build:
-    - cmake >=3.20.1,!=3.23.0
+    - cmake >=3.23.1
     - {{ compiler('c') }}
     - {{ compiler('cxx') }}
     - sysroot_{{ target_platform }} {{ sysroot_version }}
diff --git a/conda/recipes/libcudf/conda_build_config.yaml b/conda/recipes/libcudf/conda_build_config.yaml
index 4cf672997d3..7f5bf219f1f 100644
--- a/conda/recipes/libcudf/conda_build_config.yaml
+++ b/conda/recipes/libcudf/conda_build_config.yaml
@@ -11,7 +11,7 @@ sysroot_version:
   - "2.17"
 
 cmake_version:
-  - ">=3.20.1,!=3.23.0"
+  - ">=3.23.1"
 
 gtest_version:
   - "=1.10.0"
diff --git a/conda/recipes/strings_udf/conda_build_config.yaml b/conda/recipes/strings_udf/conda_build_config.yaml
index d9c3f21448f..0027a80f1ec 100644
--- a/conda/recipes/strings_udf/conda_build_config.yaml
+++ b/conda/recipes/strings_udf/conda_build_config.yaml
@@ -8,7 +8,7 @@ sysroot_version:
   - "2.17"
 
 cmake_version:
-  - ">=3.20.1,!=3.23.0"
+  - ">=3.23.1"
 
 cuda_compiler:
   - nvcc
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 96fc75adcff..c84589af345 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -12,7 +12,7 @@
 # the License.
 # =============================================================================
 
-cmake_minimum_required(VERSION 3.20.1 FATAL_ERROR)
+cmake_minimum_required(VERSION 3.23.1 FATAL_ERROR)
 
 include(../fetch_rapids.cmake)
 include(rapids-cmake)
diff --git a/cpp/libcudf_kafka/CMakeLists.txt b/cpp/libcudf_kafka/CMakeLists.txt
index 76a012e7c6e..f355fa01c28 100644
--- a/cpp/libcudf_kafka/CMakeLists.txt
+++ b/cpp/libcudf_kafka/CMakeLists.txt
@@ -11,7 +11,7 @@
 # or implied. See the License for the specific language governing permissions and limitations under
 # the License.
 # =============================================================================
-cmake_minimum_required(VERSION 3.20.1 FATAL_ERROR)
+cmake_minimum_required(VERSION 3.23.1 FATAL_ERROR)
 
 include(../../fetch_rapids.cmake)
 include(rapids-cmake)
diff --git a/java/src/main/native/CMakeLists.txt b/java/src/main/native/CMakeLists.txt
index 9410f8eacf3..fca797fbccd 100755
--- a/java/src/main/native/CMakeLists.txt
+++ b/java/src/main/native/CMakeLists.txt
@@ -11,7 +11,7 @@
 # or implied. See the License for the specific language governing permissions and limitations under
 # the License.
 # =============================================================================
-cmake_minimum_required(VERSION 3.20.1 FATAL_ERROR)
+cmake_minimum_required(VERSION 3.23.1 FATAL_ERROR)
 
 include(../../../../fetch_rapids.cmake)
 include(rapids-cmake)
diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt
index 72e1779401f..9762eacbbed 100644
--- a/python/cudf/CMakeLists.txt
+++ b/python/cudf/CMakeLists.txt
@@ -12,7 +12,7 @@
 # the License.
 # =============================================================================
 
-cmake_minimum_required(VERSION 3.20.1 FATAL_ERROR)
+cmake_minimum_required(VERSION 3.23.1 FATAL_ERROR)
 
 set(cudf_version 22.10.00)
 
diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml
index ab00b37e4fa..52490444dba 100644
--- a/python/cudf/pyproject.toml
+++ b/python/cudf/pyproject.toml
@@ -7,6 +7,6 @@ requires = [
     "setuptools",
     "cython>=0.29,<0.30",
     "scikit-build>=0.13.1",
-    "cmake>=3.20.1,!=3.23.0",
+    "cmake>=3.23.1",
     "ninja",
 ]
diff --git a/python/strings_udf/CMakeLists.txt b/python/strings_udf/CMakeLists.txt
index 59d8ae795f2..53d31575363 100644
--- a/python/strings_udf/CMakeLists.txt
+++ b/python/strings_udf/CMakeLists.txt
@@ -12,7 +12,7 @@
 # the License.
 # =============================================================================
 
-cmake_minimum_required(VERSION 3.20.1 FATAL_ERROR)
+cmake_minimum_required(VERSION 3.23.1 FATAL_ERROR)
 
 set(strings_udf_version 22.10.00)
 
diff --git a/python/strings_udf/cpp/CMakeLists.txt b/python/strings_udf/cpp/CMakeLists.txt
index d157acfefde..5bbb6ae4791 100644
--- a/python/strings_udf/cpp/CMakeLists.txt
+++ b/python/strings_udf/cpp/CMakeLists.txt
@@ -12,7 +12,7 @@
 # the License.
 # =============================================================================
 
-cmake_minimum_required(VERSION 3.20.1)
+cmake_minimum_required(VERSION 3.23.1)
 
 include(rapids-cmake)
 include(rapids-cpm)

From d8feede5d6520695a53bc2c54ef69226f7825260 Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Tue, 27 Sep 2022 14:23:30 -0500
Subject: [PATCH 2/7] Reduce code duplication for `dask` & `distributed`
 nightly/stable installs (#11565)

After https://github.com/dask/dask/issues/9367 was fixed in dask upstream we had to bump the minimum version of dask to 2022.8.0 to correctly fetch nightly(if channel exists) or stable (if `dask/dev` label doesn't exist). Without this fix, conda builds were always picking up `2022.7.1` only and/or there would be a mix of nightly & stable packages in an env.

This PR also does some cleanup and makes the `build.sh` script easy to maintain.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - AJ Schmidt (https://github.com/ajschmidt8)
  - Charles Blackmon-Luca (https://github.com/charlesbluca)

URL: https://github.com/rapidsai/cudf/pull/11565
---
 ci/benchmark/build.sh |  7 +++++--
 ci/gpu/build.sh       | 10 ++++++++--
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/ci/benchmark/build.sh b/ci/benchmark/build.sh
index e9ea4630133..34fcbb6e104 100755
--- a/ci/benchmark/build.sh
+++ b/ci/benchmark/build.sh
@@ -39,6 +39,9 @@ export LIBCUDF_KERNEL_CACHE_PATH="$HOME/.jitify-cache"
 # Dask & Distributed option to install main(nightly) or `conda-forge` packages.
 export INSTALL_DASK_MAIN=1
 
+# Dask version to install when `INSTALL_DASK_MAIN=0`
+export DASK_STABLE_VERSION="2022.7.1"
+
 function remove_libcudf_kernel_cache_dir {
     EXITCODE=$?
     logger "removing kernel cache dir: $LIBCUDF_KERNEL_CACHE_PATH"
@@ -82,8 +85,8 @@ if [[ "${INSTALL_DASK_MAIN}" == 1 ]]; then
     gpuci_logger "gpuci_mamba_retry update dask"
     gpuci_mamba_retry update dask
 else
-    gpuci_logger "gpuci_mamba_retry install conda-forge::dask>=2022.7.1 conda-forge::distributed>=2022.7.1 conda-forge::dask-core>=2022.7.1 --force-reinstall"
-    gpuci_mamba_retry install conda-forge::dask>=2022.7.1 conda-forge::distributed>=2022.7.1 conda-forge::dask-core>=2022.7.1 --force-reinstall
+    gpuci_logger "gpuci_mamba_retry install conda-forge::dask=={$DASK_STABLE_VERSION} conda-forge::distributed=={$DASK_STABLE_VERSION} conda-forge::dask-core=={$DASK_STABLE_VERSION} --force-reinstall"
+    gpuci_mamba_retry install conda-forge::dask=={$DASK_STABLE_VERSION} conda-forge::distributed=={$DASK_STABLE_VERSION} conda-forge::dask-core=={$DASK_STABLE_VERSION} --force-reinstall
 fi
 
 # Install the master version of streamz
diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh
index 118bdb263af..c2a2287f69a 100755
--- a/ci/gpu/build.sh
+++ b/ci/gpu/build.sh
@@ -34,6 +34,9 @@ unset GIT_DESCRIBE_TAG
 # Dask & Distributed option to install main(nightly) or `conda-forge` packages.
 export INSTALL_DASK_MAIN=1
 
+# Dask version to install when `INSTALL_DASK_MAIN=0`
+export DASK_STABLE_VERSION="2022.7.1"
+
 # ucx-py version
 export UCX_PY_VERSION='0.28.*'
 
@@ -91,8 +94,8 @@ function install_dask {
         gpuci_mamba_retry update dask
         conda list
     else
-        gpuci_logger "gpuci_mamba_retry install conda-forge::dask>=2022.7.1 conda-forge::distributed>=2022.7.1 conda-forge::dask-core>=2022.7.1 --force-reinstall"
-        gpuci_mamba_retry install conda-forge::dask>=2022.7.1 conda-forge::distributed>=2022.7.1 conda-forge::dask-core>=2022.7.1 --force-reinstall
+        gpuci_logger "gpuci_mamba_retry install conda-forge::dask=={$DASK_STABLE_VERSION} conda-forge::distributed=={$DASK_STABLE_VERSION} conda-forge::dask-core=={$DASK_STABLE_VERSION} --force-reinstall"
+        gpuci_mamba_retry install conda-forge::dask=={$DASK_STABLE_VERSION} conda-forge::distributed=={$DASK_STABLE_VERSION} conda-forge::dask-core=={$DASK_STABLE_VERSION} --force-reinstall
     fi
     # Install the main version of streamz
     gpuci_logger "Install the main version of streamz"
@@ -188,6 +191,9 @@ else
     # copied by CI from the upstream 11.5 jobs into $CONDA_ARTIFACT_PATH
     gpuci_logger "Installing cudf, dask-cudf, cudf_kafka, and custreamz"
     gpuci_mamba_retry install cudf dask-cudf cudf_kafka custreamz -c "${CONDA_BLD_DIR}" -c "${CONDA_ARTIFACT_PATH}"
+    
+    gpuci_logger "Check current conda environment"
+    conda list --show-channel-urls
 
     gpuci_logger "GoogleTests"
     # Run libcudf and libcudf_kafka gtests from libcudf-tests package

From 4005a7fa002c8304ffd806a6b6b084af9b774129 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com>
Date: Tue, 27 Sep 2022 14:39:10 -0500
Subject: [PATCH 3/7] Build `cudf` locally before building `strings_udf` conda
 packages in CI (#11785)

Authors:
  - https://github.com/brandon-b-miller

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Ray Douglass (https://github.com/raydouglass)

URL: https://github.com/rapidsai/cudf/pull/11785
---
 ci/cpu/build.sh | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh
index a931546292e..87d378f4d65 100755
--- a/ci/cpu/build.sh
+++ b/ci/cpu/build.sh
@@ -83,10 +83,17 @@ if [ "$BUILD_LIBCUDF" == '1' ]; then
 
   # BUILD_LIBCUDF == 1 means this job is being run on the cpu_build jobs
   # that is where we must also build the strings_udf package
+  mkdir -p ${CONDA_BLD_DIR}/strings_udf/work
+  STRINGS_UDF_BUILD_DIR=${CONDA_BLD_DIR}/strings_udf/work
+  gpuci_logger "Build conda pkg for cudf (python 3.8), for strings_udf"
+  gpuci_conda_retry mambabuild --no-build-id --croot ${STRINGS_UDF_BUILD_DIR} -c ${CONDA_BLD_DIR} conda/recipes/cudf ${CONDA_BUILD_ARGS} --python=3.8
+  gpuci_logger "Build conda pkg for cudf (python 3.9), for strings_udf"
+  gpuci_conda_retry mambabuild --no-build-id --croot ${STRINGS_UDF_BUILD_DIR} -c ${CONDA_BLD_DIR} conda/recipes/cudf ${CONDA_BUILD_ARGS} --python=3.9
+
   gpuci_logger "Build conda pkg for strings_udf (python 3.8)"
-  gpuci_conda_retry mambabuild --no-build-id --croot ${CONDA_BLD_DIR} conda/recipes/strings_udf $CONDA_BUILD_ARGS --python=3.8
+  gpuci_conda_retry mambabuild --no-build-id --croot ${CONDA_BLD_DIR} -c ${STRINGS_UDF_BUILD_DIR} -c ${CONDA_BLD_DIR} conda/recipes/strings_udf $CONDA_BUILD_ARGS --python=3.8
   gpuci_logger "Build conda pkg for strings_udf (python 3.9)"
-  gpuci_conda_retry mambabuild --no-build-id --croot ${CONDA_BLD_DIR} conda/recipes/strings_udf $CONDA_BUILD_ARGS --python=3.9
+  gpuci_conda_retry mambabuild --no-build-id --croot ${CONDA_BLD_DIR} -c ${STRINGS_UDF_BUILD_DIR} -c ${CONDA_BLD_DIR} conda/recipes/strings_udf $CONDA_BUILD_ARGS --python=3.9
 
   mkdir -p ${CONDA_BLD_DIR}/libcudf/work
   cp -r ${CONDA_BLD_DIR}/work/* ${CONDA_BLD_DIR}/libcudf/work

From bcf361fda974bda0d28b73ec00d63cc851c77308 Mon Sep 17 00:00:00 2001
From: "Richard (Rick) Zamora" <rzamora217@gmail.com>
Date: Tue, 27 Sep 2022 15:56:42 -0500
Subject: [PATCH 4/7] Expose "explicit-comms" option in shuffle-based dask_cudf
 functions (#11576)

This PR exposes an option to use Dask-CUDA's explicit-comms shuffle for the primary shuffle-based `dask_cudf.DataFrame` methods: `shuffle`, `sort_values`, and `set_index`. Although "explicit-comms" is still experimental, the explicit-shuffle algorithm is known to consistently outperform the "task"-based shuffle.

As far as I can tell, it is not currently possible to use an "explicit-comms" shuffle in `dask_cudf` without directly importing the function from Dask-CUDA (@madsbk - please do correct me if I am mistaken).  In order to simplify benchmarking, and to utilize the optimized shuffle within high-cardinality groupby code, I propose that we make it easier to access the explicit shuffle.

Authors:
  - Richard (Rick) Zamora (https://github.com/rjzamora)

Approvers:
  - Lawrence Mitchell (https://github.com/wence-)
  - Benjamin Zaitlen (https://github.com/quasiben)

URL: https://github.com/rapidsai/cudf/pull/11576
---
 python/dask_cudf/dask_cudf/core.py            | 49 ++++++++---------
 python/dask_cudf/dask_cudf/sorting.py         | 39 +++++++++++++-
 .../dask_cudf/tests/test_distributed.py       | 53 +++++++++++++++++++
 3 files changed, 111 insertions(+), 30 deletions(-)

diff --git a/python/dask_cudf/dask_cudf/core.py b/python/dask_cudf/dask_cudf/core.py
index 2641ef68379..0bf39df313a 100644
--- a/python/dask_cudf/dask_cudf/core.py
+++ b/python/dask_cudf/dask_cudf/core.py
@@ -29,6 +29,7 @@
 
 from dask_cudf import sorting
 from dask_cudf.accessors import ListMethods, StructMethods
+from dask_cudf.sorting import _get_shuffle_type
 
 DASK_VERSION = LooseVersion(dask.__version__)
 
@@ -133,25 +134,16 @@ def do_apply_rows(df, func, incols, outcols, kwargs):
         )
 
     @_dask_cudf_nvtx_annotate
-    def merge(self, other, **kwargs):
-        if kwargs.pop("shuffle", "tasks") != "tasks":
-            raise ValueError(
-                "Dask-cudf only supports task based shuffling, got %s"
-                % kwargs["shuffle"]
-            )
+    def merge(self, other, shuffle=None, **kwargs):
         on = kwargs.pop("on", None)
         if isinstance(on, tuple):
             on = list(on)
-        return super().merge(other, on=on, shuffle="tasks", **kwargs)
+        return super().merge(
+            other, on=on, shuffle=_get_shuffle_type(shuffle), **kwargs
+        )
 
     @_dask_cudf_nvtx_annotate
-    def join(self, other, **kwargs):
-        if kwargs.pop("shuffle", "tasks") != "tasks":
-            raise ValueError(
-                "Dask-cudf only supports task based shuffling, got %s"
-                % kwargs["shuffle"]
-            )
-
+    def join(self, other, shuffle=None, **kwargs):
         # CuDF doesn't support "right" join yet
         how = kwargs.pop("how", "left")
         if how == "right":
@@ -160,15 +152,15 @@ def join(self, other, **kwargs):
         on = kwargs.pop("on", None)
         if isinstance(on, tuple):
             on = list(on)
-        return super().join(other, how=how, on=on, shuffle="tasks", **kwargs)
+        return super().join(
+            other, how=how, on=on, shuffle=_get_shuffle_type(shuffle), **kwargs
+        )
 
     @_dask_cudf_nvtx_annotate
-    def set_index(self, other, sorted=False, divisions=None, **kwargs):
-        if kwargs.pop("shuffle", "tasks") != "tasks":
-            raise ValueError(
-                "Dask-cudf only supports task based shuffling, got %s"
-                % kwargs["shuffle"]
-            )
+    def set_index(
+        self, other, sorted=False, divisions=None, shuffle=None, **kwargs
+    ):
+
         pre_sorted = sorted
         del sorted
 
@@ -196,13 +188,13 @@ def set_index(self, other, sorted=False, divisions=None, **kwargs):
                 divisions = None
 
             # Use dask_cudf's sort_values
-            # TODO: Handle `sorted=True`
             df = self.sort_values(
                 by,
                 max_branch=kwargs.get("max_branch", None),
                 divisions=divisions,
                 set_divisions=True,
                 ignore_index=True,
+                shuffle=shuffle,
             )
 
             # Ignore divisions if its a dataframe
@@ -229,7 +221,7 @@ def set_index(self, other, sorted=False, divisions=None, **kwargs):
         return super().set_index(
             other,
             sorted=pre_sorted,
-            shuffle="tasks",
+            shuffle=_get_shuffle_type(shuffle),
             divisions=divisions,
             **kwargs,
         )
@@ -246,6 +238,7 @@ def sort_values(
         na_position="last",
         sort_function=None,
         sort_function_kwargs=None,
+        shuffle=None,
         **kwargs,
     ):
         if kwargs:
@@ -262,6 +255,7 @@ def sort_values(
             ignore_index=ignore_index,
             ascending=ascending,
             na_position=na_position,
+            shuffle=shuffle,
             sort_function=sort_function,
             sort_function_kwargs=sort_function_kwargs,
         )
@@ -338,12 +332,11 @@ def repartition(self, *args, **kwargs):
         return super().repartition(*args, **kwargs)
 
     @_dask_cudf_nvtx_annotate
-    def shuffle(self, *args, **kwargs):
+    def shuffle(self, *args, shuffle=None, **kwargs):
         """Wraps dask.dataframe DataFrame.shuffle method"""
-        shuffle_arg = kwargs.pop("shuffle", None)
-        if shuffle_arg and shuffle_arg != "tasks":
-            raise ValueError("dask_cudf does not support disk-based shuffle.")
-        return super().shuffle(*args, shuffle="tasks", **kwargs)
+        return super().shuffle(
+            *args, shuffle=_get_shuffle_type(shuffle), **kwargs
+        )
 
     @_dask_cudf_nvtx_annotate
     def groupby(self, by=None, **kwargs):
diff --git a/python/dask_cudf/dask_cudf/sorting.py b/python/dask_cudf/dask_cudf/sorting.py
index 1c89baba592..a359920229d 100644
--- a/python/dask_cudf/dask_cudf/sorting.py
+++ b/python/dask_cudf/dask_cudf/sorting.py
@@ -6,10 +6,10 @@
 import numpy as np
 import tlz as toolz
 
+import dask
 from dask.base import tokenize
 from dask.dataframe import methods
 from dask.dataframe.core import DataFrame, Index, Series
-from dask.dataframe.shuffle import rearrange_by_column
 from dask.highlevelgraph import HighLevelGraph
 from dask.utils import M
 
@@ -231,10 +231,18 @@ def sort_values(
     ignore_index=False,
     ascending=True,
     na_position="last",
+    shuffle=None,
     sort_function=None,
     sort_function_kwargs=None,
 ):
     """Sort by the given list/tuple of column names."""
+
+    shuffle = _get_shuffle_type(shuffle)
+    # Note that we cannot import `rearrange_by_column` in
+    # the header, because we need to allow dask-cuda to
+    # patch this function before we import it here
+    from dask.dataframe.shuffle import rearrange_by_column
+
     if not isinstance(ascending, bool):
         raise ValueError("ascending must be either True or False")
     if na_position not in ("first", "last"):
@@ -285,7 +293,7 @@ def sort_values(
         "_partitions",
         max_branch=max_branch,
         npartitions=len(divisions) - 1,
-        shuffle="tasks",
+        shuffle=shuffle,
         ignore_index=ignore_index,
     ).drop(columns=["_partitions"])
     df3.divisions = (None,) * (df3.npartitions + 1)
@@ -297,3 +305,30 @@ def sort_values(
         df4.divisions = tuple(methods.tolist(divisions))
 
     return df4
+
+
+def _get_shuffle_type(shuffle):
+    # Utility to set the shuffle-kwarg default
+    # and to validate user-specified options
+    #
+    # Supported Options:
+    #  - "tasks"
+    #  - "explicit-comms"  (requires dask_cuda)
+    #
+    shuffle = shuffle or dask.config.get("shuffle", "tasks")
+    if shuffle not in {"tasks", "explicit-comms"}:
+        raise ValueError(
+            f"Dask-cudf only supports in-memory shuffling with "
+            f"'tasks' or 'explicit-comms'. Got shuffle={shuffle}"
+        )
+
+    if shuffle == "explicit-comms":
+        try:
+            import dask_cuda  # noqa: F401
+        except ImportError:
+            raise ValueError(
+                "shuffle='explicit-comms' requires dask_cuda. "
+                "Please install dask_cuda, or use shuffle='tasks'."
+            )
+
+    return shuffle
diff --git a/python/dask_cudf/dask_cudf/tests/test_distributed.py b/python/dask_cudf/dask_cudf/tests/test_distributed.py
index e24feaa2ea4..6f7f720f29c 100644
--- a/python/dask_cudf/dask_cudf/tests/test_distributed.py
+++ b/python/dask_cudf/dask_cudf/tests/test_distributed.py
@@ -6,6 +6,7 @@
 import dask
 from dask import dataframe as dd
 from dask.distributed import Client
+from dask.utils_test import hlg_layer
 from distributed.utils_test import cleanup, loop, loop_in_thread  # noqa: F401
 
 import cudf
@@ -77,3 +78,55 @@ def test_str_series_roundtrip():
 
             actual = dask_series.compute()
             assert_eq(actual, expected)
+
+
+def test_shuffle_explicit_comms():
+    with dask_cuda.LocalCUDACluster(n_workers=2) as cluster:
+        with Client(cluster):
+            df = cudf.DataFrame({"a": [1, 2, 3, 4], "b": [3, 1, 2, 4]})
+            ddf = dask_cudf.from_cudf(df, npartitions=4)
+
+            # Test sort_values API
+            got_ec = ddf.sort_values(["b"], shuffle="explicit-comms")
+            got_tasks = ddf.sort_values(["b"], shuffle="tasks")
+            assert hlg_layer(got_ec.dask, "explicit")
+            assert_eq(got_ec.compute(), got_tasks.compute())
+
+            # Test set_index API
+            got_ec = ddf.set_index("b", shuffle="explicit-comms")
+            got_tasks = ddf.set_index("b", shuffle="tasks")
+            assert got_ec.divisions == got_tasks.divisions
+            assert hlg_layer(got_ec.dask, "explicit")
+            assert_eq(got_ec.compute(), got_tasks.compute())
+
+            # Test shuffle API
+            got_ec = ddf.shuffle(["b"], shuffle="explicit-comms")
+            assert hlg_layer(got_ec.dask, "explicit")
+            assert len(got_ec) == len(ddf)
+
+            # Test merge API
+            got_ec = ddf.merge(ddf.copy(), on="b", shuffle="explicit-comms")
+            got_tasks = ddf.merge(ddf.copy(), on="b", shuffle="tasks")
+            assert hlg_layer(got_ec.dask, "explicit")
+            assert_eq(got_ec.compute(), got_tasks.compute())
+
+            # Test join API
+            got_ec = ddf.join(
+                ddf.set_index("b"),
+                on="b",
+                lsuffix="_l",
+                rsuffix="_r",
+                shuffle="explicit-comms",
+            )
+            got_tasks = ddf.join(
+                ddf.set_index("b"),
+                on="b",
+                lsuffix="_l",
+                rsuffix="_r",
+                shuffle="tasks",
+            )
+            assert hlg_layer(got_ec.dask, "explicit")
+            assert_eq(
+                got_ec.compute().sort_index(),
+                got_tasks.compute().sort_index(),
+            )

From 1003e33fba724c00d0eed906a37b8df4b4509b06 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com>
Date: Tue, 27 Sep 2022 16:17:31 -0500
Subject: [PATCH 5/7] Add docs for use of string data to `DataFrame.apply` and
 `Series.apply` and update guide to UDFs notebook (#11733)

This PR updates some docstrings around cuDF to show some examples of how to use strings inside UDFs, as well as provide some caveats. It also adds a section with some detail and examples to our guide to udfs ipython notebook.

Authors:
  - https://github.com/brandon-b-miller

Approvers:
  - Ashwin Srinath (https://github.com/shwina)
  - Lawrence Mitchell (https://github.com/wence-)

URL: https://github.com/rapidsai/cudf/pull/11733
---
 .../source/user_guide/guide-to-udfs.ipynb     | 343 ++++++++++++++----
 python/cudf/cudf/core/dataframe.py            |  46 +++
 python/cudf/cudf/core/series.py               |  43 +++
 3 files changed, 367 insertions(+), 65 deletions(-)

diff --git a/docs/cudf/source/user_guide/guide-to-udfs.ipynb b/docs/cudf/source/user_guide/guide-to-udfs.ipynb
index ef7500a2be9..32a51224668 100644
--- a/docs/cudf/source/user_guide/guide-to-udfs.ipynb
+++ b/docs/cudf/source/user_guide/guide-to-udfs.ipynb
@@ -27,7 +27,7 @@
    "source": [
     "Like many tabular data processing APIs, cuDF provides a range of composable, DataFrame style operators. While out of the box functions are flexible and useful, it is sometimes necessary to write custom code, or user-defined functions (UDFs), that can be applied to rows, columns, and other groupings of the cells making up the DataFrame.\n",
     "\n",
-    "In conjunction with the broader GPU PyData ecosystem, cuDF provides interfaces to run UDFs on a variety of data structures. Currently, we can only execute UDFs on numeric, boolean, datetime, and timedelta typed data (support for strings is being planned). This guide covers writing and executing UDFs on the following data structures:\n",
+    "In conjunction with the broader GPU PyData ecosystem, cuDF provides interfaces to run UDFs on a variety of data structures. Currently, we can only execute UDFs on numeric, boolean, datetime, and timedelta typed data with partial support for strings in some APIs. This guide covers writing and executing UDFs on the following data structures:\n",
     "\n",
     "- Series\n",
     "- DataFrame\n",
@@ -328,6 +328,91 @@
     "In addition, `cudf.NA` can be returned from a function directly or conditionally. This capability should allow you to implement custom null handling in a wide variety of cases."
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "cc7c7e67",
+   "metadata": {},
+   "source": [
+    "### String data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c0980218",
+   "metadata": {},
+   "source": [
+    "Support for a subset of string functionality is available for `apply` through the [strings_udf](https://anaconda.org/rapidsai-nightly/strings_udf) package, which is separately installable. Currently, the following string operations are provided through `strings_udf`:\",\n",
+    "- `str.count`\n",
+    "- `str.startswith`\n",
+    "- `str.endswith`\n",
+    "- `str.find`\n",
+    "- `str.rfind`\n",
+    "- `str.isalnum`\n",
+    "- `str.isdecimal`\n",
+    "- `str.isdigit`\n",
+    "- `str.islower`\n",
+    "- `str.isupper`\n",
+    "- `str.isalpha`\n",
+    "- `str.istitle`\n",
+    "- `str.isspace`\n",
+    "- `==`, `!=`, `>=`, `<=`, `>`, `<` (between two strings)\n",
+    "- `len` (e.g. `len(some_string))`\n",
+    "- `__contains__` (e.g, `'abc' in some_string`)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "d7d1abd7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sr = cudf.Series(['', 'abc', 'some_example'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "e8538ba0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def f(st):\n",
+    "    if len(st) > 0:\n",
+    "        if st.startswith('a'):\n",
+    "            return 1\n",
+    "        elif 'example' in st:\n",
+    "            return 2\n",
+    "        else:\n",
+    "            return -1\n",
+    "    else:\n",
+    "        return 42"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "23524fd8",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0    42\n",
+       "1     1\n",
+       "2     2\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "sr.apply(f)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "54cafbc0",
@@ -349,7 +434,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 15,
    "id": "732434f6",
    "metadata": {},
    "outputs": [],
@@ -359,7 +444,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 16,
    "id": "4f5997e5",
    "metadata": {},
    "outputs": [],
@@ -385,7 +470,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 17,
    "id": "ea6008a6",
    "metadata": {},
    "outputs": [],
@@ -405,7 +490,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 18,
    "id": "183a82ed",
    "metadata": {},
    "outputs": [
@@ -485,7 +570,7 @@
        "4   979   982  1011   9790.0"
       ]
      },
-     "execution_count": 15,
+     "execution_count": 18,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -534,7 +619,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 19,
    "id": "73653918",
    "metadata": {},
    "outputs": [],
@@ -553,7 +638,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 20,
    "id": "077feb75",
    "metadata": {},
    "outputs": [
@@ -609,7 +694,7 @@
        "2  3     6"
       ]
      },
-     "execution_count": 17,
+     "execution_count": 20,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -632,7 +717,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 21,
    "id": "091e39e1",
    "metadata": {},
    "outputs": [
@@ -645,7 +730,7 @@
        "dtype: int64"
       ]
      },
-     "execution_count": 18,
+     "execution_count": 21,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -664,7 +749,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 22,
    "id": "bd345fab",
    "metadata": {},
    "outputs": [
@@ -677,7 +762,7 @@
        "dtype: object"
       ]
      },
-     "execution_count": 19,
+     "execution_count": 22,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -704,7 +789,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 23,
    "id": "b70f4b3b",
    "metadata": {},
    "outputs": [
@@ -756,7 +841,7 @@
        "2     3"
       ]
      },
-     "execution_count": 20,
+     "execution_count": 23,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -775,7 +860,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 24,
    "id": "0313c8df",
    "metadata": {},
    "outputs": [
@@ -788,7 +873,7 @@
        "dtype: int64"
       ]
      },
-     "execution_count": 21,
+     "execution_count": 24,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -807,7 +892,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 25,
    "id": "96a7952a",
    "metadata": {},
    "outputs": [
@@ -863,7 +948,7 @@
        "2  3  1"
       ]
      },
-     "execution_count": 22,
+     "execution_count": 25,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -886,7 +971,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 26,
    "id": "e0815f60",
    "metadata": {},
    "outputs": [
@@ -899,7 +984,7 @@
        "dtype: int64"
       ]
      },
-     "execution_count": 23,
+     "execution_count": 26,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -918,7 +1003,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 27,
    "id": "495efd14",
    "metadata": {},
    "outputs": [
@@ -974,7 +1059,7 @@
        "2  3  3.14"
       ]
      },
-     "execution_count": 24,
+     "execution_count": 27,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -992,7 +1077,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": 28,
    "id": "678b0b5a",
    "metadata": {},
    "outputs": [
@@ -1005,7 +1090,7 @@
        "dtype: float64"
       ]
      },
-     "execution_count": 25,
+     "execution_count": 28,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1037,7 +1122,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 29,
    "id": "acf48d56",
    "metadata": {},
    "outputs": [
@@ -1089,7 +1174,7 @@
        "2  5"
       ]
      },
-     "execution_count": 26,
+     "execution_count": 29,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1110,7 +1195,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 30,
    "id": "78a98172",
    "metadata": {},
    "outputs": [
@@ -1123,7 +1208,7 @@
        "dtype: float64"
       ]
      },
-     "execution_count": 27,
+     "execution_count": 30,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1142,7 +1227,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 31,
    "id": "142c30a9",
    "metadata": {},
    "outputs": [
@@ -1210,7 +1295,7 @@
        "2  3  6     4  8  6"
       ]
      },
-     "execution_count": 28,
+     "execution_count": 31,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1231,7 +1316,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 32,
    "id": "fee9198a",
    "metadata": {},
    "outputs": [
@@ -1244,7 +1329,7 @@
        "dtype: float64"
       ]
      },
-     "execution_count": 29,
+     "execution_count": 32,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1253,6 +1338,134 @@
     "df.apply(f, axis=1)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "71e30d33",
+   "metadata": {},
+   "source": [
+    "### String Data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1a3694ea",
+   "metadata": {},
+   "source": [
+    "String data may be used inside `DataFrame.apply` UDFs, subject to the same constraints as those for `Series.apply`. See the section on string handling for `Series` UDFs above for details. Below is a simple example extending the row UDF logic from above in the case of a string column:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "id": "cccd59f7",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>str_col</th>\n",
+       "      <th>scale</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>abc</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>ABC</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>Example</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   str_col  scale\n",
+       "0      abc      1\n",
+       "1      ABC      2\n",
+       "2  Example      3"
+      ]
+     },
+     "execution_count": 33,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "str_df = cudf.DataFrame({\n",
+    "    'str_col': ['abc', 'ABC', 'Example'],\n",
+    "    'scale': [1, 2, 3]\n",
+    "})\n",
+    "str_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "id": "35737fd9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def f(row):\n",
+    "    st = row['str_col']\n",
+    "    scale = row['scale']\n",
+    "    \n",
+    "    if len(st) > 5:\n",
+    "        return len(st) + scale\n",
+    "    else:\n",
+    "        return len(st)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "id": "4ede4d5b",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0     3\n",
+       "1     3\n",
+       "2    10\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 35,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "str_df.apply(f, axis=1)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "9c587bd2",
@@ -1273,7 +1486,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 36,
    "id": "90cbcd85",
    "metadata": {},
    "outputs": [],
@@ -1304,7 +1517,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 37,
    "id": "e782daff",
    "metadata": {},
    "outputs": [
@@ -1376,7 +1589,7 @@
        "2  3  6     4  8  6  9.0"
       ]
      },
-     "execution_count": 31,
+     "execution_count": 37,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1410,7 +1623,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": 38,
    "id": "befd8333",
    "metadata": {},
    "outputs": [
@@ -1484,7 +1697,7 @@
        "4   979   982  1011"
       ]
      },
-     "execution_count": 32,
+     "execution_count": 38,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1511,7 +1724,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 33,
+   "execution_count": 39,
    "id": "d1f3dcaf",
    "metadata": {},
    "outputs": [
@@ -1591,7 +1804,7 @@
        "4   979   982  1011  1961.0"
       ]
      },
-     "execution_count": 33,
+     "execution_count": 39,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1626,7 +1839,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 34,
+   "execution_count": 40,
    "id": "6bc6aea3",
    "metadata": {},
    "outputs": [
@@ -1642,7 +1855,7 @@
        "dtype: float64"
       ]
      },
-     "execution_count": 34,
+     "execution_count": 40,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1654,7 +1867,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 35,
+   "execution_count": 41,
    "id": "a4c31df1",
    "metadata": {},
    "outputs": [
@@ -1664,7 +1877,7 @@
        "Rolling [window=3,min_periods=3,center=False]"
       ]
      },
-     "execution_count": 35,
+     "execution_count": 41,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1684,7 +1897,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": 42,
    "id": "eb5a081b",
    "metadata": {},
    "outputs": [],
@@ -1710,7 +1923,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 37,
+   "execution_count": 43,
    "id": "ddec3263",
    "metadata": {},
    "outputs": [
@@ -1726,7 +1939,7 @@
        "dtype: float64"
       ]
      },
-     "execution_count": 37,
+     "execution_count": 43,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1745,7 +1958,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 38,
+   "execution_count": 44,
    "id": "8b61094a",
    "metadata": {},
    "outputs": [
@@ -1813,7 +2026,7 @@
        "4  59.0  59.0"
       ]
      },
-     "execution_count": 38,
+     "execution_count": 44,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1827,7 +2040,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 39,
+   "execution_count": 45,
    "id": "bb8c3019",
    "metadata": {},
    "outputs": [
@@ -1925,7 +2138,7 @@
        "9        100.0        100.0"
       ]
      },
-     "execution_count": 39,
+     "execution_count": 45,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1949,7 +2162,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 40,
+   "execution_count": 46,
    "id": "3dc272ab",
    "metadata": {},
    "outputs": [
@@ -2029,7 +2242,7 @@
        "4 -0.970850  False   Sarah  0.342905"
       ]
      },
-     "execution_count": 40,
+     "execution_count": 46,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -2041,7 +2254,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 41,
+   "execution_count": 47,
    "id": "c0578e0a",
    "metadata": {},
    "outputs": [],
@@ -2059,7 +2272,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 42,
+   "execution_count": 48,
    "id": "19f0f7fe",
    "metadata": {},
    "outputs": [],
@@ -2088,7 +2301,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 43,
+   "execution_count": 49,
    "id": "c43426c3",
    "metadata": {},
    "outputs": [
@@ -2219,7 +2432,7 @@
        "9 -0.725581   True  George  0.405245       0.271319"
       ]
      },
-     "execution_count": 43,
+     "execution_count": 49,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -2251,7 +2464,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 44,
+   "execution_count": 50,
    "id": "aa6a8509",
    "metadata": {},
    "outputs": [
@@ -2261,7 +2474,7 @@
        "array([ 1.,  2.,  3.,  4., 10.])"
       ]
      },
-     "execution_count": 44,
+     "execution_count": 50,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -2284,7 +2497,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 45,
+   "execution_count": 51,
    "id": "0bb8bf93",
    "metadata": {},
    "outputs": [
@@ -2299,7 +2512,7 @@
        "dtype: int32"
       ]
      },
-     "execution_count": 45,
+     "execution_count": 51,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -2326,7 +2539,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 46,
+   "execution_count": 52,
    "id": "ce60b639",
    "metadata": {},
    "outputs": [
@@ -2336,7 +2549,7 @@
        "array([ 5., 10., 15., 20., 50.])"
       ]
      },
-     "execution_count": 46,
+     "execution_count": 52,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -2360,7 +2573,7 @@
    "id": "fe7eb68b",
    "metadata": {},
    "source": [
-    "- Only numeric nondecimal scalar types are currently supported as of yet, but strings and structured types are in planning. Attempting to use this API with those types will throw a `TypeError`.\n",
+    "- UDFs are currently only supported for numeric nondecimal scalar types (full support) and strings in `Series.apply` and `DataFrame.apply` (partial support, subject to the caveats outlined above). Attempting to use this API with unsupported types will raise a `TypeError`.\n",
     "- We do not yet fully support all arithmetic operators. Certain ops like bitwise operations are not currently implemented, but planned in future releases. If an operator is needed, a github issue should be raised so that it can be properly prioritized and implemented."
    ]
   },
@@ -2402,7 +2615,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.13"
+   "version": "3.9.13"
   }
  },
  "nbformat": 4,
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index cac37f1f274..f00c7d1f2b5 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -3955,6 +3955,12 @@ def apply(
         For more information, see the `cuDF guide to user defined functions
         <https://docs.rapids.ai/api/cudf/stable/user_guide/guide-to-udfs.html>`__.
 
+        Support for use of string data within UDFs is provided through the
+        `strings_udf <https://anaconda.org/rapidsai-nightly/strings_udf>`__
+        RAPIDS library. Supported operations on strings include the subset of
+        functions and string methods that expect an input string but do not
+        return a string. Refer to caveats in the UDF guide referenced above.
+
         Parameters
         ----------
         func : function
@@ -4078,6 +4084,46 @@ def apply(
         1     4.8
         2     5.0
         dtype: float64
+
+        UDFs manipulating string data are allowed, as long as
+        they neither modify strings in place nor create new strings.
+        For example, the following UDF is allowed:
+
+        >>> def f(row):
+        ...     st = row['str_col']
+        ...     scale = row['scale']
+        ...     if len(st) == 0:
+        ...             return -1
+        ...     elif st.startswith('a'):
+        ...             return 1 - scale
+        ...     elif 'example' in st:
+        ...             return 1 + scale
+        ...     else:
+        ...             return 42
+        ...
+        >>> df = cudf.DataFrame({
+        ...     'str_col': ['', 'abc', 'some_example'],
+        ...     'scale': [1, 2, 3]
+        ... })
+        >>> df.apply(f, axis=1)  # doctest: +SKIP
+        0   -1
+        1   -1
+        2    4
+        dtype: int64
+
+        However, the following UDF is not allowed since it includes an
+        operation that requires the creation of a new string: a call to the
+        ``upper`` method. Methods that are not supported in this manner
+        will raise an ``AttributeError``.
+
+        >>> def f(row):
+        ...     st = row['str_col'].upper()
+        ...     return 'ABC' in st
+        >>> df.apply(f, axis=1)  # doctest: +SKIP
+
+        For a complete list of supported functions and methods that may be
+        used to manipulate string data, see the the UDF guide,
+        <https://docs.rapids.ai/api/cudf/stable/user_guide/guide-to-udfs.html>
         """
         if axis != 1:
             raise ValueError(
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index e94ca8d653d..f11052096e3 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -2251,6 +2251,12 @@ def apply(self, func, convert_dtype=True, args=(), **kwargs):
         For more information, see the `cuDF guide to user defined functions
         <https://docs.rapids.ai/api/cudf/stable/user_guide/guide-to-udfs.html>`__.
 
+        Support for use of string data within UDFs is provided through the
+        `strings_udf <https://anaconda.org/rapidsai-nightly/strings_udf>`__
+        RAPIDS library. Supported operations on strings include the subset of
+        functions and string methods that expect an input string but do not
+        return a string. Refer to caveats in the UDF guide referenced above.
+
         Parameters
         ----------
         func : function
@@ -2332,6 +2338,43 @@ def apply(self, func, convert_dtype=True, args=(), **kwargs):
         1    <NA>
         2     4.5
         dtype: float64
+
+        UDFs manipulating string data are allowed, as long as
+        they neither modify strings in place nor create new strings.
+        For example, the following UDF is allowed:
+
+        >>> def f(st):
+        ...     if len(st) == 0:
+        ...             return -1
+        ...     elif st.startswith('a'):
+        ...             return 1
+        ...     elif 'example' in st:
+        ...             return 2
+        ...     else:
+        ...             return 3
+        ...
+        >>> sr = cudf.Series(['', 'abc', 'some_example'])
+        >>> sr.apply(f)  # doctest: +SKIP
+        0   -1
+        1    1
+        2    2
+        dtype: int64
+
+        However, the following UDF is not allowed since it includes an
+        operation that requires the creation of a new string: a call to the
+        ``upper`` method. Methods that are not supported in this manner
+        will raise an ``AttributeError``.
+
+        >>> def f(st):
+        ...     new = st.upper()
+        ...     return 'ABC' in new
+        ...
+        >>> sr.apply(f)  # doctest: +SKIP
+
+        For a complete list of supported functions and methods that may be
+        used to manipulate string data, see the the UDF guide,
+        <https://docs.rapids.ai/api/cudf/stable/user_guide/guide-to-udfs.html>
+
         """
         if convert_dtype is not True:
             raise ValueError("Series.apply only supports convert_dtype=True")

From 5a416a02feca89da1e9f32e30a7fc501d1a2d34d Mon Sep 17 00:00:00 2001
From: nvdbaranec <56695930+nvdbaranec@users.noreply.github.com>
Date: Tue, 27 Sep 2022 16:27:10 -0500
Subject: [PATCH 6/7] Fix an issue in cudf::row_bit_count involving structs and
 lists at multiple levels. (#11779)

`row_bit_count` keeps track of a stack of "branches" which represent a span of rows to be included in the computed size.  As you traverse through a hierarchy of lists, that span of rows is maintained as a stack.  The code that was handling jumping out from the bottom of a stack to a new column was making the faulty assumption that the jump was only 1 level up.

Authors:
  - https://github.com/nvdbaranec

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Mike Wilson (https://github.com/hyperbolic2346)
  - Alessandro Bellina (https://github.com/abellina)

URL: https://github.com/rapidsai/cudf/pull/11779
---
 cpp/src/transform/row_bit_count.cu        | 10 +++---
 cpp/tests/transform/row_bit_count_test.cu | 38 +++++++++++++++++++++++
 2 files changed, 44 insertions(+), 4 deletions(-)

diff --git a/cpp/src/transform/row_bit_count.cu b/cpp/src/transform/row_bit_count.cu
index e60e9f95440..9545b5289f9 100644
--- a/cpp/src/transform/row_bit_count.cu
+++ b/cpp/src/transform/row_bit_count.cu
@@ -409,7 +409,8 @@ __global__ void compute_row_sizes(device_span<column_device_view const> cols,
   auto const num_rows = output.size();
   if (tid >= num_rows) { return; }
 
-  // branch stack. points to the last list prior to branching.
+  // my_branch_stack points to the last span prior to branching. a branch occurs only
+  // when we are inside of a list contained within a struct column.
   row_span* my_branch_stack = thread_branch_stacks + (threadIdx.x * max_branch_depth);
   size_type branch_depth{0};
 
@@ -424,11 +425,12 @@ __global__ void compute_row_sizes(device_span<column_device_view const> cols,
   for (size_type idx = 0; idx < cols.size(); idx++) {
     column_device_view const& col = cols[idx];
 
-    // if we've returned from a branch
+    // if we've returned from a branch, pop to the proper span
     if (info[idx].branch_depth_start < last_branch_depth) {
-      cur_span = my_branch_stack[--branch_depth];
+      branch_depth = info[idx].branch_depth_start;
+      cur_span     = my_branch_stack[branch_depth];
     }
-    // if we're entering a new branch.
+    // if we're entering a new branch, push the current span
     // NOTE: this case can happen (a pop and a push by the same column)
     // when we have a struct<list, list>
     if (info[idx].branch_depth_end > info[idx].branch_depth_start) {
diff --git a/cpp/tests/transform/row_bit_count_test.cu b/cpp/tests/transform/row_bit_count_test.cu
index 61c2fa12895..8151e0d6d8d 100644
--- a/cpp/tests/transform/row_bit_count_test.cu
+++ b/cpp/tests/transform/row_bit_count_test.cu
@@ -618,9 +618,47 @@ TEST_F(RowBitCount, Table)
     thrust::make_counting_iterator(0) + t.num_rows(),
     mcv.begin<size_type>(),
     sum_functor{cv0.data<size_type>(), cv1.data<size_type>(), cv2.data<size_type>()});
+
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected, *result);
 }
 
+TEST_F(RowBitCount, DepthJump)
+{
+  // jump more than 1 branch depth.
+
+  using T = int;
+
+  // struct<list<struct<list<int>>, int>
+  // the jump occurs from depth 2 (the leafmost int column)
+  // to depth 0 (the topmost int column)
+  cudf::test::fixed_width_column_wrapper<T> ____c0{1, 2, 3, 5, 5, 6, 7, 8};
+  cudf::test::fixed_width_column_wrapper<cudf::offset_type> ___offsets{0, 2, 4, 6, 8};
+  auto ___c0 = cudf::make_lists_column(4, ___offsets.release(), ____c0.release(), 0, {});
+  std::vector<std::unique_ptr<cudf::column>> __children;
+  __children.push_back(std::move(___c0));
+  cudf::test::structs_column_wrapper __c0(std::move(__children));
+  cudf::test::fixed_width_column_wrapper<cudf::offset_type> _offsets{0, 3, 4};
+  auto _c0 = cudf::make_lists_column(2, _offsets.release(), __c0.release(), 0, {});
+  cudf::test::fixed_width_column_wrapper<int> _c1{3, 4};
+  std::vector<std::unique_ptr<cudf::column>> children;
+  children.push_back(std::move(_c0));
+  children.push_back(_c1.release());
+  cudf::test::structs_column_wrapper c0(std::move(children));
+
+  table_view t({c0});
+  auto result = cudf::row_bit_count(t);
+
+  // expected size = (num rows at level 1 + num_rows at level 2) + (# values the leaf int column) +
+  // 1 (value in topmost int column)
+  constexpr size_type offset_size = sizeof(cudf::offset_type) * CHAR_BIT;
+  constexpr size_type type_size   = sizeof(T) * CHAR_BIT;
+  cudf::test::fixed_width_column_wrapper<size_type> expected{
+    ((1 + 3) * offset_size) + (6 * type_size) + (1 * type_size),
+    ((1 + 1) * offset_size) + (2 * type_size) + (1 * type_size)};
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result);
+}
+
 TEST_F(RowBitCount, SlicedColumnsFixedWidth)
 {
   auto const slice_size = 7;

From f72c4ce715080525fbf79d4298b18af862822bd7 Mon Sep 17 00:00:00 2001
From: David Wendt <dwendt@nvidia.com>
Date: Wed, 28 Sep 2022 10:04:22 -0400
Subject: [PATCH 7/7] add change from 11771

---
 python/strings_udf/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/strings_udf/CMakeLists.txt b/python/strings_udf/CMakeLists.txt
index 53d31575363..41d0d0090cb 100644
--- a/python/strings_udf/CMakeLists.txt
+++ b/python/strings_udf/CMakeLists.txt
@@ -14,7 +14,7 @@
 
 cmake_minimum_required(VERSION 3.23.1 FATAL_ERROR)
 
-set(strings_udf_version 22.10.00)
+set(strings_udf_version 22.12.00)
 
 include(../../fetch_rapids.cmake)