From c2c67de0a487ef767f2b16e6b95132121e2eec04 Mon Sep 17 00:00:00 2001
From: Conor Hoekstra <36027403+codereport@users.noreply.github.com>
Date: Mon, 10 May 2021 16:10:32 -0400
Subject: [PATCH 01/11] Remove `boost` dependency (#7932)

Once we remove Boost `filesystem` dependency and use `std::filesystem`, we can remove the boost dependency.

This change requires dropping support for GCC < 9.

Authors:
  - Conor Hoekstra (https://github.com/codereport)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - AJ Schmidt (https://github.com/ajschmidt8)
  - Jason Lowe (https://github.com/jlowe)
  - Keith Kraus (https://github.com/kkraus14)

URL: https://github.com/rapidsai/cudf/pull/7932
---
 Dockerfile                                |  5 ++-
 conda/environments/cudf_dev_cuda11.0.yml  |  1 -
 conda/environments/cudf_dev_cuda11.1.yml  |  1 -
 conda/environments/cudf_dev_cuda11.2.yml  |  1 -
 conda/recipes/libcudf/meta.yaml           |  2 --
 cpp/CMakeLists.txt                        |  3 --
 cpp/cmake/cudf-build-config.cmake.in      |  2 --
 cpp/cmake/cudf-config.cmake.in            |  5 ---
 cpp/cmake/thirdparty/CUDF_FindBoost.cmake | 38 -----------------------
 cpp/src/jit/cache.cpp                     | 18 +++++------
 docker_build/Dockerfile                   |  4 ---
 java/LICENSE-bundled                      | 26 ----------------
 java/README.md                            | 18 +----------
 java/ci/Dockerfile.centos7                |  5 ---
 java/ci/build-in-docker.sh                |  2 +-
 java/pom.xml                              |  7 -----
 16 files changed, 13 insertions(+), 125 deletions(-)
 delete mode 100644 cpp/cmake/thirdparty/CUDF_FindBoost.cmake
 delete mode 100644 java/LICENSE-bundled

diff --git a/Dockerfile b/Dockerfile
index d24c5d05556..eef8a04067d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -19,7 +19,6 @@ RUN apt update -y --fix-missing && \
       git \
       gcc-${CC} \
       g++-${CXX} \
-      libboost-all-dev \
       tzdata && \
     apt-get autoremove -y && \
     apt-get clean && \
@@ -68,8 +67,8 @@ RUN if [ -f /cudf/docker/package_versions.sh ]; \
          conda env create --name cudf --file /cudf/conda/environments/cudf_dev_cuda${CUDA_SHORT_VERSION}.yml ; \
     fi
 
-ENV CC=/usr/bin/gcc-${CC}
-ENV CXX=/usr/bin/g++-${CXX}
+ENV CC=/opts/conda/envs/rapids/bin/gcc-${CC}
+ENV CXX=/opts/conda/envs/rapids/bin/g++-${CXX}
 
 # libcudf & cudf build/install
 RUN source activate cudf && \
diff --git a/conda/environments/cudf_dev_cuda11.0.yml b/conda/environments/cudf_dev_cuda11.0.yml
index 7c9fdc318c1..a44472de7dc 100644
--- a/conda/environments/cudf_dev_cuda11.0.yml
+++ b/conda/environments/cudf_dev_cuda11.0.yml
@@ -47,7 +47,6 @@ dependencies:
   - dlpack
   - arrow-cpp=1.0.1
   - arrow-cpp-proc * cuda
-  - boost-cpp>=1.72.0
   - double-conversion
   - rapidjson
   - flatbuffers
diff --git a/conda/environments/cudf_dev_cuda11.1.yml b/conda/environments/cudf_dev_cuda11.1.yml
index e278454d371..fde8bb0c0e8 100644
--- a/conda/environments/cudf_dev_cuda11.1.yml
+++ b/conda/environments/cudf_dev_cuda11.1.yml
@@ -47,7 +47,6 @@ dependencies:
   - dlpack
   - arrow-cpp=1.0.1
   - arrow-cpp-proc * cuda
-  - boost-cpp>=1.72.0
   - double-conversion
   - rapidjson
   - flatbuffers
diff --git a/conda/environments/cudf_dev_cuda11.2.yml b/conda/environments/cudf_dev_cuda11.2.yml
index 02eb40893dc..6e8d3e58cf1 100644
--- a/conda/environments/cudf_dev_cuda11.2.yml
+++ b/conda/environments/cudf_dev_cuda11.2.yml
@@ -47,7 +47,6 @@ dependencies:
   - dlpack
   - arrow-cpp=1.0.1
   - arrow-cpp-proc * cuda
-  - boost-cpp>=1.72.0
   - double-conversion
   - rapidjson
   - flatbuffers
diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml
index 1c7d332dd27..93644c8b213 100644
--- a/conda/recipes/libcudf/meta.yaml
+++ b/conda/recipes/libcudf/meta.yaml
@@ -39,12 +39,10 @@ requirements:
     - cudatoolkit {{ cuda_version }}.*
     - arrow-cpp 1.0.1
     - arrow-cpp-proc * cuda
-    - boost-cpp 1.72.0
     - dlpack
   run:
     - {{ pin_compatible('cudatoolkit', max_pin='x.x') }}
     - arrow-cpp-proc * cuda
-    - {{ pin_compatible('boost-cpp', max_pin='x.x.x') }}
     - {{ pin_compatible('dlpack', max_pin='x.x') }}
 
 test:
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 96e93d06d2d..52edcca82c6 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -121,8 +121,6 @@ find_package(ZLIB REQUIRED)
 find_package(Threads REQUIRED)
 # add third party dependencies using CPM
 include(cmake/thirdparty/CUDF_GetCPM.cmake)
-# find boost
-include(cmake/thirdparty/CUDF_FindBoost.cmake)
 # find jitify
 include(cmake/thirdparty/CUDF_GetJitify.cmake)
 # find thrust/cub
@@ -476,7 +474,6 @@ add_dependencies(cudf jitify_preprocess_run)
 # Specify the target module library dependencies
 target_link_libraries(cudf
            PUBLIC ZLIB::ZLIB
-                  Boost::filesystem
                   ${ARROW_LIBRARIES}
                   cudf::Thrust
                   rmm::rmm)
diff --git a/cpp/cmake/cudf-build-config.cmake.in b/cpp/cmake/cudf-build-config.cmake.in
index ed1926f20f0..358c4377078 100644
--- a/cpp/cmake/cudf-build-config.cmake.in
+++ b/cpp/cmake/cudf-build-config.cmake.in
@@ -43,8 +43,6 @@ find_dependency(ZLIB)
 
 # add third party dependencies using CPM
 include(@CUDF_SOURCE_DIR@/cmake/thirdparty/CUDF_GetCPM.cmake)
-# find boost
-include(@CUDF_SOURCE_DIR@/cmake/thirdparty/CUDF_FindBoost.cmake)
 # find jitify
 include(@CUDF_SOURCE_DIR@/cmake/thirdparty/CUDF_GetJitify.cmake)
 # find thrust/cub
diff --git a/cpp/cmake/cudf-config.cmake.in b/cpp/cmake/cudf-config.cmake.in
index 66c669851fa..86755696607 100644
--- a/cpp/cmake/cudf-config.cmake.in
+++ b/cpp/cmake/cudf-config.cmake.in
@@ -71,11 +71,6 @@ find_dependency(CUDAToolkit)
 find_dependency(Threads)
 find_dependency(ZLIB)
 
-# Don't look for a Boost CMake configuration file because it adds the
-# `-DBOOST_ALL_NO_LIB` and `-DBOOST_FILESYSTEM_DYN_LINK` compile defs
-set(Boost_NO_BOOST_CMAKE ON)
-find_dependency(Boost @CUDF_MIN_VERSION_Boost@ COMPONENTS filesystem)
-
 find_dependency(Arrow @CUDF_VERSION_Arrow@)
 
 set(ArrowCUDA_DIR "${Arrow_DIR}")
diff --git a/cpp/cmake/thirdparty/CUDF_FindBoost.cmake b/cpp/cmake/thirdparty/CUDF_FindBoost.cmake
deleted file mode 100644
index fef393d7f20..00000000000
--- a/cpp/cmake/thirdparty/CUDF_FindBoost.cmake
+++ /dev/null
@@ -1,38 +0,0 @@
-#=============================================================================
-# Copyright (c) 2020-2021, NVIDIA CORPORATION.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#=============================================================================
-
-# Min version set to newest boost in Ubuntu bionic apt repositories
-set(CUDF_MIN_VERSION_Boost 1.65.0)
-
-# Don't look for a Boost CMake configuration file because it adds the
-# `-DBOOST_ALL_NO_LIB` and `-DBOOST_FILESYSTEM_DYN_LINK` compile defs
-set(Boost_NO_BOOST_CMAKE ON)
-
-# TODO: Use CPMFindPackage to add or build Boost
-
-find_package(Boost ${CUDF_MIN_VERSION_Boost} QUIET MODULE COMPONENTS filesystem)
-
-message(VERBOSE "CUDF: Boost_FOUND: ${Boost_FOUND}")
-
-if(NOT Boost_FOUND)
-    message(FATAL_ERROR "CUDF: Boost not found, please check your settings.")
-endif()
-
-message(VERBOSE "CUDF: Boost_LIBRARIES: ${Boost_LIBRARIES}")
-message(VERBOSE "CUDF: Boost_INCLUDE_DIRS: ${Boost_INCLUDE_DIRS}")
-
-list(APPEND CUDF_CXX_DEFINITIONS BOOST_NO_CXX14_CONSTEXPR)
-list(APPEND CUDF_CUDA_DEFINITIONS BOOST_NO_CXX14_CONSTEXPR)
diff --git a/cpp/src/jit/cache.cpp b/cpp/src/jit/cache.cpp
index f86018276e7..37b5f58da22 100644
--- a/cpp/src/jit/cache.cpp
+++ b/cpp/src/jit/cache.cpp
@@ -17,29 +17,29 @@
 #include <cudf/utilities/error.hpp>
 
 #include <cuda.h>
-#include <boost/filesystem.hpp>
 #include <jitify2.hpp>
 
 #include <cstddef>
+#include <filesystem>
 
 namespace cudf {
 namespace jit {
 
 // Get the directory in home to use for storing the cache
-boost::filesystem::path get_user_home_cache_dir()
+std::filesystem::path get_user_home_cache_dir()
 {
   auto home_dir = std::getenv("HOME");
   if (home_dir != nullptr) {
-    return boost::filesystem::path(home_dir) / ".cudf";
+    return std::filesystem::path(home_dir) / ".cudf";
   } else {
-    return boost::filesystem::path();
+    return std::filesystem::path();
   }
 }
 
 // Default `LIBCUDF_KERNEL_CACHE_PATH` to `$HOME/.cudf/$CUDF_VERSION`.
 // This definition can be overridden at compile time by specifying a
 // `-DLIBCUDF_KERNEL_CACHE_PATH=/kernel/cache/path` CMake argument.
-// Use `boost::filesystem` for cross-platform path resolution and dir
+// Use `std::filesystem` for cross-platform path resolution and dir
 // creation. This path is used in the `getCacheDir()` function below.
 #if !defined(LIBCUDF_KERNEL_CACHE_PATH)
 #define LIBCUDF_KERNEL_CACHE_PATH get_user_home_cache_dir()
@@ -59,12 +59,12 @@ boost::filesystem::path get_user_home_cache_dir()
  * are used and if $HOME is not defined, returns an empty path and file
  * caching is not used.
  */
-boost::filesystem::path get_cache_dir()
+std::filesystem::path get_cache_dir()
 {
   // The environment variable always overrides the
   // default/compile-time value of `LIBCUDF_KERNEL_CACHE_PATH`
   auto kernel_cache_path_env = std::getenv("LIBCUDF_KERNEL_CACHE_PATH");
-  auto kernel_cache_path     = boost::filesystem::path(
+  auto kernel_cache_path     = std::filesystem::path(
     kernel_cache_path_env != nullptr ? kernel_cache_path_env : LIBCUDF_KERNEL_CACHE_PATH);
 
   // Cache path could be empty when env HOME is unset or LIBCUDF_KERNEL_CACHE_PATH is defined to be
@@ -86,10 +86,10 @@ boost::filesystem::path get_cache_dir()
 
     try {
       // `mkdir -p` the kernel cache path if it doesn't exist
-      boost::filesystem::create_directories(kernel_cache_path);
+      std::filesystem::create_directories(kernel_cache_path);
     } catch (const std::exception& e) {
       // if directory creation fails for any reason, return empty path
-      return boost::filesystem::path();
+      return std::filesystem::path();
     }
   }
   return kernel_cache_path;
diff --git a/docker_build/Dockerfile b/docker_build/Dockerfile
index 0c04cab152a..696a6969778 100644
--- a/docker_build/Dockerfile
+++ b/docker_build/Dockerfile
@@ -34,15 +34,11 @@ RUN apt update -y --fix-missing && \
 
 RUN apt install -y --no-install-recommends \
       git \
-      libboost-all-dev \
       python3.8-dev \
       build-essential \
       autoconf \
       bison \
       flex \
-      libboost-filesystem-dev \
-      libboost-system-dev \
-      libboost-regex-dev \
       libjemalloc-dev \
       wget \
       libssl-dev \
diff --git a/java/LICENSE-bundled b/java/LICENSE-bundled
deleted file mode 100644
index 165befcb174..00000000000
--- a/java/LICENSE-bundled
+++ /dev/null
@@ -1,26 +0,0 @@
-The binary distribution of this product bundles binaries of Boost
-which are available under the following license:
-
-Boost Software License - Version 1.0 - August 17th, 2003
-
-Permission is hereby granted, free of charge, to any person or organization
-obtaining a copy of the software and accompanying documentation covered by
-this license (the "Software") to use, reproduce, display, distribute,
-execute, and transmit the Software, and to prepare derivative works of the
-Software, and to permit third-parties to whom the Software is furnished to
-do so, all subject to the following:
-
-The copyright notices in the Software and this entire statement, including
-the above license grant, this restriction and the following disclaimer,
-must be included in all copies of the Software, in whole or in part, and
-all derivative works of the Software, unless such copies or derivative
-works are solely in the form of machine-executable object code generated by
-a source language processor.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
-SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
-FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
-ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE.
diff --git a/java/README.md b/java/README.md
index 7cc1fcf5aec..ed445cd764f 100644
--- a/java/README.md
+++ b/java/README.md
@@ -52,23 +52,7 @@ CUDA 11.0:
 
 Build the native code first, and make sure the a JDK is installed and available.
 
-When building libcudf, make sure you install boost first:
-```bash
-# Install Boost C++ for Ubuntu 16.04/18.04/20.04
-sudo apt install libboost-filesystem-dev
-```
-or for a smaller installation footprint (Boost is a large library), build it from the source:
-```bash
-wget https://dl.bintray.com/boostorg/release/1.74.0/source/boost_1_74_0.tar.bz2
-tar xvf boost_1_74_0.tar.bz2
-cd boost_1_74_0
-./bootstrap.sh --with-libraries=filesystem
-./b2 cxxflags=-fPIC link=static
-sudo cp stage/lib/libboost_filesystem.a /usr/local/lib/
-```
-and pass in the cmake options
-`-DCUDF_USE_ARROW_STATIC=ON -DBoost_USE_STATIC_LIBS=ON` so that Apache Arrow and Boost libraries are
-linked statically.
+Pass in the cmake option `-DCUDF_USE_ARROW_STATIC=ON` so that Apache Arrow is linked statically.
 
 If you use the default cmake options libcudart will be dynamically linked to libcudf
 which is included.  If you do this the resulting jar will have a classifier associated with it
diff --git a/java/ci/Dockerfile.centos7 b/java/ci/Dockerfile.centos7
index 607520692d0..4efc029cb47 100644
--- a/java/ci/Dockerfile.centos7
+++ b/java/ci/Dockerfile.centos7
@@ -31,11 +31,6 @@ RUN yum install -y git zlib-devel maven tar wget patch
 ## pre-create the CMAKE_INSTALL_PREFIX folder, set writable by any user for Jenkins
 RUN mkdir /usr/local/rapids && mkdir /rapids && chmod 777 /usr/local/rapids && chmod 777 /rapids
 
-RUN cd /rapids/ && wget https://dl.bintray.com/boostorg/release/1.72.0/source/boost_1_72_0.tar.gz && \
-   tar zxf boost_1_72_0.tar.gz && \
-   cd boost_1_72_0 && \
-   scl enable devtoolset-9 "./bootstrap.sh --prefix=/usr && ./b2 install --with-filesystem threading=multi link=static cxxflags=-fPIC; exit 0"
-
 RUN cd /usr/local/ && wget --quiet https://github.com/Kitware/CMake/releases/download/v3.19.0/cmake-3.19.0-Linux-x86_64.tar.gz && \
    tar zxf cmake-3.19.0-Linux-x86_64.tar.gz
 
diff --git a/java/ci/build-in-docker.sh b/java/ci/build-in-docker.sh
index 10be5b9c639..25f36f04e48 100755
--- a/java/ci/build-in-docker.sh
+++ b/java/ci/build-in-docker.sh
@@ -55,7 +55,7 @@ export PATH=/usr/local/cmake-3.19.0-Linux-x86_64/bin:$PATH
 rm -rf $WORKSPACE/cpp/build
 mkdir -p $WORKSPACE/cpp/build
 cd $WORKSPACE/cpp/build
-cmake .. -DUSE_NVTX=$ENABLE_NVTX -DCUDF_USE_ARROW_STATIC=ON -DBoost_USE_STATIC_LIBS=ON -DBUILD_TESTS=$SKIP_CPP_TESTS -DPER_THREAD_DEFAULT_STREAM=$ENABLE_PTDS -DRMM_LOGGING_LEVEL=$RMM_LOGGING_LEVEL
+cmake .. -DUSE_NVTX=$ENABLE_NVTX -DCUDF_USE_ARROW_STATIC=ON -DBUILD_TESTS=$SKIP_CPP_TESTS -DPER_THREAD_DEFAULT_STREAM=$ENABLE_PTDS -DRMM_LOGGING_LEVEL=$RMM_LOGGING_LEVEL
 make -j$PARALLEL_LEVEL
 make install DESTDIR=$INSTALL_PREFIX
 
diff --git a/java/pom.xml b/java/pom.xml
index d94d51944a0..206faf0a443 100755
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -283,13 +283,6 @@
                 <include>LICENSE</include>
               </includes>
             </resource>
-            <resource>
-              <directory>${basedir}</directory>
-              <targetPath>META-INF</targetPath>
-              <includes>
-                <include>LICENSE-bundled</include>
-              </includes>
-            </resource>
             <resource>
                 <directory>${project.build.directory}/native-deps/</directory>
             </resource>

From 9328c563d46a1ba6dee4679f4081f49c2ab7f5f4 Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Tue, 11 May 2021 09:50:38 -0500
Subject: [PATCH 02/11] Add notes in IO supported types doc table. (#8203)

This PR adds a note for JSON writer, that it is not GPU accelerated.
This PR also fixes a misc doc build issue around formatting.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Vukasin Milovanovic (https://github.com/vuule)
  - Keith Kraus (https://github.com/kkraus14)

URL: https://github.com/rapidsai/cudf/pull/8203
---
 docs/cudf/source/io-supported-types.rst | 112 ++++++++++++------------
 python/cudf/cudf/core/column/string.py  |   8 +-
 2 files changed, 62 insertions(+), 58 deletions(-)

diff --git a/docs/cudf/source/io-supported-types.rst b/docs/cudf/source/io-supported-types.rst
index ce2f7a85fbf..a74f3239044 100644
--- a/docs/cudf/source/io-supported-types.rst
+++ b/docs/cudf/source/io-supported-types.rst
@@ -5,58 +5,62 @@ The following table lists are compatible cudf types for each supported IO format
 
 .. rst-class:: io-supported-types-table
 .. table::
-    :widths: 15 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10
+    :widths: 15 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10
 
-    +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+-----------------+--------+--------+--------+--------+
-    |                       |       CSV       |      Parquet    |      JSON       |       ORC       |  AVRO  |       HDF       |       DLPack    |      Feather    |
-    +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
-    | Data Type             | Writer | Reader | Writer | Reader | Writer | Reader | Writer | Reader | Reader | Writer | Reader | Writer | Reader | Writer | Reader |
-    +=======================+========+========+========+========+========+========+========+========+========+========+========+========+========+========+========+
-    | int8                  | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     |
-    +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
-    | int16                 | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     |
-    +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
-    | int32                 | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     |
-    +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
-    | int64                 | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     |
-    +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
-    | uint8                 | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ❌     | ✅     | ❌     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     |
-    +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
-    | uint16                | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ❌     | ✅     | ❌     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     |
-    +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
-    | uint32                | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ❌     | ✅     | ❌     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     |
-    +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
-    | uint64                | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ❌     | ❌     | ❌     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     |
-    +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
-    | float32               | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     |
-    +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
-    | float64               | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     |
-    +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
-    | bool                  | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     |
-    +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
-    | str                   | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ❌     | ❌     | ✅     | ✅     |
-    +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
-    | category              | ✅     | ❌     | ❌     | ❌     | ✅     | ❌     | ❌     | ❌     | ❌     | ✅     | ✅     | ❌     | ❌     | ✅     | ✅     |
-    +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
-    | list                  | ❌     | ❌     | ✅     | ✅     | ✅     | ❌     | ❌     | ❌     | ❌     | ❌     | ❌     | ❌     | ❌     | ✅     | ✅     |
-    +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
-    | timedelta64[s]        | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ❌     | ❌     | ❌     | ✅     | ✅     | ❌     | ❌     | ✅     | ✅     |
-    +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
-    | timedelta64[ms]       | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ❌     | ❌     | ❌     | ✅     | ✅     | ❌     | ❌     | ✅     | ✅     |
-    +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
-    | timedelta64[us]       | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ❌     | ❌     | ❌     | ✅     | ✅     | ❌     | ❌     | ✅     | ✅     |
-    +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
-    | timedelta64[ns]       | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ❌     | ❌     | ❌     | ✅     | ✅     | ❌     | ❌     | ✅     | ✅     |
-    +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
-    | datetime64[s]         | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ❌     | ❌     | ✅     | ✅     |
-    +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
-    | datetime64[ms]        | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ❌     | ❌     | ✅     | ✅     |
-    +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
-    | datetime64[us]        | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ❌     | ❌     | ✅     | ✅     |
-    +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
-    | datetime64[ns]        | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ✅     | ❌     | ❌     | ✅     | ✅     |
-    +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
-    | struct                | ❌     | ❌     | ✅     | ✅     | ❌     | ❌     | ❌     | ❌     | ❌     | ✅     | ✅     | ❌     | ❌     | ✅     | ✅     |
-    +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
-    | decimal64             | ❌     | ❌     | ✅     | ✅     | ❌     | ❌     | ❌     | ✅     | ❌     | ❌     | ❌     | ❌     | ❌     | ❌     | ❌     |
-    +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+
+    +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+-------------------+--------+--------+---------+---------+
+    |                       |       CSV       |      Parquet    |       JSON       |       ORC       |  AVRO  |        HDF        |       DLPack    |      Feather      |
+    +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+
+    | Data Type             | Writer | Reader | Writer | Reader | Writer¹ | Reader | Writer | Reader | Reader | Writer¹ | Reader¹ | Writer | Reader | Writer¹ | Reader¹ |
+    +=======================+========+========+========+========+=========+========+========+========+========+=========+=========+========+========+=========+=========+
+    | int8                  | ✅     | ✅     | ✅     | ✅     | ✅      | ✅     | ✅     | ✅     | ✅     | ✅      | ✅      | ✅     | ✅     | ✅      | ✅      |
+    +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+
+    | int16                 | ✅     | ✅     | ✅     | ✅     | ✅      | ✅     | ✅     | ✅     | ✅     | ✅      | ✅      | ✅     | ✅     | ✅      | ✅      |
+    +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+
+    | int32                 | ✅     | ✅     | ✅     | ✅     | ✅      | ✅     | ✅     | ✅     | ✅     | ✅      | ✅      | ✅     | ✅     | ✅      | ✅      |
+    +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+
+    | int64                 | ✅     | ✅     | ✅     | ✅     | ✅      | ✅     | ✅     | ✅     | ✅     | ✅      | ✅      | ✅     | ✅     | ✅      | ✅      |
+    +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+
+    | uint8                 | ✅     | ✅     | ✅     | ✅     | ✅      | ✅     | ❌     | ✅     | ❌     | ✅      | ✅      | ✅     | ✅     | ✅      | ✅      |
+    +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+
+    | uint16                | ✅     | ✅     | ✅     | ✅     | ✅      | ✅     | ❌     | ✅     | ❌     | ✅      | ✅      | ✅     | ✅     | ✅      | ✅      |
+    +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+
+    | uint32                | ✅     | ✅     | ✅     | ✅     | ✅      | ✅     | ❌     | ✅     | ❌     | ✅      | ✅      | ✅     | ✅     | ✅      | ✅      |
+    +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+
+    | uint64                | ✅     | ✅     | ✅     | ✅     | ✅      | ✅     | ❌     | ❌     | ❌     | ✅      | ✅      | ✅     | ✅     | ✅      | ✅      |
+    +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+
+    | float32               | ✅     | ✅     | ✅     | ✅     | ✅      | ✅     | ✅     | ✅     | ✅     | ✅      | ✅      | ✅     | ✅     | ✅      | ✅      |
+    +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+
+    | float64               | ✅     | ✅     | ✅     | ✅     | ✅      | ✅     | ✅     | ✅     | ✅     | ✅      | ✅      | ✅     | ✅     | ✅      | ✅      |
+    +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+
+    | bool                  | ✅     | ✅     | ✅     | ✅     | ✅      | ✅     | ✅     | ✅     | ✅     | ✅      | ✅      | ✅     | ✅     | ✅      | ✅      |
+    +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+
+    | str                   | ✅     | ✅     | ✅     | ✅     | ✅      | ✅     | ✅     | ✅     | ✅     | ✅      | ✅      | ❌     | ❌     | ✅      | ✅      |
+    +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+
+    | category              | ✅     | ❌     | ❌     | ❌     | ✅      | ❌     | ❌     | ❌     | ❌     | ✅      | ✅      | ❌     | ❌     | ✅      | ✅      |
+    +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+
+    | list                  | ❌     | ❌     | ✅     | ✅     | ✅      | ❌     | ❌     | ❌     | ❌     | ❌      | ❌      | ❌     | ❌     | ✅      | ✅      |
+    +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+
+    | timedelta64[s]        | ✅     | ✅     | ✅     | ✅     | ✅      | ✅     | ❌     | ❌     | ❌     | ✅      | ✅      | ❌     | ❌     | ✅      | ✅      |
+    +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+
+    | timedelta64[ms]       | ✅     | ✅     | ✅     | ✅     | ✅      | ✅     | ❌     | ❌     | ❌     | ✅      | ✅      | ❌     | ❌     | ✅      | ✅      |
+    +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+
+    | timedelta64[us]       | ✅     | ✅     | ✅     | ✅     | ✅      | ✅     | ❌     | ❌     | ❌     | ✅      | ✅      | ❌     | ❌     | ✅      | ✅      |
+    +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+
+    | timedelta64[ns]       | ✅     | ✅     | ✅     | ✅     | ✅      | ✅     | ❌     | ❌     | ❌     | ✅      | ✅      | ❌     | ❌     | ✅      | ✅      |
+    +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+
+    | datetime64[s]         | ✅     | ✅     | ✅     | ✅     | ✅      | ✅     | ✅     | ✅     | ✅     | ✅      | ✅      | ❌     | ❌     | ✅      | ✅      |
+    +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+
+    | datetime64[ms]        | ✅     | ✅     | ✅     | ✅     | ✅      | ✅     | ✅     | ✅     | ✅     | ✅      | ✅      | ❌     | ❌     | ✅      | ✅      |
+    +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+
+    | datetime64[us]        | ✅     | ✅     | ✅     | ✅     | ✅      | ✅     | ✅     | ✅     | ✅     | ✅      | ✅      | ❌     | ❌     | ✅      | ✅      |
+    +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+
+    | datetime64[ns]        | ✅     | ✅     | ✅     | ✅     | ✅      | ✅     | ✅     | ✅     | ✅     | ✅      | ✅      | ❌     | ❌     | ✅      | ✅      |
+    +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+
+    | struct                | ❌     | ❌     | ✅     | ✅     | ❌      | ❌     | ❌     | ❌     | ❌     | ✅      | ✅      | ❌     | ❌     | ✅      | ✅      |
+    +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+
+    | decimal64             | ❌     | ❌     | ✅     | ✅     | ❌      | ❌     | ❌     | ✅     | ❌     | ❌      | ❌      | ❌     | ❌     | ❌      | ❌      |
+    +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+
+
+**Notes:**
+
+* [¹] - Not GPU-accelerated.
diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index 14b71ad5528..e64aecf9dea 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -2201,7 +2201,7 @@ def get_json_object(self, json_path):
         >>> import cudf
         >>> s = cudf.Series(
             [
-                \"\"\"
+                \\"\\"\\"
                 {
                     "store":{
                         "book":[
@@ -2220,13 +2220,13 @@ def get_json_object(self, json_path):
                         ]
                     }
                 }
-                \"\"\"
+                \\"\\"\\"
             ])
         >>> s
-            0    {"store": {\n        "book": [\n        { "cat...
+            0    {"store": {\\n        "book": [\\n        { "cat...
             dtype: object
         >>> s.str.get_json_object("$.store.book")
-            0    [\n        { "category": "reference",\n       ...
+            0    [\\n        { "category": "reference",\\n       ...
             dtype: object
         """
 

From 9a063b650636a623452a16ffb74cd6f1f365c97e Mon Sep 17 00:00:00 2001
From: Conor Hoekstra <36027403+codereport@users.noreply.github.com>
Date: Tue, 11 May 2021 14:29:36 -0400
Subject: [PATCH 03/11] Abstract Syntax Tree Cleanup and Tests (#7418)

Resolves https://github.com/rapidsai/cudf/issues/6320

Authors:
  - Conor Hoekstra (https://github.com/codereport)
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - AJ Schmidt (https://github.com/ajschmidt8)
  - Jake Hemstad (https://github.com/jrhemstad)
  - https://github.com/nvdbaranec

URL: https://github.com/rapidsai/cudf/pull/7418
---
 conda/recipes/libcudf/meta.yaml               |   2 +-
 cpp/include/cudf/ast/detail/linearizer.hpp    |   9 +-
 cpp/include/cudf/ast/detail/operators.hpp     |  47 -------
 cpp/include/cudf/ast/detail/transform.cuh     | 119 ++++++++++++------
 .../cudf/ast/{linearizer.hpp => nodes.hpp}    |  38 ++----
 cpp/include/cudf/ast/transform.hpp            |   2 +-
 cpp/src/ast/linearizer.cpp                    |   2 +-
 cpp/src/ast/transform.cu                      |  75 +++--------
 cpp/tests/ast/transform_tests.cpp             |  66 ++++++++++
 9 files changed, 177 insertions(+), 183 deletions(-)
 rename cpp/include/cudf/ast/{linearizer.hpp => nodes.hpp} (90%)

diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml
index 93644c8b213..ffd30758a50 100644
--- a/conda/recipes/libcudf/meta.yaml
+++ b/conda/recipes/libcudf/meta.yaml
@@ -53,7 +53,7 @@ test:
     - test -f $PREFIX/include/cudf/ast/transform.hpp
     - test -f $PREFIX/include/cudf/ast/detail/linearizer.hpp
     - test -f $PREFIX/include/cudf/ast/detail/operators.hpp
-    - test -f $PREFIX/include/cudf/ast/linearizer.hpp
+    - test -f $PREFIX/include/cudf/ast/nodes.hpp
     - test -f $PREFIX/include/cudf/ast/operators.hpp
     - test -f $PREFIX/include/cudf/binaryop.hpp
     - test -f $PREFIX/include/cudf/labeling/label_bins.hpp
diff --git a/cpp/include/cudf/ast/detail/linearizer.hpp b/cpp/include/cudf/ast/detail/linearizer.hpp
index 68319a24e5d..166a0408703 100644
--- a/cpp/include/cudf/ast/detail/linearizer.hpp
+++ b/cpp/include/cudf/ast/detail/linearizer.hpp
@@ -83,10 +83,7 @@ class linearizer;
  * This class is a part of a "visitor" pattern with the `linearizer` class.
  * Nodes inheriting from this class can accept visitors.
  */
-class node {
-  friend class detail::linearizer;
-
- private:
+struct node {
   virtual cudf::size_type accept(detail::linearizer& visitor) const = 0;
 };
 
@@ -102,10 +99,6 @@ class node {
  * resolved into intermediate data storage in shared memory.
  */
 class linearizer {
-  friend class literal;
-  friend class column_reference;
-  friend class expression;
-
  public:
   /**
    * @brief Construct a new linearizer object
diff --git a/cpp/include/cudf/ast/detail/operators.hpp b/cpp/include/cudf/ast/detail/operators.hpp
index 27bcb0d320b..8ae60f96997 100644
--- a/cpp/include/cudf/ast/detail/operators.hpp
+++ b/cpp/include/cudf/ast/detail/operators.hpp
@@ -753,43 +753,6 @@ struct operator_functor<ast_operator::NOT> {
   }
 };
 
-#if 0
-/**
- * @brief Functor used to double-type-dispatch binary operators.
- *
- * This functor's `operator()` is templated to validate calls to its operators based on the input
- * type, as determined by the `is_valid_binary_op` trait.
- *
- * @tparam OperatorFunctor Binary operator functor.
- */
-template <typename OperatorFunctor>
-struct double_dispatch_binary_operator_types {
-  template <typename LHS,
-            typename RHS,
-            typename F,
-            typename... Ts,
-            std::enable_if_t<is_valid_binary_op<OperatorFunctor, LHS, RHS>>* = nullptr>
-  CUDA_HOST_DEVICE_CALLABLE void operator()(F&& f, Ts&&... args)
-  {
-    f.template operator()<OperatorFunctor, LHS, RHS>(std::forward<Ts>(args)...);
-  }
-
-  template <typename LHS,
-            typename RHS,
-            typename F,
-            typename... Ts,
-            std::enable_if_t<!is_valid_binary_op<OperatorFunctor, LHS, RHS>>* = nullptr>
-  CUDA_HOST_DEVICE_CALLABLE void operator()(F&& f, Ts&&... args)
-  {
-#ifndef __CUDA_ARCH__
-    CUDF_FAIL("Invalid binary operation.");
-#else
-    cudf_assert(false && "Invalid binary operation.");
-#endif
-  }
-};
-#endif
-
 /**
  * @brief Functor used to single-type-dispatch binary operators.
  *
@@ -856,16 +819,6 @@ struct type_dispatch_binary_op {
                                             F&& f,
                                             Ts&&... args)
   {
-#if 0
-    // Double dispatch
-    /*
-    double_type_dispatcher(lhs_type,
-                           rhs_type,
-                           detail::double_dispatch_binary_operator_types<operator_functor<op>>{},
-                           std::forward<F>(f),
-                           std::forward<Ts>(args)...);
-    */
-#endif
     // Single dispatch (assume lhs_type == rhs_type)
     type_dispatcher(lhs_type,
                     detail::single_dispatch_binary_operator_types<operator_functor<op>>{},
diff --git a/cpp/include/cudf/ast/detail/transform.cuh b/cpp/include/cudf/ast/detail/transform.cuh
index f9d7426e2e4..f69927a3601 100644
--- a/cpp/include/cudf/ast/detail/transform.cuh
+++ b/cpp/include/cudf/ast/detail/transform.cuh
@@ -15,8 +15,9 @@
  */
 #pragma once
 
+#include <cudf/ast/detail/linearizer.hpp>
 #include <cudf/ast/detail/operators.hpp>
-#include <cudf/ast/linearizer.hpp>
+#include <cudf/ast/nodes.hpp>
 #include <cudf/ast/operators.hpp>
 #include <cudf/column/column_device_view.cuh>
 #include <cudf/column/column_factories.hpp>
@@ -25,6 +26,7 @@
 #include <cudf/table/table_device_view.cuh>
 #include <cudf/table/table_view.hpp>
 #include <cudf/types.hpp>
+#include <cudf/utilities/span.hpp>
 #include <cudf/utilities/traits.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
@@ -155,10 +157,11 @@ struct row_evaluator {
    * storing intermediates.
    * @param output_column The output column where results are stored.
    */
-  __device__ row_evaluator(table_device_view const& table,
-                           const cudf::detail::fixed_width_scalar_device_view_base* literals,
-                           std::int64_t* thread_intermediate_storage,
-                           mutable_column_device_view* output_column)
+  __device__ row_evaluator(
+    table_device_view const& table,
+    device_span<const cudf::detail::fixed_width_scalar_device_view_base> literals,
+    std::int64_t* thread_intermediate_storage,
+    mutable_column_device_view* output_column)
     : table(table),
       literals(literals),
       thread_intermediate_storage(thread_intermediate_storage),
@@ -264,7 +267,7 @@ struct row_evaluator {
 
  private:
   table_device_view const& table;
-  const cudf::detail::fixed_width_scalar_device_view_base* literals;
+  device_span<const cudf::detail::fixed_width_scalar_device_view_base> literals;
   std::int64_t* thread_intermediate_storage;
   mutable_column_device_view* output_column;
 };
@@ -298,15 +301,15 @@ __device__ void row_output::resolve_output(detail::device_data_reference device_
  * @param num_operators Number of operators.
  * @param row_index Row index of data column(s).
  */
-__device__ void evaluate_row_expression(detail::row_evaluator const& evaluator,
-                                        const detail::device_data_reference* data_references,
-                                        const ast_operator* operators,
-                                        const cudf::size_type* operator_source_indices,
-                                        cudf::size_type num_operators,
-                                        cudf::size_type row_index)
+__device__ void evaluate_row_expression(
+  detail::row_evaluator const& evaluator,
+  device_span<const detail::device_data_reference> data_references,
+  device_span<const ast_operator> operators,
+  device_span<const cudf::size_type> operator_source_indices,
+  cudf::size_type row_index)
 {
-  auto operator_source_index = cudf::size_type(0);
-  for (cudf::size_type operator_index(0); operator_index < num_operators; operator_index++) {
+  auto operator_source_index = static_cast<cudf::size_type>(0);
+  for (cudf::size_type operator_index = 0; operator_index < operators.size(); operator_index++) {
     // Execute operator
     auto const op    = operators[operator_index];
     auto const arity = ast_operator_arity(op);
@@ -336,41 +339,79 @@ __device__ void evaluate_row_expression(detail::row_evaluator const& evaluator,
   }
 }
 
+/**
+ * @brief The AST plan creates a device buffer of data needed to execute an AST.
+ *
+ * On construction, an AST plan creates a single "packed" host buffer of all necessary data arrays,
+ * and copies that to the device with a single host-device memory copy. Because the plan tends to be
+ * small, this is the most efficient approach for low latency.
+ *
+ */
 struct ast_plan {
- public:
-  ast_plan() : sizes(), data_pointers() {}
+  ast_plan(linearizer const& expr_linearizer,
+           rmm::cuda_stream_view stream,
+           rmm::mr::device_memory_resource* mr)
+    : _sizes{}, _data_pointers{}
+  {
+    add_to_plan(expr_linearizer.data_references());
+    add_to_plan(expr_linearizer.literals());
+    add_to_plan(expr_linearizer.operators());
+    add_to_plan(expr_linearizer.operator_source_indices());
+
+    // Create device buffer
+    auto const buffer_size = std::accumulate(_sizes.cbegin(), _sizes.cend(), 0);
+    auto buffer_offsets    = std::vector<int>(_sizes.size());
+    thrust::exclusive_scan(_sizes.cbegin(), _sizes.cend(), buffer_offsets.begin(), 0);
+
+    auto h_data_buffer = std::make_unique<char[]>(buffer_size);
+    for (unsigned int i = 0; i < _data_pointers.size(); ++i) {
+      std::memcpy(h_data_buffer.get() + buffer_offsets[i], _data_pointers[i], _sizes[i]);
+    }
 
-  using buffer_type = std::pair<std::unique_ptr<char[]>, int>;
+    _device_data_buffer = rmm::device_buffer(h_data_buffer.get(), buffer_size, stream, mr);
+
+    stream.synchronize();
+
+    // Create device pointers to components of plan
+    auto device_data_buffer_ptr = static_cast<const char*>(_device_data_buffer.data());
+    _device_data_references     = device_span<const detail::device_data_reference>(
+      reinterpret_cast<const detail::device_data_reference*>(device_data_buffer_ptr +
+                                                             buffer_offsets[0]),
+      expr_linearizer.data_references().size());
+    _device_literals = device_span<const cudf::detail::fixed_width_scalar_device_view_base>(
+      reinterpret_cast<const cudf::detail::fixed_width_scalar_device_view_base*>(
+        device_data_buffer_ptr + buffer_offsets[1]),
+      expr_linearizer.literals().size());
+    _device_operators = device_span<const ast_operator>(
+      reinterpret_cast<const ast_operator*>(device_data_buffer_ptr + buffer_offsets[2]),
+      expr_linearizer.operators().size());
+    _device_operator_source_indices = device_span<const cudf::size_type>(
+      reinterpret_cast<const cudf::size_type*>(device_data_buffer_ptr + buffer_offsets[3]),
+      expr_linearizer.operator_source_indices().size());
+  }
 
+  /**
+   * @brief Helper function for adding components (operators, literals, etc) to AST plan
+   *
+   * @tparam T  The underlying type of the input `std::vector`
+   * @param  v  The `std::vector` containing components (operators, literals, etc)
+   */
   template <typename T>
   void add_to_plan(std::vector<T> const& v)
   {
     auto const data_size = sizeof(T) * v.size();
-    sizes.push_back(data_size);
-    data_pointers.push_back(v.data());
+    _sizes.push_back(data_size);
+    _data_pointers.push_back(v.data());
   }
 
-  buffer_type get_host_data_buffer() const
-  {
-    auto const total_size = std::accumulate(sizes.cbegin(), sizes.cend(), 0);
-    auto host_data_buffer = std::make_unique<char[]>(total_size);
-    auto const offsets    = get_offsets();
-    for (unsigned int i = 0; i < data_pointers.size(); ++i) {
-      std::memcpy(host_data_buffer.get() + offsets[i], data_pointers[i], sizes[i]);
-    }
-    return std::make_pair(std::move(host_data_buffer), total_size);
-  }
+  std::vector<cudf::size_type> _sizes;
+  std::vector<const void*> _data_pointers;
 
-  std::vector<cudf::size_type> get_offsets() const
-  {
-    auto offsets = std::vector<int>(sizes.size());
-    thrust::exclusive_scan(sizes.cbegin(), sizes.cend(), offsets.begin(), 0);
-    return offsets;
-  }
-
- private:
-  std::vector<cudf::size_type> sizes;
-  std::vector<const void*> data_pointers;
+  rmm::device_buffer _device_data_buffer;
+  device_span<const detail::device_data_reference> _device_data_references;
+  device_span<const cudf::detail::fixed_width_scalar_device_view_base> _device_literals;
+  device_span<const ast_operator> _device_operators;
+  device_span<const cudf::size_type> _device_operator_source_indices;
 };
 
 /**
diff --git a/cpp/include/cudf/ast/linearizer.hpp b/cpp/include/cudf/ast/nodes.hpp
similarity index 90%
rename from cpp/include/cudf/ast/linearizer.hpp
rename to cpp/include/cudf/ast/nodes.hpp
index e5ccb2e8069..70dda58816e 100644
--- a/cpp/include/cudf/ast/linearizer.hpp
+++ b/cpp/include/cudf/ast/nodes.hpp
@@ -38,17 +38,10 @@ enum class table_reference {
   OUTPUT  // Column index in the output table
 };
 
-// Forward declaration
-class literal;
-class column_reference;
-class expression;
-
 /**
  * @brief A literal value used in an abstract syntax tree.
  */
 class literal : public detail::node {
-  friend class detail::linearizer;
-
  public:
   /**
    * @brief Construct a new literal object.
@@ -90,7 +83,6 @@ class literal : public detail::node {
    */
   cudf::data_type get_data_type() const { return get_value().type(); }
 
- private:
   /**
    * @brief Get the value object.
    *
@@ -106,6 +98,7 @@ class literal : public detail::node {
    */
   cudf::size_type accept(detail::linearizer& visitor) const override;
 
+ private:
   const cudf::detail::fixed_width_scalar_device_view_base value;
 };
 
@@ -113,8 +106,6 @@ class literal : public detail::node {
  * @brief A node referring to data from a column in a table.
  */
 class column_reference : public detail::node {
-  friend class detail::linearizer;
-
  public:
   /**
    * @brief Construct a new column reference object
@@ -175,7 +166,6 @@ class column_reference : public detail::node {
     return table.column(get_column_index()).type();
   }
 
- private:
   /**
    * @brief Accepts a visitor class.
    *
@@ -184,6 +174,7 @@ class column_reference : public detail::node {
    */
   cudf::size_type accept(detail::linearizer& visitor) const override;
 
+ private:
   cudf::size_type column_index;
   table_reference table_source;
 };
@@ -192,8 +183,6 @@ class column_reference : public detail::node {
  * @brief An expression node holds an operator and zero or more operands.
  */
 class expression : public detail::node {
-  friend class detail::linearizer;
-
  public:
   /**
    * @brief Construct a new unary expression object.
@@ -208,11 +197,6 @@ class expression : public detail::node {
     }
   }
 
-  /**
-   * @brief `expression` doesn't accept r-value references for expression nodes
-   */
-  expression(ast_operator op, node&& input) = delete;
-
   /**
    * @brief Construct a new binary expression object.
    *
@@ -227,19 +211,11 @@ class expression : public detail::node {
     }
   }
 
-  /**
-   * @brief `expression` doesn't accept r-value references for expression nodes
-   */
-  expression(ast_operator op, node&& left, node&& right) = delete;
-
-  /**
-   * @brief `expression` doesn't accept r-value references for expression nodes
-   */
+  // expression only stores references to nodes, so it does not accept r-value
+  // references: the calling code must own the nodes.
+  expression(ast_operator op, node&& input)                   = delete;
+  expression(ast_operator op, node&& left, node&& right)      = delete;
   expression(ast_operator op, node&& left, node const& right) = delete;
-
-  /**
-   * @brief `expression` doesn't accept r-value references for expression nodes
-   */
   expression(ast_operator op, node const& left, node&& right) = delete;
 
   /**
@@ -256,7 +232,6 @@ class expression : public detail::node {
    */
   std::vector<std::reference_wrapper<const node>> get_operands() const { return operands; }
 
- private:
   /**
    * @brief Accepts a visitor class.
    *
@@ -265,6 +240,7 @@ class expression : public detail::node {
    */
   cudf::size_type accept(detail::linearizer& visitor) const override;
 
+ private:
   const ast_operator op;
   const std::vector<std::reference_wrapper<const node>> operands;
 };
diff --git a/cpp/include/cudf/ast/transform.hpp b/cpp/include/cudf/ast/transform.hpp
index 513f92ea251..59697e5f75c 100644
--- a/cpp/include/cudf/ast/transform.hpp
+++ b/cpp/include/cudf/ast/transform.hpp
@@ -15,7 +15,7 @@
  */
 #pragma once
 
-#include <cudf/ast/linearizer.hpp>
+#include <cudf/ast/nodes.hpp>
 #include <cudf/table/table_view.hpp>
 
 namespace cudf {
diff --git a/cpp/src/ast/linearizer.cpp b/cpp/src/ast/linearizer.cpp
index cc70845e1ff..66a32ead35e 100644
--- a/cpp/src/ast/linearizer.cpp
+++ b/cpp/src/ast/linearizer.cpp
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 #include <cudf/ast/detail/linearizer.hpp>
-#include <cudf/ast/linearizer.hpp>
+#include <cudf/ast/nodes.hpp>
 #include <cudf/ast/operators.hpp>
 #include <cudf/scalar/scalar.hpp>
 #include <cudf/scalar/scalar_device_view.cuh>
diff --git a/cpp/src/ast/transform.cu b/cpp/src/ast/transform.cu
index bc055d46869..43d3bde97c2 100644
--- a/cpp/src/ast/transform.cu
+++ b/cpp/src/ast/transform.cu
@@ -15,7 +15,7 @@
  */
 
 #include <cudf/ast/detail/transform.cuh>
-#include <cudf/ast/linearizer.hpp>
+#include <cudf/ast/nodes.hpp>
 #include <cudf/ast/operators.hpp>
 #include <cudf/ast/transform.hpp>
 #include <cudf/column/column_device_view.cuh>
@@ -61,27 +61,25 @@ namespace detail {
  * each thread.
  */
 template <cudf::size_type max_block_size>
-__launch_bounds__(max_block_size) __global__
-  void compute_column_kernel(table_device_view const table,
-                             const cudf::detail::fixed_width_scalar_device_view_base* literals,
-                             mutable_column_device_view output_column,
-                             const detail::device_data_reference* data_references,
-                             const ast_operator* operators,
-                             const cudf::size_type* operator_source_indices,
-                             cudf::size_type num_operators,
-                             cudf::size_type num_intermediates)
+__launch_bounds__(max_block_size) __global__ void compute_column_kernel(
+  table_device_view const table,
+  device_span<const cudf::detail::fixed_width_scalar_device_view_base> literals,
+  mutable_column_device_view output_column,
+  device_span<const detail::device_data_reference> data_references,
+  device_span<const ast_operator> operators,
+  device_span<const cudf::size_type> operator_source_indices,
+  cudf::size_type num_intermediates)
 {
   extern __shared__ std::int64_t intermediate_storage[];
   auto thread_intermediate_storage = &intermediate_storage[threadIdx.x * num_intermediates];
-  auto const start_idx             = cudf::size_type(threadIdx.x + blockIdx.x * blockDim.x);
-  auto const stride                = cudf::size_type(blockDim.x * gridDim.x);
-  auto const num_rows              = table.num_rows();
+  auto const start_idx = static_cast<cudf::size_type>(threadIdx.x + blockIdx.x * blockDim.x);
+  auto const stride    = static_cast<cudf::size_type>(blockDim.x * gridDim.x);
   auto const evaluator =
     cudf::ast::detail::row_evaluator(table, literals, thread_intermediate_storage, &output_column);
 
-  for (cudf::size_type row_index = start_idx; row_index < num_rows; row_index += stride) {
+  for (cudf::size_type row_index = start_idx; row_index < table.num_rows(); row_index += stride) {
     evaluate_row_expression(
-      evaluator, data_references, operators, operator_source_indices, num_operators, row_index);
+      evaluator, data_references, operators, operator_source_indices, row_index);
   }
 }
 
@@ -90,40 +88,8 @@ std::unique_ptr<column> compute_column(table_view const table,
                                        rmm::cuda_stream_view stream,
                                        rmm::mr::device_memory_resource* mr)
 {
-  // Linearize the AST
-  auto const expr_linearizer         = linearizer(expr, table);
-  auto const data_references         = expr_linearizer.data_references();
-  auto const literals                = expr_linearizer.literals();
-  auto const operators               = expr_linearizer.operators();
-  auto const num_operators           = cudf::size_type(operators.size());
-  auto const operator_source_indices = expr_linearizer.operator_source_indices();
-  auto const expr_data_type          = expr_linearizer.root_data_type();
-
-  // Create ast_plan and device buffer
-  auto plan = ast_plan();
-  plan.add_to_plan(data_references);
-  plan.add_to_plan(literals);
-  plan.add_to_plan(operators);
-  plan.add_to_plan(operator_source_indices);
-  auto const host_data_buffer = plan.get_host_data_buffer();
-  auto const buffer_offsets   = plan.get_offsets();
-  auto const buffer_size      = host_data_buffer.second;
-  auto device_data_buffer =
-    rmm::device_buffer(host_data_buffer.first.get(), buffer_size, stream, mr);
-  // To reduce overhead, we don't call a stream sync here.
-  // The stream is synced later when the table_device_view is created.
-
-  // Create device pointers to components of plan
-  auto const device_data_buffer_ptr = static_cast<const char*>(device_data_buffer.data());
-  auto const device_data_references = reinterpret_cast<const detail::device_data_reference*>(
-    device_data_buffer_ptr + buffer_offsets[0]);
-  auto const device_literals =
-    reinterpret_cast<const cudf::detail::fixed_width_scalar_device_view_base*>(
-      device_data_buffer_ptr + buffer_offsets[1]);
-  auto const device_operators =
-    reinterpret_cast<const ast_operator*>(device_data_buffer_ptr + buffer_offsets[2]);
-  auto const device_operator_source_indices =
-    reinterpret_cast<const cudf::size_type*>(device_data_buffer_ptr + buffer_offsets[3]);
+  auto const expr_linearizer = linearizer(expr, table);                // Linearize the AST
+  auto const plan            = ast_plan{expr_linearizer, stream, mr};  // Create ast_plan
 
   // Create table device view
   auto table_device         = table_device_view::create(table, stream);
@@ -131,7 +97,7 @@ std::unique_ptr<column> compute_column(table_view const table,
 
   // Prepare output column
   auto output_column = cudf::make_fixed_width_column(
-    expr_data_type, table_num_rows, mask_state::UNALLOCATED, stream, mr);
+    expr_linearizer.root_data_type(), table_num_rows, mask_state::UNALLOCATED, stream, mr);
   auto mutable_output_device =
     cudf::mutable_column_device_view::create(output_column->mutable_view(), stream);
 
@@ -155,12 +121,11 @@ std::unique_ptr<column> compute_column(table_view const table,
   cudf::ast::detail::compute_column_kernel<MAX_BLOCK_SIZE>
     <<<config.num_blocks, config.num_threads_per_block, shmem_size_per_block, stream.value()>>>(
       *table_device,
-      device_literals,
+      plan._device_literals,
       *mutable_output_device,
-      device_data_references,
-      device_operators,
-      device_operator_source_indices,
-      num_operators,
+      plan._device_data_references,
+      plan._device_operators,
+      plan._device_operator_source_indices,
       num_intermediates);
   CHECK_CUDA(stream.value());
   return output_column;
diff --git a/cpp/tests/ast/transform_tests.cpp b/cpp/tests/ast/transform_tests.cpp
index 8f4a46e2a54..74937d4deea 100644
--- a/cpp/tests/ast/transform_tests.cpp
+++ b/cpp/tests/ast/transform_tests.cpp
@@ -18,6 +18,7 @@
 #include <cudf/ast/transform.hpp>
 #include <cudf/column/column.hpp>
 #include <cudf/column/column_view.hpp>
+#include <cudf/detail/iterator.cuh>
 #include <cudf/scalar/scalar.hpp>
 #include <cudf/scalar/scalar_device_view.cuh>
 #include <cudf/scalar/scalar_factories.hpp>
@@ -30,6 +31,8 @@
 #include <cudf_test/column_wrapper.hpp>
 #include <cudf_test/table_utilities.hpp>
 
+#include <thrust/iterator/counting_iterator.h>
+
 #include <limits>
 #include <type_traits>
 
@@ -55,6 +58,22 @@ TEST_F(TransformTest, BasicAddition)
   cudf::test::expect_columns_equal(expected, result->view(), true);
 }
 
+TEST_F(TransformTest, BasicAdditionLarge)
+{
+  auto a     = thrust::make_counting_iterator(0);
+  auto col   = column_wrapper<int32_t>(a, a + 2000);
+  auto table = cudf::table_view{{col, col}};
+
+  auto col_ref    = cudf::ast::column_reference(0);
+  auto expression = cudf::ast::expression(cudf::ast::ast_operator::ADD, col_ref, col_ref);
+
+  auto b        = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i * 2; });
+  auto expected = column_wrapper<int32_t>(b, b + 2000);
+  auto result   = cudf::ast::compute_column(table, expression);
+
+  cudf::test::expect_columns_equal(expected, result->view(), true);
+}
+
 TEST_F(TransformTest, LessComparator)
 {
   auto c_0   = column_wrapper<int32_t>{3, 20, 1, 50};
@@ -71,6 +90,25 @@ TEST_F(TransformTest, LessComparator)
   cudf::test::expect_columns_equal(expected, result->view(), true);
 }
 
+TEST_F(TransformTest, LessComparatorLarge)
+{
+  auto a     = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i * 2; });
+  auto b     = thrust::make_counting_iterator(500);
+  auto c_0   = column_wrapper<int32_t>(a, a + 2000);
+  auto c_1   = column_wrapper<int32_t>(b, b + 2000);
+  auto table = cudf::table_view{{c_0, c_1}};
+
+  auto col_ref_0  = cudf::ast::column_reference(0);
+  auto col_ref_1  = cudf::ast::column_reference(1);
+  auto expression = cudf::ast::expression(cudf::ast::ast_operator::LESS, col_ref_0, col_ref_1);
+
+  auto c        = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i < 500; });
+  auto expected = column_wrapper<bool>(c, c + 2000);
+  auto result   = cudf::ast::compute_column(table, expression);
+
+  cudf::test::expect_columns_equal(expected, result->view(), true);
+}
+
 TEST_F(TransformTest, MultiLevelTreeArithmetic)
 {
   auto c_0   = column_wrapper<int32_t>{3, 20, 1, 50};
@@ -97,6 +135,34 @@ TEST_F(TransformTest, MultiLevelTreeArithmetic)
   cudf::test::expect_columns_equal(expected, result->view(), true);
 }
 
+TEST_F(TransformTest, MultiLevelTreeArithmeticLarge)
+{
+  using namespace cudf::ast;
+
+  auto a     = thrust::make_counting_iterator(0);
+  auto b     = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i + 1; });
+  auto c     = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i * 2; });
+  auto c_0   = column_wrapper<int32_t>(a, a + 2000);
+  auto c_1   = column_wrapper<int32_t>(b, b + 2000);
+  auto c_2   = column_wrapper<int32_t>(c, c + 2000);
+  auto table = cudf::table_view{{c_0, c_1, c_2}};
+
+  auto col_ref_0 = column_reference(0);
+  auto col_ref_1 = column_reference(1);
+  auto col_ref_2 = column_reference(2);
+
+  auto expr_left_subtree  = expression(cudf::ast::ast_operator::MUL, col_ref_0, col_ref_1);
+  auto expr_right_subtree = expression(cudf::ast::ast_operator::ADD, col_ref_2, col_ref_0);
+  auto expr_tree          = expression(ast_operator::SUB, expr_left_subtree, expr_right_subtree);
+
+  auto result = cudf::ast::compute_column(table, expr_tree);
+  auto calc   = [](auto i) { return (i * (i + 1)) - (i + (i * 2)); };
+  auto d      = cudf::detail::make_counting_transform_iterator(0, [&](auto i) { return calc(i); });
+  auto expected = column_wrapper<int32_t>(d, d + 2000);
+
+  cudf::test::expect_columns_equal(expected, result->view(), true);
+}
+
 TEST_F(TransformTest, ImbalancedTreeArithmetic)
 {
   auto c_0   = column_wrapper<double>{0.15, 0.37, 4.2, 21.3};

From 2c70f1d5084c12bd04f676b95d5f20cb53f67250 Mon Sep 17 00:00:00 2001
From: Raza Jafri <razajafri@users.noreply.github.com>
Date: Tue, 11 May 2021 11:47:34 -0700
Subject: [PATCH 04/11] Closed column view to avoid memory leak (#8202)

fixes https://github.com/rapidsai/cudf/issues/8177

Authors:
  - Raza Jafri (https://github.com/razajafri)

Approvers:
  - Jason Lowe (https://github.com/jlowe)

URL: https://github.com/rapidsai/cudf/pull/8202
---
 .../main/java/ai/rapids/cudf/ColumnView.java  | 37 ++++++++++++-------
 1 file changed, 23 insertions(+), 14 deletions(-)

diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java
index 5a5c3d3f687..90077829898 100644
--- a/java/src/main/java/ai/rapids/cudf/ColumnView.java
+++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java
@@ -1457,23 +1457,32 @@ public ColumnView replaceChildrenWithViews(int[] indices,
       map.put(indices[index], views[index]);
     });
     List<ColumnView> newChildren = new ArrayList<>(getNumChildren());
-    IntStream.range(0, getNumChildren()).forEach(i -> {
-      ColumnView view = map.remove(i);
-      ColumnView child = getChildColumnView(i);
-      if (view == null) {
-        newChildren.add(child);
-      } else {
-        if (child.getRowCount() != view.getRowCount()) {
-          throw new IllegalArgumentException("Child row count doesn't match the old child");
+    List<ColumnView> toClose = new ArrayList<>(getNumChildren());
+    try {
+      IntStream.range(0, getNumChildren()).forEach(i -> {
+        ColumnView view = map.remove(i);
+        ColumnView child = getChildColumnView(i);
+        toClose.add(child);
+        if (view == null) {
+          newChildren.add(child);
+        } else {
+          if (child.getRowCount() != view.getRowCount()) {
+            throw new IllegalArgumentException("Child row count doesn't match the old child");
+          }
+          newChildren.add(view);
         }
-        newChildren.add(view);
+      });
+      if (!map.isEmpty()) {
+        throw new IllegalArgumentException("One or more invalid child indices passed to be " +
+            "replaced");
+      }
+      return new ColumnView(type, getRowCount(), Optional.of(getNullCount()), getValid(),
+          getOffsets(), newChildren.stream().toArray(n -> new ColumnView[n]));
+    } finally {
+      for (ColumnView columnView: toClose) {
+        columnView.close();
       }
-    });
-    if (!map.isEmpty()) {
-      throw new IllegalArgumentException("One or more invalid child indices passed to be replaced");
     }
-    return new ColumnView(type, getRowCount(), Optional.of(getNullCount()), getValid(),
-        getOffsets(), newChildren.stream().toArray(n -> new ColumnView[n]));
   }
 
   /**

From ae08422ac7b7b9f693579370652cf6de0891ba71 Mon Sep 17 00:00:00 2001
From: Christopher Harris <charris@nvidia.com>
Date: Tue, 11 May 2021 18:00:54 -0500
Subject: [PATCH 05/11] patch thrust to fix intmax num elements limitation in
 scan_by_key (#8199)

same fix seen here, but via patch: https://github.com/NVIDIA/thrust/pull/1424

Also fixes https://github.com/rapidsai/cuspatial/issues/393

Alternatively, we could wait and update our thrust version, rather than patching the existing one.

Authors:
  - Christopher Harris (https://github.com/cwharris)

Approvers:
  - Mark Harris (https://github.com/harrism)
  - Paul Taylor (https://github.com/trxcllnt)

URL: https://github.com/rapidsai/cudf/pull/8199
---
 cpp/cmake/thrust.patch | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/cpp/cmake/thrust.patch b/cpp/cmake/thrust.patch
index 3f876f7ffb7..3cedff8b80d 100644
--- a/cpp/cmake/thrust.patch
+++ b/cpp/cmake/thrust.patch
@@ -42,3 +42,25 @@ index 1ffeef0..5e80800 100644
            for (int ITEM = 1; ITEM < ITEMS_PER_THREAD; ++ITEM)
            {
              if (ITEMS_PER_THREAD * tid + ITEM < num_remaining)
+diff --git a/thrust/system/cuda/detail/scan_by_key.h b/thrust/system/cuda/detail/scan_by_key.h
+index fe4b321c..b3974c69 100644
+--- a/thrust/system/cuda/detail/scan_by_key.h
++++ b/thrust/system/cuda/detail/scan_by_key.h
+@@ -513,7 +513,7 @@ namespace __scan_by_key {
+             scan_op(scan_op_)
+       {
+         int  tile_idx      = blockIdx.x;
+-        Size tile_base     = ITEMS_PER_TILE * tile_idx;
++        Size tile_base     = ITEMS_PER_TILE * static_cast<Size>(tile_idx);
+         Size num_remaining = num_items - tile_base;
+ 
+         if (num_remaining > ITEMS_PER_TILE)
+@@ -734,7 +734,7 @@ namespace __scan_by_key {
+                              ScanOp                     scan_op,
+                              AddInitToScan              add_init_to_scan)
+   {
+-    int          num_items    = static_cast<int>(thrust::distance(keys_first, keys_last));
++    size_t       num_items    = static_cast<size_t>(thrust::distance(keys_first, keys_last));
+     size_t       storage_size = 0;
+     cudaStream_t stream       = cuda_cub::stream(policy);
+     bool         debug_sync   = THRUST_DEBUG_SYNC_FLAG;

From e94daa0e2b67a0b394a23228d4585d0e399de0e2 Mon Sep 17 00:00:00 2001
From: Alfred Xu <lovedreamf@gmail.com>
Date: Wed, 12 May 2021 07:06:07 +0800
Subject: [PATCH 06/11] Fix concatenate_rows issue with lists of all empty
 strings (#8210)

Current PR is to fix a tiny bug in `compute_string_sizes_and_concatenate_lists_fn`, who serves concatenating list of string rows with nullify policy.  The bug can be triggered by some corner cases involving lists of all empty strings, such as:
 ```concatenate_with_nullifying_rows(["", ""], ["a", "b", "c"], ["d", "e"], ["f"])```

Authors:
  - Alfred Xu (https://github.com/sperlingxx)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Mark Harris (https://github.com/harrism)

URL: https://github.com/rapidsai/cudf/pull/8210
---
 cpp/src/lists/concatenate_rows.cu          |  2 +-
 cpp/tests/lists/concatenate_rows_tests.cpp | 38 ++++++++++++++++++++++
 2 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/cpp/src/lists/concatenate_rows.cu b/cpp/src/lists/concatenate_rows.cu
index 51df7255df9..8942bcc898d 100644
--- a/cpp/src/lists/concatenate_rows.cu
+++ b/cpp/src/lists/concatenate_rows.cu
@@ -224,8 +224,8 @@ struct compute_string_sizes_and_concatenate_lists_fn {
               start_byte;
             auto const output_ptr = d_chars + d_offsets[write_idx];
             thrust::copy(thrust::seq, input_ptr, input_ptr + end_byte - start_byte, output_ptr);
-            write_idx += end_str_idx - start_str_idx;
           }
+          write_idx += end_str_idx - start_str_idx;
         }
       });
   }
diff --git a/cpp/tests/lists/concatenate_rows_tests.cpp b/cpp/tests/lists/concatenate_rows_tests.cpp
index 131949ec1e9..5abaf99f739 100644
--- a/cpp/tests/lists/concatenate_rows_tests.cpp
+++ b/cpp/tests/lists/concatenate_rows_tests.cpp
@@ -286,6 +286,44 @@ TEST_F(ListConcatenateRowsTest, SimpleInputStringsColumnsWithNulls)
   }
 }
 
+TEST_F(ListConcatenateRowsTest, SimpleInputStringsColumnsWithEmptyLists)
+{
+  auto const col1 =
+    StrListsCol{StrListsCol{{"" /*NULL*/}, null_at(0)}, StrListsCol{"One"}}.release();
+  auto const col2 = StrListsCol{
+    StrListsCol{{"Tomato", "" /*NULL*/, "Apple"}, null_at(1)},
+    StrListsCol{
+      "Two"}}.release();
+  auto const col3 =
+    StrListsCol{{StrListsCol{"Lemon", "Peach"}, StrListsCol{"Three"} /*NULL*/}, null_at(1)}
+      .release();
+
+  // Ignore null list elements
+  {
+    auto const results =
+      cudf::lists::concatenate_rows(TView{{col1->view(), col2->view(), col3->view()}});
+    auto const expected = StrListsCol{
+      StrListsCol{{"" /*NULL*/, "Tomato", "" /*NULL*/, "Apple", "Lemon", "Peach"}, null_at({0, 2})},
+      StrListsCol{"One",
+                  "Two"}}.release();
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, *results, print_all);
+  }
+
+  // Null list rows result in null list rows
+  {
+    auto const results =
+      cudf::lists::concatenate_rows(TView{{col1->view(), col2->view(), col3->view()}},
+                                    cudf::lists::concatenate_null_policy::NULLIFY_OUTPUT_ROW);
+    auto const expected =
+      StrListsCol{{StrListsCol{{"" /*NULL*/, "Tomato", "" /*NULL*/, "Apple", "Lemon", "Peach"},
+                               null_at({0, 2})},
+                   StrListsCol{""} /*NULL*/},
+                  null_at(1)}
+        .release();
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, *results, print_all);
+  }
+}
+
 TYPED_TEST(ListConcatenateRowsTypedTest, SlicedColumnsInputNoNull)
 {
   using ListsCol = cudf::test::lists_column_wrapper<TypeParam>;

From 3f064f9ace514ccd03c3de30c21efcce92dd6d3f Mon Sep 17 00:00:00 2001
From: MithunR <mythrocks@gmail.com>
Date: Tue, 11 May 2021 17:09:10 -0700
Subject: [PATCH 07/11] Account for offset columns in lists::contains() (#8204)

Fixes #8186.

This commit corrects the list rows' offset calculation in
`list_device_view`, by taking the parent column's offset
into account.
A test has been added to cover this case.

Authors:
  - MithunR (https://github.com/mythrocks)

Approvers:
  - Vukasin Milovanovic (https://github.com/vuule)
  - Mark Harris (https://github.com/harrism)

URL: https://github.com/rapidsai/cudf/pull/8204
---
 cpp/include/cudf/lists/list_device_view.cuh   |  4 +-
 .../cudf/lists/lists_column_device_view.cuh   |  6 +++
 cpp/tests/copying/scatter_list_tests.cpp      | 36 +++++++++++++
 cpp/tests/lists/contains_tests.cpp            | 50 +++++++++++++++++++
 4 files changed, 94 insertions(+), 2 deletions(-)

diff --git a/cpp/include/cudf/lists/list_device_view.cuh b/cpp/include/cudf/lists/list_device_view.cuh
index 4f207474526..802639f2393 100644
--- a/cpp/include/cudf/lists/list_device_view.cuh
+++ b/cpp/include/cudf/lists/list_device_view.cuh
@@ -40,10 +40,10 @@ class list_device_view {
     cudf_assert(row_index >= 0 && row_index < lists_column.size() && row_index < offsets.size() &&
                 "row_index out of bounds");
 
-    begin_offset = offsets.element<size_type>(row_index);
+    begin_offset = offsets.element<size_type>(row_index + lists_column.offset());
     cudf_assert(begin_offset >= 0 && begin_offset <= lists_column.child().size() &&
                 "begin_offset out of bounds.");
-    _size = offsets.element<size_type>(row_index + 1) - begin_offset;
+    _size = offsets.element<size_type>(row_index + 1 + lists_column.offset()) - begin_offset;
   }
 
   ~list_device_view() = default;
diff --git a/cpp/include/cudf/lists/lists_column_device_view.cuh b/cpp/include/cudf/lists/lists_column_device_view.cuh
index 187b9c2cf6a..d8f082c9a42 100644
--- a/cpp/include/cudf/lists/lists_column_device_view.cuh
+++ b/cpp/include/cudf/lists/lists_column_device_view.cuh
@@ -75,6 +75,12 @@ class lists_column_device_view {
    */
   CUDA_DEVICE_CALLABLE bool is_null(size_type idx) const { return underlying.is_null(idx); }
 
+  /**
+   * @brief Fetches the offset of the underlying column_device_view,
+   *        in case it is a sliced/offset column.
+   */
+  CUDA_DEVICE_CALLABLE size_type offset() const { return underlying.offset(); }
+
  private:
   column_device_view underlying;
 };
diff --git a/cpp/tests/copying/scatter_list_tests.cpp b/cpp/tests/copying/scatter_list_tests.cpp
index 64fdf6d00d5..b60f875fdd9 100644
--- a/cpp/tests/copying/scatter_list_tests.cpp
+++ b/cpp/tests/copying/scatter_list_tests.cpp
@@ -18,6 +18,7 @@
 
 #include <cudf/column/column_view.hpp>
 #include <cudf/copying.hpp>
+#include <cudf/detail/copy.hpp>
 #include <cudf/lists/lists_column_view.hpp>
 #include <cudf/table/table.hpp>
 #include <cudf/table/table_view.hpp>
@@ -62,6 +63,41 @@ TYPED_TEST(TypedScatterListsTest, ListsOfFixedWidth)
       {8, 8, 8}, {1, 1}, {9, 9, 9, 9}, {3, 3}, {4, 4}, {5, 5}, {6, 6}});
 }
 
+TYPED_TEST(TypedScatterListsTest, SlicedInputLists)
+{
+  using namespace cudf::test;
+  using T = TypeParam;
+
+  auto src_list_column =
+    lists_column_wrapper<T, int32_t>{{0, 0, 0, 0}, {9, 9, 9, 9}, {8, 8, 8}, {7, 7, 7}}.release();
+  auto src_sliced =
+    cudf::detail::slice(src_list_column->view(), {1, 3}, rmm::cuda_stream_default).front();
+
+  auto target_list_column =
+    lists_column_wrapper<T, int32_t>{{0, 0}, {1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}}
+      .release();
+
+  auto scatter_map = fixed_width_column_wrapper<cudf::size_type>{2, 0};
+
+  auto ret_1 = cudf::scatter(
+    cudf::table_view({src_sliced}), scatter_map, cudf::table_view({target_list_column->view()}));
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(
+    ret_1->get_column(0),
+    lists_column_wrapper<T, int32_t>{
+      {8, 8, 8}, {1, 1}, {9, 9, 9, 9}, {3, 3}, {4, 4}, {5, 5}, {6, 6}});
+
+  auto target_sliced =
+    cudf::detail::slice(target_list_column->view(), {1, 6}, rmm::cuda_stream_default);
+
+  auto ret_2 =
+    cudf::scatter(cudf::table_view({src_sliced}), scatter_map, cudf::table_view({target_sliced}));
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(
+    ret_2->get_column(0),
+    lists_column_wrapper<T, int32_t>{{8, 8, 8}, {2, 2}, {9, 9, 9, 9}, {4, 4}, {5, 5}});
+}
+
 TYPED_TEST(TypedScatterListsTest, EmptyListsOfFixedWidth)
 {
   using namespace cudf::test;
diff --git a/cpp/tests/lists/contains_tests.cpp b/cpp/tests/lists/contains_tests.cpp
index 7f8ae436a27..73194271a32 100644
--- a/cpp/tests/lists/contains_tests.cpp
+++ b/cpp/tests/lists/contains_tests.cpp
@@ -16,6 +16,7 @@
  */
 
 #include <cudf/column/column_factories.hpp>
+#include <cudf/detail/copy.hpp>
 #include <cudf/detail/iterator.cuh>
 #include <cudf/lists/contains.hpp>
 #include <cudf/scalar/scalar_factories.hpp>
@@ -154,6 +155,55 @@ TYPED_TEST(TypedContainsTest, ListContainsScalarWithNullLists)
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, *actual_result);
 }
 
+TYPED_TEST(TypedContainsTest, SlicedLists)
+{
+  // Test sliced List columns.
+
+  using namespace cudf;
+
+  using T     = TypeParam;
+  using bools = fixed_width_column_wrapper<bool>;
+
+  auto search_space = lists_column_wrapper<T, int32_t>{
+    {{0, 1, 2},
+     {3, 4, 5},
+     {6, 7, 8},
+     {},
+     {9, 0, 1},
+     {2, 3, 4},
+     {5, 6, 7},
+     {8, 9, 0},
+     {},
+     {1, 2, 3},
+     {}},
+    cudf::detail::make_counting_transform_iterator(0, [](auto i) {
+      return (i != 3) && (i != 10);
+    })}.release();
+
+  auto sliced_column_1 = cudf::detail::slice(search_space->view(), {1, 8}).front();
+
+  auto search_key_one = create_scalar_search_key<T>(1);
+  auto result_1       = lists::contains(sliced_column_1, *search_key_one);
+
+  auto expected_result_1 = bools{
+    {0, 0, 0, 1, 0, 0, 0}, cudf::detail::make_counting_transform_iterator(0, [](auto i) {
+      return (i != 2);
+    })}.release();
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result_1->view(), result_1->view());
+
+  auto sliced_column_2 = cudf::detail::slice(search_space->view(), {3, 10}).front();
+
+  auto result_2 = lists::contains(sliced_column_2, *search_key_one);
+
+  auto expected_result_2 = bools{
+    {0, 1, 0, 0, 0, 0, 1}, cudf::detail::make_counting_transform_iterator(0, [](auto i) {
+      return (i != 0);
+    })}.release();
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result_2->view(), result_2->view());
+}
+
 TYPED_TEST(TypedContainsTest, ListContainsScalarNonNullListsWithNullValues)
 {
   // Test List columns that have no NULL list rows, but NULL elements in some list rows.

From 188b630c89fa9b548d92c008e004dbd9d0036495 Mon Sep 17 00:00:00 2001
From: pxLi <pxli@nyu.edu>
Date: Wed, 12 May 2021 10:46:41 +0800
Subject: [PATCH 08/11] Bump up GDS user-space lib version to 0.95.1 (#8221)

Signed-off-by: Peixin Li <pxli@nyu.edu>

verified build locally

Authors:
  - pxLi (https://github.com/pxLi)

Approvers:
  - https://github.com/NvTimLiu

URL: https://github.com/rapidsai/cudf/pull/8221
---
 java/ci/Dockerfile.centos7 | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/java/ci/Dockerfile.centos7 b/java/ci/Dockerfile.centos7
index 4efc029cb47..a030366b86a 100644
--- a/java/ci/Dockerfile.centos7
+++ b/java/ci/Dockerfile.centos7
@@ -35,8 +35,8 @@ RUN cd /usr/local/ && wget --quiet https://github.com/Kitware/CMake/releases/dow
    tar zxf cmake-3.19.0-Linux-x86_64.tar.gz
 
 # get GDS user-space lib
-RUN cd /tmp/ && wget https://developer.download.nvidia.com/gds/redist/rel-0.95.0/gds-redistrib-0.95.0.tgz && \
-    tar zxf gds-redistrib-0.95.0.tgz && \
-    cp -R ./gds-redistrib-0.95.0/targets/x86_64-linux/lib/* /usr/local/cuda/targets/x86_64-linux/lib && \
-    cp -R ./gds-redistrib-0.95.0/targets/x86_64-linux/include/* /usr/local/cuda/targets/x86_64-linux/include && \
-    rm -rf gds-redistrib-0.95.0*
+RUN cd /tmp/ && wget https://developer.download.nvidia.com/gds/redist/rel-0.95.1/gds-redistrib-0.95.1.tgz && \
+    tar zxf gds-redistrib-0.95.1.tgz && \
+    cp -R ./gds-redistrib-0.95.1/targets/x86_64-linux/lib/* /usr/local/cuda/targets/x86_64-linux/lib && \
+    cp -R ./gds-redistrib-0.95.1/targets/x86_64-linux/include/* /usr/local/cuda/targets/x86_64-linux/include && \
+    rm -rf gds-redistrib-0.95.1*

From 667b9bc232b4d15a87565dccf997d3d404c09b39 Mon Sep 17 00:00:00 2001
From: Sheilah Kirui <71867292+skirui-source@users.noreply.github.com>
Date: Wed, 12 May 2021 10:45:56 -0700
Subject: [PATCH 09/11] Add Python bindings to list concatenation functions
 (#8087)

Fixes: #8078

Authors:
  - Sheilah Kirui (https://github.com/skirui-source)

Approvers:
  - https://github.com/brandon-b-miller

URL: https://github.com/rapidsai/cudf/pull/8087
---
 .../cudf/_lib/cpp/lists/concatenate_rows.pxd  | 12 ++++
 python/cudf/cudf/_lib/lists.pyx               | 18 ++++++
 python/cudf/cudf/core/column/lists.py         | 56 +++++++++++++++++++
 python/cudf/cudf/tests/test_list.py           | 17 ++++++
 4 files changed, 103 insertions(+)
 create mode 100644 python/cudf/cudf/_lib/cpp/lists/concatenate_rows.pxd

diff --git a/python/cudf/cudf/_lib/cpp/lists/concatenate_rows.pxd b/python/cudf/cudf/_lib/cpp/lists/concatenate_rows.pxd
new file mode 100644
index 00000000000..8c4dabf5168
--- /dev/null
+++ b/python/cudf/cudf/_lib/cpp/lists/concatenate_rows.pxd
@@ -0,0 +1,12 @@
+# Copyright (c) 2021, NVIDIA CORPORATION.
+
+from libcpp.memory cimport unique_ptr
+
+from cudf._lib.cpp.column.column cimport column
+from cudf._lib.cpp.table.table_view cimport table_view
+
+cdef extern from "cudf/lists/concatenate_rows.hpp" namespace \
+        "cudf::lists" nogil:
+    cdef unique_ptr[column] concatenate_rows(
+        const table_view input_table
+    ) except +
diff --git a/python/cudf/cudf/_lib/lists.pyx b/python/cudf/cudf/_lib/lists.pyx
index 9bc0550bdf0..46f034dc525 100644
--- a/python/cudf/cudf/_lib/lists.pyx
+++ b/python/cudf/cudf/_lib/lists.pyx
@@ -16,6 +16,9 @@ from cudf._lib.cpp.lists.drop_list_duplicates cimport (
 from cudf._lib.cpp.lists.sorting cimport (
     sort_lists as cpp_sort_lists
 )
+from cudf._lib.cpp.lists.concatenate_rows cimport (
+    concatenate_rows as cpp_concatenate_rows
+)
 from cudf._lib.cpp.lists.lists_column_view cimport lists_column_view
 from cudf._lib.cpp.column.column_view cimport column_view
 from cudf._lib.cpp.column.column cimport column
@@ -28,6 +31,7 @@ from cudf._lib.cpp.table.table_view cimport table_view
 from cudf._lib.cpp.types cimport (
     size_type,
     null_equality,
+    null_policy,
     order,
     null_order,
     nan_equality
@@ -163,3 +167,17 @@ def contains_scalar(Column col, object py_search_key):
         ))
     result = Column.from_unique_ptr(move(c_result))
     return result
+
+
+def concatenate_rows(Table tbl):
+    cdef unique_ptr[column] c_result
+
+    cdef table_view c_table_view = tbl.view()
+
+    with nogil:
+        c_result = move(cpp_concatenate_rows(
+            c_table_view,
+        ))
+
+    result = Column.from_unique_ptr(move(c_result))
+    return result
diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py
index ea783c3737a..7ea02c0e878 100644
--- a/python/cudf/cudf/core/column/lists.py
+++ b/python/cudf/cudf/core/column/lists.py
@@ -8,15 +8,19 @@
 import cudf
 from cudf._lib.copying import segmented_gather
 from cudf._lib.lists import (
+    concatenate_rows,
     contains_scalar,
     count_elements,
     drop_list_duplicates,
     extract_element,
     sort_lists,
 )
+from cudf._lib.table import Table
+from cudf._typing import BinaryOperand
 from cudf.core.buffer import Buffer
 from cudf.core.column import ColumnBase, as_column, column
 from cudf.core.column.methods import ColumnMethodsMixin
+from cudf.core.dtypes import ListDtype
 from cudf.utils.dtypes import is_list_dtype, is_numerical_dtype
 
 
@@ -74,6 +78,58 @@ def __sizeof__(self):
     def base_size(self):
         return len(self.base_children[0]) - 1
 
+    def binary_operator(
+        self, binop: str, other: BinaryOperand, reflect: bool = False
+    ) -> ColumnBase:
+        """
+        Calls a binary operator *binop* on operands *self*
+        and *other*.
+
+        Parameters
+        ----------
+        self, other : list columns
+
+        binop :  binary operator
+            Only "add" operator is currently being supported
+            for lists concatenation functions
+
+        reflect : boolean, default False
+            If ``reflect`` is ``True``, swap the order of
+            the operands.
+
+        Returns
+        -------
+        Series : the output dtype is determined by the
+            input operands.
+
+        Examples
+        --------
+        >>> import cudf
+        >>> gdf = cudf.DataFrame({'val': [['a', 'a'], ['b'], ['c']]})
+        >>> gdf
+            val
+        0  [a, a]
+        1     [b]
+        2     [c]
+        >>> gdf['val'] + gdf['val']
+        0    [a, a, a, a]
+        1          [b, b]
+        2          [c, c]
+        Name: val, dtype: list
+
+        """
+
+        if isinstance(other.dtype, ListDtype):
+            if binop == "add":
+                return concatenate_rows(Table({0: self, 1: other}))
+            else:
+                raise NotImplementedError(
+                    "Lists concatenation for this operation is not yet"
+                    "supported"
+                )
+        else:
+            raise TypeError("can only concatenate list to list")
+
     @property
     def elements(self):
         """
diff --git a/python/cudf/cudf/tests/test_list.py b/python/cudf/cudf/tests/test_list.py
index 9906600304b..5dcecc6c9e1 100644
--- a/python/cudf/cudf/tests/test_list.py
+++ b/python/cudf/cudf/tests/test_list.py
@@ -315,3 +315,20 @@ def test_contains_null_search_key(data, expect):
     expect = cudf.Series(expect, dtype="bool")
     got = sr.list.contains(cudf.Scalar(cudf.NA, sr.dtype.element_type))
     assert_eq(expect, got)
+
+
+def test_concatenate_rows_of_lists():
+    pdf = pd.DataFrame({"val": [["a", "a"], ["b"], ["c"]]})
+    gdf = cudf.from_pandas(pdf)
+
+    expect = pdf["val"] + pdf["val"]
+    got = gdf["val"] + gdf["val"]
+
+    assert_eq(expect, got)
+
+
+def test_concatenate_list_with_nonlist():
+    with pytest.raises(TypeError, match="can only concatenate list to list"):
+        gdf1 = cudf.DataFrame({"A": [["a", "c"], ["b", "d"], ["c", "d"]]})
+        gdf2 = cudf.DataFrame({"A": ["a", "b", "c"]})
+        gdf1["A"] + gdf2["A"]

From bda8457b8bae599297a9345e72ddd283a2abc8b1 Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Wed, 12 May 2021 17:36:23 -0400
Subject: [PATCH 10/11] Update split-by-char to use input offsets column
 (#8195)

Closes #8094

This PR changes the `_split_by_character` utility to copy the input strings offsets column as the offsets column for the output lists column.

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/8195
---
 python/cudf/cudf/core/column/string.py | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index e64aecf9dea..84dcad516df 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -601,10 +601,6 @@ def join(
         else:
             # If self._column is not a ListColumn, we will have to
             # split each row by character and create a ListColumn out of it.
-
-            # TODO: Remove this workaround after the following
-            # feature request is resolved
-            # FEA: https://github.com/rapidsai/cudf/issues/8094
             strings_column = self._split_by_character()
 
         if is_scalar(sep):
@@ -641,13 +637,7 @@ def join(
     def _split_by_character(self):
         result_col = cpp_character_tokenize(self._column)
 
-        bytes_count = cpp_count_bytes(self._column)
-        offset_col = cudf.core.column.column_empty(
-            row_count=len(bytes_count) + 1, dtype="int32"
-        )
-        offset_col[0] = 0
-        offset_col[1:] = bytes_count
-        offset_col = offset_col._apply_scan_op("sum")
+        offset_col = self._column.children[0]
 
         res = cudf.core.column.ListColumn(
             size=len(self._column),

From cdf09ad2552f8d7b13bd7fc47bc23f411a7d0187 Mon Sep 17 00:00:00 2001
From: Mark Harris <mharris@nvidia.com>
Date: Thu, 13 May 2021 08:37:29 +1000
Subject: [PATCH 11/11] Convert tests to use device_uvector (#8205)

Converts all remaining tests to use device_uvector instead of device_vector.

Contributes to #7287

Also converts a lot of `std::vector` in tests to `thrust::host_vector` to avoid problems with `vector<bool>`.

Adds a new utility `cudf::detail::make_host_vector_async` (and sync version) which creates a `thrust::host_vector<T>` from a `device_span`. Also makes it possible to create a `host_span` from a `std::string`.

Authors:
  - Mark Harris (https://github.com/harrism)

Approvers:
  - Robert Maynard (https://github.com/robertmaynard)
  - Nghia Truong (https://github.com/ttnghia)

URL: https://github.com/rapidsai/cudf/pull/8205
---
 .../detail/utilities/vector_factories.hpp     | 116 ++++++++--
 cpp/include/cudf/utilities/span.hpp           |   5 +
 cpp/include/cudf_test/type_lists.hpp          |  11 +-
 cpp/src/copying/concatenate.cu                |  11 +-
 cpp/tests/CMakeLists.txt                      |   4 +-
 .../{bitmask_tests.cu => bitmask_tests.cpp}   | 205 ++++++++++--------
 cpp/tests/bitmask/set_nullmask_tests.cu       |  26 ++-
 cpp/tests/column/compound_test.cu             |  43 ++--
 .../device_atomics/device_atomics_test.cu     |  72 +++---
 cpp/tests/fixed_point/fixed_point_tests.cu    |  26 ++-
 cpp/tests/hash_map/map_test.cu                |  84 ++++---
 cpp/tests/hash_map/multimap_test.cu           |  12 +-
 cpp/tests/iterator/iterator_tests.cuh         |  61 ++----
 .../optional_iterator_test_numeric.cu         |   2 +-
 cpp/tests/iterator/value_iterator_test.cuh    |   3 +-
 .../iterator/value_iterator_test_strings.cu   |  35 ++-
 cpp/tests/reductions/reduction_tests.cpp      |   1 -
 cpp/tests/replace/clamp_test.cpp              |   4 +-
 cpp/tests/replace/replace_nulls_tests.cpp     |  67 +++---
 cpp/tests/replace/replace_tests.cpp           |  98 +++++----
 cpp/tests/rolling/rolling_test.cpp            |   9 +-
 cpp/tests/scalar/scalar_device_view_test.cu   |   8 +-
 .../drop_duplicates_tests.cpp                 |   8 +-
 cpp/tests/strings/array_tests.cu              |  23 +-
 cpp/tests/strings/factories_test.cu           |  30 +--
 cpp/tests/strings/hash_string.cu              |  18 +-
 .../{integers_tests.cu => integers_tests.cpp} |  19 +-
 cpp/tests/table/table_view_tests.cu           |   5 +-
 cpp/tests/transform/row_bit_count_test.cu     |   8 +-
 cpp/tests/types/type_dispatcher_test.cu       |  16 +-
 cpp/tests/utilities/column_utilities.cu       |  33 +--
 cpp/tests/wrappers/timestamps_test.cu         |  17 +-
 32 files changed, 613 insertions(+), 467 deletions(-)
 rename cpp/tests/bitmask/{bitmask_tests.cu => bitmask_tests.cpp} (73%)
 rename cpp/tests/strings/{integers_tests.cu => integers_tests.cpp} (96%)

diff --git a/cpp/include/cudf/detail/utilities/vector_factories.hpp b/cpp/include/cudf/detail/utilities/vector_factories.hpp
index db8e6e4a156..1e735719400 100644
--- a/cpp/include/cudf/detail/utilities/vector_factories.hpp
+++ b/cpp/include/cudf/detail/utilities/vector_factories.hpp
@@ -47,7 +47,7 @@ namespace detail {
 template <typename T>
 rmm::device_uvector<T> make_zeroed_device_uvector_async(
   std::size_t size,
-  rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   rmm::device_uvector<T> ret(size, stream, mr);
@@ -93,7 +93,7 @@ rmm::device_uvector<T> make_zeroed_device_uvector_sync(
 template <typename T>
 rmm::device_uvector<T> make_device_uvector_async(
   host_span<T const> source_data,
-  rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   rmm::device_uvector<T> ret(source_data.size(), stream, mr);
@@ -124,7 +124,7 @@ template <typename Container,
                                 host_span<typename Container::value_type const>>::value>* = nullptr>
 rmm::device_uvector<typename Container::value_type> make_device_uvector_async(
   Container const& c,
-  rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   return make_device_uvector_async(host_span<typename Container::value_type const>{c}, stream, mr);
@@ -177,7 +177,7 @@ template <
     nullptr>
 rmm::device_uvector<typename Container::value_type> make_device_uvector_async(
   Container const& c,
-  rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   return make_device_uvector_async(
@@ -281,6 +281,16 @@ rmm::device_uvector<typename Container::value_type> make_device_uvector_sync(
   return make_device_uvector_sync(device_span<typename Container::value_type const>{c}, stream, mr);
 }
 
+// Utility function template to allow copying to either a thrust::host_vector or std::vector
+template <typename T, typename OutContainer>
+OutContainer make_vector_async(device_span<T const> v, rmm::cuda_stream_view stream)
+{
+  OutContainer result(v.size());
+  CUDA_TRY(cudaMemcpyAsync(
+    result.data(), v.data(), v.size() * sizeof(T), cudaMemcpyDeviceToHost, stream.value()));
+  return result;
+}
+
 /**
  * @brief Asynchronously construct a `std::vector` containing a copy of data from a
  * `device_span`
@@ -293,13 +303,9 @@ rmm::device_uvector<typename Container::value_type> make_device_uvector_sync(
  * @return The data copied to the host
  */
 template <typename T>
-std::vector<T> make_std_vector_async(device_span<T const> v,
-                                     rmm::cuda_stream_view stream = rmm::cuda_stream_default)
+std::vector<T> make_std_vector_async(device_span<T const> v, rmm::cuda_stream_view stream)
 {
-  std::vector<T> result(v.size());
-  CUDA_TRY(cudaMemcpyAsync(
-    result.data(), v.data(), v.size() * sizeof(T), cudaMemcpyDeviceToHost, stream.value()));
-  return result;
+  return make_vector_async<T, std::vector<T>>(v, stream);
 }
 
 /**
@@ -319,8 +325,8 @@ template <
   std::enable_if_t<
     std::is_convertible<Container, device_span<typename Container::value_type const>>::value>* =
     nullptr>
-std::vector<typename Container::value_type> make_std_vector_async(
-  Container const& c, rmm::cuda_stream_view stream = rmm::cuda_stream_default)
+std::vector<typename Container::value_type> make_std_vector_async(Container const& c,
+                                                                  rmm::cuda_stream_view stream)
 {
   return make_std_vector_async(device_span<typename Container::value_type const>{c}, stream);
 }
@@ -337,8 +343,7 @@ std::vector<typename Container::value_type> make_std_vector_async(
  * @return The data copied to the host
  */
 template <typename T>
-std::vector<T> make_std_vector_sync(device_span<T const> v,
-                                    rmm::cuda_stream_view stream = rmm::cuda_stream_default)
+std::vector<T> make_std_vector_sync(device_span<T const> v, rmm::cuda_stream_view stream)
 {
   auto result = make_std_vector_async(v, stream);
   stream.synchronize();
@@ -368,6 +373,89 @@ std::vector<typename Container::value_type> make_std_vector_sync(
   return make_std_vector_sync(device_span<typename Container::value_type const>{c}, stream);
 }
 
+/**
+ * @brief Asynchronously construct a `thrust::host_vector` containing a copy of data from a
+ * `device_span`
+ *
+ * @note This function does not synchronize `stream`.
+ *
+ * @tparam T The type of the data to copy
+ * @param source_data The device data to copy
+ * @param stream The stream on which to perform the copy
+ * @return The data copied to the host
+ */
+template <typename T>
+thrust::host_vector<T> make_host_vector_async(device_span<T const> v, rmm::cuda_stream_view stream)
+{
+  return make_vector_async<T, thrust::host_vector<T>>(v, stream);
+}
+
+/**
+ * @brief Asynchronously construct a `std::vector` containing a copy of data from a device
+ * container
+ *
+ * @note This function synchronizes `stream`.
+ *
+ * @tparam Container The type of the container to copy from
+ * @tparam T The type of the data to copy
+ * @param c The input device container from which to copy
+ * @param stream The stream on which to perform the copy
+ * @return The data copied to the host
+ */
+template <
+  typename Container,
+  std::enable_if_t<
+    std::is_convertible<Container, device_span<typename Container::value_type const>>::value>* =
+    nullptr>
+thrust::host_vector<typename Container::value_type> make_host_vector_async(
+  Container const& c, rmm::cuda_stream_view stream)
+{
+  return make_host_vector_async(device_span<typename Container::value_type const>{c}, stream);
+}
+
+/**
+ * @brief Synchronously construct a `std::vector` containing a copy of data from a
+ * `device_span`
+ *
+ * @note This function does a synchronize on `stream`.
+ *
+ * @tparam T The type of the data to copy
+ * @param source_data The device data to copy
+ * @param stream The stream on which to perform the copy
+ * @return The data copied to the host
+ */
+template <typename T>
+thrust::host_vector<T> make_host_vector_sync(
+  device_span<T const> v, rmm::cuda_stream_view stream = rmm::cuda_stream_default)
+{
+  auto result = make_host_vector_async(v, stream);
+  stream.synchronize();
+  return result;
+}
+
+/**
+ * @brief Synchronously construct a `std::vector` containing a copy of data from a device
+ * container
+ *
+ * @note This function synchronizes `stream`.
+ *
+ * @tparam Container The type of the container to copy from
+ * @tparam T The type of the data to copy
+ * @param c The input device container from which to copy
+ * @param stream The stream on which to perform the copy
+ * @return The data copied to the host
+ */
+template <
+  typename Container,
+  std::enable_if_t<
+    std::is_convertible<Container, device_span<typename Container::value_type const>>::value>* =
+    nullptr>
+thrust::host_vector<typename Container::value_type> make_host_vector_sync(
+  Container const& c, rmm::cuda_stream_view stream = rmm::cuda_stream_default)
+{
+  return make_host_vector_sync(device_span<typename Container::value_type const>{c}, stream);
+}
+
 }  // namespace detail
 
 }  // namespace cudf
diff --git a/cpp/include/cudf/utilities/span.hpp b/cpp/include/cudf/utilities/span.hpp
index 2f3577623a9..52ad0648e23 100644
--- a/cpp/include/cudf/utilities/span.hpp
+++ b/cpp/include/cudf/utilities/span.hpp
@@ -120,6 +120,11 @@ struct is_host_span_supported_container<  //
   thrust::host_vector<T, Alloc>> : std::true_type {
 };
 
+template <typename T, typename Alloc>
+struct is_host_span_supported_container<  //
+  std::basic_string<T, std::char_traits<T>, Alloc>> : std::true_type {
+};
+
 template <typename T, std::size_t Extent = cudf::dynamic_extent>
 struct host_span : public cudf::detail::span_base<T, Extent, host_span<T, Extent>> {
   using base = cudf::detail::span_base<T, Extent, host_span<T, Extent>>;
diff --git a/cpp/include/cudf_test/type_lists.hpp b/cpp/include/cudf_test/type_lists.hpp
index 71c2b74b37b..a344173144d 100644
--- a/cpp/include/cudf_test/type_lists.hpp
+++ b/cpp/include/cudf_test/type_lists.hpp
@@ -25,6 +25,8 @@
 #include <cudf/wrappers/timestamps.hpp>
 #include <cudf_test/type_list_utilities.hpp>
 
+#include <thrust/host_vector.h>
+
 #include <array>
 #include <tuple>
 
@@ -79,10 +81,10 @@ constexpr auto types_to_ids()
 template <typename TypeParam, typename T>
 typename std::enable_if<cudf::is_fixed_width<TypeParam>() &&
                           !cudf::is_timestamp_t<TypeParam>::value,
-                        std::vector<TypeParam>>::type
+                        thrust::host_vector<TypeParam>>::type
 make_type_param_vector(std::initializer_list<T> const& init_list)
 {
-  std::vector<TypeParam> vec(init_list.size());
+  thrust::host_vector<TypeParam> vec(init_list.size());
   std::transform(std::cbegin(init_list), std::cend(init_list), std::begin(vec), [](auto const& e) {
     if (std::is_unsigned<TypeParam>::value)
       return static_cast<TypeParam>(std::abs(e));
@@ -93,10 +95,11 @@ make_type_param_vector(std::initializer_list<T> const& init_list)
 }
 
 template <typename TypeParam, typename T>
-typename std::enable_if<cudf::is_timestamp_t<TypeParam>::value, std::vector<TypeParam>>::type
+typename std::enable_if<cudf::is_timestamp_t<TypeParam>::value,
+                        thrust::host_vector<TypeParam>>::type
 make_type_param_vector(std::initializer_list<T> const& init_list)
 {
-  std::vector<TypeParam> vec(init_list.size());
+  thrust::host_vector<TypeParam> vec(init_list.size());
   std::transform(std::cbegin(init_list), std::cend(init_list), std::begin(vec), [](auto const& e) {
     return TypeParam{typename TypeParam::duration{e}};
   });
diff --git a/cpp/src/copying/concatenate.cu b/cpp/src/copying/concatenate.cu
index e87cadbffe8..6ba10bef396 100644
--- a/cpp/src/copying/concatenate.cu
+++ b/cpp/src/copying/concatenate.cu
@@ -57,9 +57,6 @@ auto create_device_views(host_span<column_view const> views, rmm::cuda_stream_vi
     column_device_view::create(std::declval<column_view>(), std::declval<rmm::cuda_stream_view>()));
   auto device_view_owners = std::vector<CDViewPtr>(views.size());
   std::transform(views.begin(), views.end(), device_view_owners.begin(), [stream](auto const& col) {
-    // TODO creating this device view can invoke null count computation
-    // even though it isn't used. See this issue:
-    // https://github.com/rapidsai/cudf/issues/4368
     return column_device_view::create(col, stream);
   });
 
@@ -70,10 +67,8 @@ auto create_device_views(host_span<column_view const> views, rmm::cuda_stream_vi
                  device_view_owners.cend(),
                  std::back_inserter(device_views),
                  [](auto const& col) { return *col; });
-  // TODO each of these device vector copies invoke stream synchronization
-  // which appears to add unnecessary overhead. See this issue:
-  // https://github.com/rapidsai/rmm/issues/120
-  auto d_views = make_device_uvector_async(device_views);
+
+  auto d_views = make_device_uvector_async(device_views, stream);
 
   // Compute the partition offsets
   auto offsets = thrust::host_vector<size_t>(views.size() + 1);
@@ -84,7 +79,7 @@ auto create_device_views(host_span<column_view const> views, rmm::cuda_stream_vi
     std::next(offsets.begin()),
     [](auto const& col) { return col.size(); },
     thrust::plus<size_t>{});
-  auto d_offsets         = make_device_uvector_async(offsets);
+  auto d_offsets         = make_device_uvector_async(offsets, stream);
   auto const output_size = offsets.back();
 
   return std::make_tuple(
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index 3246f565443..2766cbb86fc 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -342,7 +342,7 @@ ConfigureTest(STRINGS_TEST
     strings/fixed_point_tests.cpp
     strings/floats_tests.cpp
     strings/hash_string.cu
-    strings/integers_tests.cu
+    strings/integers_tests.cpp
     strings/ipv4_tests.cpp
     strings/json_tests.cpp
     strings/pad_tests.cpp
@@ -375,7 +375,7 @@ ConfigureTest(TEXT_TEST
 ConfigureTest(BITMASK_TEST
     bitmask/valid_if_tests.cu
     bitmask/set_nullmask_tests.cu
-    bitmask/bitmask_tests.cu
+    bitmask/bitmask_tests.cpp
     bitmask/is_element_valid_tests.cpp)
 
 
diff --git a/cpp/tests/bitmask/bitmask_tests.cu b/cpp/tests/bitmask/bitmask_tests.cpp
similarity index 73%
rename from cpp/tests/bitmask/bitmask_tests.cu
rename to cpp/tests/bitmask/bitmask_tests.cpp
index 2f820da687e..3fb12efcc93 100644
--- a/cpp/tests/bitmask/bitmask_tests.cu
+++ b/cpp/tests/bitmask/bitmask_tests.cpp
@@ -15,6 +15,7 @@
  */
 #include <cudf/concatenate.hpp>
 #include <cudf/copying.hpp>
+#include <cudf/detail/utilities/vector_factories.hpp>
 #include <cudf/null_mask.hpp>
 #include <cudf/types.hpp>
 #include <cudf/utilities/error.hpp>
@@ -23,9 +24,9 @@
 #include <cudf_test/column_wrapper.hpp>
 #include <cudf_test/cudf_gtest.hpp>
 
-#include <thrust/device_ptr.h>
-#include <thrust/device_vector.h>
 #include <rmm/device_buffer.hpp>
+#include "rmm/cuda_stream_view.hpp"
+#include "rmm/device_uvector.hpp"
 
 struct BitmaskUtilitiesTest : public cudf::test::BaseFixture {
 };
@@ -76,161 +77,177 @@ TEST_F(CountBitmaskTest, NullMask)
   }
 }
 
+// Utility to construct a mask vector. If fill_valid is false (default), it is initialized to all
+// null. Otherwise it is initialized to all valid.
+rmm::device_uvector<cudf::bitmask_type> make_mask(cudf::size_type size, bool fill_valid = false)
+{
+  if (!fill_valid) {
+    return cudf::detail::make_zeroed_device_uvector_sync<cudf::bitmask_type>(size);
+  } else {
+    auto ret = rmm::device_uvector<cudf::bitmask_type>(size, rmm::cuda_stream_default);
+    CUDA_TRY(cudaMemsetAsync(ret.data(),
+                             ~cudf::bitmask_type{0},
+                             size * sizeof(cudf::bitmask_type),
+                             rmm::cuda_stream_default.value()));
+    return ret;
+  }
+}
+
 TEST_F(CountBitmaskTest, NegativeStart)
 {
-  thrust::device_vector<cudf::bitmask_type> mask(1, 0);
-  EXPECT_THROW(cudf::count_set_bits(mask.data().get(), -1, 32), cudf::logic_error);
+  auto mask = make_mask(1);
+  EXPECT_THROW(cudf::count_set_bits(mask.data(), -1, 32), cudf::logic_error);
 
   std::vector<cudf::size_type> indices = {0, 16, -1, 32};
-  EXPECT_THROW(cudf::segmented_count_set_bits(mask.data().get(), indices), cudf::logic_error);
+  EXPECT_THROW(cudf::segmented_count_set_bits(mask.data(), indices), cudf::logic_error);
 }
 
 TEST_F(CountBitmaskTest, StartLargerThanStop)
 {
-  thrust::device_vector<cudf::bitmask_type> mask(1, 0);
-  EXPECT_THROW(cudf::count_set_bits(mask.data().get(), 32, 31), cudf::logic_error);
+  auto mask = make_mask(1);
+  EXPECT_THROW(cudf::count_set_bits(mask.data(), 32, 31), cudf::logic_error);
 
   std::vector<cudf::size_type> indices = {0, 16, 31, 30};
-  EXPECT_THROW(cudf::segmented_count_set_bits(mask.data().get(), indices), cudf::logic_error);
+  EXPECT_THROW(cudf::segmented_count_set_bits(mask.data(), indices), cudf::logic_error);
 }
 
 TEST_F(CountBitmaskTest, EmptyRange)
 {
-  thrust::device_vector<cudf::bitmask_type> mask(1, 0);
-  EXPECT_EQ(0, cudf::count_set_bits(mask.data().get(), 17, 17));
+  auto mask = make_mask(1);
+  EXPECT_EQ(0, cudf::count_set_bits(mask.data(), 17, 17));
 
   std::vector<cudf::size_type> indices = {0, 0, 17, 17};
-  auto counts                          = cudf::segmented_count_set_bits(mask.data().get(), indices);
+  auto counts                          = cudf::segmented_count_set_bits(mask.data(), indices);
   EXPECT_THAT(counts, testing::ContainerEq(std::vector<cudf::size_type>{0, 0}));
 }
 
 TEST_F(CountBitmaskTest, SingleWordAllZero)
 {
-  thrust::device_vector<cudf::bitmask_type> mask(1, 0);
-  EXPECT_EQ(0, cudf::count_set_bits(mask.data().get(), 0, 32));
+  auto mask = make_mask(1);
+  EXPECT_EQ(0, cudf::count_set_bits(mask.data(), 0, 32));
 
   std::vector<cudf::size_type> indices = {0, 32, 0, 32};
-  auto counts                          = cudf::segmented_count_set_bits(mask.data().get(), indices);
+  auto counts                          = cudf::segmented_count_set_bits(mask.data(), indices);
   EXPECT_THAT(counts, testing::ContainerEq(std::vector<cudf::size_type>{0, 0}));
 }
 
 TEST_F(CountBitmaskTest, SingleBitAllZero)
 {
-  thrust::device_vector<cudf::bitmask_type> mask(1, 0);
-  EXPECT_EQ(0, cudf::count_set_bits(mask.data().get(), 17, 18));
+  auto mask = make_mask(1);
+  EXPECT_EQ(0, cudf::count_set_bits(mask.data(), 17, 18));
 
   std::vector<cudf::size_type> indices = {17, 18, 7, 8};
-  auto counts                          = cudf::segmented_count_set_bits(mask.data().get(), indices);
+  auto counts                          = cudf::segmented_count_set_bits(mask.data(), indices);
   EXPECT_THAT(counts, testing::ContainerEq(std::vector<cudf::size_type>{0, 0}));
 }
 
 TEST_F(CountBitmaskTest, SingleBitAllSet)
 {
-  thrust::device_vector<cudf::bitmask_type> mask(1, ~cudf::bitmask_type{0});
-  EXPECT_EQ(1, cudf::count_set_bits(mask.data().get(), 13, 14));
+  auto mask = make_mask(1, true);
+  EXPECT_EQ(1, cudf::count_set_bits(mask.data(), 13, 14));
 
   std::vector<cudf::size_type> indices = {13, 14, 0, 1};
-  auto counts                          = cudf::segmented_count_set_bits(mask.data().get(), indices);
+  auto counts                          = cudf::segmented_count_set_bits(mask.data(), indices);
   EXPECT_THAT(counts, testing::ContainerEq(std::vector<cudf::size_type>{1, 1}));
 }
 
 TEST_F(CountBitmaskTest, SingleWordAllBitsSet)
 {
-  thrust::device_vector<cudf::bitmask_type> mask(1, ~cudf::bitmask_type{0});
-  EXPECT_EQ(32, cudf::count_set_bits(mask.data().get(), 0, 32));
+  auto mask = make_mask(1, true);
+  EXPECT_EQ(32, cudf::count_set_bits(mask.data(), 0, 32));
 
   std::vector<cudf::size_type> indices = {0, 32, 0, 32};
-  auto counts                          = cudf::segmented_count_set_bits(mask.data().get(), indices);
+  auto counts                          = cudf::segmented_count_set_bits(mask.data(), indices);
   EXPECT_THAT(counts, testing::ContainerEq(std::vector<cudf::size_type>{32, 32}));
 }
 
 TEST_F(CountBitmaskTest, SingleWordPreSlack)
 {
-  thrust::device_vector<cudf::bitmask_type> mask(1, ~cudf::bitmask_type{0});
-  EXPECT_EQ(25, cudf::count_set_bits(mask.data().get(), 7, 32));
+  auto mask = make_mask(1, true);
+  EXPECT_EQ(25, cudf::count_set_bits(mask.data(), 7, 32));
 
   std::vector<cudf::size_type> indices = {7, 32, 8, 32};
-  auto counts                          = cudf::segmented_count_set_bits(mask.data().get(), indices);
+  auto counts                          = cudf::segmented_count_set_bits(mask.data(), indices);
   EXPECT_THAT(counts, testing::ContainerEq(std::vector<cudf::size_type>{25, 24}));
 }
 
 TEST_F(CountBitmaskTest, SingleWordPostSlack)
 {
-  thrust::device_vector<cudf::bitmask_type> mask(1, ~cudf::bitmask_type{0});
-  EXPECT_EQ(17, cudf::count_set_bits(mask.data().get(), 0, 17));
+  auto mask = make_mask(1, true);
+  EXPECT_EQ(17, cudf::count_set_bits(mask.data(), 0, 17));
 
   std::vector<cudf::size_type> indices = {0, 17, 0, 18};
-  auto counts                          = cudf::segmented_count_set_bits(mask.data().get(), indices);
+  auto counts                          = cudf::segmented_count_set_bits(mask.data(), indices);
   EXPECT_THAT(counts, testing::ContainerEq(std::vector<cudf::size_type>{17, 18}));
 }
 
 TEST_F(CountBitmaskTest, SingleWordSubset)
 {
-  thrust::device_vector<cudf::bitmask_type> mask(1, ~cudf::bitmask_type{0});
-  EXPECT_EQ(30, cudf::count_set_bits(mask.data().get(), 1, 31));
+  auto mask = make_mask(1, true);
+  EXPECT_EQ(30, cudf::count_set_bits(mask.data(), 1, 31));
 
   std::vector<cudf::size_type> indices = {1, 31, 7, 17};
-  auto counts                          = cudf::segmented_count_set_bits(mask.data().get(), indices);
+  auto counts                          = cudf::segmented_count_set_bits(mask.data(), indices);
   EXPECT_THAT(counts, testing::ContainerEq(std::vector<cudf::size_type>{30, 10}));
 }
 
 TEST_F(CountBitmaskTest, SingleWordSubset2)
 {
-  thrust::device_vector<cudf::bitmask_type> mask(1, ~cudf::bitmask_type{0});
-  EXPECT_EQ(28, cudf::count_set_bits(mask.data().get(), 2, 30));
+  auto mask = make_mask(1, true);
+  EXPECT_EQ(28, cudf::count_set_bits(mask.data(), 2, 30));
 
   std::vector<cudf::size_type> indices = {4, 16, 2, 30};
-  auto counts                          = cudf::segmented_count_set_bits(mask.data().get(), indices);
+  auto counts                          = cudf::segmented_count_set_bits(mask.data(), indices);
   EXPECT_THAT(counts, testing::ContainerEq(std::vector<cudf::size_type>{12, 28}));
 }
 
 TEST_F(CountBitmaskTest, MultipleWordsAllBits)
 {
-  thrust::device_vector<cudf::bitmask_type> mask(10, ~cudf::bitmask_type{0});
-  EXPECT_EQ(320, cudf::count_set_bits(mask.data().get(), 0, 320));
+  auto mask = make_mask(10, true);
+  EXPECT_EQ(320, cudf::count_set_bits(mask.data(), 0, 320));
 
   std::vector<cudf::size_type> indices = {0, 320, 0, 320};
-  auto counts                          = cudf::segmented_count_set_bits(mask.data().get(), indices);
+  auto counts                          = cudf::segmented_count_set_bits(mask.data(), indices);
   EXPECT_THAT(counts, testing::ContainerEq(std::vector<cudf::size_type>{320, 320}));
 }
 
 TEST_F(CountBitmaskTest, MultipleWordsSubsetWordBoundary)
 {
-  thrust::device_vector<cudf::bitmask_type> mask(10, ~cudf::bitmask_type{0});
-  EXPECT_EQ(256, cudf::count_set_bits(mask.data().get(), 32, 288));
+  auto mask = make_mask(10, true);
+  EXPECT_EQ(256, cudf::count_set_bits(mask.data(), 32, 288));
 
   std::vector<cudf::size_type> indices = {32, 192, 32, 288};
-  auto counts                          = cudf::segmented_count_set_bits(mask.data().get(), indices);
+  auto counts                          = cudf::segmented_count_set_bits(mask.data(), indices);
   EXPECT_THAT(counts, testing::ContainerEq(std::vector<cudf::size_type>{160, 256}));
 }
 
 TEST_F(CountBitmaskTest, MultipleWordsSplitWordBoundary)
 {
-  thrust::device_vector<cudf::bitmask_type> mask(10, ~cudf::bitmask_type{0});
-  EXPECT_EQ(2, cudf::count_set_bits(mask.data().get(), 31, 33));
+  auto mask = make_mask(10, true);
+  EXPECT_EQ(2, cudf::count_set_bits(mask.data(), 31, 33));
 
   std::vector<cudf::size_type> indices = {31, 33, 60, 67};
-  auto counts                          = cudf::segmented_count_set_bits(mask.data().get(), indices);
+  auto counts                          = cudf::segmented_count_set_bits(mask.data(), indices);
   EXPECT_THAT(counts, testing::ContainerEq(std::vector<cudf::size_type>{2, 7}));
 }
 
 TEST_F(CountBitmaskTest, MultipleWordsSubset)
 {
-  thrust::device_vector<cudf::bitmask_type> mask(10, ~cudf::bitmask_type{0});
-  EXPECT_EQ(226, cudf::count_set_bits(mask.data().get(), 67, 293));
+  auto mask = make_mask(10, true);
+  EXPECT_EQ(226, cudf::count_set_bits(mask.data(), 67, 293));
 
   std::vector<cudf::size_type> indices = {67, 293, 37, 319};
-  auto counts                          = cudf::segmented_count_set_bits(mask.data().get(), indices);
+  auto counts                          = cudf::segmented_count_set_bits(mask.data(), indices);
   EXPECT_THAT(counts, testing::ContainerEq(std::vector<cudf::size_type>{226, 282}));
 }
 
 TEST_F(CountBitmaskTest, MultipleWordsSingleBit)
 {
-  thrust::device_vector<cudf::bitmask_type> mask(10, ~cudf::bitmask_type{0});
-  EXPECT_EQ(1, cudf::count_set_bits(mask.data().get(), 67, 68));
+  auto mask = make_mask(10, true);
+  EXPECT_EQ(1, cudf::count_set_bits(mask.data(), 67, 68));
 
   std::vector<cudf::size_type> indices = {67, 68, 31, 32, 192, 193};
-  auto counts                          = cudf::segmented_count_set_bits(mask.data().get(), indices);
+  auto counts                          = cudf::segmented_count_set_bits(mask.data(), indices);
   EXPECT_THAT(counts, testing::ContainerEq(std::vector<cudf::size_type>{1, 1, 1}));
 }
 
@@ -238,11 +255,11 @@ using CountUnsetBitsTest = CountBitmaskTest;
 
 TEST_F(CountUnsetBitsTest, SingleBitAllSet)
 {
-  thrust::device_vector<cudf::bitmask_type> mask(1, ~cudf::bitmask_type{0});
-  EXPECT_EQ(0, cudf::count_unset_bits(mask.data().get(), 13, 14));
+  auto mask = make_mask(1, true);
+  EXPECT_EQ(0, cudf::count_unset_bits(mask.data(), 13, 14));
 
   std::vector<cudf::size_type> indices = {13, 14, 31, 32};
-  auto counts = cudf::segmented_count_unset_bits(mask.data().get(), indices);
+  auto counts                          = cudf::segmented_count_unset_bits(mask.data(), indices);
   EXPECT_THAT(counts, testing::ContainerEq(std::vector<cudf::size_type>{0, 0}));
 }
 
@@ -258,101 +275,101 @@ TEST_F(CountUnsetBitsTest, NullMask)
 
 TEST_F(CountUnsetBitsTest, SingleWordAllBits)
 {
-  thrust::device_vector<cudf::bitmask_type> mask(1, cudf::bitmask_type{0});
-  EXPECT_EQ(32, cudf::count_unset_bits(mask.data().get(), 0, 32));
+  auto mask = make_mask(1);
+  EXPECT_EQ(32, cudf::count_unset_bits(mask.data(), 0, 32));
 
   std::vector<cudf::size_type> indices = {0, 32, 0, 32};
-  auto counts = cudf::segmented_count_unset_bits(mask.data().get(), indices);
+  auto counts                          = cudf::segmented_count_unset_bits(mask.data(), indices);
   EXPECT_THAT(counts, testing::ContainerEq(std::vector<cudf::size_type>{32, 32}));
 }
 
 TEST_F(CountUnsetBitsTest, SingleWordPreSlack)
 {
-  thrust::device_vector<cudf::bitmask_type> mask(1, cudf::bitmask_type{0});
-  EXPECT_EQ(25, cudf::count_unset_bits(mask.data().get(), 7, 32));
+  auto mask = make_mask(1);
+  EXPECT_EQ(25, cudf::count_unset_bits(mask.data(), 7, 32));
 
   std::vector<cudf::size_type> indices = {7, 32, 8, 32};
-  auto counts = cudf::segmented_count_unset_bits(mask.data().get(), indices);
+  auto counts                          = cudf::segmented_count_unset_bits(mask.data(), indices);
   EXPECT_THAT(counts, testing::ContainerEq(std::vector<cudf::size_type>{25, 24}));
 }
 
 TEST_F(CountUnsetBitsTest, SingleWordPostSlack)
 {
-  thrust::device_vector<cudf::bitmask_type> mask(1, cudf::bitmask_type{0});
-  EXPECT_EQ(17, cudf::count_unset_bits(mask.data().get(), 0, 17));
+  auto mask = make_mask(1);
+  EXPECT_EQ(17, cudf::count_unset_bits(mask.data(), 0, 17));
 
   std::vector<cudf::size_type> indices = {0, 17, 0, 18};
-  auto counts = cudf::segmented_count_unset_bits(mask.data().get(), indices);
+  auto counts                          = cudf::segmented_count_unset_bits(mask.data(), indices);
   EXPECT_THAT(counts, testing::ContainerEq(std::vector<cudf::size_type>{17, 18}));
 }
 
 TEST_F(CountUnsetBitsTest, SingleWordSubset)
 {
-  thrust::device_vector<cudf::bitmask_type> mask(1, cudf::bitmask_type{0});
-  EXPECT_EQ(30, cudf::count_unset_bits(mask.data().get(), 1, 31));
+  auto mask = make_mask(1);
+  EXPECT_EQ(30, cudf::count_unset_bits(mask.data(), 1, 31));
 
   std::vector<cudf::size_type> indices = {1, 31, 7, 17};
-  auto counts = cudf::segmented_count_unset_bits(mask.data().get(), indices);
+  auto counts                          = cudf::segmented_count_unset_bits(mask.data(), indices);
   EXPECT_THAT(counts, testing::ContainerEq(std::vector<cudf::size_type>{30, 10}));
 }
 
 TEST_F(CountUnsetBitsTest, SingleWordSubset2)
 {
-  thrust::device_vector<cudf::bitmask_type> mask(1, cudf::bitmask_type{0});
-  EXPECT_EQ(28, cudf::count_unset_bits(mask.data().get(), 2, 30));
+  auto mask = make_mask(1);
+  EXPECT_EQ(28, cudf::count_unset_bits(mask.data(), 2, 30));
 
   std::vector<cudf::size_type> indices = {4, 16, 2, 30};
-  auto counts = cudf::segmented_count_unset_bits(mask.data().get(), indices);
+  auto counts                          = cudf::segmented_count_unset_bits(mask.data(), indices);
   EXPECT_THAT(counts, testing::ContainerEq(std::vector<cudf::size_type>{12, 28}));
 }
 
 TEST_F(CountUnsetBitsTest, MultipleWordsAllBits)
 {
-  thrust::device_vector<cudf::bitmask_type> mask(10, cudf::bitmask_type{0});
-  EXPECT_EQ(320, cudf::count_unset_bits(mask.data().get(), 0, 320));
+  auto mask = make_mask(10);
+  EXPECT_EQ(320, cudf::count_unset_bits(mask.data(), 0, 320));
 
   std::vector<cudf::size_type> indices = {0, 320, 0, 320};
-  auto counts = cudf::segmented_count_unset_bits(mask.data().get(), indices);
+  auto counts                          = cudf::segmented_count_unset_bits(mask.data(), indices);
   EXPECT_THAT(counts, testing::ContainerEq(std::vector<cudf::size_type>{320, 320}));
 }
 
 TEST_F(CountUnsetBitsTest, MultipleWordsSubsetWordBoundary)
 {
-  thrust::device_vector<cudf::bitmask_type> mask(10, cudf::bitmask_type{0});
-  EXPECT_EQ(256, cudf::count_unset_bits(mask.data().get(), 32, 288));
+  auto mask = make_mask(10);
+  EXPECT_EQ(256, cudf::count_unset_bits(mask.data(), 32, 288));
 
   std::vector<cudf::size_type> indices = {32, 192, 32, 288};
-  auto counts = cudf::segmented_count_unset_bits(mask.data().get(), indices);
+  auto counts                          = cudf::segmented_count_unset_bits(mask.data(), indices);
   EXPECT_THAT(counts, testing::ContainerEq(std::vector<cudf::size_type>{160, 256}));
 }
 
 TEST_F(CountUnsetBitsTest, MultipleWordsSplitWordBoundary)
 {
-  thrust::device_vector<cudf::bitmask_type> mask(10, cudf::bitmask_type{0});
-  EXPECT_EQ(2, cudf::count_unset_bits(mask.data().get(), 31, 33));
+  auto mask = make_mask(10);
+  EXPECT_EQ(2, cudf::count_unset_bits(mask.data(), 31, 33));
 
   std::vector<cudf::size_type> indices = {31, 33, 60, 67};
-  auto counts = cudf::segmented_count_unset_bits(mask.data().get(), indices);
+  auto counts                          = cudf::segmented_count_unset_bits(mask.data(), indices);
   EXPECT_THAT(counts, ::testing::ContainerEq(std::vector<cudf::size_type>{2, 7}));
 }
 
 TEST_F(CountUnsetBitsTest, MultipleWordsSubset)
 {
-  thrust::device_vector<cudf::bitmask_type> mask(10, cudf::bitmask_type{0});
-  EXPECT_EQ(226, cudf::count_unset_bits(mask.data().get(), 67, 293));
+  auto mask = make_mask(10);
+  EXPECT_EQ(226, cudf::count_unset_bits(mask.data(), 67, 293));
 
   std::vector<cudf::size_type> indices = {67, 293, 37, 319};
-  auto counts = cudf::segmented_count_unset_bits(mask.data().get(), indices);
+  auto counts                          = cudf::segmented_count_unset_bits(mask.data(), indices);
   EXPECT_THAT(counts, ::testing::ContainerEq(std::vector<cudf::size_type>{226, 282}));
 }
 
 TEST_F(CountUnsetBitsTest, MultipleWordsSingleBit)
 {
-  thrust::device_vector<cudf::bitmask_type> mask(10, cudf::bitmask_type{0});
-  EXPECT_EQ(1, cudf::count_unset_bits(mask.data().get(), 67, 68));
+  auto mask = make_mask(10);
+  EXPECT_EQ(1, cudf::count_unset_bits(mask.data(), 67, 68));
 
   std::vector<cudf::size_type> indices = {67, 68, 31, 32, 192, 193};
-  auto counts = cudf::segmented_count_unset_bits(mask.data().get(), indices);
+  auto counts                          = cudf::segmented_count_unset_bits(mask.data(), indices);
   EXPECT_THAT(counts, ::testing::ContainerEq(std::vector<cudf::size_type>{1, 1, 1}));
 }
 
@@ -362,7 +379,7 @@ struct CopyBitmaskTest : public cudf::test::BaseFixture, cudf::test::UniformRand
 
 void cleanEndWord(rmm::device_buffer &mask, int begin_bit, int end_bit)
 {
-  thrust::device_ptr<cudf::bitmask_type> ptr(static_cast<cudf::bitmask_type *>(mask.data()));
+  auto ptr = static_cast<cudf::bitmask_type *>(mask.data());
 
   auto number_of_mask_words = cudf::num_bitmask_words(static_cast<size_t>(end_bit - begin_bit));
   auto number_of_bits       = end_bit - begin_bit;
@@ -374,20 +391,20 @@ void cleanEndWord(rmm::device_buffer &mask, int begin_bit, int end_bit)
 
 TEST_F(CopyBitmaskTest, NegativeStart)
 {
-  thrust::device_vector<cudf::bitmask_type> mask(1, 0);
-  EXPECT_THROW(cudf::copy_bitmask(mask.data().get(), -1, 32), cudf::logic_error);
+  auto mask = make_mask(1);
+  EXPECT_THROW(cudf::copy_bitmask(mask.data(), -1, 32), cudf::logic_error);
 }
 
 TEST_F(CopyBitmaskTest, StartLargerThanStop)
 {
-  thrust::device_vector<cudf::bitmask_type> mask(1, 0);
-  EXPECT_THROW(cudf::copy_bitmask(mask.data().get(), 32, 31), cudf::logic_error);
+  auto mask = make_mask(1);
+  EXPECT_THROW(cudf::copy_bitmask(mask.data(), 32, 31), cudf::logic_error);
 }
 
 TEST_F(CopyBitmaskTest, EmptyRange)
 {
-  thrust::device_vector<cudf::bitmask_type> mask(1, 0);
-  auto buff = cudf::copy_bitmask(mask.data().get(), 17, 17);
+  auto mask = make_mask(1);
+  auto buff = cudf::copy_bitmask(mask.data(), 17, 17);
   EXPECT_EQ(0, static_cast<int>(buff.size()));
 }
 
@@ -399,7 +416,7 @@ TEST_F(CopyBitmaskTest, NullPtr)
 
 TEST_F(CopyBitmaskTest, TestZeroOffset)
 {
-  thrust::host_vector<int> validity_bit(1000);
+  std::vector<int> validity_bit(1000);
   for (auto &m : validity_bit) { m = this->generate(); }
   auto input_mask = cudf::test::detail::make_null_mask(validity_bit.begin(), validity_bit.end());
 
@@ -419,7 +436,7 @@ TEST_F(CopyBitmaskTest, TestZeroOffset)
 
 TEST_F(CopyBitmaskTest, TestNonZeroOffset)
 {
-  thrust::host_vector<int> validity_bit(1000);
+  std::vector<int> validity_bit(1000);
   for (auto &m : validity_bit) { m = this->generate(); }
   auto input_mask = cudf::test::detail::make_null_mask(validity_bit.begin(), validity_bit.end());
 
@@ -441,7 +458,7 @@ TEST_F(CopyBitmaskTest, TestCopyColumnViewVectorContiguous)
 {
   cudf::data_type t{cudf::type_id::INT32};
   cudf::size_type num_elements = 1001;
-  thrust::host_vector<int> validity_bit(num_elements);
+  std::vector<int> validity_bit(num_elements);
   for (auto &m : validity_bit) { m = this->generate(); }
   auto gold_mask = cudf::test::detail::make_null_mask(validity_bit.begin(), validity_bit.end());
 
@@ -476,7 +493,7 @@ TEST_F(CopyBitmaskTest, TestCopyColumnViewVectorDiscontiguous)
 {
   cudf::data_type t{cudf::type_id::INT32};
   cudf::size_type num_elements = 1001;
-  thrust::host_vector<int> validity_bit(num_elements);
+  std::vector<int> validity_bit(num_elements);
   for (auto &m : validity_bit) { m = this->generate(); }
   auto gold_mask = cudf::test::detail::make_null_mask(validity_bit.begin(), validity_bit.end());
   std::vector<cudf::size_type> split{0, 104, 128, 152, 311, 491, 583, 734, 760, num_elements};
diff --git a/cpp/tests/bitmask/set_nullmask_tests.cu b/cpp/tests/bitmask/set_nullmask_tests.cu
index ae4896827fd..235aec7ddf8 100644
--- a/cpp/tests/bitmask/set_nullmask_tests.cu
+++ b/cpp/tests/bitmask/set_nullmask_tests.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -18,11 +18,15 @@
 
 #include <cudf_test/base_fixture.hpp>
 
+#include <cudf/detail/utilities/vector_factories.hpp>
 #include <cudf/null_mask.hpp>
+#include <cudf/utilities/bit.hpp>
+
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/device_uvector.hpp>
+#include <rmm/exec_policy.hpp>
 
-#include <thrust/device_vector.h>
 #include <thrust/transform.h>
-#include <cudf/utilities/bit.hpp>
 
 struct valid_bit_functor {
   cudf::bitmask_type const* _null_mask;
@@ -41,12 +45,18 @@ std::ostream& operator<<(std::ostream& stream, thrust::host_vector<bool> const&
 struct SetBitmaskTest : public cudf::test::BaseFixture {
   void expect_bitmask_equal(cudf::bitmask_type const* bitmask,  // Device Ptr
                             cudf::size_type start_bit,
-                            thrust::host_vector<bool> const& expect)
+                            thrust::host_vector<bool> const& expect,
+                            rmm::cuda_stream_view stream = rmm::cuda_stream_default)
   {
-    auto itb_dev = thrust::make_transform_iterator(thrust::counting_iterator<cudf::size_type>{0},
-                                                   valid_bit_functor{bitmask});
-    thrust::device_vector<bool> result(itb_dev + start_bit, itb_dev + start_bit + expect.size());
-    thrust::host_vector<bool> host_result(result);
+    rmm::device_uvector<bool> result(expect.size(), stream);
+    auto counting_iter = thrust::counting_iterator<cudf::size_type>{0};
+    thrust::transform(rmm::exec_policy(stream),
+                      counting_iter + start_bit,
+                      counting_iter + start_bit + expect.size(),
+                      result.begin(),
+                      valid_bit_functor{bitmask});
+
+    auto host_result = cudf::detail::make_host_vector_sync(result, stream);
     EXPECT_THAT(host_result, testing::ElementsAreArray(expect));
   }
 
diff --git a/cpp/tests/column/compound_test.cu b/cpp/tests/column/compound_test.cu
index 97a6dbb0c22..0df1cfaeccc 100644
--- a/cpp/tests/column/compound_test.cu
+++ b/cpp/tests/column/compound_test.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -21,10 +21,13 @@
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/cudf_gtest.hpp>
 
-#include <thrust/device_vector.h>
-#include <thrust/execution_policy.h>
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/device_uvector.hpp>
+#include <rmm/exec_policy.hpp>
+
 #include <thrust/logical.h>
 #include <thrust/sequence.h>
+
 #include <vector>
 
 struct CompoundColumnTest : public cudf::test::BaseFixture {
@@ -61,13 +64,13 @@ struct checker_for_level2 {
 
 TEST_F(CompoundColumnTest, ChildrenLevel1)
 {
-  thrust::device_vector<int32_t> data(1000);
-  thrust::sequence(thrust::device, data.begin(), data.end(), 1);
+  rmm::device_uvector<int32_t> data(1000, rmm::cuda_stream_default);
+  thrust::sequence(rmm::exec_policy(), data.begin(), data.end(), 1);
 
   auto null_mask = cudf::create_null_mask(100, cudf::mask_state::UNALLOCATED);
-  rmm::device_buffer data1(data.data().get() + 100, 100 * sizeof(int32_t));
-  rmm::device_buffer data2(data.data().get() + 200, 100 * sizeof(int32_t));
-  rmm::device_buffer data3(data.data().get() + 300, 100 * sizeof(int32_t));
+  rmm::device_buffer data1(data.data() + 100, 100 * sizeof(int32_t));
+  rmm::device_buffer data2(data.data() + 200, 100 * sizeof(int32_t));
+  rmm::device_buffer data3(data.data() + 300, 100 * sizeof(int32_t));
   auto child1 =
     std::make_unique<cudf::column>(cudf::data_type{cudf::type_id::INT32}, 100, data1, null_mask, 0);
   auto child2 =
@@ -89,14 +92,14 @@ TEST_F(CompoundColumnTest, ChildrenLevel1)
 
   {
     auto column = cudf::column_device_view::create(parent->view());
-    EXPECT_TRUE(thrust::any_of(thrust::device,
+    EXPECT_TRUE(thrust::any_of(rmm::exec_policy(),
                                thrust::make_counting_iterator<int32_t>(0),
                                thrust::make_counting_iterator<int32_t>(100),
                                checker_for_level1<cudf::column_device_view>{*column}));
   }
   {
     auto column = cudf::mutable_column_device_view::create(parent->mutable_view());
-    EXPECT_TRUE(thrust::any_of(thrust::device,
+    EXPECT_TRUE(thrust::any_of(rmm::exec_policy(),
                                thrust::make_counting_iterator<int32_t>(0),
                                thrust::make_counting_iterator<int32_t>(100),
                                checker_for_level1<cudf::mutable_column_device_view>{*column}));
@@ -105,16 +108,16 @@ TEST_F(CompoundColumnTest, ChildrenLevel1)
 
 TEST_F(CompoundColumnTest, ChildrenLevel2)
 {
-  thrust::device_vector<int32_t> data(1000);
-  thrust::sequence(thrust::device, data.begin(), data.end(), 1);
+  rmm::device_uvector<int32_t> data(1000, rmm::cuda_stream_default);
+  thrust::sequence(rmm::exec_policy(), data.begin(), data.end(), 1);
 
   auto null_mask = cudf::create_null_mask(100, cudf::mask_state::UNALLOCATED);
-  rmm::device_buffer data11(data.data().get() + 100, 100 * sizeof(int32_t));
-  rmm::device_buffer data12(data.data().get() + 200, 100 * sizeof(int32_t));
-  rmm::device_buffer data13(data.data().get() + 300, 100 * sizeof(int32_t));
-  rmm::device_buffer data21(data.data().get() + 400, 100 * sizeof(int32_t));
-  rmm::device_buffer data22(data.data().get() + 500, 100 * sizeof(int32_t));
-  rmm::device_buffer data23(data.data().get() + 600, 100 * sizeof(int32_t));
+  rmm::device_buffer data11(data.data() + 100, 100 * sizeof(int32_t));
+  rmm::device_buffer data12(data.data() + 200, 100 * sizeof(int32_t));
+  rmm::device_buffer data13(data.data() + 300, 100 * sizeof(int32_t));
+  rmm::device_buffer data21(data.data() + 400, 100 * sizeof(int32_t));
+  rmm::device_buffer data22(data.data() + 500, 100 * sizeof(int32_t));
+  rmm::device_buffer data23(data.data() + 600, 100 * sizeof(int32_t));
   auto gchild11 = std::make_unique<cudf::column>(
     cudf::data_type{cudf::type_id::INT32}, 100, data11, null_mask, 0);
   auto gchild12 = std::make_unique<cudf::column>(
@@ -162,14 +165,14 @@ TEST_F(CompoundColumnTest, ChildrenLevel2)
 
   {
     auto column = cudf::column_device_view::create(parent->view());
-    EXPECT_TRUE(thrust::any_of(thrust::device,
+    EXPECT_TRUE(thrust::any_of(rmm::exec_policy(),
                                thrust::make_counting_iterator<int32_t>(0),
                                thrust::make_counting_iterator<int32_t>(100),
                                checker_for_level2<cudf::column_device_view>{*column}));
   }
   {
     auto column = cudf::mutable_column_device_view::create(parent->mutable_view());
-    EXPECT_TRUE(thrust::any_of(thrust::device,
+    EXPECT_TRUE(thrust::any_of(rmm::exec_policy(),
                                thrust::make_counting_iterator<int32_t>(0),
                                thrust::make_counting_iterator<int32_t>(100),
                                checker_for_level2<cudf::mutable_column_device_view>{*column}));
diff --git a/cpp/tests/device_atomics/device_atomics_test.cu b/cpp/tests/device_atomics/device_atomics_test.cu
index 1cdfd6ad8ef..aa53877f27d 100644
--- a/cpp/tests/device_atomics/device_atomics_test.cu
+++ b/cpp/tests/device_atomics/device_atomics_test.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019, NVIDIA CORPORATION.
+ * Copyright (c) 2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,16 +14,19 @@
  * limitations under the License.
  */
 
-#include <algorithm>
+#include <cudf/detail/utilities/device_atomics.cuh>
+#include <cudf/detail/utilities/vector_factories.hpp>
 #include <cudf/utilities/traits.hpp>
 #include <cudf/wrappers/timestamps.hpp>
 
-#include <cudf/detail/utilities/device_atomics.cuh>
-
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/timestamp_utilities.cuh>
 #include <cudf_test/type_lists.hpp>
 
+#include <rmm/cuda_stream_view.hpp>
+
+#include <algorithm>
+
 template <typename T>
 __global__ void gpu_atomic_test(T* result, T* data, size_t size)
 {
@@ -89,13 +92,13 @@ __global__ void gpu_atomicCAS_test(T* result, T* data, size_t size)
 }
 
 template <typename T>
-typename std::enable_if_t<!cudf::is_timestamp<T>(), T> accumulate(std::vector<T> const& xs)
+typename std::enable_if_t<!cudf::is_timestamp<T>(), T> accumulate(cudf::host_span<T const> xs)
 {
   return std::accumulate(xs.begin(), xs.end(), T{0});
 }
 
 template <typename T>
-typename std::enable_if_t<cudf::is_timestamp<T>(), T> accumulate(std::vector<T> const& xs)
+typename std::enable_if_t<cudf::is_timestamp<T>(), T> accumulate(cudf::host_span<T const> xs)
 {
   auto ys = std::vector<typename T::rep>(xs.size());
   std::transform(
@@ -112,8 +115,8 @@ struct AtomicsTest : public cudf::test::BaseFixture {
   {
     size_t vec_size = v_input.size();
 
-    // use transform from std::vector<int> instead.
-    std::vector<T> v(vec_size);
+    // use transform from thrust::host_vector<int> instead.
+    thrust::host_vector<T> v(vec_size);
     std::transform(v_input.begin(), v_input.end(), v.begin(), [](int x) {
       T t = cudf::test::make_type_param_scalar<T>(x);
       return t;
@@ -124,7 +127,7 @@ struct AtomicsTest : public cudf::test::BaseFixture {
     exact[1] = *(std::min_element(v.begin(), v.end()));
     exact[2] = *(std::max_element(v.begin(), v.end()));
 
-    std::vector<T> result_init(9);  // +3 padding for int8 tests
+    thrust::host_vector<T> result_init(9);  // +3 padding for int8 tests
     result_init[0] = cudf::test::make_type_param_scalar<T>(0);
     result_init[1] = std::numeric_limits<T>::max();
     result_init[2] = std::numeric_limits<T>::min();
@@ -132,22 +135,20 @@ struct AtomicsTest : public cudf::test::BaseFixture {
     result_init[4] = result_init[1];
     result_init[5] = result_init[2];
 
-    thrust::device_vector<T> dev_data(v);
-    thrust::device_vector<T> dev_result(result_init);
+    auto dev_data   = cudf::detail::make_device_uvector_sync(v);
+    auto dev_result = cudf::detail::make_device_uvector_sync(result_init);
 
     if (block_size == 0) { block_size = vec_size; }
 
     if (is_cas_test) {
-      gpu_atomicCAS_test<<<grid_size, block_size>>>(
-        dev_result.data().get(), dev_data.data().get(), vec_size);
+      gpu_atomicCAS_test<<<grid_size, block_size>>>(dev_result.data(), dev_data.data(), vec_size);
     } else {
-      gpu_atomic_test<<<grid_size, block_size>>>(
-        dev_result.data().get(), dev_data.data().get(), vec_size);
+      gpu_atomic_test<<<grid_size, block_size>>>(dev_result.data(), dev_data.data(), vec_size);
     }
 
-    thrust::host_vector<T> host_result(dev_result);
-    CUDA_TRY(cudaDeviceSynchronize());
-    CHECK_CUDA(0);
+    auto host_result = cudf::detail::make_host_vector_sync(dev_result);
+
+    CHECK_CUDA(rmm::cuda_stream_default.value());
 
     if (!is_timestamp_sum<T, cudf::DeviceSum>()) {
       EXPECT_EQ(host_result[0], exact[0]) << "atomicAdd test failed";
@@ -272,15 +273,10 @@ struct AtomicsBitwiseOpTest : public cudf::test::BaseFixture {
       return t;
     });
 
-    std::vector<T> identity = {T(~0ull),
-                               T(0),
-                               T(0),
-                               T(~0ull),
-                               T(0),
-                               T(0),
-                               T(0),
-                               T(0),
-                               T(0)};  // +3 elements padding for int8 tests
+    thrust::host_vector<T> identity(9, T{0});  // +3 elements padding for int8 tests
+    identity[0] = T(~0ull);
+    identity[3] = T(~0ull);
+
     T exact[3];
     exact[0] = std::accumulate(
       v.begin(), v.end(), identity[0], [](T acc, uint64_t i) { return acc & T(i); });
@@ -289,22 +285,20 @@ struct AtomicsBitwiseOpTest : public cudf::test::BaseFixture {
     exact[2] = std::accumulate(
       v.begin(), v.end(), identity[2], [](T acc, uint64_t i) { return acc ^ T(i); });
 
-    thrust::device_vector<T> dev_result(identity);
-    thrust::device_vector<T> dev_data(v);
+    auto dev_result = cudf::detail::make_device_uvector_sync(identity);
+    auto dev_data   = cudf::detail::make_device_uvector_sync(v);
 
     if (block_size == 0) { block_size = vec_size; }
 
-    gpu_atomic_bitwiseOp_test<T>
-      <<<grid_size, block_size>>>(reinterpret_cast<T*>(dev_result.data().get()),
-                                  reinterpret_cast<T*>(dev_data.data().get()),
-                                  vec_size);
+    gpu_atomic_bitwiseOp_test<T><<<grid_size, block_size>>>(
+      reinterpret_cast<T*>(dev_result.data()), reinterpret_cast<T*>(dev_data.data()), vec_size);
+
+    auto host_result = cudf::detail::make_host_vector_sync(dev_result);
 
-    thrust::host_vector<T> host_result(dev_result);
-    CUDA_TRY(cudaDeviceSynchronize());
-    CHECK_CUDA(0);
+    CHECK_CUDA(rmm::cuda_stream_default.value());
 
-    print_exact(exact, "exact");
-    print_exact(host_result.data(), "result");
+    // print_exact(exact, "exact");
+    // print_exact(host_result.data(), "result");
 
     EXPECT_EQ(host_result[0], exact[0]) << "atomicAnd test failed";
     EXPECT_EQ(host_result[1], exact[1]) << "atomicOr  test failed";
@@ -314,7 +308,7 @@ struct AtomicsBitwiseOpTest : public cudf::test::BaseFixture {
     EXPECT_EQ(host_result[5], exact[2]) << "atomicXor test(2) failed";
   }
 
-  void print_exact(const T* v, const char* msg)
+  [[maybe_unused]] void print_exact(const T* v, const char* msg)
   {
     std::cout << std::hex << std::showbase;
     std::cout << "The " << msg << " = {" << +v[0] << ", " << +v[1] << ", " << +v[2] << "}"
diff --git a/cpp/tests/fixed_point/fixed_point_tests.cu b/cpp/tests/fixed_point/fixed_point_tests.cu
index 5f74e459bb1..124d9339ebf 100644
--- a/cpp/tests/fixed_point/fixed_point_tests.cu
+++ b/cpp/tests/fixed_point/fixed_point_tests.cu
@@ -35,6 +35,8 @@
 #include <numeric>
 #include <type_traits>
 #include <vector>
+#include "cudf/detail/utilities/vector_factories.hpp"
+#include "rmm/cuda_stream_view.hpp"
 
 using namespace numeric;
 
@@ -507,37 +509,39 @@ TEST_F(FixedPointTest, DecimalXXThrustOnDevice)
 {
   using decimal32 = fixed_point<int32_t, Radix::BASE_10>;
 
-  thrust::device_vector<decimal32> vec1(1000, decimal32{1, scale_type{-2}});
+  std::vector<decimal32> vec1(1000, decimal32{1, scale_type{-2}});
+  auto d_vec1 = cudf::detail::make_device_uvector_sync(vec1);
 
   auto const sum = thrust::reduce(
-    rmm::exec_policy(), std::cbegin(vec1), std::cend(vec1), decimal32{0, scale_type{-2}});
+    rmm::exec_policy(), std::cbegin(d_vec1), std::cend(d_vec1), decimal32{0, scale_type{-2}});
 
   EXPECT_EQ(static_cast<int32_t>(sum), 1000);
 
   // TODO: Once nvbugs/1990211 is fixed (ExclusiveSum initial_value = 0 bug)
   //       change inclusive scan to run on device (avoid copying to host)
-  thrust::host_vector<decimal32> vec1_host = vec1;
+  thrust::inclusive_scan(std::cbegin(vec1), std::cend(vec1), std::begin(vec1));
 
-  thrust::inclusive_scan(std::cbegin(vec1_host), std::cend(vec1_host), std::begin(vec1_host));
-
-  vec1 = vec1_host;
+  d_vec1 = cudf::detail::make_device_uvector_sync(vec1);
 
   std::vector<int32_t> vec2(1000);
   std::iota(std::begin(vec2), std::end(vec2), 1);
 
   auto const res1 = thrust::reduce(
-    rmm::exec_policy(), std::cbegin(vec1), std::cend(vec1), decimal32{0, scale_type{-2}});
+    rmm::exec_policy(), std::cbegin(d_vec1), std::cend(d_vec1), decimal32{0, scale_type{-2}});
 
   auto const res2 = std::accumulate(std::cbegin(vec2), std::cend(vec2), 0);
 
   EXPECT_EQ(static_cast<int32_t>(res1), res2);
 
-  thrust::device_vector<int32_t> vec3(1000);
+  rmm::device_uvector<int32_t> d_vec3(1000, rmm::cuda_stream_default);
 
-  thrust::transform(
-    rmm::exec_policy(), std::cbegin(vec1), std::cend(vec1), std::begin(vec3), cast_to_int32_fn{});
+  thrust::transform(rmm::exec_policy(),
+                    std::cbegin(d_vec1),
+                    std::cend(d_vec1),
+                    std::begin(d_vec3),
+                    cast_to_int32_fn{});
 
-  thrust::host_vector<int32_t> vec3_host = vec3;
+  auto vec3 = cudf::detail::make_std_vector_sync(d_vec3);
 
   EXPECT_EQ(vec2, vec3);
 }
diff --git a/cpp/tests/hash_map/map_test.cu b/cpp/tests/hash_map/map_test.cu
index 49cfda078b1..a747646d894 100644
--- a/cpp/tests/hash_map/map_test.cu
+++ b/cpp/tests/hash_map/map_test.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2018-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,24 +14,25 @@
  * limitations under the License.
  */
 
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/cudf_gtest.hpp>
+
 #include <hash/concurrent_unordered_map.cuh>
 
 #include <cudf/types.hpp>
-#include <cudf_test/base_fixture.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
-#include <rmm/device_vector.hpp>
+#include <rmm/device_uvector.hpp>
 
 #include <thrust/logical.h>
 
-#include <gtest/gtest.h>
-
 #include <cstdlib>
 #include <iostream>
 #include <limits>
 #include <random>
 #include <unordered_map>
 #include <vector>
+#include "rmm/exec_policy.hpp"
 
 template <typename K, typename V>
 struct key_value_types {
@@ -53,13 +54,13 @@ struct InsertTest : public cudf::test::BaseFixture {
     // prevent overflow of small types
     const size_t input_size =
       std::min(static_cast<key_type>(size), std::numeric_limits<key_type>::max());
-    pairs.resize(input_size);
+    pairs.resize(input_size, rmm::cuda_stream_default);
     map = std::move(map_type::create(compute_hash_table_size(size)));
     rmm::cuda_stream_default.synchronize();
   }
 
   const cudf::size_type size{10000};
-  rmm::device_vector<pair_type> pairs;
+  rmm::device_uvector<pair_type> pairs{static_cast<std::size_t>(size), rmm::cuda_stream_default};
   std::unique_ptr<map_type, std::function<void(map_type*)>> map;
 };
 
@@ -137,53 +138,78 @@ TYPED_TEST(InsertTest, UniqueKeysUniqueValues)
 {
   using map_type  = typename TypeParam::map_type;
   using pair_type = typename TypeParam::pair_type;
-  thrust::tabulate(this->pairs.begin(), this->pairs.end(), unique_pair_generator<pair_type>{});
+  thrust::tabulate(
+    rmm::exec_policy(), this->pairs.begin(), this->pairs.end(), unique_pair_generator<pair_type>{});
   // All pairs should be new inserts
-  EXPECT_TRUE(thrust::all_of(
-    this->pairs.begin(), this->pairs.end(), insert_pair<map_type, pair_type>{*this->map}));
+  EXPECT_TRUE(thrust::all_of(rmm::exec_policy(),
+                             this->pairs.begin(),
+                             this->pairs.end(),
+                             insert_pair<map_type, pair_type>{*this->map}));
 
   // All pairs should be present in the map
-  EXPECT_TRUE(thrust::all_of(
-    this->pairs.begin(), this->pairs.end(), find_pair<map_type, pair_type>{*this->map}));
+  EXPECT_TRUE(thrust::all_of(rmm::exec_policy(),
+                             this->pairs.begin(),
+                             this->pairs.end(),
+                             find_pair<map_type, pair_type>{*this->map}));
 }
 
 TYPED_TEST(InsertTest, IdenticalKeysIdenticalValues)
 {
   using map_type  = typename TypeParam::map_type;
   using pair_type = typename TypeParam::pair_type;
-  thrust::tabulate(this->pairs.begin(), this->pairs.end(), identical_pair_generator<pair_type>{});
+  thrust::tabulate(rmm::exec_policy(),
+                   this->pairs.begin(),
+                   this->pairs.end(),
+                   identical_pair_generator<pair_type>{});
   // Insert a single pair
-  EXPECT_TRUE(thrust::all_of(
-    this->pairs.begin(), this->pairs.begin() + 1, insert_pair<map_type, pair_type>{*this->map}));
+  EXPECT_TRUE(thrust::all_of(rmm::exec_policy(),
+                             this->pairs.begin(),
+                             this->pairs.begin() + 1,
+                             insert_pair<map_type, pair_type>{*this->map}));
   // Identical inserts should all return false (no new insert)
-  EXPECT_FALSE(thrust::all_of(
-    this->pairs.begin(), this->pairs.end(), insert_pair<map_type, pair_type>{*this->map}));
+  EXPECT_FALSE(thrust::all_of(rmm::exec_policy(),
+                              this->pairs.begin(),
+                              this->pairs.end(),
+                              insert_pair<map_type, pair_type>{*this->map}));
 
   // All pairs should be present in the map
-  EXPECT_TRUE(thrust::all_of(
-    this->pairs.begin(), this->pairs.end(), find_pair<map_type, pair_type>{*this->map}));
+  EXPECT_TRUE(thrust::all_of(rmm::exec_policy(),
+                             this->pairs.begin(),
+                             this->pairs.end(),
+                             find_pair<map_type, pair_type>{*this->map}));
 }
 
 TYPED_TEST(InsertTest, IdenticalKeysUniqueValues)
 {
   using map_type  = typename TypeParam::map_type;
   using pair_type = typename TypeParam::pair_type;
-  thrust::tabulate(this->pairs.begin(), this->pairs.end(), identical_key_generator<pair_type>{});
+  thrust::tabulate(rmm::exec_policy(),
+                   this->pairs.begin(),
+                   this->pairs.end(),
+                   identical_key_generator<pair_type>{});
 
   // Insert a single pair
-  EXPECT_TRUE(thrust::all_of(
-    this->pairs.begin(), this->pairs.begin() + 1, insert_pair<map_type, pair_type>{*this->map}));
+  EXPECT_TRUE(thrust::all_of(rmm::exec_policy(),
+                             this->pairs.begin(),
+                             this->pairs.begin() + 1,
+                             insert_pair<map_type, pair_type>{*this->map}));
 
   // Identical key inserts should all return false (no new insert)
-  EXPECT_FALSE(thrust::all_of(
-    this->pairs.begin() + 1, this->pairs.end(), insert_pair<map_type, pair_type>{*this->map}));
+  EXPECT_FALSE(thrust::all_of(rmm::exec_policy(),
+                              this->pairs.begin() + 1,
+                              this->pairs.end(),
+                              insert_pair<map_type, pair_type>{*this->map}));
 
   // Only first pair is present in map
-  EXPECT_TRUE(thrust::all_of(
-    this->pairs.begin(), this->pairs.begin() + 1, find_pair<map_type, pair_type>{*this->map}));
-
-  EXPECT_FALSE(thrust::all_of(
-    this->pairs.begin() + 1, this->pairs.end(), find_pair<map_type, pair_type>{*this->map}));
+  EXPECT_TRUE(thrust::all_of(rmm::exec_policy(),
+                             this->pairs.begin(),
+                             this->pairs.begin() + 1,
+                             find_pair<map_type, pair_type>{*this->map}));
+
+  EXPECT_FALSE(thrust::all_of(rmm::exec_policy(),
+                              this->pairs.begin() + 1,
+                              this->pairs.end(),
+                              find_pair<map_type, pair_type>{*this->map}));
 }
 
 CUDF_TEST_PROGRAM_MAIN()
diff --git a/cpp/tests/hash_map/multimap_test.cu b/cpp/tests/hash_map/multimap_test.cu
index 7fd69e90efd..21135746227 100644
--- a/cpp/tests/hash_map/multimap_test.cu
+++ b/cpp/tests/hash_map/multimap_test.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2018-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,21 +14,17 @@
  * limitations under the License.
  */
 
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/cudf_gtest.hpp>
+
 #include <hash/concurrent_unordered_multimap.cuh>
 #include <hash/hash_allocator.cuh>
 
-#include <cudf_test/base_fixture.hpp>
-
 #include <rmm/cuda_stream_view.hpp>
 
-#include <thrust/device_vector.h>
-
 #include <gtest/gtest.h>
 
-#include <cstdlib>
-#include <iostream>
 #include <limits>
-#include <vector>
 
 // This is necessary to do a parametrized typed-test over multiple template
 // arguments
diff --git a/cpp/tests/iterator/iterator_tests.cuh b/cpp/tests/iterator/iterator_tests.cuh
index 69a905386e2..06ac472d6d5 100644
--- a/cpp/tests/iterator/iterator_tests.cuh
+++ b/cpp/tests/iterator/iterator_tests.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,27 +14,30 @@
  */
 #pragma once
 
-#include <cudf/detail/iterator.cuh>                             // include iterator header
-#include <cudf/detail/utilities/transform_unary_functions.cuh>  //for meanvar
-
-#include <bitset>
-#include <cstdint>
-#include <iostream>
-#include <numeric>
-#include <random>
-
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/column_wrapper.hpp>
 #include <cudf_test/type_lists.hpp>
 
+#include <cudf/detail/iterator.cuh>                             // include iterator header
+#include <cudf/detail/utilities/transform_unary_functions.cuh>  //for meanvar
+#include <cudf/detail/utilities/vector_factories.hpp>
+
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/device_uvector.hpp>
+#include <rmm/exec_policy.hpp>
+
 #include <thrust/equal.h>
 #include <thrust/functional.h>
 #include <thrust/transform.h>
 
-// for reduction tests
-#include <thrust/device_vector.h>
 #include <cub/device/device_reduce.cuh>
 
+#include <bitset>
+#include <cstdint>
+#include <iostream>
+#include <numeric>
+#include <random>
+
 // Base Typed test fixture for iterator test
 template <typename T>
 struct IteratorTest : public cudf::test::BaseFixture {
@@ -43,7 +46,7 @@ struct IteratorTest : public cudf::test::BaseFixture {
   void iterator_test_cub(T_output expected, InputIterator d_in, int num_items)
   {
     T_output init = cudf::test::make_type_param_scalar<T_output>(0);
-    thrust::device_vector<T_output> dev_result(1, init);
+    rmm::device_uvector<T_output> dev_result(1, rmm::cuda_stream_default);
 
     // Get temporary storage size
     size_t temp_storage_bytes = 0;
@@ -72,57 +75,41 @@ struct IteratorTest : public cudf::test::BaseFixture {
 
   // iterator test case which uses thrust
   template <typename InputIterator, typename T_output>
-  void iterator_test_thrust(thrust::host_vector<T_output>& expected,
+  void iterator_test_thrust(thrust::host_vector<T_output> const& expected,
                             InputIterator d_in,
                             int num_items)
   {
     InputIterator d_in_last = d_in + num_items;
     EXPECT_EQ(thrust::distance(d_in, d_in_last), num_items);
-    thrust::device_vector<T_output> dev_expected(expected);
+    auto dev_expected = cudf::detail::make_device_uvector_sync(expected);
 
     // Can't use this because time_point make_pair bug in libcudacxx
     // bool result = thrust::equal(thrust::device, d_in, d_in_last, dev_expected.begin());
     bool result = thrust::transform_reduce(
-      thrust::device,
+      rmm::exec_policy(),
       thrust::make_zip_iterator(thrust::make_tuple(d_in, dev_expected.begin())),
       thrust::make_zip_iterator(thrust::make_tuple(d_in_last, dev_expected.end())),
       [] __device__(auto it) {
-        return static_cast<typename InputIterator::value_type>(thrust::get<0>(it)) ==
-               T_output(thrust::get<1>(it));
+        return static_cast<T_output>(thrust::get<0>(it)) == T_output(thrust::get<1>(it));
       },
       true,
       thrust::logical_and<bool>());
-#ifndef NDEBUG
-    thrust::device_vector<bool> vec(expected.size(), false);
-    thrust::transform(
-      thrust::device,
-      thrust::make_zip_iterator(thrust::make_tuple(d_in, dev_expected.begin())),
-      thrust::make_zip_iterator(thrust::make_tuple(d_in_last, dev_expected.end())),
-      vec.begin(),
-      [] __device__(auto it) { return (thrust::get<0>(it)) == T_output(thrust::get<1>(it)); });
-    thrust::copy(vec.begin(), vec.end(), std::ostream_iterator<bool>(std::cout, " "));
-    std::cout << std::endl;
-#endif
 
     EXPECT_TRUE(result) << "thrust test";
   }
 
   template <typename T_output>
   void evaluate(T_output expected,
-                thrust::device_vector<T_output>& dev_result,
+                rmm::device_uvector<T_output> const& dev_result,
                 const char* msg = nullptr)
   {
-    thrust::host_vector<T_output> hos_result(dev_result);
+    auto host_result = cudf::detail::make_host_vector_sync(dev_result);
 
-    EXPECT_EQ(expected, hos_result[0]) << msg;
-    // std::cout << "Done: expected <" << msg
-    //           << "> = "
-    //           //<< hos_result[0] //TODO uncomment after time_point ostream operator<<
-    //           << std::endl;
+    EXPECT_EQ(expected, host_result[0]) << msg;
   }
 
   template <typename T_output>
-  void values_equal_test(thrust::host_vector<T_output>& expected,
+  void values_equal_test(thrust::host_vector<T_output> const& expected,
                          const cudf::column_device_view& col)
   {
     if (col.nullable()) {
diff --git a/cpp/tests/iterator/optional_iterator_test_numeric.cu b/cpp/tests/iterator/optional_iterator_test_numeric.cu
index 90dc33ba628..313fd1358f6 100644
--- a/cpp/tests/iterator/optional_iterator_test_numeric.cu
+++ b/cpp/tests/iterator/optional_iterator_test_numeric.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/cpp/tests/iterator/value_iterator_test.cuh b/cpp/tests/iterator/value_iterator_test.cuh
index f8dab90b2b5..3a7ef075a41 100644
--- a/cpp/tests/iterator/value_iterator_test.cuh
+++ b/cpp/tests/iterator/value_iterator_test.cuh
@@ -13,13 +13,14 @@
  * the License.
  */
 #include <tests/iterator/iterator_tests.cuh>
+#include "cudf/detail/utilities/vector_factories.hpp"
 
 // tests for non-null iterator (pointer of device array)
 template <typename T>
 void non_null_iterator(IteratorTest<T>& testFixture)
 {
   auto host_array = cudf::test::make_type_param_vector<T>({0, 6, 0, -14, 13, 64, -13, -20, 45});
-  thrust::device_vector<T> dev_array(host_array);
+  auto dev_array  = cudf::detail::make_device_uvector_sync(host_array);
 
   // calculate the expected value by CPU.
   thrust::host_vector<T> replaced_array(host_array);
diff --git a/cpp/tests/iterator/value_iterator_test_strings.cu b/cpp/tests/iterator/value_iterator_test_strings.cu
index 2d343bf7cdd..f28067649fd 100644
--- a/cpp/tests/iterator/value_iterator_test_strings.cu
+++ b/cpp/tests/iterator/value_iterator_test_strings.cu
@@ -13,6 +13,9 @@
  * the License.
  */
 #include <tests/iterator/iterator_tests.cuh>
+#include "cudf/detail/utilities/vector_factories.hpp"
+#include "rmm/cuda_stream_view.hpp"
+#include "rmm/device_uvector.hpp"
 
 auto strings_to_string_views(std::vector<std::string>& input_strings)
 {
@@ -21,15 +24,14 @@ auto strings_to_string_views(std::vector<std::string>& input_strings)
   std::vector<int32_t> offsets;
   std::tie(chars, offsets) = cudf::test::detail::make_chars_and_offsets(
     input_strings.begin(), input_strings.end(), all_valid);
-  thrust::device_vector<char> dev_chars(chars);
-  char* c_start = thrust::raw_pointer_cast(dev_chars.data());
+  auto dev_chars = cudf::detail::make_device_uvector_sync(chars);
 
   // calculate the expected value by CPU. (but contains device pointers)
-  std::vector<cudf::string_view> replaced_array(input_strings.size());
+  thrust::host_vector<cudf::string_view> replaced_array(input_strings.size());
   std::transform(thrust::counting_iterator<size_t>(0),
                  thrust::counting_iterator<size_t>(replaced_array.size()),
                  replaced_array.begin(),
-                 [c_start, offsets](auto i) {
+                 [c_start = dev_chars.begin(), offsets](auto i) {
                    return cudf::string_view(c_start + offsets[i], offsets[i + 1] - offsets[i]);
                  });
   return std::make_tuple(std::move(dev_chars), replaced_array);
@@ -41,11 +43,10 @@ struct StringIteratorTest : public IteratorTest<cudf::string_view> {
 TEST_F(StringIteratorTest, string_view_null_iterator)
 {
   using T = cudf::string_view;
-  // T init = T{"", 0};
   std::string zero("zero");
   // the char data has to be in GPU
-  thrust::device_vector<char> initmsg(zero.begin(), zero.end());
-  T init = T{initmsg.data().get(), int(initmsg.size())};
+  auto initmsg = cudf::detail::make_device_uvector_sync(zero);
+  T init       = T{initmsg.data(), int(initmsg.size())};
 
   // data and valid arrays
   std::vector<std::string> host_values(
@@ -60,9 +61,7 @@ TEST_F(StringIteratorTest, string_view_null_iterator)
                  replaced_strings.begin(),
                  [zero](auto s, auto b) { return b ? s : zero; });
 
-  thrust::device_vector<char> dev_chars;
-  thrust::host_vector<T> replaced_array(host_values.size());
-  std::tie(dev_chars, replaced_array) = strings_to_string_views(replaced_strings);
+  auto [dev_chars, replaced_array] = strings_to_string_views(replaced_strings);
 
   // create a column with bool vector
   cudf::test::strings_column_wrapper w_col(
@@ -81,16 +80,14 @@ TEST_F(StringIteratorTest, string_view_no_null_iterator)
   // T init = T{"", 0};
   std::string zero("zero");
   // the char data has to be in GPU
-  thrust::device_vector<char> initmsg(zero.begin(), zero.end());
-  T init = T{initmsg.data().get(), int(initmsg.size())};
+  auto initmsg = cudf::detail::make_device_uvector_sync(zero);
+  T init       = T{initmsg.data(), int(initmsg.size())};
 
   // data array
   std::vector<std::string> host_values(
     {"one", "two", "three", "four", "five", "six", "eight", "nine"});
 
-  thrust::device_vector<char> dev_chars;
-  thrust::host_vector<T> all_array(host_values.size());
-  std::tie(dev_chars, all_array) = strings_to_string_views(host_values);
+  auto [dev_chars, all_array] = strings_to_string_views(host_values);
 
   // create a column with bool vector
   cudf::test::strings_column_wrapper w_col(host_values.begin(), host_values.end());
@@ -107,15 +104,13 @@ TEST_F(StringIteratorTest, string_scalar_iterator)
   // T init = T{"", 0};
   std::string zero("zero");
   // the char data has to be in GPU
-  thrust::device_vector<char> initmsg(zero.begin(), zero.end());
-  T init = T{initmsg.data().get(), int(initmsg.size())};
+  auto initmsg = cudf::detail::make_device_uvector_sync(zero);
+  T init       = T{initmsg.data(), int(initmsg.size())};
 
   // data array
   std::vector<std::string> host_values(100, zero);
 
-  thrust::device_vector<char> dev_chars;
-  thrust::host_vector<T> all_array(host_values.size());
-  std::tie(dev_chars, all_array) = strings_to_string_views(host_values);
+  auto [dev_chars, all_array] = strings_to_string_views(host_values);
 
   // calculate the expected value by CPU.
   thrust::host_vector<thrust::pair<T, bool>> value_and_validity(host_values.size());
diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp
index fce9e77dc55..9c66ccd4623 100644
--- a/cpp/tests/reductions/reduction_tests.cpp
+++ b/cpp/tests/reductions/reduction_tests.cpp
@@ -27,7 +27,6 @@
 #include <cudf/types.hpp>
 #include <cudf/wrappers/timestamps.hpp>
 
-#include <thrust/device_vector.h>
 #include <thrust/iterator/counting_iterator.h>
 
 #include <iostream>
diff --git a/cpp/tests/replace/clamp_test.cpp b/cpp/tests/replace/clamp_test.cpp
index 47599035709..499745c7dc4 100644
--- a/cpp/tests/replace/clamp_test.cpp
+++ b/cpp/tests/replace/clamp_test.cpp
@@ -135,8 +135,8 @@ TEST_F(ClampEmptyCaseTest, EmptyInput)
 
 template <class T>
 struct ClampTestNumeric : public cudf::test::BaseFixture {
-  std::unique_ptr<cudf::column> run_clamp(std::vector<T> input,
-                                          std::vector<cudf::size_type> input_validity,
+  std::unique_ptr<cudf::column> run_clamp(cudf::host_span<T const> input,
+                                          cudf::host_span<cudf::size_type const> input_validity,
                                           T lo,
                                           bool lo_validity,
                                           T hi,
diff --git a/cpp/tests/replace/replace_nulls_tests.cpp b/cpp/tests/replace/replace_nulls_tests.cpp
index f6937c29d04..cd19b0a70f3 100644
--- a/cpp/tests/replace/replace_nulls_tests.cpp
+++ b/cpp/tests/replace/replace_nulls_tests.cpp
@@ -267,10 +267,11 @@ void ReplaceNullsScalar(cudf::test::fixed_width_column_wrapper<T> input,
 
 TYPED_TEST(ReplaceNullsTest, ReplaceColumn)
 {
-  std::vector<TypeParam> inputColumn =
+  auto const inputColumn =
     cudf::test::make_type_param_vector<TypeParam>({0, 1, 2, 3, 4, 5, 6, 7, 8, 9});
-  std::vector<cudf::valid_type> inputValid{0, 0, 0, 0, 0, 1, 1, 1, 1, 1};
-  std::vector<TypeParam> replacementColumn =
+  auto const inputValid =
+    cudf::test::make_type_param_vector<cudf::valid_type>({0, 0, 0, 0, 0, 1, 1, 1, 1, 1});
+  auto const replacementColumn =
     cudf::test::make_type_param_vector<TypeParam>({0, 1, 2, 3, 4, 5, 6, 7, 8, 9});
 
   ReplaceNullsColumn<TypeParam>(cudf::test::fixed_width_column_wrapper<TypeParam>(
@@ -290,10 +291,11 @@ TYPED_TEST(ReplaceNullsTest, ReplaceColumn_Empty)
 
 TYPED_TEST(ReplaceNullsTest, ReplaceScalar)
 {
-  std::vector<TypeParam> inputColumn =
+  auto const inputColumn =
     cudf::test::make_type_param_vector<TypeParam>({0, 1, 2, 3, 4, 5, 6, 7, 8, 9});
-  std::vector<cudf::valid_type> inputValid{0, 0, 0, 0, 0, 1, 1, 1, 1, 1};
-  std::vector<TypeParam> expectedColumn =
+  auto const inputValid =
+    cudf::test::make_type_param_vector<cudf::valid_type>({0, 0, 0, 0, 0, 1, 1, 1, 1, 1});
+  auto const expectedColumn =
     cudf::test::make_type_param_vector<TypeParam>({1, 1, 1, 1, 1, 5, 6, 7, 8, 9});
   cudf::numeric_scalar<TypeParam> replacement(1);
 
@@ -308,13 +310,16 @@ TYPED_TEST(ReplaceNullsTest, ReplacementHasNulls)
 {
   using T = TypeParam;
 
-  std::vector<T> input_column   = cudf::test::make_type_param_vector<T>({7, 5, 6, 3, 1, 2, 8, 4});
-  std::vector<T> replace_column = cudf::test::make_type_param_vector<T>({4, 5, 6, 7, 8, 9, 0, 1});
-  std::vector<T> result_column  = cudf::test::make_type_param_vector<T>({4, 5, 6, 3, 1, 2, 8, 4});
+  auto const input_column   = cudf::test::make_type_param_vector<T>({7, 5, 6, 3, 1, 2, 8, 4});
+  auto const replace_column = cudf::test::make_type_param_vector<T>({4, 5, 6, 7, 8, 9, 0, 1});
+  auto const result_column  = cudf::test::make_type_param_vector<T>({4, 5, 6, 3, 1, 2, 8, 4});
 
-  std::vector<cudf::valid_type> input_valid{0, 0, 1, 1, 1, 1, 1, 1};
-  std::vector<cudf::valid_type> replace_valid{1, 0, 1, 1, 1, 1, 1, 1};
-  std::vector<cudf::valid_type> result_valid{1, 0, 1, 1, 1, 1, 1, 1};
+  auto const input_valid =
+    cudf::test::make_type_param_vector<cudf::valid_type>({0, 0, 1, 1, 1, 1, 1, 1});
+  auto const replace_valid =
+    cudf::test::make_type_param_vector<cudf::valid_type>({1, 0, 1, 1, 1, 1, 1, 1});
+  auto const result_valid =
+    cudf::test::make_type_param_vector<cudf::valid_type>({1, 0, 1, 1, 1, 1, 1, 1});
 
   ReplaceNullsColumn<T>(cudf::test::fixed_width_column_wrapper<T>(
                           input_column.begin(), input_column.end(), input_valid.begin()),
@@ -375,11 +380,9 @@ void TestReplaceNullsWithPolicy(cudf::test::fixed_width_column_wrapper<T> input,
 
 TYPED_TEST(ReplaceNullsPolicyTest, PrecedingFill)
 {
-  std::vector<TypeParam> col =
-    cudf::test::make_type_param_vector<TypeParam>({42, 2, 1, -10, 20, -30});
-  std::vector<cudf::valid_type> mask =
-    cudf::test::make_type_param_vector<cudf::valid_type>({1, 0, 0, 1, 0, 1});
-  std::vector<TypeParam> expect_col =
+  auto const col  = cudf::test::make_type_param_vector<TypeParam>({42, 2, 1, -10, 20, -30});
+  auto const mask = cudf::test::make_type_param_vector<cudf::valid_type>({1, 0, 0, 1, 0, 1});
+  auto const expect_col =
     cudf::test::make_type_param_vector<TypeParam>({42, 42, 42, -10, -10, -30});
 
   TestReplaceNullsWithPolicy(
@@ -391,11 +394,9 @@ TYPED_TEST(ReplaceNullsPolicyTest, PrecedingFill)
 
 TYPED_TEST(ReplaceNullsPolicyTest, FollowingFill)
 {
-  std::vector<TypeParam> col =
-    cudf::test::make_type_param_vector<TypeParam>({42, 2, 1, -10, 20, -30});
-  std::vector<cudf::valid_type> mask =
-    cudf::test::make_type_param_vector<cudf::valid_type>({1, 0, 0, 1, 0, 1});
-  std::vector<TypeParam> expect_col =
+  auto const col  = cudf::test::make_type_param_vector<TypeParam>({42, 2, 1, -10, 20, -30});
+  auto const mask = cudf::test::make_type_param_vector<cudf::valid_type>({1, 0, 0, 1, 0, 1});
+  auto const expect_col =
     cudf::test::make_type_param_vector<TypeParam>({42, -10, -10, -10, -30, -30});
 
   TestReplaceNullsWithPolicy(
@@ -407,13 +408,10 @@ TYPED_TEST(ReplaceNullsPolicyTest, FollowingFill)
 
 TYPED_TEST(ReplaceNullsPolicyTest, PrecedingFillLeadingNulls)
 {
-  std::vector<TypeParam> col = cudf::test::make_type_param_vector<TypeParam>({1, 2, 3, 4, 5});
-  std::vector<cudf::valid_type> mask =
-    cudf::test::make_type_param_vector<cudf::valid_type>({0, 0, 1, 0, 1});
-  std::vector<TypeParam> expect_col =
-    cudf::test::make_type_param_vector<TypeParam>({1, 2, 3, 3, 5});
-  std::vector<cudf::valid_type> expect_mask =
-    cudf::test::make_type_param_vector<cudf::valid_type>({0, 0, 1, 1, 1});
+  auto const col         = cudf::test::make_type_param_vector<TypeParam>({1, 2, 3, 4, 5});
+  auto const mask        = cudf::test::make_type_param_vector<cudf::valid_type>({0, 0, 1, 0, 1});
+  auto const expect_col  = cudf::test::make_type_param_vector<TypeParam>({1, 2, 3, 3, 5});
+  auto const expect_mask = cudf::test::make_type_param_vector<cudf::valid_type>({0, 0, 1, 1, 1});
 
   TestReplaceNullsWithPolicy(
     cudf::test::fixed_width_column_wrapper<TypeParam>(col.begin(), col.end(), mask.begin()),
@@ -424,13 +422,10 @@ TYPED_TEST(ReplaceNullsPolicyTest, PrecedingFillLeadingNulls)
 
 TYPED_TEST(ReplaceNullsPolicyTest, FollowingFillTrailingNulls)
 {
-  std::vector<TypeParam> col = cudf::test::make_type_param_vector<TypeParam>({1, 2, 3, 4, 5});
-  std::vector<cudf::valid_type> mask =
-    cudf::test::make_type_param_vector<cudf::valid_type>({1, 0, 1, 0, 0});
-  std::vector<TypeParam> expect_col =
-    cudf::test::make_type_param_vector<TypeParam>({1, 3, 3, 4, 5});
-  std::vector<cudf::valid_type> expect_mask =
-    cudf::test::make_type_param_vector<cudf::valid_type>({1, 1, 1, 0, 0});
+  auto const col         = cudf::test::make_type_param_vector<TypeParam>({1, 2, 3, 4, 5});
+  auto const mask        = cudf::test::make_type_param_vector<cudf::valid_type>({1, 0, 1, 0, 0});
+  auto const expect_col  = cudf::test::make_type_param_vector<TypeParam>({1, 3, 3, 4, 5});
+  auto const expect_mask = cudf::test::make_type_param_vector<cudf::valid_type>({1, 1, 1, 0, 0});
 
   TestReplaceNullsWithPolicy(
     cudf::test::fixed_width_column_wrapper<TypeParam>(col.begin(), col.end(), mask.begin()),
diff --git a/cpp/tests/replace/replace_tests.cpp b/cpp/tests/replace/replace_tests.cpp
index 32ddf11f16d..58ef08f6052 100644
--- a/cpp/tests/replace/replace_tests.cpp
+++ b/cpp/tests/replace/replace_tests.cpp
@@ -25,17 +25,17 @@
 
 #include <cudf/detail/iterator.cuh>
 #include <cudf/dictionary/encode.hpp>
+#include <cudf/fixed_point/fixed_point.hpp>
 #include <cudf/null_mask.hpp>
 #include <cudf/replace.hpp>
-#include "cudf/fixed_point/fixed_point.hpp"
 
-#include <thrust/device_vector.h>
 #include <thrust/iterator/transform_iterator.h>
 
 #include <gtest/gtest.h>
 #include <cstdlib>
 #include <iostream>
 #include <vector>
+#include "cudf/types.hpp"
 
 struct ReplaceErrorTest : public cudf::test::BaseFixture {
 };
@@ -315,13 +315,12 @@ struct ReplaceTest : cudf::test::BaseFixture {
  * @param print Optionally print the set of columns for debug
  */
 template <typename T>
-void test_replace(
-  std::vector<T> const& input_column,
-  std::vector<T> const& values_to_replace_column,
-  std::vector<T> const& replacement_values_column,
-  std::vector<cudf::valid_type> const& input_column_valid       = std::vector<cudf::valid_type>{},
-  std::vector<cudf::valid_type> const& replacement_values_valid = std::vector<cudf::valid_type>{},
-  bool print                                                    = false)
+void test_replace(cudf::host_span<T const> input_column,
+                  cudf::host_span<T const> values_to_replace_column,
+                  cudf::host_span<T const> replacement_values_column,
+                  cudf::host_span<cudf::valid_type const> input_column_valid       = {},
+                  cudf::host_span<cudf::valid_type const> replacement_values_valid = {},
+                  bool print                                                       = false)
 {
   cudf::test::fixed_width_column_wrapper<T> _input_column(input_column.begin(), input_column.end());
   if (input_column_valid.size() > 0) {
@@ -346,9 +345,10 @@ void test_replace(
                     _input_column, _values_to_replace_column, _replacement_values_column));
 
   /* computing the expected result */
-  std::vector<T> reference_result(input_column);
-  std::vector<bool> isReplaced(reference_result.size(), false);
-  std::vector<cudf::valid_type> expected_valid(input_column_valid);
+  thrust::host_vector<T> reference_result(input_column.begin(), input_column.end());
+  thrust::host_vector<bool> isReplaced(reference_result.size(), false);
+  thrust::host_vector<cudf::valid_type> expected_valid(input_column_valid.begin(),
+                                                       input_column_valid.end());
   if (replacement_values_valid.size() > 0 && 0 == input_column_valid.size()) {
     expected_valid.assign(input_column.size(), true);
   }
@@ -396,10 +396,10 @@ TYPED_TEST_CASE(ReplaceTest, Types);
 // Simple test, replacing all even replacement_values_column
 TYPED_TEST(ReplaceTest, ReplaceEvenPosition)
 {
-  using T                     = TypeParam;
-  std::vector<T> input_column = cudf::test::make_type_param_vector<T>({1, 2, 3, 4, 5, 6, 7, 8});
-  std::vector<T> values_to_replace_column  = cudf::test::make_type_param_vector<T>({2, 6, 4, 8});
-  std::vector<T> replacement_values_column = cudf::test::make_type_param_vector<T>({0, 4, 2, 6});
+  using T                 = TypeParam;
+  auto const input_column = cudf::test::make_type_param_vector<T>({1, 2, 3, 4, 5, 6, 7, 8});
+  auto const values_to_replace_column  = cudf::test::make_type_param_vector<T>({2, 6, 4, 8});
+  auto const replacement_values_column = cudf::test::make_type_param_vector<T>({0, 4, 2, 6});
 
   test_replace<T>(input_column, values_to_replace_column, replacement_values_column);
 }
@@ -407,10 +407,10 @@ TYPED_TEST(ReplaceTest, ReplaceEvenPosition)
 // Similar test as ReplaceEvenPosition, but with unordered data
 TYPED_TEST(ReplaceTest, Unordered)
 {
-  using T                     = TypeParam;
-  std::vector<T> input_column = cudf::test::make_type_param_vector<T>({7, 5, 6, 3, 1, 2, 8, 4});
-  std::vector<T> values_to_replace_column  = cudf::test::make_type_param_vector<T>({2, 6, 4, 8});
-  std::vector<T> replacement_values_column = cudf::test::make_type_param_vector<T>({0, 4, 2, 6});
+  using T                 = TypeParam;
+  auto const input_column = cudf::test::make_type_param_vector<T>({7, 5, 6, 3, 1, 2, 8, 4});
+  auto const values_to_replace_column  = cudf::test::make_type_param_vector<T>({2, 6, 4, 8});
+  auto const replacement_values_column = cudf::test::make_type_param_vector<T>({0, 4, 2, 6});
 
   test_replace<T>(input_column, values_to_replace_column, replacement_values_column);
 }
@@ -418,10 +418,10 @@ TYPED_TEST(ReplaceTest, Unordered)
 // Testing with Nothing To Replace
 TYPED_TEST(ReplaceTest, NothingToReplace)
 {
-  using T                     = TypeParam;
-  std::vector<T> input_column = cudf::test::make_type_param_vector<T>({7, 5, 6, 3, 1, 2, 8, 4});
-  std::vector<T> values_to_replace_column  = cudf::test::make_type_param_vector<T>({10, 11, 12});
-  std::vector<T> replacement_values_column = cudf::test::make_type_param_vector<T>({15, 16, 17});
+  using T                 = TypeParam;
+  auto const input_column = cudf::test::make_type_param_vector<T>({7, 5, 6, 3, 1, 2, 8, 4});
+  auto const values_to_replace_column  = cudf::test::make_type_param_vector<T>({10, 11, 12});
+  auto const replacement_values_column = cudf::test::make_type_param_vector<T>({15, 16, 17});
 
   test_replace<T>(input_column, values_to_replace_column, replacement_values_column);
 }
@@ -430,9 +430,9 @@ TYPED_TEST(ReplaceTest, NothingToReplace)
 TYPED_TEST(ReplaceTest, EmptyData)
 {
   using T = TypeParam;
-  std::vector<T> input_column{{}};
-  std::vector<T> values_to_replace_column  = cudf::test::make_type_param_vector<T>({10, 11, 12});
-  std::vector<T> replacement_values_column = cudf::test::make_type_param_vector<T>({15, 16, 17});
+  thrust::host_vector<T> input_column{{}};
+  auto const values_to_replace_column  = cudf::test::make_type_param_vector<T>({10, 11, 12});
+  auto const replacement_values_column = cudf::test::make_type_param_vector<T>({15, 16, 17});
 
   test_replace<T>(input_column, values_to_replace_column, replacement_values_column);
 }
@@ -440,10 +440,10 @@ TYPED_TEST(ReplaceTest, EmptyData)
 // Testing with empty Replace
 TYPED_TEST(ReplaceTest, EmptyReplace)
 {
-  using T                     = TypeParam;
-  std::vector<T> input_column = cudf::test::make_type_param_vector<T>({7, 5, 6, 3, 1, 2, 8, 4});
-  std::vector<T> values_to_replace_column{};
-  std::vector<T> replacement_values_column{};
+  using T                 = TypeParam;
+  auto const input_column = cudf::test::make_type_param_vector<T>({7, 5, 6, 3, 1, 2, 8, 4});
+  thrust::host_vector<T> values_to_replace_column{};
+  thrust::host_vector<T> replacement_values_column{};
 
   test_replace<T>(input_column, values_to_replace_column, replacement_values_column);
 }
@@ -451,11 +451,12 @@ TYPED_TEST(ReplaceTest, EmptyReplace)
 // Testing with input column containing nulls
 TYPED_TEST(ReplaceTest, NullsInData)
 {
-  using T                     = TypeParam;
-  std::vector<T> input_column = cudf::test::make_type_param_vector<T>({7, 5, 6, 3, 1, 2, 8, 4});
-  std::vector<cudf::valid_type> input_column_valid{1, 1, 1, 0, 0, 1, 1, 1};
-  std::vector<T> values_to_replace_column  = cudf::test::make_type_param_vector<T>({2, 6, 4, 8});
-  std::vector<T> replacement_values_column = cudf::test::make_type_param_vector<T>({0, 4, 2, 6});
+  using T                 = TypeParam;
+  auto const input_column = cudf::test::make_type_param_vector<T>({7, 5, 6, 3, 1, 2, 8, 4});
+  auto const input_column_valid =
+    cudf::test::make_type_param_vector<cudf::valid_type>({1, 1, 1, 0, 0, 1, 1, 1});
+  auto const values_to_replace_column  = cudf::test::make_type_param_vector<T>({2, 6, 4, 8});
+  auto const replacement_values_column = cudf::test::make_type_param_vector<T>({0, 4, 2, 6});
 
   test_replace<T>(
     input_column, values_to_replace_column, replacement_values_column, input_column_valid);
@@ -464,11 +465,12 @@ TYPED_TEST(ReplaceTest, NullsInData)
 // Testing with replacement column containing nulls
 TYPED_TEST(ReplaceTest, NullsInNewValues)
 {
-  using T                     = TypeParam;
-  std::vector<T> input_column = cudf::test::make_type_param_vector<T>({7, 5, 6, 3, 1, 2, 8, 4});
-  std::vector<T> values_to_replace_column  = cudf::test::make_type_param_vector<T>({2, 6, 4, 8});
-  std::vector<T> replacement_values_column = cudf::test::make_type_param_vector<T>({0, 4, 2, 6});
-  std::vector<cudf::valid_type> replacement_values_valid{0, 1, 1, 1};
+  using T                 = TypeParam;
+  auto const input_column = cudf::test::make_type_param_vector<T>({7, 5, 6, 3, 1, 2, 8, 4});
+  auto const values_to_replace_column  = cudf::test::make_type_param_vector<T>({2, 6, 4, 8});
+  auto const replacement_values_column = cudf::test::make_type_param_vector<T>({0, 4, 2, 6});
+  auto const replacement_values_valid =
+    cudf::test::make_type_param_vector<cudf::valid_type>({0, 1, 1, 1});
 
   test_replace<TypeParam>(input_column,
                           values_to_replace_column,
@@ -480,12 +482,14 @@ TYPED_TEST(ReplaceTest, NullsInNewValues)
 // Testing with both replacement and input column containing nulls
 TYPED_TEST(ReplaceTest, NullsInBoth)
 {
-  using T                     = TypeParam;
-  std::vector<T> input_column = cudf::test::make_type_param_vector<T>({7, 5, 6, 3, 1, 2, 8, 4});
-  std::vector<cudf::valid_type> input_column_valid{1, 1, 1, 0, 0, 1, 1, 1};
-  std::vector<T> values_to_replace_column  = cudf::test::make_type_param_vector<T>({2, 6, 4, 8});
-  std::vector<T> replacement_values_column = cudf::test::make_type_param_vector<T>({0, 4, 2, 6});
-  std::vector<cudf::valid_type> replacement_values_valid{1, 1, 0, 1};
+  using T                 = TypeParam;
+  auto const input_column = cudf::test::make_type_param_vector<T>({7, 5, 6, 3, 1, 2, 8, 4});
+  auto const input_column_valid =
+    cudf::test::make_type_param_vector<cudf::valid_type>({1, 1, 1, 0, 0, 1, 1, 1});
+  auto const values_to_replace_column  = cudf::test::make_type_param_vector<T>({2, 6, 4, 8});
+  auto const replacement_values_column = cudf::test::make_type_param_vector<T>({0, 4, 2, 6});
+  auto const replacement_values_valid =
+    cudf::test::make_type_param_vector<cudf::valid_type>({1, 1, 0, 1});
 
   test_replace<TypeParam>(input_column,
                           values_to_replace_column,
diff --git a/cpp/tests/rolling/rolling_test.cpp b/cpp/tests/rolling/rolling_test.cpp
index 33171b269ce..c22acf6b022 100644
--- a/cpp/tests/rolling/rolling_test.cpp
+++ b/cpp/tests/rolling/rolling_test.cpp
@@ -603,8 +603,7 @@ TYPED_TEST_CASE(RollingTest, cudf::test::FixedWidthTypesWithoutFixedPoint);
 TYPED_TEST(RollingTest, SimpleStatic)
 {
   // https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.rolling.html
-  const std::vector<TypeParam> col_data =
-    cudf::test::make_type_param_vector<TypeParam>({0, 1, 2, 0, 4});
+  auto const col_data              = cudf::test::make_type_param_vector<TypeParam>({0, 1, 2, 0, 4});
   const std::vector<bool> col_mask = {1, 1, 1, 0, 1};
 
   fixed_width_column_wrapper<TypeParam> input(col_data.begin(), col_data.end(), col_mask.begin());
@@ -632,8 +631,7 @@ TYPED_TEST(RollingTest, NegativeWindowSizes)
 TYPED_TEST(RollingTest, SimpleDynamic)
 {
   // https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.rolling.html
-  const std::vector<TypeParam> col_data =
-    cudf::test::make_type_param_vector<TypeParam>({0, 1, 2, 0, 4});
+  auto const col_data              = cudf::test::make_type_param_vector<TypeParam>({0, 1, 2, 0, 4});
   const std::vector<bool> col_mask = {1, 1, 1, 0, 1};
 
   fixed_width_column_wrapper<TypeParam> input(col_data.begin(), col_data.end(), col_mask.begin());
@@ -647,8 +645,7 @@ TYPED_TEST(RollingTest, SimpleDynamic)
 // this is a special test to check the volatile count variable issue (see rolling.cu for detail)
 TYPED_TEST(RollingTest, VolatileCount)
 {
-  const std::vector<TypeParam> col_data =
-    cudf::test::make_type_param_vector<TypeParam>({8, 70, 45, 20, 59, 80});
+  auto const col_data = cudf::test::make_type_param_vector<TypeParam>({8, 70, 45, 20, 59, 80});
   const std::vector<bool> col_mask = {1, 1, 0, 0, 1, 0};
 
   fixed_width_column_wrapper<TypeParam> input(col_data.begin(), col_data.end(), col_mask.begin());
diff --git a/cpp/tests/scalar/scalar_device_view_test.cu b/cpp/tests/scalar/scalar_device_view_test.cu
index c501071ccbe..d0b6b0db44a 100644
--- a/cpp/tests/scalar/scalar_device_view_test.cu
+++ b/cpp/tests/scalar/scalar_device_view_test.cu
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include <cudf/detail/utilities/vector_factories.hpp>
 #include <cudf/scalar/scalar.hpp>
 #include <cudf/scalar/scalar_device_view.cuh>
 #include <cudf/strings/string_view.cuh>
@@ -24,8 +25,6 @@
 #include <cudf_test/type_list_utilities.hpp>
 #include <cudf_test/type_lists.hpp>
 
-#include <rmm/device_vector.hpp>
-
 #include <thrust/sequence.h>
 #include <random>
 
@@ -126,10 +125,9 @@ TEST_F(StringScalarDeviceViewTest, Value)
 
   auto scalar_device_view = cudf::get_scalar_device_view(s);
   rmm::device_scalar<bool> result;
-  rmm::device_vector<char> value_v(value.begin(), value.end());
+  auto value_v = cudf::detail::make_device_uvector_sync(value);
 
-  test_string_value<<<1, 1>>>(
-    scalar_device_view, value_v.data().get(), value.size(), result.data());
+  test_string_value<<<1, 1>>>(scalar_device_view, value_v.data(), value.size(), result.data());
   CHECK_CUDA(0);
 
   EXPECT_TRUE(result.value());
diff --git a/cpp/tests/stream_compaction/drop_duplicates_tests.cpp b/cpp/tests/stream_compaction/drop_duplicates_tests.cpp
index ee4a4df38e8..373cd50fb1f 100644
--- a/cpp/tests/stream_compaction/drop_duplicates_tests.cpp
+++ b/cpp/tests/stream_compaction/drop_duplicates_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -41,7 +41,7 @@ TYPED_TEST(DistinctCountCommon, NoNull)
 {
   using T = TypeParam;
 
-  std::vector<T> input = cudf::test::make_type_param_vector<T>(
+  auto const input = cudf::test::make_type_param_vector<T>(
     {1, 3, 3, 4, 31, 1, 8, 2, 0, 4, 1, 4, 10, 40, 31, 42, 0, 42, 8, 5, 4});
 
   cudf::test::fixed_width_column_wrapper<T> input_col(input.begin(), input.end());
@@ -55,9 +55,9 @@ TYPED_TEST(DistinctCountCommon, TableNoNull)
 {
   using T = TypeParam;
 
-  std::vector<T> input1 = cudf::test::make_type_param_vector<T>(
+  auto const input1 = cudf::test::make_type_param_vector<T>(
     {1, 3, 3, 4, 31, 1, 8, 2, 0, 4, 1, 4, 10, 40, 31, 42, 0, 42, 8, 5, 4});
-  std::vector<T> input2 = cudf::test::make_type_param_vector<T>(
+  auto const input2 = cudf::test::make_type_param_vector<T>(
     {3, 3, 4, 31, 1, 8, 5, 0, 4, 1, 4, 10, 40, 31, 42, 0, 42, 8, 5, 4, 1});
 
   std::vector<std::pair<T, T>> pair_input;
diff --git a/cpp/tests/strings/array_tests.cu b/cpp/tests/strings/array_tests.cu
index 2d1ae1a862d..a4d8ecb2bec 100644
--- a/cpp/tests/strings/array_tests.cu
+++ b/cpp/tests/strings/array_tests.cu
@@ -14,10 +14,16 @@
  * limitations under the License.
  */
 
+#include <tests/strings/utilities.h>
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_utilities.hpp>
+#include <cudf_test/column_wrapper.hpp>
+
 #include <cudf/column/column.hpp>
 #include <cudf/column/column_factories.hpp>
 #include <cudf/copying.hpp>
 #include <cudf/detail/iterator.cuh>
+#include <cudf/detail/utilities/vector_factories.hpp>
 #include <cudf/scalar/scalar.hpp>
 #include <cudf/sorting.hpp>
 #include <cudf/strings/detail/copying.hpp>
@@ -27,10 +33,7 @@
 #include <cudf/table/table_view.hpp>
 #include <cudf/utilities/error.hpp>
 
-#include <tests/strings/utilities.h>
-#include <cudf_test/base_fixture.hpp>
-#include <cudf_test/column_utilities.hpp>
-#include <cudf_test/column_wrapper.hpp>
+#include <rmm/cuda_stream_view.hpp>
 
 #include <thrust/iterator/constant_iterator.h>
 
@@ -192,9 +195,8 @@ TEST_F(StringsColumnTest, Scatter)
     thrust::make_transform_iterator(h_strings2.begin(), [](auto str) { return str != nullptr; }));
   auto source = cudf::strings_column_view(strings2);
 
-  rmm::device_vector<int32_t> scatter_map;
-  scatter_map.push_back(4);
-  scatter_map.push_back(1);
+  std::vector<int32_t> h_scatter_map({4, 1});
+  auto scatter_map = cudf::detail::make_device_uvector_sync(h_scatter_map);
 
   auto source_column = cudf::column_device_view::create(source.parent());
   auto begin =
@@ -220,9 +222,8 @@ TEST_F(StringsColumnTest, ScatterScalar)
     thrust::make_transform_iterator(h_strings1.begin(), [](auto str) { return str != nullptr; }));
   auto target = cudf::strings_column_view(strings1);
 
-  rmm::device_vector<int32_t> scatter_map;
-  scatter_map.push_back(0);
-  scatter_map.push_back(5);
+  std::vector<int32_t> h_scatter_map({0, 5});
+  auto scatter_map = cudf::detail::make_device_uvector_sync(h_scatter_map);
 
   cudf::string_scalar scalar("__");
   auto begin = thrust::make_constant_iterator(cudf::string_view(scalar.data(), scalar.size()));
@@ -246,7 +247,7 @@ TEST_F(StringsColumnTest, ScatterZeroSizeStringsColumn)
   cudf::column_view values(cudf::data_type{cudf::type_id::STRING}, 0, nullptr, nullptr, 0);
   auto target = cudf::strings_column_view(values);
 
-  rmm::device_vector<int32_t> scatter_map;
+  rmm::device_uvector<int32_t> scatter_map(0, rmm::cuda_stream_default);
   cudf::string_scalar scalar("");
   auto begin = thrust::make_constant_iterator(cudf::string_view(scalar.data(), scalar.size()));
 
diff --git a/cpp/tests/strings/factories_test.cu b/cpp/tests/strings/factories_test.cu
index be592478b13..854194d13c8 100644
--- a/cpp/tests/strings/factories_test.cu
+++ b/cpp/tests/strings/factories_test.cu
@@ -28,6 +28,7 @@
 #include <cudf_test/column_utilities.hpp>
 #include <cudf_test/column_wrapper.hpp>
 
+#include <rmm/cuda_stream_view.hpp>
 #include <rmm/device_uvector.hpp>
 
 #include <thrust/execution_policy.h>
@@ -55,7 +56,7 @@ TEST_F(StringsFactoriesTest, CreateColumnFromPair)
     memsize += *itr ? (cudf::size_type)strlen(*itr) : 0;
   cudf::size_type count = (cudf::size_type)h_test_strings.size();
   thrust::host_vector<char> h_buffer(memsize);
-  thrust::device_vector<char> d_buffer(memsize);
+  rmm::device_uvector<char> d_buffer(memsize, rmm::cuda_stream_default);
   thrust::host_vector<thrust::pair<const char*, cudf::size_type>> strings(count);
   thrust::host_vector<cudf::size_type> h_offsets(count + 1);
   cudf::size_type offset = 0;
@@ -69,14 +70,13 @@ TEST_F(StringsFactoriesTest, CreateColumnFromPair)
     } else {
       cudf::size_type length = (cudf::size_type)strlen(str);
       memcpy(h_buffer.data() + offset, str, length);
-      strings[idx] =
-        thrust::pair<const char*, cudf::size_type>{d_buffer.data().get() + offset, length};
+      strings[idx] = thrust::pair<const char*, cudf::size_type>{d_buffer.data() + offset, length};
       offset += length;
     }
     h_offsets[idx + 1] = offset;
   }
-  rmm::device_vector<thrust::pair<const char*, cudf::size_type>> d_strings(strings);
-  CUDA_TRY(cudaMemcpy(d_buffer.data().get(), h_buffer.data(), memsize, cudaMemcpyHostToDevice));
+  auto d_strings = cudf::detail::make_device_uvector_sync(strings);
+  CUDA_TRY(cudaMemcpy(d_buffer.data(), h_buffer.data(), memsize, cudaMemcpyHostToDevice));
   auto column = cudf::make_strings_column(d_strings);
   EXPECT_EQ(column->type(), cudf::data_type{cudf::type_id::STRING});
   EXPECT_EQ(column->null_count(), nulls);
@@ -133,11 +133,12 @@ TEST_F(StringsFactoriesTest, CreateColumnFromOffsets)
       null_count++;
     h_offsets[idx + 1] = offset;
   }
+
   std::vector<cudf::bitmask_type> h_nulls{h_null_mask};
-  rmm::device_vector<char> d_buffer(h_buffer);
-  rmm::device_vector<cudf::size_type> d_offsets(h_offsets);
-  rmm::device_vector<cudf::bitmask_type> d_nulls(h_nulls);
-  auto column = cudf::make_strings_column(d_buffer, d_offsets, d_nulls, null_count);
+  auto d_buffer  = cudf::detail::make_device_uvector_sync(h_buffer);
+  auto d_offsets = cudf::detail::make_device_uvector_sync(h_offsets);
+  auto d_nulls   = cudf::detail::make_device_uvector_sync(h_nulls);
+  auto column    = cudf::make_strings_column(d_buffer, d_offsets, d_nulls, null_count);
   EXPECT_EQ(column->type(), cudf::data_type{cudf::type_id::STRING});
   EXPECT_EQ(column->null_count(), null_count);
   EXPECT_EQ(2, column->num_children());
@@ -169,14 +170,15 @@ TEST_F(StringsFactoriesTest, CreateScalar)
 
 TEST_F(StringsFactoriesTest, EmptyStringsColumn)
 {
-  rmm::device_vector<char> d_chars;
-  rmm::device_vector<cudf::size_type> d_offsets(1, 0);
-  rmm::device_vector<cudf::bitmask_type> d_nulls;
+  rmm::device_uvector<char> d_chars{0, rmm::cuda_stream_default};
+  auto d_offsets = cudf::detail::make_zeroed_device_uvector_sync<cudf::size_type>(1);
+  rmm::device_uvector<cudf::bitmask_type> d_nulls{0, rmm::cuda_stream_default};
 
   auto results = cudf::make_strings_column(d_chars, d_offsets, d_nulls, 0);
   cudf::test::expect_strings_empty(results->view());
 
-  rmm::device_vector<thrust::pair<const char*, cudf::size_type>> d_strings;
+  rmm::device_uvector<thrust::pair<const char*, cudf::size_type>> d_strings{
+    0, rmm::cuda_stream_default};
   results = cudf::make_strings_column(d_strings);
   cudf::test::expect_strings_empty(results->view());
 }
@@ -224,7 +226,7 @@ TEST_F(StringsFactoriesTest, StringPairWithNullsAndEmpty)
     {0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1});
 
   auto d_column = cudf::column_device_view::create(data);
-  rmm::device_vector<string_pair> pairs(d_column->size());
+  rmm::device_uvector<string_pair> pairs(d_column->size(), rmm::cuda_stream_default);
   thrust::transform(thrust::device,
                     d_column->pair_begin<cudf::string_view, true>(),
                     d_column->pair_end<cudf::string_view, true>(),
diff --git a/cpp/tests/strings/hash_string.cu b/cpp/tests/strings/hash_string.cu
index 629c02a989e..023d648cfdf 100644
--- a/cpp/tests/strings/hash_string.cu
+++ b/cpp/tests/strings/hash_string.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,15 +14,21 @@
  * limitations under the License.
  */
 
+#include "./utilities.h"
+#include "rmm/exec_policy.hpp"
+
 #include <cudf/column/column_device_view.cuh>
 #include <cudf/column/column_factories.hpp>
 #include <cudf/detail/utilities/hash_functions.cuh>
+#include <cudf/detail/utilities/vector_factories.hpp>
 #include <cudf/strings/strings_column_view.hpp>
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/column_wrapper.hpp>
-#include "./utilities.h"
 
-#include <thrust/execution_policy.h>
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/device_uvector.hpp>
+#include <rmm/exec_policy.hpp>
+
 #include <thrust/transform.h>
 #include <vector>
 
@@ -58,8 +64,8 @@ TEST_F(StringsHashTest, HashTest)
   auto strings_column = cudf::column_device_view::create(strings_view.parent());
   auto d_view         = *strings_column;
 
-  thrust::device_vector<uint32_t> d_values(strings_view.size());
-  thrust::transform(thrust::device,
+  rmm::device_uvector<uint32_t> d_values(strings_view.size(), rmm::cuda_stream_default);
+  thrust::transform(rmm::exec_policy(),
                     thrust::make_counting_iterator<uint32_t>(0),
                     thrust::make_counting_iterator<uint32_t>(strings_view.size()),
                     d_values.begin(),
@@ -67,6 +73,6 @@ TEST_F(StringsHashTest, HashTest)
 
   uint32_t h_expected[] = {
     2739798893, 2739798893, 3506676360, 1891213601, 3778137224, 0, 0, 1551088011};
-  thrust::host_vector<uint32_t> h_values(d_values);
+  auto h_values = cudf::detail::make_host_vector_sync(d_values);
   for (uint32_t idx = 0; idx < h_values.size(); ++idx) EXPECT_EQ(h_values[idx], h_expected[idx]);
 }
diff --git a/cpp/tests/strings/integers_tests.cu b/cpp/tests/strings/integers_tests.cpp
similarity index 96%
rename from cpp/tests/strings/integers_tests.cu
rename to cpp/tests/strings/integers_tests.cpp
index f15116ae4c2..d5f17954c50 100644
--- a/cpp/tests/strings/integers_tests.cu
+++ b/cpp/tests/strings/integers_tests.cpp
@@ -18,11 +18,15 @@
 #include <cudf/strings/strings_column_view.hpp>
 
 #include <tests/strings/utilities.h>
+#include <cudf/detail/utilities/vector_factories.hpp>
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/column_utilities.hpp>
 #include <cudf_test/column_wrapper.hpp>
 #include <cudf_test/type_lists.hpp>
 
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/device_uvector.hpp>
+
 #include <string>
 #include <vector>
 
@@ -287,16 +291,16 @@ TYPED_TEST_CASE(StringsIntegerConvertTest, cudf::test::IntegralTypesNotBool);
 
 TYPED_TEST(StringsIntegerConvertTest, FromToInteger)
 {
-  thrust::device_vector<TypeParam> d_integers(255);
-  thrust::sequence(
-    thrust::device, d_integers.begin(), d_integers.end(), -(TypeParam)(d_integers.size() / 2));
-  d_integers.push_back(std::numeric_limits<TypeParam>::min());
-  d_integers.push_back(std::numeric_limits<TypeParam>::max());
+  thrust::host_vector<TypeParam> h_integers(255);
+  std::iota(h_integers.begin(), h_integers.end(), -(TypeParam)(h_integers.size() / 2));
+  h_integers.push_back(std::numeric_limits<TypeParam>::min());
+  h_integers.push_back(std::numeric_limits<TypeParam>::max());
+  auto d_integers    = cudf::detail::make_device_uvector_sync(h_integers);
   auto integers      = cudf::make_numeric_column(cudf::data_type{cudf::type_to_id<TypeParam>()},
                                             (cudf::size_type)d_integers.size());
   auto integers_view = integers->mutable_view();
   CUDA_TRY(cudaMemcpy(integers_view.data<TypeParam>(),
-                      d_integers.data().get(),
+                      d_integers.data(),
                       d_integers.size() * sizeof(TypeParam),
                       cudaMemcpyDeviceToDevice));
   integers_view.set_null_count(0);
@@ -304,7 +308,8 @@ TYPED_TEST(StringsIntegerConvertTest, FromToInteger)
   // convert to strings
   auto results_strings = cudf::strings::from_integers(integers->view());
 
-  thrust::host_vector<TypeParam> h_integers(d_integers);
+  // copy back to host
+  h_integers = cudf::detail::make_host_vector_sync(d_integers);
   std::vector<std::string> h_strings;
   for (auto itr = h_integers.begin(); itr != h_integers.end(); ++itr)
     h_strings.push_back(std::to_string(*itr));
diff --git a/cpp/tests/table/table_view_tests.cu b/cpp/tests/table/table_view_tests.cu
index d700892de78..1fb4b88c79e 100644
--- a/cpp/tests/table/table_view_tests.cu
+++ b/cpp/tests/table/table_view_tests.cu
@@ -20,6 +20,7 @@
 #include <cudf_test/type_lists.hpp>
 
 #include <cudf/column/column_view.hpp>
+#include <cudf/detail/utilities/vector_factories.hpp>
 #include <cudf/table/row_operators.cuh>
 #include <cudf/table/table_device_view.cuh>
 #include <cudf/table/table_view.hpp>
@@ -42,10 +43,10 @@ void row_comparison(cudf::table_view input1,
 
   auto device_table_1 = cudf::table_device_view::create(input1, stream);
   auto device_table_2 = cudf::table_device_view::create(input2, stream);
-  rmm::device_vector<cudf::order> d_column_order(column_order);
+  auto d_column_order = cudf::detail::make_device_uvector_sync(column_order);
 
   auto comparator = cudf::row_lexicographic_comparator<false>(
-    *device_table_1, *device_table_2, d_column_order.data().get());
+    *device_table_1, *device_table_2, d_column_order.data());
 
   thrust::transform(rmm::exec_policy(stream),
                     thrust::make_counting_iterator(0),
diff --git a/cpp/tests/transform/row_bit_count_test.cu b/cpp/tests/transform/row_bit_count_test.cu
index 313113a58e0..1431710f3ca 100644
--- a/cpp/tests/transform/row_bit_count_test.cu
+++ b/cpp/tests/transform/row_bit_count_test.cu
@@ -45,7 +45,7 @@ TYPED_TEST(RowBitCountTyped, SimpleTypes)
   // expect size of the type per row
   auto expected = make_fixed_width_column(data_type{type_id::INT32}, 16);
   cudf::mutable_column_view mcv(*expected);
-  thrust::fill(rmm::exec_policy(rmm::cuda_stream_default),
+  thrust::fill(rmm::exec_policy(),
                mcv.begin<size_type>(),
                mcv.end<size_type>(),
                sizeof(device_storage_type_t<T>) * CHAR_BIT);
@@ -68,7 +68,7 @@ TYPED_TEST(RowBitCountTyped, SimpleTypesWithNulls)
   // expect size of the type + 1 bit per row
   auto expected = make_fixed_width_column(data_type{type_id::INT32}, 16);
   cudf::mutable_column_view mcv(*expected);
-  thrust::fill(rmm::exec_policy(rmm::cuda_stream_default),
+  thrust::fill(rmm::exec_policy(),
                mcv.begin<size_type>(),
                mcv.end<size_type>(),
                (sizeof(device_storage_type_t<T>) * CHAR_BIT) + 1);
@@ -488,7 +488,7 @@ TEST_F(RowBitCount, Table)
   auto expected   = cudf::make_fixed_width_column(data_type{type_id::INT32}, t.num_rows());
   cudf::mutable_column_view mcv(*expected);
   thrust::transform(
-    rmm::exec_policy(rmm::cuda_stream_default),
+    rmm::exec_policy(),
     thrust::make_counting_iterator(0),
     thrust::make_counting_iterator(0) + t.num_rows(),
     mcv.begin<size_type>(),
@@ -591,4 +591,4 @@ TEST_F(RowBitCount, EmptyTable)
     auto result = cudf::row_bit_count(empty);
     CUDF_EXPECTS(result != nullptr && result->size() == 0, "Expected an empty column");
   }
-}
\ No newline at end of file
+}
diff --git a/cpp/tests/types/type_dispatcher_test.cu b/cpp/tests/types/type_dispatcher_test.cu
index 3924fa1ac19..bc690e04f21 100644
--- a/cpp/tests/types/type_dispatcher_test.cu
+++ b/cpp/tests/types/type_dispatcher_test.cu
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include <cudf/detail/utilities/vector_factories.hpp>
 #include <cudf/types.hpp>
 #include <cudf/utilities/type_dispatcher.hpp>
 #include <cudf_test/base_fixture.hpp>
@@ -21,7 +22,8 @@
 #include <cudf_test/type_list_utilities.hpp>
 #include <cudf_test/type_lists.hpp>
 
-#include <thrust/device_vector.h>
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/device_uvector.hpp>
 
 struct DispatcherTest : public cudf::test::BaseFixture {
 };
@@ -67,10 +69,10 @@ __global__ void dispatch_test_kernel(cudf::type_id id, bool* d_result)
 
 TYPED_TEST(TypedDispatcherTest, DeviceDispatch)
 {
-  thrust::device_vector<bool> result(1, false);
-  dispatch_test_kernel<<<1, 1>>>(cudf::type_to_id<TypeParam>(), result.data().get());
+  auto result = cudf::detail::make_zeroed_device_uvector_sync<bool>(1);
+  dispatch_test_kernel<<<1, 1>>>(cudf::type_to_id<TypeParam>(), result.data());
   CUDA_TRY(cudaDeviceSynchronize());
-  EXPECT_EQ(true, result[0]);
+  EXPECT_EQ(true, result.front_element(rmm::cuda_stream_default));
 }
 
 struct IdDispatcherTest : public DispatcherTest, public testing::WithParamInterface<cudf::type_id> {
@@ -128,11 +130,11 @@ __global__ void double_dispatch_test_kernel(cudf::type_id id1, cudf::type_id id2
 
 TYPED_TEST(TypedDoubleDispatcherTest, DeviceDoubleDispatch)
 {
-  thrust::device_vector<bool> result(1, false);
+  auto result = cudf::detail::make_zeroed_device_uvector_sync<bool>(1);
   double_dispatch_test_kernel<<<1, 1>>>(
-    cudf::type_to_id<TypeParam>(), cudf::type_to_id<TypeParam>(), result.data().get());
+    cudf::type_to_id<TypeParam>(), cudf::type_to_id<TypeParam>(), result.data());
   CUDA_TRY(cudaDeviceSynchronize());
-  EXPECT_EQ(true, result[0]);
+  EXPECT_EQ(true, result.front_element(rmm::cuda_stream_default));
 }
 
 struct IdDoubleDispatcherTest : public DispatcherTest,
diff --git a/cpp/tests/utilities/column_utilities.cu b/cpp/tests/utilities/column_utilities.cu
index f17446ca1dc..8aac7370b13 100644
--- a/cpp/tests/utilities/column_utilities.cu
+++ b/cpp/tests/utilities/column_utilities.cu
@@ -42,6 +42,8 @@
 
 #include <numeric>
 #include <sstream>
+#include "cudf/detail/utilities/vector_factories.hpp"
+#include "rmm/cuda_stream_view.hpp"
 
 namespace cudf {
 namespace test {
@@ -170,7 +172,7 @@ class corresponding_rows_not_equivalent {
 };
 
 // Stringify the inconsistent values resulted from the comparison of two columns element-wise
-std::string stringify_column_differences(thrust::device_vector<int> const& differences,
+std::string stringify_column_differences(cudf::device_span<int const> differences,
                                          column_view const& lhs,
                                          column_view const& rhs,
                                          bool print_all_differences,
@@ -182,10 +184,10 @@ std::string stringify_column_differences(thrust::device_vector<int> const& diffe
     std::ostringstream buffer;
     buffer << depth_str << "differences:" << std::endl;
 
-    // thrust may crash if a device_vector is passed to fixed_width_column_wrapper,
+    // thrust may crash if a device vector is passed to fixed_width_column_wrapper,
     // thus we construct fixed_width_column_wrapper from a host_vector instead
-    thrust::host_vector<int> h_differences(differences);
-    auto source_table = cudf::table_view({lhs, rhs});
+    auto h_differences = cudf::detail::make_host_vector_sync(differences);
+    auto source_table  = cudf::table_view({lhs, rhs});
     auto diff_column =
       fixed_width_column_wrapper<int32_t>(h_differences.begin(), h_differences.end());
     auto diff_table = cudf::gather(source_table, diff_column);
@@ -222,16 +224,18 @@ struct column_comparator_impl {
                                               corresponding_rows_unequal,
                                               corresponding_rows_not_equivalent>;
 
-    auto differences = thrust::device_vector<int>(lhs.size());  // worst case: everything different
-    auto diff_iter   = thrust::copy_if(thrust::device,
+    auto differences = rmm::device_uvector<int>(
+      lhs.size(), rmm::cuda_stream_default);  // worst case: everything different
+    auto diff_iter = thrust::copy_if(rmm::exec_policy(),
                                      thrust::make_counting_iterator(0),
                                      thrust::make_counting_iterator(lhs.size()),
                                      differences.begin(),
                                      ComparatorType(*d_lhs, *d_rhs));
 
-    differences.resize(thrust::distance(differences.begin(), diff_iter));  // shrink back down
+    differences.resize(thrust::distance(differences.begin(), diff_iter),
+                       rmm::cuda_stream_default);  // shrink back down
 
-    if (not differences.empty())
+    if (not differences.is_empty())
       GTEST_FAIL() << stringify_column_differences(
         differences, lhs, rhs, print_all_differences, depth);
   }
@@ -256,7 +260,7 @@ struct column_comparator_impl<list_view, check_exact_equality> {
     if (lhs_l.is_empty()) { return; }
 
     // worst case - everything is different
-    thrust::device_vector<int> differences(lhs.size());
+    rmm::device_uvector<int> differences(lhs.size(), rmm::cuda_stream_default);
 
     // TODO : determine how equals/equivalency should work for columns with divergent underlying
     // data, but equivalent null masks. Example:
@@ -307,7 +311,7 @@ struct column_comparator_impl<list_view, check_exact_equality> {
       });
 
     auto diff_iter = thrust::copy_if(
-      thrust::device,
+      rmm::exec_policy(),
       thrust::make_counting_iterator(0),
       thrust::make_counting_iterator(lhs_l.size() + 1),
       differences.begin(),
@@ -323,9 +327,10 @@ struct column_comparator_impl<list_view, check_exact_equality> {
         return lhs_offsets[index] == rhs_offsets[index] ? false : true;
       });
 
-    differences.resize(thrust::distance(differences.begin(), diff_iter));  // shrink back down
+    differences.resize(thrust::distance(differences.begin(), diff_iter),
+                       rmm::cuda_stream_default);  // shrink back down
 
-    if (not differences.empty())
+    if (not differences.is_empty())
       GTEST_FAIL() << stringify_column_differences(
         differences, lhs, rhs, print_all_differences, depth);
 
@@ -522,7 +527,7 @@ std::string nested_offsets_to_string(NestedColumnView const& c, std::string cons
   // the first offset value to normalize everything against
   size_type first =
     cudf::detail::get_value<size_type>(offsets, c.offset(), rmm::cuda_stream_default);
-  rmm::device_vector<size_type> shifted_offsets(output_size);
+  rmm::device_uvector<size_type> shifted_offsets(output_size, rmm::cuda_stream_default);
 
   // normalize the offset values for the column offset
   size_type const* d_offsets = offsets.head<size_type>() + c.offset();
@@ -533,7 +538,7 @@ std::string nested_offsets_to_string(NestedColumnView const& c, std::string cons
     shifted_offsets.begin(),
     [first] __device__(int32_t offset) { return static_cast<size_type>(offset - first); });
 
-  thrust::host_vector<size_type> h_shifted_offsets(shifted_offsets);
+  auto const h_shifted_offsets = cudf::detail::make_host_vector_sync(shifted_offsets);
   std::ostringstream buffer;
   for (size_t idx = 0; idx < h_shifted_offsets.size(); idx++) {
     buffer << h_shifted_offsets[idx];
diff --git a/cpp/tests/wrappers/timestamps_test.cu b/cpp/tests/wrappers/timestamps_test.cu
index d1c0ad5d840..64d9ad6fc3f 100644
--- a/cpp/tests/wrappers/timestamps_test.cu
+++ b/cpp/tests/wrappers/timestamps_test.cu
@@ -29,6 +29,8 @@
 #include <cudf/wrappers/timestamps.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
+#include <rmm/device_uvector.hpp>
+#include <rmm/exec_policy.hpp>
 
 template <typename T>
 struct ChronoColumnTest : public cudf::test::BaseFixture {
@@ -88,9 +90,10 @@ TYPED_TEST(ChronoColumnTest, ChronoDurationsMatchPrimitiveRepresentation)
   auto primitive_col =
     fixed_width_column_wrapper<Rep>(chrono_col_data.begin(), chrono_col_data.end());
 
-  thrust::device_vector<int32_t> indices(this->size());
-  thrust::sequence(indices.begin(), indices.end());
-  EXPECT_TRUE(thrust::all_of(indices.begin(),
+  rmm::device_uvector<int32_t> indices(this->size(), rmm::cuda_stream_default);
+  thrust::sequence(rmm::exec_policy(), indices.begin(), indices.end());
+  EXPECT_TRUE(thrust::all_of(rmm::exec_policy(),
+                             indices.begin(),
                              indices.end(),
                              compare_chrono_elements_to_primitive_representation<T>{
                                *cudf::column_device_view::create(primitive_col),
@@ -141,10 +144,11 @@ TYPED_TEST(ChronoColumnTest, ChronosCanBeComparedInDeviceCode)
   auto chrono_rhs_col =
     generate_timestamps<T>(this->size(), time_point_ms(start_rhs), time_point_ms(stop_rhs_));
 
-  thrust::device_vector<int32_t> indices(this->size());
-  thrust::sequence(indices.begin(), indices.end());
+  rmm::device_uvector<int32_t> indices(this->size(), rmm::cuda_stream_default);
+  thrust::sequence(rmm::exec_policy(), indices.begin(), indices.end());
 
   EXPECT_TRUE(thrust::all_of(
+    rmm::exec_policy(),
     indices.begin(),
     indices.end(),
     compare_chrono_elements<TypeParam>{cudf::binary_operator::LESS,
@@ -152,6 +156,7 @@ TYPED_TEST(ChronoColumnTest, ChronosCanBeComparedInDeviceCode)
                                        *cudf::column_device_view::create(chrono_rhs_col)}));
 
   EXPECT_TRUE(thrust::all_of(
+    rmm::exec_policy(),
     indices.begin(),
     indices.end(),
     compare_chrono_elements<TypeParam>{cudf::binary_operator::GREATER,
@@ -159,6 +164,7 @@ TYPED_TEST(ChronoColumnTest, ChronosCanBeComparedInDeviceCode)
                                        *cudf::column_device_view::create(chrono_lhs_col)}));
 
   EXPECT_TRUE(thrust::all_of(
+    rmm::exec_policy(),
     indices.begin(),
     indices.end(),
     compare_chrono_elements<TypeParam>{cudf::binary_operator::LESS_EQUAL,
@@ -166,6 +172,7 @@ TYPED_TEST(ChronoColumnTest, ChronosCanBeComparedInDeviceCode)
                                        *cudf::column_device_view::create(chrono_lhs_col)}));
 
   EXPECT_TRUE(thrust::all_of(
+    rmm::exec_policy(),
     indices.begin(),
     indices.end(),
     compare_chrono_elements<TypeParam>{cudf::binary_operator::GREATER_EQUAL,