From 8d9d22231b983f0a85ce594dc9758ab6a6c09559 Mon Sep 17 00:00:00 2001 From: Paul Taylor Date: Mon, 29 Nov 2021 09:26:03 -0600 Subject: [PATCH 01/28] [FIX] Add `arrow_dataset` and `parquet` targets to build exports (#9491) This PR adds the `arrow_dataset` and `parquet` targets to the build export when Arrow is built from source by CPM, similar to what we have to do today for `arrow` and `arrow_cuda` targets. --- cpp/CMakeLists.txt | 21 ++++++ cpp/cmake/thirdparty/get_arrow.cmake | 96 +++++++++++++++++++++++----- 2 files changed, 100 insertions(+), 17 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 966728d7647..59dc3c74af2 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -733,6 +733,27 @@ set(install_code_string [=[ set(ArrowCUDA_DIR "${Arrow_DIR}") find_dependency(ArrowCUDA) +]=] +) + +if(CUDF_ENABLE_ARROW_PARQUET) + string( + APPEND + install_code_string + [=[ + if(NOT Parquet_DIR) + set(Parquet_DIR "${Arrow_DIR}") + endif() + set(ArrowDataset_DIR "${Arrow_DIR}") + find_dependency(ArrowDataset) + ]=] + ) +endif() + +string( + APPEND + install_code_string + [=[ if(testing IN_LIST cudf_FIND_COMPONENTS) enable_language(CUDA) if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/cudf-testing-dependencies.cmake") diff --git a/cpp/cmake/thirdparty/get_arrow.cmake b/cpp/cmake/thirdparty/get_arrow.cmake index 5fe37402096..ae1448da502 100644 --- a/cpp/cmake/thirdparty/get_arrow.cmake +++ b/cpp/cmake/thirdparty/get_arrow.cmake @@ -90,7 +90,7 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB rapids_cpm_find( Arrow ${VERSION} - GLOBAL_TARGETS arrow_shared arrow_cuda_shared + GLOBAL_TARGETS arrow_shared parquet_shared arrow_cuda_shared arrow_dataset_shared CPM_ARGS GIT_REPOSITORY https://github.com/apache/arrow.git GIT_TAG apache-arrow-${VERSION} @@ -142,6 +142,15 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB set(ArrowCUDA_DIR "${Arrow_DIR}") find_package(Arrow REQUIRED QUIET) 
find_package(ArrowCUDA REQUIRED QUIET) + if(ENABLE_PARQUET) + if(NOT Parquet_DIR) + # Set this to enable `find_package(Parquet)` + set(Parquet_DIR "${Arrow_DIR}") + endif() + # Set this to enable `find_package(ArrowDataset)` + set(ArrowDataset_DIR "${Arrow_DIR}") + find_package(ArrowDataset REQUIRED QUIET) + endif() elseif(Arrow_ADDED) # Copy these files so we can avoid adding paths in Arrow_BINARY_DIR to # target_include_directories. That defeats ccache. @@ -182,24 +191,15 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB endif() if(Arrow_ADDED) + set(arrow_code_string [=[ - if (TARGET cudf::arrow_shared AND (NOT TARGET arrow_shared)) - add_library(arrow_shared ALIAS cudf::arrow_shared) - endif() - if (TARGET cudf::arrow_static AND (NOT TARGET arrow_static)) - add_library(arrow_static ALIAS cudf::arrow_static) - endif() - ]=] - ) - set(arrow_cuda_code_string - [=[ - if (TARGET cudf::arrow_cuda_shared AND (NOT TARGET arrow_cuda_shared)) - add_library(arrow_cuda_shared ALIAS cudf::arrow_cuda_shared) - endif() - if (TARGET cudf::arrow_cuda_static AND (NOT TARGET arrow_cuda_static)) - add_library(arrow_cuda_static ALIAS cudf::arrow_cuda_static) - endif() + if (TARGET cudf::arrow_shared AND (NOT TARGET arrow_shared)) + add_library(arrow_shared ALIAS cudf::arrow_shared) + endif() + if (TARGET cudf::arrow_static AND (NOT TARGET arrow_static)) + add_library(arrow_static ALIAS cudf::arrow_static) + endif() ]=] ) @@ -212,6 +212,17 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB FINAL_CODE_BLOCK arrow_code_string ) + set(arrow_cuda_code_string + [=[ + if (TARGET cudf::arrow_cuda_shared AND (NOT TARGET arrow_cuda_shared)) + add_library(arrow_cuda_shared ALIAS cudf::arrow_cuda_shared) + endif() + if (TARGET cudf::arrow_cuda_static AND (NOT TARGET arrow_cuda_static)) + add_library(arrow_cuda_static ALIAS cudf::arrow_cuda_static) + endif() + ]=] + ) + rapids_export( BUILD ArrowCUDA VERSION ${VERSION} @@ -220,6 
+231,49 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB NAMESPACE cudf:: FINAL_CODE_BLOCK arrow_cuda_code_string ) + + if(ENABLE_PARQUET) + + set(arrow_dataset_code_string + [=[ + if (TARGET cudf::arrow_dataset_shared AND (NOT TARGET arrow_dataset_shared)) + add_library(arrow_dataset_shared ALIAS cudf::arrow_dataset_shared) + endif() + if (TARGET cudf::arrow_dataset_static AND (NOT TARGET arrow_dataset_static)) + add_library(arrow_dataset_static ALIAS cudf::arrow_dataset_static) + endif() + ]=] + ) + + rapids_export( + BUILD ArrowDataset + VERSION ${VERSION} + EXPORT_SET arrow_dataset_targets + GLOBAL_TARGETS arrow_dataset_shared arrow_dataset_static + NAMESPACE cudf:: + FINAL_CODE_BLOCK arrow_dataset_code_string + ) + + set(parquet_code_string + [=[ + if (TARGET cudf::parquet_shared AND (NOT TARGET parquet_shared)) + add_library(parquet_shared ALIAS cudf::parquet_shared) + endif() + if (TARGET cudf::parquet_static AND (NOT TARGET parquet_static)) + add_library(parquet_static ALIAS cudf::parquet_static) + endif() + ]=] + ) + + rapids_export( + BUILD Parquet + VERSION ${VERSION} + EXPORT_SET parquet_targets + GLOBAL_TARGETS parquet_shared parquet_static + NAMESPACE cudf:: + FINAL_CODE_BLOCK parquet_code_string + ) + endif() endif() # We generate the arrow-config and arrowcuda-config files when we built arrow locally, so always # do `find_dependency` @@ -230,10 +284,18 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB # ArrowCUDA_DIR to be where Arrow was found, since Arrow packages ArrowCUDA.config in a # non-standard location rapids_export_package(BUILD ArrowCUDA cudf-exports) + if(ENABLE_PARQUET) + rapids_export_package(BUILD Parquet cudf-exports) + rapids_export_package(BUILD ArrowDataset cudf-exports) + endif() include("${rapids-cmake-dir}/export/find_package_root.cmake") rapids_export_find_package_root(BUILD Arrow [=[${CMAKE_CURRENT_LIST_DIR}]=] cudf-exports) 
rapids_export_find_package_root(BUILD ArrowCUDA [=[${CMAKE_CURRENT_LIST_DIR}]=] cudf-exports) + if(ENABLE_PARQUET) + rapids_export_find_package_root(BUILD Parquet [=[${CMAKE_CURRENT_LIST_DIR}]=] cudf-exports) + rapids_export_find_package_root(BUILD ArrowDataset [=[${CMAKE_CURRENT_LIST_DIR}]=] cudf-exports) + endif() set(ARROW_FOUND "${ARROW_FOUND}" From a1ca8c1e408ac1791c4f4bae563e775bbddb5a29 Mon Sep 17 00:00:00 2001 From: Ashwin Srinath <3190405+shwina@users.noreply.github.com> Date: Mon, 29 Nov 2021 11:32:43 -0500 Subject: [PATCH 02/28] Use ptxcompiler to patch Numba at runtime to support CUDA enhanced compatibility. (#9687) --- conda/environments/cudf_dev_cuda11.0.yml | 3 +- conda/environments/cudf_dev_cuda11.2.yml | 3 +- conda/environments/cudf_dev_cuda11.5.yml | 3 +- conda/recipes/cudf/meta.yaml | 5 +- python/cudf/cudf/__init__.py | 11 ++++ .../cudf/tests/test_extension_compilation.py | 57 ++++++++++++------- 6 files changed, 56 insertions(+), 26 deletions(-) diff --git a/conda/environments/cudf_dev_cuda11.0.yml b/conda/environments/cudf_dev_cuda11.0.yml index e2ead779861..7c22b4d35e3 100644 --- a/conda/environments/cudf_dev_cuda11.0.yml +++ b/conda/environments/cudf_dev_cuda11.0.yml @@ -14,7 +14,7 @@ dependencies: - cmake>=3.20.1 - cmake_setuptools>=0.1.3 - python>=3.7,<3.9 - - numba>=0.53.1 + - numba>=0.54 - numpy - pandas>=1.0,<1.4.0dev0 - pyarrow=5.0.0=*cuda @@ -66,3 +66,4 @@ dependencies: - git+https://github.com/dask/distributed.git@main - git+https://github.com/python-streamz/streamz.git@master - pyorc + - ptxcompiler # [linux64] diff --git a/conda/environments/cudf_dev_cuda11.2.yml b/conda/environments/cudf_dev_cuda11.2.yml index 6146d84835a..0978ae7c8f9 100644 --- a/conda/environments/cudf_dev_cuda11.2.yml +++ b/conda/environments/cudf_dev_cuda11.2.yml @@ -14,7 +14,7 @@ dependencies: - cmake>=3.20.1 - cmake_setuptools>=0.1.3 - python>=3.7,<3.9 - - numba>=0.53.1 + - numba>=0.54 - numpy - pandas>=1.0,<1.4.0dev0 - pyarrow=5.0.0=*cuda @@ 
-66,3 +66,4 @@ dependencies: - git+https://github.com/dask/distributed.git@main - git+https://github.com/python-streamz/streamz.git@master - pyorc + - ptxcompiler # [linux64] diff --git a/conda/environments/cudf_dev_cuda11.5.yml b/conda/environments/cudf_dev_cuda11.5.yml index 043c81c9e01..d2d0a38c44e 100644 --- a/conda/environments/cudf_dev_cuda11.5.yml +++ b/conda/environments/cudf_dev_cuda11.5.yml @@ -14,7 +14,7 @@ dependencies: - cmake>=3.20.1 - cmake_setuptools>=0.1.3 - python>=3.7,<3.9 - - numba>=0.53.1 + - numba>=0.54 - numpy - pandas>=1.0,<1.4.0dev0 - pyarrow=5.0.0=*cuda @@ -66,3 +66,4 @@ dependencies: - git+https://github.com/dask/distributed.git@main - git+https://github.com/python-streamz/streamz.git@master - pyorc + - ptxcompiler # [linux64] diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml index 6d56b0c0c94..46eefbc825f 100644 --- a/conda/recipes/cudf/meta.yaml +++ b/conda/recipes/cudf/meta.yaml @@ -29,7 +29,7 @@ requirements: - python - cython >=0.29,<0.30 - setuptools - - numba >=0.53.1 + - numba >=0.54 - dlpack>=0.5,<0.6.0a0 - pyarrow 5.0.0 *cuda - libcudf {{ version }} @@ -41,7 +41,7 @@ requirements: - typing_extensions - pandas >=1.0,<1.4.0dev0 - cupy >=9.5.0,<10.0.0a0 - - numba >=0.53.1 + - numba >=0.54 - numpy - {{ pin_compatible('pyarrow', max_pin='x.x.x') }} *cuda - fastavro >=0.22.0 @@ -51,6 +51,7 @@ requirements: - nvtx >=0.2.1 - packaging - cachetools + - ptxcompiler # [linux64] # CUDA enhanced compatibility. 
See https://github.com/rapidsai/ptxcompiler test: # [linux64] requires: # [linux64] diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py index bc35551b5bd..b24e71e7785 100644 --- a/python/cudf/cudf/__init__.py +++ b/python/cudf/cudf/__init__.py @@ -102,6 +102,17 @@ from cudf.utils.dtypes import _NA_REP from cudf.utils.utils import set_allocator +try: + from ptxcompiler.patch import patch_numba_codegen_if_needed +except ImportError: + pass +else: + # Patch Numba to support CUDA enhanced compatibility. + # See https://github.com/rapidsai/ptxcompiler for + # details. + patch_numba_codegen_if_needed() + del patch_numba_codegen_if_needed + cuda.set_memory_manager(rmm.RMMNumbaManager) cupy.cuda.set_allocator(rmm.rmm_cupy_allocator) diff --git a/python/cudf/cudf/tests/test_extension_compilation.py b/python/cudf/cudf/tests/test_extension_compilation.py index 39fa7b11ce2..47c9448cf63 100644 --- a/python/cudf/cudf/tests/test_extension_compilation.py +++ b/python/cudf/cudf/tests/test_extension_compilation.py @@ -1,5 +1,6 @@ import operator +import cupy as cp import pytest from numba import cuda, types from numba.cuda import compile_ptx @@ -71,8 +72,8 @@ def test_execute_masked_binary(op, ty): def func(x, y): return op(x, y) - @cuda.jit(debug=True) - def test_kernel(x, y): + @cuda.jit + def test_kernel(x, y, err): # Reference result with unmasked value u = func(x, y) @@ -87,14 +88,22 @@ def test_kernel(x, y): # Check masks are as expected, and unmasked result matches masked # result if r0.valid: - raise RuntimeError("Expected r0 to be invalid") + # TODO: ideally, we would raise an exception here rather + # than return an "error code", and that is what the + # previous version of this (and below) tests did. But, + # Numba kernels cannot currently use `debug=True` with + # CUDA enhanced compatibility. Once a solution to that is + # reached, we should switch back to raising exceptions + # here. 
+ err[0] = 1 if not r1.valid: - raise RuntimeError("Expected r1 to be valid") + err[0] = 2 if u != r1.value: - print("Values: ", u, r1.value) - raise RuntimeError("u != r1.value") + err[0] = 3 - test_kernel[1, 1](1, 2) + err = cp.asarray([0], dtype="int8") + test_kernel[1, 1](1, 2, err) + assert err[0] == 0 @pytest.mark.parametrize("op", ops) @@ -187,18 +196,20 @@ def test_is_na(fn): device_fn = cuda.jit(device=True)(fn) - @cuda.jit(debug=True) - def test_kernel(): + @cuda.jit + def test_kernel(err): valid_is_na = device_fn(valid) invalid_is_na = device_fn(invalid) if valid_is_na: - raise RuntimeError("Valid masked value is NA and should not be") + err[0] = 1 if not invalid_is_na: - raise RuntimeError("Invalid masked value is not NA and should be") + err[0] = 2 - test_kernel[1, 1]() + err = cp.asarray([0], dtype="int8") + test_kernel[1, 1](err) + assert err[0] == 0 def func_lt_na(x): @@ -271,8 +282,8 @@ def test_na_masked_comparisons(fn, ty): device_fn = cuda.jit(device=True)(fn) - @cuda.jit(debug=True) - def test_kernel(): + @cuda.jit + def test_kernel(err): unmasked = ty(1) valid_masked = Masked(unmasked, True) invalid_masked = Masked(unmasked, False) @@ -281,12 +292,14 @@ def test_kernel(): invalid_cmp_na = device_fn(invalid_masked) if valid_cmp_na: - raise RuntimeError("Valid masked value compared True with NA") + err[0] = 1 if invalid_cmp_na: - raise RuntimeError("Invalid masked value compared True with NA") + err[0] = 2 - test_kernel[1, 1]() + err = cp.asarray([0], dtype="int8") + test_kernel[1, 1](err) + assert err[0] == 0 # xfail because scalars do not yet cast for a comparison to NA @@ -297,13 +310,15 @@ def test_na_scalar_comparisons(fn, ty): device_fn = cuda.jit(device=True)(fn) - @cuda.jit(debug=True) - def test_kernel(): + @cuda.jit + def test_kernel(err): unmasked = ty(1) unmasked_cmp_na = device_fn(unmasked) if unmasked_cmp_na: - raise RuntimeError("Unmasked value compared True with NA") + err[0] = 1 - test_kernel[1, 1]() + err = cp.asarray([0], 
dtype="int8") + test_kernel[1, 1](err) + assert err[0] == 0 From 0ebeffa4c8122cd1f54fe9fc05c4bec660b7e37e Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Tue, 23 Nov 2021 16:14:17 -0500 Subject: [PATCH 03/28] Only run runtime jit tests with CUDA 11.5 runtime CI runs all tests over a variety of different hardware. Tests that have to use NVRTC to re-compile cudf C++ code are only supported on CUDA 11.5+. --- cpp/tests/binaryop/binop-generic-ptx-test.cpp | 6 ++++ cpp/tests/binaryop/binop-integration-test.cpp | 6 ++++ cpp/tests/binaryop/binop-null-test.cpp | 8 +++++ cpp/tests/binaryop/util/runtime_support.h | 33 +++++++++++++++++++ 4 files changed, 53 insertions(+) create mode 100644 cpp/tests/binaryop/util/runtime_support.h diff --git a/cpp/tests/binaryop/binop-generic-ptx-test.cpp b/cpp/tests/binaryop/binop-generic-ptx-test.cpp index 6e35bdac41c..0b6cfdab498 100644 --- a/cpp/tests/binaryop/binop-generic-ptx-test.cpp +++ b/cpp/tests/binaryop/binop-generic-ptx-test.cpp @@ -21,11 +21,17 @@ #include #include +#include namespace cudf { namespace test { namespace binop { struct BinaryOperationGenericPTXTest : public BinaryOperationTest { + protected: + void SetUp() override + { + if (!can_do_runtime_jit()) { GTEST_SKIP() << "Skipping tests that require 11.5 runtime"; } + } }; TEST_F(BinaryOperationGenericPTXTest, CAdd_Vector_Vector_FP32_FP32_FP32) diff --git a/cpp/tests/binaryop/binop-integration-test.cpp b/cpp/tests/binaryop/binop-integration-test.cpp index 427a21512a3..21696a419ee 100644 --- a/cpp/tests/binaryop/binop-integration-test.cpp +++ b/cpp/tests/binaryop/binop-integration-test.cpp @@ -31,6 +31,7 @@ #include #include +#include #include "cudf/utilities/error.hpp" namespace cudf { @@ -40,6 +41,11 @@ namespace binop { constexpr debug_output_level verbosity{debug_output_level::ALL_ERRORS}; struct BinaryOperationIntegrationTest : public BinaryOperationTest { + protected: + void SetUp() override + { + if (!can_do_runtime_jit()) { GTEST_SKIP() << "Skipping tests 
that require 11.5 runtime"; } + } }; TEST_F(BinaryOperationIntegrationTest, Add_Scalar_Vector_SI32_FP32_SI64) diff --git a/cpp/tests/binaryop/binop-null-test.cpp b/cpp/tests/binaryop/binop-null-test.cpp index 25ec3b30834..b7e7702bd6c 100644 --- a/cpp/tests/binaryop/binop-null-test.cpp +++ b/cpp/tests/binaryop/binop-null-test.cpp @@ -23,6 +23,8 @@ #include #include +#include + namespace cudf { namespace test { namespace binop { @@ -52,6 +54,12 @@ struct BinaryOperationNullTest : public BinaryOperationTest { default: CUDF_FAIL("Unknown mask state " + std::to_string(static_cast(state))); } } + + protected: + void SetUp() override + { + if (!can_do_runtime_jit()) { GTEST_SKIP() << "Skipping tests that require 11.5 runtime"; } + } }; // namespace binop TEST_F(BinaryOperationNullTest, Scalar_Null_Vector_Valid) diff --git a/cpp/tests/binaryop/util/runtime_support.h b/cpp/tests/binaryop/util/runtime_support.h new file mode 100644 index 00000000000..b6cfdadee3c --- /dev/null +++ b/cpp/tests/binaryop/util/runtime_support.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * + * Copyright 2018-2019 BlazingDB, Inc. + * Copyright 2018 Christian Noboa Mardini + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + + +bool can_do_runtime_jit() +{ + // We require a CUDA NVRTC of 11.5+ to do runtime jit + // as we need support for __int128 + + int runtime = 0; + auto error_value = cudaRuntimeGetVersion(&runtime); + return (error_value == cudaSuccess) && (runtime >= 11050); +} From dfcb48d09a56daa226ec5962acb00ddcaadaf494 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Tue, 23 Nov 2021 16:22:08 -0500 Subject: [PATCH 04/28] Fix style issues found by CI --- cpp/tests/binaryop/binop-generic-ptx-test.cpp | 2 +- cpp/tests/binaryop/binop-integration-test.cpp | 2 +- cpp/tests/binaryop/util/runtime_support.h | 5 ++--- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/cpp/tests/binaryop/binop-generic-ptx-test.cpp b/cpp/tests/binaryop/binop-generic-ptx-test.cpp index 0b6cfdab498..f4407834786 100644 --- a/cpp/tests/binaryop/binop-generic-ptx-test.cpp +++ b/cpp/tests/binaryop/binop-generic-ptx-test.cpp @@ -20,8 +20,8 @@ #include #include -#include #include +#include namespace cudf { namespace test { diff --git a/cpp/tests/binaryop/binop-integration-test.cpp b/cpp/tests/binaryop/binop-integration-test.cpp index 21696a419ee..4181b20220b 100644 --- a/cpp/tests/binaryop/binop-integration-test.cpp +++ b/cpp/tests/binaryop/binop-integration-test.cpp @@ -30,8 +30,8 @@ #include #include -#include #include +#include #include "cudf/utilities/error.hpp" namespace cudf { diff --git a/cpp/tests/binaryop/util/runtime_support.h b/cpp/tests/binaryop/util/runtime_support.h index b6cfdadee3c..a7ee0c3a391 100644 --- a/cpp/tests/binaryop/util/runtime_support.h +++ b/cpp/tests/binaryop/util/runtime_support.h @@ -21,13 +21,12 @@ #include - -bool can_do_runtime_jit() +inline bool can_do_runtime_jit() { // We require a CUDA NVRTC of 11.5+ to do runtime jit // as we need support for __int128 - int runtime = 0; + int runtime = 0; auto error_value = cudaRuntimeGetVersion(&runtime); return (error_value == cudaSuccess) && (runtime >= 11050); } From 
bbf137eb16ba69e14de9924acd9ca47997da0324 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Wed, 24 Nov 2021 09:58:53 -0500 Subject: [PATCH 05/28] WIP: disable csv test --- cpp/tests/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 98bade7e15f..39ca11d8184 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -192,7 +192,7 @@ ConfigureTest( # * io tests -------------------------------------------------------------------------------------- ConfigureTest(DECOMPRESSION_TEST io/comp/decomp_test.cpp) -ConfigureTest(CSV_TEST io/csv_test.cpp) +# ConfigureTest(CSV_TEST io/csv_test.cpp) ConfigureTest(ORC_TEST io/orc_test.cpp) ConfigureTest(PARQUET_TEST io/parquet_test.cpp) ConfigureTest(JSON_TEST io/json_test.cpp) From a24d2a841e9ab0b94f39418790b4f4b1d88234ff Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Wed, 24 Nov 2021 11:15:22 -0500 Subject: [PATCH 06/28] WIP: disable all io tests --- cpp/tests/CMakeLists.txt | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 39ca11d8184..f60a2361752 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -192,15 +192,11 @@ ConfigureTest( # * io tests -------------------------------------------------------------------------------------- ConfigureTest(DECOMPRESSION_TEST io/comp/decomp_test.cpp) -# ConfigureTest(CSV_TEST io/csv_test.cpp) -ConfigureTest(ORC_TEST io/orc_test.cpp) -ConfigureTest(PARQUET_TEST io/parquet_test.cpp) -ConfigureTest(JSON_TEST io/json_test.cpp) -ConfigureTest(ARROW_IO_SOURCE_TEST io/arrow_io_source_test.cpp) -ConfigureTest(MULTIBYTE_SPLIT_TEST io/text/multibyte_split_test.cpp) -if(CUDF_ENABLE_ARROW_S3) - target_compile_definitions(ARROW_IO_SOURCE_TEST PRIVATE "S3_ENABLED") -endif() +# ConfigureTest(CSV_TEST io/csv_test.cpp) ConfigureTest(ORC_TEST io/orc_test.cpp) +# ConfigureTest(PARQUET_TEST 
io/parquet_test.cpp) ConfigureTest(JSON_TEST io/json_test.cpp) +# ConfigureTest(ARROW_IO_SOURCE_TEST io/arrow_io_source_test.cpp) ConfigureTest(MULTIBYTE_SPLIT_TEST +# io/text/multibyte_split_test.cpp) if(CUDF_ENABLE_ARROW_S3) +# target_compile_definitions(ARROW_IO_SOURCE_TEST PRIVATE "S3_ENABLED") endif() # ################################################################################################## # * sort tests ------------------------------------------------------------------------------------ From f6143952bf27ccd3c29c7b015099d9db0fdf0528 Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Thu, 25 Nov 2021 23:06:33 +0530 Subject: [PATCH 07/28] remove jit integration tests --- cpp/tests/CMakeLists.txt | 1 - .../binop-compiled-fixed_point-test.cpp | 40 + cpp/tests/binaryop/binop-compiled-test.cpp | 2 + cpp/tests/binaryop/binop-integration-test.cpp | 2722 ----------------- cpp/tests/binaryop/binop-null-test.cpp | 32 +- .../binaryop/binop-verify-input-test.cpp | 12 +- 6 files changed, 64 insertions(+), 2745 deletions(-) delete mode 100644 cpp/tests/binaryop/binop-integration-test.cpp diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index f60a2361752..8ae31d7d74d 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -164,7 +164,6 @@ ConfigureTest( BINARY_TEST binaryop/binop-verify-input-test.cpp binaryop/binop-null-test.cpp - binaryop/binop-integration-test.cpp binaryop/binop-compiled-test.cpp binaryop/binop-compiled-fixed_point-test.cpp binaryop/binop-generic-ptx-test.cpp diff --git a/cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp b/cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp index 7925f0dd618..5020fbf898b 100644 --- a/cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp +++ b/cpp/tests/binaryop/binop-compiled-fixed_point-test.cpp @@ -684,4 +684,44 @@ TYPED_TEST(FixedPointCompiledTestBothReps, FixedPointBinaryOpThrows) cudf::logic_error); } +template +struct FixedPointTest_64_128_Reps : public 
cudf::test::BaseFixture { +}; + +using Decimal64And128Types = cudf::test::Types; +TYPED_TEST_SUITE(FixedPointTest_64_128_Reps, Decimal64And128Types); + +TYPED_TEST(FixedPointTest_64_128_Reps, FixedPoint_64_128_ComparisonTests) +{ + using namespace numeric; + using decimalXX = TypeParam; + using RepType = device_storage_type_t; + + for (auto const rhs_value : {10000000000000000, 100000000000000000}) { + auto const lhs = fp_wrapper{{33041, 97290, 36438, 25379, 48473}, scale_type{2}}; + auto const rhs = make_fixed_point_scalar(rhs_value, scale_type{0}); + auto const trues = wrapper{{1, 1, 1, 1, 1}}; + auto const falses = wrapper{{0, 0, 0, 0, 0}}; + auto const bool_type = cudf::data_type{type_id::BOOL8}; + + auto const a = cudf::binary_operation(lhs, *rhs, binary_operator::LESS, bool_type); + auto const b = cudf::binary_operation(lhs, *rhs, binary_operator::LESS_EQUAL, bool_type); + auto const c = cudf::binary_operation(lhs, *rhs, binary_operator::GREATER, bool_type); + auto const d = cudf::binary_operation(lhs, *rhs, binary_operator::GREATER_EQUAL, bool_type); + auto const e = cudf::binary_operation(*rhs, lhs, binary_operator::GREATER, bool_type); + auto const f = cudf::binary_operation(*rhs, lhs, binary_operator::GREATER_EQUAL, bool_type); + auto const g = cudf::binary_operation(*rhs, lhs, binary_operator::LESS, bool_type); + auto const h = cudf::binary_operation(*rhs, lhs, binary_operator::LESS_EQUAL, bool_type); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(a->view(), trues); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(b->view(), trues); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(c->view(), falses); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(d->view(), falses); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(e->view(), trues); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(f->view(), trues); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(g->view(), falses); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(h->view(), falses); + } +} + } // namespace cudf::test::binop diff --git a/cpp/tests/binaryop/binop-compiled-test.cpp 
b/cpp/tests/binaryop/binop-compiled-test.cpp index 7a9f6135bcd..37212c30d80 100644 --- a/cpp/tests/binaryop/binop-compiled-test.cpp +++ b/cpp/tests/binaryop/binop-compiled-test.cpp @@ -679,3 +679,5 @@ TEST_F(BinaryOperationCompiledTest_NullOpsString, NullMin_Vector_Vector) } } // namespace cudf::test::binop + +CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/binaryop/binop-integration-test.cpp b/cpp/tests/binaryop/binop-integration-test.cpp deleted file mode 100644 index 4181b20220b..00000000000 --- a/cpp/tests/binaryop/binop-integration-test.cpp +++ /dev/null @@ -1,2722 +0,0 @@ -/* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. - * - * Copyright 2018-2019 BlazingDB, Inc. - * Copyright 2018 Christian Noboa Mardini - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include "cudf/utilities/error.hpp" - -namespace cudf { -namespace test { -namespace binop { - -constexpr debug_output_level verbosity{debug_output_level::ALL_ERRORS}; - -struct BinaryOperationIntegrationTest : public BinaryOperationTest { - protected: - void SetUp() override - { - if (!can_do_runtime_jit()) { GTEST_SKIP() << "Skipping tests that require 11.5 runtime"; } - } -}; - -TEST_F(BinaryOperationIntegrationTest, Add_Scalar_Vector_SI32_FP32_SI64) -{ - using TypeOut = int32_t; - using TypeLhs = float; - using TypeRhs = int64_t; - - using ADD = cudf::library::operation::Add; - - auto lhs = make_random_wrapped_scalar(); - auto rhs = make_random_wrapped_column(10000); - - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, ADD()); -} - -TEST_F(BinaryOperationIntegrationTest, Add_Vector_Vector_SI32_FP32_FP32) -{ - using TypeOut = int32_t; - using TypeLhs = float; - using TypeRhs = float; - - using ADD = cudf::library::operation::Add; - - auto lhs = make_random_wrapped_column(10000); - auto rhs = make_random_wrapped_column(10000); - - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, ADD()); -} - -TEST_F(BinaryOperationIntegrationTest, Sub_Scalar_Vector_SI32_FP32_FP32) -{ - using TypeOut = int32_t; - using TypeLhs = float; - using TypeRhs = int64_t; - - using SUB = cudf::library::operation::Sub; - - auto lhs = make_random_wrapped_scalar(); - auto rhs = make_random_wrapped_column(10000); - - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::SUB, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, SUB()); -} - -TEST_F(BinaryOperationIntegrationTest, Add_Vector_Scalar_SI08_SI16_SI32) -{ - using TypeOut = int8_t; - using 
TypeLhs = int16_t; - using TypeRhs = int32_t; - - using ADD = cudf::library::operation::Add; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_scalar(); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, ADD()); -} - -TEST_F(BinaryOperationIntegrationTest, Add_Vector_Vector_SI32_FP64_SI08) -{ - using TypeOut = int32_t; - using TypeLhs = double; - using TypeRhs = int8_t; - - using ADD = cudf::library::operation::Add; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, ADD()); -} - -TEST_F(BinaryOperationIntegrationTest, Sub_Vector_Vector_SI64) -{ - using TypeOut = int64_t; - using TypeLhs = int64_t; - using TypeRhs = int64_t; - - using SUB = cudf::library::operation::Sub; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::SUB, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, SUB()); -} - -TEST_F(BinaryOperationIntegrationTest, Sub_Vector_Scalar_SI64_FP64_SI32) -{ - using TypeOut = int64_t; - using TypeLhs = double; - using TypeRhs = int32_t; - - using SUB = cudf::library::operation::Sub; - - auto lhs = make_random_wrapped_column(10000); - auto rhs = make_random_wrapped_scalar(); - - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::SUB, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, SUB()); -} - -TEST_F(BinaryOperationIntegrationTest, Sub_Vector_Vector_TimepointD_DurationS_TimepointUS) -{ - using TypeOut = cudf::timestamp_us; - using TypeLhs = cudf::timestamp_D; - using TypeRhs = cudf::duration_s; - - using SUB = cudf::library::operation::Sub; - - auto lhs = make_random_wrapped_column(100); - auto rhs = 
make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::SUB, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, SUB()); -} - -TEST_F(BinaryOperationIntegrationTest, Sub_Vector_Scalar_TimepointD_TimepointS_DurationS) -{ - using TypeOut = cudf::duration_s; - using TypeLhs = cudf::timestamp_D; - using TypeRhs = cudf::timestamp_s; - - using SUB = cudf::library::operation::Sub; - - auto lhs = make_random_wrapped_column(100); - auto rhs = cudf::scalar_type_t(typename TypeRhs::duration{34}, true); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::SUB, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, SUB()); -} - -TEST_F(BinaryOperationIntegrationTest, Sub_Scalar_Vector_DurationS_DurationD_DurationMS) -{ - using TypeOut = cudf::duration_ms; - using TypeLhs = cudf::duration_s; - using TypeRhs = cudf::duration_D; - - using SUB = cudf::library::operation::Sub; - - auto lhs = cudf::scalar_type_t(TypeLhs{-9}); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::SUB, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, SUB()); -} - -TEST_F(BinaryOperationIntegrationTest, Mul_Vector_Vector_SI64) -{ - using TypeOut = int64_t; - using TypeLhs = int64_t; - using TypeRhs = int64_t; - - using MUL = cudf::library::operation::Mul; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::MUL, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, MUL()); -} - -TEST_F(BinaryOperationIntegrationTest, Mul_Vector_Vector_SI64_FP32_FP32) -{ - using TypeOut = int64_t; - using TypeLhs = float; - using TypeRhs = float; - - using MUL = cudf::library::operation::Mul; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, 
rhs, cudf::binary_operator::MUL, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, MUL()); -} - -TEST_F(BinaryOperationIntegrationTest, Mul_Scalar_Vector_SI32_DurationD_DurationMS) -{ - // Double the duration of days and convert the time interval to ms - using TypeOut = cudf::duration_ms; - using TypeLhs = int32_t; - using TypeRhs = cudf::duration_D; - - using MUL = cudf::library::operation::Mul; - - auto lhs = cudf::scalar_type_t(2); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::MUL, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, MUL()); -} - -TEST_F(BinaryOperationIntegrationTest, Mul_Vector_Vector_DurationS_SI32_DurationNS) -{ - // Multiple each duration with some random value and promote the result - using TypeOut = cudf::duration_ns; - using TypeLhs = cudf::duration_s; - using TypeRhs = int32_t; - - using MUL = cudf::library::operation::Mul; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::MUL, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, MUL()); -} - -TEST_F(BinaryOperationIntegrationTest, Div_Vector_Vector_SI64) -{ - using TypeOut = int64_t; - using TypeLhs = int64_t; - using TypeRhs = int64_t; - - using DIV = cudf::library::operation::Div; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::DIV, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, DIV()); -} - -TEST_F(BinaryOperationIntegrationTest, Div_Vector_Vector_SI64_FP32_FP32) -{ - using TypeOut = int64_t; - using TypeLhs = float; - using TypeRhs = float; - - using DIV = cudf::library::operation::Div; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, 
cudf::binary_operator::DIV, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, DIV()); -} - -TEST_F(BinaryOperationIntegrationTest, Div_Scalar_Vector_DurationD_SI32_DurationS) -{ - using TypeOut = cudf::duration_s; - using TypeLhs = cudf::duration_D; - using TypeRhs = int64_t; - - using DIV = cudf::library::operation::Div; - - // Divide 2 days by an integer and convert the ticks to seconds - auto lhs = cudf::scalar_type_t(TypeLhs{2}); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::DIV, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, DIV()); -} - -TEST_F(BinaryOperationIntegrationTest, Div_Vector_Vector_DurationD_DurationS_DurationMS) -{ - using TypeOut = int64_t; - using TypeLhs = cudf::duration_D; - using TypeRhs = cudf::duration_s; - - using DIV = cudf::library::operation::Div; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::DIV, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, DIV()); -} - -TEST_F(BinaryOperationIntegrationTest, TrueDiv_Vector_Vector_SI64) -{ - using TypeOut = int64_t; - using TypeLhs = int64_t; - using TypeRhs = int64_t; - - using TRUEDIV = cudf::library::operation::TrueDiv; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::TRUE_DIV, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, TRUEDIV()); -} - -TEST_F(BinaryOperationIntegrationTest, FloorDiv_Vector_Vector_SI64) -{ - using TypeOut = int64_t; - using TypeLhs = int64_t; - using TypeRhs = int64_t; - - using FLOORDIV = cudf::library::operation::FloorDiv; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::FLOOR_DIV, 
data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, FLOORDIV()); -} - -TEST_F(BinaryOperationIntegrationTest, FloorDiv_Vector_Vector_SI64_FP32_FP32) -{ - using TypeOut = int64_t; - using TypeLhs = float; - using TypeRhs = float; - - using FLOORDIV = cudf::library::operation::FloorDiv; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::FLOOR_DIV, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, FLOORDIV()); -} - -TEST_F(BinaryOperationIntegrationTest, Mod_Vector_Vector_SI64) -{ - using TypeOut = int64_t; - using TypeLhs = int64_t; - using TypeRhs = int64_t; - - using MOD = cudf::library::operation::Mod; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::MOD, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, MOD()); -} - -TEST_F(BinaryOperationIntegrationTest, Mod_Vector_Vector_FP32) -{ - using TypeOut = float; - using TypeLhs = float; - using TypeRhs = float; - - using MOD = cudf::library::operation::Mod; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::MOD, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, MOD()); -} - -TEST_F(BinaryOperationIntegrationTest, Mod_Vector_Vector_SI64_FP32_FP32) -{ - using TypeOut = int64_t; - using TypeLhs = float; - using TypeRhs = float; - - using MOD = cudf::library::operation::Mod; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::MOD, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, MOD()); -} - -TEST_F(BinaryOperationIntegrationTest, Mod_Vector_Vector_FP64) -{ - using TypeOut = double; - using TypeLhs = double; 
- using TypeRhs = double; - - using MOD = cudf::library::operation::Mod; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::MOD, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, MOD()); -} - -TEST_F(BinaryOperationIntegrationTest, Mod_Vector_Scalar_DurationD_SI32_DurationUS) -{ - using TypeOut = cudf::duration_us; - using TypeLhs = cudf::duration_D; - using TypeRhs = int64_t; - - using MOD = cudf::library::operation::Mod; - - // Half the number of days and convert the remainder ticks to microseconds - auto lhs = make_random_wrapped_column(100); - auto rhs = cudf::scalar_type_t(2); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::MOD, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, MOD()); -} - -TEST_F(BinaryOperationIntegrationTest, Mod_Vector_Scalar_DurationS_DurationMS_DurationUS) -{ - using TypeOut = cudf::duration_us; - using TypeLhs = cudf::duration_s; - using TypeRhs = cudf::duration_ms; - - using MOD = cudf::library::operation::Mod; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::MOD, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, MOD()); -} - -TEST_F(BinaryOperationIntegrationTest, Pow_Vector_Vector_FP64_SI64_SI64) -{ - using TypeOut = double; - using TypeLhs = int64_t; - using TypeRhs = int64_t; - - using POW = cudf::library::operation::Pow; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::POW, data_type(type_to_id())); - - /** - * According to CUDA Programming Guide, 'E.1. 
Standard Functions', 'Table 7 - Double-Precision - * Mathematical Standard Library Functions with Maximum ULP Error' - * The pow function has 2 (full range) maximum ulp error. - */ - ASSERT_BINOP(*out, lhs, rhs, POW(), NearEqualComparator{2}); -} - -TEST_F(BinaryOperationIntegrationTest, Pow_Vector_Vector_FP32) -{ - using TypeOut = float; - using TypeLhs = float; - using TypeRhs = float; - - using POW = cudf::library::operation::Pow; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::POW, data_type(type_to_id())); - /** - * According to CUDA Programming Guide, 'E.1. Standard Functions', 'Table 7 - Double-Precision - * Mathematical Standard Library Functions with Maximum ULP Error' - * The pow function has 2 (full range) maximum ulp error. - */ - ASSERT_BINOP(*out, lhs, rhs, POW(), NearEqualComparator{2}); -} - -TEST_F(BinaryOperationIntegrationTest, And_Vector_Vector_SI16_SI64_SI32) -{ - using TypeOut = int16_t; - using TypeLhs = int64_t; - using TypeRhs = int32_t; - - using AND = cudf::library::operation::BitwiseAnd; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::BITWISE_AND, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, AND()); -} - -TEST_F(BinaryOperationIntegrationTest, Or_Vector_Vector_SI64_SI16_SI32) -{ - using TypeOut = int64_t; - using TypeLhs = int16_t; - using TypeRhs = int32_t; - - using OR = cudf::library::operation::BitwiseOr; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::BITWISE_OR, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, OR()); -} - -TEST_F(BinaryOperationIntegrationTest, Xor_Vector_Vector_SI32_SI16_SI64) -{ - using TypeOut = int32_t; - using TypeLhs = 
int16_t; - using TypeRhs = int64_t; - - using XOR = cudf::library::operation::BitwiseXor; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::BITWISE_XOR, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, XOR()); -} - -TEST_F(BinaryOperationIntegrationTest, Logical_And_Vector_Vector_SI16_FP64_SI8) -{ - using TypeOut = int16_t; - using TypeLhs = double; - using TypeRhs = int8_t; - - using AND = cudf::library::operation::LogicalAnd; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::LOGICAL_AND, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, AND()); -} - -TEST_F(BinaryOperationIntegrationTest, Logical_Or_Vector_Vector_B8_SI16_SI64) -{ - using TypeOut = bool; - using TypeLhs = int16_t; - using TypeRhs = int64_t; - - using OR = cudf::library::operation::LogicalOr; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::LOGICAL_OR, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, OR()); -} - -TEST_F(BinaryOperationIntegrationTest, Less_Scalar_Vector_B8_TSS_TSS) -{ - using TypeOut = bool; - using TypeLhs = cudf::timestamp_s; - using TypeRhs = cudf::timestamp_s; - - using LESS = cudf::library::operation::Less; - - auto lhs = make_random_wrapped_scalar(); - auto rhs = make_random_wrapped_column(10); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::LESS, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, LESS()); -} - -TEST_F(BinaryOperationIntegrationTest, Greater_Scalar_Vector_B8_TSMS_TSS) -{ - using TypeOut = bool; - using TypeLhs = cudf::timestamp_ms; - using TypeRhs = cudf::timestamp_s; - - using GREATER = cudf::library::operation::Greater; - - auto lhs = 
make_random_wrapped_scalar(); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::GREATER, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, GREATER()); -} - -TEST_F(BinaryOperationIntegrationTest, Less_Vector_Vector_B8_TSS_TSS) -{ - using TypeOut = bool; - using TypeLhs = cudf::timestamp_s; - using TypeRhs = cudf::timestamp_s; - - using LESS = cudf::library::operation::Less; - - auto lhs = make_random_wrapped_column(10); - auto rhs = make_random_wrapped_column(10); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::LESS, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, LESS()); -} - -TEST_F(BinaryOperationIntegrationTest, Greater_Vector_Vector_B8_TSMS_TSS) -{ - using TypeOut = bool; - using TypeLhs = cudf::timestamp_ms; - using TypeRhs = cudf::timestamp_s; - - using GREATER = cudf::library::operation::Greater; - - cudf::test::UniformRandomGenerator rand_gen(1, 10); - auto itr = cudf::detail::make_counting_transform_iterator( - 0, [&rand_gen](auto row) { return rand_gen.generate() * 1000; }); - - cudf::test::fixed_width_column_wrapper lhs( - itr, itr + 100, make_validity_iter()); - - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::GREATER, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, GREATER()); -} - -TEST_F(BinaryOperationIntegrationTest, Less_Scalar_Vector_B8_STR_STR) -{ - using TypeOut = bool; - using TypeLhs = std::string; - using TypeRhs = std::string; - - using LESS = cudf::library::operation::Less; - - auto lhs = cudf::string_scalar("eee"); - auto rhs = cudf::test::strings_column_wrapper({"ééé", "bbb", "aa", "", "", "bb", "eee"}); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::LESS, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, LESS()); -} - -TEST_F(BinaryOperationIntegrationTest, Less_Vector_Scalar_B8_STR_STR) -{ - 
using TypeOut = bool; - using TypeLhs = std::string; - using TypeRhs = std::string; - - using LESS = cudf::library::operation::Less; - - auto lhs = cudf::test::strings_column_wrapper({"ééé", "bbb", "aa", "", "", "bb", "eee"}); - auto rhs = cudf::string_scalar("eee"); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::LESS, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, LESS()); -} - -TEST_F(BinaryOperationIntegrationTest, Less_Vector_Vector_B8_STR_STR) -{ - using TypeOut = bool; - using TypeLhs = std::string; - using TypeRhs = std::string; - - using LESS = cudf::library::operation::Less; - - auto lhs = cudf::test::strings_column_wrapper({"eee", "bb", "", "", "aa", "bbb", "ééé"}); - auto rhs = cudf::test::strings_column_wrapper({"ééé", "bbb", "aa", "", "", "bb", "eee"}); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::LESS, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, LESS()); -} - -TEST_F(BinaryOperationIntegrationTest, Greater_Vector_Vector_B8_STR_STR) -{ - using TypeOut = bool; - using TypeLhs = std::string; - using TypeRhs = std::string; - - using GREATER = cudf::library::operation::Greater; - - auto lhs = cudf::test::strings_column_wrapper({"eee", "bb", "", "", "aa", "bbb", "ééé"}); - auto rhs = cudf::test::strings_column_wrapper({"ééé", "bbb", "aa", "", "", "bb", "eee"}); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::GREATER, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, GREATER()); -} - -TEST_F(BinaryOperationIntegrationTest, Equal_Vector_Vector_B8_STR_STR) -{ - using TypeOut = bool; - using TypeLhs = std::string; - using TypeRhs = std::string; - - using EQUAL = cudf::library::operation::Equal; - - auto lhs = cudf::test::strings_column_wrapper({"eee", "bb", "", "", "aa", "bbb", "ééé"}); - auto rhs = cudf::test::strings_column_wrapper({"ééé", "bbb", "aa", "", "", "bb", "eee"}); - auto out = cudf::jit::binary_operation( - lhs, rhs, 
cudf::binary_operator::EQUAL, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, EQUAL()); -} - -TEST_F(BinaryOperationIntegrationTest, Equal_Vector_Scalar_B8_STR_STR) -{ - using TypeOut = bool; - using TypeLhs = std::string; - using TypeRhs = std::string; - - using EQUAL = cudf::library::operation::Equal; - - auto rhs = cudf::test::strings_column_wrapper({"eee", "bb", "", "", "aa", "bbb", "ééé"}); - auto lhs = cudf::string_scalar(""); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::EQUAL, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, EQUAL()); -} - -TEST_F(BinaryOperationIntegrationTest, LessEqual_Vector_Vector_B8_STR_STR) -{ - using TypeOut = bool; - using TypeLhs = std::string; - using TypeRhs = std::string; - - using LESS_EQUAL = cudf::library::operation::LessEqual; - - auto lhs = cudf::test::strings_column_wrapper({"eee", "bb", "", "", "aa", "bbb", "ééé"}); - auto rhs = cudf::test::strings_column_wrapper({"ééé", "bbb", "aa", "", "", "bb", "eee"}); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::LESS_EQUAL, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, LESS_EQUAL()); -} - -TEST_F(BinaryOperationIntegrationTest, GreaterEqual_Vector_Vector_B8_STR_STR) -{ - using TypeOut = bool; - using TypeLhs = std::string; - using TypeRhs = std::string; - - using GREATER_EQUAL = cudf::library::operation::GreaterEqual; - - auto lhs = cudf::test::strings_column_wrapper({"eee", "bb", "", "", "aa", "bbb", "ééé"}); - auto rhs = cudf::test::strings_column_wrapper({"ééé", "bbb", "aa", "", "", "bb", "eee"}); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::GREATER_EQUAL, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, GREATER_EQUAL()); -} - -TEST_F(BinaryOperationIntegrationTest, ShiftLeft_Vector_Vector_SI32) -{ - using TypeOut = int; - using TypeLhs = int; - using TypeRhs = int; - - using SHIFT_LEFT = cudf::library::operation::ShiftLeft; - - auto lhs = 
make_random_wrapped_column(100); - // this generates values in the range 1-10 which should be reasonable for the shift - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::SHIFT_LEFT, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, SHIFT_LEFT()); -} - -TEST_F(BinaryOperationIntegrationTest, ShiftLeft_Vector_Vector_SI32_SI16_SI64) -{ - using TypeOut = int; - using TypeLhs = int16_t; - using TypeRhs = int64_t; - - using SHIFT_LEFT = cudf::library::operation::ShiftLeft; - - auto lhs = make_random_wrapped_column(100); - // this generates values in the range 1-10 which should be reasonable for the shift - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::SHIFT_LEFT, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, SHIFT_LEFT()); -} - -TEST_F(BinaryOperationIntegrationTest, ShiftLeft_Scalar_Vector_SI32) -{ - using TypeOut = int; - using TypeLhs = int; - using TypeRhs = int; - - using SHIFT_LEFT = cudf::library::operation::ShiftLeft; - - auto lhs = make_random_wrapped_scalar(); - // this generates values in the range 1-10 which should be reasonable for the shift - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::SHIFT_LEFT, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, SHIFT_LEFT()); -} - -TEST_F(BinaryOperationIntegrationTest, ShiftLeft_Vector_Scalar_SI32) -{ - using TypeOut = int; - using TypeLhs = int; - using TypeRhs = int; - - using SHIFT_LEFT = cudf::library::operation::ShiftLeft; - - auto lhs = make_random_wrapped_column(100); - // this generates values in the range 1-10 which should be reasonable for the shift - auto rhs = make_random_wrapped_scalar(); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::SHIFT_LEFT, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, SHIFT_LEFT()); -} - 
-TEST_F(BinaryOperationIntegrationTest, ShiftRight_Vector_Vector_SI32) -{ - using TypeOut = int; - using TypeLhs = int; - using TypeRhs = int; - - using SHIFT_RIGHT = cudf::library::operation::ShiftRight; - - auto lhs = make_random_wrapped_column(100); - // this generates values in the range 1-10 which should be reasonable for the shift - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::SHIFT_RIGHT, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, SHIFT_RIGHT()); -} - -TEST_F(BinaryOperationIntegrationTest, ShiftRight_Vector_Vector_SI32_SI16_SI64) -{ - using TypeOut = int; - using TypeLhs = int16_t; - using TypeRhs = int64_t; - - using SHIFT_RIGHT = cudf::library::operation::ShiftRight; - - auto lhs = make_random_wrapped_column(100); - // this generates values in the range 1-10 which should be reasonable for the shift - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::SHIFT_RIGHT, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, SHIFT_RIGHT()); -} - -TEST_F(BinaryOperationIntegrationTest, ShiftRight_Scalar_Vector_SI32) -{ - using TypeOut = int; - using TypeLhs = int; - using TypeRhs = int; - - using SHIFT_RIGHT = cudf::library::operation::ShiftRight; - - auto lhs = make_random_wrapped_scalar(); - // this generates values in the range 1-10 which should be reasonable for the shift - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::SHIFT_RIGHT, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, SHIFT_RIGHT()); -} - -TEST_F(BinaryOperationIntegrationTest, ShiftRight_Vector_Scalar_SI32) -{ - using TypeOut = int; - using TypeLhs = int; - using TypeRhs = int; - - using SHIFT_RIGHT = cudf::library::operation::ShiftRight; - - auto lhs = make_random_wrapped_column(100); - // this generates values in the range 1-10 which should be 
reasonable for the shift - auto rhs = make_random_wrapped_scalar(); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::SHIFT_RIGHT, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, SHIFT_RIGHT()); -} - -TEST_F(BinaryOperationIntegrationTest, ShiftRightUnsigned_Vector_Vector_SI32) -{ - using TypeOut = int; - using TypeLhs = int; - using TypeRhs = int; - - int num_els = 4; - - TypeLhs lhs[] = {-8, 78, -93, 0, -INT_MAX}; - cudf::test::fixed_width_column_wrapper lhs_w(lhs, lhs + num_els); - - TypeRhs shift[] = {1, 1, 3, 2, 16}; - cudf::test::fixed_width_column_wrapper shift_w(shift, shift + num_els); - - TypeOut expected[] = {2147483644, 39, 536870900, 0, 32768}; - cudf::test::fixed_width_column_wrapper expected_w(expected, expected + num_els); - - auto out = cudf::jit::binary_operation( - lhs_w, shift_w, cudf::binary_operator::SHIFT_RIGHT_UNSIGNED, data_type(type_to_id())); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*out, expected_w); -} - -TEST_F(BinaryOperationIntegrationTest, ShiftRightUnsigned_Vector_Vector_SI32_SI16_SI64) -{ - using TypeOut = int; - using TypeLhs = int16_t; - using TypeRhs = int64_t; - - using SHIFT_RIGHT_UNSIGNED = - cudf::library::operation::ShiftRightUnsigned; - - auto lhs = make_random_wrapped_column(100); - // this generates values in the range 1-10 which should be reasonable for the shift - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::SHIFT_RIGHT_UNSIGNED, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, SHIFT_RIGHT_UNSIGNED()); -} - -TEST_F(BinaryOperationIntegrationTest, ShiftRightUnsigned_Scalar_Vector_SI32) -{ - using TypeOut = int; - using TypeLhs = int; - using TypeRhs = int; - - using SHIFT_RIGHT_UNSIGNED = - cudf::library::operation::ShiftRightUnsigned; - - auto lhs = make_random_wrapped_scalar(); - // this generates values in the range 1-10 which should be reasonable for the shift - auto rhs = 
make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::SHIFT_RIGHT_UNSIGNED, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, SHIFT_RIGHT_UNSIGNED()); -} - -TEST_F(BinaryOperationIntegrationTest, ShiftRightUnsigned_Vector_Scalar_SI32) -{ - using TypeOut = int; - using TypeLhs = int; - using TypeRhs = int; - - using SHIFT_RIGHT_UNSIGNED = - cudf::library::operation::ShiftRightUnsigned; - - auto lhs = make_random_wrapped_column(100); - // this generates values in the range 1-10 which should be reasonable for the shift - auto rhs = make_random_wrapped_scalar(); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::SHIFT_RIGHT_UNSIGNED, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, SHIFT_RIGHT_UNSIGNED()); -} - -TEST_F(BinaryOperationIntegrationTest, LogBase_Vector_Scalar_SI32_SI32_float) -{ - using TypeOut = int; // Cast the result value to int for easy comparison - using TypeLhs = int32_t; // All input types get converted into doubles - using TypeRhs = float; - - using LOG_BASE = cudf::library::operation::LogBase; - - // Make sure there are no zeros. 
The log value is purposefully cast to int for easy comparison - auto elements = cudf::detail::make_counting_transform_iterator(1, [](auto i) { return i + 10; }); - fixed_width_column_wrapper lhs(elements, elements + 100); - // Find log to the base 10 - auto rhs = numeric_scalar(10); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::LOG_BASE, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, LOG_BASE()); -} - -TEST_F(BinaryOperationIntegrationTest, LogBase_Scalar_Vector_float_SI32) -{ - using TypeOut = float; - using TypeLhs = int; - using TypeRhs = int; // Integral types promoted to double - - using LOG_BASE = cudf::library::operation::LogBase; - - // Make sure there are no zeros - auto elements = cudf::detail::make_counting_transform_iterator(1, [](auto i) { return i + 30; }); - fixed_width_column_wrapper rhs(elements, elements + 100); - // Find log to the base 2 - auto lhs = numeric_scalar(2); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::LOG_BASE, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, LOG_BASE()); -} - -TEST_F(BinaryOperationIntegrationTest, LogBase_Vector_Vector_double_SI64_SI32) -{ - using TypeOut = double; - using TypeLhs = int64_t; - using TypeRhs = int32_t; // Integral types promoted to double - - using LOG_BASE = cudf::library::operation::LogBase; - - // Make sure there are no zeros - auto elements = - cudf::detail::make_counting_transform_iterator(1, [](auto i) { return std::pow(2, i); }); - fixed_width_column_wrapper lhs(elements, elements + 50); - - // Find log to the base 7 - auto rhs_elements = cudf::detail::make_counting_transform_iterator(0, [](auto) { return 7; }); - fixed_width_column_wrapper rhs(rhs_elements, rhs_elements + 50); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::LOG_BASE, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, LOG_BASE()); -} - -TEST_F(BinaryOperationIntegrationTest, 
NullAwareEqual_Vector_Scalar_B8_SI32_SI32) -{ - using TypeOut = bool; - using TypeLhs = int32_t; - using TypeRhs = int32_t; - - auto int_col = - fixed_width_column_wrapper{{999, -37, 0, INT32_MAX}, {true, true, true, false}}; - auto int_scalar = cudf::scalar_type_t(999); - - auto op_col = cudf::jit::binary_operation( - int_col, int_scalar, cudf::binary_operator::NULL_EQUALS, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *op_col, - fixed_width_column_wrapper{{true, false, false, false}, {true, true, true, true}}, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareEqual_Vector_ScalarInvalid_B8_SI32_SI32) -{ - using TypeOut = bool; - using TypeLhs = int32_t; - using TypeRhs = int32_t; - - auto int_col = fixed_width_column_wrapper{{-INT32_MAX, -37, 0, 499, 44, INT32_MAX}, - {false, true, false, true, true, false}}; - auto int_scalar = cudf::scalar_type_t(999); - int_scalar.set_valid_async(false); - - auto op_col = cudf::jit::binary_operation( - int_col, int_scalar, cudf::binary_operator::NULL_EQUALS, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*op_col, - fixed_width_column_wrapper{ - {true, false, true, false, false, true}, - {true, true, true, true, true, true}, - }, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareEqual_Scalar_Vector_B8_tsD_tsD) -{ - using TypeOut = bool; - using TypeLhs = cudf::timestamp_D; - using TypeRhs = cudf::timestamp_D; - - cudf::test::fixed_width_column_wrapper ts_col{ - { - 999, // Random nullable field - 0, // This is the UNIX epoch - 1970-01-01 - 44376, // 2091-07-01 00:00:00 GMT - 47695, // 2100-08-02 00:00:00 GMT - 3, // Random nullable field - 66068, // 2150-11-21 00:00:00 GMT - 22270, // 2030-12-22 00:00:00 GMT - 111, // Random nullable field - }, - {false, true, true, true, false, true, true, false}}; - auto ts_scalar = cudf::scalar_type_t(typename TypeRhs::duration{44376}, true); - - auto op_col = 
cudf::jit::binary_operation( - ts_scalar, ts_col, cudf::binary_operator::NULL_EQUALS, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*op_col, - fixed_width_column_wrapper{ - {false, false, true, false, false, false, false, false}, - {true, true, true, true, true, true, true, true}, - }, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareEqual_Vector_Scalar_B8_string_string_EmptyString) -{ - using TypeOut = bool; - - auto str_col = cudf::test::strings_column_wrapper({"eee", "bb", "", "", "aa", "bbb", "ééé"}, - {true, false, true, true, true, false, true}); - // Empty string - cudf::string_scalar str_scalar(""); - - auto op_col = cudf::jit::binary_operation( - str_col, str_scalar, cudf::binary_operator::NULL_EQUALS, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *op_col, - fixed_width_column_wrapper{{false, false, false, true, false, false, false}, - {true, true, true, true, true, true, true}}, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareEqual_Scalar_Vector_B8_string_string_ValidString) -{ - using TypeOut = bool; - - auto str_col = cudf::test::strings_column_wrapper({"eee", "bb", "", "", "aa", "bbb", "ééé"}, - {true, false, true, true, true, false, true}); - // Match a valid string - cudf::string_scalar str_scalar(""); - - auto op_col = cudf::jit::binary_operation( - str_scalar, str_col, cudf::binary_operator::NULL_EQUALS, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *op_col, - fixed_width_column_wrapper{{false, false, true, false, false, false, false}, - {true, true, true, true, true, true, true}}, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareEqual_Vector_Scalar_B8_string_string_NoMatch) -{ - using TypeOut = bool; - - // Try with non nullable input - auto str_col = - cudf::test::strings_column_wrapper({"eee", "bb", "", "", "aa", "bbb", "ééé"}); - // Matching a string that 
isn't present - cudf::string_scalar str_scalar("foo"); - - auto op_col = cudf::jit::binary_operation( - str_col, str_scalar, cudf::binary_operator::NULL_EQUALS, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *op_col, - fixed_width_column_wrapper{{false, false, false, false, false, false, false}, - {true, true, true, true, true, true, true}}, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareEqual_Scalar_Vector_B8_string_string_NullNonNull) -{ - using TypeOut = bool; - - // Try with all invalid input - auto str_col = cudf::test::strings_column_wrapper({"eee", "bb", "", "", "aa", "bbb", "ééé"}, - {true, true, true, true, true, true, true}); - // Matching a scalar that is invalid - cudf::string_scalar str_scalar("foo"); - str_scalar.set_valid_async(false); - - auto op_col = cudf::jit::binary_operation( - str_scalar, str_col, cudf::binary_operator::NULL_EQUALS, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *op_col, - fixed_width_column_wrapper{{false, false, false, false, false, false, false}, - {true, true, true, true, true, true, true}}, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareEqual_Vector_Scalar_B8_string_string_NullNonNull) -{ - using TypeOut = bool; - - // Try with all invalid input - auto str_col = - cudf::test::strings_column_wrapper({"eee", "bb", "", "", "aa", "bbb", "ééé"}, - {false, false, false, false, false, false, false}); - // Matching a scalar that is valid - cudf::string_scalar str_scalar("foo"); - - auto op_col = cudf::jit::binary_operation( - str_scalar, str_col, cudf::binary_operator::NULL_EQUALS, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *op_col, - fixed_width_column_wrapper{{false, false, false, false, false, false, false}, - {true, true, true, true, true, true, true}}, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, 
NullAwareEqual_Scalar_Vector_B8_string_string_NullNull) -{ - using TypeOut = bool; - - // Try with all invalid input - auto str_col = - cudf::test::strings_column_wrapper({"eee", "bb", "", "", "aa", "bbb", "ééé"}, - {false, false, false, false, false, false, false}); - // Matching a scalar that is invalid - cudf::string_scalar str_scalar("foo"); - str_scalar.set_valid_async(false); - - auto op_col = cudf::jit::binary_operation( - str_scalar, str_col, cudf::binary_operator::NULL_EQUALS, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *op_col, - fixed_width_column_wrapper{{true, true, true, true, true, true, true}, - {true, true, true, true, true, true, true}}, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareEqual_Scalar_Vector_B8_string_string_MatchInvalid) -{ - using TypeOut = bool; - - auto str_col = cudf::test::strings_column_wrapper({"eee", "bb", "", "", "aa", "bbb", "ééé"}, - {true, false, true, true, true, false, true}); - // Matching an invalid string - cudf::string_scalar str_scalar("bb"); - - auto op_col = cudf::jit::binary_operation( - str_scalar, str_col, cudf::binary_operator::NULL_EQUALS, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *op_col, - fixed_width_column_wrapper{{false, false, false, false, false, false, false}, - {true, true, true, true, true, true, true}}, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareEqual_Vector_InvalidScalar_B8_string_string) -{ - using TypeOut = bool; - - auto str_col = cudf::test::strings_column_wrapper({"eee", "bb", "", "", "aa", "bbb", "ééé"}, - {true, false, true, true, true, false, true}); - // Valid string invalidated - cudf::string_scalar str_scalar("bb"); - str_scalar.set_valid_async(false); - - auto op_col = cudf::jit::binary_operation( - str_col, str_scalar, cudf::binary_operator::NULL_EQUALS, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - 
*op_col, - fixed_width_column_wrapper{{false, true, false, false, false, true, false}, - {true, true, true, true, true, true, true}}, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareEqual_Vector_Vector_B8_tsD_tsD_NonNullable) -{ - using TypeOut = bool; - using TypeLhs = cudf::timestamp_D; - using TypeRhs = cudf::timestamp_D; - - cudf::test::fixed_width_column_wrapper lhs_col{ - 0, // This is the UNIX epoch - 1970-01-01 - 44376, // 2091-07-01 00:00:00 GMT - 47695, // 2100-08-02 00:00:00 GMT - 66068, // 2150-11-21 00:00:00 GMT - 22270, // 2030-12-22 00:00:00 GMT - }; - ASSERT_EQ(column_view{lhs_col}.nullable(), false); - cudf::test::fixed_width_column_wrapper rhs_col{ - 0, // This is the UNIX epoch - 1970-01-01 - 44380, // Mismatched - 47695, // 2100-08-02 00:00:00 GMT - 66070, // Mismatched - 22270, // 2030-12-22 00:00:00 GMT - }; - - auto op_col = cudf::jit::binary_operation( - lhs_col, rhs_col, cudf::binary_operator::NULL_EQUALS, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*op_col, - fixed_width_column_wrapper{ - {true, false, true, false, true}, - {true, true, true, true, true}, - }, - verbosity); -} - -// Both vectors with mixed validity -TEST_F(BinaryOperationIntegrationTest, NullAwareEqual_Vector_Vector_B8_string_string_MixMix) -{ - using TypeOut = bool; - - auto lhs_col = - cudf::test::strings_column_wrapper({"eee", "invalid", "", "", "aa", "invalid", "ééé"}, - {true, false, true, true, true, false, true}); - auto rhs_col = - cudf::test::strings_column_wrapper({"foo", "valid", "", "", "invalid", "inv", "ééé"}, - {true, true, true, true, false, false, true}); - - auto op_col = cudf::jit::binary_operation( - lhs_col, rhs_col, cudf::binary_operator::NULL_EQUALS, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *op_col, - fixed_width_column_wrapper{{false, false, true, true, false, true, true}, - {true, true, true, true, true, true, true}}, - verbosity); -} - 
-TEST_F(BinaryOperationIntegrationTest, NullAwareEqual_Vector_Vector_B8_string_string_MixValid) -{ - using TypeOut = bool; - - auto lhs_col = - cudf::test::strings_column_wrapper({"eee", "invalid", "", "", "aa", "invalid", "ééé"}, - {true, false, true, true, true, false, true}); - auto rhs_col = - cudf::test::strings_column_wrapper({"eee", "invalid", "", "", "aa", "invalid", "ééé"}); - - auto op_col = cudf::jit::binary_operation( - lhs_col, rhs_col, cudf::binary_operator::NULL_EQUALS, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *op_col, - fixed_width_column_wrapper{{true, false, true, true, true, false, true}, - {true, true, true, true, true, true, true}}, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareEqual_Vector_Vector_B8_string_string_MixInvalid) -{ - using TypeOut = bool; - - auto lhs_col = - cudf::test::strings_column_wrapper({"eee", "invalid", "", "", "aa", "invalid", "ééé"}, - {true, false, true, true, true, false, true}); - auto rhs_col = - cudf::test::strings_column_wrapper({"eee", "invalid", "", "", "aa", "invalid", "ééé"}, - {false, false, false, false, false, false, false}); - - auto op_col = cudf::jit::binary_operation( - lhs_col, rhs_col, cudf::binary_operator::NULL_EQUALS, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *op_col, - fixed_width_column_wrapper{{false, true, false, false, false, true, false}, - {true, true, true, true, true, true, true}}, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareEqual_Vector_Vector_B8_string_string_ValidValid) -{ - using TypeOut = bool; - - auto lhs_col = - cudf::test::strings_column_wrapper({"eee", "invalid", "", "", "aa", "invalid", "ééé"}); - auto rhs_col = - cudf::test::strings_column_wrapper({"eee", "invalid", "", "", "aa", "invalid", "ééé"}); - - auto op_col = cudf::jit::binary_operation( - lhs_col, rhs_col, cudf::binary_operator::NULL_EQUALS, data_type(type_to_id())); - - // 
Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *op_col, - fixed_width_column_wrapper{{true, true, true, true, true, true, true}, - {true, true, true, true, true, true, true}}, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareEqual_Vector_Vector_B8_string_string_ValidInvalid) -{ - using TypeOut = bool; - - auto lhs_col = - cudf::test::strings_column_wrapper({"eee", "invalid", "", "", "aa", "invalid", "ééé"}); - auto rhs_col = - cudf::test::strings_column_wrapper({"eee", "invalid", "", "", "aa", "invalid", "ééé"}, - {false, false, false, false, false, false, false}); - - auto op_col = cudf::jit::binary_operation( - lhs_col, rhs_col, cudf::binary_operator::NULL_EQUALS, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *op_col, - fixed_width_column_wrapper{{false, false, false, false, false, false, false}, - {true, true, true, true, true, true, true}}, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareEqual_Vector_Vector_B8_string_string_InvalidInvalid) -{ - using TypeOut = bool; - - auto lhs_col = - cudf::test::strings_column_wrapper({"eee", "invalid", "", "", "aa", "invalid", "ééé"}, - {false, false, false, false, false, false, false}); - auto rhs_col = - cudf::test::strings_column_wrapper({"eee", "invalid", "", "", "aa", "invalid", "ééé"}, - {false, false, false, false, false, false, false}); - - auto op_col = cudf::jit::binary_operation( - lhs_col, rhs_col, cudf::binary_operator::NULL_EQUALS, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *op_col, - fixed_width_column_wrapper{{true, true, true, true, true, true, true}, - {true, true, true, true, true, true, true}}, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareEqual_Vector_VectorAllInvalid_B8_SI32_SI32) -{ - using TypeOut = bool; - using TypeLhs = int32_t; - - auto lhs_col = fixed_width_column_wrapper{{-INT32_MAX, -37, 0, 499, 44, INT32_MAX}, - {false, false, 
false, false, false, false}}; - auto rhs_col = fixed_width_column_wrapper{{-47, 37, 12, 99, 4, -INT32_MAX}, - {false, false, false, false, false, false}}; - - auto op_col = cudf::jit::binary_operation( - lhs_col, rhs_col, cudf::binary_operator::NULL_EQUALS, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*op_col, - fixed_width_column_wrapper{ - {true, true, true, true, true, true}, - {true, true, true, true, true, true}, - }, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareMin_Vector_Scalar_SI64_SI32_SI8) -{ - using TypeOut = int64_t; - using TypeLhs = int32_t; - using TypeRhs = int8_t; - - auto int_col = fixed_width_column_wrapper{ - {999, -37, 0, INT32_MAX}, - }; - auto int_scalar = cudf::scalar_type_t(77); - - auto op_col = cudf::jit::binary_operation( - int_col, int_scalar, cudf::binary_operator::NULL_MIN, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *op_col, - fixed_width_column_wrapper{{77, -37, 0, 77}, {true, true, true, true}}, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareMax_Scalar_Vector_FP64_SI32_SI64) -{ - using TypeOut = double; - using TypeLhs = int32_t; - using TypeRhs = int64_t; - - auto int_col = - fixed_width_column_wrapper{{999, -37, 0, INT32_MAX, -INT32_MAX, -4379, 55}, - {false, true, false, true, false, true, false}}; - auto int_scalar = cudf::scalar_type_t(INT32_MAX); - - auto op_col = cudf::jit::binary_operation( - int_scalar, int_col, cudf::binary_operator::NULL_MAX, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *op_col, - fixed_width_column_wrapper{ - {INT32_MAX, INT32_MAX, INT32_MAX, INT32_MAX, INT32_MAX, INT32_MAX, INT32_MAX}, - {true, true, true, true, true, true, true}}, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareMin_Vector_Scalar_SI64_SI32_FP32) -{ - using TypeOut = int64_t; - using TypeLhs = int32_t; - using TypeRhs = float; - - auto 
int_col = - fixed_width_column_wrapper{{999, -37, 0, INT32_MAX, -INT32_MAX, -4379, 55}, - {false, true, false, true, false, true, false}}; - auto float_scalar = cudf::scalar_type_t(-3.14f); - float_scalar.set_valid_async(false); - - auto op_col = cudf::jit::binary_operation( - int_col, float_scalar, cudf::binary_operator::NULL_MIN, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *op_col, - fixed_width_column_wrapper{{0, -37, 0, INT32_MAX, 0, -4379, 0}, - {false, true, false, true, false, true, false}}, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareMax_Scalar_Vector_SI8_SI8_FP32) -{ - using TypeOut = int8_t; - using TypeLhs = int8_t; - using TypeRhs = float; - - auto int_col = fixed_width_column_wrapper{ - {9, -37, 0, 32, -47, -4, 55}, {false, false, false, false, false, false, false}}; - auto float_scalar = cudf::scalar_type_t(-3.14f); - float_scalar.set_valid_async(false); - - auto op_col = cudf::jit::binary_operation( - float_scalar, int_col, cudf::binary_operator::NULL_MAX, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *op_col, - fixed_width_column_wrapper{{0, 0, 0, 0, 0, 0, 0}, - {false, false, false, false, false, false, false}}, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareMin_Vector_Vector_SI64_SI32_SI8) -{ - using TypeOut = int64_t; - using TypeLhs = int32_t; - - auto int_col = - fixed_width_column_wrapper{{999, -37, 0, INT32_MAX, -INT32_MAX, -4379, 55}, - {false, false, false, false, false, false, false}}; - auto another_int_col = fixed_width_column_wrapper{ - {9, -37, 0, 32, -47, -4, 55}, {false, false, false, false, false, false, false}}; - - auto op_col = cudf::jit::binary_operation( - int_col, another_int_col, cudf::binary_operator::NULL_MIN, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *op_col, - fixed_width_column_wrapper{{0, 0, 0, 0, 0, 0, 0}, - {false, false, false, false, 
false, false, false}}, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareMax_Vector_Vector_SI64_SI32_SI8) -{ - using TypeOut = int64_t; - using TypeLhs = int32_t; - - auto int_col = fixed_width_column_wrapper{ - {999, -37, 0, INT32_MAX, -INT32_MAX, -4379, 55}, {true, true, true, true, true, true, true}}; - auto another_int_col = fixed_width_column_wrapper{ - {9, -37, 0, 32, -47, -4, 55}, {false, false, false, false, false, false, false}}; - - auto op_col = cudf::jit::binary_operation( - int_col, another_int_col, cudf::binary_operator::NULL_MAX, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *op_col, - fixed_width_column_wrapper{{999, -37, 0, INT32_MAX, -INT32_MAX, -4379, 55}, - {true, true, true, true, true, true, true}}, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareMin_Vector_Vector_tsD_tsD_tsD) -{ - cudf::test::fixed_width_column_wrapper lhs_col{ - { - 0, // This is the UNIX epoch - 1970-01-01 - 44376, // 2091-07-01 00:00:00 GMT - 47695, // 2100-08-02 00:00:00 GMT - 66068, // 2150-11-21 00:00:00 GMT - 22270, // 2030-12-22 00:00:00 GMT - }, - {true, false, true, true, false}}; - cudf::test::fixed_width_column_wrapper rhs_col{ - { - 0, // This is the UNIX epoch - 1970-01-01 - 44380, // Mismatched - 47695, // 2100-08-02 00:00:00 GMT - 66070, // Mismatched - 22270, // 2030-12-22 00:00:00 GMT - }, - {false, true, true, true, false}}; - - auto op_col = cudf::jit::binary_operation( - lhs_col, rhs_col, cudf::binary_operator::NULL_MIN, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *op_col, - fixed_width_column_wrapper{ - {0, 44380, 47695, 66068, 0}, {true, true, true, true, false}}, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareMax_Vector_Vector_SI32_SI64_SI8) -{ - using TypeOut = int32_t; - using TypeLhs = int64_t; - - auto int_col = - fixed_width_column_wrapper{{999, -37, 0, INT32_MAX, -INT32_MAX, -4379, 55}, - {false, 
false, false, false, false, false, false}}; - auto another_int_col = fixed_width_column_wrapper{ - {9, -37, 0, 32, -47, -4, 55}, {true, false, true, false, true, false, true}}; - - auto op_col = cudf::jit::binary_operation( - int_col, another_int_col, cudf::binary_operator::NULL_MAX, data_type(type_to_id())); - - // Every row has a value - CUDF_TEST_EXPECT_COLUMNS_EQUAL( - *op_col, - fixed_width_column_wrapper{{9, 0, 0, 0, -47, 0, 55}, - {true, false, true, false, true, false, true}}, - verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareMax_Vector_Vector_string_string_string_Mix) -{ - auto lhs_col = cudf::test::strings_column_wrapper( - {"eee", "invalid", "", "", "", "", "ééé", "foo", "bar", "abc", "def"}, - {false, true, true, false, true, true, true, false, false, true, true}); - auto rhs_col = cudf::test::strings_column_wrapper( - {"eee", "goo", "", "", "", "", "ééé", "bar", "foo", "def", "abc"}, - {false, true, true, true, false, true, true, false, false, true, true}); - - auto op_col = cudf::jit::binary_operation( - lhs_col, rhs_col, cudf::binary_operator::NULL_MAX, data_type{type_id::STRING}); - - auto exp_col = cudf::test::strings_column_wrapper( - {"", "invalid", "", "", "", "", "ééé", "", "", "def", "def"}, - {false, true, true, true, true, true, true, false, false, true, true}); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*op_col, exp_col, verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareMin_Vector_Scalar_string_string_string_Mix) -{ - auto lhs_col = cudf::test::strings_column_wrapper( - {"eee", "invalid", "", "", "", "", "ééé", "foo", "bar", "abc", "foo"}, - {false, true, true, false, true, true, true, false, false, true, true}); - cudf::string_scalar str_scalar("foo"); - - // Returns a non-nullable column as all elements are valid - it will have the scalar - // value at the very least - auto op_col = cudf::jit::binary_operation( - lhs_col, str_scalar, cudf::binary_operator::NULL_MIN, data_type{type_id::STRING}); - - auto exp_col = 
cudf::test::strings_column_wrapper( - {"foo", "foo", "", "foo", "", "", "foo", "foo", "foo", "abc", "foo"}); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*op_col, exp_col, verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, NullAwareMax_Scalar_Vector_string_string_string_Mix) -{ - auto lhs_col = cudf::test::strings_column_wrapper( - {"eee", "invalid", "", "", "", "", "ééé", "foo", "bar", "abc", "foo"}, - {false, true, true, false, true, true, true, false, false, true, true}); - cudf::string_scalar str_scalar("foo"); - str_scalar.set_valid_async(false); - - // Returns the lhs_col - auto op_col = cudf::jit::binary_operation( - str_scalar, lhs_col, cudf::binary_operator::NULL_MAX, data_type{type_id::STRING}); - - auto exp_col = cudf::test::strings_column_wrapper( - {"", "invalid", "", "", "", "", "ééé", "", "", "abc", "foo"}, - {false, true, true, false, true, true, true, false, false, true, true}); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*op_col, exp_col, verbosity); -} - -TEST_F(BinaryOperationIntegrationTest, CastAdd_Vector_Vector_SI32_float_float) -{ - using TypeOut = int32_t; - using TypeLhs = float; - using TypeRhs = float; // Integral types promoted to double - - using ADD = cudf::library::operation::Add; - - auto lhs = cudf::test::fixed_width_column_wrapper{1.3f, 1.6f}; - auto rhs = cudf::test::fixed_width_column_wrapper{1.3f, 1.6f}; - auto expected = cudf::test::fixed_width_column_wrapper{2, 3}; - - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, ADD()); -} - -TEST_F(BinaryOperationIntegrationTest, Add_Vector_Vector_TimepointD_DurationS_TimepointUS) -{ - using TypeOut = cudf::timestamp_us; - using TypeLhs = cudf::timestamp_D; - using TypeRhs = cudf::duration_s; - - using ADD = cudf::library::operation::Add; - - auto lhs = make_random_wrapped_column(100); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::ADD, 
data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, ADD()); -} - -TEST_F(BinaryOperationIntegrationTest, Add_Vector_Scalar_DurationD_TimepointS_TimepointS) -{ - using TypeOut = cudf::timestamp_s; - using TypeLhs = cudf::duration_D; - using TypeRhs = cudf::timestamp_s; - - using ADD = cudf::library::operation::Add; - - auto lhs = make_random_wrapped_column(100); - auto rhs = cudf::scalar_type_t(typename TypeRhs::duration{34}, true); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, ADD()); -} - -TEST_F(BinaryOperationIntegrationTest, Add_Scalar_Vector_DurationS_DurationD_DurationMS) -{ - using TypeOut = cudf::duration_ms; - using TypeLhs = cudf::duration_s; - using TypeRhs = cudf::duration_D; - - using ADD = cudf::library::operation::Add; - - auto lhs = cudf::scalar_type_t(TypeLhs{-9}); - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, ADD()); -} - -TEST_F(BinaryOperationIntegrationTest, ShiftRightUnsigned_Scalar_Vector_SI64_SI64_SI32) -{ - using TypeOut = int64_t; - using TypeLhs = int64_t; - using TypeRhs = int32_t; - - using SHIFT_RIGHT_UNSIGNED = - cudf::library::operation::ShiftRightUnsigned; - - auto lhs = cudf::scalar_type_t(-12); - // this generates values in the range 1-10 which should be reasonable for the shift - auto rhs = make_random_wrapped_column(100); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::SHIFT_RIGHT_UNSIGNED, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, SHIFT_RIGHT_UNSIGNED()); -} - -TEST_F(BinaryOperationIntegrationTest, PMod_Scalar_Vector_FP32) -{ - using TypeOut = float; - using TypeLhs = float; - using TypeRhs = float; - - auto lhs = cudf::scalar_type_t(-86099.68377); - auto rhs = fixed_width_column_wrapper{{90770.74881, -15456.4335, 32213.22119}}; - - auto out = 
cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::PMOD, data_type(type_to_id())); - - auto expected_result = - fixed_width_column_wrapper{{4671.0625, -8817.51953125, 10539.974609375}}; - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*out, expected_result); -} - -TEST_F(BinaryOperationIntegrationTest, PMod_Vector_Scalar_FP64) -{ - using TypeOut = double; - using TypeLhs = double; - using TypeRhs = double; - - auto lhs = fixed_width_column_wrapper{{90770.74881, -15456.4335, 32213.22119}}; - auto rhs = cudf::scalar_type_t(-86099.68377); - - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::PMOD, data_type(type_to_id())); - - auto expected_result = fixed_width_column_wrapper{ - {4671.0650400000013178, -15456.433499999999185, 32213.221190000000206}}; - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*out, expected_result); -} - -TEST_F(BinaryOperationIntegrationTest, PMod_Vector_Vector_FP64_FP32_FP64) -{ - using TypeOut = double; - using TypeLhs = float; - using TypeRhs = double; - - auto lhs = fixed_width_column_wrapper{ - {24854.55893, 79946.87288, -86099.68377, -86099.68377, 1.0, 1.0, -1.0, -1.0}}; - auto rhs = fixed_width_column_wrapper{{90770.74881, - -15456.4335, - 36223.96138, - -15456.4335, - 2.1336193413893147E307, - -2.1336193413893147E307, - 2.1336193413893147E307, - -2.1336193413893147E307}}; - - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::PMOD, data_type(type_to_id())); - - auto expected_result = fixed_width_column_wrapper{{24854.55859375, - 2664.7075000000040745, - 22572.196640000001935, - -8817.5200000000040745, - 1.0, - 1.0, - 0.0, - 0.0}}; - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*out, expected_result); -} - -TEST_F(BinaryOperationIntegrationTest, PMod_Vector_Vector_FP64_SI32_SI64) -{ - using TypeOut = double; - using TypeLhs = int32_t; - using TypeRhs = int64_t; - - using PMOD = cudf::library::operation::PMod; - - auto lhs = make_random_wrapped_column(1000); - auto rhs = make_random_wrapped_column(1000); - - auto out = 
cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::PMOD, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, PMOD()); -} - -TEST_F(BinaryOperationIntegrationTest, PMod_Vector_Vector_SI64_SI32_SI64) -{ - using TypeOut = int64_t; - using TypeLhs = int32_t; - using TypeRhs = int64_t; - - using PMOD = cudf::library::operation::PMod; - - auto lhs = make_random_wrapped_column(1000); - auto rhs = make_random_wrapped_column(1000); - - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::PMOD, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, PMOD()); -} - -TEST_F(BinaryOperationIntegrationTest, PMod_Vector_Vector_SI64_FP64_FP64) -{ - using TypeOut = int64_t; - using TypeLhs = double; - using TypeRhs = double; - - using PMOD = cudf::library::operation::PMod; - - auto lhs = make_random_wrapped_column(1000); - auto rhs = make_random_wrapped_column(1000); - - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::PMOD, data_type(type_to_id())); - - ASSERT_BINOP(*out, lhs, rhs, PMOD()); -} - -TEST_F(BinaryOperationIntegrationTest, ATan2_Scalar_Vector_FP32) -{ - using TypeOut = float; - using TypeLhs = float; - using TypeRhs = float; - - using ATAN2 = cudf::library::operation::ATan2; - - auto lhs = make_random_wrapped_scalar(); - auto rhs = make_random_wrapped_column(10000); - - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::ATAN2, data_type(type_to_id())); - - // atan2 has a max ULP error of 2 per CUDA programming guide - ASSERT_BINOP(*out, lhs, rhs, ATAN2(), NearEqualComparator{2}); -} - -TEST_F(BinaryOperationIntegrationTest, ATan2_Vector_Scalar_FP64) -{ - using TypeOut = double; - using TypeLhs = double; - using TypeRhs = double; - - using ATAN2 = cudf::library::operation::ATan2; - - auto lhs = make_random_wrapped_column(10000); - auto rhs = make_random_wrapped_scalar(); - - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::ATAN2, 
data_type(type_to_id())); - - // atan2 has a max ULP error of 2 per CUDA programming guide - ASSERT_BINOP(*out, lhs, rhs, ATAN2(), NearEqualComparator{2}); -} - -TEST_F(BinaryOperationIntegrationTest, ATan2_Vector_Vector_FP64_FP32_FP64) -{ - using TypeOut = double; - using TypeLhs = float; - using TypeRhs = double; - - using ATAN2 = cudf::library::operation::ATan2; - - auto lhs = make_random_wrapped_column(10000); - auto rhs = make_random_wrapped_column(10000); - - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::ATAN2, data_type(type_to_id())); - - // atan2 has a max ULP error of 2 per CUDA programming guide - ASSERT_BINOP(*out, lhs, rhs, ATAN2(), NearEqualComparator{2}); -} - -TEST_F(BinaryOperationIntegrationTest, ATan2_Vector_Vector_FP64_SI32_SI64) -{ - using TypeOut = double; - using TypeLhs = int32_t; - using TypeRhs = int64_t; - - using ATAN2 = cudf::library::operation::ATan2; - - auto lhs = make_random_wrapped_column(10000); - auto rhs = make_random_wrapped_column(10000); - - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::ATAN2, data_type(type_to_id())); - - // atan2 has a max ULP error of 2 per CUDA programming guide - ASSERT_BINOP(*out, lhs, rhs, ATAN2(), NearEqualComparator{2}); -} - -template -struct FixedPointTestAllReps : public cudf::test::BaseFixture { -}; - -template -using wrapper = cudf::test::fixed_width_column_wrapper; -TYPED_TEST_SUITE(FixedPointTestAllReps, cudf::test::FixedPointTypes); - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpAdd) -{ - using namespace numeric; - using decimalXX = TypeParam; - - auto const sz = std::size_t{1000}; - - auto begin = cudf::detail::make_counting_transform_iterator(1, [](auto i) { - return decimalXX{i, scale_type{0}}; - }); - auto const vec1 = std::vector(begin, begin + sz); - auto const vec2 = std::vector(sz, decimalXX{2, scale_type{0}}); - auto expected = std::vector(sz); - - std::transform(std::cbegin(vec1), - std::cend(vec1), - 
std::cbegin(vec2), - std::begin(expected), - std::plus()); - - auto const lhs = wrapper(vec1.begin(), vec1.end()); - auto const rhs = wrapper(vec2.begin(), vec2.end()); - auto const expected_col = wrapper(expected.begin(), expected.end()); - - auto const type = - cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::ADD, - static_cast(lhs).type(), - static_cast(rhs).type()); - auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::ADD, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_col, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpMultiply) -{ - using namespace numeric; - using decimalXX = TypeParam; - - auto const sz = std::size_t{1000}; - - auto begin = cudf::detail::make_counting_transform_iterator(1, [](auto i) { - return decimalXX{i, scale_type{0}}; - }); - auto const vec1 = std::vector(begin, begin + sz); - auto const vec2 = std::vector(sz, decimalXX{2, scale_type{0}}); - auto expected = std::vector(sz); - - std::transform(std::cbegin(vec1), - std::cend(vec1), - std::cbegin(vec2), - std::begin(expected), - std::multiplies()); - - auto const lhs = wrapper(vec1.begin(), vec1.end()); - auto const rhs = wrapper(vec2.begin(), vec2.end()); - auto const expected_col = wrapper(expected.begin(), expected.end()); - - auto const type = - cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::MUL, - static_cast(lhs).type(), - static_cast(rhs).type()); - auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::MUL, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_col, result->view()); -} - -template -using fp_wrapper = cudf::test::fixed_point_column_wrapper; - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpMultiply2) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const lhs = fp_wrapper{{11, 22, 33, 44, 55}, scale_type{-1}}; - auto const rhs = fp_wrapper{{10, 10, 10, 10, 10}, scale_type{0}}; - auto 
const expected = fp_wrapper{{110, 220, 330, 440, 550}, scale_type{-1}}; - - auto const type = - cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::MUL, - static_cast(lhs).type(), - static_cast(rhs).type()); - auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::MUL, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpDiv) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const lhs = fp_wrapper{{10, 30, 50, 70}, scale_type{-1}}; - auto const rhs = fp_wrapper{{4, 4, 4, 4}, scale_type{0}}; - auto const expected = fp_wrapper{{2, 7, 12, 17}, scale_type{-1}}; - - auto const type = - cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::DIV, - static_cast(lhs).type(), - static_cast(rhs).type()); - auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::DIV, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpDiv2) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const lhs = fp_wrapper{{10, 30, 50, 70}, scale_type{-1}}; - auto const rhs = fp_wrapper{{4, 4, 4, 4}, scale_type{-2}}; - auto const expected = fp_wrapper{{2, 7, 12, 17}, scale_type{1}}; - - auto const type = - cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::DIV, - static_cast(lhs).type(), - static_cast(rhs).type()); - auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::DIV, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpDiv3) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const lhs = fp_wrapper{{10, 30, 50, 70}, scale_type{-1}}; - auto const rhs = make_fixed_point_scalar(12, 
scale_type{-1}); - auto const expected = fp_wrapper{{0, 2, 4, 5}, scale_type{0}}; - - auto const type = cudf::binary_operation_fixed_point_output_type( - cudf::binary_operator::DIV, static_cast(lhs).type(), rhs->type()); - auto const result = cudf::binary_operation(lhs, *rhs, cudf::binary_operator::DIV, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpDiv4) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto begin = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i * 11; }); - auto result_begin = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i * 11) / 12; }); - auto const lhs = fp_wrapper(begin, begin + 1000, scale_type{-1}); - auto const rhs = make_fixed_point_scalar(12, scale_type{-1}); - auto const expected = fp_wrapper(result_begin, result_begin + 1000, scale_type{0}); - - auto const type = cudf::binary_operation_fixed_point_output_type( - cudf::binary_operator::DIV, static_cast(lhs).type(), rhs->type()); - auto const result = cudf::binary_operation(lhs, *rhs, cudf::binary_operator::DIV, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpAdd2) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const lhs = fp_wrapper{{11, 22, 33, 44, 55}, scale_type{-1}}; - auto const rhs = fp_wrapper{{100, 200, 300, 400, 500}, scale_type{-2}}; - auto const expected = fp_wrapper{{210, 420, 630, 840, 1050}, scale_type{-2}}; - - auto const type = - cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::ADD, - static_cast(lhs).type(), - static_cast(rhs).type()); - auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::ADD, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, 
FixedPointBinaryOpAdd3) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const lhs = fp_wrapper{{1100, 2200, 3300, 4400, 5500}, scale_type{-3}}; - auto const rhs = fp_wrapper{{100, 200, 300, 400, 500}, scale_type{-2}}; - auto const expected = fp_wrapper{{2100, 4200, 6300, 8400, 10500}, scale_type{-3}}; - - auto const type = - cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::ADD, - static_cast(lhs).type(), - static_cast(rhs).type()); - auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::ADD, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpAdd4) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const lhs = fp_wrapper{{11, 22, 33, 44, 55}, scale_type{-1}}; - auto const rhs = make_fixed_point_scalar(100, scale_type{-2}); - auto const expected = fp_wrapper{{210, 320, 430, 540, 650}, scale_type{-2}}; - - auto const type = cudf::binary_operation_fixed_point_output_type( - cudf::binary_operator::ADD, static_cast(lhs).type(), rhs->type()); - auto const result = cudf::binary_operation(lhs, *rhs, cudf::binary_operator::ADD, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpAdd5) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const lhs = make_fixed_point_scalar(100, scale_type{-2}); - auto const rhs = fp_wrapper{{11, 22, 33, 44, 55}, scale_type{-1}}; - auto const expected = fp_wrapper{{210, 320, 430, 540, 650}, scale_type{-2}}; - - auto const type = cudf::binary_operation_fixed_point_output_type( - cudf::binary_operator::ADD, lhs->type(), static_cast(rhs).type()); - auto const result = cudf::binary_operation(*lhs, rhs, cudf::binary_operator::ADD, type); - - 
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpAdd6) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const col = fp_wrapper{{3, 4, 5, 6, 7, 8}, scale_type{0}}; - - auto const expected1 = fp_wrapper{{6, 8, 10, 12, 14, 16}, scale_type{0}}; - auto const expected2 = fp_wrapper{{0, 0, 1, 1, 1, 1}, scale_type{1}}; - auto const type1 = cudf::data_type{cudf::type_to_id(), 0}; - auto const type2 = cudf::data_type{cudf::type_to_id(), 1}; - auto const result1 = cudf::binary_operation(col, col, cudf::binary_operator::ADD, type1); - auto const result2 = cudf::binary_operation(col, col, cudf::binary_operator::ADD, type2); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, result1->view()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, result2->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointCast) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const col = fp_wrapper{{6, 8, 10, 12, 14, 16}, scale_type{0}}; - auto const expected = fp_wrapper{{0, 0, 1, 1, 1, 1}, scale_type{1}}; - auto const type = cudf::data_type{cudf::type_to_id(), 1}; - auto const result = cudf::cast(col, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpMultiplyScalar) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const lhs = fp_wrapper{{11, 22, 33, 44, 55}, scale_type{-1}}; - auto const rhs = make_fixed_point_scalar(100, scale_type{-1}); - auto const expected = fp_wrapper{{1100, 2200, 3300, 4400, 5500}, scale_type{-2}}; - - auto const type = cudf::binary_operation_fixed_point_output_type( - cudf::binary_operator::MUL, static_cast(lhs).type(), rhs->type()); - auto const result = cudf::binary_operation(lhs, *rhs, cudf::binary_operator::MUL, type); - - 
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpSimplePlus) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const lhs = fp_wrapper{{150, 200}, scale_type{-2}}; - auto const rhs = fp_wrapper{{2250, 1005}, scale_type{-3}}; - auto const expected = fp_wrapper{{3750, 3005}, scale_type{-3}}; - - auto const type = - cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::ADD, - static_cast(lhs).type(), - static_cast(rhs).type()); - auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::ADD, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpEqualSimple) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const trues = std::vector(4, true); - auto const col1 = fp_wrapper{{1, 2, 3, 4}, scale_type{0}}; - auto const col2 = fp_wrapper{{100, 200, 300, 400}, scale_type{-2}}; - auto const expected = wrapper(trues.begin(), trues.end()); - - auto const result = - cudf::binary_operation(col1, col2, binary_operator::EQUAL, cudf::data_type{type_id::BOOL8}); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpEqualSimpleScale0) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const trues = std::vector(4, true); - auto const col = fp_wrapper{{1, 2, 3, 4}, scale_type{0}}; - auto const expected = wrapper(trues.begin(), trues.end()); - - auto const result = - cudf::binary_operation(col, col, binary_operator::EQUAL, cudf::data_type{type_id::BOOL8}); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpEqualSimpleScale0Null) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = 
device_storage_type_t; - - auto const col1 = fp_wrapper{{1, 2, 3, 4}, {1, 1, 1, 1}, scale_type{0}}; - auto const col2 = fp_wrapper{{1, 2, 3, 4}, {0, 0, 0, 0}, scale_type{0}}; - auto const expected = wrapper{{0, 1, 0, 1}, {0, 0, 0, 0}}; - - auto const result = - cudf::binary_operation(col1, col2, binary_operator::EQUAL, cudf::data_type{type_id::BOOL8}); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpEqualSimpleScale2Null) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const col1 = fp_wrapper{{1, 2, 3, 4}, {1, 1, 1, 1}, scale_type{-2}}; - auto const col2 = fp_wrapper{{1, 2, 3, 4}, {0, 0, 0, 0}, scale_type{0}}; - auto const expected = wrapper{{0, 1, 0, 1}, {0, 0, 0, 0}}; - - auto const result = - cudf::binary_operation(col1, col2, binary_operator::EQUAL, cudf::data_type{type_id::BOOL8}); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpEqualLessGreater) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const sz = std::size_t{1000}; - - // TESTING binary op ADD - - auto begin = cudf::detail::make_counting_transform_iterator(1, [](auto e) { return e * 1000; }); - auto const vec1 = std::vector(begin, begin + sz); - auto const vec2 = std::vector(sz, 0); - - auto const iota_3 = fp_wrapper(vec1.begin(), vec1.end(), scale_type{-3}); - auto const zeros_3 = fp_wrapper(vec2.begin(), vec2.end(), scale_type{-1}); - - auto const type = - cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::ADD, - static_cast(iota_3).type(), - static_cast(zeros_3).type()); - auto const iota_3_after_add = cudf::binary_operation(zeros_3, iota_3, binary_operator::ADD, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(iota_3, iota_3_after_add->view()); - - // TESTING binary op EQUAL, LESS, GREATER - - auto const trues = 
std::vector(sz, true); - auto const true_col = wrapper(trues.begin(), trues.end()); - - auto const btype = cudf::data_type{type_id::BOOL8}; - auto const equal_result = - cudf::binary_operation(iota_3, iota_3_after_add->view(), binary_operator::EQUAL, btype); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(true_col, equal_result->view()); - - auto const less_result = - cudf::binary_operation(zeros_3, iota_3_after_add->view(), binary_operator::LESS, btype); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(true_col, less_result->view()); - - auto const greater_result = - cudf::binary_operation(iota_3_after_add->view(), zeros_3, binary_operator::GREATER, btype); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(true_col, greater_result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpNullMaxSimple) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const trues = std::vector(4, true); - auto const col1 = fp_wrapper{{40, 30, 20, 10, 0}, {1, 0, 1, 1, 0}, scale_type{-2}}; - auto const col2 = fp_wrapper{{10, 20, 30, 40, 0}, {1, 1, 1, 0, 0}, scale_type{-2}}; - auto const expected = fp_wrapper{{40, 20, 30, 10, 0}, {1, 1, 1, 1, 0}, scale_type{-2}}; - - auto const type = - cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::NULL_MAX, - static_cast(col1).type(), - static_cast(col2).type()); - auto const result = cudf::binary_operation(col1, col2, binary_operator::NULL_MAX, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpNullMinSimple) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const trues = std::vector(4, true); - auto const col1 = fp_wrapper{{40, 30, 20, 10, 0}, {1, 1, 1, 0, 0}, scale_type{-1}}; - auto const col2 = fp_wrapper{{10, 20, 30, 40, 0}, {1, 0, 1, 1, 0}, scale_type{-1}}; - auto const expected = fp_wrapper{{10, 30, 20, 40, 0}, {1, 1, 1, 1, 0}, scale_type{-1}}; - - auto const 
type = - cudf::binary_operation_fixed_point_output_type(cudf::binary_operator::NULL_MIN, - static_cast(col1).type(), - static_cast(col2).type()); - auto const result = cudf::binary_operation(col1, col2, binary_operator::NULL_MIN, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpNullEqualsSimple) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const trues = std::vector(4, true); - auto const col1 = fp_wrapper{{400, 300, 300, 100}, {1, 1, 1, 0}, scale_type{-2}}; - auto const col2 = fp_wrapper{{40, 200, 20, 400}, {1, 0, 1, 0}, scale_type{-1}}; - auto const expected = wrapper{{1, 0, 0, 1}, {1, 1, 1, 1}}; - - auto const result = cudf::binary_operation( - col1, col2, binary_operator::NULL_EQUALS, cudf::data_type{type_id::BOOL8}); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOp_Div) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const lhs = fp_wrapper{{100, 300, 500, 700}, scale_type{-2}}; - auto const rhs = fp_wrapper{{4, 4, 4, 4}, scale_type{0}}; - auto const expected = fp_wrapper{{25, 75, 125, 175}, scale_type{-2}}; - - auto const type = data_type{type_to_id(), -2}; - auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::DIV, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOp_Div2) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const lhs = fp_wrapper{{100000, 300000, 500000, 700000}, scale_type{-3}}; - auto const rhs = fp_wrapper{{20, 20, 20, 20}, scale_type{-1}}; - auto const expected = fp_wrapper{{5000, 15000, 25000, 35000}, scale_type{-2}}; - - auto const type = data_type{type_to_id(), -2}; - auto const result = 
cudf::binary_operation(lhs, rhs, cudf::binary_operator::DIV, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOp_Div3) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const lhs = fp_wrapper{{10000, 30000, 50000, 70000}, scale_type{-2}}; - auto const rhs = fp_wrapper{{3, 9, 3, 3}, scale_type{0}}; - auto const expected = fp_wrapper{{3333, 3333, 16666, 23333}, scale_type{-2}}; - - auto const type = data_type{type_to_id(), -2}; - auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::DIV, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOp_Div4) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const lhs = fp_wrapper{{10, 30, 50, 70}, scale_type{1}}; - auto const rhs = make_fixed_point_scalar(3, scale_type{0}); - auto const expected = fp_wrapper{{3, 10, 16, 23}, scale_type{1}}; - - auto const type = data_type{type_to_id(), 1}; - auto const result = cudf::binary_operation(lhs, *rhs, cudf::binary_operator::DIV, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOp_Div6) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const lhs = make_fixed_point_scalar(3000, scale_type{-3}); - auto const rhs = fp_wrapper{{10, 30, 50, 70}, scale_type{-1}}; - - auto const expected = fp_wrapper{{300, 100, 60, 42}, scale_type{-2}}; - - auto const type = data_type{type_to_id(), -2}; - auto const result = cudf::binary_operation(*lhs, rhs, cudf::binary_operator::DIV, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOp_Div7) -{ - using namespace numeric; - using decimalXX = TypeParam; - using 
RepType = device_storage_type_t; - - auto const lhs = make_fixed_point_scalar(1200, scale_type{0}); - auto const rhs = fp_wrapper{{100, 200, 300, 500, 600, 800, 1200, 1300}, scale_type{-2}}; - - auto const expected = fp_wrapper{{12, 6, 4, 2, 2, 1, 1, 0}, scale_type{2}}; - - auto const type = data_type{type_to_id(), 2}; - auto const result = cudf::binary_operation(*lhs, rhs, cudf::binary_operator::DIV, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOp_Div8) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const lhs = fp_wrapper{{4000, 6000, 80000}, scale_type{-1}}; - auto const rhs = make_fixed_point_scalar(5000, scale_type{-3}); - auto const expected = fp_wrapper{{0, 1, 16}, scale_type{2}}; - - auto const type = data_type{type_to_id(), 2}; - auto const result = cudf::binary_operation(lhs, *rhs, cudf::binary_operator::DIV, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOp_Div9) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const lhs = fp_wrapper{{10, 20, 30}, scale_type{2}}; - auto const rhs = make_fixed_point_scalar(7, scale_type{1}); - auto const expected = fp_wrapper{{1, 2, 4}, scale_type{1}}; - - auto const type = data_type{type_to_id(), 1}; - auto const result = cudf::binary_operation(lhs, *rhs, cudf::binary_operator::DIV, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOp_Div10) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const lhs = fp_wrapper{{100, 200, 300}, scale_type{1}}; - auto const rhs = make_fixed_point_scalar(7, scale_type{0}); - auto const expected = fp_wrapper{{14, 28, 42}, scale_type{1}}; - - auto const type = 
data_type{type_to_id(), 1}; - auto const result = cudf::binary_operation(lhs, *rhs, cudf::binary_operator::DIV, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOp_Div11) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const lhs = fp_wrapper{{1000, 2000, 3000}, scale_type{1}}; - auto const rhs = fp_wrapper{{7, 7, 7}, scale_type{0}}; - auto const expected = fp_wrapper{{142, 285, 428}, scale_type{1}}; - - auto const type = data_type{type_to_id(), 1}; - auto const result = cudf::binary_operation(lhs, rhs, cudf::binary_operator::DIV, type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); -} - -TYPED_TEST(FixedPointTestAllReps, FixedPointBinaryOpThrows) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - auto const col = fp_wrapper{{100, 300, 500, 700}, scale_type{-2}}; - auto const non_bool_type = data_type{type_to_id(), -2}; - EXPECT_THROW(cudf::binary_operation(col, col, cudf::binary_operator::LESS, non_bool_type), - cudf::logic_error); -} - -template -struct FixedPointTest_64_128_Reps : public cudf::test::BaseFixture { -}; - -using Decimal64And128Types = cudf::test::Types; -TYPED_TEST_SUITE(FixedPointTest_64_128_Reps, Decimal64And128Types); - -TYPED_TEST(FixedPointTest_64_128_Reps, FixedPoint_64_128_ComparisonTests) -{ - using namespace numeric; - using decimalXX = TypeParam; - using RepType = device_storage_type_t; - - for (auto const rhs_value : {10000000000000000, 100000000000000000}) { - auto const lhs = fp_wrapper{{33041, 97290, 36438, 25379, 48473}, scale_type{2}}; - auto const rhs = make_fixed_point_scalar(rhs_value, scale_type{0}); - auto const trues = wrapper{{1, 1, 1, 1, 1}}; - auto const falses = wrapper{{0, 0, 0, 0, 0}}; - auto const bool_type = cudf::data_type{type_id::BOOL8}; - - auto const a = cudf::binary_operation(lhs, *rhs, 
binary_operator::LESS, bool_type); - auto const b = cudf::binary_operation(lhs, *rhs, binary_operator::LESS_EQUAL, bool_type); - auto const c = cudf::binary_operation(lhs, *rhs, binary_operator::GREATER, bool_type); - auto const d = cudf::binary_operation(lhs, *rhs, binary_operator::GREATER_EQUAL, bool_type); - auto const e = cudf::binary_operation(*rhs, lhs, binary_operator::GREATER, bool_type); - auto const f = cudf::binary_operation(*rhs, lhs, binary_operator::GREATER_EQUAL, bool_type); - auto const g = cudf::binary_operation(*rhs, lhs, binary_operator::LESS, bool_type); - auto const h = cudf::binary_operation(*rhs, lhs, binary_operator::LESS_EQUAL, bool_type); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(a->view(), trues); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(b->view(), trues); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(c->view(), falses); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(d->view(), falses); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(e->view(), trues); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(f->view(), trues); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(g->view(), falses); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(h->view(), falses); - } -} - -} // namespace binop -} // namespace test -} // namespace cudf - -CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/binaryop/binop-null-test.cpp b/cpp/tests/binaryop/binop-null-test.cpp index b7e7702bd6c..55ddde5ce5f 100644 --- a/cpp/tests/binaryop/binop-null-test.cpp +++ b/cpp/tests/binaryop/binop-null-test.cpp @@ -74,8 +74,8 @@ TEST_F(BinaryOperationNullTest, Scalar_Null_Vector_Valid) lhs.set_valid_async(false); auto rhs = make_random_wrapped_column(100, mask_state::ALL_VALID); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); + auto out = + cudf::binary_operation(lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); ASSERT_BINOP(*out, lhs, rhs, ADD()); } @@ -91,8 +91,8 @@ TEST_F(BinaryOperationNullTest, Scalar_Valid_Vector_NonNullable) auto lhs = make_random_wrapped_scalar(); auto rhs = 
make_random_wrapped_column(100, mask_state::UNALLOCATED); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); + auto out = + cudf::binary_operation(lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); ASSERT_BINOP(*out, lhs, rhs, ADD()); } @@ -109,8 +109,8 @@ TEST_F(BinaryOperationNullTest, Scalar_Null_Vector_NonNullable) lhs.set_valid_async(false); auto rhs = make_random_wrapped_column(100, mask_state::UNALLOCATED); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); + auto out = + cudf::binary_operation(lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); ASSERT_BINOP(*out, lhs, rhs, ADD()); } @@ -126,8 +126,8 @@ TEST_F(BinaryOperationNullTest, Vector_Null_Scalar_Valid) auto lhs = make_random_wrapped_scalar(); auto rhs = make_random_wrapped_column(100, mask_state::ALL_NULL); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); + auto out = + cudf::binary_operation(lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); ASSERT_BINOP(*out, lhs, rhs, ADD()); } @@ -143,8 +143,8 @@ TEST_F(BinaryOperationNullTest, Vector_Null_Vector_Valid) auto lhs = make_random_wrapped_column(100, mask_state::ALL_NULL); auto rhs = make_random_wrapped_column(100, mask_state::ALL_VALID); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); + auto out = + cudf::binary_operation(lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); ASSERT_BINOP(*out, lhs, rhs, ADD()); } @@ -160,8 +160,8 @@ TEST_F(BinaryOperationNullTest, Vector_Null_Vector_NonNullable) auto lhs = make_random_wrapped_column(100, mask_state::ALL_NULL); auto rhs = make_random_wrapped_column(100, mask_state::UNALLOCATED); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); + auto out = + 
cudf::binary_operation(lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); ASSERT_BINOP(*out, lhs, rhs, ADD()); } @@ -177,8 +177,8 @@ TEST_F(BinaryOperationNullTest, Vector_Valid_Vector_NonNullable) auto lhs = make_random_wrapped_column(100, mask_state::ALL_VALID); auto rhs = make_random_wrapped_column(100, mask_state::UNALLOCATED); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); + auto out = + cudf::binary_operation(lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); ASSERT_BINOP(*out, lhs, rhs, ADD()); } @@ -194,8 +194,8 @@ TEST_F(BinaryOperationNullTest, Vector_NonNullable_Vector_NonNullable) auto lhs = make_random_wrapped_column(100, mask_state::UNALLOCATED); auto rhs = make_random_wrapped_column(100, mask_state::UNALLOCATED); - auto out = cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); + auto out = + cudf::binary_operation(lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())); ASSERT_BINOP(*out, lhs, rhs, ADD()); } diff --git a/cpp/tests/binaryop/binop-verify-input-test.cpp b/cpp/tests/binaryop/binop-verify-input-test.cpp index 779dc7c4c1f..167fbc22bde 100644 --- a/cpp/tests/binaryop/binop-verify-input-test.cpp +++ b/cpp/tests/binaryop/binop-verify-input-test.cpp @@ -35,9 +35,9 @@ TEST_F(BinopVerifyInputTest, Vector_Scalar_ErrorOutputVectorType) auto lhs = make_random_wrapped_scalar(); auto rhs = make_random_wrapped_column(10); - EXPECT_THROW(cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::ADD, data_type(type_id::NUM_TYPE_IDS)), - cudf::logic_error); + EXPECT_THROW( + cudf::binary_operation(lhs, rhs, cudf::binary_operator::ADD, data_type(type_id::NUM_TYPE_IDS)), + cudf::logic_error); } TEST_F(BinopVerifyInputTest, Vector_Vector_ErrorSecondOperandVectorZeroSize) @@ -49,9 +49,9 @@ TEST_F(BinopVerifyInputTest, Vector_Vector_ErrorSecondOperandVectorZeroSize) auto lhs = make_random_wrapped_column(1); auto rhs = 
make_random_wrapped_column(10); - EXPECT_THROW(cudf::jit::binary_operation( - lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())), - cudf::logic_error); + EXPECT_THROW( + cudf::binary_operation(lhs, rhs, cudf::binary_operator::ADD, data_type(type_to_id())), + cudf::logic_error); } } // namespace binop From 16fcf4880e74c264899a8b09332cd8a99d08babe Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Thu, 25 Nov 2021 23:40:44 +0530 Subject: [PATCH 08/28] remove jit code which are supported by compiled binops --- cpp/include/cudf/binaryop.hpp | 78 ---- cpp/include/cudf/detail/binaryop.hpp | 45 +- cpp/src/binaryop/binaryop.cpp | 237 +--------- cpp/src/binaryop/jit/kernel.cu | 51 --- cpp/src/binaryop/jit/operation.hpp | 646 --------------------------- cpp/src/binaryop/jit/traits.hpp | 68 --- cpp/src/binaryop/jit/util.hpp | 88 ---- 7 files changed, 6 insertions(+), 1207 deletions(-) delete mode 100644 cpp/src/binaryop/jit/operation.hpp delete mode 100644 cpp/src/binaryop/jit/traits.hpp delete mode 100644 cpp/src/binaryop/jit/util.hpp diff --git a/cpp/include/cudf/binaryop.hpp b/cpp/include/cudf/binaryop.hpp index fe548a36cf0..a514010c1f0 100644 --- a/cpp/include/cudf/binaryop.hpp +++ b/cpp/include/cudf/binaryop.hpp @@ -210,83 +210,5 @@ cudf::data_type binary_operation_fixed_point_output_type(binary_operator op, cudf::data_type const& lhs, cudf::data_type const& rhs); -namespace jit { -/** - * @brief Performs a binary operation between a scalar and a column. - * - * The output contains the result of `op(lhs, rhs[i])` for all `0 <= i < rhs.size()` - * The scalar is the left operand and the column elements are the right operand. 
- * This distinction is significant in case of non-commutative binary operations - * - * Regardless of the operator, the validity of the output value is the logical - * AND of the validity of the two operands - * - * @param lhs The left operand scalar - * @param rhs The right operand column - * @param op The binary operator - * @param output_type The desired data type of the output column - * @param mr Device memory resource used to allocate the returned column's device memory - * @return Output column of `output_type` type containing the result of - * the binary operation - * @throw cudf::logic_error if @p output_type dtype isn't fixed-width - */ -std::unique_ptr binary_operation( - scalar const& lhs, - column_view const& rhs, - binary_operator op, - data_type output_type, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Performs a binary operation between a column and a scalar. - * - * The output contains the result of `op(lhs[i], rhs)` for all `0 <= i < lhs.size()` - * The column elements are the left operand and the scalar is the right operand. 
- * This distinction is significant in case of non-commutative binary operations - * - * Regardless of the operator, the validity of the output value is the logical - * AND of the validity of the two operands - * - * @param lhs The left operand column - * @param rhs The right operand scalar - * @param op The binary operator - * @param output_type The desired data type of the output column - * @param mr Device memory resource used to allocate the returned column's device memory - * @return Output column of `output_type` type containing the result of - * the binary operation - * @throw cudf::logic_error if @p output_type dtype isn't fixed-width - */ -std::unique_ptr binary_operation( - column_view const& lhs, - scalar const& rhs, - binary_operator op, - data_type output_type, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Performs a binary operation between two columns. - * - * The output contains the result of `op(lhs[i], rhs[i])` for all `0 <= i < lhs.size()` - * - * Regardless of the operator, the validity of the output value is the logical - * AND of the validity of the two operands - * - * @param lhs The left operand column - * @param rhs The right operand column - * @param op The binary operator - * @param output_type The desired data type of the output column - * @param mr Device memory resource used to allocate the returned column's device memory - * @return Output column of `output_type` type containing the result of - * the binary operation - * @throw cudf::logic_error if @p lhs and @p rhs are different sizes - * @throw cudf::logic_error if @p output_type dtype isn't fixed-width - */ -std::unique_ptr binary_operation( - column_view const& lhs, - column_view const& rhs, - binary_operator op, - data_type output_type, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); -} // namespace jit /** @} */ // end of group } // namespace cudf diff --git 
a/cpp/include/cudf/detail/binaryop.hpp b/cpp/include/cudf/detail/binaryop.hpp index ce7731ef7d2..9fa31d0e01d 100644 --- a/cpp/include/cudf/detail/binaryop.hpp +++ b/cpp/include/cudf/detail/binaryop.hpp @@ -22,52 +22,9 @@ namespace cudf { //! Inner interfaces and implementations namespace detail { -namespace jit { -/** - * @copydoc cudf::jit::binary_operation(scalar const&, column_view const&, binary_operator, - * data_type, rmm::mr::device_memory_resource *) - * - * @param stream CUDA stream used for device memory operations and kernel launches. - */ -std::unique_ptr binary_operation( - scalar const& lhs, - column_view const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @copydoc cudf::jit::binary_operation(column_view const&, scalar const&, binary_operator, - * data_type, rmm::mr::device_memory_resource *) - * - * @param stream CUDA stream used for device memory operations and kernel launches. - */ -std::unique_ptr binary_operation( - column_view const& lhs, - scalar const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - -/** - * @copydoc cudf::jit::binary_operation(column_view const&, column_view const&, - * binary_operator, data_type, rmm::mr::device_memory_resource *) - * - * @param stream CUDA stream used for device memory operations and kernel launches. 
- */ -std::unique_ptr binary_operation( - column_view const& lhs, - column_view const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); -} // namespace jit - -/** - * @copydoc cudf::jit::binary_operation(column_view const&, column_view const&, + * @copydoc cudf::binary_operation(column_view const&, column_view const&, * std::string const&, data_type, rmm::mr::device_memory_resource *) * * @param stream CUDA stream used for device memory operations and kernel launches. diff --git a/cpp/src/binaryop/binaryop.cpp b/cpp/src/binaryop/binaryop.cpp index e84e175eaca..3398592d5b4 100644 --- a/cpp/src/binaryop/binaryop.cpp +++ b/cpp/src/binaryop/binaryop.cpp @@ -18,7 +18,6 @@ */ #include "compiled/binary_ops.hpp" -#include "jit/util.hpp" #include @@ -126,113 +125,6 @@ bool is_same_scale_necessary(binary_operator op) } namespace jit { - -void binary_operation(mutable_column_view& out, - column_view const& lhs, - scalar const& rhs, - binary_operator op, - OperatorType op_type, - rmm::cuda_stream_view stream) -{ - if (is_null_dependent(op)) { - std::string kernel_name = - jitify2::reflection::Template("cudf::binops::jit::kernel_v_s_with_validity") // - .instantiate(cudf::jit::get_type_name(out.type()), // list of template arguments - cudf::jit::get_type_name(lhs.type()), - cudf::jit::get_type_name(rhs.type()), - get_operator_name(op, op_type)); - - cudf::jit::get_program_cache(*binaryop_jit_kernel_cu_jit) - .get_kernel(kernel_name, {}, {}, {"-arch=sm_."}) // - ->configure_1d_max_occupancy(0, 0, 0, stream.value()) // - ->launch(out.size(), - cudf::jit::get_data_ptr(out), - cudf::jit::get_data_ptr(lhs), - cudf::jit::get_data_ptr(rhs), - out.null_mask(), - lhs.null_mask(), - lhs.offset(), - rhs.is_valid(stream)); - } else { - std::string kernel_name = - jitify2::reflection::Template("cudf::binops::jit::kernel_v_s") // - 
.instantiate(cudf::jit::get_type_name(out.type()), // list of template arguments - cudf::jit::get_type_name(lhs.type()), - cudf::jit::get_type_name(rhs.type()), - get_operator_name(op, op_type)); - - cudf::jit::get_program_cache(*binaryop_jit_kernel_cu_jit) - .get_kernel(kernel_name, {}, {}, {"-arch=sm_."}) // - ->configure_1d_max_occupancy(0, 0, 0, stream.value()) // - ->launch(out.size(), - cudf::jit::get_data_ptr(out), - cudf::jit::get_data_ptr(lhs), - cudf::jit::get_data_ptr(rhs)); - } -} - -void binary_operation(mutable_column_view& out, - column_view const& lhs, - scalar const& rhs, - binary_operator op, - rmm::cuda_stream_view stream) -{ - return binary_operation(out, lhs, rhs, op, OperatorType::Direct, stream); -} - -void binary_operation(mutable_column_view& out, - scalar const& lhs, - column_view const& rhs, - binary_operator op, - rmm::cuda_stream_view stream) -{ - return binary_operation(out, rhs, lhs, op, OperatorType::Reverse, stream); -} - -void binary_operation(mutable_column_view& out, - column_view const& lhs, - column_view const& rhs, - binary_operator op, - rmm::cuda_stream_view stream) -{ - if (is_null_dependent(op)) { - std::string kernel_name = - jitify2::reflection::Template("cudf::binops::jit::kernel_v_v_with_validity") // - .instantiate(cudf::jit::get_type_name(out.type()), // list of template arguments - cudf::jit::get_type_name(lhs.type()), - cudf::jit::get_type_name(rhs.type()), - get_operator_name(op, OperatorType::Direct)); - - cudf::jit::get_program_cache(*binaryop_jit_kernel_cu_jit) - .get_kernel(kernel_name, {}, {}, {"-arch=sm_."}) // - ->configure_1d_max_occupancy(0, 0, 0, stream.value()) // - ->launch(out.size(), - cudf::jit::get_data_ptr(out), - cudf::jit::get_data_ptr(lhs), - cudf::jit::get_data_ptr(rhs), - out.null_mask(), - lhs.null_mask(), - rhs.offset(), - rhs.null_mask(), - rhs.offset()); - } else { - std::string kernel_name = - jitify2::reflection::Template("cudf::binops::jit::kernel_v_v") // - 
.instantiate(cudf::jit::get_type_name(out.type()), // list of template arguments - cudf::jit::get_type_name(lhs.type()), - cudf::jit::get_type_name(rhs.type()), - get_operator_name(op, OperatorType::Direct)); - - cudf::jit::get_program_cache(*binaryop_jit_kernel_cu_jit) - .get_kernel(kernel_name, {}, {}, {"-arch=sm_."}) // - ->configure_1d_max_occupancy(0, 0, 0, stream.value()) // - ->launch(out.size(), - cudf::jit::get_data_ptr(out), - cudf::jit::get_data_ptr(lhs), - cudf::jit::get_data_ptr(rhs)); - } -} - void binary_operation(mutable_column_view& out, column_view const& lhs, column_view const& rhs, @@ -246,12 +138,11 @@ void binary_operation(mutable_column_view& out, std::string cuda_source = cudf::jit::parse_single_function_ptx(ptx, "GENERIC_BINARY_OP", output_type_name); - std::string kernel_name = - jitify2::reflection::Template("cudf::binops::jit::kernel_v_v") // - .instantiate(output_type_name, // list of template arguments - cudf::jit::get_type_name(lhs.type()), - cudf::jit::get_type_name(rhs.type()), - get_operator_name(binary_operator::GENERIC_BINARY, OperatorType::Direct)); + std::string kernel_name = jitify2::reflection::Template("cudf::binops::jit::kernel_v_v") // + .instantiate(output_type_name, // list of template arguments + cudf::jit::get_type_name(lhs.type()), + cudf::jit::get_type_name(rhs.type()), + std::string("UserDefinedOp")); cudf::jit::get_program_cache(*binaryop_jit_kernel_cu_jit) .get_kernel( @@ -418,126 +309,8 @@ std::unique_ptr make_fixed_width_column_for_output(column_view const& lh output_type, lhs.size(), std::move(new_mask), null_count, stream, mr); } }; - -namespace jit { - -std::unique_ptr binary_operation(scalar const& lhs, - column_view const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - // calls compiled ops for string types - if (lhs.type().id() == type_id::STRING and rhs.type().id() == type_id::STRING) - return detail::binary_operation(lhs, 
rhs, op, output_type, stream, mr); - - // Check for datatype - CUDF_EXPECTS(is_fixed_width(output_type), "Invalid/Unsupported output datatype"); - CUDF_EXPECTS(not is_fixed_point(lhs.type()), "Invalid/Unsupported lhs datatype"); - CUDF_EXPECTS(not is_fixed_point(rhs.type()), "Invalid/Unsupported rhs datatype"); - CUDF_EXPECTS(is_fixed_width(lhs.type()), "Invalid/Unsupported lhs datatype"); - CUDF_EXPECTS(is_fixed_width(rhs.type()), "Invalid/Unsupported rhs datatype"); - - auto out = make_fixed_width_column_for_output(lhs, rhs, op, output_type, stream, mr); - - if (rhs.is_empty()) return out; - - auto out_view = out->mutable_view(); - binops::jit::binary_operation(out_view, lhs, rhs, op, stream); - return out; -} - -std::unique_ptr binary_operation(column_view const& lhs, - scalar const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - // calls compiled ops for string types - if (lhs.type().id() == type_id::STRING and rhs.type().id() == type_id::STRING) - return detail::binary_operation(lhs, rhs, op, output_type, stream, mr); - - // Check for datatype - CUDF_EXPECTS(is_fixed_width(output_type), "Invalid/Unsupported output datatype"); - CUDF_EXPECTS(not is_fixed_point(lhs.type()), "Invalid/Unsupported lhs datatype"); - CUDF_EXPECTS(not is_fixed_point(rhs.type()), "Invalid/Unsupported rhs datatype"); - CUDF_EXPECTS(is_fixed_width(lhs.type()), "Invalid/Unsupported lhs datatype"); - CUDF_EXPECTS(is_fixed_width(rhs.type()), "Invalid/Unsupported rhs datatype"); - - auto out = make_fixed_width_column_for_output(lhs, rhs, op, output_type, stream, mr); - - if (lhs.is_empty()) return out; - - auto out_view = out->mutable_view(); - binops::jit::binary_operation(out_view, lhs, rhs, op, stream); - return out; -} - -std::unique_ptr binary_operation(column_view const& lhs, - column_view const& rhs, - binary_operator op, - data_type output_type, - rmm::cuda_stream_view stream, - 
rmm::mr::device_memory_resource* mr) -{ - CUDF_EXPECTS(lhs.size() == rhs.size(), "Column sizes don't match"); - - // calls compiled ops for string types - if (lhs.type().id() == type_id::STRING and rhs.type().id() == type_id::STRING) - return detail::binary_operation(lhs, rhs, op, output_type, stream, mr); - - // Check for datatype - CUDF_EXPECTS(is_fixed_width(output_type), "Invalid/Unsupported output datatype"); - CUDF_EXPECTS(not is_fixed_point(lhs.type()), "Invalid/Unsupported lhs datatype"); - CUDF_EXPECTS(not is_fixed_point(rhs.type()), "Invalid/Unsupported rhs datatype"); - CUDF_EXPECTS(is_fixed_width(lhs.type()), "Invalid/Unsupported lhs datatype"); - CUDF_EXPECTS(is_fixed_width(rhs.type()), "Invalid/Unsupported rhs datatype"); - - auto out = make_fixed_width_column_for_output(lhs, rhs, op, output_type, stream, mr); - - if (lhs.is_empty() or rhs.is_empty()) return out; - - auto out_view = out->mutable_view(); - binops::jit::binary_operation(out_view, lhs, rhs, op, stream); - return out; -} -} // namespace jit } // namespace detail -namespace jit { -std::unique_ptr binary_operation(scalar const& lhs, - column_view const& rhs, - binary_operator op, - data_type output_type, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::jit::binary_operation(lhs, rhs, op, output_type, rmm::cuda_stream_default, mr); -} - -std::unique_ptr binary_operation(column_view const& lhs, - scalar const& rhs, - binary_operator op, - data_type output_type, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::jit::binary_operation(lhs, rhs, op, output_type, rmm::cuda_stream_default, mr); -} - -std::unique_ptr binary_operation(column_view const& lhs, - column_view const& rhs, - binary_operator op, - data_type output_type, - rmm::mr::device_memory_resource* mr) -{ - CUDF_FUNC_RANGE(); - return detail::jit::binary_operation(lhs, rhs, op, output_type, rmm::cuda_stream_default, mr); -} -} // namespace jit - namespace detail { 
std::unique_ptr binary_operation(scalar const& lhs, column_view const& rhs, diff --git a/cpp/src/binaryop/jit/kernel.cu b/cpp/src/binaryop/jit/kernel.cu index fcfe16f979d..3130cf65bb3 100644 --- a/cpp/src/binaryop/jit/kernel.cu +++ b/cpp/src/binaryop/jit/kernel.cu @@ -18,8 +18,6 @@ * limitations under the License. */ -#include - #include #include #include @@ -30,55 +28,6 @@ namespace cudf { namespace binops { namespace jit { -template -__global__ void kernel_v_s_with_validity(cudf::size_type size, - TypeOut* out_data, - TypeLhs* lhs_data, - TypeRhs* rhs_data, - cudf::bitmask_type* output_mask, - cudf::bitmask_type const* mask, - cudf::size_type offset, - bool scalar_valid) -{ - int tid = threadIdx.x; - int blkid = blockIdx.x; - int blksz = blockDim.x; - int gridsz = gridDim.x; - - int start = tid + blkid * blksz; - int step = blksz * gridsz; - - for (cudf::size_type i = start; i < size; i += step) { - bool output_valid = false; - out_data[i] = TypeOpe::template operate( - lhs_data[i], - rhs_data[0], - mask ? cudf::bit_is_set(mask, offset + i) : true, - scalar_valid, - output_valid); - if (output_mask && !output_valid) cudf::clear_bit(output_mask, i); - } -} - -template -__global__ void kernel_v_s(cudf::size_type size, - TypeOut* out_data, - TypeLhs* lhs_data, - TypeRhs* rhs_data) -{ - int tid = threadIdx.x; - int blkid = blockIdx.x; - int blksz = blockDim.x; - int gridsz = gridDim.x; - - int start = tid + blkid * blksz; - int step = blksz * gridsz; - - for (cudf::size_type i = start; i < size; i += step) { - out_data[i] = TypeOpe::template operate(lhs_data[i], rhs_data[0]); - } -} - template __global__ void kernel_v_v(cudf::size_type size, TypeOut* out_data, diff --git a/cpp/src/binaryop/jit/operation.hpp b/cpp/src/binaryop/jit/operation.hpp deleted file mode 100644 index d117f2182f9..00000000000 --- a/cpp/src/binaryop/jit/operation.hpp +++ /dev/null @@ -1,646 +0,0 @@ -/* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. - * - * Copyright 2018-2019 BlazingDB, Inc. 
- * Copyright 2018 Christian Noboa Mardini - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -#include -#include - -#include - -#include - -#pragma once - -using namespace cuda::std; - -namespace cudf { -namespace binops { -namespace jit { - -struct Add { - // Allow sum between chronos only when both input and output types - // are chronos. Unsupported combinations will fail to compile - template < - typename TypeOut, - typename TypeLhs, - typename TypeRhs, - enable_if_t<(is_chrono_v && is_chrono_v && is_chrono_v)>* = nullptr> - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return x + y; - } - - template || !is_chrono_v || - !is_chrono_v)>* = nullptr> - static TypeOut operate(TypeLhs x, TypeRhs y) - { - using TypeCommon = typename common_type::type; - return static_cast(static_cast(x) + static_cast(y)); - } -}; - -using RAdd = Add; - -struct Sub { - // Allow difference between chronos only when both input and output types - // are chronos. 
Unsupported combinations will fail to compile - template < - typename TypeOut, - typename TypeLhs, - typename TypeRhs, - enable_if_t<(is_chrono_v && is_chrono_v && is_chrono_v)>* = nullptr> - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return x - y; - } - - template || !is_chrono_v || - !is_chrono_v)>* = nullptr> - static TypeOut operate(TypeLhs x, TypeRhs y) - { - using TypeCommon = typename common_type::type; - return static_cast(static_cast(x) - static_cast(y)); - } -}; - -struct RSub { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return Sub::operate(y, x); - } -}; - -struct Mul { - template )>* = nullptr> - static TypeOut operate(TypeLhs x, TypeRhs y) - { - using TypeCommon = typename common_type::type; - return static_cast(static_cast(x) * static_cast(y)); - } - - template )>* = nullptr> - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return DurationProduct(x, y); - } - - template && is_integral_v) || - (is_integral_v && is_duration_v)>* = nullptr> - static TypeOut DurationProduct(TypeLhs x, TypeRhs y) - { - return x * y; - } -}; - -using RMul = Mul; - -struct Div { - template )>* = nullptr> - static TypeOut operate(TypeLhs x, TypeRhs y) - { - using TypeCommon = typename common_type::type; - return static_cast(static_cast(x) / static_cast(y)); - } - - template )>* = nullptr> - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return DurationDivide(x, y); - } - - template || is_duration_v)>* = nullptr> - static TypeOut DurationDivide(TypeLhs x, TypeRhs y) - { - return x / y; - } -}; - -struct RDiv { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return Div::operate(y, x); - } -}; - -struct TrueDiv { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (static_cast(x) / static_cast(y)); - } -}; - -struct RTrueDiv { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return TrueDiv::operate(y, x); - } -}; - -struct FloorDiv { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - 
{ - return floor(static_cast(x) / static_cast(y)); - } -}; - -struct RFloorDiv { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return FloorDiv::operate(y, x); - } -}; - -struct Mod { - template < - typename TypeOut, - typename TypeLhs, - typename TypeRhs, - enable_if_t<(is_integral_v::type>)>* = nullptr> - static TypeOut operate(TypeLhs x, TypeRhs y) - { - using TypeCommon = typename common_type::type; - return static_cast(static_cast(x) % static_cast(y)); - } - - template < - typename TypeOut, - typename TypeLhs, - typename TypeRhs, - enable_if_t<(isFloat::type>)>* = nullptr> - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return static_cast(fmodf(static_cast(x), static_cast(y))); - } - - template < - typename TypeOut, - typename TypeLhs, - typename TypeRhs, - enable_if_t<(isDouble::type>)>* = nullptr> - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return static_cast(fmod(static_cast(x), static_cast(y))); - } - - template && is_duration_v)>* = nullptr> - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return x % y; - } -}; - -struct RMod { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return Mod::operate(y, x); - } -}; - -struct PyMod { - template )>* = nullptr> - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return ((x % y) + y) % y; - } - - template )>* = nullptr> - static TypeOut operate(TypeLhs x, TypeRhs y) - { - double x1 = static_cast(x); - double y1 = static_cast(y); - return fmod(fmod(x1, y1) + y1, y1); - } - - template && is_duration_v)>* = nullptr> - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return ((x % y) + y) % y; - } -}; - -struct RPyMod { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return PyMod::operate(y, x); - } -}; - -struct Pow { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return pow(static_cast(x), static_cast(y)); - } -}; - -struct RPow { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return Pow::operate(y, x); - } -}; - 
-struct Equal { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (x == y); - } -}; - -using REqual = Equal; - -struct NotEqual { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (x != y); - } -}; - -using RNotEqual = NotEqual; - -struct Less { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (x < y); - } -}; - -struct RLess { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (y < x); - } -}; - -struct Greater { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (x > y); - } -}; - -struct RGreater { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (y > x); - } -}; - -struct LessEqual { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (x <= y); - } -}; - -struct RLessEqual { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (y <= x); - } -}; - -struct GreaterEqual { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (x >= y); - } -}; - -struct RGreaterEqual { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (y >= x); - } -}; - -struct BitwiseAnd { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (static_cast(x) & static_cast(y)); - } -}; - -using RBitwiseAnd = BitwiseAnd; - -struct BitwiseOr { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (static_cast(x) | static_cast(y)); - } -}; - -using RBitwiseOr = BitwiseOr; - -struct BitwiseXor { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (static_cast(x) ^ static_cast(y)); - } -}; - -using RBitwiseXor = BitwiseXor; - -struct LogicalAnd { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (x && y); - } -}; - -using RLogicalAnd = LogicalAnd; - -struct LogicalOr { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (x || y); - } -}; - -using RLogicalOr = LogicalOr; - -struct UserDefinedOp { - template - 
static TypeOut operate(TypeLhs x, TypeRhs y) - { - TypeOut output; - using TypeCommon = typename common_type::type; - GENERIC_BINARY_OP(&output, static_cast(x), static_cast(y)); - return output; - } -}; - -struct ShiftLeft { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (x << y); - } -}; - -struct RShiftLeft { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (y << x); - } -}; - -struct ShiftRight { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (x >> y); - } -}; - -struct RShiftRight { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (y >> x); - } -}; - -struct ShiftRightUnsigned { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (static_cast>(x) >> y); - } -}; - -struct RShiftRightUnsigned { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (static_cast>(y) >> x); - } -}; - -struct LogBase { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return (std::log(static_cast(x)) / std::log(static_cast(y))); - } -}; - -struct RLogBase { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return LogBase::operate(y, x); - } -}; - -struct NullEquals { - template - static TypeOut operate(TypeLhs x, TypeRhs y, bool lhs_valid, bool rhs_valid, bool& output_valid) - { - output_valid = true; - if (!lhs_valid && !rhs_valid) return true; - if (lhs_valid && rhs_valid) return x == y; - return false; - } -}; - -struct RNullEquals { - template - static TypeOut operate(TypeLhs x, TypeRhs y, bool lhs_valid, bool rhs_valid, bool& output_valid) - { - output_valid = true; - return NullEquals::operate(y, x, rhs_valid, lhs_valid, output_valid); - } -}; - -struct NullMax { - template - static TypeOut operate(TypeLhs x, TypeRhs y, bool lhs_valid, bool rhs_valid, bool& output_valid) - { - output_valid = true; - if (!lhs_valid && !rhs_valid) { - output_valid = false; - return TypeOut{}; - } else if (lhs_valid && rhs_valid) { - return 
(TypeOut{x} > TypeOut{y}) ? TypeOut{x} : TypeOut{y}; - } else if (lhs_valid) - return TypeOut{x}; - else - return TypeOut{y}; - } -}; - -struct RNullMax { - template - static TypeOut operate(TypeLhs x, TypeRhs y, bool lhs_valid, bool rhs_valid, bool& output_valid) - { - return NullMax::operate(y, x, rhs_valid, lhs_valid, output_valid); - } -}; - -struct NullMin { - template - static TypeOut operate(TypeLhs x, TypeRhs y, bool lhs_valid, bool rhs_valid, bool& output_valid) - { - output_valid = true; - if (!lhs_valid && !rhs_valid) { - output_valid = false; - return TypeOut{}; - } else if (lhs_valid && rhs_valid) { - return (TypeOut{x} < TypeOut{y}) ? TypeOut{x} : TypeOut{y}; - } else if (lhs_valid) - return TypeOut{x}; - else - return TypeOut{y}; - } -}; - -struct RNullMin { - template - static TypeOut operate(TypeLhs x, TypeRhs y, bool lhs_valid, bool rhs_valid, bool& output_valid) - { - return NullMin::operate(y, x, rhs_valid, lhs_valid, output_valid); - } -}; - -struct PMod { - // Ideally, these two specializations - one for integral types and one for non integral - // types shouldn't be required, as std::fmod should promote integral types automatically - // to double and call the std::fmod overload for doubles. 
Sadly, doing this in jitified - // code does not work - it is having trouble deciding between float/double overloads - template ::type>)>* = - nullptr> - static TypeOut operate(TypeLhs x, TypeRhs y) - { - using common_t = typename cuda::std::common_type::type; - common_t xconv{x}; - common_t yconv{y}; - auto rem = xconv % yconv; - if (rem < 0) rem = (rem + yconv) % yconv; - return TypeOut{rem}; - } - - template ::type>)>* = nullptr> - static TypeOut operate(TypeLhs x, TypeRhs y) - { - using common_t = typename cuda::std::common_type::type; - common_t xconv{x}; - common_t yconv{y}; - auto rem = std::fmod(xconv, yconv); - if (rem < 0) rem = std::fmod(rem + yconv, yconv); - return TypeOut{rem}; - } -}; - -struct RPMod { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return PMod::operate(y, x); - } -}; - -struct ATan2 { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return TypeOut{std::atan2(double{x}, double{y})}; - } -}; - -struct RATan2 { - template - static TypeOut operate(TypeLhs x, TypeRhs y) - { - return TypeOut{ATan2::operate(y, x)}; - } -}; - -} // namespace jit -} // namespace binops -} // namespace cudf diff --git a/cpp/src/binaryop/jit/traits.hpp b/cpp/src/binaryop/jit/traits.hpp deleted file mode 100644 index 1033d38a668..00000000000 --- a/cpp/src/binaryop/jit/traits.hpp +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. - * - * Copyright 2018-2019 BlazingDB, Inc. - * Copyright 2018 Christian Noboa Mardini - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -// Include Jitify's cstddef header first -#include - -#include -#include -#include -#include - -#include -#include - -namespace cudf { -namespace binops { -namespace jit { - -// ------------------------------------------------------------------------- -// type_traits cannot tell the difference between float and double -template -constexpr bool isFloat = false; - -template -constexpr bool is_timestamp_v = - cuda::std::is_same_v || cuda::std::is_same_v || - cuda::std::is_same_v || cuda::std::is_same_v || - cuda::std::is_same_v; - -template -constexpr bool is_duration_v = - cuda::std::is_same_v || cuda::std::is_same_v || - cuda::std::is_same_v || cuda::std::is_same_v || - cuda::std::is_same_v; - -template -constexpr bool is_chrono_v = is_timestamp_v || is_duration_v; - -template <> -constexpr bool isFloat = true; - -template -constexpr bool isDouble = false; - -template <> -constexpr bool isDouble = true; - -} // namespace jit -} // namespace binops -} // namespace cudf diff --git a/cpp/src/binaryop/jit/util.hpp b/cpp/src/binaryop/jit/util.hpp deleted file mode 100644 index 34c42e28a8b..00000000000 --- a/cpp/src/binaryop/jit/util.hpp +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include - -#include - -namespace cudf { -namespace binops { -namespace jit { - -/** - * @brief Orientation of lhs and rhs in operator - */ -enum class OperatorType { - Direct, ///< Orientation of operands is op(lhs, rhs) - Reverse ///< Orientation of operands is op(rhs, lhs) -}; - -/** - * @brief Get the Operator Name - * - * @param op The binary operator as enum of type cudf::binary_operator - * @param type @see OperatorType - * @return std::string The name of the operator as string - */ -std::string inline get_operator_name(binary_operator op, OperatorType type) -{ - std::string const operator_name = [op] { - // clang-format off - switch (op) { - case binary_operator::ADD: return "Add"; - case binary_operator::SUB: return "Sub"; - case binary_operator::MUL: return "Mul"; - case binary_operator::DIV: return "Div"; - case binary_operator::TRUE_DIV: return "TrueDiv"; - case binary_operator::FLOOR_DIV: return "FloorDiv"; - case binary_operator::MOD: return "Mod"; - case binary_operator::PYMOD: return "PyMod"; - case binary_operator::POW: return "Pow"; - case binary_operator::EQUAL: return "Equal"; - case binary_operator::NOT_EQUAL: return "NotEqual"; - case binary_operator::LESS: return "Less"; - case binary_operator::GREATER: return "Greater"; - case binary_operator::LESS_EQUAL: return "LessEqual"; - case binary_operator::GREATER_EQUAL: return "GreaterEqual"; - case binary_operator::BITWISE_AND: return "BitwiseAnd"; - case binary_operator::BITWISE_OR: return "BitwiseOr"; - case binary_operator::BITWISE_XOR: return "BitwiseXor"; - case binary_operator::LOGICAL_AND: return "LogicalAnd"; - case binary_operator::LOGICAL_OR: return "LogicalOr"; - case binary_operator::GENERIC_BINARY: return "UserDefinedOp"; - case binary_operator::SHIFT_LEFT: return "ShiftLeft"; - case binary_operator::SHIFT_RIGHT: return "ShiftRight"; - case binary_operator::SHIFT_RIGHT_UNSIGNED: return "ShiftRightUnsigned"; - case binary_operator::LOG_BASE: return "LogBase"; - case 
binary_operator::ATAN2: return "ATan2"; - case binary_operator::PMOD: return "PMod"; - case binary_operator::NULL_EQUALS: return "NullEquals"; - case binary_operator::NULL_MAX: return "NullMax"; - case binary_operator::NULL_MIN: return "NullMin"; - default: return ""; - } - // clang-format on - }(); - - if (operator_name == "") { return "None"; } - - return "cudf::binops::jit::" + - (type == OperatorType::Direct ? operator_name : 'R' + operator_name); -} - -} // namespace jit -} // namespace binops -} // namespace cudf From 1b9d624af8b3b5d097be3e478d5f878fa26e7632 Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Thu, 25 Nov 2021 23:53:17 +0530 Subject: [PATCH 09/28] remove jit benchmark --- cpp/benchmarks/CMakeLists.txt | 1 - .../binaryop/binaryop_benchmark.cpp | 8 +- .../binaryop/jit_binaryop_benchmark.cpp | 99 ------------------- 3 files changed, 4 insertions(+), 104 deletions(-) delete mode 100644 cpp/benchmarks/binaryop/jit_binaryop_benchmark.cpp diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index fa1e61e26fd..72b247ae748 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -208,7 +208,6 @@ ConfigureBench(AST_BENCH ast/transform_benchmark.cpp) # * binaryop benchmark ---------------------------------------------------------------------------- ConfigureBench( BINARYOP_BENCH binaryop/binaryop_benchmark.cpp binaryop/compiled_binaryop_benchmark.cpp - binaryop/jit_binaryop_benchmark.cpp ) # ################################################################################################## diff --git a/cpp/benchmarks/binaryop/binaryop_benchmark.cpp b/cpp/benchmarks/binaryop/binaryop_benchmark.cpp index 9de1112a9db..314d657679b 100644 --- a/cpp/benchmarks/binaryop/binaryop_benchmark.cpp +++ b/cpp/benchmarks/binaryop/binaryop_benchmark.cpp @@ -74,14 +74,14 @@ static void BM_binaryop_transform(benchmark::State& state) auto const op = cudf::binary_operator::ADD; auto result_data_type = 
cudf::data_type(cudf::type_to_id()); if (reuse_columns) { - auto result = cudf::jit::binary_operation(columns.at(0), columns.at(0), op, result_data_type); + auto result = cudf::binary_operation(columns.at(0), columns.at(0), op, result_data_type); for (cudf::size_type i = 0; i < tree_levels - 1; i++) { - result = cudf::jit::binary_operation(result->view(), columns.at(0), op, result_data_type); + result = cudf::binary_operation(result->view(), columns.at(0), op, result_data_type); } } else { - auto result = cudf::jit::binary_operation(columns.at(0), columns.at(1), op, result_data_type); + auto result = cudf::binary_operation(columns.at(0), columns.at(1), op, result_data_type); std::for_each(std::next(columns.cbegin(), 2), columns.cend(), [&](auto const& col) { - result = cudf::jit::binary_operation(result->view(), col, op, result_data_type); + result = cudf::binary_operation(result->view(), col, op, result_data_type); }); } } diff --git a/cpp/benchmarks/binaryop/jit_binaryop_benchmark.cpp b/cpp/benchmarks/binaryop/jit_binaryop_benchmark.cpp deleted file mode 100644 index 7fda4a50ea1..00000000000 --- a/cpp/benchmarks/binaryop/jit_binaryop_benchmark.cpp +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (c) 2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include - -#include - -#include - -#include - -template -class JIT_BINARYOP : public cudf::benchmark { -}; - -template -void BM_binaryop(benchmark::State& state, cudf::binary_operator binop) -{ - const cudf::size_type column_size{(cudf::size_type)state.range(0)}; - - auto data_it = thrust::make_counting_iterator(0); - cudf::test::fixed_width_column_wrapper input1(data_it, data_it + column_size); - cudf::test::fixed_width_column_wrapper input2(data_it, data_it + column_size); - - auto lhs = cudf::column_view(input1); - auto rhs = cudf::column_view(input2); - auto output_dtype = cudf::data_type(cudf::type_to_id()); - - // Call once for hot cache. - cudf::jit::binary_operation(lhs, rhs, binop, output_dtype); - - for (auto _ : state) { - cuda_event_timer timer(state, true); - cudf::jit::binary_operation(lhs, rhs, binop, output_dtype); - } -} - -// TODO tparam boolean for null. -#define BINARYOP_BENCHMARK_DEFINE(TypeLhs, TypeRhs, binop, TypeOut) \ - BENCHMARK_TEMPLATE_DEFINE_F( \ - JIT_BINARYOP, binop, TypeLhs, TypeRhs, TypeOut, cudf::binary_operator::binop) \ - (::benchmark::State & st) \ - { \ - BM_binaryop(st, cudf::binary_operator::binop); \ - } \ - BENCHMARK_REGISTER_F(JIT_BINARYOP, binop) \ - ->Unit(benchmark::kMicrosecond) \ - ->UseManualTime() \ - ->Arg(10000) /* 10k */ \ - ->Arg(100000) /* 100k */ \ - ->Arg(1000000) /* 1M */ \ - ->Arg(10000000) /* 10M */ \ - ->Arg(100000000); /* 100M */ - -using namespace cudf; -using namespace numeric; - -// clang-format off -BINARYOP_BENCHMARK_DEFINE(float, int64_t, ADD, int32_t); -BINARYOP_BENCHMARK_DEFINE(duration_s, duration_D, SUB, duration_ms); -BINARYOP_BENCHMARK_DEFINE(float, float, MUL, int64_t); -BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, DIV, int64_t); -BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, TRUE_DIV, int64_t); -BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, FLOOR_DIV, int64_t); -BINARYOP_BENCHMARK_DEFINE(double, double, MOD, double); -BINARYOP_BENCHMARK_DEFINE(int32_t, int64_t, PMOD, double); 
-BINARYOP_BENCHMARK_DEFINE(int32_t, uint8_t, PYMOD, int64_t); -BINARYOP_BENCHMARK_DEFINE(int64_t, int64_t, POW, double); -BINARYOP_BENCHMARK_DEFINE(float, double, LOG_BASE, double); -BINARYOP_BENCHMARK_DEFINE(float, double, ATAN2, double); -BINARYOP_BENCHMARK_DEFINE(int, int, SHIFT_LEFT, int); -BINARYOP_BENCHMARK_DEFINE(int16_t, int64_t, SHIFT_RIGHT, int); -BINARYOP_BENCHMARK_DEFINE(int64_t, int32_t, SHIFT_RIGHT_UNSIGNED, int64_t); -BINARYOP_BENCHMARK_DEFINE(int64_t, int32_t, BITWISE_AND, int16_t); -BINARYOP_BENCHMARK_DEFINE(int16_t, int32_t, BITWISE_OR, int64_t); -BINARYOP_BENCHMARK_DEFINE(int16_t, int64_t, BITWISE_XOR, int32_t); -BINARYOP_BENCHMARK_DEFINE(double, int8_t, LOGICAL_AND, bool); -BINARYOP_BENCHMARK_DEFINE(int16_t, int64_t, LOGICAL_OR, bool); -BINARYOP_BENCHMARK_DEFINE(duration_ms, duration_ns, EQUAL, bool); -BINARYOP_BENCHMARK_DEFINE(decimal32, decimal32, NOT_EQUAL, bool); -BINARYOP_BENCHMARK_DEFINE(timestamp_s, timestamp_s, LESS, bool); -BINARYOP_BENCHMARK_DEFINE(timestamp_ms, timestamp_s, GREATER, bool); -BINARYOP_BENCHMARK_DEFINE(duration_ms, duration_ns, NULL_EQUALS, bool); -BINARYOP_BENCHMARK_DEFINE(decimal32, decimal32, NULL_MAX, decimal32); -BINARYOP_BENCHMARK_DEFINE(timestamp_D, timestamp_s, NULL_MIN, timestamp_s); From e49a3430ec0a75379fcf83a5041f0ae81a1ffdba Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Fri, 26 Nov 2021 00:40:40 +0530 Subject: [PATCH 10/28] skip generic op udf (jit ptx) in pytest CUDA<11.5 --- python/cudf/cudf/tests/test_udf_binops.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/python/cudf/cudf/tests/test_udf_binops.py b/python/cudf/cudf/tests/test_udf_binops.py index 4d6188acf8c..935c3868a68 100644 --- a/python/cudf/cudf/tests/test_udf_binops.py +++ b/python/cudf/cudf/tests/test_udf_binops.py @@ -6,11 +6,20 @@ from numba.cuda import compile_ptx from numba.np import numpy_support +import rmm + import cudf from cudf import Series, _lib as libcudf from cudf.utils import dtypes as dtypeutils 
+_driver_version = rmm._cuda.gpu.driverGetVersion() +_runtime_version = rmm._cuda.gpu.runtimeGetVersion() +_CUDA_JIT128INT_SUPPORTED = (_driver_version >= 11050) and ( + _runtime_version >= 11050 +) + +@pytest.mark.skipif(not _CUDA_JIT128INT_SUPPORTED, reason="requires CUDA 11.5") @pytest.mark.parametrize( "dtype", sorted(list(dtypeutils.NUMERIC_TYPES - {"int8"})) ) From 8f640866b7512409af5e12b38bd7e8b3633b9a9e Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Fri, 26 Nov 2021 02:07:56 +0530 Subject: [PATCH 11/28] add deleted UserDefinedOp --- cpp/src/binaryop/binaryop.cpp | 2 +- cpp/src/binaryop/jit/kernel.cu | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/cpp/src/binaryop/binaryop.cpp b/cpp/src/binaryop/binaryop.cpp index 3398592d5b4..a89e9795283 100644 --- a/cpp/src/binaryop/binaryop.cpp +++ b/cpp/src/binaryop/binaryop.cpp @@ -142,7 +142,7 @@ void binary_operation(mutable_column_view& out, .instantiate(output_type_name, // list of template arguments cudf::jit::get_type_name(lhs.type()), cudf::jit::get_type_name(rhs.type()), - std::string("UserDefinedOp")); + std::string("cudf::binops::jit::UserDefinedOp")); cudf::jit::get_program_cache(*binaryop_jit_kernel_cu_jit) .get_kernel( diff --git a/cpp/src/binaryop/jit/kernel.cu b/cpp/src/binaryop/jit/kernel.cu index 3130cf65bb3..4eb011a1846 100644 --- a/cpp/src/binaryop/jit/kernel.cu +++ b/cpp/src/binaryop/jit/kernel.cu @@ -28,6 +28,17 @@ namespace cudf { namespace binops { namespace jit { +struct UserDefinedOp { + template + static TypeOut operate(TypeLhs x, TypeRhs y) + { + TypeOut output; + using TypeCommon = typename common_type::type; + GENERIC_BINARY_OP(&output, static_cast(x), static_cast(y)); + return output; + } +}; + template __global__ void kernel_v_v(cudf::size_type size, TypeOut* out_data, From efb203bcd847d786faacc25da754251bf4c5eb2b Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Fri, 26 Nov 2021 03:51:38 +0530 Subject: [PATCH 12/28] fix missing includes 
--- cpp/src/binaryop/jit/kernel.cu | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cpp/src/binaryop/jit/kernel.cu b/cpp/src/binaryop/jit/kernel.cu index 4eb011a1846..c9cc61a4f34 100644 --- a/cpp/src/binaryop/jit/kernel.cu +++ b/cpp/src/binaryop/jit/kernel.cu @@ -24,6 +24,9 @@ #include #include +#include +#include + namespace cudf { namespace binops { namespace jit { @@ -33,7 +36,7 @@ struct UserDefinedOp { static TypeOut operate(TypeLhs x, TypeRhs y) { TypeOut output; - using TypeCommon = typename common_type::type; + using TypeCommon = typename cuda::std::common_type::type; GENERIC_BINARY_OP(&output, static_cast(x), static_cast(y)); return output; } From 011fb482fdba796bc4238add7f86d81829100bff Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Mon, 29 Nov 2021 14:06:28 +0530 Subject: [PATCH 13/28] fix segfault by nullptr check in cufile_shim dtor --- cpp/src/io/utilities/file_io_utilities.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/io/utilities/file_io_utilities.cpp b/cpp/src/io/utilities/file_io_utilities.cpp index 387452e171a..f6175fae4ec 100644 --- a/cpp/src/io/utilities/file_io_utilities.cpp +++ b/cpp/src/io/utilities/file_io_utilities.cpp @@ -106,8 +106,8 @@ class cufile_shim { ~cufile_shim() { - driver_close(); - dlclose(cf_lib); + if (driver_close) driver_close(); + if (cf_lib) dlclose(cf_lib); } decltype(cuFileHandleRegister)* handle_register = nullptr; From 9bdc28b9964b4bd75e3d387829747315bed7ac45 Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Mon, 29 Nov 2021 14:13:43 +0530 Subject: [PATCH 14/28] enable cuio tests again --- cpp/tests/CMakeLists.txt | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 8ae31d7d74d..c1c209b2413 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -191,11 +191,15 @@ ConfigureTest( # * io tests 
-------------------------------------------------------------------------------------- ConfigureTest(DECOMPRESSION_TEST io/comp/decomp_test.cpp) -# ConfigureTest(CSV_TEST io/csv_test.cpp) ConfigureTest(ORC_TEST io/orc_test.cpp) -# ConfigureTest(PARQUET_TEST io/parquet_test.cpp) ConfigureTest(JSON_TEST io/json_test.cpp) -# ConfigureTest(ARROW_IO_SOURCE_TEST io/arrow_io_source_test.cpp) ConfigureTest(MULTIBYTE_SPLIT_TEST -# io/text/multibyte_split_test.cpp) if(CUDF_ENABLE_ARROW_S3) -# target_compile_definitions(ARROW_IO_SOURCE_TEST PRIVATE "S3_ENABLED") endif() +ConfigureTest(CSV_TEST io/csv_test.cpp) +ConfigureTest(ORC_TEST io/orc_test.cpp) +ConfigureTest(PARQUET_TEST io/parquet_test.cpp) +ConfigureTest(JSON_TEST io/json_test.cpp) +ConfigureTest(ARROW_IO_SOURCE_TEST io/arrow_io_source_test.cpp) +ConfigureTest(MULTIBYTE_SPLIT_TEST io/text/multibyte_split_test.cpp) +if(CUDF_ENABLE_ARROW_S3) + target_compile_definitions(ARROW_IO_SOURCE_TEST PRIVATE "S3_ENABLED") +endif() # ################################################################################################## # * sort tests ------------------------------------------------------------------------------------ From a3ba687b2e53a4fcc2ed9d9502733251ec12096b Mon Sep 17 00:00:00 2001 From: Karthikeyan Natarajan Date: Mon, 29 Nov 2021 21:31:55 +0530 Subject: [PATCH 15/28] address review comments --- cpp/src/binaryop/binaryop.cpp | 10 +++------- cpp/tests/binaryop/util/runtime_support.h | 5 +---- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/cpp/src/binaryop/binaryop.cpp b/cpp/src/binaryop/binaryop.cpp index a89e9795283..7087b71a84e 100644 --- a/cpp/src/binaryop/binaryop.cpp +++ b/cpp/src/binaryop/binaryop.cpp @@ -138,16 +138,15 @@ void binary_operation(mutable_column_view& out, std::string cuda_source = cudf::jit::parse_single_function_ptx(ptx, "GENERIC_BINARY_OP", output_type_name); - std::string kernel_name = jitify2::reflection::Template("cudf::binops::jit::kernel_v_v") // + std::string
kernel_name = jitify2::reflection::Template("cudf::binops::jit::kernel_v_v") .instantiate(output_type_name, // list of template arguments cudf::jit::get_type_name(lhs.type()), cudf::jit::get_type_name(rhs.type()), std::string("cudf::binops::jit::UserDefinedOp")); cudf::jit::get_program_cache(*binaryop_jit_kernel_cu_jit) - .get_kernel( - kernel_name, {}, {{"binaryop/jit/operation-udf.hpp", cuda_source}}, {"-arch=sm_."}) // - ->configure_1d_max_occupancy(0, 0, 0, stream.value()) // + .get_kernel(kernel_name, {}, {{"binaryop/jit/operation-udf.hpp", cuda_source}}, {"-arch=sm_."}) + ->configure_1d_max_occupancy(0, 0, 0, stream.value()) ->launch(out.size(), cudf::jit::get_data_ptr(out), cudf::jit::get_data_ptr(lhs), @@ -221,7 +220,6 @@ namespace detail { // There are 3 overloads of each of the following functions: // - `make_fixed_width_column_for_output` -// - `fixed_point_binary_operation` // - `binary_operation` // The overloads are overloaded on the first two parameters of each function: @@ -309,9 +307,7 @@ std::unique_ptr make_fixed_width_column_for_output(column_view const& lh output_type, lhs.size(), std::move(new_mask), null_count, stream, mr); } }; -} // namespace detail -namespace detail { std::unique_ptr binary_operation(scalar const& lhs, column_view const& rhs, binary_operator op, diff --git a/cpp/tests/binaryop/util/runtime_support.h b/cpp/tests/binaryop/util/runtime_support.h index a7ee0c3a391..250d34a0879 100644 --- a/cpp/tests/binaryop/util/runtime_support.h +++ b/cpp/tests/binaryop/util/runtime_support.h @@ -1,8 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. - * - * Copyright 2018-2019 BlazingDB, Inc. - * Copyright 2018 Christian Noboa Mardini + * Copyright (c) 2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
From 0fa0cc48a6b3b93e79f918d419a012b75765561c Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 30 Nov 2021 07:50:12 -0700 Subject: [PATCH 16/28] Support `min` and `max` in inclusive scan for structs (#9725) This PR continues to address https://github.com/rapidsai/cudf/issues/8974, adding support for structs in `min` and `max` inclusive scan. Exclusive scan support is not needed in the near future. Authors: - Nghia Truong (https://github.com/ttnghia) Approvers: - David Wendt (https://github.com/davidwendt) - https://github.com/nvdbaranec URL: https://github.com/rapidsai/cudf/pull/9725 --- cpp/include/cudf/detail/scan.hpp | 71 ++++---- cpp/src/groupby/sort/group_scan_util.cuh | 6 + cpp/src/reductions/scan/scan_inclusive.cu | 87 +++++++++- cpp/tests/reductions/reduction_tests.cpp | 2 +- cpp/tests/reductions/scan_tests.cpp | 196 ++++++++++++++++++++++ 5 files changed, 325 insertions(+), 37 deletions(-) diff --git a/cpp/include/cudf/detail/scan.hpp b/cpp/include/cudf/detail/scan.hpp index 113c15f19a1..8e3db1c7b10 100644 --- a/cpp/include/cudf/detail/scan.hpp +++ b/cpp/include/cudf/detail/scan.hpp @@ -26,22 +26,25 @@ namespace detail { /** * @brief Computes the exclusive scan of a column. * - * The null values are skipped for the operation, and if an input element - * at `i` is null, then the output element at `i` will also be null. + * The null values are skipped for the operation, and if an input element at `i` is null, then the + * output element at `i` will also be null. * - * The identity value for the column type as per the aggregation type - * is used for the value of the first element in the output column. + * The identity value for the column type as per the aggregation type is used for the value of the + * first element in the output column. * - * @throws cudf::logic_error if column data_type is not an arithmetic type. + * Struct columns are allowed with aggregation types Min and Max. 
* - * @param input The input column view for the scan - * @param agg unique_ptr to aggregation operator applied by the scan - * @param null_handling Exclude null values when computing the result if - * null_policy::EXCLUDE. Include nulls if null_policy::INCLUDE. - * Any operation with a null results in a null. + * @throws cudf::logic_error if column data_type is not an arithmetic type or struct type but the + * `agg` is not Min or Max. + * + * @param input The input column view for the scan. + * @param agg unique_ptr to aggregation operator applied by the scan. + * @param null_handling Exclude null values when computing the result if null_policy::EXCLUDE. + * Include nulls if null_policy::INCLUDE. Any operation with a null results in + * a null. * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource used to allocate the returned scalar's device memory - * @returns Column with scan results + * @param mr Device memory resource used to allocate the returned scalar's device memory. + * @returns Column with scan results. */ std::unique_ptr scan_exclusive(column_view const& input, std::unique_ptr const& agg, @@ -52,22 +55,22 @@ std::unique_ptr scan_exclusive(column_view const& input, /** * @brief Computes the inclusive scan of a column. * - * The null values are skipped for the operation, and if an input element - * at `i` is null, then the output element at `i` will also be null. + * The null values are skipped for the operation, and if an input element at `i` is null, then the + * output element at `i` will also be null. * - * String columns are allowed with aggregation types Min and Max. + * String and struct columns are allowed with aggregation types Min and Max. 
* - * @throws cudf::logic_error if column data_type is not an arithmetic type - * or string type but the `agg` is not Min or Max + * @throws cudf::logic_error if column data_type is not an arithmetic type or string/struct types + * but the `agg` is not Min or Max. * - * @param input The input column view for the scan - * @param agg unique_ptr to aggregation operator applied by the scan - * @param null_handling Exclude null values when computing the result if - * null_policy::EXCLUDE. Include nulls if null_policy::INCLUDE. - * Any operation with a null results in a null. + * @param input The input column view for the scan. + * @param agg unique_ptr to aggregation operator applied by the scan. + * @param null_handling Exclude null values when computing the result if null_policy::EXCLUDE. + * Include nulls if null_policy::INCLUDE. Any operation with a null results in + * a null. * @param stream CUDA stream used for device memory operations and kernel launches. - * @param mr Device memory resource used to allocate the returned scalar's device memory - * @returns Column with scan results + * @param mr Device memory resource used to allocate the returned scalar's device memory. + * @returns Column with scan results. */ std::unique_ptr scan_inclusive(column_view const& input, std::unique_ptr const& agg, @@ -76,24 +79,24 @@ std::unique_ptr scan_inclusive(column_view const& input, rmm::mr::device_memory_resource* mr); /** - * @brief Generate row ranks for a column + * @brief Generate row ranks for a column. * - * @param order_by Input column to generate ranks for - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned column's device memory - * @return rank values + * @param order_by Input column to generate ranks for. + * @param stream CUDA stream used for device memory operations and kernel launches. 
+ * @param mr Device memory resource used to allocate the returned column's device memory. + * @return rank values. */ std::unique_ptr inclusive_rank_scan(column_view const& order_by, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); /** - * @brief Generate row dense ranks for a column + * @brief Generate row dense ranks for a column. * - * @param order_by Input column to generate ranks for - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned column's device memory - * @return rank values + * @param order_by Input column to generate ranks for. + * @param stream CUDA stream used for device memory operations and kernel launches. + * @param mr Device memory resource used to allocate the returned column's device memory. + * @return rank values. */ std::unique_ptr inclusive_dense_rank_scan(column_view const& order_by, rmm::cuda_stream_view stream, diff --git a/cpp/src/groupby/sort/group_scan_util.cuh b/cpp/src/groupby/sort/group_scan_util.cuh index b565e8dc6d8..ae3e3232e06 100644 --- a/cpp/src/groupby/sort/group_scan_util.cuh +++ b/cpp/src/groupby/sort/group_scan_util.cuh @@ -239,7 +239,13 @@ struct group_scan_functor()}, gather_map.size(), gather_map.data()); + // // Gather the children elements of the prefix min/max struct elements first. + // + // Typically, we should use `get_sliced_child` for each child column to properly handle the + // input if it is a sliced view. However, since the input to this function is just generated + // from groupby internal APIs which is never a sliced view, we just use `child_begin` and + // `child_end` iterators for simplicity. 
auto scanned_children = cudf::detail::gather( table_view(std::vector{values.child_begin(), values.child_end()}), diff --git a/cpp/src/reductions/scan/scan_inclusive.cu b/cpp/src/reductions/scan/scan_inclusive.cu index 02ecd6df4d9..70f5ca90539 100644 --- a/cpp/src/reductions/scan/scan_inclusive.cu +++ b/cpp/src/reductions/scan/scan_inclusive.cu @@ -14,13 +14,17 @@ * limitations under the License. */ -#include "scan.cuh" +#include +#include #include +#include #include #include #include #include +#include +#include #include #include @@ -150,6 +154,72 @@ struct scan_functor { } }; +template +struct scan_functor { + static std::unique_ptr invoke(column_view const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { + // Op is used only to determined if we want to find the min or max element. + auto constexpr is_min_op = std::is_same_v; + + // Build indices of the scan operation results (ARGMIN/ARGMAX). + // When finding ARGMIN, we need to consider nulls as larger than non-null elements, and the + // opposite for ARGMAX. + auto gather_map = rmm::device_uvector(input.size(), stream); + auto const do_scan = [&](auto const& binop) { + thrust::inclusive_scan(rmm::exec_policy(stream), + thrust::counting_iterator(0), + thrust::counting_iterator(input.size()), + gather_map.begin(), + binop); + }; + + auto constexpr null_precedence = is_min_op ? cudf::null_order::AFTER : cudf::null_order::BEFORE; + auto const flattened_input = cudf::structs::detail::flatten_nested_columns( + table_view{{input}}, {}, std::vector{null_precedence}); + auto const d_flattened_input_ptr = table_device_view::create(flattened_input, stream); + auto const flattened_null_precedences = + is_min_op ? 
cudf::detail::make_device_uvector_async(flattened_input.null_orders(), stream) + : rmm::device_uvector(0, stream); + + if (input.has_nulls()) { + auto const binop = cudf::reduction::detail::row_arg_minmax_fn( + input.size(), *d_flattened_input_ptr, flattened_null_precedences.data(), is_min_op); + do_scan(binop); + } else { + auto const binop = cudf::reduction::detail::row_arg_minmax_fn( + input.size(), *d_flattened_input_ptr, flattened_null_precedences.data(), is_min_op); + do_scan(binop); + } + + // Gather the children columns of the input column. Must use `get_sliced_child` to properly + // handle input in case it is a sliced view. + auto const input_children = [&] { + auto const it = cudf::detail::make_counting_transform_iterator( + 0, [structs_view = structs_column_view{input}, stream](auto const child_idx) { + return structs_view.get_sliced_child(child_idx); + }); + return std::vector(it, it + input.num_children()); + }(); + + // Gather the children elements of the prefix min/max struct elements for the output. + auto scanned_children = cudf::detail::gather(table_view{input_children}, + gather_map, + out_of_bounds_policy::DONT_CHECK, + negative_index_policy::NOT_ALLOWED, + stream, + mr) + ->release(); + + // Don't need to set a null mask because that will be handled at the caller. 
+ return make_structs_column(input.size(), + std::move(scanned_children), + UNKNOWN_NULL_COUNT, + rmm::device_buffer{0, stream, mr}); + } +}; + /** * @brief Dispatcher for running a Scan operation on an input column * @@ -161,7 +231,11 @@ struct scan_dispatcher { template static constexpr bool is_supported() { - return std::is_invocable_v && !cudf::is_dictionary(); + if constexpr (std::is_same_v) { + return std::is_same_v || std::is_same_v; + } else { + return std::is_invocable_v && !cudf::is_dictionary(); + } } public: @@ -209,6 +283,15 @@ std::unique_ptr scan_inclusive( output->set_null_mask(mask_scan(input, scan_type::INCLUSIVE, stream, mr), UNKNOWN_NULL_COUNT); } + // If the input is a structs column, we also need to push down nulls from the parent output column + // into the children columns. + if (input.type().id() == type_id::STRUCT && output->has_nulls()) { + for (size_type idx = 0; idx < output->num_children(); ++idx) { + structs::detail::superimpose_parent_nulls( + output->view().null_mask(), output->null_count(), output->child(idx), stream, mr); + } + } + return output; } } // namespace detail diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index 2c9279260e7..d8ee8f9d08d 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -28,7 +29,6 @@ #include #include #include -#include #include diff --git a/cpp/tests/reductions/scan_tests.cpp b/cpp/tests/reductions/scan_tests.cpp index d1e983460d5..0892436eb47 100644 --- a/cpp/tests/reductions/scan_tests.cpp +++ b/cpp/tests/reductions/scan_tests.cpp @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -397,3 +398,198 @@ TYPED_TEST(ScanDurationTest, Sum) EXPECT_THROW(cudf::scan(col, cudf::make_sum_aggregation(), cudf::scan_type::EXCLUSIVE), cudf::logic_error); } + +struct StructScanTest : public cudf::test::BaseFixture { +}; + 
+TEST_F(StructScanTest, StructScanMinMaxNoNull) +{ + using INTS_CW = cudf::test::fixed_width_column_wrapper; + using STRINGS_CW = cudf::test::strings_column_wrapper; + using STRUCTS_CW = cudf::test::structs_column_wrapper; + + auto const input = [] { + auto child1 = STRINGS_CW{"año", "bit", "₹1", "aaa", "zit", "bat", "aab", "$1", "€1", "wut"}; + auto child2 = INTS_CW{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + return STRUCTS_CW{{child1, child2}}; + }(); + + { + auto const expected = [] { + auto child1 = STRINGS_CW{"año", "año", "año", "aaa", "aaa", "aaa", "aaa", "$1", "$1", "$1"}; + auto child2 = INTS_CW{1, 1, 1, 4, 4, 4, 4, 8, 8, 8}; + return STRUCTS_CW{{child1, child2}}; + }(); + auto const result = cudf::scan(input, cudf::make_min_aggregation(), cudf::scan_type::INCLUSIVE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); + } + + { + auto const expected = [] { + auto child1 = STRINGS_CW{"año", "bit", "₹1", "₹1", "₹1", "₹1", "₹1", "₹1", "₹1", "₹1"}; + auto child2 = INTS_CW{1, 2, 3, 3, 3, 3, 3, 3, 3, 3}; + return STRUCTS_CW{{child1, child2}}; + }(); + auto const result = cudf::scan(input, cudf::make_max_aggregation(), cudf::scan_type::INCLUSIVE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); + } +} + +TEST_F(StructScanTest, StructScanMinMaxSlicedInput) +{ + using INTS_CW = cudf::test::fixed_width_column_wrapper; + using STRINGS_CW = cudf::test::strings_column_wrapper; + using STRUCTS_CW = cudf::test::structs_column_wrapper; + constexpr int32_t dont_care{1}; + + auto const input_original = [] { + auto child1 = STRINGS_CW{"$dont_care", + "$dont_care", + "año", + "bit", + "₹1", + "aaa", + "zit", + "bat", + "aab", + "$1", + "€1", + "wut", + "₹dont_care"}; + auto child2 = INTS_CW{dont_care, dont_care, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, dont_care}; + return STRUCTS_CW{{child1, child2}}; + }(); + + auto const input = cudf::slice(input_original, {2, 12})[0]; + + { + auto const expected = [] { + auto child1 = STRINGS_CW{"año", "año", "año", "aaa", "aaa", "aaa", 
"aaa", "$1", "$1", "$1"}; + auto child2 = INTS_CW{1, 1, 1, 4, 4, 4, 4, 8, 8, 8}; + return STRUCTS_CW{{child1, child2}}; + }(); + auto const result = cudf::scan(input, cudf::make_min_aggregation(), cudf::scan_type::INCLUSIVE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); + } + + { + auto const expected = [] { + auto child1 = STRINGS_CW{"año", "bit", "₹1", "₹1", "₹1", "₹1", "₹1", "₹1", "₹1", "₹1"}; + auto child2 = INTS_CW{1, 2, 3, 3, 3, 3, 3, 3, 3, 3}; + return STRUCTS_CW{{child1, child2}}; + }(); + auto const result = cudf::scan(input, cudf::make_max_aggregation(), cudf::scan_type::INCLUSIVE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); + } +} + +TEST_F(StructScanTest, StructScanMinMaxWithNulls) +{ + using INTS_CW = cudf::test::fixed_width_column_wrapper; + using STRINGS_CW = cudf::test::strings_column_wrapper; + using STRUCTS_CW = cudf::test::structs_column_wrapper; + using cudf::test::iterators::nulls_at; + + auto const input = [] { + auto child1 = STRINGS_CW{{"año", + "bit", + "₹1" /*NULL*/, + "aaa" /*NULL*/, + "zit", + "bat", + "aab", + "$1" /*NULL*/, + "€1" /*NULL*/, + "wut"}, + nulls_at({2, 7})}; + auto child2 = INTS_CW{{1, 2, 3 /*NULL*/, 4 /*NULL*/, 5, 6, 7, 8 /*NULL*/, 9 /*NULL*/, 10}, + nulls_at({2, 7})}; + return STRUCTS_CW{{child1, child2}, nulls_at({3, 8})}; + }(); + + { + auto const expected = [] { + auto child1 = STRINGS_CW{ + "año", "año", "año", "" /*NULL*/, "año", "año", "aab", "aab", "" /*NULL*/, "aab"}; + auto child2 = INTS_CW{1, 1, 1, 0 /*NULL*/, 1, 1, 7, 7, 0 /*NULL*/, 7}; + return STRUCTS_CW{{child1, child2}, nulls_at({3, 8})}; + }(); + + auto const result = cudf::scan( + input, cudf::make_min_aggregation(), cudf::scan_type::INCLUSIVE, null_policy::EXCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); + } + + { + auto const expected = [] { + auto child1 = STRINGS_CW{ + "año", "bit", "bit", "" /*NULL*/, "zit", "zit", "zit", "zit", "" /*NULL*/, "zit"}; + auto child2 = INTS_CW{1, 2, 2, 0 /*NULL*/, 5, 
5, 5, 5, 0 /*NULL*/, 5}; + return STRUCTS_CW{{child1, child2}, nulls_at({3, 8})}; + }(); + + auto const result = cudf::scan( + input, cudf::make_max_aggregation(), cudf::scan_type::INCLUSIVE, null_policy::EXCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); + } + + { + auto const expected = [] { + auto child1 = STRINGS_CW{"año", + "año", + "año", + "" /*NULL*/, + "" /*NULL*/, + "" /*NULL*/, + "" /*NULL*/, + "" /*NULL*/, + "" /*NULL*/, + "" /*NULL*/}; + auto child2 = INTS_CW{1, + 1, + 1, + 0 /*NULL*/, + 0 /*NULL*/, + 0 /*NULL*/, + 0 /*NULL*/, + 0 /*NULL*/, + 0 /*NULL*/, + 0 /*NULL*/}; + return STRUCTS_CW{{child1, child2}, nulls_at({3, 4, 5, 6, 7, 8, 9})}; + }(); + + auto const result = cudf::scan( + input, cudf::make_min_aggregation(), cudf::scan_type::INCLUSIVE, null_policy::INCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); + } + + { + auto const expected = [] { + auto child1 = STRINGS_CW{"año", + "bit", + "bit", + "" /*NULL*/, + "" /*NULL*/, + "" /*NULL*/, + "" /*NULL*/, + "" /*NULL*/, + "" /*NULL*/, + "" /*NULL*/}; + auto child2 = INTS_CW{1, + 2, + 2, + 0 /*NULL*/, + 0 /*NULL*/, + 0 /*NULL*/, + 0 /*NULL*/, + 0 /*NULL*/, + 0 /*NULL*/, + 0 /*NULL*/}; + return STRUCTS_CW{{child1, child2}, nulls_at({3, 4, 5, 6, 7, 8, 9})}; + }(); + + auto const result = cudf::scan( + input, cudf::make_max_aggregation(), cudf::scan_type::INCLUSIVE, null_policy::INCLUDE); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view()); + } +} From 74ac6ed5e06be9a1ee37f3ceaa1d45b2224266f2 Mon Sep 17 00:00:00 2001 From: Alfred Xu Date: Tue, 30 Nov 2021 22:58:19 +0800 Subject: [PATCH 17/28] fix make_empty_scalar_like (#9782) Signed-off-by: sperlingxx --- cpp/src/io/orc/writer_impl.cu | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index 2bf020d08a2..9e493c192e4 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -1496,15 
+1496,23 @@ orc_table_view make_orc_table_view(table_view const& table, append_orc_column(col.child(lists_column_view::child_column_index), &orc_columns[new_col_idx], col_meta.child(lists_column_view::child_column_index)); - } else if (kind == TypeKind::STRUCT or kind == TypeKind::MAP) { - // MAP: skip to the list child - include grandchildren columns instead of children - auto const real_parent_col = - kind == TypeKind::MAP ? col.child(lists_column_view::child_column_index) : col; - for (auto child_idx = 0; child_idx != real_parent_col.num_children(); ++child_idx) { - append_orc_column(real_parent_col.child(child_idx), - &orc_columns[new_col_idx], - col_meta.child(child_idx)); + } else if (kind == TypeKind::STRUCT) { + for (auto child_idx = 0; child_idx != col.num_children(); ++child_idx) { + append_orc_column( + col.child(child_idx), &orc_columns[new_col_idx], col_meta.child(child_idx)); } + } else if (kind == TypeKind::MAP) { + // MAP: skip to the list child - include grandchildren columns instead of children + auto const real_parent_col = col.child(lists_column_view::child_column_index); + auto const& real_parent_meta = col_meta.child(lists_column_view::child_column_index); + CUDF_EXPECTS(real_parent_meta.num_children() == 2, + "Map struct column should have exactly two children"); + // process MAP key + append_orc_column( + real_parent_col.child(0), &orc_columns[new_col_idx], real_parent_meta.child(0)); + // process MAP value + append_orc_column( + real_parent_col.child(1), &orc_columns[new_col_idx], real_parent_meta.child(1)); } }; From dca8a0a0356e90e2b9dfa2a2cedf38d0c90935cb Mon Sep 17 00:00:00 2001 From: "Richard (Rick) Zamora" Date: Tue, 30 Nov 2021 10:40:18 -0600 Subject: [PATCH 18/28] Fix dtype-argument bug in dask_cudf read_csv (#9796) Closes #9719 `dask_cudf.read_csv` currently fails when both `usecols` and `dtype` are specified. This PR is a simple fix. 
In the near future, the `_internal_read_csv` implementation should also be modified to produce a `Blockwise` HLG Layer, but I will leave that for a separate PR. Authors: - Richard (Rick) Zamora (https://github.com/rjzamora) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/9796 --- python/dask_cudf/dask_cudf/io/csv.py | 19 +++++++++++-------- .../dask_cudf/dask_cudf/io/tests/test_csv.py | 5 +++-- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/python/dask_cudf/dask_cudf/io/csv.py b/python/dask_cudf/dask_cudf/io/csv.py index 132201a349e..ebb02e3b6d4 100644 --- a/python/dask_cudf/dask_cudf/io/csv.py +++ b/python/dask_cudf/dask_cudf/io/csv.py @@ -110,9 +110,17 @@ def _internal_read_csv(path, chunksize="256 MiB", **kwargs): if chunksize is None: return read_csv_without_chunksize(path, **kwargs) + # Let dask.dataframe generate meta dask_reader = make_reader(cudf.read_csv, "read_csv", "CSV") - usecols = kwargs.pop("usecols", None) - meta = dask_reader(filenames[0], **kwargs)._meta + kwargs1 = kwargs.copy() + usecols = kwargs1.pop("usecols", None) + dtype = kwargs1.pop("dtype", None) + meta = dask_reader(filenames[0], **kwargs1)._meta + names = meta.columns + if usecols or dtype: + # Regenerate meta with original kwargs if + # `usecols` or `dtype` was specified + meta = dask_reader(filenames[0], **kwargs)._meta dsk = {} i = 0 @@ -127,18 +135,13 @@ def _internal_read_csv(path, chunksize="256 MiB", **kwargs): chunksize, ) # specify which chunk of the file we care about if start != 0: - kwargs2[ - "names" - ] = meta.columns # no header in the middle of the file + kwargs2["names"] = names # no header in the middle of the file kwargs2["header"] = None - kwargs2["usecols"] = usecols dsk[(name, i)] = (apply, _read_csv, [fn, dtypes], kwargs2) i += 1 divisions = [None] * (len(dsk) + 1) - if usecols is not None: - meta = meta[usecols] return dd.core.new_dd_object(dsk, name, meta, 
divisions) diff --git a/python/dask_cudf/dask_cudf/io/tests/test_csv.py b/python/dask_cudf/dask_cudf/io/tests/test_csv.py index 98061f6c624..32960a90bd7 100644 --- a/python/dask_cudf/dask_cudf/io/tests/test_csv.py +++ b/python/dask_cudf/dask_cudf/io/tests/test_csv.py @@ -136,7 +136,8 @@ def test_read_csv_chunksize_none(tmp_path, compression, size): dd.assert_eq(df, df2) -def test_csv_reader_usecols(tmp_path): +@pytest.mark.parametrize("dtype", [{"b": str, "c": int}, None]) +def test_csv_reader_usecols(tmp_path, dtype): df = cudf.DataFrame( { "a": [1, 2, 3, 4] * 100, @@ -147,6 +148,6 @@ def test_csv_reader_usecols(tmp_path): csv_path = str(tmp_path / "usecols_data.csv") df.to_csv(csv_path, index=False) ddf = dask_cudf.from_cudf(df[["b", "c"]], npartitions=5) - ddf2 = dask_cudf.read_csv(csv_path, usecols=["b", "c"]) + ddf2 = dask_cudf.read_csv(csv_path, usecols=["b", "c"], dtype=dtype) dd.assert_eq(ddf, ddf2, check_divisions=False, check_index=False) From 1db05c9d889d04df113986eeee0356778ce8b003 Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Tue, 30 Nov 2021 11:45:54 -0600 Subject: [PATCH 19/28] Use Java classloader to find test resources (#9760) Updates the Java tests to use the classloader to locate test files rather than reaching directly into the source directory. 
Authors: - Jason Lowe (https://github.com/jlowe) Approvers: - Gera Shegalov (https://github.com/gerashegalov) URL: https://github.com/rapidsai/cudf/pull/9760 --- .../src/test/java/ai/rapids/cudf/TableTest.java | 14 +++++++------- .../src/test/java/ai/rapids/cudf/TestUtils.java | 17 ++++++++++++++++- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java index 4512a08430c..b4247e9bb7c 100644 --- a/java/src/test/java/ai/rapids/cudf/TableTest.java +++ b/java/src/test/java/ai/rapids/cudf/TableTest.java @@ -70,11 +70,11 @@ import static org.junit.jupiter.api.Assertions.assertTrue; public class TableTest extends CudfTestBase { - private static final File TEST_PARQUET_FILE = new File("src/test/resources/acq.parquet"); - private static final File TEST_ORC_FILE = new File("src/test/resources/TestOrcFile.orc"); - private static final File TEST_ORC_TIMESTAMP_DATE_FILE = new File( - "src/test/resources/timestamp-date-test.orc"); - private static final File TEST_DECIMAL_PARQUET_FILE = new File("src/test/resources/decimal.parquet"); + private static final File TEST_PARQUET_FILE = TestUtils.getResourceAsFile("acq.parquet"); + private static final File TEST_ORC_FILE = TestUtils.getResourceAsFile("TestOrcFile.orc"); + private static final File TEST_ORC_TIMESTAMP_DATE_FILE = TestUtils.getResourceAsFile("timestamp-date-test.orc"); + private static final File TEST_DECIMAL_PARQUET_FILE = TestUtils.getResourceAsFile("decimal.parquet"); + private static final File TEST_SIMPLE_CSV_FILE = TestUtils.getResourceAsFile("simple.csv"); private static final Schema CSV_DATA_BUFFER_SCHEMA = Schema.builder() .column(DType.INT32, "A") @@ -548,7 +548,7 @@ void testReadCSVPrune() { .column(0, 1, 2, 3, 4, 5, 6, 7, 8, 9) .column(110.0, 111.0, 112.0, 113.0, 114.0, 115.0, 116.0, 117.0, 118.2, 119.8) .build(); - Table table = Table.readCSV(schema, opts, new 
File("./src/test/resources/simple.csv"))) { + Table table = Table.readCSV(schema, opts, TEST_SIMPLE_CSV_FILE)) { assertTablesAreEqual(expected, table); } } @@ -675,7 +675,7 @@ void testReadCSV() { .column(120L, 121L, 122L, 123L, 124L, 125L, 126L, 127L, 128L, 129L) .column("one", "two", "three", "four", "five", "six", "seven\ud801\uddb8", "eight\uBF68", "nine\u03E8", "ten") .build(); - Table table = Table.readCSV(schema, new File("./src/test/resources/simple.csv"))) { + Table table = Table.readCSV(schema, TEST_SIMPLE_CSV_FILE)) { assertTablesAreEqual(expected, table); } } diff --git a/java/src/test/java/ai/rapids/cudf/TestUtils.java b/java/src/test/java/ai/rapids/cudf/TestUtils.java index 5a799c666c2..a1acab5883b 100644 --- a/java/src/test/java/ai/rapids/cudf/TestUtils.java +++ b/java/src/test/java/ai/rapids/cudf/TestUtils.java @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,6 +18,9 @@ package ai.rapids.cudf; +import java.io.File; +import java.net.URISyntaxException; +import java.net.URL; import java.util.ArrayList; import java.util.List; import java.util.Random; @@ -211,4 +214,16 @@ static Double[] getDoubles(final long seed, final int size, int specialValues) { }); return result; } + + public static File getResourceAsFile(String resourceName) { + URL url = TestUtils.class.getClassLoader().getResource(resourceName); + if (url == null) { + throw new IllegalArgumentException("Unable to locate resource: " + resourceName); + } + try { + return new File(url.toURI()); + } catch (URISyntaxException e) { + throw new RuntimeException(e); + } + } } From 1697f63b9e6e80695cb157f479fada72d053fa1a Mon Sep 17 00:00:00 2001 From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com> Date: Tue, 30 Nov 2021 23:39:13 +0530 Subject: [PATCH 20/28] Run compute-sanitizer in nightly build (#9641) Addresses part of https://github.com/rapidsai/cudf/issues/904 - This PR enables run of `compute-sanitizer --tool memcheck` on libcudf unit tests when env `COMPUTE_SANITIZER_ENABLE=true` This env `COMPUTE_SANITIZER_ENABLE` will be enabled only in nightly builds of cudf. (To be Enabled in PR https://github.com/rapidsai/gpuci-scripts/pull/675) - This PR also adds script to parse compute-sanitizer log to junit xml file which can be processed by Jenkins. Reports only failures. If no errors, no tests are reported under memcheck results. Note: Only `memcheck` is enabled now. when required, other checks of compute-sanitizer could be enabled later. 
Authors: - Karthikeyan (https://github.com/karthikeyann) Approvers: - MithunR (https://github.com/mythrocks) - AJ Schmidt (https://github.com/ajschmidt8) URL: https://github.com/rapidsai/cudf/pull/9641 --- ci/gpu/build.sh | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 664e774c68a..8f83c169330 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2018-2020, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. ############################################## # cuDF GPU build and test script for CI # ############################################## @@ -176,6 +176,28 @@ else ${gt} --gtest_output=xml:"$WORKSPACE/test-results/" done + ################################################################################ + # MEMCHECK - Run compute-sanitizer on GoogleTest (only in nightly builds) + ################################################################################ + if [[ "$BUILD_MODE" == "branch" && "$BUILD_TYPE" == "gpu" ]]; then + if [[ "$COMPUTE_SANITIZER_ENABLE" == "true" ]]; then + gpuci_logger "Memcheck on GoogleTests with rmm_mode=cuda" + export GTEST_CUDF_RMM_MODE=cuda + COMPUTE_SANITIZER_CMD="compute-sanitizer --tool memcheck" + mkdir -p "$WORKSPACE/test-results/" + for gt in gtests/*; do + test_name=$(basename ${gt}) + if [[ "$test_name" == "ERROR_TEST" ]]; then + continue + fi + echo "Running GoogleTest $test_name" + ${COMPUTE_SANITIZER_CMD} ${gt} | tee "$WORKSPACE/test-results/${test_name}.cs.log" + done + unset GTEST_CUDF_RMM_MODE + # test-results/*.cs.log are processed in gpuci + fi + fi + CUDF_CONDA_FILE=`find ${CONDA_ARTIFACT_PATH} -name "libcudf-*.tar.bz2"` CUDF_CONDA_FILE=`basename "$CUDF_CONDA_FILE" .tar.bz2` #get filename without extension CUDF_CONDA_FILE=${CUDF_CONDA_FILE//-/=} #convert to conda install From 69d576543b5414372f36d02a189a7217d3bb8006 Mon Sep 17 00:00:00 2001 
From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Tue, 30 Nov 2021 14:40:34 -0500 Subject: [PATCH 21/28] Update check for inf/nan strings in libcudf float conversion to ignore case (#9694) Reference https://github.com/rapidsai/cudf/pull/9613/files#r743579126 Add support to ignore case for strings `INF`, `INFINITY` and `NAN` to `cudf::strings::is_float` and `cudf::strings::to_float` for consistency with https://en.cppreference.com/w/cpp/string/basic_string/stof Also, remove the expensive `replace` call in the cudf before calling this from Python. Authors: - David Wendt (https://github.com/davidwendt) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) - Robert Maynard (https://github.com/robertmaynard) - Nghia Truong (https://github.com/ttnghia) - Jason Lowe (https://github.com/jlowe) URL: https://github.com/rapidsai/cudf/pull/9694 --- cpp/include/cudf/strings/string.cuh | 64 +++++++++++++--- cpp/src/strings/convert/convert_floats.cu | 13 ++-- cpp/tests/strings/floats_tests.cpp | 51 ++++--------- .../java/ai/rapids/cudf/ColumnVectorTest.java | 23 +++--- python/cudf/cudf/core/column/string.py | 73 ------------------- 5 files changed, 85 insertions(+), 139 deletions(-) diff --git a/cpp/include/cudf/strings/string.cuh b/cpp/include/cudf/strings/string.cuh index 82da5ad8f10..d85d19d7f10 100644 --- a/cpp/include/cudf/strings/string.cuh +++ b/cpp/include/cudf/strings/string.cuh @@ -52,6 +52,43 @@ inline __device__ bool is_integer(string_view const& d_str) thrust::seq, begin, end, [] __device__(auto chr) { return chr >= '0' && chr <= '9'; }); } +/** + * @brief Returns true if input contains the not-a-number string. + * + * The following are valid for this function: "NAN" and "NaN" + * @param d_str input string + * @return true if input is as valid NaN string. 
+ */ +inline __device__ bool is_nan_str(string_view const& d_str) +{ + auto const ptr = d_str.data(); + return (d_str.size_bytes() == 3) && (ptr[0] == 'N' || ptr[0] == 'n') && + (ptr[1] == 'A' || ptr[1] == 'a') && (ptr[2] == 'N' || ptr[2] == 'n'); +} + +/** + * @brief Returns true if input contains the infinity string. + * + * The following are valid for this function: "INF", "INFINITY", and "Inf" + * @param d_str input string + * @return true if input is as valid Inf string. + */ +inline __device__ bool is_inf_str(string_view const& d_str) +{ + auto const ptr = d_str.data(); + auto const size = d_str.size_bytes(); + + if (size != 3 && size != 8) return false; + + auto const prefix_valid = (ptr[0] == 'I' || ptr[0] == 'i') && (ptr[1] == 'N' || ptr[1] == 'n') && + (ptr[2] == 'F' || ptr[2] == 'f'); + + return prefix_valid && + ((size == 3) || ((ptr[3] == 'I' || ptr[3] == 'i') && (ptr[4] == 'N' || ptr[4] == 'n') && + (ptr[5] == 'I' || ptr[5] == 'i') && (ptr[6] == 'T' || ptr[6] == 't') && + (ptr[7] == 'Y' || ptr[7] == 'y'))); +} + /** * @brief Returns `true` if all characters in the string * are valid for conversion to a float type. @@ -65,8 +102,8 @@ inline __device__ bool is_integer(string_view const& d_str) * An empty string returns `false`. * No bounds checking is performed to verify if the value would fit * within a specific float type. - * The following strings are also allowed "NaN", "Inf" and, "-Inf" - * and will return true. + * The following strings are also allowed and will return true: + * "NaN", "NAN", "Inf", "INF", "INFINITY" * * @param d_str String to check. 
* @return true if string has valid float characters @@ -74,29 +111,32 @@ inline __device__ bool is_integer(string_view const& d_str) inline __device__ bool is_float(string_view const& d_str) { if (d_str.empty()) return false; - // strings allowed by the converter - if (d_str.compare("NaN", 3) == 0) return true; - if (d_str.compare("Inf", 3) == 0) return true; - if (d_str.compare("-Inf", 4) == 0) return true; bool decimal_found = false; bool exponent_found = false; size_type bytes = d_str.size_bytes(); const char* data = d_str.data(); // sign character allowed at the beginning of the string - size_type chidx = (*data == '-' || *data == '+') ? 1 : 0; - bool result = chidx < bytes; + size_type ch_idx = (*data == '-' || *data == '+') ? 1 : 0; + + bool result = ch_idx < bytes; + // check for nan and infinity strings + if (result && data[ch_idx] > '9') { + auto const inf_nan = string_view(data + ch_idx, bytes - ch_idx); + if (is_nan_str(inf_nan) || is_inf_str(inf_nan)) return true; + } + // check for float chars [0-9] and a single decimal '.' 
// and scientific notation [eE][+-][0-9] - for (; chidx < bytes; ++chidx) { - auto chr = data[chidx]; + for (; ch_idx < bytes; ++ch_idx) { + auto chr = data[ch_idx]; if (chr >= '0' && chr <= '9') continue; if (!decimal_found && chr == '.') { decimal_found = true; // no more decimals continue; } if (!exponent_found && (chr == 'e' || chr == 'E')) { - if (chidx + 1 < bytes) chr = data[chidx + 1]; - if (chr == '-' || chr == '+') ++chidx; + if (ch_idx + 1 < bytes) chr = data[ch_idx + 1]; + if (chr == '-' || chr == '+') ++ch_idx; decimal_found = true; // no decimal allowed in exponent exponent_found = true; // no more exponents continue; diff --git a/cpp/src/strings/convert/convert_floats.cu b/cpp/src/strings/convert/convert_floats.cu index 366d4fe7d42..70b5f528213 100644 --- a/cpp/src/strings/convert/convert_floats.cu +++ b/cpp/src/strings/convert/convert_floats.cu @@ -45,7 +45,7 @@ namespace { * @brief This function converts the given string into a * floating point double value. * - * This will also map strings containing "NaN", "Inf" and "-Inf" + * This will also map strings containing "NaN", "Inf", etc. * to the appropriate float values. * * This function will also handle scientific notation format. @@ -55,16 +55,19 @@ __device__ inline double stod(string_view const& d_str) const char* in_ptr = d_str.data(); const char* end = in_ptr + d_str.size_bytes(); if (end == in_ptr) return 0.0; - // special strings - if (d_str.compare("NaN", 3) == 0) return std::numeric_limits::quiet_NaN(); - if (d_str.compare("Inf", 3) == 0) return std::numeric_limits::infinity(); - if (d_str.compare("-Inf", 4) == 0) return -std::numeric_limits::infinity(); double sign{1.0}; if (*in_ptr == '-' || *in_ptr == '+') { sign = (*in_ptr == '-' ? 
-1 : 1); ++in_ptr; } + // special strings: NaN, Inf + if ((in_ptr < end) && *in_ptr > '9') { + auto const inf_nan = string_view(in_ptr, static_cast(thrust::distance(in_ptr, end))); + if (string::is_nan_str(inf_nan)) return std::numeric_limits::quiet_NaN(); + if (string::is_inf_str(inf_nan)) return sign * std::numeric_limits::infinity(); + } + // Parse and store the mantissa as much as we can, // until we are about to exceed the limit of uint64_t constexpr uint64_t max_holding = (std::numeric_limits::max() - 9L) / 10L; diff --git a/cpp/tests/strings/floats_tests.cpp b/cpp/tests/strings/floats_tests.cpp index 126bffa1e49..e6f4f6bb8d9 100644 --- a/cpp/tests/strings/floats_tests.cpp +++ b/cpp/tests/strings/floats_tests.cpp @@ -58,32 +58,20 @@ TEST_F(StringsConvertTest, IsFloat) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected1); cudf::test::strings_column_wrapper strings2( - {"+175", "-34", "9.8", "1234567890", "6.7e17", "-917.2e5"}); + {"-34", "9.8", "1234567890", "-917.2e5", "INF", "NAN", "-Inf", "INFINITY"}); results = cudf::strings::is_float(cudf::strings_column_view(strings2)); - cudf::test::fixed_width_column_wrapper expected2({1, 1, 1, 1, 1, 1}); + cudf::test::fixed_width_column_wrapper expected2({1, 1, 1, 1, 1, 1, 1, 1}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected2); } TEST_F(StringsConvertTest, ToFloats32) { - std::vector h_strings{"1234", - nullptr, - "-876", - "543.2", - "-0.12", - ".25", - "-.002", - "", - "-0.0", - "1.2e4", - "NaN", - "abc123", - "123abc", - "456e", - "-1.78e+5", - "-122.33644782123456789", - "12e+309", - "3.4028236E38"}; + std::vector h_strings{ + "1234", nullptr, "-876", "543.2", + "-0.12", ".25", "-.002", "", + "-0.0", "1.2e4", "NAN", "abc123", + "123abc", "456e", "-1.78e+5", "-122.33644782123456789", + "12e+309", "3.4028236E38", "INF", "Infinity"}; cudf::test::strings_column_wrapper strings( h_strings.begin(), h_strings.end(), @@ -135,24 +123,11 @@ TEST_F(StringsConvertTest, FromFloats32) TEST_F(StringsConvertTest, ToFloats64) 
{ - std::vector h_strings{"1234", - nullptr, - "-876", - "543.2", - "-0.12", - ".25", - "-.002", - "", - "-0.0", - "1.28e256", - "NaN", - "abc123", - "123abc", - "456e", - "-1.78e+5", - "-122.33644782", - "12e+309", - "1.7976931348623159E308"}; + std::vector h_strings{ + "1234", nullptr, "-876", "543.2", "-0.12", ".25", + "-.002", "", "-0.0", "1.28e256", "NaN", "abc123", + "123abc", "456e", "-1.78e+5", "-122.33644782", "12e+309", "1.7976931348623159E308", + "-Inf", "-INFINITY"}; cudf::test::strings_column_wrapper strings( h_strings.begin(), h_strings.end(), diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index a582541a0d4..cf602c26717 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -4919,11 +4919,12 @@ void testIsFloat() { try (ColumnVector floatStringCV = ColumnVector.fromStrings(floatStrings); ColumnVector isFloat = floatStringCV.isFloat(); ColumnVector floats = floatStringCV.asFloats(); - ColumnVector expectedFloats = ColumnVector.fromBoxedFloats(0f, 0f, Float.POSITIVE_INFINITY, - Float.NEGATIVE_INFINITY, 0f, 0f, -0f, 0f, Float.MAX_VALUE, Float.POSITIVE_INFINITY, - -Float.MAX_VALUE, Float.NEGATIVE_INFINITY, 1.2e-24f, 0f, 0f, null, 423f); - ColumnVector expected = ColumnVector.fromBoxedBooleans(false, false, true, true, false, - false, true, true, true, true, true, true, true, false, false, null, true)) { + ColumnVector expectedFloats = ColumnVector.fromBoxedFloats(0f, Float.NaN, Float.POSITIVE_INFINITY, + Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY, Float.POSITIVE_INFINITY, -0f, 0f, + Float.MAX_VALUE, Float.POSITIVE_INFINITY, -Float.MAX_VALUE, Float.NEGATIVE_INFINITY, + 1.2e-24f, 0f, 0f, null, 423f); + ColumnVector expected = ColumnVector.fromBoxedBooleans(false, true, true, true, true, + true, true, true, true, true, true, true, true, false, false, null, true)) { assertColumnsAreEqual(expected, 
isFloat); assertColumnsAreEqual(expectedFloats, floats); } @@ -4944,12 +4945,12 @@ void testIsDouble() { try (ColumnVector doubleStringCV = ColumnVector.fromStrings(doubleStrings); ColumnVector isDouble = doubleStringCV.isFloat(); ColumnVector doubles = doubleStringCV.asDoubles(); - ColumnVector expectedDoubles = ColumnVector.fromBoxedDoubles(0d, 0d, - Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY, 0d, 0d, -0d, 0d, Double.MAX_VALUE, - Double.POSITIVE_INFINITY, -Double.MAX_VALUE, Double.NEGATIVE_INFINITY, 1.2e-234d, 0d, - 0d, null, 423d); - ColumnVector expected = ColumnVector.fromBoxedBooleans(false, false, true, true, false, - false, true, true, true, true, true, true, true, false, false, null, true)) { + ColumnVector expectedDoubles = ColumnVector.fromBoxedDoubles(0d, Double.NaN, + Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY, + -0d, 0d, Double.MAX_VALUE, Double.POSITIVE_INFINITY, -Double.MAX_VALUE, Double.NEGATIVE_INFINITY, + 1.2e-234d, 0d, 0d, null, 423d); + ColumnVector expected = ColumnVector.fromBoxedBooleans(false, true, true, true, true, + true, true, true, true, true, true, true, true, false, false, null, true)) { assertColumnsAreEqual(expected, isDouble); assertColumnsAreEqual(expectedDoubles, doubles); } diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index a167383c65c..2a91abc5701 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -97,69 +97,6 @@ def str_to_boolean(column: StringColumn): cudf.dtype("timedelta64[ns]"): str_cast.int2timedelta, } -_NAN_INF_VARIATIONS = [ - "nan", - "NAN", - "Nan", - "naN", - "nAN", - "NAn", - "nAn", - "-inf", - "-INF", - "-InF", - "-inF", - "-iNF", - "-INf", - "-iNf", - "+inf", - "+INF", - "+InF", - "+inF", - "+iNF", - "+INf", - "+Inf", - "+iNf", - "inf", - "INF", - "InF", - "inF", - "iNF", - "INf", - "iNf", -] -_LIBCUDF_SUPPORTED_NAN_INF_VARIATIONS = [ - "NaN", - 
"NaN", - "NaN", - "NaN", - "NaN", - "NaN", - "NaN", - "-Inf", - "-Inf", - "-Inf", - "-Inf", - "-Inf", - "-Inf", - "-Inf", - "Inf", - "Inf", - "Inf", - "Inf", - "Inf", - "Inf", - "Inf", - "Inf", - "Inf", - "Inf", - "Inf", - "Inf", - "Inf", - "Inf", - "Inf", -] - def _is_supported_regex_flags(flags): return flags == 0 or ( @@ -5309,16 +5246,6 @@ def as_numerical_column( "type due to presence of non-integer values." ) elif out_dtype.kind == "f": - # TODO: Replace this `replace` call with a - # case-insensitive method once following - # issue is fixed: https://github.com/rapidsai/cudf/issues/5217 - old_values = cudf.core.column.as_column(_NAN_INF_VARIATIONS) - new_values = cudf.core.column.as_column( - _LIBCUDF_SUPPORTED_NAN_INF_VARIATIONS - ) - string_col = libcudf.replace.replace( - string_col, old_values, new_values - ) if not libstrings.is_float(string_col).all(): raise ValueError( "Could not convert strings to float " From 00a8845780ae9289f483f1113e5c62d4acd7dfe7 Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Tue, 30 Nov 2021 14:02:24 -0600 Subject: [PATCH 22/28] Refactor TableTest assertion methods to a separate utility class (#9762) TableTest has a number of dependencies, e.g.: Parquet, Hadoop, etc., that make it less ideal to be used in an external project. This moves the column and table assertion methods to a separate AssertUtils utility class that avoids the extra dependencies. 
Authors: - Jason Lowe (https://github.com/jlowe) Approvers: - Gera Shegalov (https://github.com/gerashegalov) URL: https://github.com/rapidsai/cudf/pull/9762 --- .../ai/rapids/cudf/ArrowColumnVectorTest.java | 3 +- .../test/java/ai/rapids/cudf/AssertUtils.java | 272 ++++++++++++++++++ .../java/ai/rapids/cudf/BinaryOpTest.java | 2 +- .../ai/rapids/cudf/ByteColumnVectorTest.java | 6 +- .../java/ai/rapids/cudf/ColumnVectorTest.java | 38 +-- .../test/java/ai/rapids/cudf/IfElseTest.java | 2 +- .../ai/rapids/cudf/IntColumnVectorTest.java | 4 +- .../test/java/ai/rapids/cudf/ScalarTest.java | 2 +- .../test/java/ai/rapids/cudf/TableTest.java | 251 +--------------- .../cudf/TimestampColumnVectorTest.java | 2 +- .../test/java/ai/rapids/cudf/UnaryOpTest.java | 2 +- .../cudf/ast/CompiledExpressionTest.java | 2 +- 12 files changed, 309 insertions(+), 277 deletions(-) create mode 100644 java/src/test/java/ai/rapids/cudf/AssertUtils.java diff --git a/java/src/test/java/ai/rapids/cudf/ArrowColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ArrowColumnVectorTest.java index d5d4059d18d..2a11b24b3a8 100644 --- a/java/src/test/java/ai/rapids/cudf/ArrowColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ArrowColumnVectorTest.java @@ -21,7 +21,6 @@ import java.nio.ByteBuffer; import java.util.ArrayList; -import ai.rapids.cudf.HostColumnVector.BasicType; import ai.rapids.cudf.HostColumnVector.ListType; import ai.rapids.cudf.HostColumnVector.StructType; @@ -40,7 +39,7 @@ import org.junit.jupiter.api.Test; -import static ai.rapids.cudf.TableTest.assertColumnsAreEqual; +import static ai.rapids.cudf.AssertUtils.assertColumnsAreEqual; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; diff --git a/java/src/test/java/ai/rapids/cudf/AssertUtils.java b/java/src/test/java/ai/rapids/cudf/AssertUtils.java new file mode 100644 index 00000000000..184e7dd0c57 --- /dev/null +++ 
b/java/src/test/java/ai/rapids/cudf/AssertUtils.java @@ -0,0 +1,272 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package ai.rapids.cudf; + +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; + +/** Utility methods for asserting in unit tests */ +public class AssertUtils { + + /** + * Checks and asserts that passed in columns match + * @param expect The expected result column + * @param cv The input column + */ + public static void assertColumnsAreEqual(ColumnView expect, ColumnView cv) { + assertColumnsAreEqual(expect, cv, "unnamed"); + } + + /** + * Checks and asserts that passed in columns match + * @param expected The expected result column + * @param cv The input column + * @param colName The name of the column + */ + public static void assertColumnsAreEqual(ColumnView expected, ColumnView cv, String colName) { + assertPartialColumnsAreEqual(expected, 0, expected.getRowCount(), cv, colName, true, false); + } + + /** + * Checks and asserts that passed in host columns match + * @param expected The expected result host column + * @param cv The input host column + * @param colName The name of the host column + */ + public static void assertColumnsAreEqual(HostColumnVector expected, HostColumnVector cv, String colName) { + assertPartialColumnsAreEqual(expected, 0, expected.getRowCount(), cv, 
colName, true, false); + } + + /** + * Checks and asserts that passed in Struct columns match + * @param expected The expected result Struct column + * @param cv The input Struct column + */ + public static void assertStructColumnsAreEqual(ColumnView expected, ColumnView cv) { + assertPartialStructColumnsAreEqual(expected, 0, expected.getRowCount(), cv, "unnamed", true, false); + } + + /** + * Checks and asserts that passed in Struct columns match + * @param expected The expected result Struct column + * @param rowOffset The row number to look from + * @param length The number of rows to consider + * @param cv The input Struct column + * @param colName The name of the column + * @param enableNullCountCheck Whether to check for nulls in the Struct column + * @param enableNullabilityCheck Whether the table have a validity mask + */ + public static void assertPartialStructColumnsAreEqual(ColumnView expected, long rowOffset, long length, + ColumnView cv, String colName, boolean enableNullCountCheck, boolean enableNullabilityCheck) { + try (HostColumnVector hostExpected = expected.copyToHost(); + HostColumnVector hostcv = cv.copyToHost()) { + assertPartialColumnsAreEqual(hostExpected, rowOffset, length, hostcv, colName, enableNullCountCheck, enableNullabilityCheck); + } + } + + /** + * Checks and asserts that passed in columns match + * @param expected The expected result column + * @param cv The input column + * @param colName The name of the column + * @param enableNullCheck Whether to check for nulls in the column + * @param enableNullabilityCheck Whether the table have a validity mask + */ + public static void assertPartialColumnsAreEqual(ColumnView expected, long rowOffset, long length, + ColumnView cv, String colName, boolean enableNullCheck, boolean enableNullabilityCheck) { + try (HostColumnVector hostExpected = expected.copyToHost(); + HostColumnVector hostcv = cv.copyToHost()) { + assertPartialColumnsAreEqual(hostExpected, rowOffset, length, hostcv, colName, 
enableNullCheck, enableNullabilityCheck); + } + } + + /** + * Checks and asserts that passed in host columns match + * @param expected The expected result host column + * @param rowOffset start row index + * @param length number of rows from starting offset + * @param cv The input host column + * @param colName The name of the host column + * @param enableNullCountCheck Whether to check for nulls in the host column + */ + public static void assertPartialColumnsAreEqual(HostColumnVectorCore expected, long rowOffset, long length, + HostColumnVectorCore cv, String colName, boolean enableNullCountCheck, boolean enableNullabilityCheck) { + assertEquals(expected.getType(), cv.getType(), "Type For Column " + colName); + assertEquals(length, cv.getRowCount(), "Row Count For Column " + colName); + assertEquals(expected.getNumChildren(), cv.getNumChildren(), "Child Count for Column " + colName); + if (enableNullCountCheck) { + assertEquals(expected.getNullCount(), cv.getNullCount(), "Null Count For Column " + colName); + } else { + // TODO add in a proper check when null counts are supported by serializing a partitioned column + } + if (enableNullabilityCheck) { + assertEquals(expected.hasValidityVector(), cv.hasValidityVector(), "Column nullability is different than expected"); + } + DType type = expected.getType(); + for (long expectedRow = rowOffset; expectedRow < (rowOffset + length); expectedRow++) { + long tableRow = expectedRow - rowOffset; + assertEquals(expected.isNull(expectedRow), cv.isNull(tableRow), + "NULL for Column " + colName + " Row " + tableRow); + if (!expected.isNull(expectedRow)) { + switch (type.typeId) { + case BOOL8: // fall through + case INT8: // fall through + case UINT8: + assertEquals(expected.getByte(expectedRow), cv.getByte(tableRow), + "Column " + colName + " Row " + tableRow); + break; + case INT16: // fall through + case UINT16: + assertEquals(expected.getShort(expectedRow), cv.getShort(tableRow), + "Column " + colName + " Row " + 
tableRow); + break; + case INT32: // fall through + case UINT32: // fall through + case TIMESTAMP_DAYS: + case DURATION_DAYS: + case DECIMAL32: + assertEquals(expected.getInt(expectedRow), cv.getInt(tableRow), + "Column " + colName + " Row " + tableRow); + break; + case INT64: // fall through + case UINT64: // fall through + case DURATION_MICROSECONDS: // fall through + case DURATION_MILLISECONDS: // fall through + case DURATION_NANOSECONDS: // fall through + case DURATION_SECONDS: // fall through + case TIMESTAMP_MICROSECONDS: // fall through + case TIMESTAMP_MILLISECONDS: // fall through + case TIMESTAMP_NANOSECONDS: // fall through + case TIMESTAMP_SECONDS: + case DECIMAL64: + assertEquals(expected.getLong(expectedRow), cv.getLong(tableRow), + "Column " + colName + " Row " + tableRow); + break; + case DECIMAL128: + assertEquals(expected.getBigDecimal(expectedRow), cv.getBigDecimal(tableRow), + "Column " + colName + " Row " + tableRow); + break; + case FLOAT32: + CudfTestBase.assertEqualsWithinPercentage(expected.getFloat(expectedRow), cv.getFloat(tableRow), 0.0001, + "Column " + colName + " Row " + tableRow); + break; + case FLOAT64: + CudfTestBase.assertEqualsWithinPercentage(expected.getDouble(expectedRow), cv.getDouble(tableRow), 0.0001, + "Column " + colName + " Row " + tableRow); + break; + case STRING: + assertArrayEquals(expected.getUTF8(expectedRow), cv.getUTF8(tableRow), + "Column " + colName + " Row " + tableRow); + break; + case LIST: + HostMemoryBuffer expectedOffsets = expected.getOffsets(); + HostMemoryBuffer cvOffsets = cv.getOffsets(); + int expectedChildRows = expectedOffsets.getInt((expectedRow + 1) * 4) - + expectedOffsets.getInt(expectedRow * 4); + int cvChildRows = cvOffsets.getInt((tableRow + 1) * 4) - + cvOffsets.getInt(tableRow * 4); + assertEquals(expectedChildRows, cvChildRows, "Child row count for Column " + + colName + " Row " + tableRow); + break; + case STRUCT: + // parent column only has validity which was checked above + break; + 
default: + throw new IllegalArgumentException(type + " is not supported yet"); + } + } + } + + if (type.isNestedType()) { + switch (type.typeId) { + case LIST: + int expectedChildRowOffset = 0; + int numChildRows = 0; + if (length > 0) { + HostMemoryBuffer expectedOffsets = expected.getOffsets(); + HostMemoryBuffer cvOffsets = cv.getOffsets(); + expectedChildRowOffset = expectedOffsets.getInt(rowOffset * 4); + numChildRows = expectedOffsets.getInt((rowOffset + length) * 4) - + expectedChildRowOffset; + } + assertPartialColumnsAreEqual(expected.getNestedChildren().get(0), expectedChildRowOffset, + numChildRows, cv.getNestedChildren().get(0), colName + " list child", + enableNullCountCheck, enableNullabilityCheck); + break; + case STRUCT: + List expectedChildren = expected.getNestedChildren(); + List cvChildren = cv.getNestedChildren(); + for (int i = 0; i < expectedChildren.size(); i++) { + HostColumnVectorCore expectedChild = expectedChildren.get(i); + HostColumnVectorCore cvChild = cvChildren.get(i); + String childName = colName + " child " + i; + assertEquals(length, cvChild.getRowCount(), "Row Count for Column " + colName); + assertPartialColumnsAreEqual(expectedChild, rowOffset, length, cvChild, + colName, enableNullCountCheck, enableNullabilityCheck); + } + break; + default: + throw new IllegalArgumentException(type + " is not supported yet"); + } + } + } + + /** + * Checks and asserts that the two tables from a given rowindex match based on a provided schema. 
+ * @param expected the expected result table + * @param rowOffset the row number to start checking from + * @param length the number of rows to check + * @param table the input table to compare against expected + * @param enableNullCheck whether to check for nulls or not + * @param enableNullabilityCheck whether the table have a validity mask + */ + public static void assertPartialTablesAreEqual(Table expected, long rowOffset, long length, Table table, + boolean enableNullCheck, boolean enableNullabilityCheck) { + assertEquals(expected.getNumberOfColumns(), table.getNumberOfColumns()); + assertEquals(length, table.getRowCount(), "ROW COUNT"); + for (int col = 0; col < expected.getNumberOfColumns(); col++) { + ColumnVector expect = expected.getColumn(col); + ColumnVector cv = table.getColumn(col); + String name = String.valueOf(col); + if (rowOffset != 0 || length != expected.getRowCount()) { + name = name + " PART " + rowOffset + "-" + (rowOffset + length - 1); + } + assertPartialColumnsAreEqual(expect, rowOffset, length, cv, name, enableNullCheck, enableNullabilityCheck); + } + } + + /** + * Checks and asserts that the two tables match + * @param expected the expected result table + * @param table the input table to compare against expected + */ + public static void assertTablesAreEqual(Table expected, Table table) { + assertPartialTablesAreEqual(expected, 0, expected.getRowCount(), table, true, false); + } + + public static void assertTableTypes(DType[] expectedTypes, Table t) { + int len = t.getNumberOfColumns(); + assertEquals(expectedTypes.length, len); + for (int i = 0; i < len; i++) { + ColumnVector vec = t.getColumn(i); + DType type = vec.getType(); + assertEquals(expectedTypes[i], type, "Types don't match at " + i); + } + } +} diff --git a/java/src/test/java/ai/rapids/cudf/BinaryOpTest.java b/java/src/test/java/ai/rapids/cudf/BinaryOpTest.java index 894861b8c44..0ca997d3c80 100644 --- a/java/src/test/java/ai/rapids/cudf/BinaryOpTest.java +++ 
b/java/src/test/java/ai/rapids/cudf/BinaryOpTest.java @@ -27,7 +27,7 @@ import java.util.Arrays; import java.util.stream.IntStream; -import static ai.rapids.cudf.TableTest.assertColumnsAreEqual; +import static ai.rapids.cudf.AssertUtils.assertColumnsAreEqual; import static ai.rapids.cudf.TestUtils.*; import static org.junit.jupiter.api.Assertions.assertThrows; diff --git a/java/src/test/java/ai/rapids/cudf/ByteColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ByteColumnVectorTest.java index 878fa7e4516..a26dbec4907 100644 --- a/java/src/test/java/ai/rapids/cudf/ByteColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ByteColumnVectorTest.java @@ -127,9 +127,9 @@ public void testCastToByte() { ColumnVector expected1 = ColumnVector.fromBytes((byte)4, (byte)3, (byte)8); ColumnVector expected2 = ColumnVector.fromBytes((byte)100); ColumnVector expected3 = ColumnVector.fromBytes((byte)-23)) { - TableTest.assertColumnsAreEqual(expected1, byteColumnVector1); - TableTest.assertColumnsAreEqual(expected2, byteColumnVector2); - TableTest.assertColumnsAreEqual(expected3, byteColumnVector3); + AssertUtils.assertColumnsAreEqual(expected1, byteColumnVector1); + AssertUtils.assertColumnsAreEqual(expected2, byteColumnVector2); + AssertUtils.assertColumnsAreEqual(expected3, byteColumnVector3); } } diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index cf602c26717..fa9052029cc 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -34,8 +34,10 @@ import java.util.stream.Collectors; import java.util.stream.IntStream; +import static ai.rapids.cudf.AssertUtils.assertColumnsAreEqual; +import static ai.rapids.cudf.AssertUtils.assertStructColumnsAreEqual; +import static ai.rapids.cudf.AssertUtils.assertTablesAreEqual; import static ai.rapids.cudf.QuantileMethod.*; -import static ai.rapids.cudf.TableTest.*; import static 
org.junit.jupiter.api.Assertions.*; import static org.junit.jupiter.api.Assumptions.assumeTrue; @@ -86,8 +88,8 @@ void testTransformVector() { ColumnVector cv1 = cv.transform(ptx, true); ColumnVector cv2 = cv.transform(cuda, false); ColumnVector expected = ColumnVector.fromBoxedInts(2*2-2, 3*3-3, null, 4*4-4)) { - TableTest.assertColumnsAreEqual(expected, cv1); - TableTest.assertColumnsAreEqual(expected, cv2); + assertColumnsAreEqual(expected, cv1); + assertColumnsAreEqual(expected, cv2); } } @@ -252,7 +254,7 @@ void testStringCreation() { try (ColumnVector cv = ColumnVector.fromStrings("d", "sd", "sde", null, "END"); HostColumnVector host = cv.copyToHost(); ColumnVector backAgain = host.copyToDevice()) { - TableTest.assertColumnsAreEqual(cv, backAgain); + assertColumnsAreEqual(cv, backAgain); } } @@ -265,7 +267,7 @@ void testUTF8StringCreation() { null, "END".getBytes(StandardCharsets.UTF_8)); ColumnVector expected = ColumnVector.fromStrings("d", "sd", "sde", null, "END")) { - TableTest.assertColumnsAreEqual(expected, cv); + assertColumnsAreEqual(expected, cv); } } @@ -299,7 +301,7 @@ void testConcatNoNulls() { ColumnVector v2 = ColumnVector.fromInts(8, 9); ColumnVector v = ColumnVector.concatenate(v0, v1, v2); ColumnVector expected = ColumnVector.fromInts(1, 2, 3, 4, 5, 6, 7, 8, 9)) { - TableTest.assertColumnsAreEqual(expected, v); + assertColumnsAreEqual(expected, v); } } @@ -310,7 +312,7 @@ void testConcatWithNulls() { ColumnVector v2 = ColumnVector.fromBoxedDoubles(null, 9.0); ColumnVector v = ColumnVector.concatenate(v0, v1, v2); ColumnVector expected = ColumnVector.fromBoxedDoubles(1., 2., 3., 4., 5., 6., 7., null, 9.)) { - TableTest.assertColumnsAreEqual(expected, v); + assertColumnsAreEqual(expected, v); } } @@ -1882,13 +1884,13 @@ void testSubvector() { try (ColumnVector vec = ColumnVector.fromBoxedInts(1, 2, 3, null, 5); ColumnVector expected = ColumnVector.fromBoxedInts(2, 3, null, 5); ColumnVector found = vec.subVector(1, 5)) { - 
TableTest.assertColumnsAreEqual(expected, found); + assertColumnsAreEqual(expected, found); } try (ColumnVector vec = ColumnVector.fromStrings("1", "2", "3", null, "5"); ColumnVector expected = ColumnVector.fromStrings("2", "3", null, "5"); ColumnVector found = vec.subVector(1, 5)) { - TableTest.assertColumnsAreEqual(expected, found); + assertColumnsAreEqual(expected, found); } } @@ -2014,7 +2016,7 @@ void testTrimStringsWhiteSpace() { try (ColumnVector cv = ColumnVector.fromStrings(" 123", "123 ", null, " 123 ", "\t\t123\n\n"); ColumnVector trimmed = cv.strip(); ColumnVector expected = ColumnVector.fromStrings("123", "123", null, "123", "123")) { - TableTest.assertColumnsAreEqual(expected, trimmed); + assertColumnsAreEqual(expected, trimmed); } } @@ -2024,7 +2026,7 @@ void testTrimStrings() { Scalar one = Scalar.fromString(" 1"); ColumnVector trimmed = cv.strip(one); ColumnVector expected = ColumnVector.fromStrings("23", "23", null, "23", "\t\t123\n\n")) { - TableTest.assertColumnsAreEqual(expected, trimmed); + assertColumnsAreEqual(expected, trimmed); } } @@ -2033,7 +2035,7 @@ void testLeftTrimStringsWhiteSpace() { try (ColumnVector cv = ColumnVector.fromStrings(" 123", "123 ", null, " 123 ", "\t\t123\n\n"); ColumnVector trimmed = cv.lstrip(); ColumnVector expected = ColumnVector.fromStrings("123", "123 ", null, "123 ", "123\n\n")) { - TableTest.assertColumnsAreEqual(expected, trimmed); + assertColumnsAreEqual(expected, trimmed); } } @@ -2043,7 +2045,7 @@ void testLeftTrimStrings() { Scalar one = Scalar.fromString(" 1"); ColumnVector trimmed = cv.lstrip(one); ColumnVector expected = ColumnVector.fromStrings("23", "23 ", null, "231", "\t\t123\n\n")) { - TableTest.assertColumnsAreEqual(expected, trimmed); + assertColumnsAreEqual(expected, trimmed); } } @@ -2052,7 +2054,7 @@ void testRightTrimStringsWhiteSpace() { try (ColumnVector cv = ColumnVector.fromStrings(" 123", "123 ", null, " 123 ", "\t\t123\n\n"); ColumnVector trimmed = cv.rstrip(); ColumnVector expected = 
ColumnVector.fromStrings(" 123", "123", null, " 123", "\t\t123")) { - TableTest.assertColumnsAreEqual(expected, trimmed); + assertColumnsAreEqual(expected, trimmed); } } @@ -2062,7 +2064,7 @@ void testRightTrimStrings() { Scalar one = Scalar.fromString(" 1"); ColumnVector trimmed = cv.rstrip(one); ColumnVector expected = ColumnVector.fromStrings("123", "123", null, "123", "\t\t123\n\n")) { - TableTest.assertColumnsAreEqual(expected, trimmed); + assertColumnsAreEqual(expected, trimmed); } } @@ -2108,7 +2110,7 @@ void testCountElements() { Arrays.asList(1, 2, 3), Arrays.asList(1, 2, 3, 4)); ColumnVector lengths = cv.countElements(); ColumnVector expected = ColumnVector.fromBoxedInts(1, 2, null, 2, 3, 4)) { - TableTest.assertColumnsAreEqual(expected, lengths); + assertColumnsAreEqual(expected, lengths); } } @@ -2117,7 +2119,7 @@ void testStringLengths() { try (ColumnVector cv = ColumnVector.fromStrings("1", "12", null, "123", "1234"); ColumnVector lengths = cv.getCharLengths(); ColumnVector expected = ColumnVector.fromBoxedInts(1, 2, null, 3, 4)) { - TableTest.assertColumnsAreEqual(expected, lengths); + assertColumnsAreEqual(expected, lengths); } } @@ -2126,7 +2128,7 @@ void testGetByteCount() { try (ColumnVector cv = ColumnVector.fromStrings("1", "12", "123", null, "1234"); ColumnVector byteLengthVector = cv.getByteCount(); ColumnVector expected = ColumnVector.fromBoxedInts(1, 2, 3, null, 4)) { - TableTest.assertColumnsAreEqual(expected, byteLengthVector); + assertColumnsAreEqual(expected, byteLengthVector); } } diff --git a/java/src/test/java/ai/rapids/cudf/IfElseTest.java b/java/src/test/java/ai/rapids/cudf/IfElseTest.java index 86ddcc23416..a078befdf40 100644 --- a/java/src/test/java/ai/rapids/cudf/IfElseTest.java +++ b/java/src/test/java/ai/rapids/cudf/IfElseTest.java @@ -25,7 +25,7 @@ import java.util.stream.Stream; -import static ai.rapids.cudf.TableTest.assertColumnsAreEqual; +import static ai.rapids.cudf.AssertUtils.assertColumnsAreEqual; import static 
org.junit.jupiter.api.Assertions.assertThrows; public class IfElseTest extends CudfTestBase { diff --git a/java/src/test/java/ai/rapids/cudf/IntColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/IntColumnVectorTest.java index dd03c4de69e..2fb8164534b 100644 --- a/java/src/test/java/ai/rapids/cudf/IntColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/IntColumnVectorTest.java @@ -117,8 +117,8 @@ public void testCastToInt() { ColumnVector expected1 = ColumnVector.fromInts(4, 3, 8); ColumnVector intColumnVector2 = shortColumnVector.asInts(); ColumnVector expected2 = ColumnVector.fromInts(100)) { - TableTest.assertColumnsAreEqual(expected1, intColumnVector1); - TableTest.assertColumnsAreEqual(expected2, intColumnVector2); + AssertUtils.assertColumnsAreEqual(expected1, intColumnVector1); + AssertUtils.assertColumnsAreEqual(expected2, intColumnVector2); } } diff --git a/java/src/test/java/ai/rapids/cudf/ScalarTest.java b/java/src/test/java/ai/rapids/cudf/ScalarTest.java index 0889363c2d0..86c340bb321 100644 --- a/java/src/test/java/ai/rapids/cudf/ScalarTest.java +++ b/java/src/test/java/ai/rapids/cudf/ScalarTest.java @@ -29,7 +29,7 @@ import java.nio.charset.StandardCharsets; import java.util.Arrays; -import static ai.rapids.cudf.TableTest.assertColumnsAreEqual; +import static ai.rapids.cudf.AssertUtils.assertColumnsAreEqual; import static org.junit.jupiter.api.Assertions.*; public class ScalarTest extends CudfTestBase { diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java index b4247e9bb7c..fa221e19387 100644 --- a/java/src/test/java/ai/rapids/cudf/TableTest.java +++ b/java/src/test/java/ai/rapids/cudf/TableTest.java @@ -57,6 +57,11 @@ import java.util.stream.Collectors; import static ai.rapids.cudf.ColumnWriterOptions.mapColumn; +import static ai.rapids.cudf.AssertUtils.assertColumnsAreEqual; +import static ai.rapids.cudf.AssertUtils.assertPartialColumnsAreEqual; +import static 
ai.rapids.cudf.AssertUtils.assertPartialTablesAreEqual; +import static ai.rapids.cudf.AssertUtils.assertTableTypes; +import static ai.rapids.cudf.AssertUtils.assertTablesAreEqual; import static ai.rapids.cudf.ParquetWriterOptions.listBuilder; import static ai.rapids.cudf.ParquetWriterOptions.structBuilder; import static ai.rapids.cudf.Table.TestBuilder; @@ -94,242 +99,6 @@ public class TableTest extends CudfTestBase { "8|118.2|128\n" + "9|119.8|129").getBytes(StandardCharsets.UTF_8); - /** - * Checks and asserts that passed in columns match - * @param expect The expected result column - * @param cv The input column - */ - public static void assertColumnsAreEqual(ColumnView expect, ColumnView cv) { - assertColumnsAreEqual(expect, cv, "unnamed"); - } - - /** - * Checks and asserts that passed in columns match - * @param expected The expected result column - * @param cv The input column - * @param colName The name of the column - */ - public static void assertColumnsAreEqual(ColumnView expected, ColumnView cv, String colName) { - assertPartialColumnsAreEqual(expected, 0, expected.getRowCount(), cv, colName, true, false); - } - - /** - * Checks and asserts that passed in host columns match - * @param expected The expected result host column - * @param cv The input host column - * @param colName The name of the host column - */ - public static void assertColumnsAreEqual(HostColumnVector expected, HostColumnVector cv, String colName) { - assertPartialColumnsAreEqual(expected, 0, expected.getRowCount(), cv, colName, true, false); - } - - /** - * Checks and asserts that passed in Struct columns match - * @param expected The expected result Struct column - * @param cv The input Struct column - */ - public static void assertStructColumnsAreEqual(ColumnView expected, ColumnView cv) { - assertPartialStructColumnsAreEqual(expected, 0, expected.getRowCount(), cv, "unnamed", true, false); - } - - /** - * Checks and asserts that passed in Struct columns match - * @param expected 
The expected result Struct column - * @param rowOffset The row number to look from - * @param length The number of rows to consider - * @param cv The input Struct column - * @param colName The name of the column - * @param enableNullCountCheck Whether to check for nulls in the Struct column - * @param enableNullabilityCheck Whether the table have a validity mask - */ - public static void assertPartialStructColumnsAreEqual(ColumnView expected, long rowOffset, long length, - ColumnView cv, String colName, boolean enableNullCountCheck, boolean enableNullabilityCheck) { - try (HostColumnVector hostExpected = expected.copyToHost(); - HostColumnVector hostcv = cv.copyToHost()) { - assertPartialColumnsAreEqual(hostExpected, rowOffset, length, hostcv, colName, enableNullCountCheck, enableNullabilityCheck); - } - } - - /** - * Checks and asserts that passed in columns match - * @param expected The expected result column - * @param cv The input column - * @param colName The name of the column - * @param enableNullCheck Whether to check for nulls in the column - * @param enableNullabilityCheck Whether the table have a validity mask - */ - public static void assertPartialColumnsAreEqual(ColumnView expected, long rowOffset, long length, - ColumnView cv, String colName, boolean enableNullCheck, boolean enableNullabilityCheck) { - try (HostColumnVector hostExpected = expected.copyToHost(); - HostColumnVector hostcv = cv.copyToHost()) { - assertPartialColumnsAreEqual(hostExpected, rowOffset, length, hostcv, colName, enableNullCheck, enableNullabilityCheck); - } - } - - /** - * Checks and asserts that passed in host columns match - * @param expected The expected result host column - * @param rowOffset start row index - * @param length number of rows from starting offset - * @param cv The input host column - * @param colName The name of the host column - * @param enableNullCountCheck Whether to check for nulls in the host column - */ - public static void 
assertPartialColumnsAreEqual(HostColumnVectorCore expected, long rowOffset, long length, - HostColumnVectorCore cv, String colName, boolean enableNullCountCheck, boolean enableNullabilityCheck) { - assertEquals(expected.getType(), cv.getType(), "Type For Column " + colName); - assertEquals(length, cv.getRowCount(), "Row Count For Column " + colName); - assertEquals(expected.getNumChildren(), cv.getNumChildren(), "Child Count for Column " + colName); - if (enableNullCountCheck) { - assertEquals(expected.getNullCount(), cv.getNullCount(), "Null Count For Column " + colName); - } else { - // TODO add in a proper check when null counts are supported by serializing a partitioned column - } - if (enableNullabilityCheck) { - assertEquals(expected.hasValidityVector(), cv.hasValidityVector(), "Column nullability is different than expected"); - } - DType type = expected.getType(); - for (long expectedRow = rowOffset; expectedRow < (rowOffset + length); expectedRow++) { - long tableRow = expectedRow - rowOffset; - assertEquals(expected.isNull(expectedRow), cv.isNull(tableRow), - "NULL for Column " + colName + " Row " + tableRow); - if (!expected.isNull(expectedRow)) { - switch (type.typeId) { - case BOOL8: // fall through - case INT8: // fall through - case UINT8: - assertEquals(expected.getByte(expectedRow), cv.getByte(tableRow), - "Column " + colName + " Row " + tableRow); - break; - case INT16: // fall through - case UINT16: - assertEquals(expected.getShort(expectedRow), cv.getShort(tableRow), - "Column " + colName + " Row " + tableRow); - break; - case INT32: // fall through - case UINT32: // fall through - case TIMESTAMP_DAYS: - case DURATION_DAYS: - case DECIMAL32: - assertEquals(expected.getInt(expectedRow), cv.getInt(tableRow), - "Column " + colName + " Row " + tableRow); - break; - case INT64: // fall through - case UINT64: // fall through - case DURATION_MICROSECONDS: // fall through - case DURATION_MILLISECONDS: // fall through - case DURATION_NANOSECONDS: // fall 
through - case DURATION_SECONDS: // fall through - case TIMESTAMP_MICROSECONDS: // fall through - case TIMESTAMP_MILLISECONDS: // fall through - case TIMESTAMP_NANOSECONDS: // fall through - case TIMESTAMP_SECONDS: - case DECIMAL64: - assertEquals(expected.getLong(expectedRow), cv.getLong(tableRow), - "Column " + colName + " Row " + tableRow); - break; - case DECIMAL128: - assertEquals(expected.getBigDecimal(expectedRow), cv.getBigDecimal(tableRow), - "Column " + colName + " Row " + tableRow); - break; - case FLOAT32: - assertEqualsWithinPercentage(expected.getFloat(expectedRow), cv.getFloat(tableRow), 0.0001, - "Column " + colName + " Row " + tableRow); - break; - case FLOAT64: - assertEqualsWithinPercentage(expected.getDouble(expectedRow), cv.getDouble(tableRow), 0.0001, - "Column " + colName + " Row " + tableRow); - break; - case STRING: - assertArrayEquals(expected.getUTF8(expectedRow), cv.getUTF8(tableRow), - "Column " + colName + " Row " + tableRow); - break; - case LIST: - HostMemoryBuffer expectedOffsets = expected.getOffsets(); - HostMemoryBuffer cvOffsets = cv.getOffsets(); - int expectedChildRows = expectedOffsets.getInt((expectedRow + 1) * 4) - - expectedOffsets.getInt(expectedRow * 4); - int cvChildRows = cvOffsets.getInt((tableRow + 1) * 4) - - cvOffsets.getInt(tableRow * 4); - assertEquals(expectedChildRows, cvChildRows, "Child row count for Column " + - colName + " Row " + tableRow); - break; - case STRUCT: - // parent column only has validity which was checked above - break; - default: - throw new IllegalArgumentException(type + " is not supported yet"); - } - } - } - - if (type.isNestedType()) { - switch (type.typeId) { - case LIST: - int expectedChildRowOffset = 0; - int numChildRows = 0; - if (length > 0) { - HostMemoryBuffer expectedOffsets = expected.getOffsets(); - HostMemoryBuffer cvOffsets = cv.getOffsets(); - expectedChildRowOffset = expectedOffsets.getInt(rowOffset * 4); - numChildRows = expectedOffsets.getInt((rowOffset + length) * 4) - 
- expectedChildRowOffset; - } - assertPartialColumnsAreEqual(expected.getNestedChildren().get(0), expectedChildRowOffset, - numChildRows, cv.getNestedChildren().get(0), colName + " list child", - enableNullCountCheck, enableNullabilityCheck); - break; - case STRUCT: - List expectedChildren = expected.getNestedChildren(); - List cvChildren = cv.getNestedChildren(); - for (int i = 0; i < expectedChildren.size(); i++) { - HostColumnVectorCore expectedChild = expectedChildren.get(i); - HostColumnVectorCore cvChild = cvChildren.get(i); - String childName = colName + " child " + i; - assertEquals(length, cvChild.getRowCount(), "Row Count for Column " + colName); - assertPartialColumnsAreEqual(expectedChild, rowOffset, length, cvChild, - colName, enableNullCountCheck, enableNullabilityCheck); - } - break; - default: - throw new IllegalArgumentException(type + " is not supported yet"); - } - } - } - - /** - * Checks and asserts that the two tables from a given rowindex match based on a provided schema. 
- * @param expected the expected result table - * @param rowOffset the row number to start checking from - * @param length the number of rows to check - * @param table the input table to compare against expected - * @param enableNullCheck whether to check for nulls or not - * @param enableNullabilityCheck whether the table have a validity mask - */ - public static void assertPartialTablesAreEqual(Table expected, long rowOffset, long length, Table table, - boolean enableNullCheck, boolean enableNullabilityCheck) { - assertEquals(expected.getNumberOfColumns(), table.getNumberOfColumns()); - assertEquals(length, table.getRowCount(), "ROW COUNT"); - for (int col = 0; col < expected.getNumberOfColumns(); col++) { - ColumnVector expect = expected.getColumn(col); - ColumnVector cv = table.getColumn(col); - String name = String.valueOf(col); - if (rowOffset != 0 || length != expected.getRowCount()) { - name = name + " PART " + rowOffset + "-" + (rowOffset + length - 1); - } - assertPartialColumnsAreEqual(expect, rowOffset, length, cv, name, enableNullCheck, enableNullabilityCheck); - } - } - - /** - * Checks and asserts that the two tables match - * @param expected the expected result table - * @param table the input table to compare against expected - */ - public static void assertTablesAreEqual(Table expected, Table table) { - assertPartialTablesAreEqual(expected, 0, expected.getRowCount(), table, true, false); - } - void assertTablesHaveSameValues(HashMap[] expectedTable, Table table) { assertEquals(expectedTable.length, table.getNumberOfColumns()); int numCols = table.getNumberOfColumns(); @@ -358,16 +127,6 @@ void assertTablesHaveSameValues(HashMap[] expectedTable, Table } } - public static void assertTableTypes(DType[] expectedTypes, Table t) { - int len = t.getNumberOfColumns(); - assertEquals(expectedTypes.length, len); - for (int i = 0; i < len; i++) { - ColumnVector vec = t.getColumn(i); - DType type = vec.getType(); - assertEquals(expectedTypes[i], type, "Types 
don't match at " + i); - } - } - @Test void testMergeSimple() { try (Table table1 = new Table.TestBuilder() diff --git a/java/src/test/java/ai/rapids/cudf/TimestampColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/TimestampColumnVectorTest.java index 8bf1370a0f7..9a929cec98d 100644 --- a/java/src/test/java/ai/rapids/cudf/TimestampColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/TimestampColumnVectorTest.java @@ -22,7 +22,7 @@ import java.util.function.Function; -import static ai.rapids.cudf.TableTest.assertColumnsAreEqual; +import static ai.rapids.cudf.AssertUtils.assertColumnsAreEqual; import static org.junit.jupiter.api.Assertions.assertEquals; public class TimestampColumnVectorTest extends CudfTestBase { diff --git a/java/src/test/java/ai/rapids/cudf/UnaryOpTest.java b/java/src/test/java/ai/rapids/cudf/UnaryOpTest.java index 76970e8bf76..7fcb7cbd85b 100644 --- a/java/src/test/java/ai/rapids/cudf/UnaryOpTest.java +++ b/java/src/test/java/ai/rapids/cudf/UnaryOpTest.java @@ -22,7 +22,7 @@ import ai.rapids.cudf.HostColumnVector.Builder; import org.junit.jupiter.api.Test; -import static ai.rapids.cudf.TableTest.assertColumnsAreEqual; +import static ai.rapids.cudf.AssertUtils.assertColumnsAreEqual; public class UnaryOpTest extends CudfTestBase { private static final Double[] DOUBLES_1 = new Double[]{1.0, 10.0, -100.1, 5.3, 50.0, 100.0, null, Double.NaN, Double.POSITIVE_INFINITY, 1/9.0, Double.NEGATIVE_INFINITY, 500.0, -500.0}; diff --git a/java/src/test/java/ai/rapids/cudf/ast/CompiledExpressionTest.java b/java/src/test/java/ai/rapids/cudf/ast/CompiledExpressionTest.java index 2fb6792b409..e50da0a4d4d 100644 --- a/java/src/test/java/ai/rapids/cudf/ast/CompiledExpressionTest.java +++ b/java/src/test/java/ai/rapids/cudf/ast/CompiledExpressionTest.java @@ -36,7 +36,7 @@ import java.util.function.Function; import java.util.stream.Stream; -import static ai.rapids.cudf.TableTest.assertColumnsAreEqual; +import static 
ai.rapids.cudf.AssertUtils.assertColumnsAreEqual; public class CompiledExpressionTest extends CudfTestBase { @Test From 554ac817498e64ba1c7ef054873fab7dc658d25c Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Tue, 30 Nov 2021 15:50:56 -0600 Subject: [PATCH 23/28] Load native dependencies when Java ColumnView is loaded (#9800) The Java ColumnView class has native methods but does not ensure the corresponding native libraries that implement those methods are loaded. This adds a static code block to the ColumnView class to load the native libraries when the ColumnView class is loaded. Authors: - Jason Lowe (https://github.com/jlowe) Approvers: - Robert (Bobby) Evans (https://github.com/revans2) - Kuhu Shukla (https://github.com/kuhushukla) URL: https://github.com/rapidsai/cudf/pull/9800 --- java/src/main/java/ai/rapids/cudf/ColumnView.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index 729444f460c..6d0d24baf99 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -30,6 +30,10 @@ */ public class ColumnView implements AutoCloseable, BinaryOperable { + static { + NativeDepsLoader.loadNativeDeps(); + } + public static final long UNKNOWN_NULL_COUNT = -1; protected long viewHandle; From 20d6723fcb5eaffb6398e5cf6c14de8d774ca917 Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Tue, 30 Nov 2021 15:51:12 -0600 Subject: [PATCH 24/28] Copy Java native dependencies directly into classpath (#9787) Eliminates the intermediate copy of the native libraries for the Java bindings into target/native-deps, instead copying libcudf.so and libcudfjni.so directly into the classpath resources. This eliminates the need to search target/native-deps at runtime when the native libraries are not in the classpath in the case of running tests before the jar is built. 
Authors: - Jason Lowe (https://github.com/jlowe) Approvers: - Robert (Bobby) Evans (https://github.com/revans2) URL: https://github.com/rapidsai/cudf/pull/9787 --- java/pom.xml | 7 ++----- .../main/java/ai/rapids/cudf/NativeDepsLoader.java | 11 ++--------- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/java/pom.xml b/java/pom.xml index 87d43ec1272..c5a3bc64fad 100755 --- a/java/pom.xml +++ b/java/pom.xml @@ -297,9 +297,6 @@ LICENSE - - ${project.build.directory}/native-deps/ - @@ -499,14 +496,14 @@ copy-native-libs - validate + generate-resources copy-resources true ${skipNativeCopy} - ${project.build.directory}/native-deps/${os.arch}/${os.name} + ${project.build.outputDirectory}/${os.arch}/${os.name} ${native.build.path} diff --git a/java/src/main/java/ai/rapids/cudf/NativeDepsLoader.java b/java/src/main/java/ai/rapids/cudf/NativeDepsLoader.java index 8780ecc3aa3..9663fbcafb4 100755 --- a/java/src/main/java/ai/rapids/cudf/NativeDepsLoader.java +++ b/java/src/main/java/ai/rapids/cudf/NativeDepsLoader.java @@ -81,9 +81,7 @@ public static synchronized void loadNativeDeps() { /** * Allows other libraries to reuse the same native deps loading logic. Libraries will be searched - * for under ${os.arch}/${os.name}/ in the class path using the class loader for this class. It - * will also look for the libraries under ./target/native-deps/${os.arch}/${os.name} to help - * facilitate testing while building. + * for under ${os.arch}/${os.name}/ in the class path using the class loader for this class. *
* Because this just loads the libraries and loading the libraries themselves needs to be a * singleton operation it is recommended that any library using this provide their own wrapper @@ -203,12 +201,7 @@ private static File createFile(String os, String arch, String baseName) throws I File loc; URL resource = loader.getResource(path); if (resource == null) { - // It looks like we are not running from the jar, or there are issues with the jar - File f = new File("./target/native-deps/" + path); - if (!f.exists()) { - throw new FileNotFoundException("Could not locate native dependency " + path); - } - resource = f.toURI().toURL(); + throw new FileNotFoundException("Could not locate native dependency " + path); } try (InputStream in = resource.openStream()) { loc = File.createTempFile(baseName, ".so"); From 991136c78be01d4de20387086a185cfd5a21713b Mon Sep 17 00:00:00 2001 From: Sheilah Kirui <71867292+skirui-source@users.noreply.github.com> Date: Tue, 30 Nov 2021 15:31:53 -0800 Subject: [PATCH 25/28] Add Pearson correlation for sort groupby (python) (#9166) Fixes: https://github.com/rapidsai/cudf/issues/8691 Authors: - Sheilah Kirui (https://github.com/skirui-source) - Karthikeyan (https://github.com/karthikeyann) - Ashwin Srinath (https://github.com/shwina) Approvers: - Karthikeyan (https://github.com/karthikeyann) - Michael Wang (https://github.com/isVoid) - Mayank Anand (https://github.com/mayankanand007) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/9166 --- docs/cudf/source/api_docs/groupby.rst | 1 + docs/cudf/source/basics/groupby.rst | 10 ++ python/cudf/cudf/_lib/aggregation.pyx | 55 +++++++++- python/cudf/cudf/_lib/cpp/aggregation.pxd | 15 ++- python/cudf/cudf/_lib/groupby.pyx | 4 +- python/cudf/cudf/core/groupby/groupby.py | 121 +++++++++++++++++++++- python/cudf/cudf/tests/test_dataframe.py | 115 ++++++++++++++++++++ 7 files changed, 314 
insertions(+), 7 deletions(-) diff --git a/docs/cudf/source/api_docs/groupby.rst b/docs/cudf/source/api_docs/groupby.rst index cf08d1d791b..575d7442cdf 100644 --- a/docs/cudf/source/api_docs/groupby.rst +++ b/docs/cudf/source/api_docs/groupby.rst @@ -59,6 +59,7 @@ Computations / descriptive stats GroupBy.std GroupBy.sum GroupBy.var + GroupBy.corr The following methods are available in both ``SeriesGroupBy`` and ``DataFrameGroupBy`` objects, but may differ slightly, usually in that diff --git a/docs/cudf/source/basics/groupby.rst b/docs/cudf/source/basics/groupby.rst index 04c4d42fa2a..f3269768025 100644 --- a/docs/cudf/source/basics/groupby.rst +++ b/docs/cudf/source/basics/groupby.rst @@ -127,6 +127,13 @@ Aggregations on groups is supported via the ``agg`` method: a 1 4 1 2.0 2 5 2 4.5 + >>> df.groupby("a").corr(method="pearson") + b c + a + 1 b 1.000000 0.866025 + c 0.866025 1.000000 + 2 b 1.000000 1.000000 + c 1.000000 1.000000 The following table summarizes the available aggregations and the types that support them: @@ -169,6 +176,9 @@ that support them: +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ | unique | ✅ | ✅ | ✅ | ✅ | | | | | +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ + | corr | ✅ | | | | | | | ✅ | + +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ + GroupBy apply ------------- diff --git a/python/cudf/cudf/_lib/aggregation.pyx b/python/cudf/cudf/_lib/aggregation.pyx index 4f703724cef..68f7101b6ee 100644 --- a/python/cudf/cudf/_lib/aggregation.pyx +++ b/python/cudf/cudf/_lib/aggregation.pyx @@ -1,6 +1,6 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. 
-from enum import Enum +from enum import Enum, IntEnum import numba import numpy as np @@ -30,6 +30,7 @@ from cudf._lib.types import Interpolation cimport cudf._lib.cpp.aggregation as libcudf_aggregation cimport cudf._lib.cpp.types as libcudf_types +from cudf._lib.cpp.aggregation cimport underlying_type_t_correlation_type import cudf @@ -57,6 +58,22 @@ class AggregationKind(Enum): UNIQUE = libcudf_aggregation.aggregation.Kind.COLLECT_SET PTX = libcudf_aggregation.aggregation.Kind.PTX CUDA = libcudf_aggregation.aggregation.Kind.CUDA + CORRELATION = libcudf_aggregation.aggregation.Kind.CORRELATION + + +class CorrelationType(IntEnum): + PEARSON = ( + + libcudf_aggregation.correlation_type.PEARSON + ) + KENDALL = ( + + libcudf_aggregation.correlation_type.KENDALL + ) + SPEARMAN = ( + + libcudf_aggregation.correlation_type.SPEARMAN + ) cdef class Aggregation: @@ -321,6 +338,22 @@ cdef class Aggregation: )) return agg + @classmethod + def corr(cls, method, libcudf_types.size_type min_periods): + cdef Aggregation agg = cls() + cdef libcudf_aggregation.correlation_type c_method = ( + ( + ( + CorrelationType[method.upper()] + ) + ) + ) + agg.c_obj = move( + libcudf_aggregation.make_correlation_aggregation[aggregation]( + c_method, min_periods + )) + return agg + cdef class RollingAggregation: """A Cython wrapper for rolling window aggregations. @@ -692,6 +725,24 @@ cdef class GroupbyAggregation: ) return agg + @classmethod + def corr(cls, method, libcudf_types.size_type min_periods): + cdef GroupbyAggregation agg = cls() + cdef libcudf_aggregation.correlation_type c_method = ( + ( + ( + CorrelationType[method.upper()] + ) + ) + ) + agg.c_obj = move( + libcudf_aggregation. + make_correlation_aggregation[groupby_aggregation]( + c_method, min_periods + )) + return agg + + cdef class GroupbyScanAggregation: """A Cython wrapper for groupby scan aggregations. 
diff --git a/python/cudf/cudf/_lib/cpp/aggregation.pxd b/python/cudf/cudf/_lib/cpp/aggregation.pxd index 13bfa49057c..3982b4fecbb 100644 --- a/python/cudf/cudf/_lib/cpp/aggregation.pxd +++ b/python/cudf/cudf/_lib/cpp/aggregation.pxd @@ -1,5 +1,5 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. - +# Copyright (c) 2020-2021, NVIDIA CORPORATION. +from libc.stdint cimport int32_t from libcpp.memory cimport unique_ptr from libcpp.string cimport string from libcpp.vector cimport vector @@ -11,6 +11,7 @@ from cudf._lib.cpp.types cimport ( size_type, ) +ctypedef int32_t underlying_type_t_correlation_type cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil: @@ -38,6 +39,8 @@ cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil: COLLECT_SET 'cudf::aggregation::COLLECT_SET' PTX 'cudf::aggregation::PTX' CUDA 'cudf::aggregation::CUDA' + CORRELATION 'cudf::aggregation::CORRELATION' + Kind kind cdef cppclass rolling_aggregation: @@ -53,6 +56,11 @@ cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil: CUDA 'cudf::udf_type::CUDA' PTX 'cudf::udf_type::PTX' + ctypedef enum correlation_type: + PEARSON 'cudf::correlation_type::PEARSON' + KENDALL 'cudf::correlation_type::KENDALL' + SPEARMAN 'cudf::correlation_type::SPEARMAN' + cdef unique_ptr[T] make_sum_aggregation[T]() except + cdef unique_ptr[T] make_product_aggregation[T]() except + @@ -106,3 +114,6 @@ cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil: udf_type type, string user_defined_aggregator, data_type output_type) except + + + cdef unique_ptr[T] make_correlation_aggregation[T]( + correlation_type type, size_type min_periods) except + diff --git a/python/cudf/cudf/_lib/groupby.pyx b/python/cudf/cudf/_lib/groupby.pyx index 0968d22d465..314542c9549 100644 --- a/python/cudf/cudf/_lib/groupby.pyx +++ b/python/cudf/cudf/_lib/groupby.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. 
from collections import defaultdict @@ -54,7 +54,7 @@ _CATEGORICAL_AGGS = {"COUNT", "SIZE", "NUNIQUE", "UNIQUE"} _STRING_AGGS = {"COUNT", "SIZE", "MAX", "MIN", "NUNIQUE", "NTH", "COLLECT", "UNIQUE"} _LIST_AGGS = {"COLLECT"} -_STRUCT_AGGS = set() +_STRUCT_AGGS = {"CORRELATION"} _INTERVAL_AGGS = set() _DECIMAL_AGGS = {"COUNT", "SUM", "ARGMIN", "ARGMAX", "MIN", "MAX", "NUNIQUE", "NTH", "COLLECT"} diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index 7f9f61ed3fd..f1d622362e2 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -1,6 +1,7 @@ # Copyright (c) 2020-2021, NVIDIA CORPORATION. import collections +import itertools import pickle import warnings @@ -13,7 +14,8 @@ from cudf._typing import DataFrameOrSeries from cudf.api.types import is_list_like from cudf.core.abc import Serializable -from cudf.core.column.column import arange +from cudf.core.column.column import arange, as_column +from cudf.core.multiindex import MultiIndex from cudf.utils.utils import GetAttrGetItemMixin, cached_property @@ -69,6 +71,8 @@ def __init__( """ self.obj = obj self._as_index = as_index + self._by = by + self._level = level self._sort = sort self._dropna = dropna @@ -777,6 +781,121 @@ def median(self): """Get the column-wise median of the values in each group.""" return self.agg("median") + def corr(self, method="pearson", min_periods=1): + """ + Compute pairwise correlation of columns, excluding NA/null values. + + Parameters + ---------- + method: {"pearson", "kendall", "spearman"} or callable, + default "pearson". Currently only the pearson correlation + coefficient is supported. + + min_periods: int, optional + Minimum number of observations required per pair of columns + to have a valid result. + + Returns + ---------- + DataFrame + Correlation matrix. + + Examples + -------- + >>> import cudf + >>> gdf = cudf.DataFrame({ + ... 
"id": ["a", "a", "a", "b", "b", "b", "c", "c", "c"], + ... "val1": [5, 4, 6, 4, 8, 7, 4, 5, 2], + ... "val2": [4, 5, 6, 1, 2, 9, 8, 5, 1], + ... "val3": [4, 5, 6, 1, 2, 9, 8, 5, 1]}) + >>> gdf + id val1 val2 val3 + 0 a 5 4 4 + 1 a 4 5 5 + 2 a 6 6 6 + 3 b 4 1 1 + 4 b 8 2 2 + 5 b 7 9 9 + 6 c 4 8 8 + 7 c 5 5 5 + 8 c 2 1 1 + >>> gdf.groupby("id").corr(method="pearson") + val1 val2 val3 + id + a val1 1.000000 0.500000 0.500000 + val2 0.500000 1.000000 1.000000 + val3 0.500000 1.000000 1.000000 + b val1 1.000000 0.385727 0.385727 + val2 0.385727 1.000000 1.000000 + val3 0.385727 1.000000 1.000000 + c val1 1.000000 0.714575 0.714575 + val2 0.714575 1.000000 1.000000 + val3 0.714575 1.000000 1.000000 + """ + + if not method.lower() in ("pearson",): + raise NotImplementedError( + "Only pearson correlation is currently supported" + ) + + # create expanded dataframe consisting all combinations of the + # struct columns-pairs to be correlated + # i.e (('col1', 'col1'), ('col1', 'col2'), ('col2', 'col2')) + _cols = self.grouping.values.columns.tolist() + len_cols = len(_cols) + + new_df_data = {} + for x, y in itertools.combinations_with_replacement(_cols, 2): + new_df_data[(x, y)] = cudf.DataFrame._from_data( + {"x": self.obj._data[x], "y": self.obj._data[y]} + ).to_struct() + new_gb = cudf.DataFrame._from_data(new_df_data).groupby( + by=self.grouping.keys + ) + + try: + gb_corr = new_gb.agg(lambda x: x.corr(method, min_periods)) + except RuntimeError as e: + if "Unsupported groupby reduction type-agg combination" in str(e): + raise TypeError( + "Correlation accepts only numerical column-pairs" + ) + raise + + # ensure that column-pair labels are arranged in ascending order + cols_list = [ + (y, x) if i > j else (x, y) + for j, y in enumerate(_cols) + for i, x in enumerate(_cols) + ] + cols_split = [ + cols_list[i : i + len_cols] + for i in range(0, len(cols_list), len_cols) + ] + + # interleave: combine the correlation results for each column-pair + # into a single column + 
res = cudf.DataFrame._from_data( + { + x: gb_corr.loc[:, i].interleave_columns() + for i, x in zip(cols_split, _cols) + } + ) + + # create a multiindex for the groupby correlated dataframe, + # to match pandas behavior + unsorted_idx = gb_corr.index.repeat(len_cols) + idx_sort_order = unsorted_idx._get_sorted_inds() + sorted_idx = unsorted_idx._gather(idx_sort_order) + if len(gb_corr): + # TO-DO: Should the operation below be done on the CPU instead? + sorted_idx._data[None] = as_column( + cudf.Series(_cols).tile(len(gb_corr.index)) + ) + res.index = MultiIndex._from_data(sorted_idx._data) + + return res + def var(self, ddof=1): """Compute the column-wise variance of the values in each group. diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index d07caef11d5..d555b5c4033 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -8924,3 +8924,118 @@ def test_frame_series_where_other(data): expected = gdf.where(gdf["b"] == 1, 0) actual = pdf.where(pdf["b"] == 1, 0) assert_eq(expected, actual) + + +@pytest.mark.parametrize( + "data, gkey", + [ + ( + { + "id": ["a", "a", "a", "b", "b", "b", "c", "c", "c"], + "val1": [5, 4, 6, 4, 8, 7, 4, 5, 2], + "val2": [4, 5, 6, 1, 2, 9, 8, 5, 1], + "val3": [4, 5, 6, 1, 2, 9, 8, 5, 1], + }, + ["id", "val1", "val2"], + ), + ( + { + "id": [0] * 4 + [1] * 3, + "a": [10, 3, 4, 2, -3, 9, 10], + "b": [10, 23, -4, 2, -3, 9, 19], + }, + ["id", "a"], + ), + ( + { + "id": ["a", "a", "b", "b", "c", "c"], + "val": [None, None, None, None, None, None], + }, + ["id"], + ), + ( + { + "id": ["a", "a", "b", "b", "c", "c"], + "val1": [None, 4, 6, 8, None, 2], + "val2": [4, 5, None, 2, 9, None], + }, + ["id"], + ), + ({"id": [1.0], "val1": [2.0], "val2": [3.0]}, ["id"]), + ], +) +@pytest.mark.parametrize( + "min_per", [0, 1, 2, 3, 4], +) +def test_pearson_corr_passing(data, gkey, min_per): + gdf = cudf.DataFrame(data) + pdf = gdf.to_pandas() + + actual = 
gdf.groupby(gkey).corr(method="pearson", min_periods=min_per) + expected = pdf.groupby(gkey).corr(method="pearson", min_periods=min_per) + + assert_eq(expected, actual) + + +@pytest.mark.parametrize("method", ["kendall", "spearman"]) +def test_pearson_corr_unsupported_methods(method): + gdf = cudf.DataFrame( + { + "id": ["a", "a", "a", "b", "b", "b", "c", "c", "c"], + "val1": [5, 4, 6, 4, 8, 7, 4, 5, 2], + "val2": [4, 5, 6, 1, 2, 9, 8, 5, 1], + "val3": [4, 5, 6, 1, 2, 9, 8, 5, 1], + } + ) + + with pytest.raises( + NotImplementedError, + match="Only pearson correlation is currently supported", + ): + gdf.groupby("id").corr(method) + + +def test_pearson_corr_empty_columns(): + gdf = cudf.DataFrame(columns=["id", "val1", "val2"]) + pdf = gdf.to_pandas() + + actual = gdf.groupby("id").corr("pearson") + expected = pdf.groupby("id").corr("pearson") + + assert_eq( + expected, actual, check_dtype=False, check_index_type=False, + ) + + +@pytest.mark.parametrize( + "data", + [ + { + "id": ["a", "a", "a", "b", "b", "b", "c", "c", "c"], + "val1": ["v", "n", "k", "l", "m", "i", "y", "r", "w"], + "val2": ["d", "d", "d", "e", "e", "e", "f", "f", "f"], + }, + { + "id": ["a", "a", "a", "b", "b", "b", "c", "c", "c"], + "val1": [1, 1, 1, 2, 2, 2, 3, 3, 3], + "val2": ["d", "d", "d", "e", "e", "e", "f", "f", "f"], + }, + ], +) +@pytest.mark.parametrize("gkey", ["id", "val1", "val2"]) +def test_pearson_corr_invalid_column_types(data, gkey): + with pytest.raises( + TypeError, match="Correlation accepts only numerical column-pairs", + ): + cudf.DataFrame(data).groupby(gkey).corr("pearson") + + +def test_pearson_corr_multiindex_dataframe(): + gdf = cudf.DataFrame( + {"a": [1, 1, 2, 2], "b": [1, 1, 2, 3], "c": [2, 3, 4, 5]} + ).set_index(["a", "b"]) + + actual = gdf.groupby(level="a").corr("pearson") + expected = gdf.to_pandas().groupby(level="a").corr("pearson") + + assert_eq(expected, actual) From 1eabcb73b7df235de9985e207e2087af9dfb0e14 Mon Sep 17 00:00:00 2001 From: Karthikeyan 
<6488848+karthikeyann@users.noreply.github.com> Date: Wed, 1 Dec 2021 17:03:36 +0530 Subject: [PATCH 26/28] Fix some doxygen warnings and add missing documentation (#9770) fix to ignore `__device__ void` return type warnings. add missing documentation on some functions Correct doxygen doc style comment fixes Authors: - Karthikeyan (https://github.com/karthikeyann) Approvers: - David Wendt (https://github.com/davidwendt) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/9770 --- cpp/doxygen/Doxyfile | 7 ++++--- cpp/include/cudf/lists/combine.hpp | 2 +- cpp/include/cudf/scalar/scalar_device_view.cuh | 16 ++++++++++++++++ .../cudf/strings/convert/convert_lists.hpp | 2 +- cpp/include/cudf/table/row_operators.cuh | 3 ++- cpp/include/cudf_test/base_fixture.hpp | 3 +++ cpp/include/cudf_test/column_wrapper.hpp | 3 +++ cpp/include/cudf_test/file_utilities.hpp | 9 +++++++++ cpp/include/cudf_test/table_utilities.hpp | 2 +- 9 files changed, 40 insertions(+), 7 deletions(-) diff --git a/cpp/doxygen/Doxyfile b/cpp/doxygen/Doxyfile index 55e5119040e..6a556bb4b34 100644 --- a/cpp/doxygen/Doxyfile +++ b/cpp/doxygen/Doxyfile @@ -2089,7 +2089,7 @@ ENABLE_PREPROCESSING = YES # The default value is: NO. # This tag requires that the tag ENABLE_PREPROCESSING is set to YES. -MACRO_EXPANSION = NO +MACRO_EXPANSION = YES # If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES then # the macro expansion is limited to the macros specified with the PREDEFINED and @@ -2097,7 +2097,7 @@ MACRO_EXPANSION = NO # The default value is: NO. # This tag requires that the tag ENABLE_PREPROCESSING is set to YES. -EXPAND_ONLY_PREDEF = NO +EXPAND_ONLY_PREDEF = YES # If the SEARCH_INCLUDES tag is set to YES, the include files in the # INCLUDE_PATH will be searched if a #include is found. @@ -2129,7 +2129,8 @@ INCLUDE_FILE_PATTERNS = # recursively expanded use the := operator instead of the = operator. 
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES. -PREDEFINED = +PREDEFINED = __device__= \ + __host__= # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this # tag can be used to specify a list of macro names that should be expanded. The diff --git a/cpp/include/cudf/lists/combine.hpp b/cpp/include/cudf/lists/combine.hpp index a9407ed57ca..61a81e8a745 100644 --- a/cpp/include/cudf/lists/combine.hpp +++ b/cpp/include/cudf/lists/combine.hpp @@ -26,7 +26,7 @@ namespace lists { * @file */ -/* +/** * @brief Flag to specify whether a null list element will be ignored from concatenation, or the * entire concatenation result involving null list elements will be a null element. */ diff --git a/cpp/include/cudf/scalar/scalar_device_view.cuh b/cpp/include/cudf/scalar/scalar_device_view.cuh index 884b412d3e2..56afa150dfc 100644 --- a/cpp/include/cudf/scalar/scalar_device_view.cuh +++ b/cpp/include/cudf/scalar/scalar_device_view.cuh @@ -91,6 +91,12 @@ class fixed_width_scalar_device_view_base : public detail::scalar_device_view_ba return *data(); } + /** + * @brief Stores the value in scalar + * + * @tparam T The desired type + * @param value The value to store in scalar + */ template __device__ void set_value(T value) { @@ -159,6 +165,11 @@ class fixed_width_scalar_device_view : public detail::fixed_width_scalar_device_ return fixed_width_scalar_device_view_base::value(); } + /** + * @brief Stores the value in scalar + * + * @param value The value to store in scalar + */ __device__ void set_value(T value) { fixed_width_scalar_device_view_base::set_value(value); } /** @@ -218,6 +229,11 @@ class fixed_point_scalar_device_view : public detail::scalar_device_view_base { { } + /** + * @brief Stores the value in scalar + * + * @param value The value to store in scalar + */ __device__ void set_value(rep_type value) { *_data = value; } /** diff --git a/cpp/include/cudf/strings/convert/convert_lists.hpp 
b/cpp/include/cudf/strings/convert/convert_lists.hpp index ec22186ea99..279bf44e7fc 100644 --- a/cpp/include/cudf/strings/convert/convert_lists.hpp +++ b/cpp/include/cudf/strings/convert/convert_lists.hpp @@ -50,7 +50,7 @@ namespace strings { * * @param input Lists column to format. * @param na_rep Replacment string for null elements. - * @param separator Strings to use for enclosing list components and separating elements. + * @param separators Strings to use for enclosing list components and separating elements. * @param mr Device memory resource used to allocate the returned column's device memory. * @return New strings column. */ diff --git a/cpp/include/cudf/table/row_operators.cuh b/cpp/include/cudf/table/row_operators.cuh index c719c564a87..70ccac2f75d 100644 --- a/cpp/include/cudf/table/row_operators.cuh +++ b/cpp/include/cudf/table/row_operators.cuh @@ -67,7 +67,7 @@ __device__ weak_ordering compare_elements(Element lhs, Element rhs) } } // namespace detail -/* +/** * @brief A specialization for floating-point `Element` type relational comparison * to derive the order of the elements with respect to `lhs`. Specialization is to * handle `nan` in the order shown below. @@ -187,6 +187,7 @@ class element_equality_comparator { * * @param lhs_element_index The index of the first element * @param rhs_element_index The index of the second element + * @return True if both lhs and rhs element are both nulls and `nulls_are_equal` is true, or equal * */ template ()>* = nullptr> T generate() @@ -211,6 +213,7 @@ class TempDirTestEnvironment : public ::testing::Environment { /** * @brief Get a temporary filepath to use for the specified filename * + * @param filename name of the file to be placed in temporary directory. 
* @return std::string The temporary filepath */ std::string get_temp_filepath(std::string filename) { return tmpdir.path() + filename; } diff --git a/cpp/include/cudf_test/column_wrapper.hpp b/cpp/include/cudf_test/column_wrapper.hpp index f291b04776a..cd2ac9f3ec1 100644 --- a/cpp/include/cudf_test/column_wrapper.hpp +++ b/cpp/include/cudf_test/column_wrapper.hpp @@ -79,6 +79,7 @@ class column_wrapper { /** * @brief Releases internal unique_ptr to wrapped column + * @return unique_ptr to wrapped column */ std::unique_ptr release() { return std::move(wrapped); } @@ -1040,11 +1041,13 @@ class dictionary_column_wrapper : public detail::column_wrapper { /** * @brief Access keys column view + * @return column_view to keys column */ column_view keys() const { return cudf::dictionary_column_view{wrapped->view()}.keys(); } /** * @brief Access indices column view + * @return column_view to indices column */ column_view indices() const { return cudf::dictionary_column_view{wrapped->view()}.indices(); } diff --git a/cpp/include/cudf_test/file_utilities.hpp b/cpp/include/cudf_test/file_utilities.hpp index 90bf0cd99dc..8e242e5a4f3 100644 --- a/cpp/include/cudf_test/file_utilities.hpp +++ b/cpp/include/cudf_test/file_utilities.hpp @@ -24,6 +24,10 @@ #include +/** + * @brief RAII class for creating a temporary directory. 
+ * + */ class temp_directory { std::string _path; @@ -49,5 +53,10 @@ class temp_directory { nftw(_path.c_str(), rm_files, 10, FTW_DEPTH | FTW_MOUNT | FTW_PHYS); } + /** + * @brief Returns the path of the temporary directory + * + * @return string path of the temporary directory + */ const std::string& path() const { return _path; } }; diff --git a/cpp/include/cudf_test/table_utilities.hpp b/cpp/include/cudf_test/table_utilities.hpp index 831c9f5ac14..f2427c5b8c6 100644 --- a/cpp/include/cudf_test/table_utilities.hpp +++ b/cpp/include/cudf_test/table_utilities.hpp @@ -39,7 +39,7 @@ void expect_table_properties_equal(cudf::table_view lhs, cudf::table_view rhs); */ void expect_tables_equal(cudf::table_view lhs, cudf::table_view rhs); -/* +/** * @brief Verifies the equivalency of two tables. * * Treats null elements as equivalent. Columns that have nullability but no nulls, From 1ceb8ab01120ffe463600db14e6893e196cbb991 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Wed, 1 Dec 2021 10:10:10 -0500 Subject: [PATCH 27/28] Improve build time of libcudf iterator tests (#9788) While working on #9641 I noticed that building the iterator gtests takes a lot of time in CI. Here is a link to the individual build times for libcudf including the gtests: https://gpuci.gpuopenanalytics.com/job/rapidsai/job/gpuci/job/cudf/job/prb/job/cudf-gpu-test/CUDA=11.5,GPU_LABEL=driver-495,LINUX_VER=ubuntu20.04,PYTHON=3.8/5173/testReport/(root)/BuildTime/ (you can sort by Duration by clicking on table column header). Here is a table of the top 20 compile time offenders as recorded on my local machine.
Note that like the CI build output, 6 of the top 20 are just building the `ITERATOR_TEST` | rank | time (ms) | file | | ---:| ---:|:--- | | 1 | 814334 | /cudf.dir/src/search/search.cu.o | 2 | 755375 | /cudf.dir/src/sort/sort_column.cu.o | 3 | 686235 | /ITERATOR_TEST.dir/iterator/optional_iterator_test_numeric.cu.o | 4 | 670587 | /cudf.dir/src/groupby/sort/group_nunique.cu.o | 5 | 585524 | /cudf.dir/src/reductions/scan/scan_inclusive.cu.o | 6 | 582677 | /ITERATOR_TEST.dir/iterator/pair_iterator_test_numeric.cu.o | 7 | 568418 | /ITERATOR_TEST.dir/iterator/scalar_iterator_test.cu.o | 8 | 563196 | /cudf.dir/src/sort/sort.cu.o | 9 | 548816 | /ITERATOR_TEST.dir/iterator/value_iterator_test_numeric.cu.o | 10 | 535315 | /cudf.dir/src/groupby/sort/sort_helper.cu.o | 11 | 531384 | /cudf.dir/src/sort/is_sorted.cu.o | 12 | 530382 | /ITERATOR_TEST.dir/iterator/value_iterator_test_chrono.cu.o | 13 | 525187 | /cudf.dir/src/join/semi_join.cu.o | 14 | 523726 | /cudf.dir/src/rolling/rolling.cu.o | 15 | 517909 | /cudf.dir/src/reductions/product.cu.o | 16 | 513119 | /cudf.dir/src/stream_compaction/distinct_count.cu.o | 17 | 512569 | /ITERATOR_TEST.dir/iterator/optional_iterator_test_chrono.cu.o | 18 | 508978 | /cudf.dir/src/reductions/sum_of_squares.cu.o | 19 | 508460 | /cudf.dir/src/lists/drop_list_duplicates.cu.o | 20 | 505247 | /cudf.dir/src/reductions/sum.cu.o I made some simple changes to the iterator code logic to use different thrust functions along with a temporary device vector. This approach improved the compile time of the `ITERATOR_TEST` by about 3x. Here are the results of compiling the above 6 files with the changes in this PR. 
| new rank | new time (ms) | file | | ---:| ---:|:--- | | 59 | 232691 (2.9x) | optional_iterator_test_numeric.cu.o | | 26 | 416951 (1.4x) | pair_iterator_test_numeric.cu.o | | 92 | 165947 (3.4x) | scalar_iterator_test.cu.o | | 65 | 216364 (2.5x) | value_iterator_test_numeric.cu.o | | 77 | 186583 (2.8x) | value_iterator_test_chrono.cu.o | | 111 | 137789 (3.7x) | optional_iterator_test_chrono.cu.o | Total overall build time improved locally by ~3m (10%) using `ninja -j48 install` on a Dell 5820. Here are the build time results of a CI build with these changes. https://gpuci.gpuopenanalytics.com/job/rapidsai/job/gpuci/job/cudf/job/prb/job/cudf-gpu-test/CUDA=11.5,GPU_LABEL=driver-495,LINUX_VER=ubuntu20.04,PYTHON=3.8/5190/testReport/(root)/BuildTime/ Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Nghia Truong (https://github.com/ttnghia) - Devavret Makkar (https://github.com/devavret) URL: https://github.com/rapidsai/cudf/pull/9788 --- cpp/tests/iterator/iterator_tests.cuh | 17 +++++++-- .../optional_iterator_test_numeric.cu | 37 +++++++++---------- 2 files changed, 32 insertions(+), 22 deletions(-) diff --git a/cpp/tests/iterator/iterator_tests.cuh b/cpp/tests/iterator/iterator_tests.cuh index 4ec347c4bc1..07eb595449c 100644 --- a/cpp/tests/iterator/iterator_tests.cuh +++ b/cpp/tests/iterator/iterator_tests.cuh @@ -18,8 +18,8 @@ #include #include -#include // include iterator header -#include //for meanvar +#include +#include // for meanvar #include #include @@ -28,6 +28,7 @@ #include #include +#include #include #include @@ -83,7 +84,17 @@ struct IteratorTest : public cudf::test::BaseFixture { EXPECT_EQ(thrust::distance(d_in, d_in_last), num_items); auto dev_expected = cudf::detail::make_device_uvector_sync(expected); - bool result = thrust::equal(thrust::device, d_in, d_in_last, dev_expected.begin()); + // using a temporary vector and calling transform and all_of separately is + // equivalent to thrust::equal but 
compiles ~3x faster + auto dev_results = rmm::device_uvector(num_items, rmm::cuda_stream_default); + thrust::transform(thrust::device, + d_in, + d_in_last, + dev_expected.begin(), + dev_results.begin(), + thrust::equal_to{}); + auto result = thrust::all_of( + thrust::device, dev_results.begin(), dev_results.end(), thrust::identity{}); EXPECT_TRUE(result) << "thrust test"; } diff --git a/cpp/tests/iterator/optional_iterator_test_numeric.cu b/cpp/tests/iterator/optional_iterator_test_numeric.cu index 6d51f4a5c14..a8c135a726f 100644 --- a/cpp/tests/iterator/optional_iterator_test_numeric.cu +++ b/cpp/tests/iterator/optional_iterator_test_numeric.cu @@ -50,21 +50,15 @@ struct transformer_optional_meanvar { } }; -struct sum_if_not_null { - template - CUDA_HOST_DEVICE_CALLABLE thrust::optional operator()(const thrust::optional& lhs, - const thrust::optional& rhs) - { - return lhs.value_or(T{0}) + rhs.value_or(T{0}); - } +template +struct optional_to_meanvar { + CUDA_HOST_DEVICE_CALLABLE T operator()(const thrust::optional& v) { return v.value_or(T{0}); } }; // TODO: enable this test also at __CUDACC_DEBUG__ // This test causes fatal compilation error only at device debug mode. // Workaround: exclude this test only at device debug mode. #if !defined(__CUDACC_DEBUG__) -// This test computes `count`, `sum`, `sum_of_squares` at a single reduction call. 
-// It would be useful for `var`, `std` operation TYPED_TEST(NumericOptionalIteratorTest, mean_var_output) { using T = TypeParam; @@ -104,22 +98,27 @@ TYPED_TEST(NumericOptionalIteratorTest, mean_var_output) expected_value.value_squared = std::accumulate( replaced_array.begin(), replaced_array.end(), T{0}, [](T acc, T i) { return acc + i * i; }); - // std::cout << "expected = " << expected_value << std::endl; - // GPU test auto it_dev = d_col->optional_begin(cudf::contains_nulls::YES{}); auto it_dev_squared = thrust::make_transform_iterator(it_dev, transformer); - auto result = thrust::reduce(it_dev_squared, - it_dev_squared + d_col->size(), - thrust::optional{T_output{}}, - sum_if_not_null{}); + + // this can be computed with a single reduce and without a temporary output vector + // but the approach increases the compile time by ~2x + auto results = rmm::device_uvector(d_col->size(), rmm::cuda_stream_default); + thrust::transform(thrust::device, + it_dev_squared, + it_dev_squared + d_col->size(), + results.begin(), + optional_to_meanvar{}); + auto result = thrust::reduce(thrust::device, results.begin(), results.end(), T_output{}); + if (not std::is_floating_point()) { - EXPECT_EQ(expected_value, *result) << "optional iterator reduction sum"; + EXPECT_EQ(expected_value, result) << "optional iterator reduction sum"; } else { - EXPECT_NEAR(expected_value.value, result->value, 1e-3) << "optional iterator reduction sum"; - EXPECT_NEAR(expected_value.value_squared, result->value_squared, 1e-3) + EXPECT_NEAR(expected_value.value, result.value, 1e-3) << "optional iterator reduction sum"; + EXPECT_NEAR(expected_value.value_squared, result.value_squared, 1e-3) << "optional iterator reduction sum squared"; - EXPECT_EQ(expected_value.count, result->count) << "optional iterator reduction count"; + EXPECT_EQ(expected_value.count, result.count) << "optional iterator reduction count"; } } #endif From 11c3dfef2e7fe6fd67ff93bdf36a47c0a5b2eb37 Mon Sep 17 00:00:00 2001 From: 
brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com> Date: Wed, 1 Dec 2021 10:28:24 -0600 Subject: [PATCH 28/28] Remove unused masked udf cython/c++ code (#9792) This PR removes the c++ side of the original masked UDF code introduced in https://github.com/rapidsai/cudf/pull/8213. These kernels had some limitations and are now superseded by the numba-generated versions we moved to in https://github.com/rapidsai/cudf/pull/9174. As far as I can tell, cuDF python was the only thing consuming this API for the short time it has existed. However I am marking this breaking just in case. Authors: - https://github.com/brandon-b-miller Approvers: - Mark Harris (https://github.com/harrism) - David Wendt (https://github.com/davidwendt) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/9792 --- .../Modules/JitifyPreprocessKernels.cmake | 4 +- cpp/include/cudf/transform.hpp | 6 -- cpp/src/transform/jit/masked_udf_kernel.cu | 85 --------------- cpp/src/transform/transform.cpp | 102 ------------------ python/cudf/cudf/_lib/cpp/transform.pxd | 6 -- python/cudf/cudf/_lib/transform.pyx | 24 ----- 6 files changed, 2 insertions(+), 225 deletions(-) delete mode 100644 cpp/src/transform/jit/masked_udf_kernel.cu diff --git a/cpp/cmake/Modules/JitifyPreprocessKernels.cmake b/cpp/cmake/Modules/JitifyPreprocessKernels.cmake index c2ad25760b8..6ab1293ab6f 100644 --- a/cpp/cmake/Modules/JitifyPreprocessKernels.cmake +++ b/cpp/cmake/Modules/JitifyPreprocessKernels.cmake @@ -51,8 +51,8 @@ function(jit_preprocess_files) endfunction() jit_preprocess_files( - SOURCE_DIRECTORY ${CUDF_SOURCE_DIR}/src FILES binaryop/jit/kernel.cu - transform/jit/masked_udf_kernel.cu transform/jit/kernel.cu rolling/jit/kernel.cu + SOURCE_DIRECTORY ${CUDF_SOURCE_DIR}/src FILES binaryop/jit/kernel.cu transform/jit/kernel.cu + rolling/jit/kernel.cu ) add_custom_target( diff --git 
a/cpp/include/cudf/transform.hpp b/cpp/include/cudf/transform.hpp index 55e7bc84dbe..45e8ff1310c 100644 --- a/cpp/include/cudf/transform.hpp +++ b/cpp/include/cudf/transform.hpp @@ -54,12 +54,6 @@ std::unique_ptr transform( bool is_ptx, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); -std::unique_ptr generalized_masked_op( - table_view const& data_view, - std::string const& binary_udf, - data_type output_type, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); - /** * @brief Creates a null_mask from `input` by converting `NaN` to null and * preserving existing null values and also returns new null_count. diff --git a/cpp/src/transform/jit/masked_udf_kernel.cu b/cpp/src/transform/jit/masked_udf_kernel.cu deleted file mode 100644 index 319ad730c53..00000000000 --- a/cpp/src/transform/jit/masked_udf_kernel.cu +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include - -namespace cudf { -namespace transformation { -namespace jit { - -template -struct Masked { - T value; - bool valid; -}; - -template -__device__ auto make_args(cudf::size_type id, TypeIn in_ptr, MaskType in_mask, OffsetType in_offset) -{ - bool valid = in_mask ? 
cudf::bit_is_set(in_mask, in_offset + id) : true; - return cuda::std::make_tuple(in_ptr[id], valid); -} - -template -__device__ auto make_args(cudf::size_type id, - InType in_ptr, - MaskType in_mask, // in practice, always cudf::bitmask_type const* - OffsetType in_offset, // in practice, always cudf::size_type - Arguments... args) -{ - bool valid = in_mask ? cudf::bit_is_set(in_mask, in_offset + id) : true; - return cuda::std::tuple_cat(cuda::std::make_tuple(in_ptr[id], valid), make_args(id, args...)); -} - -template -__global__ void generic_udf_kernel(cudf::size_type size, - TypeOut* out_data, - bool* out_mask, - Arguments... args) -{ - int const tid = threadIdx.x; - int const blkid = blockIdx.x; - int const blksz = blockDim.x; - int const gridsz = gridDim.x; - int const start = tid + blkid * blksz; - int const step = blksz * gridsz; - - Masked output; - for (cudf::size_type i = start; i < size; i += step) { - auto func_args = cuda::std::tuple_cat( - cuda::std::make_tuple(&output.value), - make_args(i, args...) 
// passed int64*, bool*, int64, int64*, bool*, int64 - ); - cuda::std::apply(GENERIC_OP, func_args); - out_data[i] = output.value; - out_mask[i] = output.valid; - } -} - -} // namespace jit -} // namespace transformation -} // namespace cudf diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp index 5230b853a79..0cca6699586 100644 --- a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/transform.cpp @@ -19,12 +19,10 @@ #include #include #include -#include #include #include #include -#include #include #include @@ -65,80 +63,6 @@ void unary_operation(mutable_column_view output, cudf::jit::get_data_ptr(input)); } -std::vector make_template_types(column_view outcol_view, table_view const& data_view) -{ - std::string mskptr_type = - cudf::jit::get_type_name(cudf::data_type(cudf::type_to_id())) + "*"; - std::string offset_type = - cudf::jit::get_type_name(cudf::data_type(cudf::type_to_id())); - - std::vector template_types; - template_types.reserve((3 * data_view.num_columns()) + 1); - - template_types.push_back(cudf::jit::get_type_name(outcol_view.type())); - for (auto const& col : data_view) { - template_types.push_back(cudf::jit::get_type_name(col.type()) + "*"); - template_types.push_back(mskptr_type); - template_types.push_back(offset_type); - } - return template_types; -} - -void generalized_operation(table_view const& data_view, - std::string const& udf, - data_type output_type, - mutable_column_view outcol_view, - mutable_column_view outmsk_view, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - auto const template_types = make_template_types(outcol_view, data_view); - - std::string generic_kernel_name = - jitify2::reflection::Template("cudf::transformation::jit::generic_udf_kernel") - .instantiate(template_types); - - std::string generic_cuda_source = cudf::jit::parse_single_function_ptx( - udf, "GENERIC_OP", cudf::jit::get_type_name(output_type), {0}); - - std::vector kernel_args; - 
kernel_args.reserve((data_view.num_columns() * 3) + 3); - - cudf::size_type size = outcol_view.size(); - const void* outcol_ptr = cudf::jit::get_data_ptr(outcol_view); - const void* outmsk_ptr = cudf::jit::get_data_ptr(outmsk_view); - kernel_args.insert(kernel_args.begin(), {&size, &outcol_ptr, &outmsk_ptr}); - - std::vector data_ptrs; - std::vector mask_ptrs; - std::vector offsets; - - data_ptrs.reserve(data_view.num_columns()); - mask_ptrs.reserve(data_view.num_columns()); - offsets.reserve(data_view.num_columns()); - - auto const iters = thrust::make_zip_iterator( - thrust::make_tuple(data_ptrs.begin(), mask_ptrs.begin(), offsets.begin())); - - std::for_each(iters, iters + data_view.num_columns(), [&](auto const& tuple_vals) { - kernel_args.push_back(&thrust::get<0>(tuple_vals)); - kernel_args.push_back(&thrust::get<1>(tuple_vals)); - kernel_args.push_back(&thrust::get<2>(tuple_vals)); - }); - - std::transform(data_view.begin(), data_view.end(), iters, [&](column_view const& col) { - return thrust::make_tuple(cudf::jit::get_data_ptr(col), col.null_mask(), col.offset()); - }); - - cudf::jit::get_program_cache(*transform_jit_masked_udf_kernel_cu_jit) - .get_kernel(generic_kernel_name, - {}, - {{"transform/jit/operation-udf.hpp", generic_cuda_source}}, - {"-arch=sm_."}) - ->configure_1d_max_occupancy(0, 0, 0, stream.value()) - ->launch(kernel_args.data()); -} - } // namespace jit } // namespace transformation @@ -165,24 +89,6 @@ std::unique_ptr transform(column_view const& input, return output; } -std::unique_ptr generalized_masked_op(table_view const& data_view, - std::string const& udf, - data_type output_type, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - std::unique_ptr output = make_fixed_width_column(output_type, data_view.num_rows()); - std::unique_ptr output_mask = - make_fixed_width_column(cudf::data_type{cudf::type_id::BOOL8}, data_view.num_rows()); - - transformation::jit::generalized_operation( - data_view, udf, 
output_type, *output, *output_mask, stream, mr); - - auto final_output_mask = cudf::bools_to_mask(*output_mask); - output.get()->set_null_mask(std::move(*(final_output_mask.first))); - return output; -} - } // namespace detail std::unique_ptr transform(column_view const& input, @@ -195,12 +101,4 @@ std::unique_ptr transform(column_view const& input, return detail::transform(input, unary_udf, output_type, is_ptx, rmm::cuda_stream_default, mr); } -std::unique_ptr generalized_masked_op(table_view const& data_view, - std::string const& udf, - data_type output_type, - rmm::mr::device_memory_resource* mr) -{ - return detail::generalized_masked_op(data_view, udf, output_type, rmm::cuda_stream_default, mr); -} - } // namespace cudf diff --git a/python/cudf/cudf/_lib/cpp/transform.pxd b/python/cudf/cudf/_lib/cpp/transform.pxd index 3153427ce3c..590a371ff52 100644 --- a/python/cudf/cudf/_lib/cpp/transform.pxd +++ b/python/cudf/cudf/_lib/cpp/transform.pxd @@ -34,12 +34,6 @@ cdef extern from "cudf/transform.hpp" namespace "cudf" nogil: bool is_ptx ) except + - cdef unique_ptr[column] generalized_masked_op( - const table_view& data_view, - string udf, - data_type output_type, - ) except + - cdef pair[unique_ptr[table], unique_ptr[column]] encode( table_view input ) except + diff --git a/python/cudf/cudf/_lib/transform.pyx b/python/cudf/cudf/_lib/transform.pyx index a0eb7c68183..96d25cb92c9 100644 --- a/python/cudf/cudf/_lib/transform.pyx +++ b/python/cudf/cudf/_lib/transform.pyx @@ -123,30 +123,6 @@ def transform(Column input, op): return Column.from_unique_ptr(move(c_output)) -def masked_udf(incols, op, output_type): - cdef table_view data_view = table_view_from_table( - incols, ignore_index=True) - cdef string c_str = op.encode("UTF-8") - cdef type_id c_tid - cdef data_type c_dtype - - c_tid = ( - SUPPORTED_NUMPY_TO_LIBCUDF_TYPES[ - output_type - ] - ) - c_dtype = data_type(c_tid) - - with nogil: - c_output = move(libcudf_transform.generalized_masked_op( - data_view, - c_str, 
- c_dtype, - )) - - return Column.from_unique_ptr(move(c_output)) - - def table_encode(input): cdef table_view c_input = table_view_from_table( input, ignore_index=True)