Merge branch 'branch-24.02' into impr-reduce-pq-cpp-tests

rapidsai · Jan 17, 2024 · 066cf1d
2 parents cb32503 + c811987
commit 066cf1d
Show file tree

Hide file tree

Showing 185 changed files with 1,401 additions and 1,055 deletions.
diff --git a/cpp/benchmarks/fixture/benchmark_fixture.hpp b/cpp/benchmarks/fixture/benchmark_fixture.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,6 +17,7 @@
 #pragma once
 
 #include <benchmark/benchmark.h>
+#include <rmm/cuda_device.hpp>
 #include <rmm/mr/device/cuda_memory_resource.hpp>
 #include <rmm/mr/device/owning_wrapper.hpp>
 #include <rmm/mr/device/per_device_resource.hpp>
@@ -33,7 +34,8 @@ inline auto make_pool_instance()
 {
   static rmm::mr::cuda_memory_resource cuda_mr;
   static auto pool_mr =
-    std::make_shared<rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource>>(&cuda_mr);
+    std::make_shared<rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource>>(
+      &cuda_mr, rmm::percent_of_free_device_memory(50));
   return pool_mr;
 }
 }  // namespace

diff --git a/cpp/benchmarks/fixture/nvbench_fixture.hpp b/cpp/benchmarks/fixture/nvbench_fixture.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,6 +17,7 @@
 
 #include <cudf/utilities/error.hpp>
 
+#include <rmm/cuda_device.hpp>
 #include <rmm/mr/device/arena_memory_resource.hpp>
 #include <rmm/mr/device/cuda_async_memory_resource.hpp>
 #include <rmm/mr/device/cuda_memory_resource.hpp>
@@ -42,7 +43,8 @@ struct nvbench_base_fixture {
 
   inline auto make_pool()
   {
-    return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(make_cuda());
+    return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(
+      make_cuda(), rmm::percent_of_free_device_memory(50));
   }
 
   inline auto make_async() { return std::make_shared<rmm::mr::cuda_async_memory_resource>(); }
@@ -56,7 +58,8 @@ struct nvbench_base_fixture {
 
   inline auto make_managed_pool()
   {
-    return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(make_managed());
+    return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(
+      make_managed(), rmm::percent_of_free_device_memory(50));
   }
 
   inline std::shared_ptr<rmm::mr::device_memory_resource> create_memory_resource(

diff --git a/cpp/benchmarks/hashing/hash.cpp b/cpp/benchmarks/hashing/hash.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -43,7 +43,7 @@ static void bench_hash(nvbench::state& state)
 
   // collect statistics
   cudf::strings_column_view input(data->get_column(1).view());
-  auto const chars_size = input.chars_size();
+  auto const chars_size = input.chars_size(stream);
   // add memory read from string column
   state.add_global_memory_reads<nvbench::int8_t>(chars_size);
   // add memory read from int64_t column

diff --git a/cpp/benchmarks/join/generate_input_tables.cuh b/cpp/benchmarks/join/generate_input_tables.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -31,19 +31,19 @@
 
 #include <cassert>
 
-__global__ static void init_curand(curandState* state, int const nstates)
+CUDF_KERNEL void init_curand(curandState* state, int const nstates)
 {
   int ithread = threadIdx.x + blockIdx.x * blockDim.x;
 
   if (ithread < nstates) { curand_init(1234ULL, ithread, 0, state + ithread); }
 }
 
 template <typename key_type, typename size_type>
-__global__ static void init_build_tbl(key_type* const build_tbl,
-                                      size_type const build_tbl_size,
-                                      int const multiplicity,
-                                      curandState* state,
-                                      int const num_states)
+CUDF_KERNEL void init_build_tbl(key_type* const build_tbl,
+                                size_type const build_tbl_size,
+                                int const multiplicity,
+                                curandState* state,
+                                int const num_states)
 {
   auto const start_idx = blockIdx.x * blockDim.x + threadIdx.x;
   auto const stride    = blockDim.x * gridDim.x;
@@ -61,14 +61,14 @@ __global__ static void init_build_tbl(key_type* const build_tbl,
 }
 
 template <typename key_type, typename size_type>
-__global__ void init_probe_tbl(key_type* const probe_tbl,
-                               size_type const probe_tbl_size,
-                               size_type const build_tbl_size,
-                               key_type const rand_max,
-                               double const selectivity,
-                               int const multiplicity,
-                               curandState* state,
-                               int const num_states)
+CUDF_KERNEL void init_probe_tbl(key_type* const probe_tbl,
+                                size_type const probe_tbl_size,
+                                size_type const build_tbl_size,
+                                key_type const rand_max,
+                                double const selectivity,
+                                int const multiplicity,
+                                curandState* state,
+                                int const num_states)
 {
   auto const start_idx = blockIdx.x * blockDim.x + threadIdx.x;
   auto const stride    = blockDim.x * gridDim.x;

diff --git a/cpp/benchmarks/json/json.cu b/cpp/benchmarks/json/json.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -190,7 +190,7 @@ void BM_case(benchmark::State& state, std::string query_arg)
   int desired_bytes = state.range(1);
   auto input        = build_json_string_column(desired_bytes, num_rows);
   cudf::strings_column_view scv(input->view());
-  size_t num_chars = scv.chars().size();
+  size_t num_chars = scv.chars_size(cudf::get_default_stream());
 
   std::string json_path(query_arg);
 

diff --git a/cpp/benchmarks/string/case.cpp b/cpp/benchmarks/string/case.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -43,28 +43,28 @@ void bench_case(nvbench::state& state)
   if (encoding == "ascii") {
     data_profile ascii_profile = data_profile_builder().no_validity().distribution(
       cudf::type_id::INT8, distribution_id::UNIFORM, 32, 126);  // nice ASCII range
-    auto input = cudf::strings_column_view(col_view);
-    auto ascii_column =
-      create_random_column(cudf::type_id::INT8, row_count{input.chars_size()}, ascii_profile);
+    auto input        = cudf::strings_column_view(col_view);
+    auto ascii_column = create_random_column(
+      cudf::type_id::INT8, row_count{input.chars_size(cudf::get_default_stream())}, ascii_profile);
     auto ascii_data = ascii_column->view();
 
     col_view = cudf::column_view(col_view.type(),
                                  col_view.size(),
-                                 nullptr,
+                                 ascii_data.data<char>(),
                                  col_view.null_mask(),
                                  col_view.null_count(),
                                  0,
-                                 {input.offsets(), ascii_data});
+                                 {input.offsets()});
 
     ascii_contents = ascii_column->release();
   }
   auto input = cudf::strings_column_view(col_view);
 
   state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
 
-  state.add_element_count(input.chars_size(), "chars_size");
-  state.add_global_memory_reads<nvbench::int8_t>(input.chars_size());
-  state.add_global_memory_writes<nvbench::int8_t>(input.chars_size());
+  state.add_element_count(input.chars_size(cudf::get_default_stream()), "chars_size");
+  state.add_global_memory_reads<nvbench::int8_t>(input.chars_size(cudf::get_default_stream()));
+  state.add_global_memory_writes<nvbench::int8_t>(input.chars_size(cudf::get_default_stream()));
 
   state.exec(nvbench::exec_tag::sync,
              [&](nvbench::launch& launch) { auto result = cudf::strings::to_lower(input); });

diff --git a/cpp/benchmarks/string/char_types.cpp b/cpp/benchmarks/string/char_types.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -42,7 +42,7 @@ static void bench_char_types(nvbench::state& state)
 
   state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
   // gather some throughput statistics as well
-  auto chars_size = input.chars_size();
+  auto chars_size = input.chars_size(cudf::get_default_stream());
   state.add_global_memory_reads<nvbench::int8_t>(chars_size);  // all bytes are read;
   if (api_type == "all") {
     state.add_global_memory_writes<nvbench::int8_t>(num_rows);  // output is a bool8 per row

diff --git a/cpp/benchmarks/string/combine.cpp b/cpp/benchmarks/string/combine.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -44,7 +44,8 @@ static void BM_combine(benchmark::State& state)
     cudf::strings::concatenate(table->view(), separator);
   }
 
-  state.SetBytesProcessed(state.iterations() * (input1.chars_size() + input2.chars_size()));
+  state.SetBytesProcessed(state.iterations() * (input1.chars_size(cudf::get_default_stream()) +
+                                                input2.chars_size(cudf::get_default_stream())));
 }
 
 static void generate_bench_args(benchmark::internal::Benchmark* b)

diff --git a/cpp/benchmarks/string/contains.cpp b/cpp/benchmarks/string/contains.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -100,7 +100,7 @@ static void bench_contains(nvbench::state& state)
   auto pattern = patterns[pattern_index];
   auto program = cudf::strings::regex_program::create(pattern);
 
-  auto chars_size = input.chars_size();
+  auto chars_size = input.chars_size(cudf::get_default_stream());
   state.add_element_count(chars_size, "chars_size");
   state.add_global_memory_reads<nvbench::int8_t>(chars_size);
   state.add_global_memory_writes<nvbench::int32_t>(input.size());

diff --git a/cpp/benchmarks/string/convert_datetime.cpp b/cpp/benchmarks/string/convert_datetime.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -48,7 +48,8 @@ void BM_convert_datetime(benchmark::State& state, direction dir)
       cudf::strings::from_timestamps(input, "%Y-%m-%d %H:%M:%S");
   }
 
-  auto const bytes = dir == direction::to ? source_string.chars_size() : n_rows * sizeof(TypeParam);
+  auto const bytes = dir == direction::to ? source_string.chars_size(cudf::get_default_stream())
+                                          : n_rows * sizeof(TypeParam);
   state.SetBytesProcessed(state.iterations() * bytes);
 }
 

diff --git a/cpp/benchmarks/string/convert_fixed_point.cpp b/cpp/benchmarks/string/convert_fixed_point.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -49,8 +49,9 @@ void convert_to_fixed_point(benchmark::State& state)
   }
 
   // bytes_processed = bytes_input + bytes_output
-  state.SetBytesProcessed(state.iterations() *
-                          (strings_view.chars_size() + rows * cudf::size_of(dtype)));
+  state.SetBytesProcessed(
+    state.iterations() *
+    (strings_view.chars_size(cudf::get_default_stream()) + rows * cudf::size_of(dtype)));
 }
 
 class StringsFromFixedPoint : public cudf::benchmark {};
@@ -74,7 +75,8 @@ void convert_from_fixed_point(benchmark::State& state)
   // bytes_processed = bytes_input + bytes_output
   state.SetBytesProcessed(
     state.iterations() *
-    (cudf::strings_column_view(results->view()).chars_size() + rows * cudf::size_of(dtype)));
+    (cudf::strings_column_view(results->view()).chars_size(cudf::get_default_stream()) +
+     rows * cudf::size_of(dtype)));
 }
 
 #define CONVERT_TO_FIXED_POINT_BMD(name, fixed_point_type)                  \

diff --git a/cpp/benchmarks/string/convert_numerics.cpp b/cpp/benchmarks/string/convert_numerics.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -63,8 +63,9 @@ void convert_to_number(benchmark::State& state)
   }
 
   // bytes_processed = bytes_input + bytes_output
-  state.SetBytesProcessed(state.iterations() *
-                          (strings_view.chars_size() + rows * sizeof(NumericType)));
+  state.SetBytesProcessed(
+    state.iterations() *
+    (strings_view.chars_size(cudf::get_default_stream()) + rows * sizeof(NumericType)));
 }
 
 class StringsFromNumeric : public cudf::benchmark {};
@@ -90,7 +91,8 @@ void convert_from_number(benchmark::State& state)
   // bytes_processed = bytes_input + bytes_output
   state.SetBytesProcessed(
     state.iterations() *
-    (cudf::strings_column_view(results->view()).chars_size() + rows * sizeof(NumericType)));
+    (cudf::strings_column_view(results->view()).chars_size(cudf::get_default_stream()) +
+     rows * sizeof(NumericType)));
 }
 
 #define CONVERT_TO_NUMERICS_BD(name, type)                               \

diff --git a/cpp/benchmarks/string/copy.cu b/cpp/benchmarks/string/copy.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -64,8 +64,9 @@ static void BM_copy(benchmark::State& state, copy_type ct)
     }
   }
 
-  state.SetBytesProcessed(state.iterations() *
-                          cudf::strings_column_view(source->view().column(0)).chars_size());
+  state.SetBytesProcessed(
+    state.iterations() *
+    cudf::strings_column_view(source->view().column(0)).chars_size(cudf::get_default_stream()));
 }
 
 static void generate_bench_args(benchmark::internal::Benchmark* b)

diff --git a/cpp/benchmarks/string/count.cpp b/cpp/benchmarks/string/count.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -47,7 +47,7 @@ static void bench_count(nvbench::state& state)
 
   state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
   // gather some throughput statistics as well
-  auto chars_size = input.chars_size();
+  auto chars_size = input.chars_size(cudf::get_default_stream());
   state.add_element_count(chars_size, "chars_size");
   state.add_global_memory_reads<nvbench::int8_t>(chars_size);
   state.add_global_memory_writes<nvbench::int32_t>(input.size());

diff --git a/cpp/benchmarks/string/extract.cpp b/cpp/benchmarks/string/extract.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -67,7 +67,7 @@ static void bench_extract(nvbench::state& state)
 
   state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
   // gather some throughput statistics as well
-  auto chars_size = strings_view.chars_size();
+  auto chars_size = strings_view.chars_size(cudf::get_default_stream());
   state.add_element_count(chars_size, "chars_size");            // number of bytes;
   state.add_global_memory_reads<nvbench::int8_t>(chars_size);   // all bytes are read;
   state.add_global_memory_writes<nvbench::int8_t>(chars_size);  // all bytes are written

diff --git a/cpp/benchmarks/string/factory.cu b/cpp/benchmarks/string/factory.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -67,7 +67,7 @@ static void BM_factory(benchmark::State& state)
   }
 
   cudf::strings_column_view input(column->view());
-  state.SetBytesProcessed(state.iterations() * input.chars_size());
+  state.SetBytesProcessed(state.iterations() * input.chars_size(cudf::get_default_stream()));
 }
 
 static void generate_bench_args(benchmark::internal::Benchmark* b)

diff --git a/cpp/benchmarks/string/filter.cpp b/cpp/benchmarks/string/filter.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -57,7 +57,7 @@ static void BM_filter_chars(benchmark::State& state, FilterAPI api)
     }
   }
 
-  state.SetBytesProcessed(state.iterations() * input.chars_size());
+  state.SetBytesProcessed(state.iterations() * input.chars_size(cudf::get_default_stream()));
 }
 
 static void generate_bench_args(benchmark::internal::Benchmark* b)