Add benchmark for strings/integers convert APIs (#8402)

Reference #5698. This PR adds benchmark tests for `cudf::strings::to_integers` and `cudf::strings::from_integers`. The existing `convert_floats_benchmark.cpp` was refactored to work with both floating-point and integer types and was renamed to `convert_numerics_benchmark.cpp`. Authors: - David Wendt (https://github.com/davidwendt). Approvers: - Nghia Truong (https://github.com/ttnghia) - Mike Wilson (https://github.com/hyperbolic2346). URL: #8402
rapidsai · Jun 2, 2021 · 1ba59bd · 1ba59bd
1 parent f31a094
commit 1ba59bd
Show file tree

Hide file tree

Showing 3 changed files with 137 additions and 121 deletions.
diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt
@@ -211,7 +211,7 @@ ConfigureBench(STRINGS_BENCH
   string/contains_benchmark.cpp
   string/convert_datetime_benchmark.cpp
   string/convert_durations_benchmark.cpp
-  string/convert_floats_benchmark.cpp
+  string/convert_numerics_benchmark.cpp
   string/copy_benchmark.cpp
   string/extract_benchmark.cpp
   string/factory_benchmark.cu

diff --git a/cpp/benchmarks/string/convert_floats_benchmark.cpp b/cpp/benchmarks/string/convert_floats_benchmark.cpp
diff --git a/cpp/benchmarks/string/convert_numerics_benchmark.cpp b/cpp/benchmarks/string/convert_numerics_benchmark.cpp
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <fixture/benchmark_fixture.hpp>
+#include <synchronization/synchronization.hpp>
+
+#include <benchmark/benchmark.h>
+#include <benchmarks/common/generate_benchmark_input.hpp>
+
+#include <cudf/strings/convert/convert_floats.hpp>
+#include <cudf/strings/convert/convert_integers.hpp>
+#include <cudf/types.hpp>
+
+namespace {
+
+/**
+ * @brief Build a column of random values whose element type is `NumericType`.
+ *
+ * Asks `create_random_table` for a table described by the single type id of
+ * `NumericType` and the requested row count, then hands back the first column
+ * of that table.
+ *
+ * @tparam NumericType C++ type mapped to a cudf type id via `cudf::type_to_id`
+ * @param rows Row count forwarded to the table generator
+ * @return Owning pointer to the first generated column
+ */
+template <typename NumericType>
+std::unique_ptr<cudf::column> get_numerics_column(cudf::size_type rows)
+{
+  auto const dtype_id = cudf::type_to_id<NumericType>();
+  auto random_table   = create_random_table({dtype_id}, 1, row_count{rows});
+  auto columns        = random_table->release();
+  // front() yields an lvalue reference, so the move is required to take ownership.
+  return std::move(columns.front());
+}
+
+/**
+ * @brief Build a strings column by converting a random numeric column.
+ *
+ * A random column of `NumericType` is generated first; it is then converted
+ * with `cudf::strings::from_floats` when `NumericType` is a floating-point
+ * type and with `cudf::strings::from_integers` otherwise.
+ *
+ * @tparam NumericType Element type of the intermediate numeric column
+ * @param rows Row count of the intermediate numeric column
+ * @return Owning pointer to the column returned by the conversion call
+ */
+template <typename NumericType>
+std::unique_ptr<cudf::column> get_strings_column(cudf::size_type rows)
+{
+  auto const source      = get_numerics_column<NumericType>(rows);
+  auto const source_view = source->view();
+  if constexpr (!std::is_floating_point_v<NumericType>) {
+    return cudf::strings::from_integers(source_view);
+  } else {
+    return cudf::strings::from_floats(source_view);
+  }
+}
+}  // anonymous namespace
+
+/// Fixture for the string -> numeric benchmarks; adds nothing to `cudf::benchmark`.
+class StringsToNumeric : public cudf::benchmark {};
+
+/**
+ * @brief Benchmark body that converts a strings column to `NumericType`.
+ *
+ * The input strings column is produced once, outside the measured loop, with
+ * `state.range(0)` rows. Each iteration is wrapped in a `cuda_event_timer` and
+ * calls `cudf::strings::to_floats` for floating-point types or
+ * `cudf::strings::to_integers` for every other type.
+ *
+ * @tparam NumericType Output element type of the conversion
+ * @param state Google Benchmark state supplying the row count and iterations
+ */
+template <typename NumericType>
+void convert_to_number(benchmark::State& state)
+{
+  constexpr bool is_float = std::is_floating_point_v<NumericType>;
+
+  auto const num_rows    = static_cast<cudf::size_type>(state.range(0));
+  auto const input       = get_strings_column<NumericType>(num_rows);
+  auto const input_view  = cudf::strings_column_view(input->view());
+  auto const output_type = cudf::data_type{cudf::type_to_id<NumericType>()};
+
+  for (auto _ : state) {
+    cuda_event_timer timer(state, true);
+    if constexpr (is_float) {
+      cudf::strings::to_floats(input_view, output_type);
+    } else {
+      cudf::strings::to_integers(input_view, output_type);
+    }
+  }
+
+  // Bytes processed per iteration = input characters + fixed-width output values.
+  auto const input_bytes  = input_view.chars_size();
+  auto const output_bytes = num_rows * sizeof(NumericType);
+  state.SetBytesProcessed(state.iterations() * (input_bytes + output_bytes));
+}
+
+/// Fixture for the numeric -> string benchmarks; adds nothing to `cudf::benchmark`.
+class StringsFromNumeric : public cudf::benchmark {};
+
+/**
+ * @brief Benchmark body that converts a `NumericType` column to strings.
+ *
+ * The random numeric input is produced once, outside the measured loop, with
+ * `state.range(0)` rows. Each iteration is wrapped in a `cuda_event_timer` and
+ * calls `cudf::strings::from_floats` for floating-point types or
+ * `cudf::strings::from_integers` for every other type. The column produced by
+ * the last iteration is kept so its character size can be reported.
+ *
+ * @tparam NumericType Input element type of the conversion
+ * @param state Google Benchmark state supplying the row count and iterations
+ */
+template <typename NumericType>
+void convert_from_number(benchmark::State& state)
+{
+  auto const num_rows   = static_cast<cudf::size_type>(state.range(0));
+  auto const input      = get_numerics_column<NumericType>(num_rows);
+  auto const input_view = input->view();
+
+  std::unique_ptr<cudf::column> output{};
+
+  for (auto _ : state) {
+    cuda_event_timer timer(state, true);
+    if constexpr (std::is_floating_point_v<NumericType>) {
+      output = cudf::strings::from_floats(input_view);
+    } else {
+      output = cudf::strings::from_integers(input_view);
+    }
+  }
+
+  // Bytes processed per iteration = output characters + fixed-width input values.
+  auto const output_bytes = cudf::strings_column_view(output->view()).chars_size();
+  auto const input_bytes  = num_rows * sizeof(NumericType);
+  state.SetBytesProcessed(state.iterations() * (output_bytes + input_bytes));
+}
+
+// Defines a StringsToNumeric fixture benchmark called `bench_name` that runs
+// convert_to_number<value_type>, and registers it for row counts from 1 << 10
+// to 1 << 17 (range multiplier 4), using manual timing reported in
+// microseconds. RangeMultiplier must stay ahead of Range in the chain.
+#define CONVERT_TO_NUMERICS_BD(bench_name, value_type) \
+  BENCHMARK_DEFINE_F(StringsToNumeric, bench_name)(::benchmark::State & state) \
+  { convert_to_number<value_type>(state); } \
+  BENCHMARK_REGISTER_F(StringsToNumeric, bench_name) \
+    ->UseManualTime() \
+    ->Unit(benchmark::kMicrosecond) \
+    ->RangeMultiplier(4) \
+    ->Range(1 << 10, 1 << 17);
+
+// Defines a StringsFromNumeric fixture benchmark called `bench_name` that runs
+// convert_from_number<value_type>, and registers it for row counts from
+// 1 << 10 to 1 << 17 (range multiplier 4), using manual timing reported in
+// microseconds. RangeMultiplier must stay ahead of Range in the chain.
+#define CONVERT_FROM_NUMERICS_BD(bench_name, value_type) \
+  BENCHMARK_DEFINE_F(StringsFromNumeric, bench_name)(::benchmark::State & state) \
+  { convert_from_number<value_type>(state); } \
+  BENCHMARK_REGISTER_F(StringsFromNumeric, bench_name) \
+    ->UseManualTime() \
+    ->Unit(benchmark::kMicrosecond) \
+    ->RangeMultiplier(4) \
+    ->Range(1 << 10, 1 << 17);
+
+CONVERT_TO_NUMERICS_BD(strings_to_float32, float);    // string -> number; floating point, so to_floats is timed
+CONVERT_TO_NUMERICS_BD(strings_to_float64, double);   // floating point, so to_floats is timed
+CONVERT_TO_NUMERICS_BD(strings_to_int32, int32_t);    // not floating point, so to_integers is timed
+CONVERT_TO_NUMERICS_BD(strings_to_int64, int64_t);    // not floating point, so to_integers is timed
+CONVERT_TO_NUMERICS_BD(strings_to_uint8, uint8_t);    // not floating point, so to_integers is timed
+CONVERT_TO_NUMERICS_BD(strings_to_uint16, uint16_t);  // not floating point, so to_integers is timed
+
+CONVERT_FROM_NUMERICS_BD(strings_from_float32, float);    // number -> string; floating point, so from_floats is timed
+CONVERT_FROM_NUMERICS_BD(strings_from_float64, double);   // floating point, so from_floats is timed
+CONVERT_FROM_NUMERICS_BD(strings_from_int32, int32_t);    // not floating point, so from_integers is timed
+CONVERT_FROM_NUMERICS_BD(strings_from_int64, int64_t);    // not floating point, so from_integers is timed
+CONVERT_FROM_NUMERICS_BD(strings_from_uint8, uint8_t);    // not floating point, so from_integers is timed
+CONVERT_FROM_NUMERICS_BD(strings_from_uint16, uint16_t);  // not floating point, so from_integers is timed