Fix quantile gtests coded in namespace cudf::test (#12049)

Fixes the `cpp/tests/quantiles` gtests source files that were coded in namespace `cudf::test`. The `tdigest_utilities.cu` file was moved to `cpp/tests/utilities` since it is used by the quantiles, groupby, and reductions tests. Also, the header for the functions defined in this source file is in `cpp/include/cudf_test/`. The `cpp/include/cudf_test/tdigest_utilities.cuh` header was also including a source file header from `cpp/tests/groupby`, which seemed odd and was corrected by moving the code it needed directly into the `tdigest_utilities.cuh` header. These functions were used by quantiles, groupby, reductions, etc., so it made sense for them to be moved into this utility header. Simply reworking some of the code in `percentile_approx_test.cu` allowed it to become a `.cpp` file as well. Also made some minor changes to the `tdigest_column_view` class to isolate a functor inside the class instead of at namespace scope. No function or test has changed; the source code was only reworked or moved around. Reference #11734 Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Bradley Dice (https://github.com/bdice) - Nghia Truong (https://github.com/ttnghia) - Vyas Ramasubramani (https://github.com/vyasr) URL: #12049
rapidsai · Nov 7, 2022 · f9a2512 · f9a2512
1 parent 17b6b2e
commit f9a2512
Show file tree

Hide file tree

Showing 10 changed files with 323 additions and 343 deletions.
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
@@ -690,10 +690,10 @@ add_library(cudf::cudf ALIAS cudf)
 add_library(
   cudftestutil STATIC
   tests/io/metadata_utilities.cpp
-  tests/quantiles/tdigest_utilities.cu
   tests/utilities/base_fixture.cpp
   tests/utilities/column_utilities.cu
   tests/utilities/table_utilities.cu
+  tests/utilities/tdigest_utilities.cu
 )
 
 set_target_properties(

diff --git a/cpp/include/cudf/tdigest/tdigest_column_view.cuh b/cpp/include/cudf/tdigest/tdigest_column_view.cuh
@@ -22,24 +22,6 @@
 namespace cudf {
 namespace tdigest {
 
-/**
- * @brief Functor to compute the size of each tdigest of a column.
- *
- */
-struct tdigest_size {
-  size_type const* offsets;  ///< Offsets of the t-digest column
-  /**
-   * @brief Returns size of the each tdigest in the column
-   *
-   * @param tdigest_index Index of the tdigest in the column
-   * @return Size of the tdigest
-   */
-  __device__ size_type operator()(size_type tdigest_index)
-  {
-    return offsets[tdigest_index + 1] - offsets[tdigest_index];
-  }
-};
-
 /**
  * @brief Given a column_view containing tdigest data, an instance of this class
  * provides a wrapper on the compound column for tdigest operations.
@@ -127,6 +109,22 @@ class tdigest_column_view : private column_view {
    */
   [[nodiscard]] column_view weights() const;
 
+  /**
+   * @brief Functor to compute the size of each tdigest of a column.
+   */
+  struct tdigest_size_fn {
+    size_type const* offsets;  ///< Offsets of the t-digest column
+    /**
+     * @brief Returns size of the each tdigest in the column
+     *
+     * @param tdigest_index Index of the tdigest in the column
+     * @return Size of the tdigest
+     */
+    __device__ size_type operator()(size_type tdigest_index)
+    {
+      return offsets[tdigest_index + 1] - offsets[tdigest_index];
+    }
+  };
   /**
    * @brief Returns an iterator that returns the size of each tdigest
    * in the column (each row is 1 digest)
@@ -136,7 +134,7 @@ class tdigest_column_view : private column_view {
   [[nodiscard]] auto size_begin() const
   {
     return cudf::detail::make_counting_transform_iterator(
-      0, tdigest_size{centroids().offsets_begin()});
+      0, tdigest_size_fn{centroids().offsets_begin()});
   }
 
   /**

diff --git a/cpp/include/cudf_test/tdigest_utilities.cuh b/cpp/include/cudf_test/tdigest_utilities.cuh
@@ -16,16 +16,14 @@
 
 #pragma once
 
+#include <cudf_test/column_wrapper.hpp>
+
 #include <cudf/detail/tdigest/tdigest.hpp>
 #include <cudf/detail/unary.hpp>
 #include <cudf/groupby.hpp>
 #include <cudf/tdigest/tdigest_column_view.cuh>
 #include <cudf/utilities/default_stream.hpp>
 
-#include <cudf_test/column_wrapper.hpp>
-
-#include <tests/groupby/groupby_test_util.hpp>
-
 #include <thrust/device_vector.h>
 #include <thrust/execution_policy.h>
 #include <thrust/extrema.h>
@@ -102,6 +100,58 @@ struct tdigest_gen {
   // @endcond
 };
 
+template <typename T>
+inline T frand()
+{
+  return static_cast<T>(rand()) / static_cast<T>(RAND_MAX);
+}
+
+template <typename T>
+inline T rand_range(T min, T max)
+{
+  return min + static_cast<T>(frand<T>() * (max - min));
+}
+
+inline std::unique_ptr<column> generate_typed_percentile_distribution(
+  std::vector<double> const& buckets,
+  std::vector<int> const& sizes,
+  data_type t,
+  bool sorted = false)
+{
+  srand(0);
+
+  std::vector<double> values;
+  size_t total_size = std::reduce(sizes.begin(), sizes.end(), 0);
+  values.reserve(total_size);
+  for (size_t idx = 0; idx < sizes.size(); idx++) {
+    double min = idx == 0 ? 0.0f : buckets[idx - 1];
+    double max = buckets[idx];
+
+    for (int v_idx = 0; v_idx < sizes[idx]; v_idx++) {
+      values.push_back(rand_range(min, max));
+    }
+  }
+
+  if (sorted) { std::sort(values.begin(), values.end()); }
+
+  cudf::test::fixed_width_column_wrapper<double> src(values.begin(), values.end());
+  return cudf::cast(src, t);
+}
+
+// "standardized" means the parameters sent into generate_typed_percentile_distribution. the intent
+// is to provide a standardized set of inputs for use with tdigest generation tests and
+// percentile_approx tests. std::vector<double>
+// buckets{10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0}; std::vector<int>
+// sizes{50000, 50000, 50000, 50000, 50000, 100000, 100000, 100000, 100000, 100000};
+inline std::unique_ptr<column> generate_standardized_percentile_distribution(
+  data_type t = data_type{type_id::FLOAT64}, bool sorted = false)
+{
+  std::vector<double> buckets{10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0, 90.0f, 100.0f};
+  std::vector<int> b_sizes{
+    50000, 50000, 50000, 50000, 50000, 100000, 100000, 100000, 100000, 100000};
+  return generate_typed_percentile_distribution(buckets, b_sizes, t, sorted);
+}
+
 /**
  * @brief Compare a tdigest column against a sampling of expected values.
  */

diff --git a/cpp/src/quantiles/tdigest/tdigest_column_view.cpp b/cpp/src/quantiles/tdigest/tdigest_column_view.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -22,8 +22,6 @@
 namespace cudf {
 namespace tdigest {
 
-using namespace cudf;
-
 tdigest_column_view::tdigest_column_view(column_view const& col) : column_view(col)
 {
   // sanity check that this is actually tdigest data

diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
@@ -146,7 +146,7 @@ ConfigureTest(HASH_MAP_TEST hash_map/map_test.cu)
 # ##################################################################################################
 # * quantiles tests -------------------------------------------------------------------------------
 ConfigureTest(
-  QUANTILES_TEST quantiles/percentile_approx_test.cu quantiles/quantile_test.cpp
+  QUANTILES_TEST quantiles/percentile_approx_test.cpp quantiles/quantile_test.cpp
   quantiles/quantiles_test.cpp
 )
 

diff --git a/cpp/tests/groupby/groupby_test_util.hpp b/cpp/tests/groupby/groupby_test_util.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -131,57 +131,5 @@ inline void test_single_scan(column_view const& keys,
     expect_vals, *result.second[0].results[0], debug_output_level::ALL_ERRORS);
 }
 
-template <typename T>
-inline T frand()
-{
-  return static_cast<T>(rand()) / static_cast<T>(RAND_MAX);
-}
-
-template <typename T>
-inline T rand_range(T min, T max)
-{
-  return min + static_cast<T>(frand<T>() * (max - min));
-}
-
-inline std::unique_ptr<column> generate_typed_percentile_distribution(
-  std::vector<double> const& buckets,
-  std::vector<int> const& sizes,
-  data_type t,
-  bool sorted = false)
-{
-  srand(0);
-
-  std::vector<double> values;
-  size_t total_size = std::reduce(sizes.begin(), sizes.end(), 0);
-  values.reserve(total_size);
-  for (size_t idx = 0; idx < sizes.size(); idx++) {
-    double min = idx == 0 ? 0.0f : buckets[idx - 1];
-    double max = buckets[idx];
-
-    for (int v_idx = 0; v_idx < sizes[idx]; v_idx++) {
-      values.push_back(rand_range(min, max));
-    }
-  }
-
-  if (sorted) { std::sort(values.begin(), values.end()); }
-
-  cudf::test::fixed_width_column_wrapper<double> src(values.begin(), values.end());
-  return cudf::cast(src, t);
-}
-
-// "standardized" means the parameters sent into generate_typed_percentile_distribution. the intent
-// is to provide a standardized set of inputs for use with tdigest generation tests and
-// percentile_approx tests. std::vector<double>
-// buckets{10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0}; std::vector<int>
-// sizes{50000, 50000, 50000, 50000, 50000, 100000, 100000, 100000, 100000, 100000};
-inline std::unique_ptr<column> generate_standardized_percentile_distribution(
-  data_type t = data_type{type_id::FLOAT64}, bool sorted = false)
-{
-  std::vector<double> buckets{10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0, 90.0f, 100.0f};
-  std::vector<int> b_sizes{
-    50000, 50000, 50000, 50000, 50000, 100000, 100000, 100000, 100000, 100000};
-  return generate_typed_percentile_distribution(buckets, b_sizes, t, sorted);
-}
-
 }  // namespace test
 }  // namespace cudf