Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix quantile gtests coded in namespace cudf::test #12049

Merged
merged 10 commits into from
Nov 7, 2022
2 changes: 1 addition & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -690,10 +690,10 @@ add_library(cudf::cudf ALIAS cudf)
add_library(
cudftestutil STATIC
tests/io/metadata_utilities.cpp
tests/quantiles/tdigest_utilities.cu
davidwendt marked this conversation as resolved.
Show resolved Hide resolved
tests/utilities/base_fixture.cpp
tests/utilities/column_utilities.cu
tests/utilities/table_utilities.cu
tests/utilities/tdigest_utilities.cu
)

set_target_properties(
Expand Down
36 changes: 17 additions & 19 deletions cpp/include/cudf/tdigest/tdigest_column_view.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -22,24 +22,6 @@
namespace cudf {
namespace tdigest {

/**
 * @brief Functor to compute the size of each tdigest of a column.
 *
 * Holds a pointer to the offsets and reports, for a given index, the distance
 * between that offsets entry and the one following it.
 */
struct tdigest_size {
  size_type const* offsets;  ///< Offsets of the t-digest column
  /**
   * @brief Returns the size of one tdigest in the column
   *
   * Reads both `offsets[tdigest_index]` and `offsets[tdigest_index + 1]`, so
   * the offsets array must hold at least `tdigest_index + 2` entries.
   *
   * @param tdigest_index Index of the tdigest in the column
   * @return Size of the tdigest
   */
  // const-qualified: the call only reads `offsets`, so it must also be usable
  // on a const instance of the functor.
  __device__ size_type operator()(size_type tdigest_index) const
  {
    return offsets[tdigest_index + 1] - offsets[tdigest_index];
  }
};

/**
* @brief Given a column_view containing tdigest data, an instance of this class
* provides a wrapper on the compound column for tdigest operations.
Expand Down Expand Up @@ -127,6 +109,22 @@ class tdigest_column_view : private column_view {
*/
[[nodiscard]] column_view weights() const;

/**
 * @brief Functor to compute the size of each tdigest of a column.
 *
 * Holds a pointer to the offsets and reports, for a given index, the distance
 * between that offsets entry and the one following it.
 */
struct tdigest_size_fn {
  size_type const* offsets;  ///< Offsets of the t-digest column
  /**
   * @brief Returns the size of one tdigest in the column
   *
   * Reads both `offsets[tdigest_index]` and `offsets[tdigest_index + 1]`, so
   * the offsets array must hold at least `tdigest_index + 2` entries.
   *
   * @param tdigest_index Index of the tdigest in the column
   * @return Size of the tdigest
   */
  // const-qualified: the call only reads `offsets`, so it must also be usable
  // on a const instance of the functor.
  __device__ size_type operator()(size_type tdigest_index) const
  {
    return offsets[tdigest_index + 1] - offsets[tdigest_index];
  }
};
/**
* @brief Returns an iterator that returns the size of each tdigest
* in the column (each row is 1 digest)
Expand All @@ -136,7 +134,7 @@ class tdigest_column_view : private column_view {
[[nodiscard]] auto size_begin() const
{
// Counting starts at 0; the size functor is built from the offsets of the centroids column.
return cudf::detail::make_counting_transform_iterator(
0, tdigest_size{centroids().offsets_begin()});
0, tdigest_size_fn{centroids().offsets_begin()});
}

/**
Expand Down
58 changes: 54 additions & 4 deletions cpp/include/cudf_test/tdigest_utilities.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,14 @@

#pragma once

#include <cudf_test/column_wrapper.hpp>

#include <cudf/detail/tdigest/tdigest.hpp>
#include <cudf/detail/unary.hpp>
#include <cudf/groupby.hpp>
#include <cudf/tdigest/tdigest_column_view.cuh>
#include <cudf/utilities/default_stream.hpp>

#include <cudf_test/column_wrapper.hpp>

#include <tests/groupby/groupby_test_util.hpp>

#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <thrust/extrema.h>
Expand Down Expand Up @@ -102,6 +100,58 @@ struct tdigest_gen {
// @endcond
};

/**
 * @brief Returns the next `rand()` value divided by `RAND_MAX`, computed in type `T`.
 *
 * Both the sample and `RAND_MAX` are converted to `T` before the division, so
 * the arithmetic follows the rules of `T`.
 *
 * @tparam T Type in which the ratio is computed and returned
 * @return `rand() / RAND_MAX` evaluated in `T`
 */
template <typename T>
inline T frand()
{
  T const sample  = static_cast<T>(rand());
  T const ceiling = static_cast<T>(RAND_MAX);
  return sample / ceiling;
}

/**
 * @brief Returns `min` plus a `frand<T>()`-scaled fraction of the distance to `max`.
 *
 * @tparam T Type of the bounds and of the result
 * @param min Value returned when the drawn fraction is zero
 * @param max Value the result approaches as the drawn fraction approaches one
 * @return `min + frand<T>() * (max - min)`, with the scaled term converted to `T`
 */
template <typename T>
inline T rand_range(T min, T max)
{
  auto const width  = max - min;
  auto const offset = static_cast<T>(frand<T>() * width);
  return min + offset;
}

/**
 * @brief Builds a column of pseudo-random values spread over consecutive buckets.
 *
 * Bucket `i` contributes `sizes[i]` values drawn with `rand_range` between the
 * previous bucket's bound (0 for the first bucket) and `buckets[i]`. The values
 * are generated as doubles and then cast to the requested type.
 *
 * @param buckets Upper bound of each bucket; indexed once per entry of `sizes`
 * @param sizes Number of values to generate for each bucket
 * @param t Data type the generated double column is cast to
 * @param sorted If true, the values are sorted ascending before the column is built
 * @return The generated column, cast to `t`
 */
inline std::unique_ptr<column> generate_typed_percentile_distribution(
  std::vector<double> const& buckets,
  std::vector<int> const& sizes,
  data_type t,
  bool sorted = false)
{
  // Fixed seed: every call restarts rand() from the same state.
  srand(0);

  std::vector<double> samples;
  size_t const num_samples = std::reduce(sizes.begin(), sizes.end(), 0);
  samples.reserve(num_samples);

  // The lower edge of a bucket is the upper edge of the one before it.
  double range_min = 0.0;
  for (size_t bucket = 0; bucket < sizes.size(); ++bucket) {
    double const range_max = buckets[bucket];
    int const count        = sizes[bucket];
    for (int drawn = 0; drawn < count; ++drawn) {
      samples.push_back(rand_range(range_min, range_max));
    }
    range_min = range_max;
  }

  if (sorted) { std::sort(samples.begin(), samples.end()); }

  cudf::test::fixed_width_column_wrapper<double> src(samples.begin(), samples.end());
  return cudf::cast(src, t);
}

/**
 * @brief Generates a percentile distribution from a fixed, "standardized" set of inputs.
 *
 * "Standardized" refers to the parameters passed to
 * generate_typed_percentile_distribution. The intent is to provide one shared
 * set of inputs for use with tdigest generation tests and percentile_approx tests:
 *
 *   buckets{10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0}
 *   sizes{50000, 50000, 50000, 50000, 50000, 100000, 100000, 100000, 100000, 100000}
 *
 * @param t Data type of the returned column
 * @param sorted Forwarded to generate_typed_percentile_distribution
 * @return Column produced by generate_typed_percentile_distribution
 */
inline std::unique_ptr<column> generate_standardized_percentile_distribution(
  data_type t = data_type{type_id::FLOAT64}, bool sorted = false)
{
  std::vector<double> const bucket_bounds{
    10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0};
  std::vector<int> const bucket_sizes{
    50000, 50000, 50000, 50000, 50000, 100000, 100000, 100000, 100000, 100000};
  return generate_typed_percentile_distribution(bucket_bounds, bucket_sizes, t, sorted);
}

/**
* @brief Compare a tdigest column against a sampling of expected values.
*/
Expand Down
4 changes: 1 addition & 3 deletions cpp/src/quantiles/tdigest/tdigest_column_view.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -22,8 +22,6 @@
namespace cudf {
namespace tdigest {

using namespace cudf;

tdigest_column_view::tdigest_column_view(column_view const& col) : column_view(col)
{
// sanity check that this is actually tdigest data
Expand Down
2 changes: 1 addition & 1 deletion cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ ConfigureTest(HASH_MAP_TEST hash_map/map_test.cu)
# ##################################################################################################
# * quantiles tests -------------------------------------------------------------------------------
ConfigureTest(
QUANTILES_TEST quantiles/percentile_approx_test.cu quantiles/quantile_test.cpp
QUANTILES_TEST quantiles/percentile_approx_test.cpp quantiles/quantile_test.cpp
quantiles/quantiles_test.cpp
)

Expand Down
54 changes: 1 addition & 53 deletions cpp/tests/groupby/groupby_test_util.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -131,57 +131,5 @@ inline void test_single_scan(column_view const& keys,
expect_vals, *result.second[0].results[0], debug_output_level::ALL_ERRORS);
}

/**
 * @brief Returns the next `rand()` value divided by `RAND_MAX`, computed in type `T`.
 *
 * Both the sample and `RAND_MAX` are converted to `T` before the division, so
 * the arithmetic follows the rules of `T`.
 *
 * @tparam T Type in which the ratio is computed and returned
 * @return `rand() / RAND_MAX` evaluated in `T`
 */
template <typename T>
inline T frand()
{
  T const sample  = static_cast<T>(rand());
  T const ceiling = static_cast<T>(RAND_MAX);
  return sample / ceiling;
}

/**
 * @brief Returns `min` plus a `frand<T>()`-scaled fraction of the distance to `max`.
 *
 * @tparam T Type of the bounds and of the result
 * @param min Value returned when the drawn fraction is zero
 * @param max Value the result approaches as the drawn fraction approaches one
 * @return `min + frand<T>() * (max - min)`, with the scaled term converted to `T`
 */
template <typename T>
inline T rand_range(T min, T max)
{
  auto const width  = max - min;
  auto const offset = static_cast<T>(frand<T>() * width);
  return min + offset;
}

/**
 * @brief Builds a column of pseudo-random values spread over consecutive buckets.
 *
 * Bucket `i` contributes `sizes[i]` values drawn with `rand_range` between the
 * previous bucket's bound (0 for the first bucket) and `buckets[i]`. The values
 * are generated as doubles and then cast to the requested type.
 *
 * @param buckets Upper bound of each bucket; indexed once per entry of `sizes`
 * @param sizes Number of values to generate for each bucket
 * @param t Data type the generated double column is cast to
 * @param sorted If true, the values are sorted ascending before the column is built
 * @return The generated column, cast to `t`
 */
inline std::unique_ptr<column> generate_typed_percentile_distribution(
  std::vector<double> const& buckets,
  std::vector<int> const& sizes,
  data_type t,
  bool sorted = false)
{
  // Fixed seed: every call restarts rand() from the same state.
  srand(0);

  std::vector<double> samples;
  size_t const num_samples = std::reduce(sizes.begin(), sizes.end(), 0);
  samples.reserve(num_samples);

  // The lower edge of a bucket is the upper edge of the one before it.
  double range_min = 0.0;
  for (size_t bucket = 0; bucket < sizes.size(); ++bucket) {
    double const range_max = buckets[bucket];
    int const count        = sizes[bucket];
    for (int drawn = 0; drawn < count; ++drawn) {
      samples.push_back(rand_range(range_min, range_max));
    }
    range_min = range_max;
  }

  if (sorted) { std::sort(samples.begin(), samples.end()); }

  cudf::test::fixed_width_column_wrapper<double> src(samples.begin(), samples.end());
  return cudf::cast(src, t);
}

/**
 * @brief Generates a percentile distribution from a fixed, "standardized" set of inputs.
 *
 * "Standardized" refers to the parameters passed to
 * generate_typed_percentile_distribution. The intent is to provide one shared
 * set of inputs for use with tdigest generation tests and percentile_approx tests:
 *
 *   buckets{10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0}
 *   sizes{50000, 50000, 50000, 50000, 50000, 100000, 100000, 100000, 100000, 100000}
 *
 * @param t Data type of the returned column
 * @param sorted Forwarded to generate_typed_percentile_distribution
 * @return Column produced by generate_typed_percentile_distribution
 */
inline std::unique_ptr<column> generate_standardized_percentile_distribution(
  data_type t = data_type{type_id::FLOAT64}, bool sorted = false)
{
  std::vector<double> const bucket_bounds{
    10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0};
  std::vector<int> const bucket_sizes{
    50000, 50000, 50000, 50000, 50000, 100000, 100000, 100000, 100000, 100000};
  return generate_typed_percentile_distribution(bucket_bounds, bucket_sizes, t, sorted);
}

} // namespace test
} // namespace cudf
Loading