Skip to content

Commit

Permalink
Fix decimal benchmark input data generation (#11863)
Browse files Browse the repository at this point in the history
closes #11850
Fixes decimal benchmark input data generation.
The generated data alternated between two values because `device_uvector<T>` holds both a value and a scale. The scale is fixed for a column, so when this data was copied to a `cudf::column`, the column's values alternated between the generated value and the scale.
The fix is to use `device_storage_type_t<T>` instead of `T`.

Authors:
  - Karthikeyan (https://github.com/karthikeyann)

Approvers:
  - Vukasin Milovanovic (https://github.com/vuule)
  - Nghia Truong (https://github.com/ttnghia)
  - David Wendt (https://github.com/davidwendt)

URL: #11863
  • Loading branch information
karthikeyann authored Oct 6, 2022
1 parent e323f0a commit 1ef722d
Showing 1 changed file with 27 additions and 25 deletions.
52 changes: 27 additions & 25 deletions cpp/benchmarks/common/generate_input.cu
Original file line number Diff line number Diff line change
Expand Up @@ -247,40 +247,33 @@ struct random_value_fn<T, std::enable_if_t<cudf::is_chrono<T>()>> {
*/
// Specialization of random_value_fn that is enabled when cudf::is_fixed_point<T>() is true.
//
// NOTE(review): this listing is a diff rendered without +/- markers, so the removed and
// the added lines of the commit appear back to back. Lines written in terms of `rep` / `T`
// look like the pre-change side and lines written in terms of `DeviceType` look like the
// post-change side -- confirm against the actual commit before relying on this reading.
template <typename T>
struct random_value_fn<T, std::enable_if_t<cudf::is_fixed_point<T>()>> {
// NOTE(review): presumably the pre-change members, typed with the fixed-point `T::rep`.
using rep = typename T::rep;
rep const lower_bound;
rep const upper_bound;
distribution_fn<rep> dist;
// NOTE(review): presumably the post-change members, typed with the device storage type of T.
using DeviceType = cudf::device_storage_type_t<T>;
DeviceType const lower_bound;
DeviceType const upper_bound;
distribution_fn<DeviceType> dist;
// Empty until the `if (not scale.has_value())` branch below assigns it.
std::optional<numeric::scale_type> scale;

// Constructor: copies the bounds out of `desc` and builds `dist` with make_distribution
// from desc.id and the same two bounds.
// NOTE(review): two signatures and two `dist{...}` initializers follow; the first of each
// pair (using `rep`) appears to be the removed line, the second (using `DeviceType`) the
// added one.
random_value_fn(distribution_params<rep> const& desc)
random_value_fn(distribution_params<DeviceType> const& desc)
: lower_bound{desc.lower_bound},
upper_bound{desc.upper_bound},
dist{make_distribution<rep>(desc.id, desc.lower_bound, desc.upper_bound)}
dist{make_distribution<DeviceType>(desc.id, desc.lower_bound, desc.upper_bound)}
{
}

// NOTE(review): two function heads follow. The operator() returning device_uvector<T>
// appears to be the removed head; get_scale() appears to be the added head that now owns
// the scale-selection code below.
rmm::device_uvector<T> operator()(thrust::minstd_rand& engine, unsigned size)
[[nodiscard]] numeric::scale_type get_scale(thrust::minstd_rand& engine)
{
// Pick the scale only once; later calls find `scale` already set and skip this branch.
if (not scale.has_value()) {
// Bound for the scale: digits10 of the integer type (old line uses `rep`, new line uses
// `DeviceType` and is constexpr).
int const max_scale = std::numeric_limits<rep>::digits10;
constexpr int max_scale = std::numeric_limits<DeviceType>::digits10;
// Draw the scale uniformly from [-max_scale, max_scale] using a std::mt19937 seeded with
// one value taken from `engine`.
std::uniform_int_distribution<int> scale_dist{-max_scale, max_scale};
std::mt19937 engine_scale(engine());
scale = numeric::scale_type{scale_dist(engine_scale)};
}
// NOTE(review): the statements from here to `return result;` appear to be the removed
// remainder of the old operator(): it generated `size` integers with `dist`, then built a
// device_uvector<T> in which every element is T{clamped integer, scale}.
auto const ints = dist(engine, size);
rmm::device_uvector<T> result(size, cudf::default_stream_value);
// Clamp the generated random value to the specified range
thrust::transform(thrust::device,
ints.begin(),
ints.end(),
result.begin(),
[scale = *(this->scale),
upper_bound = this->upper_bound,
lower_bound = this->lower_bound] __device__(auto int_value) {
return T{std::clamp(int_value, lower_bound, upper_bound), scale};
});
return result;
// NOTE(review): appears to be the added return of get_scale(); yields the stored scale,
// with scale_type{0} as the fallback if `scale` were empty.
return scale.value_or(numeric::scale_type{0});
}

// Returns the output of `dist(engine, size)` unchanged, as a device_uvector<DeviceType>.
// NOTE(review): appears to be the added operator(); unlike the old body above it has no
// clamp step and does not wrap values in T.
rmm::device_uvector<DeviceType> operator()(thrust::minstd_rand& engine, unsigned size)
{
return dist(engine, size);
}
};

Expand Down Expand Up @@ -398,9 +391,17 @@ std::unique_ptr<cudf::column> create_random_column(data_profile const& profile,
distribution_params<bool>{1. - profile.get_null_probability().value_or(0)});
auto value_dist = random_value_fn<T>{profile.get_distribution_params<T>()};

using DeviceType = cudf::device_storage_type_t<T>;
cudf::data_type const dtype = [&]() {
if constexpr (cudf::is_fixed_point<T>())
return cudf::data_type{cudf::type_to_id<T>(), value_dist.get_scale(engine)};
else
return cudf::data_type{cudf::type_to_id<T>()};
}();

// Distribution for picking elements from the array of samples
auto const avg_run_len = profile.get_avg_run_length();
rmm::device_uvector<T> data(0, cudf::default_stream_value);
rmm::device_uvector<DeviceType> data(0, cudf::default_stream_value);
rmm::device_uvector<bool> null_mask(0, cudf::default_stream_value);

if (profile.get_cardinality() == 0 and avg_run_len == 1) {
Expand All @@ -412,11 +413,12 @@ std::unique_ptr<cudf::column> create_random_column(data_profile const& profile,
: profile_cardinality;
}();
rmm::device_uvector<bool> samples_null_mask = valid_dist(engine, cardinality);
rmm::device_uvector<T> samples = value_dist(engine, cardinality);
rmm::device_uvector<DeviceType> samples = value_dist(engine, cardinality);

// generate n samples and gather.
auto const sample_indices =
sample_indices_with_run_length(avg_run_len, cardinality, num_rows, engine);
data = rmm::device_uvector<T>(num_rows, cudf::default_stream_value);
data = rmm::device_uvector<DeviceType>(num_rows, cudf::default_stream_value);
null_mask = rmm::device_uvector<bool>(num_rows, cudf::default_stream_value);
thrust::gather(
thrust::device, sample_indices.begin(), sample_indices.end(), samples.begin(), data.begin());
Expand All @@ -431,7 +433,7 @@ std::unique_ptr<cudf::column> create_random_column(data_profile const& profile,
cudf::detail::valid_if(null_mask.begin(), null_mask.end(), thrust::identity<bool>{});

return std::make_unique<cudf::column>(
cudf::data_type{cudf::type_to_id<T>()},
dtype,
num_rows,
data.release(),
profile.get_null_probability().has_value() ? std::move(result_bitmask) : rmm::device_buffer{});
Expand Down

0 comments on commit 1ef722d

Please sign in to comment.