Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add device create_sequence_table for benchmarks #10300

Merged
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
916ce00
rename generate_input.cpp to generate_input.cu
karthikeyann Feb 14, 2022
d7f0f29
add create_sequence_table, create_random_null_mask
karthikeyann Feb 14, 2022
bb74cc7
fix includes, seed
karthikeyann Feb 15, 2022
0ea4f60
use cuda::std to include int128
karthikeyann Feb 15, 2022
a25241e
use -std=gnu++17 for generate_input.cu for int128 support
karthikeyann Feb 15, 2022
dfd33f2
go back to using BENCHMARK_TEMPLATE_DEFINE_F
karthikeyann Feb 15, 2022
f9f3eec
use create_sequence_table in ast bench
karthikeyann Feb 15, 2022
81ac53a
use create_sequence_table in binops bench
karthikeyann Feb 15, 2022
6c659d4
use create_sequence_table, thrust::shuffle in scatter bench
karthikeyann Feb 15, 2022
9f5c5ba
use cudf::sequence, create_random_null_mask in search bench
karthikeyann Feb 15, 2022
6758095
update copyright year
karthikeyann Feb 15, 2022
718e269
style fix clang format
karthikeyann Feb 15, 2022
704bb72
Merge branch 'branch-22.04' of github.com:rapidsai/cudf into fea-benc…
karthikeyann Feb 15, 2022
0ad778e
address review comments
karthikeyann Feb 17, 2022
9dd9244
Merge branch 'branch-22.04' of github.com:rapidsai/cudf into fea-benc…
karthikeyann Feb 17, 2022
bda1f6c
const auto to auto const
karthikeyann Feb 19, 2022
d568d09
address review comments
karthikeyann Feb 19, 2022
993c85d
reduce code duplication
karthikeyann Feb 21, 2022
bdbdf49
Merge branch 'branch-22.04' into fea-benchmark_speedup_2.6
karthikeyann Feb 22, 2022
02ef0d2
Revert "rename generate_input.cpp to generate_input.cu"
karthikeyann Feb 22, 2022
820b417
rename generator functor
karthikeyann Feb 24, 2022
9028a80
simplify create null mask
karthikeyann Feb 24, 2022
4f1f3e8
rename repeat_dtypes to cycle_dtypes
karthikeyann Feb 24, 2022
b31de3a
move cycle_dtypes out for create_sequence_table
karthikeyann Feb 24, 2022
1d4d57a
move cycle_dtypes out of create_random_table
karthikeyann Feb 24, 2022
581e4b8
fix null mask null_count
karthikeyann Feb 24, 2022
fbd5708
address review comments
karthikeyann Feb 24, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,12 @@

find_package(Threads REQUIRED)

add_library(cudf_datagen STATIC common/generate_input.cpp)
add_library(cudf_datagen STATIC common/generate_input.cu)
set_property(
SOURCE common/generate_input.cu
APPEND
PROPERTY COMPILE_FLAGS "-Xcompiler=-std=gnu++17"
karthikeyann marked this conversation as resolved.
Show resolved Hide resolved
)
target_compile_features(cudf_datagen PUBLIC cxx_std_17 cuda_std_17)

target_compile_options(
Expand Down
71 changes: 22 additions & 49 deletions cpp/benchmarks/ast/transform.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -14,72 +14,40 @@
* limitations under the License.
*/

#include <cudf/column/column_factories.hpp>
#include <cudf/table/table.hpp>
#include <cudf/table/table_view.hpp>
#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/transform.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/error.hpp>

#include <cudf_test/column_wrapper.hpp>

#include <benchmark/benchmark.h>
#include <fixture/benchmark_fixture.hpp>
#include <fixture/templated_benchmark_fixture.hpp>
#include <synchronization/synchronization.hpp>

#include <thrust/iterator/counting_iterator.h>

#include <algorithm>
#include <list>
#include <numeric>
#include <random>
#include <memory>
#include <vector>

// Selects the shape of the operator-expression tree built by the benchmark; it is used as a
// non-type template parameter of the AST fixture and of BM_ast_transform.
enum class TreeType {
IMBALANCED_LEFT // All operator expressions have a left child operator expression and a right
// child column reference
};

// Benchmark fixture for the AST transform benchmarks. It adds nothing to cudf::benchmark; its
// template parameters match those of BM_ast_transform so that each registered benchmark gets a
// distinct fixture instantiation.
template <typename key_type, TreeType tree_type, bool reuse_columns, bool Nullable>
class AST : public cudf::benchmark {
};

template <typename key_type, TreeType tree_type, bool reuse_columns, bool Nullable>
static void BM_ast_transform(benchmark::State& state)
{
const cudf::size_type table_size{(cudf::size_type)state.range(0)};
const cudf::size_type tree_levels = (cudf::size_type)state.range(1);
auto const table_size{static_cast<cudf::size_type>(state.range(0))};
auto const tree_levels{static_cast<cudf::size_type>(state.range(1))};

// Create table data
auto n_cols = reuse_columns ? 1 : tree_levels + 1;
auto column_wrappers = std::vector<cudf::test::fixed_width_column_wrapper<key_type>>(n_cols);
auto columns = std::vector<cudf::column_view>(n_cols);

auto data_iterator = thrust::make_counting_iterator(0);

if constexpr (Nullable) {
auto validities = std::vector<bool>(table_size);
std::random_device rd;
std::mt19937 gen(rd());

std::generate(
validities.begin(), validities.end(), [&]() { return gen() > (0.5 * gen.max()); });
std::generate_n(column_wrappers.begin(), n_cols, [=]() {
return cudf::test::fixed_width_column_wrapper<key_type>(
data_iterator, data_iterator + table_size, validities.begin());
});
} else {
std::generate_n(column_wrappers.begin(), n_cols, [=]() {
return cudf::test::fixed_width_column_wrapper<key_type>(data_iterator,
data_iterator + table_size);
});
}
std::transform(
column_wrappers.begin(), column_wrappers.end(), columns.begin(), [](auto const& col) {
return static_cast<cudf::column_view>(col);
});

cudf::table_view table{columns};
auto const n_cols = reuse_columns ? 1 : tree_levels + 1;
auto const source_table = create_sequence_table(
{cudf::type_to_id<key_type>()}, n_cols, row_count{table_size}, Nullable ? 0.5 : -1.0);
auto table = source_table->view();

// Create column references
auto column_refs = std::vector<cudf::ast::column_reference>();
Expand Down Expand Up @@ -138,10 +106,15 @@ static void CustomRanges(benchmark::internal::Benchmark* b)
}
}

#define AST_TRANSFORM_BENCHMARK_DEFINE(name, key_type, tree_type, reuse_columns, nullable) \
TEMPLATED_BENCHMARK_F(AST, BM_ast_transform, key_type, tree_type, reuse_columns, nullable) \
->Apply(CustomRanges) \
->Unit(benchmark::kMillisecond) \
#define AST_TRANSFORM_BENCHMARK_DEFINE(name, key_type, tree_type, reuse_columns, nullable) \
BENCHMARK_TEMPLATE_DEFINE_F(AST, name, key_type, tree_type, reuse_columns, nullable) \
(::benchmark::State & st) \
{ \
BM_ast_transform<key_type, tree_type, reuse_columns, nullable>(st); \
} \
BENCHMARK_REGISTER_F(AST, name) \
->Apply(CustomRanges) \
->Unit(benchmark::kMillisecond) \
->UseManualTime();

AST_TRANSFORM_BENCHMARK_DEFINE(
Expand Down
53 changes: 17 additions & 36 deletions cpp/benchmarks/binaryop/binaryop.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -14,23 +14,15 @@
* limitations under the License.
*/

#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/binaryop.hpp>
#include <cudf/column/column_factories.hpp>
#include <cudf/table/table.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/error.hpp>

#include <cudf_test/column_wrapper.hpp>

#include <benchmark/benchmark.h>
#include <fixture/benchmark_fixture.hpp>
#include <synchronization/synchronization.hpp>

#include <thrust/iterator/counting_iterator.h>

#include <algorithm>
#include <numeric>
#include <vector>

// This set of benchmarks is designed to be a comparison for the AST benchmarks
Expand All @@ -47,40 +39,29 @@ class BINARYOP : public cudf::benchmark {
template <typename key_type, TreeType tree_type, bool reuse_columns>
static void BM_binaryop_transform(benchmark::State& state)
{
const cudf::size_type table_size{(cudf::size_type)state.range(0)};
const cudf::size_type tree_levels = (cudf::size_type)state.range(1);
auto const table_size{static_cast<cudf::size_type>(state.range(0))};
auto const tree_levels{static_cast<cudf::size_type>(state.range(1))};

// Create table data
auto n_cols = reuse_columns ? 1 : tree_levels + 1;
auto column_wrappers = std::vector<cudf::test::fixed_width_column_wrapper<key_type>>();
auto columns = std::vector<cudf::column_view>(n_cols);

auto data_iterator = thrust::make_counting_iterator(0);
std::generate_n(std::back_inserter(column_wrappers), n_cols, [=]() {
return cudf::test::fixed_width_column_wrapper<key_type>(data_iterator,
data_iterator + table_size);
});
std::transform(
column_wrappers.begin(), column_wrappers.end(), columns.begin(), [](auto const& col) {
return static_cast<cudf::column_view>(col);
});

cudf::table_view table{columns};
auto const n_cols = reuse_columns ? 1 : tree_levels + 1;
auto const source_table =
create_sequence_table({cudf::type_to_id<key_type>()}, n_cols, row_count{table_size});
cudf::table_view table{*source_table};

// Execute benchmark
for (auto _ : state) {
cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0
// Execute tree that chains additions like (((a + b) + c) + d)
auto const op = cudf::binary_operator::ADD;
auto result_data_type = cudf::data_type(cudf::type_to_id<key_type>());
auto const op = cudf::binary_operator::ADD;
auto const result_data_type = cudf::data_type(cudf::type_to_id<key_type>());
if (reuse_columns) {
auto result = cudf::binary_operation(columns.at(0), columns.at(0), op, result_data_type);
auto result = cudf::binary_operation(table.column(0), table.column(0), op, result_data_type);
for (cudf::size_type i = 0; i < tree_levels - 1; i++) {
result = cudf::binary_operation(result->view(), columns.at(0), op, result_data_type);
result = cudf::binary_operation(result->view(), table.column(0), op, result_data_type);
}
} else {
auto result = cudf::binary_operation(columns.at(0), columns.at(1), op, result_data_type);
std::for_each(std::next(columns.cbegin(), 2), columns.cend(), [&](auto const& col) {
auto result = cudf::binary_operation(table.column(0), table.column(1), op, result_data_type);
std::for_each(std::next(table.begin(), 2), table.end(), [&](auto const& col) {
result = cudf::binary_operation(result->view(), col, op, result_data_type);
});
}
Expand Down
21 changes: 8 additions & 13 deletions cpp/benchmarks/binaryop/compiled_binaryop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,30 +14,25 @@
* limitations under the License.
*/

#include <fixture/benchmark_fixture.hpp>
#include <fixture/templated_benchmark_fixture.hpp>
#include <synchronization/synchronization.hpp>

#include <cudf_test/column_wrapper.hpp>
#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/binaryop.hpp>

#include <thrust/iterator/counting_iterator.h>

// Fixture type for the compiled binary-operation benchmarks; it adds nothing to cudf::benchmark.
// NOTE(review): presumably referenced by the benchmark registration macros further down the
// file - confirm against the full source.
class COMPILED_BINARYOP : public cudf::benchmark {
};

template <typename TypeLhs, typename TypeRhs, typename TypeOut>
void BM_compiled_binaryop(benchmark::State& state, cudf::binary_operator binop)
{
const cudf::size_type column_size{(cudf::size_type)state.range(0)};
auto const column_size{static_cast<cudf::size_type>(state.range(0))};

auto data_it = thrust::make_counting_iterator(0);
cudf::test::fixed_width_column_wrapper<TypeLhs> input1(data_it, data_it + column_size);
cudf::test::fixed_width_column_wrapper<TypeRhs> input2(data_it, data_it + column_size);
auto const source_table = create_sequence_table(
{cudf::type_to_id<TypeLhs>(), cudf::type_to_id<TypeRhs>()}, 2, row_count{column_size});

auto lhs = cudf::column_view(input1);
auto rhs = cudf::column_view(input2);
auto lhs = cudf::column_view(source_table->get_column(0));
auto rhs = cudf::column_view(source_table->get_column(1));
auto output_dtype = cudf::data_type(cudf::type_to_id<TypeOut>());

// Call once for hot cache.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,18 @@

#include <cudf/column/column.hpp>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/detail/valid_if.cuh>
#include <cudf/filling.hpp>
#include <cudf/null_mask.hpp>
#include <cudf/scalar/scalar_factories.hpp>
#include <cudf/table/table.hpp>
#include <cudf/utilities/bit.hpp>

#include <cudf_test/column_utilities.hpp>
#include <cudf_test/column_wrapper.hpp>

#include <thrust/random.h>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_buffer.hpp>
#include <rmm/device_uvector.hpp>
Expand Down Expand Up @@ -544,6 +550,44 @@ std::unique_ptr<cudf::column> create_random_column<cudf::list_view>(data_profile
return list_column; // return the top-level column
}

/**
 * @brief Functor that maps an index to a pseudo-random validity flag.
 *
 * Each call advances the stored engine by the given index, draws one value from the stored
 * distribution and reports whether that draw is below `valid_prob`.
 */
struct valid_generator {
  thrust::minstd_rand engine;                     ///< Engine advanced on every call
  thrust::uniform_real_distribution<float> dist;  ///< Distribution constructed as {0, 1}
  float valid_prob;                               ///< A draw below this value counts as valid

  /**
   * @brief Builds the generator from an already constructed engine.
   *
   * @param engine Engine whose state is copied into the functor
   * @param valid_probability Threshold a draw is compared against
   */
  valid_generator(thrust::minstd_rand engine, float valid_probability)
    : engine(engine), dist{0, 1}, valid_prob{valid_probability}
  {
  }

  /**
   * @brief Builds the generator from a seed by delegating to the engine-taking constructor.
   *
   * @param seed Seed used to construct the engine
   * @param valid_probability Threshold a draw is compared against
   */
  valid_generator(unsigned seed, float valid_probability)
    : valid_generator(thrust::minstd_rand(seed), valid_probability)
  {
  }

  /**
   * @brief Produces the validity flag for index `n`.
   *
   * @param n Number of engine states to skip before drawing
   * @return true when the drawn value is below `valid_prob`
   */
  __device__ bool operator()(size_t n)
  {
    engine.discard(n);
    auto const draw = dist(engine);
    return draw < valid_prob;
  }
};

/**
 * @brief Creates a null mask of `size` bits together with its null count.
 *
 * @param size Number of elements the mask covers
 * @param null_probability Selects the kind of mask that is produced:
 *   - `< 0`  : no mask at all (empty buffer, null count 0)
 *   - `== 0` : a mask with every element valid (null count 0)
 *   - `>= 1` : a mask with every element null (null count `size`)
 *   - otherwise a pseudo-random mask built with `valid_generator`, where each element is
 *     valid with probability `1 - null_probability`
 * @param seed Seed forwarded to `valid_generator` for the pseudo-random case
 * @return Pair of the mask buffer and the number of nulls it encodes
 */
std::pair<rmm::device_buffer, cudf::size_type> create_random_null_mask(cudf::size_type size,
                                                                       float null_probability,
                                                                       unsigned seed)
{
  // Negative probability is the "not nullable" sentinel: return no mask.
  if (null_probability < 0.0f) { return {rmm::device_buffer{}, 0}; }

  // Degenerate probabilities need no random generation. The mask state must agree with the
  // reported null count: all-null <-> `size` nulls, all-valid <-> 0 nulls.
  if (null_probability >= 1.0f) {
    return {cudf::create_null_mask(size, cudf::mask_state::ALL_NULL), size};
  }
  if (null_probability == 0.0f) {
    return {cudf::create_null_mask(size, cudf::mask_state::ALL_VALID), 0};
  }

  // The generator takes the probability of being valid, hence the complement.
  return cudf::detail::valid_if(thrust::make_counting_iterator<cudf::size_type>(0),
                                thrust::make_counting_iterator<cudf::size_type>(size),
                                valid_generator{seed, 1.0f - null_probability});
}

using columns_vector = std::vector<std::unique_ptr<cudf::column>>;

/**
Expand Down Expand Up @@ -642,6 +686,38 @@ std::unique_ptr<cudf::table> create_random_table(std::vector<cudf::type_id> cons
return std::make_unique<cudf::table>(std::move(output_columns));
}

/**
 * @brief Creates a table of `num_cols` sequence columns with optional pseudo-random null masks.
 *
 * Every column is produced by `cudf::sequence` from a default-constructed scalar of the column's
 * type and then gets a null mask from `create_random_null_mask`. The seed is incremented after
 * each column so that columns do not share the same mask.
 *
 * @param dtype_ids Type ids of the columns; a single id is used for all `num_cols` columns,
 *                  otherwise the ids returned by `repeat_dtypes(dtype_ids, num_cols)` are used
 *                  (NOTE(review): assumes that helper yields at most `num_cols` ids - confirm)
 * @param num_cols Number of columns in the resulting table
 * @param num_rows Number of rows in every column
 * @param null_probability Forwarded unchanged to `create_random_null_mask`
 * @param seed Seed for the null mask of the first column
 * @return The generated table
 */
std::unique_ptr<cudf::table> create_sequence_table(std::vector<cudf::type_id> const& dtype_ids,
                                                   cudf::size_type num_cols,
                                                   row_count num_rows,
                                                   float null_probability,
                                                   unsigned seed)
{
  auto const out_dtype_ids = repeat_dtypes(dtype_ids, num_cols);

  // Builds one column starting at `init` and attaches its null mask; advances `seed`.
  auto make_column = [&](auto const& init) {
    auto col           = cudf::sequence(num_rows.count, init);
    auto [mask, count] = create_random_null_mask(num_rows.count, null_probability, seed++);
    col->set_null_mask(std::move(mask), count);
    return col;
  };

  auto columns = std::vector<std::unique_ptr<cudf::column>>(num_cols);
  if (dtype_ids.size() == 1) {
    // Single type: create the start scalar once and reuse it for every column. The scalar is
    // created only inside this branch so an empty `dtype_ids` is never indexed.
    auto const init = cudf::make_default_constructed_scalar(cudf::data_type{dtype_ids.front()});
    std::generate_n(columns.begin(), num_cols, [&]() { return make_column(*init); });
  } else {
    std::transform(
      out_dtype_ids.begin(), out_dtype_ids.end(), columns.begin(), [&](auto dtype) {
        auto const init = cudf::make_default_constructed_scalar(cudf::data_type{dtype});
        return make_column(*init);
      });
  }
  return std::make_unique<cudf::table>(std::move(columns));
}

std::vector<cudf::type_id> get_type_or_group(int32_t id)
{
// identity transformation when passing a concrete type_id
Expand Down
Loading