Skip to content

Commit

Permalink
Merge branch 'branch-22.04' into regex-dispatcher
Browse files Browse the repository at this point in the history
  • Loading branch information
davidwendt committed Feb 28, 2022
2 parents 85639db + 4c9ef51 commit 3015451
Show file tree
Hide file tree
Showing 132 changed files with 2,476 additions and 1,659 deletions.
4 changes: 4 additions & 0 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,10 @@ if hasArg clean; then
rmdir ${bd} || true
fi
done

# Cleaning up python artifacts
find ${REPODIR}/python/ | grep -E "(__pycache__|\.pyc|\.pyo|\.so$)" | xargs rm -rf

fi


Expand Down
4 changes: 2 additions & 2 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

find_package(Threads REQUIRED)

add_library(cudf_datagen STATIC common/generate_input.cpp)
add_library(cudf_datagen STATIC common/generate_input.cpp common/generate_nullmask.cu)
target_compile_features(cudf_datagen PUBLIC cxx_std_17 cuda_std_17)

target_compile_options(
Expand Down Expand Up @@ -276,7 +276,7 @@ ConfigureBench(

# ##################################################################################################
# * json benchmark -------------------------------------------------------------------
ConfigureBench(JSON_BENCH string/json.cpp)
ConfigureBench(JSON_BENCH string/json.cu)

# ##################################################################################################
# * io benchmark ---------------------------------------------------------------------
Expand Down
73 changes: 24 additions & 49 deletions cpp/benchmarks/ast/transform.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -14,72 +14,42 @@
* limitations under the License.
*/

#include <cudf/column/column_factories.hpp>
#include <cudf/table/table.hpp>
#include <cudf/table/table_view.hpp>
#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/transform.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/error.hpp>

#include <cudf_test/column_wrapper.hpp>

#include <benchmark/benchmark.h>
#include <fixture/benchmark_fixture.hpp>
#include <fixture/templated_benchmark_fixture.hpp>
#include <synchronization/synchronization.hpp>

#include <thrust/iterator/counting_iterator.h>

#include <algorithm>
#include <list>
#include <numeric>
#include <random>
#include <memory>
#include <vector>

enum class TreeType {
IMBALANCED_LEFT // All operator expressions have a left child operator expression and a right
// child column reference
};

template <typename key_type, TreeType tree_type, bool reuse_columns, bool Nullable>
class AST : public cudf::benchmark {
};

template <typename key_type, TreeType tree_type, bool reuse_columns, bool Nullable>
static void BM_ast_transform(benchmark::State& state)
{
const cudf::size_type table_size{(cudf::size_type)state.range(0)};
const cudf::size_type tree_levels = (cudf::size_type)state.range(1);
auto const table_size{static_cast<cudf::size_type>(state.range(0))};
auto const tree_levels{static_cast<cudf::size_type>(state.range(1))};

// Create table data
auto n_cols = reuse_columns ? 1 : tree_levels + 1;
auto column_wrappers = std::vector<cudf::test::fixed_width_column_wrapper<key_type>>(n_cols);
auto columns = std::vector<cudf::column_view>(n_cols);

auto data_iterator = thrust::make_counting_iterator(0);

if constexpr (Nullable) {
auto validities = std::vector<bool>(table_size);
std::random_device rd;
std::mt19937 gen(rd());

std::generate(
validities.begin(), validities.end(), [&]() { return gen() > (0.5 * gen.max()); });
std::generate_n(column_wrappers.begin(), n_cols, [=]() {
return cudf::test::fixed_width_column_wrapper<key_type>(
data_iterator, data_iterator + table_size, validities.begin());
});
} else {
std::generate_n(column_wrappers.begin(), n_cols, [=]() {
return cudf::test::fixed_width_column_wrapper<key_type>(data_iterator,
data_iterator + table_size);
});
}
std::transform(
column_wrappers.begin(), column_wrappers.end(), columns.begin(), [](auto const& col) {
return static_cast<cudf::column_view>(col);
});

cudf::table_view table{columns};
auto const n_cols = reuse_columns ? 1 : tree_levels + 1;
auto const source_table =
create_sequence_table(cycle_dtypes({cudf::type_to_id<key_type>()}, n_cols),
row_count{table_size},
Nullable ? 0.5 : -1.0);
auto table = source_table->view();

// Create column references
auto column_refs = std::vector<cudf::ast::column_reference>();
Expand Down Expand Up @@ -138,10 +108,15 @@ static void CustomRanges(benchmark::internal::Benchmark* b)
}
}

#define AST_TRANSFORM_BENCHMARK_DEFINE(name, key_type, tree_type, reuse_columns, nullable) \
TEMPLATED_BENCHMARK_F(AST, BM_ast_transform, key_type, tree_type, reuse_columns, nullable) \
->Apply(CustomRanges) \
->Unit(benchmark::kMillisecond) \
#define AST_TRANSFORM_BENCHMARK_DEFINE(name, key_type, tree_type, reuse_columns, nullable) \
BENCHMARK_TEMPLATE_DEFINE_F(AST, name, key_type, tree_type, reuse_columns, nullable) \
(::benchmark::State & st) \
{ \
BM_ast_transform<key_type, tree_type, reuse_columns, nullable>(st); \
} \
BENCHMARK_REGISTER_F(AST, name) \
->Apply(CustomRanges) \
->Unit(benchmark::kMillisecond) \
->UseManualTime();

AST_TRANSFORM_BENCHMARK_DEFINE(
Expand Down
53 changes: 17 additions & 36 deletions cpp/benchmarks/binaryop/binaryop.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -14,23 +14,15 @@
* limitations under the License.
*/

#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/binaryop.hpp>
#include <cudf/column/column_factories.hpp>
#include <cudf/table/table.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/error.hpp>

#include <cudf_test/column_wrapper.hpp>

#include <benchmark/benchmark.h>
#include <fixture/benchmark_fixture.hpp>
#include <synchronization/synchronization.hpp>

#include <thrust/iterator/counting_iterator.h>

#include <algorithm>
#include <numeric>
#include <vector>

// This set of benchmarks is designed to be a comparison for the AST benchmarks
Expand All @@ -47,40 +39,29 @@ class BINARYOP : public cudf::benchmark {
template <typename key_type, TreeType tree_type, bool reuse_columns>
static void BM_binaryop_transform(benchmark::State& state)
{
const cudf::size_type table_size{(cudf::size_type)state.range(0)};
const cudf::size_type tree_levels = (cudf::size_type)state.range(1);
auto const table_size{static_cast<cudf::size_type>(state.range(0))};
auto const tree_levels{static_cast<cudf::size_type>(state.range(1))};

// Create table data
auto n_cols = reuse_columns ? 1 : tree_levels + 1;
auto column_wrappers = std::vector<cudf::test::fixed_width_column_wrapper<key_type>>();
auto columns = std::vector<cudf::column_view>(n_cols);

auto data_iterator = thrust::make_counting_iterator(0);
std::generate_n(std::back_inserter(column_wrappers), n_cols, [=]() {
return cudf::test::fixed_width_column_wrapper<key_type>(data_iterator,
data_iterator + table_size);
});
std::transform(
column_wrappers.begin(), column_wrappers.end(), columns.begin(), [](auto const& col) {
return static_cast<cudf::column_view>(col);
});

cudf::table_view table{columns};
auto const n_cols = reuse_columns ? 1 : tree_levels + 1;
auto const source_table = create_sequence_table(
cycle_dtypes({cudf::type_to_id<key_type>()}, n_cols), row_count{table_size});
cudf::table_view table{*source_table};

// Execute benchmark
for (auto _ : state) {
cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0
// Execute tree that chains additions like (((a + b) + c) + d)
auto const op = cudf::binary_operator::ADD;
auto result_data_type = cudf::data_type(cudf::type_to_id<key_type>());
auto const op = cudf::binary_operator::ADD;
auto const result_data_type = cudf::data_type(cudf::type_to_id<key_type>());
if (reuse_columns) {
auto result = cudf::binary_operation(columns.at(0), columns.at(0), op, result_data_type);
auto result = cudf::binary_operation(table.column(0), table.column(0), op, result_data_type);
for (cudf::size_type i = 0; i < tree_levels - 1; i++) {
result = cudf::binary_operation(result->view(), columns.at(0), op, result_data_type);
result = cudf::binary_operation(result->view(), table.column(0), op, result_data_type);
}
} else {
auto result = cudf::binary_operation(columns.at(0), columns.at(1), op, result_data_type);
std::for_each(std::next(columns.cbegin(), 2), columns.cend(), [&](auto const& col) {
auto result = cudf::binary_operation(table.column(0), table.column(1), op, result_data_type);
std::for_each(std::next(table.begin(), 2), table.end(), [&](auto const& col) {
result = cudf::binary_operation(result->view(), col, op, result_data_type);
});
}
Expand Down
21 changes: 8 additions & 13 deletions cpp/benchmarks/binaryop/compiled_binaryop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,30 +14,25 @@
* limitations under the License.
*/

#include <fixture/benchmark_fixture.hpp>
#include <fixture/templated_benchmark_fixture.hpp>
#include <synchronization/synchronization.hpp>

#include <cudf_test/column_wrapper.hpp>
#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/binaryop.hpp>

#include <thrust/iterator/counting_iterator.h>

class COMPILED_BINARYOP : public cudf::benchmark {
};

template <typename TypeLhs, typename TypeRhs, typename TypeOut>
void BM_compiled_binaryop(benchmark::State& state, cudf::binary_operator binop)
{
const cudf::size_type column_size{(cudf::size_type)state.range(0)};
auto const column_size{static_cast<cudf::size_type>(state.range(0))};

auto data_it = thrust::make_counting_iterator(0);
cudf::test::fixed_width_column_wrapper<TypeLhs> input1(data_it, data_it + column_size);
cudf::test::fixed_width_column_wrapper<TypeRhs> input2(data_it, data_it + column_size);
auto const source_table = create_sequence_table(
{cudf::type_to_id<TypeLhs>(), cudf::type_to_id<TypeRhs>()}, row_count{column_size});

auto lhs = cudf::column_view(input1);
auto rhs = cudf::column_view(input2);
auto lhs = cudf::column_view(source_table->get_column(0));
auto rhs = cudf::column_view(source_table->get_column(1));
auto output_dtype = cudf::data_type(cudf::type_to_id<TypeOut>());

// Call once for hot cache.
Expand Down
Loading

0 comments on commit 3015451

Please sign in to comment.