Skip to content

Commit

Permalink
Merge branch 'branch-22.04' of github.com:rapidsai/cudf into improvem…
Browse files Browse the repository at this point in the history
…ent/ListOfColumnsRefactor/filling
  • Loading branch information
isVoid committed Mar 11, 2022
2 parents a0ef0af + f29c8d9 commit d617caa
Show file tree
Hide file tree
Showing 58 changed files with 2,878 additions and 934 deletions.
5 changes: 4 additions & 1 deletion codecov.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,7 @@
coverage:
status:
project: off
patch: off
patch: on

github_checks:
annotations: true
8 changes: 8 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,7 @@ add_library(
src/quantiles/quantiles.cu
src/reductions/all.cu
src/reductions/any.cu
src/reductions/collect_ops.cu
src/reductions/max.cu
src/reductions/mean.cu
src/reductions/min.cu
Expand All @@ -373,6 +374,13 @@ add_library(
src/reductions/scan/scan.cpp
src/reductions/scan/scan_exclusive.cu
src/reductions/scan/scan_inclusive.cu
src/reductions/segmented_all.cu
src/reductions/segmented_any.cu
src/reductions/segmented_max.cu
src/reductions/segmented_min.cu
src/reductions/segmented_product.cu
src/reductions/segmented_reductions.cpp
src/reductions/segmented_sum.cu
src/reductions/std.cu
src/reductions/sum.cu
src/reductions/sum_of_squares.cu
Expand Down
7 changes: 4 additions & 3 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ target_compile_options(

target_link_libraries(
cudf_datagen PUBLIC GTest::gmock GTest::gtest GTest::gmock_main GTest::gtest_main
benchmark::benchmark nvbench::nvbench Threads::Threads cudf
benchmark::benchmark nvbench::nvbench Threads::Threads cudf cudftestutil
)

target_include_directories(
Expand Down Expand Up @@ -175,9 +175,10 @@ ConfigureBench(TYPE_DISPATCHER_BENCH type_dispatcher/type_dispatcher.cu)
# ##################################################################################################
# * reduction benchmark ---------------------------------------------------------------------------
ConfigureBench(
REDUCTION_BENCH reduction/anyall.cpp reduction/dictionary.cpp reduction/reduce.cpp
reduction/scan.cpp reduction/minmax.cpp
REDUCTION_BENCH reduction/anyall.cpp reduction/dictionary.cpp reduction/minmax.cpp
reduction/reduce.cpp reduction/scan.cpp
)
ConfigureNVBench(REDUCTION_NVBENCH reduction/segment_reduce.cu)

# ##################################################################################################
# * reduction benchmark ---------------------------------------------------------------------------
Expand Down
7 changes: 4 additions & 3 deletions cpp/benchmarks/binaryop/compiled_binaryop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,12 @@ void BM_compiled_binaryop(benchmark::State& state, cudf::binary_operator binop)
{
auto const column_size{static_cast<cudf::size_type>(state.range(0))};

auto const source_table = create_sequence_table(
auto const source_table = create_random_table(
{cudf::type_to_id<TypeLhs>(), cudf::type_to_id<TypeRhs>()}, row_count{column_size});

auto lhs = cudf::column_view(source_table->get_column(0));
auto rhs = cudf::column_view(source_table->get_column(1));
auto lhs = cudf::column_view(source_table->get_column(0));
auto rhs = cudf::column_view(source_table->get_column(1));

auto output_dtype = cudf::data_type(cudf::type_to_id<TypeOut>());

// Call once for hot cache.
Expand Down
134 changes: 134 additions & 0 deletions cpp/benchmarks/reduction/segment_reduce.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <benchmarks/fixture/rmm_pool_raii.hpp>
#include <nvbench/nvbench.cuh>

#include <cudf_test/base_fixture.hpp>
#include <cudf_test/column_wrapper.hpp>

#include <cudf/aggregation.hpp>
#include <cudf/column/column.hpp>
#include <cudf/detail/iterator.cuh>
#include <cudf/reduction.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/span.hpp>

#include <rmm/cuda_stream_view.hpp>

#include <thrust/device_vector.h>

#include <memory>
#include <type_traits>
#include <vector>

namespace cudf {

/**
 * @brief Reports whether `kind` is one of the aggregations treated as boolean-output here.
 *
 * Used by the benchmark overloads below to decide which (OutputType, aggregation) pairs
 * are run and which are skipped.
 *
 * @param kind Aggregation kind to classify
 * @return true if `kind` is `ALL` or `ANY`, false for every other kind
 */
constexpr bool is_boolean_output_agg(segmented_reduce_aggregation::Kind kind)
{
  switch (kind) {
    case segmented_reduce_aggregation::ALL:
    case segmented_reduce_aggregation::ANY: return true;
    default: return false;
  }
}

template <segmented_reduce_aggregation::Kind kind>
std::unique_ptr<segmented_reduce_aggregation> make_simple_aggregation()
{
switch (kind) {
case segmented_reduce_aggregation::SUM:
return make_sum_aggregation<segmented_reduce_aggregation>();
case segmented_reduce_aggregation::PRODUCT:
return make_product_aggregation<segmented_reduce_aggregation>();
case segmented_reduce_aggregation::MIN:
return make_min_aggregation<segmented_reduce_aggregation>();
case segmented_reduce_aggregation::MAX:
return make_max_aggregation<segmented_reduce_aggregation>();
case segmented_reduce_aggregation::ALL:
return make_all_aggregation<segmented_reduce_aggregation>();
case segmented_reduce_aggregation::ANY:
return make_any_aggregation<segmented_reduce_aggregation>();
default: CUDF_FAIL("Unsupported simple segmented aggregation");
}
}

/**
 * @brief Creates the input column and the device-side segment offsets for one benchmark run.
 *
 * Reads the `column_size` and `num_segments` int64 axes from `state`. The column holds
 * `column_size` values drawn from a `test::UniformRandomGenerator<InputType>` constructed
 * with `(0, 100)`. The offsets are `num_segments + 1` multiples of
 * `column_size / num_segments` (integer division), so when `column_size` is not a multiple
 * of `num_segments` the trailing remainder is not covered by any segment.
 *
 * NOTE(review): `num_segments` is used as a divisor and is assumed to be positive; the
 * registered axis values satisfy this.
 *
 * @tparam InputType Element type of the generated column
 * @param state nvbench state providing the `column_size` and `num_segments` axes
 * @return Pair of (input column, device vector of segment offsets)
 */
template <typename InputType>
std::pair<std::unique_ptr<column>, thrust::device_vector<size_type>> make_test_data(
  nvbench::state& state)
{
  auto const column_size{size_type(state.get_int64("column_size"))};
  auto const num_segments{size_type(state.get_int64("num_segments"))};

  auto const segment_length = column_size / num_segments;

  // The generator is stateful, so it is captured by reference; the index is irrelevant.
  test::UniformRandomGenerator<InputType> rand_gen(0, 100);
  auto data_it = detail::make_counting_transform_iterator(
    0, [&rand_gen](auto) { return rand_gen.generate(); });

  // i-th offset is i * segment_length, clamped so it never exceeds the column size.
  auto offset_it =
    detail::make_counting_transform_iterator(0, [column_size, segment_length](auto i) {
      return column_size < i * segment_length ? column_size : i * segment_length;
    });

  test::fixed_width_column_wrapper<InputType> input(data_it, data_it + column_size);
  std::vector<size_type> h_offsets(offset_it, offset_it + num_segments + 1);
  thrust::device_vector<size_type> d_offsets(h_offsets);

  // Move the offsets into the result: passing the lvalue would copy the device vector,
  // costing an extra device allocation and device-to-device copy.
  return std::make_pair(input.release(), std::move(d_offsets));
}

/**
 * @brief Benchmarks `segmented_reduce` for one (InputType, OutputType, aggregation) combination.
 *
 * This overload is selected when the combination is valid: either the aggregation is not
 * ALL/ANY, or the requested output type is `bool`.
 *
 * Test data comes from `make_test_data`, driven by the `column_size` and `num_segments`
 * int64 axes of `state`. Throughput counters are declared as `column_size` input elements
 * read and `num_segments` output elements written.
 *
 * @tparam InputType Element type of the input column
 * @tparam OutputType Element type requested for the reduction result
 * @tparam kind Aggregation kind to benchmark
 * @param state nvbench state for this run
 */
template <typename InputType, typename OutputType, aggregation::Kind kind>
std::enable_if_t<!is_boolean_output_agg(kind) || std::is_same_v<OutputType, bool>, void>
BM_Simple_Segmented_Reduction(nvbench::state& state,
                              nvbench::type_list<InputType, OutputType, nvbench::enum_type<kind>>)
{
  // TODO: to be replaced by nvbench fixture once it's ready
  cudf::rmm_pool_raii rmm_pool;

  auto const column_size{size_type(state.get_int64("column_size"))};
  auto const num_segments{size_type(state.get_int64("num_segments"))};
  auto [input, offsets] = make_test_data<InputType>(state);
  auto agg              = make_simple_aggregation<kind>();

  state.add_element_count(column_size);
  state.add_global_memory_reads<InputType>(column_size);
  // A segmented reduction yields one result per segment, so the output holds
  // num_segments elements rather than column_size.
  state.add_global_memory_writes<OutputType>(num_segments);

  state.exec(
    nvbench::exec_tag::sync,
    [input_view = input->view(), offset_span = device_span<size_type>{offsets}, &agg](
      nvbench::launch&) {
      segmented_reduce(
        input_view, offset_span, *agg, data_type{type_to_id<OutputType>()}, null_policy::INCLUDE);
    });
}

template <typename InputType, typename OutputType, aggregation::Kind kind>
std::enable_if_t<is_boolean_output_agg(kind) && !std::is_same_v<OutputType, bool>, void>
BM_Simple_Segmented_Reduction(nvbench::state& state,
nvbench::type_list<InputType, OutputType, nvbench::enum_type<kind>>)
{
state.skip("Invalid combination of dtype and aggregation type.");
}

// Element types swept by the benchmark; the same list is used for both the input-type
// and the output-type axis in the registration below.
using Types = nvbench::type_list<bool, int32_t, float, double>;
// Skip benchmarking MAX/ANY since they are covered by MIN/ALL respectively.
using AggKinds = nvbench::
enum_type_list<aggregation::SUM, aggregation::PRODUCT, aggregation::MIN, aggregation::ALL>;

// Register BM_Simple_Segmented_Reduction with three type axes (input type, output type,
// aggregation kind) and two int64 axes. The int64 axes are read back inside the benchmark
// through state.get_int64("column_size") and state.get_int64("num_segments").
// Pairings of ALL with a non-bool output type resolve to the overload that calls
// state.skip().
NVBENCH_BENCH_TYPES(BM_Simple_Segmented_Reduction, NVBENCH_TYPE_AXES(Types, Types, AggKinds))
.set_name("segmented_reduction_simple")
.set_type_axes_names({"InputType", "OutputType", "AggregationKinds"})
.add_int64_axis("column_size", {100'000, 1'000'000, 10'000'000, 100'000'000})
.add_int64_axis("num_segments", {1'000, 10'000, 100'000});

} // namespace cudf
11 changes: 11 additions & 0 deletions cpp/include/cudf/aggregation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,17 @@ class groupby_scan_aggregation : public virtual aggregation {
groupby_scan_aggregation() {}
};

/**
 * @brief Derived class intended for segmented reduction usage.
 *
 * Inherits virtually from `aggregation`, so a concrete aggregation that derives from
 * several usage-specific bases shares a single `aggregation` subobject. The constructor
 * is protected: objects of this type are only created through classes deriving from it.
 */
class segmented_reduce_aggregation : public virtual aggregation {
public:
~segmented_reduce_aggregation() override = default;

protected:
// Protected so that only derived aggregation classes can construct this base.
segmented_reduce_aggregation() {}
};

enum class udf_type : bool { CUDA, PTX };
enum class correlation_type : int32_t { PEARSON, KENDALL, SPEARMAN };

Expand Down
15 changes: 9 additions & 6 deletions cpp/include/cudf/detail/aggregation/aggregation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,8 @@ class aggregation_finalizer { // Declares the interface for the finalizer
*/
class sum_aggregation final : public rolling_aggregation,
public groupby_aggregation,
public groupby_scan_aggregation {
public groupby_scan_aggregation,
public segmented_reduce_aggregation {
public:
sum_aggregation() : aggregation(SUM) {}

Expand All @@ -166,7 +167,7 @@ class sum_aggregation final : public rolling_aggregation,
/**
* @brief Derived class for specifying a product aggregation
*/
class product_aggregation final : public groupby_aggregation {
class product_aggregation final : public groupby_aggregation, public segmented_reduce_aggregation {
public:
product_aggregation() : aggregation(PRODUCT) {}

Expand All @@ -187,7 +188,8 @@ class product_aggregation final : public groupby_aggregation {
*/
class min_aggregation final : public rolling_aggregation,
public groupby_aggregation,
public groupby_scan_aggregation {
public groupby_scan_aggregation,
public segmented_reduce_aggregation {
public:
min_aggregation() : aggregation(MIN) {}

Expand All @@ -208,7 +210,8 @@ class min_aggregation final : public rolling_aggregation,
*/
class max_aggregation final : public rolling_aggregation,
public groupby_aggregation,
public groupby_scan_aggregation {
public groupby_scan_aggregation,
public segmented_reduce_aggregation {
public:
max_aggregation() : aggregation(MAX) {}

Expand Down Expand Up @@ -248,7 +251,7 @@ class count_aggregation final : public rolling_aggregation,
/**
* @brief Derived class for specifying an any aggregation
*/
class any_aggregation final : public aggregation {
class any_aggregation final : public segmented_reduce_aggregation {
public:
any_aggregation() : aggregation(ANY) {}

Expand All @@ -267,7 +270,7 @@ class any_aggregation final : public aggregation {
/**
* @brief Derived class for specifying an all aggregation
*/
class all_aggregation final : public aggregation {
class all_aggregation final : public segmented_reduce_aggregation {
public:
all_aggregation() : aggregation(ALL) {}

Expand Down
Loading

0 comments on commit d617caa

Please sign in to comment.