Skip to content

Commit

Permalink
Initial pass for reduce sums
Browse files Browse the repository at this point in the history
  • Loading branch information
isVoid committed Nov 10, 2021
1 parent a9a7b2c commit 7ccdeb8
Show file tree
Hide file tree
Showing 9 changed files with 535 additions and 12 deletions.
2 changes: 2 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -360,12 +360,14 @@ add_library(
src/reductions/nth_element.cu
src/reductions/product.cu
src/reductions/reductions.cpp
src/reductions/segmented_reductions.cpp
src/reductions/scan/rank_scan.cu
src/reductions/scan/scan.cpp
src/reductions/scan/scan_exclusive.cu
src/reductions/scan/scan_inclusive.cu
src/reductions/std.cu
src/reductions/sum.cu
src/reductions/segmented_sum.cu
src/reductions/sum_of_squares.cu
src/reductions/var.cu
src/replace/clamp.cu
Expand Down
27 changes: 16 additions & 11 deletions cpp/include/cudf/detail/reduction.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,6 @@ std::unique_ptr<scalar> reduce(InputIterator d_in,
*
* @param[in] d_in the begin iterator to input
* @param[in] d_offset the begin iterator to offset
* @param[in] num_items the number of items
* @param[in] num_segments the number of segments
* @param[in] op the reduction operator
* @param[in] stream CUDA stream used for device memory operations and kernel launches.
Expand All @@ -244,15 +243,13 @@ template <typename Op,
not cudf::is_fixed_point<OutputType>()>* = nullptr>
std::unique_ptr<column> segmented_reduce(InputIterator d_in,
OffsetIterator d_offset,
cudf::size_type num_items,
cudf::size_type num_segments,
op::simple_op<Op> sop,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
using InputType = OutputType;
auto binary_op = sop.get_binary_op();
auto identity = sop.template get_identity<OutputType>();
auto binary_op = sop.get_binary_op();
auto identity = sop.template get_identity<OutputType>();
// auto dev_result = rmm::device_scalar<OutputType>{identity, stream, mr};
auto dev_result = make_fixed_width_column(
data_type{type_to_id<OutputType>()}, num_segments, mask_state::UNALLOCATED, stream, mr);
Expand All @@ -264,11 +261,10 @@ std::unique_ptr<column> segmented_reduce(InputIterator d_in,
cub::DeviceSegmentedReduce::Reduce(d_temp_storage.data(),
temp_storage_bytes,
d_in,
dev_result_mview.data<InputType>(),
dev_result_mview.data<OutputType>(),
num_segments,
d_offset,
d_offset + num_segments + 1,
num_items,
d_offset + 1,
binary_op,
identity,
stream.value());
Expand All @@ -278,18 +274,27 @@ std::unique_ptr<column> segmented_reduce(InputIterator d_in,
cub::DeviceSegmentedReduce::Reduce(d_temp_storage.data(),
temp_storage_bytes,
d_in,
dev_result_mview.data<InputType>(),
dev_result_mview.data<OutputType>(),
num_segments,
d_offset,
d_offset + num_segments + 1,
num_items,
d_offset + 1,
binary_op,
identity,
stream.value());

return dev_result;
}

template <typename Op,
typename InputIterator,
typename OutputType = typename thrust::iterator_value<InputIterator>::type,
typename std::enable_if_t<std::is_same_v<OutputType, string_view>>* = nullptr,
typename... Args>
std::unique_ptr<column> segmented_reduce(Args&&...)
{
CUDF_FAIL("Segment reduction for string type is unsupported.");
}

} // namespace detail
} // namespace reduction
} // namespace cudf
120 changes: 120 additions & 0 deletions cpp/include/cudf/detail/reduction_functions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -254,5 +254,125 @@ std::unique_ptr<scalar> nth_element(
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Computes sums to each segment of the input column
*
* If all elements in an input segment are null, the result is null.
*
* @throw cudf::logic_error if input column type is not convertible to `output_dtype`
* @throw cudf::logic_error if `output_dtype` is not arithmetic point type
*
* @param col input column to compute sum
* @param offsets Indices to identify segment boundaries
* @param output_dtype data type of return type and typecast elements of input column
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned column's device memory
* @return Sum as column of type `output_dtype`.
*/
std::unique_ptr<column> segmented_sum(
column_view const& col,
column_view const& offsets,
data_type const output_dtype,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

// /**
// * @brief Computes minimum of elements in input column
// *
// * If all elements in input column are null, output scalar is null.
// *
// * @throw cudf::logic_error if input column type is convertible to `output_dtype`
// *
// * @param col input column to compute minimum.
// * @param output_dtype data type of return type and typecast elements of input column
// * @param stream CUDA stream used for device memory operations and kernel launches.
// * @param mr Device memory resource used to allocate the returned scalar's device memory
// * @return Minimum element as scalar of type `output_dtype`.
// */
// std::unique_ptr<scalar> min(
// column_view const& col,
// data_type const output_dtype,
// rmm::cuda_stream_view stream = rmm::cuda_stream_default,
// rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

// /**
// * @brief Computes maximum of elements in input column
// *
// * If all elements in input column are null, output scalar is null.
// *
// * @throw cudf::logic_error if input column type is convertible to `output_dtype`
// *
// * @param col input column to compute maximum.
// * @param output_dtype data type of return type and typecast elements of input column
// * @param stream CUDA stream used for device memory operations and kernel launches.
// * @param mr Device memory resource used to allocate the returned scalar's device memory
// * @return Maximum element as scalar of type `output_dtype`.
// */
// std::unique_ptr<scalar> max(
// column_view const& col,
// data_type const output_dtype,
// rmm::cuda_stream_view stream = rmm::cuda_stream_default,
// rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

// /**
// * @brief Computes any of elements in input column is true when typecasted to bool
// *
// * If all elements in input column are null, output scalar is null.
// *
// * @throw cudf::logic_error if input column type is not convertible to bool
// * @throw cudf::logic_error if `output_dtype` is not bool
// *
// * @param col input column to compute any_of.
// * @param output_dtype data type of return type and typecast elements of input column
// * @param stream CUDA stream used for device memory operations and kernel launches.
// * @param mr Device memory resource used to allocate the returned scalar's device memory
// * @return bool scalar if any of elements is true when typecasted to bool
// */
// std::unique_ptr<scalar> any(
// column_view const& col,
// data_type const output_dtype,
// rmm::cuda_stream_view stream = rmm::cuda_stream_default,
// rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

// /**
// * @brief Computes all of elements in input column is true when typecasted to bool
// *
// * If all elements in input column are null, output scalar is null.
// *
// * @throw cudf::logic_error if input column type is not convertible to bool
// * @throw cudf::logic_error if `output_dtype` is not bool
// *
// * @param col input column to compute all_of.
// * @param output_dtype data type of return type and typecast elements of input column
// * @param stream CUDA stream used for device memory operations and kernel launches.
// * @param mr Device memory resource used to allocate the returned scalar's device memory
// * @return bool scalar if all of elements is true when typecasted to bool
// */
// std::unique_ptr<scalar> all(
// column_view const& col,
// data_type const output_dtype,
// rmm::cuda_stream_view stream = rmm::cuda_stream_default,
// rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

// /**
// * @brief Computes product of elements in input column
// *
// * If all elements in input column are null, output scalar is null.
// *
// * @throw cudf::logic_error if input column type is not convertible to `output_dtype`
// * @throw cudf::logic_error if `output_dtype` is not arithmetic point type
// *
// * @param col input column to compute product.
// * @param output_dtype data type of return type and typecast elements of input column
// * @param stream CUDA stream used for device memory operations and kernel launches.
// * @param mr Device memory resource used to allocate the returned scalar's device memory
// * @return Product as scalar of type `output_dtype`.
// */
// std::unique_ptr<scalar> product(
// column_view const& col,
// data_type const output_dtype,
// rmm::cuda_stream_view stream = rmm::cuda_stream_default,
// rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

} // namespace reduction
} // namespace cudf
7 changes: 7 additions & 0 deletions cpp/include/cudf/reduction.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,13 @@ std::unique_ptr<scalar> reduce(
data_type output_dtype,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

std::unique_ptr<column> segmented_reduce(
column_view const& col,
column_view const& offsets,
std::unique_ptr<aggregation> const& agg,
data_type output_dtype,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Computes the scan of a column.
*
Expand Down
91 changes: 91 additions & 0 deletions cpp/src/reductions/segmented_reductions.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <cudf/column/column.hpp>
#include <cudf/detail/aggregation/aggregation.hpp>
#include <cudf/detail/copy.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/quantiles.hpp>
#include <cudf/detail/reduction_functions.hpp>
#include <cudf/detail/sorting.hpp>
#include <cudf/detail/stream_compaction.hpp>
#include <cudf/reduction.hpp>
#include <cudf/scalar/scalar_factories.hpp>

#include <cudf/structs/structs_column_view.hpp>
#include <rmm/cuda_stream_view.hpp>

namespace cudf {
namespace detail {
struct segmented_reduce_dispatch_functor {
column_view const col;
column_view const offsets;
data_type output_dtype;
rmm::mr::device_memory_resource* mr;
rmm::cuda_stream_view stream;

segmented_reduce_dispatch_functor(column_view const& col,
column_view const& offsets,
data_type output_dtype,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
: col(col), offsets(offsets), output_dtype(output_dtype), mr(mr), stream(stream)
{
}

template <aggregation::Kind k>
std::unique_ptr<column> operator()(std::unique_ptr<aggregation> const& agg)
{
switch (k) {
case aggregation::SUM:
return reduction::segmented_sum(col, offsets, output_dtype, stream, mr);
break;
// case aggregation::PRODUCT: return reduction::product(col, output_dtype, stream, mr); break;
// case aggregation::MIN: return reduction::min(col, output_dtype, stream, mr); break;
// case aggregation::MAX: return reduction::max(col, output_dtype, stream, mr); break;
// case aggregation::ANY: return reduction::any(col, output_dtype, stream, mr); break;
// case aggregation::ALL: return reduction::all(col, output_dtype, stream, mr); break;
default: CUDF_FAIL("Unsupported aggregation type.");
}
}
};

std::unique_ptr<column> segmented_reduce(
column_view const& col,
column_view const& offsets,
std::unique_ptr<aggregation> const& agg,
data_type output_dtype,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
// TODO: handle invalid inputs.

return aggregation_dispatcher(
agg->kind, segmented_reduce_dispatch_functor{col, offsets, output_dtype, stream, mr}, agg);
}
} // namespace detail

std::unique_ptr<column> segmented_reduce(column_view const& col,
column_view const& offsets,
std::unique_ptr<aggregation> const& agg,
data_type output_dtype,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::segmented_reduce(col, offsets, agg, output_dtype, rmm::cuda_stream_default, mr);
}

} // namespace cudf
42 changes: 42 additions & 0 deletions cpp/src/reductions/segmented_sum.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <cudf/detail/reduction_functions.hpp>
#include <cudf/dictionary/dictionary_column_view.hpp>
#include <reductions/simple_segmented.cuh>

#include <rmm/cuda_stream_view.hpp>

namespace cudf {
namespace reduction {

std::unique_ptr<cudf::column> segmented_sum(column_view const& col,
column_view const& offsets,
cudf::data_type const output_dtype,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
return cudf::type_dispatcher(col.type(),
simple::column_type_dispatcher<cudf::reduction::op::sum>{},
col,
offsets,
output_dtype,
stream,
mr);
}

} // namespace reduction
} // namespace cudf
Loading

0 comments on commit 7ccdeb8

Please sign in to comment.