Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add groupby product support #7763

Merged
merged 21 commits into from
Apr 21, 2021
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
2c6451e
add atomicMul
karthikeyann Mar 30, 2021
35e805e
add PRODUCT to update_target_element
karthikeyann Mar 30, 2021
04b094c
add PRODUCT to hash groupby
karthikeyann Mar 30, 2021
7492f25
add PRODUCT to sort groupby
karthikeyann Mar 30, 2021
9fe73c5
add groupby product unit tests
karthikeyann Mar 31, 2021
d071b70
copyright year update
karthikeyann Mar 31, 2021
63b5ed7
add python units tests of groupby prod
karthikeyann Mar 31, 2021
7c27145
Merge branch 'branch-0.20' of github.com:rapidsai/cudf into fea-group…
karthikeyann Apr 5, 2021
692f646
remove cuda 10.2 limitations
karthikeyann Apr 6, 2021
f5237d2
address review comments
karthikeyann Apr 6, 2021
254539f
documentation examples
karthikeyann Apr 12, 2021
552a0d0
Merge branch 'branch-0.20' of github.com:rapidsai/cudf into fea-group…
karthikeyann Apr 12, 2021
449f207
stream arg fix
karthikeyann Apr 12, 2021
d0e92e4
Apply suggestions from code review
karthikeyann Apr 13, 2021
1608549
Update python/cudf/cudf/_lib/groupby.pyx
karthikeyann Apr 13, 2021
034bb24
style fix
karthikeyann Apr 13, 2021
abbe816
Merge branch 'branch-0.20' of github.com:rapidsai/cudf into fea-group…
karthikeyann Apr 16, 2021
bc1ad9c
address review comments
karthikeyann Apr 19, 2021
74b7e0a
Merge branch 'branch-0.20' of github.com:rapidsai/cudf into fea-group…
karthikeyann Apr 19, 2021
2ed1a9b
review comments updates
karthikeyann Apr 20, 2021
98b2218
Merge branch 'branch-0.20' of github.com:rapidsai/cudf into fea-group…
karthikeyann Apr 20, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@ add_library(cudf
src/groupby/sort/group_min.cu
src/groupby/sort/group_nth_element.cu
src/groupby/sort/group_nunique.cu
src/groupby/sort/group_product.cu
src/groupby/sort/group_quantiles.cu
src/groupby/sort/group_std.cu
src/groupby/sort/group_sum.cu
Expand Down
38 changes: 24 additions & 14 deletions cpp/include/cudf/detail/aggregation/aggregation.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -314,29 +314,18 @@ struct update_target_element<dictionary32, aggregation::SUM, target_has_nulls, s
}
};

// This code will segfault in nvcc/ptxas 10.2 only
// https://nvbugswb.nvidia.com/NvBugs5/SWBug.aspx?bugid=3186317
// Enabling only for 2 types does not segfault. Using for unit tests.
#if (__CUDACC_VER_MAJOR__ == 10) and (__CUDACC_VER_MINOR__ == 2)
template <typename T>
constexpr bool is_SOS_supported()
{
return std::is_floating_point<T>::value;
}
#else
template <typename T>
constexpr bool is_SOS_supported()
constexpr bool is_product_supported()
{
return is_numeric<T>();
}
#endif

template <typename Source, bool target_has_nulls, bool source_has_nulls>
struct update_target_element<Source,
aggregation::SUM_OF_SQUARES,
target_has_nulls,
source_has_nulls,
std::enable_if_t<is_SOS_supported<Source>()>> {
std::enable_if_t<is_product_supported<Source>()>> {
__device__ void operator()(mutable_column_device_view target,
size_type target_index,
column_device_view source,
Expand All @@ -351,6 +340,26 @@ struct update_target_element<Source,
}
};

/**
 * @brief Specialization of `update_target_element` for the PRODUCT
 * aggregation: atomically multiplies the target element by the source element.
 *
 * Enabled only for types where `is_product_supported<Source>()` holds.
 */
template <typename Source, bool target_has_nulls, bool source_has_nulls>
struct update_target_element<Source,
                             aggregation::PRODUCT,
                             target_has_nulls,
                             source_has_nulls,
                             std::enable_if_t<is_product_supported<Source>()>> {
  __device__ void operator()(mutable_column_device_view target,
                             size_type target_index,
                             column_device_view source,
                             size_type source_index) const noexcept
  {
    // Null source elements contribute nothing to the product.
    if (source_has_nulls and source.is_null(source_index)) { return; }

    using Target = target_type_t<Source, aggregation::PRODUCT>;
    atomicMul(&target.element<Target>(target_index),
              static_cast<Target>(source.element<Source>(source_index)));
    // Once any non-null source value has been folded in, the target is valid.
    if (target_has_nulls and target.is_null(target_index)) { target.set_valid(target_index); }
  }
};

template <typename Source, bool target_has_nulls, bool source_has_nulls>
struct update_target_element<
Source,
Expand Down Expand Up @@ -559,7 +568,8 @@ struct identity_initializer {
k == aggregation::COUNT_VALID or k == aggregation::COUNT_ALL or
k == aggregation::ARGMAX or k == aggregation::ARGMIN or
k == aggregation::SUM_OF_SQUARES or k == aggregation::STD or
k == aggregation::VARIANCE);
k == aggregation::VARIANCE or
(k == aggregation::PRODUCT and is_product_supported<T>()));
}

template <typename T, aggregation::Kind k>
Expand Down
22 changes: 22 additions & 0 deletions cpp/include/cudf/detail/utilities/device_atomics.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,28 @@ __forceinline__ __device__ T atomicAdd(T* address, T val)
return cudf::genericAtomicOperation(address, val, cudf::DeviceSum{});
}

/**
 * @brief Overloads for `atomicMul`
 * reads the `old` located at the `address` in global or shared memory,
 * computes (old * val), and stores the result back to memory at the same
 * address. These three operations are performed in one atomic transaction.
 *
 * The supported cudf types for `atomicMul` are:
 * int8_t, int16_t, int32_t, int64_t, float, double, and bool
 *
 * All types are implemented by `atomicCAS`.
 *
 * @param[in] address The address of old value in global or shared memory
 * @param[in] val The value to be multiplied
 *
 * @returns The old value at `address`
 */
template <typename T>
__forceinline__ __device__ T atomicMul(T* address, T val)
{
  // Delegate to the generic atomic read-modify-write helper, parameterized
  // with the product binary operator.
  return cudf::genericAtomicOperation(address, val, cudf::DeviceProduct{});
}

/**
* @brief Overloads for `atomicMin`
* reads the `old` located at the `address` in global or shared memory,
Expand Down
39 changes: 18 additions & 21 deletions cpp/src/groupby/hash/groupby.cu
Original file line number Diff line number Diff line change
Expand Up @@ -55,33 +55,37 @@ namespace groupby {
namespace detail {
namespace hash {
namespace {
// This is a temporary fix due to compiler bug and we can resort back to
// constexpr once cuda 10.2 becomes RAPIDS's minimum compiler version
#if 0

/**
* @brief List of aggregation operations that can be computed with a hash-based
* implementation.
*/
constexpr std::array<aggregation::Kind, 10> hash_aggregations{
aggregation::SUM, aggregation::MIN, aggregation::MAX,
aggregation::COUNT_VALID, aggregation::COUNT_ALL,
aggregation::ARGMIN, aggregation::ARGMAX,
aggregation::SUM_OF_SQUARES,
aggregation::MEAN, aggregation::STD, aggregation::VARIANCE};

//Could be hash: SUM, PRODUCT, MIN, MAX, COUNT_VALID, COUNT_ALL, ANY, ALL,
constexpr std::array<aggregation::Kind, 12> hash_aggregations{aggregation::SUM,
aggregation::PRODUCT,
aggregation::MIN,
aggregation::MAX,
aggregation::COUNT_VALID,
aggregation::COUNT_ALL,
aggregation::ARGMIN,
aggregation::ARGMAX,
aggregation::SUM_OF_SQUARES,
aggregation::MEAN,
aggregation::STD,
aggregation::VARIANCE};

// Could be hash: SUM, PRODUCT, MIN, MAX, COUNT_VALID, COUNT_ALL, ANY, ALL,
// Compound: MEAN(SUM, COUNT_VALID), VARIANCE, STD(MEAN (SUM, COUNT_VALID), COUNT_VALID),
// ARGMAX, ARGMIN

/**
 * @brief Linear membership test over a `std::array`, usable at compile time.
 *
 * @param haystack The array to search
 * @param needle The value to look for
 * @return true if `needle` compares equal to any element of `haystack`
 */
template <class T, std::size_t N>
constexpr bool array_contains(std::array<T, N> const& haystack, T needle)
{
  for (auto const& element : haystack) {
    if (element == needle) { return true; }
  }
  return false;
}

/**
 * @brief Indicates whether the specified aggregation operation can be computed
 * with a hash-based implementation.
 *
 * @param t The aggregation kind to check
 * @return true if `t` is supported by the hash-based groupby implementation
 */
bool constexpr is_hash_aggregation(aggregation::Kind t)
{
  // Membership is checked against the constexpr `hash_aggregations` table;
  // the hard-coded kind list (a CUDA 10.2 compiler-bug workaround) is gone.
  return array_contains(hash_aggregations, t);
}

template <typename Map>
Expand Down
12 changes: 12 additions & 0 deletions cpp/src/groupby/sort/aggregate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,18 @@ void aggregrate_result_functor::operator()<aggregation::SUM>(aggregation const&
get_grouped_values(), helper.num_groups(stream), helper.group_labels(stream), stream, mr));
};

template <>
void aggregrate_result_functor::operator()<aggregation::PRODUCT>(aggregation const& agg)
{
  // Skip the computation when a previous request already produced and cached
  // the PRODUCT result for this column.
  if (cache.has_result(col_idx, agg)) return;

  cache.add_result(
    col_idx,
    agg,
    detail::group_product(
      get_grouped_values(), helper.num_groups(stream), helper.group_labels(stream), stream, mr));
};

template <>
void aggregrate_result_functor::operator()<aggregation::ARGMAX>(aggregation const& agg)
{
Expand Down
46 changes: 46 additions & 0 deletions cpp/src/groupby/sort/group_product.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <cudf/dictionary/dictionary_column_view.hpp>
#include <cudf/utilities/span.hpp>
#include <groupby/sort/group_single_pass_reduction_util.cuh>
karthikeyann marked this conversation as resolved.
Show resolved Hide resolved

#include <rmm/cuda_stream_view.hpp>

namespace cudf {
namespace groupby {
namespace detail {
/**
 * @brief Computes the per-group product of `values` for the sort-based groupby.
 *
 * @param values Grouped values to reduce
 * @param num_groups Number of groups
 * @param group_labels Group label for each element of `values`
 * @param stream CUDA stream used for device memory operations and kernel launches
 * @param mr Device memory resource used to allocate the returned column
 * @return Column of per-group products
 */
std::unique_ptr<column> group_product(column_view const& values,
                                      size_type num_groups,
                                      cudf::device_span<size_type const> group_labels,
                                      rmm::cuda_stream_view stream,
                                      rmm::mr::device_memory_resource* mr)
{
  // Dictionary columns dispatch on their keys' type; all others on their own type.
  auto const dispatch_type = cudf::is_dictionary(values.type())
                               ? dictionary_column_view(values).keys().type()
                               : values.type();
  return type_dispatcher(dispatch_type,
                         reduce_functor<aggregation::PRODUCT>{},
                         values,
                         num_groups,
                         group_labels,
                         stream,
                         mr);
}

} // namespace detail
} // namespace groupby
} // namespace cudf
Loading