Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[REVIEW] disallow SUM and MEAN of timestamp types #5319

Merged
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
a5067bd
disable DeviceSum for timestamp
karthikeyann May 29, 2020
3e72ab0
add transformer_mean for column MEAN reduction - enables timestamp MEAN
karthikeyann May 29, 2020
afb4f34
specialize rolling MEAN for timestamp
karthikeyann May 29, 2020
fe2e54c
specialize unit test for rolling MEAN of timestamp
karthikeyann May 29, 2020
c80de81
specialize grouped rolling MEAN of timestamp
karthikeyann May 29, 2020
008d201
to get rid of warning in rolling.cu compilation
karthikeyann May 29, 2020
e90efd0
disable SUM for timestamp in device atomic tests
karthikeyann May 29, 2020
be9ddff
changelog entry for PR #5319
karthikeyann May 29, 2020
b1313a8
Merge branch 'branch-0.15' into bug-disallow-sum-timestamp
karthikeyann Jun 1, 2020
5b856c4
Merge branch 'branch-0.15' into bug-disallow-sum-timestamp
karthikeyann Jun 15, 2020
b7dd0df
Revert "specialize rolling MEAN for timestamp"
karthikeyann Jun 15, 2020
fab057f
Revert "add transformer_mean for column MEAN reduction - enables time…
karthikeyann Jun 15, 2020
234a63b
Revert "specialize grouped rolling MEAN of timestamp"
karthikeyann Jun 15, 2020
f2cdefe
Revert "specialize unit test for rolling MEAN of timestamp"
karthikeyann Jun 15, 2020
9601911
remove MEAN support for timestamp
karthikeyann Jun 15, 2020
34c2dc1
remove product operator of timestamp
karthikeyann Jun 15, 2020
de1b643
Merge branch 'branch-0.15' into bug-disallow-sum-timestamp
karthikeyann Jun 15, 2020
eccf0c9
Merge branch 'branch-0.15' into bug-disallow-sum-timestamp
karthikeyann Jun 16, 2020
2026363
Merge branch 'branch-0.15' into bug-disallow-sum-timestamp
karthikeyann Jul 24, 2020
53f427a
Merge branch 'branch-0.15' into bug-disallow-sum-timestamp
karthikeyann Jul 28, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
- PR #5300 Add support to ignore `None` in `cudf.concat` input
- PR #5334 Fix pickling sizeof test
- PR #5337 Fix broken alias from DataFrame.{at,iat} to {loc, iloc}
- PR #5319 Disallow SUM and specialize MEAN of timestamp types


# cuDF 0.14.0 (Date TBD)
Expand Down
23 changes: 22 additions & 1 deletion cpp/include/cudf/detail/reduction_operators.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -191,12 +191,33 @@ struct compound_op : public simple_op<Derived> {
// computes the final ResultType from the IntermediateType.
// intermediate::compute_result method is enforced by CRTP base class compound_op

template <typename T, typename Enable = void>
class transformer_mean;
template <typename T>
class transformer_mean<T, typename std::enable_if_t<!cudf::is_timestamp<T>()>> {
public:
template <typename InputType>
CUDA_HOST_DEVICE_CALLABLE auto operator()(InputType const& t) const
{
return thrust::identity<T>{}(t);
}
};
template <typename T>
class transformer_mean<T, typename std::enable_if_t<cudf::is_timestamp<T>()>> {
public:
template <typename InputType>
CUDA_HOST_DEVICE_CALLABLE auto operator()(InputType const& t) const
{
return thrust::identity<T>{}(t).time_since_epoch();
}
};

// operator for `mean`
struct mean : public compound_op<mean> {
using op = cudf::DeviceSum;

template <typename ResultType>
using transformer = thrust::identity<ResultType>;
using transformer = transformer_mean<ResultType>;

template <typename ResultType>
struct intermediate {
Expand Down
6 changes: 0 additions & 6 deletions cpp/include/cudf/detail/utilities/device_operators.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,6 @@ namespace cudf {
// Binary operators
/* @brief binary `sum` operator */
struct DeviceSum {
template <typename T, typename std::enable_if_t<cudf::is_timestamp<T>()>* = nullptr>
CUDA_HOST_DEVICE_CALLABLE T operator()(const T& lhs, const T& rhs)
{
return T{DeviceSum{}(lhs.time_since_epoch(), rhs.time_since_epoch())};
}

template <typename T, typename std::enable_if_t<!cudf::is_timestamp<T>()>* = nullptr>
CUDA_HOST_DEVICE_CALLABLE T operator()(const T& lhs, const T& rhs)
{
Expand Down
42 changes: 41 additions & 1 deletion cpp/src/rolling/rolling.cu
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,8 @@ template <typename InputType,
bool has_nulls>
std::enable_if_t<!std::is_same<InputType, cudf::string_view>::value and
!(op == aggregation::COUNT_VALID || op == aggregation::COUNT_ALL ||
op == aggregation::ROW_NUMBER),
op == aggregation::ROW_NUMBER) and
!(is_timestamp<InputType>() and op == aggregation::MEAN),
bool>
__device__ process_rolling_window(column_device_view input,
mutable_column_device_view output,
Expand Down Expand Up @@ -192,6 +193,45 @@ std::enable_if_t<!std::is_same<InputType, cudf::string_view>::value and
return output_is_valid;
}

/**
* @brief Mean on only timestamp types and returns true if the
* operation was valid, else false.
*/
template <typename InputType,
typename OutputType,
typename agg_op,
aggregation::Kind op,
bool has_nulls>
std::enable_if_t<(is_timestamp<InputType>() and op == aggregation::MEAN), bool> __device__
process_rolling_window(column_device_view input,
mutable_column_device_view output,
size_type start_index,
size_type end_index,
size_type current_index,
size_type min_periods)
{
// declare this as volatile to avoid some compiler optimizations that lead to incorrect results
// for CUDA 10.0 and below (fixed in CUDA 10.1)
volatile cudf::size_type count = 0;
OutputType val = agg_op::template identity<OutputType>();

for (size_type j = start_index; j < end_index; j++) {
if (!has_nulls || input.is_valid(j)) {
OutputType element = input.element<InputType>(j);
val = agg_op{}(element.time_since_epoch(), val.time_since_epoch());
count++;
}
}

bool output_is_valid = (count >= min_periods);

// store the output value, one per thread
cudf::detail::rolling_store_output_functor<OutputType, op == aggregation::MEAN>{}(
output.element<OutputType>(current_index), val, count);

return output_is_valid;
}

/**
* @brief Computes the rolling window function
*
Expand Down
11 changes: 5 additions & 6 deletions cpp/src/rolling/rolling_detail.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ namespace detail {
template <typename ColumnType, class AggOp, aggregation::Kind op>
static constexpr bool is_rolling_supported()
{
if (!cudf::detail::is_valid_aggregation<ColumnType, op>()) { return false; }

if (cudf::is_numeric<ColumnType>()) {
if (!cudf::detail::is_valid_aggregation<ColumnType, op>()) {
return false;
} else if (cudf::is_numeric<ColumnType>()) {
constexpr bool is_comparable_countable_op = std::is_same<AggOp, DeviceMin>::value or
std::is_same<AggOp, DeviceMax>::value or
std::is_same<AggOp, DeviceCount>::value;
Expand Down Expand Up @@ -60,9 +60,8 @@ static constexpr bool is_rolling_supported()
} else if (std::is_same<ColumnType, cudf::list_view>()) {
return (op == aggregation::COUNT_VALID) or (op == aggregation::COUNT_ALL) or
(op == aggregation::ROW_NUMBER);
}

return false;
} else
return false;
}

// return true if this Op is specialized for strings.
Expand Down
24 changes: 22 additions & 2 deletions cpp/tests/device_atomics/device_atomics_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,22 @@ __global__ void gpu_atomic_test(T* result, T* data, size_t size)
}

template <typename T, typename BinaryOp>
constexpr inline bool is_timestamp_sum()
{
return cudf::is_timestamp<T>() && std::is_same<BinaryOp, cudf::DeviceSum>::value;
}
// Disable SUM of TIMESTAMP types
template <typename T,
typename BinaryOp,
typename std::enable_if_t<is_timestamp_sum<T, BinaryOp>()>* = nullptr>
__device__ T atomic_op(T* addr, T const& value, BinaryOp op)
{
return {};
}

template <typename T,
typename BinaryOp,
typename std::enable_if_t<!is_timestamp_sum<T, BinaryOp>()>* = nullptr>
__device__ T atomic_op(T* addr, T const& value, BinaryOp op)
{
T old_value = *addr;
Expand Down Expand Up @@ -133,10 +149,14 @@ struct AtomicsTest : public cudf::test::BaseFixture {
CUDA_TRY(cudaDeviceSynchronize());
CHECK_CUDA(0);

EXPECT_EQ(host_result[0], exact[0]) << "atomicAdd test failed";
if (!is_timestamp_sum<T, cudf::DeviceSum>()) {
EXPECT_EQ(host_result[0], exact[0]) << "atomicAdd test failed";
}
EXPECT_EQ(host_result[1], exact[1]) << "atomicMin test failed";
EXPECT_EQ(host_result[2], exact[2]) << "atomicMax test failed";
EXPECT_EQ(host_result[3], exact[0]) << "atomicAdd test(2) failed";
if (!is_timestamp_sum<T, cudf::DeviceSum>()) {
EXPECT_EQ(host_result[3], exact[0]) << "atomicAdd test(2) failed";
}
EXPECT_EQ(host_result[4], exact[1]) << "atomicMin test(2) failed";
EXPECT_EQ(host_result[5], exact[2]) << "atomicMax test(2) failed";
}
Expand Down
40 changes: 38 additions & 2 deletions cpp/tests/grouped_rolling/grouped_rolling_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,23 @@ class GroupedRollingTest : public cudf::test::BaseFixture {
return col.release();
}

template <typename OutputType,
typename agg_op,
bool is_mean_of_timestamp,
std::enable_if_t<!is_mean_of_timestamp>* = nullptr>
auto run_op(OutputType& val, OutputType const& in)
{
val = agg_op{}(in, val);
}
template <typename OutputType,
typename agg_op,
bool is_mean_of_timestamp,
std::enable_if_t<is_mean_of_timestamp>* = nullptr>
auto run_op(OutputType& val, OutputType const& in)
{
val = static_cast<OutputType>(agg_op{}(in.time_since_epoch(), val.time_since_epoch()));
}

template <typename agg_op,
cudf::aggregation::Kind k,
typename OutputType,
Expand Down Expand Up @@ -374,7 +391,8 @@ class GroupedRollingTest : public cudf::test::BaseFixture {
size_type count = 0;
for (size_type j = start_index; j < end_index; j++) {
if (!input.nullable() || cudf::bit_is_set(valid_mask, j)) {
val = op(static_cast<OutputType>(in_col[j]), val);
run_op<OutputType, agg_op, (is_mean and cudf::is_timestamp<OutputType>())>(
val, static_cast<OutputType>(in_col[j]));
count++;
}
}
Expand Down Expand Up @@ -941,6 +959,23 @@ class GroupedTimeRangeRollingTest : public cudf::test::BaseFixture {
return col.release();
}

template <typename OutputType,
typename agg_op,
bool is_mean_of_timestamp,
std::enable_if_t<!is_mean_of_timestamp>* = nullptr>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've always found this explicit "is_mean" bool that gets passed around a little strange (the actual rolling code did it too). The logic for this can be inferred from the other parameters, ie

Suggested change
std::enable_if_t<!is_mean_of_timestamp>* = nullptr>
template <typename OutputType,
typename agg_op,
std::enable_if_t<!(agg_op == MEAN && OutputType == cudf::is_timestamp<OutputType>())>* = nullptr
>

Then this could be bubbled up further so that is_mean can be removed from the template params for create_reference_output

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Issue #5466 is created to track this.

auto run_op(OutputType& val, OutputType const& in)
{
val = agg_op{}(in, val);
}
template <typename OutputType,
typename agg_op,
bool is_mean_of_timestamp,
std::enable_if_t<is_mean_of_timestamp>* = nullptr>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as above

auto run_op(OutputType& val, OutputType const& in)
{
val = static_cast<OutputType>(agg_op{}(in.time_since_epoch(), val.time_since_epoch()));
}

template <typename agg_op,
cudf::aggregation::Kind k,
typename OutputType,
Expand Down Expand Up @@ -1010,7 +1045,8 @@ class GroupedTimeRangeRollingTest : public cudf::test::BaseFixture {
size_type count = 0;
for (size_type j = start_index; j < end_index; j++) {
if (!input.nullable() || cudf::bit_is_set(valid_mask, j)) {
val = op(static_cast<OutputType>(in_col[j]), val);
run_op<OutputType, agg_op, (is_mean and cudf::is_timestamp<OutputType>())>(
val, static_cast<OutputType>(in_col[j]));
count++;
}
}
Expand Down
20 changes: 19 additions & 1 deletion cpp/tests/rolling/rolling_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,23 @@ class RollingTest : public cudf::test::BaseFixture {
return col.release();
}

template <typename OutputType,
typename agg_op,
bool is_mean_of_timestamp,
std::enable_if_t<!is_mean_of_timestamp>* = nullptr>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as comment from grouped_rolling_test.cpp

auto run_op(OutputType& val, OutputType const& in)
{
val = agg_op{}(in, val);
}
template <typename OutputType,
typename agg_op,
bool is_mean_of_timestamp,
std::enable_if_t<is_mean_of_timestamp>* = nullptr>
auto run_op(OutputType& val, OutputType const& in)
{
val = static_cast<OutputType>(agg_op{}(in.time_since_epoch(), val.time_since_epoch()));
}

template <typename agg_op,
cudf::aggregation::Kind k,
typename OutputType,
Expand Down Expand Up @@ -297,7 +314,8 @@ class RollingTest : public cudf::test::BaseFixture {
size_type count = 0;
for (size_type j = start_index; j < end_index; j++) {
if (!input.nullable() || cudf::bit_is_set(valid_mask, j)) {
val = op(static_cast<OutputType>(in_col[j]), val);
run_op<OutputType, agg_op, (is_mean and cudf::is_timestamp<OutputType>())>(
val, static_cast<OutputType>(in_col[j]));
count++;
}
}
Expand Down