Skip to content

Commit

Permalink
Merge pull request #5319 from karthikeyann/bug-disallow-sum-timestamp
Browse files Browse the repository at this point in the history
[REVIEW] disallow SUM and MEAN of timestamp types
  • Loading branch information
karthikeyann authored Jul 28, 2020
2 parents 8840169 + 53f427a commit a4a9aa6
Show file tree
Hide file tree
Showing 6 changed files with 88 additions and 53 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@
- PR #5750 Add RMM_ROOT/include to the spdlog search path in JNI build
- PR #5763 Update Java slf4j version to match Spark 3.0
- PR #5766 Fix issue related to `iloc` and slicing a `DataFrame`
- PR #5319 Disallow SUM and specialize MEAN of timestamp types


# cuDF 0.14.0 (03 Jun 2020)
Expand Down
12 changes: 0 additions & 12 deletions cpp/include/cudf/detail/utilities/device_operators.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,6 @@ namespace cudf {
// Binary operators
/* @brief binary `sum` operator */
struct DeviceSum {
template <typename T, typename std::enable_if_t<cudf::is_timestamp<T>()>* = nullptr>
CUDA_HOST_DEVICE_CALLABLE T operator()(const T& lhs, const T& rhs)
{
return T{DeviceSum{}(lhs.time_since_epoch(), rhs.time_since_epoch())};
}

template <typename T, typename std::enable_if_t<!cudf::is_timestamp<T>()>* = nullptr>
CUDA_HOST_DEVICE_CALLABLE T operator()(const T& lhs, const T& rhs)
{
Expand Down Expand Up @@ -145,12 +139,6 @@ struct DeviceMax {

/* @brief binary `product` operator */
struct DeviceProduct {
template <typename T, typename std::enable_if_t<cudf::is_timestamp<T>()>* = nullptr>
CUDA_HOST_DEVICE_CALLABLE T operator()(const T& lhs, const T& rhs)
{
return T{DeviceProduct{}(lhs.time_since_epoch().count(), rhs.time_since_epoch().count())};
}

template <typename T, typename std::enable_if_t<!cudf::is_timestamp<T>()>* = nullptr>
CUDA_HOST_DEVICE_CALLABLE T operator()(const T& lhs, const T& rhs)
{
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/rolling/rolling_detail.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ static constexpr bool is_rolling_supported()
} else if (cudf::is_timestamp<ColumnType>()) {
return (op == aggregation::MIN) or (op == aggregation::MAX) or
(op == aggregation::COUNT_VALID) or (op == aggregation::COUNT_ALL) or
(op == aggregation::MEAN) or (op == aggregation::ROW_NUMBER);
(op == aggregation::ROW_NUMBER);

} else if (std::is_same<ColumnType, cudf::string_view>()) {
return (op == aggregation::MIN) or (op == aggregation::MAX) or
Expand Down
24 changes: 22 additions & 2 deletions cpp/tests/device_atomics/device_atomics_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,22 @@ __global__ void gpu_atomic_test(T* result, T* data, size_t size)
}

template <typename T, typename BinaryOp>
constexpr inline bool is_timestamp_sum()
{
return cudf::is_timestamp<T>() && std::is_same<BinaryOp, cudf::DeviceSum>::value;
}
// Disable SUM of TIMESTAMP types
template <typename T,
typename BinaryOp,
typename std::enable_if_t<is_timestamp_sum<T, BinaryOp>()>* = nullptr>
__device__ T atomic_op(T* addr, T const& value, BinaryOp op)
{
return {};
}

template <typename T,
typename BinaryOp,
typename std::enable_if_t<!is_timestamp_sum<T, BinaryOp>()>* = nullptr>
__device__ T atomic_op(T* addr, T const& value, BinaryOp op)
{
T old_value = *addr;
Expand Down Expand Up @@ -133,10 +149,14 @@ struct AtomicsTest : public cudf::test::BaseFixture {
CUDA_TRY(cudaDeviceSynchronize());
CHECK_CUDA(0);

EXPECT_EQ(host_result[0], exact[0]) << "atomicAdd test failed";
if (!is_timestamp_sum<T, cudf::DeviceSum>()) {
EXPECT_EQ(host_result[0], exact[0]) << "atomicAdd test failed";
}
EXPECT_EQ(host_result[1], exact[1]) << "atomicMin test failed";
EXPECT_EQ(host_result[2], exact[2]) << "atomicMax test failed";
EXPECT_EQ(host_result[3], exact[0]) << "atomicAdd test(2) failed";
if (!is_timestamp_sum<T, cudf::DeviceSum>()) {
EXPECT_EQ(host_result[3], exact[0]) << "atomicAdd test(2) failed";
}
EXPECT_EQ(host_result[4], exact[1]) << "atomicMin test(2) failed";
EXPECT_EQ(host_result[5], exact[2]) << "atomicMax test(2) failed";
}
Expand Down
71 changes: 35 additions & 36 deletions cpp/tests/grouped_rolling/grouped_rolling_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -192,13 +192,23 @@ class GroupedRollingTest : public cudf::test::BaseFixture {
following_window,
min_periods,
cudf::make_max_aggregation());
run_test_col(keys,
input,
expected_grouping,
preceding_window,
following_window,
min_periods,
cudf::make_mean_aggregation());

if (!cudf::is_timestamp(input.type())) {
run_test_col(keys,
input,
expected_grouping,
preceding_window,
following_window,
min_periods,
cudf::make_sum_aggregation());
run_test_col(keys,
input,
expected_grouping,
preceding_window,
following_window,
min_periods,
cudf::make_mean_aggregation());
}
run_test_col(keys,
input,
expected_grouping,
Expand Down Expand Up @@ -229,16 +239,6 @@ class GroupedRollingTest : public cudf::test::BaseFixture {
min_periods,
ptx_udf_agg);
}

if (!cudf::is_timestamp(input.type())) {
run_test_col(keys,
input,
expected_grouping,
preceding_window,
following_window,
min_periods,
cudf::make_sum_aggregation());
}
}

private:
Expand Down Expand Up @@ -763,25 +763,6 @@ class GroupedTimeRangeRollingTest : public cudf::test::BaseFixture {
following_window_in_days,
min_periods,
cudf::make_max_aggregation());
run_test_col(keys,
timestamp_column,
timestamp_order,
input,
expected_grouping,
preceding_window_in_days,
following_window_in_days,
min_periods,
cudf::make_mean_aggregation());
run_test_col(keys,
timestamp_column,
timestamp_order,
input,
expected_grouping,
preceding_window_in_days,
following_window_in_days,
min_periods,
cudf::make_row_number_aggregation());

if (!cudf::is_timestamp(input.type())) {
run_test_col(keys,
timestamp_column,
Expand All @@ -792,7 +773,25 @@ class GroupedTimeRangeRollingTest : public cudf::test::BaseFixture {
following_window_in_days,
min_periods,
cudf::make_sum_aggregation());
run_test_col(keys,
timestamp_column,
timestamp_order,
input,
expected_grouping,
preceding_window_in_days,
following_window_in_days,
min_periods,
cudf::make_mean_aggregation());
}
run_test_col(keys,
timestamp_column,
timestamp_order,
input,
expected_grouping,
preceding_window_in_days,
following_window_in_days,
min_periods,
cudf::make_row_number_aggregation());
}

private:
Expand Down
31 changes: 29 additions & 2 deletions cpp/tests/rolling/rolling_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -201,12 +201,12 @@ class RollingTest : public cudf::test::BaseFixture {
cudf::make_count_aggregation(cudf::null_policy::INCLUDE));
run_test_col(
input, preceding_window, following_window, min_periods, cudf::make_max_aggregation());
run_test_col(
input, preceding_window, following_window, min_periods, cudf::make_mean_aggregation());

if (not cudf::is_timestamp(input.type())) {
run_test_col(
input, preceding_window, following_window, min_periods, cudf::make_sum_aggregation());
run_test_col(
input, preceding_window, following_window, min_periods, cudf::make_mean_aggregation());
}
}

Expand Down Expand Up @@ -496,6 +496,33 @@ TEST_F(RollingErrorTest, SumTimestampNotSupported)
cudf::logic_error);
}

// incorrect type/aggregation combo: mean of timestamps
TEST_F(RollingErrorTest, MeanTimestampNotSupported)
{
constexpr size_type size{10};
fixed_width_column_wrapper<cudf::timestamp_D> input_D(thrust::make_counting_iterator(0),
thrust::make_counting_iterator(size));
fixed_width_column_wrapper<cudf::timestamp_s> input_s(thrust::make_counting_iterator(0),
thrust::make_counting_iterator(size));
fixed_width_column_wrapper<cudf::timestamp_ms> input_ms(thrust::make_counting_iterator(0),
thrust::make_counting_iterator(size));
fixed_width_column_wrapper<cudf::timestamp_us> input_us(thrust::make_counting_iterator(0),
thrust::make_counting_iterator(size));
fixed_width_column_wrapper<cudf::timestamp_ns> input_ns(thrust::make_counting_iterator(0),
thrust::make_counting_iterator(size));

EXPECT_THROW(cudf::rolling_window(input_D, 2, 2, 0, cudf::make_mean_aggregation()),
cudf::logic_error);
EXPECT_THROW(cudf::rolling_window(input_s, 2, 2, 0, cudf::make_mean_aggregation()),
cudf::logic_error);
EXPECT_THROW(cudf::rolling_window(input_ms, 2, 2, 0, cudf::make_mean_aggregation()),
cudf::logic_error);
EXPECT_THROW(cudf::rolling_window(input_us, 2, 2, 0, cudf::make_mean_aggregation()),
cudf::logic_error);
EXPECT_THROW(cudf::rolling_window(input_ns, 2, 2, 0, cudf::make_mean_aggregation()),
cudf::logic_error);
}

TYPED_TEST_CASE(RollingTest, cudf::test::FixedWidthTypes);

// simple example from Pandas docs
Expand Down

0 comments on commit a4a9aa6

Please sign in to comment.