Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support VARIANCE and STD aggregation in rolling op #8809

Merged
merged 40 commits into from
Sep 8, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
7cc6630
initial
isVoid Jul 21, 2021
6f64691
Compiles
isVoid Jul 21, 2021
2ab88fe
style
isVoid Jul 22, 2021
2f63568
clean up
isVoid Jul 23, 2021
16cb7a8
clean up
isVoid Jul 23, 2021
ddd59f0
header cleanup
isVoid Jul 23, 2021
bcd00f0
.
isVoid Jul 23, 2021
da8b755
More cleanup
isVoid Jul 23, 2021
3ff0d8d
revert ptx changes
isVoid Jul 23, 2021
fc00d8f
Static tests
isVoid Jul 23, 2021
1259df6
undo python changes
isVoid Jul 23, 2021
3539e1b
.
isVoid Jul 23, 2021
07aa54d
docs
isVoid Jul 23, 2021
d2a6407
Merge branch 'branch-21.10' of https://github.com/rapidsai/cudf into …
isVoid Jul 25, 2021
885d66e
remove count==1 restriction
isVoid Jul 25, 2021
68ab4ae
add ddof tests
isVoid Jul 25, 2021
1e1c8cd
docfix
isVoid Jul 26, 2021
1288a56
fixed_point fix
isVoid Jul 26, 2021
3a9e589
docs
isVoid Jul 26, 2021
490539c
docfix
isVoid Jul 26, 2021
394f0f0
docfix
isVoid Jul 27, 2021
bc0920b
remove thrust::reduce
isVoid Jul 27, 2021
934f104
.
isVoid Jul 27, 2021
b215199
Update cpp/src/rolling/rolling_detail.cuh
isVoid Jul 29, 2021
a3af3e9
Update cpp/src/rolling/rolling_detail.cuh
isVoid Jul 29, 2021
ae33de0
Update cpp/src/rolling/rolling_detail.cuh
isVoid Jul 29, 2021
645a172
address review comments
isVoid Jul 29, 2021
d94b8db
Apply suggestions from code review
isVoid Jul 29, 2021
0fe4a87
Add nan tests
isVoid Jul 30, 2021
9ce41ae
Remove auto generated column test
isVoid Jul 30, 2021
b62a40d
Merge branch 'branch-21.10' of https://github.com/rapidsai/cudf into …
isVoid Aug 19, 2021
43df716
count==0 case maps to invalid output.
isVoid Aug 21, 2021
d62eb00
Apply review comments: div by zero result is valid element
isVoid Aug 25, 2021
0b78ab3
Update cpp/src/rolling/rolling_detail.cuh
isVoid Aug 25, 2021
e3f89df
ddof > count situation is valid but nan
isVoid Aug 30, 2021
a504017
Merge branch 'rolling_std' of github.com:isVoid/cudf into rolling_std
isVoid Aug 30, 2021
d3cedb1
make operator constant
isVoid Aug 31, 2021
75e8140
header cleanup
isVoid Sep 1, 2021
d5359ba
Update cpp/tests/rolling/rolling_test.cpp
isVoid Sep 1, 2021
a14d77e
style
isVoid Sep 1, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions cpp/src/rolling/rolling_detail.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -320,9 +320,12 @@ struct DeviceRollingVariance {
[&input](auto i) { return input.is_valid_nocheck(i); })
: end_index - start_index;

// The denominator of the variance is `count - ddof`, it is strictly positive
// to guarantee that variance is non-negative.
bool output_is_valid = count > 0 and (count >= min_periods) and (ddof < count);
// Result is null in one of the following cases:
isVoid marked this conversation as resolved.
Show resolved Hide resolved
// - All inputs are null
// - Number of valid inputs is less than `min_periods`
// - Result would be negative, i.e. `ddof > count` makes the denominator `count - ddof` negative
// When `ddof == count`, the result is valid but holds a division-by-zero value (inf or nan)
bool output_is_valid = count > 0 and (count >= min_periods) and (ddof <= count);

if (output_is_valid) {
// Welford algorithm
Expand Down
97 changes: 15 additions & 82 deletions cpp/tests/rolling/rolling_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -623,99 +623,30 @@ TYPED_TEST(RollingVarStdTest, SimpleStaticVarianceStd)

using ResultType = double;

size_type const ddof = 1, min_periods = 0, preceding_window = 2, following_window = 1;

auto const col_data =
cudf::test::make_type_param_vector<TypeParam>({XXX, XXX, 9, 5, XXX, XXX, XXX, 0, 8, 5, 8});
const std::vector<bool> col_mask = {0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1};
double const nan = std::numeric_limits<double>::signaling_NaN();

auto const expected_var =
cudf::is_boolean<TypeParam>()
? std::vector<ResultType>{XXX, XXX, 0, 0, XXX, XXX, XXX, 0.5, 0.3333333333333333, 0, 0}
: std::vector<ResultType>{XXX, XXX, 8, 8, XXX, XXX, XXX, 32, 16.33333333333333, 3, 4.5};
std::vector<ResultType> expected_std(expected_var.size());
std::transform(expected_var.begin(), expected_var.end(), expected_std.begin(), [](auto const& x) {
return std::sqrt(x);
});

const std::vector<bool> expected_mask = {0, /* all null window */
0, /* count == ddof */
1,
1,
0, /* count == ddof */
0, /* all null window */
0, /* count == ddof */
1,
1,
1,
1};

fixed_width_column_wrapper<TypeParam> input(col_data.begin(), col_data.end(), col_mask.begin());
fixed_width_column_wrapper<ResultType> var_expect(
expected_var.begin(), expected_var.end(), expected_mask.begin());
fixed_width_column_wrapper<ResultType> std_expect(
expected_std.begin(), expected_std.end(), expected_mask.begin());

std::unique_ptr<cudf::column> var_result, std_result;
// static sizes
EXPECT_NO_THROW(var_result = cudf::rolling_window(input,
preceding_window,
following_window,
min_periods,
dynamic_cast<cudf::rolling_aggregation const&>(
*cudf::make_variance_aggregation(ddof))););
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*var_result, var_expect);

EXPECT_NO_THROW(std_result = cudf::rolling_window(input,
preceding_window,
following_window,
min_periods,
dynamic_cast<cudf::rolling_aggregation const&>(
*cudf::make_std_aggregation(ddof))););
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*std_result, std_expect);

#undef XXX
}

TYPED_TEST(RollingVarStdTest, SimpleStaticVarianceStdNegativeDDOF)
{
#define XXX 0 // NULL stub

using ResultType = double;

size_type const ddof = -1, min_periods = 0, preceding_window = 2, following_window = 1;
size_type const ddof = 1, min_periods = 0, preceding_window = 2, following_window = 1;

auto const col_data =
cudf::test::make_type_param_vector<TypeParam>({XXX, XXX, 9, 5, XXX, XXX, XXX, 0, 8, 5, 8});
const std::vector<bool> col_mask = {0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1};

auto const expected_var =
cudf::is_boolean<TypeParam>()
? std::vector<
ResultType>{XXX, 0, 0, 0, 0, XXX, 0, 0.16666666666666667, 0.1666666666666667, 0, 0}
: std::vector<ResultType>{XXX,
0,
2.666666666666667,
2.666666666666667,
0,
XXX,
0,
10.666666666666667,
8.166666666666667,
1.5,
1.5};
? std::vector<ResultType>{XXX, nan, 0, 0, nan, XXX, nan, 0.5, 0.3333333333333333, 0, 0}
: std::vector<ResultType>{XXX, nan, 8, 8, nan, XXX, nan, 32, 16.33333333333333, 3, 4.5};
std::vector<ResultType> expected_std(expected_var.size());
std::transform(expected_var.begin(), expected_var.end(), expected_std.begin(), [](auto const& x) {
return std::sqrt(x);
});

const std::vector<bool> expected_mask = {0, /* all null window */
1, /* 0 div 0, nan */
1,
1,
1,
1,
1, /* 0 div 0, nan */
0, /* all null window */
1,
1, /* 0 div 0, nan */
1,
1,
1,
Expand Down Expand Up @@ -763,13 +694,13 @@ TEST_F(RollingtVarStdTestUntyped, SimpleStaticVarianceStdInfNaN)
const std::vector<bool> col_mask = {1, 1, 0, 1, 1, 1, 1, 1, 0, 1};

auto const expected_var =
std::vector<ResultType>{XXX, 0.5, 0.5, nan, nan, nan, 16, nan, nan, nan};
std::vector<ResultType>{nan, 0.5, 0.5, nan, nan, nan, 16, nan, nan, nan};
std::vector<ResultType> expected_std(expected_var.size());
std::transform(expected_var.begin(), expected_var.end(), expected_std.begin(), [](auto const& x) {
return std::sqrt(x);
});

const std::vector<bool> expected_mask = {0, 1, 1, 1, 1, 1, 1, 1, 1, 1};
const std::vector<bool> expected_mask = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};

fixed_width_column_wrapper<double> input(col_data.begin(), col_data.end(), col_mask.begin());
fixed_width_column_wrapper<ResultType> var_expect(
Expand Down Expand Up @@ -1323,14 +1254,16 @@ TYPED_TEST(FixedPointTests, VarStd)
using fp_wrapper = cudf::test::fixed_point_column_wrapper<RepType>;
using fw_wrapper = cudf::test::fixed_width_column_wrapper<double>;

size_type preceding_window{2}, following_window{0}, min_periods{1}, ddof{1};
double const inf = std::numeric_limits<double>::infinity();
size_type preceding_window{3}, following_window{0}, min_periods{1}, ddof{2};

// The variance of `input` given `scale` == 0
std::vector<double> result_base_v{-1, 1422984.5, 1401138.0, 1352.0, 2.0, 0.5};
std::vector<double> result_base_v{
-1, inf, 1882804.66666666667, 1928018.666666666667, 1874.6666666666667, 2.0};
std::vector<bool> result_mask_v{0, 1, 1, 1, 1, 1};

// var tests
for (int32_t s = -5; s < 5; s++) {
for (int32_t s = -2; s <= 2; s++) {
auto const scale = scale_type{s};
auto const input = fp_wrapper{{42, 1729, 55, 3, 1, 2}, {1, 1, 1, 1, 1, 1}, scale};

Expand All @@ -1353,7 +1286,7 @@ TYPED_TEST(FixedPointTests, VarStd)
}

// std tests
for (int32_t s = -5; s < 5; s++) {
for (int32_t s = -2; s <= 2; s++) {
auto const scale = scale_type{s};
auto const input = fp_wrapper{{42, 1729, 55, 3, 1, 2}, {1, 1, 1, 1, 1, 1}, scale};

Expand Down