Skip to content

Commit

Permalink
First class support for unbounded window function bounds (#6811)
Browse files Browse the repository at this point in the history
1. Fixed time-range functions.
2. JNI support for UNBOUNDED time-range functions
3. Extend unbounded windows for use with
   row-based window functions
4. CHANGELOG update, + code formatting
5. Review: Parentheses around index math
6. Fixed WindowOptions.hashCode().
7. Added detailed unbounded window tests.
8. More JNI tests, for all combinations.
  • Loading branch information
mythrocks authored Nov 24, 2020
1 parent 17666c4 commit 0e7ffcf
Show file tree
Hide file tree
Showing 8 changed files with 1,538 additions and 91 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
- PR #6652 Add support for struct columns in concatenate
- PR #6675 Add DecimalDtype to cuDF
- PR #6739 Add Java bindings for is_timestamp
- PR #6811 First class support for unbounded window function bounds
- PR #6768 Add support for scatter() on list columns
- PR #6796 Add create_metadata_file in dask_cudf
- PR #6765 Cupy fallback for __array_function__ and __array_ufunc__ for cudf.Series
Expand Down
99 changes: 99 additions & 0 deletions cpp/include/cudf/rolling.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,42 @@ std::unique_ptr<column> rolling_window(
std::unique_ptr<aggregation> const& agg,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Abstraction for window boundary sizes
*/
struct window_bounds {
public:
/**
* @brief Construct bounded window boundary.
*
* @param value Finite window boundary (in days or rows)
*/
static window_bounds get(size_type value) { return window_bounds(false, value); }

/**
* @brief Construct unbounded window boundary.
*
* @return window_bounds
*/
static window_bounds unbounded()
{
return window_bounds(true, std::numeric_limits<cudf::size_type>::max());
}

// TODO: In the future, add units for bounds.
// E.g. {value=1, unit=DAYS, unbounded=false}
// For the present, assume units from context:
// 1. For time-based window functions, assume DAYS as before
// 2. For all else, assume ROWS as before.
const bool is_unbounded;
const size_type value;

private:
explicit window_bounds(bool is_unbounded_, size_type value_ = 0)
: is_unbounded{is_unbounded_}, value{value_}
{
}
};
/**
* @brief Applies a grouping-aware, fixed-size rolling window function to the values in a column.
*
Expand Down Expand Up @@ -162,6 +198,25 @@ std::unique_ptr<column> grouped_rolling_window(
std::unique_ptr<aggregation> const& aggr,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @copydoc std::unique_ptr<column> grouped_rolling_window(
* table_view const& group_keys,
* column_view const& input,
* size_type preceding_window,
* size_type following_window,
* size_type min_periods,
* std::unique_ptr<aggregation> const& aggr,
* rmm::mr::device_memory_resource* mr)
*/
std::unique_ptr<column> grouped_rolling_window(
table_view const& group_keys,
column_view const& input,
window_bounds preceding_window,
window_bounds following_window,
size_type min_periods,
std::unique_ptr<aggregation> const& aggr,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @copydoc std::unique_ptr<column> grouped_rolling_window(
* table_view const& group_keys,
Expand All @@ -186,6 +241,27 @@ std::unique_ptr<column> grouped_rolling_window(
std::unique_ptr<aggregation> const& aggr,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @copydoc std::unique_ptr<column> grouped_rolling_window(
* table_view const& group_keys,
* column_view const& input,
* column_view const& default_outputs,
* size_type preceding_window,
* size_type following_window,
* size_type min_periods,
* std::unique_ptr<aggregation> const& aggr,
* rmm::mr::device_memory_resource* mr)
*/
std::unique_ptr<column> grouped_rolling_window(
table_view const& group_keys,
column_view const& input,
column_view const& default_outputs,
window_bounds preceding_window,
window_bounds following_window,
size_type min_periods,
std::unique_ptr<aggregation> const& aggr,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Applies a grouping-aware, timestamp-based rolling window function to the values in a
*column.
Expand Down Expand Up @@ -280,6 +356,29 @@ std::unique_ptr<column> grouped_time_range_rolling_window(
std::unique_ptr<aggregation> const& aggr,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @copydoc std::unique_ptr<column> grouped_time_range_rolling_window(
* table_view const& group_keys,
* column_view const& timestamp_column,
* cudf::order const& timestamp_order,
* column_view const& input,
* size_type preceding_window_in_days,
* size_type following_window_in_days,
* size_type min_periods,
* std::unique_ptr<aggregation> const& aggr,
* rmm::mr::device_memory_resource* mr)
*/
std::unique_ptr<column> grouped_time_range_rolling_window(
table_view const& group_keys,
column_view const& timestamp_column,
cudf::order const& timestamp_order,
column_view const& input,
window_bounds preceding_window_in_days,
window_bounds following_window_in_days,
size_type min_periods,
std::unique_ptr<aggregation> const& aggr,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Applies a variable-size rolling window function to the values in a column.
*
Expand Down
Loading

0 comments on commit 0e7ffcf

Please sign in to comment.