Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Groupby.shift c++ API refactor and python binding #8131

Merged
merged 26 commits into from
May 26, 2021
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
0c2d348
vector of scalars, new tests
isVoid Apr 30, 2021
831eb9a
Expand offset to column respective
isVoid Apr 30, 2021
1453728
groupby shift doc update
isVoid Apr 30, 2021
fdef55d
cython wrapper
isVoid Apr 30, 2021
9494d13
more cython binding
isVoid Apr 30, 2021
6101d92
More cython bindings
isVoid May 1, 2021
ee8a766
python bindings, tests
isVoid May 1, 2021
4f69826
Reformat docstring
isVoid May 4, 2021
0aa45b3
Reduce test cases
isVoid May 4, 2021
95e0bce
Use host_span
isVoid May 4, 2021
a230fd3
mixed_numerics
isVoid May 4, 2021
d712496
docstrings update
isVoid May 4, 2021
1c53e3e
.
isVoid May 4, 2021
ed2a8d0
Merge branch 'branch-0.20' of https://github.com/rapidsai/cudf into g…
isVoid May 6, 2021
f8cf8f4
Better create device scalar
isVoid May 18, 2021
176fea1
Test mismatched fixed_point types
isVoid May 18, 2021
d2095fa
Merge branch 'branch-0.20' of https://github.com/rapidsai/cudf into g…
isVoid May 18, 2021
2b44359
Merge branch 'branch-21.06' of https://github.com/rapidsai/cudf into …
isVoid May 18, 2021
a47209e
Progressing with multi value fill
isVoid May 18, 2021
74aa90f
Fixing failed groupby tests
isVoid May 19, 2021
0e0af0c
Adding multiple fill_values test case
isVoid May 19, 2021
8a1217e
Docstring update for `fill_value`
isVoid May 20, 2021
dd5a2f5
Merge branch 'branch-21.06' of https://github.com/rapidsai/cudf into …
isVoid May 20, 2021
b4e3bc9
Add comments about inserting index behavior
isVoid May 20, 2021
907b61d
Address doc review
isVoid May 24, 2021
eef9a25
Merge branch 'branch-21.06' of https://github.com/rapidsai/cudf into …
isVoid May 24, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cpp/benchmarks/groupby/group_shift_benchmark.cu
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ void BM_group_shift(benchmark::State& state)

for (auto _ : state) {
cuda_event_timer timer(state, true);
auto result = gb_obj.shift(vals, offset, *fill_value);
auto result = gb_obj.shift(cudf::table_view{{vals}}, {offset}, {*fill_value});
}
}

Expand Down
51 changes: 29 additions & 22 deletions cpp/include/cudf/groupby.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -225,37 +225,44 @@ class groupby {
/**
* @brief Performs grouped shifts for specified values.
*
* For each group, `i`th element is determined by the `i - offset`th element
* of the group. If `i - offset < 0 or >= group_size`, the value is determined by
* @p fill_value.
* In the `j`th column, for each group, the `i`th element is determined by the `i - offsets[j]`th
* element of the group. If `i - offsets[j] < 0 or >= group_size`, the value is determined by
* @p fill_values[j].
*
* Example:
* @code{.pseudo}
* keys: {1 1 1 1 2 2 2}
* values: {3 1 4 7 9 2 5}
* offset: 2
* fill_value: @
* result: {@ @ 3 1 @ @ 9}
* keys: {{1 2 1 1 2 2 1}}
* values: {{3 9 1 4 2 5 7}, {"a" "c" "bb" "ee" "z" "x" "d"}}
* offsets: {2, -1}
* fill_values: {@, @}
* result:
* keys: {{1 1 1 1 2 2 2}}
* values: {{@ @ 3 1 @ @ 9}, {"bb" "ee" "d" @ "z" "x" @}}
*
* -------------------------------------------------
* keys: {1 1 1 1 2 2 2}
* values: {3 1 4 7 9 2 5}
* offset: -2
* fill_value: -1
* result: {4 7 -1 -1 5 -1 -1}
* keys: {{1 2 1 1 2 2 1}}
* values: {{3 9 1 4 2 5 7}, {"a" "c" "bb" "ee" "z" "x" "d"}}
* offsets: {-2, 1}
* fill_values: {-1, "42"}
* result:
* keys: {{1 1 1 1 2 2 2}}
* values: {{4 7 -1 -1 5 -1 -1}, {"42" "a" "bb" "ee" "42" "c" "z"}}
*
* @endcode
*
* @param values Column to be shifted
* @param offset The off set by which to shift the input
* @param fill_value Fill value for indeterminable outputs
* @param values Table whose columns are to be shifted
* @param offsets The offsets by which to shift the input
* @param fill_values Fill values for indeterminable outputs
* @param mr Device memory resource used to allocate the returned table and columns' device memory
* @return Pair containing the table with each group's key and the column shifted
* @return Pair containing the table with each group's key and the table of shifted columns
*
* @throws cudf::logic_error if @p fill_value dtype does not match @p input dtype
* @throws cudf::logic_error if @p fill_values[i] dtype does not match @p values[i] dtype for
* `i`th column
*/
std::pair<std::unique_ptr<table>, std::unique_ptr<column>> shift(
column_view const& values,
size_type offset,
scalar const& fill_value,
std::pair<std::unique_ptr<table>, std::unique_ptr<table>> shift(
table_view const& values,
std::vector<size_type> const& offsets,
isVoid marked this conversation as resolved.
Show resolved Hide resolved
jrhemstad marked this conversation as resolved.
Show resolved Hide resolved
std::vector<std::reference_wrapper<const scalar>> const& fill_values,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand Down
41 changes: 28 additions & 13 deletions cpp/src/groupby/groupby.cu
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include <rmm/cuda_stream_view.hpp>

#include <thrust/copy.h>
#include <thrust/iterator/counting_iterator.h>

#include <memory>
#include <utility>
Expand Down Expand Up @@ -193,23 +194,37 @@ detail::sort::sort_groupby_helper& groupby::helper()
return *_helper;
};

std::pair<std::unique_ptr<table>, std::unique_ptr<column>> groupby::shift(
column_view const& values,
size_type offset,
scalar const& fill_value,
std::pair<std::unique_ptr<table>, std::unique_ptr<table>> groupby::shift(
table_view const& values,
std::vector<size_type> const& offsets,
std::vector<std::reference_wrapper<const scalar>> const& fill_values,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
CUDF_EXPECTS(values.type() == fill_value.type(),
"values and fill_value should have the same type.");

auto stream = rmm::cuda_stream_default;
auto grouped_values = helper().grouped_values(values, stream);
CUDF_EXPECTS(values.num_columns() == static_cast<size_type>(fill_values.size()),
"Mismatch number of fill_values and columns.");
CUDF_EXPECTS(
std::all_of(
thrust::make_counting_iterator(0),
thrust::make_counting_iterator(values.num_columns()),
[&](auto i) { return values.column(i).type().id() == fill_values[i].get().type().id(); }),
isVoid marked this conversation as resolved.
Show resolved Hide resolved
"values and fill_value should have the same type.");

auto stream = rmm::cuda_stream_default;
std::vector<std::unique_ptr<column>> results;
auto const& group_offsets = helper().group_offsets(stream);
std::transform(
thrust::make_counting_iterator(0),
thrust::make_counting_iterator(values.num_columns()),
std::back_inserter(results),
[&](size_type i) {
auto grouped_values = helper().grouped_values(values.column(i), stream);
return cudf::detail::segmented_shift(
grouped_values->view(), group_offsets, offsets[i], fill_values[i].get(), stream, mr);
});

return std::make_pair(
helper().sorted_keys(stream, mr),
std::move(cudf::detail::segmented_shift(
grouped_values->view(), helper().group_offsets(stream), offset, fill_value, stream, mr)));
return std::make_pair(helper().sorted_keys(stream, mr),
std::make_unique<cudf::table>(std::move(results)));
}

} // namespace groupby
Expand Down
Loading