-
Notifications
You must be signed in to change notification settings - Fork 919
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Change `nullable()` to `has_nulls()` in `cudf::detail::gather`
#14363
Changes from 14 commits
f2971f8
748aaff
ba5667b
f62f6a4
3dce824
25ebec7
2969be7
9dad666
c72ea58
c31c1db
8163a4b
b37e5a3
11b406f
4cdd582
87ce541
04990e3
868f227
f2f392b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -336,6 +336,23 @@ inline bool has_nested_nulls(table_view const& input) | |
}); | ||
} | ||
|
||
/** | ||
* @brief Returns True if the table has a nullable column at any level of the column hierarchy | ||
* | ||
* @param input The table to check for nullable columns | ||
* @return True if the table has nullable columns at any level of the column hierarchy, false | ||
* otherwise | ||
*/ | ||
inline bool has_nested_nullable_columns(table_view const& input) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same with this implementation. This could be moved to https://github.com/rapidsai/cudf/blob/branch-23.12/cpp/src/table/table.cpp There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Let's make an issue for this. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. |
||
{ | ||
return std::any_of(input.begin(), input.end(), [](auto const& col) { | ||
return col.nullable() || | ||
std::any_of(col.child_begin(), col.child_end(), [](auto const& child_col) { | ||
return has_nested_nullable_columns(table_view{{child_col}}); | ||
}); | ||
}); | ||
} | ||
|
||
/** | ||
* @brief The function to collect all nullable columns at all nested levels in a given table. | ||
* | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -49,6 +49,7 @@ struct calculate_quantile_fn { | |
double const* d_quantiles; | ||
size_type num_quantiles; | ||
interpolation interpolation; | ||
size_type* null_count; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This seems to be an unrelated change to this PR, so ideally it should be in a separate PR. I'm fine with keeping it here, but please clarify that in the PR description. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done |
||
|
||
__device__ void operator()(size_type i) | ||
{ | ||
|
@@ -68,11 +69,13 @@ struct calculate_quantile_fn { | |
thrust::for_each_n(thrust::seq, | ||
thrust::make_counting_iterator(0), | ||
num_quantiles, | ||
[d_result = d_result, segment_size, offset](size_type j) { | ||
if (segment_size == 0) | ||
[d_result = d_result, segment_size, offset, this](size_type j) { | ||
if (segment_size == 0) { | ||
d_result.set_null(offset + j); | ||
else | ||
atomicAdd(this->null_count, 1); | ||
bdice marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} else { | ||
d_result.set_valid(offset + j); | ||
} | ||
}); | ||
} | ||
}; | ||
|
@@ -104,6 +107,7 @@ struct quantiles_functor { | |
auto values_view = column_device_view::create(values, stream); | ||
auto group_size_view = column_device_view::create(group_sizes, stream); | ||
auto result_view = mutable_column_device_view::create(result->mutable_view(), stream); | ||
auto null_count = rmm::device_scalar<cudf::size_type>(0, stream, mr); | ||
|
||
// For each group, calculate quantile | ||
if (!cudf::is_dictionary(values.type())) { | ||
|
@@ -118,7 +122,8 @@ struct quantiles_functor { | |
group_offsets.data(), | ||
quantile.data(), | ||
static_cast<size_type>(quantile.size()), | ||
interpolation}); | ||
interpolation, | ||
null_count.data()}); | ||
} else { | ||
auto values_iter = cudf::dictionary::detail::make_dictionary_iterator<T>(*values_view); | ||
thrust::for_each_n(rmm::exec_policy(stream), | ||
|
@@ -131,9 +136,11 @@ struct quantiles_functor { | |
group_offsets.data(), | ||
quantile.data(), | ||
static_cast<size_type>(quantile.size()), | ||
interpolation}); | ||
interpolation, | ||
null_count.data()}); | ||
} | ||
|
||
result->set_null_count(null_count.value(stream)); | ||
return result; | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this use `&&` instead? Because if we indeed don't have any nulls here then we don't need to call `gather_bitmask`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Probably I misunderstood the usage of this variable. So this variable should be called `need_new_bitmask` or so. It should not be `has_nulls`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
But we need to call `gather_bitmask` if the policy is `out_of_bounds_policy::NULLIFY`. `gather_bitmask` will help nullify any out-of-bounds (OOB) accesses.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Okay, so `||` is indeed needed, but please rename that variable.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done