Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor purge_nonempty_nulls #12111

Merged
merged 9 commits into from
Nov 16, 2022
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 14 additions & 51 deletions cpp/include/cudf/copying.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1012,12 +1012,19 @@ bool has_nonempty_nulls(column_view const& input);
bool may_have_nonempty_nulls(column_view const& input);

/**
* @brief Copies `input`, purging any non-empty null rows in the column or its descendants
* @brief Copy `input` into output while purging any non-empty null rows in the column or its
* descendants.
*
* LIST columns may have non-empty null rows.
* For example:
* @code{.pseudo}
* If the input column is not of a compound type (LIST/STRING/STRUCT/DICTIONARY), the output will
* be an unmodified copy of the input.
*
* The purge operation only applies directly to LIST and STRING columns, but it applies indirectly
* to STRUCT/DICTIONARY columns as well, since these columns may have child/descendant columns that
ttnghia marked this conversation as resolved.
Show resolved Hide resolved
* are LIST or STRING.
*
* Examples:
*
* @code{.pseudo}
* auto const lists = lists_column_wrapper<int32_t>{ {0,1}, {2,3}, {4,5} }.release();
* cudf::detail::set_null_mask(lists->null_mask(), 1, 2, false);
*
Expand All @@ -1027,33 +1034,13 @@ bool may_have_nonempty_nulls(column_view const& input);
* Offsets: [0, 2, 4, 6]
* Child: [0, 1, 2, 3, 4, 5]
*
* After purging the contents of the list's null rows, the column's contents
* will be:
* After purging the contents of the list's null rows, the column's contents will be:
* Validity: 101
* Offsets: [0, 2, 2, 4]
* Child: [0, 1, 4, 5]
* @endcode
*
* The purge operation only applies directly to LIST and STRING columns, but it
* applies indirectly to STRUCT columns as well, since LIST and STRUCT columns
* may have child/descendant columns that are LIST or STRING.
*
* @param input The column whose null rows are to be checked and purged
* @param mr Device memory resource used to allocate the returned column's device memory
* @return std::unique_ptr<column> Column with equivalent contents to `input`, but with
* the contents of null rows purged
*/
std::unique_ptr<column> purge_nonempty_nulls(
lists_column_view const& input,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Copies `input`, purging any non-empty null rows in the column or its descendants
*
* STRING columns may have non-empty null rows.
* For example:
* @code{.pseudo}
*
* auto const strings = strings_column_wrapper{ "AB", "CD", "EF" }.release();
* cudf::detail::set_null_mask(strings->null_mask(), 1, 2, false);
*
Expand All @@ -1070,26 +1057,7 @@ std::unique_ptr<column> purge_nonempty_nulls(
* Child: [A, B, E, F]
* @endcode
*
* The purge operation only applies directly to LIST and STRING columns, but it
* applies indirectly to STRUCT columns as well, since LIST and STRUCT columns
* may have child/descendant columns that are LIST or STRING.
*
* @param input The column whose null rows are to be checked and purged
* @param mr Device memory resource used to allocate the returned column's device memory
* @return std::unique_ptr<column> Column with equivalent contents to `input`, but with
* the contents of null rows purged
*/
std::unique_ptr<column> purge_nonempty_nulls(
strings_column_view const& input,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Copies `input`, purging any non-empty null rows in the column or its descendants
*
* STRUCTS columns may have null rows, with non-empty child rows.
* For example:
* @code{.pseudo}
*
* auto const lists = lists_column_wrapper<int32_t>{ {0,1}, {2,3}, {4,5} };
* auto const structs = structs_column_wrapper{ {lists}, null_at(1) };
*
Expand All @@ -1106,17 +1074,12 @@ std::unique_ptr<column> purge_nonempty_nulls(
* Child: [0, 1, 4, 5]
* @endcode
*
* The purge operation only applies directly to LIST and STRING columns, but it
* applies indirectly to STRUCT columns as well, since LIST and STRUCT columns
* may have child/descendant columns that are LIST or STRING.
*
* @param input The column whose null rows are to be checked and purged
* @param mr Device memory resource used to allocate the returned column's device memory
* @return std::unique_ptr<column> Column with equivalent contents to `input`, but with
* the contents of null rows purged
* @return A new column with equivalent contents to `input`, but with the contents of null rows
* purged
*/
std::unique_ptr<column> purge_nonempty_nulls(
structs_column_view const& input,
column_view const& input,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */
Expand Down
50 changes: 0 additions & 50 deletions cpp/include/cudf/detail/copy.cuh

This file was deleted.

10 changes: 10 additions & 0 deletions cpp/include/cudf/detail/copy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -315,5 +315,15 @@ bool has_nonempty_nulls(column_view const& input, rmm::cuda_stream_view stream);
*/
bool may_have_nonempty_nulls(column_view const& input, rmm::cuda_stream_view stream);

/**
* @copydoc cudf::purge_nonempty_nulls
*
* Stream-accepting `detail` counterpart of the public API: `stream` is a required argument here,
* while `mr` still defaults to the current device resource.
*
* @param stream CUDA stream used for device memory operations and kernel launches.
*/
std::unique_ptr<column> purge_nonempty_nulls(
column_view const& input,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

} // namespace detail
} // namespace cudf
42 changes: 21 additions & 21 deletions cpp/src/copying/purge_nonempty_nulls.cu
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* limitations under the License.
*/
#include <cudf/copying.hpp>
#include <cudf/detail/copy.cuh>
#include <cudf/detail/gather.cuh>
#include <cudf/utilities/default_stream.hpp>

#include <thrust/count.h>
Expand Down Expand Up @@ -80,6 +80,24 @@ bool has_nonempty_nulls(cudf::column_view const& input, rmm::cuda_stream_view st

return false;
}

/**
* Stream-accepting implementation that the public `cudf::purge_nonempty_nulls` forwards to.
*
* Builds a new column from `input`: a non-compound input is simply copied, while a compound
* input is rebuilt through an identity gather.
*
* @param input The column to copy and purge
* @param stream CUDA stream passed to the column copy and to the gather
* @param mr Device memory resource passed to the column copy and to the gather
* @return The newly created column
*/
std::unique_ptr<column> purge_nonempty_nulls(column_view const& input,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is moved from the deleted file copy.cuh.

rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
// If not compound types (LIST/STRING/STRUCT/DICTIONARY) then just copy the input into output.
if (!cudf::is_compound(input.type())) { return std::make_unique<column>(input, stream, mr); }

// Implement via identity gather: the gather map is the counting sequence [0, input.size()), so
// every row is selected exactly once and in its original order. Bounds checking is skipped
// (DONT_CHECK) because those indices are in range by construction.
// NOTE(review): the purge itself presumably comes from gather rebuilding the child data of
// compound columns without the contents of null rows -- confirm against cudf::detail::gather.
auto gathered_table = cudf::detail::gather(table_view{{input}},
thrust::make_counting_iterator(0),
thrust::make_counting_iterator(input.size()),
out_of_bounds_policy::DONT_CHECK,
stream,
mr);
// Only one column was gathered, so the first released column is the result.
return std::move(gathered_table->release().front());
}

} // namespace detail

/**
Expand Down Expand Up @@ -110,27 +128,9 @@ bool has_nonempty_nulls(column_view const& input)
}

/**
* @copydoc cudf::purge_nonempty_nulls(lists_column_view const&, rmm::mr::device_memory_resource*)
*/
std::unique_ptr<cudf::column> purge_nonempty_nulls(lists_column_view const& input,
                                                   rmm::mr::device_memory_resource* mr)
{
  // Public entry point: delegate to the detail implementation on the default stream.
  auto const default_stream = cudf::get_default_stream();
  return detail::purge_nonempty_nulls(input, default_stream, mr);
}

/**
* @copydoc cudf::purge_nonempty_nulls(structs_column_view const&, rmm::mr::device_memory_resource*)
*/
std::unique_ptr<cudf::column> purge_nonempty_nulls(structs_column_view const& input,
                                                   rmm::mr::device_memory_resource* mr)
{
  // Public entry point: delegate to the detail implementation on the default stream.
  auto const default_stream = cudf::get_default_stream();
  return detail::purge_nonempty_nulls(input, default_stream, mr);
}

/**
* @copydoc cudf::purge_nonempty_nulls(strings_column_view const&, rmm::mr::device_memory_resource*)
* @copydoc cudf::purge_nonempty_nulls(column_view const&, rmm::mr::device_memory_resource*)
*/
std::unique_ptr<cudf::column> purge_nonempty_nulls(strings_column_view const& input,
std::unique_ptr<cudf::column> purge_nonempty_nulls(column_view const& input,
rmm::mr::device_memory_resource* mr)
{
return detail::purge_nonempty_nulls(input, cudf::get_default_stream(), mr);
Expand Down
12 changes: 5 additions & 7 deletions cpp/src/lists/set_operations.cu
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
#include "utilities.hpp"

#include <cudf/column/column_factories.hpp>
#include <cudf/detail/copy.cuh>
#include <cudf/detail/copy.hpp>
#include <cudf/detail/copy_if.cuh>
#include <cudf/detail/null_mask.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
Expand Down Expand Up @@ -176,9 +176,8 @@ std::unique_ptr<column> intersect_distinct(lists_column_view const& lhs,
stream,
mr);

return null_count == 0
? std::move(output)
: cudf::detail::purge_nonempty_nulls(lists_column_view{output->view()}, stream, mr);
return null_count == 0 ? std::move(output)
: cudf::detail::purge_nonempty_nulls(output->view(), stream, mr);
}

std::unique_ptr<column> union_distinct(lists_column_view const& lhs,
Expand Down Expand Up @@ -253,9 +252,8 @@ std::unique_ptr<column> difference_distinct(lists_column_view const& lhs,
stream,
mr);

return null_count == 0
? std::move(output)
: cudf::detail::purge_nonempty_nulls(lists_column_view{output->view()}, stream, mr);
return null_count == 0 ? std::move(output)
: cudf::detail::purge_nonempty_nulls(output->view(), stream, mr);
}

} // namespace detail
Expand Down
Loading