Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use make_empty_lists_column instead of make_empty_column(type_id::LIST) #13099

Merged
merged 8 commits into from
Apr 12, 2023
2 changes: 1 addition & 1 deletion conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@ outputs:
- test -f $PREFIX/include/cudf/lists/detail/dremel.hpp
- test -f $PREFIX/include/cudf/lists/detail/extract.hpp
- test -f $PREFIX/include/cudf/lists/detail/interleave_columns.hpp
- test -f $PREFIX/include/cudf/lists/detail/lists_column_factories.hpp
- test -f $PREFIX/include/cudf/lists/detail/reverse.hpp
- test -f $PREFIX/include/cudf/lists/detail/scatter_helper.cuh
- test -f $PREFIX/include/cudf/lists/detail/set_operations.hpp
Expand All @@ -205,7 +206,6 @@ outputs:
- test -f $PREFIX/include/cudf/lists/filling.hpp
- test -f $PREFIX/include/cudf/lists/gather.hpp
- test -f $PREFIX/include/cudf/lists/list_view.hpp
- test -f $PREFIX/include/cudf/lists/lists_column_factories.hpp
- test -f $PREFIX/include/cudf/lists/lists_column_view.hpp
- test -f $PREFIX/include/cudf/lists/reverse.hpp
- test -f $PREFIX/include/cudf/lists/set_operations.hpp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,19 @@ std::unique_ptr<cudf::column> make_lists_column_from_scalar(list_scalar const& v
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);

/**
* @brief Create an empty lists column
*
* A list column requires a child type and so cannot be created with `make_empty_column`.
* The returned column has zero rows; its offsets child and its data child are both
* empty columns, the data child being of `child_type`.
*
* @param child_type The type used for the empty child column
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return A new lists column with zero rows whose child column has type `child_type`
*/
std::unique_ptr<column> make_empty_lists_column(data_type child_type,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);

} // namespace detail
} // namespace lists
} // namespace cudf
4 changes: 2 additions & 2 deletions cpp/src/column/column_factories.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -18,7 +18,7 @@
#include <cudf/detail/fill.hpp>
#include <cudf/detail/gather.cuh>
#include <cudf/dictionary/dictionary_factories.hpp>
#include <cudf/lists/lists_column_factories.hpp>
#include <cudf/lists/detail/lists_column_factories.hpp>
#include <cudf/scalar/scalar.hpp>
#include <cudf/strings/detail/fill.hpp>

Expand Down
11 changes: 11 additions & 0 deletions cpp/src/lists/lists_column_factories.cu
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <cudf/column/column_view.hpp>
#include <cudf/detail/copy.hpp>
#include <cudf/detail/gather.cuh>
#include <cudf/lists/detail/lists_column_factories.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>
Expand Down Expand Up @@ -84,6 +85,16 @@ std::unique_ptr<cudf::column> make_lists_column_from_scalar(list_scalar const& v
return std::move(res->release()[0]);
}

/**
 * @brief Build a lists column that has no rows.
 *
 * A lists column is made of an offsets child and a data child, so both are
 * created here as empty columns and handed to `make_lists_column`.
 *
 * @param child_type Type given to the empty data child
 * @param stream CUDA stream forwarded to `make_lists_column`
 * @param mr Device memory resource forwarded to `make_lists_column`
 * @return The column produced by `make_lists_column` from the two empty children
 */
std::unique_ptr<column> make_empty_lists_column(data_type child_type,
                                                rmm::cuda_stream_view stream,
                                                rmm::mr::device_memory_resource* mr)
{
  // Offsets child: an empty column typed by the id that corresponds to offset_type.
  auto empty_offsets = make_empty_column(data_type{type_to_id<offset_type>()});
  // Data child: an empty column of the caller-requested element type.
  auto empty_child = make_empty_column(child_type);

  // Both integer arguments are zero and the buffer argument is default-constructed (empty).
  return make_lists_column(0,
                           std::move(empty_offsets),
                           std::move(empty_child),
                           0,
                           rmm::device_buffer{},
                           stream,
                           mr);
}

} // namespace detail
} // namespace lists

Expand Down
11 changes: 1 addition & 10 deletions cpp/src/lists/sequences.cu
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <cudf/detail/indexalator.cuh>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/sizes_to_offsets_iterator.cuh>
#include <cudf/lists/detail/lists_column_factories.hpp>
#include <cudf/lists/filling.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/default_stream.hpp>
Expand Down Expand Up @@ -127,16 +128,6 @@ struct sequences_functor<T, std::enable_if_t<is_supported<T>()>> {
}
};

/**
 * @brief Assemble a zero-row lists column whose data child has type `child_type`.
 *
 * @param child_type Type given to the empty data child
 * @param stream CUDA stream used for the zero-byte buffer and forwarded to `make_lists_column`
 * @param mr Device memory resource used for the zero-byte buffer and forwarded to
 * `make_lists_column`
 * @return The column produced by `make_lists_column` from the two empty children
 */
std::unique_ptr<column> make_empty_lists_column(data_type child_type,
                                                rmm::cuda_stream_view stream,
                                                rmm::mr::device_memory_resource* mr)
{
  // Empty offsets child, typed by the id that corresponds to offset_type.
  auto no_offsets = make_empty_column(data_type(type_to_id<offset_type>()));
  // Empty data child of the requested element type.
  auto no_elements = make_empty_column(child_type);

  // The buffer argument is a zero-byte device_buffer built on the given stream and resource.
  return make_lists_column(0,
                           std::move(no_offsets),
                           std::move(no_elements),
                           0,
                           rmm::device_buffer(0, stream, mr),
                           stream,
                           mr);
}

std::unique_ptr<column> sequences(column_view const& starts,
std::optional<column_view> const& steps,
column_view const& sizes,
Expand Down
9 changes: 2 additions & 7 deletions cpp/src/strings/extract/extract_all.cu
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <cudf/column/column_factories.hpp>
#include <cudf/detail/get_value.cuh>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/lists/detail/lists_column_factories.hpp>
#include <cudf/strings/detail/strings_column_factories.cuh>
#include <cudf/strings/extract.hpp>
#include <cudf/strings/string_view.cuh>
Expand Down Expand Up @@ -122,13 +123,7 @@ std::unique_ptr<column> extract_all_record(strings_column_view const& input,

// Return an empty lists column if there are no valid rows
if (strings_count == null_count) {
return make_lists_column(0,
make_empty_column(type_to_id<offset_type>()),
make_empty_column(type_id::STRING),
0,
rmm::device_buffer{},
stream,
mr);
return cudf::lists::detail::make_empty_lists_column(data_type{type_id::STRING}, stream, mr);
}

// Convert counts into offsets.
Expand Down
5 changes: 4 additions & 1 deletion cpp/src/strings/split/split_record.cu
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <cudf/column/column_factories.hpp>
#include <cudf/detail/get_value.cuh>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/lists/detail/lists_column_factories.hpp>
#include <cudf/strings/detail/split_utils.cuh>
#include <cudf/strings/detail/strings_column_factories.cuh>
#include <cudf/strings/split/split.hpp>
Expand All @@ -46,7 +47,9 @@ std::unique_ptr<column> split_record_fn(strings_column_view const& input,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
if (input.is_empty()) { return make_empty_column(type_id::LIST); }
if (input.is_empty()) {
return cudf::lists::detail::make_empty_lists_column(data_type{type_id::STRING}, stream, mr);
}
if (input.size() == input.null_count()) {
auto offsets = std::make_unique<column>(input.offsets(), stream, mr);
auto results = make_empty_column(type_id::STRING);
Expand Down
10 changes: 10 additions & 0 deletions cpp/tests/strings/split_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -693,10 +693,15 @@ TEST_F(StringsSplitTest, SplitZeroSizeStringsColumns)
EXPECT_TRUE(results->num_columns() == 1);
EXPECT_TRUE(results->num_rows() == 0);

auto target = cudf::string_scalar(" ");
auto list_result = cudf::strings::split_record(zero_size_strings_column);
EXPECT_TRUE(list_result->size() == 0);
list_result = cudf::strings::rsplit_record(zero_size_strings_column);
EXPECT_TRUE(list_result->size() == 0);
list_result = cudf::strings::split_record(zero_size_strings_column, target);
EXPECT_TRUE(list_result->size() == 0);
list_result = cudf::strings::rsplit_record(zero_size_strings_column, target);
EXPECT_TRUE(list_result->size() == 0);
list_result = cudf::strings::split_record_re(zero_size_strings_column, *prog);
EXPECT_TRUE(list_result->size() == 0);
list_result = cudf::strings::rsplit_record_re(zero_size_strings_column, *prog);
Expand Down Expand Up @@ -729,12 +734,17 @@ TEST_F(StringsSplitTest, AllNullsCase)
EXPECT_TRUE(results->num_columns() == 1);
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->get_column(0).view(), input);

auto target = cudf::string_scalar(" ");
auto list_result = cudf::strings::split_record(sv);
using LCW = cudf::test::lists_column_wrapper<cudf::string_view>;
LCW expected({LCW{}, LCW{}, LCW{}}, cudf::test::iterators::all_nulls());
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(list_result->view(), expected);
list_result = cudf::strings::rsplit_record(sv);
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(list_result->view(), expected);
list_result = cudf::strings::split_record(sv, target);
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(list_result->view(), expected);
list_result = cudf::strings::rsplit_record(sv, target);
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(list_result->view(), expected);
list_result = cudf::strings::split_record_re(sv, *prog);
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(list_result->view(), expected);
list_result = cudf::strings::rsplit_record_re(sv, *prog);
Expand Down