Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update make_lists_column_from_scalar to use make_offsets_child_column utility #13841

Merged
merged 5 commits into from
Aug 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 8 additions & 19 deletions cpp/include/cudf/lists/detail/gather.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#include <cudf/column/column_factories.hpp>
#include <cudf/detail/get_value.cuh>
#include <cudf/detail/sizes_to_offsets_iterator.cuh>
#include <cudf/lists/lists_column_view.hpp>
#include <cudf/utilities/bit.hpp>
#include <cudf/utilities/default_stream.hpp>
Expand All @@ -28,7 +29,6 @@
#include <thrust/functional.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/transform.h>
#include <thrust/transform_scan.h>

namespace cudf {
namespace lists {
Expand Down Expand Up @@ -74,25 +74,15 @@ gather_data make_gather_data(cudf::lists_column_view const& source_column,
{
// size of the gather map is the # of output rows
size_type output_count = gather_map_size;
size_type offset_count = output_count + 1;

// offsets of the source column
int32_t const* src_offsets{source_column.offsets().data<int32_t>() + source_column.offset()};
size_type const src_size = source_column.size();

// outgoing offsets. these will persist as output from the entire gather operation
auto dst_offsets_c = cudf::make_fixed_width_column(
data_type{type_id::INT32}, offset_count, mask_state::UNALLOCATED, stream, mr);
mutable_column_view dst_offsets_v = dst_offsets_c->mutable_view();
auto const source_column_nullmask = source_column.null_mask();

// generate the compacted outgoing offsets.
auto count_iter = thrust::make_counting_iterator<int32_t>(0);
thrust::transform_exclusive_scan(
rmm::exec_policy_nosync(stream),
count_iter,
count_iter + offset_count,
dst_offsets_v.begin<int32_t>(),
auto sizes_itr = cudf::detail::make_counting_transform_iterator(
0,
[source_column_nullmask,
source_column_offset = source_column.offset(),
gather_map,
Expand All @@ -112,9 +102,10 @@ gather_data make_gather_data(cudf::lists_column_view const& source_column,

// the length of this list
return src_offsets[offset_index + 1] - src_offsets[offset_index];
},
0,
thrust::plus<int32_t>());
});

auto [dst_offsets_c, map_size] =
cudf::detail::make_offsets_child_column(sizes_itr, sizes_itr + output_count, stream, mr);

// handle sliced columns
size_type const shift =
Expand Down Expand Up @@ -147,9 +138,7 @@ gather_data make_gather_data(cudf::lists_column_view const& source_column,
});

// Retrieve size of the resulting gather map for level N+1 (the last offset)
size_type child_gather_map_size =
cudf::detail::get_value<size_type>(dst_offsets_c->view(), output_count, stream);

auto const child_gather_map_size = static_cast<size_type>(map_size);
return {std::move(dst_offsets_c), std::move(base_offsets), child_gather_map_size};
}

Expand Down
23 changes: 8 additions & 15 deletions cpp/src/lists/combine/concatenate_list_elements.cu
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@
#include <cudf/detail/copy.hpp>
#include <cudf/detail/gather.hpp>
#include <cudf/detail/get_value.cuh>
#include <cudf/detail/iterator.cuh>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/sizes_to_offsets_iterator.cuh>
#include <cudf/detail/valid_if.cuh>
#include <cudf/lists/combine.hpp>
#include <cudf/lists/lists_column_view.hpp>
Expand Down Expand Up @@ -120,36 +122,28 @@ generate_list_offsets_and_validities(column_view const& input,
{
auto const num_rows = input.size();

auto out_offsets = make_numeric_column(
data_type{type_to_id<size_type>()}, num_rows + 1, mask_state::UNALLOCATED, stream, mr);

auto const lists_of_lists_dv_ptr = column_device_view::create(input, stream);
auto const lists_dv_ptr = column_device_view::create(lists_column_view(input).child(), stream);
auto const d_out_offsets = out_offsets->mutable_view().template begin<size_type>();
auto const d_row_offsets = lists_column_view(input).offsets_begin();
auto const d_list_offsets = lists_column_view(lists_column_view(input).child()).offsets_begin();

// The array of int8_t stores validities for the output list elements.
auto validities = rmm::device_uvector<int8_t>(num_rows, stream);

// Compute output list sizes and validities.
auto const iter = thrust::make_counting_iterator<size_type>(0);
thrust::transform(
rmm::exec_policy(stream),
iter,
iter + num_rows,
d_out_offsets,
auto sizes_itr = cudf::detail::make_counting_transform_iterator(
0,
[lists_of_lists_dv = *lists_of_lists_dv_ptr,
lists_dv = *lists_dv_ptr,
d_row_offsets,
d_list_offsets,
d_validities = validities.begin(),
iter] __device__(auto const idx) {
d_validities = validities.begin()] __device__(auto const idx) {
if (d_row_offsets[idx] == d_row_offsets[idx + 1]) { // This is a null/empty row.
d_validities[idx] = static_cast<int8_t>(lists_of_lists_dv.is_valid(idx));
return size_type{0};
}
// The output row will not be null only if all lists on the input row are not null.
auto const iter = thrust::make_counting_iterator<size_type>(0);
auto const is_valid =
thrust::all_of(thrust::seq,
iter + d_row_offsets[idx],
Expand All @@ -161,10 +155,9 @@ generate_list_offsets_and_validities(column_view const& input,
// Compute size of the output list as sum of sizes of all lists in the current input row.
return d_list_offsets[d_row_offsets[idx + 1]] - d_list_offsets[d_row_offsets[idx]];
});

// Compute offsets from sizes.
thrust::exclusive_scan(
rmm::exec_policy(stream), d_out_offsets, d_out_offsets + num_rows + 1, d_out_offsets);
auto out_offsets = std::get<0>(
cudf::detail::make_offsets_child_column(sizes_itr, sizes_itr + num_rows, stream, mr));

return {std::move(out_offsets), std::move(validities)};
}
Expand Down
12 changes: 4 additions & 8 deletions cpp/src/lists/lists_column_factories.cu
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <cudf/column/column_view.hpp>
#include <cudf/detail/copy.hpp>
#include <cudf/detail/gather.cuh>
#include <cudf/detail/sizes_to_offsets_iterator.cuh>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/lists/detail/lists_column_factories.hpp>

Expand Down Expand Up @@ -49,14 +50,9 @@ std::unique_ptr<cudf::column> make_lists_column_from_scalar(list_scalar const& v
auto mr_final = size == 1 ? mr : rmm::mr::get_current_device_resource();

// Handcraft a 1-row column
auto offsets = make_numeric_column(
data_type{type_to_id<size_type>()}, 2, mask_state::UNALLOCATED, stream, mr_final);
auto m_offsets = offsets->mutable_view();
thrust::sequence(rmm::exec_policy(stream),
m_offsets.begin<size_type>(),
m_offsets.end<size_type>(),
0,
value.view().size());
auto sizes_itr = thrust::constant_iterator<size_type>(value.view().size());
auto offsets = std::get<0>(
cudf::detail::make_offsets_child_column(sizes_itr, sizes_itr + 1, stream, mr_final));
size_type null_count = value.is_valid(stream) ? 0 : 1;
auto null_mask_state = null_count ? mask_state::ALL_NULL : mask_state::UNALLOCATED;
auto null_mask = cudf::detail::create_null_mask(1, null_mask_state, stream, mr_final);
Expand Down
10 changes: 10 additions & 0 deletions cpp/tests/column/factories_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -819,3 +819,13 @@ void struct_from_scalar(bool is_valid)
// Runs the shared struct_from_scalar helper with is_valid set to true.
TEST_F(ColumnFactoryTest, FromStructScalar)
{
  struct_from_scalar(true);
}

// Runs the shared struct_from_scalar helper with is_valid set to false.
TEST_F(ColumnFactoryTest, FromStructScalarNull)
{
  struct_from_scalar(false);
}

// Checks that cudf::make_column_from_scalar reports std::overflow_error for both a
// string scalar and a list scalar when the requested row count is 214748365.
TEST_F(ColumnFactoryTest, FromScalarErrors)
{
  // Row count used by both checks.
  // NOTE(review): presumably chosen so that (elements per row) * (rows) exceeds the
  // maximum size_type value -- confirm against make_offsets_child_column.
  auto constexpr row_count = 214748365;

  // String scalar: an 11-character string repeated row_count times.
  cudf::string_scalar str_scalar("hello world");
  EXPECT_THROW(cudf::make_column_from_scalar(str_scalar, row_count), std::overflow_error);

  // List scalar: a 10-element int8_t list repeated row_count times.
  using int8_wrapper = cudf::test::fixed_width_column_wrapper<int8_t>;
  auto lst           = cudf::make_list_scalar(int8_wrapper({1, 2, 3, 4, 5, 6, 7, 8, 9, 10}));
  EXPECT_THROW(cudf::make_column_from_scalar(*lst, row_count), std::overflow_error);
}