diff --git a/cpp/include/cudf/lists/detail/gather.cuh b/cpp/include/cudf/lists/detail/gather.cuh index 83710a49f6a..18fe707fd69 100644 --- a/cpp/include/cudf/lists/detail/gather.cuh +++ b/cpp/include/cudf/lists/detail/gather.cuh @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -28,7 +29,6 @@ #include #include #include -#include namespace cudf { namespace lists { @@ -74,25 +74,15 @@ gather_data make_gather_data(cudf::lists_column_view const& source_column, { // size of the gather map is the # of output rows size_type output_count = gather_map_size; - size_type offset_count = output_count + 1; // offsets of the source column int32_t const* src_offsets{source_column.offsets().data() + source_column.offset()}; size_type const src_size = source_column.size(); - // outgoing offsets. these will persist as output from the entire gather operation - auto dst_offsets_c = cudf::make_fixed_width_column( - data_type{type_id::INT32}, offset_count, mask_state::UNALLOCATED, stream, mr); - mutable_column_view dst_offsets_v = dst_offsets_c->mutable_view(); auto const source_column_nullmask = source_column.null_mask(); - // generate the compacted outgoing offsets. - auto count_iter = thrust::make_counting_iterator(0); - thrust::transform_exclusive_scan( - rmm::exec_policy_nosync(stream), - count_iter, - count_iter + offset_count, - dst_offsets_v.begin(), + auto sizes_itr = cudf::detail::make_counting_transform_iterator( + 0, [source_column_nullmask, source_column_offset = source_column.offset(), gather_map, @@ -112,9 +102,10 @@ gather_data make_gather_data(cudf::lists_column_view const& source_column, // the length of this list return src_offsets[offset_index + 1] - src_offsets[offset_index]; - }, - 0, - thrust::plus()); + }); + + auto [dst_offsets_c, map_size] = + cudf::detail::make_offsets_child_column(sizes_itr, sizes_itr + output_count, stream, mr); // handle sliced columns size_type const shift = @@ -147,9 +138,7 @@ gather_data make_gather_data(cudf::lists_column_view const& source_column, }); // Retrieve size of the resulting gather map for level N+1 (the last offset) - size_type child_gather_map_size = - cudf::detail::get_value(dst_offsets_c->view(), output_count, stream); - + auto const child_gather_map_size = static_cast(map_size); return {std::move(dst_offsets_c), std::move(base_offsets), child_gather_map_size}; } diff --git a/cpp/src/lists/combine/concatenate_list_elements.cu b/cpp/src/lists/combine/concatenate_list_elements.cu index 3b00d7bd26e..fbe297765f8 100644 --- a/cpp/src/lists/combine/concatenate_list_elements.cu +++ b/cpp/src/lists/combine/concatenate_list_elements.cu @@ -19,7 +19,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -120,12 +122,8 @@ generate_list_offsets_and_validities(column_view const& input, { auto const num_rows = input.size(); - auto out_offsets = make_numeric_column( - data_type{type_to_id()}, num_rows + 1, mask_state::UNALLOCATED, stream, mr); - auto const lists_of_lists_dv_ptr = column_device_view::create(input, stream); auto const lists_dv_ptr = column_device_view::create(lists_column_view(input).child(), stream); - auto const d_out_offsets = out_offsets->mutable_view().template begin(); auto const d_row_offsets = lists_column_view(input).offsets_begin(); auto const d_list_offsets = lists_column_view(lists_column_view(input).child()).offsets_begin(); @@ -133,23 +131,19 @@ generate_list_offsets_and_validities(column_view const& input, auto validities = rmm::device_uvector(num_rows, stream); // Compute output list sizes and validities. - auto const iter = thrust::make_counting_iterator(0); - thrust::transform( - rmm::exec_policy(stream), - iter, - iter + num_rows, - d_out_offsets, + auto sizes_itr = cudf::detail::make_counting_transform_iterator( + 0, [lists_of_lists_dv = *lists_of_lists_dv_ptr, lists_dv = *lists_dv_ptr, d_row_offsets, d_list_offsets, - d_validities = validities.begin(), - iter] __device__(auto const idx) { + d_validities = validities.begin()] __device__(auto const idx) { if (d_row_offsets[idx] == d_row_offsets[idx + 1]) { // This is a null/empty row. d_validities[idx] = static_cast(lists_of_lists_dv.is_valid(idx)); return size_type{0}; } // The output row will not be null only if all lists on the input row are not null. + auto const iter = thrust::make_counting_iterator(0); auto const is_valid = thrust::all_of(thrust::seq, iter + d_row_offsets[idx], @@ -161,10 +155,9 @@ generate_list_offsets_and_validities(column_view const& input, // Compute size of the output list as sum of sizes of all lists in the current input row. return d_list_offsets[d_row_offsets[idx + 1]] - d_list_offsets[d_row_offsets[idx]]; }); - // Compute offsets from sizes. - thrust::exclusive_scan( - rmm::exec_policy(stream), d_out_offsets, d_out_offsets + num_rows + 1, d_out_offsets); + auto out_offsets = std::get<0>( + cudf::detail::make_offsets_child_column(sizes_itr, sizes_itr + num_rows, stream, mr)); return {std::move(out_offsets), std::move(validities)}; } diff --git a/cpp/src/lists/lists_column_factories.cu b/cpp/src/lists/lists_column_factories.cu index 7f82d32d327..278e5af07b2 100644 --- a/cpp/src/lists/lists_column_factories.cu +++ b/cpp/src/lists/lists_column_factories.cu @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -49,14 +50,9 @@ std::unique_ptr make_lists_column_from_scalar(list_scalar const& v auto mr_final = size == 1 ? mr : rmm::mr::get_current_device_resource(); // Handcraft a 1-row column - auto offsets = make_numeric_column( - data_type{type_to_id()}, 2, mask_state::UNALLOCATED, stream, mr_final); - auto m_offsets = offsets->mutable_view(); - thrust::sequence(rmm::exec_policy(stream), - m_offsets.begin(), - m_offsets.end(), - 0, - value.view().size()); + auto sizes_itr = thrust::constant_iterator(value.view().size()); + auto offsets = std::get<0>( + cudf::detail::make_offsets_child_column(sizes_itr, sizes_itr + 1, stream, mr_final)); size_type null_count = value.is_valid(stream) ? 0 : 1; auto null_mask_state = null_count ? mask_state::ALL_NULL : mask_state::UNALLOCATED; auto null_mask = cudf::detail::create_null_mask(1, null_mask_state, stream, mr_final); diff --git a/cpp/tests/column/factories_test.cpp b/cpp/tests/column/factories_test.cpp index 66de4e19b27..95706ad9e37 100644 --- a/cpp/tests/column/factories_test.cpp +++ b/cpp/tests/column/factories_test.cpp @@ -819,3 +819,13 @@ void struct_from_scalar(bool is_valid) TEST_F(ColumnFactoryTest, FromStructScalar) { struct_from_scalar(true); } TEST_F(ColumnFactoryTest, FromStructScalarNull) { struct_from_scalar(false); } + +TEST_F(ColumnFactoryTest, FromScalarErrors) +{ + cudf::string_scalar ss("hello world"); + EXPECT_THROW(cudf::make_column_from_scalar(ss, 214748365), std::overflow_error); + + using FCW = cudf::test::fixed_width_column_wrapper; + auto s = cudf::make_list_scalar(FCW({1, 2, 3, 4, 5, 6, 7, 8, 9, 10})); + EXPECT_THROW(cudf::make_column_from_scalar(*s, 214748365), std::overflow_error); +}