Skip to content

Commit

Permalink
Fix make_column_from_scalar for all-null strings column (#11807)
Browse files Browse the repository at this point in the history
Fixes the `cudf::make_column_from_scalar` for an invalid `cudf::string_scalar` to return a column with children. Some libcudf APIs will not work with a strings column with no children. This condition would be rare enough that additional logic for checking no children in these places would be a performance and maintenance issue.
This also greatly simplifies the `make_column_from_scalar` specialization logic for strings.

Closes #11756

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Yunsong Wang (https://github.com/PointKernel)
  - Bradley Dice (https://github.com/bdice)

URL: #11807
  • Loading branch information
davidwendt authored Oct 6, 2022
1 parent 029b1db commit e323f0a
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 24 deletions.
12 changes: 3 additions & 9 deletions cpp/src/column/column_factories.cu
Original file line number Diff line number Diff line change
Expand Up @@ -54,21 +54,15 @@ std::unique_ptr<cudf::column> column_from_scalar_dispatch::operator()<cudf::stri
rmm::mr::device_memory_resource* mr) const
{
if (size == 0) return make_empty_column(value.type());
auto null_mask = detail::create_null_mask(size, mask_state::ALL_NULL, stream, mr);

if (!value.is_valid(stream))
return std::make_unique<column>(
value.type(), size, rmm::device_buffer{}, std::move(null_mask), size);

// Create a strings column_view with all nulls and no children.
// Since we are setting every row to the scalar, the fill() never needs to access
// any of the children in the strings column which would otherwise cause an exception.
column_view sc{
data_type{type_id::STRING}, size, nullptr, static_cast<bitmask_type*>(null_mask.data()), size};
column_view sc{value.type(), size, nullptr};
auto& sv = static_cast<scalar_type_t<cudf::string_view> const&>(value);

// fill the column with the scalar
auto output = strings::detail::fill(strings_column_view(sc), 0, size, sv, stream, mr);
output->set_null_mask(rmm::device_buffer{}, 0); // should be no nulls

return output;
}

Expand Down
20 changes: 12 additions & 8 deletions cpp/src/strings/filling/fill.cu
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,18 @@ std::unique_ptr<column> fill(
auto d_strings = *strings_column;

// create resulting null mask
auto valid_mask = cudf::detail::valid_if(
thrust::make_counting_iterator<size_type>(0),
thrust::make_counting_iterator<size_type>(strings_count),
[d_strings, begin, end, d_value] __device__(size_type idx) {
return ((begin <= idx) && (idx < end)) ? d_value.is_valid() : !d_strings.is_null(idx);
},
stream,
mr);
auto valid_mask = [begin, end, d_value, value, d_strings, stream, mr] {
if (begin == 0 and end == d_strings.size() and value.is_valid(stream))
return std::pair(rmm::device_buffer{}, 0);
return cudf::detail::valid_if(
thrust::make_counting_iterator<size_type>(0),
thrust::make_counting_iterator<size_type>(d_strings.size()),
[d_strings, begin, end, d_value] __device__(size_type idx) {
return ((begin <= idx) && (idx < end)) ? d_value.is_valid() : !d_strings.is_null(idx);
},
stream,
mr);
}();
auto null_count = valid_mask.second;
rmm::device_buffer& null_mask = valid_mask.first;

Expand Down
2 changes: 2 additions & 0 deletions cpp/tests/column/factories_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,7 @@ TEST_F(ColumnFactoryTest, FromStringScalar)
EXPECT_EQ(0, column->null_count());
EXPECT_FALSE(column->nullable());
EXPECT_FALSE(column->has_nulls());
EXPECT_TRUE(column->num_children() > 0);
}

TEST_F(ColumnFactoryTest, FromNullStringScalar)
Expand All @@ -434,6 +435,7 @@ TEST_F(ColumnFactoryTest, FromNullStringScalar)
EXPECT_EQ(2, column->null_count());
EXPECT_TRUE(column->nullable());
EXPECT_TRUE(column->has_nulls());
EXPECT_TRUE(column->num_children() > 0);
}

TEST_F(ColumnFactoryTest, FromStringScalarWithZeroSize)
Expand Down
4 changes: 2 additions & 2 deletions cpp/tests/filling/fill_tests.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -206,7 +206,7 @@ class FillStringTestFixture : public cudf::test::BaseFixture {
}));

auto p_ret = cudf::fill(destination, begin, end, *p_val);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*p_ret, expected);
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*p_ret, expected);
}
};

Expand Down
10 changes: 5 additions & 5 deletions cpp/tests/strings/fill_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ TEST_F(StringsFillTest, Fill)
h_expected.begin(),
h_expected.end(),
thrust::make_transform_iterator(h_expected.begin(), [](auto str) { return str != nullptr; }));
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
}
{
auto results = cudf::strings::detail::fill(view, 2, 4, cudf::string_scalar("", false));
Expand All @@ -57,23 +57,23 @@ TEST_F(StringsFillTest, Fill)
h_expected.begin(),
h_expected.end(),
thrust::make_transform_iterator(h_expected.begin(), [](auto str) { return str != nullptr; }));
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
}
{
auto results = cudf::strings::detail::fill(view, 5, 5, cudf::string_scalar("zz"));
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, view.parent());
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, view.parent());
}
{
auto results = cudf::strings::detail::fill(view, 0, 7, cudf::string_scalar(""));
cudf::test::strings_column_wrapper expected({"", "", "", "", "", "", ""},
{1, 1, 1, 1, 1, 1, 1});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
}
{
auto results = cudf::strings::detail::fill(view, 0, 7, cudf::string_scalar("", false));
cudf::test::strings_column_wrapper expected({"", "", "", "", "", "", ""},
{0, 0, 0, 0, 0, 0, 0});
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
}
}

Expand Down

0 comments on commit e323f0a

Please sign in to comment.