diff --git a/cpp/src/copying/concatenate.cu b/cpp/src/copying/concatenate.cu index 9b9e780965a..ddf39e21685 100644 --- a/cpp/src/copying/concatenate.cu +++ b/cpp/src/copying/concatenate.cu @@ -403,26 +403,7 @@ void traverse_children::operator()(host_span size_t { - strings_column_view scv(b); - return a + (scv.is_empty() ? 0 - // if the column is unsliced, skip the offset retrieval. - : scv.offset() > 0 - ? cudf::detail::get_value( - scv.offsets(), scv.offset() + scv.size(), stream) - - cudf::detail::get_value(scv.offsets(), scv.offset(), stream) - // if the offset() is 0, it can still be sliced to a shorter length. in this case - // we only need to read a single offset. otherwise just return the full length - // (chars_size()) - : scv.size() + 1 == scv.offsets().size() - ? scv.chars_size() - : cudf::detail::get_value(scv.offsets(), scv.size(), stream)); - }); - CUDF_EXPECTS(total_char_count <= static_cast(std::numeric_limits::max()), - "Total number of concatenated chars exceeds the column size limit", - std::overflow_error); + // chars -- checked in call to cudf::strings::detail::concatenate } template <> diff --git a/cpp/tests/copying/concatenate_tests.cpp b/cpp/tests/copying/concatenate_tests.cpp index c81f1772d10..b8faa0bd081 100644 --- a/cpp/tests/copying/concatenate_tests.cpp +++ b/cpp/tests/copying/concatenate_tests.cpp @@ -193,26 +193,10 @@ TEST_F(StringColumnTest, ConcatenateColumnViewLarge) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); } -TEST_F(StringColumnTest, ConcatenateTooManyColumns) +TEST_F(StringColumnTest, ConcatenateManyColumns) { - std::vector h_strings{"aaa", - "bb", - "", - "cccc", - "d", - "ééé", - "ff", - "gggg", - "", - "h", - "iiii", - "jjj", - "k", - "lllllll", - "mmmmm", - "n", - "oo", - "ppp"}; + std::vector h_strings{ + "aaa", "bb", "", "cccc", "d", "ééé", "ff", "gggg", "", "h", "iiii", "jjj"}; std::vector expected_strings; std::vector wrappers; @@ -228,6 +212,18 @@ TEST_F(StringColumnTest, ConcatenateTooManyColumns) 
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); } +TEST_F(StringColumnTest, ConcatenateTooLarge) +{ + std::string big_str(1000000, 'a'); // 1 million bytes x 5 = 5 million bytes + cudf::test::strings_column_wrapper input{big_str, big_str, big_str, big_str, big_str}; + std::vector input_cols; + // 5 million bytes x 500 = 2.5GB > std::numeric_limits::max() + for (int i = 0; i < 500; ++i) { + input_cols.push_back(input); + } + EXPECT_THROW(cudf::concatenate(input_cols), std::overflow_error); +} + struct TableTest : public cudf::test::BaseFixture {}; TEST_F(TableTest, ConcatenateTables)