diff --git a/cpp/src/strings/split/split.cuh b/cpp/src/strings/split/split.cuh index 160d1be3978..69a11aabfcd 100644 --- a/cpp/src/strings/split/split.cuh +++ b/cpp/src/strings/split/split.cuh @@ -365,8 +365,8 @@ std::pair, rmm::device_uvector> split }); // create offsets from the counts for return to the caller - auto [offsets, total_tokens] = cudf::strings::detail::make_offsets_child_column( - token_counts.begin(), token_counts.end(), stream, mr); + auto [offsets, total_tokens] = + cudf::detail::make_offsets_child_column(token_counts.begin(), token_counts.end(), stream, mr); auto const d_tokens_offsets = cudf::detail::offsetalator_factory::make_input_iterator(offsets->view()); diff --git a/cpp/src/strings/split/split_re.cu b/cpp/src/strings/split/split_re.cu index 4dfb3e9ea62..6785ab9c893 100644 --- a/cpp/src/strings/split/split_re.cu +++ b/cpp/src/strings/split/split_re.cu @@ -147,7 +147,7 @@ std::pair, std::unique_ptr> gener auto const begin = cudf::detail::make_counting_transform_iterator(0, map_fn); auto const end = begin + strings_count; - auto [offsets, total_tokens] = cudf::strings::detail::make_offsets_child_column( + auto [offsets, total_tokens] = cudf::detail::make_offsets_child_column( begin, end, stream, rmm::mr::get_current_device_resource()); auto const d_offsets = cudf::detail::offsetalator_factory::make_input_iterator(offsets->view()); diff --git a/cpp/tests/column/factories_test.cpp b/cpp/tests/column/factories_test.cpp index b06d097647d..afebc91dd73 100644 --- a/cpp/tests/column/factories_test.cpp +++ b/cpp/tests/column/factories_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -761,6 +762,7 @@ TEST_F(ColumnFactoryTest, FromStructScalarNull) { struct_from_scalar(false); } TEST_F(ColumnFactoryTest, FromScalarErrors) { + if (cudf::strings::detail::is_large_strings_enabled()) { return; } cudf::string_scalar ss("hello world"); EXPECT_THROW(cudf::make_column_from_scalar(ss, 214748365), std::overflow_error); diff --git a/cpp/tests/copying/concatenate_tests.cpp b/cpp/tests/copying/concatenate_tests.cpp index a9bf22682cf..3b7bff69938 100644 --- a/cpp/tests/copying/concatenate_tests.cpp +++ b/cpp/tests/copying/concatenate_tests.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -188,6 +189,8 @@ TEST_F(StringColumnTest, ConcatenateManyColumns) TEST_F(StringColumnTest, ConcatenateTooLarge) { + if (cudf::strings::detail::is_large_strings_enabled()) { return; } + std::string big_str(1000000, 'a'); // 1 million bytes x 5 = 5 million bytes cudf::test::strings_column_wrapper input{big_str, big_str, big_str, big_str, big_str}; std::vector input_cols; @@ -374,7 +377,7 @@ TEST_F(OverflowTest, OverflowTest) } // string column, overflow on chars - { + if (!cudf::strings::detail::is_large_strings_enabled()) { constexpr auto size = static_cast(static_cast(1024) * 1024 * 1024); // try and concatenate 6 string columns of with 1 billion chars in each @@ -497,7 +500,7 @@ TEST_F(OverflowTest, Presliced) } // strings, overflow on chars - { + if (!cudf::strings::detail::is_large_strings_enabled()) { constexpr cudf::size_type total_chars_size = 1024 * 1024 * 1024; constexpr cudf::size_type string_size = 64; constexpr cudf::size_type num_rows = total_chars_size / string_size; diff --git a/cpp/tests/strings/array_tests.cpp b/cpp/tests/strings/array_tests.cpp index b22d7257041..a1bb87a43fb 100644 --- a/cpp/tests/strings/array_tests.cpp +++ b/cpp/tests/strings/array_tests.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -152,6 +153,8 @@ TEST_F(StringsColumnTest, GatherZeroSizeStringsColumn) TEST_F(StringsColumnTest, GatherTooBig) { + if (cudf::strings::detail::is_large_strings_enabled()) { return; } + std::vector h_chars(3000000); cudf::test::fixed_width_column_wrapper chars(h_chars.begin(), h_chars.end()); cudf::test::fixed_width_column_wrapper offsets({0, 3000000}); diff --git a/cpp/tests/strings/factories_test.cu b/cpp/tests/strings/factories_test.cu index 64123690aea..35d648f16e0 100644 --- a/cpp/tests/strings/factories_test.cu +++ b/cpp/tests/strings/factories_test.cu @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -96,18 +97,11 @@ TEST_F(StringsFactoriesTest, CreateColumnFromPair) EXPECT_EQ(strings_view.chars_size(cudf::get_default_stream()), memsize); // check string data - auto h_chars_data = cudf::detail::make_std_vector_sync( - cudf::device_span(strings_view.chars_begin(cudf::get_default_stream()), - strings_view.chars_size(cudf::get_default_stream())), - cudf::get_default_stream()); - auto h_offsets_data = cudf::detail::make_std_vector_sync( - cudf::device_span( - strings_view.offsets().data() + strings_view.offset(), - strings_view.size() + 1), - cudf::get_default_stream()); - EXPECT_EQ(memcmp(h_buffer.data(), h_chars_data.data(), h_buffer.size()), 0); - EXPECT_EQ( - memcmp(h_offsets.data(), h_offsets_data.data(), h_offsets.size() * sizeof(cudf::size_type)), 0); + cudf::test::strings_column_wrapper expected( + h_test_strings.begin(), + h_test_strings.end(), + cudf::test::iterators::nulls_from_nullptrs(h_test_strings)); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(column->view(), expected); } TEST_F(StringsFactoriesTest, CreateColumnFromOffsets) diff --git a/cpp/tests/strings/repeat_strings_tests.cpp b/cpp/tests/strings/repeat_strings_tests.cpp index 9d08ac9c00c..0539895c5f4 100644 --- a/cpp/tests/strings/repeat_strings_tests.cpp +++ b/cpp/tests/strings/repeat_strings_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ #include #include +#include #include #include @@ -220,6 +221,8 @@ TEST_F(RepeatStringsTest, StringsColumnWithColumnRepeatTimesInvalidInput) TEST_F(RepeatStringsTest, StringsColumnWithColumnRepeatTimesOverflowOutput) { + if (cudf::strings::detail::is_large_strings_enabled()) { return; } + auto const strs = strs_col{"1", "12", "123", "1234", "12345", "123456", "1234567"}; auto const strs_cv = cudf::strings_column_view(strs);