diff --git a/cpp/benchmarks/common/generate_input.cu b/cpp/benchmarks/common/generate_input.cu index 762e9640d12..fd7b469cffd 100644 --- a/cpp/benchmarks/common/generate_input.cu +++ b/cpp/benchmarks/common/generate_input.cu @@ -523,7 +523,8 @@ std::unique_ptr create_random_utf8_string_column(data_profile cons num_rows, std::move(offsets), std::move(chars), - profile.get_null_probability().has_value() ? std::move(result_bitmask) : rmm::device_buffer{}); + profile.get_null_probability().has_value() ? std::move(result_bitmask) : rmm::device_buffer{}, + null_count); } /** diff --git a/cpp/benchmarks/string/factory.cu b/cpp/benchmarks/string/factory.cu index c2bd2b968b4..c73bcb0b0ad 100644 --- a/cpp/benchmarks/string/factory.cu +++ b/cpp/benchmarks/string/factory.cu @@ -63,7 +63,7 @@ static void BM_factory(benchmark::State& state) for (auto _ : state) { cuda_event_timer raii(state, true, cudf::get_default_stream()); - cudf::make_strings_column(pairs); + cudf::make_strings_column(pairs, cudf::get_default_stream()); } cudf::strings_column_view input(column->view()); diff --git a/cpp/examples/strings/custom_optimized.cu b/cpp/examples/strings/custom_optimized.cu index ed8e075ec4b..36521871ad8 100644 --- a/cpp/examples/strings/custom_optimized.cu +++ b/cpp/examples/strings/custom_optimized.cu @@ -155,7 +155,8 @@ std::unique_ptr redact_strings(cudf::column_view const& names, *d_names, *d_visibilities, offsets.data(), chars.data()); // create column from offsets and chars vectors (no copy is performed) - auto result = cudf::make_strings_column(names.size(), std::move(offsets), std::move(chars)); + auto result = + cudf::make_strings_column(names.size(), std::move(offsets), std::move(chars), {}, 0); // wait for all of the above to finish stream.synchronize(); diff --git a/cpp/include/cudf/column/column_factories.hpp b/cpp/include/cudf/column/column_factories.hpp index 725faeae626..d653d482e4b 100644 --- a/cpp/include/cudf/column/column_factories.hpp +++ b/cpp/include/cudf/column/column_factories.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -422,29 +422,26 @@ std::unique_ptr make_strings_column( * * This function makes a deep copy of the strings, offsets, null_mask to create a new column. * - * @throws std::bad_alloc if device memory allocation fails - * - * @param[in] strings The device span of chars in device memory. This char vector is expected to be + * @param strings The device span of chars in device memory. This char vector is expected to be * UTF-8 encoded characters. - * @param[in] offsets The device span of byte offsets in device memory. The number of elements is + * @param offsets The device span of byte offsets in device memory. The number of elements is * one more than the total number of strings so the `offsets.back()` is the total number of bytes * in the strings array. `offsets.front()` must always be 0 to point to the beginning of `strings`. - * @param[in] null_mask Device span containing the null element indicator bitmask. Arrow format for + * @param null_mask Device span containing the null element indicator bitmask. Arrow format for * nulls is used for interpreting this bitmask. - * @param[in] null_count The number of null string entries. If equal to `UNKNOWN_NULL_COUNT`, the - * null count will be computed dynamically on the first invocation of `column::null_count()` - * @param[in] stream CUDA stream used for device memory operations and kernel launches. - * @param[in] mr Device memory resource used for allocation of the column's `null_mask` and children - * columns' device memory. + * @param null_count The number of null string entries + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used for allocation of the column's `null_mask` and children + * columns' device memory * @return Constructed strings column */ std::unique_ptr make_strings_column( cudf::device_span strings, cudf::device_span offsets, - cudf::device_span null_mask = {}, - size_type null_count = cudf::UNKNOWN_NULL_COUNT, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + cudf::device_span null_mask, + size_type null_count, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** * @brief Construct a STRING type column given offsets column, chars columns, and null mask and null @@ -487,8 +484,8 @@ std::unique_ptr make_strings_column(size_type num_strings, std::unique_ptr make_strings_column(size_type num_strings, rmm::device_uvector&& offsets, rmm::device_uvector&& chars, - rmm::device_buffer&& null_mask = {}, - size_type null_count = cudf::UNKNOWN_NULL_COUNT); + rmm::device_buffer&& null_mask, + size_type null_count); /** * @brief Construct a LIST type column given offsets column, child column, null mask and null diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index becc832d207..b9a6d8f490a 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -276,10 +276,8 @@ std::unique_ptr md5_hash(table_view const& input, } }); - rmm::device_buffer null_mask{0, stream, mr}; - return make_strings_column( - input.num_rows(), std::move(offsets_column), std::move(chars_column), 0, std::move(null_mask)); + input.num_rows(), std::move(offsets_column), std::move(chars_column), 0, {}); } } // namespace detail diff --git a/cpp/src/io/text/multibyte_split.cu b/cpp/src/io/text/multibyte_split.cu index afa260e215a..a04c7d84463 100644 --- a/cpp/src/io/text/multibyte_split.cu +++ b/cpp/src/io/text/multibyte_split.cu @@ -587,7 +587,7 @@ std::unique_ptr multibyte_split(cudf::io::text::data_chunk_source }); return cudf::strings::detail::make_strings_column(it, it + string_count, stream, mr); } else { - return cudf::make_strings_column(string_count, std::move(offsets), std::move(chars)); + return cudf::make_strings_column(string_count, std::move(offsets), std::move(chars), {}, 0); } } diff --git a/cpp/src/text/subword/load_merges_file.cu b/cpp/src/text/subword/load_merges_file.cu index 12d25bec52c..2a85ed0a37f 100644 --- a/cpp/src/text/subword/load_merges_file.cu +++ b/cpp/src/text/subword/load_merges_file.cu @@ -98,7 +98,7 @@ std::unique_ptr load_file_to_column(std::string const& filename_me auto d_chars = cudf::detail::make_device_uvector_async(chars, stream, mr); auto d_offsets = cudf::detail::make_device_uvector_async(offsets, stream, mr); - return cudf::make_strings_column(d_chars, d_offsets); + return cudf::make_strings_column(d_chars, d_offsets, {}, 0); } std::unique_ptr initialize_merge_pairs_map( diff --git a/cpp/tests/strings/contains_tests.cpp b/cpp/tests/strings/contains_tests.cpp index 5bbf7b72588..4a6e68e56e0 100644 --- a/cpp/tests/strings/contains_tests.cpp +++ b/cpp/tests/strings/contains_tests.cpp @@ -301,7 +301,7 @@ TEST_F(StringsContainsTests, HexTest) ascii_chars, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); auto d_offsets = cudf::detail::make_device_uvector_sync( offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); - auto input = cudf::make_strings_column(d_chars, d_offsets); + auto input = cudf::make_strings_column(d_chars, d_offsets, {}, 0); auto strings_view = cudf::strings_column_view(input->view()); for (auto ch : ascii_chars) {