From 0b339dc284ddff9db5cf4957f9c090b0871840fd Mon Sep 17 00:00:00 2001 From: davidwendt Date: Fri, 5 Mar 2021 17:44:53 -0500 Subject: [PATCH 1/4] Remove step parameter from strings::detail::copy_slice --- cpp/include/cudf/strings/copying.hpp | 17 ++++++----- cpp/src/column/column.cu | 2 +- cpp/src/strings/attributes.cu | 43 +++++++++++++--------------- cpp/src/strings/copying/copying.cu | 40 ++++++++------------------ 4 files changed, 41 insertions(+), 61 deletions(-) diff --git a/cpp/include/cudf/strings/copying.hpp b/cpp/include/cudf/strings/copying.hpp index b4455e2c3b4..14ca92f48ab 100644 --- a/cpp/include/cudf/strings/copying.hpp +++ b/cpp/include/cudf/strings/copying.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,24 +25,24 @@ namespace strings { namespace detail { /** * @brief Returns a new strings column created from a subset of - * of the strings column. The subset of strings selected is between - * start (inclusive) and end (exclusive) with increments of step. + * of the strings column. + * + * The subset of strings selected is between + * start (inclusive) and end (exclusive). * * @code{.pseudo} * Example: * s1 = ["a", "b", "c", "d", "e", "f"] - * s2 = slice( s1, 2 ) + * s2 = copy_slice( s1, 2 ) * s2 is ["c", "d", "e", "f"] - * s3 = slice( s1, 1, 2 ) - * s3 is ["b", "d", "f"] + * s2 = copy_slice( s1, 1, 3 ) + * s2 is ["d", "e"] * @endcode * * @param strings Strings instance for this operation. * @param start Index to first string to select in the column (inclusive). * @param end Index to last string to select in the column (exclusive). * Default -1 indicates the last element. - * @param step Increment value between indices. - * Default step is 1. * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. * @return New strings column of size (end-start)/step. @@ -51,7 +51,6 @@ std::unique_ptr copy_slice( strings_column_view const& strings, size_type start, size_type end = -1, - size_type step = 1, rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); diff --git a/cpp/src/column/column.cu b/cpp/src/column/column.cu index fc1a0871e66..db3a751def7 100644 --- a/cpp/src/column/column.cu +++ b/cpp/src/column/column.cu @@ -193,7 +193,7 @@ struct create_column_from_view { std::unique_ptr operator()() { cudf::strings_column_view sview(view); - return cudf::strings::detail::copy_slice(sview, 0, view.size(), 1, stream, mr); + return cudf::strings::detail::copy_slice(sview, 0, view.size(), stream, mr); } template #include -#include +#include #include #include @@ -54,28 +54,26 @@ std::unique_ptr counts_fn(strings_column_view const& strings, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto strings_count = strings.size(); + // create output column + auto results = make_numeric_column(data_type{type_id::INT32}, + strings.size(), + cudf::detail::copy_bitmask(strings.parent(), stream, mr), + strings.null_count(), + stream, + mr); + auto d_lengths = results->mutable_view().data(); + // input column device view auto strings_column = cudf::column_device_view::create(strings.parent(), stream); auto d_strings = *strings_column; - // create output column - auto results = std::make_unique( - cudf::data_type{type_id::INT32}, - strings_count, - rmm::device_buffer(strings_count * sizeof(int32_t), stream, mr), - cudf::detail::copy_bitmask(strings.parent(), stream, mr), // copy the null mask - strings.null_count()); - auto results_view = results->mutable_view(); - auto d_lengths = results_view.data(); // fill in the lengths thrust::transform(rmm::exec_policy(stream), thrust::make_counting_iterator(0), - thrust::make_counting_iterator(strings_count), + thrust::make_counting_iterator(strings.size()), d_lengths, [d_strings, ufn] __device__(size_type idx) { - int32_t length = 0; - if (!d_strings.is_null(idx)) - length = static_cast(ufn(d_strings.element(idx))); - return length; + return d_strings.is_null(idx) + ? 0 + : static_cast(ufn(d_strings.element(idx))); }); results->set_null_count(strings.null_count()); // reset null count return results; @@ -140,23 +138,22 @@ std::unique_ptr code_points( auto d_column = *strings_column; // create offsets vector to account for each string's character length - rmm::device_vector offsets(strings.size() + 1); - size_type* d_offsets = offsets.data().get(); + rmm::device_uvector offsets(strings.size() + 1, stream); thrust::transform_inclusive_scan( rmm::exec_policy(stream), thrust::make_counting_iterator(0), thrust::make_counting_iterator(strings.size()), - d_offsets + 1, + offsets.begin() + 1, [d_column] __device__(size_type idx) { size_type length = 0; if (!d_column.is_null(idx)) length = d_column.element(idx).length(); return length; }, thrust::plus()); - CUDA_TRY(cudaMemsetAsync(d_offsets, 0, sizeof(size_type), stream.value())); + offsets.set_element(0, 0, stream); // the total size is the number of characters in the entire column - size_type num_characters = offsets.back(); + size_type num_characters = offsets.back_element(stream); // create output column with no nulls auto results = make_numeric_column( data_type{type_id::INT32}, num_characters, mask_state::UNALLOCATED, stream, mr); @@ -167,7 +164,7 @@ std::unique_ptr code_points( thrust::for_each_n(rmm::exec_policy(stream), thrust::make_counting_iterator(0), strings.size(), - code_points_fn{d_column, d_offsets, d_results}); + code_points_fn{d_column, offsets.data(), d_results}); results->set_null_count(0); return results; diff --git a/cpp/src/strings/copying/copying.cu b/cpp/src/strings/copying/copying.cu index 80ef11ec456..70f11836247 100644 --- a/cpp/src/strings/copying/copying.cu +++ b/cpp/src/strings/copying/copying.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,42 +14,32 @@ * limitations under the License. */ -#include #include #include -#include +#include #include -#include -#include #include -#include #include #include #include -#include - namespace cudf { namespace strings { namespace detail { -// new strings column from subset of this strings instance + std::unique_ptr copy_slice(strings_column_view const& strings, size_type start, size_type end, - size_type step, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - size_type strings_count = strings.size(); - if (strings_count == 0) return make_empty_strings_column(stream, mr); - if (step == 0) step = 1; - CUDF_EXPECTS(step > 0, "Parameter step must be positive integer."); - if (end < 0 || end > strings_count) end = strings_count; + if (strings.is_empty()) return make_empty_strings_column(stream, mr); + if (end < 0 || end > strings.size()) end = strings.size(); CUDF_EXPECTS(((start >= 0) && (start < end)), "Invalid start parameter value."); - strings_count = cudf::util::round_up_safe((end - start), step); - if (start == 0 && strings.offset() == 0 && step == 1) { - // sliced at the beginning and copying every step, so no need to gather + size_type const strings_count = end - start; + if (start == 0 && strings.offset() == 0) { + // sliced at the beginning and copying everything, so no need to gather auto offsets_column = std::make_unique( cudf::slice(strings.offsets(), {0, strings_count + 1}).front(), stream, mr); auto data_size = @@ -66,18 +56,12 @@ std::unique_ptr copy_slice(strings_column_view const& strings, mr); } - // do the gather instead - // build indices - rmm::device_vector indices(strings_count); - thrust::sequence(rmm::exec_policy(stream), indices.begin(), indices.end(), start, step); - // create a column_view as a wrapper of these indices - column_view indices_view( - data_type{type_id::INT32}, strings_count, indices.data().get(), nullptr, 0); - // build a new strings column from the indices + // do the full gather instead + // TODO: it may be faster to just copy sliced child columns and then fixup the offset values auto sliced_table = cudf::detail::gather(table_view{{strings.parent()}}, - indices_view, + thrust::counting_iterator(start), + thrust::counting_iterator(end), cudf::out_of_bounds_policy::DONT_CHECK, - cudf::detail::negative_index_policy::NOT_ALLOWED, stream, mr) ->release(); From 8ffd37eb6c1dde67978477cbae7317d3e7a3aba0 Mon Sep 17 00:00:00 2001 From: davidwendt Date: Mon, 8 Mar 2021 11:46:32 -0500 Subject: [PATCH 2/4] merge optimized code to handle non-zero offset/start --- cpp/src/strings/copying/copying.cu | 70 ++++++++++++++++-------------- 1 file changed, 38 insertions(+), 32 deletions(-) diff --git a/cpp/src/strings/copying/copying.cu b/cpp/src/strings/copying/copying.cu index 70f11836247..cdf188bfdc5 100644 --- a/cpp/src/strings/copying/copying.cu +++ b/cpp/src/strings/copying/copying.cu @@ -16,9 +16,10 @@ #include #include -#include #include -#include +#include +#include +#include #include #include @@ -37,38 +38,43 @@ std::unique_ptr copy_slice(strings_column_view const& strings, if (strings.is_empty()) return make_empty_strings_column(stream, mr); if (end < 0 || end > strings.size()) end = strings.size(); CUDF_EXPECTS(((start >= 0) && (start < end)), "Invalid start parameter value."); - size_type const strings_count = end - start; - if (start == 0 && strings.offset() == 0) { - // sliced at the beginning and copying everything, so no need to gather - auto offsets_column = std::make_unique( - cudf::slice(strings.offsets(), {0, strings_count + 1}).front(), stream, mr); - auto data_size = - cudf::detail::get_value(offsets_column->view(), strings_count, stream); - auto chars_column = std::make_unique( - cudf::slice(strings.chars(), {0, data_size}).front(), stream, mr); - auto null_mask = cudf::detail::copy_bitmask(strings.null_mask(), 0, strings_count, stream, mr); - return make_strings_column(strings_count, - std::move(offsets_column), - std::move(chars_column), - UNKNOWN_NULL_COUNT, - std::move(null_mask), - stream, - mr); + auto const strings_count = end - start; + auto const offsets_offset = start + strings.offset(); + + // slice the offsets child column + auto offsets_column = std::make_unique( + cudf::slice(strings.offsets(), {offsets_offset, offsets_offset + strings_count + 1}).front(), + stream, + mr); + auto const chars_offset = + offsets_offset == 0 ? 0 : cudf::detail::get_value(offsets_column->view(), 0, stream); + if (chars_offset > 0) { + // adjust the individual offset values only if needed + auto d_offsets = offsets_column->mutable_view(); + thrust::transform(rmm::exec_policy(stream), + d_offsets.begin(), + d_offsets.end(), + d_offsets.begin(), + [chars_offset] __device__(auto offset) { return offset - chars_offset; }); } - // do the full gather instead - // TODO: it may be faster to just copy sliced child columns and then fixup the offset values - auto sliced_table = cudf::detail::gather(table_view{{strings.parent()}}, - thrust::counting_iterator(start), - thrust::counting_iterator(end), - cudf::out_of_bounds_policy::DONT_CHECK, - stream, - mr) - ->release(); - std::unique_ptr output_column(std::move(sliced_table.front())); - if (output_column->null_count() == 0) - output_column->set_null_mask(rmm::device_buffer{0, stream, mr}, 0); - return output_column; + // slice the chars child column + auto const data_size = + cudf::detail::get_value(offsets_column->view(), strings_count, stream); + auto chars_column = std::make_unique( + cudf::slice(strings.chars(), {chars_offset, chars_offset + data_size}).front(), stream, mr); + + // slice the null mask + auto null_mask = cudf::detail::copy_bitmask( + strings.null_mask(), offsets_offset, offsets_offset + strings_count, stream, mr); + + return make_strings_column(strings_count, + std::move(offsets_column), + std::move(chars_column), + UNKNOWN_NULL_COUNT, + std::move(null_mask), + stream, + mr); } } // namespace detail From 87f5cf63d60a0903a61e488d8e630ab08f8ed17d Mon Sep 17 00:00:00 2001 From: davidwendt Date: Mon, 8 Mar 2021 11:47:03 -0500 Subject: [PATCH 3/4] move strings/copying.hpp to detail folder --- conda/recipes/libcudf/meta.yaml | 2 +- .../cudf/strings/{ => detail}/copying.hpp | 2 +- cpp/src/column/column.cu | 2 +- cpp/tests/strings/array_tests.cu | 58 +++++++------------ 4 files changed, 24 insertions(+), 40 deletions(-) rename cpp/include/cudf/strings/{ => detail}/copying.hpp (99%) diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index a46712def28..141a1ae0cb1 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -167,10 +167,10 @@ test: - test -f $PREFIX/include/cudf/strings/convert/convert_integers.hpp - test -f $PREFIX/include/cudf/strings/convert/convert_ipv4.hpp - test -f $PREFIX/include/cudf/strings/convert/convert_urls.hpp - - test -f $PREFIX/include/cudf/strings/copying.hpp - test -f $PREFIX/include/cudf/strings/detail/combine.hpp - test -f $PREFIX/include/cudf/strings/detail/concatenate.hpp - test -f $PREFIX/include/cudf/strings/detail/converters.hpp + - test -f $PREFIX/include/cudf/strings/detail/copying.hpp - test -f $PREFIX/include/cudf/strings/detail/fill.hpp - test -f $PREFIX/include/cudf/strings/detail/replace.hpp - test -f $PREFIX/include/cudf/strings/detail/utilities.hpp diff --git a/cpp/include/cudf/strings/copying.hpp b/cpp/include/cudf/strings/detail/copying.hpp similarity index 99% rename from cpp/include/cudf/strings/copying.hpp rename to cpp/include/cudf/strings/detail/copying.hpp index 14ca92f48ab..19dfa193207 100644 --- a/cpp/include/cudf/strings/copying.hpp +++ b/cpp/include/cudf/strings/detail/copying.hpp @@ -36,7 +36,7 @@ namespace detail { * s2 = copy_slice( s1, 2 ) * s2 is ["c", "d", "e", "f"] * s2 = copy_slice( s1, 1, 3 ) - * s2 is ["d", "e"] + * s2 is ["b", "c"] * @endcode * * @param strings Strings instance for this operation. diff --git a/cpp/src/column/column.cu b/cpp/src/column/column.cu index db3a751def7..d30e5fc746a 100644 --- a/cpp/src/column/column.cu +++ b/cpp/src/column/column.cu @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/cpp/tests/strings/array_tests.cu b/cpp/tests/strings/array_tests.cu index 26b00d8a548..2d1ae1a862d 100644 --- a/cpp/tests/strings/array_tests.cu +++ b/cpp/tests/strings/array_tests.cu @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include @@ -71,20 +71,17 @@ TEST_P(SliceParmsTest, Slice) h_strings.begin(), h_strings.end(), thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; })); + cudf::size_type start = 3; cudf::size_type end = GetParam(); - std::vector h_expected; - if (end > start) { - for (cudf::size_type idx = start; (idx < end) && (idx < (cudf::size_type)h_strings.size()); - ++idx) - h_expected.push_back(h_strings[idx]); - } - auto strings_view = cudf::strings_column_view(strings); - auto results = cudf::strings::detail::copy_slice(strings_view, start, end); + auto results = cudf::strings::detail::copy_slice(cudf::strings_column_view(strings), start, end); - cudf::test::strings_column_wrapper expected(h_expected.begin(), h_expected.end()); - // thrust::make_transform_iterator( h_expected.begin(), [] (auto str) { return str!=nullptr; })); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); + cudf::test::strings_column_wrapper expected( + h_strings.begin() + start, + h_strings.begin() + end, + thrust::make_transform_iterator(h_strings.begin() + start, + [](auto str) { return str != nullptr; })); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected); } TEST_P(SliceParmsTest, SliceAllNulls) @@ -94,42 +91,29 @@ TEST_P(SliceParmsTest, SliceAllNulls) h_strings.begin(), h_strings.end(), thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; })); + cudf::size_type start = 3; cudf::size_type end = GetParam(); - std::vector h_expected; - if (end > start) { - for (cudf::size_type idx = start; (idx < end) && (idx < (cudf::size_type)h_strings.size()); - ++idx) - h_expected.push_back(h_strings[idx]); - } - auto strings_view = cudf::strings_column_view(strings); - auto results = cudf::strings::detail::copy_slice(strings_view, start, end); + auto results = cudf::strings::detail::copy_slice(cudf::strings_column_view(strings), start, end); + cudf::test::strings_column_wrapper expected( - h_expected.begin(), - h_expected.end(), - thrust::make_transform_iterator(h_expected.begin(), [](auto str) { return str != nullptr; })); + h_strings.begin() + start, + h_strings.begin() + end, + thrust::make_transform_iterator(h_strings.begin() + start, + [](auto str) { return str != nullptr; })); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); } TEST_P(SliceParmsTest, SliceAllEmpty) { std::vector h_strings{"", "", "", "", "", "", ""}; - cudf::test::strings_column_wrapper strings( - h_strings.begin(), - h_strings.end(), - thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; })); + cudf::test::strings_column_wrapper strings(h_strings.begin(), h_strings.end()); + cudf::size_type start = 3; cudf::size_type end = GetParam(); - std::vector h_expected; - if (end > start) { - for (cudf::size_type idx = start; (idx < end) && (idx < (cudf::size_type)h_strings.size()); - ++idx) - h_expected.push_back(h_strings[idx]); - } - auto strings_view = cudf::strings_column_view(strings); - auto results = cudf::strings::detail::copy_slice(strings_view, start, end); - cudf::test::strings_column_wrapper expected(h_expected.begin(), h_expected.end()); - // thrust::make_transform_iterator( h_expected.begin(), [] (auto str) { return str!=nullptr; })); + auto results = cudf::strings::detail::copy_slice(cudf::strings_column_view(strings), start, end); + + cudf::test::strings_column_wrapper expected(h_strings.begin() + start, h_strings.begin() + end); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); } From b953a995770fc46b659e279115906f72ae6341d8 Mon Sep 17 00:00:00 2001 From: davidwendt Date: Thu, 11 Mar 2021 06:02:03 -0500 Subject: [PATCH 4/4] use set_element_async instead of set_element --- cpp/src/strings/attributes.cu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/src/strings/attributes.cu b/cpp/src/strings/attributes.cu index d06e55fdce6..bed86544ec7 100644 --- a/cpp/src/strings/attributes.cu +++ b/cpp/src/strings/attributes.cu @@ -150,7 +150,8 @@ std::unique_ptr code_points( return length; }, thrust::plus()); - offsets.set_element(0, 0, stream); + size_type const zero = 0; + offsets.set_element_async(0, zero, stream); // the total size is the number of characters in the entire column size_type num_characters = offsets.back_element(stream);