Skip to content

Commit

Permalink
Rework cudf::strings::detail::copy_range for offsetalator (#15010)
Browse files Browse the repository at this point in the history
This reworks the `cudf::strings::detail::copy_range()` function to use the offsetalator instead of accessing the output offsets directly. It also refactors the code to remove the unnecessary template arguments and adds a benchmark to ensure these changes do not cause a performance regression.

Most of the code in `cpp/include/cudf/strings/detail/copy_range.cuh` was rewritten and moved to `cpp/src/strings/copying/copy_range.cu`.

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Robert Maynard (https://github.com/robertmaynard)
  - Vukasin Milovanovic (https://github.com/vuule)

URL: #15010
  • Loading branch information
davidwendt authored Feb 20, 2024
1 parent 4ca9ac8 commit 3150676
Show file tree
Hide file tree
Showing 7 changed files with 269 additions and 241 deletions.
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -570,6 +570,7 @@ add_library(
src/strings/convert/convert_lists.cu
src/strings/copying/concatenate.cu
src/strings/copying/copying.cu
src/strings/copying/copy_range.cu
src/strings/copying/shift.cu
src/strings/count_matches.cu
src/strings/extract/extract.cu
Expand Down
1 change: 1 addition & 0 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,7 @@ ConfigureNVBench(
string/char_types.cpp
string/contains.cpp
string/copy_if_else.cpp
string/copy_range.cpp
string/count.cpp
string/extract.cpp
string/gather.cpp
Expand Down
60 changes: 60 additions & 0 deletions cpp/benchmarks/string/copy_range.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <benchmarks/common/generate_input.hpp>

#include <cudf/copying.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/utilities/default_stream.hpp>

#include <nvbench/nvbench.cuh>

/**
 * @brief nvbench benchmark for `cudf::copy_range` on strings columns.
 *
 * Reads the `num_rows` and `row_width` axes from `state`, builds two random
 * strings columns without nulls (string lengths drawn from a normal
 * distribution over [0, row_width]), and times copying the middle half of the
 * source column, rows [num_rows/4, 3*num_rows/4), into the target column
 * starting at row num_rows/8.
 *
 * Configurations where `num_rows * row_width` reaches the `cudf::size_type`
 * limit are skipped.
 *
 * @param state nvbench state providing the axis values and collecting results
 */
static void bench_copy_range(nvbench::state& state)
{
  auto const num_rows  = static_cast<cudf::size_type>(state.get_int64("num_rows"));
  auto const row_width = static_cast<cudf::size_type>(state.get_int64("row_width"));

  // Widen to std::size_t before multiplying so the product cannot overflow size_type
  if (static_cast<std::size_t>(num_rows) * static_cast<std::size_t>(row_width) >=
      static_cast<std::size_t>(std::numeric_limits<cudf::size_type>::max())) {
    state.skip("Skip benchmarks greater than size_type limit");
    // Stop here: without the return the oversized input below would still be generated
    return;
  }

  data_profile const table_profile =
    data_profile_builder()
      .distribution(cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width)
      .no_validity();
  // One table with two strings columns: column 0 is the source, column 1 the target
  auto const source_tables = create_random_table(
    {cudf::type_id::STRING, cudf::type_id::STRING}, row_count{num_rows}, table_profile);

  auto const start  = num_rows / 4;
  auto const end    = (num_rows * 3) / 4;
  auto const source = source_tables->view().column(0);
  auto const target = source_tables->view().column(1);

  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
  auto chars_size = cudf::strings_column_view(target).chars_size(cudf::get_default_stream());
  state.add_global_memory_reads<nvbench::int8_t>(chars_size);   // all bytes are read;
  state.add_global_memory_writes<nvbench::int8_t>(chars_size);  // both columns are similar size

  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
    // The result is discarded; only the time taken by the call is of interest
    [[maybe_unused]] auto result = cudf::copy_range(source, target, start, end, start / 2);
  });
}

// Register bench_copy_range under the name "copy_range" with two int64 axes.
// The axis names must match the strings passed to state.get_int64() inside
// bench_copy_range ("row_width" and "num_rows").
NVBENCH_BENCH(bench_copy_range)
.set_name("copy_range")
.add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 2048, 4096})
.add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216});
216 changes: 0 additions & 216 deletions cpp/include/cudf/strings/detail/copy_range.cuh

This file was deleted.

60 changes: 60 additions & 0 deletions cpp/include/cudf/strings/detail/copy_range.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <cudf/strings/strings_column_view.hpp>
#include <cudf/types.hpp>

#include <rmm/cuda_stream_view.hpp>

namespace cudf {
namespace strings {
namespace detail {

/**
 * @brief Internal API to copy a range of string elements out-of-place from
 * a source column to a target column
 *
 * Creates a new column as if an in-place copy was performed into `target`.
 * The elements indicated by the indices [`source_begin`, `source_end`)
 * replace the elements in the target column starting at `target_begin`.
 * Elements outside the range are copied from `target` into the new target
 * column to return.
 *
 * @throws cudf::logic_error for invalid range (if `target_begin < 0`,
 * or `target_begin >= target.size()`,
 * or `target_begin + (source_end-source_begin) > target.size()`).
 *
 * @param source The strings column to copy from inside the `target_begin` range
 * @param target The strings column to copy from outside the range
 * @param source_begin The index of the first element in the source range (inclusive)
 * @param source_end The index of the last element in the source range (exclusive)
 * @param target_begin The starting index of the target range (inclusive)
 * @param stream CUDA stream used for device memory operations and kernel launches.
 * @param mr Device memory resource used to allocate the returned column's device memory.
 * @return The result target column
 */
std::unique_ptr<column> copy_range(strings_column_view const& source,
strings_column_view const& target,
size_type source_begin,
size_type source_end,
size_type target_begin,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);

} // namespace detail
} // namespace strings
} // namespace cudf
Loading

0 comments on commit 3150676

Please sign in to comment.