-
Notifications
You must be signed in to change notification settings - Fork 915
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add libcudf strings examples (#11849)
Creates example for calling libcudf APIs for strings processing. This also includes examples of building custom kernels for modifying libcudf strings columns. Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Bradley Dice (https://github.com/bdice) - Robert Maynard (https://github.com/robertmaynard) - Mark Sadang (https://github.com/msadang) - https://github.com/nvdbaranec URL: #11849
- Loading branch information
1 parent
8a31e26
commit 7598253
Showing
10 changed files
with
704 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
# Copyright (c) 2022, NVIDIA CORPORATION. | ||
|
||
cmake_minimum_required(VERSION 3.23.1) | ||
|
||
project( | ||
strings_examples | ||
VERSION 0.0.1 | ||
LANGUAGES CXX CUDA | ||
) | ||
|
||
set(CPM_DOWNLOAD_VERSION v0.35.3) | ||
file( | ||
DOWNLOAD | ||
https://github.com/cpm-cmake/CPM.cmake/releases/download/${CPM_DOWNLOAD_VERSION}/get_cpm.cmake | ||
${CMAKE_BINARY_DIR}/cmake/get_cpm.cmake | ||
) | ||
include(${CMAKE_BINARY_DIR}/cmake/get_cpm.cmake) | ||
|
||
set(CUDF_TAG branch-22.12) | ||
CPMFindPackage( | ||
NAME cudf GIT_REPOSITORY https://github.com/rapidsai/cudf | ||
GIT_TAG ${CUDF_TAG} | ||
GIT_SHALLOW | ||
TRUE | ||
SOURCE_SUBDIR | ||
cpp | ||
) | ||
|
||
list(APPEND CUDF_CUDA_FLAGS --expt-extended-lambda --expt-relaxed-constexpr) | ||
|
||
# | ||
add_executable(libcudf_apis libcudf_apis.cpp) | ||
target_compile_features(libcudf_apis PRIVATE cxx_std_17) | ||
target_link_libraries(libcudf_apis PRIVATE cudf::cudf nvToolsExt) | ||
|
||
add_executable(custom_with_malloc custom_with_malloc.cu) | ||
target_compile_features(custom_with_malloc PRIVATE cxx_std_17) | ||
target_compile_options(custom_with_malloc PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${CUDF_CUDA_FLAGS}>") | ||
target_link_libraries(custom_with_malloc PRIVATE cudf::cudf nvToolsExt) | ||
|
||
add_executable(custom_prealloc custom_prealloc.cu) | ||
target_compile_features(custom_prealloc PRIVATE cxx_std_17) | ||
target_compile_options(custom_prealloc PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${CUDF_CUDA_FLAGS}>") | ||
target_link_libraries(custom_prealloc PRIVATE cudf::cudf nvToolsExt) | ||
|
||
add_executable(custom_optimized custom_optimized.cu) | ||
target_compile_features(custom_optimized PRIVATE cxx_std_17) | ||
target_compile_options(custom_optimized PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${CUDF_CUDA_FLAGS}>") | ||
target_link_libraries(custom_optimized PRIVATE cudf::cudf nvToolsExt) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
/* | ||
* Copyright (c) 2022, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
#pragma once | ||
|
||
#include <cudf/column/column.hpp> | ||
#include <cudf/column/column_view.hpp> | ||
#include <cudf/io/csv.hpp> | ||
#include <cudf/io/datasource.hpp> | ||
#include <cudf/table/table.hpp> | ||
#include <cudf/table/table_view.hpp> | ||
|
||
#include <rmm/mr/device/cuda_memory_resource.hpp> | ||
#include <rmm/mr/device/device_memory_resource.hpp> | ||
#include <rmm/mr/device/owning_wrapper.hpp> | ||
#include <rmm/mr/device/pool_memory_resource.hpp> | ||
|
||
#include <chrono> | ||
#include <iostream> | ||
#include <memory> | ||
#include <string> | ||
|
||
/** | ||
* @brief Main example function returns redacted strings column. | ||
* | ||
* This function returns a redacted version of the input `names` column | ||
* using the the `visibilities` column as in the following example | ||
* ``` | ||
* names visibility --> redacted | ||
* John Doe public D John | ||
* Bobby Joe private X X | ||
* ``` | ||
* | ||
* @param names First and last names separated with a single space | ||
* @param visibilities String values `public` or `private` only | ||
* @return Redacted strings column | ||
*/ | ||
std::unique_ptr<cudf::column> redact_strings(cudf::column_view const& names, | ||
cudf::column_view const& visibilities); | ||
|
||
/** | ||
* @brief Create CUDA memory resource | ||
*/ | ||
auto make_cuda_mr() { return std::make_shared<rmm::mr::cuda_memory_resource>(); } | ||
|
||
/** | ||
* @brief Create a pool device memory resource | ||
*/ | ||
auto make_pool_mr() | ||
{ | ||
return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(make_cuda_mr()); | ||
} | ||
|
||
/** | ||
* @brief Create memory resource for libcudf functions | ||
*/ | ||
std::shared_ptr<rmm::mr::device_memory_resource> create_memory_resource(std::string const& name) | ||
{ | ||
if (name == "pool") { return make_pool_mr(); } | ||
return make_cuda_mr(); | ||
} | ||
|
||
/** | ||
* @brief Main for strings examples | ||
* | ||
* Command line parameters: | ||
* 1. CSV file name/path | ||
* 2. Memory resource (optional): 'pool' or 'cuda' | ||
* | ||
* The stdout includes the number of rows in the input and the output size in bytes. | ||
*/ | ||
int main(int argc, char const** argv) | ||
{ | ||
if (argc < 2) { | ||
std::cout << "required parameter: csv-file-path\n"; | ||
return 1; | ||
} | ||
|
||
auto const mr_name = std::string{argc > 2 ? std::string(argv[2]) : std::string("cuda")}; | ||
auto resource = create_memory_resource(mr_name); | ||
rmm::mr::set_current_device_resource(resource.get()); | ||
|
||
auto const csv_file = std::string{argv[1]}; | ||
auto const csv_result = [csv_file] { | ||
cudf::io::csv_reader_options in_opts = | ||
cudf::io::csv_reader_options::builder(cudf::io::source_info{csv_file}).header(-1); | ||
return cudf::io::read_csv(in_opts).tbl; | ||
}(); | ||
auto const csv_table = csv_result->view(); | ||
|
||
std::cout << "table: " << csv_table.num_rows() << " rows " << csv_table.num_columns() | ||
<< " columns\n"; | ||
|
||
auto st = std::chrono::steady_clock::now(); | ||
auto result = redact_strings(csv_table.column(0), csv_table.column(1)); | ||
|
||
std::chrono::duration<double> elapsed = std::chrono::steady_clock::now() - st; | ||
std::cout << "Wall time: " << elapsed.count() << " seconds\n"; | ||
std::cout << "Output size " << result->view().child(1).size() << " bytes\n"; | ||
|
||
return 0; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,165 @@ | ||
/* | ||
* Copyright (c) 2022, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#include "common.hpp" | ||
|
||
#include <cudf/column/column_device_view.cuh> | ||
#include <cudf/column/column_factories.hpp> | ||
|
||
#include <rmm/cuda_stream_view.hpp> | ||
#include <rmm/device_uvector.hpp> | ||
#include <rmm/exec_policy.hpp> | ||
|
||
#include <thrust/scan.h> | ||
|
||
#include <cuda_runtime.h> | ||
#include <nvToolsExt.h> | ||
|
||
/** | ||
* @brief Computes the size of each output row | ||
* | ||
* This thread is called once per row in d_names. | ||
* | ||
* @param d_names Column of names | ||
* @param d_visibilities Column of visibilities | ||
* @param d_sizes Output sizes for each row | ||
*/ | ||
__global__ void sizes_kernel(cudf::column_device_view const d_names, | ||
cudf::column_device_view const d_visibilities, | ||
cudf::size_type* d_sizes) | ||
{ | ||
// The row index is resolved from the CUDA thread/block objects | ||
auto index = threadIdx.x + blockIdx.x * blockDim.x; | ||
// There may be more threads than actual rows | ||
if (index >= d_names.size()) return; | ||
|
||
auto const visible = cudf::string_view("public", 6); | ||
auto const redaction = cudf::string_view("X X", 3); | ||
|
||
auto const name = d_names.element<cudf::string_view>(index); | ||
auto const vis = d_visibilities.element<cudf::string_view>(index); | ||
|
||
cudf::size_type result = redaction.size_bytes(); // init to redaction size | ||
if (vis == visible) { | ||
auto const space_idx = name.find(' '); | ||
auto const first = name.substr(0, space_idx); | ||
auto const last_initial = name.substr(space_idx + 1, 1); | ||
|
||
result = first.size_bytes() + last_initial.size_bytes() + 1; | ||
} | ||
|
||
d_sizes[index] = result; | ||
} | ||
|
||
/** | ||
* @brief Builds the output for each row | ||
* | ||
* This thread is called once per row in d_names. | ||
* | ||
* @param d_names Column of names | ||
* @param d_visibilities Column of visibilities | ||
* @param d_offsets Byte offset in `d_chars` for each row | ||
* @param d_chars Output memory for all rows | ||
*/ | ||
__global__ void redact_kernel(cudf::column_device_view const d_names, | ||
cudf::column_device_view const d_visibilities, | ||
cudf::size_type const* d_offsets, | ||
char* d_chars) | ||
{ | ||
// The row index is resolved from the CUDA thread/block objects | ||
auto index = threadIdx.x + blockIdx.x * blockDim.x; | ||
// There may be more threads than actual rows | ||
if (index >= d_names.size()) return; | ||
|
||
auto const visible = cudf::string_view("public", 6); | ||
auto const redaction = cudf::string_view("X X", 3); | ||
|
||
// resolve output_ptr using the offsets vector | ||
char* output_ptr = d_chars + d_offsets[index]; | ||
|
||
auto const name = d_names.element<cudf::string_view>(index); | ||
auto const vis = d_visibilities.element<cudf::string_view>(index); | ||
|
||
if (vis == visible) { | ||
auto const space_idx = name.find(' '); | ||
auto const first = name.substr(0, space_idx); | ||
auto const last_initial = name.substr(space_idx + 1, 1); | ||
auto const output_size = first.size_bytes() + last_initial.size_bytes() + 1; | ||
|
||
// build output string | ||
memcpy(output_ptr, last_initial.data(), last_initial.size_bytes()); | ||
output_ptr += last_initial.size_bytes(); | ||
*output_ptr++ = ' '; | ||
memcpy(output_ptr, first.data(), first.size_bytes()); | ||
} else { | ||
memcpy(output_ptr, redaction.data(), redaction.size_bytes()); | ||
} | ||
} | ||
|
||
/** | ||
* @brief Redacts each name per the corresponding visibility entry | ||
* | ||
* This implementation builds the strings column children (offsets and chars) | ||
* directly into device memory for libcudf. | ||
* | ||
* @param names Column of names | ||
* @param visibilities Column of visibilities | ||
* @return Redacted column of names | ||
*/ | ||
std::unique_ptr<cudf::column> redact_strings(cudf::column_view const& names, | ||
cudf::column_view const& visibilities) | ||
{ | ||
// all device memory operations and kernel functions will run on this stream | ||
auto stream = rmm::cuda_stream_default; | ||
|
||
auto const d_names = cudf::column_device_view::create(names, stream); | ||
auto const d_visibilities = cudf::column_device_view::create(visibilities, stream); | ||
|
||
constexpr int block_size = 128; // this arbitrary size should be a power of 2 | ||
int const blocks = (names.size() + block_size - 1) / block_size; | ||
|
||
nvtxRangePushA("redact_strings"); | ||
|
||
// create offsets vector | ||
auto offsets = rmm::device_uvector<cudf::size_type>(names.size() + 1, stream); | ||
|
||
// compute output sizes | ||
sizes_kernel<<<blocks, block_size, 0, stream.value()>>>( | ||
*d_names, *d_visibilities, offsets.data()); | ||
|
||
// convert sizes to offsets (in place) | ||
thrust::exclusive_scan(rmm::exec_policy(stream), offsets.begin(), offsets.end(), offsets.begin()); | ||
|
||
// last element is the total output size | ||
// (device-to-host copy of 1 integer -- includes synching the stream) | ||
cudf::size_type output_size = offsets.back_element(stream); | ||
|
||
// create chars vector | ||
auto chars = rmm::device_uvector<char>(output_size, stream); | ||
|
||
// build chars output | ||
redact_kernel<<<blocks, block_size, 0, stream.value()>>>( | ||
*d_names, *d_visibilities, offsets.data(), chars.data()); | ||
|
||
// create column from offsets and chars vectors (no copy is performed) | ||
auto result = cudf::make_strings_column(names.size(), std::move(offsets), std::move(chars)); | ||
|
||
// wait for all of the above to finish | ||
stream.synchronize(); | ||
|
||
nvtxRangePop(); | ||
return result; | ||
} |
Oops, something went wrong.