Add libcudf strings examples (#11849)

Creates example for calling libcudf APIs for strings processing. This also includes examples of building custom kernels for modifying libcudf strings columns. Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Bradley Dice (https://github.com/bdice) - Robert Maynard (https://github.com/robertmaynard) - Mark Sadang (https://github.com/msadang) - https://github.com/nvdbaranec URL: #11849
rapidsai · Oct 14, 2022 · 7598253 · 7598253
1 parent 8a31e26
commit 7598253
Show file tree

Hide file tree

Showing 10 changed files with 704 additions and 2 deletions.
diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh
@@ -74,6 +74,7 @@ sed_runner "s/cudf=${CURRENT_SHORT_TAG}/cudf=${NEXT_SHORT_TAG}/g" README.md
 
 # Libcudf examples update
 sed_runner "s/CUDF_TAG branch-${CURRENT_SHORT_TAG}/CUDF_TAG branch-${NEXT_SHORT_TAG}/" cpp/examples/basic/CMakeLists.txt
+sed_runner "s/CUDF_TAG branch-${CURRENT_SHORT_TAG}/CUDF_TAG branch-${NEXT_SHORT_TAG}/" cpp/examples/strings/CMakeLists.txt
 
 # ucx-py version update
 sed_runner "s/export UCX_PY_VERSION=.*/export UCX_PY_VERSION='${NEXT_UCX_PY_VERSION}'/g" ci/gpu/build.sh

diff --git a/cpp/examples/basic/CMakeLists.txt b/cpp/examples/basic/CMakeLists.txt
@@ -1,6 +1,6 @@
 # Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
-cmake_minimum_required(VERSION 3.18)
+cmake_minimum_required(VERSION 3.23.1)
 
 project(
   basic_example

diff --git a/cpp/examples/build.sh b/cpp/examples/build.sh
@@ -17,8 +17,15 @@ LIB_BUILD_DIR=${LIB_BUILD_DIR:-$(readlink -f "${EXAMPLES_DIR}/../build")}
 # Basic example
 BASIC_EXAMPLE_DIR=${EXAMPLES_DIR}/basic
 BASIC_EXAMPLE_BUILD_DIR=${BASIC_EXAMPLE_DIR}/build
-
 # Configure
 cmake -S ${BASIC_EXAMPLE_DIR} -B ${BASIC_EXAMPLE_BUILD_DIR} -Dcudf_ROOT="${LIB_BUILD_DIR}"
 # Build
 cmake --build ${BASIC_EXAMPLE_BUILD_DIR} -j${PARALLEL_LEVEL}
+
+# Strings example
+STRINGS_EXAMPLE_DIR=${EXAMPLES_DIR}/strings
+STRINGS_EXAMPLE_BUILD_DIR=${STRINGS_EXAMPLE_DIR}/build
+# Configure
+cmake -S ${STRINGS_EXAMPLE_DIR} -B ${STRINGS_EXAMPLE_BUILD_DIR} -Dcudf_ROOT="${LIB_BUILD_DIR}"
+# Build
+cmake --build ${STRINGS_EXAMPLE_BUILD_DIR} -j${PARALLEL_LEVEL}
diff --git a/cpp/examples/strings/CMakeLists.txt b/cpp/examples/strings/CMakeLists.txt
@@ -0,0 +1,49 @@
+# Copyright (c) 2022, NVIDIA CORPORATION.
+
+cmake_minimum_required(VERSION 3.23.1)
+
+project(
+  strings_examples
+  VERSION 0.0.1
+  LANGUAGES CXX CUDA
+)
+
+set(CPM_DOWNLOAD_VERSION v0.35.3)
+file(
+  DOWNLOAD
+  https://github.com/cpm-cmake/CPM.cmake/releases/download/${CPM_DOWNLOAD_VERSION}/get_cpm.cmake
+  ${CMAKE_BINARY_DIR}/cmake/get_cpm.cmake
+)
+include(${CMAKE_BINARY_DIR}/cmake/get_cpm.cmake)
+
+set(CUDF_TAG branch-22.12)
+CPMFindPackage(
+  NAME cudf GIT_REPOSITORY https://github.com/rapidsai/cudf
+  GIT_TAG ${CUDF_TAG}
+  GIT_SHALLOW
+    TRUE
+    SOURCE_SUBDIR
+    cpp
+)
+
+list(APPEND CUDF_CUDA_FLAGS --expt-extended-lambda --expt-relaxed-constexpr)
+
+#
+add_executable(libcudf_apis libcudf_apis.cpp)
+target_compile_features(libcudf_apis PRIVATE cxx_std_17)
+target_link_libraries(libcudf_apis PRIVATE cudf::cudf nvToolsExt)
+
+add_executable(custom_with_malloc custom_with_malloc.cu)
+target_compile_features(custom_with_malloc PRIVATE cxx_std_17)
+target_compile_options(custom_with_malloc PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${CUDF_CUDA_FLAGS}>")
+target_link_libraries(custom_with_malloc PRIVATE cudf::cudf nvToolsExt)
+
+add_executable(custom_prealloc custom_prealloc.cu)
+target_compile_features(custom_prealloc PRIVATE cxx_std_17)
+target_compile_options(custom_prealloc PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${CUDF_CUDA_FLAGS}>")
+target_link_libraries(custom_prealloc PRIVATE cudf::cudf nvToolsExt)
+
+add_executable(custom_optimized custom_optimized.cu)
+target_compile_features(custom_optimized PRIVATE cxx_std_17)
+target_compile_options(custom_optimized PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${CUDF_CUDA_FLAGS}>")
+target_link_libraries(custom_optimized PRIVATE cudf::cudf nvToolsExt)
diff --git a/cpp/examples/strings/common.hpp b/cpp/examples/strings/common.hpp
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <cudf/column/column.hpp>
+#include <cudf/column/column_view.hpp>
+#include <cudf/io/csv.hpp>
+#include <cudf/io/datasource.hpp>
+#include <cudf/table/table.hpp>
+#include <cudf/table/table_view.hpp>
+
+#include <rmm/mr/device/cuda_memory_resource.hpp>
+#include <rmm/mr/device/device_memory_resource.hpp>
+#include <rmm/mr/device/owning_wrapper.hpp>
+#include <rmm/mr/device/pool_memory_resource.hpp>
+
+#include <chrono>
+#include <iostream>
+#include <memory>
+#include <string>
+
+/**
+ * @brief Main example function returns redacted strings column.
+ *
+ * This function returns a redacted version of the input `names` column
+ * using the the `visibilities` column as in the following example
+ * ```
+ * names        visibility  --> redacted
+ * John Doe     public          D John
+ * Bobby Joe    private         X X
+ * ```
+ *
+ * @param names First and last names separated with a single space
+ * @param visibilities String values `public` or `private` only
+ * @return Redacted strings column
+ */
+std::unique_ptr<cudf::column> redact_strings(cudf::column_view const& names,
+                                             cudf::column_view const& visibilities);
+
+/**
+ * @brief Create CUDA memory resource
+ */
+auto make_cuda_mr() { return std::make_shared<rmm::mr::cuda_memory_resource>(); }
+
+/**
+ * @brief Create a pool device memory resource
+ */
+auto make_pool_mr()
+{
+  return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(make_cuda_mr());
+}
+
+/**
+ * @brief Create memory resource for libcudf functions
+ */
+std::shared_ptr<rmm::mr::device_memory_resource> create_memory_resource(std::string const& name)
+{
+  if (name == "pool") { return make_pool_mr(); }
+  return make_cuda_mr();
+}
+
+/**
+ * @brief Main for strings examples
+ *
+ * Command line parameters:
+ * 1. CSV file name/path
+ * 2. Memory resource (optional): 'pool' or 'cuda'
+ *
+ * The stdout includes the number of rows in the input and the output size in bytes.
+ */
+int main(int argc, char const** argv)
+{
+  if (argc < 2) {
+    std::cout << "required parameter: csv-file-path\n";
+    return 1;
+  }
+
+  auto const mr_name = std::string{argc > 2 ? std::string(argv[2]) : std::string("cuda")};
+  auto resource      = create_memory_resource(mr_name);
+  rmm::mr::set_current_device_resource(resource.get());
+
+  auto const csv_file   = std::string{argv[1]};
+  auto const csv_result = [csv_file] {
+    cudf::io::csv_reader_options in_opts =
+      cudf::io::csv_reader_options::builder(cudf::io::source_info{csv_file}).header(-1);
+    return cudf::io::read_csv(in_opts).tbl;
+  }();
+  auto const csv_table = csv_result->view();
+
+  std::cout << "table: " << csv_table.num_rows() << " rows " << csv_table.num_columns()
+            << " columns\n";
+
+  auto st     = std::chrono::steady_clock::now();
+  auto result = redact_strings(csv_table.column(0), csv_table.column(1));
+
+  std::chrono::duration<double> elapsed = std::chrono::steady_clock::now() - st;
+  std::cout << "Wall time: " << elapsed.count() << " seconds\n";
+  std::cout << "Output size " << result->view().child(1).size() << " bytes\n";
+
+  return 0;
+}
diff --git a/cpp/examples/strings/custom_optimized.cu b/cpp/examples/strings/custom_optimized.cu
@@ -0,0 +1,165 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "common.hpp"
+
+#include <cudf/column/column_device_view.cuh>
+#include <cudf/column/column_factories.hpp>
+
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/device_uvector.hpp>
+#include <rmm/exec_policy.hpp>
+
+#include <thrust/scan.h>
+
+#include <cuda_runtime.h>
+#include <nvToolsExt.h>
+
+/**
+ * @brief Computes the size of each output row
+ *
+ * This thread is called once per row in d_names.
+ *
+ * @param d_names Column of names
+ * @param d_visibilities Column of visibilities
+ * @param d_sizes Output sizes for each row
+ */
+__global__ void sizes_kernel(cudf::column_device_view const d_names,
+                             cudf::column_device_view const d_visibilities,
+                             cudf::size_type* d_sizes)
+{
+  // The row index is resolved from the CUDA thread/block objects
+  auto index = threadIdx.x + blockIdx.x * blockDim.x;
+  // There may be more threads than actual rows
+  if (index >= d_names.size()) return;
+
+  auto const visible   = cudf::string_view("public", 6);
+  auto const redaction = cudf::string_view("X X", 3);
+
+  auto const name = d_names.element<cudf::string_view>(index);
+  auto const vis  = d_visibilities.element<cudf::string_view>(index);
+
+  cudf::size_type result = redaction.size_bytes();  // init to redaction size
+  if (vis == visible) {
+    auto const space_idx    = name.find(' ');
+    auto const first        = name.substr(0, space_idx);
+    auto const last_initial = name.substr(space_idx + 1, 1);
+
+    result = first.size_bytes() + last_initial.size_bytes() + 1;
+  }
+
+  d_sizes[index] = result;
+}
+
+/**
+ * @brief Builds the output for each row
+ *
+ * This thread is called once per row in d_names.
+ *
+ * @param d_names Column of names
+ * @param d_visibilities Column of visibilities
+ * @param d_offsets Byte offset in `d_chars` for each row
+ * @param d_chars Output memory for all rows
+ */
+__global__ void redact_kernel(cudf::column_device_view const d_names,
+                              cudf::column_device_view const d_visibilities,
+                              cudf::size_type const* d_offsets,
+                              char* d_chars)
+{
+  // The row index is resolved from the CUDA thread/block objects
+  auto index = threadIdx.x + blockIdx.x * blockDim.x;
+  // There may be more threads than actual rows
+  if (index >= d_names.size()) return;
+
+  auto const visible   = cudf::string_view("public", 6);
+  auto const redaction = cudf::string_view("X X", 3);
+
+  // resolve output_ptr using the offsets vector
+  char* output_ptr = d_chars + d_offsets[index];
+
+  auto const name = d_names.element<cudf::string_view>(index);
+  auto const vis  = d_visibilities.element<cudf::string_view>(index);
+
+  if (vis == visible) {
+    auto const space_idx    = name.find(' ');
+    auto const first        = name.substr(0, space_idx);
+    auto const last_initial = name.substr(space_idx + 1, 1);
+    auto const output_size  = first.size_bytes() + last_initial.size_bytes() + 1;
+
+    // build output string
+    memcpy(output_ptr, last_initial.data(), last_initial.size_bytes());
+    output_ptr += last_initial.size_bytes();
+    *output_ptr++ = ' ';
+    memcpy(output_ptr, first.data(), first.size_bytes());
+  } else {
+    memcpy(output_ptr, redaction.data(), redaction.size_bytes());
+  }
+}
+
+/**
+ * @brief Redacts each name per the corresponding visibility entry
+ *
+ * This implementation builds the strings column children (offsets and chars)
+ * directly into device memory for libcudf.
+ *
+ * @param names Column of names
+ * @param visibilities Column of visibilities
+ * @return Redacted column of names
+ */
+std::unique_ptr<cudf::column> redact_strings(cudf::column_view const& names,
+                                             cudf::column_view const& visibilities)
+{
+  // all device memory operations and kernel functions will run on this stream
+  auto stream = rmm::cuda_stream_default;
+
+  auto const d_names        = cudf::column_device_view::create(names, stream);
+  auto const d_visibilities = cudf::column_device_view::create(visibilities, stream);
+
+  constexpr int block_size = 128;  // this arbitrary size should be a power of 2
+  int const blocks         = (names.size() + block_size - 1) / block_size;
+
+  nvtxRangePushA("redact_strings");
+
+  // create offsets vector
+  auto offsets = rmm::device_uvector<cudf::size_type>(names.size() + 1, stream);
+
+  // compute output sizes
+  sizes_kernel<<<blocks, block_size, 0, stream.value()>>>(
+    *d_names, *d_visibilities, offsets.data());
+
+  // convert sizes to offsets (in place)
+  thrust::exclusive_scan(rmm::exec_policy(stream), offsets.begin(), offsets.end(), offsets.begin());
+
+  // last element is the total output size
+  // (device-to-host copy of 1 integer -- includes synching the stream)
+  cudf::size_type output_size = offsets.back_element(stream);
+
+  //  create chars vector
+  auto chars = rmm::device_uvector<char>(output_size, stream);
+
+  // build chars output
+  redact_kernel<<<blocks, block_size, 0, stream.value()>>>(
+    *d_names, *d_visibilities, offsets.data(), chars.data());
+
+  // create column from offsets and chars vectors (no copy is performed)
+  auto result = cudf::make_strings_column(names.size(), std::move(offsets), std::move(chars));
+
+  // wait for all of the above to finish
+  stream.synchronize();
+
+  nvtxRangePop();
+  return result;
+}