From 40eb1e2736e2cfb8ed8ab13c0ba44e4ae5cf0f6e Mon Sep 17 00:00:00 2001
From: David Wendt <dwendt@nvidia.com>
Date: Wed, 12 Oct 2022 16:46:59 -0400
Subject: [PATCH 01/54] Add strings udf C++ classes and function for phase II

---
 .../cpp/include/cudf/strings/udf/case.cuh     | 207 +++++++
 .../cpp/include/cudf/strings/udf/numeric.cuh  |  74 +++
 .../cpp/include/cudf/strings/udf/split.cuh    | 192 +++++++
 .../cpp/include/cudf/strings/udf/strip.cuh    | 110 ++++
 .../cpp/include/cudf/strings/udf/udf_apis.hpp |  10 +
 .../include/cudf/strings/udf/udf_string.cuh   | 472 ++++++++++++++++
 .../include/cudf/strings/udf/udf_string.hpp   | 523 ++++++++++++++++++
 .../cpp/src/strings/udf/udf_apis.cu           |  61 ++
 8 files changed, 1649 insertions(+)
 create mode 100644 python/strings_udf/cpp/include/cudf/strings/udf/case.cuh
 create mode 100644 python/strings_udf/cpp/include/cudf/strings/udf/numeric.cuh
 create mode 100644 python/strings_udf/cpp/include/cudf/strings/udf/split.cuh
 create mode 100644 python/strings_udf/cpp/include/cudf/strings/udf/strip.cuh
 create mode 100644 python/strings_udf/cpp/include/cudf/strings/udf/udf_string.cuh
 create mode 100644 python/strings_udf/cpp/include/cudf/strings/udf/udf_string.hpp

diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/case.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/case.cuh
new file mode 100644
index 00000000000..98b25f85f74
--- /dev/null
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/case.cuh
@@ -0,0 +1,207 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include "udf_string.cuh"
+
+#include <cudf/strings/detail/char_tables.hpp>
+#include <cudf/strings/detail/utf8.hpp>
+#include <cudf/strings/string_view.cuh>
+
+namespace cudf {
+namespace strings {
+namespace udf {
+
+// global variables for character-type flags and case conversion
+struct chars_tables {
+  cudf::strings::detail::character_flags_table_type* flags_table;
+  cudf::strings::detail::character_cases_table_type* cases_table;
+  struct cudf::strings::detail::special_case_mapping* special_case_mapping_table;
+};
+
+namespace detail {
+
+/**
+ * @brief Utility for converting a single character
+ *
+ * There are special cases where the conversion may result in multiple characters.
+ *
+ * @param tables The char tables required for conversion
+ * @param result String to append the converted character
+ * @param code_point The code-point of the character to convert
+ * @param flag The char-type flag of the character to convert
+ */
+__device__ inline void convert_char(chars_tables const tables,
+                                    udf_string& result,
+                                    uint32_t code_point,
+                                    uint8_t flag)
+{
+  if (!cudf::strings::detail::IS_SPECIAL(flag)) {
+    result.append(cudf::strings::detail::codepoint_to_utf8(tables.cases_table[code_point]));
+    return;
+  }
+
+  // handle special case
+  auto const map =
+    tables
+      .special_case_mapping_table[cudf::strings::detail::get_special_case_hash_index(code_point)];
+  auto const output_count =
+    cudf::strings::detail::IS_LOWER(flag) ? map.num_upper_chars : map.num_lower_chars;
+  auto const* output_chars = cudf::strings::detail::IS_LOWER(flag) ? map.upper : map.lower;
+  for (uint16_t idx = 0; idx < output_count; idx++) {
+    result.append(cudf::strings::detail::codepoint_to_utf8(output_chars[idx]));
+  }
+}
+
+/**
+ * @brief Converts the given string to either upper or lower case
+ *
+ * @param tables The char tables required for conversion
+ * @param d_str Input string to convert
+ * @param case_flag Identifies upper/lower case conversion
+ * @return New string containing the converted characters
+ */
+__device__ inline udf_string convert_case(
+  chars_tables const tables,
+  string_view d_str,
+  cudf::strings::detail::character_flags_table_type case_flag)
+{
+  udf_string result;
+  for (auto const chr : d_str) {
+    auto const code_point = cudf::strings::detail::utf8_to_codepoint(chr);
+    auto const flag       = code_point <= 0x00FFFF ? tables.flags_table[code_point] : 0;
+
+    if ((flag & case_flag) || (cudf::strings::detail::IS_SPECIAL(flag) &&
+                               !cudf::strings::detail::IS_UPPER_OR_LOWER(flag))) {
+      convert_char(tables, result, code_point, flag);
+    } else {
+      result.append(chr);
+    }
+  }
+
+  return result;
+}
+
+/**
+ * @brief Utility for capitalize and title functions
+ *
+ * @tparam CaptializeNextFn returns true if the next candidate character should be capitalized
+ * @param tables The char tables required for conversion
+ * @param d_str Input string to convert
+ * @return New string containing the converted characters
+ */
+template <typename CapitalizeNextFn>
+__device__ inline udf_string capitalize(chars_tables const tables,
+                                        string_view d_str,
+                                        CapitalizeNextFn next_fn)
+{
+  udf_string result;
+  bool capitalize = true;
+  for (auto const chr : d_str) {
+    auto const code_point = cudf::strings::detail::utf8_to_codepoint(chr);
+    auto const flag       = code_point <= 0x00FFFF ? tables.flags_table[code_point] : 0;
+    auto const change_case =
+      capitalize ? cudf::strings::detail::IS_LOWER(flag) : cudf::strings::detail::IS_UPPER(flag);
+    if (change_case) {
+      detail::convert_char(tables, result, code_point, flag);
+    } else {
+      result.append(chr);
+    }
+    capitalize = next_fn(flag);
+  }
+  return result;
+}
+}  // namespace detail
+
+/**
+ * @brief Converts the given string to lower case
+ *
+ * @param tables The char tables required for conversion
+ * @param d_str Input string to convert
+ * @return New string containing the converted characters
+ */
+__device__ inline udf_string to_lower(chars_tables const tables, string_view d_str)
+{
+  cudf::strings::detail::character_flags_table_type case_flag =
+    cudf::strings::detail::IS_UPPER(0xFF);  // convert only upper case characters
+  return detail::convert_case(tables, d_str, case_flag);
+}
+
+/**
+ * @brief Converts the given string to upper case
+ *
+ * @param tables The char tables required for conversion
+ * @param d_str Input string to convert
+ * @return New string containing the converted characters
+ */
+__device__ inline udf_string to_upper(chars_tables const tables, string_view d_str)
+{
+  cudf::strings::detail::character_flags_table_type case_flag =
+    cudf::strings::detail::IS_LOWER(0xFF);  // convert only lower case characters
+  return detail::convert_case(tables, d_str, case_flag);
+}
+
+/**
+ * @brief Converts the given string to lower/upper case
+ *
+ * All lower case characters are converted to upper case and
+ * all upper case characters are converted to lower case.
+ *
+ * @param tables The char tables required for conversion
+ * @param d_str Input string to convert
+ * @return New string containing the converted characters
+ */
+__device__ inline udf_string swapcase(chars_tables const tables, string_view d_str)
+{
+  cudf::strings::detail::character_flags_table_type case_flag =
+    cudf::strings::detail::IS_LOWER(0xFF) | cudf::strings::detail::IS_UPPER(0xFF);
+  return detail::convert_case(tables, d_str, case_flag);
+}
+
+/**
+ * @brief Capitalize the first character of the given string
+ *
+ * @param tables The char tables required for conversion
+ * @param d_str Input string to convert
+ * @return New string containing the converted characters
+ */
+__device__ inline udf_string capitalize(chars_tables const tables, string_view d_str)
+{
+  auto next_fn = [](cudf::strings::detail::character_flags_table_type) -> bool { return false; };
+  return detail::capitalize(tables, d_str, next_fn);
+}
+
+/**
+ * @brief Converts the given string to title case
+ *
+ * The first character after a non-character is converted to upper case.
+ * All other characters are converted to lower case.
+ *
+ * @param tables The char tables required for conversion
+ * @param d_str Input string to convert
+ * @return New string containing the converted characters
+ */
+__device__ inline udf_string title(chars_tables const tables, string_view d_str)
+{
+  auto next_fn = [](cudf::strings::detail::character_flags_table_type flag) -> bool {
+    return !cudf::strings::detail::IS_ALPHA(flag);
+  };
+  return detail::capitalize(tables, d_str, next_fn);
+}
+
+}  // namespace udf
+}  // namespace strings
+}  // namespace cudf
diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/numeric.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/numeric.cuh
new file mode 100644
index 00000000000..48f709ae318
--- /dev/null
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/numeric.cuh
@@ -0,0 +1,74 @@
+
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include "udf_string.cuh"
+
+#include <cudf/strings/detail/convert/int_to_string.cuh>
+#include <cudf/strings/detail/convert/string_to_float.cuh>
+#include <cudf/strings/detail/convert/string_to_int.cuh>
+
+namespace cudf {
+namespace strings {
+namespace udf {
+
+/**
+ * @brief Converts a string into an integer.
+ *
+ * The '+' and '-' are allowed but only at the beginning of the string.
+ * The string is expected to contain base-10 [0-9] characters only.
+ * Any other character will end the parse.
+ * Overflow of the int64 type is not detected.
+ */
+__device__ inline int64_t stoi(string_view const& d_str)
+{
+  return cudf::strings::detail::string_to_integer(d_str);
+}
+
+/**
+ * @brief Converts an integer into string
+ *
+ * @param value integer value to convert
+ */
+__device__ inline udf_string to_string(int64_t value)
+{
+  udf_string result;
+  if (value == 0) {
+    result.append("0");
+    return result;
+  }
+  auto const d_value = static_cast<double>(abs(value));
+  auto digits        = static_cast<int32_t>(log10(d_value)) + 1 + (value < 0);
+  result.resize(digits);
+  cudf::strings::detail::integer_to_string(value, result.data());
+  return result;
+}
+
+/**
+ * @brief Converts a string into a double.
+ *
+ * Support scientific notation as well.
+ * Overflow goes to inf or -inf and underflow may go to 0.
+ */
+__device__ inline double stod(string_view const& d_str)
+{
+  return cudf::strings::detail::stod(d_str);
+}
+
+}  // namespace udf
+}  // namespace strings
+}  // namespace cudf
diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/split.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/split.cuh
new file mode 100644
index 00000000000..f7fa32d0e45
--- /dev/null
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/split.cuh
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "udf_string.cuh"
+
+#include <cudf/strings/detail/split_utils.cuh>
+#include <cudf/strings/string_view.cuh>
+
+namespace cudf {
+namespace strings {
+namespace udf {
+
+/**
+ * @brief Split string using given string
+ *
+ * The caller must allocate an array of cudf::string_view to be filled
+ * in by this function. Calling this with a `nullptr` for the `result`
+ * will return the number of elements.
+ *
+ * @code{.cpp}
+ * auto d_str = cudf::string_view{"the best  of times ", 19};
+ * auto tgt = cudf::string_view{}; // empty string
+ * auto token_count = split(d_str, tgt, nullptr);
+ * auto result = new cudf::string_view[token_count];
+ * split(d_str, tgt, result);
+ * // result is array like ["the", "best", "of", "times"]
+ * @endcode
+ *
+ * @param d_str String to split
+ * @param tgt String to split on
+ * @param result Empty array to populate with output objects.
+ *               Pass `nullptr` to just get the token count.
+ * @return Number of tokens returned
+ */
+__device__ inline cudf::size_type split(cudf::string_view const d_str,
+                                        cudf::string_view const tgt,
+                                        cudf::string_view* result)
+{
+  auto const nchars     = d_str.length();
+  cudf::size_type count = 0;
+
+  cudf::size_type last_pos = 0;
+  while (last_pos <= nchars) {
+    cudf::size_type const pos = d_str.find(tgt, last_pos);
+    auto const length         = (pos < 0 ? nchars : pos) - last_pos;
+    if (result) { *result++ = d_str.substr(last_pos, length); }
+    last_pos = pos + tgt.length();
+    ++count;
+    if (pos < 0) { break; }
+  }
+
+  return count;
+}
+
+/**
+ * @brief Split string using given target array.
+ *
+ * @param d_str String to split
+ * @param tgt Character array encoded in UTF-8 used for identifying split points
+ * @param bytes Number of bytes to read from `tgt`
+ * @param result Empty array to populate with output objects.
+ *               Pass `nullptr` to just get the token count.
+ * @return Number of tokens returned
+ */
+__device__ inline int split(cudf::string_view const d_str,
+                            char const* tgt,
+                            cudf::size_type bytes,
+                            cudf::string_view* result)
+{
+  return split(d_str, cudf::string_view{tgt, bytes}, result);
+}
+
+/**
+ * @brief Split string using given target array.
+ *
+ * @param d_str String to split
+ * @param tgt Null-terminated character array encoded in UTF-8 used for identifying split points
+ * @param result Empty array to populate with output objects.
+ *               Pass `nullptr` to just get the token count.
+ * @return Number of tokens returned
+ */
+__device__ inline int split(cudf::string_view const d_str,
+                            char const* tgt,
+                            cudf::string_view* result)
+{
+  return split(d_str, tgt, detail::bytes_in_null_terminated_string(tgt), result);
+}
+
+/**
+ * @brief Split string on whitespace.
+ *
+ * This will create tokens by splitting on one or more consecutive whitespace characters
+ * found in `d_str`.
+ *
+ * @param d_str String to split
+ * @param result Empty array to populate with output objects.
+ *               Pass `nullptr` to just get the token count.
+ * @return Number of tokens returned
+ */
+__device__ inline cudf::size_type split(cudf::string_view const d_str, cudf::string_view* result)
+{
+  cudf::strings::detail::whitespace_string_tokenizer tokenizer{d_str};
+  cudf::size_type count = 0;
+  while (tokenizer.next_token()) {
+    auto token = tokenizer.get_token();
+    if (result) { *result++ = d_str.substr(token.first, token.second - token.first); }
+    ++count;
+  }
+  return count;
+}
+
+/**
+ * @brief Join an array of strings with a separator.
+ *
+ * @code{.cpp}
+ * auto separator = cudf::string_view{"::", 2};
+ * cudf::string_view input[] = {
+ *   cudf::string_view{"hello", 5},
+ *   cudf::string_view{"goodbye", 7},
+ *   cudf::string_view{"world", 5} };
+ *
+ * auto result = join(separator, input, 3);
+ * // result is "hello::goodbye::world"
+ * @endcode
+ *
+ * @param separator Separator string
+ * @param input An array of strings to join
+ * @param count Number of elements in `input`
+ * @return New string
+ */
+__device__ inline udf_string join(cudf::string_view const separator,
+                                  cudf::string_view* input,
+                                  cudf::size_type count)
+{
+  udf_string result{""};
+  while (count-- > 0) {
+    result += *input++;
+    if (count > 0) { result += separator; }
+  }
+  return result;
+}
+
+/**
+ * @brief Join an array of strings with a separator.
+ *
+ * @param separator Null-terminated UTF-8 string
+ * @param bytes Number of bytes to read from `separator`
+ * @param input An array of strings to join
+ * @param count Number of elements in `input`
+ * @return New string
+ */
+__device__ inline udf_string join(char const* separator,
+                                  cudf::size_type bytes,
+                                  cudf::string_view* input,
+                                  cudf::size_type count)
+{
+  return join(cudf::string_view{separator, bytes}, input, count);
+}
+
+/**
+ * @brief Join an array of strings with a separator.
+ *
+ * @param separator Null-terminated UTF-8 string
+ * @param input An array of strings to join
+ * @param count Number of elements in `input`
+ * @return New string
+ */
+__device__ inline udf_string join(char const* separator,
+                                  cudf::string_view* input,
+                                  cudf::size_type count)
+{
+  return join(separator, detail::bytes_in_null_terminated_string(separator), input, count);
+}
+
+}  // namespace udf
+}  // namespace strings
+}  // namespace cudf
diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/strip.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/strip.cuh
new file mode 100644
index 00000000000..1cd7dfbaa78
--- /dev/null
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/strip.cuh
@@ -0,0 +1,110 @@
+
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include "udf_string.cuh"
+
+#include <cudf/strings/detail/strip.cuh>
+#include <cudf/strings/string_view.cuh>
+
+namespace cudf {
+namespace strings {
+namespace udf {
+
+/**
+ * @brief Strip characters from the beginning and/or end of the given string.
+ *
+ * The `d_to_strip` is interpretted as an array of characters to be removed.
+ * If `d_to_strip` is an empty string, whitespace characters are stripped.
+ *
+ * @code{.cpp}
+ * auto d_str = cudf::string_view{" aba ", 5};
+ * auto d_to_strip = cudf::string_view{}; // empty string
+ * auto result = strip(d_str, d_to_strip);
+ * // result is "aba"
+ * d_to_strip = cudf::string_view{" a", 2}; // space and 'a'
+ * result = strip(d_str, d_to_strip);
+ * // result is "b" ('a' or ' ' removed from the ends)
+ * @endcode
+ *
+ * @param d_str String to strip characters from
+ * @param d_to_strip Characters to remove
+ * @param stype From where to strip the characters;
+ *              Default `BOTH` indicates stripping characters from the
+ *              beginning and the end of the input string `d_str`
+ * @return New string with characters removed
+ */
+__device__ udf_string strip(cudf::string_view const d_str,
+                            cudf::string_view const d_to_strip,
+                            side_type stype = side_type::BOTH)
+{
+  return udf_string{cudf::strings::detail::strip(d_str, d_to_strip, stype)};
+}
+
+/**
+ * @brief Strip characters from the beginning of the given string.
+ *
+ * The `d_to_strip` is interpretted as an array of characters to be removed.
+ * If `d_to_strip` is an empty string, whitespace characters are stripped.
+ *
+ * @code{.cpp}
+ * auto d_str = cudf::string_view{" aba ", 5};
+ * auto d_to_strip = cudf::string_view{}; // empty string
+ * auto result = lstrip(d_str, d_to_strip);
+ * // result is "aba "
+ * d_to_strip = cudf::string_view{"a ", 2}; // space and 'a'
+ * result = lstrip(d_str, d_to_strip);
+ * // result is "ba " ('a' or ' ' removed from the beginning)
+ * @endcode
+ *
+ * @param d_str String to strip characters from
+ * @param d_to_strip Characters to remove
+ * @return New string with characters removed
+ */
+__device__ udf_string lstrip(cudf::string_view const d_str, cudf::string_view d_to_strip)
+{
+  return strip(d_str, d_to_strip, side_type::LEFT);
+}
+
+/**
+ * @brief Strip characters from the end of the given string.
+ *
+ * The `d_to_strip` is interpretted as an array of characters to be removed.
+ * If `d_to_strip` is an empty string, whitespace characters are stripped.
+ *
+ * @code{.cpp}
+ * auto d_str = cudf::string_view{" aba ", 5};
+ * auto d_to_strip = cudf::string_view{}; // empty string
+ * auto result = rstrip(d_str, d_to_strip);
+ * // result is " aba"
+ * d_to_strip = cudf::string_view{" a", 2}; // space and 'a'
+ * result = rstrip(d_str, d_to_strip);
+ * // result is " ab" ('a' or ' ' removed from the end)
+ * @endcode
+ *
+ * @param d_str String to strip characters from
+ * @param d_to_strip Characters to remove
+ * @return New string with characters removed
+ */
+__device__ udf_string rstrip(cudf::string_view const d_str, cudf::string_view d_to_strip)
+{
+  return strip(d_str, d_to_strip, side_type::RIGHT);
+}
+
+}  // namespace udf
+}  // namespace strings
+}  // namespace cudf
diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/udf_apis.hpp b/python/strings_udf/cpp/include/cudf/strings/udf/udf_apis.hpp
index 6de9b91de08..f5807be151b 100644
--- a/python/strings_udf/cpp/include/cudf/strings/udf/udf_apis.hpp
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/udf_apis.hpp
@@ -16,6 +16,7 @@
 
 #pragma once
 
+#include <cudf/column/column.hpp>
 #include <cudf/column/column_view.hpp>
 
 #include <rmm/device_buffer.hpp>
@@ -34,6 +35,15 @@ namespace udf {
  */
 std::unique_ptr<rmm::device_buffer> to_string_view_array(cudf::column_view const input);
 
+/**
+ * @brief Return a cudf::column given an array of udf_string objects.
+ *
+ * @param d_buffer Pointer to device memory of udf_string objects
+ * @param d_size The number of bytes in the d_buffer
+ * @return A strings column copy of the udf_string objects
+ */
+std::unique_ptr<cudf::column> column_from_udf_string_array(void* d_buffer, std::size_t size);
+
 }  // namespace udf
 }  // namespace strings
 }  // namespace cudf
diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.cuh
new file mode 100644
index 00000000000..5ab262fb07d
--- /dev/null
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.cuh
@@ -0,0 +1,472 @@
+/*
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "udf_string.hpp"
+
+#include <cudf/strings/detail/utf8.hpp>
+#include <cudf/strings/string_view.cuh>
+
+#include <algorithm>
+#include <limits>
+#include <string>
+
+namespace cudf {
+namespace strings {
+namespace udf {
+namespace detail {
+
+__device__ inline static cudf::size_type bytes_in_null_terminated_string(char const* str)
+{
+  if (!str) return 0;
+  cudf::size_type bytes = 0;
+  while (*str++) ++bytes;
+  return bytes;
+}
+
+}  // namespace detail
+
+__device__ inline char* udf_string::allocate(cudf::size_type bytes)
+{
+  char* data  = static_cast<char*>(malloc(bytes + 1));
+  data[bytes] = 0;  // add null-terminator so we can printf strings in device code
+  return data;
+}
+
+__device__ inline void udf_string::deallocate(char* data)
+{
+  if (data) free(data);
+}
+
+__device__ void udf_string::reallocate(cudf::size_type bytes)
+{
+  m_capacity    = bytes;
+  auto new_data = allocate(m_capacity);
+  memcpy(new_data, m_data, std::min(m_bytes, bytes));
+  deallocate(m_data);
+  m_data = new_data;
+}
+
+__device__ inline udf_string::udf_string(char const* data, cudf::size_type bytes)
+  : m_bytes(bytes), m_capacity(bytes)
+{
+  m_data = allocate(m_capacity);
+  memcpy(m_data, data, bytes);
+}
+
+__device__ udf_string::udf_string(cudf::size_type count, cudf::char_utf8 chr)
+{
+  if (count <= 0) { return; }
+  m_bytes = m_capacity = cudf::strings::detail::bytes_in_char_utf8(chr) * count;
+  m_data               = allocate(m_capacity);
+  auto out_ptr         = m_data;
+  for (auto idx = 0; idx < count; ++idx) {
+    out_ptr += cudf::strings::detail::from_char_utf8(chr, out_ptr);
+  }
+}
+
+__device__ inline udf_string::udf_string(char const* data)
+{
+  m_bytes = m_capacity = detail::bytes_in_null_terminated_string(data);
+  m_data               = allocate(m_capacity);
+  memcpy(m_data, data, m_bytes);
+}
+
+__device__ inline udf_string::udf_string(udf_string const& src)
+  : m_bytes(src.m_bytes), m_capacity(src.m_bytes)
+{
+  m_data = allocate(m_capacity);
+  memcpy(m_data, src.m_data, m_bytes);
+}
+
+__device__ inline udf_string::udf_string(udf_string&& src)
+  : m_data(src.m_data), m_bytes(src.m_bytes), m_capacity(src.m_capacity)
+{
+  src.m_data     = nullptr;
+  src.m_bytes    = 0;
+  src.m_capacity = 0;
+}
+
+__device__ inline udf_string::udf_string(cudf::string_view const str)
+  : m_bytes(str.size_bytes()), m_capacity(str.size_bytes())
+{
+  m_data = allocate(m_capacity);
+  memcpy(m_data, str.data(), m_bytes);
+}
+
+__device__ inline udf_string::~udf_string() { deallocate(m_data); }
+
+__device__ inline udf_string& udf_string::operator=(udf_string const& str) { return assign(str); }
+
+__device__ inline udf_string& udf_string::operator=(udf_string&& str)
+{
+  return assign(std::move(str));
+}
+
+__device__ inline udf_string& udf_string::operator=(cudf::string_view const str)
+{
+  return assign(str);
+}
+
+__device__ inline udf_string& udf_string::operator=(char const* str) { return assign(str); }
+
+__device__ udf_string& udf_string::assign(udf_string&& str)
+{
+  if (this == &str) { return *this; }
+  m_data         = str.m_data;
+  m_bytes        = str.m_bytes;
+  m_capacity     = str.m_capacity;
+  str.m_data     = nullptr;
+  str.m_bytes    = 0;
+  str.m_capacity = 0;
+  return *this;
+}
+
+__device__ udf_string& udf_string::assign(cudf::string_view const str)
+{
+  return assign(str.data(), str.size_bytes());
+}
+
+__device__ udf_string& udf_string::assign(char const* str)
+{
+  return assign(str, detail::bytes_in_null_terminated_string(str));
+}
+
+__device__ udf_string& udf_string::assign(char const* str, cudf::size_type bytes)
+{
+  if (bytes >= m_capacity) {
+    deallocate(m_data);
+    m_capacity = bytes;
+    m_data     = allocate(m_capacity);
+  }
+  m_bytes = bytes;
+  memcpy(m_data, str, bytes);
+  m_data[m_bytes] = 0;
+  return *this;
+}
+
+__device__ inline cudf::size_type udf_string::size_bytes() const { return m_bytes; }
+
+__device__ inline cudf::size_type udf_string::length() const
+{
+  return cudf::strings::detail::characters_in_string(m_data, m_bytes);
+}
+
+__device__ cudf::size_type udf_string::max_size() const
+{
+  return std::numeric_limits<cudf::size_type>::max() - 1;
+}
+
+__device__ inline char* udf_string::data() { return m_data; }
+
+__device__ inline char const* udf_string::data() const { return m_data; }
+
+__device__ inline bool udf_string::is_empty() const { return m_bytes == 0; }
+
+__device__ inline bool udf_string::is_null() const { return m_data == nullptr; }
+
+__device__ inline cudf::string_view::const_iterator udf_string::begin() const
+{
+  return cudf::string_view::const_iterator(cudf::string_view(m_data, m_bytes), 0);
+}
+
+__device__ inline cudf::string_view::const_iterator udf_string::end() const
+{
+  return cudf::string_view::const_iterator(cudf::string_view(m_data, m_bytes), length());
+}
+
+__device__ inline cudf::char_utf8 udf_string::at(cudf::size_type pos) const
+{
+  auto const offset = byte_offset(pos);
+  auto chr          = cudf::char_utf8{0};
+  if (offset < m_bytes) { cudf::strings::detail::to_char_utf8(data() + offset, chr); }
+  return chr;
+}
+
+__device__ inline cudf::char_utf8 udf_string::operator[](cudf::size_type pos) const
+{
+  return at(pos);
+}
+
+__device__ inline cudf::size_type udf_string::byte_offset(cudf::size_type pos) const
+{
+  cudf::size_type offset = 0;
+
+  auto sptr = m_data;
+  auto eptr = sptr + m_bytes;
+  while ((pos > 0) && (sptr < eptr)) {
+    auto const byte       = static_cast<uint8_t>(*sptr++);
+    auto const char_bytes = cudf::strings::detail::bytes_in_utf8_byte(byte);
+    if (char_bytes) { --pos; }
+    offset += char_bytes;
+  }
+  return offset;
+}
+
+__device__ inline int udf_string::compare(cudf::string_view const in) const
+{
+  return compare(in.data(), in.size_bytes());
+}
+
+__device__ inline int udf_string::compare(char const* data, cudf::size_type bytes) const
+{
+  auto const view = static_cast<cudf::string_view>(*this);
+  return view.compare(data, bytes);
+}
+
+__device__ inline bool udf_string::operator==(cudf::string_view const rhs) const
+{
+  return m_bytes == rhs.size_bytes() && compare(rhs) == 0;
+}
+
+__device__ inline bool udf_string::operator!=(cudf::string_view const rhs) const
+{
+  return compare(rhs) != 0;
+}
+
+__device__ inline bool udf_string::operator<(cudf::string_view const rhs) const
+{
+  return compare(rhs) < 0;
+}
+
+__device__ inline bool udf_string::operator>(cudf::string_view const rhs) const
+{
+  return compare(rhs) > 0;
+}
+
+__device__ inline bool udf_string::operator<=(cudf::string_view const rhs) const
+{
+  int rc = compare(rhs);
+  return (rc == 0) || (rc < 0);
+}
+
+__device__ inline bool udf_string::operator>=(cudf::string_view const rhs) const
+{
+  int rc = compare(rhs);
+  return (rc == 0) || (rc > 0);
+}
+
+__device__ inline void udf_string::clear()
+{
+  deallocate(m_data);
+  m_data     = nullptr;
+  m_bytes    = 0;
+  m_capacity = 0;
+}
+
+__device__ inline void udf_string::resize(cudf::size_type count)
+{
+  if (count > max_size()) { return; }
+  if (count > m_capacity) { reallocate(count); }
+
+  // add padding if necessary (null chars)
+  if (count > m_bytes) { memset(m_data + m_bytes, 0, count - m_bytes); }
+
+  m_bytes         = count;
+  m_data[m_bytes] = 0;
+}
+
+__device__ void udf_string::reserve(cudf::size_type count)
+{
+  if (count < max_size() && count > m_capacity) { reallocate(count); }
+}
+
+__device__ cudf::size_type udf_string::capacity() const { return m_capacity; }
+
+__device__ void udf_string::shrink_to_fit()
+{
+  if (m_bytes < m_capacity) { reallocate(m_bytes); }
+}
+
+__device__ inline udf_string& udf_string::append(char const* str, cudf::size_type in_bytes)
+{
+  if (in_bytes <= 0) { return *this; }
+  auto const nbytes = m_bytes + in_bytes;
+  if (nbytes > m_capacity) { reallocate(2 * nbytes); }
+  memcpy(m_data + m_bytes, str, in_bytes);
+  m_bytes         = nbytes;
+  m_data[m_bytes] = 0;
+  return *this;
+}
+
+__device__ inline udf_string& udf_string::append(char const* str)
+{
+  return append(str, detail::bytes_in_null_terminated_string(str));
+}
+
+__device__ inline udf_string& udf_string::append(cudf::char_utf8 chr, cudf::size_type count)
+{
+  if (count <= 0) { return *this; }
+  auto const char_bytes = cudf::strings::detail::bytes_in_char_utf8(chr) * count;
+  auto const nbytes     = m_bytes + char_bytes;
+  if (nbytes > m_capacity) { reallocate(2 * nbytes); }
+  auto out_ptr = m_data + m_bytes;
+  for (auto idx = 0; idx < count; ++idx) {
+    out_ptr += cudf::strings::detail::from_char_utf8(chr, out_ptr);
+  }
+  m_bytes         = nbytes;
+  m_data[m_bytes] = 0;
+  return *this;
+}
+
+__device__ inline udf_string& udf_string::append(cudf::string_view const in)
+{
+  return append(in.data(), in.size_bytes());
+}
+
+__device__ inline udf_string& udf_string::operator+=(cudf::string_view const in)
+{
+  return append(in);
+}
+
+__device__ inline udf_string& udf_string::operator+=(cudf::char_utf8 chr) { return append(chr); }
+
+__device__ inline udf_string& udf_string::operator+=(char const* str) { return append(str); }
+
+__device__ inline udf_string& udf_string::insert(cudf::size_type pos,
+                                                 char const* str,
+                                                 cudf::size_type in_bytes)
+{
+  return replace(pos, 0, str, in_bytes);
+}
+
+__device__ inline udf_string& udf_string::insert(cudf::size_type pos, char const* str)
+{
+  return insert(pos, str, detail::bytes_in_null_terminated_string(str));
+}
+
+__device__ inline udf_string& udf_string::insert(cudf::size_type pos, cudf::string_view const in)
+{
+  return insert(pos, in.data(), in.size_bytes());
+}
+
+__device__ inline udf_string& udf_string::insert(cudf::size_type pos,
+                                                 cudf::size_type count,
+                                                 cudf::char_utf8 chr)
+{
+  return replace(pos, 0, count, chr);
+}
+
+__device__ inline udf_string udf_string::substr(cudf::size_type pos, cudf::size_type count) const
+{
+  if (pos < 0) { return udf_string{"", 0}; }
+  auto const spos = byte_offset(pos);
+  if (spos >= m_bytes) { return udf_string{"", 0}; }
+  auto const epos = count < 0 ? m_bytes : std::min(byte_offset(pos + count), m_bytes);
+  return udf_string{data() + spos, epos - spos};
+}
+
+// utility for replace()
+__device__ void udf_string::shift_bytes(cudf::size_type spos,
+                                        cudf::size_type epos,
+                                        cudf::size_type nbytes)
+{
+  if (nbytes < m_bytes) {
+    // shift bytes to the left [...wxyz] -> [wxyzxyz]
+    auto src = epos;
+    auto tgt = spos;
+    while (tgt < nbytes) { m_data[tgt++] = m_data[src++]; }
+  } else if (nbytes > m_bytes) {
+    // shift bytes to the right [abcd...] -> [abcabcd]
+    auto src = m_bytes;
+    auto tgt = nbytes;
+    while (src > epos) { m_data[--tgt] = m_data[--src]; }
+  }
+}
+
+__device__ inline udf_string& udf_string::replace(cudf::size_type pos,
+                                                  cudf::size_type count,
+                                                  char const* str,
+                                                  cudf::size_type in_bytes)
+{
+  if (pos < 0 || in_bytes < 0) { return *this; }
+  auto const spos = byte_offset(pos);
+  if (spos > m_bytes) { return *this; }
+  auto const epos = count < 0 ? m_bytes : std::min(byte_offset(pos + count), m_bytes);
+
+  // compute new size
+  auto const nbytes = m_bytes + in_bytes - (epos - spos);
+  if (nbytes > m_capacity) { reallocate(2 * nbytes); }
+
+  // move bytes -- make room for replacement
+  shift_bytes(spos + in_bytes, epos, nbytes);
+
+  // insert the replacement
+  memcpy(m_data + spos, str, in_bytes);
+
+  m_bytes         = nbytes;
+  m_data[m_bytes] = 0;
+  return *this;
+}
+
+__device__ inline udf_string& udf_string::replace(cudf::size_type pos,
+                                                  cudf::size_type count,
+                                                  char const* str)
+{
+  return replace(pos, count, str, detail::bytes_in_null_terminated_string(str));
+}
+
+__device__ inline udf_string& udf_string::replace(cudf::size_type pos,
+                                                  cudf::size_type count,
+                                                  cudf::string_view const in)
+{
+  return replace(pos, count, in.data(), in.size_bytes());
+}
+
+__device__ inline udf_string& udf_string::replace(cudf::size_type pos,
+                                                  cudf::size_type count,
+                                                  cudf::size_type chr_count,
+                                                  cudf::char_utf8 chr)
+{
+  if (pos < 0 || chr_count < 0) { return *this; }
+  auto const spos = byte_offset(pos);
+  if (spos > m_bytes) { return *this; }
+  auto const epos = count < 0 ? m_bytes : std::min(byte_offset(pos + count), m_bytes);
+
+  // compute input size
+  auto const char_bytes = cudf::strings::detail::bytes_in_char_utf8(chr) * chr_count;
+  // compute new output size
+  auto const nbytes = m_bytes + char_bytes - (epos - spos);
+  if (nbytes > m_capacity) { reallocate(2 * nbytes); }
+
+  // move bytes -- make room for the new character(s)
+  shift_bytes(spos + char_bytes, epos, nbytes);
+
+  // copy chr chr_count times
+  auto out_ptr = m_data + spos;
+  for (auto idx = 0; idx < chr_count; ++idx) {
+    out_ptr += cudf::strings::detail::from_char_utf8(chr, out_ptr);
+  }
+
+  m_bytes         = nbytes;
+  m_data[m_bytes] = 0;
+  return *this;
+}
+
+__device__ udf_string& udf_string::erase(cudf::size_type pos, cudf::size_type count)
+{
+  return replace(pos, count, nullptr, 0);
+}
+
+__device__ inline cudf::size_type udf_string::char_offset(cudf::size_type bytepos) const
+{
+  return cudf::strings::detail::characters_in_string(data(), bytepos);
+}
+
+}  // namespace udf
+}  // namespace strings
+}  // namespace cudf
diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.hpp b/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.hpp
new file mode 100644
index 00000000000..a0da0186b24
--- /dev/null
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.hpp
@@ -0,0 +1,523 @@
+/*
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <cudf/strings/string_view.hpp>
+
+#include <cuda_runtime.h>
+
+namespace cudf {
+namespace strings {
+namespace udf {
+
+class udf_string {
+ public:
+  /**
+   * @brief Represents unknown character position or length.
+   */
+  static constexpr cudf::size_type npos = static_cast<cudf::size_type>(-1);
+
+  /**
+   * @brief Cast to cudf::string_view operator
+   */
+  __device__ operator cudf::string_view() const { return cudf::string_view(m_data, m_bytes); }
+
+  /**
+   * @brief Create an empty string.
+   */
+  udf_string() = default;
+
+  /**
+   * @brief Create a string using existing device memory.
+   *
+   * The given memory is copied into the instance returned.
+   *
+   * @param data Device pointer to UTF-8 encoded string
+   * @param bytes Number of bytes in `data`
+   */
+  __device__ udf_string(char const* data, cudf::size_type bytes);
+
+  /**
+   * @brief Create a string object from a null-terminated character array.
+   *
+   * The given memory is copied into the instance returned.
+   *
+   * @param data Device pointer to UTF-8 encoded null-terminated
+   *             character array.
+   */
+  __device__ udf_string(char const* data);
+
+  /**
+   * @brief Create a string object from a cudf::string_view.
+   *
+   * The input string data is copied into the instance returned.
+   *
+   * @param str String to copy
+   */
+  __device__ udf_string(cudf::string_view const str);
+
+  /**
+   * @brief Create a string object with `count` copies of character `chr`.
+   *
+   * @param count Number of times to copy `chr`
+   * @param chr Character from which to create the string
+   */
+  __device__ udf_string(cudf::size_type count, cudf::char_utf8 chr);
+
+  /**
+   * @brief Create a string object from another instance.
+   *
+   * The string data is copied from the `src` into the instance returned.
+   *
+   * @param src String to copy
+   */
+  __device__ udf_string(udf_string const& src);
+
+  /**
+   * @brief Create a string object from a move reference.
+   *
+   * The string data is moved from `src` into the instance returned.
+   * The `src` will have no content.
+   *
+   * @param src String to copy
+   */
+  __device__ udf_string(udf_string&& src);
+
+  __device__ ~udf_string();
+
+  __device__ udf_string& operator=(udf_string const&);
+  __device__ udf_string& operator=(udf_string&&);
+  __device__ udf_string& operator=(cudf::string_view const);
+  __device__ udf_string& operator=(char const*);
+
+  /**
+   * @brief Return the number of bytes in this string.
+   */
+  __device__ cudf::size_type size_bytes() const;
+
+  /**
+   * @brief Return the number of characters in this string.
+   */
+  __device__ cudf::size_type length() const;
+
+  /**
+   * @brief Return the maximum number of bytes a udf_string can hold.
+   */
+  __device__ cudf::size_type max_size() const;
+
+  /**
+   * @brief Return the internal pointer to the character array for this object.
+   */
+  __device__ char* data();
+  __device__ char const* data() const;
+
+  /**
+   * @brief Returns true if there are no characters in this string.
+   */
+  __device__ bool is_empty() const;
+
+  /**
+   * @brief Returns true if `data()==nullptr`
+   *
+   * This is experimental and may be removed in the futre.
+   */
+  __device__ bool is_null() const;
+
+  /**
+   * @brief Returns an iterator that can be used to navigate through
+   *        the UTF-8 characters in this string.
+   *
+   * This returns a `cudf::string_view::const_iterator` which is read-only.
+   */
+  __device__ cudf::string_view::const_iterator begin() const;
+  __device__ cudf::string_view::const_iterator end() const;
+
+  /**
+   * @brief Returns the character at the specified position.
+   *
+   * This will return 0 if `pos >= length()`.
+   *
+   * @param pos Index position of character to return
+   * @return Character at position `pos`
+   */
+  __device__ cudf::char_utf8 at(cudf::size_type pos) const;
+
+  /**
+   * @brief Returns the character at the specified index.
+   *
+   * This will return 0 if `pos >= length()`.
+   * Note this is read-only. Use replace() to modify a character.
+   *
+   * @param pos Index position of character to return
+   * @return Character at position `pos`
+   */
+  __device__ cudf::char_utf8 operator[](cudf::size_type pos) const;
+
+  /**
+   * @brief Return the byte offset for a given character position.
+   *
+   * The byte offset for the character at `pos` such that
+   * `data() + byte_offset(pos)` points to the memory location
+   * the character at position `pos`.
+   *
+   * @param pos Index position of character to return byte offset.
+   * @return Byte offset for character at `pos`
+   */
+  __device__ cudf::size_type byte_offset(cudf::size_type pos) const;
+
+  /**
+   * @brief Comparing target string with this string
+   *
+   * @param str Target string to compare with this string
+   * @return 0  If they compare equal
+   *         <0 Either the value of the first character of this string that does
+   *            not match is ordered before the corresponding character in `str`,
+   *            or all compared characters match but the `str` string is shorter.
+   *         >0 Either the value of the first character of this string that does
+   *            not match is ordered after the corresponding character in `str`,
+   *            or all compared characters match but the `str` string is longer.
+   */
+  __device__ int compare(cudf::string_view const str) const;
+
+  /**
+   * @brief Comparing target character array with this string
+   *
+   * @param str Target array of UTF-8 characters.
+   * @param bytes Number of bytes in `str`.
+   * @return 0  If they compare equal
+   *         <0 Either the value of the first character of this string that does
+   *            not match is ordered before the corresponding character in `str`,
+   *            or all compared characters match but `bytes < size_bytes()`.
+   *         >0 Either the value of the first character of this string that does
+   *            not match is ordered after the corresponding character in `str`,
+   *            or all compared characters match but `bytes > size_bytes()`.
+   */
+  __device__ int compare(char const* str, cudf::size_type bytes) const;
+
+  /**
+   * @brief Returns true if `rhs` matches this string exactly
+   */
+  __device__ bool operator==(cudf::string_view const rhs) const;
+
+  /**
+   * @brief Returns true if `rhs` does not match this string
+   */
+  __device__ bool operator!=(cudf::string_view const rhs) const;
+
+  /**
+   * @brief Returns true if this string is ordered before `rhs`
+   */
+  __device__ bool operator<(cudf::string_view const rhs) const;
+
+  /**
+   * @brief Returns true if `rhs` is ordered before this string
+   */
+  __device__ bool operator>(cudf::string_view const rhs) const;
+
+  /**
+   * @brief Returns true if this string matches or is ordered before `rhs`
+   */
+  __device__ bool operator<=(cudf::string_view const rhs) const;
+
+  /**
+   * @brief Returns true if `rhs` matches or is ordered before this string
+   */
+  __device__ bool operator>=(cudf::string_view const rhs) const;
+
+  /**
+   * @brief Remove all bytes from this string.
+   *
+   * All pointers, references, and iterators are invalidated.
+   */
+  __device__ void clear();
+
+  /**
+   * @brief Resizes string to contain `count` bytes.
+   *
+   * If `count > size_bytes()` then zero-padding is added.
+   * If `count < size_bytes()` then the string is truncated to size `count`.
+   *
+   * All pointers, references, and iterators may be invalidated.
+   *
+   * @param count Size in bytes of this string.
+   */
+  __device__ void resize(cudf::size_type count);
+
+  /**
+   * @brief Reserve `count` bytes in this string.
+   *
+   * If `count > capacity()`, new memory is allocated and `capacity()` will
+   * be greater than or equal to `count`.
+   * There is no effect if `count <= capacity()`.
+   *
+   * @param count Total number of bytes to reserve for this string
+   */
+  __device__ void reserve(cudf::size_type count);
+
+  /**
+   * @brief Returns the number of bytes that the string has allocated.
+   */
+  __device__ cudf::size_type capacity() const;
+
+  /**
+   * @brief Reduces internal allocation to just `size_bytes()`.
+   *
+   * All pointers, references, and iterators may be invalidated.
+   */
+  __device__ void shrink_to_fit();
+
+  /**
+   * @brief Moves the contents of `str` into this string instance
+   *
+   * @param str String to move
+   * @return This string new contents
+   */
+  __device__ udf_string& assign(udf_string&& str);
+
+  /**
+   * @brief Replaces the contents of this string with contents of `str`
+   *
+   * @param str String to copy
+   * @return This string new contents
+   */
+  __device__ udf_string& assign(cudf::string_view const str);
+
+  /**
+   * @brief Replaces the contents of this string with contents of `str`
+   *
+   * @param str Null-terminated UTF-8 character array
+   * @return This string new contents
+   */
+  __device__ udf_string& assign(char const* str);
+
+  /**
+   * @brief Replaces the contents of this string with contents of `str`
+   *
+   * @param str UTF-8 character array
+   * @param bytes Number of bytes to copy from `str`
+   * @return This string new contents
+   */
+  __device__ udf_string& assign(char const* str, cudf::size_type bytes);
+
+  /**
+   * @brief Append a string to the end of this string.
+   *
+   * @param str String to append
+   * @return This string with the appended argument
+   */
+  __device__ udf_string& operator+=(cudf::string_view const str);
+
+  /**
+   * @brief Append a character to the end of this string.
+   *
+   * @param str Character to append
+   * @return This string with the appended argument
+   */
+  __device__ udf_string& operator+=(cudf::char_utf8 chr);
+
+  /**
+   * @brief Append a null-terminated device memory character array
+   * to the end of this string.
+   *
+   * @param str String to append
+   * @return This string with the appended argument
+   */
+  __device__ udf_string& operator+=(char const* str);
+
+  /**
+   * @brief Append a null-terminated character array to the end of this string.
+   *
+   * @param str String to append
+   * @return This string with the appended argument
+   */
+  __device__ udf_string& append(char const* str);
+
+  /**
+   * @brief Append a character array to the end of this string.
+   *
+   * @param str Character array to append
+   * @param bytes Number of bytes from `str` to append.
+   * @return This string with the appended argument
+   */
+  __device__ udf_string& append(char const* str, cudf::size_type bytes);
+
+  /**
+   * @brief Append a string to the end of this string.
+   *
+   * @param str String to append
+   * @return This string with the appended argument
+   */
+  __device__ udf_string& append(cudf::string_view const str);
+
+  /**
+   * @brief Append a character to the end of this string
+   * a specified number of times.
+   *
+   * @param chr Character to append
+   * @param count Number of times to append `chr`
+   * @return This string with the append character(s)
+   */
+  __device__ udf_string& append(cudf::char_utf8 chr, cudf::size_type count = 1);
+
+  /**
+   * @brief Insert a string into the character position specified.
+   *
+   * There is no effect if `pos < 0 or pos > length()`.
+   *
+   * @param pos Character position to begin insert
+   * @param str String to insert into this one
+   * @return This string with the inserted argument
+   */
+  __device__ udf_string& insert(cudf::size_type pos, cudf::string_view const str);
+
+  /**
+   * @brief Insert a null-terminated character array into the character position specified.
+   *
+   * There is no effect if `pos < 0 or pos > length()`.
+   *
+   * @param pos Character position to begin insert
+   * @param data Null-terminated character array to insert
+   * @return This string with the inserted argument
+   */
+  __device__ udf_string& insert(cudf::size_type pos, char const* data);
+
+  /**
+   * @brief Insert a character array into the character position specified.
+   *
+   * There is no effect if `pos < 0 or pos > length()`.
+   *
+   * @param pos Character position to begin insert
+   * @param data Character array to insert
+   * @param bytes Number of bytes from `data` to insert
+   * @return This string with the inserted argument
+   */
+  __device__ udf_string& insert(cudf::size_type pos, char const* data, cudf::size_type bytes);
+
+  /**
+   * @brief Insert a character one or more times into the character position specified.
+   *
+   * There is no effect if `pos < 0 or pos > length()`.
+   *
+   * @param pos Character position to begin insert
+   * @param count Number of times to insert `chr`
+   * @param chr Character to insert
+   * @return This string with the inserted argument
+   */
+  __device__ udf_string& insert(cudf::size_type pos, cudf::size_type count, cudf::char_utf8 chr);
+
+  /**
+   * @brief Returns a substring of this string.
+   *
+   * An empty string is returned if `pos < 0 or pos >= length()`.
+   *
+   * @param pos Character position to start the substring
+   * @param count Number of characters for the substring;
+   *              This can be greater than the number of available characters.
+   *              Default npos returns characters in range `[pos, length())`.
+   * @return New string with the specified characters
+   */
+  __device__ udf_string substr(cudf::size_type pos, cudf::size_type count = npos) const;
+
+  /**
+   * @brief Replace a range of characters with a given string.
+   *
+   * Replaces characters in range `[pos, pos + count]` with `str`.
+   * There is no effect if `pos < 0 or pos > length()`.
+   *
+   * @param pos Position of first character to replace
+   * @param count Number of characters to replace
+   * @param str String to replace the given range
+   * @return This string modified with the replacement
+   */
+  __device__ udf_string& replace(cudf::size_type pos,
+                                 cudf::size_type count,
+                                 cudf::string_view const str);
+
+  /**
+   * @brief Replace a range of characters with a null-terminated character array.
+   *
+   * Replaces characters in range `[pos, pos + count)` with `data`.
+   * There is no effect if `pos < 0 or pos > length()`.
+   *
+   * @param pos Position of first character to replace
+   * @param count Number of characters to replace
+   * @param data Null-terminated character array to replace the given range
+   * @return This string modified with the replacement
+   */
+  __device__ udf_string& replace(cudf::size_type pos, cudf::size_type count, char const* data);
+
+  /**
+   * @brief Replace a range of characters with a given character array.
+   *
+   * Replaces characters in range `[pos, pos + count)` with `[data, data + bytes)`.
+   * There is no effect if `pos < 0 or pos > length()`.
+   *
+   * @param pos Position of first character to replace
+   * @param count Number of characters to replace
+   * @param data String to replace the given range
+   * @param bytes Number of bytes from data to use for replacement
+   * @return This string modified with the replacement
+   */
+  __device__ udf_string& replace(cudf::size_type pos,
+                                 cudf::size_type count,
+                                 char const* data,
+                                 cudf::size_type bytes);
+
+  /**
+   * @brief Replace a range of characters with a character one or more times.
+   *
+   * Replaces characters in range `[pos, pos + count)` with `chr` `chr_count` times.
+   * There is no effect if `pos < 0 or pos > length()`.
+   *
+   * @param pos Position of first character to replace
+   * @param count Number of characters to replace
+   * @param chr_count Number of times `chr` will repeated
+   * @param chr Character to use for replacement
+   * @return This string modified with the replacement
+   */
+  __device__ udf_string& replace(cudf::size_type pos,
+                                 cudf::size_type count,
+                                 cudf::size_type chr_count,
+                                 cudf::char_utf8 chr);
+
+  /**
+   * @brief Removes specified characters from this string.
+   *
+   * Removes `min(count, length() - pos)` characters starting at `pos`.
+   * There is no effect if `pos < 0 or pos >= length()`.
+   *
+   * @param pos Character position to begin insert
+   * @param count Number of characters to remove starting at `pos`
+   * @return This string with remove characters
+   */
+  __device__ udf_string& erase(cudf::size_type pos, cudf::size_type count = npos);
+
+ private:
+  char* m_data{};
+  cudf::size_type m_bytes{};
+  cudf::size_type m_capacity{};
+
+  // utilities
+  __device__ char* allocate(cudf::size_type bytes);
+  __device__ void deallocate(char* data);
+  __device__ void reallocate(cudf::size_type bytes);
+  __device__ cudf::size_type char_offset(cudf::size_type bytepos) const;
+  __device__ void shift_bytes(cudf::size_type spos, cudf::size_type epos, cudf::size_type nbytes);
+};
+
+}  // namespace udf
+}  // namespace strings
+}  // namespace cudf
diff --git a/python/strings_udf/cpp/src/strings/udf/udf_apis.cu b/python/strings_udf/cpp/src/strings/udf/udf_apis.cu
index dfef1be39f5..2bc74c8ca19 100644
--- a/python/strings_udf/cpp/src/strings/udf/udf_apis.cu
+++ b/python/strings_udf/cpp/src/strings/udf/udf_apis.cu
@@ -15,17 +15,48 @@
  */
 
 #include <cudf/strings/udf/udf_apis.hpp>
+#include <cudf/strings/udf/udf_string.cuh>
 
 #include <cudf/column/column_factories.hpp>
 #include <cudf/strings/detail/utilities.hpp>
 #include <cudf/strings/string_view.cuh>
 
 #include <rmm/device_uvector.hpp>
+#include <rmm/exec_policy.hpp>
+
+#include <thrust/iterator/counting_iterator.h>
+#include <thrust/transform.h>
 
 namespace cudf {
 namespace strings {
 namespace udf {
 namespace detail {
+namespace {
+
+struct free_udf_string_fn {
+  cudf::strings::udf::udf_string* d_strings;
+  __device__ void operator()(cudf::size_type idx) { d_strings[idx].clear(); }
+};
+
+void free_udf_string_array(void* d_buffer, std::size_t buffer_size, rmm::cuda_stream_view stream)
+{
+  auto const size =
+    static_cast<cudf::size_type>(buffer_size / sizeof(cudf::strings::udf::udf_string));
+  auto d_strings = reinterpret_cast<cudf::strings::udf::udf_string*>(d_buffer);
+  thrust::for_each_n(rmm::exec_policy(stream),
+                     thrust::make_counting_iterator(0),
+                     size,
+                     free_udf_string_fn{d_strings});
+}
+
+struct udf_string_to_string_view_transform_fn {
+  __device__ cudf::string_view operator()(cudf::strings::udf::udf_string const& dstr)
+  {
+    return cudf::string_view{dstr.data(), dstr.size_bytes()};
+  }
+};
+
+}  // namespace
 
 std::unique_ptr<rmm::device_buffer> to_string_view_array(cudf::column_view const input,
                                                          rmm::cuda_stream_view stream)
@@ -36,6 +67,31 @@ std::unique_ptr<rmm::device_buffer> to_string_view_array(cudf::column_view const
                 .release()));
 }
 
+std::unique_ptr<cudf::column> column_from_udf_string_array(void* d_buffer,
+                                                           std::size_t buffer_size,
+                                                           rmm::cuda_stream_view stream)
+{
+  auto const size =
+    static_cast<cudf::size_type>(buffer_size / sizeof(cudf::strings::udf::udf_string));
+  auto d_input = reinterpret_cast<cudf::strings::udf::udf_string*>(d_buffer);
+
+  // create string_views of the udf_strings
+  auto indices = rmm::device_uvector<cudf::string_view>(size, stream);
+  thrust::transform(rmm::exec_policy(stream),
+                    d_input,
+                    d_input + size,
+                    indices.data(),
+                    udf_string_to_string_view_transform_fn{});
+
+  auto results = cudf::make_strings_column(indices, cudf::string_view(nullptr, 0), stream);
+
+  // free the individual udf_string elements
+  free_udf_string_array(d_buffer, buffer_size, stream);
+
+  // return new column
+  return results;
+}
+
 }  // namespace detail
 
 std::unique_ptr<rmm::device_buffer> to_string_view_array(cudf::column_view const input)
@@ -43,6 +99,11 @@ std::unique_ptr<rmm::device_buffer> to_string_view_array(cudf::column_view const
   return detail::to_string_view_array(input, rmm::cuda_stream_default);
 }
 
+std::unique_ptr<cudf::column> column_from_udf_string_array(void* d_buffer, std::size_t buffer_size)
+{
+  return detail::column_from_udf_string_array(d_buffer, buffer_size, rmm::cuda_stream_default);
+}
+
 }  // namespace udf
 }  // namespace strings
 }  // namespace cudf

From 5317db88d51638e673ba49da905c42dd00ab0b59 Mon Sep 17 00:00:00 2001
From: David Wendt <dwendt@nvidia.com>
Date: Wed, 12 Oct 2022 17:04:53 -0400
Subject: [PATCH 02/54] fix style error

---
 .../cpp/include/cudf/strings/udf/udf_string.cuh       | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.cuh
index 5ab262fb07d..ff4a4399e40 100644
--- a/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.cuh
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.cuh
@@ -34,7 +34,8 @@ __device__ inline static cudf::size_type bytes_in_null_terminated_string(char co
 {
   if (!str) return 0;
   cudf::size_type bytes = 0;
-  while (*str++) ++bytes;
+  while (*str++)
+    ++bytes;
   return bytes;
 }
 
@@ -379,12 +380,16 @@ __device__ void udf_string::shift_bytes(cudf::size_type spos,
     // shift bytes to the left [...wxyz] -> [wxyzxyz]
     auto src = epos;
     auto tgt = spos;
-    while (tgt < nbytes) { m_data[tgt++] = m_data[src++]; }
+    while (tgt < nbytes) {
+      m_data[tgt++] = m_data[src++];
+    }
   } else if (nbytes > m_bytes) {
     // shift bytes to the right [abcd...] -> [abcabcd]
     auto src = m_bytes;
     auto tgt = nbytes;
-    while (src > epos) { m_data[--tgt] = m_data[--src]; }
+    while (src > epos) {
+      m_data[--tgt] = m_data[--src];
+    }
   }
 }
 

From 2e36b6a6257fc7739426fa431681ff6cae7553f4 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Thu, 13 Oct 2022 08:20:49 -0700
Subject: [PATCH 03/54] support returning strings within strings_udf library

---
 .../strings_udf/cpp/src/strings/udf/shim.cu   | 12 +++++
 .../strings_udf/_lib/cpp/strings_udf.pxd      |  3 ++
 .../strings_udf/_lib/cudf_jit_udf.pyx         | 13 +++++
 python/strings_udf/strings_udf/_typing.py     | 39 +++++++++++----
 python/strings_udf/strings_udf/lowering.py    | 48 ++++++++++++++++++-
 .../strings_udf/tests/test_string_udfs.py     | 48 +++++++++++++++----
 6 files changed, 144 insertions(+), 19 deletions(-)

diff --git a/python/strings_udf/cpp/src/strings/udf/shim.cu b/python/strings_udf/cpp/src/strings/udf/shim.cu
index 4d6690468ff..b284d58fe58 100644
--- a/python/strings_udf/cpp/src/strings/udf/shim.cu
+++ b/python/strings_udf/cpp/src/strings/udf/shim.cu
@@ -17,6 +17,7 @@
 #include <cudf/strings/udf/char_types.cuh>
 #include <cudf/strings/udf/search.cuh>
 #include <cudf/strings/udf/starts_with.cuh>
+#include <cudf/strings/udf/udf_string.cuh>
 
 using namespace cudf::strings::udf;
 
@@ -215,3 +216,14 @@ extern "C" __device__ int pycount(int* nb_retval, void const* str, void const* s
   *nb_retval = count(*str_view, *substr_view);
   return 0;
 }
+
+extern "C" __device__ int udf_string_from_string_view(int* nb_retbal,
+                                                      void const* str,
+                                                      void* udf_str)
+{
+  auto str_view_ptr = reinterpret_cast<cudf::string_view const*>(str);
+  auto udf_str_ptr  = reinterpret_cast<udf_string*>(udf_str);
+  *udf_str_ptr      = udf_string(*str_view_ptr);
+
+  return 0;
+}
diff --git a/python/strings_udf/strings_udf/_lib/cpp/strings_udf.pxd b/python/strings_udf/strings_udf/_lib/cpp/strings_udf.pxd
index fb8e3a949bf..ee145e4023b 100644
--- a/python/strings_udf/strings_udf/_lib/cpp/strings_udf.pxd
+++ b/python/strings_udf/strings_udf/_lib/cpp/strings_udf.pxd
@@ -14,6 +14,9 @@ from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer
 cdef extern from "cudf/strings/udf/udf_apis.hpp"  namespace \
         "cudf::strings::udf" nogil:
     cdef unique_ptr[device_buffer] to_string_view_array(column_view) except +
+    cdef unique_ptr[column] column_from_udf_string_array(
+        void*, size_t
+    ) except +
 
 cdef extern from "cudf/strings/detail/char_tables.hpp" namespace \
         "cudf::strings::detail" nogil:
diff --git a/python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx b/python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx
index bb1892a4d26..c450c394638 100644
--- a/python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx
+++ b/python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx
@@ -10,6 +10,7 @@ from cudf._lib.cpp.column.column cimport column, column_view
 from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer
 
 from strings_udf._lib.cpp.strings_udf cimport (
+    column_from_udf_string_array as cpp_column_from_udf_string_array,
     to_string_view_array as cpp_to_string_view_array,
 )
 
@@ -22,3 +23,15 @@ def to_string_view_array(Column strings_col):
 
     device_buffer = DeviceBuffer.c_from_unique_ptr(move(c_buffer))
     return Buffer(device_buffer)
+
+
+def from_udf_string_array(DeviceBuffer d_buffer):
+    cdef size_t size = d_buffer.c_size()
+    cdef void* data = d_buffer.c_data()
+    cdef unique_ptr[column] c_result
+    # data = <void *>
+
+    with nogil:
+        c_result = move(cpp_column_from_udf_string_array(data, size))
+
+    return Column.from_unique_ptr(move(c_result))
diff --git a/python/strings_udf/strings_udf/_typing.py b/python/strings_udf/strings_udf/_typing.py
index 675507bccde..babda6b2d2a 100644
--- a/python/strings_udf/strings_udf/_typing.py
+++ b/python/strings_udf/strings_udf/_typing.py
@@ -23,9 +23,9 @@
 
 
 # String object definitions
-class DString(types.Type):
+class UDFString(types.Type):
     def __init__(self):
-        super().__init__(name="dstring")
+        super().__init__(name="udf_string")
         llty = default_manager[self].get_value_type()
         self.size_bytes = llty.get_abi_size(target_data)
 
@@ -56,9 +56,9 @@ def __init__(self, dmm, fe_type):
         super().__init__(dmm, fe_type, self._members)
 
 
-@register_model(DString)
-class dstring_model(models.StructModel):
-    # from dstring.hpp:
+@register_model(UDFString)
+class udf_string_model(models.StructModel):
+    # from udf_string.hpp:
     # private:
     #   char* m_data{};
     #   cudf::size_type m_bytes{};
@@ -74,8 +74,9 @@ def __init__(self, dmm, fe_type):
         super().__init__(dmm, fe_type, self._members)
 
 
-any_string_ty = (StringView, DString, types.StringLiteral)
+any_string_ty = (StringView, UDFString, types.StringLiteral)
 string_view = StringView()
+udf_string = UDFString()
 
 
 class StrViewArgHandler:
@@ -93,7 +94,9 @@ class StrViewArgHandler:
     """
 
     def prepare_args(self, ty, val, **kwargs):
-        if isinstance(ty, types.CPointer) and isinstance(ty.dtype, StringView):
+        if isinstance(ty, types.CPointer) and isinstance(
+            ty.dtype, (StringView, UDFString)
+        ):
             return types.uint64, val.ptr
         else:
             return ty, val
@@ -102,6 +105,26 @@ def prepare_args(self, ty, val, **kwargs):
 str_view_arg_handler = StrViewArgHandler()
 
 
+# a python object for numba to grab on to, just to have
+# something to replace with code
+def maybe_post_process_result(result):
+    pass
+
+
+@cuda_decl_registry.register_global(maybe_post_process_result)
+class MaybePostProcessResult(AbstractTemplate):
+    def generic(self, args, kws):
+        # a UDF may be typed to return a string_view in some edge cases
+        # 1. a string is returned unmodified from an input column
+        # 2. a view of a string variable is returned such as a substring
+        # in both cases the result must be promoted to udf_string to be
+        # returned. This requires a copy.
+        if len(args) == 1 and isinstance(args[0], StringView):
+            return nb_signature(udf_string, args[0])
+        else:
+            return nb_signature(args[0], args[0])
+
+
 # String functions
 @cuda_decl_registry.register_global(len)
 class StringLength(AbstractTemplate):
@@ -113,7 +136,7 @@ def generic(self, args, kws):
         if isinstance(args[0], any_string_ty) and len(args) == 1:
             # length:
             # string_view -> int32
-            # dstring -> int32
+            # udf_string -> int32
             # literal -> int32
             return nb_signature(size_type, args[0])
 
diff --git a/python/strings_udf/strings_udf/lowering.py b/python/strings_udf/strings_udf/lowering.py
index df0902dfa98..e7deed246c0 100644
--- a/python/strings_udf/strings_udf/lowering.py
+++ b/python/strings_udf/strings_udf/lowering.py
@@ -5,6 +5,7 @@
 
 from numba import cuda, types
 from numba.core import cgutils
+from numba.core.datamodel import default_manager
 from numba.core.typing import signature as nb_signature
 from numba.cuda.cudadrv import nvvm
 from numba.cuda.cudaimpl import (
@@ -13,7 +14,13 @@
 )
 
 from strings_udf._lib.tables import get_character_flags_table_ptr
-from strings_udf._typing import size_type, string_view
+from strings_udf._typing import (
+    StringView,
+    maybe_post_process_result,
+    size_type,
+    string_view,
+    udf_string,
+)
 
 character_flags_table_ptr = get_character_flags_table_ptr()
 
@@ -107,6 +114,45 @@ def cast_string_literal_to_string_view(context, builder, fromty, toty, val):
     return sv._getvalue()
 
 
+# utilities
+_create_udf_string_from_string_view = cuda.declare_device(
+    "udf_string_from_string_view",
+    types.void(types.CPointer(string_view), types.CPointer(udf_string)),
+)
+
+
+def call_create_udf_string_from_string_view(sv, udf_str):
+    _create_udf_string_from_string_view(sv, udf_str)
+
+
+@cuda_lower(maybe_post_process_result, types.Any)
+def maybe_post_process_result_impl(context, builder, sig, args):
+    if not isinstance(sig.args[0], StringView):
+        return args[0]
+    else:
+        sv_ptr = builder.alloca(args[0].type)
+
+        # obtain llvm type for udf_string
+        udf_str_ptr = builder.alloca(
+            default_manager[udf_string].get_value_type()
+        )
+        builder.store(args[0], sv_ptr)
+
+        _ = context.compile_internal(
+            builder,
+            call_create_udf_string_from_string_view,
+            nb_signature(
+                types.void, _STR_VIEW_PTR, types.CPointer(udf_string)
+            ),
+            (sv_ptr, udf_str_ptr),
+        )
+        result = cgutils.create_struct_proxy(udf_string)(
+            context, builder, value=builder.load(udf_str_ptr)
+        )
+
+        return result._getvalue()
+
+
 # String function implementations
 def call_len_string_view(st):
     return _string_view_len(st)
diff --git a/python/strings_udf/strings_udf/tests/test_string_udfs.py b/python/strings_udf/strings_udf/tests/test_string_udfs.py
index f214915ae12..62bb7466696 100644
--- a/python/strings_udf/strings_udf/tests/test_string_udfs.py
+++ b/python/strings_udf/strings_udf/tests/test_string_udfs.py
@@ -9,17 +9,26 @@
 from numba.types import CPointer, void
 
 import cudf
+import rmm
 from cudf.testing._utils import assert_eq
 
 import strings_udf
-from strings_udf._lib.cudf_jit_udf import to_string_view_array
-from strings_udf._typing import str_view_arg_handler, string_view
+from strings_udf._lib.cudf_jit_udf import (
+    from_udf_string_array,
+    to_string_view_array,
+)
+from strings_udf._typing import (
+    maybe_post_process_result,
+    str_view_arg_handler,
+    string_view,
+    udf_string,
+)
 
 if not strings_udf.ENABLED:
     pytest.skip("Strings UDF not enabled.", allow_module_level=True)
 
 
-def get_kernel(func, dtype):
+def get_kernel(func, dtype, size):
     """
     Create a kernel for testing a single scalar string function
     Allocates an output vector with a dtype specified by the caller
@@ -28,17 +37,22 @@ def get_kernel(func, dtype):
     """
 
     func = cuda.jit(device=True)(func)
-    outty = numba.np.numpy_support.from_dtype(dtype)
-    sig = nb_signature(void, CPointer(string_view), outty[::1])
+
+    if dtype == "str":
+        outty = CPointer(udf_string)
+    else:
+        outty = numba.np.numpy_support.from_dtype(dtype)[::1]
+    sig = nb_signature(void, CPointer(string_view), outty)
 
     @cuda.jit(
         sig, link=[strings_udf.ptxpath], extensions=[str_view_arg_handler]
     )
     def kernel(input_strings, output_col):
         id = cuda.grid(1)
-        if id < len(output_col):
+        if id < size:
             st = input_strings[id]
             result = func(st)
+            result = maybe_post_process_result(result)
             output_col[id] = result
 
     return kernel
@@ -53,14 +67,21 @@ def run_udf_test(data, func, dtype):
     and then assembles the result back into a cuDF series before
     comparing it with the equivalent pandas result
     """
-    dtype = np.dtype(dtype)
+    if dtype == "str":
+        output_ary = rmm.DeviceBuffer(size=len(data) * udf_string.size_bytes)
+    else:
+        dtype = np.dtype(dtype)
+        output_ary = cudf.core.column.column_empty(len(data), dtype=dtype)
+
     cudf_column = cudf.core.column.as_column(data)
     str_view_ary = to_string_view_array(cudf_column)
 
-    output_ary = cudf.core.column.column_empty(len(data), dtype=dtype)
-
-    kernel = get_kernel(func, dtype)
+    kernel = get_kernel(func, dtype, len(data))
     kernel.forall(len(data))(str_view_ary, output_ary)
+
+    if dtype == "str":
+        output_ary = from_udf_string_array(output_ary)
+
     got = cudf.Series(output_ary, dtype=dtype)
     expect = pd.Series(data).apply(func)
     assert_eq(expect, got, check_dtype=False)
@@ -259,3 +280,10 @@ def func(st):
         return st.startswith(substr)
 
     run_udf_test(data, func, "bool")
+
+
+def test_string_udf_return_string(data):
+    def func(st):
+        return st
+
+    run_udf_test(data, func, "str")

From 238c86224a06ea31ad458b882507ef8eded265d4 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Fri, 14 Oct 2022 09:50:41 -0700
Subject: [PATCH 04/54] returning strings working

---
 python/cudf/cudf/core/udf/__init__.py         | 33 +++++++++++++++++--
 python/cudf/cudf/core/udf/utils.py            | 17 ++++++++--
 python/cudf/cudf/tests/test_udf_masked_ops.py |  8 +++++
 python/strings_udf/strings_udf/lowering.py    | 18 ++++++++++
 4 files changed, 71 insertions(+), 5 deletions(-)

diff --git a/python/cudf/cudf/core/udf/__init__.py b/python/cudf/cudf/core/udf/__init__.py
index 443466b28bd..500ca5e04d0 100644
--- a/python/cudf/cudf/core/udf/__init__.py
+++ b/python/cudf/cudf/core/udf/__init__.py
@@ -1,4 +1,5 @@
 # Copyright (c) 2022, NVIDIA CORPORATION.
+import cupy as cp
 import numpy as np
 from numba import cuda, types
 from numba.cuda.cudaimpl import (
@@ -6,6 +7,9 @@
     registry as cuda_lowering_registry,
 )
 
+import rmm
+
+from cudf.core.column import as_column
 from cudf.core.dtypes import dtype
 from cudf.core.udf import api, row_function, utils
 from cudf.utils.dtypes import STRING_TYPES
@@ -30,8 +34,15 @@
         from . import strings_typing  # isort: skip
         from . import strings_lowering  # isort: skip
         from strings_udf import ptxpath
-        from strings_udf._lib.cudf_jit_udf import to_string_view_array
-        from strings_udf._typing import str_view_arg_handler, string_view
+        from strings_udf._lib.cudf_jit_udf import (
+            from_udf_string_array,
+            to_string_view_array,
+        )
+        from strings_udf._typing import (
+            str_view_arg_handler,
+            string_view,
+            udf_string,
+        )
 
         # add an overload of MaskedType.__init__(string_view, bool)
         cuda_lower(api.Masked, strings_typing.string_view, types.boolean)(
@@ -49,8 +60,26 @@
         utils.JIT_SUPPORTED_TYPES |= STRING_TYPES
         utils.ptx_files.append(ptxpath)
         utils.arg_handlers.append(str_view_arg_handler)
+        utils.udf_return_type_map[string_view] = udf_string
         row_function.itemsizes[dtype("O")] = string_view.size_bytes
 
+        def _return_arr_from_dtype(dt, size):
+            if dt == np.dtype("O"):
+                result = rmm.DeviceBuffer(size=size * udf_string.size_bytes)
+                return result
+            else:
+                return cp.empty(size, dtype=dt)
+
+        utils._return_arr_from_dtype = _return_arr_from_dtype
+
+        def _post_process_output_col(col, retty):
+            if retty == np.dtype("O"):
+                return from_udf_string_array(col)
+            else:
+                return as_column(col, retty)
+
+        utils._post_process_output_col = _post_process_output_col
+
         _STRING_UDFS_ENABLED = True
     else:
         del strings_udf
diff --git a/python/cudf/cudf/core/udf/utils.py b/python/cudf/cudf/core/udf/utils.py
index fa79088046c..7fb19694b32 100644
--- a/python/cudf/cudf/core/udf/utils.py
+++ b/python/cudf/cudf/core/udf/utils.py
@@ -4,6 +4,7 @@
 
 import cachetools
 import cupy as cp
+import numba
 import numpy as np
 from numba import cuda, typeof
 from numba.core.errors import TypingError
@@ -31,6 +32,7 @@
 precompiled: cachetools.LRUCache = cachetools.LRUCache(maxsize=32)
 arg_handlers: List[Any] = []
 ptx_files: List[Any] = []
+udf_return_type_map: Dict[Any, Any] = {}
 
 
 @_cudf_nvtx_annotate
@@ -54,6 +56,7 @@ def _get_udf_return_type(argty, func: Callable, args=()):
     # Get the return type. The PTX is also returned by compile_udf, but is not
     # needed here.
     ptx, output_type = cudautils.compile_udf(func, compile_sig)
+
     if not isinstance(output_type, MaskedType):
         numba_output_type = numpy_support.from_dtype(np.dtype(output_type))
     else:
@@ -64,6 +67,7 @@ def _get_udf_return_type(argty, func: Callable, args=()):
         if not isinstance(numba_output_type, MaskedType)
         else numba_output_type.value_type
     )
+    result = udf_return_type_map.get(result, result)
 
     # _get_udf_return_type will throw a TypingError if the user tries to use
     # a field in the row containing an unsupported dtype, except in the
@@ -142,9 +146,12 @@ def _construct_signature(frame, return_type, args):
     actually JIT the kernel itself later, accounting for types
     and offsets. Skips columns with unsupported dtypes.
     """
-
+    if return_type in udf_return_type_map.values():
+        return_type = CPointer(return_type)
+    else:
+        return_type = return_type[::1]
     # Tuple of arrays, first the output data array, then the mask
-    return_type = Tuple((return_type[::1], boolean[::1]))
+    return_type = Tuple((return_type, boolean[::1]))
     offsets = []
     sig = [return_type, int64]
     for col in _supported_cols_from_frame(frame).values():
@@ -213,7 +220,11 @@ def _compile_or_get(frame, func, args, kernel_getter=None):
     # could be a MaskedType or a scalar type.
 
     kernel, scalar_return_type = kernel_getter(frame, func, args)
-    np_return_type = numpy_support.as_dtype(scalar_return_type)
+    try:
+        np_return_type = numpy_support.as_dtype(scalar_return_type)
+    except numba.core.errors.NumbaNotImplementedError:
+        # TODO: fix
+        np_return_type = np.dtype("object")
     precompiled[cache_key] = (kernel, np_return_type)
 
     return kernel, np_return_type
diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py
index 20245bd2a20..45a57296cba 100644
--- a/python/cudf/cudf/tests/test_udf_masked_ops.py
+++ b/python/cudf/cudf/tests/test_udf_masked_ops.py
@@ -860,6 +860,14 @@ def func(row):
     run_masked_udf_test(func, str_udf_data, check_dtype=False)
 
 
+@string_udf_test
+def test_string_udf_return_string(str_udf_data):
+    def func(row):
+        return row["str_col"]
+
+    run_masked_udf_test(func, str_udf_data, check_dtype=False)
+
+
 @pytest.mark.parametrize(
     "data", [[1.0, 0.0, 1.5], [1, 0, 2], [True, False, True]]
 )
diff --git a/python/strings_udf/strings_udf/lowering.py b/python/strings_udf/strings_udf/lowering.py
index e7deed246c0..d8087e50253 100644
--- a/python/strings_udf/strings_udf/lowering.py
+++ b/python/strings_udf/strings_udf/lowering.py
@@ -114,6 +114,24 @@ def cast_string_literal_to_string_view(context, builder, fromty, toty, val):
     return sv._getvalue()
 
 
+@cuda_lowering_registry.lower_cast(string_view, udf_string)
+def cast_string_view_to_udf_string(context, builder, fromty, toty, val):
+    sv_ptr = builder.alloca(default_manager[fromty].get_value_type())
+    udf_str_ptr = builder.alloca(default_manager[toty].get_value_type())
+    builder.store(val, sv_ptr)
+    _ = context.compile_internal(
+        builder,
+        call_create_udf_string_from_string_view,
+        nb_signature(types.void, _STR_VIEW_PTR, types.CPointer(udf_string)),
+        (sv_ptr, udf_str_ptr),
+    )
+    result = cgutils.create_struct_proxy(udf_string)(
+        context, builder, value=builder.load(udf_str_ptr)
+    )
+
+    return result._getvalue()
+
+
 # utilities
 _create_udf_string_from_string_view = cuda.declare_device(
     "udf_string_from_string_view",

From 0544c2310a6b661455a0721cabafe0adea382474 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 17 Oct 2022 08:28:26 -0700
Subject: [PATCH 05/54] clean up code a bit

---
 python/cudf/cudf/core/udf/__init__.py        | 41 +++++---------------
 python/cudf/cudf/core/udf/masked_lowering.py |  9 +++--
 python/cudf/cudf/core/udf/masked_typing.py   |  2 +-
 python/cudf/cudf/core/udf/utils.py           | 17 +++++---
 4 files changed, 26 insertions(+), 43 deletions(-)

diff --git a/python/cudf/cudf/core/udf/__init__.py b/python/cudf/cudf/core/udf/__init__.py
index 500ca5e04d0..fcdbb64396c 100644
--- a/python/cudf/cudf/core/udf/__init__.py
+++ b/python/cudf/cudf/core/udf/__init__.py
@@ -27,6 +27,7 @@
     | {types.boolean}
 )
 _STRING_UDFS_ENABLED = False
+cudf_str_dtype = dtype(str)
 try:
     import strings_udf
 
@@ -44,42 +45,16 @@
             udf_string,
         )
 
-        # add an overload of MaskedType.__init__(string_view, bool)
-        cuda_lower(api.Masked, strings_typing.string_view, types.boolean)(
-            masked_lowering.masked_constructor
-        )
-
-        # add an overload of pack_return(string_view)
-        cuda_lower(api.pack_return, strings_typing.string_view)(
-            masked_lowering.pack_return_scalar_impl
-        )
-
         _supported_masked_types |= {strings_typing.string_view}
-        utils.launch_arg_getters[dtype("O")] = to_string_view_array
-        utils.masked_array_types[dtype("O")] = string_view
+        utils.launch_arg_getters[cudf_str_dtype] = to_string_view_array
+        utils.output_col_getters[cudf_str_dtype] = from_udf_string_array
+        utils.masked_array_types[cudf_str_dtype] = string_view
+        row_function.itemsizes[cudf_str_dtype] = string_view.size_bytes
+
         utils.JIT_SUPPORTED_TYPES |= STRING_TYPES
         utils.ptx_files.append(ptxpath)
         utils.arg_handlers.append(str_view_arg_handler)
         utils.udf_return_type_map[string_view] = udf_string
-        row_function.itemsizes[dtype("O")] = string_view.size_bytes
-
-        def _return_arr_from_dtype(dt, size):
-            if dt == np.dtype("O"):
-                result = rmm.DeviceBuffer(size=size * udf_string.size_bytes)
-                return result
-            else:
-                return cp.empty(size, dtype=dt)
-
-        utils._return_arr_from_dtype = _return_arr_from_dtype
-
-        def _post_process_output_col(col, retty):
-            if retty == np.dtype("O"):
-                return from_udf_string_array(col)
-            else:
-                return as_column(col, retty)
-
-        utils._post_process_output_col = _post_process_output_col
-
         _STRING_UDFS_ENABLED = True
     else:
         del strings_udf
@@ -87,4 +62,6 @@ def _post_process_output_col(col, retty):
 except ImportError as e:
     # allow cuDF to work without strings_udf
     pass
-masked_typing.register_masked_constructor(_supported_masked_types)
+
+masked_typing._register_masked_constructor_typing(_supported_masked_types)
+masked_lowering._register_masked_constructor_lowering(_supported_masked_types)
diff --git a/python/cudf/cudf/core/udf/masked_lowering.py b/python/cudf/cudf/core/udf/masked_lowering.py
index f825b6538bf..37f3117e756 100644
--- a/python/cudf/cudf/core/udf/masked_lowering.py
+++ b/python/cudf/cudf/core/udf/masked_lowering.py
@@ -372,10 +372,6 @@ def cast_masked_to_masked(context, builder, fromty, toty, val):
 
 
 # Masked constructor for use in a kernel for testing
-@lower_builtin(api.Masked, types.Boolean, types.boolean)
-@lower_builtin(api.Masked, types.Number, types.boolean)
-@lower_builtin(api.Masked, types.NPDatetime, types.boolean)
-@lower_builtin(api.Masked, types.NPTimedelta, types.boolean)
 def masked_constructor(context, builder, sig, args):
     ty = sig.return_type
     value, valid = args
@@ -385,6 +381,11 @@ def masked_constructor(context, builder, sig, args):
     return masked._getvalue()
 
 
+def _register_masked_constructor_lowering(supported_masked_types):
+    for ty in supported_masked_types:
+        lower_builtin(api.Masked, ty, types.boolean)(masked_constructor)
+
+
 # Allows us to make an instance of MaskedType a global variable
 # and properly use it inside functions we will later compile
 @cuda_lowering_registry.lower_constant(MaskedType)
diff --git a/python/cudf/cudf/core/udf/masked_typing.py b/python/cudf/cudf/core/udf/masked_typing.py
index a815a9f6dae..7baf2d585e2 100644
--- a/python/cudf/cudf/core/udf/masked_typing.py
+++ b/python/cudf/cudf/core/udf/masked_typing.py
@@ -169,7 +169,7 @@ def typeof_masked(val, c):
 
 # Implemented typing for Masked(value, valid) - the construction of a Masked
 # type in a kernel.
-def register_masked_constructor(supported_masked_types):
+def _register_masked_constructor_typing(supported_masked_types):
     class MaskedConstructor(ConcreteTemplate):
         key = api.Masked
         cases = [
diff --git a/python/cudf/cudf/core/udf/utils.py b/python/cudf/cudf/core/udf/utils.py
index 7fb19694b32..b4912882b56 100644
--- a/python/cudf/cudf/core/udf/utils.py
+++ b/python/cudf/cudf/core/udf/utils.py
@@ -11,6 +11,8 @@
 from numba.np import numpy_support
 from numba.types import CPointer, Poison, Tuple, boolean, int64, void
 
+import rmm
+
 from cudf.core.column.column import as_column
 from cudf.core.dtypes import CategoricalDtype
 from cudf.core.udf.masked_typing import MaskedType
@@ -33,6 +35,9 @@
 arg_handlers: List[Any] = []
 ptx_files: List[Any] = []
 udf_return_type_map: Dict[Any, Any] = {}
+masked_array_types: Dict[Any, Any] = {}
+launch_arg_getters: Dict[Any, Any] = {}
+output_col_getters: Dict[Any, Any] = {}
 
 
 @_cudf_nvtx_annotate
@@ -116,9 +121,6 @@ def _supported_cols_from_frame(frame):
     }
 
 
-masked_array_types: Dict[Any, Any] = {}
-
-
 def _masked_array_type_from_col(col):
     """
     Return a type representing a tuple of arrays,
@@ -241,9 +243,6 @@ def _get_kernel(kernel_string, globals_, sig, func):
     return kernel
 
 
-launch_arg_getters: Dict[Any, Any] = {}
-
-
 def _get_input_args_from_frame(fr):
     args = []
     offsets = []
@@ -265,8 +264,14 @@ def _get_input_args_from_frame(fr):
 
 
 def _return_arr_from_dtype(dt, size):
+    extensionty = udf_return_type_map.get(masked_array_types.get(dt))
+    if extensionty:
+        return rmm.DeviceBuffer(size=size * extensionty.size_bytes)
     return cp.empty(size, dtype=dt)
 
 
 def _post_process_output_col(col, retty):
+    getter = output_col_getters.get(retty)
+    if getter:
+        col = getter(col)
     return as_column(col, retty)

From a5661bcf7fc0ff21bc12482d7cd51e8ec001f852 Mon Sep 17 00:00:00 2001
From: David Wendt <dwendt@nvidia.com>
Date: Tue, 18 Oct 2022 10:26:47 -0400
Subject: [PATCH 06/54] change void* to udf_string*

---
 .../cpp/include/cudf/strings/udf/case.cuh     |  4 +-
 .../include/cudf/strings/udf/char_types.cuh   |  2 +-
 .../cpp/include/cudf/strings/udf/numeric.cuh  |  4 +-
 .../cpp/include/cudf/strings/udf/search.cuh   |  2 +-
 .../cpp/include/cudf/strings/udf/split.cuh    | 12 ++--
 .../include/cudf/strings/udf/starts_with.cuh  |  8 +--
 .../cpp/include/cudf/strings/udf/strip.cuh    |  6 +-
 .../cpp/include/cudf/strings/udf/udf_apis.hpp | 18 ++++--
 .../cpp/src/strings/udf/udf_apis.cu           | 55 ++++++++++++-------
 9 files changed, 67 insertions(+), 44 deletions(-)

diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/case.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/case.cuh
index 98b25f85f74..17ee98ace6f 100644
--- a/python/strings_udf/cpp/include/cudf/strings/udf/case.cuh
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/case.cuh
@@ -25,7 +25,9 @@ namespace cudf {
 namespace strings {
 namespace udf {
 
-// global variables for character-type flags and case conversion
+/**
+ * @brief Global variables for character-type flags and case conversion
+ */
 struct chars_tables {
   cudf::strings::detail::character_flags_table_type* flags_table;
   cudf::strings::detail::character_cases_table_type* cases_table;
diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/char_types.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/char_types.cuh
index 9320686442b..2c73328843f 100644
--- a/python/strings_udf/cpp/include/cudf/strings/udf/char_types.cuh
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/char_types.cuh
@@ -25,7 +25,7 @@ namespace strings {
 namespace udf {
 
 /**
- * @brief Returns true if all characters in the string are of the type specified.
+ * @brief Returns true if all characters in the string are of the type specified
  *
  * The output will be false if the string is empty or has at least one character
  * not of the specified type. If all characters fit the type then true is returned.
diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/numeric.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/numeric.cuh
index 48f709ae318..2cd6eaaa298 100644
--- a/python/strings_udf/cpp/include/cudf/strings/udf/numeric.cuh
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/numeric.cuh
@@ -27,7 +27,7 @@ namespace strings {
 namespace udf {
 
 /**
- * @brief Converts a string into an integer.
+ * @brief Converts a string into an integer
  *
  * The '+' and '-' are allowed but only at the beginning of the string.
  * The string is expected to contain base-10 [0-9] characters only.
@@ -59,7 +59,7 @@ __device__ inline udf_string to_string(int64_t value)
 }
 
 /**
- * @brief Converts a string into a double.
+ * @brief Converts a string into a double
  *
  * Support scientific notation as well.
  * Overflow goes to inf or -inf and underflow may go to 0.
diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/search.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/search.cuh
index ef15886f1f5..ed3a873b655 100644
--- a/python/strings_udf/cpp/include/cudf/strings/udf/search.cuh
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/search.cuh
@@ -23,7 +23,7 @@ namespace udf {
 
 /**
  * @brief Returns the number of times that the target string appears
- * in the source string.
+ * in the source string
  *
  * If `start <= 0` the search begins at the beginning of the `source` string.
  * If `end <=0` or `end` is greater the length of the `source` string,
diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/split.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/split.cuh
index f7fa32d0e45..58e3e6fcfb4 100644
--- a/python/strings_udf/cpp/include/cudf/strings/udf/split.cuh
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/split.cuh
@@ -68,7 +68,7 @@ __device__ inline cudf::size_type split(cudf::string_view const d_str,
 }
 
 /**
- * @brief Split string using given target array.
+ * @brief Split string using given target array
  *
  * @param d_str String to split
  * @param tgt Character array encoded in UTF-8 used for identifying split points
@@ -86,7 +86,7 @@ __device__ inline int split(cudf::string_view const d_str,
 }
 
 /**
- * @brief Split string using given target array.
+ * @brief Split string using given target array
  *
  * @param d_str String to split
  * @param tgt Null-terminated character array encoded in UTF-8 used for identifying split points
@@ -102,7 +102,7 @@ __device__ inline int split(cudf::string_view const d_str,
 }
 
 /**
- * @brief Split string on whitespace.
+ * @brief Split string on whitespace
  *
  * This will create tokens by splitting on one or more consecutive whitespace characters
  * found in `d_str`.
@@ -125,7 +125,7 @@ __device__ inline cudf::size_type split(cudf::string_view const d_str, cudf::str
 }
 
 /**
- * @brief Join an array of strings with a separator.
+ * @brief Join an array of strings with a separator
  *
  * @code{.cpp}
  * auto separator = cudf::string_view{"::", 2};
@@ -156,7 +156,7 @@ __device__ inline udf_string join(cudf::string_view const separator,
 }
 
 /**
- * @brief Join an array of strings with a separator.
+ * @brief Join an array of strings with a separator
  *
  * @param separator Null-terminated UTF-8 string
  * @param bytes Number of bytes to read from `separator`
@@ -173,7 +173,7 @@ __device__ inline udf_string join(char const* separator,
 }
 
 /**
- * @brief Join an array of strings with a separator.
+ * @brief Join an array of strings with a separator
  *
  * @param separator Null-terminated UTF-8 string
  * @param input An array of strings to join
diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/starts_with.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/starts_with.cuh
index 38c609ae505..8e9ffce4f52 100644
--- a/python/strings_udf/cpp/include/cudf/strings/udf/starts_with.cuh
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/starts_with.cuh
@@ -24,7 +24,7 @@ namespace udf {
 
 /**
  * @brief Returns true if the beginning of the specified string
- * matches the given character array.
+ * matches the given character array
  *
  * @param dstr String to check
  * @param tgt Character array encoded in UTF-8
@@ -42,7 +42,7 @@ __device__ inline bool starts_with(cudf::string_view const dstr,
 
 /**
  * @brief Returns true if the beginning of the specified string
- * matches the given target string.
+ * matches the given target string
  *
  * @param dstr String to check
  * @param tgt String to match
@@ -55,7 +55,7 @@ __device__ inline bool starts_with(cudf::string_view const dstr, cudf::string_vi
 
 /**
  * @brief Returns true if the end of the specified string
- * matches the given character array.
+ * matches the given character array
  *
  * @param dstr String to check
  * @param tgt Character array encoded in UTF-8
@@ -73,7 +73,7 @@ __device__ inline bool ends_with(cudf::string_view const dstr,
 
 /**
  * @brief Returns true if the end of the specified string
- * matches the given target` string.
+ * matches the given target` string
  *
  * @param dstr String to check
  * @param tgt String to match
diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/strip.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/strip.cuh
index 1cd7dfbaa78..6c6639835cd 100644
--- a/python/strings_udf/cpp/include/cudf/strings/udf/strip.cuh
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/strip.cuh
@@ -26,7 +26,7 @@ namespace strings {
 namespace udf {
 
 /**
- * @brief Strip characters from the beginning and/or end of the given string.
+ * @brief Strip characters from the beginning and/or end of the given string
  *
  * The `d_to_strip` is interpretted as an array of characters to be removed.
  * If `d_to_strip` is an empty string, whitespace characters are stripped.
@@ -56,7 +56,7 @@ __device__ udf_string strip(cudf::string_view const d_str,
 }
 
 /**
- * @brief Strip characters from the beginning of the given string.
+ * @brief Strip characters from the beginning of the given string
  *
  * The `d_to_strip` is interpretted as an array of characters to be removed.
  * If `d_to_strip` is an empty string, whitespace characters are stripped.
@@ -81,7 +81,7 @@ __device__ udf_string lstrip(cudf::string_view const d_str, cudf::string_view d_
 }
 
 /**
- * @brief Strip characters from the end of the given string.
+ * @brief Strip characters from the end of the given string
  *
  * The `d_to_strip` is interpretted as an array of characters to be removed.
  * If `d_to_strip` is an empty string, whitespace characters are stripped.
diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/udf_apis.hpp b/python/strings_udf/cpp/include/cudf/strings/udf/udf_apis.hpp
index f5807be151b..9ae986d2327 100644
--- a/python/strings_udf/cpp/include/cudf/strings/udf/udf_apis.hpp
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/udf_apis.hpp
@@ -27,22 +27,30 @@ namespace cudf {
 namespace strings {
 namespace udf {
 
+class udf_string;
+
 /**
  * @brief Return a cudf::string_view array for the given strings column
  *
+ * No string data is copied so the input column controls the lifetime of the
+ * underlying strings.
+ *
+ * New device memory is allocated and returned to hold just  the string_view instances.
+ *
  * @param input Strings column to convert to a string_view array.
- * @throw cudf::logic_error if input is not a strings column.
+ * @return Array of string_view objects in device memory
  */
 std::unique_ptr<rmm::device_buffer> to_string_view_array(cudf::column_view const input);
 
 /**
- * @brief Return a cudf::column given an array of udf_string objects.
+ * @brief Return a STRINGS column given an array of udf_string objects
  *
- * @param d_buffer Pointer to device memory of udf_string objects
- * @param d_size The number of bytes in the d_buffer
+ * @param d_strings Pointer to device memory of udf_string objects
+ * @param size The number of elements in the buffer array
  * @return A strings column copy of the udf_string objects
  */
-std::unique_ptr<cudf::column> column_from_udf_string_array(void* d_buffer, std::size_t size);
+std::unique_ptr<cudf::column> column_from_udf_string_array(udf_string* d_strings,
+                                                           cudf::size_type size);
 
 }  // namespace udf
 }  // namespace strings
diff --git a/python/strings_udf/cpp/src/strings/udf/udf_apis.cu b/python/strings_udf/cpp/src/strings/udf/udf_apis.cu
index 2bc74c8ca19..8ad255eda9f 100644
--- a/python/strings_udf/cpp/src/strings/udf/udf_apis.cu
+++ b/python/strings_udf/cpp/src/strings/udf/udf_apis.cu
@@ -33,22 +33,26 @@ namespace udf {
 namespace detail {
 namespace {
 
-struct free_udf_string_fn {
-  cudf::strings::udf::udf_string* d_strings;
-  __device__ void operator()(cudf::size_type idx) { d_strings[idx].clear(); }
-};
-
-void free_udf_string_array(void* d_buffer, std::size_t buffer_size, rmm::cuda_stream_view stream)
+/**
+ * @brief Frees udf_strings device memory
+ *
+ * @param d_buffer Array of udf_strings
+ */
+void free_udf_string_array(cudf::strings::udf::udf_string* d_strings,
+                           cudf::size_type size,
+                           rmm::cuda_stream_view stream)
 {
-  auto const size =
-    static_cast<cudf::size_type>(buffer_size / sizeof(cudf::strings::udf::udf_string));
-  auto d_strings = reinterpret_cast<cudf::strings::udf::udf_string*>(d_buffer);
   thrust::for_each_n(rmm::exec_policy(stream),
                      thrust::make_counting_iterator(0),
                      size,
-                     free_udf_string_fn{d_strings});
+                     [d_strings] __device__(auto idx) { d_strings[idx].clear(); });
 }
 
+/**
+ * @brief Functor wraps string_view objects around udf_string objects
+ *
+ * No string data is copied.
+ */
 struct udf_string_to_string_view_transform_fn {
   __device__ cudf::string_view operator()(cudf::strings::udf::udf_string const& dstr)
   {
@@ -58,6 +62,11 @@ struct udf_string_to_string_view_transform_fn {
 
 }  // namespace
 
+/**
+ * @copydoc to_string_view_array
+ *
+ * @param stream CUDA stream used for allocating/copying device memory and launching kernels
+ */
 std::unique_ptr<rmm::device_buffer> to_string_view_array(cudf::column_view const input,
                                                          rmm::cuda_stream_view stream)
 {
@@ -67,26 +76,27 @@ std::unique_ptr<rmm::device_buffer> to_string_view_array(cudf::column_view const
                 .release()));
 }
 
-std::unique_ptr<cudf::column> column_from_udf_string_array(void* d_buffer,
-                                                           std::size_t buffer_size,
+/**
+ * @copydoc column_from_udf_string_array
+
+ * @param stream CUDA stream used for allocating/copying device memory and launching kernels
+ */
+std::unique_ptr<cudf::column> column_from_udf_string_array(udf_string* d_strings,
+                                                           cudf::size_type size,
                                                            rmm::cuda_stream_view stream)
 {
-  auto const size =
-    static_cast<cudf::size_type>(buffer_size / sizeof(cudf::strings::udf::udf_string));
-  auto d_input = reinterpret_cast<cudf::strings::udf::udf_string*>(d_buffer);
-
   // create string_views of the udf_strings
   auto indices = rmm::device_uvector<cudf::string_view>(size, stream);
   thrust::transform(rmm::exec_policy(stream),
-                    d_input,
-                    d_input + size,
+                    d_strings,
+                    d_strings + size,
                     indices.data(),
                     udf_string_to_string_view_transform_fn{});
 
   auto results = cudf::make_strings_column(indices, cudf::string_view(nullptr, 0), stream);
 
   // free the individual udf_string elements
-  free_udf_string_array(d_buffer, buffer_size, stream);
+  free_udf_string_array(d_strings, size, stream);
 
   // return new column
   return results;
@@ -94,14 +104,17 @@ std::unique_ptr<cudf::column> column_from_udf_string_array(void* d_buffer,
 
 }  // namespace detail
 
+// external APIs
+
 std::unique_ptr<rmm::device_buffer> to_string_view_array(cudf::column_view const input)
 {
   return detail::to_string_view_array(input, rmm::cuda_stream_default);
 }
 
-std::unique_ptr<cudf::column> column_from_udf_string_array(void* d_buffer, std::size_t buffer_size)
+std::unique_ptr<cudf::column> column_from_udf_string_array(udf_string* d_strings,
+                                                           cudf::size_type size)
 {
-  return detail::column_from_udf_string_array(d_buffer, buffer_size, rmm::cuda_stream_default);
+  return detail::column_from_udf_string_array(d_strings, size, rmm::cuda_stream_default);
 }
 
 }  // namespace udf

From 9661c4ed2c2af19c76aac294c8987e7c40f64c06 Mon Sep 17 00:00:00 2001
From: David Wendt <dwendt@nvidia.com>
Date: Tue, 18 Oct 2022 12:03:57 -0400
Subject: [PATCH 07/54] update doxygens

---
 .../cpp/include/cudf/strings/udf/udf_apis.hpp |  5 +
 .../include/cudf/strings/udf/udf_string.cuh   |  2 -
 .../include/cudf/strings/udf/udf_string.hpp   | 97 ++++++++++---------
 3 files changed, 58 insertions(+), 46 deletions(-)

diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/udf_apis.hpp b/python/strings_udf/cpp/include/cudf/strings/udf/udf_apis.hpp
index 9ae986d2327..540dcaf4c18 100644
--- a/python/strings_udf/cpp/include/cudf/strings/udf/udf_apis.hpp
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/udf_apis.hpp
@@ -45,6 +45,11 @@ std::unique_ptr<rmm::device_buffer> to_string_view_array(cudf::column_view const
 /**
  * @brief Return a STRINGS column given an array of udf_string objects
  *
+ * This will make a copy of the strings in d_string in order to build
+ * the output column.
+ * The individual udf_strings are also cleared freeing each of their internal
+ * device memory buffers.
+ *
  * @param d_strings Pointer to device memory of udf_string objects
  * @param size The number of elements in the buffer array
  * @return A strings column copy of the udf_string objects
diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.cuh
index ff4a4399e40..d651f9faa2b 100644
--- a/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.cuh
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.cuh
@@ -178,8 +178,6 @@ __device__ inline char const* udf_string::data() const { return m_data; }
 
 __device__ inline bool udf_string::is_empty() const { return m_bytes == 0; }
 
-__device__ inline bool udf_string::is_null() const { return m_data == nullptr; }
-
 __device__ inline cudf::string_view::const_iterator udf_string::begin() const
 {
   return cudf::string_view::const_iterator(cudf::string_view(m_data, m_bytes), 0);
diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.hpp b/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.hpp
index a0da0186b24..c57ab473017 100644
--- a/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.hpp
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.hpp
@@ -19,14 +19,30 @@
 
 #include <cuda_runtime.h>
 
+// This header contains all class and function declarations so that it
+// can be included in a .cpp file which only has declaration requirements
+// (i.e. sizeof, conditionally-comparable, explicit conversions, etc).
+// The definitions are coded in udf_string.cuh which is to be included
+// in .cu files that use this class in kernel calls.
+
 namespace cudf {
 namespace strings {
 namespace udf {
 
+/**
+ * @brief Device string class for use with user-defined functions
+ *
+ * This class manages a device buffer of UTF-8 encoded characters
+ * for string manipulation in a device kernel.
+ *
+ * It's methods and behavior are modelled after std::string but
+ * with special consideration for UTF-8 encoded strings and for
+ * use within a cuDF UDF.
+ */
 class udf_string {
  public:
   /**
-   * @brief Represents unknown character position or length.
+   * @brief Represents unknown character position or length
    */
   static constexpr cudf::size_type npos = static_cast<cudf::size_type>(-1);
 
@@ -41,7 +57,7 @@ class udf_string {
   udf_string() = default;
 
   /**
-   * @brief Create a string using existing device memory.
+   * @brief Create a string using existing device memory
    *
    * The given memory is copied into the instance returned.
    *
@@ -51,7 +67,7 @@ class udf_string {
   __device__ udf_string(char const* data, cudf::size_type bytes);
 
   /**
-   * @brief Create a string object from a null-terminated character array.
+   * @brief Create a string object from a null-terminated character array
    *
    * The given memory is copied into the instance returned.
    *
@@ -61,7 +77,7 @@ class udf_string {
   __device__ udf_string(char const* data);
 
   /**
-   * @brief Create a string object from a cudf::string_view.
+   * @brief Create a string object from a cudf::string_view
    *
    * The input string data is copied into the instance returned.
    *
@@ -70,7 +86,7 @@ class udf_string {
   __device__ udf_string(cudf::string_view const str);
 
   /**
-   * @brief Create a string object with `count` copies of character `chr`.
+   * @brief Create a string object with `count` copies of character `chr`
    *
    * @param count Number of times to copy `chr`
    * @param chr Character from which to create the string
@@ -78,7 +94,7 @@ class udf_string {
   __device__ udf_string(cudf::size_type count, cudf::char_utf8 chr);
 
   /**
-   * @brief Create a string object from another instance.
+   * @brief Create a string object from another instance
    *
    * The string data is copied from the `src` into the instance returned.
    *
@@ -87,7 +103,7 @@ class udf_string {
   __device__ udf_string(udf_string const& src);
 
   /**
-   * @brief Create a string object from a move reference.
+   * @brief Create a string object from a move reference
    *
    * The string data is moved from `src` into the instance returned.
    * The `src` will have no content.
@@ -104,41 +120,34 @@ class udf_string {
   __device__ udf_string& operator=(char const*);
 
   /**
-   * @brief Return the number of bytes in this string.
+   * @brief Return the number of bytes in this string
    */
   __device__ cudf::size_type size_bytes() const;
 
   /**
-   * @brief Return the number of characters in this string.
+   * @brief Return the number of characters in this string
    */
   __device__ cudf::size_type length() const;
 
   /**
-   * @brief Return the maximum number of bytes a udf_string can hold.
+   * @brief Return the maximum number of bytes a udf_string can hold
    */
   __device__ cudf::size_type max_size() const;
 
   /**
-   * @brief Return the internal pointer to the character array for this object.
+   * @brief Return the internal pointer to the character array for this object
    */
   __device__ char* data();
   __device__ char const* data() const;
 
   /**
-   * @brief Returns true if there are no characters in this string.
+   * @brief Returns true if there are no characters in this string
    */
   __device__ bool is_empty() const;
 
-  /**
-   * @brief Returns true if `data()==nullptr`
-   *
-   * This is experimental and may be removed in the futre.
-   */
-  __device__ bool is_null() const;
-
   /**
    * @brief Returns an iterator that can be used to navigate through
-   *        the UTF-8 characters in this string.
+   *        the UTF-8 characters in this string
    *
    * This returns a `cudf::string_view::const_iterator` which is read-only.
    */
@@ -146,7 +155,7 @@ class udf_string {
   __device__ cudf::string_view::const_iterator end() const;
 
   /**
-   * @brief Returns the character at the specified position.
+   * @brief Returns the character at the specified position
    *
    * This will return 0 if `pos >= length()`.
    *
@@ -156,7 +165,7 @@ class udf_string {
   __device__ cudf::char_utf8 at(cudf::size_type pos) const;
 
   /**
-   * @brief Returns the character at the specified index.
+   * @brief Returns the character at the specified index
    *
    * This will return 0 if `pos >= length()`.
    * Note this is read-only. Use replace() to modify a character.
@@ -167,7 +176,7 @@ class udf_string {
   __device__ cudf::char_utf8 operator[](cudf::size_type pos) const;
 
   /**
-   * @brief Return the byte offset for a given character position.
+   * @brief Return the byte offset for a given character position
    *
    * The byte offset for the character at `pos` such that
    * `data() + byte_offset(pos)` points to the memory location
@@ -238,14 +247,14 @@ class udf_string {
   __device__ bool operator>=(cudf::string_view const rhs) const;
 
   /**
-   * @brief Remove all bytes from this string.
+   * @brief Remove all bytes from this string
    *
    * All pointers, references, and iterators are invalidated.
    */
   __device__ void clear();
 
   /**
-   * @brief Resizes string to contain `count` bytes.
+   * @brief Resizes string to contain `count` bytes
    *
    * If `count > size_bytes()` then zero-padding is added.
    * If `count < size_bytes()` then the string is truncated to size `count`.
@@ -257,7 +266,7 @@ class udf_string {
   __device__ void resize(cudf::size_type count);
 
   /**
-   * @brief Reserve `count` bytes in this string.
+   * @brief Reserve `count` bytes in this string
    *
    * If `count > capacity()`, new memory is allocated and `capacity()` will
    * be greater than or equal to `count`.
@@ -268,12 +277,12 @@ class udf_string {
   __device__ void reserve(cudf::size_type count);
 
   /**
-   * @brief Returns the number of bytes that the string has allocated.
+   * @brief Returns the number of bytes that the string has allocated
    */
   __device__ cudf::size_type capacity() const;
 
   /**
-   * @brief Reduces internal allocation to just `size_bytes()`.
+   * @brief Reduces internal allocation to just `size_bytes()`
    *
    * All pointers, references, and iterators may be invalidated.
    */
@@ -313,7 +322,7 @@ class udf_string {
   __device__ udf_string& assign(char const* str, cudf::size_type bytes);
 
   /**
-   * @brief Append a string to the end of this string.
+   * @brief Append a string to the end of this string
    *
    * @param str String to append
    * @return This string with the appended argument
@@ -321,7 +330,7 @@ class udf_string {
   __device__ udf_string& operator+=(cudf::string_view const str);
 
   /**
-   * @brief Append a character to the end of this string.
+   * @brief Append a character to the end of this string
    *
    * @param str Character to append
    * @return This string with the appended argument
@@ -330,7 +339,7 @@ class udf_string {
 
   /**
    * @brief Append a null-terminated device memory character array
-   * to the end of this string.
+   * to the end of this string
    *
    * @param str String to append
    * @return This string with the appended argument
@@ -338,7 +347,7 @@ class udf_string {
   __device__ udf_string& operator+=(char const* str);
 
   /**
-   * @brief Append a null-terminated character array to the end of this string.
+   * @brief Append a null-terminated character array to the end of this string
    *
    * @param str String to append
    * @return This string with the appended argument
@@ -346,7 +355,7 @@ class udf_string {
   __device__ udf_string& append(char const* str);
 
   /**
-   * @brief Append a character array to the end of this string.
+   * @brief Append a character array to the end of this string
    *
    * @param str Character array to append
    * @param bytes Number of bytes from `str` to append.
@@ -355,7 +364,7 @@ class udf_string {
   __device__ udf_string& append(char const* str, cudf::size_type bytes);
 
   /**
-   * @brief Append a string to the end of this string.
+   * @brief Append a string to the end of this string
    *
    * @param str String to append
    * @return This string with the appended argument
@@ -373,7 +382,7 @@ class udf_string {
   __device__ udf_string& append(cudf::char_utf8 chr, cudf::size_type count = 1);
 
   /**
-   * @brief Insert a string into the character position specified.
+   * @brief Insert a string into the character position specified
    *
    * There is no effect if `pos < 0 or pos > length()`.
    *
@@ -384,7 +393,7 @@ class udf_string {
   __device__ udf_string& insert(cudf::size_type pos, cudf::string_view const str);
 
   /**
-   * @brief Insert a null-terminated character array into the character position specified.
+   * @brief Insert a null-terminated character array into the character position specified
    *
    * There is no effect if `pos < 0 or pos > length()`.
    *
@@ -395,7 +404,7 @@ class udf_string {
   __device__ udf_string& insert(cudf::size_type pos, char const* data);
 
   /**
-   * @brief Insert a character array into the character position specified.
+   * @brief Insert a character array into the character position specified
    *
    * There is no effect if `pos < 0 or pos > length()`.
    *
@@ -407,7 +416,7 @@ class udf_string {
   __device__ udf_string& insert(cudf::size_type pos, char const* data, cudf::size_type bytes);
 
   /**
-   * @brief Insert a character one or more times into the character position specified.
+   * @brief Insert a character one or more times into the character position specified
    *
    * There is no effect if `pos < 0 or pos > length()`.
    *
@@ -419,7 +428,7 @@ class udf_string {
   __device__ udf_string& insert(cudf::size_type pos, cudf::size_type count, cudf::char_utf8 chr);
 
   /**
-   * @brief Returns a substring of this string.
+   * @brief Returns a substring of this string
    *
    * An empty string is returned if `pos < 0 or pos >= length()`.
    *
@@ -432,7 +441,7 @@ class udf_string {
   __device__ udf_string substr(cudf::size_type pos, cudf::size_type count = npos) const;
 
   /**
-   * @brief Replace a range of characters with a given string.
+   * @brief Replace a range of characters with a given string
    *
    * Replaces characters in range `[pos, pos + count]` with `str`.
    * There is no effect if `pos < 0 or pos > length()`.
@@ -447,7 +456,7 @@ class udf_string {
                                  cudf::string_view const str);
 
   /**
-   * @brief Replace a range of characters with a null-terminated character array.
+   * @brief Replace a range of characters with a null-terminated character array
    *
    * Replaces characters in range `[pos, pos + count)` with `data`.
    * There is no effect if `pos < 0 or pos > length()`.
@@ -460,7 +469,7 @@ class udf_string {
   __device__ udf_string& replace(cudf::size_type pos, cudf::size_type count, char const* data);
 
   /**
-   * @brief Replace a range of characters with a given character array.
+   * @brief Replace a range of characters with a given character array
    *
    * Replaces characters in range `[pos, pos + count)` with `[data, data + bytes)`.
    * There is no effect if `pos < 0 or pos > length()`.
@@ -477,7 +486,7 @@ class udf_string {
                                  cudf::size_type bytes);
 
   /**
-   * @brief Replace a range of characters with a character one or more times.
+   * @brief Replace a range of characters with a character one or more times
    *
    * Replaces characters in range `[pos, pos + count)` with `chr` `chr_count` times.
    * There is no effect if `pos < 0 or pos > length()`.
@@ -494,7 +503,7 @@ class udf_string {
                                  cudf::char_utf8 chr);
 
   /**
-   * @brief Removes specified characters from this string.
+   * @brief Removes specified characters from this string
    *
    * Removes `min(count, length() - pos)` characters starting at `pos`.
    * There is no effect if `pos < 0 or pos >= length()`.

From a6f03a334aae76216d0c4ee9e3ce165dcc848fea Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Tue, 18 Oct 2022 11:58:38 -0700
Subject: [PATCH 08/54] remove unnecessary explicit casting

---
 python/strings_udf/strings_udf/_typing.py     | 20 -----------
 python/strings_udf/strings_udf/lowering.py    | 36 +------------------
 .../strings_udf/tests/test_string_udfs.py     |  8 +----
 3 files changed, 2 insertions(+), 62 deletions(-)

diff --git a/python/strings_udf/strings_udf/_typing.py b/python/strings_udf/strings_udf/_typing.py
index babda6b2d2a..63d2cdf69aa 100644
--- a/python/strings_udf/strings_udf/_typing.py
+++ b/python/strings_udf/strings_udf/_typing.py
@@ -105,26 +105,6 @@ def prepare_args(self, ty, val, **kwargs):
 str_view_arg_handler = StrViewArgHandler()
 
 
-# a python object for numba to grab on to, just to have
-# something to replace with code
-def maybe_post_process_result(result):
-    pass
-
-
-@cuda_decl_registry.register_global(maybe_post_process_result)
-class MaybePostProcessResult(AbstractTemplate):
-    def generic(self, args, kws):
-        # a UDF may be typed to return a string_view in some edge cases
-        # 1. a string is returned unmodified from an input column
-        # 2. a view of a string variable is returned such as a substring
-        # in both cases the result must be promoted to udf_string to be
-        # returned. This requires a copy.
-        if len(args) == 1 and isinstance(args[0], StringView):
-            return nb_signature(udf_string, args[0])
-        else:
-            return nb_signature(args[0], args[0])
-
-
 # String functions
 @cuda_decl_registry.register_global(len)
 class StringLength(AbstractTemplate):
diff --git a/python/strings_udf/strings_udf/lowering.py b/python/strings_udf/strings_udf/lowering.py
index d8087e50253..f7e67129ebe 100644
--- a/python/strings_udf/strings_udf/lowering.py
+++ b/python/strings_udf/strings_udf/lowering.py
@@ -14,13 +14,7 @@
 )
 
 from strings_udf._lib.tables import get_character_flags_table_ptr
-from strings_udf._typing import (
-    StringView,
-    maybe_post_process_result,
-    size_type,
-    string_view,
-    udf_string,
-)
+from strings_udf._typing import size_type, string_view, udf_string
 
 character_flags_table_ptr = get_character_flags_table_ptr()
 
@@ -143,34 +137,6 @@ def call_create_udf_string_from_string_view(sv, udf_str):
     _create_udf_string_from_string_view(sv, udf_str)
 
 
-@cuda_lower(maybe_post_process_result, types.Any)
-def maybe_post_process_result_impl(context, builder, sig, args):
-    if not isinstance(sig.args[0], StringView):
-        return args[0]
-    else:
-        sv_ptr = builder.alloca(args[0].type)
-
-        # obtain llvm type for udf_string
-        udf_str_ptr = builder.alloca(
-            default_manager[udf_string].get_value_type()
-        )
-        builder.store(args[0], sv_ptr)
-
-        _ = context.compile_internal(
-            builder,
-            call_create_udf_string_from_string_view,
-            nb_signature(
-                types.void, _STR_VIEW_PTR, types.CPointer(udf_string)
-            ),
-            (sv_ptr, udf_str_ptr),
-        )
-        result = cgutils.create_struct_proxy(udf_string)(
-            context, builder, value=builder.load(udf_str_ptr)
-        )
-
-        return result._getvalue()
-
-
 # String function implementations
 def call_len_string_view(st):
     return _string_view_len(st)
diff --git a/python/strings_udf/strings_udf/tests/test_string_udfs.py b/python/strings_udf/strings_udf/tests/test_string_udfs.py
index 62bb7466696..d40b186fdfc 100644
--- a/python/strings_udf/strings_udf/tests/test_string_udfs.py
+++ b/python/strings_udf/strings_udf/tests/test_string_udfs.py
@@ -17,12 +17,7 @@
     from_udf_string_array,
     to_string_view_array,
 )
-from strings_udf._typing import (
-    maybe_post_process_result,
-    str_view_arg_handler,
-    string_view,
-    udf_string,
-)
+from strings_udf._typing import str_view_arg_handler, string_view, udf_string
 
 if not strings_udf.ENABLED:
     pytest.skip("Strings UDF not enabled.", allow_module_level=True)
@@ -52,7 +47,6 @@ def kernel(input_strings, output_col):
         if id < size:
             st = input_strings[id]
             result = func(st)
-            result = maybe_post_process_result(result)
             output_col[id] = result
 
     return kernel

From ebaf0881358d66e690c60b5de0a3c7d2c76b4248 Mon Sep 17 00:00:00 2001
From: David Wendt <dwendt@nvidia.com>
Date: Wed, 19 Oct 2022 15:16:09 -0400
Subject: [PATCH 09/54] add pad utility functions

---
 .../cpp/include/cudf/strings/udf/pad.cuh      | 72 +++++++++++++++++++
 1 file changed, 72 insertions(+)
 create mode 100644 python/strings_udf/cpp/include/cudf/strings/udf/pad.cuh

diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/pad.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/pad.cuh
new file mode 100644
index 00000000000..38dbee72a84
--- /dev/null
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/pad.cuh
@@ -0,0 +1,72 @@
+
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include "udf_string.cuh"
+
+#include <cudf/strings/detail/pad_impl.cuh>
+
+namespace cudf {
+namespace strings {
+namespace udf {
+
+/**
+ * @brief Pad beginning and/or end of a string with the given fill character
+ *
+ * The side_type::BOTH will attempt to center the text using the `fill_char`.
+ * If the `width` is smaller than the length of `d_str` no change occurs.
+ *
+ * @tparam side Specify where the padding should occur
+ * @param d_str String to pad
+ * @param width Minimum length in characters of the output string
+ * @param fill_char Character used for padding
+ */
+template <side_type side = side_type::RIGHT>
+__device__ udf_string pad(cudf::string_view const d_str,
+                          cudf::size_type width,
+                          cudf::string_view fill_char = cudf::string_view{" ", 1})
+{
+  if (fill_char.empty()) { return udf_string{d_str}; }
+
+  udf_string result;
+  result.resize(cudf::strings::detail::compute_padded_size(d_str, width, fill_char.size_bytes()));
+  cudf::strings::detail::pad_impl<side>(d_str, width, *fill_char.begin(), result.data());
+  return result;
+}
+
+/**
+ * @brief Pad beginning of a string with zero '0'
+ *
+ * If the `width` is smaller than the length of `d_str` no change occurs.
+ *
+ * If d_str starts with a sign character ('-' or '+') then '0' padding
+ * starts after the sign.
+ *
+ * @param d_str String to fill
+ * @param width Minimum length in characters of the output string
+ */
+__device__ udf_string zfill(cudf::string_view const d_str, cudf::size_type width)
+{
+  udf_string result;
+  result.resize(cudf::strings::detail::compute_padded_size(d_str, width, 1));
+  cudf::strings::detail::zfill_impl(d_str, width, result.data());
+  return result;
+}
+
+}  // namespace udf
+}  // namespace strings
+}  // namespace cudf

From c3e17acd9ebea9054f2e8d439aa6cf61b45273d7 Mon Sep 17 00:00:00 2001
From: David Wendt <dwendt@nvidia.com>
Date: Wed, 19 Oct 2022 15:30:48 -0400
Subject: [PATCH 10/54] fix doxygen for udf_apis.hpp

---
 python/strings_udf/cpp/include/cudf/strings/udf/udf_apis.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/udf_apis.hpp b/python/strings_udf/cpp/include/cudf/strings/udf/udf_apis.hpp
index 540dcaf4c18..9eb1c72dd5b 100644
--- a/python/strings_udf/cpp/include/cudf/strings/udf/udf_apis.hpp
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/udf_apis.hpp
@@ -35,7 +35,7 @@ class udf_string;
  * No string data is copied so the input column controls the lifetime of the
  * underlying strings.
  *
- * New device memory is allocated and returned to hold just  the string_view instances.
+ * New device memory is allocated and returned to hold just the string_view instances.
  *
  * @param input Strings column to convert to a string_view array.
  * @return Array of string_view objects in device memory
@@ -51,7 +51,7 @@ std::unique_ptr<rmm::device_buffer> to_string_view_array(cudf::column_view const
  * device memory buffers.
  *
  * @param d_strings Pointer to device memory of udf_string objects
- * @param size The number of elements in the buffer array
+ * @param size The number of elements in the d_strings array
  * @return A strings column copy of the udf_string objects
  */
 std::unique_ptr<cudf::column> column_from_udf_string_array(udf_string* d_strings,

From 2dae45d904de295f5b6e2ad018131b3c2159194f Mon Sep 17 00:00:00 2001
From: David Wendt <dwendt@nvidia.com>
Date: Thu, 20 Oct 2022 09:38:51 -0400
Subject: [PATCH 11/54] fix to_string to use count_digits

---
 python/strings_udf/cpp/include/cudf/strings/udf/case.cuh    | 2 +-
 python/strings_udf/cpp/include/cudf/strings/udf/numeric.cuh | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/case.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/case.cuh
index 17ee98ace6f..7539aeef8e3 100644
--- a/python/strings_udf/cpp/include/cudf/strings/udf/case.cuh
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/case.cuh
@@ -166,7 +166,7 @@ __device__ inline udf_string to_upper(chars_tables const tables, string_view d_s
  * @param d_str Input string to convert
  * @return New string containing the converted characters
  */
-__device__ inline udf_string swapcase(chars_tables const tables, string_view d_str)
+__device__ inline udf_string swap_case(chars_tables const tables, string_view d_str)
 {
   cudf::strings::detail::character_flags_table_type case_flag =
     cudf::strings::detail::IS_LOWER(0xFF) | cudf::strings::detail::IS_UPPER(0xFF);
diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/numeric.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/numeric.cuh
index 2cd6eaaa298..6d176d36cfb 100644
--- a/python/strings_udf/cpp/include/cudf/strings/udf/numeric.cuh
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/numeric.cuh
@@ -51,8 +51,7 @@ __device__ inline udf_string to_string(int64_t value)
     result.append("0");
     return result;
   }
-  auto const d_value = static_cast<double>(abs(value));
-  auto digits        = static_cast<int32_t>(log10(d_value)) + 1 + (value < 0);
+  auto digits = cudf::strings::detail::count_digits(value);
   result.resize(digits);
   cudf::strings::detail::integer_to_string(value, result.data());
   return result;

From 3467f34db786ba9a58a9b6ffa0fff9c2b88c25cf Mon Sep 17 00:00:00 2001
From: David Wendt <dwendt@nvidia.com>
Date: Thu, 20 Oct 2022 09:46:59 -0400
Subject: [PATCH 12/54] add ALL_FLAGS

---
 cpp/include/cudf/strings/detail/char_tables.hpp     |  1 +
 .../cpp/include/cudf/strings/udf/case.cuh           | 13 +++++++------
 .../cpp/include/cudf/strings/udf/numeric.cuh        |  3 +--
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/cpp/include/cudf/strings/detail/char_tables.hpp b/cpp/include/cudf/strings/detail/char_tables.hpp
index 4ea7e3ee952..275b7223a3b 100644
--- a/cpp/include/cudf/strings/detail/char_tables.hpp
+++ b/cpp/include/cudf/strings/detail/char_tables.hpp
@@ -46,6 +46,7 @@ constexpr uint8_t IS_LOWER(uint8_t x) { return ((x) & (1 << 6)); }
 constexpr uint8_t IS_SPECIAL(uint8_t x) { return ((x) & (1 << 7)); }
 constexpr uint8_t IS_ALPHANUM(uint8_t x) { return ((x) & (0x0F)); }
 constexpr uint8_t IS_UPPER_OR_LOWER(uint8_t x) { return ((x) & ((1 << 5) | (1 << 6))); }
+constexpr uint8_t ALL_FLAGS = 0xFF;
 
 // Type for the character cases table.
 using character_cases_table_type = uint16_t;
diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/case.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/case.cuh
index 7539aeef8e3..b11297cb721 100644
--- a/python/strings_udf/cpp/include/cudf/strings/udf/case.cuh
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/case.cuh
@@ -100,7 +100,7 @@ __device__ inline udf_string convert_case(
 /**
  * @brief Utility for capitalize and title functions
  *
- * @tparam CaptializeNextFn returns true if the next candidate character should be capitalized
+ * @tparam CapitalizeNextFn returns true if the next candidate character should be capitalized
  * @param tables The char tables required for conversion
  * @param d_str Input string to convert
  * @return New string containing the converted characters
@@ -137,8 +137,8 @@ __device__ inline udf_string capitalize(chars_tables const tables,
  */
 __device__ inline udf_string to_lower(chars_tables const tables, string_view d_str)
 {
-  cudf::strings::detail::character_flags_table_type case_flag =
-    cudf::strings::detail::IS_UPPER(0xFF);  // convert only upper case characters
+  cudf::strings::detail::character_flags_table_type case_flag = cudf::strings::detail::IS_UPPER(
+    cudf::strings::detail::ALL_FLAGS);  // convert only upper case characters
   return detail::convert_case(tables, d_str, case_flag);
 }
 
@@ -151,8 +151,8 @@ __device__ inline udf_string to_lower(chars_tables const tables, string_view d_s
  */
 __device__ inline udf_string to_upper(chars_tables const tables, string_view d_str)
 {
-  cudf::strings::detail::character_flags_table_type case_flag =
-    cudf::strings::detail::IS_LOWER(0xFF);  // convert only lower case characters
+  cudf::strings::detail::character_flags_table_type case_flag = cudf::strings::detail::IS_LOWER(
+    cudf::strings::detail::ALL_FLAGS);  // convert only lower case characters
   return detail::convert_case(tables, d_str, case_flag);
 }
 
@@ -169,7 +169,8 @@ __device__ inline udf_string to_upper(chars_tables const tables, string_view d_s
 __device__ inline udf_string swap_case(chars_tables const tables, string_view d_str)
 {
   cudf::strings::detail::character_flags_table_type case_flag =
-    cudf::strings::detail::IS_LOWER(0xFF) | cudf::strings::detail::IS_UPPER(0xFF);
+    cudf::strings::detail::IS_LOWER(cudf::strings::detail::ALL_FLAGS) |
+    cudf::strings::detail::IS_UPPER(cudf::strings::detail::ALL_FLAGS);
   return detail::convert_case(tables, d_str, case_flag);
 }
 
diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/numeric.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/numeric.cuh
index 6d176d36cfb..4f9e8796fb5 100644
--- a/python/strings_udf/cpp/include/cudf/strings/udf/numeric.cuh
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/numeric.cuh
@@ -51,8 +51,7 @@ __device__ inline udf_string to_string(int64_t value)
     result.append("0");
     return result;
   }
-  auto digits = cudf::strings::detail::count_digits(value);
-  result.resize(digits);
+  result.resize(cudf::strings::detail::count_digits(value));
   cudf::strings::detail::integer_to_string(value, result.data());
   return result;
 }

From cf72fc8113fa33dfe4fd0dbad250d782142436e9 Mon Sep 17 00:00:00 2001
From: David Wendt <dwendt@nvidia.com>
Date: Tue, 25 Oct 2022 10:19:34 -0400
Subject: [PATCH 13/54] add noexcept decl to appropriate member functions

---
 .../include/cudf/strings/udf/char_types.cuh   |  2 +-
 .../cpp/include/cudf/strings/udf/search.cuh   |  2 +-
 .../include/cudf/strings/udf/starts_with.cuh  |  8 +--
 .../include/cudf/strings/udf/udf_string.cuh   | 40 +++++++-------
 .../include/cudf/strings/udf/udf_string.hpp   | 54 ++++++++++---------
 5 files changed, 54 insertions(+), 52 deletions(-)

diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/char_types.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/char_types.cuh
index 2c73328843f..9320686442b 100644
--- a/python/strings_udf/cpp/include/cudf/strings/udf/char_types.cuh
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/char_types.cuh
@@ -25,7 +25,7 @@ namespace strings {
 namespace udf {
 
 /**
- * @brief Returns true if all characters in the string are of the type specified
+ * @brief Returns true if all characters in the string are of the type specified.
  *
  * The output will be false if the string is empty or has at least one character
  * not of the specified type. If all characters fit the type then true is returned.
diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/search.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/search.cuh
index ed3a873b655..ef15886f1f5 100644
--- a/python/strings_udf/cpp/include/cudf/strings/udf/search.cuh
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/search.cuh
@@ -23,7 +23,7 @@ namespace udf {
 
 /**
  * @brief Returns the number of times that the target string appears
- * in the source string
+ * in the source string.
  *
  * If `start <= 0` the search begins at the beginning of the `source` string.
  * If `end <=0` or `end` is greater the length of the `source` string,
diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/starts_with.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/starts_with.cuh
index 8e9ffce4f52..38c609ae505 100644
--- a/python/strings_udf/cpp/include/cudf/strings/udf/starts_with.cuh
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/starts_with.cuh
@@ -24,7 +24,7 @@ namespace udf {
 
 /**
  * @brief Returns true if the beginning of the specified string
- * matches the given character array
+ * matches the given character array.
  *
  * @param dstr String to check
  * @param tgt Character array encoded in UTF-8
@@ -42,7 +42,7 @@ __device__ inline bool starts_with(cudf::string_view const dstr,
 
 /**
  * @brief Returns true if the beginning of the specified string
- * matches the given target string
+ * matches the given target string.
  *
  * @param dstr String to check
  * @param tgt String to match
@@ -55,7 +55,7 @@ __device__ inline bool starts_with(cudf::string_view const dstr, cudf::string_vi
 
 /**
  * @brief Returns true if the end of the specified string
- * matches the given character array
+ * matches the given character array.
  *
  * @param dstr String to check
  * @param tgt Character array encoded in UTF-8
@@ -73,7 +73,7 @@ __device__ inline bool ends_with(cudf::string_view const dstr,
 
 /**
  * @brief Returns true if the end of the specified string
- * matches the given target` string
+ * matches the given target` string.
  *
  * @param dstr String to check
  * @param tgt String to match
diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.cuh
index d651f9faa2b..aa39b94d1c5 100644
--- a/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.cuh
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.cuh
@@ -94,7 +94,7 @@ __device__ inline udf_string::udf_string(udf_string const& src)
   memcpy(m_data, src.m_data, m_bytes);
 }
 
-__device__ inline udf_string::udf_string(udf_string&& src)
+__device__ inline udf_string::udf_string(udf_string&& src) noexcept
   : m_data(src.m_data), m_bytes(src.m_bytes), m_capacity(src.m_capacity)
 {
   src.m_data     = nullptr;
@@ -113,7 +113,7 @@ __device__ inline udf_string::~udf_string() { deallocate(m_data); }
 
 __device__ inline udf_string& udf_string::operator=(udf_string const& str) { return assign(str); }
 
-__device__ inline udf_string& udf_string::operator=(udf_string&& str)
+__device__ inline udf_string& udf_string::operator=(udf_string&& str) noexcept
 {
   return assign(std::move(str));
 }
@@ -125,7 +125,7 @@ __device__ inline udf_string& udf_string::operator=(cudf::string_view const str)
 
 __device__ inline udf_string& udf_string::operator=(char const* str) { return assign(str); }
 
-__device__ udf_string& udf_string::assign(udf_string&& str)
+__device__ udf_string& udf_string::assign(udf_string&& str) noexcept
 {
   if (this == &str) { return *this; }
   m_data         = str.m_data;
@@ -160,30 +160,30 @@ __device__ udf_string& udf_string::assign(char const* str, cudf::size_type bytes
   return *this;
 }
 
-__device__ inline cudf::size_type udf_string::size_bytes() const { return m_bytes; }
+__device__ inline cudf::size_type udf_string::size_bytes() const noexcept { return m_bytes; }
 
-__device__ inline cudf::size_type udf_string::length() const
+__device__ inline cudf::size_type udf_string::length() const noexcept
 {
   return cudf::strings::detail::characters_in_string(m_data, m_bytes);
 }
 
-__device__ cudf::size_type udf_string::max_size() const
+__device__ cudf::size_type udf_string::max_size() const noexcept
 {
   return std::numeric_limits<cudf::size_type>::max() - 1;
 }
 
-__device__ inline char* udf_string::data() { return m_data; }
+__device__ inline char* udf_string::data() noexcept { return m_data; }
 
-__device__ inline char const* udf_string::data() const { return m_data; }
+__device__ inline char const* udf_string::data() const noexcept { return m_data; }
 
-__device__ inline bool udf_string::is_empty() const { return m_bytes == 0; }
+__device__ inline bool udf_string::is_empty() const noexcept { return m_bytes == 0; }
 
-__device__ inline cudf::string_view::const_iterator udf_string::begin() const
+__device__ inline cudf::string_view::const_iterator udf_string::begin() const noexcept
 {
   return cudf::string_view::const_iterator(cudf::string_view(m_data, m_bytes), 0);
 }
 
-__device__ inline cudf::string_view::const_iterator udf_string::end() const
+__device__ inline cudf::string_view::const_iterator udf_string::end() const noexcept
 {
   return cudf::string_view::const_iterator(cudf::string_view(m_data, m_bytes), length());
 }
@@ -216,7 +216,7 @@ __device__ inline cudf::size_type udf_string::byte_offset(cudf::size_type pos) c
   return offset;
 }
 
-__device__ inline int udf_string::compare(cudf::string_view const in) const
+__device__ inline int udf_string::compare(cudf::string_view const in) const noexcept
 {
   return compare(in.data(), in.size_bytes());
 }
@@ -227,39 +227,39 @@ __device__ inline int udf_string::compare(char const* data, cudf::size_type byte
   return view.compare(data, bytes);
 }
 
-__device__ inline bool udf_string::operator==(cudf::string_view const rhs) const
+__device__ inline bool udf_string::operator==(cudf::string_view const rhs) const noexcept
 {
   return m_bytes == rhs.size_bytes() && compare(rhs) == 0;
 }
 
-__device__ inline bool udf_string::operator!=(cudf::string_view const rhs) const
+__device__ inline bool udf_string::operator!=(cudf::string_view const rhs) const noexcept
 {
   return compare(rhs) != 0;
 }
 
-__device__ inline bool udf_string::operator<(cudf::string_view const rhs) const
+__device__ inline bool udf_string::operator<(cudf::string_view const rhs) const noexcept
 {
   return compare(rhs) < 0;
 }
 
-__device__ inline bool udf_string::operator>(cudf::string_view const rhs) const
+__device__ inline bool udf_string::operator>(cudf::string_view const rhs) const noexcept
 {
   return compare(rhs) > 0;
 }
 
-__device__ inline bool udf_string::operator<=(cudf::string_view const rhs) const
+__device__ inline bool udf_string::operator<=(cudf::string_view const rhs) const noexcept
 {
   int rc = compare(rhs);
   return (rc == 0) || (rc < 0);
 }
 
-__device__ inline bool udf_string::operator>=(cudf::string_view const rhs) const
+__device__ inline bool udf_string::operator>=(cudf::string_view const rhs) const noexcept
 {
   int rc = compare(rhs);
   return (rc == 0) || (rc > 0);
 }
 
-__device__ inline void udf_string::clear()
+__device__ inline void udf_string::clear() noexcept
 {
   deallocate(m_data);
   m_data     = nullptr;
@@ -284,7 +284,7 @@ __device__ void udf_string::reserve(cudf::size_type count)
   if (count < max_size() && count > m_capacity) { reallocate(count); }
 }
 
-__device__ cudf::size_type udf_string::capacity() const { return m_capacity; }
+__device__ cudf::size_type udf_string::capacity() const noexcept { return m_capacity; }
 
 __device__ void udf_string::shrink_to_fit()
 {
diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.hpp b/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.hpp
index c57ab473017..2400b5ce71c 100644
--- a/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.hpp
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.hpp
@@ -35,7 +35,7 @@ namespace udf {
  * This class manages a device buffer of UTF-8 encoded characters
  * for string manipulation in a device kernel.
  *
- * It's methods and behavior are modelled after std::string but
+ * Its methods and behavior are modelled after std::string but
  * with special consideration for UTF-8 encoded strings and for
  * use within a cuDF UDF.
  */
@@ -103,47 +103,47 @@ class udf_string {
   __device__ udf_string(udf_string const& src);
 
   /**
-   * @brief Create a string object from a move reference
+   * @brief Move a string object from an rvalue reference
    *
    * The string data is moved from `src` into the instance returned.
    * The `src` will have no content.
    *
    * @param src String to copy
    */
-  __device__ udf_string(udf_string&& src);
+  __device__ udf_string(udf_string&& src) noexcept;
 
   __device__ ~udf_string();
 
   __device__ udf_string& operator=(udf_string const&);
-  __device__ udf_string& operator=(udf_string&&);
+  __device__ udf_string& operator=(udf_string&&) noexcept;
   __device__ udf_string& operator=(cudf::string_view const);
   __device__ udf_string& operator=(char const*);
 
   /**
    * @brief Return the number of bytes in this string
    */
-  __device__ cudf::size_type size_bytes() const;
+  __device__ cudf::size_type size_bytes() const noexcept;
 
   /**
    * @brief Return the number of characters in this string
    */
-  __device__ cudf::size_type length() const;
+  __device__ cudf::size_type length() const noexcept;
 
   /**
    * @brief Return the maximum number of bytes a udf_string can hold
    */
-  __device__ cudf::size_type max_size() const;
+  __device__ cudf::size_type max_size() const noexcept;
 
   /**
    * @brief Return the internal pointer to the character array for this object
    */
-  __device__ char* data();
-  __device__ char const* data() const;
+  __device__ char* data() noexcept;
+  __device__ char const* data() const noexcept;
 
   /**
    * @brief Returns true if there are no characters in this string
    */
-  __device__ bool is_empty() const;
+  __device__ bool is_empty() const noexcept;
 
   /**
    * @brief Returns an iterator that can be used to navigate through
@@ -151,8 +151,8 @@ class udf_string {
    *
    * This returns a `cudf::string_view::const_iterator` which is read-only.
    */
-  __device__ cudf::string_view::const_iterator begin() const;
-  __device__ cudf::string_view::const_iterator end() const;
+  __device__ cudf::string_view::const_iterator begin() const noexcept;
+  __device__ cudf::string_view::const_iterator end() const noexcept;
 
   /**
    * @brief Returns the character at the specified position
@@ -199,7 +199,7 @@ class udf_string {
    *            not match is ordered after the corresponding character in `str`,
    *            or all compared characters match but the `str` string is longer.
    */
-  __device__ int compare(cudf::string_view const str) const;
+  __device__ int compare(cudf::string_view const str) const noexcept;
 
   /**
    * @brief Comparing target character array with this string
@@ -219,39 +219,39 @@ class udf_string {
   /**
    * @brief Returns true if `rhs` matches this string exactly
    */
-  __device__ bool operator==(cudf::string_view const rhs) const;
+  __device__ bool operator==(cudf::string_view const rhs) const noexcept;
 
   /**
    * @brief Returns true if `rhs` does not match this string
    */
-  __device__ bool operator!=(cudf::string_view const rhs) const;
+  __device__ bool operator!=(cudf::string_view const rhs) const noexcept;
 
   /**
    * @brief Returns true if this string is ordered before `rhs`
    */
-  __device__ bool operator<(cudf::string_view const rhs) const;
+  __device__ bool operator<(cudf::string_view const rhs) const noexcept;
 
   /**
    * @brief Returns true if `rhs` is ordered before this string
    */
-  __device__ bool operator>(cudf::string_view const rhs) const;
+  __device__ bool operator>(cudf::string_view const rhs) const noexcept;
 
   /**
    * @brief Returns true if this string matches or is ordered before `rhs`
    */
-  __device__ bool operator<=(cudf::string_view const rhs) const;
+  __device__ bool operator<=(cudf::string_view const rhs) const noexcept;
 
   /**
    * @brief Returns true if `rhs` matches or is ordered before this string
    */
-  __device__ bool operator>=(cudf::string_view const rhs) const;
+  __device__ bool operator>=(cudf::string_view const rhs) const noexcept;
 
   /**
    * @brief Remove all bytes from this string
    *
    * All pointers, references, and iterators are invalidated.
    */
-  __device__ void clear();
+  __device__ void clear() noexcept;
 
   /**
    * @brief Resizes string to contain `count` bytes
@@ -279,7 +279,7 @@ class udf_string {
   /**
    * @brief Returns the number of bytes that the string has allocated
    */
-  __device__ cudf::size_type capacity() const;
+  __device__ cudf::size_type capacity() const noexcept;
 
   /**
    * @brief Reduces internal allocation to just `size_bytes()`
@@ -291,16 +291,18 @@ class udf_string {
   /**
    * @brief Moves the contents of `str` into this string instance
    *
+   * On return, the `str` will have no contents.
+   *
    * @param str String to move
-   * @return This string new contents
+   * @return This string with new contents
    */
-  __device__ udf_string& assign(udf_string&& str);
+  __device__ udf_string& assign(udf_string&& str) noexcept;
 
   /**
    * @brief Replaces the contents of this string with contents of `str`
    *
    * @param str String to copy
-   * @return This string new contents
+   * @return This string with new contents
    */
   __device__ udf_string& assign(cudf::string_view const str);
 
@@ -308,7 +310,7 @@ class udf_string {
    * @brief Replaces the contents of this string with contents of `str`
    *
    * @param str Null-terminated UTF-8 character array
-   * @return This string new contents
+   * @return This string with new contents
    */
   __device__ udf_string& assign(char const* str);
 
@@ -317,7 +319,7 @@ class udf_string {
    *
    * @param str UTF-8 character array
    * @param bytes Number of bytes to copy from `str`
-   * @return This string new contents
+   * @return This string with new contents
    */
   __device__ udf_string& assign(char const* str, cudf::size_type bytes);
 

From 28e917b5482088a8b8bd9d551dc55c516672c92d Mon Sep 17 00:00:00 2001
From: David Wendt <dwendt@nvidia.com>
Date: Tue, 25 Oct 2022 10:20:12 -0400
Subject: [PATCH 14/54] fix return types for split

---
 .../cpp/include/cudf/strings/udf/split.cuh         | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/split.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/split.cuh
index 58e3e6fcfb4..bd2f1e9d3d9 100644
--- a/python/strings_udf/cpp/include/cudf/strings/udf/split.cuh
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/split.cuh
@@ -77,10 +77,10 @@ __device__ inline cudf::size_type split(cudf::string_view const d_str,
  *               Pass `nullptr` to just get the token count.
  * @return Number of tokens returned
  */
-__device__ inline int split(cudf::string_view const d_str,
-                            char const* tgt,
-                            cudf::size_type bytes,
-                            cudf::string_view* result)
+__device__ inline cudf::size_type split(cudf::string_view const d_str,
+                                        char const* tgt,
+                                        cudf::size_type bytes,
+                                        cudf::string_view* result)
 {
   return split(d_str, cudf::string_view{tgt, bytes}, result);
 }
@@ -94,9 +94,9 @@ __device__ inline int split(cudf::string_view const d_str,
  *               Pass `nullptr` to just get the token count.
  * @return Number of tokens returned
  */
-__device__ inline int split(cudf::string_view const d_str,
-                            char const* tgt,
-                            cudf::string_view* result)
+__device__ inline cudf::size_type split(cudf::string_view const d_str,
+                                        char const* tgt,
+                                        cudf::string_view* result)
 {
   return split(d_str, tgt, detail::bytes_in_null_terminated_string(tgt), result);
 }

From f82c454534eb44ef11c7c8266cf02d5f1db842b7 Mon Sep 17 00:00:00 2001
From: David Wendt <dwendt@nvidia.com>
Date: Tue, 25 Oct 2022 10:20:29 -0400
Subject: [PATCH 15/54] fix doxygen for various functions

---
 python/strings_udf/cpp/include/cudf/strings/udf/case.cuh  | 1 +
 python/strings_udf/cpp/include/cudf/strings/udf/pad.cuh   | 4 ++--
 python/strings_udf/cpp/include/cudf/strings/udf/strip.cuh | 6 +++---
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/case.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/case.cuh
index b11297cb721..472101959a6 100644
--- a/python/strings_udf/cpp/include/cudf/strings/udf/case.cuh
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/case.cuh
@@ -103,6 +103,7 @@ __device__ inline udf_string convert_case(
  * @tparam CapitalizeNextFn returns true if the next candidate character should be capitalized
  * @param tables The char tables required for conversion
  * @param d_str Input string to convert
+ * @param next_fn Function for next character capitalized
  * @return New string containing the converted characters
  */
 template <typename CapitalizeNextFn>
diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/pad.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/pad.cuh
index 38dbee72a84..0f0506898c2 100644
--- a/python/strings_udf/cpp/include/cudf/strings/udf/pad.cuh
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/pad.cuh
@@ -53,11 +53,11 @@ __device__ udf_string pad(cudf::string_view const d_str,
  *
  * If the `width` is smaller than the length of `d_str` no change occurs.
  *
- * If d_str starts with a sign character ('-' or '+') then '0' padding
+ * If `d_str` starts with a sign character ('-' or '+') then '0' padding
  * starts after the sign.
  *
  * @param d_str String to fill
- * @param width Minimum length in characters of the output string
+ * @param width Minimum length in characters of the output string (including the sign character)
  */
 __device__ udf_string zfill(cudf::string_view const d_str, cudf::size_type width)
 {
diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/strip.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/strip.cuh
index 6c6639835cd..9c1c00a44fb 100644
--- a/python/strings_udf/cpp/include/cudf/strings/udf/strip.cuh
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/strip.cuh
@@ -28,7 +28,7 @@ namespace udf {
 /**
  * @brief Strip characters from the beginning and/or end of the given string
  *
- * The `d_to_strip` is interpretted as an array of characters to be removed.
+ * The `d_to_strip` is interpreted as an array of characters to be removed.
  * If `d_to_strip` is an empty string, whitespace characters are stripped.
  *
  * @code{.cpp}
@@ -58,7 +58,7 @@ __device__ udf_string strip(cudf::string_view const d_str,
 /**
  * @brief Strip characters from the beginning of the given string
  *
- * The `d_to_strip` is interpretted as an array of characters to be removed.
+ * The `d_to_strip` is interpreted as an array of characters to be removed.
  * If `d_to_strip` is an empty string, whitespace characters are stripped.
  *
  * @code{.cpp}
@@ -83,7 +83,7 @@ __device__ udf_string lstrip(cudf::string_view const d_str, cudf::string_view d_
 /**
  * @brief Strip characters from the end of the given string
  *
- * The `d_to_strip` is interpretted as an array of characters to be removed.
+ * The `d_to_strip` is interpreted as an array of characters to be removed.
  * If `d_to_strip` is an empty string, whitespace characters are stripped.
  *
  * @code{.cpp}

From 7b9718c25661b6f4fe9157eae6c81524093c9022 Mon Sep 17 00:00:00 2001
From: David Wendt <dwendt@nvidia.com>
Date: Mon, 31 Oct 2022 11:42:25 -0400
Subject: [PATCH 16/54] create free_udf_strings_array function

---
 .../cpp/include/cudf/strings/udf/udf_apis.hpp | 11 +++++
 .../cpp/src/strings/udf/udf_apis.cu           | 43 +++++++++----------
 2 files changed, 32 insertions(+), 22 deletions(-)

diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/udf_apis.hpp b/python/strings_udf/cpp/include/cudf/strings/udf/udf_apis.hpp
index 9eb1c72dd5b..68834afa082 100644
--- a/python/strings_udf/cpp/include/cudf/strings/udf/udf_apis.hpp
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/udf_apis.hpp
@@ -57,6 +57,17 @@ std::unique_ptr<rmm::device_buffer> to_string_view_array(cudf::column_view const
 std::unique_ptr<cudf::column> column_from_udf_string_array(udf_string* d_strings,
                                                            cudf::size_type size);
 
+/**
+ * @brief Frees a vector of udf_string objects
+ *
+ * The individual udf_strings are cleared freeing each of their internal
+ * device memory buffers.
+ *
+ * @param d_strings Pointer to device memory of udf_string objects
+ * @param size The number of elements in the d_strings array
+ */
+void free_udf_string_array(udf_string* d_strings, cudf::size_type size);
+
 }  // namespace udf
 }  // namespace strings
 }  // namespace cudf
diff --git a/python/strings_udf/cpp/src/strings/udf/udf_apis.cu b/python/strings_udf/cpp/src/strings/udf/udf_apis.cu
index ca258dce370..e96f691598d 100644
--- a/python/strings_udf/cpp/src/strings/udf/udf_apis.cu
+++ b/python/strings_udf/cpp/src/strings/udf/udf_apis.cu
@@ -33,21 +33,6 @@ namespace udf {
 namespace detail {
 namespace {
 
-/**
- * @brief Frees udf_strings device memory
- *
- * @param d_buffer Array of udf_strings
- */
-void free_udf_string_array(cudf::strings::udf::udf_string* d_strings,
-                           cudf::size_type size,
-                           rmm::cuda_stream_view stream)
-{
-  thrust::for_each_n(rmm::exec_policy(stream),
-                     thrust::make_counting_iterator(0),
-                     size,
-                     [d_strings] __device__(auto idx) { d_strings[idx].clear(); });
-}
-
 /**
  * @brief Functor wraps string_view objects around udf_string objects
  *
@@ -78,7 +63,7 @@ std::unique_ptr<rmm::device_buffer> to_string_view_array(cudf::column_view const
 
 /**
  * @copydoc column_from_udf_string_array
-
+ *
  * @param stream CUDA stream used for allocating/copying device memory and launching kernels
  */
 std::unique_ptr<cudf::column> column_from_udf_string_array(udf_string* d_strings,
@@ -93,13 +78,22 @@ std::unique_ptr<cudf::column> column_from_udf_string_array(udf_string* d_strings
                     indices.data(),
                     udf_string_to_string_view_transform_fn{});
 
-  auto results = cudf::make_strings_column(indices, cudf::string_view(nullptr, 0), stream);
-
-  // free the individual udf_string elements
-  free_udf_string_array(d_strings, size, stream);
+  return cudf::make_strings_column(indices, cudf::string_view(nullptr, 0), stream);
+}
 
-  // return new column
-  return results;
+/**
+ * @copydoc free_udf_string_array
+ *
+ * @param stream CUDA stream used for allocating/copying device memory and launching kernels
+ */
+void free_udf_string_array(cudf::strings::udf::udf_string* d_strings,
+                           cudf::size_type size,
+                           rmm::cuda_stream_view stream)
+{
+  thrust::for_each_n(rmm::exec_policy(stream),
+                     thrust::make_counting_iterator(0),
+                     size,
+                     [d_strings] __device__(auto idx) { d_strings[idx].clear(); });
 }
 
 }  // namespace detail
@@ -117,6 +111,11 @@ std::unique_ptr<cudf::column> column_from_udf_string_array(udf_string* d_strings
   return detail::column_from_udf_string_array(d_strings, size, rmm::cuda_stream_default);
 }
 
+void free_udf_string_array(udf_string* d_strings, cudf::size_type size)
+{
+  detail::free_udf_string_array(d_strings, size, rmm::cuda_stream_default);
+}
+
 }  // namespace udf
 }  // namespace strings
 }  // namespace cudf

From 68e54e8ff91388e51dc91ef85f1b6178f1b00d97 Mon Sep 17 00:00:00 2001
From: David Wendt <dwendt@nvidia.com>
Date: Mon, 31 Oct 2022 11:44:18 -0400
Subject: [PATCH 17/54] fix compare returns, null assignment, reuse ctors

---
 .../include/cudf/strings/udf/udf_string.cuh   | 62 ++++++++++++-------
 .../include/cudf/strings/udf/udf_string.hpp   |  6 +-
 2 files changed, 45 insertions(+), 23 deletions(-)

diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.cuh
index aa39b94d1c5..5590fa67c70 100644
--- a/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.cuh
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.cuh
@@ -30,6 +30,12 @@ namespace strings {
 namespace udf {
 namespace detail {
 
+/**
+ * @brief Count the bytes in a null-terminated character array
+ *
+ * @param str Null-terminated string
+ * @return Number of bytes in `str` upto but not including the null-terminator
+ */
 __device__ inline static cudf::size_type bytes_in_null_terminated_string(char const* str)
 {
   if (!str) return 0;
@@ -41,18 +47,38 @@ __device__ inline static cudf::size_type bytes_in_null_terminated_string(char co
 
 }  // namespace detail
 
+/**
+ * @brief Allocate memory for strings operation
+ *
+ * @param bytes Number of bytes in to allocate
+ * @return Pointer to allocated memory
+ */
 __device__ inline char* udf_string::allocate(cudf::size_type bytes)
 {
   char* data  = static_cast<char*>(malloc(bytes + 1));
-  data[bytes] = 0;  // add null-terminator so we can printf strings in device code
+  data[bytes] = '\0';  // add null-terminator so we can printf strings in device code
   return data;
 }
 
+/**
+ * @brief Free memory created by allocate()
+ *
+ * @param data Pointer to allocated memory
+ */
 __device__ inline void udf_string::deallocate(char* data)
 {
   if (data) free(data);
 }
 
+/**
+ * @brief Allocate memory for strings operation
+ *
+ * Reallocates memory for `m_data` with new size `bytes`
+ * The original data in `m_data` is preserved up to `min(bytes,m_bytes)`
+ *
+ * @param bytes Number of bytes in to allocate
+ * @return Pointer to allocated memory
+ */
 __device__ void udf_string::reallocate(cudf::size_type bytes)
 {
   m_capacity    = bytes;
@@ -75,23 +101,19 @@ __device__ udf_string::udf_string(cudf::size_type count, cudf::char_utf8 chr)
   m_bytes = m_capacity = cudf::strings::detail::bytes_in_char_utf8(chr) * count;
   m_data               = allocate(m_capacity);
   auto out_ptr         = m_data;
-  for (auto idx = 0; idx < count; ++idx) {
+  for (cudf::size_type idx = 0; idx < count; ++idx) {
     out_ptr += cudf::strings::detail::from_char_utf8(chr, out_ptr);
   }
 }
 
 __device__ inline udf_string::udf_string(char const* data)
+  : udf_string(data, detail::bytes_in_null_terminated_string(data))
 {
-  m_bytes = m_capacity = detail::bytes_in_null_terminated_string(data);
-  m_data               = allocate(m_capacity);
-  memcpy(m_data, data, m_bytes);
 }
 
 __device__ inline udf_string::udf_string(udf_string const& src)
-  : m_bytes(src.m_bytes), m_capacity(src.m_bytes)
+  : udf_string(src.m_data, src.m_bytes)
 {
-  m_data = allocate(m_capacity);
-  memcpy(m_data, src.m_data, m_bytes);
 }
 
 __device__ inline udf_string::udf_string(udf_string&& src) noexcept
@@ -103,10 +125,8 @@ __device__ inline udf_string::udf_string(udf_string&& src) noexcept
 }
 
 __device__ inline udf_string::udf_string(cudf::string_view const str)
-  : m_bytes(str.size_bytes()), m_capacity(str.size_bytes())
+  : udf_string(str.data(), str.size_bytes())
 {
-  m_data = allocate(m_capacity);
-  memcpy(m_data, str.data(), m_bytes);
 }
 
 __device__ inline udf_string::~udf_string() { deallocate(m_data); }
@@ -156,7 +176,7 @@ __device__ udf_string& udf_string::assign(char const* str, cudf::size_type bytes
   }
   m_bytes = bytes;
   memcpy(m_data, str, bytes);
-  m_data[m_bytes] = 0;
+  m_data[m_bytes] = '\0';
   return *this;
 }
 
@@ -167,7 +187,7 @@ __device__ inline cudf::size_type udf_string::length() const noexcept
   return cudf::strings::detail::characters_in_string(m_data, m_bytes);
 }
 
-__device__ cudf::size_type udf_string::max_size() const noexcept
+__device__ constexpr cudf::size_type udf_string::max_size() const noexcept
 {
   return std::numeric_limits<cudf::size_type>::max() - 1;
 }
@@ -249,14 +269,12 @@ __device__ inline bool udf_string::operator>(cudf::string_view const rhs) const
 
 __device__ inline bool udf_string::operator<=(cudf::string_view const rhs) const noexcept
 {
-  int rc = compare(rhs);
-  return (rc == 0) || (rc < 0);
+  return compare(rhs) <= 0;
 }
 
 __device__ inline bool udf_string::operator>=(cudf::string_view const rhs) const noexcept
 {
-  int rc = compare(rhs);
-  return (rc == 0) || (rc > 0);
+  return compare(rhs) >= 0;
 }
 
 __device__ inline void udf_string::clear() noexcept
@@ -276,7 +294,7 @@ __device__ inline void udf_string::resize(cudf::size_type count)
   if (count > m_bytes) { memset(m_data + m_bytes, 0, count - m_bytes); }
 
   m_bytes         = count;
-  m_data[m_bytes] = 0;
+  m_data[m_bytes] = '\0';
 }
 
 __device__ void udf_string::reserve(cudf::size_type count)
@@ -298,7 +316,7 @@ __device__ inline udf_string& udf_string::append(char const* str, cudf::size_typ
   if (nbytes > m_capacity) { reallocate(2 * nbytes); }
   memcpy(m_data + m_bytes, str, in_bytes);
   m_bytes         = nbytes;
-  m_data[m_bytes] = 0;
+  m_data[m_bytes] = '\0';
   return *this;
 }
 
@@ -318,7 +336,7 @@ __device__ inline udf_string& udf_string::append(cudf::char_utf8 chr, cudf::size
     out_ptr += cudf::strings::detail::from_char_utf8(chr, out_ptr);
   }
   m_bytes         = nbytes;
-  m_data[m_bytes] = 0;
+  m_data[m_bytes] = '\0';
   return *this;
 }
 
@@ -412,7 +430,7 @@ __device__ inline udf_string& udf_string::replace(cudf::size_type pos,
   memcpy(m_data + spos, str, in_bytes);
 
   m_bytes         = nbytes;
-  m_data[m_bytes] = 0;
+  m_data[m_bytes] = '\0';
   return *this;
 }
 
@@ -456,7 +474,7 @@ __device__ inline udf_string& udf_string::replace(cudf::size_type pos,
   }
 
   m_bytes         = nbytes;
-  m_data[m_bytes] = 0;
+  m_data[m_bytes] = '\0';
   return *this;
 }
 
diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.hpp b/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.hpp
index 2400b5ce71c..1dee976b299 100644
--- a/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.hpp
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.hpp
@@ -132,7 +132,7 @@ class udf_string {
   /**
    * @brief Return the maximum number of bytes a udf_string can hold
    */
-  __device__ cudf::size_type max_size() const noexcept;
+  __device__ constexpr cudf::size_type max_size() const noexcept;
 
   /**
    * @brief Return the internal pointer to the character array for this object
@@ -182,6 +182,8 @@ class udf_string {
    * `data() + byte_offset(pos)` points to the memory location
    * the character at position `pos`.
    *
+   * The behavior is undefined if `pos < 0 or pos >= length()`
+   *
    * @param pos Index position of character to return byte offset.
    * @return Byte offset for character at `pos`
    */
@@ -261,6 +263,8 @@ class udf_string {
    *
    * All pointers, references, and iterators may be invalidated.
    *
+   * The behavior is undefined if `count > max_size()`
+   *
    * @param count Size in bytes of this string.
    */
   __device__ void resize(cudf::size_type count);

From 6eef0a4d8805f88491b97a3d7f15c4986c98056c Mon Sep 17 00:00:00 2001
From: David Wendt <dwendt@nvidia.com>
Date: Mon, 31 Oct 2022 11:44:56 -0400
Subject: [PATCH 18/54] fix some doxygen wording

---
 python/strings_udf/cpp/include/cudf/strings/udf/numeric.cuh | 2 +-
 python/strings_udf/cpp/include/cudf/strings/udf/pad.cuh     | 2 +-
 python/strings_udf/cpp/include/cudf/strings/udf/split.cuh   | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/numeric.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/numeric.cuh
index 4f9e8796fb5..c8c9f6e46f4 100644
--- a/python/strings_udf/cpp/include/cudf/strings/udf/numeric.cuh
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/numeric.cuh
@@ -59,7 +59,7 @@ __device__ inline udf_string to_string(int64_t value)
 /**
  * @brief Converts a string into a double
  *
- * Support scientific notation as well.
+ * This function supports scientific notation.
  * Overflow goes to inf or -inf and underflow may go to 0.
  */
 __device__ inline double stod(string_view const& d_str)
diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/pad.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/pad.cuh
index 0f0506898c2..c43c67c9ddf 100644
--- a/python/strings_udf/cpp/include/cudf/strings/udf/pad.cuh
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/pad.cuh
@@ -28,7 +28,7 @@ namespace udf {
  * @brief Pad beginning and/or end of a string with the given fill character
  *
  * The side_type::BOTH will attempt to center the text using the `fill_char`.
- * If the `width` is smaller than the length of `d_str` no change occurs.
+ * If the `width <= d_str.length()` no change occurs.
  *
  * @tparam side Specify where the padding should occur
  * @param d_str String to pad
diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/split.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/split.cuh
index bd2f1e9d3d9..6a11e5bf655 100644
--- a/python/strings_udf/cpp/include/cudf/strings/udf/split.cuh
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/split.cuh
@@ -34,11 +34,11 @@ namespace udf {
  *
  * @code{.cpp}
  * auto d_str = cudf::string_view{"the best  of times ", 19};
- * auto tgt = cudf::string_view{}; // empty string
+ * auto tgt = cudf::string_view{" ", 1};
  * auto token_count = split(d_str, tgt, nullptr);
  * auto result = new cudf::string_view[token_count];
  * split(d_str, tgt, result);
- * // result is array like ["the", "best", "of", "times"]
+ * // result is array like ["the", "best", "", "of", "times", ""]
  * @endcode
  *
  * @param d_str String to split

From 69e0d7c0b77dc87e6f6f1472d4a1c296307ab8bd Mon Sep 17 00:00:00 2001
From: David Wendt <dwendt@nvidia.com>
Date: Mon, 31 Oct 2022 17:36:27 -0400
Subject: [PATCH 19/54] remove string_view const parameter decl

---
 .../include/cudf/strings/udf/udf_string.cuh   | 34 ++++++++-----------
 .../include/cudf/strings/udf/udf_string.hpp   | 28 +++++++--------
 2 files changed, 27 insertions(+), 35 deletions(-)

diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.cuh
index 5590fa67c70..92b5536364e 100644
--- a/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.cuh
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.cuh
@@ -124,7 +124,7 @@ __device__ inline udf_string::udf_string(udf_string&& src) noexcept
   src.m_capacity = 0;
 }
 
-__device__ inline udf_string::udf_string(cudf::string_view const str)
+__device__ inline udf_string::udf_string(cudf::string_view str)
   : udf_string(str.data(), str.size_bytes())
 {
 }
@@ -138,10 +138,7 @@ __device__ inline udf_string& udf_string::operator=(udf_string&& str) noexcept
   return assign(std::move(str));
 }
 
-__device__ inline udf_string& udf_string::operator=(cudf::string_view const str)
-{
-  return assign(str);
-}
+__device__ inline udf_string& udf_string::operator=(cudf::string_view str) { return assign(str); }
 
 __device__ inline udf_string& udf_string::operator=(char const* str) { return assign(str); }
 
@@ -157,7 +154,7 @@ __device__ udf_string& udf_string::assign(udf_string&& str) noexcept
   return *this;
 }
 
-__device__ udf_string& udf_string::assign(cudf::string_view const str)
+__device__ udf_string& udf_string::assign(cudf::string_view str)
 {
   return assign(str.data(), str.size_bytes());
 }
@@ -236,7 +233,7 @@ __device__ inline cudf::size_type udf_string::byte_offset(cudf::size_type pos) c
   return offset;
 }
 
-__device__ inline int udf_string::compare(cudf::string_view const in) const noexcept
+__device__ inline int udf_string::compare(cudf::string_view in) const noexcept
 {
   return compare(in.data(), in.size_bytes());
 }
@@ -247,32 +244,32 @@ __device__ inline int udf_string::compare(char const* data, cudf::size_type byte
   return view.compare(data, bytes);
 }
 
-__device__ inline bool udf_string::operator==(cudf::string_view const rhs) const noexcept
+__device__ inline bool udf_string::operator==(cudf::string_view rhs) const noexcept
 {
   return m_bytes == rhs.size_bytes() && compare(rhs) == 0;
 }
 
-__device__ inline bool udf_string::operator!=(cudf::string_view const rhs) const noexcept
+__device__ inline bool udf_string::operator!=(cudf::string_view rhs) const noexcept
 {
   return compare(rhs) != 0;
 }
 
-__device__ inline bool udf_string::operator<(cudf::string_view const rhs) const noexcept
+__device__ inline bool udf_string::operator<(cudf::string_view rhs) const noexcept
 {
   return compare(rhs) < 0;
 }
 
-__device__ inline bool udf_string::operator>(cudf::string_view const rhs) const noexcept
+__device__ inline bool udf_string::operator>(cudf::string_view rhs) const noexcept
 {
   return compare(rhs) > 0;
 }
 
-__device__ inline bool udf_string::operator<=(cudf::string_view const rhs) const noexcept
+__device__ inline bool udf_string::operator<=(cudf::string_view rhs) const noexcept
 {
   return compare(rhs) <= 0;
 }
 
-__device__ inline bool udf_string::operator>=(cudf::string_view const rhs) const noexcept
+__device__ inline bool udf_string::operator>=(cudf::string_view rhs) const noexcept
 {
   return compare(rhs) >= 0;
 }
@@ -340,15 +337,12 @@ __device__ inline udf_string& udf_string::append(cudf::char_utf8 chr, cudf::size
   return *this;
 }
 
-__device__ inline udf_string& udf_string::append(cudf::string_view const in)
+__device__ inline udf_string& udf_string::append(cudf::string_view in)
 {
   return append(in.data(), in.size_bytes());
 }
 
-__device__ inline udf_string& udf_string::operator+=(cudf::string_view const in)
-{
-  return append(in);
-}
+__device__ inline udf_string& udf_string::operator+=(cudf::string_view in) { return append(in); }
 
 __device__ inline udf_string& udf_string::operator+=(cudf::char_utf8 chr) { return append(chr); }
 
@@ -366,7 +360,7 @@ __device__ inline udf_string& udf_string::insert(cudf::size_type pos, char const
   return insert(pos, str, detail::bytes_in_null_terminated_string(str));
 }
 
-__device__ inline udf_string& udf_string::insert(cudf::size_type pos, cudf::string_view const in)
+__device__ inline udf_string& udf_string::insert(cudf::size_type pos, cudf::string_view in)
 {
   return insert(pos, in.data(), in.size_bytes());
 }
@@ -443,7 +437,7 @@ __device__ inline udf_string& udf_string::replace(cudf::size_type pos,
 
 __device__ inline udf_string& udf_string::replace(cudf::size_type pos,
                                                   cudf::size_type count,
-                                                  cudf::string_view const in)
+                                                  cudf::string_view in)
 {
   return replace(pos, count, in.data(), in.size_bytes());
 }
diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.hpp b/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.hpp
index 1dee976b299..728bb0b5adf 100644
--- a/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.hpp
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.hpp
@@ -83,7 +83,7 @@ class udf_string {
    *
    * @param str String to copy
    */
-  __device__ udf_string(cudf::string_view const str);
+  __device__ udf_string(cudf::string_view str);
 
   /**
    * @brief Create a string object with `count` copies of character `chr`
@@ -201,7 +201,7 @@ class udf_string {
    *            not match is ordered after the corresponding character in `str`,
    *            or all compared characters match but the `str` string is longer.
    */
-  __device__ int compare(cudf::string_view const str) const noexcept;
+  __device__ int compare(cudf::string_view str) const noexcept;
 
   /**
    * @brief Comparing target character array with this string
@@ -221,32 +221,32 @@ class udf_string {
   /**
    * @brief Returns true if `rhs` matches this string exactly
    */
-  __device__ bool operator==(cudf::string_view const rhs) const noexcept;
+  __device__ bool operator==(cudf::string_view rhs) const noexcept;
 
   /**
    * @brief Returns true if `rhs` does not match this string
    */
-  __device__ bool operator!=(cudf::string_view const rhs) const noexcept;
+  __device__ bool operator!=(cudf::string_view rhs) const noexcept;
 
   /**
    * @brief Returns true if this string is ordered before `rhs`
    */
-  __device__ bool operator<(cudf::string_view const rhs) const noexcept;
+  __device__ bool operator<(cudf::string_view rhs) const noexcept;
 
   /**
    * @brief Returns true if `rhs` is ordered before this string
    */
-  __device__ bool operator>(cudf::string_view const rhs) const noexcept;
+  __device__ bool operator>(cudf::string_view rhs) const noexcept;
 
   /**
    * @brief Returns true if this string matches or is ordered before `rhs`
    */
-  __device__ bool operator<=(cudf::string_view const rhs) const noexcept;
+  __device__ bool operator<=(cudf::string_view rhs) const noexcept;
 
   /**
    * @brief Returns true if `rhs` matches or is ordered before this string
    */
-  __device__ bool operator>=(cudf::string_view const rhs) const noexcept;
+  __device__ bool operator>=(cudf::string_view rhs) const noexcept;
 
   /**
    * @brief Remove all bytes from this string
@@ -308,7 +308,7 @@ class udf_string {
    * @param str String to copy
    * @return This string with new contents
    */
-  __device__ udf_string& assign(cudf::string_view const str);
+  __device__ udf_string& assign(cudf::string_view str);
 
   /**
    * @brief Replaces the contents of this string with contents of `str`
@@ -333,7 +333,7 @@ class udf_string {
    * @param str String to append
    * @return This string with the appended argument
    */
-  __device__ udf_string& operator+=(cudf::string_view const str);
+  __device__ udf_string& operator+=(cudf::string_view str);
 
   /**
    * @brief Append a character to the end of this string
@@ -375,7 +375,7 @@ class udf_string {
    * @param str String to append
    * @return This string with the appended argument
    */
-  __device__ udf_string& append(cudf::string_view const str);
+  __device__ udf_string& append(cudf::string_view str);
 
   /**
    * @brief Append a character to the end of this string
@@ -396,7 +396,7 @@ class udf_string {
    * @param str String to insert into this one
    * @return This string with the inserted argument
    */
-  __device__ udf_string& insert(cudf::size_type pos, cudf::string_view const str);
+  __device__ udf_string& insert(cudf::size_type pos, cudf::string_view str);
 
   /**
    * @brief Insert a null-terminated character array into the character position specified
@@ -457,9 +457,7 @@ class udf_string {
    * @param str String to replace the given range
    * @return This string modified with the replacement
    */
-  __device__ udf_string& replace(cudf::size_type pos,
-                                 cudf::size_type count,
-                                 cudf::string_view const str);
+  __device__ udf_string& replace(cudf::size_type pos, cudf::size_type count, cudf::string_view str);
 
   /**
    * @brief Replace a range of characters with a null-terminated character array

From a95c03070af37c5e81c16970bfb0946f0357152f Mon Sep 17 00:00:00 2001
From: David Wendt <dwendt@nvidia.com>
Date: Mon, 31 Oct 2022 18:46:50 -0400
Subject: [PATCH 20/54] fix default-stream

---
 .../cpp/include/cudf/strings/udf/udf_string.cuh           | 8 ++++----
 python/strings_udf/cpp/src/strings/udf/udf_apis.cu        | 5 +++--
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.cuh
index 92b5536364e..e5c689fb87e 100644
--- a/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.cuh
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.cuh
@@ -306,12 +306,12 @@ __device__ void udf_string::shrink_to_fit()
   if (m_bytes < m_capacity) { reallocate(m_bytes); }
 }
 
-__device__ inline udf_string& udf_string::append(char const* str, cudf::size_type in_bytes)
+__device__ inline udf_string& udf_string::append(char const* str, cudf::size_type bytes)
 {
-  if (in_bytes <= 0) { return *this; }
-  auto const nbytes = m_bytes + in_bytes;
+  if (bytes <= 0) { return *this; }
+  auto const nbytes = m_bytes + bytes;
   if (nbytes > m_capacity) { reallocate(2 * nbytes); }
-  memcpy(m_data + m_bytes, str, in_bytes);
+  memcpy(m_data + m_bytes, str, bytes);
   m_bytes         = nbytes;
   m_data[m_bytes] = '\0';
   return *this;
diff --git a/python/strings_udf/cpp/src/strings/udf/udf_apis.cu b/python/strings_udf/cpp/src/strings/udf/udf_apis.cu
index e96f691598d..7927740fd49 100644
--- a/python/strings_udf/cpp/src/strings/udf/udf_apis.cu
+++ b/python/strings_udf/cpp/src/strings/udf/udf_apis.cu
@@ -20,6 +20,7 @@
 #include <cudf/column/column_factories.hpp>
 #include <cudf/strings/detail/utilities.hpp>
 #include <cudf/strings/string_view.cuh>
+#include <cudf/utilities/default_stream.hpp>
 
 #include <rmm/device_uvector.hpp>
 #include <rmm/exec_policy.hpp>
@@ -108,12 +109,12 @@ std::unique_ptr<rmm::device_buffer> to_string_view_array(cudf::column_view const
 std::unique_ptr<cudf::column> column_from_udf_string_array(udf_string* d_strings,
                                                            cudf::size_type size)
 {
-  return detail::column_from_udf_string_array(d_strings, size, rmm::cuda_stream_default);
+  return detail::column_from_udf_string_array(d_strings, size, cudf::get_default_stream());
 }
 
 void free_udf_string_array(udf_string* d_strings, cudf::size_type size)
 {
-  detail::free_udf_string_array(d_strings, size, rmm::cuda_stream_default);
+  detail::free_udf_string_array(d_strings, size, cudf::get_default_stream());
 }
 
 }  // namespace udf

From e0526e6afa4774c179569089de19ab469a9489fe Mon Sep 17 00:00:00 2001
From: David Wendt <dwendt@nvidia.com>
Date: Mon, 31 Oct 2022 19:16:29 -0400
Subject: [PATCH 21/54] remove lstrip and rstrip

---
 .../cpp/include/cudf/strings/udf/strip.cuh    | 54 +++++--------------
 1 file changed, 12 insertions(+), 42 deletions(-)

diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/strip.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/strip.cuh
index 9c1c00a44fb..f2db3073460 100644
--- a/python/strings_udf/cpp/include/cudf/strings/udf/strip.cuh
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/strip.cuh
@@ -41,68 +41,38 @@ namespace udf {
  * // result is "b" ('a' or ' ' removed from the ends)
  * @endcode
  *
- * @param d_str String to strip characters from
- * @param d_to_strip Characters to remove
- * @param stype From where to strip the characters;
- *              Default `BOTH` indicates stripping characters from the
- *              beginning and the end of the input string `d_str`
- * @return New string with characters removed
- */
-__device__ udf_string strip(cudf::string_view const d_str,
-                            cudf::string_view const d_to_strip,
-                            side_type stype = side_type::BOTH)
-{
-  return udf_string{cudf::strings::detail::strip(d_str, d_to_strip, stype)};
-}
-
-/**
- * @brief Strip characters from the beginning of the given string
- *
- * The `d_to_strip` is interpreted as an array of characters to be removed.
- * If `d_to_strip` is an empty string, whitespace characters are stripped.
- *
  * @code{.cpp}
  * auto d_str = cudf::string_view{" aba ", 5};
  * auto d_to_strip = cudf::string_view{}; // empty string
- * auto result = lstrip(d_str, d_to_strip);
+ * auto result = strip(d_str, d_to_strip, side_type::LEFT);
  * // result is "aba "
- * d_to_strip = cudf::string_view{"a ", 2}; // space and 'a'
- * result = lstrip(d_str, d_to_strip);
+ * d_to_strip = cudf::string_view{"a ", 2}; // 'a' and space
+ * result = strip(d_str, d_to_strip, side_type::LEFT);
  * // result is "ba " ('a' or ' ' removed from the beginning)
  * @endcode
  *
- * @param d_str String to strip characters from
- * @param d_to_strip Characters to remove
- * @return New string with characters removed
- */
-__device__ udf_string lstrip(cudf::string_view const d_str, cudf::string_view d_to_strip)
-{
-  return strip(d_str, d_to_strip, side_type::LEFT);
-}
-
-/**
- * @brief Strip characters from the end of the given string
- *
- * The `d_to_strip` is interpreted as an array of characters to be removed.
- * If `d_to_strip` is an empty string, whitespace characters are stripped.
- *
  * @code{.cpp}
  * auto d_str = cudf::string_view{" aba ", 5};
  * auto d_to_strip = cudf::string_view{}; // empty string
- * auto result = rstrip(d_str, d_to_strip);
+ * auto result = strip(d_str, d_to_strip, side_type::RIGHT);
  * // result is " aba"
  * d_to_strip = cudf::string_view{" a", 2}; // space and 'a'
- * result = rstrip(d_str, d_to_strip);
+ * result = rstrip(d_str, d_to_strip, side_type::RIGHT);
  * // result is " ab" ('a' or ' ' removed from the end)
  * @endcode
  *
  * @param d_str String to strip characters from
  * @param d_to_strip Characters to remove
+ * @param stype From where to strip the characters;
+ *              Default `BOTH` indicates stripping characters from the
+ *              beginning and the end of the input string `d_str`
  * @return New string with characters removed
  */
-__device__ udf_string rstrip(cudf::string_view const d_str, cudf::string_view d_to_strip)
+__device__ udf_string strip(cudf::string_view const d_str,
+                            cudf::string_view const d_to_strip,
+                            side_type stype = side_type::BOTH)
 {
-  return strip(d_str, d_to_strip, side_type::RIGHT);
+  return udf_string{cudf::strings::detail::strip(d_str, d_to_strip, stype)};
 }
 
 }  // namespace udf

From bc903d6d293319945c4485acc91737066a7d1c98 Mon Sep 17 00:00:00 2001
From: David Wendt <dwendt@nvidia.com>
Date: Mon, 31 Oct 2022 19:21:14 -0400
Subject: [PATCH 22/54] reword split doxygen text for result=nullptr

---
 python/strings_udf/cpp/include/cudf/strings/udf/split.cuh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/split.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/split.cuh
index 6a11e5bf655..7eeeb94271b 100644
--- a/python/strings_udf/cpp/include/cudf/strings/udf/split.cuh
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/split.cuh
@@ -29,8 +29,8 @@ namespace udf {
  * @brief Split string using given string
  *
  * The caller must allocate an array of cudf::string_view to be filled
- * in by this function. Calling this with a `nullptr` for the `result`
- * will return the number of elements.
+ * in by this function. This function can be called with a `result=nullptr`
+ * to compute the number of tokens.
  *
  * @code{.cpp}
  * auto d_str = cudf::string_view{"the best  of times ", 19};

From eb6532e1e0d86ac92596297b08383668d0207035 Mon Sep 17 00:00:00 2001
From: David Wendt <dwendt@nvidia.com>
Date: Tue, 1 Nov 2022 08:21:02 -0400
Subject: [PATCH 23/54] add cuda_runtime.h to resolve device refs

---
 cpp/include/cudf/strings/string_view.hpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/cpp/include/cudf/strings/string_view.hpp b/cpp/include/cudf/strings/string_view.hpp
index 03bf538b1b2..265adc60392 100644
--- a/cpp/include/cudf/strings/string_view.hpp
+++ b/cpp/include/cudf/strings/string_view.hpp
@@ -17,6 +17,8 @@
 
 #include <cudf/types.hpp>
 
+#include <cuda_runtime.h>
+
 #include <iterator>
 
 /**

From a8fca127c81322a02b6c5ceb035f587940eba873 Mon Sep 17 00:00:00 2001
From: David Wendt <dwendt@nvidia.com>
Date: Tue, 1 Nov 2022 08:21:39 -0400
Subject: [PATCH 24/54] fix doxygen wording for pad()

---
 python/strings_udf/cpp/include/cudf/strings/udf/pad.cuh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/pad.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/pad.cuh
index c43c67c9ddf..d6d4ed637e9 100644
--- a/python/strings_udf/cpp/include/cudf/strings/udf/pad.cuh
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/pad.cuh
@@ -28,7 +28,7 @@ namespace udf {
  * @brief Pad beginning and/or end of a string with the given fill character
  *
  * The side_type::BOTH will attempt to center the text using the `fill_char`.
- * If the `width <= d_str.length()` no change occurs.
+ * If `width <= d_str.length()` no change occurs and the input `d_str` is returned.
  *
  * @tparam side Specify where the padding should occur
  * @param d_str String to pad

From a249d133d145cd1135d0baf3729166fbb2e22259 Mon Sep 17 00:00:00 2001
From: David Wendt <dwendt@nvidia.com>
Date: Tue, 1 Nov 2022 08:22:02 -0400
Subject: [PATCH 25/54] refactor split; add count_tokens function

---
 .../cpp/include/cudf/strings/udf/split.cuh    | 104 ++++++++++++++++--
 1 file changed, 97 insertions(+), 7 deletions(-)

diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/split.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/split.cuh
index 7eeeb94271b..ca31425aa62 100644
--- a/python/strings_udf/cpp/include/cudf/strings/udf/split.cuh
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/split.cuh
@@ -24,6 +24,7 @@
 namespace cudf {
 namespace strings {
 namespace udf {
+namespace detail {
 
 /**
  * @brief Split string using given string
@@ -66,6 +67,54 @@ __device__ inline cudf::size_type split(cudf::string_view const d_str,
 
   return count;
 }
+}  // namespace detail
+
+/**
+ * @brief Count tokens in a string without performing the split
+ *
+ * @code{.cpp}
+ * auto d_str = cudf::string_view{"the best  of times ", 19};
+ * auto tgt = cudf::string_view{" ", 1};
+ * auto token_count = count_tokens(d_str, tgt);
+ * // token_count is 6
+ * @endcode
+ *
+ * @param d_str String to split
+ * @param tgt String to split on
+ * @return Number of tokens returned
+ */
+__device__ inline cudf::size_type count_tokens(cudf::string_view const d_str,
+                                               cudf::string_view const tgt)
+{
+  return detail::split(d_str, tgt, nullptr);
+}
+
+/**
+ * @brief Split string using given string
+ *
+ * The caller must allocate an array of cudf::string_view to be filled
+ * in by this function.
+ *
+ * @code{.cpp}
+ * auto d_str = cudf::string_view{"the best  of times ", 19};
+ * auto tgt = cudf::string_view{" ", 1};
+ * auto token_count = count_tokens(d_str, tgt);
+ * auto result = new cudf::string_view[token_count];
+ * split(d_str, tgt, result);
+ * // result is array like ["the", "best", "", "of", "times", ""]
+ * @endcode
+ *
+ * @param d_str String to split
+ * @param tgt String to split on
+ * @param result Empty array to populate with output objects.
+ * @return Number of tokens returned
+ */
+__device__ inline cudf::size_type split(cudf::string_view const d_str,
+                                        cudf::string_view const tgt,
+                                        cudf::string_view* result)
+{
+  return detail::split(d_str, tgt, result);
+}
 
 /**
  * @brief Split string using given target array
@@ -73,8 +122,7 @@ __device__ inline cudf::size_type split(cudf::string_view const d_str,
  * @param d_str String to split
  * @param tgt Character array encoded in UTF-8 used for identifying split points
  * @param bytes Number of bytes to read from `tgt`
- * @param result Empty array to populate with output objects.
- *               Pass `nullptr` to just get the token count.
+ * @param result Empty array to populate with output objects
  * @return Number of tokens returned
  */
 __device__ inline cudf::size_type split(cudf::string_view const d_str,
@@ -82,7 +130,7 @@ __device__ inline cudf::size_type split(cudf::string_view const d_str,
                                         cudf::size_type bytes,
                                         cudf::string_view* result)
 {
-  return split(d_str, cudf::string_view{tgt, bytes}, result);
+  return detail::split(d_str, cudf::string_view{tgt, bytes}, result);
 }
 
 /**
@@ -90,8 +138,7 @@ __device__ inline cudf::size_type split(cudf::string_view const d_str,
  *
  * @param d_str String to split
  * @param tgt Null-terminated character array encoded in UTF-8 used for identifying split points
- * @param result Empty array to populate with output objects.
- *               Pass `nullptr` to just get the token count.
+ * @param result Empty array to populate with output objects
  * @return Number of tokens returned
  */
 __device__ inline cudf::size_type split(cudf::string_view const d_str,
@@ -101,11 +148,21 @@ __device__ inline cudf::size_type split(cudf::string_view const d_str,
   return split(d_str, tgt, detail::bytes_in_null_terminated_string(tgt), result);
 }
 
+namespace detail {
 /**
  * @brief Split string on whitespace
  *
- * This will create tokens by splitting on one or more consecutive whitespace characters
- * found in `d_str`.
+ * The caller must allocate an array of cudf::string_view to be filled
+ * in by this function. This function can be called with a `result=nullptr`
+ * to compute the number of tokens.
+ *
+ * @code{.cpp}
+ * auto d_str = cudf::string_view{"the best  of times ", 19};
+ * auto token_count = split(d_str, nullptr);
+ * auto result = new cudf::string_view[token_count];
+ * split(d_str, result);
+ * // result is array like ["the", "best", "of", "times"]
+ * @endcode
  *
  * @param d_str String to split
  * @param result Empty array to populate with output objects.
@@ -123,6 +180,39 @@ __device__ inline cudf::size_type split(cudf::string_view const d_str, cudf::str
   }
   return count;
 }
+}  // namespace detail
+
+/**
+ * @brief Count tokens in a string without performing the split on whitespace
+ *
+ * @code{.cpp}
+ * auto d_str = cudf::string_view{"the best  of times ", 19};
+ * auto token_count = count_tokens(d_str);
+ * // token_count is 4
+ * @endcode
+ *
+ * @param d_str String to split
+ * @return Number of tokens returned
+ */
+__device__ inline cudf::size_type count_tokens(cudf::string_view const d_str)
+{
+  return detail::split(d_str, nullptr);
+}
+
+/**
+ * @brief Split string on whitespace
+ *
+ * This will create tokens by splitting on one or more consecutive whitespace characters
+ * found in `d_str`.
+ *
+ * @param d_str String to split
+ * @param result Empty array to populate with output objects.
+ * @return Number of tokens returned
+ */
+__device__ inline cudf::size_type split(cudf::string_view const d_str, cudf::string_view* result)
+{
+  return detail::split(d_str, result);
+}
 
 /**
  * @brief Join an array of strings with a separator

From 96b06f632d878a424fb5a5bf16b5832e2d1f62e0 Mon Sep 17 00:00:00 2001
From: David Wendt <dwendt@nvidia.com>
Date: Tue, 1 Nov 2022 08:22:38 -0400
Subject: [PATCH 26/54] refactor append, replace for better reuse

---
 .../include/cudf/strings/udf/udf_string.cuh   | 38 ++-----------------
 1 file changed, 4 insertions(+), 34 deletions(-)

diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.cuh
index e5c689fb87e..2bd8ceb9bba 100644
--- a/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.cuh
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.cuh
@@ -324,17 +324,8 @@ __device__ inline udf_string& udf_string::append(char const* str)
 
 __device__ inline udf_string& udf_string::append(cudf::char_utf8 chr, cudf::size_type count)
 {
-  if (count <= 0) { return *this; }
-  auto const char_bytes = cudf::strings::detail::bytes_in_char_utf8(chr) * count;
-  auto const nbytes     = m_bytes + char_bytes;
-  if (nbytes > m_capacity) { reallocate(2 * nbytes); }
-  auto out_ptr = m_data + m_bytes;
-  for (auto idx = 0; idx < count; ++idx) {
-    out_ptr += cudf::strings::detail::from_char_utf8(chr, out_ptr);
-  }
-  m_bytes         = nbytes;
-  m_data[m_bytes] = '\0';
-  return *this;
+  auto d_str = udf_string(count, chr);
+  return append(d_str);
 }
 
 __device__ inline udf_string& udf_string::append(cudf::string_view in)
@@ -447,29 +438,8 @@ __device__ inline udf_string& udf_string::replace(cudf::size_type pos,
                                                   cudf::size_type chr_count,
                                                   cudf::char_utf8 chr)
 {
-  if (pos < 0 || chr_count < 0) { return *this; }
-  auto const spos = byte_offset(pos);
-  if (spos > m_bytes) { return *this; }
-  auto const epos = count < 0 ? m_bytes : std::min(byte_offset(pos + count), m_bytes);
-
-  // compute input size
-  auto const char_bytes = cudf::strings::detail::bytes_in_char_utf8(chr) * chr_count;
-  // compute new output size
-  auto const nbytes = m_bytes + char_bytes - (epos - spos);
-  if (nbytes > m_capacity) { reallocate(2 * nbytes); }
-
-  // move bytes -- make room for the new character(s)
-  shift_bytes(spos + char_bytes, epos, nbytes);
-
-  // copy chr chr_count times
-  auto out_ptr = m_data + spos;
-  for (auto idx = 0; idx < chr_count; ++idx) {
-    out_ptr += cudf::strings::detail::from_char_utf8(chr, out_ptr);
-  }
-
-  m_bytes         = nbytes;
-  m_data[m_bytes] = '\0';
-  return *this;
+  auto d_str = udf_string(chr_count, chr);
+  return replace(pos, count, d_str);
 }
 
 __device__ udf_string& udf_string::erase(cudf::size_type pos, cudf::size_type count)

From 7849307c86aa5369a873e724808f372a634a6849 Mon Sep 17 00:00:00 2001
From: David Wendt <dwendt@nvidia.com>
Date: Tue, 1 Nov 2022 12:02:53 -0400
Subject: [PATCH 27/54] expand spos/epos var names

---
 .../include/cudf/strings/udf/udf_string.cuh   | 42 +++++++++----------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.cuh
index 2bd8ceb9bba..5c9a02a9510 100644
--- a/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.cuh
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.cuh
@@ -222,10 +222,10 @@ __device__ inline cudf::size_type udf_string::byte_offset(cudf::size_type pos) c
 {
   cudf::size_type offset = 0;
 
-  auto sptr = m_data;
-  auto eptr = sptr + m_bytes;
-  while ((pos > 0) && (sptr < eptr)) {
-    auto const byte       = static_cast<uint8_t>(*sptr++);
+  auto start = m_data;
+  auto end   = start + m_bytes;
+  while ((pos > 0) && (start < end)) {
+    auto const byte       = static_cast<uint8_t>(*start++);
     auto const char_bytes = cudf::strings::detail::bytes_in_utf8_byte(byte);
     if (char_bytes) { --pos; }
     offset += char_bytes;
@@ -366,21 +366,21 @@ __device__ inline udf_string& udf_string::insert(cudf::size_type pos,
 __device__ inline udf_string udf_string::substr(cudf::size_type pos, cudf::size_type count) const
 {
   if (pos < 0) { return udf_string{"", 0}; }
-  auto const spos = byte_offset(pos);
-  if (spos >= m_bytes) { return udf_string{"", 0}; }
-  auto const epos = count < 0 ? m_bytes : std::min(byte_offset(pos + count), m_bytes);
-  return udf_string{data() + spos, epos - spos};
+  auto const start_pos = byte_offset(pos);
+  if (start_pos >= m_bytes) { return udf_string{"", 0}; }
+  auto const end_pos = count < 0 ? m_bytes : std::min(byte_offset(pos + count), m_bytes);
+  return udf_string{data() + start_pos, end_pos - start_pos};
 }
 
 // utility for replace()
-__device__ void udf_string::shift_bytes(cudf::size_type spos,
-                                        cudf::size_type epos,
+__device__ void udf_string::shift_bytes(cudf::size_type start_pos,
+                                        cudf::size_type end_pos,
                                         cudf::size_type nbytes)
 {
   if (nbytes < m_bytes) {
     // shift bytes to the left [...wxyz] -> [wxyzxyz]
-    auto src = epos;
-    auto tgt = spos;
+    auto src = end_pos;
+    auto tgt = start_pos;
     while (tgt < nbytes) {
       m_data[tgt++] = m_data[src++];
     }
@@ -388,7 +388,7 @@ __device__ void udf_string::shift_bytes(cudf::size_type spos,
     // shift bytes to the right [abcd...] -> [abcabcd]
     auto src = m_bytes;
     auto tgt = nbytes;
-    while (src > epos) {
+    while (src > end_pos) {
       m_data[--tgt] = m_data[--src];
     }
   }
@@ -400,19 +400,19 @@ __device__ inline udf_string& udf_string::replace(cudf::size_type pos,
                                                   cudf::size_type in_bytes)
 {
   if (pos < 0 || in_bytes < 0) { return *this; }
-  auto const spos = byte_offset(pos);
-  if (spos > m_bytes) { return *this; }
-  auto const epos = count < 0 ? m_bytes : std::min(byte_offset(pos + count), m_bytes);
+  auto const start_pos = byte_offset(pos);
+  if (start_pos > m_bytes) { return *this; }
+  auto const end_pos = count < 0 ? m_bytes : std::min(byte_offset(pos + count), m_bytes);
 
   // compute new size
-  auto const nbytes = m_bytes + in_bytes - (epos - spos);
+  auto const nbytes = m_bytes + in_bytes - (end_pos - start_pos);
   if (nbytes > m_capacity) { reallocate(2 * nbytes); }
 
   // move bytes -- make room for replacement
-  shift_bytes(spos + in_bytes, epos, nbytes);
+  shift_bytes(start_pos + in_bytes, end_pos, nbytes);
 
   // insert the replacement
-  memcpy(m_data + spos, str, in_bytes);
+  memcpy(m_data + start_pos, str, in_bytes);
 
   m_bytes         = nbytes;
   m_data[m_bytes] = '\0';
@@ -447,9 +447,9 @@ __device__ udf_string& udf_string::erase(cudf::size_type pos, cudf::size_type co
   return replace(pos, count, nullptr, 0);
 }
 
-__device__ inline cudf::size_type udf_string::char_offset(cudf::size_type bytepos) const
+__device__ inline cudf::size_type udf_string::char_offset(cudf::size_type byte_pos) const
 {
-  return cudf::strings::detail::characters_in_string(data(), bytepos);
+  return cudf::strings::detail::characters_in_string(data(), byte_pos);
 }
 
 }  // namespace udf

From cadcf79e079b938dcc0be755c1308ed7a147f1c5 Mon Sep 17 00:00:00 2001
From: David Wendt <dwendt@nvidia.com>
Date: Tue, 1 Nov 2022 12:03:31 -0400
Subject: [PATCH 28/54] add more doc to replace() for count parm

---
 .../include/cudf/strings/udf/udf_string.hpp    | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.hpp b/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.hpp
index 728bb0b5adf..2bbda357cee 100644
--- a/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.hpp
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.hpp
@@ -452,6 +452,9 @@ class udf_string {
    * Replaces characters in range `[pos, pos + count]` with `str`.
    * There is no effect if `pos < 0 or pos > length()`.
    *
+   * If `count==0` then `str` is inserted starting at `pos`.
+   * If `count==npos` then the replacement range is `[pos,length())`.
+   *
    * @param pos Position of first character to replace
    * @param count Number of characters to replace
    * @param str String to replace the given range
@@ -465,6 +468,9 @@ class udf_string {
    * Replaces characters in range `[pos, pos + count)` with `data`.
    * There is no effect if `pos < 0 or pos > length()`.
    *
+   * If `count==0` then `data` is inserted starting at `pos`.
+   * If `count==npos` then the replacement range is `[pos,length())`.
+   *
    * @param pos Position of first character to replace
    * @param count Number of characters to replace
    * @param data Null-terminated character array to replace the given range
@@ -478,6 +484,9 @@ class udf_string {
    * Replaces characters in range `[pos, pos + count)` with `[data, data + bytes)`.
    * There is no effect if `pos < 0 or pos > length()`.
    *
+   * If `count==0` then `data` is inserted starting at `pos`.
+   * If `count==npos` then the replacement range is `[pos,length())`.
+   *
    * @param pos Position of first character to replace
    * @param count Number of characters to replace
    * @param data String to replace the given range
@@ -495,6 +504,9 @@ class udf_string {
    * Replaces characters in range `[pos, pos + count)` with `chr` `chr_count` times.
    * There is no effect if `pos < 0 or pos > length()`.
    *
+   * If `count==0` then `chr` is inserted starting at `pos`.
+   * If `count==npos` then the replacement range is `[pos,length())`.
+   *
    * @param pos Position of first character to replace
    * @param count Number of characters to replace
    * @param chr_count Number of times `chr` will repeated
@@ -527,8 +539,10 @@ class udf_string {
   __device__ char* allocate(cudf::size_type bytes);
   __device__ void deallocate(char* data);
   __device__ void reallocate(cudf::size_type bytes);
-  __device__ cudf::size_type char_offset(cudf::size_type bytepos) const;
-  __device__ void shift_bytes(cudf::size_type spos, cudf::size_type epos, cudf::size_type nbytes);
+  __device__ cudf::size_type char_offset(cudf::size_type byte_pos) const;
+  __device__ void shift_bytes(cudf::size_type start_pos,
+                              cudf::size_type end_pos,
+                              cudf::size_type nbytes);
 };
 
 }  // namespace udf

From 1e02c262eacf68d0a25b96dbcf6c72d3cbe50b20 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Tue, 1 Nov 2022 12:16:22 -0700
Subject: [PATCH 29/54] adjust for changes

---
 .../strings_udf/_lib/cpp/strings_udf.pxd           |  7 +++++++
 .../strings_udf/strings_udf/_lib/cudf_jit_udf.pyx  | 14 ++++++++++----
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/python/strings_udf/strings_udf/_lib/cpp/strings_udf.pxd b/python/strings_udf/strings_udf/_lib/cpp/strings_udf.pxd
index ee145e4023b..648fab0abe7 100644
--- a/python/strings_udf/strings_udf/_lib/cpp/strings_udf.pxd
+++ b/python/strings_udf/strings_udf/_lib/cpp/strings_udf.pxd
@@ -11,12 +11,19 @@ from cudf._lib.cpp.types cimport size_type
 from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer
 
 
+cdef extern from "cudf/strings/udf/udf_string.hpp" namespace \
+        "cudf::strings::udf" nogil:
+    cdef cppclass udf_string
+
 cdef extern from "cudf/strings/udf/udf_apis.hpp"  namespace \
         "cudf::strings::udf" nogil:
     cdef unique_ptr[device_buffer] to_string_view_array(column_view) except +
     cdef unique_ptr[column] column_from_udf_string_array(
         void*, size_t
     ) except +
+    cdef void free_udf_string_array(
+        udf_string* d_strings, size_type size
+    ) except +
 
 cdef extern from "cudf/strings/detail/char_tables.hpp" namespace \
         "cudf::strings::detail" nogil:
diff --git a/python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx b/python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx
index 19fbf51bd1c..c1c99225133 100644
--- a/python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx
+++ b/python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx
@@ -6,12 +6,14 @@ from libcpp.utility cimport move
 from cudf.core.buffer import Buffer
 
 from cudf._lib.column cimport Column
-from cudf._lib.cpp.column.column cimport column_view
+from cudf._lib.cpp.column.column cimport column, column_view
 from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer
 
 from strings_udf._lib.cpp.strings_udf cimport (
     column_from_udf_string_array as cpp_column_from_udf_string_array,
+    free_udf_string_array as cpp_free_udf_string_array,
     to_string_view_array as cpp_to_string_view_array,
+    udf_string,
 )
 
 
@@ -26,12 +28,16 @@ def to_string_view_array(Column strings_col):
 
 
 def from_udf_string_array(DeviceBuffer d_buffer):
-    cdef size_t size = d_buffer.c_size()
-    cdef void* data = d_buffer.c_data()
+    cdef size_t size = d_buffer.c_size() // 16
+    cdef udf_string* data = <udf_string*>d_buffer.c_data()
     cdef unique_ptr[column] c_result
     # data = <void *>
 
     with nogil:
         c_result = move(cpp_column_from_udf_string_array(data, size))
 
-    return Column.from_unique_ptr(move(c_result))
+    result = Column.from_unique_ptr(move(c_result))
+    with nogil:
+        cpp_free_udf_string_array(data, size)
+
+    return result

From 1218c08b96a63e98287ad36d3f45970dee2749e6 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Wed, 2 Nov 2022 08:06:37 -0700
Subject: [PATCH 30/54] fix up cython

---
 python/strings_udf/strings_udf/_lib/cpp/strings_udf.pxd | 2 +-
 python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx    | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/python/strings_udf/strings_udf/_lib/cpp/strings_udf.pxd b/python/strings_udf/strings_udf/_lib/cpp/strings_udf.pxd
index 648fab0abe7..4835325dbd7 100644
--- a/python/strings_udf/strings_udf/_lib/cpp/strings_udf.pxd
+++ b/python/strings_udf/strings_udf/_lib/cpp/strings_udf.pxd
@@ -19,7 +19,7 @@ cdef extern from "cudf/strings/udf/udf_apis.hpp"  namespace \
         "cudf::strings::udf" nogil:
     cdef unique_ptr[device_buffer] to_string_view_array(column_view) except +
     cdef unique_ptr[column] column_from_udf_string_array(
-        void*, size_t
+        udf_string* d_strings, size_type size,
     ) except +
     cdef void free_udf_string_array(
         udf_string* d_strings, size_type size
diff --git a/python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx b/python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx
index c1c99225133..186d30984bb 100644
--- a/python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx
+++ b/python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx
@@ -28,10 +28,9 @@ def to_string_view_array(Column strings_col):
 
 
 def from_udf_string_array(DeviceBuffer d_buffer):
-    cdef size_t size = d_buffer.c_size() // 16
+    cdef size_t size = int(d_buffer.c_size() / sizeof(udf_string))
     cdef udf_string* data = <udf_string*>d_buffer.c_data()
     cdef unique_ptr[column] c_result
-    # data = <void *>
 
     with nogil:
         c_result = move(cpp_column_from_udf_string_array(data, size))

From e864deae2e7933dc796cbf910a6caae1da11b028 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Thu, 3 Nov 2022 08:42:50 -0700
Subject: [PATCH 31/54] from_udf_string_array -> column_from_udf_string_array,
 to_string_view_array->column_to_string_view_array

---
 python/cudf/cudf/core/udf/__init__.py                    | 8 ++++----
 python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx     | 4 ++--
 python/strings_udf/strings_udf/tests/test_string_udfs.py | 8 ++++----
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/python/cudf/cudf/core/udf/__init__.py b/python/cudf/cudf/core/udf/__init__.py
index 194868659a7..a5aa9bb89da 100644
--- a/python/cudf/cudf/core/udf/__init__.py
+++ b/python/cudf/cudf/core/udf/__init__.py
@@ -36,8 +36,8 @@
         utils.ptx_files.append(ptxpath)
 
         from strings_udf._lib.cudf_jit_udf import (
-            from_udf_string_array,
-            to_string_view_array,
+            column_from_udf_string_array,
+            column_to_string_view_array,
         )
         from strings_udf._typing import (
             str_view_arg_handler,
@@ -53,8 +53,8 @@
         )
 
         _supported_masked_types |= {string_view}
-        utils.launch_arg_getters[cudf_str_dtype] = to_string_view_array
-        utils.output_col_getters[cudf_str_dtype] = from_udf_string_array
+        utils.launch_arg_getters[cudf_str_dtype] = column_to_string_view_array
+        utils.output_col_getters[cudf_str_dtype] = column_from_udf_string_array
         utils.masked_array_types[cudf_str_dtype] = string_view
         row_function.itemsizes[cudf_str_dtype] = string_view.size_bytes
 
diff --git a/python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx b/python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx
index 8b628dd5229..32d4d6cb073 100644
--- a/python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx
+++ b/python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx
@@ -17,7 +17,7 @@ from strings_udf._lib.cpp.strings_udf cimport (
 )
 
 
-def to_string_view_array(Column strings_col):
+def column_to_string_view_array(Column strings_col):
     cdef unique_ptr[device_buffer] c_buffer
     cdef column_view input_view = strings_col.view()
     with nogil:
@@ -27,7 +27,7 @@ def to_string_view_array(Column strings_col):
     return as_buffer(device_buffer)
 
 
-def from_udf_string_array(DeviceBuffer d_buffer):
+def column_from_udf_string_array(DeviceBuffer d_buffer):
     cdef size_t size = int(d_buffer.c_size() / sizeof(udf_string))
     cdef udf_string* data = <udf_string*>d_buffer.c_data()
     cdef unique_ptr[column] c_result
diff --git a/python/strings_udf/strings_udf/tests/test_string_udfs.py b/python/strings_udf/strings_udf/tests/test_string_udfs.py
index b5d44229e00..f29f6345045 100644
--- a/python/strings_udf/strings_udf/tests/test_string_udfs.py
+++ b/python/strings_udf/strings_udf/tests/test_string_udfs.py
@@ -14,8 +14,8 @@
 
 import strings_udf
 from strings_udf._lib.cudf_jit_udf import (
-    from_udf_string_array,
-    to_string_view_array,
+    column_from_udf_string_array,
+    column_to_string_view_array,
 )
 from strings_udf._typing import str_view_arg_handler, string_view, udf_string
 
@@ -65,13 +65,13 @@ def run_udf_test(data, func, dtype):
         output_ary = cudf.core.column.column_empty(len(data), dtype=dtype)
 
     cudf_column = cudf.core.column.as_column(data)
-    str_view_ary = to_string_view_array(cudf_column)
+    str_view_ary = column_to_string_view_array(cudf_column)
 
     kernel = get_kernel(func, dtype, len(data))
     kernel.forall(len(data))(str_view_ary, output_ary)
 
     if dtype == "str":
-        output_ary = from_udf_string_array(output_ary)
+        output_ary = column_from_udf_string_array(output_ary)
 
     got = cudf.Series(output_ary, dtype=dtype)
     expect = pd.Series(data).apply(func)

From 9fccc9b31ece3409d48ea49ca6ad7912a100ff4d Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Thu, 3 Nov 2022 12:25:23 -0700
Subject: [PATCH 32/54] refactor

---
 python/cudf/cudf/core/udf/__init__.py     |  6 ++---
 python/cudf/cudf/core/udf/utils.py        | 28 ++++++++++++++---------
 python/strings_udf/strings_udf/_typing.py | 15 ++++++++++++
 3 files changed, 34 insertions(+), 15 deletions(-)

diff --git a/python/cudf/cudf/core/udf/__init__.py b/python/cudf/cudf/core/udf/__init__.py
index a5aa9bb89da..e5451fa7fa1 100644
--- a/python/cudf/cudf/core/udf/__init__.py
+++ b/python/cudf/cudf/core/udf/__init__.py
@@ -51,17 +51,15 @@
         cuda_lower(api.Masked, string_view, types.boolean)(
             masked_lowering.masked_constructor
         )
-
+        utils.JIT_SUPPORTED_TYPES |= STRING_TYPES
         _supported_masked_types |= {string_view}
+
         utils.launch_arg_getters[cudf_str_dtype] = column_to_string_view_array
         utils.output_col_getters[cudf_str_dtype] = column_from_udf_string_array
         utils.masked_array_types[cudf_str_dtype] = string_view
         row_function.itemsizes[cudf_str_dtype] = string_view.size_bytes
 
-        utils.JIT_SUPPORTED_TYPES |= STRING_TYPES
-
         utils.arg_handlers.append(str_view_arg_handler)
-        utils.udf_return_type_map[string_view] = udf_string
         _STRING_UDFS_ENABLED = True
 
 except ImportError as e:
diff --git a/python/cudf/cudf/core/udf/utils.py b/python/cudf/cudf/core/udf/utils.py
index b4912882b56..f7ec0fd5ce0 100644
--- a/python/cudf/cudf/core/udf/utils.py
+++ b/python/cudf/cudf/core/udf/utils.py
@@ -4,7 +4,6 @@
 
 import cachetools
 import cupy as cp
-import numba
 import numpy as np
 from numba import cuda, typeof
 from numba.core.errors import TypingError
@@ -34,7 +33,6 @@
 precompiled: cachetools.LRUCache = cachetools.LRUCache(maxsize=32)
 arg_handlers: List[Any] = []
 ptx_files: List[Any] = []
-udf_return_type_map: Dict[Any, Any] = {}
 masked_array_types: Dict[Any, Any] = {}
 launch_arg_getters: Dict[Any, Any] = {}
 output_col_getters: Dict[Any, Any] = {}
@@ -72,7 +70,7 @@ def _get_udf_return_type(argty, func: Callable, args=()):
         if not isinstance(numba_output_type, MaskedType)
         else numba_output_type.value_type
     )
-    result = udf_return_type_map.get(result, result)
+    result = result if result.is_internal else result.return_type
 
     # _get_udf_return_type will throw a TypingError if the user tries to use
     # a field in the row containing an unsupported dtype, except in the
@@ -148,7 +146,7 @@ def _construct_signature(frame, return_type, args):
     actually JIT the kernel itself later, accounting for types
     and offsets. Skips columns with unsupported dtypes.
     """
-    if return_type in udf_return_type_map.values():
+    if not return_type.is_internal:
         return_type = CPointer(return_type)
     else:
         return_type = return_type[::1]
@@ -222,11 +220,19 @@ def _compile_or_get(frame, func, args, kernel_getter=None):
     # could be a MaskedType or a scalar type.
 
     kernel, scalar_return_type = kernel_getter(frame, func, args)
-    try:
-        np_return_type = numpy_support.as_dtype(scalar_return_type)
-    except numba.core.errors.NumbaNotImplementedError:
-        # TODO: fix
-        np_return_type = np.dtype("object")
+    np_return_type = (
+        numpy_support.as_dtype(scalar_return_type)
+        if scalar_return_type.is_internal
+        else scalar_return_type.np_dtype
+    )
+
+    # try:
+    #    np_return_type = numpy_support.as_dtype(scalar_return_type)
+    # except numba.core.errors.NumbaNotImplementedError:
+    #    # TODO: fix
+    #    np_return_type = np.dtype("object")
+    # np_return_type = numpy_support.as_dtype(scalar_return_type)
+
     precompiled[cache_key] = (kernel, np_return_type)
 
     return kernel, np_return_type
@@ -264,9 +270,9 @@ def _get_input_args_from_frame(fr):
 
 
 def _return_arr_from_dtype(dt, size):
-    extensionty = udf_return_type_map.get(masked_array_types.get(dt))
+    extensionty = masked_array_types.get(dt)
     if extensionty:
-        return rmm.DeviceBuffer(size=size * extensionty.size_bytes)
+        return rmm.DeviceBuffer(size=size * extensionty.return_type.size_bytes)
     return cp.empty(size, dtype=dt)
 
 
diff --git a/python/strings_udf/strings_udf/_typing.py b/python/strings_udf/strings_udf/_typing.py
index 63d2cdf69aa..320958960cd 100644
--- a/python/strings_udf/strings_udf/_typing.py
+++ b/python/strings_udf/strings_udf/_typing.py
@@ -3,6 +3,7 @@
 import operator
 
 import llvmlite.binding as ll
+import numpy as np
 from numba import types
 from numba.core.datamodel import default_manager
 from numba.core.extending import models, register_model
@@ -24,18 +25,32 @@
 
 # String object definitions
 class UDFString(types.Type):
+
+    np_dtype = np.dtype("object")
+
     def __init__(self):
         super().__init__(name="udf_string")
         llty = default_manager[self].get_value_type()
         self.size_bytes = llty.get_abi_size(target_data)
 
+    @property
+    def return_type(self):
+        return self
+
 
 class StringView(types.Type):
+
+    np_dtype = np.dtype("object")
+
     def __init__(self):
         super().__init__(name="string_view")
         llty = default_manager[self].get_value_type()
         self.size_bytes = llty.get_abi_size(target_data)
 
+    @property
+    def return_type(self):
+        return UDFString()
+
 
 @register_model(StringView)
 class stringview_model(models.StructModel):

From d5c37a8c16477404e1d838fbc4764432503f5148 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Thu, 3 Nov 2022 12:27:33 -0700
Subject: [PATCH 33/54] prune imports

---
 python/cudf/cudf/core/udf/__init__.py | 17 +++--------------
 1 file changed, 3 insertions(+), 14 deletions(-)

diff --git a/python/cudf/cudf/core/udf/__init__.py b/python/cudf/cudf/core/udf/__init__.py
index e5451fa7fa1..926d2ea6cbf 100644
--- a/python/cudf/cudf/core/udf/__init__.py
+++ b/python/cudf/cudf/core/udf/__init__.py
@@ -1,15 +1,8 @@
 # Copyright (c) 2022, NVIDIA CORPORATION.
-import cupy as cp
-import numpy as np
-from numba import cuda, types
-from numba.cuda.cudaimpl import (
-    lower as cuda_lower,
-    registry as cuda_lowering_registry,
-)
 
-import rmm
+from numba import types
+from numba.cuda.cudaimpl import lower as cuda_lower
 
-from cudf.core.column import as_column
 from cudf.core.dtypes import dtype
 from cudf.core.udf import api, row_function, utils
 from cudf.utils.dtypes import STRING_TYPES
@@ -39,11 +32,7 @@
             column_from_udf_string_array,
             column_to_string_view_array,
         )
-        from strings_udf._typing import (
-            str_view_arg_handler,
-            string_view,
-            udf_string,
-        )
+        from strings_udf._typing import str_view_arg_handler, string_view
 
         from . import strings_typing  # isort: skip
         from . import strings_lowering  # isort: skip

From b7c1b1d0cf381010db8043272baf3fa861bdb3f1 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Thu, 3 Nov 2022 12:29:42 -0700
Subject: [PATCH 34/54] cleanup

---
 python/cudf/cudf/core/udf/utils.py | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/python/cudf/cudf/core/udf/utils.py b/python/cudf/cudf/core/udf/utils.py
index f7ec0fd5ce0..750ab0bded0 100644
--- a/python/cudf/cudf/core/udf/utils.py
+++ b/python/cudf/cudf/core/udf/utils.py
@@ -226,13 +226,6 @@ def _compile_or_get(frame, func, args, kernel_getter=None):
         else scalar_return_type.np_dtype
     )
 
-    # try:
-    #    np_return_type = numpy_support.as_dtype(scalar_return_type)
-    # except numba.core.errors.NumbaNotImplementedError:
-    #    # TODO: fix
-    #    np_return_type = np.dtype("object")
-    # np_return_type = numpy_support.as_dtype(scalar_return_type)
-
     precompiled[cache_key] = (kernel, np_return_type)
 
     return kernel, np_return_type

From 267b90497570ab15afbc42ab58ac4fd0a5ab0645 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Fri, 4 Nov 2022 07:03:26 -0700
Subject: [PATCH 35/54] begin to address reviews

---
 python/strings_udf/strings_udf/lowering.py           |  8 +-------
 .../strings_udf/tests/test_string_udfs.py            | 12 ++++++------
 2 files changed, 7 insertions(+), 13 deletions(-)

diff --git a/python/strings_udf/strings_udf/lowering.py b/python/strings_udf/strings_udf/lowering.py
index f7e67129ebe..909b0e56187 100644
--- a/python/strings_udf/strings_udf/lowering.py
+++ b/python/strings_udf/strings_udf/lowering.py
@@ -18,17 +18,11 @@
 
 character_flags_table_ptr = get_character_flags_table_ptr()
 
-
-# read-only functions
-# We will provide only one overload for this set of functions, which will
-# expect a string_view. When a literal is encountered, numba will promote it to
-# a string_view whereas when a dstring is encountered, numba will convert it to
-# a view via its native view() method.
-
 _STR_VIEW_PTR = types.CPointer(string_view)
 
 
 # CUDA function declarations
+# read-only (input is a string_view, output is a fixed with type)
 _string_view_len = cuda.declare_device("len", size_type(_STR_VIEW_PTR))
 
 
diff --git a/python/strings_udf/strings_udf/tests/test_string_udfs.py b/python/strings_udf/strings_udf/tests/test_string_udfs.py
index f29f6345045..ca3fbda4eb1 100644
--- a/python/strings_udf/strings_udf/tests/test_string_udfs.py
+++ b/python/strings_udf/strings_udf/tests/test_string_udfs.py
@@ -59,21 +59,21 @@ def run_udf_test(data, func, dtype):
     comparing it with the equivalent pandas result
     """
     if dtype == "str":
-        output_ary = rmm.DeviceBuffer(size=len(data) * udf_string.size_bytes)
+        output = rmm.DeviceBuffer(size=len(data) * udf_string.size_bytes)
     else:
         dtype = np.dtype(dtype)
-        output_ary = cudf.core.column.column_empty(len(data), dtype=dtype)
+        output = cudf.core.column.column_empty(len(data), dtype=dtype)
 
     cudf_column = cudf.core.column.as_column(data)
-    str_view_ary = column_to_string_view_array(cudf_column)
+    str_views = column_to_string_view_array(cudf_column)
 
     kernel = get_kernel(func, dtype, len(data))
-    kernel.forall(len(data))(str_view_ary, output_ary)
+    kernel.forall(len(data))(str_views, output)
 
     if dtype == "str":
-        output_ary = column_from_udf_string_array(output_ary)
+        output = column_from_udf_string_array(output)
 
-    got = cudf.Series(output_ary, dtype=dtype)
+    got = cudf.Series(output, dtype=dtype)
     expect = pd.Series(data).apply(func)
     assert_eq(expect, got, check_dtype=False)
 

From 8b7a412e469c444642cad3d4f990e386ce3bfa7d Mon Sep 17 00:00:00 2001
From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com>
Date: Fri, 4 Nov 2022 09:04:09 -0500
Subject: [PATCH 36/54] Update
 python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx

Co-authored-by: Vyas Ramasubramani <vyas.ramasubramani@gmail.com>
---
 python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx b/python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx
index 32d4d6cb073..4fc9e473fa3 100644
--- a/python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx
+++ b/python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx
@@ -34,9 +34,8 @@ def column_from_udf_string_array(DeviceBuffer d_buffer):
 
     with nogil:
         c_result = move(cpp_column_from_udf_string_array(data, size))
+        cpp_free_udf_string_array(data, size)
 
     result = Column.from_unique_ptr(move(c_result))
-    with nogil:
-        cpp_free_udf_string_array(data, size)
 
     return result

From 4f821ca89e07af6ccdb7deedd8876634ead58910 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Fri, 4 Nov 2022 07:25:06 -0700
Subject: [PATCH 37/54] finish addressing reviews, walrus everywhere!

---
 python/cudf/cudf/core/udf/utils.py                      | 6 ++----
 python/strings_udf/strings_udf/_lib/cpp/strings_udf.pxd | 4 ++--
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/python/cudf/cudf/core/udf/utils.py b/python/cudf/cudf/core/udf/utils.py
index 750ab0bded0..4d40d41f9c3 100644
--- a/python/cudf/cudf/core/udf/utils.py
+++ b/python/cudf/cudf/core/udf/utils.py
@@ -263,14 +263,12 @@ def _get_input_args_from_frame(fr):
 
 
 def _return_arr_from_dtype(dt, size):
-    extensionty = masked_array_types.get(dt)
-    if extensionty:
+    if extensionty := masked_array_types.get(dt):
         return rmm.DeviceBuffer(size=size * extensionty.return_type.size_bytes)
     return cp.empty(size, dtype=dt)
 
 
 def _post_process_output_col(col, retty):
-    getter = output_col_getters.get(retty)
-    if getter:
+    if getter := output_col_getters.get(retty):
         col = getter(col)
     return as_column(col, retty)
diff --git a/python/strings_udf/strings_udf/_lib/cpp/strings_udf.pxd b/python/strings_udf/strings_udf/_lib/cpp/strings_udf.pxd
index 4835325dbd7..7b90760abcc 100644
--- a/python/strings_udf/strings_udf/_lib/cpp/strings_udf.pxd
+++ b/python/strings_udf/strings_udf/_lib/cpp/strings_udf.pxd
@@ -19,10 +19,10 @@ cdef extern from "cudf/strings/udf/udf_apis.hpp"  namespace \
         "cudf::strings::udf" nogil:
     cdef unique_ptr[device_buffer] to_string_view_array(column_view) except +
     cdef unique_ptr[column] column_from_udf_string_array(
-        udf_string* d_strings, size_type size,
+        udf_string* strings, size_type size,
     ) except +
     cdef void free_udf_string_array(
-        udf_string* d_strings, size_type size
+        udf_string* strings, size_type size
     ) except +
 
 cdef extern from "cudf/strings/detail/char_tables.hpp" namespace \

From b0a8681334d283b02ecfcdecd8fb6670e8ee8a53 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 7 Nov 2022 11:50:24 -0800
Subject: [PATCH 38/54] support strip

---
 .../strings_udf/cpp/src/strings/udf/shim.cu   | 15 +++++++
 python/strings_udf/strings_udf/_typing.py     | 19 +++++++--
 python/strings_udf/strings_udf/lowering.py    | 40 ++++++++++++++++++-
 .../strings_udf/tests/test_string_udfs.py     |  8 ++++
 4 files changed, 78 insertions(+), 4 deletions(-)

diff --git a/python/strings_udf/cpp/src/strings/udf/shim.cu b/python/strings_udf/cpp/src/strings/udf/shim.cu
index b284d58fe58..8c19a59d45c 100644
--- a/python/strings_udf/cpp/src/strings/udf/shim.cu
+++ b/python/strings_udf/cpp/src/strings/udf/shim.cu
@@ -17,6 +17,7 @@
 #include <cudf/strings/udf/char_types.cuh>
 #include <cudf/strings/udf/search.cuh>
 #include <cudf/strings/udf/starts_with.cuh>
+#include <cudf/strings/udf/strip.cuh>
 #include <cudf/strings/udf/udf_string.cuh>
 
 using namespace cudf::strings::udf;
@@ -227,3 +228,17 @@ extern "C" __device__ int udf_string_from_string_view(int* nb_retbal,
 
   return 0;
 }
+
+extern "C" __device__ int strip(int* nb_retval,
+                                void* udf_str,
+                                void* const* to_strip,
+                                void* const* strip_str)
+{
+  auto to_strip_ptr  = reinterpret_cast<cudf::string_view const*>(to_strip);
+  auto strip_str_ptr = reinterpret_cast<cudf::string_view const*>(strip_str);
+  auto udf_str_ptr   = reinterpret_cast<udf_string*>(udf_str);
+
+  *udf_str_ptr = strip(*to_strip_ptr, *strip_str_ptr);
+
+  return 0;
+}
diff --git a/python/strings_udf/strings_udf/_typing.py b/python/strings_udf/strings_udf/_typing.py
index 320958960cd..e70ab2c8a63 100644
--- a/python/strings_udf/strings_udf/_typing.py
+++ b/python/strings_udf/strings_udf/_typing.py
@@ -181,7 +181,7 @@ def attr(self, mod):
     return attr
 
 
-def create_identifier_attr(attrname):
+def create_identifier_attr(attrname, retty):
     """
     Helper function wrapping numba's low level extension API. Provides
     the boilerplate needed to register a unary function of a string
@@ -192,7 +192,7 @@ class StringViewIdentifierAttr(AbstractTemplate):
         key = f"StringView.{attrname}"
 
         def generic(self, args, kws):
-            return nb_signature(types.boolean, recvr=self.this)
+            return nb_signature(retty, recvr=self.this)
 
     def attr(self, mod):
         return types.BoundFunction(StringViewIdentifierAttr, string_view)
@@ -229,6 +229,7 @@ def resolve_count(self, mod):
     "isnumeric",
     "istitle",
 ]
+string_binary_funcs = ["strip"]
 
 for func in bool_binary_funcs:
     setattr(
@@ -237,12 +238,24 @@ def resolve_count(self, mod):
         create_binary_attr(func, types.boolean),
     )
 
+for func in string_binary_funcs:
+    setattr(
+        StringViewAttrs,
+        f"resolve_{func}",
+        create_binary_attr(func, udf_string),
+    )
+
+
 for func in int_binary_funcs:
     setattr(
         StringViewAttrs, f"resolve_{func}", create_binary_attr(func, size_type)
     )
 
 for func in id_unary_funcs:
-    setattr(StringViewAttrs, f"resolve_{func}", create_identifier_attr(func))
+    setattr(
+        StringViewAttrs,
+        f"resolve_{func}",
+        create_identifier_attr(func, types.boolean),
+    )
 
 cuda_decl_registry.register_attr(StringViewAttrs)
diff --git a/python/strings_udf/strings_udf/lowering.py b/python/strings_udf/strings_udf/lowering.py
index 909b0e56187..26c019e9b1b 100644
--- a/python/strings_udf/strings_udf/lowering.py
+++ b/python/strings_udf/strings_udf/lowering.py
@@ -19,6 +19,7 @@
 character_flags_table_ptr = get_character_flags_table_ptr()
 
 _STR_VIEW_PTR = types.CPointer(string_view)
+_UDF_STRING_PTR = types.CPointer(udf_string)
 
 
 # CUDA function declarations
@@ -55,7 +56,9 @@ def _declare_binary_func(lhs, rhs, out, name):
 _string_view_find = _declare_size_type_str_str_func("find")
 _string_view_rfind = _declare_size_type_str_str_func("rfind")
 _string_view_contains = _declare_bool_str_str_func("contains")
-
+_string_view_strip = cuda.declare_device(
+    "strip", types.int32(_UDF_STRING_PTR, _STR_VIEW_PTR, _STR_VIEW_PTR)
+)
 
 # A binary function of the form f(string, int) -> bool
 _declare_bool_str_int_func = partial(
@@ -179,6 +182,36 @@ def binary_func_impl(context, builder, sig, args):
     return deco
 
 
+def create_binary_string_func_return_string(binary_func):
+    def deco(cuda_func):
+        @cuda_lower(binary_func, string_view, string_view)
+        def binary_func_return_string_impl(context, builder, sig, args):
+            lhs_ptr = builder.alloca(args[0].type)
+            rhs_ptr = builder.alloca(args[1].type)
+            builder.store(args[0], lhs_ptr)
+            builder.store(args[1], rhs_ptr)
+
+            udf_str_ptr = builder.alloca(
+                default_manager[udf_string].get_value_type()
+            )
+
+            _ = context.compile_internal(
+                builder,
+                cuda_func,
+                size_type(_UDF_STRING_PTR, _STR_VIEW_PTR, _STR_VIEW_PTR),
+                (udf_str_ptr, lhs_ptr, rhs_ptr),
+            )
+
+            result = cgutils.create_struct_proxy(udf_string)(
+                context, builder, value=builder.load(udf_str_ptr)
+            )
+            return result._getvalue()
+
+        return binary_func_return_string_impl
+
+    return deco
+
+
 @create_binary_string_func(operator.contains, types.boolean)
 def contains_impl(st, substr):
     return _string_view_contains(st, substr)
@@ -214,6 +247,11 @@ def lt_impl(st, rhs):
     return _string_view_lt(st, rhs)
 
 
+@create_binary_string_func_return_string("StringView.strip")
+def strip_impl(result, to_strip, strip_char):
+    return _string_view_strip(result, to_strip, strip_char)
+
+
 @create_binary_string_func("StringView.startswith", types.boolean)
 def startswith_impl(sv, substr):
     return _string_view_startswith(sv, substr)
diff --git a/python/strings_udf/strings_udf/tests/test_string_udfs.py b/python/strings_udf/strings_udf/tests/test_string_udfs.py
index ca3fbda4eb1..025e98b3b86 100644
--- a/python/strings_udf/strings_udf/tests/test_string_udfs.py
+++ b/python/strings_udf/strings_udf/tests/test_string_udfs.py
@@ -278,3 +278,11 @@ def func(st):
         return st
 
     run_udf_test(data, func, "str")
+
+
+@pytest.mark.parametrize("strip_char", ["1", "a", "12", " ", "", ".", "@"])
+def test_string_udf_strip(data, strip_char):
+    def func(st):
+        return st.strip(strip_char)
+
+    run_udf_test(data, func, "str")

From 18aee5a574a8abd85d048d8bc3580aac9908146f Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Tue, 8 Nov 2022 06:46:50 -0800
Subject: [PATCH 39/54] updates

---
 .../strings_udf/cpp/src/strings/udf/shim.cu   | 28 +++++++++++++++++++
 python/strings_udf/strings_udf/_typing.py     |  2 +-
 python/strings_udf/strings_udf/lowering.py    | 16 +++++++++++
 .../strings_udf/tests/test_string_udfs.py     | 16 +++++++++++
 4 files changed, 61 insertions(+), 1 deletion(-)

diff --git a/python/strings_udf/cpp/src/strings/udf/shim.cu b/python/strings_udf/cpp/src/strings/udf/shim.cu
index 8c19a59d45c..63e740c5226 100644
--- a/python/strings_udf/cpp/src/strings/udf/shim.cu
+++ b/python/strings_udf/cpp/src/strings/udf/shim.cu
@@ -242,3 +242,31 @@ extern "C" __device__ int strip(int* nb_retval,
 
   return 0;
 }
+
+extern "C" __device__ int lstrip(int* nb_retval,
+                                 void* udf_str,
+                                 void* const* to_strip,
+                                 void* const* strip_str)
+{
+  auto to_strip_ptr  = reinterpret_cast<cudf::string_view const*>(to_strip);
+  auto strip_str_ptr = reinterpret_cast<cudf::string_view const*>(strip_str);
+  auto udf_str_ptr   = reinterpret_cast<udf_string*>(udf_str);
+
+  *udf_str_ptr = strip(*to_strip_ptr, *strip_str_ptr, cudf::strings::side_type::LEFT);
+
+  return 0;
+}
+
+extern "C" __device__ int rstrip(int* nb_retval,
+                                 void* udf_str,
+                                 void* const* to_strip,
+                                 void* const* strip_str)
+{
+  auto to_strip_ptr  = reinterpret_cast<cudf::string_view const*>(to_strip);
+  auto strip_str_ptr = reinterpret_cast<cudf::string_view const*>(strip_str);
+  auto udf_str_ptr   = reinterpret_cast<udf_string*>(udf_str);
+
+  *udf_str_ptr = strip(*to_strip_ptr, *strip_str_ptr, cudf::strings::side_type::RIGHT);
+
+  return 0;
+}
diff --git a/python/strings_udf/strings_udf/_typing.py b/python/strings_udf/strings_udf/_typing.py
index e70ab2c8a63..5da73413cef 100644
--- a/python/strings_udf/strings_udf/_typing.py
+++ b/python/strings_udf/strings_udf/_typing.py
@@ -229,7 +229,7 @@ def resolve_count(self, mod):
     "isnumeric",
     "istitle",
 ]
-string_binary_funcs = ["strip"]
+string_binary_funcs = ["strip", "lstrip", "rstrip"]
 
 for func in bool_binary_funcs:
     setattr(
diff --git a/python/strings_udf/strings_udf/lowering.py b/python/strings_udf/strings_udf/lowering.py
index 26c019e9b1b..2b445094861 100644
--- a/python/strings_udf/strings_udf/lowering.py
+++ b/python/strings_udf/strings_udf/lowering.py
@@ -59,6 +59,12 @@ def _declare_binary_func(lhs, rhs, out, name):
 _string_view_strip = cuda.declare_device(
     "strip", types.int32(_UDF_STRING_PTR, _STR_VIEW_PTR, _STR_VIEW_PTR)
 )
+_string_view_lstrip = cuda.declare_device(
+    "strip", types.int32(_UDF_STRING_PTR, _STR_VIEW_PTR, _STR_VIEW_PTR)
+)
+_string_view_rstrip = cuda.declare_device(
+    "strip", types.int32(_UDF_STRING_PTR, _STR_VIEW_PTR, _STR_VIEW_PTR)
+)
 
 # A binary function of the form f(string, int) -> bool
 _declare_bool_str_int_func = partial(
@@ -252,6 +258,16 @@ def strip_impl(result, to_strip, strip_char):
     return _string_view_strip(result, to_strip, strip_char)
 
 
+@create_binary_string_func_return_string("StringView.lstrip")
+def lstrip_impl(result, to_strip, strip_char):
+    return _string_view_lstrip(result, to_strip, strip_char)
+
+
+@create_binary_string_func_return_string("StringView.rstrip")
+def rstrip_impl(result, to_strip, strip_char):
+    return _string_view_rstrip(result, to_strip, strip_char)
+
+
 @create_binary_string_func("StringView.startswith", types.boolean)
 def startswith_impl(sv, substr):
     return _string_view_startswith(sv, substr)
diff --git a/python/strings_udf/strings_udf/tests/test_string_udfs.py b/python/strings_udf/strings_udf/tests/test_string_udfs.py
index 025e98b3b86..522433d404f 100644
--- a/python/strings_udf/strings_udf/tests/test_string_udfs.py
+++ b/python/strings_udf/strings_udf/tests/test_string_udfs.py
@@ -286,3 +286,19 @@ def func(st):
         return st.strip(strip_char)
 
     run_udf_test(data, func, "str")
+
+
+@pytest.mark.parametrize("strip_char", ["1", "a", "12", " ", "", ".", "@"])
+def test_string_udf_lstrip(data, strip_char):
+    def func(st):
+        return st.lstrip(strip_char)
+
+    run_udf_test(data, func, "str")
+
+
+@pytest.mark.parametrize("strip_char", ["1", "a", "12", " ", "", ".", "@"])
+def test_string_udf_rstrip(data, strip_char):
+    def func(st):
+        return st.rstrip(strip_char)
+
+    run_udf_test(data, func, "str")

From d7556b078fa337a19b78e771201c07947b67d5d7 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Tue, 8 Nov 2022 07:03:05 -0800
Subject: [PATCH 40/54] fix bad merge

---
 python/strings_udf/cpp/src/strings/udf/shim.cu | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/python/strings_udf/cpp/src/strings/udf/shim.cu b/python/strings_udf/cpp/src/strings/udf/shim.cu
index ed254b8e4e1..63e740c5226 100644
--- a/python/strings_udf/cpp/src/strings/udf/shim.cu
+++ b/python/strings_udf/cpp/src/strings/udf/shim.cu
@@ -17,10 +17,7 @@
 #include <cudf/strings/udf/char_types.cuh>
 #include <cudf/strings/udf/search.cuh>
 #include <cudf/strings/udf/starts_with.cuh>
-<<<<<<< HEAD
 #include <cudf/strings/udf/strip.cuh>
-=======
->>>>>>> branch-22.12
 #include <cudf/strings/udf/udf_string.cuh>
 
 using namespace cudf::strings::udf;

From c4f884716b379f2e4c68a7159c81de06944cb59f Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Tue, 8 Nov 2022 07:04:14 -0800
Subject: [PATCH 41/54] add tests to cudf

---
 python/cudf/cudf/tests/test_udf_masked_ops.py | 24 +++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py
index b4c7cef3a4c..2d9951982f9 100644
--- a/python/cudf/cudf/tests/test_udf_masked_ops.py
+++ b/python/cudf/cudf/tests/test_udf_masked_ops.py
@@ -876,6 +876,30 @@ def func(row):
     run_masked_udf_test(func, str_udf_data, check_dtype=False)
 
 
+@pytest.mark.parametrize("strip_char", ["1", "a", "12", " ", "", ".", "@"])
+def test_string_udf_strip(str_udf_data, strip_char):
+    def func(row):
+        return row["str_col"].strip(strip_char)
+
+    run_masked_udf_test(func, str_udf_data, check_dtype=False)
+
+
+@pytest.mark.parametrize("strip_char", ["1", "a", "12", " ", "", ".", "@"])
+def test_string_udf_lstrip(str_udf_data, strip_char):
+    def func(row):
+        return row["str_col"].lstrip(strip_char)
+
+    run_masked_udf_test(func, str_udf_data, check_dtype=False)
+
+
+@pytest.mark.parametrize("strip_char", ["1", "a", "12", " ", "", ".", "@"])
+def test_string_udf_rstrip(str_udf_data, strip_char):
+    def func(row):
+        return row["str_col"].rstrip(strip_char)
+
+    run_masked_udf_test(func, str_udf_data, check_dtype=False)
+
+
 @pytest.mark.parametrize(
     "data", [[1.0, 0.0, 1.5], [1, 0, 2], [True, False, True]]
 )

From 7030108082cc2c7a54d1c722dab6a04c996280cb Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Tue, 8 Nov 2022 07:31:05 -0800
Subject: [PATCH 42/54] plumb to maskedtype

---
 python/cudf/cudf/core/udf/__init__.py         | 11 +++++++++--
 python/cudf/cudf/core/udf/masked_typing.py    |  1 +
 python/cudf/cudf/core/udf/strings_lowering.py | 12 +++++++++++-
 python/cudf/cudf/core/udf/strings_typing.py   |  9 +++++++++
 4 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/core/udf/__init__.py b/python/cudf/cudf/core/udf/__init__.py
index 926d2ea6cbf..8421d763167 100644
--- a/python/cudf/cudf/core/udf/__init__.py
+++ b/python/cudf/cudf/core/udf/__init__.py
@@ -32,7 +32,11 @@
             column_from_udf_string_array,
             column_to_string_view_array,
         )
-        from strings_udf._typing import str_view_arg_handler, string_view
+        from strings_udf._typing import (
+            str_view_arg_handler,
+            string_view,
+            udf_string,
+        )
 
         from . import strings_typing  # isort: skip
         from . import strings_lowering  # isort: skip
@@ -41,7 +45,7 @@
             masked_lowering.masked_constructor
         )
         utils.JIT_SUPPORTED_TYPES |= STRING_TYPES
-        _supported_masked_types |= {string_view}
+        _supported_masked_types |= {string_view, udf_string}
 
         utils.launch_arg_getters[cudf_str_dtype] = column_to_string_view_array
         utils.output_col_getters[cudf_str_dtype] = column_from_udf_string_array
@@ -49,6 +53,9 @@
         row_function.itemsizes[cudf_str_dtype] = string_view.size_bytes
 
         utils.arg_handlers.append(str_view_arg_handler)
+
+        masked_typing.MASKED_INIT_MAP[udf_string] = udf_string
+
         _STRING_UDFS_ENABLED = True
 
 except ImportError as e:
diff --git a/python/cudf/cudf/core/udf/masked_typing.py b/python/cudf/cudf/core/udf/masked_typing.py
index 7baf2d585e2..f600e33ff64 100644
--- a/python/cudf/cudf/core/udf/masked_typing.py
+++ b/python/cudf/cudf/core/udf/masked_typing.py
@@ -70,6 +70,7 @@ def _type_to_masked_type(t):
                 "attempting to use a column of unsupported dtype in a UDF. "
                 f"Supported dtypes are:\n{supported_type_str}"
             )
+            breakpoint()
             return types.Poison(err)
     else:
         return result
diff --git a/python/cudf/cudf/core/udf/strings_lowering.py b/python/cudf/cudf/core/udf/strings_lowering.py
index 59041977f87..fdfd013bad7 100644
--- a/python/cudf/cudf/core/udf/strings_lowering.py
+++ b/python/cudf/cudf/core/udf/strings_lowering.py
@@ -7,7 +7,7 @@
 from numba.core.typing import signature as nb_signature
 from numba.cuda.cudaimpl import lower as cuda_lower
 
-from strings_udf._typing import size_type, string_view
+from strings_udf._typing import size_type, string_view, udf_string
 from strings_udf.lowering import (
     contains_impl,
     count_impl,
@@ -22,8 +22,11 @@
     istitle_impl,
     isupper_impl,
     len_impl,
+    lstrip_impl,
     rfind_impl,
+    rstrip_impl,
     startswith_impl,
+    strip_impl,
 )
 
 from cudf.core.udf.masked_typing import MaskedType
@@ -79,6 +82,13 @@ def masked_binary_func_impl(context, builder, sig, args):
     )
 
 
+create_binary_string_func("MaskedType.strip", strip_impl, udf_string)
+
+create_binary_string_func("MaskedType.lstrip", lstrip_impl, udf_string)
+
+create_binary_string_func("MaskedType.rstrip", rstrip_impl, udf_string)
+
+
 create_binary_string_func(
     "MaskedType.startswith",
     startswith_impl,
diff --git a/python/cudf/cudf/core/udf/strings_typing.py b/python/cudf/cudf/core/udf/strings_typing.py
index 1179688651f..93a45680cf1 100644
--- a/python/cudf/cudf/core/udf/strings_typing.py
+++ b/python/cudf/cudf/core/udf/strings_typing.py
@@ -13,7 +13,9 @@
     id_unary_funcs,
     int_binary_funcs,
     size_type,
+    string_binary_funcs,
     string_view,
+    udf_string,
 )
 
 from cudf.core.udf import masked_typing
@@ -172,6 +174,13 @@ def resolve_valid(self, mod):
         create_masked_binary_attr(f"MaskedType.{func}", size_type),
     )
 
+for func in string_binary_funcs:
+    setattr(
+        MaskedStringViewAttrs,
+        f"resolve_{func}",
+        create_masked_binary_attr(f"MaskedType.{func}", udf_string),
+    )
+
 for func in id_unary_funcs:
     setattr(
         MaskedStringViewAttrs,

From 11e966c4d45c0b3c1ef4b2302a1541f87b4ccc8e Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Tue, 8 Nov 2022 07:45:21 -0800
Subject: [PATCH 43/54] cleanup

---
 python/strings_udf/strings_udf/lowering.py | 75 ++++++++++------------
 1 file changed, 34 insertions(+), 41 deletions(-)

diff --git a/python/strings_udf/strings_udf/lowering.py b/python/strings_udf/strings_udf/lowering.py
index 2b445094861..3dfc2b1c413 100644
--- a/python/strings_udf/strings_udf/lowering.py
+++ b/python/strings_udf/strings_udf/lowering.py
@@ -171,53 +171,46 @@ def deco(cuda_func):
         def binary_func_impl(context, builder, sig, args):
             lhs_ptr = builder.alloca(args[0].type)
             rhs_ptr = builder.alloca(args[1].type)
-
             builder.store(args[0], lhs_ptr)
             builder.store(args[1], rhs_ptr)
-            result = context.compile_internal(
-                builder,
-                cuda_func,
-                nb_signature(retty, _STR_VIEW_PTR, _STR_VIEW_PTR),
-                (lhs_ptr, rhs_ptr),
-            )
 
-            return result
+            # these conditional statements should compile out
+            if retty != udf_string:
+                # binary function of two strings yielding a fixed-width type
+                # example: str.startswith(other) -> bool
+                # shim functions can return the value through nb_retval
+                result = context.compile_internal(
+                    builder,
+                    cuda_func,
+                    nb_signature(retty, _STR_VIEW_PTR, _STR_VIEW_PTR),
+                    (lhs_ptr, rhs_ptr),
+                )
+                return result
+            else:
+                # binary function of two strings yielding a new string
+                # example: str.strip(other) -> str
+                # shim functions can not return a struct due to C linkage
+                # so we operate on an extra void ptr and throw away nb_retval
+                udf_str_ptr = builder.alloca(
+                    default_manager[udf_string].get_value_type()
+                )
+
+                _ = context.compile_internal(
+                    builder,
+                    cuda_func,
+                    size_type(_UDF_STRING_PTR, _STR_VIEW_PTR, _STR_VIEW_PTR),
+                    (udf_str_ptr, lhs_ptr, rhs_ptr),
+                )
+                result = cgutils.create_struct_proxy(udf_string)(
+                    context, builder, value=builder.load(udf_str_ptr)
+                )
+                return result._getvalue()
 
         return binary_func_impl
 
     return deco
 
 
-def create_binary_string_func_return_string(binary_func):
-    def deco(cuda_func):
-        @cuda_lower(binary_func, string_view, string_view)
-        def binary_func_return_string_impl(context, builder, sig, args):
-            lhs_ptr = builder.alloca(args[0].type)
-            rhs_ptr = builder.alloca(args[1].type)
-            builder.store(args[0], lhs_ptr)
-            builder.store(args[1], rhs_ptr)
-
-            udf_str_ptr = builder.alloca(
-                default_manager[udf_string].get_value_type()
-            )
-
-            _ = context.compile_internal(
-                builder,
-                cuda_func,
-                size_type(_UDF_STRING_PTR, _STR_VIEW_PTR, _STR_VIEW_PTR),
-                (udf_str_ptr, lhs_ptr, rhs_ptr),
-            )
-
-            result = cgutils.create_struct_proxy(udf_string)(
-                context, builder, value=builder.load(udf_str_ptr)
-            )
-            return result._getvalue()
-
-        return binary_func_return_string_impl
-
-    return deco
-
-
 @create_binary_string_func(operator.contains, types.boolean)
 def contains_impl(st, substr):
     return _string_view_contains(st, substr)
@@ -253,17 +246,17 @@ def lt_impl(st, rhs):
     return _string_view_lt(st, rhs)
 
 
-@create_binary_string_func_return_string("StringView.strip")
+@create_binary_string_func("StringView.strip", udf_string)
 def strip_impl(result, to_strip, strip_char):
     return _string_view_strip(result, to_strip, strip_char)
 
 
-@create_binary_string_func_return_string("StringView.lstrip")
+@create_binary_string_func("StringView.lstrip", udf_string)
 def lstrip_impl(result, to_strip, strip_char):
     return _string_view_lstrip(result, to_strip, strip_char)
 
 
-@create_binary_string_func_return_string("StringView.rstrip")
+@create_binary_string_func("StringView.rstrip", udf_string)
 def rstrip_impl(result, to_strip, strip_char):
     return _string_view_rstrip(result, to_strip, strip_char)
 

From 9991c7607994fa06d08b64d0f8a55c8d03c2f2d5 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Tue, 8 Nov 2022 07:46:07 -0800
Subject: [PATCH 44/54] more cleanup

---
 python/cudf/cudf/core/udf/masked_typing.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/python/cudf/cudf/core/udf/masked_typing.py b/python/cudf/cudf/core/udf/masked_typing.py
index f600e33ff64..7baf2d585e2 100644
--- a/python/cudf/cudf/core/udf/masked_typing.py
+++ b/python/cudf/cudf/core/udf/masked_typing.py
@@ -70,7 +70,6 @@ def _type_to_masked_type(t):
                 "attempting to use a column of unsupported dtype in a UDF. "
                 f"Supported dtypes are:\n{supported_type_str}"
             )
-            breakpoint()
             return types.Poison(err)
     else:
         return result

From 837a49c727770066a0a06801ff69de47824be229 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com>
Date: Wed, 9 Nov 2022 10:27:59 -0600
Subject: [PATCH 45/54] Update python/strings_udf/strings_udf/lowering.py

Co-authored-by: Vyas Ramasubramani <vyas.ramasubramani@gmail.com>
---
 python/strings_udf/strings_udf/lowering.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/python/strings_udf/strings_udf/lowering.py b/python/strings_udf/strings_udf/lowering.py
index 3dfc2b1c413..49ba77ee996 100644
--- a/python/strings_udf/strings_udf/lowering.py
+++ b/python/strings_udf/strings_udf/lowering.py
@@ -190,7 +190,11 @@ def binary_func_impl(context, builder, sig, args):
                 # binary function of two strings yielding a new string
                 # example: str.strip(other) -> str
                 # shim functions can not return a struct due to C linkage
-                # so we operate on an extra void ptr and throw away nb_retval
+                # so we create a new udf_string and pass a pointer to it
+                # for the shim function to write the output to. The return
+                # value of compile_internal is therefore discarded (although
+                # this may change in the future if we need to return error
+                # codes, for instance).
                 udf_str_ptr = builder.alloca(
                     default_manager[udf_string].get_value_type()
                 )

From 302fe6021ceb58f31fc140402b1ac3b3b93ae3b0 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Wed, 9 Nov 2022 08:43:00 -0800
Subject: [PATCH 46/54] address reviews

---
 python/cudf/cudf/core/udf/strings_typing.py   | 4 ++--
 python/cudf/cudf/tests/test_udf_masked_ops.py | 3 +++
 python/strings_udf/strings_udf/_typing.py     | 4 ++--
 3 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/python/cudf/cudf/core/udf/strings_typing.py b/python/cudf/cudf/core/udf/strings_typing.py
index 93a45680cf1..f8f50600b12 100644
--- a/python/cudf/cudf/core/udf/strings_typing.py
+++ b/python/cudf/cudf/core/udf/strings_typing.py
@@ -13,7 +13,7 @@
     id_unary_funcs,
     int_binary_funcs,
     size_type,
-    string_binary_funcs,
+    string_return_attrs,
     string_view,
     udf_string,
 )
@@ -174,7 +174,7 @@ def resolve_valid(self, mod):
         create_masked_binary_attr(f"MaskedType.{func}", size_type),
     )
 
-for func in string_binary_funcs:
+for func in string_return_attrs:
     setattr(
         MaskedStringViewAttrs,
         f"resolve_{func}",
diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py
index 2d9951982f9..7af47f981d6 100644
--- a/python/cudf/cudf/tests/test_udf_masked_ops.py
+++ b/python/cudf/cudf/tests/test_udf_masked_ops.py
@@ -876,6 +876,7 @@ def func(row):
     run_masked_udf_test(func, str_udf_data, check_dtype=False)
 
 
+@string_udf_test
 @pytest.mark.parametrize("strip_char", ["1", "a", "12", " ", "", ".", "@"])
 def test_string_udf_strip(str_udf_data, strip_char):
     def func(row):
@@ -884,6 +885,7 @@ def func(row):
     run_masked_udf_test(func, str_udf_data, check_dtype=False)
 
 
+@string_udf_test
 @pytest.mark.parametrize("strip_char", ["1", "a", "12", " ", "", ".", "@"])
 def test_string_udf_lstrip(str_udf_data, strip_char):
     def func(row):
@@ -892,6 +894,7 @@ def func(row):
     run_masked_udf_test(func, str_udf_data, check_dtype=False)
 
 
+@string_udf_test
 @pytest.mark.parametrize("strip_char", ["1", "a", "12", " ", "", ".", "@"])
 def test_string_udf_rstrip(str_udf_data, strip_char):
     def func(row):
diff --git a/python/strings_udf/strings_udf/_typing.py b/python/strings_udf/strings_udf/_typing.py
index 5da73413cef..a309a9cb93c 100644
--- a/python/strings_udf/strings_udf/_typing.py
+++ b/python/strings_udf/strings_udf/_typing.py
@@ -229,7 +229,7 @@ def resolve_count(self, mod):
     "isnumeric",
     "istitle",
 ]
-string_binary_funcs = ["strip", "lstrip", "rstrip"]
+string_return_attrs = ["strip", "lstrip", "rstrip"]
 
 for func in bool_binary_funcs:
     setattr(
@@ -238,7 +238,7 @@ def resolve_count(self, mod):
         create_binary_attr(func, types.boolean),
     )
 
-for func in string_binary_funcs:
+for func in string_return_attrs:
     setattr(
         StringViewAttrs,
         f"resolve_{func}",

From e0f98cc9ee0dc413f755e4696eaafdf5024a89c9 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Wed, 9 Nov 2022 12:07:29 -0800
Subject: [PATCH 47/54] plumb concat to empty shim function for now

---
 python/cudf/cudf/tests/test_udf_masked_ops.py |  9 ++++++
 .../strings_udf/cpp/src/strings/udf/shim.cu   |  8 ++++++
 python/strings_udf/strings_udf/_typing.py     |  5 ++++
 python/strings_udf/strings_udf/lowering.py    | 28 +++++++++++++++++++
 .../strings_udf/tests/test_string_udfs.py     |  8 ++++++
 5 files changed, 58 insertions(+)

diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py
index 7af47f981d6..fbe6b3f8888 100644
--- a/python/cudf/cudf/tests/test_udf_masked_ops.py
+++ b/python/cudf/cudf/tests/test_udf_masked_ops.py
@@ -903,6 +903,15 @@ def func(row):
     run_masked_udf_test(func, str_udf_data, check_dtype=False)
 
 
+@string_udf_test
+@pytest.mark.parametrize("concat_char", ["1", "a", "12", " ", "", ".", "@"])
+def test_string_udf_concat(str_udf_data, concat_char):
+    def func(row):
+        return row["str_col"] + concat_char
+
+    run_masked_udf_test(func, str_udf_data, check_dtype=False)
+
+
 @pytest.mark.parametrize(
     "data", [[1.0, 0.0, 1.5], [1, 0, 2], [True, False, True]]
 )
diff --git a/python/strings_udf/cpp/src/strings/udf/shim.cu b/python/strings_udf/cpp/src/strings/udf/shim.cu
index 63e740c5226..cac97d454ac 100644
--- a/python/strings_udf/cpp/src/strings/udf/shim.cu
+++ b/python/strings_udf/cpp/src/strings/udf/shim.cu
@@ -270,3 +270,11 @@ extern "C" __device__ int rstrip(int* nb_retval,
 
   return 0;
 }
+
+extern "C" __device__ int concat(int* nb_retval,
+                                 void* const* to_strip,
+                                 void* udf_str,
+                                 void* const* strip_str)
+{
+  return 0;
+}
diff --git a/python/strings_udf/strings_udf/_typing.py b/python/strings_udf/strings_udf/_typing.py
index a309a9cb93c..b678db88b95 100644
--- a/python/strings_udf/strings_udf/_typing.py
+++ b/python/strings_udf/strings_udf/_typing.py
@@ -159,8 +159,13 @@ def generic(self, args, kws):
 register_stringview_binaryop(operator.gt, types.boolean)
 register_stringview_binaryop(operator.le, types.boolean)
 register_stringview_binaryop(operator.ge, types.boolean)
+
+# st in other
 register_stringview_binaryop(operator.contains, types.boolean)
 
+# st + other
+register_stringview_binaryop(operator.add, udf_string)
+
 
 def create_binary_attr(attrname, retty):
     """
diff --git a/python/strings_udf/strings_udf/lowering.py b/python/strings_udf/strings_udf/lowering.py
index 49ba77ee996..f4b227f26dc 100644
--- a/python/strings_udf/strings_udf/lowering.py
+++ b/python/strings_udf/strings_udf/lowering.py
@@ -25,6 +25,9 @@
 # CUDA function declarations
 # read-only (input is a string_view, output is a fixed with type)
 _string_view_len = cuda.declare_device("len", size_type(_STR_VIEW_PTR))
+_concat_string_view = cuda.declare_device(
+    "concat", types.void(_UDF_STRING_PTR, _STR_VIEW_PTR, _STR_VIEW_PTR)
+)
 
 
 def _declare_binary_func(lhs, rhs, out, name):
@@ -159,6 +162,31 @@ def len_impl(context, builder, sig, args):
     return result
 
 
+def call_concat_string_view(result, lhs, rhs):
+    return _concat_string_view(result, lhs, rhs)
+
+
+@cuda_lower(operator.add, string_view, string_view)
+def concat_impl(context, builder, sig, args):
+    lhs_ptr = builder.alloca(args[0].type)
+    rhs_ptr = builder.alloca(args[1].type)
+    builder.store(args[0], lhs_ptr)
+    builder.store(args[1], rhs_ptr)
+
+    udf_str_ptr = builder.alloca(default_manager[udf_string].get_value_type())
+    _ = context.compile_internal(
+        builder,
+        call_concat_string_view,
+        types.void(_UDF_STRING_PTR, _STR_VIEW_PTR, _STR_VIEW_PTR),
+        (udf_str_ptr, lhs_ptr, rhs_ptr),
+    )
+
+    result = cgutils.create_struct_proxy(udf_string)(
+        context, builder, value=builder.load(udf_str_ptr)
+    )
+    return result._getvalue()
+
+
 def create_binary_string_func(binary_func, retty):
     """
     Provide a wrapper around numba's low-level extension API which
diff --git a/python/strings_udf/strings_udf/tests/test_string_udfs.py b/python/strings_udf/strings_udf/tests/test_string_udfs.py
index 522433d404f..abaa6919502 100644
--- a/python/strings_udf/strings_udf/tests/test_string_udfs.py
+++ b/python/strings_udf/strings_udf/tests/test_string_udfs.py
@@ -302,3 +302,11 @@ def func(st):
         return st.rstrip(strip_char)
 
     run_udf_test(data, func, "str")
+
+
+@pytest.mark.parametrize("concat_char", ["1", "a", "12", " ", "", ".", "@"])
+def test_string_udf_concat(data, concat_char):
+    def func(st):
+        return st + concat_char
+
+    run_udf_test(data, func, "str")

From 2bbba3be6f8db5b0ff1375da6cb6f5a02241191d Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Wed, 9 Nov 2022 18:03:04 -0800
Subject: [PATCH 48/54] troublesome segfaulting shim function

---
 python/strings_udf/cpp/src/strings/udf/shim.cu | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/python/strings_udf/cpp/src/strings/udf/shim.cu b/python/strings_udf/cpp/src/strings/udf/shim.cu
index cac97d454ac..73f0755c941 100644
--- a/python/strings_udf/cpp/src/strings/udf/shim.cu
+++ b/python/strings_udf/cpp/src/strings/udf/shim.cu
@@ -271,10 +271,15 @@ extern "C" __device__ int rstrip(int* nb_retval,
   return 0;
 }
 
-extern "C" __device__ int concat(int* nb_retval,
-                                 void* const* to_strip,
-                                 void* udf_str,
-                                 void* const* strip_str)
+extern "C" __device__ int concat(int* nb_retval, void* udf_str, void* const* lhs, void* const* rhs)
 {
+  auto lhs_ptr     = reinterpret_cast<cudf::string_view const*>(lhs);
+  auto rhs_ptr     = reinterpret_cast<cudf::string_view const*>(rhs);
+  auto udf_str_ptr = reinterpret_cast<udf_string*>(udf_str);
+
+  udf_string result;
+  result.append(*lhs_ptr).append(*rhs_ptr);
+  *udf_str_ptr = result;
+  printf("%s\n", result.data());
   return 0;
 }

From 906f6d5d31ed3947b5534514ae7c0aaa7f6c0cc5 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Thu, 10 Nov 2022 07:59:06 -0800
Subject: [PATCH 49/54] zero out preallocated udf_string

---
 python/strings_udf/cpp/src/strings/udf/shim.cu | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/python/strings_udf/cpp/src/strings/udf/shim.cu b/python/strings_udf/cpp/src/strings/udf/shim.cu
index 015b6d242d6..383b2ff055b 100644
--- a/python/strings_udf/cpp/src/strings/udf/shim.cu
+++ b/python/strings_udf/cpp/src/strings/udf/shim.cu
@@ -278,8 +278,10 @@ extern "C" __device__ int concat(int* nb_retval, void* udf_str, void* const* lhs
 
   auto udf_str_ptr = reinterpret_cast<udf_string*>(udf_str);
 
-  udf_string result;
+  // struct allocated by numba is not initialized
+  memset(udf_str_ptr, 0, sizeof(udf_string))
 
+    udf_string result;
   result.append(*lhs_ptr).append(*rhs_ptr);
   *udf_str_ptr = result;
   printf("%s\n", result.data());

From bdb64e9d8effb3bb4934a3f9bea6185040adc248 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Thu, 10 Nov 2022 08:37:16 -0800
Subject: [PATCH 50/54] add tests for maskedtype and a little extra typing

---
 python/cudf/cudf/core/udf/strings_typing.py    | 10 ++++++++++
 python/strings_udf/cpp/src/strings/udf/shim.cu |  4 ++--
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/core/udf/strings_typing.py b/python/cudf/cudf/core/udf/strings_typing.py
index f8f50600b12..e8a35c12f71 100644
--- a/python/cudf/cudf/core/udf/strings_typing.py
+++ b/python/cudf/cudf/core/udf/strings_typing.py
@@ -59,6 +59,16 @@ def len_typing(self, args, kws):
         return nb_signature(size_type, args[0])
 
 
+@register_string_function(operator.add)
+def concat_typing(self, args, kws):
+    if _is_valid_string_arg(args[0]) and _is_valid_string_arg(args[1]):
+        return nb_signature(
+            MaskedType(udf_string),
+            MaskedType(string_view),
+            MaskedType(string_view),
+        )
+
+
 @register_string_function(operator.contains)
 def contains_typing(self, args, kws):
     if _is_valid_string_arg(args[0]) and _is_valid_string_arg(args[1]):
diff --git a/python/strings_udf/cpp/src/strings/udf/shim.cu b/python/strings_udf/cpp/src/strings/udf/shim.cu
index 383b2ff055b..5c191062bf0 100644
--- a/python/strings_udf/cpp/src/strings/udf/shim.cu
+++ b/python/strings_udf/cpp/src/strings/udf/shim.cu
@@ -279,9 +279,9 @@ extern "C" __device__ int concat(int* nb_retval, void* udf_str, void* const* lhs
   auto udf_str_ptr = reinterpret_cast<udf_string*>(udf_str);
 
   // struct allocated by numba is not initialized
-  memset(udf_str_ptr, 0, sizeof(udf_string))
+  memset(udf_str_ptr, 0, sizeof(udf_string));
 
-    udf_string result;
+  udf_string result;
   result.append(*lhs_ptr).append(*rhs_ptr);
   *udf_str_ptr = result;
   printf("%s\n", result.data());

From bfff98a0d356f0c3d3c56b89f90a54637ccc49b0 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com>
Date: Thu, 10 Nov 2022 13:34:01 -0600
Subject: [PATCH 51/54] Update python/strings_udf/cpp/src/strings/udf/shim.cu

Co-authored-by: David Wendt <45795991+davidwendt@users.noreply.github.com>
---
 python/strings_udf/cpp/src/strings/udf/shim.cu | 1 -
 1 file changed, 1 deletion(-)

diff --git a/python/strings_udf/cpp/src/strings/udf/shim.cu b/python/strings_udf/cpp/src/strings/udf/shim.cu
index 5c191062bf0..7b9a9074753 100644
--- a/python/strings_udf/cpp/src/strings/udf/shim.cu
+++ b/python/strings_udf/cpp/src/strings/udf/shim.cu
@@ -284,6 +284,5 @@ extern "C" __device__ int concat(int* nb_retval, void* udf_str, void* const* lhs
   udf_string result;
   result.append(*lhs_ptr).append(*rhs_ptr);
   *udf_str_ptr = result;
-  printf("%s\n", result.data());
   return 0;
 }

From c22123ab1eeac0b8e2a23a9ec8534cb51daee5c1 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 14 Nov 2022 09:42:04 -0800
Subject: [PATCH 52/54] move memset into lowering

---
 python/strings_udf/cpp/src/strings/udf/shim.cu | 3 ---
 python/strings_udf/strings_udf/lowering.py     | 4 +++-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/python/strings_udf/cpp/src/strings/udf/shim.cu b/python/strings_udf/cpp/src/strings/udf/shim.cu
index 7b9a9074753..bc717da4669 100644
--- a/python/strings_udf/cpp/src/strings/udf/shim.cu
+++ b/python/strings_udf/cpp/src/strings/udf/shim.cu
@@ -278,9 +278,6 @@ extern "C" __device__ int concat(int* nb_retval, void* udf_str, void* const* lhs
 
   auto udf_str_ptr = reinterpret_cast<udf_string*>(udf_str);
 
-  // struct allocated by numba is not initialized
-  memset(udf_str_ptr, 0, sizeof(udf_string));
-
   udf_string result;
   result.append(*lhs_ptr).append(*rhs_ptr);
   *udf_str_ptr = result;
diff --git a/python/strings_udf/strings_udf/lowering.py b/python/strings_udf/strings_udf/lowering.py
index d98384c0d02..f4bc73c5b6f 100644
--- a/python/strings_udf/strings_udf/lowering.py
+++ b/python/strings_udf/strings_udf/lowering.py
@@ -174,7 +174,9 @@ def concat_impl(context, builder, sig, args):
     builder.store(args[0], lhs_ptr)
     builder.store(args[1], rhs_ptr)
 
-    udf_str_ptr = builder.alloca(default_manager[udf_string].get_value_type())
+    udf_str_ptr = cgutils.alloca_once(
+        builder, default_manager[udf_string].get_value_type(), zfill=True
+    )
     _ = context.compile_internal(
         builder,
         call_concat_string_view,

From 7255d944a6c52d5358058a01c3e3cb57f3796248 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Tue, 15 Nov 2022 07:47:55 -0800
Subject: [PATCH 53/54] use placement new

---
 python/strings_udf/cpp/src/strings/udf/shim.cu | 2 +-
 python/strings_udf/strings_udf/lowering.py     | 4 +---
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/python/strings_udf/cpp/src/strings/udf/shim.cu b/python/strings_udf/cpp/src/strings/udf/shim.cu
index bc717da4669..737afff4a1b 100644
--- a/python/strings_udf/cpp/src/strings/udf/shim.cu
+++ b/python/strings_udf/cpp/src/strings/udf/shim.cu
@@ -276,7 +276,7 @@ extern "C" __device__ int concat(int* nb_retval, void* udf_str, void* const* lhs
   auto lhs_ptr = reinterpret_cast<cudf::string_view const*>(lhs);
   auto rhs_ptr = reinterpret_cast<cudf::string_view const*>(rhs);
 
-  auto udf_str_ptr = reinterpret_cast<udf_string*>(udf_str);
+  auto udf_str_ptr = new (udf_str) udf_string;
 
   udf_string result;
   result.append(*lhs_ptr).append(*rhs_ptr);
diff --git a/python/strings_udf/strings_udf/lowering.py b/python/strings_udf/strings_udf/lowering.py
index f4bc73c5b6f..d98384c0d02 100644
--- a/python/strings_udf/strings_udf/lowering.py
+++ b/python/strings_udf/strings_udf/lowering.py
@@ -174,9 +174,7 @@ def concat_impl(context, builder, sig, args):
     builder.store(args[0], lhs_ptr)
     builder.store(args[1], rhs_ptr)
 
-    udf_str_ptr = cgutils.alloca_once(
-        builder, default_manager[udf_string].get_value_type(), zfill=True
-    )
+    udf_str_ptr = builder.alloca(default_manager[udf_string].get_value_type())
     _ = context.compile_internal(
         builder,
         call_concat_string_view,

From d6d030c0528981baa040a3184a735cd2ddf8e3fa Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Wed, 16 Nov 2022 06:11:24 -0800
Subject: [PATCH 54/54] add reflected concat test

---
 python/strings_udf/strings_udf/tests/test_string_udfs.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/python/strings_udf/strings_udf/tests/test_string_udfs.py b/python/strings_udf/strings_udf/tests/test_string_udfs.py
index abaa6919502..49663ee02ec 100644
--- a/python/strings_udf/strings_udf/tests/test_string_udfs.py
+++ b/python/strings_udf/strings_udf/tests/test_string_udfs.py
@@ -310,3 +310,11 @@ def func(st):
         return st + concat_char
 
     run_udf_test(data, func, "str")
+
+
+@pytest.mark.parametrize("concat_char", ["1", "a", "12", " ", "", ".", "@"])
+def test_string_udf_concat_reflected(data, concat_char):
+    def func(st):
+        return concat_char + st
+
+    run_udf_test(data, func, "str")