diff --git a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md
index 8cd4f8c6d27..a88f621095c 100644
--- a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md
+++ b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md
@@ -269,6 +269,13 @@ An *immutable*, non-owning view of a table.
 
 A *mutable*, non-owning view of a table.
 
+## cudf::size_type
+
+`cudf::size_type` is the type used for the number of elements in a column, offsets to elements within a column, indices to address specific elements, segments for subsets of column elements, etc.
+It is equivalent to a signed, 32-bit integer type and therefore has a maximum value of 2147483647.
+Some APIs also accept negative index values and those functions support a minimum value of -2147483648.
+This fundamental type limits not only the size of a column but also the output values of APIs that count elements.
+
 ## Spans
 
 libcudf provides `span` classes that mimic C++20 `std::span`, which is a lightweight
@@ -370,16 +377,16 @@ libcudf APIs should still perform any validation that does not require introspec
 To give some idea of what should or should not be validated, here are (non-exhaustive) lists of examples.
 
 **Things that libcudf should validate**:
-- Input column/table sizes or dtypes
+- Input column/table sizes or data types
 
 **Things that libcudf should not validate**:
 - Integer overflow
-- Ensuring that outputs will not exceed the 2GB size limit for a given set of inputs
+- Ensuring that outputs will not exceed the [2GB size](#cudfsize_type) limit for a given set of inputs
 
 
 ## libcudf expects nested types to have sanitized null masks
 
-Various libcudf APIs accepting columns of nested dtypes (such as `LIST` or `STRUCT`) may assume that these columns have been sanitized.
+Various libcudf APIs accepting columns of nested data types (such as `LIST` or `STRUCT`) may assume that these columns have been sanitized.
 In this context, sanitization refers to ensuring that the null elements in a column with a nested dtype are compatible with the elements of nested columns.
 Specifically:
 - Null elements of list columns should also be empty. The starting offset of a null element should be equal to the ending offset.
@@ -746,8 +753,8 @@ where compile time was a problem is in types used to store indices, which can be
 The "Indexalator", or index-normalizing iterator (`include/cudf/detail/indexalator.cuh`), can be
 used for index types (integers) without requiring a type-specific instance. It can be used for any
 iterator interface for reading an array of integer values of type `int8`, `int16`, `int32`,
-`int64`, `uint8`, `uint16`, `uint32`, or `uint64`. Reading specific elements always return a
-`cudf::size_type` integer.
+`int64`, `uint8`, `uint16`, `uint32`, or `uint64`. Reading specific elements always returns a
+[`cudf::size_type`](#cudfsize_type) integer.
 
 Use the `indexalator_factory` to create an appropriate input iterator from a column_view. Example
 input iterator usage:
@@ -1104,7 +1111,7 @@ For list columns, the parent column's type is `LIST` and contains no data, but i
 the number of lists in the column, and its null mask represents the validity of each list element.
 The parent has two children.
 
-1. A non-nullable column of `INT32` elements that indicates the offset to the beginning of each list
+1. A non-nullable column of [`size_type`](#cudfsize_type) elements that indicates the offset to the beginning of each list
    in a dense column of elements.
 2. A column containing the actual data and optional null mask for all elements of all the lists
    packed together.
@@ -1152,7 +1159,7 @@ a non-nullable column of `INT8` data. The parent column's type is `STRING` and c
 but its size represents the number of strings in the column, and its null mask represents the
 validity of each string. To summarize, the strings column children are:
 
-1. A non-nullable column of `INT32` elements that indicates the offset to the beginning of each
+1. A non-nullable column of [`size_type`](#cudfsize_type) elements that indicates the offset to the beginning of each
    string in a dense column of all characters.
 2. A non-nullable column of `INT8` elements of all the characters across all the strings packed
    together.
@@ -1264,7 +1271,7 @@ libcudf provides view types for nested column types as well as for the data elem
 `cudf::strings_column_view` is a view of a strings column, like `cudf::column_view` is a view of
 any `cudf::column`. `cudf::string_view` is a view of a single string, and therefore
 `cudf::string_view` is the data type of a `cudf::column` of type `STRING` just like `int32_t` is the
-data type for a `cudf::column` of type `INT32`. As it's name implies, this is a read-only object
+data type for a `cudf::column` of type `INT32`. As its name implies, this is a read-only object
 instance that points to device memory inside the strings column. It's lifespan is the same (or less)
 as the column it views.
 
diff --git a/cpp/include/cudf/lists/contains.hpp b/cpp/include/cudf/lists/contains.hpp
index fbe931f945d..21c2ca1d64e 100644
--- a/cpp/include/cudf/lists/contains.hpp
+++ b/cpp/include/cudf/lists/contains.hpp
@@ -42,7 +42,7 @@ namespace lists {
  *
  * @param lists Lists column whose `n` rows are to be searched
  * @param search_key The scalar key to be looked up in each list row
- * @param mr Device memory resource used to allocate the returned column's device memory.
+ * @param mr Device memory resource used to allocate the returned column's device memory
  * @return BOOL8 column of `n` rows with the result of the lookup
  */
 std::unique_ptr<column> contains(
@@ -64,7 +64,7 @@ std::unique_ptr<column> contains(
  *
  * @param lists Lists column whose `n` rows are to be searched
  * @param search_keys Column of elements to be looked up in each list row
- * @param mr Device memory resource used to allocate the returned column's device memory.
+ * @param mr Device memory resource used to allocate the returned column's device memory
  * @return BOOL8 column of `n` rows with the result of the lookup
  */
 std::unique_ptr<column> contains(
@@ -85,7 +85,7 @@ std::unique_ptr<column> contains(
  * Nulls inside non-null nested elements (such as lists or structs) are not considered.
  *
  * @param lists Lists column whose `n` rows are to be searched
- * @param mr Device memory resource used to allocate the returned column's device memory.
+ * @param mr Device memory resource used to allocate the returned column's device memory
  * @return BOOL8 column of `n` rows with the result of the lookup
  */
 std::unique_ptr<column> contains_nulls(
@@ -102,7 +102,7 @@ enum class duplicate_find_option : int32_t {
 };
 
 /**
- * @brief Create a column of `size_type` values indicating the position of a search key
+ * @brief Create a column of values indicating the position of a search key
  * within each list row in the `lists` column
  *
  * The output column has as many elements as there are rows in the input `lists` column.
@@ -119,14 +119,14 @@ enum class duplicate_find_option : int32_t {
  * If `find_option == FIND_LAST`, the position of the last match in the list row is
  * returned.
  *
+ * @throw cudf::data_type_error If `search_key` type does not match the element type in `lists`
+ *
  * @param lists Lists column whose `n` rows are to be searched
  * @param search_key The scalar key to be looked up in each list row
  * @param find_option Whether to return the position of the first match (`FIND_FIRST`) or
  * last (`FIND_LAST`)
- * @param mr Device memory resource used to allocate the returned column's device memory.
- * @return INT32 column of `n` rows with the location of the `search_key`
- *
- * @throw cudf::data_type_error If `search_keys` type does not match the element type in `lists`
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ * @return column of `n` rows with the location of the `search_key`
  */
 std::unique_ptr<column> index_of(
   cudf::lists_column_view const& lists,
@@ -135,7 +135,7 @@ std::unique_ptr<column> index_of(
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
- * @brief Create a column of `size_type` values indicating the position of a search key
+ * @brief Create a column of values indicating the position of a search key
  * row within the corresponding list row in the `lists` column
  *
  * The output column has as many elements as there are rows in the input `lists` column.
@@ -152,16 +152,16 @@ std::unique_ptr<column> index_of(
  * If `find_option == FIND_LAST`, the position of the last match in the list row is
  * returned.
  *
+ * @throw cudf::logic_error If `search_keys` does not match `lists` in its number of rows
+ * @throw cudf::data_type_error If `search_keys` type does not match the element type in `lists`
+ *
  * @param lists Lists column whose `n` rows are to be searched
  * @param search_keys A column of search keys to be looked up in each corresponding row of
  * `lists`
  * @param find_option Whether to return the position of the first match (`FIND_FIRST`) or
  * last (`FIND_LAST`)
- * @param mr Device memory resource used to allocate the returned column's device memory.
- * @return INT32 column of `n` rows with the location of the `search_key`
- *
- * @throw cudf::logic_error If `search_keys` does not match `lists` in its number of rows
- * @throw cudf::data_type_error If `search_keys` type does not match the element type in `lists`
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ * @return column of `n` rows with the location of the corresponding `search_keys` element
  */
 std::unique_ptr<column> index_of(
   cudf::lists_column_view const& lists,
diff --git a/cpp/include/cudf/lists/count_elements.hpp b/cpp/include/cudf/lists/count_elements.hpp
index dac6c1b5bf8..552ba058b93 100644
--- a/cpp/include/cudf/lists/count_elements.hpp
+++ b/cpp/include/cudf/lists/count_elements.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -44,9 +44,9 @@ namespace lists {
  * Any null input element will result in a corresponding null entry
  * in the output column.
  *
- * @param input Input lists column.
- * @param mr Device memory resource used to allocate the returned column's device memory.
- * @return New INT32 column with the number of elements for each row.
+ * @param input Input lists column
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ * @return New column with the number of elements for each row
  */
 std::unique_ptr<column> count_elements(
   lists_column_view const& input,
diff --git a/cpp/include/cudf/search.hpp b/cpp/include/cudf/search.hpp
index bd9520df644..fee22786d7a 100644
--- a/cpp/include/cudf/search.hpp
+++ b/cpp/include/cudf/search.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -64,7 +64,7 @@ namespace cudf {
  * @param column_order Vector of column sort order
  * @param null_precedence Vector of null_precedence enums needles
  * @param mr Device memory resource used to allocate the returned column's device memory
- * @return A non-nullable column of cudf::size_type elements containing the insertion points
+ * @return A non-nullable column of elements containing the insertion points
  */
 std::unique_ptr<column> lower_bound(
   table_view const& haystack,
@@ -104,7 +104,7 @@ std::unique_ptr<column> lower_bound(
  * @param column_order Vector of column sort order
  * @param null_precedence Vector of null_precedence enums needles
  * @param mr Device memory resource used to allocate the returned column's device memory
- * @return A non-nullable column of cudf::size_type elements containing the insertion points
+ * @return A non-nullable column of elements containing the insertion points
  */
 std::unique_ptr<column> upper_bound(
   table_view const& haystack,
diff --git a/cpp/include/cudf/sorting.hpp b/cpp/include/cudf/sorting.hpp
index 922bed3b1ea..6924e77ae9b 100644
--- a/cpp/include/cudf/sorting.hpp
+++ b/cpp/include/cudf/sorting.hpp
@@ -44,7 +44,7 @@ namespace cudf {
  * for each column. Size must be equal to `input.num_columns()` or empty.
  * If empty, all columns will be sorted in `null_order::BEFORE`.
  * @param mr Device memory resource used to allocate the returned column's device memory
- * @return A non-nullable column of `size_type` elements containing the permuted row indices of
+ * @return A non-nullable column of elements containing the permuted row indices of
  * `input` if it were sorted
  */
 std::unique_ptr<column> sorted_order(
diff --git a/cpp/include/cudf/strings/attributes.hpp b/cpp/include/cudf/strings/attributes.hpp
index f0f7c667697..85086e44a26 100644
--- a/cpp/include/cudf/strings/attributes.hpp
+++ b/cpp/include/cudf/strings/attributes.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -32,8 +32,8 @@ namespace strings {
  */
 
 /**
- * @brief Returns an integer numeric column containing the length of each string in
- * characters.
+ * @brief Returns a column containing character lengths
+ * of each string in the given column
  *
  * The output column will have the same number of rows as the
  * specified strings column. Each row value will be the number of
@@ -41,17 +41,17 @@ namespace strings {
  *
  * Any null string will result in a null entry for that row in the output column.
  *
- * @param strings Strings instance for this operation.
- * @param mr Device memory resource used to allocate the returned column's device memory.
- * @return New INT32 column with lengths for each string.
+ * @param input Strings instance for this operation
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ * @return New column with lengths for each string
  */
 std::unique_ptr<column> count_characters(
-  strings_column_view const& strings,
+  strings_column_view const& input,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
- * @brief Returns a numeric column containing the length of each string in
- * bytes.
+ * @brief Returns a column containing byte lengths
+ * of each string in the given column
  *
  * The output column will have the same number of rows as the
  * specified strings column. Each row value will be the number of
@@ -59,17 +59,17 @@ std::unique_ptr<column> count_characters(
  *
  * Any null string will result in a null entry for that row in the output column.
  *
- * @param strings Strings instance for this operation.
- * @param mr Device memory resource used to allocate the returned column's device memory.
- * @return New INT32 column with the number of bytes for each string.
+ * @param input Strings instance for this operation
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ * @return New column with the number of bytes for each string
  */
 std::unique_ptr<column> count_bytes(
-  strings_column_view const& strings,
+  strings_column_view const& input,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
  * @brief Creates a numeric column with code point values (integers) for each
- * character of each string.
+ * character of each string
  *
  * A code point is the integer value representation of a character.
  * For example, the code point value for the character 'A' in UTF-8 is 65.
@@ -79,12 +79,12 @@ std::unique_ptr<column> count_bytes(
  *
  * Any null string is ignored. No null entries will appear in the output column.
  *
- * @param strings Strings instance for this operation.
- * @param mr Device memory resource used to allocate the returned column's device memory.
- * @return New INT32 column with code point integer values for each character.
+ * @param input Strings instance for this operation
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ * @return New INT32 column with code point integer values for each character
  */
 std::unique_ptr<column> code_points(
-  strings_column_view const& strings,
+  strings_column_view const& input,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /** @} */  // end of strings_apis group
diff --git a/cpp/include/cudf/strings/contains.hpp b/cpp/include/cudf/strings/contains.hpp
index aebc4ae7dab..92914bc810f 100644
--- a/cpp/include/cudf/strings/contains.hpp
+++ b/cpp/include/cudf/strings/contains.hpp
@@ -165,7 +165,7 @@ std::unique_ptr<column> matches_re(
  * @param strings Strings instance for this operation
  * @param prog Regex program instance
  * @param mr Device memory resource used to allocate the returned column's device memory
- * @return New INT32 column with counts for each string
+ * @return New column of match counts for each string
  */
 std::unique_ptr<column> count_re(
   strings_column_view const& strings,
diff --git a/cpp/include/cudf/strings/detail/strings_children.cuh b/cpp/include/cudf/strings/detail/strings_children.cuh
index 09e0f3bb079..02a65c01178 100644
--- a/cpp/include/cudf/strings/detail/strings_children.cuh
+++ b/cpp/include/cudf/strings/detail/strings_children.cuh
@@ -59,7 +59,7 @@ auto make_strings_children(SizeAndExecuteFunction size_and_exec_fn,
                            rmm::mr::device_memory_resource* mr)
 {
   auto offsets_column = make_numeric_column(
-    data_type{type_id::INT32}, strings_count + 1, mask_state::UNALLOCATED, stream, mr);
+    data_type{type_to_id<size_type>()}, strings_count + 1, mask_state::UNALLOCATED, stream, mr);
   auto offsets_view          = offsets_column->mutable_view();
   auto d_offsets             = offsets_view.template data<int32_t>();
   size_and_exec_fn.d_offsets = d_offsets;
diff --git a/cpp/include/cudf/strings/detail/strings_column_factories.cuh b/cpp/include/cudf/strings/detail/strings_column_factories.cuh
index 2939c47e6af..a3a5946fe55 100644
--- a/cpp/include/cudf/strings/detail/strings_column_factories.cuh
+++ b/cpp/include/cudf/strings/detail/strings_column_factories.cuh
@@ -175,7 +175,7 @@ std::unique_ptr<column> make_strings_column(CharIterator chars_begin,
 
   // build offsets column -- this is the number of strings + 1
   auto offsets_column = make_numeric_column(
-    data_type{type_id::INT32}, strings_count + 1, mask_state::UNALLOCATED, stream, mr);
+    data_type{type_to_id<size_type>()}, strings_count + 1, mask_state::UNALLOCATED, stream, mr);
   auto offsets_view = offsets_column->mutable_view();
   thrust::transform(rmm::exec_policy(stream),
                     offsets_begin,
diff --git a/cpp/include/nvtext/detail/tokenize.hpp b/cpp/include/nvtext/detail/tokenize.hpp
index 38b49e63590..80a6edc496b 100644
--- a/cpp/include/nvtext/detail/tokenize.hpp
+++ b/cpp/include/nvtext/detail/tokenize.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -28,12 +28,7 @@ namespace detail {
  * @copydoc nvtext::tokenize(strings_column_view const&,string_scalar
  * const&,rmm::mr::device_memory_resource*)
  *
- * @param strings Strings column tokenize.
- * @param delimiter UTF-8 characters used to separate each string into tokens.
- *                  The default of empty string will separate tokens using whitespace.
- * @param stream CUDA stream used for device memory operations and kernel launches.
- * @param mr Device memory resource used to allocate the returned column's device memory.
- * @return New strings columns of tokens.
+ * @param stream CUDA stream used for device memory operations and kernel launches
  */
 std::unique_ptr<cudf::column> tokenize(cudf::strings_column_view const& strings,
                                        cudf::string_scalar const& delimiter,
@@ -44,11 +39,7 @@ std::unique_ptr<cudf::column> tokenize(cudf::strings_column_view const& strings,
  * @copydoc nvtext::tokenize(strings_column_view const&,strings_column_view
  * const&,rmm::mr::device_memory_resource*)
  *
- * @param strings Strings column to tokenize.
- * @param delimiters Strings used to separate individual strings into tokens.
- * @param stream CUDA stream used for device memory operations and kernel launches.
- * @param mr Device memory resource used to allocate the returned column's device memory.
- * @return New strings columns of tokens.
+ * @param stream CUDA stream used for device memory operations and kernel launches
  */
 std::unique_ptr<cudf::column> tokenize(cudf::strings_column_view const& strings,
                                        cudf::strings_column_view const& delimiters,
@@ -59,12 +50,7 @@ std::unique_ptr<cudf::column> tokenize(cudf::strings_column_view const& strings,
  * @copydoc nvtext::count_tokens(strings_column_view const&, string_scalar
  * const&,rmm::mr::device_memory_resource*)
  *
- * @param strings Strings column to use for this operation.
- * @param delimiter Strings used to separate each string into tokens.
- *                  The default of empty string will separate tokens using whitespace.
- * @param stream CUDA stream used for device memory operations and kernel launches.
- * @param mr Device memory resource used to allocate the returned column's device memory.
- * @return New INT32 column of token counts.
+ * @param stream CUDA stream used for device memory operations and kernel launches
  */
 std::unique_ptr<cudf::column> count_tokens(cudf::strings_column_view const& strings,
                                            cudf::string_scalar const& delimiter,
@@ -75,11 +61,7 @@ std::unique_ptr<cudf::column> count_tokens(cudf::strings_column_view const& stri
  * @copydoc nvtext::count_tokens(strings_column_view const&,strings_column_view
  * const&,rmm::mr::device_memory_resource*)
  *
- * @param strings Strings column to use for this operation.
- * @param delimiters Strings used to separate each string into tokens.
- * @param stream CUDA stream used for device memory operations and kernel launches.
- * @param mr Device memory resource used to allocate the returned column's device memory.
- * @return New INT32 column of token counts.
+ * @param stream CUDA stream used for device memory operations and kernel launches
  */
 std::unique_ptr<cudf::column> count_tokens(cudf::strings_column_view const& strings,
                                            cudf::strings_column_view const& delimiters,
diff --git a/cpp/include/nvtext/tokenize.hpp b/cpp/include/nvtext/tokenize.hpp
index 10a9f746d76..a72f7dcfa59 100644
--- a/cpp/include/nvtext/tokenize.hpp
+++ b/cpp/include/nvtext/tokenize.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -112,11 +112,11 @@ std::unique_ptr<cudf::column> tokenize(
  * All null row entries are ignored and the output contains all valid rows.
  * The number of tokens for a null element is set to 0 in the output column.
  *
- * @param strings Strings column to use for this operation.
- * @param delimiter Strings used to separate each string into tokens.
+ * @param strings Strings column to use for this operation
+ * @param delimiter Strings used to separate each string into tokens;
  *                  The default of empty string will separate tokens using whitespace.
- * @param mr Device memory resource used to allocate the returned column's device memory.
- * @return New INT32 column of token counts.
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ * @return New column of token counts
  */
 std::unique_ptr<cudf::column> count_tokens(
   cudf::strings_column_view const& strings,
@@ -141,12 +141,12 @@ std::unique_ptr<cudf::column> count_tokens(
  * All null row entries are ignored and the output contains all valid rows.
  * The number of tokens for a null element is set to 0 in the output column.
  *
- * @throw cudf::logic_error if the delimiters column is empty or contains nulls.
+ * @throw cudf::logic_error if the delimiters column is empty or contains nulls
  *
- * @param strings Strings column to use for this operation.
- * @param delimiters Strings used to separate each string into tokens.
- * @param mr Device memory resource used to allocate the returned column's device memory.
- * @return New INT32 column of token counts.
+ * @param strings Strings column to use for this operation
+ * @param delimiters Strings used to separate each string into tokens
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ * @return New column of token counts
  */
 std::unique_ptr<cudf::column> count_tokens(
   cudf::strings_column_view const& strings,
diff --git a/cpp/src/strings/attributes.cu b/cpp/src/strings/attributes.cu
index 66288c7d14d..3a1b7044b56 100644
--- a/cpp/src/strings/attributes.cu
+++ b/cpp/src/strings/attributes.cu
@@ -60,16 +60,16 @@ constexpr size_type AVG_CHAR_BYTES_THRESHOLD = 64;
 
 /**
  * @brief Returns a numeric column containing lengths of each string in
- * based on the provided unary function.
+ * based on the provided unary function
  *
  * Any null string will result in a null entry for that row in the output column.
  *
- * @tparam UnaryFunction Device function that returns an integer given a string_view.
- * @param strings Strings instance for this operation.
- * @param ufn Function returns an integer for each string.
- * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @tparam UnaryFunction Device function that returns an integer given a string_view
+ * @param strings Strings instance for this operation
+ * @param ufn Function that returns an integer for each string
+ * @param stream CUDA stream used for device memory operations and kernel launches
  * @param mr Device memory resource used to allocate the returned column's device memory
- * @return New INT32 column with lengths for each string.
+ * @return New column with lengths for each string
  */
 template <typename UnaryFunction>
 std::unique_ptr<column> counts_fn(strings_column_view const& strings,
@@ -78,7 +78,7 @@ std::unique_ptr<column> counts_fn(strings_column_view const& strings,
                                   rmm::mr::device_memory_resource* mr)
 {
   // create output column
-  auto results   = make_numeric_column(data_type{type_id::INT32},
+  auto results   = make_numeric_column(data_type{type_to_id<size_type>()},
                                      strings.size(),
                                      cudf::detail::copy_bitmask(strings.parent(), stream, mr),
                                      strings.null_count(),
@@ -176,12 +176,12 @@ std::unique_ptr<column> count_characters(strings_column_view const& input,
   return count_characters_parallel(input, stream, mr);
 }
 
-std::unique_ptr<column> count_bytes(strings_column_view const& strings,
+std::unique_ptr<column> count_bytes(strings_column_view const& input,
                                     rmm::cuda_stream_view stream,
                                     rmm::mr::device_memory_resource* mr)
 {
   auto ufn = [] __device__(string_view const& d_str) { return d_str.size_bytes(); };
-  return counts_fn(strings, ufn, stream, mr);
+  return counts_fn(input, ufn, stream, mr);
 }
 
 }  // namespace detail
@@ -214,19 +214,19 @@ struct code_points_fn {
 
 namespace detail {
 //
-std::unique_ptr<column> code_points(strings_column_view const& strings,
+std::unique_ptr<column> code_points(strings_column_view const& input,
                                     rmm::cuda_stream_view stream,
                                     rmm::mr::device_memory_resource* mr)
 {
-  auto strings_column = column_device_view::create(strings.parent(), stream);
+  auto strings_column = column_device_view::create(input.parent(), stream);
   auto d_column       = *strings_column;
 
   // create offsets vector to account for each string's character length
-  rmm::device_uvector<size_type> offsets(strings.size() + 1, stream);
+  rmm::device_uvector<size_type> offsets(input.size() + 1, stream);
   thrust::transform_inclusive_scan(
     rmm::exec_policy(stream),
     thrust::make_counting_iterator<size_type>(0),
-    thrust::make_counting_iterator<size_type>(strings.size()),
+    thrust::make_counting_iterator<size_type>(input.size()),
     offsets.begin() + 1,
     [d_column] __device__(size_type idx) {
       size_type length = 0;
@@ -248,7 +248,7 @@ std::unique_ptr<column> code_points(strings_column_view const& strings,
   // now set the ranges from each strings' character values
   thrust::for_each_n(rmm::exec_policy(stream),
                      thrust::make_counting_iterator<size_type>(0),
-                     strings.size(),
+                     input.size(),
                      code_points_fn{d_column, offsets.data(), d_results});
 
   results->set_null_count(0);
@@ -259,25 +259,25 @@ std::unique_ptr<column> code_points(strings_column_view const& strings,
 
 // external APIS
 
-std::unique_ptr<column> count_characters(strings_column_view const& strings,
+std::unique_ptr<column> count_characters(strings_column_view const& input,
                                          rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::count_characters(strings, cudf::get_default_stream(), mr);
+  return detail::count_characters(input, cudf::get_default_stream(), mr);
 }
 
-std::unique_ptr<column> count_bytes(strings_column_view const& strings,
+std::unique_ptr<column> count_bytes(strings_column_view const& input,
                                     rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::count_bytes(strings, cudf::get_default_stream(), mr);
+  return detail::count_bytes(input, cudf::get_default_stream(), mr);
 }
 
-std::unique_ptr<column> code_points(strings_column_view const& strings,
+std::unique_ptr<column> code_points(strings_column_view const& input,
                                     rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::code_points(strings, cudf::get_default_stream(), mr);
+  return detail::code_points(input, cudf::get_default_stream(), mr);
 }
 
 }  // namespace strings
diff --git a/cpp/src/strings/count_matches.cu b/cpp/src/strings/count_matches.cu
index 9d29bbb8c96..1fde3a54089 100644
--- a/cpp/src/strings/count_matches.cu
+++ b/cpp/src/strings/count_matches.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -63,7 +63,7 @@ std::unique_ptr<column> count_matches(column_device_view const& d_strings,
   assert(output_size >= d_strings.size() and "Unexpected output size");
 
   auto results = make_numeric_column(
-    data_type{type_id::INT32}, output_size, mask_state::UNALLOCATED, stream, mr);
+    data_type{type_to_id<size_type>()}, output_size, mask_state::UNALLOCATED, stream, mr);
 
   if (d_strings.size() == 0) return results;