diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt
index 565a396d913..90225b8537a 100644
--- a/cpp/benchmarks/CMakeLists.txt
+++ b/cpp/benchmarks/CMakeLists.txt
@@ -280,7 +280,6 @@ ConfigureNVBench(TEXT_NVBENCH text/minhash.cpp)
 # * strings benchmark -------------------------------------------------------------------
 ConfigureBench(
   STRINGS_BENCH
-  string/case.cpp
   string/combine.cpp
   string/contains.cpp
   string/convert_datetime.cpp
@@ -301,7 +300,9 @@ ConfigureBench(
   string/url_decode.cu
 )
 
-ConfigureNVBench(STRINGS_NVBENCH string/like.cpp string/reverse.cpp string/lengths.cpp)
+ConfigureNVBench(
+  STRINGS_NVBENCH string/like.cpp string/reverse.cpp string/lengths.cpp string/case.cpp
+)
 
 # ##################################################################################################
 # * json benchmark -------------------------------------------------------------------
diff --git a/cpp/benchmarks/string/case.cpp b/cpp/benchmarks/string/case.cpp
index 6152ea741a3..0cdd5fbac32 100644
--- a/cpp/benchmarks/string/case.cpp
+++ b/cpp/benchmarks/string/case.cpp
@@ -15,36 +15,64 @@
  */
 
 #include <benchmarks/common/generate_input.hpp>
-#include <benchmarks/fixture/benchmark_fixture.hpp>
-#include <benchmarks/synchronization/synchronization.hpp>
+#include <benchmarks/fixture/rmm_pool_raii.hpp>
 
 #include <cudf/strings/case.hpp>
 #include <cudf/strings/strings_column_view.hpp>
 #include <cudf/utilities/default_stream.hpp>
 
-class StringCase : public cudf::benchmark {};
+#include <nvbench/nvbench.cuh>
 
-static void BM_case(benchmark::State& state)
+void bench_case(nvbench::state& state)
 {
-  cudf::size_type const n_rows{(cudf::size_type)state.range(0)};
-  auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows});
-  cudf::strings_column_view input(column->view());
+  auto const n_rows    = static_cast<cudf::size_type>(state.get_int64("num_rows"));
+  auto const max_width = static_cast<int32_t>(state.get_int64("width"));
+  auto const encoding  = state.get_string("encoding");
 
-  for (auto _ : state) {
-    cuda_event_timer raii(state, true, cudf::get_default_stream());
-    cudf::strings::to_lower(input);
+  if (static_cast<std::size_t>(n_rows) * static_cast<std::size_t>(max_width) >=
+      static_cast<std::size_t>(std::numeric_limits<cudf::size_type>::max())) {
+    state.skip("Skip benchmarks greater than size_type limit");
   }
 
-  state.SetBytesProcessed(state.iterations() * input.chars_size());
-}
+  data_profile const profile = data_profile_builder().distribution(
+    cudf::type_id::STRING, distribution_id::NORMAL, 0, max_width);
+  auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows}, profile);
+
+  auto col_view = column->view();
+
+  cudf::column::contents ascii_contents;
+  if (encoding == "ascii") {
+    data_profile ascii_profile = data_profile_builder().no_validity().distribution(
+      cudf::type_id::INT8, distribution_id::UNIFORM, 32, 126);  // nice ASCII range
+    auto input = cudf::strings_column_view(col_view);
+    auto ascii_column =
+      create_random_column(cudf::type_id::INT8, row_count{input.chars_size()}, ascii_profile);
+    auto ascii_data = ascii_column->view();
 
-#define SORT_BENCHMARK_DEFINE(name)          \
-  BENCHMARK_DEFINE_F(StringCase, name)       \
-  (::benchmark::State & st) { BM_case(st); } \
-  BENCHMARK_REGISTER_F(StringCase, name)     \
-    ->RangeMultiplier(8)                     \
-    ->Ranges({{1 << 12, 1 << 24}})           \
-    ->UseManualTime()                        \
-    ->Unit(benchmark::kMillisecond);
+    col_view = cudf::column_view(col_view.type(),
+                                 col_view.size(),
+                                 nullptr,
+                                 col_view.null_mask(),
+                                 col_view.null_count(),
+                                 0,
+                                 {input.offsets(), ascii_data});
+
+    ascii_contents = ascii_column->release();
+  }
+  auto input = cudf::strings_column_view(col_view);
+
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
+
+  state.add_element_count(input.chars_size(), "chars_size");
+  state.add_global_memory_reads<nvbench::int8_t>(input.chars_size());
+  state.add_global_memory_writes<nvbench::int8_t>(input.chars_size());
+
+  state.exec(nvbench::exec_tag::sync,
+             [&](nvbench::launch& launch) { auto result = cudf::strings::to_lower(input); });
+}
 
-SORT_BENCHMARK_DEFINE(to_lower)
+NVBENCH_BENCH(bench_case)
+  .set_name("strings_case")
+  .add_int64_axis("width", {32, 64, 128, 256, 512, 1024, 2048})
+  .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216})
+  .add_string_axis("encoding", {"ascii", "utf8"});
diff --git a/cpp/src/strings/case.cu b/cpp/src/strings/case.cu
index 02c4532bb79..0997983c95e 100644
--- a/cpp/src/strings/case.cu
+++ b/cpp/src/strings/case.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,9 +17,10 @@
 #include <cudf/column/column.hpp>
 #include <cudf/column/column_device_view.cuh>
 #include <cudf/column/column_factories.hpp>
-#include <cudf/copying.hpp>
 #include <cudf/detail/null_mask.hpp>
 #include <cudf/detail/nvtx/ranges.hpp>
+#include <cudf/detail/utilities/cuda.cuh>
+#include <cudf/detail/utilities/device_atomics.cuh>
 #include <cudf/strings/case.hpp>
 #include <cudf/strings/detail/char_tables.hpp>
 #include <cudf/strings/detail/strings_children.cuh>
@@ -38,31 +39,33 @@ namespace detail {
 namespace {
 
 /**
- * @brief Per string logic for case conversion functions.
+ * @brief Threshold to decide on using string or warp parallel functions.
  *
+ * If the average byte length of a string in a column exceeds this value then
+ * the warp-parallel function is used to compute the output sizes.
+ * Otherwise, a regular string-parallel function is used.
+ *
+ * This value was found using the strings_lengths benchmark results.
  */
-struct upper_lower_fn {
-  const column_device_view d_column;
-  character_flags_table_type case_flag;  // flag to check with on each character
-  const character_flags_table_type* d_flags;
-  const character_cases_table_type* d_case_table;
-  const special_case_mapping* d_special_case_mapping;
-  int32_t* d_offsets{};
-  char* d_chars{};
+constexpr size_type AVG_CHAR_BYTES_THRESHOLD = 64;
 
-  __device__ special_case_mapping get_special_case_mapping(uint32_t code_point)
-  {
-    return d_special_case_mapping[get_special_case_hash_index(code_point)];
-  }
+/**
+ * @brief Utility functions for converting characters to upper or lower case
+ */
+struct convert_char_fn {
+  character_flags_table_type case_flag;
+  character_flags_table_type const* d_flags;
+  character_cases_table_type const* d_case_table;
+  special_case_mapping const* d_special_case_mapping;
 
-  // compute-size / copy the bytes representing the special case mapping for this codepoint
-  __device__ int32_t handle_special_case_bytes(uint32_t code_point,
-                                               char* d_buffer,
-                                               detail::character_flags_table_type flag)
+  // compute size or copy the bytes representing the special case mapping for this codepoint
+  __device__ size_type handle_special_case_bytes(uint32_t code_point,
+                                                 detail::character_flags_table_type flag,
+                                                 char* d_buffer = nullptr) const
   {
-    special_case_mapping m = get_special_case_mapping(code_point);
-    size_type bytes        = 0;
+    special_case_mapping m = d_special_case_mapping[get_special_case_hash_index(code_point)];
 
+    size_type bytes   = 0;
     auto const count  = IS_LOWER(flag) ? m.num_upper_chars : m.num_lower_chars;
     auto const* chars = IS_LOWER(flag) ? m.upper : m.lower;
     for (uint16_t idx = 0; idx < count; idx++) {
@@ -73,76 +76,200 @@ struct upper_lower_fn {
     return bytes;
   }
 
-  __device__ void operator()(size_type idx)
+  // this is called for converting any UTF-8 characters
+  __device__ size_type process_character(char_utf8 chr, char* d_buffer = nullptr) const
+  {
+    auto const code_point = detail::utf8_to_codepoint(chr);
+
+    detail::character_flags_table_type flag = code_point <= 0x00'FFFF ? d_flags[code_point] : 0;
+
+    // we apply special mapping in two cases:
+    // - uncased characters with the special mapping flag: always
+    // - cased characters with the special mapping flag: when matching the input case_flag
+    if (IS_SPECIAL(flag) && ((flag & case_flag) || !IS_UPPER_OR_LOWER(flag))) {
+      return handle_special_case_bytes(code_point, case_flag, d_buffer);
+    }
+
+    char_utf8 const new_char =
+      (flag & case_flag) ? detail::codepoint_to_utf8(d_case_table[code_point]) : chr;
+    return (d_buffer) ? detail::from_char_utf8(new_char, d_buffer)
+                      : detail::bytes_in_char_utf8(new_char);
+  }
+
+  // special function for converting ASCII-only characters
+  __device__ char process_ascii(char chr)
   {
-    if (d_column.is_null(idx)) {
+    return (case_flag & d_flags[chr]) ? static_cast<char>(d_case_table[chr]) : chr;
+  }
+};
+
+/**
+ * @brief Per string logic for case conversion functions
+ *
+ * This can be used in calls to make_strings_children.
+ */
+struct upper_lower_fn {
+  convert_char_fn converter;
+  column_device_view d_strings;
+  size_type* d_offsets{};
+  char* d_chars{};
+
+  __device__ void operator()(size_type idx) const
+  {
+    if (d_strings.is_null(idx)) {
       if (!d_chars) d_offsets[idx] = 0;
       return;
     }
-    auto const d_str = d_column.template element<string_view>(idx);
-    int32_t bytes    = 0;
+    auto const d_str = d_strings.element<string_view>(idx);
+    size_type bytes  = 0;
     char* d_buffer   = d_chars ? d_chars + d_offsets[idx] : nullptr;
     for (auto itr = d_str.begin(); itr != d_str.end(); ++itr) {
-      uint32_t code_point = detail::utf8_to_codepoint(*itr);
-
-      detail::character_flags_table_type flag = code_point <= 0x00'FFFF ? d_flags[code_point] : 0;
-
-      // we apply special mapping in two cases:
-      // - uncased characters with the special mapping flag, always
-      // - cased characters with the special mapping flag, when matching the input case_flag
-      //
-      if (IS_SPECIAL(flag) && ((flag & case_flag) || !IS_UPPER_OR_LOWER(flag))) {
-        auto const new_bytes = handle_special_case_bytes(code_point, d_buffer, case_flag);
-        bytes += new_bytes;
-        if (d_buffer) d_buffer += new_bytes;
+      auto const size = converter.process_character(*itr, d_buffer);
+      if (d_buffer) {
+        d_buffer += size;
       } else {
-        char_utf8 new_char =
-          (flag & case_flag) ? detail::codepoint_to_utf8(d_case_table[code_point]) : *itr;
-        if (!d_buffer)
-          bytes += detail::bytes_in_char_utf8(new_char);
-        else
-          d_buffer += detail::from_char_utf8(new_char, d_buffer);
+        bytes += size;
       }
     }
-    if (!d_buffer) d_offsets[idx] = bytes;
+    if (!d_buffer) { d_offsets[idx] = bytes; }
+  }
+};
+
+/**
+ * @brief Count output bytes in warp-parallel threads
+ *
+ * This executes as one warp per string and just computes the output sizes.
+ */
+struct count_bytes_fn {
+  convert_char_fn converter;
+  column_device_view d_strings;
+  size_type* d_offsets;
+
+  __device__ void operator()(size_type idx) const
+  {
+    auto const str_idx  = idx / cudf::detail::warp_size;
+    auto const lane_idx = idx % cudf::detail::warp_size;
+
+    // initialize the output for the atomicAdd
+    if (lane_idx == 0) { d_offsets[str_idx] = 0; }
+    __syncwarp();
+
+    if (d_strings.is_null(str_idx)) { return; }
+    auto const d_str   = d_strings.element<string_view>(str_idx);
+    auto const str_ptr = d_str.data();
+
+    size_type size = 0;
+    for (auto i = lane_idx; i < d_str.size_bytes(); i += cudf::detail::warp_size) {
+      auto const chr = str_ptr[i];
+      if (is_utf8_continuation_char(chr)) { continue; }
+      char_utf8 u8 = 0;
+      to_char_utf8(str_ptr + i, u8);
+      size += converter.process_character(u8);
+    }
+    // this is every so slightly faster than using the cub::warp_reduce
+    if (size > 0) atomicAdd(d_offsets + str_idx, size);
   }
 };
 
+/**
+ * @brief Special functor for processing ASCII-only data
+ */
+struct ascii_converter_fn {
+  convert_char_fn converter;
+  __device__ char operator()(char chr) { return converter.process_ascii(chr); }
+};
+
 /**
  * @brief Utility method for converting upper and lower case characters
- * in a strings column.
+ * in a strings column
  *
- * @param strings Strings to convert.
+ * @param input Strings to convert
  * @param case_flag The character type to convert (upper, lower, or both)
- * @param stream CUDA stream used for device memory operations and kernel launches.
- * @param mr Device memory resource used to allocate the returned column's device memory.
- * @return New strings column with characters converted.
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ * @return New strings column with characters converted
  */
-std::unique_ptr<column> convert_case(strings_column_view const& strings,
+std::unique_ptr<column> convert_case(strings_column_view const& input,
                                      character_flags_table_type case_flag,
                                      rmm::cuda_stream_view stream,
                                      rmm::mr::device_memory_resource* mr)
 {
-  if (strings.is_empty()) return make_empty_column(type_id::STRING);
-
-  auto strings_column = column_device_view::create(strings.parent(), stream);
-  auto d_column       = *strings_column;
-
-  // build functor with lookup tables used for case conversion
-  upper_lower_fn functor{d_column,
-                         case_flag,
-                         get_character_flags_table(),
-                         get_character_cases_table(),
-                         get_special_case_mapping_table()};
-
-  // this utility calls the functor to build the offsets and chars columns
-  auto children = cudf::strings::detail::make_strings_children(functor, strings.size(), stream, mr);
-
-  return make_strings_column(strings.size(),
-                             std::move(children.first),
-                             std::move(children.second),
-                             strings.null_count(),
-                             cudf::detail::copy_bitmask(strings.parent(), stream, mr));
+  if (input.size() == input.null_count()) {
+    return std::make_unique<column>(input.parent(), stream, mr);
+  }
+
+  auto const d_strings = column_device_view::create(input.parent(), stream);
+  auto const d_flags   = get_character_flags_table();
+  auto const d_cases   = get_character_cases_table();
+  auto const d_special = get_special_case_mapping_table();
+
+  convert_char_fn ccfn{case_flag, d_flags, d_cases, d_special};
+  upper_lower_fn converter{ccfn, *d_strings};
+
+  // For smaller strings, use the regular string-parallel algorithm
+  if ((input.chars_size() / (input.size() - input.null_count())) < AVG_CHAR_BYTES_THRESHOLD) {
+    auto [offsets, chars] =
+      cudf::strings::detail::make_strings_children(converter, input.size(), stream, mr);
+    return make_strings_column(input.size(),
+                               std::move(offsets),
+                               std::move(chars),
+                               input.null_count(),
+                               cudf::detail::copy_bitmask(input.parent(), stream, mr));
+  }
+
+  // Check if the input contains any multi-byte characters.
+  // This check incurs ~20% performance hit for smaller strings and so we only use it
+  // after the threshold check above. The check makes very little impact for larger strings
+  // but results in a large performance gain when the input contains only single-byte characters.
+  // The count_if is faster than any_of or all_of: https://github.com/NVIDIA/thrust/issues/1016
+  bool const multi_byte_chars =
+    thrust::count_if(
+      rmm::exec_policy(stream), input.chars_begin(), input.chars_end(), [] __device__(auto chr) {
+        return is_utf8_continuation_char(chr);
+      }) > 0;
+  if (!multi_byte_chars) {
+    // optimization for ASCII-only case: copy the input column and inplace replace each character
+    auto result = std::make_unique<column>(input.parent(), stream, mr);
+    auto d_chars =
+      result->mutable_view().child(strings_column_view::chars_column_index).data<char>();
+    auto const chars_size = strings_column_view(result->view()).chars_size();
+    thrust::transform(
+      rmm::exec_policy(stream), d_chars, d_chars + chars_size, d_chars, ascii_converter_fn{ccfn});
+    result->set_null_count(input.null_count());
+    return result;
+  }
+
+  // This will use a warp-parallel algorithm to compute the output sizes for each string
+  // and then uses the normal string parallel functor to build the output.
+  auto offsets = make_numeric_column(
+    data_type{type_to_id<size_type>()}, input.size() + 1, mask_state::UNALLOCATED, stream, mr);
+  auto d_offsets = offsets->mutable_view().data<size_type>();
+
+  // first pass, compute output sizes
+  // note: tried to use segmented-reduce approach instead here and it was consistently slower
+  count_bytes_fn counter{ccfn, *d_strings, d_offsets};
+  auto const count_itr = thrust::make_counting_iterator<size_type>(0);
+  thrust::for_each_n(
+    rmm::exec_policy(stream), count_itr, input.size() * cudf::detail::warp_size, counter);
+
+  // convert sizes to offsets
+  auto const bytes =
+    cudf::detail::sizes_to_offsets(d_offsets, d_offsets + input.size() + 1, d_offsets, stream);
+  CUDF_EXPECTS(bytes <= static_cast<int64_t>(std::numeric_limits<size_type>::max()),
+               "Size of output exceeds column size limit",
+               std::overflow_error);
+
+  auto chars = create_chars_child_column(static_cast<size_type>(bytes), stream, mr);
+  // second pass, write output
+  converter.d_offsets = d_offsets;
+  converter.d_chars   = chars->mutable_view().data<char>();
+  thrust::for_each_n(rmm::exec_policy(stream), count_itr, input.size(), converter);
+
+  return make_strings_column(input.size(),
+                             std::move(offsets),
+                             std::move(chars),
+                             input.null_count(),
+                             cudf::detail::copy_bitmask(input.parent(), stream, mr));
 }
 
 }  // namespace
diff --git a/cpp/tests/strings/case_tests.cpp b/cpp/tests/strings/case_tests.cpp
index 3852930dafe..31637a6ab9a 100644
--- a/cpp/tests/strings/case_tests.cpp
+++ b/cpp/tests/strings/case_tests.cpp
@@ -202,6 +202,64 @@ TEST_F(StringsCaseTest, MultiCharLower)
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
 }
 
+TEST_F(StringsCaseTest, Ascii)
+{
+  // triggering the ascii code path requires some long-ish strings
+  cudf::test::strings_column_wrapper input{
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890!@#$%^&*()_+=- ",
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890!@#$%^&*()_+=- ",
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890!@#$%^&*()_+=- ",
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890!@#$%^&*()_+=-"};
+  auto view     = cudf::strings_column_view(input);
+  auto expected = cudf::test::strings_column_wrapper{
+    "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz1234567890!@#$%^&*()_+=- ",
+    "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz1234567890!@#$%^&*()_+=- ",
+    "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz1234567890!@#$%^&*()_+=- ",
+    "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz1234567890!@#$%^&*()_+=-"};
+  auto results = cudf::strings::to_lower(view);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+
+  expected = cudf::test::strings_column_wrapper{
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890!@#$%^&*()_+=- ",
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890!@#$%^&*()_+=- ",
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890!@#$%^&*()_+=- ",
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890!@#$%^&*()_+=-"};
+  results = cudf::strings::to_upper(view);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+
+  results = cudf::strings::to_upper(cudf::strings_column_view(cudf::slice(input, {1, 3}).front()));
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, cudf::slice(expected, {1, 3}).front());
+}
+
+TEST_F(StringsCaseTest, LongStrings)
+{
+  // average string length >= AVG_CHAR_BYTES_THRESHOLD as defined in case.cu
+  cudf::test::strings_column_wrapper input{
+    "ABCDÉFGHIJKLMNOPQRSTUVWXYZabcdéfghijklmnopqrstuvwxyz1234567890!@#$%^&*()_+=- ",
+    "ABCDÉFGHIJKLMNOPQRSTUVWXYZabcdéfghijklmnopqrstuvwxyz1234567890!@#$%^&*()_+=- ",
+    "ABCDÉFGHIJKLMNOPQRSTUVWXYZabcdéfghijklmnopqrstuvwxyz1234567890!@#$%^&*()_+=- ",
+    "ABCDÉFGHIJKLMNOPQRSTUVWXYZabcdéfghijklmnopqrstuvwxyz1234567890!@#$%^&*()_+=-"};
+  auto view     = cudf::strings_column_view(input);
+  auto expected = cudf::test::strings_column_wrapper{
+    "abcdéfghijklmnopqrstuvwxyzabcdéfghijklmnopqrstuvwxyz1234567890!@#$%^&*()_+=- ",
+    "abcdéfghijklmnopqrstuvwxyzabcdéfghijklmnopqrstuvwxyz1234567890!@#$%^&*()_+=- ",
+    "abcdéfghijklmnopqrstuvwxyzabcdéfghijklmnopqrstuvwxyz1234567890!@#$%^&*()_+=- ",
+    "abcdéfghijklmnopqrstuvwxyzabcdéfghijklmnopqrstuvwxyz1234567890!@#$%^&*()_+=-"};
+  auto results = cudf::strings::to_lower(view);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+
+  expected = cudf::test::strings_column_wrapper{
+    "ABCDÉFGHIJKLMNOPQRSTUVWXYZABCDÉFGHIJKLMNOPQRSTUVWXYZ1234567890!@#$%^&*()_+=- ",
+    "ABCDÉFGHIJKLMNOPQRSTUVWXYZABCDÉFGHIJKLMNOPQRSTUVWXYZ1234567890!@#$%^&*()_+=- ",
+    "ABCDÉFGHIJKLMNOPQRSTUVWXYZABCDÉFGHIJKLMNOPQRSTUVWXYZ1234567890!@#$%^&*()_+=- ",
+    "ABCDÉFGHIJKLMNOPQRSTUVWXYZABCDÉFGHIJKLMNOPQRSTUVWXYZ1234567890!@#$%^&*()_+=-"};
+  results = cudf::strings::to_upper(view);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+
+  results = cudf::strings::to_upper(cudf::strings_column_view(cudf::slice(input, {1, 3}).front()));
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, cudf::slice(expected, {1, 3}).front());
+}
+
 TEST_F(StringsCaseTest, EmptyStringsColumn)
 {
   cudf::column_view zero_size_strings_column(