From 8d7bf34ff045fbe1760c90ef825e6719bab1ff80 Mon Sep 17 00:00:00 2001
From: gpuCI <38199262+GPUtester@users.noreply.github.com>
Date: Tue, 4 Feb 2020 08:13:38 -0800
Subject: [PATCH 1/9] REL v0.12.0 release

---
 docs/cudf/source/conf.py      | 2 +-
 docs/nvstrings/source/conf.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py
index 80ccad1c9fe..0e465b9efbc 100644
--- a/docs/cudf/source/conf.py
+++ b/docs/cudf/source/conf.py
@@ -71,7 +71,7 @@
 # built documents.
 #
 # The short X.Y version.
-version = "0.12"
+version = '0.12'
 # The full version, including alpha/beta/rc tags.
 release = cudf.__version__
 
diff --git a/docs/nvstrings/source/conf.py b/docs/nvstrings/source/conf.py
index 83795ef3660..3da2ce0c912 100644
--- a/docs/nvstrings/source/conf.py
+++ b/docs/nvstrings/source/conf.py
@@ -69,9 +69,9 @@
 # built documents.
 #
 # The short X.Y version.
-version = "0.12"
+version = '0.12'
 # The full version, including alpha/beta/rc tags.
-release = "0.12.0a"
+release = '0.12.0'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.

From 7178642ae8f353db1be87b65cc8315a5373833e6 Mon Sep 17 00:00:00 2001
From: Sriram Chandramouli <sriram_chandramouli@hotmail.com>
Date: Wed, 27 May 2020 15:39:53 +0000
Subject: [PATCH 2/9] - compute substrings from beginning until delimiter or
 from a delimiter until end of string   - this Closes #5158   - this emulates
 spark's `substring_index` function

---
 cpp/include/cudf/strings/find.hpp |  88 +++++++++
 cpp/src/strings/find.cu           | 153 ++++++++++++++
 cpp/tests/strings/find_tests.cpp  | 319 ++++++++++++++++++++++++++++++
 3 files changed, 560 insertions(+)

diff --git a/cpp/include/cudf/strings/find.hpp b/cpp/include/cudf/strings/find.hpp
index ab6afc82094..a9b27b7cfde 100644
--- a/cpp/include/cudf/strings/find.hpp
+++ b/cpp/include/cudf/strings/find.hpp
@@ -141,6 +141,94 @@ std::unique_ptr<column> ends_with(
   string_scalar const& target,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
 
+/**
+ * @brief Returns a column of strings that searches for the @p delimiter @p count number of
+ * times in the source @p strings forward if @p count is positive or backwards if @p count is
+ * negative. If @p count is positive, it returns the substring from the start of the source
+ * string up to, but not including, occurrence number @p count of the @p delimiter.
+ * If @p count is negative, it returns the substring that starts just past occurrence number
+ * @p count (counting from the end) of the @p delimiter and runs to the end of the string.
+ *
+ * The search for @p delimiter in @p strings is case sensitive.
+ * If the @p count is 0, every row in the output column will be null.
+ * If the row value of @p strings is null, the row value in the output column will be null.
+ * If the @p delimiter is invalid or null, every row in the output column will be null.
+ * If the @p delimiter or the column value for a row is empty, the row value in the output
+ * column will be empty.
+ * If @p count occurrences of @p delimiter aren't found, the row value in the output column
+ * will be the row value from the input @p strings column.
+ *
+ * @code{.pseudo}
+ * Example:
+ * in_s = ['www.nvidia.com', null, 'www.google.com', '', 'foo' ]
+ * r = substring_index(in_s, '.', 1)
+ * r is ['www', null, 'www', '', 'foo']
+ *
+ * in_s = ['www.nvidia.com', null, 'www.google.com', '', 'foo' ]
+ * r = substring_index(in_s, '.', -2)
+ * r is ['nvidia.com', null, 'google.com', '', 'foo']
+ * @endcode
+ *
+ * @param strings Strings instance for this operation.
+ * @param delimiter UTF-8 encoded string to search for in each string.
+ * @param count Number of times to search for delimiter in each string. If the value is positive,
+ *              a forward search is performed; if negative, a backward search is performed.
+ * @param mr Resource for allocating device memory.
+ * @return New strings column containing the substrings.
+ */
+std::unique_ptr<column> substring_index(
+  strings_column_view const& strings,
+  string_scalar const& delimiter,
+  size_type count,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
+
+/**
+ * @brief Returns a column of strings that searches for each row's delimiter from
+ * @p delimiter_strings @p count number of times in the source @p strings forward if @p count
+ * is positive or backwards if @p count is negative. If @p count is positive, it returns the
+ * substring from the start of the source string up to, but not including, occurrence number
+ * @p count of that row's delimiter. If @p count is negative, it returns the substring that
+ * starts just past occurrence number @p count (counting from the end) of that row's delimiter
+ * and runs to the end of the string.
+ *
+ * The search for @p delimiter_strings in @p strings is case sensitive.
+ * If the @p count is 0, every row in the output column will be null.
+ * If the row value of @p strings is null, the row value in the output column will be null.
+ * If the row value from @p delimiter_strings is invalid or null, the row value in the
+ * output column will be null.
+ * If the row value from @p delimiter_strings or the column value for a row is empty, the
+ * row value in the output column will be empty.
+ * If @p count occurrences of the delimiter aren't found, the row value in the output column
+ * will be the row value from the input @p strings column.
+ *
+ * @code{.pseudo}
+ * Example:
+ * in_s = ['www.nvidia.com', null, 'www.google.com', '', 'foo..bar....goo' ]
+ * delimiters = ['.', '..', '', null, '..']
+ * r = substring_index(in_s, delimiters, 2)
+ * r is ['www.nvidia', null, '', null, 'foo..bar']
+ *
+ * in_s = ['www.nvidia.com', null, 'www.google.com', '', 'foo..bar....goo', 'apache.org' ]
+ * delimiters = ['.', '..', '', null, '..', '.']
+ * r = substring_index(in_s, delimiters, -2)
+ * r is ['nvidia.com', null, '', null, '..goo', 'apache.org']
+ * @endcode
+ *
+ * @throw cudf::logic_error if @p strings and @p delimiter_strings have different row counts.
+ *
+ * @param strings Strings instance for this operation.
+ * @param delimiter_strings UTF-8 encoded string for each row.
+ * @param count Number of times to search for delimiter in each string. If the value is positive,
+ *              a forward search is performed; if negative, a backward search is performed.
+ * @param mr Resource for allocating device memory.
+ * @return New strings column containing the substrings.
+ */
+std::unique_ptr<column> substring_index(
+  strings_column_view const& strings,
+  strings_column_view const& delimiter_strings,
+  size_type count,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
+
 /** @} */  // end of doxygen group
 }  // namespace strings
 }  // namespace cudf
diff --git a/cpp/src/strings/find.cu b/cpp/src/strings/find.cu
index c791f8f7ab2..589bb976e08 100644
--- a/cpp/src/strings/find.cu
+++ b/cpp/src/strings/find.cu
@@ -16,7 +16,9 @@
 
 #include <cudf/column/column_device_view.cuh>
 #include <cudf/column/column_factories.hpp>
+#include <cudf/detail/iterator.cuh>
 #include <cudf/detail/nvtx/ranges.hpp>
+#include <cudf/scalar/scalar_device_view.cuh>
 #include <cudf/scalar/scalar_factories.hpp>
 #include <cudf/strings/detail/utilities.hpp>
 #include <cudf/strings/find.hpp>
@@ -286,5 +288,156 @@ std::unique_ptr<column> ends_with(strings_column_view const& strings,
   return detail::ends_with(strings, target, mr);
 }
 
+// For substring_index APIs
+namespace detail {
+// Internal helper class
+namespace {
+
+struct substring_index_functor {
+  template <typename ColItrT, typename DelimiterItrT>
+  std::unique_ptr<column> operator()(ColItrT const col_itr,
+                                     DelimiterItrT const delim_itr,
+                                     size_type delimiter_count,
+                                     rmm::mr::device_memory_resource* mr,
+                                     cudaStream_t stream,
+                                     size_type strings_count) const
+  {
+    // Result string_view for each row (shallow: no character data is copied here)
+    rmm::device_vector<string_view> out_col_strings(strings_count);
+
+    // Null-pointer string_view used to mark the output rows that are null
+    string_view const invalid_str{nullptr, 0};
+
+    thrust::transform(
+      rmm::exec_policy(stream)->on(stream),
+      col_itr,
+      col_itr + strings_count,
+      delim_itr,
+      out_col_strings.data().get(),
+      [delimiter_count, invalid_str] __device__(auto col_val_pair, auto delim_val_pair) {
+        // If the column value for this row or the delimiter is null or if the delimiter count is 0,
+        // result is null
+        if (!col_val_pair.second || !delim_val_pair.second || delimiter_count == 0)
+          return invalid_str;
+        auto col_val = col_val_pair.first;
+
+        // If the delimiter (scalar or row-specific) or the column value for this row is empty,
+        // the result is an empty string.
+        if (delim_val_pair.first.empty() || col_val.empty()) return string_view{};
+
+        auto delim_val = delim_val_pair.first;
+
+        auto const col_val_len   = col_val.length();
+        auto const delimiter_len = delim_val.length();
+
+        auto nsearches      = (delimiter_count < 0) ? -delimiter_count : delimiter_count;
+        size_type start_pos = 0;
+        size_type end_pos   = col_val_len;
+        string_view out_str{};
+
+        for (auto i = 0; i < nsearches; ++i) {
+          if (delimiter_count < 0) {
+            end_pos = col_val.rfind(delim_val, 0, end_pos);
+            if (end_pos == -1) {
+              out_str = col_val;
+              break;
+            }
+            if (i + 1 == nsearches)
+              out_str =
+                col_val.substr(end_pos + delimiter_len, col_val_len - end_pos - delimiter_len);
+          } else {
+            auto char_pos = col_val.find(delim_val, start_pos);
+            if (char_pos == -1) {
+              out_str = col_val;
+              break;
+            }
+            if (i + 1 == nsearches)
+              out_str = col_val.substr(0, char_pos);
+            else
+              start_pos = char_pos + delimiter_len;
+          }
+        }
+
+        return out_str.empty() ? string_view{} : out_str;
+      });
+
+    // Create an output column with the resultant strings
+    return make_strings_column(out_col_strings, invalid_str, stream, mr);
+  }
+};
+
+}  // namespace
+
+template <typename DelimiterItrT>
+std::unique_ptr<column> substring_index(strings_column_view const& strings,
+                                        DelimiterItrT const delimiter_itr,
+                                        size_type count,
+                                        rmm::mr::device_memory_resource* mr,
+                                        cudaStream_t stream = 0)
+{
+  auto strings_count = strings.size();
+  // If there aren't any rows, return an empty strings column
+  if (strings_count == 0) return strings::detail::make_empty_strings_column(mr, stream);
+
+  // Create device view of the column
+  auto colview_ptr = column_device_view::create(strings.parent(), stream);
+  auto colview     = *colview_ptr;
+  if (colview.nullable()) {
+    return substring_index_functor{}(
+      experimental::detail::make_pair_iterator<string_view, true>(colview),
+      delimiter_itr,
+      count,
+      mr,
+      stream,
+      strings_count);
+  } else {
+    return substring_index_functor{}(
+      experimental::detail::make_pair_iterator<string_view, false>(colview),
+      delimiter_itr,
+      count,
+      mr,
+      stream,
+      strings_count);
+  }
+}
+
+}  // namespace detail
+
+// external APIs
+
+std::unique_ptr<column> substring_index(strings_column_view const& strings,
+                                        string_scalar const& delimiter,
+                                        size_type count,
+                                        rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();
+  return detail::substring_index(
+    strings, experimental::detail::make_pair_iterator<string_view>(delimiter), count, mr);
+}
+
+std::unique_ptr<column> substring_index(strings_column_view const& strings,
+                                        strings_column_view const& delimiters,
+                                        size_type count,
+                                        rmm::mr::device_memory_resource* mr)
+{
+  CUDF_EXPECTS(strings.size() == delimiters.size(),
+               "Strings and delimiters column sizes do not match");
+
+  CUDF_FUNC_RANGE();
+  auto delimiters_dev_view_ptr = cudf::column_device_view::create(delimiters.parent(), 0);
+  auto delimiters_dev_view     = *delimiters_dev_view_ptr;
+  return (delimiters_dev_view.nullable())
+           ? detail::substring_index(
+               strings,
+               experimental::detail::make_pair_iterator<string_view, true>(delimiters_dev_view),
+               count,
+               mr)
+           : detail::substring_index(
+               strings,
+               experimental::detail::make_pair_iterator<string_view, false>(delimiters_dev_view),
+               count,
+               mr);
+}
+
 }  // namespace strings
 }  // namespace cudf
diff --git a/cpp/tests/strings/find_tests.cpp b/cpp/tests/strings/find_tests.cpp
index 6a0d39757f0..ab29a06c1d0 100644
--- a/cpp/tests/strings/find_tests.cpp
+++ b/cpp/tests/strings/find_tests.cpp
@@ -20,6 +20,7 @@
 #include <cudf/strings/find.hpp>
 #include <cudf/strings/strings_column_view.hpp>
 
+#include <tests/strings/utilities.h>
 #include <tests/utilities/base_fixture.hpp>
 #include <tests/utilities/column_utilities.hpp>
 #include <tests/utilities/column_wrapper.hpp>
@@ -215,3 +216,321 @@ TEST_P(FindParmsTest, Find)
 INSTANTIATE_TEST_CASE_P(StringsFindTest,
                         FindParmsTest,
                         testing::ValuesIn(std::array<int32_t, 4>{0, 1, 2, 3}));
+
+struct StringsSubstringIndexWithScalarTest : public cudf::test::BaseFixture {
+};
+
+TEST_F(StringsSubstringIndexWithScalarTest, ZeroSizeStringsColumn)
+{
+  cudf::column_view col0(cudf::data_type{cudf::STRING}, 0, nullptr, nullptr, 0);
+  auto strings_view = cudf::strings_column_view(col0);
+
+  auto results = cudf::strings::substring_index(strings_view, cudf::string_scalar("foo"), 1);
+  cudf::test::expect_strings_empty(results->view());
+}
+
+TEST_F(StringsSubstringIndexWithScalarTest, AllEmpty)
+{
+  auto strings_col  = cudf::test::strings_column_wrapper({"", "", "", "", ""});
+  auto strings_view = cudf::strings_column_view(strings_col);
+
+  auto exp_results = cudf::test::strings_column_wrapper({"", "", "", "", ""});
+
+  auto results = cudf::strings::substring_index(strings_view, cudf::string_scalar("e"), -1);
+  cudf::test::expect_columns_equal(*results, exp_results, true);
+}
+
+TEST_F(StringsSubstringIndexWithScalarTest, EmptyDelimiter)
+{
+  auto strings_col = cudf::test::strings_column_wrapper(
+    {"Héllo", "thesé", "", "lease", "tést strings", ""}, {true, true, false, true, true, true});
+  ;
+  auto strings_view = cudf::strings_column_view(strings_col);
+
+  auto exp_results = cudf::test::strings_column_wrapper({"", "", "", "", "", ""},
+                                                        {true, true, false, true, true, true});
+  auto results     = cudf::strings::substring_index(strings_view, cudf::string_scalar(""), 1);
+  cudf::test::expect_columns_equal(*results, exp_results, true);
+}
+
+TEST_F(StringsSubstringIndexWithScalarTest, ZeroCount)
+{
+  auto strings_col = cudf::test::strings_column_wrapper(
+    {"Héllo", "thesé", "", "lease", "tést strings", ""}, {true, true, false, true, true, true});
+  ;
+  auto strings_view = cudf::strings_column_view(strings_col);
+
+  auto exp_results = cudf::test::strings_column_wrapper({"", "", "", "", "", ""},
+                                                        {false, false, false, false, false, false});
+
+  auto results = cudf::strings::substring_index(strings_view, cudf::string_scalar("é"), 0);
+  cudf::test::expect_columns_equal(*results, exp_results, true);
+}
+
+TEST_F(StringsSubstringIndexWithScalarTest, SearchDelimiter)
+{
+  auto strings_col = cudf::test::strings_column_wrapper(
+    {"Héllo", "thesé", "", "lease", "tést strings", ""}, {true, true, false, true, true, true});
+  ;
+  auto strings_view = cudf::strings_column_view(strings_col);
+
+  {
+    auto exp_results = cudf::test::strings_column_wrapper({"H", "thes", "", "lease", "t", ""},
+                                                          {true, true, false, true, true, true});
+
+    auto results = cudf::strings::substring_index(strings_view, cudf::string_scalar("é"), 1);
+    cudf::test::expect_columns_equal(*results, exp_results, true);
+  }
+
+  {
+    auto exp_results = cudf::test::strings_column_wrapper(
+      {"llo", "", "", "lease", "st strings", ""}, {true, true, false, true, true, true});
+
+    auto results = cudf::strings::substring_index(strings_view, cudf::string_scalar("é"), -1);
+    cudf::test::expect_columns_equal(*results, exp_results, true);
+  }
+
+  {
+    auto results = cudf::strings::substring_index(strings_view, cudf::string_scalar("é"), 2);
+    cudf::test::expect_columns_equal(*results, strings_view.parent(), true);
+  }
+
+  {
+    auto results = cudf::strings::substring_index(strings_view, cudf::string_scalar("é"), -2);
+    cudf::test::expect_columns_equal(*results, strings_view.parent(), true);
+  }
+
+  {
+    auto col0 = cudf::test::strings_column_wrapper(
+      {"Hello LLollooogh", "oopppllo", "", "oppollo", "polo lop apploo po", ""},
+      {true, true, false, true, true, true});
+
+    auto exp_results = cudf::test::strings_column_wrapper({"Hello LL", "o", "", "opp", "pol", ""},
+                                                          {true, true, false, true, true, true});
+
+    auto results =
+      cudf::strings::substring_index(cudf::strings_column_view{col0}, cudf::string_scalar("o"), 2);
+    cudf::test::expect_columns_equal(*results, exp_results, true);
+  }
+
+  {
+    auto col0 = cudf::test::strings_column_wrapper(
+      {"Hello LLollooogh", "oopppllo", "", "oppollo", "polo lop apploo po", ""},
+      {true, true, false, true, true, true});
+
+    auto exp_results = cudf::test::strings_column_wrapper({"ogh", "pppllo", "", "llo", " po", ""},
+                                                          {true, true, false, true, true, true});
+
+    auto results =
+      cudf::strings::substring_index(cudf::strings_column_view{col0}, cudf::string_scalar("o"), -2);
+    cudf::test::expect_columns_equal(*results, exp_results, true);
+  }
+
+  {
+    auto col0 = cudf::test::strings_column_wrapper(
+      {"Héllo HélloHéllo", "Hélloééééé", "", "éééééé", "poloéé lopéé applooéé po", ""},
+      {true, true, false, true, true, true});
+
+    auto exp_results = cudf::test::strings_column_wrapper(
+      {"Héllo HélloHéllo", "Hélloééééé", "", "éééé", "poloéé lopéé apploo", ""},
+      {true, true, false, true, true, true});
+
+    auto results =
+      cudf::strings::substring_index(cudf::strings_column_view{col0}, cudf::string_scalar("éé"), 3);
+    cudf::test::expect_columns_equal(*results, exp_results, true);
+  }
+
+  {
+    auto col0 = cudf::test::strings_column_wrapper(
+      {"Héllo HélloHéllo", "Hélloééééé", "", "éééééé", "poloéé lopéé applooéé po", ""},
+      {true, true, false, true, true, true});
+
+    auto exp_results = cudf::test::strings_column_wrapper(
+      {"Héllo HélloHéllo", "Hélloééééé", "", "éééé", " lopéé applooéé po", ""},
+      {true, true, false, true, true, true});
+
+    auto results = cudf::strings::substring_index(
+      cudf::strings_column_view{col0}, cudf::string_scalar("éé"), -3);
+    cudf::test::expect_columns_equal(*results, exp_results, true);
+  }
+
+  {
+    auto col0 = cudf::test::strings_column_wrapper({"www.yahoo.com",
+                                                    "www.apache..org",
+                                                    "tennis...com",
+                                                    "nvidia....com",
+                                                    "google...........com",
+                                                    "microsoft...c.....co..m"});
+
+    auto exp_results = cudf::test::strings_column_wrapper(
+      {"www.yahoo.com", "www.apache.", "tennis..", "nvidia..", "google..", "microsoft.."});
+
+    auto results =
+      cudf::strings::substring_index(cudf::strings_column_view{col0}, cudf::string_scalar("."), 3);
+    cudf::test::expect_columns_equal(*results, exp_results, true);
+  }
+
+  {
+    auto col0 = cudf::test::strings_column_wrapper({"www.yahoo.com",
+                                                    "www.apache..org",
+                                                    "tennis..com",
+                                                    "nvidia....com",
+                                                    "google...........com",
+                                                    ".",
+                                                    "microsoft...c.....co..m"});
+
+    auto exp_results = cudf::test::strings_column_wrapper(
+      {"www.yahoo.com", "www.apache..org", "tennis..com", "..com", "..com", ".", "co..m"});
+
+    auto results = cudf::strings::substring_index(
+      cudf::strings_column_view{col0}, cudf::string_scalar(".."), -2);
+    cudf::test::expect_columns_equal(*results, exp_results, true);
+  }
+}
+
+struct StringsSubstringIndexWithColumnTest : public cudf::test::BaseFixture {
+};
+
+TEST_F(StringsSubstringIndexWithColumnTest, ZeroSizeStringsColumn)
+{
+  cudf::column_view col0(cudf::data_type{cudf::STRING}, 0, nullptr, nullptr, 0);
+  auto strings_view = cudf::strings_column_view(col0);
+
+  auto results = cudf::strings::substring_index(strings_view, strings_view, 1);
+  // Check empty column
+  cudf::test::expect_strings_empty(results->view());
+}
+
+TEST_F(StringsSubstringIndexWithColumnTest, GenerateExceptions)
+{
+  auto col0      = cudf::test::strings_column_wrapper({"", "", "", "", ""});
+  auto delim_col = cudf::test::strings_column_wrapper({"", "foo", "bar", "."});
+
+  EXPECT_THROW(cudf::strings::substring_index(
+                 cudf::strings_column_view{col0}, cudf::strings_column_view{delim_col}, -1),
+               cudf::logic_error);
+}
+
+TEST_F(StringsSubstringIndexWithColumnTest, ColumnAllEmpty)
+{
+  auto col0      = cudf::test::strings_column_wrapper({"", "", "", "", ""});
+  auto delim_col = cudf::test::strings_column_wrapper({"", "foo", "bar", ".", "/"});
+
+  auto exp_results = cudf::test::strings_column_wrapper({"", "", "", "", ""});
+
+  auto results = cudf::strings::substring_index(
+    cudf::strings_column_view{col0}, cudf::strings_column_view{delim_col}, -1);
+  cudf::test::expect_columns_equal(*results, exp_results, true);
+}
+
+TEST_F(StringsSubstringIndexWithColumnTest, DelimiterAllEmptyAndInvalid)
+{
+  auto col0 = cudf::test::strings_column_wrapper(
+    {"Héllo", "thesé", "", "lease", "tést strings", ""}, {true, true, false, true, true, true});
+  auto delim_col = cudf::test::strings_column_wrapper({"", "", "", "", "", ""},
+                                                      {true, false, true, false, true, false});
+
+  auto exp_results = cudf::test::strings_column_wrapper({"", "", "", "", "", ""},
+                                                        {true, false, false, false, true, false});
+
+  auto results = cudf::strings::substring_index(
+    cudf::strings_column_view{col0}, cudf::strings_column_view{delim_col}, 1);
+  cudf::test::expect_columns_equal(*results, exp_results, true);
+}
+
+TEST_F(StringsSubstringIndexWithColumnTest, ZeroDelimiterCount)
+{
+  auto col0 = cudf::test::strings_column_wrapper(
+    {"Héllo", "thesé", "", "lease", "tést strings", ""}, {true, true, false, true, true, true});
+  auto delim_col = cudf::test::strings_column_wrapper({"", "", "", "", "", ""},
+                                                      {true, false, true, false, true, false});
+
+  auto exp_results = cudf::test::strings_column_wrapper({"", "", "", "", "", ""},
+                                                        {false, false, false, false, false, false});
+
+  auto results = cudf::strings::substring_index(
+    cudf::strings_column_view{col0}, cudf::strings_column_view{delim_col}, 0);
+  cudf::test::expect_columns_equal(*results, exp_results, true);
+}
+
+TEST_F(StringsSubstringIndexWithColumnTest, SearchDelimiter)
+{
+  {
+    auto col0 = cudf::test::strings_column_wrapper(
+      {"H™élloi ™◎oo™ff™", "thesé", "", "lease™", "tést strings", "™"},
+      {true, true, false, true, true, true});
+    auto delim_col = cudf::test::strings_column_wrapper({"™", "™", "", "e", "t", "™"});
+
+    auto exp_results = cudf::test::strings_column_wrapper({"H", "thesé", "", "l", "", ""},
+                                                          {true, true, false, true, true, true});
+
+    auto results = cudf::strings::substring_index(
+      cudf::strings_column_view{col0}, cudf::strings_column_view{delim_col}, 1);
+    cudf::test::expect_columns_equal(*results, exp_results, true);
+  }
+
+  {
+    auto col0      = cudf::test::strings_column_wrapper({"H™élloﬀ ﬀﬀi ™◎ooﬀ™ff™",
+                                                    "tﬀﬀhﬀesé",
+                                                    "",
+                                                    "lﬀ fooﬀ ffﬀ eaﬀse™",
+                                                    "tést ﬀstri.nﬀgs",
+                                                    "ﬀﬀ ™ ﬀﬀ ﬀ"},
+                                                   {true, true, false, true, true, true});
+    auto delim_col = cudf::test::strings_column_wrapper({"ﬀ™", "ﬀ", "", "ﬀ ", "t", "ﬀ ™"});
+
+    auto exp_results = cudf::test::strings_column_wrapper(
+      {"ff™", "esé", "", "eaﬀse™", "ri.nﬀgs", " ﬀﬀ ﬀ"}, {true, true, false, true, true, true});
+
+    auto results = cudf::strings::substring_index(
+      cudf::strings_column_view{col0}, cudf::strings_column_view{delim_col}, -1);
+    cudf::test::expect_columns_equal(*results, exp_results, true);
+  }
+
+  {
+    auto col0 = cudf::test::strings_column_wrapper({"H™élloﬀ ﬀﬀi fooﬀ™ barﬀ™ gooﬀ™ ™◎ooﬀ™ff™",
+                                                    "tﬀﬀhﬀesé",
+                                                    "",
+                                                    "lﬀ fooﬀ ffﬀ eaﬀse™",
+                                                    "tést ﬀ™ffﬀ™ﬀ™ffﬀstri.ﬀ™ffﬀ™nﬀgs",
+                                                    "ﬀﬀ ™ ﬀﬀ ﬀ™ ﬀ™ﬀ™ﬀ™ ﬀ™ﬀ™ ﬀ"},
+                                                   {true, true, false, true, true, true});
+    auto delim_col = cudf::test::strings_column_wrapper({"ﬀ™", "ﬀ", "", "e ", "ﬀ™ff", "ﬀ™ﬀ™"},
+                                                        {true, true, false, true, true, true});
+
+    auto exp_results = cudf::test::strings_column_wrapper({"H™élloﬀ ﬀﬀi fooﬀ™ barﬀ™ goo",
+                                                           "tﬀﬀh",
+                                                           "",
+                                                           "lﬀ fooﬀ ffﬀ eaﬀse™",
+                                                           "tést ﬀ™ffﬀ™ﬀ™ffﬀstri.",
+                                                           "ﬀﬀ ™ ﬀﬀ ﬀ™ ﬀ™ﬀ™ﬀ™ ﬀ™ﬀ™ ﬀ"},
+                                                          {true, true, false, true, true, true});
+
+    auto results = cudf::strings::substring_index(
+      cudf::strings_column_view{col0}, cudf::strings_column_view{delim_col}, 3);
+    cudf::test::expect_columns_equal(*results, exp_results, true);
+  }
+
+  {
+    auto col0 = cudf::test::strings_column_wrapper({"H™élloﬀ ﬀﬀi fooﬀ™ barﬀ™ gooﬀ™ ™◎ooﬀ™ff™",
+                                                    "tﬀﬀhﬀesé",
+                                                    "",
+                                                    "lﬀ fooﬀ ffﬀ eaﬀse™",
+                                                    "tést ﬀ™ffﬀ™ﬀ™ffﬀstri.ﬀ™ffﬀ™nﬀgs",
+                                                    "ﬀﬀ ™ ﬀﬀ ﬀ™ ﬀ™ﬀ™ﬀ™ ﬀ™ﬀ™ ﬀ"});
+    auto delim_col = cudf::test::strings_column_wrapper({"ﬀ™", "ﬀ", "", "e ", "ﬀ™ff", "ﬀ™ﬀ™"},
+                                                        {true, true, false, true, true, true});
+
+    auto exp_results = cudf::test::strings_column_wrapper({" gooﬀ™ ™◎ooﬀ™ff™",
+                                                           "ﬀhﬀesé",
+                                                           "",
+                                                           "lﬀ fooﬀ ffﬀ eaﬀse™",
+                                                           "ﬀ™ﬀ™ffﬀstri.ﬀ™ffﬀ™nﬀgs",
+                                                           "ﬀﬀ ™ ﬀﬀ ﬀ™ ﬀ™ﬀ™ﬀ™ ﬀ™ﬀ™ ﬀ"},
+                                                          {true, true, false, true, true, true});
+
+    auto results = cudf::strings::substring_index(
+      cudf::strings_column_view{col0}, cudf::strings_column_view{delim_col}, -3);
+    cudf::test::expect_columns_equal(*results, exp_results, true);
+  }
+}

From 41e2ea9225fac3ac6e95000589fb52105b8ede7a Mon Sep 17 00:00:00 2001
From: Sriram Chandramouli <sriram_chandramouli@hotmail.com>
Date: Wed, 27 May 2020 16:16:30 +0000
Subject: [PATCH 3/9] - updates after upstream merge

---
 cpp/src/strings/find.cu | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/cpp/src/strings/find.cu b/cpp/src/strings/find.cu
index 589bb976e08..c6a17f70a9d 100644
--- a/cpp/src/strings/find.cu
+++ b/cpp/src/strings/find.cu
@@ -384,7 +384,7 @@ std::unique_ptr<column> substring_index(strings_column_view const& strings,
   auto colview     = *colview_ptr;
   if (colview.nullable()) {
     return substring_index_functor{}(
-      experimental::detail::make_pair_iterator<string_view, true>(colview),
+      cudf::detail::make_pair_iterator<string_view, true>(colview),
       delimiter_itr,
       count,
       mr,
@@ -392,7 +392,7 @@ std::unique_ptr<column> substring_index(strings_column_view const& strings,
       strings_count);
   } else {
     return substring_index_functor{}(
-      experimental::detail::make_pair_iterator<string_view, false>(colview),
+      cudf::detail::make_pair_iterator<string_view, false>(colview),
       delimiter_itr,
       count,
       mr,
@@ -412,7 +412,7 @@ std::unique_ptr<column> substring_index(strings_column_view const& strings,
 {
   CUDF_FUNC_RANGE();
   return detail::substring_index(
-    strings, experimental::detail::make_pair_iterator<string_view>(delimiter), count, mr);
+    strings, cudf::detail::make_pair_iterator<string_view>(delimiter), count, mr);
 }
 
 std::unique_ptr<column> substring_index(strings_column_view const& strings,
@@ -429,12 +429,12 @@ std::unique_ptr<column> substring_index(strings_column_view const& strings,
   return (delimiters_dev_view.nullable())
            ? detail::substring_index(
                strings,
-               experimental::detail::make_pair_iterator<string_view, true>(delimiters_dev_view),
+               cudf::detail::make_pair_iterator<string_view, true>(delimiters_dev_view),
                count,
                mr)
            : detail::substring_index(
                strings,
-               experimental::detail::make_pair_iterator<string_view, false>(delimiters_dev_view),
+               cudf::detail::make_pair_iterator<string_view, false>(delimiters_dev_view),
                count,
                mr);
 }

From fe8d3ea8ee5a2e05437288f91cccf2b5f5a763df Mon Sep 17 00:00:00 2001
From: Sriram Chandramouli <sriram_chandramouli@hotmail.com>
Date: Wed, 27 May 2020 16:17:50 +0000
Subject: [PATCH 4/9] - fix code style

---
 cpp/src/strings/find.cu | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/cpp/src/strings/find.cu b/cpp/src/strings/find.cu
index c6a17f70a9d..a55a179daf8 100644
--- a/cpp/src/strings/find.cu
+++ b/cpp/src/strings/find.cu
@@ -383,21 +383,19 @@ std::unique_ptr<column> substring_index(strings_column_view const& strings,
   auto colview_ptr = column_device_view::create(strings.parent(), stream);
   auto colview     = *colview_ptr;
   if (colview.nullable()) {
-    return substring_index_functor{}(
-      cudf::detail::make_pair_iterator<string_view, true>(colview),
-      delimiter_itr,
-      count,
-      mr,
-      stream,
-      strings_count);
+    return substring_index_functor{}(cudf::detail::make_pair_iterator<string_view, true>(colview),
+                                     delimiter_itr,
+                                     count,
+                                     mr,
+                                     stream,
+                                     strings_count);
   } else {
-    return substring_index_functor{}(
-      cudf::detail::make_pair_iterator<string_view, false>(colview),
-      delimiter_itr,
-      count,
-      mr,
-      stream,
-      strings_count);
+    return substring_index_functor{}(cudf::detail::make_pair_iterator<string_view, false>(colview),
+                                     delimiter_itr,
+                                     count,
+                                     mr,
+                                     stream,
+                                     strings_count);
   }
 }
 

From 3554406faee719ceec12fcfb327f03daa2f5bae4 Mon Sep 17 00:00:00 2001
From: Sriram Chandramouli <sriram_chandramouli@hotmail.com>
Date: Wed, 27 May 2020 22:28:31 +0000
Subject: [PATCH 5/9] - add changelog entry

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 79f18ee1b27..15f15f4627e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -35,6 +35,7 @@
 - PR #5203 Add Java bindings for is_integer and is_float
 - PR #5205 Add ci test for libcudf, libnvstrings headers existence check in meta.yml
 - PR #5293 Add Java bindings for replace_with_backrefs
+- PR #5303 Add substring_index functionality for strings
 
 ## Improvements
 

From e9c5e6ae10cc48b1ba03ba7aa33387a8d004dde5 Mon Sep 17 00:00:00 2001
From: Sriram Chandramouli <sriram_chandramouli@hotmail.com>
Date: Fri, 29 May 2020 18:37:48 +0000
Subject: [PATCH 6/9] - rename method to `slice_strings` - reuse some of the
 facility `slice_strings` already has to build the substrings

---
 CHANGELOG.md                           |   2 +-
 cpp/include/cudf/strings/find.hpp      |  88 -------
 cpp/include/cudf/strings/substring.hpp |  87 +++++++
 cpp/src/strings/find.cu                | 151 ------------
 cpp/src/strings/substring.cu           | 220 ++++++++++++++---
 cpp/tests/strings/find_tests.cpp       | 319 -------------------------
 cpp/tests/strings/substring_tests.cpp  | 317 ++++++++++++++++++++++++
 7 files changed, 587 insertions(+), 597 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9288e2018c6..cb7be405063 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,7 @@
 ## New Features
 
 - PR #5222 Adding clip feature support to DataFrame and Series
+- PR #5303 Add substring_index functionality for strings
 
 ## Improvements
 
@@ -56,7 +57,6 @@
 - PR #5203 Add Java bindings for is_integer and is_float
 - PR #5205 Add ci test for libcudf, libnvstrings headers existence check in meta.yml
 - PR #5293 Add Java bindings for replace_with_backrefs
-- PR #5303 Add substring_index functionality for strings
 
 ## Improvements
 
diff --git a/cpp/include/cudf/strings/find.hpp b/cpp/include/cudf/strings/find.hpp
index a9b27b7cfde..ab6afc82094 100644
--- a/cpp/include/cudf/strings/find.hpp
+++ b/cpp/include/cudf/strings/find.hpp
@@ -141,94 +141,6 @@ std::unique_ptr<column> ends_with(
   string_scalar const& target,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
 
-/**
- * @brief Returns a column of strings that searches for the @p delimiter @p count number of
- * times in the source @p strings forward if @p count is positive or backwards if @p count is
- * negative. If @p count is positive, it returns a substring from the start of the source @p
- * strings up until @p count occurrence of the @delimiter not including the @p delimiter.
- * If @p count is negative, it returns a substring from the start of the @p count occurrence of
- * the @delimiter in the source @p strings past the delimiter until the end of the string.
- *
- * The search for @delimiter in @p strings is case sensitive.
- * If the @p count is 0, every row in the output column will be null.
- * If the row value of @p strings is null, the row value in the output column will be null.
- * If the @p delimiter is invalid or null, every row in the output column will be null.
- * If the @p delimiter or the column value for a row is empty, the row value in the output
- * column will be empty.
- * If @p count occurrences of @p delimiter isn't found, the row value in the output column will
- * be the row value from the input @p strings column.
- *
- * @code{.pseudo}
- * Example:
- * in_s = ['www.nvidia.com', null, 'www.google.com', '', 'foo' ]
- * r = substring_index(in_s, '.', 1)
- * r is ['www', null, 'www', '', 'foo']
- *
- * in_s = ['www.nvidia.com', null, 'www.google.com', '', 'foo' ]
- * r = substring_index(in_s, '.', -2)
- * r is ['nvidia.com', null, 'google.com', '', 'foo']
- * @endcode
- *
- * @param strings Strings instance for this operation.
- * @param delimiter UTF-8 encoded string to search for in each string.
- * @param count Number of times to search for delimiter in each string. If the value is positive,
- *              forward search of delimiter is performed; else, a backward search is performed.
- * @param mr Resource for allocating device memory.
- * @return New strings column containing the substrings.
- */
-std::unique_ptr<column> substring_index(
-  strings_column_view const& strings,
-  string_scalar const& delimiter,
-  size_type count,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
-
-/**
- * @brief Returns a column of strings that searches the delimiter for each row from
- * @p delimiter_strings @p count number of times in the source @p strings forward if @p count
- * is positive or backwards if @p count is negative. If @p count is positive, it returns a
- * substring from the start of the source @p strings up until @p count occurrence of the
- * delimiter for that row not including that delimiter. If @p count is negative, it returns a
- * substring from the start of the @p count occurrence of the delimiter for that row in the
- * source @p strings past the delimiter until the end of the string.
- *
- * The search for @p delimiter_strings in @p strings is case sensitive.
- * If the @p count is 0, every row in the output column will be null.
- * If the row value of @p strings is null, the row value in the output column will be null.
- * If the row value from @p delimiter_strings is invalid or null, the row value in the
- * output column will be null.
- * If the row value from @p delimiter_strings or the column value for a row is empty, the
- * row value in the output column will be empty.
- * If @p count occurrences of delimiter isn't found, the row value in the output column will
- * be the row value from the input @p strings column.
- *
- * @code{.pseudo}
- * Example:
- * in_s = ['www.nvidia.com', null, 'www.google.com', '', 'foo..bar....goo' ]
- * delimiters = ['.', '..', '', null, '..']
- * r = substring_index(in_s, delimiters, 2)
- * r is ['www.nvidia', null, '', null, 'foo..bar']
- *
- * in_s = ['www.nvidia.com', null, 'www.google.com', '', 'foo..bar....goo', 'apache.org' ]
- * delimiters = ['.', '..', '', null, '..', '.']
- * r = substring_index(in_s, delimiters, -2)
- * r is ['nvidia.com', null, '', null, '..goo', 'apache.org']
- * @endcode
- *
- * @throw cudf::logic_error if the number of rows in @p strings and @delimiter_strings do not match.
- *
- * @param strings Strings instance for this operation.
- * @param delimiter_strings UTF-8 encoded string for each row.
- * @param count Number of times to search for delimiter in each string. If the value is positive,
- *              forward search of delimiter is performed; else, a backward search is performed.
- * @param mr Resource for allocating device memory.
- * @return New strings column containing the substrings.
- */
-std::unique_ptr<column> substring_index(
-  strings_column_view const& strings,
-  strings_column_view const& delimiter_strings,
-  size_type count,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
-
 /** @} */  // end of doxygen group
 }  // namespace strings
 }  // namespace cudf
diff --git a/cpp/include/cudf/strings/substring.hpp b/cpp/include/cudf/strings/substring.hpp
index d438fd4cc29..db27154f7b7 100644
--- a/cpp/include/cudf/strings/substring.hpp
+++ b/cpp/include/cudf/strings/substring.hpp
@@ -104,6 +104,93 @@ std::unique_ptr<column> slice_strings(
   column_view const& stops,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
 
+/**
+ * @brief Returns a column of strings after searching for @p delimiter @p count number of
+ * times in the source @p strings forward if @p count is positive or backwards if @p count is
+ * negative. If @p count is positive, it returns a substring from the start of the source @p
+ * strings up until @p count occurrence of the @p delimiter not including the @p delimiter.
+ * If @p count is negative, it returns a substring from the start of the @p count occurrence of
+ * the @p delimiter in the source @p strings past the delimiter until the end of the string.
+ *
+ * The search for @p delimiter in @p strings is case sensitive.
+ * If the row value of @p strings is null, the row value in the output column will be null.
+ * If @p count is 0 or @p delimiter is invalid, every non-null output row will be an empty string.
+ * If the @p delimiter or the column value for a row is empty, the row value in the output
+ * column will be empty.
+ * If @p count occurrences of @p delimiter isn't found, the row value in the output column will
+ * be the row value from the input @p strings column.
+ *
+ * @code{.pseudo}
+ * Example:
+ * in_s = ['www.nvidia.com', null, 'www.google.com', '', 'foo']
+ * r = slice_strings(in_s, '.', 1)
+ * r =    ['www',            null, 'www',            '', 'foo']
+ *
+ * in_s = ['www.nvidia.com', null, 'www.google.com', '', 'foo']
+ * r = slice_strings(in_s, '.', -2)
+ * r =    ['nvidia.com',     null, 'google.com',     '', 'foo']
+ * @endcode
+ *
+ * @param strings Strings instance for this operation.
+ * @param delimiter UTF-8 encoded string to search for in each string.
+ * @param count Number of times to search for delimiter in each string. If the value is positive,
+ *              forward search of delimiter is performed; else, a backward search is performed.
+ * @param mr Resource for allocating device memory.
+ * @return New strings column containing the substrings.
+ */
+std::unique_ptr<column> slice_strings(
+  strings_column_view const& strings,
+  string_scalar const& delimiter,
+  size_type count,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
+
+/**
+ * @brief Returns a column of strings after searching the delimiter defined per row from
+ * @p delimiter_strings @p count number of times in the source @p strings forward if @p count
+ * is positive or backwards if @p count is negative. If @p count is positive, it returns a
+ * substring from the start of the source @p strings up until @p count occurrence of the
+ * delimiter for that row not including that delimiter. If @p count is negative, it returns a
+ * substring from the start of the @p count occurrence of the delimiter for that row in the
+ * source @p strings past the delimiter until the end of the string.
+ *
+ * The search for @p delimiter_strings in @p strings is case sensitive.
+ * If the @p count is 0, every row in the output column will be an empty string.
+ * If the row value of @p strings is null, the row value in the output column will be null.
+ * If the row value from @p delimiter_strings is invalid or null, the row value in the
+ * output column will be an empty string.
+ * If the row value from @p delimiter_strings or the column value for a row is empty, the
+ * row value in the output column will be empty.
+ * If @p count occurrences of delimiter isn't found, the row value in the output column will
+ * be the row value from the input @p strings column.
+ *
+ * @code{.pseudo}
+ * Example:
+ * in_s =       ['www.nvidia.com', null, 'www.google.com', 'bar', 'foo..bar....goo']
+ * delimiters = ['.',              '..', '',               null,  '..']
+ * r = slice_strings(in_s, delimiters, 2)
+ * r =          ['www.nvidia',     null, '',               '',   'foo..bar']
+ *
+ * in_s =       ['www.nvidia.com', null, 'www.google.com', '',  'foo..bar....goo', 'apache.org']
+ * delimiters = ['.',              '..', '',               null,'..',              '.']
+ * r = slice_strings(in_s, delimiters, -2)
+ * r =          ['nvidia.com',     null, '',               '',  '..goo',           'apache.org']
+ * @endcode
+ *
+ * @throw cudf::logic_error if the row counts of @p strings and @p delimiter_strings do not match.
+ *
+ * @param strings Strings instance for this operation.
+ * @param delimiter_strings UTF-8 encoded string for each row.
+ * @param count Number of times to search for delimiter in each string. If the value is positive,
+ *              forward search of delimiter is performed; else, a backward search is performed.
+ * @param mr Resource for allocating device memory.
+ * @return New strings column containing the substrings.
+ */
+std::unique_ptr<column> slice_strings(
+  strings_column_view const& strings,
+  strings_column_view const& delimiter_strings,
+  size_type count,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
+
 /** @} */  // end of doxygen group
 }  // namespace strings
 }  // namespace cudf
diff --git a/cpp/src/strings/find.cu b/cpp/src/strings/find.cu
index a55a179daf8..c791f8f7ab2 100644
--- a/cpp/src/strings/find.cu
+++ b/cpp/src/strings/find.cu
@@ -16,9 +16,7 @@
 
 #include <cudf/column/column_device_view.cuh>
 #include <cudf/column/column_factories.hpp>
-#include <cudf/detail/iterator.cuh>
 #include <cudf/detail/nvtx/ranges.hpp>
-#include <cudf/scalar/scalar_device_view.cuh>
 #include <cudf/scalar/scalar_factories.hpp>
 #include <cudf/strings/detail/utilities.hpp>
 #include <cudf/strings/find.hpp>
@@ -288,154 +286,5 @@ std::unique_ptr<column> ends_with(strings_column_view const& strings,
   return detail::ends_with(strings, target, mr);
 }
 
-// For substring_index APIs
-namespace detail {
-// Internal helper class
-namespace {
-
-struct substring_index_functor {
-  template <typename ColItrT, typename DelimiterItrT>
-  std::unique_ptr<column> operator()(ColItrT const col_itr,
-                                     DelimiterItrT const delim_itr,
-                                     size_type delimiter_count,
-                                     rmm::mr::device_memory_resource* mr,
-                                     cudaStream_t stream,
-                                     size_type strings_count) const
-  {
-    // Shallow copy of the resultant strings
-    rmm::device_vector<string_view> out_col_strings(strings_count);
-
-    // Invalid output column strings - null rows
-    string_view const invalid_str{nullptr, 0};
-
-    thrust::transform(
-      rmm::exec_policy(stream)->on(stream),
-      col_itr,
-      col_itr + strings_count,
-      delim_itr,
-      out_col_strings.data().get(),
-      [delimiter_count, invalid_str] __device__(auto col_val_pair, auto delim_val_pair) {
-        // If the column value for this row or the delimiter is null or if the delimiter count is 0,
-        // result is null
-        if (!col_val_pair.second || !delim_val_pair.second || delimiter_count == 0)
-          return invalid_str;
-        auto col_val = col_val_pair.first;
-
-        // If the global delimiter or the row specific delimiter or if the column value for the row
-        // is empty, value is empty.
-        if (delim_val_pair.first.empty() || col_val.empty()) return string_view{};
-
-        auto delim_val = delim_val_pair.first;
-
-        auto const col_val_len   = col_val.length();
-        auto const delimiter_len = delim_val.length();
-
-        auto nsearches      = (delimiter_count < 0) ? -delimiter_count : delimiter_count;
-        size_type start_pos = 0;
-        size_type end_pos   = col_val_len;
-        string_view out_str{};
-
-        for (auto i = 0; i < nsearches; ++i) {
-          if (delimiter_count < 0) {
-            end_pos = col_val.rfind(delim_val, 0, end_pos);
-            if (end_pos == -1) {
-              out_str = col_val;
-              break;
-            }
-            if (i + 1 == nsearches)
-              out_str =
-                col_val.substr(end_pos + delimiter_len, col_val_len - end_pos - delimiter_len);
-          } else {
-            auto char_pos = col_val.find(delim_val, start_pos);
-            if (char_pos == -1) {
-              out_str = col_val;
-              break;
-            }
-            if (i + 1 == nsearches)
-              out_str = col_val.substr(0, char_pos);
-            else
-              start_pos = char_pos + delimiter_len;
-          }
-        }
-
-        return out_str.empty() ? string_view{} : out_str;
-      });
-
-    // Create an output column with the resultant strings
-    return make_strings_column(out_col_strings, invalid_str, stream, mr);
-  }
-};
-
-}  // namespace
-
-template <typename DelimiterItrT>
-std::unique_ptr<column> substring_index(strings_column_view const& strings,
-                                        DelimiterItrT const delimiter_itr,
-                                        size_type count,
-                                        rmm::mr::device_memory_resource* mr,
-                                        cudaStream_t stream = 0)
-{
-  auto strings_count = strings.size();
-  // If there aren't any rows, return an empty strings column
-  if (strings_count == 0) return strings::detail::make_empty_strings_column(mr, stream);
-
-  // Create device view of the column
-  auto colview_ptr = column_device_view::create(strings.parent(), stream);
-  auto colview     = *colview_ptr;
-  if (colview.nullable()) {
-    return substring_index_functor{}(cudf::detail::make_pair_iterator<string_view, true>(colview),
-                                     delimiter_itr,
-                                     count,
-                                     mr,
-                                     stream,
-                                     strings_count);
-  } else {
-    return substring_index_functor{}(cudf::detail::make_pair_iterator<string_view, false>(colview),
-                                     delimiter_itr,
-                                     count,
-                                     mr,
-                                     stream,
-                                     strings_count);
-  }
-}
-
-}  // namespace detail
-
-// external APIs
-
-std::unique_ptr<column> substring_index(strings_column_view const& strings,
-                                        string_scalar const& delimiter,
-                                        size_type count,
-                                        rmm::mr::device_memory_resource* mr)
-{
-  CUDF_FUNC_RANGE();
-  return detail::substring_index(
-    strings, cudf::detail::make_pair_iterator<string_view>(delimiter), count, mr);
-}
-
-std::unique_ptr<column> substring_index(strings_column_view const& strings,
-                                        strings_column_view const& delimiters,
-                                        size_type count,
-                                        rmm::mr::device_memory_resource* mr)
-{
-  CUDF_EXPECTS(strings.size() == delimiters.size(),
-               "Strings and delimiters column sizes do not match");
-
-  CUDF_FUNC_RANGE();
-  auto delimiters_dev_view_ptr = cudf::column_device_view::create(delimiters.parent(), 0);
-  auto delimiters_dev_view     = *delimiters_dev_view_ptr;
-  return (delimiters_dev_view.nullable())
-           ? detail::substring_index(
-               strings,
-               cudf::detail::make_pair_iterator<string_view, true>(delimiters_dev_view),
-               count,
-               mr)
-           : detail::substring_index(
-               strings,
-               cudf::detail::make_pair_iterator<string_view, false>(delimiters_dev_view),
-               count,
-               mr);
-}
-
 }  // namespace strings
 }  // namespace cudf
diff --git a/cpp/src/strings/substring.cu b/cpp/src/strings/substring.cu
index c4fa4a39297..13b7a965953 100644
--- a/cpp/src/strings/substring.cu
+++ b/cpp/src/strings/substring.cu
@@ -16,6 +16,7 @@
 
 #include <cudf/column/column_device_view.cuh>
 #include <cudf/column/column_factories.hpp>
+#include <cudf/detail/iterator.cuh>
 #include <cudf/detail/nvtx/ranges.hpp>
 #include <cudf/scalar/scalar_device_view.cuh>
 #include <cudf/strings/detail/utilities.hpp>
@@ -198,37 +199,28 @@ struct substring_from_fn {
   }
 };
 
-/**
- * Called by the type-dispatcher for resolving the position columns
- * (starts_column and stops_column) to actual types.
- */
 struct dispatch_substring_from_fn {
   /**
    * @brief Returns strings column with substrings based on the ranges in the
    * individual starts and stops column position values.
    */
-  template <typename PositionType,
-            std::enable_if_t<std::is_integral<PositionType>::value>* = nullptr>
-  std::unique_ptr<column> operator()(strings_column_view const& strings,
-                                     column_view const& starts_column,
-                                     column_view const& stops_column,
+  template <typename PositionType>
+  std::unique_ptr<column> operator()(column_device_view const& d_column,
+                                     size_type null_count,
+                                     PositionType const* starts,
+                                     PositionType const* stops,
                                      rmm::mr::device_memory_resource* mr,
                                      cudaStream_t stream) const
   {
-    const PositionType* starts = starts_column.data<PositionType>();
-    const PositionType* stops  = stops_column.data<PositionType>();
+    auto strings_count = d_column.size();
 
-    auto strings_count  = strings.size();
-    auto strings_column = column_device_view::create(strings.parent(), stream);
-    auto d_column       = *strings_column;
-
-    // copy the null mask
+    // Copy the null mask
     rmm::device_buffer null_mask;
-    size_type null_count = strings.null_count();
     if (d_column.nullable())
       null_mask = rmm::device_buffer(
         d_column.null_mask(), cudf::bitmask_allocation_size_bytes(strings_count), stream, mr);
-    // build offsets column
+
+    // Build offsets column
     auto offsets_transformer_itr =
       thrust::make_transform_iterator(thrust::make_counting_iterator<PositionType>(0),
                                       substring_from_fn<PositionType>{d_column, starts, stops});
@@ -237,7 +229,7 @@ struct dispatch_substring_from_fn {
     auto offsets_view  = offsets_column->view();
     auto d_new_offsets = offsets_view.template data<int32_t>();
 
-    // build chars column
+    // Build chars column
     cudf::size_type bytes = thrust::device_pointer_cast(d_new_offsets)[strings_count];
     auto chars_column     = cudf::strings::detail::create_chars_child_column(
       strings_count, null_count, bytes, mr, stream);
@@ -248,7 +240,7 @@ struct dispatch_substring_from_fn {
       thrust::make_counting_iterator<cudf::size_type>(0),
       strings_count,
       substring_from_fn<PositionType, ExecuteOp>{d_column, starts, stops, d_new_offsets, d_chars});
-    //
+
     return make_strings_column(strings_count,
                                std::move(offsets_column),
                                std::move(chars_column),
@@ -257,30 +249,114 @@ struct dispatch_substring_from_fn {
                                stream,
                                mr);
   }
-  //
+};
+
+// This functor is invoked to compute the substrings using start and end positional indices.
+// The type used to store the indices is inferred by invoking this functor through the
+// type dispatcher.
+struct compute_substrings {
+  template <typename PositionType,
+            std::enable_if_t<std::is_integral<PositionType>::value>* = nullptr>
+  std::unique_ptr<column> operator()(column_device_view const& d_column,
+                                     size_type null_count,
+                                     column_view const& starts_column,
+                                     column_view const& stops_column,
+                                     rmm::mr::device_memory_resource* mr,
+                                     cudaStream_t stream) const
+  {
+    return dispatch_substring_from_fn{}(d_column,
+                                        null_count,
+                                        starts_column.data<PositionType>(),
+                                        stops_column.data<PositionType>(),
+                                        mr,
+                                        stream);
+  }
+
   template <typename PositionType,
             std::enable_if_t<not std::is_integral<PositionType>::value>* = nullptr>
-  std::unique_ptr<column> operator()(strings_column_view const&,
-                                     column_view const&,
-                                     column_view const&,
-                                     rmm::mr::device_memory_resource*,
-                                     cudaStream_t) const
+  std::unique_ptr<column> operator()(column_device_view const& d_column,
+                                     size_type null_count,
+                                     column_view const& starts_column,
+                                     column_view const& stops_column,
+                                     rmm::mr::device_memory_resource* mr,
+                                     cudaStream_t stream) const
   {
     CUDF_FAIL("Positions values must be an integral type.");
   }
 };
 
-template <>
-std::unique_ptr<column> dispatch_substring_from_fn::operator()<bool>(
-  strings_column_view const&,
-  column_view const&,
-  column_view const&,
-  rmm::mr::device_memory_resource*,
-  cudaStream_t) const
-{
-  CUDF_FAIL("Positions values must not be bool type.");
-}
+// When slice_strings is invoked with a delimiter string and a delimiter count, we need to
+// compute the start and end indices of the substring. This functor accomplishes that.
+struct compute_substring_indices {
+  template <typename DelimiterItrT>
+  void operator()(column_device_view const& d_column,
+                  DelimiterItrT const delim_itr,
+                  size_type delimiter_count,
+                  size_type* start_char_pos,
+                  size_type* end_char_pos,
+                  rmm::mr::device_memory_resource* mr,
+                  cudaStream_t stream) const
+  {
+    auto strings_count = d_column.size();
+
+    thrust::for_each_n(
+      rmm::exec_policy(stream)->on(stream),
+      thrust::make_counting_iterator<size_type>(0),
+      strings_count,
+      [delim_itr, delimiter_count, start_char_pos, end_char_pos, d_column] __device__(
+        size_type idx) {
+        // If the column value for this row is null, result is null.
+        // If the delimiter count is 0, result is empty string.
+        if (d_column.is_null(idx) || !delimiter_count) { return; }
 
+        auto const& delim_val_pair = delim_itr[idx];
+        auto const& col_val        = d_column.element<string_view>(idx);
+
+        // If the global delimiter or the row specific delimiter is invalid or if it is empty, row
+        // value is empty.
+        // If the column value for the row is empty, the row value is empty.
+        if (!delim_val_pair.second || delim_val_pair.first.empty() || col_val.empty()) { return; }
+
+        auto const& delim_val = delim_val_pair.first;
+
+        auto const col_val_len   = col_val.length();
+        auto const delimiter_len = delim_val.length();
+
+        auto nsearches      = (delimiter_count < 0) ? -delimiter_count : delimiter_count;
+        size_type start_pos = 0;
+        size_type end_pos   = col_val_len;
+
+        for (auto i = 0; i < nsearches; ++i) {
+          if (delimiter_count < 0) {
+            end_pos = col_val.rfind(delim_val, 0, end_pos);
+            if (end_pos == -1) {
+              start_char_pos[idx] = 0;
+              end_char_pos[idx]   = col_val_len;
+              return;
+            }
+            if (i + 1 == nsearches) {
+              start_char_pos[idx] = end_pos + delimiter_len;
+              end_char_pos[idx]   = col_val_len;
+              return;
+            }
+          } else {
+            auto char_pos = col_val.find(delim_val, start_pos);
+            if (char_pos == -1) {
+              start_char_pos[idx] = 0;
+              end_char_pos[idx]   = col_val_len;
+              return;
+            }
+            if (i + 1 == nsearches) {
+              start_char_pos[idx] = 0;
+              end_char_pos[idx]   = char_pos;
+              return;
+            } else
+              start_pos = char_pos + delimiter_len;
+          }
+        }
+      });
+  }
+};
 }  // namespace
 
 //
@@ -301,17 +377,51 @@ std::unique_ptr<column> slice_strings(
                "Parameters starts and stops must be of the same type.");
   CUDF_EXPECTS(starts_column.null_count() == 0, "Parameter starts must not contain nulls.");
   CUDF_EXPECTS(stops_column.null_count() == 0, "Parameter stops must not contain nulls.");
+  CUDF_EXPECTS(starts_column.type().id() != data_type{BOOL8}.id(),
+               "Positions values must not be bool type.");
+  CUDF_EXPECTS(is_fixed_width(starts_column.type()), "Positions values must be an integral type.");
 
+  auto strings_column = column_device_view::create(strings.parent(), stream);
+  auto d_column       = *strings_column;
   // perhaps another candidate for index-normalizer
   return cudf::type_dispatcher(starts_column.type(),
-                               dispatch_substring_from_fn{},
-                               strings,
+                               compute_substrings{},
+                               d_column,
+                               strings.null_count(),
                                starts_column,
                                stops_column,
                                mr,
                                stream);
 }
 
+template <typename DelimiterItrT>
+std::unique_ptr<column> slice_strings(strings_column_view const& strings,
+                                      DelimiterItrT const delimiter_itr,
+                                      size_type count,
+                                      rmm::mr::device_memory_resource* mr,
+                                      cudaStream_t stream = 0)
+{
+  auto strings_count = strings.size();
+  // If there aren't any rows, return an empty strings column
+  if (strings_count == 0) return strings::detail::make_empty_strings_column(mr, stream);
+
+  // Allocate zero-initialized per-row start and end character positions
+  rmm::device_vector<size_type> start_char_pos_vec(strings_count, 0);
+  rmm::device_vector<size_type> end_char_pos_vec(strings_count, 0);
+  auto* start_char_pos = start_char_pos_vec.data().get();
+  auto* end_char_pos   = end_char_pos_vec.data().get();
+
+  auto strings_column = column_device_view::create(strings.parent(), stream);
+  auto d_column       = *strings_column;
+  // Compute the substring indices first
+  compute_substring_indices{}(
+    d_column, delimiter_itr, count, start_char_pos, end_char_pos, mr, stream);
+
+  // Extract the substrings using the indices next
+  return dispatch_substring_from_fn{}(
+    d_column, strings.null_count(), start_char_pos, end_char_pos, mr, stream);
+}
+
 }  // namespace detail
 
 // external API
@@ -325,5 +435,39 @@ std::unique_ptr<column> slice_strings(strings_column_view const& strings,
   return detail::slice_strings(strings, starts_column, stops_column, mr);
 }
 
+std::unique_ptr<column> slice_strings(strings_column_view const& strings,
+                                      string_scalar const& delimiter,
+                                      size_type count,
+                                      rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();
+  auto const itr = cudf::detail::make_pair_iterator<string_view>(delimiter);
+  return detail::slice_strings(strings, itr, count, mr, nullptr);  // nullptr: same as stream 0
+}
+
+std::unique_ptr<column> slice_strings(strings_column_view const& strings,
+                                      strings_column_view const& delimiters,
+                                      size_type count,
+                                      rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();
+  // Row i of `strings` is sliced using row i of `delimiters`, so the sizes must agree.
+  CUDF_EXPECTS(strings.size() == delimiters.size(),
+               "Strings and delimiters column sizes do not match");
+  auto delimiters_dev_view_ptr = cudf::column_device_view::create(delimiters.parent(), 0);
+  auto delimiters_dev_view     = *delimiters_dev_view_ptr;
+  return (delimiters_dev_view.nullable())  // `true` iterator only for a nullable column
+           ? detail::slice_strings(
+               strings,
+               cudf::detail::make_pair_iterator<string_view, true>(delimiters_dev_view),
+               count,
+               mr)
+           : detail::slice_strings(
+               strings,
+               cudf::detail::make_pair_iterator<string_view, false>(delimiters_dev_view),
+               count,
+               mr);
+}
+
 }  // namespace strings
 }  // namespace cudf
diff --git a/cpp/tests/strings/find_tests.cpp b/cpp/tests/strings/find_tests.cpp
index ab29a06c1d0..6a0d39757f0 100644
--- a/cpp/tests/strings/find_tests.cpp
+++ b/cpp/tests/strings/find_tests.cpp
@@ -20,7 +20,6 @@
 #include <cudf/strings/find.hpp>
 #include <cudf/strings/strings_column_view.hpp>
 
-#include <tests/strings/utilities.h>
 #include <tests/utilities/base_fixture.hpp>
 #include <tests/utilities/column_utilities.hpp>
 #include <tests/utilities/column_wrapper.hpp>
@@ -216,321 +215,3 @@ TEST_P(FindParmsTest, Find)
 INSTANTIATE_TEST_CASE_P(StringsFindTest,
                         FindParmsTest,
                         testing::ValuesIn(std::array<int32_t, 4>{0, 1, 2, 3}));
-
-struct StringsSubstringIndexWithScalarTest : public cudf::test::BaseFixture {
-};
-
-TEST_F(StringsSubstringIndexWithScalarTest, ZeroSizeStringsColumn)
-{
-  cudf::column_view col0(cudf::data_type{cudf::STRING}, 0, nullptr, nullptr, 0);
-  auto strings_view = cudf::strings_column_view(col0);
-
-  auto results = cudf::strings::substring_index(strings_view, cudf::string_scalar("foo"), 1);
-  cudf::test::expect_strings_empty(results->view());
-}
-
-TEST_F(StringsSubstringIndexWithScalarTest, AllEmpty)
-{
-  auto strings_col  = cudf::test::strings_column_wrapper({"", "", "", "", ""});
-  auto strings_view = cudf::strings_column_view(strings_col);
-
-  auto exp_results = cudf::test::strings_column_wrapper({"", "", "", "", ""});
-
-  auto results = cudf::strings::substring_index(strings_view, cudf::string_scalar("e"), -1);
-  cudf::test::expect_columns_equal(*results, exp_results, true);
-}
-
-TEST_F(StringsSubstringIndexWithScalarTest, EmptyDelimiter)
-{
-  auto strings_col = cudf::test::strings_column_wrapper(
-    {"Héllo", "thesé", "", "lease", "tést strings", ""}, {true, true, false, true, true, true});
-  ;
-  auto strings_view = cudf::strings_column_view(strings_col);
-
-  auto exp_results = cudf::test::strings_column_wrapper({"", "", "", "", "", ""},
-                                                        {true, true, false, true, true, true});
-  auto results     = cudf::strings::substring_index(strings_view, cudf::string_scalar(""), 1);
-  cudf::test::expect_columns_equal(*results, exp_results, true);
-}
-
-TEST_F(StringsSubstringIndexWithScalarTest, ZeroCount)
-{
-  auto strings_col = cudf::test::strings_column_wrapper(
-    {"Héllo", "thesé", "", "lease", "tést strings", ""}, {true, true, false, true, true, true});
-  ;
-  auto strings_view = cudf::strings_column_view(strings_col);
-
-  auto exp_results = cudf::test::strings_column_wrapper({"", "", "", "", "", ""},
-                                                        {false, false, false, false, false, false});
-
-  auto results = cudf::strings::substring_index(strings_view, cudf::string_scalar("é"), 0);
-  cudf::test::expect_columns_equal(*results, exp_results, true);
-}
-
-TEST_F(StringsSubstringIndexWithScalarTest, SearchDelimiter)
-{
-  auto strings_col = cudf::test::strings_column_wrapper(
-    {"Héllo", "thesé", "", "lease", "tést strings", ""}, {true, true, false, true, true, true});
-  ;
-  auto strings_view = cudf::strings_column_view(strings_col);
-
-  {
-    auto exp_results = cudf::test::strings_column_wrapper({"H", "thes", "", "lease", "t", ""},
-                                                          {true, true, false, true, true, true});
-
-    auto results = cudf::strings::substring_index(strings_view, cudf::string_scalar("é"), 1);
-    cudf::test::expect_columns_equal(*results, exp_results, true);
-  }
-
-  {
-    auto exp_results = cudf::test::strings_column_wrapper(
-      {"llo", "", "", "lease", "st strings", ""}, {true, true, false, true, true, true});
-
-    auto results = cudf::strings::substring_index(strings_view, cudf::string_scalar("é"), -1);
-    cudf::test::expect_columns_equal(*results, exp_results, true);
-  }
-
-  {
-    auto results = cudf::strings::substring_index(strings_view, cudf::string_scalar("é"), 2);
-    cudf::test::expect_columns_equal(*results, strings_view.parent(), true);
-  }
-
-  {
-    auto results = cudf::strings::substring_index(strings_view, cudf::string_scalar("é"), -2);
-    cudf::test::expect_columns_equal(*results, strings_view.parent(), true);
-  }
-
-  {
-    auto col0 = cudf::test::strings_column_wrapper(
-      {"Hello LLollooogh", "oopppllo", "", "oppollo", "polo lop apploo po", ""},
-      {true, true, false, true, true, true});
-
-    auto exp_results = cudf::test::strings_column_wrapper({"Hello LL", "o", "", "opp", "pol", ""},
-                                                          {true, true, false, true, true, true});
-
-    auto results =
-      cudf::strings::substring_index(cudf::strings_column_view{col0}, cudf::string_scalar("o"), 2);
-    cudf::test::expect_columns_equal(*results, exp_results, true);
-  }
-
-  {
-    auto col0 = cudf::test::strings_column_wrapper(
-      {"Hello LLollooogh", "oopppllo", "", "oppollo", "polo lop apploo po", ""},
-      {true, true, false, true, true, true});
-
-    auto exp_results = cudf::test::strings_column_wrapper({"ogh", "pppllo", "", "llo", " po", ""},
-                                                          {true, true, false, true, true, true});
-
-    auto results =
-      cudf::strings::substring_index(cudf::strings_column_view{col0}, cudf::string_scalar("o"), -2);
-    cudf::test::expect_columns_equal(*results, exp_results, true);
-  }
-
-  {
-    auto col0 = cudf::test::strings_column_wrapper(
-      {"Héllo HélloHéllo", "Hélloééééé", "", "éééééé", "poloéé lopéé applooéé po", ""},
-      {true, true, false, true, true, true});
-
-    auto exp_results = cudf::test::strings_column_wrapper(
-      {"Héllo HélloHéllo", "Hélloééééé", "", "éééé", "poloéé lopéé apploo", ""},
-      {true, true, false, true, true, true});
-
-    auto results =
-      cudf::strings::substring_index(cudf::strings_column_view{col0}, cudf::string_scalar("éé"), 3);
-    cudf::test::expect_columns_equal(*results, exp_results, true);
-  }
-
-  {
-    auto col0 = cudf::test::strings_column_wrapper(
-      {"Héllo HélloHéllo", "Hélloééééé", "", "éééééé", "poloéé lopéé applooéé po", ""},
-      {true, true, false, true, true, true});
-
-    auto exp_results = cudf::test::strings_column_wrapper(
-      {"Héllo HélloHéllo", "Hélloééééé", "", "éééé", " lopéé applooéé po", ""},
-      {true, true, false, true, true, true});
-
-    auto results = cudf::strings::substring_index(
-      cudf::strings_column_view{col0}, cudf::string_scalar("éé"), -3);
-    cudf::test::expect_columns_equal(*results, exp_results, true);
-  }
-
-  {
-    auto col0 = cudf::test::strings_column_wrapper({"www.yahoo.com",
-                                                    "www.apache..org",
-                                                    "tennis...com",
-                                                    "nvidia....com",
-                                                    "google...........com",
-                                                    "microsoft...c.....co..m"});
-
-    auto exp_results = cudf::test::strings_column_wrapper(
-      {"www.yahoo.com", "www.apache.", "tennis..", "nvidia..", "google..", "microsoft.."});
-
-    auto results =
-      cudf::strings::substring_index(cudf::strings_column_view{col0}, cudf::string_scalar("."), 3);
-    cudf::test::expect_columns_equal(*results, exp_results, true);
-  }
-
-  {
-    auto col0 = cudf::test::strings_column_wrapper({"www.yahoo.com",
-                                                    "www.apache..org",
-                                                    "tennis..com",
-                                                    "nvidia....com",
-                                                    "google...........com",
-                                                    ".",
-                                                    "microsoft...c.....co..m"});
-
-    auto exp_results = cudf::test::strings_column_wrapper(
-      {"www.yahoo.com", "www.apache..org", "tennis..com", "..com", "..com", ".", "co..m"});
-
-    auto results = cudf::strings::substring_index(
-      cudf::strings_column_view{col0}, cudf::string_scalar(".."), -2);
-    cudf::test::expect_columns_equal(*results, exp_results, true);
-  }
-}
-
-struct StringsSubstringIndexWithColumnTest : public cudf::test::BaseFixture {
-};
-
-TEST_F(StringsSubstringIndexWithColumnTest, ZeroSizeStringsColumn)
-{
-  cudf::column_view col0(cudf::data_type{cudf::STRING}, 0, nullptr, nullptr, 0);
-  auto strings_view = cudf::strings_column_view(col0);
-
-  auto results = cudf::strings::substring_index(strings_view, strings_view, 1);
-  // Check empty column
-  cudf::test::expect_strings_empty(results->view());
-}
-
-TEST_F(StringsSubstringIndexWithColumnTest, GenerateExceptions)
-{
-  auto col0      = cudf::test::strings_column_wrapper({"", "", "", "", ""});
-  auto delim_col = cudf::test::strings_column_wrapper({"", "foo", "bar", "."});
-
-  EXPECT_THROW(cudf::strings::substring_index(
-                 cudf::strings_column_view{col0}, cudf::strings_column_view{delim_col}, -1),
-               cudf::logic_error);
-}
-
-TEST_F(StringsSubstringIndexWithColumnTest, ColumnAllEmpty)
-{
-  auto col0      = cudf::test::strings_column_wrapper({"", "", "", "", ""});
-  auto delim_col = cudf::test::strings_column_wrapper({"", "foo", "bar", ".", "/"});
-
-  auto exp_results = cudf::test::strings_column_wrapper({"", "", "", "", ""});
-
-  auto results = cudf::strings::substring_index(
-    cudf::strings_column_view{col0}, cudf::strings_column_view{delim_col}, -1);
-  cudf::test::expect_columns_equal(*results, exp_results, true);
-}
-
-TEST_F(StringsSubstringIndexWithColumnTest, DelimiterAllEmptyAndInvalid)
-{
-  auto col0 = cudf::test::strings_column_wrapper(
-    {"Héllo", "thesé", "", "lease", "tést strings", ""}, {true, true, false, true, true, true});
-  auto delim_col = cudf::test::strings_column_wrapper({"", "", "", "", "", ""},
-                                                      {true, false, true, false, true, false});
-
-  auto exp_results = cudf::test::strings_column_wrapper({"", "", "", "", "", ""},
-                                                        {true, false, false, false, true, false});
-
-  auto results = cudf::strings::substring_index(
-    cudf::strings_column_view{col0}, cudf::strings_column_view{delim_col}, 1);
-  cudf::test::expect_columns_equal(*results, exp_results, true);
-}
-
-TEST_F(StringsSubstringIndexWithColumnTest, ZeroDelimiterCount)
-{
-  auto col0 = cudf::test::strings_column_wrapper(
-    {"Héllo", "thesé", "", "lease", "tést strings", ""}, {true, true, false, true, true, true});
-  auto delim_col = cudf::test::strings_column_wrapper({"", "", "", "", "", ""},
-                                                      {true, false, true, false, true, false});
-
-  auto exp_results = cudf::test::strings_column_wrapper({"", "", "", "", "", ""},
-                                                        {false, false, false, false, false, false});
-
-  auto results = cudf::strings::substring_index(
-    cudf::strings_column_view{col0}, cudf::strings_column_view{delim_col}, 0);
-  cudf::test::expect_columns_equal(*results, exp_results, true);
-}
-
-TEST_F(StringsSubstringIndexWithColumnTest, SearchDelimiter)
-{
-  {
-    auto col0 = cudf::test::strings_column_wrapper(
-      {"H™élloi ™◎oo™ff™", "thesé", "", "lease™", "tést strings", "™"},
-      {true, true, false, true, true, true});
-    auto delim_col = cudf::test::strings_column_wrapper({"™", "™", "", "e", "t", "™"});
-
-    auto exp_results = cudf::test::strings_column_wrapper({"H", "thesé", "", "l", "", ""},
-                                                          {true, true, false, true, true, true});
-
-    auto results = cudf::strings::substring_index(
-      cudf::strings_column_view{col0}, cudf::strings_column_view{delim_col}, 1);
-    cudf::test::expect_columns_equal(*results, exp_results, true);
-  }
-
-  {
-    auto col0      = cudf::test::strings_column_wrapper({"H™élloﬀ ﬀﬀi ™◎ooﬀ™ff™",
-                                                    "tﬀﬀhﬀesé",
-                                                    "",
-                                                    "lﬀ fooﬀ ffﬀ eaﬀse™",
-                                                    "tést ﬀstri.nﬀgs",
-                                                    "ﬀﬀ ™ ﬀﬀ ﬀ"},
-                                                   {true, true, false, true, true, true});
-    auto delim_col = cudf::test::strings_column_wrapper({"ﬀ™", "ﬀ", "", "ﬀ ", "t", "ﬀ ™"});
-
-    auto exp_results = cudf::test::strings_column_wrapper(
-      {"ff™", "esé", "", "eaﬀse™", "ri.nﬀgs", " ﬀﬀ ﬀ"}, {true, true, false, true, true, true});
-
-    auto results = cudf::strings::substring_index(
-      cudf::strings_column_view{col0}, cudf::strings_column_view{delim_col}, -1);
-    cudf::test::expect_columns_equal(*results, exp_results, true);
-  }
-
-  {
-    auto col0 = cudf::test::strings_column_wrapper({"H™élloﬀ ﬀﬀi fooﬀ™ barﬀ™ gooﬀ™ ™◎ooﬀ™ff™",
-                                                    "tﬀﬀhﬀesé",
-                                                    "",
-                                                    "lﬀ fooﬀ ffﬀ eaﬀse™",
-                                                    "tést ﬀ™ffﬀ™ﬀ™ffﬀstri.ﬀ™ffﬀ™nﬀgs",
-                                                    "ﬀﬀ ™ ﬀﬀ ﬀ™ ﬀ™ﬀ™ﬀ™ ﬀ™ﬀ™ ﬀ"},
-                                                   {true, true, false, true, true, true});
-    auto delim_col = cudf::test::strings_column_wrapper({"ﬀ™", "ﬀ", "", "e ", "ﬀ™ff", "ﬀ™ﬀ™"},
-                                                        {true, true, false, true, true, true});
-
-    auto exp_results = cudf::test::strings_column_wrapper({"H™élloﬀ ﬀﬀi fooﬀ™ barﬀ™ goo",
-                                                           "tﬀﬀh",
-                                                           "",
-                                                           "lﬀ fooﬀ ffﬀ eaﬀse™",
-                                                           "tést ﬀ™ffﬀ™ﬀ™ffﬀstri.",
-                                                           "ﬀﬀ ™ ﬀﬀ ﬀ™ ﬀ™ﬀ™ﬀ™ ﬀ™ﬀ™ ﬀ"},
-                                                          {true, true, false, true, true, true});
-
-    auto results = cudf::strings::substring_index(
-      cudf::strings_column_view{col0}, cudf::strings_column_view{delim_col}, 3);
-    cudf::test::expect_columns_equal(*results, exp_results, true);
-  }
-
-  {
-    auto col0 = cudf::test::strings_column_wrapper({"H™élloﬀ ﬀﬀi fooﬀ™ barﬀ™ gooﬀ™ ™◎ooﬀ™ff™",
-                                                    "tﬀﬀhﬀesé",
-                                                    "",
-                                                    "lﬀ fooﬀ ffﬀ eaﬀse™",
-                                                    "tést ﬀ™ffﬀ™ﬀ™ffﬀstri.ﬀ™ffﬀ™nﬀgs",
-                                                    "ﬀﬀ ™ ﬀﬀ ﬀ™ ﬀ™ﬀ™ﬀ™ ﬀ™ﬀ™ ﬀ"});
-    auto delim_col = cudf::test::strings_column_wrapper({"ﬀ™", "ﬀ", "", "e ", "ﬀ™ff", "ﬀ™ﬀ™"},
-                                                        {true, true, false, true, true, true});
-
-    auto exp_results = cudf::test::strings_column_wrapper({" gooﬀ™ ™◎ooﬀ™ff™",
-                                                           "ﬀhﬀesé",
-                                                           "",
-                                                           "lﬀ fooﬀ ffﬀ eaﬀse™",
-                                                           "ﬀ™ﬀ™ffﬀstri.ﬀ™ffﬀ™nﬀgs",
-                                                           "ﬀﬀ ™ ﬀﬀ ﬀ™ ﬀ™ﬀ™ﬀ™ ﬀ™ﬀ™ ﬀ"},
-                                                          {true, true, false, true, true, true});
-
-    auto results = cudf::strings::substring_index(
-      cudf::strings_column_view{col0}, cudf::strings_column_view{delim_col}, -3);
-    cudf::test::expect_columns_equal(*results, exp_results, true);
-  }
-}
diff --git a/cpp/tests/strings/substring_tests.cpp b/cpp/tests/strings/substring_tests.cpp
index 99b4df39a87..5de87592033 100644
--- a/cpp/tests/strings/substring_tests.cpp
+++ b/cpp/tests/strings/substring_tests.cpp
@@ -270,3 +270,320 @@ TEST_F(StringsSubstringsTest, Error)
   auto strings_column = cudf::strings_column_view(strings);
   EXPECT_THROW(cudf::strings::slice_strings(strings_column, 0, 0, 0), cudf::logic_error);
 }
+
+struct StringsSubstringsScalarDelimiterTest : public cudf::test::BaseFixture {
+};  // fixture for the slice_strings tests that pass the delimiter as a string_scalar
+
+TEST_F(StringsSubstringsScalarDelimiterTest, ZeroSizeStringsColumn)
+{
+  // A zero-row STRING column must produce an empty strings column.
+  cudf::column_view const empty_col(cudf::data_type{cudf::STRING}, 0, nullptr, nullptr, 0);
+  cudf::strings_column_view const input(empty_col);
+  auto const sliced = cudf::strings::slice_strings(input, cudf::string_scalar("foo"), 1);
+  cudf::test::expect_strings_empty(sliced->view());
+}
+
+TEST_F(StringsSubstringsScalarDelimiterTest, AllEmpty)
+{
+  // Every input row is an empty (non-null) string, so every output row is expected to be empty.
+  auto input    = cudf::test::strings_column_wrapper({"", "", "", "", ""});
+  auto expected = cudf::test::strings_column_wrapper({"", "", "", "", ""});
+
+  auto const sliced = cudf::strings::slice_strings(
+    cudf::strings_column_view{input}, cudf::string_scalar("e"), -1);
+  cudf::test::expect_columns_equal(*sliced, expected, true);
+}
+
+TEST_F(StringsSubstringsScalarDelimiterTest, EmptyDelimiter)
+{
+  // An empty delimiter yields an empty string for every valid row; the null row stays null.
+  auto strings_col = cudf::test::strings_column_wrapper(
+    {"Héllo", "thesé", "", "lease", "tést strings", ""}, {true, true, false, true, true, true});
+  auto strings_view = cudf::strings_column_view(strings_col);
+
+  auto exp_results = cudf::test::strings_column_wrapper({"", "", "", "", "", ""},
+                                                        {true, true, false, true, true, true});
+  auto results     = cudf::strings::slice_strings(strings_view, cudf::string_scalar(""), 1);
+  cudf::test::expect_columns_equal(*results, exp_results, true);
+}
+
+TEST_F(StringsSubstringsScalarDelimiterTest, ZeroCount)
+{
+  // A count of 0 yields an empty string for every valid row; the null row stays null.
+  auto strings_col = cudf::test::strings_column_wrapper(
+    {"Héllo", "thesé", "", "lease", "tést strings", ""}, {true, true, false, true, true, true});
+  auto strings_view = cudf::strings_column_view(strings_col);
+
+  auto exp_results = cudf::test::strings_column_wrapper({"", "", "", "", "", ""},
+                                                        {true, true, false, true, true, true});
+
+  auto results = cudf::strings::slice_strings(strings_view, cudf::string_scalar("é"), 0);
+  cudf::test::expect_columns_equal(*results, exp_results, true);
+}
+
+TEST_F(StringsSubstringsScalarDelimiterTest, SearchDelimiter)
+{
+  // Shared input for the first four cases; row 2 is null and must stay null in every result.
+  auto strings_col = cudf::test::strings_column_wrapper(
+    {"Héllo", "thesé", "", "lease", "tést strings", ""}, {true, true, false, true, true, true});
+  auto strings_view = cudf::strings_column_view(strings_col);
+
+  {  // count = 1: keep the text before the first "é"; rows without a match are unchanged
+    auto exp_results = cudf::test::strings_column_wrapper({"H", "thes", "", "lease", "t", ""},
+                                                          {true, true, false, true, true, true});
+
+    auto results = cudf::strings::slice_strings(strings_view, cudf::string_scalar("é"), 1);
+    cudf::test::expect_columns_equal(*results, exp_results, true);
+  }
+
+  {  // count = -1: keep the text after the last "é"
+    auto exp_results = cudf::test::strings_column_wrapper(
+      {"llo", "", "", "lease", "st strings", ""}, {true, true, false, true, true, true});
+
+    auto results = cudf::strings::slice_strings(strings_view, cudf::string_scalar("é"), -1);
+    cudf::test::expect_columns_equal(*results, exp_results, true);
+  }
+
+  {  // count = 2: no row has two "é", so the input comes back unchanged
+    auto results = cudf::strings::slice_strings(strings_view, cudf::string_scalar("é"), 2);
+    cudf::test::expect_columns_equal(*results, strings_view.parent(), true);
+  }
+
+  {  // count = -2: same as above, searching from the right
+    auto results = cudf::strings::slice_strings(strings_view, cudf::string_scalar("é"), -2);
+    cudf::test::expect_columns_equal(*results, strings_view.parent(), true);
+  }
+
+  {  // count = 2: keep the text before the second "o"
+    auto col0 = cudf::test::strings_column_wrapper(
+      {"Hello LLollooogh", "oopppllo", "", "oppollo", "polo lop apploo po", ""},
+      {true, true, false, true, true, true});
+
+    auto exp_results = cudf::test::strings_column_wrapper({"Hello LL", "o", "", "opp", "pol", ""},
+                                                          {true, true, false, true, true, true});
+
+    auto results =
+      cudf::strings::slice_strings(cudf::strings_column_view{col0}, cudf::string_scalar("o"), 2);
+    cudf::test::expect_columns_equal(*results, exp_results, true);
+  }
+
+  {  // count = -2: keep the text after the second "o" counted from the right
+    auto col0 = cudf::test::strings_column_wrapper(
+      {"Hello LLollooogh", "oopppllo", "", "oppollo", "polo lop apploo po", ""},
+      {true, true, false, true, true, true});
+
+    auto exp_results = cudf::test::strings_column_wrapper({"ogh", "pppllo", "", "llo", " po", ""},
+                                                          {true, true, false, true, true, true});
+
+    auto results =
+      cudf::strings::slice_strings(cudf::strings_column_view{col0}, cudf::string_scalar("o"), -2);
+    cudf::test::expect_columns_equal(*results, exp_results, true);
+  }
+
+  {  // multi-character delimiter "éé", count = 3; rows with fewer matches are unchanged
+    auto col0 = cudf::test::strings_column_wrapper(
+      {"Héllo HélloHéllo", "Hélloééééé", "", "éééééé", "poloéé lopéé applooéé po", ""},
+      {true, true, false, true, true, true});
+
+    auto exp_results = cudf::test::strings_column_wrapper(
+      {"Héllo HélloHéllo", "Hélloééééé", "", "éééé", "poloéé lopéé apploo", ""},
+      {true, true, false, true, true, true});
+
+    auto results =
+      cudf::strings::slice_strings(cudf::strings_column_view{col0}, cudf::string_scalar("éé"), 3);
+    cudf::test::expect_columns_equal(*results, exp_results, true);
+  }
+
+  {  // multi-character delimiter "éé", count = -3
+    auto col0 = cudf::test::strings_column_wrapper(
+      {"Héllo HélloHéllo", "Hélloééééé", "", "éééééé", "poloéé lopéé applooéé po", ""},
+      {true, true, false, true, true, true});
+
+    auto exp_results = cudf::test::strings_column_wrapper(
+      {"Héllo HélloHéllo", "Hélloééééé", "", "éééé", " lopéé applooéé po", ""},
+      {true, true, false, true, true, true});
+
+    auto results =
+      cudf::strings::slice_strings(cudf::strings_column_view{col0}, cudf::string_scalar("éé"), -3);
+    cudf::test::expect_columns_equal(*results, exp_results, true);
+  }
+
+  {  // runs of consecutive "." delimiters, count = 3
+    auto col0 = cudf::test::strings_column_wrapper({"www.yahoo.com",
+                                                    "www.apache..org",
+                                                    "tennis...com",
+                                                    "nvidia....com",
+                                                    "google...........com",
+                                                    "microsoft...c.....co..m"});
+
+    auto exp_results = cudf::test::strings_column_wrapper(
+      {"www.yahoo.com", "www.apache.", "tennis..", "nvidia..", "google..", "microsoft.."});
+
+    auto results =
+      cudf::strings::slice_strings(cudf::strings_column_view{col0}, cudf::string_scalar("."), 3);
+    cudf::test::expect_columns_equal(*results, exp_results, true);
+  }
+
+  {  // two-character delimiter ".." inside runs of dots, count = -2
+    auto col0 = cudf::test::strings_column_wrapper({"www.yahoo.com",
+                                                    "www.apache..org",
+                                                    "tennis..com",
+                                                    "nvidia....com",
+                                                    "google...........com",
+                                                    ".",
+                                                    "microsoft...c.....co..m"});
+
+    auto exp_results = cudf::test::strings_column_wrapper(
+      {"www.yahoo.com", "www.apache..org", "tennis..com", "..com", "..com", ".", "co..m"});
+
+    auto results =
+      cudf::strings::slice_strings(cudf::strings_column_view{col0}, cudf::string_scalar(".."), -2);
+    cudf::test::expect_columns_equal(*results, exp_results, true);
+  }
+}
+
+struct StringsSubstringsColumnDelimiterTest : public cudf::test::BaseFixture {
+};  // fixture for the slice_strings tests that pass one delimiter per row as a strings column
+
+TEST_F(StringsSubstringsColumnDelimiterTest, ZeroSizeStringsColumn)
+{
+  // The same zero-row STRING column serves as both the input and the delimiters.
+  cudf::column_view const empty_col(cudf::data_type{cudf::STRING}, 0, nullptr, nullptr, 0);
+  cudf::strings_column_view const empty_view(empty_col);
+
+  auto const sliced = cudf::strings::slice_strings(empty_view, empty_view, 1);
+  cudf::test::expect_strings_empty(sliced->view());
+}
+
+TEST_F(StringsSubstringsColumnDelimiterTest, GenerateExceptions)
+{
+  // Five input rows against four delimiter rows: the size mismatch must be rejected.
+  auto input      = cudf::test::strings_column_wrapper({"", "", "", "", ""});
+  auto delimiters = cudf::test::strings_column_wrapper({"", "foo", "bar", "."});
+  EXPECT_THROW(cudf::strings::slice_strings(
+                 cudf::strings_column_view{input}, cudf::strings_column_view{delimiters}, -1),
+               cudf::logic_error);
+}
+
+TEST_F(StringsSubstringsColumnDelimiterTest, ColumnAllEmpty)
+{
+  // All input rows are empty strings, so each result row is empty whatever its delimiter is.
+  auto input      = cudf::test::strings_column_wrapper({"", "", "", "", ""});
+  auto delimiters = cudf::test::strings_column_wrapper({"", "foo", "bar", ".", "/"});
+  auto expected   = cudf::test::strings_column_wrapper({"", "", "", "", ""});
+
+  auto const sliced = cudf::strings::slice_strings(
+    cudf::strings_column_view{input}, cudf::strings_column_view{delimiters}, -1);
+  cudf::test::expect_columns_equal(*sliced, expected, true);
+}
+
+TEST_F(StringsSubstringsColumnDelimiterTest, DelimiterAllEmptyAndInvalid)
+{
+  // Every delimiter is empty or null: valid input rows become empty strings, and only the
+  // null input row (index 2) is null in the output.
+  auto input = cudf::test::strings_column_wrapper(
+    {"Héllo", "thesé", "", "lease", "tést strings", ""}, {true, true, false, true, true, true});
+  auto delimiters = cudf::test::strings_column_wrapper({"", "", "", "", "", ""},
+                                                       {true, false, true, false, true, false});
+  auto expected   = cudf::test::strings_column_wrapper({"", "", "", "", "", ""},
+                                                       {true, true, false, true, true, true});
+  auto const sliced = cudf::strings::slice_strings(
+    cudf::strings_column_view{input}, cudf::strings_column_view{delimiters}, 1);
+  cudf::test::expect_columns_equal(*sliced, expected, true);
+}
+
+TEST_F(StringsSubstringsColumnDelimiterTest, ZeroDelimiterCount)
+{
+  // A count of 0 gives an empty string for each valid input row; output validity mirrors
+  // the input column, not the delimiter column.
+  auto input = cudf::test::strings_column_wrapper(
+    {"Héllo", "thesé", "", "lease", "tést strings", ""}, {true, true, false, true, true, true});
+  auto delimiters = cudf::test::strings_column_wrapper({"", "", "", "", "", ""},
+                                                       {true, false, true, false, true, false});
+  auto expected   = cudf::test::strings_column_wrapper({"", "", "", "", "", ""},
+                                                       {true, true, false, true, true, true});
+  auto const sliced = cudf::strings::slice_strings(
+    cudf::strings_column_view{input}, cudf::strings_column_view{delimiters}, 0);
+  cudf::test::expect_columns_equal(*sliced, expected, true);
+}
+
+TEST_F(StringsSubstringsColumnDelimiterTest, SearchDelimiter)
+{
+  {  // count = 1: each row keeps the text before the first occurrence of its own delimiter
+    auto col0 = cudf::test::strings_column_wrapper(
+      {"H™élloi ™◎oo™ff™", "thesé", "", "lease™", "tést strings", "™"},
+      {true, true, false, true, true, true});
+    auto delim_col = cudf::test::strings_column_wrapper({"™", "™", "", "e", "t", "™"});
+
+    auto exp_results = cudf::test::strings_column_wrapper({"H", "thesé", "", "l", "", ""},
+                                                          {true, true, false, true, true, true});
+
+    auto results = cudf::strings::slice_strings(
+      cudf::strings_column_view{col0}, cudf::strings_column_view{delim_col}, 1);
+    cudf::test::expect_columns_equal(*results, exp_results, true);
+  }
+
+  {  // count = -1: each row keeps the text after the last occurrence of its own delimiter
+    auto col0      = cudf::test::strings_column_wrapper({"H™élloﬀ ﬀﬀi ™◎ooﬀ™ff™",
+                                                    "tﬀﬀhﬀesé",
+                                                    "",
+                                                    "lﬀ fooﬀ ffﬀ eaﬀse™",
+                                                    "tést ﬀstri.nﬀgs",
+                                                    "ﬀﬀ ™ ﬀﬀ ﬀ"},
+                                                   {true, true, false, true, true, true});
+    auto delim_col = cudf::test::strings_column_wrapper({"ﬀ™", "ﬀ", "", "ﬀ ", "t", "ﬀ ™"});
+
+    auto exp_results = cudf::test::strings_column_wrapper(
+      {"ff™", "esé", "", "eaﬀse™", "ri.nﬀgs", " ﬀﬀ ﬀ"}, {true, true, false, true, true, true});
+
+    auto results = cudf::strings::slice_strings(
+      cudf::strings_column_view{col0}, cudf::strings_column_view{delim_col}, -1);
+    cudf::test::expect_columns_equal(*results, exp_results, true);
+  }
+
+  {  // count = 3 with a null delimiter in row 2; rows with fewer than 3 matches are unchanged
+    auto col0 = cudf::test::strings_column_wrapper({"H™élloﬀ ﬀﬀi fooﬀ™ barﬀ™ gooﬀ™ ™◎ooﬀ™ff™",
+                                                    "tﬀﬀhﬀesé",
+                                                    "",
+                                                    "lﬀ fooﬀ ffﬀ eaﬀse™",
+                                                    "tést ﬀ™ffﬀ™ﬀ™ffﬀstri.ﬀ™ffﬀ™nﬀgs",
+                                                    "ﬀﬀ ™ ﬀﬀ ﬀ™ ﬀ™ﬀ™ﬀ™ ﬀ™ﬀ™ ﬀ"},
+                                                   {true, true, false, true, true, true});
+    auto delim_col = cudf::test::strings_column_wrapper({"ﬀ™", "ﬀ", "", "e ", "ﬀ™ff", "ﬀ™ﬀ™"},
+                                                        {true, true, false, true, true, true});
+
+    auto exp_results = cudf::test::strings_column_wrapper({"H™élloﬀ ﬀﬀi fooﬀ™ barﬀ™ goo",
+                                                           "tﬀﬀh",
+                                                           "",
+                                                           "lﬀ fooﬀ ffﬀ eaﬀse™",
+                                                           "tést ﬀ™ffﬀ™ﬀ™ffﬀstri.",
+                                                           "ﬀﬀ ™ ﬀﬀ ﬀ™ ﬀ™ﬀ™ﬀ™ ﬀ™ﬀ™ ﬀ"},
+                                                          {true, true, false, true, true, true});
+
+    auto results = cudf::strings::slice_strings(
+      cudf::strings_column_view{col0}, cudf::strings_column_view{delim_col}, 3);
+    cudf::test::expect_columns_equal(*results, exp_results, true);
+  }
+
+  {  // count = -3; input has no null mask, so the null-delimiter row 2 is a valid empty string
+    auto col0 = cudf::test::strings_column_wrapper({"H™élloﬀ ﬀﬀi fooﬀ™ barﬀ™ gooﬀ™ ™◎ooﬀ™ff™",
+                                                    "tﬀﬀhﬀesé",
+                                                    "",
+                                                    "lﬀ fooﬀ ffﬀ eaﬀse™",
+                                                    "tést ﬀ™ffﬀ™ﬀ™ffﬀstri.ﬀ™ffﬀ™nﬀgs",
+                                                    "ﬀﬀ ™ ﬀﬀ ﬀ™ ﬀ™ﬀ™ﬀ™ ﬀ™ﬀ™ ﬀ"});
+    auto delim_col = cudf::test::strings_column_wrapper({"ﬀ™", "ﬀ", "", "e ", "ﬀ™ff", "ﬀ™ﬀ™"},
+                                                        {true, true, false, true, true, true});
+
+    auto exp_results = cudf::test::strings_column_wrapper({" gooﬀ™ ™◎ooﬀ™ff™",
+                                                           "ﬀhﬀesé",
+                                                           "",
+                                                           "lﬀ fooﬀ ffﬀ eaﬀse™",
+                                                           "ﬀ™ﬀ™ffﬀstri.ﬀ™ffﬀ™nﬀgs",
+                                                           "ﬀﬀ ™ ﬀﬀ ﬀ™ ﬀ™ﬀ™ﬀ™ ﬀ™ﬀ™ ﬀ"});
+
+    auto results = cudf::strings::slice_strings(
+      cudf::strings_column_view{col0}, cudf::strings_column_view{delim_col}, -3);
+    cudf::test::expect_columns_equal(*results, exp_results, true);
+  }
+}

From 81d8b6229d93acfb077adb2e449fc85ae68183a4 Mon Sep 17 00:00:00 2001
From: Sriram Chandramouli <sriram_chandramouli@hotmail.com>
Date: Fri, 29 May 2020 19:48:35 +0000
Subject: [PATCH 7/9] - minor cleanup to substring index computing functor

---
 cpp/src/strings/substring.cu | 82 ++++++++++++++++++------------------
 1 file changed, 40 insertions(+), 42 deletions(-)

diff --git a/cpp/src/strings/substring.cu b/cpp/src/strings/substring.cu
index 13b7a965953..7388ae94dee 100644
--- a/cpp/src/strings/substring.cu
+++ b/cpp/src/strings/substring.cu
@@ -305,53 +305,51 @@ struct compute_substring_indices {
       strings_count,
       [delim_itr, delimiter_count, start_char_pos, end_char_pos, d_column] __device__(
         size_type idx) {
-        // If the column value for this row is null, result is null.
-        // If the delimiter count is 0, result is empty string.
-        if (d_column.is_null(idx) || !delimiter_count) { return; }
-
         auto const& delim_val_pair = delim_itr[idx];
-        auto const& col_val        = d_column.element<string_view>(idx);
+        auto const& delim_val      = delim_val_pair.first;  // Valid only if .second is true
 
+        // If the column value for this row is null, result is null.
+        // If the delimiter count is 0, result is empty string.
         // If the global delimiter or the row specific delimiter is invalid or if it is empty, row
         // value is empty.
-        // If the column value for the row is empty, the row value is empty.
-        if (!delim_val_pair.second || delim_val_pair.first.empty() || col_val.empty()) { return; }
-
-        auto const& delim_val = delim_val_pair.first;
-
-        auto const col_val_len   = col_val.length();
-        auto const delimiter_len = delim_val.length();
-
-        auto nsearches      = (delimiter_count < 0) ? -delimiter_count : delimiter_count;
-        size_type start_pos = 0;
-        size_type end_pos   = col_val_len;
-
-        for (auto i = 0; i < nsearches; ++i) {
-          if (delimiter_count < 0) {
-            end_pos = col_val.rfind(delim_val, 0, end_pos);
-            if (end_pos == -1) {
-              start_char_pos[idx] = 0;
-              end_char_pos[idx]   = col_val_len;
-              return;
-            }
-            if (i + 1 == nsearches) {
-              start_char_pos[idx] = end_pos + delimiter_len;
-              end_char_pos[idx]   = col_val_len;
-              return;
-            }
-          } else {
-            auto char_pos = col_val.find(delim_val, start_pos);
-            if (char_pos == -1) {
-              start_char_pos[idx] = 0;
-              end_char_pos[idx]   = col_val_len;
-              return;
+        if (!d_column.is_null(idx) && delimiter_count && delim_val_pair.second &&
+            !delim_val.empty()) {
+          auto const& col_val = d_column.element<string_view>(idx);
+
+          // If the column value for the row is empty, the row value is empty.
+          if (!col_val.empty()) {
+            auto const col_val_len   = col_val.length();
+            auto const delimiter_len = delim_val.length();
+
+            auto nsearches      = (delimiter_count < 0) ? -delimiter_count : delimiter_count;
+            size_type start_pos = 0;
+            size_type end_pos   = col_val_len;
+            bool keep_searching = true;
+
+            for (auto i = 0; keep_searching && i < nsearches; ++i) {
+              if (delimiter_count < 0) {
+                end_pos = col_val.rfind(delim_val, 0, end_pos);
+                if (end_pos == -1) {
+                  start_char_pos[idx] = 0;
+                  end_char_pos[idx]   = col_val_len;
+                  keep_searching      = false;
+                } else if (i + 1 == nsearches) {
+                  start_char_pos[idx] = end_pos + delimiter_len;
+                  end_char_pos[idx]   = col_val_len;
+                }
+              } else {
+                auto char_pos = col_val.find(delim_val, start_pos);
+                if (char_pos == -1) {
+                  start_char_pos[idx] = 0;
+                  end_char_pos[idx]   = col_val_len;
+                  keep_searching      = false;
+                } else if (i + 1 == nsearches) {
+                  start_char_pos[idx] = 0;
+                  end_char_pos[idx]   = char_pos;
+                } else
+                  start_pos = char_pos + delimiter_len;
+              }
             }
-            if (i + 1 == nsearches) {
-              start_char_pos[idx] = 0;
-              end_char_pos[idx]   = char_pos;
-              return;
-            } else
-              start_pos = char_pos + delimiter_len;
           }
         }
       });

From b02244bb54c6e4547dab19aef08b12a500b52044 Mon Sep 17 00:00:00 2001
From: Sriram Chandramouli <sriram_chandramouli@hotmail.com>
Date: Mon, 1 Jun 2020 14:44:26 +0000
Subject: [PATCH 8/9] - incorporate review comments

---
 CHANGELOG.md                           |  2 +-
 cpp/include/cudf/strings/substring.hpp |  8 ++++++--
 cpp/src/strings/substring.cu           | 21 ++++++++++++---------
 3 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 160481f08f1..e07b6e9ae46 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,7 +3,7 @@
 ## New Features
 
 - PR #5222 Adding clip feature support to DataFrame and Series
-- PR #5303 Add substring_index functionality for strings
+- PR #5303 Add slice_strings functionality using delimiter string
 
 ## Improvements
 - PR #5245 Add column reduction benchmark
diff --git a/cpp/include/cudf/strings/substring.hpp b/cpp/include/cudf/strings/substring.hpp
index 7e3fcda5d6f..2139bb63af5 100644
--- a/cpp/include/cudf/strings/substring.hpp
+++ b/cpp/include/cudf/strings/substring.hpp
@@ -105,7 +105,9 @@ std::unique_ptr<column> slice_strings(
   rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
 
 /**
- * @brief Returns a column of strings after searching for @p delimiter @p count number of
+ * @brief Slices a column of strings by using a delimiter as a slice point.
+ *
+ * Returns a column of strings after searching for @p delimiter @p count number of
  * times in the source @p strings forward if @p count is positive or backwards if @p count is
  * negative. If @p count is positive, it returns a substring from the start of the source @p
  * strings up until @p count occurrence of the @delimiter not including the @p delimiter.
@@ -145,7 +147,9 @@ std::unique_ptr<column> slice_strings(
   rmm::mr::device_memory_resource* mr = rmm::mr::get_default_resource());
 
 /**
- * @brief Returns a column of strings after searching the delimiter defined per row from
+ * @brief Slices a column of strings by using a delimiter column as slice points.
+ *
+ * Returns a column of strings after searching the delimiter defined per row from
  * @p delimiter_strings @p count number of times in the source @p strings forward if @p count
  * is positive or backwards if @p count is negative. If @p count is positive, it returns a
  * substring from the start of the source @p strings up until @p count occurrence of the
diff --git a/cpp/src/strings/substring.cu b/cpp/src/strings/substring.cu
index 7388ae94dee..cae52115a65 100644
--- a/cpp/src/strings/substring.cu
+++ b/cpp/src/strings/substring.cu
@@ -199,7 +199,7 @@ struct substring_from_fn {
   }
 };
 
-struct dispatch_substring_from_fn {
+struct compute_substrings_from_fn {
   /**
    * @brief Returns strings column with substrings based on the ranges in the
    * individual starts and stops column position values.
@@ -264,7 +264,7 @@ struct compute_substrings {
                                      rmm::mr::device_memory_resource* mr,
                                      cudaStream_t stream) const
   {
-    return dispatch_substring_from_fn{}(d_column,
+    return compute_substrings_from_fn{}(d_column,
                                         null_count,
                                         starts_column.data<PositionType>(),
                                         stops_column.data<PositionType>(),
@@ -312,8 +312,7 @@ struct compute_substring_indices {
         // If the delimiter count is 0, result is empty string.
         // If the global delimiter or the row specific delimiter is invalid or if it is empty, row
         // value is empty.
-        if (!d_column.is_null(idx) && delimiter_count && delim_val_pair.second &&
-            !delim_val.empty()) {
+        if (!d_column.is_null(idx) && delim_val_pair.second && !delim_val.empty()) {
           auto const& col_val = d_column.element<string_view>(idx);
 
           // If the column value for the row is empty, the row value is empty.
@@ -377,7 +376,7 @@ std::unique_ptr<column> slice_strings(
   CUDF_EXPECTS(stops_column.null_count() == 0, "Parameter stops must not contain nulls.");
   CUDF_EXPECTS(starts_column.type().id() != data_type{BOOL8}.id(),
                "Positions values must not be bool type.");
-  CUDF_EXPECTS(is_fixed_width(starts_column.type()), "Positions values must be an integral type.");
+  CUDF_EXPECTS(is_fixed_width(starts_column.type()), "Positions values must be fixed width type.");
 
   auto strings_column = column_device_view::create(strings.parent(), stream);
   auto d_column       = *strings_column;
@@ -411,12 +410,16 @@ std::unique_ptr<column> slice_strings(strings_column_view const& strings,
 
   auto strings_column = column_device_view::create(strings.parent(), stream);
   auto d_column       = *strings_column;
-  // Compute the substring indices first
-  compute_substring_indices{}(
-    d_column, delimiter_itr, count, start_char_pos, end_char_pos, mr, stream);
+
+  // If delimiter count is 0, the output column will contain empty strings
+  if (count) {
+    // Compute the substring indices first
+    compute_substring_indices{}(
+      d_column, delimiter_itr, count, start_char_pos, end_char_pos, mr, stream);
+  }
 
   // Extract the substrings using the indices next
-  return dispatch_substring_from_fn{}(
+  return compute_substrings_from_fn{}(
     d_column, strings.null_count(), start_char_pos, end_char_pos, mr, stream);
 }
 

From 9d2fc8bf70d1d5b86d737f8decfe85737dbd9c70 Mon Sep 17 00:00:00 2001
From: Sriram Chandramouli <sriram_chandramouli@hotmail.com>
Date: Tue, 2 Jun 2020 01:37:15 +0000
Subject: [PATCH 9/9] - incorporate review comments

---
 cpp/include/cudf/strings/substring.hpp | 35 ++++++-------
 cpp/src/strings/substring.cu           | 68 ++++++++++++--------------
 2 files changed, 48 insertions(+), 55 deletions(-)

diff --git a/cpp/include/cudf/strings/substring.hpp b/cpp/include/cudf/strings/substring.hpp
index 2139bb63af5..9e21905f3db 100644
--- a/cpp/include/cudf/strings/substring.hpp
+++ b/cpp/include/cudf/strings/substring.hpp
@@ -108,17 +108,18 @@ std::unique_ptr<column> slice_strings(
  * @brief Slices a column of strings by using a delimiter as a slice point.
  *
  * Returns a column of strings after searching for @p delimiter @p count number of
- * times in the source @p strings forward if @p count is positive or backwards if @p count is
- * negative. If @p count is positive, it returns a substring from the start of the source @p
- * strings up until @p count occurrence of the @delimiter not including the @p delimiter.
- * If @p count is negative, it returns a substring from the start of the @p count occurrence of
- * the @delimiter in the source @p strings past the delimiter until the end of the string.
+ * times in the source @p strings from left to right if @p count is positive or from
+ * right to left if @p count is negative. If @p count is positive, it returns a substring
+ * from the start of the source @p strings up until @p count occurrence of the @p delimiter
+ * not including the @p delimiter. If @p count is negative, it returns a substring from
+ * the start of the @p count occurrence of the @p delimiter in the source @p strings past
+ * the delimiter until the end of the string.
  *
  * The search for @delimiter in @p strings is case sensitive.
  * If the row value of @p strings is null, the row value in the output column will be null.
- * If the @p count is 0 or if @p delimiter is invalid, output column will be an empty string.
- * If the @p delimiter or the column value for a row is empty, the row value in the output
- * column will be empty.
+ * If the @p count is 0 or if @p delimiter is invalid or empty, every row in the output column
+ * will be an empty string.
+ * If the column value for a row is empty, the row value in the output column will be empty.
  * If @p count occurrences of @p delimiter isn't found, the row value in the output column will
  * be the row value from the input @p strings column.
  *
@@ -136,7 +137,7 @@ std::unique_ptr<column> slice_strings(
  * @param strings Strings instance for this operation.
  * @param delimiter UTF-8 encoded string to search for in each string.
  * @param count Number of times to search for delimiter in each string. If the value is positive,
- *              forward search of delimiter is performed; else, a backward search is performed.
+ *              delimiter is searched from left to right; else, it is searched from right to left.
  * @param mr Resource for allocating device memory.
  * @return New strings column containing the substrings.
  */
@@ -150,18 +151,18 @@ std::unique_ptr<column> slice_strings(
  * @brief Slices a column of strings by using a delimiter column as slice points.
  *
  * Returns a column of strings after searching the delimiter defined per row from
- * @p delimiter_strings @p count number of times in the source @p strings forward if @p count
- * is positive or backwards if @p count is negative. If @p count is positive, it returns a
- * substring from the start of the source @p strings up until @p count occurrence of the
- * delimiter for that row not including that delimiter. If @p count is negative, it returns a
- * substring from the start of the @p count occurrence of the delimiter for that row in the
- * source @p strings past the delimiter until the end of the string.
+ * @p delimiter_strings @p count number of times in the source @p strings from left to right
+ * if @p count is positive or from right to left if @p count is negative. If @p count is
+ * positive, it returns a substring from the start of the source @p strings up until
+ * @p count occurrence of the delimiter for that row not including that delimiter. If @p count
+ * is negative, it returns a substring from the start of the @p count occurrence of the
+ * delimiter for that row in the source @p strings past the delimiter until the end of the string.
  *
  * The search for @p delimiter_strings in @p strings is case sensitive.
  * If the @p count is 0, every row in the output column will be an empty string.
  * If the row value of @p strings is null, the row value in the output column will be null.
  * If the row value from @p delimiter_strings is invalid or null, the row value in the
- * output column will an empty string.
+ * output column will be an empty string.
  * If the row value from @p delimiter_strings or the column value for a row is empty, the
  * row value in the output column will be empty.
  * If @p count occurrences of delimiter isn't found, the row value in the output column will
@@ -185,7 +186,7 @@ std::unique_ptr<column> slice_strings(
  * @param strings Strings instance for this operation.
  * @param delimiter_strings UTF-8 encoded string for each row.
  * @param count Number of times to search for delimiter in each string. If the value is positive,
- *              forward search of delimiter is performed; else, a backward search is performed.
+ *              delimiter is searched from left to right; else, it is searched from right to left.
  * @param mr Resource for allocating device memory.
  * @return New strings column containing the substrings.
  */
diff --git a/cpp/src/strings/substring.cu b/cpp/src/strings/substring.cu
index cae52115a65..14d04c1b257 100644
--- a/cpp/src/strings/substring.cu
+++ b/cpp/src/strings/substring.cu
@@ -312,44 +312,36 @@ struct compute_substring_indices {
         // If the delimiter count is 0, result is empty string.
         // If the global delimiter or the row specific delimiter is invalid or if it is empty, row
         // value is empty.
-        if (!d_column.is_null(idx) && delim_val_pair.second && !delim_val.empty()) {
-          auto const& col_val = d_column.element<string_view>(idx);
-
-          // If the column value for the row is empty, the row value is empty.
-          if (!col_val.empty()) {
-            auto const col_val_len   = col_val.length();
-            auto const delimiter_len = delim_val.length();
-
-            auto nsearches      = (delimiter_count < 0) ? -delimiter_count : delimiter_count;
-            size_type start_pos = 0;
-            size_type end_pos   = col_val_len;
-            bool keep_searching = true;
-
-            for (auto i = 0; keep_searching && i < nsearches; ++i) {
-              if (delimiter_count < 0) {
-                end_pos = col_val.rfind(delim_val, 0, end_pos);
-                if (end_pos == -1) {
-                  start_char_pos[idx] = 0;
-                  end_char_pos[idx]   = col_val_len;
-                  keep_searching      = false;
-                } else if (i + 1 == nsearches) {
-                  start_char_pos[idx] = end_pos + delimiter_len;
-                  end_char_pos[idx]   = col_val_len;
-                }
-              } else {
-                auto char_pos = col_val.find(delim_val, start_pos);
-                if (char_pos == -1) {
-                  start_char_pos[idx] = 0;
-                  end_char_pos[idx]   = col_val_len;
-                  keep_searching      = false;
-                } else if (i + 1 == nsearches) {
-                  start_char_pos[idx] = 0;
-                  end_char_pos[idx]   = char_pos;
-                } else
-                  start_pos = char_pos + delimiter_len;
-              }
-            }
+        if (d_column.is_null(idx) || !delim_val_pair.second || delim_val.empty()) return;
+        auto const& col_val = d_column.element<string_view>(idx);
+
+        // If the column value for the row is empty, the row value is empty.
+        if (!col_val.empty()) {
+          auto const col_val_len   = col_val.length();
+          auto const delimiter_len = delim_val.length();
+
+          auto nsearches           = (delimiter_count < 0) ? -delimiter_count : delimiter_count;
+          bool const left_to_right = (delimiter_count > 0);
+
+          size_type start_pos = start_char_pos[idx];
+          size_type end_pos   = col_val_len;
+          size_type char_pos  = -1;
+
+          end_char_pos[idx] = col_val_len;
+
+          for (auto i = 0; i < nsearches; ++i) {
+            char_pos = left_to_right ? col_val.find(delim_val, start_pos)
+                                     : col_val.rfind(delim_val, 0, end_pos);
+            if (char_pos == -1) return;
+            if (left_to_right)
+              start_pos = char_pos + delimiter_len;
+            else
+              end_pos = char_pos;
           }
+          if (left_to_right)
+            end_char_pos[idx] = char_pos;
+          else
+            start_char_pos[idx] = end_pos + delimiter_len;
         }
       });
   }
@@ -412,7 +404,7 @@ std::unique_ptr<column> slice_strings(strings_column_view const& strings,
   auto d_column       = *strings_column;
 
   // If delimiter count is 0, the output column will contain empty strings
-  if (count) {
+  if (count != 0) {
     // Compute the substring indices first
     compute_substring_indices{}(
       d_column, delimiter_itr, count, start_char_pos, end_char_pos, mr, stream);