diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml
index 39587b4bd05..75955428eab 100644
--- a/conda/recipes/libcudf/meta.yaml
+++ b/conda/recipes/libcudf/meta.yaml
@@ -178,12 +178,14 @@ test:
     - test -f $PREFIX/include/cudf/strings/detail/converters.hpp
     - test -f $PREFIX/include/cudf/strings/detail/copying.hpp
     - test -f $PREFIX/include/cudf/strings/detail/fill.hpp
+    - test -f $PREFIX/include/cudf/strings/detail/json.hpp
     - test -f $PREFIX/include/cudf/strings/detail/replace.hpp
     - test -f $PREFIX/include/cudf/strings/detail/utilities.hpp
     - test -f $PREFIX/include/cudf/strings/extract.hpp
     - test -f $PREFIX/include/cudf/strings/findall.hpp
     - test -f $PREFIX/include/cudf/strings/find.hpp
     - test -f $PREFIX/include/cudf/strings/find_multiple.hpp
+    - test -f $PREFIX/include/cudf/strings/json.hpp
     - test -f $PREFIX/include/cudf/strings/padding.hpp
     - test -f $PREFIX/include/cudf/strings/replace.hpp
     - test -f $PREFIX/include/cudf/strings/replace_re.hpp
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 5cd82e52180..61cb13d3445 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -346,6 +346,7 @@ add_library(cudf
     src/strings/find.cu
     src/strings/find_multiple.cu
     src/strings/padding.cu
+    src/strings/json/json_path.cu
     src/strings/regex/regcomp.cpp
     src/strings/regex/regexec.cu
     src/strings/replace/backref_re.cu
diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt
index 5aa7e0132f8..11af408f1c5 100644
--- a/cpp/benchmarks/CMakeLists.txt
+++ b/cpp/benchmarks/CMakeLists.txt
@@ -202,3 +202,8 @@ ConfigureBench(STRINGS_BENCH
   string/substring_benchmark.cpp
   string/translate_benchmark.cpp
   string/url_decode_benchmark.cpp)
+
+###################################################################################################
+# - json benchmark -------------------------------------------------------------------
+ConfigureBench(JSON_BENCH
+  string/json_benchmark.cpp)
diff --git a/cpp/benchmarks/string/json_benchmark.cpp b/cpp/benchmarks/string/json_benchmark.cpp
new file mode 100644
index 00000000000..6fb6a07a8d0
--- /dev/null
+++ b/cpp/benchmarks/string/json_benchmark.cpp
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <benchmark/benchmark.h>
+#include <benchmarks/common/generate_benchmark_input.hpp>
+#include <benchmarks/fixture/benchmark_fixture.hpp>
+#include <benchmarks/synchronization/synchronization.hpp>
+
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_wrapper.hpp>
+
+#include <cudf/strings/json.hpp>
+#include <cudf/strings/strings_column_view.hpp>
+
+class JsonPath : public cudf::benchmark {
+};
+
+float frand() { return static_cast<float>(rand()) / static_cast<float>(RAND_MAX); }
+
+int rand_range(int min, int max) { return min + static_cast<int>(frand() * (max - min)); }
+
+std::vector<std::string> Books{
+  "{\n\"category\": \"reference\",\n\"author\": \"Nigel Rees\",\n\"title\": \"Sayings of the "
+  "Century\",\n\"price\": 8.95\n}",
+  "{\n\"category\": \"fiction\",\n\"author\": \"Evelyn Waugh\",\n\"title\": \"Sword of "
+  "Honour\",\n\"price\": 12.99\n}",
+  "{\n\"category\": \"fiction\",\n\"author\": \"Herman Melville\",\n\"title\": \"Moby "
+  "Dick\",\n\"isbn\": \"0-553-21311-3\",\n\"price\": 8.99\n}",
+  "{\n\"category\": \"fiction\",\n\"author\": \"J. R. R. Tolkien\",\n\"title\": \"The Lord of the "
+  "Rings\",\n\"isbn\": \"0-395-19395-8\",\n\"price\": 22.99\n}"};
+constexpr int Approx_book_size = 110;
+std::vector<std::string> Bicycles{
+  "{\"color\": \"red\", \"price\": 9.95}",
+  "{\"color\": \"green\", \"price\": 29.95}",
+  "{\"color\": \"blue\", \"price\": 399.95}",
+  "{\"color\": \"yellow\", \"price\": 99.95}",
+  "{\"color\": \"mauve\", \"price\": 199.95}",
+};
+constexpr int Approx_bicycle_size = 33;
+std::string Misc{"\n\"expensive\": 10\n"};
+std::string generate_field(std::vector<std::string> const& values, int num_values)
+{
+  std::string res;  // num_values randomly picked entries of `values`, separated by ",\n"
+  for (int idx = 0; idx < num_values; idx++) {
+    if (idx > 0) { res += std::string(",\n"); }
+    int vindex = std::min(static_cast<int>(floor(frand() * values.size())),
+                          static_cast<int>(values.size() - 1));  // clamp: frand() can be 1.0
+    res += values[vindex];
+  }
+  return res;
+}
+
+std::string build_row(int desired_bytes)
+{
+  // builds one JSON object of roughly desired_bytes bytes, starting from 2 books and 2 bikes
+  int num_books    = 2;
+  int num_bicycles = 2;
+  int remaining_bytes =
+    desired_bytes - ((num_books * Approx_book_size) + (num_bicycles * Approx_bicycle_size));
+
+  // divide up the remainder between books and bikes (negative when desired_bytes < 286)
+  float book_pct    = frand();
+  float bicycle_pct = 1.0f - book_pct;
+  num_books += (remaining_bytes * book_pct) / Approx_book_size;  // float sum truncated to int
+  num_bicycles += (remaining_bytes * bicycle_pct) / Approx_bicycle_size;
+
+  std::string books    = "\"book\": [\n" + generate_field(Books, num_books) + "]\n";
+  std::string bicycles = "\"bicycle\": [\n" + generate_field(Bicycles, num_bicycles) + "]\n";
+
+  std::string store = "\"store\": {\n";
+  if (frand() <= 0.5f) {  // randomize whether "book" or "bicycle" comes first
+    store += books + std::string(",\n") + bicycles;
+  } else {
+    store += bicycles + std::string(",\n") + books;
+  }
+  store += std::string("}\n");
+
+  std::string row = std::string("{\n");
+  if (frand() <= 0.5f) {  // randomize whether "store" or "expensive" comes first
+    row += store + std::string(",\n") + Misc;
+  } else {
+    row += Misc + std::string(",\n") + store;
+  }
+  row += std::string("}\n");
+  return row;
+}
+
+template <class... QueryArg>
+static void BM_case(benchmark::State& state, QueryArg&&... query_arg)
+{
+  srand(5236);  // fixed seed, so the rand()-driven row generation repeats across runs
+  auto iter = thrust::make_transform_iterator(
+    thrust::make_counting_iterator(0),  // the counter value itself is ignored by the lambda
+    [desired_bytes = state.range(1)](int index) { return build_row(desired_bytes); });
+  int num_rows = state.range(0);  // first benchmark argument: number of rows
+  cudf::test::strings_column_wrapper input(iter, iter + num_rows);
+  cudf::strings_column_view scv(input);
+  size_t num_chars = scv.chars().size();
+
+  std::string json_path(query_arg...);  // NOTE(review): converted to string_scalar per call below
+
+  for (auto _ : state) {
+    cuda_event_timer raii(state, true, 0);
+    auto result = cudf::strings::get_json_object(scv, json_path);
+    cudaStreamSynchronize(0);
+  }
+
+  // this isn't strictly 100% accurate. a given query isn't necessarily
+  // going to visit every single incoming character.  but in spirit it does.
+  state.SetBytesProcessed(state.iterations() * num_chars);
+}
+
+#define JSON_BENCHMARK_DEFINE(name, query)                         \
+  BENCHMARK_CAPTURE(BM_case, name, query)                          \
+    ->ArgsProduct({{100, 1000, 100000, 400000}, {300, 600, 4096}}) \
+    ->UseManualTime()                                              \
+    ->Unit(benchmark::kMillisecond);
+
+JSON_BENCHMARK_DEFINE(query0, "$");
+JSON_BENCHMARK_DEFINE(query1, "$.store");
+JSON_BENCHMARK_DEFINE(query2, "$.store.book");
+JSON_BENCHMARK_DEFINE(query3, "$.store.*");
+JSON_BENCHMARK_DEFINE(query4, "$.store.book[*]");
+JSON_BENCHMARK_DEFINE(query5, "$.store.book[*].category");
+JSON_BENCHMARK_DEFINE(query6, "$.store['bicycle']");
+JSON_BENCHMARK_DEFINE(query7, "$.store.book[*]['isbn']");
+JSON_BENCHMARK_DEFINE(query8, "$.store.bicycle[1]");
diff --git a/cpp/include/cudf/strings/detail/json.hpp b/cpp/include/cudf/strings/detail/json.hpp
new file mode 100644
index 00000000000..e6a0b49f102
--- /dev/null
+++ b/cpp/include/cudf/strings/detail/json.hpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cudf/strings/strings_column_view.hpp>
+
+#include <rmm/cuda_stream_view.hpp>
+
+namespace cudf {
+namespace strings {
+namespace detail {
+
+/**
+ * @copydoc cudf::strings::get_json_object
+ *
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ */
+std::unique_ptr<cudf::column> get_json_object(
+  cudf::strings_column_view const& col,
+  cudf::string_scalar const& json_path,
+  rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
+}  // namespace detail
+}  // namespace strings
+}  // namespace cudf
diff --git a/cpp/include/cudf/strings/json.hpp b/cpp/include/cudf/strings/json.hpp
new file mode 100644
index 00000000000..b39e4a2027c
--- /dev/null
+++ b/cpp/include/cudf/strings/json.hpp
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <cudf/strings/strings_column_view.hpp>
+
+namespace cudf {
+namespace strings {
+
+/**
+ * @addtogroup strings_json
+ * @{
+ * @file
+ */
+
+/**
+ * @brief Apply a JSONPath string to all rows in an input strings column.
+ *
+ * Applies a JSONPath string to an incoming strings column where each row in the column
+ * is a valid JSON string.  The output is returned by row as a strings column.
+ *
+ * JSONPath reference: https://tools.ietf.org/id/draft-goessner-dispatch-jsonpath-00.html
+ * Only the following JSONPath operators are implemented: `$` `.` `[]` `*`
+ *
+ * @param col The input strings column. Each row must contain a valid JSON string
+ * @param json_path The JSONPath string to be applied to each row
+ * @param mr Resource for allocating device memory.
+ * @return New strings column containing the retrieved JSON object strings
+ */
+std::unique_ptr<cudf::column> get_json_object(
+  cudf::strings_column_view const& col,
+  cudf::string_scalar const& json_path,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
+/** @} */  // end of doxygen group
+}  // namespace strings
+}  // namespace cudf
diff --git a/cpp/include/doxygen_groups.h b/cpp/include/doxygen_groups.h
index 65dd5c73475..f78ff98d49d 100644
--- a/cpp/include/doxygen_groups.h
+++ b/cpp/include/doxygen_groups.h
@@ -127,6 +127,7 @@
  *   @defgroup strings_modify Modifying
  *   @defgroup strings_replace Replacing
  *   @defgroup strings_split Splitting
+ *   @defgroup strings_json JSON
  * @}
  * @defgroup dictionary_apis Dictionary
  * @{
diff --git a/cpp/src/io/csv/csv_gpu.cu b/cpp/src/io/csv/csv_gpu.cu
index 86e5f1fdcae..44acc7fc55f 100644
--- a/cpp/src/io/csv/csv_gpu.cu
+++ b/cpp/src/io/csv/csv_gpu.cu
@@ -196,7 +196,7 @@ __global__ void __launch_bounds__(csvparse_block_dim)
       } else if (serialized_trie_contains(opts.trie_true, {field_start, field_len}) ||
                  serialized_trie_contains(opts.trie_false, {field_start, field_len})) {
         atomicAdd(&d_columnData[actual_col].bool_count, 1);
-      } else if (cudf::io::gpu::is_infinity(field_start, next_delimiter)) {
+      } else if (cudf::io::is_infinity(field_start, next_delimiter)) {
         atomicAdd(&d_columnData[actual_col].float_count, 1);
       } else {
         long countNumber   = 0;
@@ -277,7 +277,7 @@ __inline__ __device__ T decode_value(char const *begin,
                                      char const *end,
                                      parse_options_view const &opts)
 {
-  return cudf::io::gpu::parse_numeric<T, base>(begin, end, opts);
+  return cudf::io::parse_numeric<T, base>(begin, end, opts);
 }
 
 template <typename T>
@@ -285,7 +285,7 @@ __inline__ __device__ T decode_value(char const *begin,
                                      char const *end,
                                      parse_options_view const &opts)
 {
-  return cudf::io::gpu::parse_numeric<T>(begin, end, opts);
+  return cudf::io::parse_numeric<T>(begin, end, opts);
 }
 
 template <>
diff --git a/cpp/src/io/json/json_gpu.cu b/cpp/src/io/json/json_gpu.cu
index 5efb64fd4d5..75910ae6b5b 100644
--- a/cpp/src/io/json/json_gpu.cu
+++ b/cpp/src/io/json/json_gpu.cu
@@ -114,7 +114,7 @@ __inline__ __device__ T decode_value(const char *begin,
                                      uint64_t end,
                                      parse_options_view const &opts)
 {
-  return cudf::io::gpu::parse_numeric<T, base>(begin, end, opts);
+  return cudf::io::parse_numeric<T, base>(begin, end, opts);
 }
 
 /**
@@ -131,7 +131,7 @@ __inline__ __device__ T decode_value(const char *begin,
                                      const char *end,
                                      parse_options_view const &opts)
 {
-  return cudf::io::gpu::parse_numeric<T>(begin, end, opts);
+  return cudf::io::parse_numeric<T>(begin, end, opts);
 }
 
 /**
diff --git a/cpp/src/io/utilities/parsing_utils.cuh b/cpp/src/io/utilities/parsing_utils.cuh
index 584d2c9a74a..b7719cba580 100644
--- a/cpp/src/io/utilities/parsing_utils.cuh
+++ b/cpp/src/io/utilities/parsing_utils.cuh
@@ -20,6 +20,8 @@
 #include <cudf/io/types.hpp>
 #include <cudf/utilities/span.hpp>
 
+#include <io/utilities/column_type_histogram.hpp>
+
 #include <rmm/device_vector.hpp>
 
 using cudf::device_span;
@@ -82,67 +84,6 @@ struct parse_options {
   }
 };
 
-namespace gpu {
-/**
- * @brief CUDA kernel iterates over the data until the end of the current field
- *
- * Also iterates over (one or more) delimiter characters after the field.
- * Function applies to formats with field delimiters and line terminators.
- *
- * @param begin Pointer to the first element of the string
- * @param end Pointer to the first element after the string
- * @param opts A set of parsing options
- * @param escape_char A boolean value to signify whether to consider `\` as escape character or
- * just a character.
- *
- * @return Pointer to the last character in the field, including the
- *  delimiter(s) following the field data
- */
-__device__ __inline__ char const* seek_field_end(char const* begin,
-                                                 char const* end,
-                                                 parse_options_view const& opts,
-                                                 bool escape_char = false)
-{
-  bool quotation   = false;
-  auto current     = begin;
-  bool escape_next = false;
-  while (true) {
-    // Use simple logic to ignore control chars between any quote seq
-    // Handles nominal cases including doublequotes within quotes, but
-    // may not output exact failures as PANDAS for malformed fields.
-    // Check for instances such as "a2\"bc" and "\\" if `escape_char` is true.
-
-    if (*current == opts.quotechar and not escape_next) {
-      quotation = !quotation;
-    } else if (!quotation) {
-      if (*current == opts.delimiter) {
-        while (opts.multi_delimiter && current < end && *(current + 1) == opts.delimiter) {
-          ++current;
-        }
-        break;
-      } else if (*current == opts.terminator) {
-        break;
-      } else if (*current == '\r' && (current + 1 < end && *(current + 1) == '\n')) {
-        --end;
-        break;
-      }
-    }
-
-    if (escape_char == true) {
-      // If a escape character is encountered, escape next character in next loop.
-      if (escape_next == false and *current == '\\') {
-        escape_next = true;
-      } else {
-        escape_next = false;
-      }
-    }
-
-    if (current >= end) break;
-    current++;
-  }
-  return current;
-}
-
 /**
  * @brief Returns the numeric value of an ASCII/UTF-8 character. Specialization
  * for integral types. Handles hexadecimal digits, both uppercase and lowercase.
@@ -155,7 +96,7 @@ __device__ __inline__ char const* seek_field_end(char const* begin,
  * @return uint8_t Numeric value of the character, or `0`
  */
 template <typename T, typename std::enable_if_t<std::is_integral<T>::value>* = nullptr>
-__device__ __forceinline__ uint8_t decode_digit(char c, bool* valid_flag)
+constexpr uint8_t decode_digit(char c, bool* valid_flag)
 {
   if (c >= '0' && c <= '9') return c - '0';
   if (c >= 'a' && c <= 'f') return c - 'a' + 10;
@@ -176,7 +117,7 @@ __device__ __forceinline__ uint8_t decode_digit(char c, bool* valid_flag)
  * @return uint8_t Numeric value of the character, or `0`
  */
 template <typename T, typename std::enable_if_t<!std::is_integral<T>::value>* = nullptr>
-__device__ __forceinline__ uint8_t decode_digit(char c, bool* valid_flag)
+constexpr uint8_t decode_digit(char c, bool* valid_flag)
 {
   if (c >= '0' && c <= '9') return c - '0';
 
@@ -185,10 +126,7 @@ __device__ __forceinline__ uint8_t decode_digit(char c, bool* valid_flag)
 }
 
 // Converts character to lowercase.
-__inline__ __device__ char to_lower(char const c)
-{
-  return c >= 'A' && c <= 'Z' ? c + ('a' - 'A') : c;
-}
+constexpr char to_lower(char const c) { return c >= 'A' && c <= 'Z' ? c + ('a' - 'A') : c; }
 
 /**
  * @brief Checks if string is infinity, case insensitive with/without sign
@@ -199,7 +137,7 @@ __inline__ __device__ char to_lower(char const c)
  * @param end Pointer to the first element after the string
  * @return true if string is valid infinity, else false.
  */
-__inline__ __device__ bool is_infinity(char const* begin, char const* end)
+constexpr bool is_infinity(char const* begin, char const* end)
 {
   if (*begin == '-' || *begin == '+') begin++;
   char const* cinf = "infinity";
@@ -223,9 +161,10 @@ __inline__ __device__ bool is_infinity(char const* begin, char const* end)
  * @return The parsed and converted value
  */
 template <typename T, int base = 10>
-__inline__ __device__ T parse_numeric(const char* begin,
-                                      const char* end,
-                                      parse_options_view const& opts)
+constexpr T parse_numeric(const char* begin,
+                          const char* end,
+                          parse_options_view const& opts,
+                          T error_result = std::numeric_limits<T>::quiet_NaN())
 {
   T value{};
   bool all_digits_valid = true;
@@ -281,11 +220,72 @@ __inline__ __device__ T parse_numeric(const char* begin,
       if (exponent != 0) { value *= exp10(double(exponent * exponent_sign)); }
     }
   }
-  if (!all_digits_valid) { return std::numeric_limits<T>::quiet_NaN(); }
+  if (!all_digits_valid) { return error_result; }
 
   return value * sign;
 }
 
+namespace gpu {
+/**
+ * @brief Device function that iterates over the data until the end of the current field
+ *
+ * Also iterates over (one or more) delimiter characters after the field.
+ * Function applies to formats with field delimiters and line terminators.
+ *
+ * @param begin Pointer to the first element of the string
+ * @param end Pointer to the first element after the string
+ * @param opts A set of parsing options
+ * @param escape_char A boolean value to signify whether to consider `\` as escape character or
+ * just a character.
+ *
+ * @return Pointer to the last character in the field, including the
+ *  delimiter(s) following the field data
+ */
+__device__ __inline__ char const* seek_field_end(char const* begin,
+                                                 char const* end,
+                                                 parse_options_view const& opts,
+                                                 bool escape_char = false)
+{
+  bool quotation   = false;
+  auto current     = begin;
+  bool escape_next = false;
+  while (true) {
+    // Use simple logic to ignore control chars between any quote seq
+    // Handles nominal cases including doublequotes within quotes, but
+    // may not output exact failures as PANDAS for malformed fields.
+    // Check for instances such as "a2\"bc" and "\\" if `escape_char` is true.
+
+    if (*current == opts.quotechar and not escape_next) {
+      quotation = !quotation;
+    } else if (!quotation) {
+      if (*current == opts.delimiter) {
+        while (opts.multi_delimiter && current < end && *(current + 1) == opts.delimiter) {
+          ++current;
+        }
+        break;
+      } else if (*current == opts.terminator) {
+        break;
+      } else if (*current == '\r' && (current + 1 < end && *(current + 1) == '\n')) {
+        --end;  // NOTE(review): no visible effect; `end` is a local copy and is not read again
+        break;
+      }
+    }
+
+    if (escape_char == true) {
+      // If a escape character is encountered, escape next character in next loop.
+      if (escape_next == false and *current == '\\') {
+        escape_next = true;
+      } else {
+        escape_next = false;
+      }
+    }
+
+    if (current >= end) break;  // NOTE(review): checked after `*current` was read above
+    current++;
+  }
+  return current;
+}
+
 /**
  * @brief Lexicographically compare digits in input against string
  * representing an integer
diff --git a/cpp/src/strings/json/json_path.cu b/cpp/src/strings/json/json_path.cu
new file mode 100644
index 00000000000..cd8aae12070
--- /dev/null
+++ b/cpp/src/strings/json/json_path.cu
@@ -0,0 +1,952 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cudf/column/column_device_view.cuh>
+#include <cudf/column/column_factories.hpp>
+#include <cudf/detail/get_value.cuh>
+#include <cudf/detail/null_mask.hpp>
+#include <cudf/detail/utilities/cuda.cuh>
+#include <cudf/detail/utilities/vector_factories.hpp>
+#include <cudf/scalar/scalar.hpp>
+#include <cudf/strings/string_view.cuh>
+#include <cudf/strings/strings_column_view.hpp>
+#include <cudf/types.hpp>
+#include <cudf/utilities/bit.hpp>
+#include <cudf/utilities/error.hpp>
+
+#include <io/utilities/parsing_utils.cuh>
+
+#include <rmm/device_uvector.hpp>
+#include <rmm/exec_policy.hpp>
+
+#include <thrust/optional.h>
+
+namespace cudf {
+namespace strings {
+namespace detail {
+
+namespace {
+
+// debug accessibility
+
+// change to "\n" and 1 to make output more readable
+#define DEBUG_NEWLINE
+constexpr int DEBUG_NEWLINE_LEN = 0;
+
+/**
+ * @brief Result of calling a parse function.
+ *
+ * The primary use of this is to distinguish between "success" and
+ * "success but no data" return cases.  For example, if you are reading the
+ * values of an array you might call a parse function in a while loop. You
+ * would want to continue doing this until you either encounter an error (parse_result::ERROR)
+ * or you get nothing back (parse_result::EMPTY)
+ */
+enum class parse_result {
+  ERROR,    // failure
+  SUCCESS,  // success
+  EMPTY,    // success, but no data
+};
+
+/**
+ * @brief Base parser class inherited by the (device-side) json_state class and
+ * (host-side) path_state class.
+ *
+ * Contains a number of useful utility functions common to parsing json and
+ * JSONPath strings.
+ */
+class parser {
+ protected:
+  CUDA_HOST_DEVICE_CALLABLE parser() : input(nullptr), input_len(0), pos(nullptr) {}
+  CUDA_HOST_DEVICE_CALLABLE parser(const char* _input, int64_t _input_len)
+    : input(_input), input_len(_input_len), pos(_input)
+  {
+    parse_whitespace();
+  }
+
+  CUDA_HOST_DEVICE_CALLABLE parser(parser const& p)
+    : input(p.input), input_len(p.input_len), pos(p.pos)
+  {
+  }
+
+  CUDA_HOST_DEVICE_CALLABLE bool eof(const char* p) { return p - input >= input_len; }
+  CUDA_HOST_DEVICE_CALLABLE bool eof() { return eof(pos); }
+
+  // skip whitespace. returns true if `pos` ends up on a non-whitespace character, false at eof
+  CUDA_HOST_DEVICE_CALLABLE bool parse_whitespace()
+  {
+    while (!eof()) {
+      if (!is_whitespace(*pos)) { return true; }
+      pos++;
+    }
+    return false;
+  }
+
+  // parse a `quote`-delimited string. on success `str` views the text between the quotes
+  // (escape sequences are left undecoded) and `pos` is just past the closing quote
+  CUDA_HOST_DEVICE_CALLABLE parse_result parse_string(string_view& str,
+                                                      bool can_be_empty,
+                                                      char quote)
+  {
+    str = string_view(nullptr, 0);
+
+    if (parse_whitespace() && *pos == quote) {
+      const char* start = ++pos;
+      while (!eof()) {
+        // a backslash escapes the next character: an escaped quote does not end the string
+        if (*pos == '\\') {
+          pos++;
+        } else if (*pos == quote) {
+          str = string_view(start, pos - start);
+          pos++;
+          return parse_result::SUCCESS;
+        }
+        pos++;
+      }
+    }
+
+    return can_be_empty ? parse_result::EMPTY : parse_result::ERROR;
+  }
+
+  // a name means:
+  // - a string followed by a :
+  // - no string
+  CUDA_HOST_DEVICE_CALLABLE parse_result parse_name(string_view& name,
+                                                    bool can_be_empty,
+                                                    char quote)
+  {
+    auto const result = parse_string(name, can_be_empty, quote);
+    if (result == parse_result::ERROR) { return result; }
+
+    // if we got a real string, the next char must be a :
+    if (name.size_bytes() > 0) {
+      if (!parse_whitespace()) { return parse_result::ERROR; }
+      if (*pos == ':') {
+        pos++;
+        return parse_result::SUCCESS;
+      }
+    }
+    return parse_result::EMPTY;
+  }
+
+  // numbers, true, false, null.
+  // this function is not particularly strong. badly formed values will get
+  // consumed without throwing any errors
+  CUDA_HOST_DEVICE_CALLABLE parse_result parse_non_string_value(string_view& val)
+  {
+    if (!parse_whitespace()) { return parse_result::ERROR; }
+
+    // parse to the end of the value
+    char const* start = pos;
+    char const* end   = start;
+    while (!eof(end)) {
+      char const c = *end;
+      if (c == ',' || c == '}' || c == ']' || is_whitespace(c)) { break; }
+      // illegal chars
+      if (c == '[' || c == '{' || c == ':' || c == '\"') { return parse_result::ERROR; }
+      end++;
+    }
+    pos = end;
+    val = string_view(start, end - start);
+    return parse_result::SUCCESS;
+  }
+
+ protected:
+  char const* input;
+  int64_t input_len;
+  char const* pos;
+
+ private:
+  // cast to unsigned so bytes >= 0x80 (negative when char is signed) are not whitespace
+  CUDA_HOST_DEVICE_CALLABLE bool is_whitespace(char c) { return static_cast<uint8_t>(c) <= ' '; }
+};
+
+/**
+ * @brief Output buffer object.  Used during the preprocess/size-computation step
+ * and the actual output step.
+ *
+ * There is an important distinction between two cases:
+ *
+ * - producing no output at all. that is, the query matched nothing in the input.
+ * - producing empty output. the query matched something in the input, but the
+ *   value of the result is an empty string.
+ *
+ * `output_len` carries this distinction: `add_output` always leaves it holding a value
+ * (possibly 0), so an `output_len` with no value means nothing was ever added.  When
+ * `output` is null only the length is accumulated and no bytes are copied.
+ */
+struct json_output {
+  size_t output_max_len;  // NOTE(review): never checked by add_output; presumably a capacity
+  char* output;           // destination buffer; may be null
+  thrust::optional<size_t> output_len;  // number of bytes added so far, set by add_output
+
+  __device__ void add_output(const char* str, size_t len)
+  {
+    if (output != nullptr) { memcpy(output + output_len.value_or(0), str, len); }
+    output_len = output_len.value_or(0) + len;  // holds a value afterwards, even if len == 0
+  }
+
+  __device__ void add_output(string_view const& str) { add_output(str.data(), str.size_bytes()); }
+};
+
+enum json_element_type { NONE, OBJECT, ARRAY, VALUE };
+
+/**
+ * @brief Parsing class that holds the current state of the json to be parse and provides
+ * functions for navigating through it.
+ */
+class json_state : private parser {
+ public:
+  __device__ json_state()
+    : parser(),
+      cur_el_start(nullptr),
+      cur_el_type(json_element_type::NONE),
+      parent_el_type(json_element_type::NONE)
+  {
+  }
+  __device__ json_state(const char* _input, int64_t _input_len)
+    : parser(_input, _input_len),
+      cur_el_start(nullptr),
+      cur_el_type(json_element_type::NONE),
+      parent_el_type(json_element_type::NONE)
+  {
+  }
+
+  __device__ json_state(json_state const& j)
+    : parser(j),
+      cur_el_start(j.cur_el_start),
+      cur_el_type(j.cur_el_type),
+      parent_el_type(j.parent_el_type)
+  {
+  }
+
+  // copy the entire current element into `output`. a null `output` just advances past it
+  __device__ parse_result extract_element(json_output* output, bool list_element)
+  {
+    char const* start = cur_el_start;
+    char const* end   = start;
+
+    // if we're a value type, do a simple value parse.
+    if (cur_el_type == VALUE) {
+      pos = cur_el_start;
+      if (parse_value() != parse_result::SUCCESS) { return parse_result::ERROR; }
+      end = pos;
+
+      // SPARK-specific behavior.  if this is a non-list-element wrapped in quotes, strip them.
+      // we may need to make this behavior configurable in some way later on.
+      if (!list_element && *start == '\"' && *(end - 1) == '\"') {
+        start++;
+        end--;
+      }
+    } else {  // otherwise, march through everything inside
+      int obj_count = 0;
+      int arr_count = 0;
+      while (!eof(end)) {
+        // consume quoted strings whole so that any { } [ ] inside them are not counted
+        if (*end == '\"') {
+          string_view unused;
+          pos = end;
+          if (parse_string(unused, false, '\"') != parse_result::SUCCESS) {
+            return parse_result::ERROR;
+          }
+          end = pos;
+        } else {
+          switch (*end++) {
+            case '{': obj_count++; break;
+            case '}': obj_count--; break;
+            case '[': arr_count++; break;
+            case ']': arr_count--; break;
+            default: break;
+          }
+        }
+        if (obj_count == 0 && arr_count == 0) { break; }
+      }
+      if (obj_count > 0 || arr_count > 0) { return parse_result::ERROR; }
+      pos = end;
+    }
+
+    // parse trailing ,
+    if (parse_whitespace() && *pos == ',') { pos++; }
+    if (output != nullptr) { output->add_output({start, static_cast<size_type>(end - start)}); }
+    return parse_result::SUCCESS;
+  }
+
+  // skip the next element
+  __device__ parse_result skip_element() { return extract_element(nullptr, false); }
+
+  // advance to the next element
+  __device__ parse_result next_element() { return next_element_internal(false); }
+
+  // step into the current element; ERROR if it is not of `expected_type` (NONE accepts any)
+  __device__ parse_result child_element(json_element_type expected_type)
+  {
+    if (expected_type != NONE && cur_el_type != expected_type) { return parse_result::ERROR; }
+
+    // on success the element we just left becomes the parent; otherwise the parent is unchanged
+    auto const prev_el_type = cur_el_type;
+    auto const result       = next_element_internal(true);
+    if (result == parse_result::SUCCESS) { parent_el_type = prev_el_type; }
+    return result;
+  }
+
+  // advance to the next element named `name` (`*` matches any). non-SUCCESS results pass through
+  __device__ parse_result next_matching_element(string_view const& name, bool inclusive)
+  {
+    // if `inclusive` is false the current element is not a candidate, so step past it first
+    if (!inclusive) {
+      parse_result result = next_element_internal(false);
+      if (result != parse_result::SUCCESS) { return result; }
+    }
+    // loop until we find a match or next_element_internal reports EMPTY/ERROR
+    do {
+      // wildcard matches anything
+      if (name.size_bytes() == 1 && name.data()[0] == '*') {
+        return parse_result::SUCCESS;
+      } else if (cur_el_name == name) {
+        return parse_result::SUCCESS;
+      }
+
+      // no match: move on to the following sibling
+      parse_result result = next_element_internal(false);
+      if (result != parse_result::SUCCESS) { return result; }
+    } while (1);
+
+    return parse_result::ERROR;  // unreachable: the loop above only exits via return
+  }
+
+ private:
+  // consume one scalar value: a quoted string, or a bare number/null/bool token
+  __device__ parse_result parse_value()
+  {
+    // parse_whitespace() returning false is treated as an error here
+    if (!parse_whitespace()) { return parse_result::ERROR; }
+    string_view ignored;  // the parsed text is discarded; only the result code is used
+    if (*pos == '\"') { return parse_string(ignored, false, '\"'); }
+    return parse_non_string_value(ignored);
+  }
+
+  // move to the next element: the first child of the current element when `child` is true,
+  // otherwise the next element after the current one. EMPTY means there is nothing to move to.
+  __device__ parse_result next_element_internal(bool child)
+  {
+    // if we're not getting a child element, skip the current element. this will leave pos
+    // as the first character -after- the close of the current element
+    if (!child && cur_el_start != nullptr) {
+      if (skip_element() == parse_result::ERROR) { return parse_result::ERROR; }
+      cur_el_start = nullptr;
+    }
+    // otherwise pos will be at the first character within the current element
+
+    // can only get the child of an object or array. this could theoretically be handled as
+    // an error, but the evaluators I've found seem to treat this as "it's nothing"
+    if (child && (cur_el_type == VALUE || cur_el_type == NONE)) { return parse_result::EMPTY; }
+
+    // what's next
+    if (!parse_whitespace()) { return parse_result::EMPTY; }
+    // if we're closing off a parent element, we're done
+    char const c = *pos;
+    if (c == ']' || c == '}') { return parse_result::EMPTY; }
+
+    // if we're not accessing elements of an array, check for name.
+    bool const array_access =
+      (cur_el_type == ARRAY && child) || (parent_el_type == ARRAY && !child);
+    if (!array_access && parse_name(cur_el_name, true, '\"') == parse_result::ERROR) {
+      return parse_result::ERROR;
+    }
+
+    // element type
+    if (!parse_whitespace()) { return parse_result::EMPTY; }
+    switch (*pos++) {
+      case '[': cur_el_type = ARRAY; break;
+      case '{': cur_el_type = OBJECT; break;
+
+      case ',':
+      case ':':
+      case '\'': return parse_result::ERROR;
+
+      // value type
+      default: cur_el_type = VALUE; break;
+    }
+
+    // the start of the current element is always at the value, not the name
+    cur_el_start = pos - 1;
+    return parse_result::SUCCESS;
+  }
+
+  const char* cur_el_start;          // pointer to the first character of the -value- of the current
+                                     // element - not the name
+  string_view cur_el_name;           // name of the current element (if applicable)
+  json_element_type cur_el_type;     // type of the current element
+  json_element_type parent_el_type;  // parent element type
+};
+
+// the kinds of command a JSONPath query is compiled into. END terminates a command buffer.
+enum class path_operator_type { ROOT, CHILD, CHILD_WILDCARD, CHILD_INDEX, ERROR, END };
+
+/**
+ * @brief A "command" operator used to query a json string.  A full query is
+ * an array of these operators applied to the incoming json string.
+ */
+struct path_operator {
+  CUDA_HOST_DEVICE_CALLABLE path_operator()
+    : type(path_operator_type::ERROR), index(-1), expected_type{NONE}
+  {
+  }
+  CUDA_HOST_DEVICE_CALLABLE path_operator(path_operator_type _type,
+                                          json_element_type _expected_type = NONE)
+    : type(_type), index(-1), expected_type{_expected_type}
+  {
+  }
+
+  path_operator_type type;  // operator type
+  // the expected element type we're applying this operation to. for example:
+  //    - you cannot retrieve a subscripted field (eg [5]) from an object.
+  //    - you cannot retrieve a field by name (eg  .book) from an array.
+  //    - you -can- use .* for both arrays and objects
+  // a value of NONE implies any type accepted
+  json_element_type expected_type;  // the expected type of the element we're working with
+  string_view name;                 // name to match against (if applicable)
+  int index;                        // index for subscript operator
+};
+
+/**
+ * @brief Parsing class that holds the current state of the JSONPath string to be parsed
+ * and provides functions for navigating through it. This is only called on the host
+ * during the preprocess step which builds a command buffer that the gpu uses.
+ */
+class path_state : private parser {
+ public:
+  path_state(const char* _path, size_t _path_len) : parser(_path, _path_len) {}
+
+  // consume and return the next operator in the JSONPath string: END once the input is
+  // exhausted, ERROR if a name fails to parse. throws on an unrecognized operator.
+  path_operator get_next_operator()
+  {
+    if (eof()) { return {path_operator_type::END}; }
+
+    switch (*pos++) {
+      case '$': return {path_operator_type::ROOT};
+
+      case '.': {
+        path_operator op;
+        string_view term{".[", 2};
+        if (parse_path_name(op.name, term)) {
+          // this is another potential use case for __SPARK_BEHAVIORS / configurability
+          // Spark currently only handles the wildcard operator inside [*], it does
+          // not handle .*
+          if (op.name.size_bytes() == 1 && op.name.data()[0] == '*') {
+            op.type          = path_operator_type::CHILD_WILDCARD;
+            op.expected_type = NONE;
+          } else {
+            op.type          = path_operator_type::CHILD;
+            op.expected_type = OBJECT;
+          }
+          return op;
+        }
+      } break;
+
+      // 3 ways this can be used
+      // indices:   [0]
+      // name:      ['book']
+      // wildcard:  [*]
+      case '[': {
+        path_operator op;
+        string_view term{"]", 1};
+        bool const is_string = *pos == '\'';
+        if (parse_path_name(op.name, term)) {
+          // step over the terminator. NOTE(review): it is not verified to actually be ']'
+          pos++;
+          if (op.name.size_bytes() == 1 && op.name.data()[0] == '*') {
+            op.type          = path_operator_type::CHILD_WILDCARD;
+            op.expected_type = NONE;
+          } else {
+            if (is_string) {
+              op.type          = path_operator_type::CHILD;
+              op.expected_type = OBJECT;
+            } else {
+              op.type  = path_operator_type::CHILD_INDEX;
+              op.index = cudf::io::parse_numeric<int>(
+                op.name.data(), op.name.data() + op.name.size_bytes(), json_opts, -1);
+              CUDF_EXPECTS(op.index >= 0, "Invalid numeric index specified in JSONPath");
+              op.expected_type = ARRAY;
+            }
+          }
+          return op;
+        }
+      } break;
+
+      // bare wildcard operator. the '*' has already been consumed by the switch expression,
+      // so pos must not be advanced again or the character following it would be lost.
+      case '*': return path_operator{path_operator_type::CHILD_WILDCARD};
+
+      default: CUDF_FAIL("Unrecognized JSONPath operator"); break;
+    }
+    return {path_operator_type::ERROR};
+  }
+
+ private:
+  cudf::io::parse_options_view json_opts{',', '\n', '\"', '.'};
+
+  // read the name following '.' or '[' into `name`: a lone '*', a 'quoted' string, or a bare
+  // token running up to the first character found in `terminators` (or to the end of input).
+  // returns false if a quoted string fails to parse; throws if the resulting name is empty.
+  bool parse_path_name(string_view& name, string_view const& terminators)
+  {
+    switch (*pos) {
+      case '*':
+        name = string_view(pos, 1);
+        pos++;
+        break;
+
+      case '\'':
+        if (parse_string(name, false, '\'') != parse_result::SUCCESS) { return false; }
+        break;
+
+      default: {
+        // std::find_first_of returns its `last` argument (never nullptr) when no terminator
+        // is present, in which case the name simply extends to the end of the input.
+        char const* const input_end = input + input_len;
+        char const* const end       = std::find_first_of(
+          pos, input_end, terminators.data(), terminators.data() + terminators.size_bytes());
+        name = string_view(pos, end - pos);
+        pos  = end;
+        break;
+      }
+    }
+
+    // an empty name is not valid
+    CUDF_EXPECTS(name.size_bytes() > 0, "Invalid empty name in JSONPath query string");
+
+    return true;
+  }
+};
+
+/**
+ * @brief Compile the JSONPath string on the host into the operator sequence run by the GPU.
+ * A root operator is inserted automatically when the path does not start with one.
+ *
+ * @param json_path The incoming json path
+ * @param stream Cuda stream to perform any gpu actions on
+ * @returns The command buffer (nullopt for an empty path) and the maximum stack depth required.
+ */
+std::pair<thrust::optional<rmm::device_uvector<path_operator>>, int> build_command_buffer(
+  cudf::string_scalar const& json_path, rmm::cuda_stream_view stream)
+{
+  std::string h_json_path = json_path.to_string(stream);
+  path_state p_state(h_json_path.data(), static_cast<size_type>(h_json_path.size()));
+
+  std::vector<path_operator> h_operators;
+  int max_stack_depth = 1;  // starts at 1 and grows by one for every wildcard operator
+  bool finished       = false;
+  while (!finished) {
+    path_operator op   = p_state.get_next_operator();
+    bool const is_root = op.type == path_operator_type::ROOT;
+    bool const is_end  = op.type == path_operator_type::END;
+    if (op.type == path_operator_type::ERROR) {
+      CUDF_FAIL("Encountered invalid JSONPath input string");
+    }
+    if (op.type == path_operator_type::CHILD_WILDCARD) { ++max_stack_depth; }
+    // names point into the host copy of the path; rebase them onto the scalar's device data
+    if (op.name.size_bytes() > 0) {
+      auto const name_offset = op.name.data() - h_json_path.data();
+      op.name                = string_view(json_path.data() + name_offset, op.name.size_bytes());
+    }
+    bool const first_op = h_operators.empty();
+    if (is_root) { CUDF_EXPECTS(first_op, "Root operator ($) can only exist at the root"); }
+    // if we haven't gotten a root operator to start, and we're not empty, quietly push a
+    // root operator now.
+    if (first_op && !is_root && !is_end) {
+      h_operators.push_back(path_operator{path_operator_type::ROOT});
+    }
+    h_operators.push_back(op);
+    finished = is_end;
+  }
+
+  auto const is_empty = h_operators.size() == 1 && h_operators[0].type == path_operator_type::END;
+  return is_empty
+           ? std::make_pair(thrust::nullopt, 0)
+           : std::make_pair(
+               thrust::make_optional(cudf::detail::make_device_uvector_sync(h_operators, stream)),
+               max_stack_depth);
+}
+
+#define PARSE_TRY(_x)                                                       \
+  do {                                                                      \
+    last_result = _x;                                                       \
+    if (last_result == parse_result::ERROR) { return parse_result::ERROR; } \
+  } while (0)
+
+/**
+ * @brief Parse a single json string using the provided command buffer
+ *
+ * @tparam max_command_stack_depth Capacity of the context stack. A context pushed while the
+ * stack is full is dropped.
+ * @param j_state The incoming json string and associated parser
+ * @param commands The command buffer to be applied to the string. Always ends with a
+ * path_operator_type::END
+ * @param output Buffer used to store the results of the query
+ * @returns A result code indicating success/fail/empty.
+ */
+template <int max_command_stack_depth>
+__device__ parse_result parse_json_path(json_state& j_state,
+                                        path_operator const* commands,
+                                        json_output& output)
+{
+  // manually maintained context stack in lieu of calling parse_json_path recursively.
+  struct context {
+    json_state j_state;             // parser state positioned at the element to operate on
+    path_operator const* commands;  // next command to apply to that element
+    bool list_element;              // true when the result is an entry of a wildcard's array
+    bool state_flag;                // true once a wildcard has already visited its first child
+  };
+  context stack[max_command_stack_depth];
+  int stack_pos     = 0;
+
+  // stack_pos is the index of the next free slot, so every slot [0, max_command_stack_depth)
+  // is usable; a push is refused only when the stack is completely full.
+  auto push_context = [&stack, &stack_pos](json_state const& _j_state,
+                                           path_operator const* _commands,
+                                           bool _list_element = false,
+                                           bool _state_flag   = false) {
+    if (stack_pos >= max_command_stack_depth) { return false; }
+    stack[stack_pos++] = context{_j_state, _commands, _list_element, _state_flag};
+    return true;
+  };
+  auto pop_context = [&stack, &stack_pos](context& c) {
+    if (stack_pos > 0) {
+      c = stack[--stack_pos];
+      return true;
+    }
+    return false;
+  };
+  push_context(j_state, commands, false);
+
+  parse_result last_result = parse_result::SUCCESS;
+  context ctx;
+  int element_count = 0;
+  while (pop_context(ctx)) {
+    path_operator op = *ctx.commands;
+
+    switch (op.type) {
+      // whatever the first object is
+      case path_operator_type::ROOT:
+        PARSE_TRY(ctx.j_state.next_element());
+        push_context(ctx.j_state, ctx.commands + 1);
+        break;
+
+      // .name
+      // ['name']
+      // selects the child whose name matches op.name
+      // will return a single thing
+      case path_operator_type::CHILD: {
+        PARSE_TRY(ctx.j_state.child_element(op.expected_type));
+        if (last_result == parse_result::SUCCESS) {
+          PARSE_TRY(ctx.j_state.next_matching_element(op.name, true));
+          if (last_result == parse_result::SUCCESS) {
+            push_context(ctx.j_state, ctx.commands + 1, ctx.list_element);
+          }
+        }
+      } break;
+
+      // .*
+      // [*]
+      // will return an array of things.
+      // this operator is visited once per child: each visit re-pushes itself (state_flag set)
+      // underneath the next command, so popping the stack alternates between running the
+      // rest of the path on one child and coming back here to advance to the next one.
+      case path_operator_type::CHILD_WILDCARD: {
+        if (!ctx.state_flag) {
+          // first visit. we will only ever be returning 1 array
+          if (!ctx.list_element) { output.add_output({"[" DEBUG_NEWLINE, 1 + DEBUG_NEWLINE_LEN}); }
+
+          // step into the child element
+          PARSE_TRY(ctx.j_state.child_element(op.expected_type));
+          if (last_result == parse_result::EMPTY) {
+            if (!ctx.list_element) {
+              output.add_output({"]" DEBUG_NEWLINE, 1 + DEBUG_NEWLINE_LEN});
+            }
+            last_result = parse_result::SUCCESS;
+            break;
+          }
+
+          // first element
+          PARSE_TRY(ctx.j_state.next_matching_element({"*", 1}, true));
+        } else {
+          // next element
+          PARSE_TRY(ctx.j_state.next_matching_element({"*", 1}, false));
+        }
+
+        // no (more) elements: close the array if we opened one and finish this operator
+        if (last_result == parse_result::EMPTY) {
+          if (!ctx.list_element) {
+            output.add_output({"]" DEBUG_NEWLINE, 1 + DEBUG_NEWLINE_LEN});
+          }
+          last_result = parse_result::SUCCESS;
+          break;
+        }
+
+        // re-push ourselves
+        push_context(ctx.j_state, ctx.commands, ctx.list_element, true);
+        // push the next command
+        push_context(ctx.j_state, ctx.commands + 1, true);
+      } break;
+
+      // [0]
+      // [1]
+      // etc
+      // returns a single thing
+      case path_operator_type::CHILD_INDEX: {
+        PARSE_TRY(ctx.j_state.child_element(op.expected_type));
+        if (last_result == parse_result::SUCCESS) {
+          string_view const any{"*", 1};
+          PARSE_TRY(ctx.j_state.next_matching_element(any, true));
+          if (last_result == parse_result::SUCCESS) {
+            // we are on element 0; step over one element at a time up to the requested index
+            int idx;
+            for (idx = 1; idx <= op.index; idx++) {
+              PARSE_TRY(ctx.j_state.next_matching_element(any, false));
+              if (last_result == parse_result::EMPTY) { break; }
+            }
+            // if we didn't end up at the index we requested, this is an invalid index
+            if (idx - 1 != op.index) { return parse_result::ERROR; }
+            push_context(ctx.j_state, ctx.commands + 1, ctx.list_element);
+          }
+        }
+      } break;
+
+      // some sort of error.
+      case path_operator_type::ERROR: return parse_result::ERROR; break;
+
+      // END case
+      // every command has been applied: emit the element we ended up on. elements produced
+      // under a wildcard are separated from the previous one with a comma.
+      default: {
+        if (ctx.list_element && element_count > 0) {
+          output.add_output({"," DEBUG_NEWLINE, 1 + DEBUG_NEWLINE_LEN});
+        }
+        PARSE_TRY(ctx.j_state.extract_element(&output, ctx.list_element));
+        if (ctx.list_element && last_result != parse_result::EMPTY) { element_count++; }
+      } break;
+    }
+  }
+
+  return parse_result::SUCCESS;
+}
+
+// hardcoding this for now. get_json_object() rejects any path whose required depth, as computed
+// by build_command_buffer() (1 + the number of wildcard operators), exceeds this value.
+constexpr int max_command_stack_depth = 8;
+
+/**
+ * @brief Run a compiled JSONPath query against one json string.
+ *
+ * Thin wrapper over parse_json_path(), kept mostly as a convenient hook for debugging.
+ *
+ * @param input The incoming json string
+ * @param input_len Size of the incoming json string
+ * @param commands The command buffer to be applied to the string. Always ends with a
+ * path_operator_type::END
+ * @param out_buf Buffer used to store the results of the query (nullptr in the size
+ * computation step)
+ * @param out_buf_size Size of the output buffer
+ * @returns A pair containing the result code and the output buffer.
+ */
+__device__ thrust::pair<parse_result, json_output> get_json_object_single(
+  char const* input,
+  size_t input_len,
+  path_operator const* const commands,
+  char* out_buf,
+  size_t out_buf_size)
+{
+  json_state state(input, input_len);
+  json_output sink{out_buf_size, out_buf};
+
+  parse_result const status = parse_json_path<max_command_stack_depth>(state, commands, sink);
+
+  return thrust::pair<parse_result, json_output>{status, sink};
+}
+
+/**
+ * @brief Kernel for running the JSONPath query.
+ *
+ * This kernel operates in a 2-pass way.  On the first pass (out_buf empty), it computes
+ * output sizes.  On the second pass it fills in the provided output buffers
+ * (chars and validity)
+ *
+ * @param col Device view of the incoming string
+ * @param commands JSONPath command buffer
+ * @param output_offsets Buffer used to store the string offsets for the results of the query
+ * @param out_buf Buffer used to store the results of the query
+ * @param out_validity Output validity buffer
+ * @param out_valid_count Output count of # of valid bits. Must be zero on entry.
+ */
+template <int block_size>
+__launch_bounds__(block_size) __global__
+  void get_json_object_kernel(column_device_view col,
+                              path_operator const* const commands,
+                              offset_type* output_offsets,
+                              thrust::optional<char*> out_buf,
+                              thrust::optional<bitmask_type*> out_validity,
+                              thrust::optional<size_type*> out_valid_count)
+{
+  size_type tid    = threadIdx.x + (blockDim.x * blockIdx.x);
+  size_type stride = blockDim.x * gridDim.x;
+
+  // note: out_valid_count is -not- reset here; that would race with other blocks' atomicAdd.
+  size_type warp_valid_count{0};
+
+  auto active_threads = __ballot_sync(0xffffffff, tid < col.size());
+  while (tid < col.size()) {
+    bool is_valid         = false;
+    string_view const str = col.element<string_view>(tid);
+    size_type output_size = 0;
+    if (str.size_bytes() > 0) {
+      char* dst = out_buf.has_value() ? out_buf.value() + output_offsets[tid] : nullptr;
+      size_t const dst_size =
+        out_buf.has_value() ? output_offsets[tid + 1] - output_offsets[tid] : 0;
+
+      parse_result result;
+      json_output out;
+      thrust::tie(result, out) =
+        get_json_object_single(str.data(), str.size_bytes(), commands, dst, dst_size);
+      output_size = out.output_len.value_or(0);
+      if (out.output_len.has_value() && result == parse_result::SUCCESS) { is_valid = true; }
+    }
+
+    // filled in only during the precompute step. during the compute step, the offsets
+    // are fed back in so we do -not- want to write them out
+    if (!out_buf.has_value()) { output_offsets[tid] = static_cast<offset_type>(output_size); }
+
+    // validity filled in only during the output step
+    if (out_validity.has_value()) {
+      uint32_t mask = __ballot_sync(active_threads, is_valid);
+      // 0th lane of the warp writes the validity
+      if (!(tid % cudf::detail::warp_size)) {
+        out_validity.value()[cudf::word_index(tid)] = mask;
+        warp_valid_count += __popc(mask);
+      }
+    }
+
+    tid += stride;
+    active_threads = __ballot_sync(active_threads, tid < col.size());
+  }
+
+  // sum the valid counts across the whole block
+  if (out_valid_count) {
+    size_type block_valid_count =
+      cudf::detail::single_lane_block_sum_reduce<block_size, 0>(warp_valid_count);
+    if (threadIdx.x == 0) { atomicAdd(out_valid_count.value(), block_valid_count); }
+  }
+}
+
+/**
+ * @copydoc cudf::strings::detail::get_json_object
+ */
+std::unique_ptr<cudf::column> get_json_object(cudf::strings_column_view const& col,
+                                              cudf::string_scalar const& json_path,
+                                              rmm::cuda_stream_view stream,
+                                              rmm::mr::device_memory_resource* mr)
+{
+  // preprocess the json_path into a command buffer
+  auto preprocess = build_command_buffer(json_path, stream);
+  CUDF_EXPECTS(std::get<1>(preprocess) <= max_command_stack_depth,
+               "Encountered JSONPath string that is too complex");
+
+  // an empty input has no rows to query; the launch grid below is sized from col.size()
+  if (col.size() == 0) { return make_empty_column(data_type{type_id::STRING}); }
+
+  // if the query is empty, return a string column containing all nulls
+  if (!std::get<0>(preprocess).has_value()) {
+    return std::make_unique<column>(
+      data_type{type_id::STRING},
+      col.size(),
+      rmm::device_buffer{0, stream, mr},  // no data
+      cudf::detail::create_null_mask(col.size(), mask_state::ALL_NULL, stream, mr),
+      col.size());  // null count
+  }
+
+  // allocate output offsets buffer (only once we know the kernels will actually run)
+  auto offsets = cudf::make_fixed_width_column(
+    data_type{type_id::INT32}, col.size() + 1, mask_state::UNALLOCATED, stream, mr);
+  cudf::mutable_column_view offsets_view(*offsets);
+
+  constexpr int block_size = 512;
+  cudf::detail::grid_1d const grid{col.size(), block_size};
+  auto cdv = column_device_view::create(col.parent(), stream);
+
+  // preprocess sizes (returned in the offsets buffer)
+  get_json_object_kernel<block_size>
+    <<<grid.num_blocks, grid.num_threads_per_block, 0, stream.value()>>>(
+      *cdv,
+      std::get<0>(preprocess).value().data(),
+      offsets_view.head<offset_type>(),
+      thrust::nullopt,
+      thrust::nullopt,
+      thrust::nullopt);
+
+  // convert sizes to offsets
+  thrust::exclusive_scan(rmm::exec_policy(stream),
+                         offsets_view.head<offset_type>(),
+                         offsets_view.head<offset_type>() + col.size() + 1,
+                         offsets_view.head<offset_type>(),
+                         0);
+  size_type const output_size =
+    cudf::detail::get_value<offset_type>(offsets_view, col.size(), stream);
+
+  // allocate output string column
+  auto chars = cudf::make_fixed_width_column(
+    data_type{type_id::INT8}, output_size, mask_state::UNALLOCATED, stream, mr);
+  // potential optimization : skip creating the validity mask if all outputs are known valid
+  rmm::device_buffer validity =
+    cudf::detail::create_null_mask(col.size(), mask_state::UNINITIALIZED, stream, mr);
+
+  // compute results. the kernel accumulates into d_valid_count, which starts at zero here
+  cudf::mutable_column_view chars_view(*chars);
+  rmm::device_scalar<size_type> d_valid_count{0, stream};
+  get_json_object_kernel<block_size>
+    <<<grid.num_blocks, grid.num_threads_per_block, 0, stream.value()>>>(
+      *cdv,
+      std::get<0>(preprocess).value().data(),
+      offsets_view.head<offset_type>(),
+      chars_view.head<char>(),
+      static_cast<bitmask_type*>(validity.data()),
+      d_valid_count.data());
+
+  return make_strings_column(col.size(),
+                             std::move(offsets),
+                             std::move(chars),
+                             col.size() - d_valid_count.value(stream),
+                             std::move(validity),
+                             stream,
+                             mr);
+}
+
+}  // namespace
+}  // namespace detail
+
+/**
+ * @copydoc cudf::strings::get_json_object
+ */
+std::unique_ptr<cudf::column> get_json_object(cudf::strings_column_view const& col,
+                                              cudf::string_scalar const& json_path,
+                                              rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();
+  return detail::get_json_object(col, json_path, 0, mr);  // 0 is passed as the stream argument
+}
+
+}  // namespace strings
+}  // namespace cudf
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index 082f039054e..f9904dda49e 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -334,6 +334,7 @@ ConfigureTest(STRINGS_TEST
     strings/hash_string.cu
     strings/integers_tests.cu
     strings/ipv4_tests.cpp
+    strings/json_tests.cpp
     strings/pad_tests.cpp
     strings/replace_regex_tests.cpp
     strings/replace_tests.cpp
diff --git a/cpp/tests/strings/json_tests.cpp b/cpp/tests/strings/json_tests.cpp
new file mode 100644
index 00000000000..44eb35d4163
--- /dev/null
+++ b/cpp/tests/strings/json_tests.cpp
@@ -0,0 +1,761 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cudf/scalar/scalar_factories.hpp>
+#include <cudf/strings/json.hpp>
+#include <cudf/strings/replace.hpp>
+#include <cudf/strings/strings_column_view.hpp>
+
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_wrapper.hpp>
+
+// reference:  https://jsonpath.herokuapp.com/
+
+// clang-format off
+std::string json_string{
+  "{" 
+    "\"store\": {""\"book\": ["
+        "{"
+          "\"category\": \"reference\","
+          "\"author\": \"Nigel Rees\","
+          "\"title\": \"Sayings of the Century\","
+          "\"price\": 8.95"
+        "},"
+        "{"
+          "\"category\": \"fiction\","
+          "\"author\": \"Evelyn Waugh\","
+          "\"title\": \"Sword of Honour\","
+          "\"price\": 12.99"
+        "},"
+        "{"
+          "\"category\": \"fiction\","
+          "\"author\": \"Herman Melville\","
+          "\"title\": \"Moby Dick\","
+          "\"isbn\": \"0-553-21311-3\","
+          "\"price\": 8.99"
+        "},"
+        "{"
+          "\"category\": \"fiction\","
+          "\"author\": \"J. R. R. Tolkien\","
+          "\"title\": \"The Lord of the Rings\","
+          "\"isbn\": \"0-395-19395-8\","
+          "\"price\": 22.99"
+        "}"
+      "],"
+      "\"bicycle\": {"
+        "\"color\": \"red\","
+        "\"price\": 19.95"
+      "}"
+    "},"
+    "\"expensive\": 10"
+  "}"
+};
+// clang-format on
+
+// removes every "\n", "\r" and "\t" from each string in col (spaces are left in place).
+std::unique_ptr<cudf::column> drop_whitespace(cudf::column_view const& col)
+{
+  cudf::test::strings_column_wrapper to_remove{"\n", "\r", "\t"};
+  cudf::test::strings_column_wrapper substitutes{"", "", ""};
+
+  return cudf::strings::replace(cudf::strings_column_view(col),
+                                cudf::strings_column_view(to_remove),
+                                cudf::strings_column_view(substitutes));
+}
+
+struct JsonTests : public cudf::test::BaseFixture {  // fixture used by the TEST_F cases below
+};
+
+TEST_F(JsonTests, GetJsonObjectRootOp)
+{
+  // "$" selects the whole document, so the result must match the input once whitespace is dropped
+  cudf::test::strings_column_wrapper input{json_string};
+  std::string json_path("$");
+
+  auto const raw = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path);
+  auto const actual   = drop_whitespace(*raw);
+  auto const expected = drop_whitespace(input);
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*actual, *expected);
+}
+
+TEST_F(JsonTests, GetJsonObjectChildOp)
+{
+  {  // "$.store": select a named child of the root object
+    cudf::test::strings_column_wrapper input{json_string};
+    std::string json_path("$.store");
+    auto result_raw = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path);
+    auto result     = drop_whitespace(*result_raw);
+
+    // clang-format off
+    cudf::test::strings_column_wrapper expected_raw{     
+      "{"
+        "\"book\": ["
+          "{"
+            "\"category\": \"reference\","
+            "\"author\": \"Nigel Rees\","
+            "\"title\": \"Sayings of the Century\","
+            "\"price\": 8.95"
+          "},"
+          "{"
+            "\"category\": \"fiction\","
+            "\"author\": \"Evelyn Waugh\","
+            "\"title\": \"Sword of Honour\","
+            "\"price\": 12.99"
+          "},"
+          "{"
+            "\"category\": \"fiction\","
+            "\"author\": \"Herman Melville\","
+            "\"title\": \"Moby Dick\","
+            "\"isbn\": \"0-553-21311-3\","
+            "\"price\": 8.99"
+          "},"
+          "{"
+            "\"category\": \"fiction\","
+            "\"author\": \"J. R. R. Tolkien\","
+            "\"title\": \"The Lord of the Rings\","
+            "\"isbn\": \"0-395-19395-8\","
+            "\"price\": 22.99"
+          "}"
+        "],"
+        "\"bicycle\": {"
+          "\"color\": \"red\","
+          "\"price\": 19.95"
+        "}"
+      "}"
+    };
+    // clang-format on
+    auto expected = drop_whitespace(expected_raw);
+
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, *expected);
+  }
+
+  {  // "$.store.book": chained child operators; the selected value is an array
+    cudf::test::strings_column_wrapper input{json_string};
+    std::string json_path("$.store.book");
+    auto result_raw = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path);
+    auto result     = drop_whitespace(*result_raw);
+
+    // clang-format off
+    cudf::test::strings_column_wrapper expected_raw{
+      "["
+        "{"
+          "\"category\": \"reference\","
+          "\"author\": \"Nigel Rees\","
+          "\"title\": \"Sayings of the Century\","
+          "\"price\": 8.95"
+        "},"
+        "{"
+          "\"category\": \"fiction\","
+          "\"author\": \"Evelyn Waugh\","
+          "\"title\": \"Sword of Honour\","
+          "\"price\": 12.99"
+        "},"
+        "{"
+          "\"category\": \"fiction\","
+          "\"author\": \"Herman Melville\","
+          "\"title\": \"Moby Dick\","
+          "\"isbn\": \"0-553-21311-3\","
+          "\"price\": 8.99"
+        "},"
+        "{"
+          "\"category\": \"fiction\","
+          "\"author\": \"J. R. R. Tolkien\","
+          "\"title\": \"The Lord of the Rings\","
+          "\"isbn\": \"0-395-19395-8\","
+          "\"price\": 22.99"
+        "}"
+      "]"
+    };
+    // clang-format on
+    auto expected = drop_whitespace(expected_raw);
+
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, *expected);
+  }
+}
+
+TEST_F(JsonTests, GetJsonObjectWildcardOp)  // '*' selects every child of the current node
+{
+  {  // "$.store.*": expect the value of each member of "store", collected into one array
+    cudf::test::strings_column_wrapper input{json_string};
+    std::string json_path("$.store.*");
+    auto result_raw = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path);
+    auto result     = drop_whitespace(*result_raw);
+
+    // clang-format off
+    cudf::test::strings_column_wrapper expected_raw{
+      "["
+        "["
+          "{"
+            "\"category\": \"reference\","
+            "\"author\": \"Nigel Rees\","
+            "\"title\": \"Sayings of the Century\","
+            "\"price\": 8.95"
+          "},"
+          "{"
+            "\"category\": \"fiction\","
+            "\"author\": \"Evelyn Waugh\","
+            "\"title\": \"Sword of Honour\","
+            "\"price\": 12.99"
+          "},"
+          "{"
+            "\"category\": \"fiction\","
+            "\"author\": \"Herman Melville\","
+            "\"title\": \"Moby Dick\","
+            "\"isbn\": \"0-553-21311-3\","
+            "\"price\": 8.99"
+          "},"
+          "{"
+            "\"category\": \"fiction\","
+            "\"author\": \"J. R. R. Tolkien\","
+            "\"title\": \"The Lord of the Rings\","
+            "\"isbn\": \"0-395-19395-8\","
+            "\"price\": 22.99"
+          "}"
+        "],"
+        "{"
+          "\"color\": \"red\","
+          "\"price\": 19.95"
+        "}"
+      "]"
+    };
+    // clang-format on
+    auto expected = drop_whitespace(expected_raw);
+
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, *expected);
+  }
+
+  {  // "*" with no leading '$': expect the value of each top-level member, as one array
+    cudf::test::strings_column_wrapper input{json_string};
+    std::string json_path("*");
+    auto result_raw = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path);
+    auto result     = drop_whitespace(*result_raw);
+
+    // clang-format off
+    cudf::test::strings_column_wrapper expected_raw{
+      "["
+        "{"
+          "\"book\": ["
+            "{"
+              "\"category\": \"reference\","
+              "\"author\": \"Nigel Rees\","
+              "\"title\": \"Sayings of the Century\","
+              "\"price\": 8.95"
+            "},"
+            "{"
+              "\"category\": \"fiction\","
+              "\"author\": \"Evelyn Waugh\","
+              "\"title\": \"Sword of Honour\","
+              "\"price\": 12.99"
+            "},"
+            "{"
+              "\"category\": \"fiction\","
+              "\"author\": \"Herman Melville\","
+              "\"title\": \"Moby Dick\","
+              "\"isbn\": \"0-553-21311-3\","
+              "\"price\": 8.99"
+            "},"
+            "{"
+              "\"category\": \"fiction\","
+              "\"author\": \"J. R. R. Tolkien\","
+              "\"title\": \"The Lord of the Rings\","
+              "\"isbn\": \"0-395-19395-8\","
+              "\"price\": 22.99"
+            "}"
+          "],"
+          "\"bicycle\": {"
+            "\"color\": \"red\","
+            "\"price\": 19.95"
+          "}"
+        "},"
+        "10"
+      "]"
+    };
+    // clang-format on
+    auto expected = drop_whitespace(expected_raw);
+
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, *expected);
+  }
+}
+
+TEST_F(JsonTests, GetJsonObjectSubscriptOp)  // bracket forms: [index], ['name'] and [*]
+{
+  {  // "[2]": expect only the third element (indices are zero-based) of the book array
+    cudf::test::strings_column_wrapper input{json_string};
+    std::string json_path("$.store.book[2]");
+    auto result_raw = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path);
+    auto result     = drop_whitespace(*result_raw);
+
+    // clang-format off
+    cudf::test::strings_column_wrapper expected_raw{
+      "{"
+        "\"category\": \"fiction\","
+        "\"author\": \"Herman Melville\","
+        "\"title\": \"Moby Dick\","
+        "\"isbn\": \"0-553-21311-3\","
+        "\"price\": 8.99"
+      "}"
+    };
+    // clang-format on
+    auto expected = drop_whitespace(expected_raw);
+
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, *expected);
+  }
+
+  {  // "['bicycle']": a quoted name inside brackets selects that member of "store"
+    cudf::test::strings_column_wrapper input{json_string};
+    std::string json_path("$.store['bicycle']");
+    auto result_raw = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path);
+    auto result     = drop_whitespace(*result_raw);
+
+    // clang-format off
+    cudf::test::strings_column_wrapper expected_raw{
+      "{"
+        "\"color\": \"red\","
+        "\"price\": 19.95"
+      "}"
+    };
+    // clang-format on
+    auto expected = drop_whitespace(expected_raw);
+
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, *expected);
+  }
+
+  {  // "[*]": expect every element of the book array, still wrapped in an array
+    cudf::test::strings_column_wrapper input{json_string};
+    std::string json_path("$.store.book[*]");
+    auto result_raw = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path);
+    auto result     = drop_whitespace(*result_raw);
+
+    // clang-format off
+    cudf::test::strings_column_wrapper expected_raw{
+      "["
+        "{"
+          "\"category\": \"reference\","
+          "\"author\": \"Nigel Rees\","
+          "\"title\": \"Sayings of the Century\","
+          "\"price\": 8.95"
+        "},"
+        "{"
+          "\"category\": \"fiction\","
+          "\"author\": \"Evelyn Waugh\","
+          "\"title\": \"Sword of Honour\","
+          "\"price\": 12.99"
+        "},"
+        "{"
+          "\"category\": \"fiction\","
+          "\"author\": \"Herman Melville\","
+          "\"title\": \"Moby Dick\","
+          "\"isbn\": \"0-553-21311-3\","
+          "\"price\": 8.99"
+        "},"
+        "{"
+          "\"category\": \"fiction\","
+          "\"author\": \"J. R. R. Tolkien\","
+          "\"title\": \"The Lord of the Rings\","
+          "\"isbn\": \"0-395-19395-8\","
+          "\"price\": 22.99"
+        "}"
+      "]"
+    };
+    // clang-format on
+    auto expected = drop_whitespace(expected_raw);
+
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, *expected);
+  }
+}
+
+TEST_F(JsonTests, GetJsonObjectFilter)
+{
+  // queries that result in filtering/collating results (mostly meaning - generates new
+  // json instead of just returning parts of the existing string)
+
+  {  // only two of the books carry an "isbn", so only two values are expected
+    cudf::test::strings_column_wrapper input{json_string};
+    std::string json_path("$.store.book[*]['isbn']");
+    auto result_raw = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path);
+    auto result     = drop_whitespace(*result_raw);
+
+    cudf::test::strings_column_wrapper expected_raw{"[\"0-553-21311-3\",\"0-395-19395-8\"]"};
+    auto expected = drop_whitespace(expected_raw);
+
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, *expected);
+  }
+
+  {  // one "category" per book, gathered into a single array
+    cudf::test::strings_column_wrapper input{json_string};
+    std::string json_path("$.store.book[*].category");
+    auto result_raw = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path);
+    auto result     = drop_whitespace(*result_raw);
+
+    cudf::test::strings_column_wrapper expected_raw{
+      "[\"reference\",\"fiction\",\"fiction\",\"fiction\"]"};
+    auto expected = drop_whitespace(expected_raw);
+
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, *expected);
+  }
+
+  {  // one "title" per book, gathered into a single array
+    cudf::test::strings_column_wrapper input{json_string};
+    std::string json_path("$.store.book[*].title");
+    auto result_raw = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path);
+    auto result     = drop_whitespace(*result_raw);
+
+    cudf::test::strings_column_wrapper expected_raw{
+      "[\"Sayings of the Century\",\"Sword of Honour\",\"Moby Dick\",\"The Lord of the Rings\"]"};
+    auto expected = drop_whitespace(expected_raw);
+
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, *expected);
+  }
+
+  {  // ".*" applied to the book array (instead of "[*]"): one "price" per book
+    cudf::test::strings_column_wrapper input{json_string};
+    std::string json_path("$.store.book.*.price");
+    auto result_raw = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path);
+    auto result     = drop_whitespace(*result_raw);
+
+    cudf::test::strings_column_wrapper expected_raw{"[8.95,12.99,8.99,22.99]"};
+    auto expected = drop_whitespace(expected_raw);
+
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, *expected);
+  }
+
+  {
+    // spark behavioral difference: a single string result loses its enclosing quotes.
+    //  standard:     "fiction"
+    //  spark:        fiction
+    cudf::test::strings_column_wrapper input{json_string};
+    std::string json_path("$.store.book[2].category");
+    auto result_raw = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path);
+    auto result     = drop_whitespace(*result_raw);
+
+    cudf::test::strings_column_wrapper expected_raw{"fiction"};
+    auto expected = drop_whitespace(expected_raw);
+
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, *expected);
+  }
+}
+
+TEST_F(JsonTests, GetJsonObjectNullInputs)  // null input rows must stay null in the output
+{
+  {  // rows 1 and 3 are null on input (validity 0) and are expected to be null on output
+    std::string str("{\"a\" : \"b\"}");
+    cudf::test::strings_column_wrapper input({str, str, str, str}, {1, 0, 1, 0});
+
+    std::string json_path("$.a");
+    auto result_raw = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path);
+    auto result     = drop_whitespace(*result_raw);
+
+    cudf::test::strings_column_wrapper expected_raw({"b", "", "b", ""}, {1, 0, 1, 0});
+    auto expected = drop_whitespace(expected_raw);
+
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, *expected);
+  }
+}
+
+TEST_F(JsonTests, GetJsonObjectEmptyQuery)
+{
+  // empty query -> null (the output row is invalid; no exception is expected)
+  {
+    cudf::test::strings_column_wrapper input{"{\"a\" : \"b\"}"};
+    std::string json_path("");
+    auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path);
+
+    cudf::test::strings_column_wrapper expected({""}, {0});
+
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected);
+  }
+}
+
+TEST_F(JsonTests, GetJsonObjectEmptyInputsAndOutputs)
+{
+  // empty input -> null, even when the query is just the root operator "$"
+  {
+    cudf::test::strings_column_wrapper input{""};
+    std::string json_path("$");
+    auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path);
+
+    cudf::test::strings_column_wrapper expected({""}, {0});
+
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected);
+  }
+
+  // slightly different from "empty output": here the query matches a value, and
+  // that value happens to be the empty string. so we expect a valid (non-null),
+  // but empty row
+  {
+    cudf::test::strings_column_wrapper input{"{\"store\": { \"bicycle\" : \"\" } }"};
+    std::string json_path("$.store.bicycle");
+    auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path);
+
+    cudf::test::strings_column_wrapper expected({""}, {1});
+
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected);
+  }
+}
+
+// badly formed JSONpath strings: every case must be rejected with cudf::logic_error
+TEST_F(JsonTests, GetJsonObjectIllegalQuery)
+{
+  // can't have more than one root operator, or a root operator anywhere other
+  // than the beginning
+  {
+    cudf::test::strings_column_wrapper input{"{\"a\": \"b\"}"};
+    std::string json_path("$$");
+    auto query = [&]() {
+      auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path);
+    };
+    EXPECT_THROW(query(), cudf::logic_error);
+  }
+
+  // invalid index: arbitrary text inside the brackets
+  {
+    cudf::test::strings_column_wrapper input{"{\"a\": \"b\"}"};
+    std::string json_path("$[auh46h-]");
+    auto query = [&]() {
+      auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path);
+    };
+    EXPECT_THROW(query(), cudf::logic_error);
+  }
+
+  // invalid index: nested, empty brackets
+  {
+    cudf::test::strings_column_wrapper input{"{\"a\": \"b\"}"};
+    std::string json_path("$[[]]");
+    auto query = [&]() {
+      auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path);
+    };
+    EXPECT_THROW(query(), cudf::logic_error);
+  }
+
+  // negative index
+  {
+    cudf::test::strings_column_wrapper input{"{\"a\": \"b\"}"};
+    std::string json_path("$[-1]");
+    auto query = [&]() {
+      auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path);
+    };
+    EXPECT_THROW(query(), cudf::logic_error);
+  }
+
+  // child operator with no name specified
+  {
+    cudf::test::strings_column_wrapper input{"{\"a\": \"b\"}"};
+    std::string json_path(".");
+    auto query = [&]() {
+      auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path);
+    };
+    EXPECT_THROW(query(), cudf::logic_error);
+  }
+
+  {  // closing bracket before the opening bracket
+    cudf::test::strings_column_wrapper input{"{\"a\": \"b\"}"};
+    std::string json_path("][");
+    auto query = [&]() {
+      auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path);
+    };
+    EXPECT_THROW(query(), cudf::logic_error);
+  }
+
+  {  // arbitrary characters containing none of the operators $ . [ ] *
+    cudf::test::strings_column_wrapper input{"{\"a\": \"b\"}"};
+    std::string json_path("6hw6,56i3");
+    auto query = [&]() {
+      auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path);
+    };
+    EXPECT_THROW(query(), cudf::logic_error);
+  }
+}
+
+// queries that are legal, but reference invalid parts of the input: expect null, not a throw
+TEST_F(JsonTests, GetJsonObjectInvalidQuery)
+{
+  // non-existent field
+  {
+    cudf::test::strings_column_wrapper input{"{\"a\": \"b\"}"};
+    std::string json_path("$[*].c");
+    auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path);
+
+    cudf::test::strings_column_wrapper expected({""}, {0});
+
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected);
+  }
+
+  // non-existent field, followed by an index into it
+  {
+    cudf::test::strings_column_wrapper input{"{\"a\": \"b\"}"};
+    std::string json_path("$[*].c[2]");
+    auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path);
+
+    cudf::test::strings_column_wrapper expected({""}, {0});
+
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected);
+  }
+
+  // non-existent field: "book" is an array, so it has no direct "price" member
+  {
+    cudf::test::strings_column_wrapper input{json_string};
+    std::string json_path("$.store.book.price");
+    auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path);
+
+    cudf::test::strings_column_wrapper expected({""}, {0});
+
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected);
+  }
+
+  // out of bounds index (the book array has four elements, so the last valid index is 3)
+  {
+    cudf::test::strings_column_wrapper input{json_string};
+    std::string json_path("$.store.book[4]");
+    auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path);
+
+    cudf::test::strings_column_wrapper expected({""}, {0});
+
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected);
+  }
+}
+
+TEST_F(JsonTests, MixedOutput)
+{
+  // various queries on the six rows below; a row the query does not match must come back null
+  // clang-format off
+  std::vector<std::string> input_strings {
+    "{\"a\": {\"b\" : \"c\"}}",
+
+    "{"
+      "\"a\": {\"b\" : \"c\"},"
+      "\"d\": [{\"e\":123}, {\"f\":-10}]"
+    "}",
+
+    "{"
+      "\"b\": 123"
+    "}",
+
+    "{"
+      "\"a\": [\"y\",500]"
+    "}",
+
+    "{"
+      "\"a\": \"\""
+    "}",
+
+    "{"
+      "\"a\": {"
+                "\"z\": {\"i\": 10, \"j\": 100},"
+                "\"b\": [\"c\",null,true,-1]"
+              "}"
+    "}"
+  };
+  // clang-format on
+  cudf::test::strings_column_wrapper input(input_strings.begin(), input_strings.end());
+  {  // "$.a": the row with no "a" member is null; the row whose "a" is "" is valid but empty
+    std::string json_path("$.a");
+    auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path);
+
+    // clang-format off
+    cudf::test::strings_column_wrapper expected({
+      "{\"b\" : \"c\"}",
+      "{\"b\" : \"c\"}",
+      "",
+      "[\"y\",500]",
+      "",
+      "{"
+         "\"z\": {\"i\": 10, \"j\": 100},"
+         "\"b\": [\"c\",null,true,-1]"
+      "}"
+      },
+      {1, 1, 0, 1, 1, 1});
+    // clang-format on
+
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected);
+  }
+
+  {  // "$.a[1]": only the row whose "a" is an array yields a value (its second element)
+    std::string json_path("$.a[1]");
+    auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path);
+
+    // clang-format off
+    cudf::test::strings_column_wrapper expected({
+      "",
+      "",
+      "",
+      "500",
+      "",
+      "",
+      },
+      {0, 0, 0, 1, 0, 0});
+    // clang-format on
+
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected);
+  }
+
+  {  // "$.a.b": only rows whose "a" is an object with a "b" member yield a value
+    std::string json_path("$.a.b");
+    auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path);
+
+    // clang-format off
+    cudf::test::strings_column_wrapper expected({
+      "c",
+      "c",
+      "",
+      "",
+      "",
+      "[\"c\",null,true,-1]"},
+      {1, 1, 0, 0, 0, 1});
+    // clang-format on
+
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected);
+  }
+
+  {  // "$.a[*]": an object yields its member values as an array; the "" value yields "[]"
+    std::string json_path("$.a[*]");
+    auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path);
+
+    // clang-format off
+    cudf::test::strings_column_wrapper expected({
+      "[\"c\"]",
+      "[\"c\"]",
+      "",
+      "[\"y\",500]",
+      "[]",
+      "["
+        "{\"i\": 10, \"j\": 100},"
+        "[\"c\",null,true,-1]"
+      "]" },
+      {1, 1, 0, 1, 1, 1});
+    // clang-format on
+
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected);
+  }
+
+  {  // "$.a.b[*]": a plain string "b" yields "[]"; an array "b" yields its elements as an array
+    std::string json_path("$.a.b[*]");
+    auto result = cudf::strings::get_json_object(cudf::strings_column_view(input), json_path);
+
+    // clang-format off
+    cudf::test::strings_column_wrapper expected({
+      "[]",
+      "[]",
+      "",
+      "",
+      "",
+      "[\"c\",null,true,-1]"},
+      {1, 1, 0, 0, 0, 1});
+    // clang-format on
+
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*result, expected);
+  }
+}
diff --git a/cpp/tests/utilities/column_utilities.cu b/cpp/tests/utilities/column_utilities.cu
index 78a67464654..a54c86405a5 100644
--- a/cpp/tests/utilities/column_utilities.cu
+++ b/cpp/tests/utilities/column_utilities.cu
@@ -71,7 +71,7 @@ struct column_property_comparator {
 
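-    // equivalent, but not exactly equal columns can have a different number of children if their
-    // sizes are both 0. Specifically, empty string columns may or may not have children.
-    if (check_exact_equality || lhs.size() > 0) {
+    // equivalent, but not exactly equal columns can have a different number of children if they
+    // are empty or entirely null. Specifically, such string columns may or may not have children.
+    if (check_exact_equality || (lhs.size() > 0 && lhs.null_count() < lhs.size())) {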
       EXPECT_EQ(lhs.num_children(), rhs.num_children());
     }
   }
diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java
index 5d869ab75fb..402c64dd83d 100644
--- a/java/src/main/java/ai/rapids/cudf/ColumnView.java
+++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java
@@ -2083,6 +2083,23 @@ public final ColumnVector substring(ColumnView start, ColumnView end) {
     return new ColumnVector(substringColumn(getNativeView(), start.getNativeView(), end.getNativeView()));
   }
 
+  /**
+   * Apply a JSONPath string to all rows in an input strings column.
+   *
+   * Each row of this column is expected to hold a valid JSON string. The same JSONPath is
+   * applied to every row, and the per-row results are returned as a new strings column.
+   *
+   * For reference, https://tools.ietf.org/id/draft-goessner-dispatch-jsonpath-00.html
+   * Note: Only implements the operators: $ . [] *
+   *
+   * @param path string Scalar holding the JSONPath to be applied to each row
+   * @return new strings ColumnVector containing the retrieved json object strings
+   */
+  public final ColumnVector getJSONObject(Scalar path) {
+    assert(type.equals(DType.STRING)) : "column type must be a String";
+    return new ColumnVector(getJSONObject(getNativeView(), path.getScalarHandle()));
+  }
+
   /**
    * Returns a new strings column where target string within each string is replaced with the specified
    * replacement string.
@@ -2649,6 +2666,8 @@ static DeviceMemoryBufferView getOffsetsBuffer(long viewHandle) {
    */
   private static native long stringTimestampToTimestamp(long viewHandle, int unit, String format);
 
+  private static native long getJSONObject(long viewHandle, long scalarHandle) throws CudfException;
+
   /**
    * Native method to parse and convert a timestamp column vector to string column vector. A unix
    * timestamp is a long value representing how many units since 1970-01-01 00:00:00:000 in either
diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp
index dc1acc50b5f..cec3a1a92a6 100644
--- a/java/src/main/native/src/ColumnViewJni.cpp
+++ b/java/src/main/native/src/ColumnViewJni.cpp
@@ -54,6 +54,7 @@
 #include <cudf/strings/split/split.hpp>
 #include <cudf/strings/strip.hpp>
 #include <cudf/strings/substring.hpp>
+#include <cudf/strings/json.hpp>
 #include <cudf/transform.hpp>
 #include <cudf/unary.hpp>
 #include <cudf/utilities/bit.hpp>
@@ -65,6 +66,8 @@
 
 #include "cudf_jni_apis.hpp"
 #include "dtype_utils.hpp"
+#include "jni.h"
+#include "jni_utils.hpp"
 
 namespace {
 
@@ -1835,4 +1838,24 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_copyColumnViewToCV(JNIEnv
   }
   CATCH_STD(env, 0)
 }
+
+// Native implementation of ColumnView.getJSONObject(long viewHandle, long scalarHandle).
+// Ownership of the resulting column is released to the caller as a raw jlong handle.
+JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_getJSONObject(JNIEnv *env, jclass,
+                                                                     jlong j_view_handle,
+                                                                     jlong j_scalar_handle) {
+  JNI_NULL_CHECK(env, j_view_handle, "view cannot be null", 0);
+  JNI_NULL_CHECK(env, j_scalar_handle, "path cannot be null", 0);
+
+  try {
+    cudf::jni::auto_set_device(env);
+    cudf::column_view *n_column_view = reinterpret_cast<cudf::column_view *>(j_view_handle);
+    cudf::strings_column_view n_strings_col_view(*n_column_view);
+    cudf::string_scalar *n_scalar_path = reinterpret_cast<cudf::string_scalar *>(j_scalar_handle);
+
+    auto result = cudf::strings::get_json_object(n_strings_col_view, *n_scalar_path);
+    return reinterpret_cast<jlong>(result.release());
+  }
+  CATCH_STD(env, 0)
+}
 } // extern "C"
diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
index fe1cba5ceb1..ce2c287a1c8 100644
--- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
+++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
@@ -4132,6 +4132,50 @@ void testCopyToColumnVector() {
     }
   }
 
+  @Test
+  void testGetJSONObject() {
+    String jsonString = "{ \"store\": {\n" +
+        "    \"book\": [\n" +
+        "      { \"category\": \"reference\",\n" +
+        "        \"author\": \"Nigel Rees\",\n" +
+        "        \"title\": \"Sayings of the Century\",\n" +
+        "        \"price\": 8.95\n" +
+        "      },\n" +
+        "      { \"category\": \"fiction\",\n" +
+        "        \"author\": \"Evelyn Waugh\",\n" +
+        "        \"title\": \"Sword of Honour\",\n" +
+        "        \"price\": 12.99\n" +
+        "      },\n" +
+        "      { \"category\": \"fiction\",\n" +
+        "        \"author\": \"Herman Melville\",\n" +
+        "        \"title\": \"Moby Dick\",\n" +
+        "        \"isbn\": \"0-553-21311-3\",\n" +
+        "        \"price\": 8.99\n" +
+        "      },\n" +
+        "      { \"category\": \"fiction\",\n" +
+        "        \"author\": \"J. R. R. Tolkien\",\n" +
+        "        \"title\": \"The Lord of the Rings\",\n" +
+        "        \"isbn\": \"0-395-19395-8\",\n" +
+        "        \"price\": 22.99\n" +
+        "      }\n" +
+        "    ],\n" +
+        "    \"bicycle\": {\n" +
+        "      \"color\": \"red\",\n" +
+        "      \"price\": 19.95\n" +
+        "    }\n" +
+        "  }\n" +
+        "}";
+    // Query the author of every book. Both rows hold the same document, so the expected
+    // result is the same JSON array of author names in each row.
+    String authors = "[\"Nigel Rees\",\"Evelyn Waugh\",\"Herman Melville\",\"J. R. R. Tolkien\"]";
+    try (ColumnVector json = ColumnVector.fromStrings(jsonString, jsonString);
+         ColumnVector expectedAuthors = ColumnVector.fromStrings(authors, authors);
+         Scalar path = Scalar.fromString("$.store.book[*].author");
+         ColumnVector gotAuthors = json.getJSONObject(path)) {
+      assertColumnsAreEqual(expectedAuthors, gotAuthors);
+    }
+  }
+
   @Test
   void testMakeStructEmpty() {
     final int numRows = 10;