From 9f2247f4d3e4a035b141251ea4cf4f1b6a6ef8ec Mon Sep 17 00:00:00 2001
From: vuule <vmilovanovic@nvidia.com>
Date: Fri, 22 Jul 2022 11:51:44 -0700
Subject: [PATCH 01/40] add placeholder experimental JSON reader

---
 cpp/CMakeLists.txt                         |  1 +
 cpp/include/cudf/io/json.hpp               | 28 +++++++++++++++++
 cpp/src/io/json/experimental/read_json.cpp | 31 +++++++++++++++++++
 cpp/src/io/json/experimental/read_json.hpp | 36 ++++++++++++++++++++++
 cpp/src/io/json/reader_impl.cu             |  6 ++++
 python/cudf/cudf/_lib/cpp/io/json.pxd      |  5 +++
 python/cudf/cudf/_lib/json.pyx             |  4 ++-
 python/cudf/cudf/io/json.py                |  9 ++++--
 python/cudf/cudf/utils/ioutils.py          |  2 +-
 9 files changed, 118 insertions(+), 4 deletions(-)
 create mode 100644 cpp/src/io/json/experimental/read_json.cpp
 create mode 100644 cpp/src/io/json/experimental/read_json.hpp

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 4819d1c2f5c..104e731c470 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -322,6 +322,7 @@ add_library(
   src/io/functions.cpp
   src/io/json/json_gpu.cu
   src/io/json/reader_impl.cu
+  src/io/json/experimental/read_json.cpp
   src/io/orc/aggregate_orc_metadata.cpp
   src/io/orc/dict_enc.cu
   src/io/orc/orc.cpp
diff --git a/cpp/include/cudf/io/json.hpp b/cpp/include/cudf/io/json.hpp
index 9ccb5ec4d58..01334060063 100644
--- a/cpp/include/cudf/io/json.hpp
+++ b/cpp/include/cudf/io/json.hpp
@@ -80,6 +80,9 @@ class json_reader_options {
   // Whether to parse dates as DD/MM versus MM/DD
   bool _dayfirst = false;
 
+  // Whether to parse dates as DD/MM versus MM/DD
+  bool _experimental = false;
+
   /**
    * @brief Constructor from source info.
    *
@@ -193,6 +196,13 @@ class json_reader_options {
    */
   bool is_enabled_dayfirst() const { return _dayfirst; }
 
+  /**
+   * @brief Whether the experimental reader should be used.
+   *
+   * @returns true if the experimental reader will be used, false otherwise
+   */
+  bool is_enabled_experimental() const { return _experimental; }
+
   /**
    * @brief Set data types for columns to be read.
    *
@@ -241,6 +251,13 @@ class json_reader_options {
    * @param val Boolean value to enable/disable day first parsing format
    */
   void enable_dayfirst(bool val) { _dayfirst = val; }
+
+  /**
+   * @brief Set whether to use the experimental reader.
+   *
+   * @param val Boolean value to enable/disable the experimental readers
+   */
+  void enable_experimental(bool val) { _experimental = val; }
 };
 
 /**
@@ -347,6 +364,17 @@ class json_reader_options_builder {
     options._dayfirst = val;
     return *this;
   }
+  /**
+   * @brief Set whether to use the experimental reader.
+   *
+   * @param val Boolean value to enable/disable experimental parsing
+   * @return this for chaining
+   */
+  json_reader_options_builder& experimental(bool val)
+  {
+    options._experimental = val;
+    return *this;
+  }
 
   /**
    * @brief move json_reader_options member once it's built.
diff --git a/cpp/src/io/json/experimental/read_json.cpp b/cpp/src/io/json/experimental/read_json.cpp
new file mode 100644
index 00000000000..fef5aa7d794
--- /dev/null
+++ b/cpp/src/io/json/experimental/read_json.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "read_json.hpp"
+
+#include <cudf/utilities/error.hpp>
+
+namespace cudf::io::detail::json::experimental {
+
+table_with_metadata read_json(std::vector<std::unique_ptr<datasource>>& sources,
+                              json_reader_options const& reader_opts,
+                              rmm::cuda_stream_view stream,
+                              rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FAIL("Not implemented");
+}
+
+}  // namespace cudf::io::detail::json::experimental
diff --git a/cpp/src/io/json/experimental/read_json.hpp b/cpp/src/io/json/experimental/read_json.hpp
new file mode 100644
index 00000000000..9c39315da30
--- /dev/null
+++ b/cpp/src/io/json/experimental/read_json.hpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cudf/io/datasource.hpp>
+#include <cudf/io/json.hpp>
+#include <cudf/types.hpp>
+
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/mr/device/device_memory_resource.hpp>
+
+#include <memory>
+#include <vector>
+
+namespace cudf::io::detail::json::experimental {
+
+table_with_metadata read_json(std::vector<std::unique_ptr<datasource>>& sources,
+                              json_reader_options const& reader_opts,
+                              rmm::cuda_stream_view stream,
+                              rmm::mr::device_memory_resource* mr);
+
+}
diff --git a/cpp/src/io/json/reader_impl.cu b/cpp/src/io/json/reader_impl.cu
index 052c51351a1..7e6be190acb 100644
--- a/cpp/src/io/json/reader_impl.cu
+++ b/cpp/src/io/json/reader_impl.cu
@@ -16,6 +16,8 @@
 
 #include "json_gpu.hpp"
 
+#include "experimental/read_json.hpp"
+
 #include <hash/concurrent_unordered_map.cuh>
 
 #include <io/comp/io_uncomp.hpp>
@@ -571,6 +573,10 @@ table_with_metadata read_json(std::vector<std::unique_ptr<datasource>>& sources,
                               rmm::cuda_stream_view stream,
                               rmm::mr::device_memory_resource* mr)
 {
+  if (reader_opts.is_enabled_experimental()) {
+    return experimental::read_json(sources, reader_opts, stream, mr);
+  }
+
   CUDF_EXPECTS(not sources.empty(), "No sources were defined");
 
   CUDF_EXPECTS(reader_opts.is_enabled_lines(), "Only JSON Lines format is currently supported.\n");
diff --git a/python/cudf/cudf/_lib/cpp/io/json.pxd b/python/cudf/cudf/_lib/cpp/io/json.pxd
index 2c65e329bb0..6e240d00349 100644
--- a/python/cudf/cudf/_lib/cpp/io/json.pxd
+++ b/python/cudf/cudf/_lib/cpp/io/json.pxd
@@ -24,6 +24,7 @@ cdef extern from "cudf/io/json.hpp" \
         size_type get_byte_range_size() except+
         bool is_enabled_lines() except+
         bool is_enabled_dayfirst() except+
+        bool is_enabled_experimental() except+
 
         # setter
         void set_dtypes(vector[data_type] types) except+
@@ -35,6 +36,7 @@ cdef extern from "cudf/io/json.hpp" \
         void set_byte_range_size(size_type size) except+
         void enable_lines(bool val) except+
         void enable_dayfirst(bool val) except+
+        void enable_experimental(bool val) except+
 
         @staticmethod
         json_reader_options_builder builder(
@@ -70,6 +72,9 @@ cdef extern from "cudf/io/json.hpp" \
         json_reader_options_builder& dayfirst(
             bool val
         ) except+
+        json_reader_options_builder& experimental(
+            bool val
+        ) except+
 
         json_reader_options build() except+
 
diff --git a/python/cudf/cudf/_lib/json.pyx b/python/cudf/cudf/_lib/json.pyx
index 263d70afe26..89057e61b6b 100644
--- a/python/cudf/cudf/_lib/json.pyx
+++ b/python/cudf/cudf/_lib/json.pyx
@@ -31,7 +31,8 @@ cpdef read_json(object filepaths_or_buffers,
                 object dtype,
                 bool lines,
                 object compression,
-                object byte_range):
+                object byte_range,
+                bool experimental):
     """
     Cython function to call into libcudf API, see `read_json`.
 
@@ -98,6 +99,7 @@ cpdef read_json(object filepaths_or_buffers,
         .lines(c_lines)
         .byte_range_offset(c_range_offset)
         .byte_range_size(c_range_size)
+        .experimental(experimental)
         .build()
     )
     if is_list_like_dtypes:
diff --git a/python/cudf/cudf/io/json.py b/python/cudf/cudf/io/json.py
index 869e055decf..f7c5c36edc5 100644
--- a/python/cudf/cudf/io/json.py
+++ b/python/cudf/cudf/io/json.py
@@ -27,7 +27,7 @@ def read_json(
         raise ValueError("cudf engine only supports JSON Lines format")
     if engine == "auto":
         engine = "cudf" if lines else "pandas"
-    if engine == "cudf":
+    if engine == "cudf" or engine == "cudf_experimental":
         # Multiple sources are passed as a list. If a single source is passed,
         # wrap it in a list for unified processing downstream.
         if not is_list_like(path_or_buf):
@@ -56,7 +56,12 @@ def read_json(
 
         return cudf.DataFrame._from_data(
             *libjson.read_json(
-                filepaths_or_buffers, dtype, lines, compression, byte_range
+                filepaths_or_buffers,
+                dtype,
+                lines,
+                compression,
+                byte_range,
+                engine == "cudf_experimental",
             )
         )
     else:
diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py
index 3771587eb47..d3c41de842a 100644
--- a/python/cudf/cudf/utils/ioutils.py
+++ b/python/cudf/cudf/utils/ioutils.py
@@ -463,7 +463,7 @@
     function or `StringIO`). Multiple inputs may be provided as a list. If a
     list is specified each list entry may be of a different input type as long
     as each input is of a valid type and all input JSON schema(s) match.
-engine : {{ 'auto', 'cudf', 'pandas' }}, default 'auto'
+engine : {{ 'auto', 'cudf', 'cudf_experimental', 'pandas' }}, default 'auto'
     Parser engine to use. If 'auto' is passed, the engine will be
     automatically selected based on the other parameters.
 orient : string,

From 76b283475bb2ded9622c8e9f1cae63a562db969b Mon Sep 17 00:00:00 2001
From: vuule <vmilovanovic@nvidia.com>
Date: Fri, 22 Jul 2022 11:58:07 -0700
Subject: [PATCH 02/40] doc fix

---
 cpp/include/cudf/io/json.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpp/include/cudf/io/json.hpp b/cpp/include/cudf/io/json.hpp
index 01334060063..72d5fc9c4a6 100644
--- a/cpp/include/cudf/io/json.hpp
+++ b/cpp/include/cudf/io/json.hpp
@@ -80,7 +80,7 @@ class json_reader_options {
   // Whether to parse dates as DD/MM versus MM/DD
   bool _dayfirst = false;
 
-  // Whether to parse dates as DD/MM versus MM/DD
+  // Whether to use the experimental reader
   bool _experimental = false;
 
   /**
@@ -255,7 +255,7 @@ class json_reader_options {
   /**
    * @brief Set whether to use the experimental reader.
    *
-   * @param val Boolean value to enable/disable the experimental readers
+   * @param val Boolean value to enable/disable the experimental reader
    */
   void enable_experimental(bool val) { _experimental = val; }
 };

From f5464f654f606566ca3701cbf9ec949cf4c1e6ce Mon Sep 17 00:00:00 2001
From: vuule <vmilovanovic@nvidia.com>
Date: Fri, 22 Jul 2022 12:10:15 -0700
Subject: [PATCH 03/40] copyright year

---
 python/cudf/cudf/_lib/cpp/io/json.pxd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/_lib/cpp/io/json.pxd b/python/cudf/cudf/_lib/cpp/io/json.pxd
index 6e240d00349..bc9d87a5cbf 100644
--- a/python/cudf/cudf/_lib/cpp/io/json.pxd
+++ b/python/cudf/cudf/_lib/cpp/io/json.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from libc.stdint cimport uint8_t
 from libcpp cimport bool

From 2ca0ac0442c26a10ccce4e4bef42abaec016c0d1 Mon Sep 17 00:00:00 2001
From: Vukasin Milovanovic <vukasin.milovanovic.87@gmail.com>
Date: Mon, 25 Jul 2022 13:27:38 -0700
Subject: [PATCH 04/40] newline

Co-authored-by: Bradley Dice <bdice@bradleydice.com>
---
 cpp/include/cudf/io/json.hpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cpp/include/cudf/io/json.hpp b/cpp/include/cudf/io/json.hpp
index 72d5fc9c4a6..73724b99589 100644
--- a/cpp/include/cudf/io/json.hpp
+++ b/cpp/include/cudf/io/json.hpp
@@ -364,6 +364,7 @@ class json_reader_options_builder {
     options._dayfirst = val;
     return *this;
   }
+
   /**
    * @brief Set whether to use the experimental reader.
    *

From 3ee7a5accaccc005445f7564c4d03df97eebb4d1 Mon Sep 17 00:00:00 2001
From: vuule <vmilovanovic@nvidia.com>
Date: Mon, 25 Jul 2022 13:32:45 -0700
Subject: [PATCH 05/40] use host_span for the sources parameter

---
 cpp/src/io/json/experimental/read_json.cpp | 2 +-
 cpp/src/io/json/experimental/read_json.hpp | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/cpp/src/io/json/experimental/read_json.cpp b/cpp/src/io/json/experimental/read_json.cpp
index fef5aa7d794..146eaf203e4 100644
--- a/cpp/src/io/json/experimental/read_json.cpp
+++ b/cpp/src/io/json/experimental/read_json.cpp
@@ -20,7 +20,7 @@
 
 namespace cudf::io::detail::json::experimental {
 
-table_with_metadata read_json(std::vector<std::unique_ptr<datasource>>& sources,
+table_with_metadata read_json(host_span<std::unique_ptr<datasource>> sources,
                               json_reader_options const& reader_opts,
                               rmm::cuda_stream_view stream,
                               rmm::mr::device_memory_resource* mr)
diff --git a/cpp/src/io/json/experimental/read_json.hpp b/cpp/src/io/json/experimental/read_json.hpp
index 9c39315da30..c9f74b2cc41 100644
--- a/cpp/src/io/json/experimental/read_json.hpp
+++ b/cpp/src/io/json/experimental/read_json.hpp
@@ -19,16 +19,16 @@
 #include <cudf/io/datasource.hpp>
 #include <cudf/io/json.hpp>
 #include <cudf/types.hpp>
+#include <cudf/utilities/span.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/mr/device/device_memory_resource.hpp>
 
 #include <memory>
-#include <vector>
 
 namespace cudf::io::detail::json::experimental {
 
-table_with_metadata read_json(std::vector<std::unique_ptr<datasource>>& sources,
+table_with_metadata read_json(host_span<std::unique_ptr<datasource>> sources,
                               json_reader_options const& reader_opts,
                               rmm::cuda_stream_view stream,
                               rmm::mr::device_memory_resource* mr);

From fcc90c5a3a390165daea1f19d588f8c2134a7c55 Mon Sep 17 00:00:00 2001
From: vuule <vmilovanovic@nvidia.com>
Date: Tue, 26 Jul 2022 19:08:19 -0700
Subject: [PATCH 06/40] options check + decompression

---
 cpp/include/cudf/io/types.hpp              |  1 +
 cpp/src/io/json/experimental/read_json.cpp | 46 +++++++++++++++++++++-
 2 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/cpp/include/cudf/io/types.hpp b/cpp/include/cudf/io/types.hpp
index e9a93894f7d..7520ca107cc 100644
--- a/cpp/include/cudf/io/types.hpp
+++ b/cpp/include/cudf/io/types.hpp
@@ -21,6 +21,7 @@
 
 #pragma once
 
+#include <cudf/table/table.hpp>
 #include <cudf/types.hpp>
 
 #include <thrust/optional.h>
diff --git a/cpp/src/io/json/experimental/read_json.cpp b/cpp/src/io/json/experimental/read_json.cpp
index 146eaf203e4..fbe9b5f6112 100644
--- a/cpp/src/io/json/experimental/read_json.cpp
+++ b/cpp/src/io/json/experimental/read_json.cpp
@@ -17,15 +17,59 @@
 #include "read_json.hpp"
 
 #include <cudf/utilities/error.hpp>
+#include <io/comp/io_uncomp.hpp>
 
 namespace cudf::io::detail::json::experimental {
 
+table_with_metadata read_nested_json(host_span<char const> input,
+                                     rmm::cuda_stream_view stream,
+                                     rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FAIL("Not implemented");
+}
+
+std::vector<uint8_t> ingest_raw_input(host_span<std::unique_ptr<datasource>> sources,
+                                      compression_type compression)
+{
+  // Iterate through the user defined sources and read the contents into the local buffer
+  size_t total_source_size = 0;
+  for (const auto& source : sources) {
+    total_source_size += source->size();
+  }
+
+  auto buffer = std::vector<uint8_t>(total_source_size);
+
+  size_t bytes_read = 0;
+  for (const auto& source : sources) {
+    if (not source->is_empty()) {
+      auto const destination = buffer.data() + bytes_read;
+      bytes_read += source->host_read(0, source->size(), destination);
+    }
+  }
+
+  if (compression == compression_type::NONE) {
+    return buffer;
+  } else {
+    return decompress(compression, buffer);
+  }
+}
+
 table_with_metadata read_json(host_span<std::unique_ptr<datasource>> sources,
                               json_reader_options const& reader_opts,
                               rmm::cuda_stream_view stream,
                               rmm::mr::device_memory_resource* mr)
 {
-  CUDF_FAIL("Not implemented");
+  auto const dtypes_empty =
+    std::visit([](const auto& dtypes) { return dtypes.empty(); }, reader_opts.get_dtypes());
+  CUDF_EXPECTS(dtypes_empty, "user specified dtypes are not yet supported");
+  CUDF_EXPECTS(not reader_opts.is_enabled_lines(), "JSON Lines format is not yet supported");
+  CUDF_EXPECTS(reader_opts.get_byte_range_offset() == 0 and reader_opts.get_byte_range_size() == 0,
+               "specifying a byte range is not yet supported");
+
+  auto const buffer = ingest_raw_input(sources, reader_opts.get_compression());
+  auto data = host_span<char const>(reinterpret_cast<char const*>(buffer.data()), buffer.size());
+
+  return read_nested_json(data, stream, mr);
 }
 
 }  // namespace cudf::io::detail::json::experimental

From 22b5a46c8a8bb6dfbdaad9827452dbc1792be375 Mon Sep 17 00:00:00 2001
From: Elias Stehle <3958403+elstehle@users.noreply.github.com>
Date: Mon, 15 Aug 2022 07:29:48 -0700
Subject: [PATCH 07/40] add support for newline-delimited JSON (NDJSON)

---
 cpp/src/io/json/nested_json.hpp    |  14 +-
 cpp/src/io/json/nested_json_gpu.cu | 371 +++++++++++++++++++----------
 cpp/tests/io/nested_json_test.cpp  |  61 ++++-
 3 files changed, 316 insertions(+), 130 deletions(-)

diff --git a/cpp/src/io/json/nested_json.hpp b/cpp/src/io/json/nested_json.hpp
index 03acd393594..d8886bc0928 100644
--- a/cpp/src/io/json/nested_json.hpp
+++ b/cpp/src/io/json/nested_json.hpp
@@ -16,6 +16,7 @@
 
 #pragma once
 
+#include <cudf/io/json.hpp>
 #include <cudf/io/types.hpp>
 #include <cudf/types.hpp>
 #include <cudf/utilities/bit.hpp>
@@ -267,14 +268,14 @@ namespace detail {
  * At this stage, we do not perform bracket matching, i.e., we do not verify whether a closing
  * bracket would actually pop a the corresponding opening brace.
  *
- * @param[in] d_json_in The string of input characters
+ * @param[in] json_in The string of input characters
  * @param[out] d_top_of_stack Will be populated with what-is-on-top-of-the-stack for any given input
  * character of \p d_json_in, where a '{' represents that the corresponding input character is
  * within the context of a struct, a '[' represents that it is within the context of an array, and a
  * '_' symbol that it is at the root of the JSON.
  * @param[in] stream The cuda stream to dispatch GPU kernels to
  */
-void get_stack_context(device_span<SymbolT const> d_json_in,
+void get_stack_context(device_span<SymbolT const> json_in,
                        SymbolT* d_top_of_stack,
                        rmm::cuda_stream_view stream);
 
@@ -282,14 +283,17 @@ void get_stack_context(device_span<SymbolT const> d_json_in,
  * @brief Parses the given JSON string and emits a sequence of tokens that demarcate relevant
  * sections from the input.
  *
- * @param[in] d_json_in The JSON input
+ * @param[in] json_in The JSON input
+ * @param[in] options Parsing options specifying the parsing behaviour
  * @param[out] d_tokens Device memory to which the parsed tokens are written
  * @param[out] d_tokens_indices Device memory to which the indices are written, where each index
  * represents the offset within \p d_json_in that cause the input being written
  * @param[out] d_num_written_tokens The total number of tokens that were parsed
  * @param[in] stream The CUDA stream to which kernels are dispatched
  */
-void get_token_stream(device_span<SymbolT const> d_json_in,
+
+void get_token_stream(device_span<SymbolT const> json_in,
+                      cudf::io::json_reader_options const& options,
                       PdaTokenT* d_tokens,
                       SymbolOffsetT* d_tokens_indices,
                       SymbolOffsetT* d_num_written_tokens,
@@ -299,12 +303,14 @@ void get_token_stream(device_span<SymbolT const> d_json_in,
  * @brief Parses the given JSON string and generates table from the given input.
  *
  * @param input The JSON input
+ * @param options Parsing options specifying the parsing behaviour
  * @param stream The CUDA stream to which kernels are dispatched
  * @param mr Optional, resource with which to allocate.
  * @return The data parsed from the given JSON input
  */
 table_with_metadata parse_nested_json(
   host_span<SymbolT const> input,
+  cudf::io::json_reader_options const& options,
   rmm::cuda_stream_view stream        = cudf::default_stream_value,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
diff --git a/cpp/src/io/json/nested_json_gpu.cu b/cpp/src/io/json/nested_json_gpu.cu
index 5e293f8a750..f8a862e2c65 100644
--- a/cpp/src/io/json/nested_json_gpu.cu
+++ b/cpp/src/io/json/nested_json_gpu.cu
@@ -23,6 +23,7 @@
 #include <cudf/column/column_factories.hpp>
 #include <cudf/detail/utilities/vector_factories.hpp>
 #include <cudf/detail/valid_if.cuh>
+#include <cudf/io/json.hpp>
 #include <cudf/table/table.hpp>
 #include <cudf/types.hpp>
 #include <cudf/utilities/bit.hpp>
@@ -164,6 +165,8 @@ enum class symbol_group_id : PdaSymbolGroupIdT {
   COLON,
   /// Whitespace
   WHITE_SPACE,
+  /// Linebreak
+  LINE_BREAK,
   /// Other (any input symbol not assigned to one of the above symbol groups)
   OTHER,
   /// Total number of symbol groups amongst which to differentiate
@@ -206,7 +209,7 @@ static __constant__ PdaSymbolGroupIdT tos_sg_to_pda_sgid[] = {
   static_cast<PdaSymbolGroupIdT>(symbol_group_id::OTHER),
   static_cast<PdaSymbolGroupIdT>(symbol_group_id::OTHER),
   static_cast<PdaSymbolGroupIdT>(symbol_group_id::WHITE_SPACE),
-  static_cast<PdaSymbolGroupIdT>(symbol_group_id::WHITE_SPACE),
+  static_cast<PdaSymbolGroupIdT>(symbol_group_id::LINE_BREAK),
   static_cast<PdaSymbolGroupIdT>(symbol_group_id::OTHER),
   static_cast<PdaSymbolGroupIdT>(symbol_group_id::OTHER),
   static_cast<PdaSymbolGroupIdT>(symbol_group_id::WHITE_SPACE),
@@ -403,62 +406,62 @@ constexpr auto PD_NUM_STATES = static_cast<StateT>(pda_state_t::PD_NUM_STATES);
 // The starting state of the pushdown automaton
 constexpr auto start_state = static_cast<StateT>(pda_state_t::PD_BOV);
 
-// Identity symbol to symbol group lookup table
-std::vector<std::vector<char>> const pda_sgids{
-  {0},  {1},  {2},  {3},  {4},  {5},  {6},  {7},  {8},  {9},  {10}, {11}, {12}, {13}, {14},
-  {15}, {16}, {17}, {18}, {19}, {20}, {21}, {22}, {23}, {24}, {25}, {26}, {27}, {28}, {29}};
-
 /**
  * @brief Getting the transition table
  */
-auto get_transition_table()
+auto get_transition_table(bool newline_delimited_json)
 {
+  static_assert(static_cast<PdaStackSymbolGroupIdT>(stack_symbol_group_id::STACK_ROOT) == 0);
+  static_assert(static_cast<PdaStackSymbolGroupIdT>(stack_symbol_group_id::STACK_LIST) == 1);
+  static_assert(static_cast<PdaStackSymbolGroupIdT>(stack_symbol_group_id::STACK_STRUCT) == 2);
+
+  auto const PD_ANL = newline_delimited_json ? PD_BOV : PD_PVL;
   std::array<std::array<pda_state_t, NUM_PDA_SGIDS>, PD_NUM_STATES> pda_tt;
-  //  {       [       }       ]       "       \       ,       :     space   other
+  //  {       [       }       ]       "       \       ,       :     space   newline other
   pda_tt[static_cast<StateT>(pda_state_t::PD_BOV)] = {
-    PD_BOA, PD_BOA, PD_ERR, PD_ERR, PD_STR, PD_ERR, PD_ERR, PD_ERR, PD_BOV, PD_LON,
-    PD_BOA, PD_BOA, PD_ERR, PD_ERR, PD_STR, PD_ERR, PD_ERR, PD_ERR, PD_BOV, PD_LON,
-    PD_BOA, PD_BOA, PD_ERR, PD_ERR, PD_STR, PD_ERR, PD_ERR, PD_ERR, PD_BOV, PD_LON};
+    PD_BOA, PD_BOA, PD_ERR, PD_ERR, PD_STR, PD_ERR, PD_ERR, PD_ERR, PD_BOV, PD_BOV, PD_LON,
+    PD_BOA, PD_BOA, PD_ERR, PD_ERR, PD_STR, PD_ERR, PD_ERR, PD_ERR, PD_BOV, PD_BOV, PD_LON,
+    PD_BOA, PD_BOA, PD_ERR, PD_ERR, PD_STR, PD_ERR, PD_ERR, PD_ERR, PD_BOV, PD_BOV, PD_LON};
   pda_tt[static_cast<StateT>(pda_state_t::PD_BOA)] = {
-    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
-    PD_BOA, PD_BOA, PD_ERR, PD_PVL, PD_STR, PD_ERR, PD_ERR, PD_ERR, PD_BOA, PD_LON,
-    PD_ERR, PD_ERR, PD_PVL, PD_ERR, PD_FLN, PD_ERR, PD_ERR, PD_ERR, PD_BOA, PD_ERR};
+    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
+    PD_BOA, PD_BOA, PD_ERR, PD_PVL, PD_STR, PD_ERR, PD_ERR, PD_ERR, PD_BOA, PD_BOA, PD_LON,
+    PD_ERR, PD_ERR, PD_PVL, PD_ERR, PD_FLN, PD_ERR, PD_ERR, PD_ERR, PD_BOA, PD_BOA, PD_ERR};
   pda_tt[static_cast<StateT>(pda_state_t::PD_LON)] = {
-    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_PVL, PD_LON,
-    PD_ERR, PD_ERR, PD_ERR, PD_PVL, PD_ERR, PD_ERR, PD_BOV, PD_ERR, PD_PVL, PD_LON,
-    PD_ERR, PD_ERR, PD_PVL, PD_ERR, PD_ERR, PD_ERR, PD_BFN, PD_ERR, PD_PVL, PD_LON};
+    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_PVL, PD_PVL, PD_LON,
+    PD_ERR, PD_ERR, PD_ERR, PD_PVL, PD_ERR, PD_ERR, PD_BOV, PD_ERR, PD_PVL, PD_PVL, PD_LON,
+    PD_ERR, PD_ERR, PD_PVL, PD_ERR, PD_ERR, PD_ERR, PD_BFN, PD_ERR, PD_PVL, PD_PVL, PD_LON};
   pda_tt[static_cast<StateT>(pda_state_t::PD_STR)] = {
-    PD_STR, PD_STR, PD_STR, PD_STR, PD_PVL, PD_SCE, PD_STR, PD_STR, PD_STR, PD_STR,
-    PD_STR, PD_STR, PD_STR, PD_STR, PD_PVL, PD_SCE, PD_STR, PD_STR, PD_STR, PD_STR,
-    PD_STR, PD_STR, PD_STR, PD_STR, PD_PVL, PD_SCE, PD_STR, PD_STR, PD_STR, PD_STR};
+    PD_STR, PD_STR, PD_STR, PD_STR, PD_PVL, PD_SCE, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR,
+    PD_STR, PD_STR, PD_STR, PD_STR, PD_PVL, PD_SCE, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR,
+    PD_STR, PD_STR, PD_STR, PD_STR, PD_PVL, PD_SCE, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR};
   pda_tt[static_cast<StateT>(pda_state_t::PD_SCE)] = {
-    PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR,
-    PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR,
-    PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR};
+    PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR,
+    PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR,
+    PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR, PD_STR};
   pda_tt[static_cast<StateT>(pda_state_t::PD_PVL)] = {
-    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_PVL, PD_ERR,
-    PD_ERR, PD_ERR, PD_ERR, PD_PVL, PD_ERR, PD_ERR, PD_BOV, PD_ERR, PD_PVL, PD_ERR,
-    PD_ERR, PD_ERR, PD_PVL, PD_ERR, PD_ERR, PD_ERR, PD_BFN, PD_ERR, PD_PVL, PD_ERR};
+    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_PVL, PD_ANL, PD_ERR,
+    PD_ERR, PD_ERR, PD_ERR, PD_PVL, PD_ERR, PD_ERR, PD_BOV, PD_ERR, PD_PVL, PD_PVL, PD_ERR,
+    PD_ERR, PD_ERR, PD_PVL, PD_ERR, PD_ERR, PD_ERR, PD_BFN, PD_ERR, PD_PVL, PD_PVL, PD_ERR};
   pda_tt[static_cast<StateT>(pda_state_t::PD_BFN)] = {
-    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
-    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
-    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_FLN, PD_ERR, PD_ERR, PD_ERR, PD_BFN, PD_ERR};
+    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
+    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
+    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_FLN, PD_ERR, PD_ERR, PD_ERR, PD_BFN, PD_BFN, PD_ERR};
   pda_tt[static_cast<StateT>(pda_state_t::PD_FLN)] = {
-    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
-    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
-    PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_PFN, PD_FNE, PD_FLN, PD_FLN, PD_FLN, PD_FLN};
+    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
+    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
+    PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_PFN, PD_FNE, PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN};
   pda_tt[static_cast<StateT>(pda_state_t::PD_FNE)] = {
-    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
-    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
-    PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN};
+    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
+    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
+    PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN, PD_FLN};
   pda_tt[static_cast<StateT>(pda_state_t::PD_PFN)] = {
-    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
-    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
-    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_BOV, PD_PFN, PD_ERR};
+    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
+    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
+    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_BOV, PD_PFN, PD_PFN, PD_ERR};
   pda_tt[static_cast<StateT>(pda_state_t::PD_ERR)] = {
-    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
-    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
-    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR};
+    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
+    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR,
+    PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR, PD_ERR};
   return pda_tt;
 }
 
@@ -468,7 +471,8 @@ auto get_transition_table()
 auto get_translation_table()
 {
   std::array<std::array<std::vector<char>, NUM_PDA_SGIDS>, PD_NUM_STATES> pda_tlt;
-  pda_tlt[static_cast<StateT>(pda_state_t::PD_BOV)] = {{{token_t::StructBegin},
+  pda_tlt[static_cast<StateT>(pda_state_t::PD_BOV)] = {{/*ROOT*/
+                                                        {token_t::StructBegin},
                                                         {token_t::ListBegin},
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
@@ -477,7 +481,9 @@ auto get_translation_table()
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
                                                         {},
+                                                        {},
                                                         {token_t::ValueBegin},
+                                                        /*LIST*/
                                                         {token_t::StructBegin},
                                                         {token_t::ListBegin},
                                                         {token_t::ErrorBegin},
@@ -487,7 +493,9 @@ auto get_translation_table()
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
                                                         {},
+                                                        {},
                                                         {token_t::ValueBegin},
+                                                        /*STRUCT*/
                                                         {token_t::StructBegin},
                                                         {token_t::ListBegin},
                                                         {token_t::ErrorBegin},
@@ -497,8 +505,10 @@ auto get_translation_table()
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
                                                         {},
+                                                        {},
                                                         {token_t::ValueBegin}}};
-  pda_tlt[static_cast<StateT>(pda_state_t::PD_BOA)] = {{{token_t::ErrorBegin},
+  pda_tlt[static_cast<StateT>(pda_state_t::PD_BOA)] = {{/*ROOT*/
+                                                        {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
@@ -508,6 +518,8 @@ auto get_translation_table()
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
+                                                        {token_t::ErrorBegin},
+                                                        /*LIST*/
                                                         {token_t::StructBegin},
                                                         {token_t::ListBegin},
                                                         {token_t::ErrorBegin},
@@ -517,7 +529,9 @@ auto get_translation_table()
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
                                                         {},
+                                                        {},
                                                         {token_t::ValueBegin},
+                                                        /*STRUCT*/
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
                                                         {token_t::StructEnd},
@@ -527,8 +541,10 @@ auto get_translation_table()
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
                                                         {},
+                                                        {},
                                                         {token_t::ErrorBegin}}};
-  pda_tlt[static_cast<StateT>(pda_state_t::PD_LON)] = {{{token_t::ErrorBegin},
+  pda_tlt[static_cast<StateT>(pda_state_t::PD_LON)] = {{/*ROOT*/
+                                                        {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
@@ -537,7 +553,9 @@ auto get_translation_table()
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
                                                         {token_t::ValueEnd},
+                                                        {token_t::ValueEnd},
                                                         {},
+                                                        /*LIST*/
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
@@ -547,7 +565,9 @@ auto get_translation_table()
                                                         {token_t::ValueEnd},
                                                         {token_t::ErrorBegin},
                                                         {token_t::ValueEnd},
+                                                        {token_t::ValueEnd},
                                                         {},
+                                                        /*STRUCT*/
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
                                                         {token_t::ValueEnd, token_t::StructEnd},
@@ -557,15 +577,82 @@ auto get_translation_table()
                                                         {token_t::ValueEnd},
                                                         {token_t::ErrorBegin},
                                                         {token_t::ValueEnd},
+                                                        {token_t::ValueEnd},
+                                                        {}}};
+  pda_tlt[static_cast<StateT>(pda_state_t::PD_STR)] = {{/*ROOT*/
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {token_t::StringEnd},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        /*LIST*/
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {token_t::StringEnd},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        /*STRUCT*/
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {token_t::StringEnd},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {}}};
+  pda_tlt[static_cast<StateT>(pda_state_t::PD_SCE)] = {{/*ROOT*/
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        /*LIST*/
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        /*STRUCT*/
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
                                                         {}}};
-  pda_tlt[static_cast<StateT>(pda_state_t::PD_STR)] = {
-    {{}, {}, {}, {}, {token_t::StringEnd}, {}, {}, {}, {}, {},
-     {}, {}, {}, {}, {token_t::StringEnd}, {}, {}, {}, {}, {},
-     {}, {}, {}, {}, {token_t::StringEnd}, {}, {}, {}, {}, {}}};
-  pda_tlt[static_cast<StateT>(pda_state_t::PD_SCE)] = {{{}, {}, {}, {}, {}, {}, {}, {}, {}, {},
-                                                        {}, {}, {}, {}, {}, {}, {}, {}, {}, {},
-                                                        {}, {}, {}, {}, {}, {}, {}, {}, {}, {}}};
-  pda_tlt[static_cast<StateT>(pda_state_t::PD_PVL)] = {{{token_t::ErrorBegin},
+  pda_tlt[static_cast<StateT>(pda_state_t::PD_PVL)] = {{/*ROOT*/
+                                                        {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
@@ -574,7 +661,9 @@ auto get_translation_table()
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
                                                         {},
+                                                        {},
                                                         {token_t::ErrorBegin},
+                                                        /*LIST*/
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
@@ -584,7 +673,9 @@ auto get_translation_table()
                                                         {},
                                                         {token_t::ErrorBegin},
                                                         {},
+                                                        {},
                                                         {token_t::ErrorBegin},
+                                                        /*STRUCT*/
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
                                                         {token_t::StructEnd},
@@ -594,8 +685,11 @@ auto get_translation_table()
                                                         {},
                                                         {token_t::ErrorBegin},
                                                         {},
+                                                        {},
                                                         {token_t::ErrorBegin}}};
-  pda_tlt[static_cast<StateT>(pda_state_t::PD_BFN)] = {{{token_t::ErrorBegin},
+  pda_tlt[static_cast<StateT>(pda_state_t::PD_BFN)] = {{/*ROOT*/
+                                                        {token_t::ErrorBegin},
+                                                        {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
@@ -605,6 +699,7 @@ auto get_translation_table()
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
+                                                        /*LIST*/
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
@@ -616,6 +711,8 @@ auto get_translation_table()
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
+                                                        /*STRUCT*/
+                                                        {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
@@ -624,8 +721,9 @@ auto get_translation_table()
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
                                                         {},
+                                                        {},
                                                         {token_t::ErrorBegin}}};
-  pda_tlt[static_cast<StateT>(pda_state_t::PD_FLN)] = {{{token_t::ErrorBegin},
+  pda_tlt[static_cast<StateT>(pda_state_t::PD_FLN)] = {{/*ROOT*/
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
@@ -637,6 +735,7 @@ auto get_translation_table()
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
+                                                        /*LIST*/
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
@@ -645,6 +744,10 @@ auto get_translation_table()
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
+                                                        {token_t::ErrorBegin},
+                                                        {token_t::ErrorBegin},
+                                                        {token_t::ErrorBegin},
+                                                        /*STRUCT*/
                                                         {},
                                                         {},
                                                         {},
@@ -654,8 +757,10 @@ auto get_translation_table()
                                                         {},
                                                         {},
                                                         {},
+                                                        {},
                                                         {}}};
-  pda_tlt[static_cast<StateT>(pda_state_t::PD_FNE)] = {{{token_t::ErrorBegin},
+  pda_tlt[static_cast<StateT>(pda_state_t::PD_FNE)] = {{/*ROOT*/
+                                                        {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
@@ -666,6 +771,7 @@ auto get_translation_table()
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
+                                                        /*LIST*/
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
@@ -675,6 +781,10 @@ auto get_translation_table()
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
+                                                        {token_t::ErrorBegin},
+                                                        {token_t::ErrorBegin},
+                                                        /*STRUCT*/
+                                                        {},
                                                         {},
                                                         {},
                                                         {},
@@ -685,7 +795,8 @@ auto get_translation_table()
                                                         {},
                                                         {},
                                                         {}}};
-  pda_tlt[static_cast<StateT>(pda_state_t::PD_PFN)] = {{{token_t::ErrorBegin},
+  pda_tlt[static_cast<StateT>(pda_state_t::PD_PFN)] = {{/*ROOT*/
+                                                        {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
@@ -696,6 +807,7 @@ auto get_translation_table()
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
+                                                        /*LIST*/
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
@@ -707,17 +819,54 @@ auto get_translation_table()
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
+                                                        /*STRUCT*/
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
                                                         {token_t::ErrorBegin},
+                                                        {token_t::ErrorBegin},
+                                                        {token_t::ErrorBegin},
+                                                        {},
                                                         {},
                                                         {},
                                                         {token_t::ErrorBegin}}};
-  pda_tlt[static_cast<StateT>(pda_state_t::PD_ERR)] = {{{}, {}, {}, {}, {}, {}, {}, {}, {}, {},
-                                                        {}, {}, {}, {}, {}, {}, {}, {}, {}, {},
-                                                        {}, {}, {}, {}, {}, {}, {}, {}, {}, {}}};
+  pda_tlt[static_cast<StateT>(pda_state_t::PD_ERR)] = {{/*ROOT*/
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        /*LIST*/
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        /*STRUCT*/
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {},
+                                                        {}}};
   return pda_tlt;
 }
 
@@ -792,11 +941,14 @@ void get_stack_context(device_span<SymbolT const> json_in,
 
 // TODO: return pair of device_uvector instead of passing pre-allocated pointers.
 void get_token_stream(device_span<SymbolT const> json_in,
+                      cudf::io::json_reader_options const& options,
                       PdaTokenT* d_tokens,
                       SymbolOffsetT* d_tokens_indices,
                       SymbolOffsetT* d_num_written_tokens,
                       rmm::cuda_stream_view stream)
 {
+  auto const new_line_delimited_json = options.is_enabled_lines();
+
   // Memory holding the top-of-stack stack context for the input
   rmm::device_uvector<StackSymbolT> stack_op_indices{json_in.size(), stream};
 
@@ -820,8 +972,12 @@ void get_token_stream(device_span<SymbolT const> json_in,
                                  tokenizer_pda::pda_state_t::PD_NUM_STATES)>;
 
   // Instantiating PDA transducer
-  ToTokenStreamFstT json_to_tokens_fst{tokenizer_pda::pda_sgids,
-                                       tokenizer_pda::get_transition_table(),
+  std::vector<std::vector<char>> pda_sgid_identity{tokenizer_pda::NUM_PDA_SGIDS};
+  std::generate(std::begin(pda_sgid_identity), std::end(pda_sgid_identity), [i = 0]() mutable {
+    return std::vector<char>{static_cast<char>(i++)};
+  });
+  ToTokenStreamFstT json_to_tokens_fst{pda_sgid_identity,
+                                       tokenizer_pda::get_transition_table(new_line_delimited_json),
                                        tokenizer_pda::get_translation_table(),
                                        stream};
 
@@ -850,6 +1006,7 @@ void make_json_column(json_column& root_column,
                       std::stack<tree_node>& current_data_path,
                       host_span<SymbolT const> input,
                       device_span<SymbolT const> d_input,
+                      cudf::io::json_reader_options const& options,
                       rmm::cuda_stream_view stream)
 {
   // Default name for a list's child column
@@ -862,6 +1019,7 @@ void make_json_column(json_column& root_column,
 
   // Parse the JSON and get the token stream
   get_token_stream(d_input,
+                   options,
                    tokens_gpu.device_ptr(),
                    token_indices_gpu.device_ptr(),
                    num_tokens_out.device_ptr(),
@@ -897,15 +1055,6 @@ void make_json_column(json_column& root_column,
     };
   };
 
-  // Whether this token is a beginning-of-list or beginning-of-struct token
-  auto is_nested_token = [](PdaTokenT const token) {
-    switch (token) {
-      case token_t::StructBegin:
-      case token_t::ListBegin: return true;
-      default: return false;
-    };
-  };
-
   // Skips the quote char if the token is a beginning-of-string or beginning-of-field-name token
   auto get_token_index = [](PdaTokenT const token, SymbolOffsetT const token_index) {
     constexpr SymbolOffsetT skip_quote_char = 1;
@@ -1061,7 +1210,6 @@ void make_json_column(json_column& root_column,
   std::size_t offset = 0;
 
   // Giving names to magic constants
-  constexpr uint32_t row_offset_zero  = 0;
   constexpr uint32_t zero_child_count = 0;
 
   //--------------------------------------------------------------------------------
@@ -1071,51 +1219,6 @@ void make_json_column(json_column& root_column,
   CUDF_EXPECTS(num_tokens_out[0] > 0, "Empty JSON input not supported");
   CUDF_EXPECTS(is_valid_root_token(tokens_gpu[offset]), "Invalid beginning of JSON document");
 
-  // The JSON root is either a struct or list
-  if (is_nested_token(tokens_gpu[offset])) {
-    // Initialize the root column and append this row to it
-    root_column.append_row(row_offset_zero,
-                           token_to_column_type(tokens_gpu[offset]),
-                           get_token_index(tokens_gpu[offset], token_indices_gpu[offset]),
-                           get_token_index(tokens_gpu[offset], token_indices_gpu[offset]),
-                           0);
-
-    // Push the root node onto the stack for the data path
-    current_data_path.push({&root_column, row_offset_zero, nullptr, zero_child_count});
-
-    // Continue with the next token from the token stream
-    offset++;
-  }
-  // The JSON is a simple scalar value -> create simple table and return
-  else {
-    constexpr SymbolOffsetT max_tokens_for_scalar_value = 2;
-    CUDF_EXPECTS(num_tokens_out[0] <= max_tokens_for_scalar_value,
-                 "Invalid JSON format. Expected just a scalar value.");
-
-    // If this isn't the only token, verify the subsequent token is the correct end-of-* partner
-    if ((offset + 1) < num_tokens_out[0]) {
-      CUDF_EXPECTS(tokens_gpu[offset + 1] == end_of_partner(tokens_gpu[offset]),
-                   "Invalid JSON token sequence");
-    }
-
-    // The offset to the first symbol from the JSON input associated with the current token
-    auto const& token_begin_offset = get_token_index(tokens_gpu[offset], token_indices_gpu[offset]);
-
-    // The offset to one past the last symbol associated with the current token
-    // Literals without trailing space are missing the corresponding end-of-* counterpart.
-    auto const& token_end_offset =
-      (offset + 1 < num_tokens_out[0])
-        ? get_token_index(tokens_gpu[offset + 1], token_indices_gpu[offset + 1])
-        : input.size();
-
-    root_column.append_row(row_offset_zero,
-                           json_col_t::StringColumn,
-                           token_begin_offset,
-                           token_end_offset,
-                           zero_child_count);
-    return;
-  }
-
   while (offset < num_tokens_out[0]) {
     // Verify there's at least the JSON root node left on the stack to which we can append data
     CUDF_EXPECTS(current_data_path.size() > 0, "Invalid JSON structure");
@@ -1215,6 +1318,7 @@ void make_json_column(json_column& root_column,
     else if (token == token_t::ErrorBegin) {
 #ifdef NJP_DEBUG_PRINT
       std::cout << "[ErrorBegin]\n";
+      std::cout << "@" << get_token_index(tokens_gpu[offset], token_indices_gpu[offset]);
 #endif
       CUDF_FAIL("Parser encountered an invalid format.");
     }
@@ -1371,26 +1475,51 @@ std::pair<std::unique_ptr<column>, std::vector<column_name_info>> json_column_to
 }
 
 table_with_metadata parse_nested_json(host_span<SymbolT const> input,
+                                      cudf::io::json_reader_options const& options,
                                       rmm::cuda_stream_view stream,
                                       rmm::mr::device_memory_resource* mr)
 {
+  auto const new_line_delimited_json = options.is_enabled_lines();
+
   // Allocate device memory for the JSON input & copy over to device
   rmm::device_uvector<SymbolT> d_input = cudf::detail::make_device_uvector_async(input, stream);
 
   // Get internal JSON column
   json_column root_column{};
   std::stack<tree_node> data_path{};
-  make_json_column(root_column, data_path, input, d_input, stream);
+
+  constexpr uint32_t row_offset_zero            = 0;
+  constexpr uint32_t token_begin_offset_zero    = 0;
+  constexpr uint32_t token_end_offset_zero      = 0;
+  constexpr uint32_t node_init_child_count_zero = 0;
+
+  // We initialize the very root node and root column, which represents a list column that
+  // contains all the values found at the root "level" of the given JSON string.
+  // For JSON lines: we expect to find a list of values that all will be inserted into this list
+  // column.
+  // For regular JSON: we expect to have only a single value (single row) that will be inserted into
+  // this column.
+  root_column.append_row(
+    row_offset_zero, json_col_t::ListColumn, token_begin_offset_zero, token_end_offset_zero, 1);
+
+  // Push the root node onto the stack for the data path
+  data_path.push({&root_column, row_offset_zero, nullptr, node_init_child_count_zero});
+
+  make_json_column(root_column, data_path, input, d_input, options, stream);
+
+  // data_root refers to the root column of the data represented by the given JSON string
+  auto const& data_root =
+    new_line_delimited_json ? root_column : root_column.child_columns.begin()->second;
 
   // Verify that we were in fact given a list of structs (or in JSON speech: an array of objects)
   auto constexpr single_child_col_count = 1;
-  CUDF_EXPECTS(root_column.type == json_col_t::ListColumn and
-                 root_column.child_columns.size() == single_child_col_count and
-                 root_column.child_columns.begin()->second.type == json_col_t::StructColumn,
+  CUDF_EXPECTS(data_root.type == json_col_t::ListColumn and
+                 data_root.child_columns.size() == single_child_col_count and
+                 data_root.child_columns.begin()->second.type == json_col_t::StructColumn,
                "Currently the nested JSON parser only supports an array of (nested) objects");
 
   // Slice off the root list column, which has only a single row that contains all the structs
-  auto const& root_struct_col = root_column.child_columns.begin()->second;
+  auto const& root_struct_col = data_root.child_columns.begin()->second;
 
   // Initialize meta data to be populated while recursing through the tree of columns
   std::vector<std::unique_ptr<column>> out_columns;
diff --git a/cpp/tests/io/nested_json_test.cpp b/cpp/tests/io/nested_json_test.cpp
index d426acf26f9..a217b2f7d18 100644
--- a/cpp/tests/io/nested_json_test.cpp
+++ b/cpp/tests/io/nested_json_test.cpp
@@ -18,6 +18,7 @@
 #include <io/utilities/hostdevice_vector.hpp>
 
 #include <cudf/io/datasource.hpp>
+#include <cudf/io/json.hpp>
 #include <cudf/io/parquet.hpp>
 #include <cudf/lists/lists_column_view.hpp>
 
@@ -251,6 +252,9 @@ TEST_F(JsonTest, TokenStream)
   rmm::cuda_stream stream{};
   rmm::cuda_stream_view stream_view(stream);
 
+  // Default parsing options
+  cudf::io::json_reader_options default_options{};
+
   // Test input
   std::string input = R"(  [{)"
                       R"("category": "reference",)"
@@ -282,6 +286,7 @@ TEST_F(JsonTest, TokenStream)
 
   // Parse the JSON and get the token stream
   cuio_json::detail::get_token_stream(d_input,
+                                      default_options,
                                       tokens_gpu.device_ptr(),
                                       token_indices_gpu.device_ptr(),
                                       num_tokens_out.device_ptr(),
@@ -342,10 +347,13 @@ TEST_F(JsonTest, ExtractColumn)
   rmm::cuda_stream stream{};
   rmm::cuda_stream_view stream_view(stream);
 
+  // Default parsing options
+  cudf::io::json_reader_options default_options{};
+
   std::string input = R"( [{"a":0.0, "b":1.0}, {"a":0.1, "b":1.1}, {"a":0.2, "b":1.2}] )";
   // Get the JSON's tree representation
   auto const cudf_table = cuio_json::detail::parse_nested_json(
-    cudf::host_span<SymbolT const>{input.data(), input.size()}, stream_view);
+    cudf::host_span<SymbolT const>{input.data(), input.size()}, default_options, stream_view);
 
   auto const expected_col_count  = 2;
   auto const first_column_index  = 0;
@@ -366,6 +374,9 @@ TEST_F(JsonTest, UTF_JSON)
   rmm::cuda_stream stream{};
   rmm::cuda_stream_view stream_view(stream);
 
+  // Default parsing options
+  cudf::io::json_reader_options default_options{};
+
   // Only ASCII string
   std::string ascii_pass = R"([
   {"a":1,"b":2,"c":[3], "d": {}},
@@ -375,7 +386,8 @@ TEST_F(JsonTest, UTF_JSON)
   {"a":1,"b":null,"c":null},
   {"a":1,"b":Infinity,"c":[null], "d": {"year":-600,"author": "Kaniyan"}}])";
 
-  CUDF_EXPECT_NO_THROW(cuio_json::detail::parse_nested_json(ascii_pass, stream_view));
+  CUDF_EXPECT_NO_THROW(
+    cuio_json::detail::parse_nested_json(ascii_pass, default_options, stream_view));
 
   // utf-8 string that fails parsing.
   std::string utf_failed = R"([
@@ -385,7 +397,8 @@ TEST_F(JsonTest, UTF_JSON)
   {"a":1,"b":8.0,"c":null, "d": {}},
   {"a":1,"b":null,"c":null},
   {"a":1,"b":Infinity,"c":[null], "d": {"year":-600,"author": "filip ʒakotɛ"}}])";
-  CUDF_EXPECT_NO_THROW(cuio_json::detail::parse_nested_json(utf_failed, stream_view));
+  CUDF_EXPECT_NO_THROW(
+    cuio_json::detail::parse_nested_json(utf_failed, default_options, stream_view));
 
   // utf-8 string that passes parsing.
   std::string utf_pass = R"([
@@ -396,7 +409,8 @@ TEST_F(JsonTest, UTF_JSON)
   {"a":1,"b":null,"c":null},
   {"a":1,"b":Infinity,"c":[null], "d": {"year":-600,"author": "Kaniyan"}},
   {"a":1,"b":NaN,"c":[null, null], "d": {"year": 2, "author": "filip ʒakotɛ"}}])";
-  CUDF_EXPECT_NO_THROW(cuio_json::detail::parse_nested_json(utf_pass, stream_view));
+  CUDF_EXPECT_NO_THROW(
+    cuio_json::detail::parse_nested_json(utf_pass, default_options, stream_view));
 }
 
 TEST_F(JsonTest, FromParquet)
@@ -410,6 +424,9 @@ TEST_F(JsonTest, FromParquet)
   rmm::cuda_stream stream{};
   rmm::cuda_stream_view stream_view(stream);
 
+  // Default parsing options
+  cudf::io::json_reader_options default_options{};
+
   // Binary parquet data containing the same data as the data represented by the JSON string.
   // We could add a dataset to include this file, but we don't want tests in cudf to have data.
   const unsigned char parquet_data[] = {
@@ -496,7 +513,7 @@ TEST_F(JsonTest, FromParquet)
 
   // Read in the data via the JSON parser
   auto const cudf_table = cuio_json::detail::parse_nested_json(
-    cudf::host_span<SymbolT const>{input.data(), input.size()}, stream_view);
+    cudf::host_span<SymbolT const>{input.data(), input.size()}, default_options, stream_view);
 
   // Verify that the data read via parquet matches the data read via JSON
   CUDF_TEST_EXPECT_TABLES_EQUAL(cudf_table.tbl->view(), result.tbl->view());
@@ -504,3 +521,37 @@ TEST_F(JsonTest, FromParquet)
   // Verify that the schema read via parquet matches the schema read via JSON
   cudf::test::expect_metadata_equal(cudf_table.metadata, result.metadata);
 }
+
+TEST_F(JsonTest, JsonLines)
+{
+  // Prepare cuda stream for data transfers & kernels
+  rmm::cuda_stream stream{};
+  rmm::cuda_stream_view stream_view(stream);
+
+  // Parsing options with JSON lines (newline-delimited JSON) enabled
+  cudf::io::json_reader_options json_lines_options =
+    cudf::io::json_reader_options_builder{}.lines(true);
+
+  using cuio_json::SymbolT;
+
+  std::string json_string =
+    R"({"a":"a0"}
+    {"a":"a1"}
+    {"a":"a2", "b":"b2"}
+    {"a":"a3", "c":"c3"}
+    {"a":"a4"})";
+
+  cudf::io::json_reader_options in_options =
+    cudf::io::json_reader_options::builder(
+      cudf::io::source_info{json_string.c_str(), json_string.size()})
+      .lines(true);
+  cudf::io::table_with_metadata old_reader_table = cudf::io::read_json(in_options);
+
+  auto const new_reader_table = cuio_json::detail::parse_nested_json(
+    cudf::host_span<SymbolT const>{json_string.data(), json_string.size()},
+    json_lines_options,
+    stream_view);
+
+  // Verify that the table from read_json matches the table from the nested JSON parser
+  CUDF_TEST_EXPECT_TABLES_EQUAL(old_reader_table.tbl->view(), new_reader_table.tbl->view());
+}

From 87fce7d05c3a5377a2a68b2a374db1ebd127ee54 Mon Sep 17 00:00:00 2001
From: Elias Stehle <3958403+elstehle@users.noreply.github.com>
Date: Mon, 15 Aug 2022 10:48:12 -0700
Subject: [PATCH 08/40] addresses outstanding todo

---
 cpp/src/io/json/nested_json.hpp    | 26 ++++++-------
 cpp/src/io/json/nested_json_gpu.cu | 61 ++++++++++++++++--------------
 cpp/tests/io/nested_json_test.cpp  | 26 +++++--------
 3 files changed, 53 insertions(+), 60 deletions(-)

diff --git a/cpp/src/io/json/nested_json.hpp b/cpp/src/io/json/nested_json.hpp
index d8886bc0928..1048f9fcedd 100644
--- a/cpp/src/io/json/nested_json.hpp
+++ b/cpp/src/io/json/nested_json.hpp
@@ -285,27 +285,25 @@ void get_stack_context(device_span<SymbolT const> json_in,
  *
  * @param[in] json_in The JSON input
  * @param[in] options Parsing options specifying the parsing behaviour
- * @param[out] d_tokens Device memory to which the parsed tokens are written
- * @param[out] d_tokens_indices Device memory to which the indices are written, where each index
  * represents the offset within \p d_json_in that cause the input being written
- * @param[out] d_num_written_tokens The total number of tokens that were parsed
  * @param[in] stream The CUDA stream to which kernels are dispatched
+ * @param[in] mr Optional, resource with which to allocate
+ * @return Pair of device vectors, where the first vector represents the token types and the second
+ * vector represents the index within the input corresponding to each token
  */
-
-void get_token_stream(device_span<SymbolT const> json_in,
-                      cudf::io::json_reader_options const& options,
-                      PdaTokenT* d_tokens,
-                      SymbolOffsetT* d_tokens_indices,
-                      SymbolOffsetT* d_num_written_tokens,
-                      rmm::cuda_stream_view stream);
+std::pair<rmm::device_uvector<PdaTokenT>, rmm::device_uvector<SymbolOffsetT>> get_token_stream(
+  device_span<SymbolT const> json_in,
+  cudf::io::json_reader_options const& options,
+  rmm::cuda_stream_view stream,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
  * @brief Parses the given JSON string and generates table from the given input.
  *
- * @param input The JSON input
- * @param options Parsing options specifying the parsing behaviour
- * @param stream The CUDA stream to which kernels are dispatched
- * @param mr Optional, resource with which to allocate.
+ * @param[in] input The JSON input
+ * @param[in] options Parsing options specifying the parsing behaviour
+ * @param[in] stream The CUDA stream to which kernels are dispatched
+ * @param[in] mr Optional, resource with which to allocate
  * @return The data parsed from the given JSON input
  */
 table_with_metadata parse_nested_json(
diff --git a/cpp/src/io/json/nested_json_gpu.cu b/cpp/src/io/json/nested_json_gpu.cu
index f8a862e2c65..34c85402284 100644
--- a/cpp/src/io/json/nested_json_gpu.cu
+++ b/cpp/src/io/json/nested_json_gpu.cu
@@ -939,14 +939,17 @@ void get_stack_context(device_span<SymbolT const> json_in,
     stream);
 }
 
-// TODO: return pair of device_uvector instead of passing pre-allocated pointers.
-void get_token_stream(device_span<SymbolT const> json_in,
-                      cudf::io::json_reader_options const& options,
-                      PdaTokenT* d_tokens,
-                      SymbolOffsetT* d_tokens_indices,
-                      SymbolOffsetT* d_num_written_tokens,
-                      rmm::cuda_stream_view stream)
+std::pair<rmm::device_uvector<PdaTokenT>, rmm::device_uvector<SymbolOffsetT>> get_token_stream(
+  device_span<SymbolT const> json_in,
+  cudf::io::json_reader_options const& options,
+  rmm::cuda_stream_view stream,
+  rmm::mr::device_memory_resource* mr)
 {
+  constexpr std::size_t single_item_count = 1ULL;
+  rmm::device_uvector<PdaTokenT> tokens{json_in.size(), stream, mr};
+  rmm::device_uvector<SymbolOffsetT> tokens_indices{json_in.size(), stream, mr};
+  rmm::device_uvector<SymbolOffsetT> num_written_tokens{single_item_count, stream};
+
   auto const new_line_delimited_json = options.is_enabled_lines();
 
   // Memory holding the top-of-stack stack context for the input
@@ -984,11 +987,17 @@ void get_token_stream(device_span<SymbolT const> json_in,
   // Perform a PDA-transducer pass
   json_to_tokens_fst.Transduce(pda_sgids.begin(),
                                static_cast<SymbolOffsetT>(json_in.size()),
-                               d_tokens,
-                               d_tokens_indices,
-                               d_num_written_tokens,
+                               tokens.data(),
+                               tokens_indices.data(),
+                               num_written_tokens.data(),
                                tokenizer_pda::start_state,
                                stream);
+
+  auto num_total_tokens = num_written_tokens.front_element(stream);
+  tokens.resize(num_total_tokens, stream);
+  tokens_indices.resize(num_total_tokens, stream);
+
+  return std::make_pair(std::move(tokens), std::move(tokens_indices));
 }
 
 /**
@@ -1007,28 +1016,20 @@ void make_json_column(json_column& root_column,
                       host_span<SymbolT const> input,
                       device_span<SymbolT const> d_input,
                       cudf::io::json_reader_options const& options,
-                      rmm::cuda_stream_view stream)
+                      rmm::cuda_stream_view stream,
+                      rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   // Default name for a list's child column
   std::string const list_child_name = "element";
 
-  constexpr std::size_t single_item = 1;
-  hostdevice_vector<PdaTokenT> tokens_gpu{input.size(), stream};
-  hostdevice_vector<SymbolOffsetT> token_indices_gpu{input.size(), stream};
-  hostdevice_vector<SymbolOffsetT> num_tokens_out{single_item, stream};
-
   // Parse the JSON and get the token stream
-  get_token_stream(d_input,
-                   options,
-                   tokens_gpu.device_ptr(),
-                   token_indices_gpu.device_ptr(),
-                   num_tokens_out.device_ptr(),
-                   stream);
+  const auto [d_tokens_gpu, d_token_indices_gpu] = get_token_stream(d_input, options, stream, mr);
 
   // Copy the JSON tokens to the host
-  token_indices_gpu.device_to_host(stream);
-  tokens_gpu.device_to_host(stream);
-  num_tokens_out.device_to_host(stream);
+  thrust::host_vector<PdaTokenT> tokens_gpu =
+    cudf::detail::make_host_vector_async(d_tokens_gpu, stream);
+  thrust::host_vector<SymbolOffsetT> token_indices_gpu =
+    cudf::detail::make_host_vector_async(d_token_indices_gpu, stream);
 
   // Make sure tokens have been copied to the host
   stream.synchronize();
@@ -1216,10 +1217,12 @@ void make_json_column(json_column& root_column,
   // INITIALIZE JSON ROOT NODE
   //--------------------------------------------------------------------------------
   // The JSON root may only be a struct, list, string, or value node
-  CUDF_EXPECTS(num_tokens_out[0] > 0, "Empty JSON input not supported");
+  CUDF_EXPECTS(tokens_gpu.size() == token_indices_gpu.size(),
+               "Unexpected mismatch in number of token types and token indices");
+  CUDF_EXPECTS(tokens_gpu.size() > 0, "Empty JSON input not supported");
   CUDF_EXPECTS(is_valid_root_token(tokens_gpu[offset]), "Invalid beginning of JSON document");
 
-  while (offset < num_tokens_out[0]) {
+  while (offset < tokens_gpu.size()) {
     // Verify there's at least the JSON root node left on the stack to which we can append data
     CUDF_EXPECTS(current_data_path.size() > 0, "Invalid JSON structure");
 
@@ -1327,7 +1330,7 @@ void make_json_column(json_column& root_column,
     else if (token == token_t::FieldNameBegin or token == token_t::StringBegin or
              token == token_t::ValueBegin) {
       // Verify that this token has the right successor to build a correct (being, end) token pair
-      CUDF_EXPECTS((offset + 1) < num_tokens_out[0], "Invalid JSON token sequence");
+      CUDF_EXPECTS((offset + 1) < tokens_gpu.size(), "Invalid JSON token sequence");
       CUDF_EXPECTS(tokens_gpu[offset + 1] == end_of_partner(token), "Invalid JSON token sequence");
 
       // The offset to the first symbol from the JSON input associated with the current token
@@ -1505,7 +1508,7 @@ table_with_metadata parse_nested_json(host_span<SymbolT const> input,
   // Push the root node onto the stack for the data path
   data_path.push({&root_column, row_offset_zero, nullptr, node_init_child_count_zero});
 
-  make_json_column(root_column, data_path, input, d_input, options, stream);
+  make_json_column(root_column, data_path, input, d_input, options, stream, mr);
 
   // data_root refers to the root column of the data represented by the given JSON string
   auto const& data_root =
diff --git a/cpp/tests/io/nested_json_test.cpp b/cpp/tests/io/nested_json_test.cpp
index a217b2f7d18..cae0083daed 100644
--- a/cpp/tests/io/nested_json_test.cpp
+++ b/cpp/tests/io/nested_json_test.cpp
@@ -246,8 +246,6 @@ TEST_F(JsonTest, TokenStream)
   using cuio_json::SymbolOffsetT;
   using cuio_json::SymbolT;
 
-  constexpr std::size_t single_item = 1;
-
   // Prepare cuda stream for data transfers & kernels
   rmm::cuda_stream stream{};
   rmm::cuda_stream_view stream_view(stream);
@@ -280,22 +278,15 @@ TEST_F(JsonTest, TokenStream)
                                         cudaMemcpyHostToDevice,
                                         stream.value()));
 
-  hostdevice_vector<PdaTokenT> tokens_gpu{input.size(), stream_view};
-  hostdevice_vector<SymbolOffsetT> token_indices_gpu{input.size(), stream_view};
-  hostdevice_vector<SymbolOffsetT> num_tokens_out{single_item, stream_view};
-
   // Parse the JSON and get the token stream
-  cuio_json::detail::get_token_stream(d_input,
-                                      default_options,
-                                      tokens_gpu.device_ptr(),
-                                      token_indices_gpu.device_ptr(),
-                                      num_tokens_out.device_ptr(),
-                                      stream_view);
+  const auto [d_tokens_gpu, d_token_indices_gpu] =
+    cuio_json::detail::get_token_stream(d_input, default_options, stream_view);
 
   // Copy back the number of tokens that were written
-  num_tokens_out.device_to_host(stream_view);
-  tokens_gpu.device_to_host(stream_view);
-  token_indices_gpu.device_to_host(stream_view);
+  thrust::host_vector<PdaTokenT> tokens_gpu =
+    cudf::detail::make_host_vector_async(d_tokens_gpu, stream);
+  thrust::host_vector<SymbolOffsetT> token_indices_gpu =
+    cudf::detail::make_host_vector_async(d_token_indices_gpu, stream);
 
   // Make sure we copied back all relevant data
   stream_view.synchronize();
@@ -328,9 +319,10 @@ TEST_F(JsonTest, TokenStream)
     {267, token_t::StructEnd},      {268, token_t::ListEnd}};
 
   // Verify the number of tokens matches
-  ASSERT_EQ(golden_token_stream.size(), num_tokens_out[0]);
+  ASSERT_EQ(golden_token_stream.size(), tokens_gpu.size());
+  ASSERT_EQ(golden_token_stream.size(), token_indices_gpu.size());
 
-  for (std::size_t i = 0; i < num_tokens_out[0]; i++) {
+  for (std::size_t i = 0; i < tokens_gpu.size(); i++) {
     // Ensure the index the tokens are pointing to do match
     EXPECT_EQ(golden_token_stream[i].first, token_indices_gpu[i]) << "Mismatch at #" << i;
 

From 9669c6a1f49eebee8c7c7a5c251ff2e0a54afa98 Mon Sep 17 00:00:00 2001
From: vuule <vmilovanovic@nvidia.com>
Date: Mon, 15 Aug 2022 16:39:06 -0700
Subject: [PATCH 09/40] C++ side changes + test

---
 cpp/src/io/json/experimental/read_json.cpp | 10 ++------
 cpp/tests/io/json_test.cpp                 | 27 ++++++++++++++++++----
 2 files changed, 24 insertions(+), 13 deletions(-)

diff --git a/cpp/src/io/json/experimental/read_json.cpp b/cpp/src/io/json/experimental/read_json.cpp
index fbe9b5f6112..0c579cbf035 100644
--- a/cpp/src/io/json/experimental/read_json.cpp
+++ b/cpp/src/io/json/experimental/read_json.cpp
@@ -15,19 +15,13 @@
  */
 
 #include "read_json.hpp"
+#include <io/json/nested_json.hpp>
 
 #include <cudf/utilities/error.hpp>
 #include <io/comp/io_uncomp.hpp>
 
 namespace cudf::io::detail::json::experimental {
 
-table_with_metadata read_nested_json(host_span<char const> input,
-                                     rmm::cuda_stream_view stream,
-                                     rmm::mr::device_memory_resource* mr)
-{
-  CUDF_FAIL("Not implemented");
-}
-
 std::vector<uint8_t> ingest_raw_input(host_span<std::unique_ptr<datasource>> sources,
                                       compression_type compression)
 {
@@ -69,7 +63,7 @@ table_with_metadata read_json(host_span<std::unique_ptr<datasource>> sources,
   auto const buffer = ingest_raw_input(sources, reader_opts.get_compression());
   auto data = host_span<char const>(reinterpret_cast<char const*>(buffer.data()), buffer.size());
 
-  return read_nested_json(data, stream, mr);
+  return cudf::io::json::detail::parse_nested_json(data, stream, mr);
 }
 
 }  // namespace cudf::io::detail::json::experimental
diff --git a/cpp/tests/io/json_test.cpp b/cpp/tests/io/json_test.cpp
index c8aefece94f..3866def2cdf 100644
--- a/cpp/tests/io/json_test.cpp
+++ b/cpp/tests/io/json_test.cpp
@@ -915,13 +915,30 @@ TEST_F(JsonReaderTest, BadDtypeParams)
   EXPECT_THROW(cudf_io::read_json(options_map), cudf::logic_error);
 }
 
-TEST_F(JsonReaderTest, ExperimentalParam)
+TEST_F(JsonReaderTest, JsonRecordsBasic)
 {
-  cudf_io::json_reader_options const options =
-    cudf_io::json_reader_options::builder(cudf_io::source_info{nullptr, 0}).experimental(true);
+  const std::string fname = temp_env->get_temp_dir() + "JsonLinesFileTest.json";
+  std::ofstream outfile(fname, std::ofstream::out);
+  outfile << "[{\"a\":\"11\", \"b\":\"1.1\"},{\"a\":\"22\", \"b\":\"2.2\"}]";
+  outfile.close();
+
+  cudf_io::json_reader_options options =
+    cudf_io::json_reader_options::builder(cudf_io::source_info{fname}).experimental(true);
+  auto result = cudf_io::read_json(options);
+
+  EXPECT_EQ(result.tbl->num_columns(), 2);
+  EXPECT_EQ(result.tbl->num_rows(), 2);
 
-  // should throw for now
-  EXPECT_THROW(cudf_io::read_json(options), cudf::logic_error);
+  EXPECT_EQ(result.tbl->get_column(0).type().id(), cudf::type_id::STRING);
+  EXPECT_EQ(result.tbl->get_column(1).type().id(), cudf::type_id::STRING);
+
+  EXPECT_EQ(std::string(result.metadata.column_names[0]), "a");
+  EXPECT_EQ(std::string(result.metadata.column_names[1]), "b");
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(0),
+                                 cudf::test::strings_column_wrapper({"11", "22"}));
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(1),
+                                 cudf::test::strings_column_wrapper({"1.1", "2.2"}));
 }
 
 CUDF_TEST_PROGRAM_MAIN()

From c9fb5b28d158ff198b042684f4a9d69803c7cb91 Mon Sep 17 00:00:00 2001
From: vuule <vmilovanovic@nvidia.com>
Date: Mon, 15 Aug 2022 16:51:58 -0700
Subject: [PATCH 10/40] working Python + test

---
 cpp/src/io/json/nested_json_gpu.cu  |  4 +++-
 python/cudf/cudf/tests/test_json.py | 19 +++++++++++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/cpp/src/io/json/nested_json_gpu.cu b/cpp/src/io/json/nested_json_gpu.cu
index 5e293f8a750..bffc8891020 100644
--- a/cpp/src/io/json/nested_json_gpu.cu
+++ b/cpp/src/io/json/nested_json_gpu.cu
@@ -1395,11 +1395,13 @@ table_with_metadata parse_nested_json(host_span<SymbolT const> input,
   // Initialize meta data to be populated while recursing through the tree of columns
   std::vector<std::unique_ptr<column>> out_columns;
   std::vector<column_name_info> out_column_names;
+  std::vector<std::string> out_root_column_names;
 
   // Iterate over the struct's child columns and convert to cudf column
   for (auto const& [col_name, json_col] : root_struct_col.child_columns) {
     // Insert this columns name into the schema
     out_column_names.emplace_back(col_name);
+    out_root_column_names.emplace_back(col_name);
 
     // Get this JSON column's cudf column and schema info
     auto [cudf_col, col_name_info]   = json_column_to_cudf_column(json_col, d_input, stream, mr);
@@ -1408,7 +1410,7 @@ table_with_metadata parse_nested_json(host_span<SymbolT const> input,
   }
 
   return table_with_metadata{std::make_unique<table>(std::move(out_columns)),
-                             {{}, out_column_names}};
+                             {out_root_column_names, out_column_names}};
 }
 
 }  // namespace detail
diff --git a/python/cudf/cudf/tests/test_json.py b/python/cudf/cudf/tests/test_json.py
index 800ed68e8a4..5122c976f27 100644
--- a/python/cudf/cudf/tests/test_json.py
+++ b/python/cudf/cudf/tests/test_json.py
@@ -579,3 +579,22 @@ def test_json_experimental():
     # should raise an exception, for now
     with pytest.raises(RuntimeError):
         cudf.read_json("", engine="cudf_experimental")
+
+
+def test_json_nested_basic(tmpdir):
+    fname = tmpdir.mkdir("gdf_json").join("tmp_json_nested_basic")
+    data = {
+        "c1": [{"f1": "sf11", "f2": "sf21"}, {"f1": "sf12", "f2": "sf22"}],
+        "c2": [["l11", "l21"], ["l12", "l22"]],
+    }
+    pdf = pd.DataFrame(data)
+    pdf.to_json(fname, orient="records")
+
+    with open(fname, "r") as f:
+        print(f.read())
+    print(pdf)
+
+    df = cudf.read_json(fname, engine="cudf_experimental", orient="records")
+    pdf = pd.read_json(fname, orient="records")
+
+    assert_eq(cudf.DataFrame(pdf), df)

From 2de91e18bedb8a97c3cd23648f733eaa170849c7 Mon Sep 17 00:00:00 2001
From: vuule <vmilovanovic@nvidia.com>
Date: Mon, 15 Aug 2022 17:12:03 -0700
Subject: [PATCH 11/40] clean up

---
 cpp/src/io/json/experimental/read_json.cpp | 23 ++++++++--------------
 cpp/tests/io/json_test.cpp                 |  2 +-
 2 files changed, 9 insertions(+), 16 deletions(-)

diff --git a/cpp/src/io/json/experimental/read_json.cpp b/cpp/src/io/json/experimental/read_json.cpp
index 0c579cbf035..2259fcc839a 100644
--- a/cpp/src/io/json/experimental/read_json.cpp
+++ b/cpp/src/io/json/experimental/read_json.cpp
@@ -20,32 +20,25 @@
 #include <cudf/utilities/error.hpp>
 #include <io/comp/io_uncomp.hpp>
 
+#include <numeric>
+
 namespace cudf::io::detail::json::experimental {
 
 std::vector<uint8_t> ingest_raw_input(host_span<std::unique_ptr<datasource>> sources,
                                       compression_type compression)
 {
-  // Iterate through the user defined sources and read the contents into the local buffer
-  size_t total_source_size = 0;
-  for (const auto& source : sources) {
-    total_source_size += source->size();
-  }
-
+  auto const total_source_size =
+    std::accumulate(sources.begin(), sources.end(), 0ul, [](size_t sum, auto& source) {
+      return sum + source->size();
+    });
   auto buffer = std::vector<uint8_t>(total_source_size);
 
   size_t bytes_read = 0;
   for (const auto& source : sources) {
-    if (not source->is_empty()) {
-      auto const destination = buffer.data() + bytes_read;
-      bytes_read += source->host_read(0, source->size(), destination);
-    }
+    bytes_read += source->host_read(0, source->size(), buffer.data() + bytes_read);
   }
 
-  if (compression == compression_type::NONE) {
-    return buffer;
-  } else {
-    return decompress(compression, buffer);
-  }
+  return (compression == compression_type::NONE) ? buffer : decompress(compression, buffer);
 }
 
 table_with_metadata read_json(host_span<std::unique_ptr<datasource>> sources,
diff --git a/cpp/tests/io/json_test.cpp b/cpp/tests/io/json_test.cpp
index 3866def2cdf..4f98dc54a73 100644
--- a/cpp/tests/io/json_test.cpp
+++ b/cpp/tests/io/json_test.cpp
@@ -915,7 +915,7 @@ TEST_F(JsonReaderTest, BadDtypeParams)
   EXPECT_THROW(cudf_io::read_json(options_map), cudf::logic_error);
 }
 
-TEST_F(JsonReaderTest, JsonRecordsBasic)
+TEST_F(JsonReaderTest, JsonExperimentalBasic)
 {
   const std::string fname = temp_env->get_temp_dir() + "JsonLinesFileTest.json";
   std::ofstream outfile(fname, std::ofstream::out);

From 70dd9b1c0df226809b84788a133f5f0974b88315 Mon Sep 17 00:00:00 2001
From: vuule <vmilovanovic@nvidia.com>
Date: Mon, 15 Aug 2022 23:43:20 -0700
Subject: [PATCH 12/40] stop using column_names

---
 cpp/src/io/json/nested_json_gpu.cu |  4 +--
 cpp/src/io/json/reader_impl.cu     | 19 +++++++----
 cpp/tests/io/json_test.cpp         | 52 +++++++++++++++---------------
 python/cudf/cudf/_lib/json.pyx     |  2 +-
 4 files changed, 40 insertions(+), 37 deletions(-)

diff --git a/cpp/src/io/json/nested_json_gpu.cu b/cpp/src/io/json/nested_json_gpu.cu
index bffc8891020..5e293f8a750 100644
--- a/cpp/src/io/json/nested_json_gpu.cu
+++ b/cpp/src/io/json/nested_json_gpu.cu
@@ -1395,13 +1395,11 @@ table_with_metadata parse_nested_json(host_span<SymbolT const> input,
   // Initialize meta data to be populated while recursing through the tree of columns
   std::vector<std::unique_ptr<column>> out_columns;
   std::vector<column_name_info> out_column_names;
-  std::vector<std::string> out_root_column_names;
 
   // Iterate over the struct's child columns and convert to cudf column
   for (auto const& [col_name, json_col] : root_struct_col.child_columns) {
     // Insert this columns name into the schema
     out_column_names.emplace_back(col_name);
-    out_root_column_names.emplace_back(col_name);
 
     // Get this JSON column's cudf column and schema info
     auto [cudf_col, col_name_info]   = json_column_to_cudf_column(json_col, d_input, stream, mr);
@@ -1410,7 +1408,7 @@ table_with_metadata parse_nested_json(host_span<SymbolT const> input,
   }
 
   return table_with_metadata{std::make_unique<table>(std::move(out_columns)),
-                             {out_root_column_names, out_column_names}};
+                             {{}, out_column_names}};
 }
 
 }  // namespace detail
diff --git a/cpp/src/io/json/reader_impl.cu b/cpp/src/io/json/reader_impl.cu
index 6b12b462dd9..3be0ff318a1 100644
--- a/cpp/src/io/json/reader_impl.cu
+++ b/cpp/src/io/json/reader_impl.cu
@@ -480,7 +480,7 @@ std::vector<data_type> get_data_types(json_reader_options const& reader_opts,
 
 table_with_metadata convert_data_to_table(parse_options_view const& parse_opts,
                                           std::vector<data_type> const& dtypes,
-                                          std::vector<std::string> const& column_names,
+                                          std::vector<std::string>&& column_names,
                                           col_map_type* column_map,
                                           device_span<uint64_t const> rec_starts,
                                           device_span<char const> data,
@@ -552,8 +552,8 @@ table_with_metadata convert_data_to_table(parse_options_view const& parse_opts,
 
   std::vector<column_name_info> column_infos;
   column_infos.reserve(column_names.size());
-  std::transform(column_names.cbegin(),
-                 column_names.cend(),
+  std::transform(std::make_move_iterator(column_names.begin()),
+                 std::make_move_iterator(column_names.end()),
                  std::back_inserter(column_infos),
                  [](auto const& col_name) { return column_name_info{col_name}; });
 
@@ -563,8 +563,7 @@ table_with_metadata convert_data_to_table(parse_options_view const& parse_opts,
 
   CUDF_EXPECTS(!out_columns.empty(), "No columns created from json input");
 
-  return table_with_metadata{std::make_unique<table>(std::move(out_columns)),
-                             {column_names, column_infos}};
+  return table_with_metadata{std::make_unique<table>(std::move(out_columns)), {{}, column_infos}};
 }
 
 /**
@@ -636,8 +635,14 @@ table_with_metadata read_json(std::vector<std::unique_ptr<datasource>>& sources,
 
   CUDF_EXPECTS(not dtypes.empty(), "Error in data type detection.\n");
 
-  return convert_data_to_table(
-    parse_opts.view(), dtypes, column_names, column_map.get(), rec_starts, d_data, stream, mr);
+  return convert_data_to_table(parse_opts.view(),
+                               dtypes,
+                               std::move(column_names),
+                               column_map.get(),
+                               rec_starts,
+                               d_data,
+                               stream,
+                               mr);
 }
 
 }  // namespace json
diff --git a/cpp/tests/io/json_test.cpp b/cpp/tests/io/json_test.cpp
index 4f98dc54a73..adf97bf3e2a 100644
--- a/cpp/tests/io/json_test.cpp
+++ b/cpp/tests/io/json_test.cpp
@@ -171,8 +171,8 @@ TEST_F(JsonReaderTest, BasicJsonLines)
   EXPECT_EQ(result.tbl->get_column(0).type().id(), cudf::type_id::INT32);
   EXPECT_EQ(result.tbl->get_column(1).type().id(), cudf::type_id::FLOAT64);
 
-  EXPECT_EQ(result.metadata.column_names[0], "0");
-  EXPECT_EQ(result.metadata.column_names[1], "1");
+  EXPECT_EQ(result.metadata.schema_info[0].name, "0");
+  EXPECT_EQ(result.metadata.schema_info[1].name, "1");
 
   auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; });
 
@@ -228,9 +228,9 @@ TEST_F(JsonReaderTest, JsonLinesStrings)
   EXPECT_EQ(result.tbl->get_column(1).type().id(), cudf::type_id::FLOAT64);
   EXPECT_EQ(result.tbl->get_column(2).type().id(), cudf::type_id::STRING);
 
-  EXPECT_EQ(result.metadata.column_names[0], "0");
-  EXPECT_EQ(result.metadata.column_names[1], "1");
-  EXPECT_EQ(result.metadata.column_names[2], "2");
+  EXPECT_EQ(result.metadata.schema_info[0].name, "0");
+  EXPECT_EQ(result.metadata.schema_info[1].name, "1");
+  EXPECT_EQ(result.metadata.schema_info[2].name, "2");
 
   auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; });
 
@@ -414,9 +414,9 @@ TEST_F(JsonReaderTest, JsonLinesDtypeInference)
   EXPECT_EQ(result.tbl->get_column(1).type().id(), cudf::type_id::FLOAT64);
   EXPECT_EQ(result.tbl->get_column(2).type().id(), cudf::type_id::STRING);
 
-  EXPECT_EQ(std::string(result.metadata.column_names[0]), "0");
-  EXPECT_EQ(std::string(result.metadata.column_names[1]), "1");
-  EXPECT_EQ(std::string(result.metadata.column_names[2]), "2");
+  EXPECT_EQ(result.metadata.schema_info[0].name, "0");
+  EXPECT_EQ(result.metadata.schema_info[1].name, "1");
+  EXPECT_EQ(result.metadata.schema_info[2].name, "2");
 
   auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; });
 
@@ -444,8 +444,8 @@ TEST_F(JsonReaderTest, JsonLinesFileInput)
   EXPECT_EQ(result.tbl->get_column(0).type().id(), cudf::type_id::INT64);
   EXPECT_EQ(result.tbl->get_column(1).type().id(), cudf::type_id::FLOAT64);
 
-  EXPECT_EQ(std::string(result.metadata.column_names[0]), "0");
-  EXPECT_EQ(std::string(result.metadata.column_names[1]), "1");
+  EXPECT_EQ(result.metadata.schema_info[0].name, "0");
+  EXPECT_EQ(result.metadata.schema_info[1].name, "1");
 
   auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; });
 
@@ -472,7 +472,7 @@ TEST_F(JsonReaderTest, JsonLinesByteRange)
   EXPECT_EQ(result.tbl->num_rows(), 3);
 
   EXPECT_EQ(result.tbl->get_column(0).type().id(), cudf::type_id::INT64);
-  EXPECT_EQ(std::string(result.metadata.column_names[0]), "0");
+  EXPECT_EQ(result.metadata.schema_info[0].name, "0");
 
   auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; });
 
@@ -496,9 +496,9 @@ TEST_F(JsonReaderTest, JsonLinesObjects)
   EXPECT_EQ(result.tbl->num_rows(), 1);
 
   EXPECT_EQ(result.tbl->get_column(0).type().id(), cudf::type_id::INT64);
-  EXPECT_EQ(std::string(result.metadata.column_names[0]), "co\\\"l1");
+  EXPECT_EQ(result.metadata.schema_info[0].name, "co\\\"l1");
   EXPECT_EQ(result.tbl->get_column(1).type().id(), cudf::type_id::FLOAT64);
-  EXPECT_EQ(std::string(result.metadata.column_names[1]), "col2");
+  EXPECT_EQ(result.metadata.schema_info[1].name, "col2");
 
   auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; });
 
@@ -522,9 +522,9 @@ TEST_F(JsonReaderTest, JsonLinesObjectsStrings)
     EXPECT_EQ(result.tbl->get_column(1).type().id(), cudf::type_id::FLOAT64);
     EXPECT_EQ(result.tbl->get_column(2).type().id(), cudf::type_id::STRING);
 
-    EXPECT_EQ(std::string(result.metadata.column_names[0]), "col1");
-    EXPECT_EQ(std::string(result.metadata.column_names[1]), "col2");
-    EXPECT_EQ(std::string(result.metadata.column_names[2]), "col3");
+    EXPECT_EQ(result.metadata.schema_info[0].name, "col1");
+    EXPECT_EQ(result.metadata.schema_info[1].name, "col2");
+    EXPECT_EQ(result.metadata.schema_info[2].name, "col3");
 
     auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; });
 
@@ -563,9 +563,9 @@ TEST_F(JsonReaderTest, JsonLinesObjectsMissingData)
   EXPECT_EQ(result.tbl->get_column(1).type().id(), cudf::type_id::STRING);
   EXPECT_EQ(result.tbl->get_column(2).type().id(), cudf::type_id::FLOAT64);
 
-  EXPECT_EQ(std::string(result.metadata.column_names[0]), "col2");
-  EXPECT_EQ(std::string(result.metadata.column_names[1]), "col3");
-  EXPECT_EQ(std::string(result.metadata.column_names[2]), "col1");
+  EXPECT_EQ(result.metadata.schema_info[0].name, "col2");
+  EXPECT_EQ(result.metadata.schema_info[1].name, "col3");
+  EXPECT_EQ(result.metadata.schema_info[2].name, "col1");
 
   auto col1_validity =
     cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 0; });
@@ -598,9 +598,9 @@ TEST_F(JsonReaderTest, JsonLinesObjectsOutOfOrder)
   EXPECT_EQ(result.tbl->get_column(0).type().id(), cudf::type_id::INT64);
   EXPECT_EQ(result.tbl->get_column(1).type().id(), cudf::type_id::FLOAT64);
 
-  EXPECT_EQ(std::string(result.metadata.column_names[0]), "col1");
-  EXPECT_EQ(std::string(result.metadata.column_names[1]), "col2");
-  EXPECT_EQ(std::string(result.metadata.column_names[2]), "col3");
+  EXPECT_EQ(result.metadata.schema_info[0].name, "col1");
+  EXPECT_EQ(result.metadata.schema_info[1].name, "col2");
+  EXPECT_EQ(result.metadata.schema_info[2].name, "col3");
 
   auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; });
 
@@ -881,8 +881,8 @@ TEST_F(JsonReaderTest, JsonLinesMultipleFileInputs)
   EXPECT_EQ(result.tbl->get_column(0).type().id(), cudf::type_id::INT64);
   EXPECT_EQ(result.tbl->get_column(1).type().id(), cudf::type_id::FLOAT64);
 
-  EXPECT_EQ(std::string(result.metadata.column_names[0]), "0");
-  EXPECT_EQ(std::string(result.metadata.column_names[1]), "1");
+  EXPECT_EQ(result.metadata.schema_info[0].name, "0");
+  EXPECT_EQ(result.metadata.schema_info[1].name, "1");
 
   auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; });
 
@@ -932,8 +932,8 @@ TEST_F(JsonReaderTest, JsonExperimentalBasic)
   EXPECT_EQ(result.tbl->get_column(0).type().id(), cudf::type_id::STRING);
   EXPECT_EQ(result.tbl->get_column(1).type().id(), cudf::type_id::STRING);
 
-  EXPECT_EQ(std::string(result.metadata.column_names[0]), "a");
-  EXPECT_EQ(std::string(result.metadata.column_names[1]), "b");
+  EXPECT_EQ(result.metadata.schema_info[0].name, "a");
+  EXPECT_EQ(result.metadata.schema_info[1].name, "b");
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(0),
                                  cudf::test::strings_column_wrapper({"11", "22"}));
diff --git a/python/cudf/cudf/_lib/json.pyx b/python/cudf/cudf/_lib/json.pyx
index 0ee6062e7f2..376850b7b1b 100644
--- a/python/cudf/cudf/_lib/json.pyx
+++ b/python/cudf/cudf/_lib/json.pyx
@@ -113,7 +113,7 @@ cpdef read_json(object filepaths_or_buffers,
     with nogil:
         c_result = move(libcudf_read_json(opts))
 
-    meta_names = [name.decode() for name in c_result.metadata.column_names]
+    meta_names = [info.name.decode() for info in c_result.metadata.schema_info]
     df = cudf.DataFrame._from_data(*data_from_unique_ptr(
         move(c_result.tbl),
         column_names=meta_names

From b1afef0c3ea332c62c4e5fd5e49fa15b14e8a705 Mon Sep 17 00:00:00 2001
From: Elias Stehle <3958403+elstehle@users.noreply.github.com>
Date: Tue, 16 Aug 2022 02:01:20 -0700
Subject: [PATCH 13/40] adds documentation for mr parameter

---
 cpp/src/io/json/nested_json_gpu.cu | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cpp/src/io/json/nested_json_gpu.cu b/cpp/src/io/json/nested_json_gpu.cu
index 34c85402284..4c21b9a78a8 100644
--- a/cpp/src/io/json/nested_json_gpu.cu
+++ b/cpp/src/io/json/nested_json_gpu.cu
@@ -1009,6 +1009,7 @@ std::pair<rmm::device_uvector<PdaTokenT>, rmm::device_uvector<SymbolOffsetT>> ge
  * @param[in] input The JSON input in host memory
  * @param[in] d_input The JSON input in device memory
  * @param[in] stream The CUDA stream to which kernels are dispatched
+ * @param[in] mr Optional, resource with which to allocate
  * @return The columnar representation of the data from the given JSON input
  */
 void make_json_column(json_column& root_column,

From 8409214ead5b150122a60b3c1b1db5fcecc59c9e Mon Sep 17 00:00:00 2001
From: Elias Stehle <3958403+elstehle@users.noreply.github.com>
Date: Tue, 16 Aug 2022 02:04:44 -0700
Subject: [PATCH 14/40] minor documentation fixes

---
 cpp/src/io/json/nested_json.hpp    | 1 -
 cpp/src/io/json/nested_json_gpu.cu | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/src/io/json/nested_json.hpp b/cpp/src/io/json/nested_json.hpp
index 1048f9fcedd..47ce1edafaf 100644
--- a/cpp/src/io/json/nested_json.hpp
+++ b/cpp/src/io/json/nested_json.hpp
@@ -285,7 +285,6 @@ void get_stack_context(device_span<SymbolT const> json_in,
  *
  * @param[in] json_in The JSON input
  * @param[in] options Parsing options specifying the parsing behaviour
- * represents the offset within \p d_json_in that cause the input being written
  * @param[in] stream The CUDA stream to which kernels are dispatched
  * @param[in] mr Optional, resource with which to allocate
  * @return Pair of device vectors, where the first vector represents the token types and the second
diff --git a/cpp/src/io/json/nested_json_gpu.cu b/cpp/src/io/json/nested_json_gpu.cu
index 4c21b9a78a8..b51d1270f22 100644
--- a/cpp/src/io/json/nested_json_gpu.cu
+++ b/cpp/src/io/json/nested_json_gpu.cu
@@ -1009,6 +1009,7 @@ std::pair<rmm::device_uvector<PdaTokenT>, rmm::device_uvector<SymbolOffsetT>> ge
  * @param[in] input The JSON input in host memory
  * @param[in] d_input The JSON input in device memory
  * @param[in] stream The CUDA stream to which kernels are dispatched
+ * @param[in] options Parsing options specifying the parsing behaviour
  * @param[in] mr Optional, resource with which to allocate
  * @return The columnar representation of the data from the given JSON input
  */

From d0e0defcdaf3c50da6cc13f174a4d55846ec23d2 Mon Sep 17 00:00:00 2001
From: Elias Stehle <3958403+elstehle@users.noreply.github.com>
Date: Tue, 16 Aug 2022 02:06:08 -0700
Subject: [PATCH 15/40] fixes parameter order

---
 cpp/src/io/json/nested_json_gpu.cu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/src/io/json/nested_json_gpu.cu b/cpp/src/io/json/nested_json_gpu.cu
index b51d1270f22..26d7aaf3b2b 100644
--- a/cpp/src/io/json/nested_json_gpu.cu
+++ b/cpp/src/io/json/nested_json_gpu.cu
@@ -1008,8 +1008,8 @@ std::pair<rmm::device_uvector<PdaTokenT>, rmm::device_uvector<SymbolOffsetT>> ge
  * first node encountered in \p input
  * @param[in] input The JSON input in host memory
  * @param[in] d_input The JSON input in device memory
- * @param[in] stream The CUDA stream to which kernels are dispatched
  * @param[in] options Parsing options specifying the parsing behaviour
+ * @param[in] stream The CUDA stream to which kernels are dispatched
  * @param[in] mr Optional, resource with which to allocate
  * @return The columnar representation of the data from the given JSON input
  */

From 574ac4397e25f05cbb7bdf1d12a1c673c5ecb543 Mon Sep 17 00:00:00 2001
From: vuule <vmilovanovic@nvidia.com>
Date: Tue, 16 Aug 2022 09:07:52 -0700
Subject: [PATCH 16/40] fix copy-paste error

---
 cpp/tests/io/json_test.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/tests/io/json_test.cpp b/cpp/tests/io/json_test.cpp
index adf97bf3e2a..c3af9fc2eb0 100644
--- a/cpp/tests/io/json_test.cpp
+++ b/cpp/tests/io/json_test.cpp
@@ -917,7 +917,7 @@ TEST_F(JsonReaderTest, BadDtypeParams)
 
 TEST_F(JsonReaderTest, JsonExperimentalBasic)
 {
-  const std::string fname = temp_env->get_temp_dir() + "JsonLinesFileTest.json";
+  std::string const fname = temp_env->get_temp_dir() + "JsonExperimentalBasic.json";
   std::ofstream outfile(fname, std::ofstream::out);
   outfile << "[{\"a\":\"11\", \"b\":\"1.1\"},{\"a\":\"22\", \"b\":\"2.2\"}]";
   outfile.close();

From 2de80b74a42c21a4b4b738e013df22c403125e96 Mon Sep 17 00:00:00 2001
From: Vukasin Milovanovic <vukasin.milovanovic.87@gmail.com>
Date: Tue, 16 Aug 2022 09:11:04 -0700
Subject: [PATCH 17/40] raw string

Co-authored-by: Elias Stehle <3958403+elstehle@users.noreply.github.com>
---
 cpp/tests/io/json_test.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/tests/io/json_test.cpp b/cpp/tests/io/json_test.cpp
index adf97bf3e2a..79d7bf241f6 100644
--- a/cpp/tests/io/json_test.cpp
+++ b/cpp/tests/io/json_test.cpp
@@ -919,7 +919,7 @@ TEST_F(JsonReaderTest, JsonExperimentalBasic)
 {
   const std::string fname = temp_env->get_temp_dir() + "JsonLinesFileTest.json";
   std::ofstream outfile(fname, std::ofstream::out);
-  outfile << "[{\"a\":\"11\", \"b\":\"1.1\"},{\"a\":\"22\", \"b\":\"2.2\"}]";
+  outfile << R"([{"a":"11", "b":"1.1"},{"a":"22", "b":"2.2"}])"
   outfile.close();
 
   cudf_io::json_reader_options options =

From bc14a1dd71a12ca11b1964b4ae10f4d3932f374e Mon Sep 17 00:00:00 2001
From: vuule <vmilovanovic@nvidia.com>
Date: Tue, 16 Aug 2022 09:12:23 -0700
Subject: [PATCH 18/40] remove print in Python test

---
 python/cudf/cudf/tests/test_json.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/python/cudf/cudf/tests/test_json.py b/python/cudf/cudf/tests/test_json.py
index 5122c976f27..6beb050d920 100644
--- a/python/cudf/cudf/tests/test_json.py
+++ b/python/cudf/cudf/tests/test_json.py
@@ -590,10 +590,6 @@ def test_json_nested_basic(tmpdir):
     pdf = pd.DataFrame(data)
     pdf.to_json(fname, orient="records")
 
-    with open(fname, "r") as f:
-        print(f.read())
-    print(pdf)
-
     df = cudf.read_json(fname, engine="cudf_experimental", orient="records")
     pdf = pd.read_json(fname, orient="records")
 

From bca2e839d3db8ea70abea83b6d281be613bad9cb Mon Sep 17 00:00:00 2001
From: vuule <vmilovanovic@nvidia.com>
Date: Tue, 16 Aug 2022 09:21:48 -0700
Subject: [PATCH 19/40] addressing reviews

---
 cpp/src/io/json/experimental/read_json.cpp | 3 ++-
 python/cudf/cudf/tests/test_json.py        | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/cpp/src/io/json/experimental/read_json.cpp b/cpp/src/io/json/experimental/read_json.cpp
index 2259fcc839a..e070aacaca2 100644
--- a/cpp/src/io/json/experimental/read_json.cpp
+++ b/cpp/src/io/json/experimental/read_json.cpp
@@ -15,10 +15,11 @@
  */
 
 #include "read_json.hpp"
+
+#include <io/comp/io_uncomp.hpp>
 #include <io/json/nested_json.hpp>
 
 #include <cudf/utilities/error.hpp>
-#include <io/comp/io_uncomp.hpp>
 
 #include <numeric>
 
diff --git a/python/cudf/cudf/tests/test_json.py b/python/cudf/cudf/tests/test_json.py
index 6beb050d920..368015cf563 100644
--- a/python/cudf/cudf/tests/test_json.py
+++ b/python/cudf/cudf/tests/test_json.py
@@ -593,4 +593,4 @@ def test_json_nested_basic(tmpdir):
     df = cudf.read_json(fname, engine="cudf_experimental", orient="records")
     pdf = pd.read_json(fname, orient="records")
 
-    assert_eq(cudf.DataFrame(pdf), df)
+    assert_eq(pdf, df)

From ba28571ca2492f5edcb5b6f76c08ce751146a94f Mon Sep 17 00:00:00 2001
From: vuule <vmilovanovic@nvidia.com>
Date: Tue, 16 Aug 2022 10:57:10 -0700
Subject: [PATCH 20/40] Java fix

---
 java/src/main/native/src/TableJni.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp
index 44c08aec110..857fac7df2b 100644
--- a/java/src/main/native/src/TableJni.cpp
+++ b/java/src/main/native/src/TableJni.cpp
@@ -1459,7 +1459,7 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_readJSON(
     cudf::io::table_with_metadata result = cudf::io::read_json(opts.build());
 
     // there is no need to re-order columns when inferring schema
-    if (result.metadata.column_names.empty() || n_col_names.size() <= 0) {
+    if (result.metadata.schema_info.empty() || n_col_names.size() <= 0) {
       return convert_table_for_return(env, result.tbl);
     } else {
       // json reader will not return the correct column order,
@@ -1467,10 +1467,10 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_readJSON(
 
       // turn name and its index in table into map<name, index>
       std::map<std::string, cudf::size_type> m;
-      std::transform(result.metadata.column_names.begin(), result.metadata.column_names.end(),
+      std::transform(result.metadata.schema_info.cbegin(), result.metadata.schema_info.cend(),
                      thrust::make_counting_iterator(0), std::inserter(m, m.end()),
-                     [](auto const &column_name, auto const &index) {
-                       return std::make_pair(column_name, index);
+                     [](auto const &column_info, auto const &index) {
+                       return std::make_pair(column_info.name, index);
                      });
 
       auto col_names_vec = n_col_names.as_cpp_vector();

From a6d5ab732f6cc474289a088ecb1e1c9287fee728 Mon Sep 17 00:00:00 2001
From: vuule <vmilovanovic@nvidia.com>
Date: Tue, 16 Aug 2022 11:59:42 -0700
Subject: [PATCH 21/40] style

---
 cpp/tests/io/json_test.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/tests/io/json_test.cpp b/cpp/tests/io/json_test.cpp
index fcecea8e7e0..67f0542ace2 100644
--- a/cpp/tests/io/json_test.cpp
+++ b/cpp/tests/io/json_test.cpp
@@ -919,7 +919,7 @@ TEST_F(JsonReaderTest, JsonExperimentalBasic)
 {
   std::string const fname = temp_env->get_temp_dir() + "JsonExperimentalBasic.json";
   std::ofstream outfile(fname, std::ofstream::out);
-  outfile << R"([{"a":"11", "b":"1.1"},{"a":"22", "b":"2.2"}])"
+  outfile << R"([{"a":"11", "b":"1.1"},{"a":"22", "b":"2.2"}])";
   outfile.close();
 
   cudf_io::json_reader_options options =

From a0bd2292f1dcfca9d4b6470c17c0f4b07d85d93f Mon Sep 17 00:00:00 2001
From: Elias Stehle <3958403+elstehle@users.noreply.github.com>
Date: Tue, 16 Aug 2022 23:28:54 -0700
Subject: [PATCH 22/40] integrates upstream interface changes

---
 cpp/src/io/json/experimental/read_json.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/src/io/json/experimental/read_json.cpp b/cpp/src/io/json/experimental/read_json.cpp
index e070aacaca2..cc154d5f325 100644
--- a/cpp/src/io/json/experimental/read_json.cpp
+++ b/cpp/src/io/json/experimental/read_json.cpp
@@ -57,7 +57,7 @@ table_with_metadata read_json(host_span<std::unique_ptr<datasource>> sources,
   auto const buffer = ingest_raw_input(sources, reader_opts.get_compression());
   auto data = host_span<char const>(reinterpret_cast<char const*>(buffer.data()), buffer.size());
 
-  return cudf::io::json::detail::parse_nested_json(data, stream, mr);
+  return cudf::io::json::detail::parse_nested_json(data, reader_opts, stream, mr);
 }
 
 }  // namespace cudf::io::detail::json::experimental

From f3bba9d4181822704a917b199591ea452bfd46ef Mon Sep 17 00:00:00 2001
From: Elias Stehle <3958403+elstehle@users.noreply.github.com>
Date: Wed, 17 Aug 2022 10:14:26 -0700
Subject: [PATCH 23/40] enables lines option in the nested reader

---
 cpp/src/io/json/experimental/read_json.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/cpp/src/io/json/experimental/read_json.cpp b/cpp/src/io/json/experimental/read_json.cpp
index cc154d5f325..ceac40ba4f9 100644
--- a/cpp/src/io/json/experimental/read_json.cpp
+++ b/cpp/src/io/json/experimental/read_json.cpp
@@ -50,7 +50,6 @@ table_with_metadata read_json(host_span<std::unique_ptr<datasource>> sources,
   auto const dtypes_empty =
     std::visit([](const auto& dtypes) { return dtypes.empty(); }, reader_opts.get_dtypes());
   CUDF_EXPECTS(dtypes_empty, "user specified dtypes are not yet supported");
-  CUDF_EXPECTS(not reader_opts.is_enabled_lines(), "JSON Lines format is not yet supported");
   CUDF_EXPECTS(reader_opts.get_byte_range_offset() == 0 and reader_opts.get_byte_range_size() == 0,
                "specifying a byte range is not yet supported");
 

From 21b40231e5c6e6e05c548519b5419df84ffb9a83 Mon Sep 17 00:00:00 2001
From: Elias Stehle <3958403+elstehle@users.noreply.github.com>
Date: Wed, 17 Aug 2022 11:32:15 -0700
Subject: [PATCH 24/40] migrates test from details api to reader api

---
 cpp/tests/io/json_test.cpp        | 27 ++++++++++++++++++++++++
 cpp/tests/io/nested_json_test.cpp | 34 -------------------------------
 2 files changed, 27 insertions(+), 34 deletions(-)

diff --git a/cpp/tests/io/json_test.cpp b/cpp/tests/io/json_test.cpp
index 67f0542ace2..af72edce91b 100644
--- a/cpp/tests/io/json_test.cpp
+++ b/cpp/tests/io/json_test.cpp
@@ -18,6 +18,7 @@
 #include <cudf_test/column_utilities.hpp>
 #include <cudf_test/column_wrapper.hpp>
 #include <cudf_test/cudf_gtest.hpp>
+#include <cudf_test/table_utilities.hpp>
 #include <cudf_test/type_lists.hpp>
 
 #include <cudf/detail/iterator.cuh>
@@ -941,4 +942,30 @@ TEST_F(JsonReaderTest, JsonExperimentalBasic)
                                  cudf::test::strings_column_wrapper({"1.1", "2.2"}));
 }
 
+TEST_F(JsonReaderTest, JsonExperimentalLines)
+{
+  std::string json_string =
+    R"({"a":"a0"}
+    {"a":"a1"}
+    {"a":"a2", "b":"b2"}
+    {"a":"a3", "c":"c3"}
+    {"a":"a4"})";
+
+  // Initialize parsing options (reading json lines)
+  cudf::io::json_reader_options json_lines_options =
+    cudf::io::json_reader_options::builder(
+      cudf::io::source_info{json_string.c_str(), json_string.size()})
+      .lines(true);
+
+  // Read test data via existing, non-nested json lines reader
+  cudf::io::table_with_metadata current_reader_table = cudf::io::read_json(json_lines_options);
+
+  // Read test data via new, nested json reader
+  json_lines_options.enable_experimental(true);
+  cudf::io::table_with_metadata new_reader_table = cudf::io::read_json(json_lines_options);
+
+  // Verify that the existing reader and the new nested reader produce equal tables
+  CUDF_TEST_EXPECT_TABLES_EQUAL(current_reader_table.tbl->view(), new_reader_table.tbl->view());
+}
+
 CUDF_TEST_PROGRAM_MAIN()
diff --git a/cpp/tests/io/nested_json_test.cpp b/cpp/tests/io/nested_json_test.cpp
index cae0083daed..7ba7e0a4a03 100644
--- a/cpp/tests/io/nested_json_test.cpp
+++ b/cpp/tests/io/nested_json_test.cpp
@@ -513,37 +513,3 @@ TEST_F(JsonTest, FromParquet)
   // Verify that the schema read via parquet matches the schema read via JSON
   cudf::test::expect_metadata_equal(cudf_table.metadata, result.metadata);
 }
-
-TEST_F(JsonTest, JsonLines)
-{
-  // Prepare cuda stream for data transfers & kernels
-  rmm::cuda_stream stream{};
-  rmm::cuda_stream_view stream_view(stream);
-
-  // Default parsing options
-  cudf::io::json_reader_options json_lines_options =
-    cudf::io::json_reader_options_builder{}.lines(true);
-
-  using cuio_json::SymbolT;
-
-  std::string json_string =
-    R"({"a":"a0"}
-    {"a":"a1"}
-    {"a":"a2", "b":"b2"}
-    {"a":"a3", "c":"c3"}
-    {"a":"a4"})";
-
-  cudf::io::json_reader_options in_options =
-    cudf::io::json_reader_options::builder(
-      cudf::io::source_info{json_string.c_str(), json_string.size()})
-      .lines(true);
-  cudf::io::table_with_metadata old_reader_table = cudf::io::read_json(in_options);
-
-  auto const new_reader_table = cuio_json::detail::parse_nested_json(
-    cudf::host_span<SymbolT const>{json_string.data(), json_string.size()},
-    json_lines_options,
-    stream_view);
-
-  // Verify that the data read via parquet matches the data read via JSON
-  CUDF_TEST_EXPECT_TABLES_EQUAL(old_reader_table.tbl->view(), new_reader_table.tbl->view());
-}

From cdc44411a385fb71da6954a98ebd2a59944fcf0a Mon Sep 17 00:00:00 2001
From: Elias Stehle <3958403+elstehle@users.noreply.github.com>
Date: Wed, 17 Aug 2022 11:58:45 -0700
Subject: [PATCH 25/40] improves code comment

---
 cpp/src/io/json/nested_json_gpu.cu | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/cpp/src/io/json/nested_json_gpu.cu b/cpp/src/io/json/nested_json_gpu.cu
index 26d7aaf3b2b..07348e67b6c 100644
--- a/cpp/src/io/json/nested_json_gpu.cu
+++ b/cpp/src/io/json/nested_json_gpu.cu
@@ -1498,12 +1498,13 @@ table_with_metadata parse_nested_json(host_span<SymbolT const> input,
   constexpr uint32_t token_end_offset_zero      = 0;
   constexpr uint32_t node_init_child_count_zero = 0;
 
-  // We initialize the very root node and root column that represents a list column that contains
-  // all the values found at the root "level" of the given JSON string Initialize the root column
-  // For JSON lines: we expect to find a list of values that all will be inserted into this list
+  // We initialize the very root node and root column, which represent the JSON document being
+  // parsed. That root node is a list node and that root column is a list column. The column has the
+  // root node as its only row. The values parsed from the JSON input will be treated as follows:
+  // (1) For JSON lines: we expect to find a list of JSON values that all
+  // will be inserted into this root list column. (2) For regular JSON: we expect to have only a
+  // single value (list, struct, string, number, literal) that will be inserted into this root
   // column.
-  // For regular JSON: we expect to have only a single value (single row) that will be inserted into
-  // this column
   root_column.append_row(
     row_offset_zero, json_col_t::ListColumn, token_begin_offset_zero, token_end_offset_zero, 1);
 

From ea6959fc6e51c92924ed07680d3b89a27b648144 Mon Sep 17 00:00:00 2001
From: Elias Stehle <3958403+elstehle@users.noreply.github.com>
Date: Mon, 22 Aug 2022 00:59:31 -0700
Subject: [PATCH 26/40] removes in/out specification on params

---
 cpp/src/io/json/nested_json.hpp | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/cpp/src/io/json/nested_json.hpp b/cpp/src/io/json/nested_json.hpp
index 47ce1edafaf..8fa4d82a499 100644
--- a/cpp/src/io/json/nested_json.hpp
+++ b/cpp/src/io/json/nested_json.hpp
@@ -283,10 +283,10 @@ void get_stack_context(device_span<SymbolT const> json_in,
  * @brief Parses the given JSON string and emits a sequence of tokens that demarcate relevant
  * sections from the input.
  *
- * @param[in] json_in The JSON input
- * @param[in] options Parsing options specifying the parsing behaviour
- * @param[in] stream The CUDA stream to which kernels are dispatched
- * @param[in] mr Optional, resource with which to allocate
+ * @param json_in The JSON input
+ * @param options Parsing options specifying the parsing behaviour
+ * @param stream The CUDA stream to which kernels are dispatched
+ * @param mr Optional, resource with which to allocate
  * @return Pair of device vectors, where the first vector represents the token types and the second
  * vector represents the index within the input corresponding to each token
  */
@@ -299,10 +299,10 @@ std::pair<rmm::device_uvector<PdaTokenT>, rmm::device_uvector<SymbolOffsetT>> ge
 /**
  * @brief Parses the given JSON string and generates table from the given input.
  *
- * @param[in] input The JSON input
- * @param[in] options Parsing options specifying the parsing behaviour
- * @param[in] stream The CUDA stream to which kernels are dispatched
- * @param[in] mr Optional, resource with which to allocate
+ * @param input The JSON input
+ * @param options Parsing options specifying the parsing behaviour
+ * @param stream The CUDA stream to which kernels are dispatched
+ * @param mr Optional, resource with which to allocate
  * @return The data parsed from the given JSON input
  */
 table_with_metadata parse_nested_json(

From 00be9159aa81e508d42c79abbe7fbf3ef29a6744 Mon Sep 17 00:00:00 2001
From: Elias Stehle <3958403+elstehle@users.noreply.github.com>
Date: Mon, 22 Aug 2022 01:00:29 -0700
Subject: [PATCH 27/40] removes _gpu suffix from tokens

---
 cpp/src/io/json/nested_json_gpu.cu | 39 +++++++++++++++---------------
 1 file changed, 19 insertions(+), 20 deletions(-)

diff --git a/cpp/src/io/json/nested_json_gpu.cu b/cpp/src/io/json/nested_json_gpu.cu
index 07348e67b6c..09dcce6ddd7 100644
--- a/cpp/src/io/json/nested_json_gpu.cu
+++ b/cpp/src/io/json/nested_json_gpu.cu
@@ -1028,7 +1028,7 @@ void make_json_column(json_column& root_column,
   const auto [d_tokens_gpu, d_token_indices_gpu] = get_token_stream(d_input, options, stream, mr);
 
   // Copy the JSON tokens to the host
-  thrust::host_vector<PdaTokenT> tokens_gpu =
+  thrust::host_vector<PdaTokenT> tokens =
     cudf::detail::make_host_vector_async(d_tokens_gpu, stream);
   thrust::host_vector<SymbolOffsetT> token_indices_gpu =
     cudf::detail::make_host_vector_async(d_token_indices_gpu, stream);
@@ -1219,12 +1219,12 @@ void make_json_column(json_column& root_column,
   // INITIALIZE JSON ROOT NODE
   //--------------------------------------------------------------------------------
   // The JSON root may only be a struct, list, string, or value node
-  CUDF_EXPECTS(tokens_gpu.size() == token_indices_gpu.size(),
+  CUDF_EXPECTS(tokens.size() == token_indices_gpu.size(),
                "Unexpected mismatch in number of token types and token indices");
-  CUDF_EXPECTS(tokens_gpu.size() > 0, "Empty JSON input not supported");
-  CUDF_EXPECTS(is_valid_root_token(tokens_gpu[offset]), "Invalid beginning of JSON document");
+  CUDF_EXPECTS(tokens.size() > 0, "Empty JSON input not supported");
+  CUDF_EXPECTS(is_valid_root_token(tokens[offset]), "Invalid beginning of JSON document");
 
-  while (offset < tokens_gpu.size()) {
+  while (offset < tokens.size()) {
     // Verify there's at least the JSON root node left on the stack to which we can append data
     CUDF_EXPECTS(current_data_path.size() > 0, "Invalid JSON structure");
 
@@ -1234,7 +1234,7 @@ void make_json_column(json_column& root_column,
                  "Invalid JSON structure");
 
     // The token we're currently parsing
-    auto const& token = tokens_gpu[offset];
+    auto const& token = tokens[offset];
 
 #ifdef NJP_DEBUG_PRINT
     std::cout << "[" << token_to_string(token) << "]\n";
@@ -1257,9 +1257,9 @@ void make_json_column(json_column& root_column,
 
       // Add this struct node to the current column
       selected_col->append_row(target_row_index,
-                               token_to_column_type(tokens_gpu[offset]),
-                               get_token_index(tokens_gpu[offset], token_indices_gpu[offset]),
-                               get_token_index(tokens_gpu[offset], token_indices_gpu[offset]),
+                               token_to_column_type(tokens[offset]),
+                               get_token_index(tokens[offset], token_indices_gpu[offset]),
+                               get_token_index(tokens[offset], token_indices_gpu[offset]),
                                zero_child_count);
     }
 
@@ -1274,7 +1274,7 @@ void make_json_column(json_column& root_column,
       // Update row to account for string offset
       update_row(current_data_path.top().column,
                  current_data_path.top().row_index,
-                 get_token_index(tokens_gpu[offset], token_indices_gpu[offset]),
+                 get_token_index(tokens[offset], token_indices_gpu[offset]),
                  current_data_path.top().num_children);
 
       // Pop struct from the path stack
@@ -1295,9 +1295,9 @@ void make_json_column(json_column& root_column,
 
       // Add this struct node to the current column
       selected_col->append_row(target_row_index,
-                               token_to_column_type(tokens_gpu[offset]),
-                               get_token_index(tokens_gpu[offset], token_indices_gpu[offset]),
-                               get_token_index(tokens_gpu[offset], token_indices_gpu[offset]),
+                               token_to_column_type(tokens[offset]),
+                               get_token_index(tokens[offset], token_indices_gpu[offset]),
+                               get_token_index(tokens[offset], token_indices_gpu[offset]),
                                zero_child_count);
     }
 
@@ -1312,7 +1312,7 @@ void make_json_column(json_column& root_column,
       // Update row to account for string offset
       update_row(current_data_path.top().column,
                  current_data_path.top().row_index,
-                 get_token_index(tokens_gpu[offset], token_indices_gpu[offset]),
+                 get_token_index(tokens[offset], token_indices_gpu[offset]),
                  current_data_path.top().num_children);
 
       // Pop list from the path stack
@@ -1323,7 +1323,7 @@ void make_json_column(json_column& root_column,
     else if (token == token_t::ErrorBegin) {
 #ifdef NJP_DEBUG_PRINT
       std::cout << "[ErrorBegin]\n";
-      std::cout << "@" << get_token_index(tokens_gpu[offset], token_indices_gpu[offset]);
+      std::cout << "@" << get_token_index(tokens[offset], token_indices_gpu[offset]);
 #endif
       CUDF_FAIL("Parser encountered an invalid format.");
     }
@@ -1332,16 +1332,15 @@ void make_json_column(json_column& root_column,
     else if (token == token_t::FieldNameBegin or token == token_t::StringBegin or
              token == token_t::ValueBegin) {
       // Verify that this token has the right successor to build a correct (being, end) token pair
-      CUDF_EXPECTS((offset + 1) < tokens_gpu.size(), "Invalid JSON token sequence");
-      CUDF_EXPECTS(tokens_gpu[offset + 1] == end_of_partner(token), "Invalid JSON token sequence");
+      CUDF_EXPECTS((offset + 1) < tokens.size(), "Invalid JSON token sequence");
+      CUDF_EXPECTS(tokens[offset + 1] == end_of_partner(token), "Invalid JSON token sequence");
 
       // The offset to the first symbol from the JSON input associated with the current token
-      auto const& token_begin_offset =
-        get_token_index(tokens_gpu[offset], token_indices_gpu[offset]);
+      auto const& token_begin_offset = get_token_index(tokens[offset], token_indices_gpu[offset]);
 
       // The offset to one past the last symbol associated with the current token
       auto const& token_end_offset =
-        get_token_index(tokens_gpu[offset + 1], token_indices_gpu[offset + 1]);
+        get_token_index(tokens[offset + 1], token_indices_gpu[offset + 1]);
 
       // FieldNameBegin
       // For the current struct node in the tree, select the child column corresponding to this

From 73ff3075e63fefc9025ad34be82ad7db89ed21a4 Mon Sep 17 00:00:00 2001
From: Elias Stehle <3958403+elstehle@users.noreply.github.com>
Date: Mon, 22 Aug 2022 14:41:15 -0700
Subject: [PATCH 28/40] better translation table comments thx @upsj

---
 cpp/src/io/json/nested_json_gpu.cu | 761 +++++++++++++++--------------
 1 file changed, 385 insertions(+), 376 deletions(-)

diff --git a/cpp/src/io/json/nested_json_gpu.cu b/cpp/src/io/json/nested_json_gpu.cu
index 09dcce6ddd7..826553c0a03 100644
--- a/cpp/src/io/json/nested_json_gpu.cu
+++ b/cpp/src/io/json/nested_json_gpu.cu
@@ -471,402 +471,411 @@ auto get_transition_table(bool newline_delimited_json)
 auto get_translation_table()
 {
   std::array<std::array<std::vector<char>, NUM_PDA_SGIDS>, PD_NUM_STATES> pda_tlt;
-  pda_tlt[static_cast<StateT>(pda_state_t::PD_BOV)] = {{/*ROOT*/
-                                                        {token_t::StructBegin},
-                                                        {token_t::ListBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::StringBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {},
-                                                        {},
-                                                        {token_t::ValueBegin},
+  pda_tlt[static_cast<StateT>(pda_state_t::PD_BOV)] = {{                         /*ROOT*/
+                                                        {token_t::StructBegin},  // OPENING_BRACE
+                                                        {token_t::ListBegin},    // OPENING_BRACKET
+                                                        {token_t::ErrorBegin},   // CLOSING_BRACE
+                                                        {token_t::ErrorBegin},   // CLOSING_BRACKET
+                                                        {token_t::StringBegin},  // QUOTE
+                                                        {token_t::ErrorBegin},   // ESCAPE
+                                                        {token_t::ErrorBegin},   // COMMA
+                                                        {token_t::ErrorBegin},   // COLON
+                                                        {},                      // WHITE_SPACE
+                                                        {},                      // LINE_BREAK
+                                                        {token_t::ValueBegin},   // OTHER
                                                         /*LIST*/
-                                                        {token_t::StructBegin},
-                                                        {token_t::ListBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::StringBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {},
-                                                        {},
-                                                        {token_t::ValueBegin},
+                                                        {token_t::StructBegin},  // OPENING_BRACE
+                                                        {token_t::ListBegin},    // OPENING_BRACKET
+                                                        {token_t::ErrorBegin},   // CLOSING_BRACE
+                                                        {token_t::ErrorBegin},   // CLOSING_BRACKET
+                                                        {token_t::StringBegin},  // QUOTE
+                                                        {token_t::ErrorBegin},   // ESCAPE
+                                                        {token_t::ErrorBegin},   // COMMA
+                                                        {token_t::ErrorBegin},   // COLON
+                                                        {},                      // WHITE_SPACE
+                                                        {},                      // LINE_BREAK
+                                                        {token_t::ValueBegin},   // OTHER
                                                         /*STRUCT*/
-                                                        {token_t::StructBegin},
-                                                        {token_t::ListBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::StringBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {},
-                                                        {},
-                                                        {token_t::ValueBegin}}};
-  pda_tlt[static_cast<StateT>(pda_state_t::PD_BOA)] = {{/*ROOT*/
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
+                                                        {token_t::StructBegin},   // OPENING_BRACE
+                                                        {token_t::ListBegin},     // OPENING_BRACKET
+                                                        {token_t::ErrorBegin},    // CLOSING_BRACE
+                                                        {token_t::ErrorBegin},    // CLOSING_BRACKET
+                                                        {token_t::StringBegin},   // QUOTE
+                                                        {token_t::ErrorBegin},    // ESCAPE
+                                                        {token_t::ErrorBegin},    // COMMA
+                                                        {token_t::ErrorBegin},    // COLON
+                                                        {},                       // WHITE_SPACE
+                                                        {},                       // LINE_BREAK
+                                                        {token_t::ValueBegin}}};  // OTHER
+  pda_tlt[static_cast<StateT>(pda_state_t::PD_BOA)] = {{                          /*ROOT*/
+                                                        {token_t::ErrorBegin},    // OPENING_BRACE
+                                                        {token_t::ErrorBegin},    // OPENING_BRACKET
+                                                        {token_t::ErrorBegin},    // CLOSING_BRACE
+                                                        {token_t::ErrorBegin},    // CLOSING_BRACKET
+                                                        {token_t::ErrorBegin},    // QUOTE
+                                                        {token_t::ErrorBegin},    // ESCAPE
+                                                        {token_t::ErrorBegin},    // COMMA
+                                                        {token_t::ErrorBegin},    // COLON
+                                                        {token_t::ErrorBegin},    // WHITE_SPACE
+                                                        {token_t::ErrorBegin},    // LINE_BREAK
+                                                        {token_t::ErrorBegin},    // OTHER
                                                         /*LIST*/
-                                                        {token_t::StructBegin},
-                                                        {token_t::ListBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ListEnd},
-                                                        {token_t::StringBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {},
-                                                        {},
-                                                        {token_t::ValueBegin},
+                                                        {token_t::StructBegin},  // OPENING_BRACE
+                                                        {token_t::ListBegin},    // OPENING_BRACKET
+                                                        {token_t::ErrorBegin},   // CLOSING_BRACE
+                                                        {token_t::ListEnd},      // CLOSING_BRACKET
+                                                        {token_t::StringBegin},  // QUOTE
+                                                        {token_t::ErrorBegin},   // ESCAPE
+                                                        {token_t::ErrorBegin},   // COMMA
+                                                        {token_t::ErrorBegin},   // COLON
+                                                        {},                      // WHITE_SPACE
+                                                        {},                      // LINE_BREAK
+                                                        {token_t::ValueBegin},   // OTHER
                                                         /*STRUCT*/
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::StructEnd},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::FieldNameBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {},
-                                                        {},
-                                                        {token_t::ErrorBegin}}};
-  pda_tlt[static_cast<StateT>(pda_state_t::PD_LON)] = {{/*ROOT*/
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ValueEnd},
-                                                        {token_t::ValueEnd},
-                                                        {},
+                                                        {token_t::ErrorBegin},  // OPENING_BRACE
+                                                        {token_t::ErrorBegin},  // OPENING_BRACKET
+                                                        {token_t::StructEnd},   // CLOSING_BRACE
+                                                        {token_t::ErrorBegin},  // CLOSING_BRACKET
+                                                        {token_t::FieldNameBegin},  // QUOTE
+                                                        {token_t::ErrorBegin},      // ESCAPE
+                                                        {token_t::ErrorBegin},      // COMMA
+                                                        {token_t::ErrorBegin},      // COLON
+                                                        {},                         // WHITE_SPACE
+                                                        {},                         // LINE_BREAK
+                                                        {token_t::ErrorBegin}}};    // OTHER
+  pda_tlt[static_cast<StateT>(pda_state_t::PD_LON)] = {
+    {                        /*ROOT*/
+     {token_t::ErrorBegin},  // OPENING_BRACE
+     {token_t::ErrorBegin},  // OPENING_BRACKET
+     {token_t::ErrorBegin},  // CLOSING_BRACE
+     {token_t::ErrorBegin},  // CLOSING_BRACKET
+     {token_t::ErrorBegin},  // QUOTE
+     {token_t::ErrorBegin},  // ESCAPE
+     {token_t::ErrorBegin},  // COMMA
+     {token_t::ErrorBegin},  // COLON
+     {token_t::ValueEnd},    // WHITE_SPACE
+     {token_t::ValueEnd},    // LINE_BREAK
+     {},                     // OTHER
+     /*LIST*/
+     {token_t::ErrorBegin},                  // OPENING_BRACE
+     {token_t::ErrorBegin},                  // OPENING_BRACKET
+     {token_t::ErrorBegin},                  // CLOSING_BRACE
+     {token_t::ValueEnd, token_t::ListEnd},  // CLOSING_BRACKET
+     {token_t::ErrorBegin},                  // QUOTE
+     {token_t::ErrorBegin},                  // ESCAPE
+     {token_t::ValueEnd},                    // COMMA
+     {token_t::ErrorBegin},                  // COLON
+     {token_t::ValueEnd},                    // WHITE_SPACE
+     {token_t::ValueEnd},                    // LINE_BREAK
+     {},                                     // OTHER
+     /*STRUCT*/
+     {token_t::ErrorBegin},                    // OPENING_BRACE
+     {token_t::ErrorBegin},                    // OPENING_BRACKET
+     {token_t::ValueEnd, token_t::StructEnd},  // CLOSING_BRACE
+     {token_t::ErrorBegin},                    // CLOSING_BRACKET
+     {token_t::ErrorBegin},                    // QUOTE
+     {token_t::ErrorBegin},                    // ESCAPE
+     {token_t::ValueEnd},                      // COMMA
+     {token_t::ErrorBegin},                    // COLON
+     {token_t::ValueEnd},                      // WHITE_SPACE
+     {token_t::ValueEnd},                      // LINE_BREAK
+     {}}};                                     // OTHER
+
+  pda_tlt[static_cast<StateT>(pda_state_t::PD_STR)] = {{                       /*ROOT*/
+                                                        {},                    // OPENING_BRACE
+                                                        {},                    // OPENING_BRACKET
+                                                        {},                    // CLOSING_BRACE
+                                                        {},                    // CLOSING_BRACKET
+                                                        {token_t::StringEnd},  // QUOTE
+                                                        {},                    // ESCAPE
+                                                        {},                    // COMMA
+                                                        {},                    // COLON
+                                                        {},                    // WHITE_SPACE
+                                                        {},                    // LINE_BREAK
+                                                        {},                    // OTHER
                                                         /*LIST*/
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ValueEnd, token_t::ListEnd},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ValueEnd},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ValueEnd},
-                                                        {token_t::ValueEnd},
-                                                        {},
+                                                        {},                    // OPENING_BRACE
+                                                        {},                    // OPENING_BRACKET
+                                                        {},                    // CLOSING_BRACE
+                                                        {},                    // CLOSING_BRACKET
+                                                        {token_t::StringEnd},  // QUOTE
+                                                        {},                    // ESCAPE
+                                                        {},                    // COMMA
+                                                        {},                    // COLON
+                                                        {},                    // WHITE_SPACE
+                                                        {},                    // LINE_BREAK
+                                                        {},                    // OTHER
                                                         /*STRUCT*/
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ValueEnd, token_t::StructEnd},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ValueEnd},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ValueEnd},
-                                                        {token_t::ValueEnd},
-                                                        {}}};
-  pda_tlt[static_cast<StateT>(pda_state_t::PD_STR)] = {{/*ROOT*/
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {token_t::StringEnd},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
+                                                        {},                    // OPENING_BRACE
+                                                        {},                    // OPENING_BRACKET
+                                                        {},                    // CLOSING_BRACE
+                                                        {},                    // CLOSING_BRACKET
+                                                        {token_t::StringEnd},  // QUOTE
+                                                        {},                    // ESCAPE
+                                                        {},                    // COMMA
+                                                        {},                    // COLON
+                                                        {},                    // WHITE_SPACE
+                                                        {},                    // LINE_BREAK
+                                                        {}}};                  // OTHER
+
+  pda_tlt[static_cast<StateT>(pda_state_t::PD_SCE)] = {{     /*ROOT*/
+                                                        {},  // OPENING_BRACE
+                                                        {},  // OPENING_BRACKET
+                                                        {},  // CLOSING_BRACE
+                                                        {},  // CLOSING_BRACKET
+                                                        {},  // QUOTE
+                                                        {},  // ESCAPE
+                                                        {},  // COMMA
+                                                        {},  // COLON
+                                                        {},  // WHITE_SPACE
+                                                        {},  // LINE_BREAK
+                                                        {},  // OTHER
                                                         /*LIST*/
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {token_t::StringEnd},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
+                                                        {},  // OPENING_BRACE
+                                                        {},  // OPENING_BRACKET
+                                                        {},  // CLOSING_BRACE
+                                                        {},  // CLOSING_BRACKET
+                                                        {},  // QUOTE
+                                                        {},  // ESCAPE
+                                                        {},  // COMMA
+                                                        {},  // COLON
+                                                        {},  // WHITE_SPACE
+                                                        {},  // LINE_BREAK
+                                                        {},  // OTHER
                                                         /*STRUCT*/
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {token_t::StringEnd},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {}}};
-  pda_tlt[static_cast<StateT>(pda_state_t::PD_SCE)] = {{/*ROOT*/
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
+                                                        {},    // OPENING_BRACE
+                                                        {},    // OPENING_BRACKET
+                                                        {},    // CLOSING_BRACE
+                                                        {},    // CLOSING_BRACKET
+                                                        {},    // QUOTE
+                                                        {},    // ESCAPE
+                                                        {},    // COMMA
+                                                        {},    // COLON
+                                                        {},    // WHITE_SPACE
+                                                        {},    // LINE_BREAK
+                                                        {}}};  // OTHER
+
+  pda_tlt[static_cast<StateT>(pda_state_t::PD_PVL)] = {{                        /*ROOT*/
+                                                        {token_t::ErrorBegin},  // OPENING_BRACE
+                                                        {token_t::ErrorBegin},  // OPENING_BRACKET
+                                                        {token_t::ErrorBegin},  // CLOSING_BRACE
+                                                        {token_t::ErrorBegin},  // CLOSING_BRACKET
+                                                        {token_t::ErrorBegin},  // QUOTE
+                                                        {token_t::ErrorBegin},  // ESCAPE
+                                                        {token_t::ErrorBegin},  // COMMA
+                                                        {token_t::ErrorBegin},  // COLON
+                                                        {},                     // WHITE_SPACE
+                                                        {},                     // LINE_BREAK
+                                                        {token_t::ErrorBegin},  // OTHER
                                                         /*LIST*/
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
+                                                        {token_t::ErrorBegin},  // OPENING_BRACE
+                                                        {token_t::ErrorBegin},  // OPENING_BRACKET
+                                                        {token_t::ErrorBegin},  // CLOSING_BRACE
+                                                        {token_t::ListEnd},     // CLOSING_BRACKET
+                                                        {token_t::ErrorBegin},  // QUOTE
+                                                        {token_t::ErrorBegin},  // ESCAPE
+                                                        {},                     // COMMA
+                                                        {token_t::ErrorBegin},  // COLON
+                                                        {},                     // WHITE_SPACE
+                                                        {},                     // LINE_BREAK
+                                                        {token_t::ErrorBegin},  // OTHER
                                                         /*STRUCT*/
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {}}};
-  pda_tlt[static_cast<StateT>(pda_state_t::PD_PVL)] = {{/*ROOT*/
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {},
-                                                        {},
-                                                        {token_t::ErrorBegin},
+                                                        {token_t::ErrorBegin},    // OPENING_BRACE
+                                                        {token_t::ErrorBegin},    // OPENING_BRACKET
+                                                        {token_t::StructEnd},     // CLOSING_BRACE
+                                                        {token_t::ErrorBegin},    // CLOSING_BRACKET
+                                                        {token_t::ErrorBegin},    // QUOTE
+                                                        {token_t::ErrorBegin},    // ESCAPE
+                                                        {},                       // COMMA
+                                                        {token_t::ErrorBegin},    // COLON
+                                                        {},                       // WHITE_SPACE
+                                                        {},                       // LINE_BREAK
+                                                        {token_t::ErrorBegin}}};  // OTHER
+
+  pda_tlt[static_cast<StateT>(pda_state_t::PD_BFN)] = {{                        /*ROOT*/
+                                                        {token_t::ErrorBegin},  // OPENING_BRACE
+                                                        {token_t::ErrorBegin},  // OPENING_BRACKET
+                                                        {token_t::ErrorBegin},  // CLOSING_BRACE
+                                                        {token_t::ErrorBegin},  // CLOSING_BRACKET
+                                                        {token_t::ErrorBegin},  // QUOTE
+                                                        {token_t::ErrorBegin},  // ESCAPE
+                                                        {token_t::ErrorBegin},  // COMMA
+                                                        {token_t::ErrorBegin},  // COLON
+                                                        {token_t::ErrorBegin},  // WHITE_SPACE
+                                                        {token_t::ErrorBegin},  // LINE_BREAK
+                                                        {token_t::ErrorBegin},  // OTHER
                                                         /*LIST*/
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ListEnd},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {},
-                                                        {token_t::ErrorBegin},
-                                                        {},
-                                                        {},
-                                                        {token_t::ErrorBegin},
+                                                        {token_t::ErrorBegin},  // OPENING_BRACE
+                                                        {token_t::ErrorBegin},  // OPENING_BRACKET
+                                                        {token_t::ErrorBegin},  // CLOSING_BRACE
+                                                        {token_t::ErrorBegin},  // CLOSING_BRACKET
+                                                        {token_t::ErrorBegin},  // QUOTE
+                                                        {token_t::ErrorBegin},  // ESCAPE
+                                                        {token_t::ErrorBegin},  // COMMA
+                                                        {token_t::ErrorBegin},  // COLON
+                                                        {token_t::ErrorBegin},  // WHITE_SPACE
+                                                        {token_t::ErrorBegin},  // LINE_BREAK
+                                                        {token_t::ErrorBegin},  // OTHER
                                                         /*STRUCT*/
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::StructEnd},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {},
-                                                        {token_t::ErrorBegin},
-                                                        {},
-                                                        {},
-                                                        {token_t::ErrorBegin}}};
-  pda_tlt[static_cast<StateT>(pda_state_t::PD_BFN)] = {{/*ROOT*/
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
+                                                        {token_t::ErrorBegin},  // OPENING_BRACE
+                                                        {token_t::ErrorBegin},  // OPENING_BRACKET
+                                                        {token_t::ErrorBegin},  // CLOSING_BRACE
+                                                        {token_t::ErrorBegin},  // CLOSING_BRACKET
+                                                        {token_t::FieldNameBegin},  // QUOTE
+                                                        {token_t::ErrorBegin},      // ESCAPE
+                                                        {token_t::ErrorBegin},      // COMMA
+                                                        {token_t::ErrorBegin},      // COLON
+                                                        {},                         // WHITE_SPACE
+                                                        {},                         // LINE_BREAK
+                                                        {token_t::ErrorBegin}}};    // OTHER
+
+  pda_tlt[static_cast<StateT>(pda_state_t::PD_FLN)] = {{                        /*ROOT*/
+                                                        {token_t::ErrorBegin},  // OPENING_BRACE
+                                                        {token_t::ErrorBegin},  // OPENING_BRACKET
+                                                        {token_t::ErrorBegin},  // CLOSING_BRACE
+                                                        {token_t::ErrorBegin},  // CLOSING_BRACKET
+                                                        {token_t::ErrorBegin},  // QUOTE
+                                                        {token_t::ErrorBegin},  // ESCAPE
+                                                        {token_t::ErrorBegin},  // COMMA
+                                                        {token_t::ErrorBegin},  // COLON
+                                                        {token_t::ErrorBegin},  // WHITE_SPACE
+                                                        {token_t::ErrorBegin},  // LINE_BREAK
+                                                        {token_t::ErrorBegin},  // OTHER
                                                         /*LIST*/
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
+                                                        {token_t::ErrorBegin},  // OPENING_BRACE
+                                                        {token_t::ErrorBegin},  // OPENING_BRACKET
+                                                        {token_t::ErrorBegin},  // CLOSING_BRACE
+                                                        {token_t::ErrorBegin},  // CLOSING_BRACKET
+                                                        {token_t::ErrorBegin},  // QUOTE
+                                                        {token_t::ErrorBegin},  // ESCAPE
+                                                        {token_t::ErrorBegin},  // COMMA
+                                                        {token_t::ErrorBegin},  // COLON
+                                                        {token_t::ErrorBegin},  // WHITE_SPACE
+                                                        {token_t::ErrorBegin},  // LINE_BREAK
+                                                        {token_t::ErrorBegin},  // OTHER
                                                         /*STRUCT*/
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::FieldNameBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {},
-                                                        {},
-                                                        {token_t::ErrorBegin}}};
-  pda_tlt[static_cast<StateT>(pda_state_t::PD_FLN)] = {{/*ROOT*/
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
+                                                        {},                       // OPENING_BRACE
+                                                        {},                       // OPENING_BRACKET
+                                                        {},                       // CLOSING_BRACE
+                                                        {},                       // CLOSING_BRACKET
+                                                        {token_t::FieldNameEnd},  // QUOTE
+                                                        {},                       // ESCAPE
+                                                        {},                       // COMMA
+                                                        {},                       // COLON
+                                                        {},                       // WHITE_SPACE
+                                                        {},                       // LINE_BREAK
+                                                        {}}};                     // OTHER
+
+  pda_tlt[static_cast<StateT>(pda_state_t::PD_FNE)] = {{                        /*ROOT*/
+                                                        {token_t::ErrorBegin},  // OPENING_BRACE
+                                                        {token_t::ErrorBegin},  // OPENING_BRACKET
+                                                        {token_t::ErrorBegin},  // CLOSING_BRACE
+                                                        {token_t::ErrorBegin},  // CLOSING_BRACKET
+                                                        {token_t::ErrorBegin},  // QUOTE
+                                                        {token_t::ErrorBegin},  // ESCAPE
+                                                        {token_t::ErrorBegin},  // COMMA
+                                                        {token_t::ErrorBegin},  // COLON
+                                                        {token_t::ErrorBegin},  // WHITE_SPACE
+                                                        {token_t::ErrorBegin},  // LINE_BREAK
+                                                        {token_t::ErrorBegin},  // OTHER
                                                         /*LIST*/
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
+                                                        {token_t::ErrorBegin},  // OPENING_BRACE
+                                                        {token_t::ErrorBegin},  // OPENING_BRACKET
+                                                        {token_t::ErrorBegin},  // CLOSING_BRACE
+                                                        {token_t::ErrorBegin},  // CLOSING_BRACKET
+                                                        {token_t::ErrorBegin},  // QUOTE
+                                                        {token_t::ErrorBegin},  // ESCAPE
+                                                        {token_t::ErrorBegin},  // COMMA
+                                                        {token_t::ErrorBegin},  // COLON
+                                                        {token_t::ErrorBegin},  // WHITE_SPACE
+                                                        {token_t::ErrorBegin},  // LINE_BREAK
+                                                        {token_t::ErrorBegin},  // OTHER
                                                         /*STRUCT*/
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {token_t::FieldNameEnd},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {}}};
-  pda_tlt[static_cast<StateT>(pda_state_t::PD_FNE)] = {{/*ROOT*/
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
+                                                        {},    // OPENING_BRACE
+                                                        {},    // OPENING_BRACKET
+                                                        {},    // CLOSING_BRACE
+                                                        {},    // CLOSING_BRACKET
+                                                        {},    // QUOTE
+                                                        {},    // ESCAPE
+                                                        {},    // COMMA
+                                                        {},    // COLON
+                                                        {},    // WHITE_SPACE
+                                                        {},    // LINE_BREAK
+                                                        {}}};  // OTHER
+
+  pda_tlt[static_cast<StateT>(pda_state_t::PD_PFN)] = {{                        /*ROOT*/
+                                                        {token_t::ErrorBegin},  // OPENING_BRACE
+                                                        {token_t::ErrorBegin},  // OPENING_BRACKET
+                                                        {token_t::ErrorBegin},  // CLOSING_BRACE
+                                                        {token_t::ErrorBegin},  // CLOSING_BRACKET
+                                                        {token_t::ErrorBegin},  // QUOTE
+                                                        {token_t::ErrorBegin},  // ESCAPE
+                                                        {token_t::ErrorBegin},  // COMMA
+                                                        {token_t::ErrorBegin},  // COLON
+                                                        {token_t::ErrorBegin},  // WHITE_SPACE
+                                                        {token_t::ErrorBegin},  // LINE_BREAK
+                                                        {token_t::ErrorBegin},  // OTHER
                                                         /*LIST*/
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
+                                                        {token_t::ErrorBegin},  // OPENING_BRACE
+                                                        {token_t::ErrorBegin},  // OPENING_BRACKET
+                                                        {token_t::ErrorBegin},  // CLOSING_BRACE
+                                                        {token_t::ErrorBegin},  // CLOSING_BRACKET
+                                                        {token_t::ErrorBegin},  // QUOTE
+                                                        {token_t::ErrorBegin},  // ESCAPE
+                                                        {token_t::ErrorBegin},  // COMMA
+                                                        {token_t::ErrorBegin},  // COLON
+                                                        {token_t::ErrorBegin},  // WHITE_SPACE
+                                                        {token_t::ErrorBegin},  // LINE_BREAK
+                                                        {token_t::ErrorBegin},  // OTHER
                                                         /*STRUCT*/
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {}}};
-  pda_tlt[static_cast<StateT>(pda_state_t::PD_PFN)] = {{/*ROOT*/
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
+                                                        {token_t::ErrorBegin},    // OPENING_BRACE
+                                                        {token_t::ErrorBegin},    // OPENING_BRACKET
+                                                        {token_t::ErrorBegin},    // CLOSING_BRACE
+                                                        {token_t::ErrorBegin},    // CLOSING_BRACKET
+                                                        {token_t::ErrorBegin},    // QUOTE
+                                                        {token_t::ErrorBegin},    // ESCAPE
+                                                        {token_t::ErrorBegin},    // COMMA
+                                                        {},                       // COLON
+                                                        {},                       // WHITE_SPACE
+                                                        {},                       // LINE_BREAK
+                                                        {token_t::ErrorBegin}}};  // OTHER
+
+  pda_tlt[static_cast<StateT>(pda_state_t::PD_ERR)] = {{     /*ROOT*/
+                                                        {},  // OPENING_BRACE
+                                                        {},  // OPENING_BRACKET
+                                                        {},  // CLOSING_BRACE
+                                                        {},  // CLOSING_BRACKET
+                                                        {},  // QUOTE
+                                                        {},  // ESCAPE
+                                                        {},  // COMMA
+                                                        {},  // COLON
+                                                        {},  // WHITE_SPACE
+                                                        {},  // LINE_BREAK
+                                                        {},  // OTHER
                                                         /*LIST*/
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
+                                                        {},  // OPENING_BRACE
+                                                        {},  // OPENING_BRACKET
+                                                        {},  // CLOSING_BRACE
+                                                        {},  // CLOSING_BRACKET
+                                                        {},  // QUOTE
+                                                        {},  // ESCAPE
+                                                        {},  // COMMA
+                                                        {},  // COLON
+                                                        {},  // WHITE_SPACE
+                                                        {},  // LINE_BREAK
+                                                        {},  // OTHER
                                                         /*STRUCT*/
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {token_t::ErrorBegin},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {token_t::ErrorBegin}}};
-  pda_tlt[static_cast<StateT>(pda_state_t::PD_ERR)] = {{/*ROOT*/
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        /*LIST*/
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        /*STRUCT*/
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {},
-                                                        {}}};
+                                                        {},    // OPENING_BRACE
+                                                        {},    // OPENING_BRACKET
+                                                        {},    // CLOSING_BRACE
+                                                        {},    // CLOSING_BRACKET
+                                                        {},    // QUOTE
+                                                        {},    // ESCAPE
+                                                        {},    // COMMA
+                                                        {},    // COLON
+                                                        {},    // WHITE_SPACE
+                                                        {},    // LINE_BREAK
+                                                        {}}};  // OTHER
   return pda_tlt;
 }
 

From 39243f343dcf1ddd0a758cf3c0e1f7e841bd706a Mon Sep 17 00:00:00 2001
From: Elias Stehle <3958403+elstehle@users.noreply.github.com>
Date: Mon, 22 Aug 2022 14:49:24 -0700
Subject: [PATCH 29/40] uses device_scalar and better generator

---
 cpp/src/io/json/nested_json_gpu.cu | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/cpp/src/io/json/nested_json_gpu.cu b/cpp/src/io/json/nested_json_gpu.cu
index 826553c0a03..03e66f14c43 100644
--- a/cpp/src/io/json/nested_json_gpu.cu
+++ b/cpp/src/io/json/nested_json_gpu.cu
@@ -957,7 +957,7 @@ std::pair<rmm::device_uvector<PdaTokenT>, rmm::device_uvector<SymbolOffsetT>> ge
   constexpr std::size_t single_item_count = 1ULL;
   rmm::device_uvector<PdaTokenT> tokens{json_in.size(), stream, mr};
   rmm::device_uvector<SymbolOffsetT> tokens_indices{json_in.size(), stream, mr};
-  rmm::device_uvector<SymbolOffsetT> num_written_tokens{single_item_count, stream};
+  rmm::device_scalar<SymbolOffsetT> num_written_tokens{stream, mr};
 
   auto const new_line_delimited_json = options.is_enabled_lines();
 
@@ -985,9 +985,9 @@ std::pair<rmm::device_uvector<PdaTokenT>, rmm::device_uvector<SymbolOffsetT>> ge
 
   // Instantiating PDA transducer
   std::vector<std::vector<char>> pda_sgid_identity{tokenizer_pda::NUM_PDA_SGIDS};
-  std::generate(std::begin(pda_sgid_identity), std::end(pda_sgid_identity), [i = 0]() mutable {
-    return std::vector<char>{static_cast<char>(i++)};
-  });
+  std::generate(std::begin(pda_sgid_identity),
+                std::end(pda_sgid_identity),
+                [i = char{0}]() mutable { return std::vector<char>{i++}; });
   ToTokenStreamFstT json_to_tokens_fst{pda_sgid_identity,
                                        tokenizer_pda::get_transition_table(new_line_delimited_json),
                                        tokenizer_pda::get_translation_table(),
@@ -1002,7 +1002,7 @@ std::pair<rmm::device_uvector<PdaTokenT>, rmm::device_uvector<SymbolOffsetT>> ge
                                tokenizer_pda::start_state,
                                stream);
 
-  auto num_total_tokens = num_written_tokens.front_element(stream);
+  auto num_total_tokens = num_written_tokens.value(stream);
   tokens.resize(num_total_tokens, stream);
   tokens_indices.resize(num_total_tokens, stream);
 

From 722017477d519a18132e5d71917793b7d94d8168 Mon Sep 17 00:00:00 2001
From: Elias Stehle <3958403+elstehle@users.noreply.github.com>
Date: Tue, 23 Aug 2022 08:56:36 -0700
Subject: [PATCH 30/40] removes code comment banner

---
 cpp/src/io/json/nested_json_gpu.cu | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/cpp/src/io/json/nested_json_gpu.cu b/cpp/src/io/json/nested_json_gpu.cu
index 03e66f14c43..21833800063 100644
--- a/cpp/src/io/json/nested_json_gpu.cu
+++ b/cpp/src/io/json/nested_json_gpu.cu
@@ -1224,13 +1224,11 @@ void make_json_column(json_column& root_column,
   // Giving names to magic constants
   constexpr uint32_t zero_child_count = 0;
 
-  //--------------------------------------------------------------------------------
-  // INITIALIZE JSON ROOT NODE
-  //--------------------------------------------------------------------------------
-  // The JSON root may only be a struct, list, string, or value node
   CUDF_EXPECTS(tokens.size() == token_indices_gpu.size(),
                "Unexpected mismatch in number of token types and token indices");
   CUDF_EXPECTS(tokens.size() > 0, "Empty JSON input not supported");
+
+  // The JSON root may only be a struct, list, string, or value node
   CUDF_EXPECTS(is_valid_root_token(tokens[offset]), "Invalid beginning of JSON document");
 
   while (offset < tokens.size()) {

From c6f8d0ed36da905760d64b2ba8c0baee8f924e33 Mon Sep 17 00:00:00 2001
From: Elias Stehle <3958403+elstehle@users.noreply.github.com>
Date: Tue, 23 Aug 2022 21:58:57 -0700
Subject: [PATCH 31/40] fixes code comments

---
 cpp/tests/io/json_test.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/cpp/tests/io/json_test.cpp b/cpp/tests/io/json_test.cpp
index af72edce91b..b44780314b7 100644
--- a/cpp/tests/io/json_test.cpp
+++ b/cpp/tests/io/json_test.cpp
@@ -964,7 +964,8 @@ TEST_F(JsonReaderTest, JsonExperimentalLines)
   json_lines_options.enable_experimental(true);
   cudf::io::table_with_metadata new_reader_table = cudf::io::read_json(json_lines_options);
 
-  // Verify that the data read via parquet matches the data read via JSON
+  // Verify that the data read via non-nested JSON lines reader matches the data read via nested
+  // JSON reader
   CUDF_TEST_EXPECT_TABLES_EQUAL(current_reader_table.tbl->view(), new_reader_table.tbl->view());
 }
 

From e38f3d891c3b55fcf0d5118c265e6849cc1d915e Mon Sep 17 00:00:00 2001
From: Elias Stehle <3958403+elstehle@users.noreply.github.com>
Date: Tue, 23 Aug 2022 22:30:11 -0700
Subject: [PATCH 32/40] adds more tests for json lines

---
 cpp/tests/io/json_test.cpp | 41 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 39 insertions(+), 2 deletions(-)

diff --git a/cpp/tests/io/json_test.cpp b/cpp/tests/io/json_test.cpp
index b44780314b7..77efb0c4d76 100644
--- a/cpp/tests/io/json_test.cpp
+++ b/cpp/tests/io/json_test.cpp
@@ -957,10 +957,10 @@ TEST_F(JsonReaderTest, JsonExperimentalLines)
       cudf::io::source_info{json_string.c_str(), json_string.size()})
       .lines(true);
 
-  // Read test data via existing, non-nested json lines reader
+  // Read test data via existing, non-nested JSON lines reader
   cudf::io::table_with_metadata current_reader_table = cudf::io::read_json(json_lines_options);
 
-  // Read test data via new, nested json reader
+  // Read test data via new, nested JSON reader
   json_lines_options.enable_experimental(true);
   cudf::io::table_with_metadata new_reader_table = cudf::io::read_json(json_lines_options);
 
@@ -969,4 +969,41 @@ TEST_F(JsonReaderTest, JsonExperimentalLines)
   CUDF_TEST_EXPECT_TABLES_EQUAL(current_reader_table.tbl->view(), new_reader_table.tbl->view());
 }
 
+TEST_F(JsonReaderTest, ExperimentalLinesNoOmissions)
+{
+  std::vector<std::string> json_inputs =
+    // single column
+    {R"({"a":"a0"}
+    {"a":"a1"}
+    {"a":"a2"}
+    {"a":"a3"}
+    {"a":"a4"})",
+     // single column, single row
+     R"({"a":"a0"})",
+     // single row
+     R"({"a":"a0", "b":"b0"})",
+     // two columns, two rows
+     R"({"a":"a0", "b":"b0"}
+    {"a":"a1", "b":"b1"})"};
+
+  for (auto const& json_string : json_inputs) {
+    // Initialize parsing options (reading json lines)
+    cudf::io::json_reader_options json_lines_options =
+      cudf::io::json_reader_options::builder(
+        cudf::io::source_info{json_string.c_str(), json_string.size()})
+        .lines(true);
+
+    // Read test data via existing, non-nested JSON lines reader
+    cudf::io::table_with_metadata current_reader_table = cudf::io::read_json(json_lines_options);
+
+    // Read test data via new, nested JSON reader
+    json_lines_options.enable_experimental(true);
+    cudf::io::table_with_metadata new_reader_table = cudf::io::read_json(json_lines_options);
+
+    // Verify that the data read via non-nested JSON lines reader matches the data read via nested
+    // JSON reader
+    CUDF_TEST_EXPECT_TABLES_EQUAL(current_reader_table.tbl->view(), new_reader_table.tbl->view());
+  }
+}
+
 CUDF_TEST_PROGRAM_MAIN()

From cdb743d0e0a8906fa05c6dfee41616251d99089c Mon Sep 17 00:00:00 2001
From: Elias Stehle <3958403+elstehle@users.noreply.github.com>
Date: Wed, 24 Aug 2022 11:20:09 -0700
Subject: [PATCH 33/40] adds json lines test for experimental nested json
 reader

---
 python/cudf/cudf/tests/test_json.py | 42 ++++++++++++++++++++++++-----
 1 file changed, 36 insertions(+), 6 deletions(-)

diff --git a/python/cudf/cudf/tests/test_json.py b/python/cudf/cudf/tests/test_json.py
index 368015cf563..338c38df272 100644
--- a/python/cudf/cudf/tests/test_json.py
+++ b/python/cudf/cudf/tests/test_json.py
@@ -8,6 +8,7 @@
 
 import numpy as np
 import pandas as pd
+import pyarrow as pa
 import pytest
 
 import cudf
@@ -575,12 +576,6 @@ def test_default_float_bitwidth(default_float_bitwidth):
     assert df["b"].dtype == np.dtype(f"f{default_float_bitwidth//8}")
 
 
-def test_json_experimental():
-    # should raise an exception, for now
-    with pytest.raises(RuntimeError):
-        cudf.read_json("", engine="cudf_experimental")
-
-
 def test_json_nested_basic(tmpdir):
     fname = tmpdir.mkdir("gdf_json").join("tmp_json_nested_basic")
     data = {
@@ -594,3 +589,38 @@ def test_json_nested_basic(tmpdir):
     pdf = pd.read_json(fname, orient="records")
 
     assert_eq(pdf, df)
+
+
+def test_json_nested_lines(tmpdir):
+    fname = tmpdir.mkdir("gdf_json").join("tmp_json_nested_lines")
+    data = {
+        "c1": [{"f1": "sf11", "f2": "sf21"}, {"f1": "sf12", "f2": "sf22"}],
+        "c2": [["l11", "l21"], ["l12", "l22"]],
+    }
+    pdf = pd.DataFrame(data)
+    pdf.to_json(fname, orient="records", lines=True)
+
+    df = cudf.read_json(
+        fname, engine="cudf_experimental", orient="records", lines=True
+    )
+    pdf = pd.read_json(fname, orient="records", lines=True)
+
+    assert_eq(pdf, df)
+
+
+def test_json_nested_lines_with_omissions(tmpdir):
+    fname = tmpdir.mkdir("gdf_json").join("tmp_json_nested_lines_omissions")
+    data = {
+        "c1": [{"f2": "sf21"}, {"f1": "sf12"}],
+        "c2": [["l11", "l21"], []],
+    }
+    pdf = pd.DataFrame(data)
+    pdf.to_json(fname, orient="records", lines=True)
+
+    df = cudf.read_json(
+        fname, engine="cudf_experimental", orient="records", lines=True
+    )
+    pdf = pd.read_json(fname, orient="records", lines=True)
+
+    # Pandas just just omits "f1" in first row, so we have to enforce a common schema
+    assert df.to_arrow().equals(pa.Table.from_pandas(pdf))

From 713260f622f371ce7d4852e6020a83736173489f Mon Sep 17 00:00:00 2001
From: Elias Stehle <3958403+elstehle@users.noreply.github.com>
Date: Wed, 24 Aug 2022 11:36:05 -0700
Subject: [PATCH 34/40] fixes style

---
 python/cudf/cudf/tests/test_json.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/tests/test_json.py b/python/cudf/cudf/tests/test_json.py
index 338c38df272..d5e40f5a829 100644
--- a/python/cudf/cudf/tests/test_json.py
+++ b/python/cudf/cudf/tests/test_json.py
@@ -622,5 +622,5 @@ def test_json_nested_lines_with_omissions(tmpdir):
     )
     pdf = pd.read_json(fname, orient="records", lines=True)
 
-    # Pandas just just omits "f1" in first row, so we have to enforce a common schema
+    # Pandas omits "f1" in first row, so we have to enforce a common schema
     assert df.to_arrow().equals(pa.Table.from_pandas(pdf))

From 14749f7685e3866f733b8417027ec10f80e0d1eb Mon Sep 17 00:00:00 2001
From: Elias Stehle <3958403+elstehle@users.noreply.github.com>
Date: Wed, 24 Aug 2022 12:59:49 -0700
Subject: [PATCH 35/40] parametrizes test and uses bytesio

---
 python/cudf/cudf/tests/test_json.py | 49 +++++++++++++----------------
 1 file changed, 21 insertions(+), 28 deletions(-)

diff --git a/python/cudf/cudf/tests/test_json.py b/python/cudf/cudf/tests/test_json.py
index d5e40f5a829..80ccbd64130 100644
--- a/python/cudf/cudf/tests/test_json.py
+++ b/python/cudf/cudf/tests/test_json.py
@@ -591,36 +591,29 @@ def test_json_nested_basic(tmpdir):
     assert_eq(pdf, df)
 
 
-def test_json_nested_lines(tmpdir):
-    fname = tmpdir.mkdir("gdf_json").join("tmp_json_nested_lines")
-    data = {
-        "c1": [{"f1": "sf11", "f2": "sf21"}, {"f1": "sf12", "f2": "sf22"}],
-        "c2": [["l11", "l21"], ["l12", "l22"]],
-    }
-    pdf = pd.DataFrame(data)
-    pdf.to_json(fname, orient="records", lines=True)
-
-    df = cudf.read_json(
-        fname, engine="cudf_experimental", orient="records", lines=True
-    )
-    pdf = pd.read_json(fname, orient="records", lines=True)
-
-    assert_eq(pdf, df)
-
-
-def test_json_nested_lines_with_omissions(tmpdir):
-    fname = tmpdir.mkdir("gdf_json").join("tmp_json_nested_lines_omissions")
-    data = {
-        "c1": [{"f2": "sf21"}, {"f1": "sf12"}],
-        "c2": [["l11", "l21"], []],
-    }
+@pytest.mark.parametrize(
+    "data",
+    [
+        {
+            "c1": [{"f1": "sf11", "f2": "sf21"}, {"f1": "sf12", "f2": "sf22"}],
+            "c2": [["l11", "l21"], ["l12", "l22"]],
+        },
+        # Essential test case to handle omissions
+        {
+            "c1": [{"f2": "sf21"}, {"f1": "sf12"}],
+            "c2": [["l11", "l21"], []],
+        },
+    ],
+)
+def test_json_nested_lines(data):
+    bytes = BytesIO()
     pdf = pd.DataFrame(data)
-    pdf.to_json(fname, orient="records", lines=True)
-
+    pdf.to_json(bytes, orient="records", lines=True)
+    bytes.seek(0)
     df = cudf.read_json(
-        fname, engine="cudf_experimental", orient="records", lines=True
+        bytes, engine="cudf_experimental", orient="records", lines=True
     )
-    pdf = pd.read_json(fname, orient="records", lines=True)
-
+    pdf = pd.read_json(bytes, orient="records", lines=True)
+    # In the second test-case:
     # Pandas omits "f1" in first row, so we have to enforce a common schema
     assert df.to_arrow().equals(pa.Table.from_pandas(pdf))

From 94daa4fffa6318c22b9c86f003fdf1113e40a510 Mon Sep 17 00:00:00 2001
From: Elias Stehle <3958403+elstehle@users.noreply.github.com>
Date: Wed, 24 Aug 2022 13:01:38 -0700
Subject: [PATCH 36/40] adds seek before reads

---
 python/cudf/cudf/tests/test_json.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/cudf/cudf/tests/test_json.py b/python/cudf/cudf/tests/test_json.py
index 80ccbd64130..f3d9180d44d 100644
--- a/python/cudf/cudf/tests/test_json.py
+++ b/python/cudf/cudf/tests/test_json.py
@@ -613,6 +613,7 @@ def test_json_nested_lines(data):
     df = cudf.read_json(
         bytes, engine="cudf_experimental", orient="records", lines=True
     )
+    bytes.seek(0)
     pdf = pd.read_json(bytes, orient="records", lines=True)
     # In the second test-case:
     # Pandas omits "f1" in first row, so we have to enforce a common schema

From c09c4afe2b1f2065360da4c1374858cc893f3360 Mon Sep 17 00:00:00 2001
From: Elias Stehle <3958403+elstehle@users.noreply.github.com>
Date: Thu, 25 Aug 2022 06:26:08 -0700
Subject: [PATCH 37/40] prettifies translation table

---
 cpp/src/io/json/nested_json_gpu.cu | 615 +++++++++++++++--------------
 1 file changed, 313 insertions(+), 302 deletions(-)

diff --git a/cpp/src/io/json/nested_json_gpu.cu b/cpp/src/io/json/nested_json_gpu.cu
index 21833800063..fe5f00318b9 100644
--- a/cpp/src/io/json/nested_json_gpu.cu
+++ b/cpp/src/io/json/nested_json_gpu.cu
@@ -470,153 +470,164 @@ auto get_transition_table(bool newline_delimited_json)
  */
 auto get_translation_table()
 {
+  constexpr auto StructBegin    = token_t::StructBegin;
+  constexpr auto StructEnd      = token_t::StructEnd;
+  constexpr auto ListBegin      = token_t::ListBegin;
+  constexpr auto ListEnd        = token_t::ListEnd;
+  constexpr auto FieldNameBegin = token_t::FieldNameBegin;
+  constexpr auto FieldNameEnd   = token_t::FieldNameEnd;
+  constexpr auto StringBegin    = token_t::StringBegin;
+  constexpr auto StringEnd      = token_t::StringEnd;
+  constexpr auto ValueBegin     = token_t::ValueBegin;
+  constexpr auto ValueEnd       = token_t::ValueEnd;
+  constexpr auto ErrorBegin     = token_t::ErrorBegin;
+
   std::array<std::array<std::vector<char>, NUM_PDA_SGIDS>, PD_NUM_STATES> pda_tlt;
-  pda_tlt[static_cast<StateT>(pda_state_t::PD_BOV)] = {{                         /*ROOT*/
-                                                        {token_t::StructBegin},  // OPENING_BRACE
-                                                        {token_t::ListBegin},    // OPENING_BRACKET
-                                                        {token_t::ErrorBegin},   // CLOSING_BRACE
-                                                        {token_t::ErrorBegin},   // CLOSING_BRACKET
-                                                        {token_t::StringBegin},  // QUOTE
-                                                        {token_t::ErrorBegin},   // ESCAPE
-                                                        {token_t::ErrorBegin},   // COMMA
-                                                        {token_t::ErrorBegin},   // COLON
-                                                        {},                      // WHITE_SPACE
-                                                        {},                      // LINE_BREAK
-                                                        {token_t::ValueBegin},   // OTHER
+  pda_tlt[static_cast<StateT>(pda_state_t::PD_BOV)] = {{                /*ROOT*/
+                                                        {StructBegin},  // OPENING_BRACE
+                                                        {ListBegin},    // OPENING_BRACKET
+                                                        {ErrorBegin},   // CLOSING_BRACE
+                                                        {ErrorBegin},   // CLOSING_BRACKET
+                                                        {StringBegin},  // QUOTE
+                                                        {ErrorBegin},   // ESCAPE
+                                                        {ErrorBegin},   // COMMA
+                                                        {ErrorBegin},   // COLON
+                                                        {},             // WHITE_SPACE
+                                                        {},             // LINE_BREAK
+                                                        {ValueBegin},   // OTHER
+                                                        /*LIST*/
+                                                        {StructBegin},  // OPENING_BRACE
+                                                        {ListBegin},    // OPENING_BRACKET
+                                                        {ErrorBegin},   // CLOSING_BRACE
+                                                        {ErrorBegin},   // CLOSING_BRACKET
+                                                        {StringBegin},  // QUOTE
+                                                        {ErrorBegin},   // ESCAPE
+                                                        {ErrorBegin},   // COMMA
+                                                        {ErrorBegin},   // COLON
+                                                        {},             // WHITE_SPACE
+                                                        {},             // LINE_BREAK
+                                                        {ValueBegin},   // OTHER
+                                                        /*STRUCT*/
+                                                        {StructBegin},   // OPENING_BRACE
+                                                        {ListBegin},     // OPENING_BRACKET
+                                                        {ErrorBegin},    // CLOSING_BRACE
+                                                        {ErrorBegin},    // CLOSING_BRACKET
+                                                        {StringBegin},   // QUOTE
+                                                        {ErrorBegin},    // ESCAPE
+                                                        {ErrorBegin},    // COMMA
+                                                        {ErrorBegin},    // COLON
+                                                        {},              // WHITE_SPACE
+                                                        {},              // LINE_BREAK
+                                                        {ValueBegin}}};  // OTHER
+  pda_tlt[static_cast<StateT>(pda_state_t::PD_BOA)] = {{                 /*ROOT*/
+                                                        {ErrorBegin},    // OPENING_BRACE
+                                                        {ErrorBegin},    // OPENING_BRACKET
+                                                        {ErrorBegin},    // CLOSING_BRACE
+                                                        {ErrorBegin},    // CLOSING_BRACKET
+                                                        {ErrorBegin},    // QUOTE
+                                                        {ErrorBegin},    // ESCAPE
+                                                        {ErrorBegin},    // COMMA
+                                                        {ErrorBegin},    // COLON
+                                                        {ErrorBegin},    // WHITE_SPACE
+                                                        {ErrorBegin},    // LINE_BREAK
+                                                        {ErrorBegin},    // OTHER
                                                         /*LIST*/
-                                                        {token_t::StructBegin},  // OPENING_BRACE
-                                                        {token_t::ListBegin},    // OPENING_BRACKET
-                                                        {token_t::ErrorBegin},   // CLOSING_BRACE
-                                                        {token_t::ErrorBegin},   // CLOSING_BRACKET
-                                                        {token_t::StringBegin},  // QUOTE
-                                                        {token_t::ErrorBegin},   // ESCAPE
-                                                        {token_t::ErrorBegin},   // COMMA
-                                                        {token_t::ErrorBegin},   // COLON
-                                                        {},                      // WHITE_SPACE
-                                                        {},                      // LINE_BREAK
-                                                        {token_t::ValueBegin},   // OTHER
+                                                        {StructBegin},  // OPENING_BRACE
+                                                        {ListBegin},    // OPENING_BRACKET
+                                                        {ErrorBegin},   // CLOSING_BRACE
+                                                        {ListEnd},      // CLOSING_BRACKET
+                                                        {StringBegin},  // QUOTE
+                                                        {ErrorBegin},   // ESCAPE
+                                                        {ErrorBegin},   // COMMA
+                                                        {ErrorBegin},   // COLON
+                                                        {},             // WHITE_SPACE
+                                                        {},             // LINE_BREAK
+                                                        {ValueBegin},   // OTHER
                                                         /*STRUCT*/
-                                                        {token_t::StructBegin},   // OPENING_BRACE
-                                                        {token_t::ListBegin},     // OPENING_BRACKET
-                                                        {token_t::ErrorBegin},    // CLOSING_BRACE
-                                                        {token_t::ErrorBegin},    // CLOSING_BRACKET
-                                                        {token_t::StringBegin},   // QUOTE
-                                                        {token_t::ErrorBegin},    // ESCAPE
-                                                        {token_t::ErrorBegin},    // COMMA
-                                                        {token_t::ErrorBegin},    // COLON
-                                                        {},                       // WHITE_SPACE
-                                                        {},                       // LINE_BREAK
-                                                        {token_t::ValueBegin}}};  // OTHER
-  pda_tlt[static_cast<StateT>(pda_state_t::PD_BOA)] = {{                          /*ROOT*/
-                                                        {token_t::ErrorBegin},    // OPENING_BRACE
-                                                        {token_t::ErrorBegin},    // OPENING_BRACKET
-                                                        {token_t::ErrorBegin},    // CLOSING_BRACE
-                                                        {token_t::ErrorBegin},    // CLOSING_BRACKET
-                                                        {token_t::ErrorBegin},    // QUOTE
-                                                        {token_t::ErrorBegin},    // ESCAPE
-                                                        {token_t::ErrorBegin},    // COMMA
-                                                        {token_t::ErrorBegin},    // COLON
-                                                        {token_t::ErrorBegin},    // WHITE_SPACE
-                                                        {token_t::ErrorBegin},    // LINE_BREAK
-                                                        {token_t::ErrorBegin},    // OTHER
+                                                        {ErrorBegin},      // OPENING_BRACE
+                                                        {ErrorBegin},      // OPENING_BRACKET
+                                                        {StructEnd},       // CLOSING_BRACE
+                                                        {ErrorBegin},      // CLOSING_BRACKET
+                                                        {FieldNameBegin},  // QUOTE
+                                                        {ErrorBegin},      // ESCAPE
+                                                        {ErrorBegin},      // COMMA
+                                                        {ErrorBegin},      // COLON
+                                                        {},                // WHITE_SPACE
+                                                        {},                // LINE_BREAK
+                                                        {ErrorBegin}}};    // OTHER
+  pda_tlt[static_cast<StateT>(pda_state_t::PD_LON)] = {{                   /*ROOT*/
+                                                        {ErrorBegin},      // OPENING_BRACE
+                                                        {ErrorBegin},      // OPENING_BRACKET
+                                                        {ErrorBegin},      // CLOSING_BRACE
+                                                        {ErrorBegin},      // CLOSING_BRACKET
+                                                        {ErrorBegin},      // QUOTE
+                                                        {ErrorBegin},      // ESCAPE
+                                                        {ErrorBegin},      // COMMA
+                                                        {ErrorBegin},      // COLON
+                                                        {ValueEnd},        // WHITE_SPACE
+                                                        {ValueEnd},        // LINE_BREAK
+                                                        {},                // OTHER
                                                         /*LIST*/
-                                                        {token_t::StructBegin},  // OPENING_BRACE
-                                                        {token_t::ListBegin},    // OPENING_BRACKET
-                                                        {token_t::ErrorBegin},   // CLOSING_BRACE
-                                                        {token_t::ListEnd},      // CLOSING_BRACKET
-                                                        {token_t::StringBegin},  // QUOTE
-                                                        {token_t::ErrorBegin},   // ESCAPE
-                                                        {token_t::ErrorBegin},   // COMMA
-                                                        {token_t::ErrorBegin},   // COLON
-                                                        {},                      // WHITE_SPACE
-                                                        {},                      // LINE_BREAK
-                                                        {token_t::ValueBegin},   // OTHER
+                                                        {ErrorBegin},         // OPENING_BRACE
+                                                        {ErrorBegin},         // OPENING_BRACKET
+                                                        {ErrorBegin},         // CLOSING_BRACE
+                                                        {ValueEnd, ListEnd},  // CLOSING_BRACKET
+                                                        {ErrorBegin},         // QUOTE
+                                                        {ErrorBegin},         // ESCAPE
+                                                        {ValueEnd},           // COMMA
+                                                        {ErrorBegin},         // COLON
+                                                        {ValueEnd},           // WHITE_SPACE
+                                                        {ValueEnd},           // LINE_BREAK
+                                                        {},                   // OTHER
                                                         /*STRUCT*/
-                                                        {token_t::ErrorBegin},  // OPENING_BRACE
-                                                        {token_t::ErrorBegin},  // OPENING_BRACKET
-                                                        {token_t::StructEnd},   // CLOSING_BRACE
-                                                        {token_t::ErrorBegin},  // CLOSING_BRACKET
-                                                        {token_t::FieldNameBegin},  // QUOTE
-                                                        {token_t::ErrorBegin},      // ESCAPE
-                                                        {token_t::ErrorBegin},      // COMMA
-                                                        {token_t::ErrorBegin},      // COLON
-                                                        {},                         // WHITE_SPACE
-                                                        {},                         // LINE_BREAK
-                                                        {token_t::ErrorBegin}}};    // OTHER
-  pda_tlt[static_cast<StateT>(pda_state_t::PD_LON)] = {
-    {                        /*ROOT*/
-     {token_t::ErrorBegin},  // OPENING_BRACE
-     {token_t::ErrorBegin},  // OPENING_BRACKET
-     {token_t::ErrorBegin},  // CLOSING_BRACE
-     {token_t::ErrorBegin},  // CLOSING_BRACKET
-     {token_t::ErrorBegin},  // QUOTE
-     {token_t::ErrorBegin},  // ESCAPE
-     {token_t::ErrorBegin},  // COMMA
-     {token_t::ErrorBegin},  // COLON
-     {token_t::ValueEnd},    // WHITE_SPACE
-     {token_t::ValueEnd},    // LINE_BREAK
-     {},                     // OTHER
-     /*LIST*/
-     {token_t::ErrorBegin},                  // OPENING_BRACE
-     {token_t::ErrorBegin},                  // OPENING_BRACKET
-     {token_t::ErrorBegin},                  // CLOSING_BRACE
-     {token_t::ValueEnd, token_t::ListEnd},  // CLOSING_BRACKET
-     {token_t::ErrorBegin},                  // QUOTE
-     {token_t::ErrorBegin},                  // ESCAPE
-     {token_t::ValueEnd},                    // COMMA
-     {token_t::ErrorBegin},                  // COLON
-     {token_t::ValueEnd},                    // WHITE_SPACE
-     {token_t::ValueEnd},                    // LINE_BREAK
-     {},                                     // OTHER
-     /*STRUCT*/
-     {token_t::ErrorBegin},                    // OPENING_BRACE
-     {token_t::ErrorBegin},                    // OPENING_BRACKET
-     {token_t::ValueEnd, token_t::StructEnd},  // CLOSING_BRACE
-     {token_t::ErrorBegin},                    // CLOSING_BRACKET
-     {token_t::ErrorBegin},                    // QUOTE
-     {token_t::ErrorBegin},                    // ESCAPE
-     {token_t::ValueEnd},                      // COMMA
-     {token_t::ErrorBegin},                    // COLON
-     {token_t::ValueEnd},                      // WHITE_SPACE
-     {token_t::ValueEnd},                      // LINE_BREAK
-     {}}};                                     // OTHER
-
-  pda_tlt[static_cast<StateT>(pda_state_t::PD_STR)] = {{                       /*ROOT*/
-                                                        {},                    // OPENING_BRACE
-                                                        {},                    // OPENING_BRACKET
-                                                        {},                    // CLOSING_BRACE
-                                                        {},                    // CLOSING_BRACKET
-                                                        {token_t::StringEnd},  // QUOTE
-                                                        {},                    // ESCAPE
-                                                        {},                    // COMMA
-                                                        {},                    // COLON
-                                                        {},                    // WHITE_SPACE
-                                                        {},                    // LINE_BREAK
-                                                        {},                    // OTHER
+                                                        {ErrorBegin},           // OPENING_BRACE
+                                                        {ErrorBegin},           // OPENING_BRACKET
+                                                        {ValueEnd, StructEnd},  // CLOSING_BRACE
+                                                        {ErrorBegin},           // CLOSING_BRACKET
+                                                        {ErrorBegin},           // QUOTE
+                                                        {ErrorBegin},           // ESCAPE
+                                                        {ValueEnd},             // COMMA
+                                                        {ErrorBegin},           // COLON
+                                                        {ValueEnd},             // WHITE_SPACE
+                                                        {ValueEnd},             // LINE_BREAK
+                                                        {}}};                   // OTHER
+
+  pda_tlt[static_cast<StateT>(pda_state_t::PD_STR)] = {{              /*ROOT*/
+                                                        {},           // OPENING_BRACE
+                                                        {},           // OPENING_BRACKET
+                                                        {},           // CLOSING_BRACE
+                                                        {},           // CLOSING_BRACKET
+                                                        {StringEnd},  // QUOTE
+                                                        {},           // ESCAPE
+                                                        {},           // COMMA
+                                                        {},           // COLON
+                                                        {},           // WHITE_SPACE
+                                                        {},           // LINE_BREAK
+                                                        {},           // OTHER
                                                         /*LIST*/
-                                                        {},                    // OPENING_BRACE
-                                                        {},                    // OPENING_BRACKET
-                                                        {},                    // CLOSING_BRACE
-                                                        {},                    // CLOSING_BRACKET
-                                                        {token_t::StringEnd},  // QUOTE
-                                                        {},                    // ESCAPE
-                                                        {},                    // COMMA
-                                                        {},                    // COLON
-                                                        {},                    // WHITE_SPACE
-                                                        {},                    // LINE_BREAK
-                                                        {},                    // OTHER
+                                                        {},           // OPENING_BRACE
+                                                        {},           // OPENING_BRACKET
+                                                        {},           // CLOSING_BRACE
+                                                        {},           // CLOSING_BRACKET
+                                                        {StringEnd},  // QUOTE
+                                                        {},           // ESCAPE
+                                                        {},           // COMMA
+                                                        {},           // COLON
+                                                        {},           // WHITE_SPACE
+                                                        {},           // LINE_BREAK
+                                                        {},           // OTHER
                                                         /*STRUCT*/
-                                                        {},                    // OPENING_BRACE
-                                                        {},                    // OPENING_BRACKET
-                                                        {},                    // CLOSING_BRACE
-                                                        {},                    // CLOSING_BRACKET
-                                                        {token_t::StringEnd},  // QUOTE
-                                                        {},                    // ESCAPE
-                                                        {},                    // COMMA
-                                                        {},                    // COLON
-                                                        {},                    // WHITE_SPACE
-                                                        {},                    // LINE_BREAK
-                                                        {}}};                  // OTHER
+                                                        {},           // OPENING_BRACE
+                                                        {},           // OPENING_BRACKET
+                                                        {},           // CLOSING_BRACE
+                                                        {},           // CLOSING_BRACKET
+                                                        {StringEnd},  // QUOTE
+                                                        {},           // ESCAPE
+                                                        {},           // COMMA
+                                                        {},           // COLON
+                                                        {},           // WHITE_SPACE
+                                                        {},           // LINE_BREAK
+                                                        {}}};         // OTHER
 
   pda_tlt[static_cast<StateT>(pda_state_t::PD_SCE)] = {{     /*ROOT*/
                                                         {},  // OPENING_BRACE
@@ -655,141 +666,141 @@ auto get_translation_table()
                                                         {},    // LINE_BREAK
                                                         {}}};  // OTHER
 
-  pda_tlt[static_cast<StateT>(pda_state_t::PD_PVL)] = {{                        /*ROOT*/
-                                                        {token_t::ErrorBegin},  // OPENING_BRACE
-                                                        {token_t::ErrorBegin},  // OPENING_BRACKET
-                                                        {token_t::ErrorBegin},  // CLOSING_BRACE
-                                                        {token_t::ErrorBegin},  // CLOSING_BRACKET
-                                                        {token_t::ErrorBegin},  // QUOTE
-                                                        {token_t::ErrorBegin},  // ESCAPE
-                                                        {token_t::ErrorBegin},  // COMMA
-                                                        {token_t::ErrorBegin},  // COLON
-                                                        {},                     // WHITE_SPACE
-                                                        {},                     // LINE_BREAK
-                                                        {token_t::ErrorBegin},  // OTHER
+  pda_tlt[static_cast<StateT>(pda_state_t::PD_PVL)] = {{               /*ROOT*/
+                                                        {ErrorBegin},  // OPENING_BRACE
+                                                        {ErrorBegin},  // OPENING_BRACKET
+                                                        {ErrorBegin},  // CLOSING_BRACE
+                                                        {ErrorBegin},  // CLOSING_BRACKET
+                                                        {ErrorBegin},  // QUOTE
+                                                        {ErrorBegin},  // ESCAPE
+                                                        {ErrorBegin},  // COMMA
+                                                        {ErrorBegin},  // COLON
+                                                        {},            // WHITE_SPACE
+                                                        {},            // LINE_BREAK
+                                                        {ErrorBegin},  // OTHER
                                                         /*LIST*/
-                                                        {token_t::ErrorBegin},  // OPENING_BRACE
-                                                        {token_t::ErrorBegin},  // OPENING_BRACKET
-                                                        {token_t::ErrorBegin},  // CLOSING_BRACE
-                                                        {token_t::ListEnd},     // CLOSING_BRACKET
-                                                        {token_t::ErrorBegin},  // QUOTE
-                                                        {token_t::ErrorBegin},  // ESCAPE
-                                                        {},                     // COMMA
-                                                        {token_t::ErrorBegin},  // COLON
-                                                        {},                     // WHITE_SPACE
-                                                        {},                     // LINE_BREAK
-                                                        {token_t::ErrorBegin},  // OTHER
+                                                        {ErrorBegin},  // OPENING_BRACE
+                                                        {ErrorBegin},  // OPENING_BRACKET
+                                                        {ErrorBegin},  // CLOSING_BRACE
+                                                        {ListEnd},     // CLOSING_BRACKET
+                                                        {ErrorBegin},  // QUOTE
+                                                        {ErrorBegin},  // ESCAPE
+                                                        {},            // COMMA
+                                                        {ErrorBegin},  // COLON
+                                                        {},            // WHITE_SPACE
+                                                        {},            // LINE_BREAK
+                                                        {ErrorBegin},  // OTHER
                                                         /*STRUCT*/
-                                                        {token_t::ErrorBegin},    // OPENING_BRACE
-                                                        {token_t::ErrorBegin},    // OPENING_BRACKET
-                                                        {token_t::StructEnd},     // CLOSING_BRACE
-                                                        {token_t::ErrorBegin},    // CLOSING_BRACKET
-                                                        {token_t::ErrorBegin},    // QUOTE
-                                                        {token_t::ErrorBegin},    // ESCAPE
-                                                        {},                       // COMMA
-                                                        {token_t::ErrorBegin},    // COLON
-                                                        {},                       // WHITE_SPACE
-                                                        {},                       // LINE_BREAK
-                                                        {token_t::ErrorBegin}}};  // OTHER
-
-  pda_tlt[static_cast<StateT>(pda_state_t::PD_BFN)] = {{                        /*ROOT*/
-                                                        {token_t::ErrorBegin},  // OPENING_BRACE
-                                                        {token_t::ErrorBegin},  // OPENING_BRACKET
-                                                        {token_t::ErrorBegin},  // CLOSING_BRACE
-                                                        {token_t::ErrorBegin},  // CLOSING_BRACKET
-                                                        {token_t::ErrorBegin},  // QUOTE
-                                                        {token_t::ErrorBegin},  // ESCAPE
-                                                        {token_t::ErrorBegin},  // COMMA
-                                                        {token_t::ErrorBegin},  // COLON
-                                                        {token_t::ErrorBegin},  // WHITE_SPACE
-                                                        {token_t::ErrorBegin},  // LINE_BREAK
-                                                        {token_t::ErrorBegin},  // OTHER
+                                                        {ErrorBegin},    // OPENING_BRACE
+                                                        {ErrorBegin},    // OPENING_BRACKET
+                                                        {StructEnd},     // CLOSING_BRACE
+                                                        {ErrorBegin},    // CLOSING_BRACKET
+                                                        {ErrorBegin},    // QUOTE
+                                                        {ErrorBegin},    // ESCAPE
+                                                        {},              // COMMA
+                                                        {ErrorBegin},    // COLON
+                                                        {},              // WHITE_SPACE
+                                                        {},              // LINE_BREAK
+                                                        {ErrorBegin}}};  // OTHER
+
+  pda_tlt[static_cast<StateT>(pda_state_t::PD_BFN)] = {{               /*ROOT*/
+                                                        {ErrorBegin},  // OPENING_BRACE
+                                                        {ErrorBegin},  // OPENING_BRACKET
+                                                        {ErrorBegin},  // CLOSING_BRACE
+                                                        {ErrorBegin},  // CLOSING_BRACKET
+                                                        {ErrorBegin},  // QUOTE
+                                                        {ErrorBegin},  // ESCAPE
+                                                        {ErrorBegin},  // COMMA
+                                                        {ErrorBegin},  // COLON
+                                                        {ErrorBegin},  // WHITE_SPACE
+                                                        {ErrorBegin},  // LINE_BREAK
+                                                        {ErrorBegin},  // OTHER
                                                         /*LIST*/
-                                                        {token_t::ErrorBegin},  // OPENING_BRACE
-                                                        {token_t::ErrorBegin},  // OPENING_BRACKET
-                                                        {token_t::ErrorBegin},  // CLOSING_BRACE
-                                                        {token_t::ErrorBegin},  // CLOSING_BRACKET
-                                                        {token_t::ErrorBegin},  // QUOTE
-                                                        {token_t::ErrorBegin},  // ESCAPE
-                                                        {token_t::ErrorBegin},  // COMMA
-                                                        {token_t::ErrorBegin},  // COLON
-                                                        {token_t::ErrorBegin},  // WHITE_SPACE
-                                                        {token_t::ErrorBegin},  // LINE_BREAK
-                                                        {token_t::ErrorBegin},  // OTHER
+                                                        {ErrorBegin},  // OPENING_BRACE
+                                                        {ErrorBegin},  // OPENING_BRACKET
+                                                        {ErrorBegin},  // CLOSING_BRACE
+                                                        {ErrorBegin},  // CLOSING_BRACKET
+                                                        {ErrorBegin},  // QUOTE
+                                                        {ErrorBegin},  // ESCAPE
+                                                        {ErrorBegin},  // COMMA
+                                                        {ErrorBegin},  // COLON
+                                                        {ErrorBegin},  // WHITE_SPACE
+                                                        {ErrorBegin},  // LINE_BREAK
+                                                        {ErrorBegin},  // OTHER
                                                         /*STRUCT*/
-                                                        {token_t::ErrorBegin},  // OPENING_BRACE
-                                                        {token_t::ErrorBegin},  // OPENING_BRACKET
-                                                        {token_t::ErrorBegin},  // CLOSING_BRACE
-                                                        {token_t::ErrorBegin},  // CLOSING_BRACKET
-                                                        {token_t::FieldNameBegin},  // QUOTE
-                                                        {token_t::ErrorBegin},      // ESCAPE
-                                                        {token_t::ErrorBegin},      // COMMA
-                                                        {token_t::ErrorBegin},      // COLON
-                                                        {},                         // WHITE_SPACE
-                                                        {},                         // LINE_BREAK
-                                                        {token_t::ErrorBegin}}};    // OTHER
-
-  pda_tlt[static_cast<StateT>(pda_state_t::PD_FLN)] = {{                        /*ROOT*/
-                                                        {token_t::ErrorBegin},  // OPENING_BRACE
-                                                        {token_t::ErrorBegin},  // OPENING_BRACKET
-                                                        {token_t::ErrorBegin},  // CLOSING_BRACE
-                                                        {token_t::ErrorBegin},  // CLOSING_BRACKET
-                                                        {token_t::ErrorBegin},  // QUOTE
-                                                        {token_t::ErrorBegin},  // ESCAPE
-                                                        {token_t::ErrorBegin},  // COMMA
-                                                        {token_t::ErrorBegin},  // COLON
-                                                        {token_t::ErrorBegin},  // WHITE_SPACE
-                                                        {token_t::ErrorBegin},  // LINE_BREAK
-                                                        {token_t::ErrorBegin},  // OTHER
+                                                        {ErrorBegin},      // OPENING_BRACE
+                                                        {ErrorBegin},      // OPENING_BRACKET
+                                                        {ErrorBegin},      // CLOSING_BRACE
+                                                        {ErrorBegin},      // CLOSING_BRACKET
+                                                        {FieldNameBegin},  // QUOTE
+                                                        {ErrorBegin},      // ESCAPE
+                                                        {ErrorBegin},      // COMMA
+                                                        {ErrorBegin},      // COLON
+                                                        {},                // WHITE_SPACE
+                                                        {},                // LINE_BREAK
+                                                        {ErrorBegin}}};    // OTHER
+
+  pda_tlt[static_cast<StateT>(pda_state_t::PD_FLN)] = {{               /*ROOT*/
+                                                        {ErrorBegin},  // OPENING_BRACE
+                                                        {ErrorBegin},  // OPENING_BRACKET
+                                                        {ErrorBegin},  // CLOSING_BRACE
+                                                        {ErrorBegin},  // CLOSING_BRACKET
+                                                        {ErrorBegin},  // QUOTE
+                                                        {ErrorBegin},  // ESCAPE
+                                                        {ErrorBegin},  // COMMA
+                                                        {ErrorBegin},  // COLON
+                                                        {ErrorBegin},  // WHITE_SPACE
+                                                        {ErrorBegin},  // LINE_BREAK
+                                                        {ErrorBegin},  // OTHER
                                                         /*LIST*/
-                                                        {token_t::ErrorBegin},  // OPENING_BRACE
-                                                        {token_t::ErrorBegin},  // OPENING_BRACKET
-                                                        {token_t::ErrorBegin},  // CLOSING_BRACE
-                                                        {token_t::ErrorBegin},  // CLOSING_BRACKET
-                                                        {token_t::ErrorBegin},  // QUOTE
-                                                        {token_t::ErrorBegin},  // ESCAPE
-                                                        {token_t::ErrorBegin},  // COMMA
-                                                        {token_t::ErrorBegin},  // COLON
-                                                        {token_t::ErrorBegin},  // WHITE_SPACE
-                                                        {token_t::ErrorBegin},  // LINE_BREAK
-                                                        {token_t::ErrorBegin},  // OTHER
+                                                        {ErrorBegin},  // OPENING_BRACE
+                                                        {ErrorBegin},  // OPENING_BRACKET
+                                                        {ErrorBegin},  // CLOSING_BRACE
+                                                        {ErrorBegin},  // CLOSING_BRACKET
+                                                        {ErrorBegin},  // QUOTE
+                                                        {ErrorBegin},  // ESCAPE
+                                                        {ErrorBegin},  // COMMA
+                                                        {ErrorBegin},  // COLON
+                                                        {ErrorBegin},  // WHITE_SPACE
+                                                        {ErrorBegin},  // LINE_BREAK
+                                                        {ErrorBegin},  // OTHER
                                                         /*STRUCT*/
-                                                        {},                       // OPENING_BRACE
-                                                        {},                       // OPENING_BRACKET
-                                                        {},                       // CLOSING_BRACE
-                                                        {},                       // CLOSING_BRACKET
-                                                        {token_t::FieldNameEnd},  // QUOTE
-                                                        {},                       // ESCAPE
-                                                        {},                       // COMMA
-                                                        {},                       // COLON
-                                                        {},                       // WHITE_SPACE
-                                                        {},                       // LINE_BREAK
-                                                        {}}};                     // OTHER
-
-  pda_tlt[static_cast<StateT>(pda_state_t::PD_FNE)] = {{                        /*ROOT*/
-                                                        {token_t::ErrorBegin},  // OPENING_BRACE
-                                                        {token_t::ErrorBegin},  // OPENING_BRACKET
-                                                        {token_t::ErrorBegin},  // CLOSING_BRACE
-                                                        {token_t::ErrorBegin},  // CLOSING_BRACKET
-                                                        {token_t::ErrorBegin},  // QUOTE
-                                                        {token_t::ErrorBegin},  // ESCAPE
-                                                        {token_t::ErrorBegin},  // COMMA
-                                                        {token_t::ErrorBegin},  // COLON
-                                                        {token_t::ErrorBegin},  // WHITE_SPACE
-                                                        {token_t::ErrorBegin},  // LINE_BREAK
-                                                        {token_t::ErrorBegin},  // OTHER
+                                                        {},              // OPENING_BRACE
+                                                        {},              // OPENING_BRACKET
+                                                        {},              // CLOSING_BRACE
+                                                        {},              // CLOSING_BRACKET
+                                                        {FieldNameEnd},  // QUOTE
+                                                        {},              // ESCAPE
+                                                        {},              // COMMA
+                                                        {},              // COLON
+                                                        {},              // WHITE_SPACE
+                                                        {},              // LINE_BREAK
+                                                        {}}};            // OTHER
+
+  pda_tlt[static_cast<StateT>(pda_state_t::PD_FNE)] = {{               /*ROOT*/
+                                                        {ErrorBegin},  // OPENING_BRACE
+                                                        {ErrorBegin},  // OPENING_BRACKET
+                                                        {ErrorBegin},  // CLOSING_BRACE
+                                                        {ErrorBegin},  // CLOSING_BRACKET
+                                                        {ErrorBegin},  // QUOTE
+                                                        {ErrorBegin},  // ESCAPE
+                                                        {ErrorBegin},  // COMMA
+                                                        {ErrorBegin},  // COLON
+                                                        {ErrorBegin},  // WHITE_SPACE
+                                                        {ErrorBegin},  // LINE_BREAK
+                                                        {ErrorBegin},  // OTHER
                                                         /*LIST*/
-                                                        {token_t::ErrorBegin},  // OPENING_BRACE
-                                                        {token_t::ErrorBegin},  // OPENING_BRACKET
-                                                        {token_t::ErrorBegin},  // CLOSING_BRACE
-                                                        {token_t::ErrorBegin},  // CLOSING_BRACKET
-                                                        {token_t::ErrorBegin},  // QUOTE
-                                                        {token_t::ErrorBegin},  // ESCAPE
-                                                        {token_t::ErrorBegin},  // COMMA
-                                                        {token_t::ErrorBegin},  // COLON
-                                                        {token_t::ErrorBegin},  // WHITE_SPACE
-                                                        {token_t::ErrorBegin},  // LINE_BREAK
-                                                        {token_t::ErrorBegin},  // OTHER
+                                                        {ErrorBegin},  // OPENING_BRACE
+                                                        {ErrorBegin},  // OPENING_BRACKET
+                                                        {ErrorBegin},  // CLOSING_BRACE
+                                                        {ErrorBegin},  // CLOSING_BRACKET
+                                                        {ErrorBegin},  // QUOTE
+                                                        {ErrorBegin},  // ESCAPE
+                                                        {ErrorBegin},  // COMMA
+                                                        {ErrorBegin},  // COLON
+                                                        {ErrorBegin},  // WHITE_SPACE
+                                                        {ErrorBegin},  // LINE_BREAK
+                                                        {ErrorBegin},  // OTHER
                                                         /*STRUCT*/
                                                         {},    // OPENING_BRACE
                                                         {},    // OPENING_BRACKET
@@ -803,42 +814,42 @@ auto get_translation_table()
                                                         {},    // LINE_BREAK
                                                         {}}};  // OTHER
 
-  pda_tlt[static_cast<StateT>(pda_state_t::PD_PFN)] = {{                        /*ROOT*/
-                                                        {token_t::ErrorBegin},  // OPENING_BRACE
-                                                        {token_t::ErrorBegin},  // OPENING_BRACKET
-                                                        {token_t::ErrorBegin},  // CLOSING_BRACE
-                                                        {token_t::ErrorBegin},  // CLOSING_BRACKET
-                                                        {token_t::ErrorBegin},  // QUOTE
-                                                        {token_t::ErrorBegin},  // ESCAPE
-                                                        {token_t::ErrorBegin},  // COMMA
-                                                        {token_t::ErrorBegin},  // COLON
-                                                        {token_t::ErrorBegin},  // WHITE_SPACE
-                                                        {token_t::ErrorBegin},  // LINE_BREAK
-                                                        {token_t::ErrorBegin},  // OTHER
+  pda_tlt[static_cast<StateT>(pda_state_t::PD_PFN)] = {{               /*ROOT*/
+                                                        {ErrorBegin},  // OPENING_BRACE
+                                                        {ErrorBegin},  // OPENING_BRACKET
+                                                        {ErrorBegin},  // CLOSING_BRACE
+                                                        {ErrorBegin},  // CLOSING_BRACKET
+                                                        {ErrorBegin},  // QUOTE
+                                                        {ErrorBegin},  // ESCAPE
+                                                        {ErrorBegin},  // COMMA
+                                                        {ErrorBegin},  // COLON
+                                                        {ErrorBegin},  // WHITE_SPACE
+                                                        {ErrorBegin},  // LINE_BREAK
+                                                        {ErrorBegin},  // OTHER
                                                         /*LIST*/
-                                                        {token_t::ErrorBegin},  // OPENING_BRACE
-                                                        {token_t::ErrorBegin},  // OPENING_BRACKET
-                                                        {token_t::ErrorBegin},  // CLOSING_BRACE
-                                                        {token_t::ErrorBegin},  // CLOSING_BRACKET
-                                                        {token_t::ErrorBegin},  // QUOTE
-                                                        {token_t::ErrorBegin},  // ESCAPE
-                                                        {token_t::ErrorBegin},  // COMMA
-                                                        {token_t::ErrorBegin},  // COLON
-                                                        {token_t::ErrorBegin},  // WHITE_SPACE
-                                                        {token_t::ErrorBegin},  // LINE_BREAK
-                                                        {token_t::ErrorBegin},  // OTHER
+                                                        {ErrorBegin},  // OPENING_BRACE
+                                                        {ErrorBegin},  // OPENING_BRACKET
+                                                        {ErrorBegin},  // CLOSING_BRACE
+                                                        {ErrorBegin},  // CLOSING_BRACKET
+                                                        {ErrorBegin},  // QUOTE
+                                                        {ErrorBegin},  // ESCAPE
+                                                        {ErrorBegin},  // COMMA
+                                                        {ErrorBegin},  // COLON
+                                                        {ErrorBegin},  // WHITE_SPACE
+                                                        {ErrorBegin},  // LINE_BREAK
+                                                        {ErrorBegin},  // OTHER
                                                         /*STRUCT*/
-                                                        {token_t::ErrorBegin},    // OPENING_BRACE
-                                                        {token_t::ErrorBegin},    // OPENING_BRACKET
-                                                        {token_t::ErrorBegin},    // CLOSING_BRACE
-                                                        {token_t::ErrorBegin},    // CLOSING_BRACKET
-                                                        {token_t::ErrorBegin},    // QUOTE
-                                                        {token_t::ErrorBegin},    // ESCAPE
-                                                        {token_t::ErrorBegin},    // COMMA
-                                                        {},                       // COLON
-                                                        {},                       // WHITE_SPACE
-                                                        {},                       // LINE_BREAK
-                                                        {token_t::ErrorBegin}}};  // OTHER
+                                                        {ErrorBegin},    // OPENING_BRACE
+                                                        {ErrorBegin},    // OPENING_BRACKET
+                                                        {ErrorBegin},    // CLOSING_BRACE
+                                                        {ErrorBegin},    // CLOSING_BRACKET
+                                                        {ErrorBegin},    // QUOTE
+                                                        {ErrorBegin},    // ESCAPE
+                                                        {ErrorBegin},    // COMMA
+                                                        {},              // COLON
+                                                        {},              // WHITE_SPACE
+                                                        {},              // LINE_BREAK
+                                                        {ErrorBegin}}};  // OTHER
 
   pda_tlt[static_cast<StateT>(pda_state_t::PD_ERR)] = {{     /*ROOT*/
                                                         {},  // OPENING_BRACE

From 6efecf49a2daf534e283c3f32694fadb5be68a9e Mon Sep 17 00:00:00 2001
From: Elias Stehle <3958403+elstehle@users.noreply.github.com>
Date: Thu, 25 Aug 2022 06:27:16 -0700
Subject: [PATCH 38/40] default_stream and more constness

---
 cpp/tests/io/json_test.cpp        |   4 +-
 cpp/tests/io/nested_json_test.cpp | 129 ++++++++++++++----------------
 2 files changed, 61 insertions(+), 72 deletions(-)

diff --git a/cpp/tests/io/json_test.cpp b/cpp/tests/io/json_test.cpp
index 77efb0c4d76..232aaa51ef3 100644
--- a/cpp/tests/io/json_test.cpp
+++ b/cpp/tests/io/json_test.cpp
@@ -944,7 +944,7 @@ TEST_F(JsonReaderTest, JsonExperimentalBasic)
 
 TEST_F(JsonReaderTest, JsonExperimentalLines)
 {
-  std::string json_string =
+  std::string const json_string =
     R"({"a":"a0"}
     {"a":"a1"}
     {"a":"a2", "b":"b2"}
@@ -971,7 +971,7 @@ TEST_F(JsonReaderTest, JsonExperimentalLines)
 
 TEST_F(JsonReaderTest, ExperimentalLinesNoOmissions)
 {
-  std::vector<std::string> json_inputs =
+  std::array<std::string const, 4> const json_inputs
     // single column
     {R"({"a":"a0"}
     {"a":"a1"}
diff --git a/cpp/tests/io/nested_json_test.cpp b/cpp/tests/io/nested_json_test.cpp
index 7ba7e0a4a03..f0ececaf4eb 100644
--- a/cpp/tests/io/nested_json_test.cpp
+++ b/cpp/tests/io/nested_json_test.cpp
@@ -21,6 +21,7 @@
 #include <cudf/io/json.hpp>
 #include <cudf/io/parquet.hpp>
 #include <cudf/lists/lists_column_view.hpp>
+#include <cudf/utilities/default_stream.hpp>
 
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/column_utilities.hpp>
@@ -29,9 +30,6 @@
 #include <cudf_test/io_metadata_utilities.hpp>
 #include <cudf_test/table_utilities.hpp>
 
-#include <rmm/cuda_stream.hpp>
-#include <rmm/cuda_stream_view.hpp>
-
 #include <string>
 
 namespace cuio_json = cudf::io::json;
@@ -139,28 +137,27 @@ TEST_F(JsonTest, StackContext)
   using StackSymbolT = char;
 
   // Prepare cuda stream for data transfers & kernels
-  rmm::cuda_stream stream{};
-  rmm::cuda_stream_view stream_view(stream);
+  constexpr auto stream = cudf::default_stream_value;
 
   // Test input
-  std::string input = R"(  [{)"
-                      R"("category": "reference",)"
-                      R"("index:": [4,12,42],)"
-                      R"("author": "Nigel Rees",)"
-                      R"("title": "[Sayings of the Century]",)"
-                      R"("price": 8.95)"
-                      R"(},  )"
-                      R"({)"
-                      R"("category": "reference",)"
-                      R"("index": [4,{},null,{"a":[{ }, {}] } ],)"
-                      R"("author": "Nigel Rees",)"
-                      R"("title": "{}\\\"[], <=semantic-symbols-string\\\\",)"
-                      R"("price": 8.95)"
-                      R"(}] )";
+  std::string const input = R"(  [{)"
+                            R"("category": "reference",)"
+                            R"("index:": [4,12,42],)"
+                            R"("author": "Nigel Rees",)"
+                            R"("title": "[Sayings of the Century]",)"
+                            R"("price": 8.95)"
+                            R"(},  )"
+                            R"({)"
+                            R"("category": "reference",)"
+                            R"("index": [4,{},null,{"a":[{ }, {}] } ],)"
+                            R"("author": "Nigel Rees",)"
+                            R"("title": "{}\\\"[], <=semantic-symbols-string\\\\",)"
+                            R"("price": 8.95)"
+                            R"(}] )";
 
   // Prepare input & output buffers
-  rmm::device_uvector<SymbolT> d_input(input.size(), stream_view);
-  hostdevice_vector<StackSymbolT> stack_context(input.size(), stream_view);
+  rmm::device_uvector<SymbolT> d_input(input.size(), stream);
+  hostdevice_vector<StackSymbolT> stack_context(input.size(), stream);
 
   ASSERT_CUDA_SUCCEEDED(cudaMemcpyAsync(d_input.data(),
                                         input.data(),
@@ -169,13 +166,13 @@ TEST_F(JsonTest, StackContext)
                                         stream.value()));
 
   // Run algorithm
-  cuio_json::detail::get_stack_context(d_input, stack_context.device_ptr(), stream_view);
+  cuio_json::detail::get_stack_context(d_input, stack_context.device_ptr(), stream);
 
   // Copy back the results
-  stack_context.device_to_host(stream_view);
+  stack_context.device_to_host(stream);
 
   // Make sure we copied back the stack context
-  stream_view.synchronize();
+  stream.synchronize();
 
   std::vector<char> golden_stack_context{
     '_', '_', '_', '[', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{',
@@ -205,15 +202,14 @@ TEST_F(JsonTest, StackContextUtf8)
   using StackSymbolT = char;
 
   // Prepare cuda stream for data transfers & kernels
-  rmm::cuda_stream stream{};
-  rmm::cuda_stream_view stream_view(stream);
+  constexpr auto stream = cudf::default_stream_value;
 
   // Test input
-  std::string input = R"([{"a":{"year":1882,"author": "Bharathi"}, {"a":"filip ʒakotɛ"}}])";
+  std::string const input = R"([{"a":{"year":1882,"author": "Bharathi"}, {"a":"filip ʒakotɛ"}}])";
 
   // Prepare input & output buffers
-  rmm::device_uvector<SymbolT> d_input(input.size(), stream_view);
-  hostdevice_vector<StackSymbolT> stack_context(input.size(), stream_view);
+  rmm::device_uvector<SymbolT> d_input(input.size(), stream);
+  hostdevice_vector<StackSymbolT> stack_context(input.size(), stream);
 
   ASSERT_CUDA_SUCCEEDED(cudaMemcpyAsync(d_input.data(),
                                         input.data(),
@@ -222,13 +218,13 @@ TEST_F(JsonTest, StackContextUtf8)
                                         stream.value()));
 
   // Run algorithm
-  cuio_json::detail::get_stack_context(d_input, stack_context.device_ptr(), stream_view);
+  cuio_json::detail::get_stack_context(d_input, stack_context.device_ptr(), stream);
 
   // Copy back the results
-  stack_context.device_to_host(stream_view);
+  stack_context.device_to_host(stream);
 
   // Make sure we copied back the stack context
-  stream_view.synchronize();
+  stream.synchronize();
 
   std::vector<char> golden_stack_context{
     '_', '[', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{', '{',
@@ -247,30 +243,29 @@ TEST_F(JsonTest, TokenStream)
   using cuio_json::SymbolT;
 
   // Prepare cuda stream for data transfers & kernels
-  rmm::cuda_stream stream{};
-  rmm::cuda_stream_view stream_view(stream);
+  constexpr auto stream = cudf::default_stream_value;
 
   // Default parsing options
   cudf::io::json_reader_options default_options{};
 
   // Test input
-  std::string input = R"(  [{)"
-                      R"("category": "reference",)"
-                      R"("index:": [4,12,42],)"
-                      R"("author": "Nigel Rees",)"
-                      R"("title": "[Sayings of the Century]",)"
-                      R"("price": 8.95)"
-                      R"(},  )"
-                      R"({)"
-                      R"("category": "reference",)"
-                      R"("index": [4,{},null,{"a":[{ }, {}] } ],)"
-                      R"("author": "Nigel Rees",)"
-                      R"("title": "{}[], <=semantic-symbols-string",)"
-                      R"("price": 8.95)"
-                      R"(}] )";
+  std::string const input = R"(  [{)"
+                            R"("category": "reference",)"
+                            R"("index:": [4,12,42],)"
+                            R"("author": "Nigel Rees",)"
+                            R"("title": "[Sayings of the Century]",)"
+                            R"("price": 8.95)"
+                            R"(},  )"
+                            R"({)"
+                            R"("category": "reference",)"
+                            R"("index": [4,{},null,{"a":[{ }, {}] } ],)"
+                            R"("author": "Nigel Rees",)"
+                            R"("title": "{}[], <=semantic-symbols-string",)"
+                            R"("price": 8.95)"
+                            R"(}] )";
 
   // Prepare input & output buffers
-  rmm::device_uvector<SymbolT> d_input(input.size(), stream_view);
+  rmm::device_uvector<SymbolT> d_input(input.size(), stream);
 
   ASSERT_CUDA_SUCCEEDED(cudaMemcpyAsync(d_input.data(),
                                         input.data(),
@@ -280,7 +275,7 @@ TEST_F(JsonTest, TokenStream)
 
   // Parse the JSON and get the token stream
   const auto [d_tokens_gpu, d_token_indices_gpu] =
-    cuio_json::detail::get_token_stream(d_input, default_options, stream_view);
+    cuio_json::detail::get_token_stream(d_input, default_options, stream);
 
   // Copy back the number of tokens that were written
   thrust::host_vector<PdaTokenT> tokens_gpu =
@@ -289,7 +284,7 @@ TEST_F(JsonTest, TokenStream)
     cudf::detail::make_host_vector_async(d_token_indices_gpu, stream);
 
   // Make sure we copied back all relevant data
-  stream_view.synchronize();
+  stream.synchronize();
 
   // Golden token stream sample
   using token_t = cuio_json::token_t;
@@ -336,16 +331,15 @@ TEST_F(JsonTest, ExtractColumn)
   using cuio_json::SymbolT;
 
   // Prepare cuda stream for data transfers & kernels
-  rmm::cuda_stream stream{};
-  rmm::cuda_stream_view stream_view(stream);
+  constexpr auto stream = cudf::default_stream_value;
 
   // Default parsing options
   cudf::io::json_reader_options default_options{};
 
-  std::string input = R"( [{"a":0.0, "b":1.0}, {"a":0.1, "b":1.1}, {"a":0.2, "b":1.2}] )";
+  std::string const input = R"( [{"a":0.0, "b":1.0}, {"a":0.1, "b":1.1}, {"a":0.2, "b":1.2}] )";
   // Get the JSON's tree representation
   auto const cudf_table = cuio_json::detail::parse_nested_json(
-    cudf::host_span<SymbolT const>{input.data(), input.size()}, default_options, stream_view);
+    cudf::host_span<SymbolT const>{input.data(), input.size()}, default_options, stream);
 
   auto const expected_col_count  = 2;
   auto const first_column_index  = 0;
@@ -363,14 +357,13 @@ TEST_F(JsonTest, ExtractColumn)
 TEST_F(JsonTest, UTF_JSON)
 {
   // Prepare cuda stream for data transfers & kernels
-  rmm::cuda_stream stream{};
-  rmm::cuda_stream_view stream_view(stream);
+  constexpr auto stream = cudf::default_stream_value;
 
   // Default parsing options
   cudf::io::json_reader_options default_options{};
 
   // Only ASCII string
-  std::string ascii_pass = R"([
+  std::string const ascii_pass = R"([
   {"a":1,"b":2,"c":[3], "d": {}},
   {"a":1,"b":4.0,"c":[], "d": {"year":1882,"author": "Bharathi"}},
   {"a":1,"b":6.0,"c":[5, 7], "d": null},
@@ -378,22 +371,20 @@ TEST_F(JsonTest, UTF_JSON)
   {"a":1,"b":null,"c":null},
   {"a":1,"b":Infinity,"c":[null], "d": {"year":-600,"author": "Kaniyan"}}])";
 
-  CUDF_EXPECT_NO_THROW(
-    cuio_json::detail::parse_nested_json(ascii_pass, default_options, stream_view));
+  CUDF_EXPECT_NO_THROW(cuio_json::detail::parse_nested_json(ascii_pass, default_options, stream));
 
   // utf-8 string that fails parsing.
-  std::string utf_failed = R"([
+  std::string const utf_failed = R"([
   {"a":1,"b":2,"c":[3], "d": {}},
   {"a":1,"b":4.0,"c":[], "d": {"year":1882,"author": "Bharathi"}},
   {"a":1,"b":6.0,"c":[5, 7], "d": null},
   {"a":1,"b":8.0,"c":null, "d": {}},
   {"a":1,"b":null,"c":null},
   {"a":1,"b":Infinity,"c":[null], "d": {"year":-600,"author": "filip ʒakotɛ"}}])";
-  CUDF_EXPECT_NO_THROW(
-    cuio_json::detail::parse_nested_json(utf_failed, default_options, stream_view));
+  CUDF_EXPECT_NO_THROW(cuio_json::detail::parse_nested_json(utf_failed, default_options, stream));
 
   // utf-8 string that passes parsing.
-  std::string utf_pass = R"([
+  std::string const utf_pass = R"([
   {"a":1,"b":2,"c":[3], "d": {}},
   {"a":1,"b":4.0,"c":[], "d": {"year":1882,"author": "Bharathi"}},
   {"a":1,"b":6.0,"c":[5, 7], "d": null},
@@ -401,20 +392,18 @@ TEST_F(JsonTest, UTF_JSON)
   {"a":1,"b":null,"c":null},
   {"a":1,"b":Infinity,"c":[null], "d": {"year":-600,"author": "Kaniyan"}},
   {"a":1,"b":NaN,"c":[null, null], "d": {"year": 2, "author": "filip ʒakotɛ"}}])";
-  CUDF_EXPECT_NO_THROW(
-    cuio_json::detail::parse_nested_json(utf_pass, default_options, stream_view));
+  CUDF_EXPECT_NO_THROW(cuio_json::detail::parse_nested_json(utf_pass, default_options, stream));
 }
 
 TEST_F(JsonTest, FromParquet)
 {
   using cuio_json::SymbolT;
 
-  std::string input =
+  std::string const input =
     R"([{"0":{},"1":[],"2":{}},{"1":[[""],[]],"2":{"2":""}},{"0":{"a":"1"},"2":{"0":"W&RR=+I","1":""}}])";
 
   // Prepare cuda stream for data transfers & kernels
-  rmm::cuda_stream stream{};
-  rmm::cuda_stream_view stream_view(stream);
+  constexpr auto stream = cudf::default_stream_value;
 
   // Default parsing options
   cudf::io::json_reader_options default_options{};
@@ -505,7 +494,7 @@ TEST_F(JsonTest, FromParquet)
 
   // Read in the data via the JSON parser
   auto const cudf_table = cuio_json::detail::parse_nested_json(
-    cudf::host_span<SymbolT const>{input.data(), input.size()}, default_options, stream_view);
+    cudf::host_span<SymbolT const>{input.data(), input.size()}, default_options, stream);
 
   // Verify that the data read via parquet matches the data read via JSON
   CUDF_TEST_EXPECT_TABLES_EQUAL(cudf_table.tbl->view(), result.tbl->view());

From 272bc164fd5a84a301c497ad3bb1d681d05a6a7e Mon Sep 17 00:00:00 2001
From: Elias Stehle <3958403+elstehle@users.noreply.github.com>
Date: Thu, 25 Aug 2022 06:42:58 -0700
Subject: [PATCH 39/40] add TODO for stack ctx interface

---
 cpp/src/io/json/nested_json.hpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/cpp/src/io/json/nested_json.hpp b/cpp/src/io/json/nested_json.hpp
index 8fa4d82a499..4e930f86591 100644
--- a/cpp/src/io/json/nested_json.hpp
+++ b/cpp/src/io/json/nested_json.hpp
@@ -262,6 +262,8 @@ enum token_t : PdaTokenT {
 };
 
 namespace detail {
+
+// TODO: return device_uvector instead of passing pre-allocated memory
 /**
  * @brief Identifies the stack context for each character from a JSON input. Specifically, we
  * identify brackets and braces outside of quoted fields (e.g., field names, strings).

From 9822ecb01f0eaa5c88fd5638f7b235dd16c3707a Mon Sep 17 00:00:00 2001
From: Elias Stehle <3958403+elstehle@users.noreply.github.com>
Date: Thu, 25 Aug 2022 06:54:41 -0700
Subject: [PATCH 40/40] clarifies treatment of empty lines for ndjson

---
 cpp/src/io/json/nested_json_gpu.cu | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/cpp/src/io/json/nested_json_gpu.cu b/cpp/src/io/json/nested_json_gpu.cu
index fe5f00318b9..3bc7bd89692 100644
--- a/cpp/src/io/json/nested_json_gpu.cu
+++ b/cpp/src/io/json/nested_json_gpu.cu
@@ -415,6 +415,8 @@ auto get_transition_table(bool newline_delimited_json)
   static_assert(static_cast<PdaStackSymbolGroupIdT>(stack_symbol_group_id::STACK_LIST) == 1);
   static_assert(static_cast<PdaStackSymbolGroupIdT>(stack_symbol_group_id::STACK_STRUCT) == 2);
 
+  // In case of newline-delimited JSON, multiple newlines are ignored, similar to whitespace.
+  // That is, empty lines are ignored.
   auto const PD_ANL = newline_delimited_json ? PD_BOV : PD_PVL;
   std::array<std::array<pda_state_t, NUM_PDA_SGIDS>, PD_NUM_STATES> pda_tt;
   //  {       [       }       ]       "       \       ,       :     space   newline other