Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[gpuCI] Forward-merge branch-22.10 to branch-22.12 [skip gpuci] #11784

Merged
merged 3 commits into branch-22.12 from branch-22.10
Sep 27, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,7 @@ add_library(
src/io/csv/reader_impl.cu
src/io/csv/writer_impl.cu
src/io/functions.cpp
src/io/json/json_column.cu
src/io/json/json_gpu.cu
src/io/json/json_tree.cu
src/io/json/nested_json_gpu.cu
Expand Down Expand Up @@ -354,6 +355,7 @@ add_library(
src/io/statistics/parquet_column_statistics.cu
src/io/text/byte_range_info.cpp
src/io/text/data_chunk_source_factories.cpp
src/io/text/bgzip_data_chunk_source.cu
src/io/text/multibyte_split.cu
src/io/utilities/column_buffer.cpp
src/io/utilities/config_utils.cpp
Expand Down
3 changes: 2 additions & 1 deletion cpp/benchmarks/io/json/nested_json.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ void BM_NESTED_JSON(nvbench::state& state)
state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value()));
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
// Allocate device-side temporary storage & run algorithm
cudf::io::json::detail::parse_nested_json(input, default_options, cudf::default_stream_value);
cudf::io::json::detail::device_parse_nested_json(
input, default_options, cudf::default_stream_value);
});

auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
Expand Down
2 changes: 2 additions & 0 deletions cpp/include/cudf/io/detail/data_casting.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <cudf/column/column.hpp>
#include <cudf/column/column_device_view.cuh>
#include <cudf/column/column_factories.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/strings/detail/utf8.hpp>
#include <cudf/types.hpp>

Expand Down Expand Up @@ -304,6 +305,7 @@ std::unique_ptr<column> parse_data(str_tuple_it str_tuples,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
if (col_type == cudf::data_type{cudf::type_id::STRING}) {
rmm::device_uvector<size_type> offsets(col_size + 1, stream);

Expand Down
37 changes: 36 additions & 1 deletion cpp/include/cudf/io/text/data_chunk_source_factories.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,51 @@ namespace cudf::io::text {

/**
* @brief Creates a data source capable of producing device-buffered views of the given string.
* @param data the host data to be exposed as a data chunk source. Its lifetime must be at least as
* long as the lifetime of the returned data_chunk_source.
* @return the data chunk source for the provided host data. It copies data from the host to the
* device.
*/
std::unique_ptr<data_chunk_source> make_source(host_span<const char> data);

/**
* @brief Creates a data source capable of producing device-buffered views of the file
* @param filename the filename of the file to be exposed as a data chunk source.
* @return the data chunk source for the provided filename. It reads data from the file and copies
* it to the device.
*/
std::unique_ptr<data_chunk_source> make_source_from_file(std::string const& filename);
std::unique_ptr<data_chunk_source> make_source_from_file(std::string_view filename);

/**
* @brief Creates a data source capable of producing device-buffered views of a BGZIP compressed
* file.
* @param filename the filename of the BGZIP-compressed file to be exposed as a data chunk source.
* @return the data chunk source for the provided filename. It reads data from the file and copies
* it to the device, where it will be decompressed.
*/
std::unique_ptr<data_chunk_source> make_source_from_bgzip_file(std::string_view filename);

/**
* @brief Creates a data source capable of producing device-buffered views of a BGZIP compressed
* file with virtual record offsets.
* @param filename the filename of the BGZIP-compressed file to be exposed as a data chunk source.
* @param virtual_begin the virtual (Tabix) offset of the first byte to be read. Its upper 48 bits
* describe the offset into the compressed file, its lower 16 bits describe the
* block-local offset.
* @param virtual_end the virtual (Tabix) offset one past the last byte to be read.
* @return the data chunk source for the provided filename. It reads data from the file and copies
* it to the device, where it will be decompressed. The chunk source only returns data
* between the virtual offsets `virtual_begin` and `virtual_end`.
*/
std::unique_ptr<data_chunk_source> make_source_from_bgzip_file(std::string_view filename,
uint64_t virtual_begin,
uint64_t virtual_end);

/**
* @brief Creates a data source capable of producing views of the given device string scalar
* @param data the device data to be exposed as a data chunk source. Its lifetime must be at least
* as long as the lifetime of the returned data_chunk_source.
 * @return the data chunk source for the provided device data. It does not create any copies.
*/
std::unique_ptr<data_chunk_source> make_source(cudf::string_scalar& data);

Expand Down
25 changes: 11 additions & 14 deletions cpp/src/io/comp/nvcomp_adapter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@ namespace cudf::io::nvcomp {

// Dispatcher for nvcompBatched<format>DecompressGetTempSizeEx
template <typename... Args>
nvcompStatus_t batched_decompress_get_temp_size_ex(compression_type compression, Args&&... args)
std::optional<nvcompStatus_t> batched_decompress_get_temp_size_ex(compression_type compression,
Args&&... args)
{
#if NVCOMP_HAS_TEMPSIZE_EX
switch (compression) {
Expand All @@ -78,13 +79,13 @@ nvcompStatus_t batched_decompress_get_temp_size_ex(compression_type compression,
#if NVCOMP_HAS_ZSTD_DECOMP
return nvcompBatchedZstdDecompressGetTempSizeEx(std::forward<Args>(args)...);
#else
CUDF_FAIL("Unsupported compression type");
return std::nullopt;
#endif
case compression_type::DEFLATE: [[fallthrough]];
default: CUDF_FAIL("Unsupported compression type");
default: return std::nullopt;
}
#endif
CUDF_FAIL("GetTempSizeEx is not supported in the current nvCOMP version");
return std::nullopt;
}

// Dispatcher for nvcompBatched<format>DecompressGetTempSize
Expand Down Expand Up @@ -138,16 +139,12 @@ size_t batched_decompress_temp_size(compression_type compression,
size_t max_uncomp_chunk_size,
size_t max_total_uncomp_size)
{
size_t temp_size = 0;
auto const nvcomp_status = [&]() {
try {
return batched_decompress_get_temp_size_ex(
compression, num_chunks, max_uncomp_chunk_size, &temp_size, max_total_uncomp_size);
} catch (cudf::logic_error const& err) {
return batched_decompress_get_temp_size(
compression, num_chunks, max_uncomp_chunk_size, &temp_size);
}
}();
size_t temp_size = 0;
auto const nvcomp_status =
batched_decompress_get_temp_size_ex(
compression, num_chunks, max_uncomp_chunk_size, &temp_size, max_total_uncomp_size)
.value_or(batched_decompress_get_temp_size(
compression, num_chunks, max_uncomp_chunk_size, &temp_size));

CUDF_EXPECTS(nvcomp_status == nvcompStatus_t::nvcompSuccess,
"Unable to get scratch size for decompression");
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/io/fst/dispatch_dfa.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -211,8 +211,8 @@ struct DispatchFSM : DeviceFSMPolicy {
if (CubDebug(error = dfa_simulation_config.Init<PolicyT>(dfa_kernel))) return error;

// Kernel invocation
uint32_t grid_size =
CUB_QUOTIENT_CEILING(num_chars, PolicyT::BLOCK_THREADS * PolicyT::ITEMS_PER_THREAD);
uint32_t grid_size = std::max(
1u, CUB_QUOTIENT_CEILING(num_chars, PolicyT::BLOCK_THREADS * PolicyT::ITEMS_PER_THREAD));
uint32_t block_threads = dfa_simulation_config.block_threads;

dfa_kernel<<<grid_size, block_threads, 0, stream>>>(dfa,
Expand Down Expand Up @@ -348,7 +348,7 @@ struct DispatchFSM : DeviceFSMPolicy {
NUM_SYMBOLS_PER_BLOCK = BLOCK_THREADS * SYMBOLS_PER_THREAD
};

BlockOffsetT num_blocks = CUB_QUOTIENT_CEILING(num_chars, NUM_SYMBOLS_PER_BLOCK);
BlockOffsetT num_blocks = std::max(1u, CUB_QUOTIENT_CEILING(num_chars, NUM_SYMBOLS_PER_BLOCK));
size_t num_threads = num_blocks * BLOCK_THREADS;

//------------------------------------------------------------------------------
Expand Down
9 changes: 8 additions & 1 deletion cpp/src/io/json/experimental/read_json.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,14 @@ table_with_metadata read_json(host_span<std::unique_ptr<datasource>> sources,
auto const buffer = ingest_raw_input(sources, reader_opts.get_compression());
auto data = host_span<char const>(reinterpret_cast<char const*>(buffer.data()), buffer.size());

return cudf::io::json::detail::parse_nested_json(data, reader_opts, stream, mr);
try {
return cudf::io::json::detail::device_parse_nested_json(data, reader_opts, stream, mr);
} catch (cudf::logic_error const& err) {
#ifdef NJP_DEBUG_PRINT
std::cout << "Fall back to host nested json parser" << std::endl;
#endif
return cudf::io::json::detail::host_parse_nested_json(data, reader_opts, stream, mr);
}
}

} // namespace cudf::io::detail::json::experimental
Loading