Skip to content

Commit

Permalink
Merge pull request #5 from rapidsai/branch-22.12
Browse files Browse the repository at this point in the history
merge branch-22.12
  • Loading branch information
etseidl authored Sep 28, 2022
2 parents 6e7eb8d + b8ab576 commit d3378fc
Show file tree
Hide file tree
Showing 63 changed files with 2,446 additions and 323 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# cuDF 22.12.00 (Date TBD)

Please see https://github.com/rapidsai/cudf/releases/tag/v22.12.00a for the latest changes to this development branch.

# cuDF 22.10.00 (Date TBD)

Please see https://github.com/rapidsai/cudf/releases/tag/v22.10.00a for the latest changes to this development branch.
Expand Down
2 changes: 1 addition & 1 deletion ci/checks/style.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ LANG=C.UTF-8
. /opt/conda/etc/profile.d/conda.sh
conda activate rapids

FORMAT_FILE_URL=https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-22.10/cmake-format-rapids-cmake.json
FORMAT_FILE_URL=https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-22.12/cmake-format-rapids-cmake.json
export RAPIDS_CMAKE_FORMAT_FILE=/tmp/rapids_cmake_ci/cmake-formats-rapids-cmake.json
mkdir -p $(dirname ${RAPIDS_CMAKE_FORMAT_FILE})
wget -O ${RAPIDS_CMAKE_FORMAT_FILE} ${FORMAT_FILE_URL}
Expand Down
2 changes: 1 addition & 1 deletion ci/gpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ unset GIT_DESCRIBE_TAG
export INSTALL_DASK_MAIN=1

# ucx-py version
export UCX_PY_VERSION='0.28.*'
export UCX_PY_VERSION='0.29.*'

################################################################################
# TRAP - Setup trap for removing jitify cache
Expand Down
2 changes: 1 addition & 1 deletion ci/gpu/java.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ export GIT_DESCRIBE_TAG=`git describe --tags`
export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'`

# ucx-py version
export UCX_PY_VERSION='0.28.*'
export UCX_PY_VERSION='0.29.*'

################################################################################
# TRAP - Setup trap for removing jitify cache
Expand Down
4 changes: 2 additions & 2 deletions conda/environments/cudf_dev_cuda11.5.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ dependencies:
- clang=11.1.0
- clang-tools=11.1.0
- cupy>=9.5.0,<12.0.0a0
- rmm=22.10.*
- rmm=22.12.*
- cmake>=3.20.1,!=3.23.0
- cmake_setuptools>=0.1.3
- scikit-build>=0.13.1
Expand Down Expand Up @@ -62,7 +62,7 @@ dependencies:
- sphinx-autobuild
- myst-nb
- scipy
- dask-cuda=22.10.*
- dask-cuda=22.12.*
- mimesis<4.1
- packaging
- protobuf>=3.20.1,<3.21.0a0
Expand Down
4 changes: 3 additions & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ rapids_cuda_init_architectures(CUDF)

project(
CUDF
VERSION 22.10.00
VERSION 22.12.00
LANGUAGES C CXX CUDA
)
if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" AND CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.5)
Expand Down Expand Up @@ -327,6 +327,7 @@ add_library(
src/io/csv/reader_impl.cu
src/io/csv/writer_impl.cu
src/io/functions.cpp
src/io/json/json_column.cu
src/io/json/json_gpu.cu
src/io/json/json_tree.cu
src/io/json/nested_json_gpu.cu
Expand Down Expand Up @@ -354,6 +355,7 @@ add_library(
src/io/statistics/parquet_column_statistics.cu
src/io/text/byte_range_info.cpp
src/io/text/data_chunk_source_factories.cpp
src/io/text/bgzip_data_chunk_source.cu
src/io/text/multibyte_split.cu
src/io/utilities/column_buffer.cpp
src/io/utilities/config_utils.cpp
Expand Down
3 changes: 2 additions & 1 deletion cpp/benchmarks/io/json/nested_json.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ void BM_NESTED_JSON(nvbench::state& state)
state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value()));
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
// Allocate device-side temporary storage & run algorithm
cudf::io::json::detail::parse_nested_json(input, default_options, cudf::default_stream_value);
cudf::io::json::detail::device_parse_nested_json(
input, default_options, cudf::default_stream_value);
});

auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
Expand Down
4 changes: 2 additions & 2 deletions cpp/doxygen/Doxyfile
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ PROJECT_NAME = "libcudf"
# could be handy for archiving the generated documentation or if some version
# control system is used.

PROJECT_NUMBER = 22.10.00
PROJECT_NUMBER = 22.12.00

# Using the PROJECT_BRIEF tag one can provide an optional one line description
# for a project that appears at the top of each page and should give viewer a
Expand Down Expand Up @@ -2162,7 +2162,7 @@ SKIP_FUNCTION_MACROS = YES
# the path). If a tag file is not located in the directory in which doxygen is
# run, you must also specify the path to the tagfile here.

TAGFILES = rmm.tag=https://docs.rapids.ai/api/librmm/22.10
TAGFILES = rmm.tag=https://docs.rapids.ai/api/librmm/22.12

# When a file name is specified after GENERATE_TAGFILE, doxygen will create a
# tag file that is based on the input files it reads. See section "Linking to
Expand Down
2 changes: 1 addition & 1 deletion cpp/examples/basic/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ file(
)
include(${CMAKE_BINARY_DIR}/cmake/get_cpm.cmake)

set(CUDF_TAG branch-22.10)
set(CUDF_TAG branch-22.12)
CPMFindPackage(
  NAME cudf GIT_REPOSITORY https://github.com/rapidsai/cudf
GIT_TAG ${CUDF_TAG}
Expand Down
2 changes: 2 additions & 0 deletions cpp/include/cudf/io/detail/data_casting.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <cudf/column/column.hpp>
#include <cudf/column/column_device_view.cuh>
#include <cudf/column/column_factories.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/strings/detail/utf8.hpp>
#include <cudf/types.hpp>

Expand Down Expand Up @@ -304,6 +305,7 @@ std::unique_ptr<column> parse_data(str_tuple_it str_tuples,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
if (col_type == cudf::data_type{cudf::type_id::STRING}) {
rmm::device_uvector<size_type> offsets(col_size + 1, stream);

Expand Down
37 changes: 36 additions & 1 deletion cpp/include/cudf/io/text/data_chunk_source_factories.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,51 @@ namespace cudf::io::text {

/**
* @brief Creates a data source capable of producing device-buffered views of the given string.
* @param data the host data to be exposed as a data chunk source. Its lifetime must be at least as
* long as the lifetime of the returned data_chunk_source.
* @return the data chunk source for the provided host data. It copies data from the host to the
* device.
*/
std::unique_ptr<data_chunk_source> make_source(host_span<const char> data);

/**
* @brief Creates a data source capable of producing device-buffered views of the file
* @param filename the filename of the file to be exposed as a data chunk source.
* @return the data chunk source for the provided filename. It reads data from the file and copies
* it to the device.
*/
std::unique_ptr<data_chunk_source> make_source_from_file(std::string const& filename);
std::unique_ptr<data_chunk_source> make_source_from_file(std::string_view filename);

/**
* @brief Creates a data source capable of producing device-buffered views of a BGZIP compressed
* file.
* @param filename the filename of the BGZIP-compressed file to be exposed as a data chunk source.
* @return the data chunk source for the provided filename. It reads data from the file and copies
* it to the device, where it will be decompressed.
*/
std::unique_ptr<data_chunk_source> make_source_from_bgzip_file(std::string_view filename);

/**
* @brief Creates a data source capable of producing device-buffered views of a BGZIP compressed
* file with virtual record offsets.
* @param filename the filename of the BGZIP-compressed file to be exposed as a data chunk source.
* @param virtual_begin the virtual (Tabix) offset of the first byte to be read. Its upper 48 bits
* describe the offset into the compressed file, its lower 16 bits describe the
* block-local offset.
* @param virtual_end the virtual (Tabix) offset one past the last byte to be read.
* @return the data chunk source for the provided filename. It reads data from the file and copies
* it to the device, where it will be decompressed. The chunk source only returns data
* between the virtual offsets `virtual_begin` and `virtual_end`.
*/
std::unique_ptr<data_chunk_source> make_source_from_bgzip_file(std::string_view filename,
uint64_t virtual_begin,
uint64_t virtual_end);

/**
* @brief Creates a data source capable of producing views of the given device string scalar
* @param data the device data to be exposed as a data chunk source. Its lifetime must be at least
* as long as the lifetime of the returned data_chunk_source.
* @return the data chunk source for the provided host data. It does not create any copies.
*/
std::unique_ptr<data_chunk_source> make_source(cudf::string_scalar& data);

Expand Down
2 changes: 1 addition & 1 deletion cpp/libcudf_kafka/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ include(rapids-find)

project(
CUDA_KAFKA
VERSION 22.10.00
VERSION 22.12.00
LANGUAGES CXX
)

Expand Down
25 changes: 11 additions & 14 deletions cpp/src/io/comp/nvcomp_adapter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@ namespace cudf::io::nvcomp {

// Dispatcher for nvcompBatched<format>DecompressGetTempSizeEx
template <typename... Args>
nvcompStatus_t batched_decompress_get_temp_size_ex(compression_type compression, Args&&... args)
std::optional<nvcompStatus_t> batched_decompress_get_temp_size_ex(compression_type compression,
Args&&... args)
{
#if NVCOMP_HAS_TEMPSIZE_EX
switch (compression) {
Expand All @@ -78,13 +79,13 @@ nvcompStatus_t batched_decompress_get_temp_size_ex(compression_type compression,
#if NVCOMP_HAS_ZSTD_DECOMP
return nvcompBatchedZstdDecompressGetTempSizeEx(std::forward<Args>(args)...);
#else
CUDF_FAIL("Unsupported compression type");
return std::nullopt;
#endif
case compression_type::DEFLATE: [[fallthrough]];
default: CUDF_FAIL("Unsupported compression type");
default: return std::nullopt;
}
#endif
CUDF_FAIL("GetTempSizeEx is not supported in the current nvCOMP version");
return std::nullopt;
}

// Dispatcher for nvcompBatched<format>DecompressGetTempSize
Expand Down Expand Up @@ -138,16 +139,12 @@ size_t batched_decompress_temp_size(compression_type compression,
size_t max_uncomp_chunk_size,
size_t max_total_uncomp_size)
{
size_t temp_size = 0;
auto const nvcomp_status = [&]() {
try {
return batched_decompress_get_temp_size_ex(
compression, num_chunks, max_uncomp_chunk_size, &temp_size, max_total_uncomp_size);
} catch (cudf::logic_error const& err) {
return batched_decompress_get_temp_size(
compression, num_chunks, max_uncomp_chunk_size, &temp_size);
}
}();
size_t temp_size = 0;
auto const nvcomp_status =
batched_decompress_get_temp_size_ex(
compression, num_chunks, max_uncomp_chunk_size, &temp_size, max_total_uncomp_size)
.value_or(batched_decompress_get_temp_size(
compression, num_chunks, max_uncomp_chunk_size, &temp_size));

CUDF_EXPECTS(nvcomp_status == nvcompStatus_t::nvcompSuccess,
"Unable to get scratch size for decompression");
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/io/fst/dispatch_dfa.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -211,8 +211,8 @@ struct DispatchFSM : DeviceFSMPolicy {
if (CubDebug(error = dfa_simulation_config.Init<PolicyT>(dfa_kernel))) return error;

// Kernel invocation
uint32_t grid_size =
CUB_QUOTIENT_CEILING(num_chars, PolicyT::BLOCK_THREADS * PolicyT::ITEMS_PER_THREAD);
uint32_t grid_size = std::max(
1u, CUB_QUOTIENT_CEILING(num_chars, PolicyT::BLOCK_THREADS * PolicyT::ITEMS_PER_THREAD));
uint32_t block_threads = dfa_simulation_config.block_threads;

dfa_kernel<<<grid_size, block_threads, 0, stream>>>(dfa,
Expand Down Expand Up @@ -348,7 +348,7 @@ struct DispatchFSM : DeviceFSMPolicy {
NUM_SYMBOLS_PER_BLOCK = BLOCK_THREADS * SYMBOLS_PER_THREAD
};

BlockOffsetT num_blocks = CUB_QUOTIENT_CEILING(num_chars, NUM_SYMBOLS_PER_BLOCK);
BlockOffsetT num_blocks = std::max(1u, CUB_QUOTIENT_CEILING(num_chars, NUM_SYMBOLS_PER_BLOCK));
size_t num_threads = num_blocks * BLOCK_THREADS;

//------------------------------------------------------------------------------
Expand Down
9 changes: 8 additions & 1 deletion cpp/src/io/json/experimental/read_json.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,14 @@ table_with_metadata read_json(host_span<std::unique_ptr<datasource>> sources,
auto const buffer = ingest_raw_input(sources, reader_opts.get_compression());
auto data = host_span<char const>(reinterpret_cast<char const*>(buffer.data()), buffer.size());

return cudf::io::json::detail::parse_nested_json(data, reader_opts, stream, mr);
try {
return cudf::io::json::detail::device_parse_nested_json(data, reader_opts, stream, mr);
} catch (cudf::logic_error const& err) {
#ifdef NJP_DEBUG_PRINT
std::cout << "Fall back to host nested json parser" << std::endl;
#endif
return cudf::io::json::detail::host_parse_nested_json(data, reader_opts, stream, mr);
}
}

} // namespace cudf::io::detail::json::experimental
Loading

0 comments on commit d3378fc

Please sign in to comment.