Skip to content

Commit

Permalink
Merge pull request #5 from rapidsai/branch-22.12
Browse files Browse the repository at this point in the history
merge branch-22.12
  • Loading branch information
etseidl authored Sep 28, 2022
2 parents 6e7eb8d + b8ab576 commit d3378fc
Show file tree
Hide file tree
Showing 63 changed files with 2,446 additions and 323 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# cuDF 22.12.00 (Date TBD)

Please see https://github.com/rapidsai/cudf/releases/tag/v22.12.00a for the latest changes to this development branch.

# cuDF 22.10.00 (Date TBD)

Please see https://github.com/rapidsai/cudf/releases/tag/v22.10.00a for the latest changes to this development branch.
Expand Down
2 changes: 1 addition & 1 deletion ci/checks/style.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ LANG=C.UTF-8
. /opt/conda/etc/profile.d/conda.sh
conda activate rapids

FORMAT_FILE_URL=https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-22.10/cmake-format-rapids-cmake.json
FORMAT_FILE_URL=https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-22.12/cmake-format-rapids-cmake.json
export RAPIDS_CMAKE_FORMAT_FILE=/tmp/rapids_cmake_ci/cmake-formats-rapids-cmake.json
mkdir -p $(dirname ${RAPIDS_CMAKE_FORMAT_FILE})
wget -O ${RAPIDS_CMAKE_FORMAT_FILE} ${FORMAT_FILE_URL}
Expand Down
2 changes: 1 addition & 1 deletion ci/gpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ unset GIT_DESCRIBE_TAG
export INSTALL_DASK_MAIN=1

# ucx-py version
export UCX_PY_VERSION='0.28.*'
export UCX_PY_VERSION='0.29.*'

################################################################################
# TRAP - Setup trap for removing jitify cache
Expand Down
2 changes: 1 addition & 1 deletion ci/gpu/java.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ export GIT_DESCRIBE_TAG=`git describe --tags`
export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'`

# ucx-py version
export UCX_PY_VERSION='0.28.*'
export UCX_PY_VERSION='0.29.*'

################################################################################
# TRAP - Setup trap for removing jitify cache
Expand Down
4 changes: 2 additions & 2 deletions conda/environments/cudf_dev_cuda11.5.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ dependencies:
- clang=11.1.0
- clang-tools=11.1.0
- cupy>=9.5.0,<12.0.0a0
- rmm=22.10.*
- rmm=22.12.*
- cmake>=3.20.1,!=3.23.0
- cmake_setuptools>=0.1.3
- scikit-build>=0.13.1
Expand Down Expand Up @@ -62,7 +62,7 @@ dependencies:
- sphinx-autobuild
- myst-nb
- scipy
- dask-cuda=22.10.*
- dask-cuda=22.12.*
- mimesis<4.1
- packaging
- protobuf>=3.20.1,<3.21.0a0
Expand Down
4 changes: 3 additions & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ rapids_cuda_init_architectures(CUDF)

project(
CUDF
VERSION 22.10.00
VERSION 22.12.00
LANGUAGES C CXX CUDA
)
if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" AND CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.5)
Expand Down Expand Up @@ -327,6 +327,7 @@ add_library(
src/io/csv/reader_impl.cu
src/io/csv/writer_impl.cu
src/io/functions.cpp
src/io/json/json_column.cu
src/io/json/json_gpu.cu
src/io/json/json_tree.cu
src/io/json/nested_json_gpu.cu
Expand Down Expand Up @@ -354,6 +355,7 @@ add_library(
src/io/statistics/parquet_column_statistics.cu
src/io/text/byte_range_info.cpp
src/io/text/data_chunk_source_factories.cpp
src/io/text/bgzip_data_chunk_source.cu
src/io/text/multibyte_split.cu
src/io/utilities/column_buffer.cpp
src/io/utilities/config_utils.cpp
Expand Down
3 changes: 2 additions & 1 deletion cpp/benchmarks/io/json/nested_json.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ void BM_NESTED_JSON(nvbench::state& state)
state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value()));
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
// Allocate device-side temporary storage & run algorithm
cudf::io::json::detail::parse_nested_json(input, default_options, cudf::default_stream_value);
cudf::io::json::detail::device_parse_nested_json(
input, default_options, cudf::default_stream_value);
});

auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
Expand Down
4 changes: 2 additions & 2 deletions cpp/doxygen/Doxyfile
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ PROJECT_NAME = "libcudf"
# could be handy for archiving the generated documentation or if some version
# control system is used.

PROJECT_NUMBER = 22.10.00
PROJECT_NUMBER = 22.12.00

# Using the PROJECT_BRIEF tag one can provide an optional one line description
# for a project that appears at the top of each page and should give viewer a
Expand Down Expand Up @@ -2162,7 +2162,7 @@ SKIP_FUNCTION_MACROS = YES
# the path). If a tag file is not located in the directory in which doxygen is
# run, you must also specify the path to the tagfile here.

TAGFILES = rmm.tag=https://docs.rapids.ai/api/librmm/22.10
TAGFILES = rmm.tag=https://docs.rapids.ai/api/librmm/22.12

# When a file name is specified after GENERATE_TAGFILE, doxygen will create a
# tag file that is based on the input files it reads. See section "Linking to
Expand Down
2 changes: 1 addition & 1 deletion cpp/examples/basic/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ file(
)
include(${CMAKE_BINARY_DIR}/cmake/get_cpm.cmake)

set(CUDF_TAG branch-22.10)
set(CUDF_TAG branch-22.12)
CPMFindPackage(
  NAME cudf GIT_REPOSITORY https://github.com/rapidsai/cudf
GIT_TAG ${CUDF_TAG}
Expand Down
2 changes: 2 additions & 0 deletions cpp/include/cudf/io/detail/data_casting.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <cudf/column/column.hpp>
#include <cudf/column/column_device_view.cuh>
#include <cudf/column/column_factories.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/strings/detail/utf8.hpp>
#include <cudf/types.hpp>

Expand Down Expand Up @@ -304,6 +305,7 @@ std::unique_ptr<column> parse_data(str_tuple_it str_tuples,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
if (col_type == cudf::data_type{cudf::type_id::STRING}) {
rmm::device_uvector<size_type> offsets(col_size + 1, stream);

Expand Down
37 changes: 36 additions & 1 deletion cpp/include/cudf/io/text/data_chunk_source_factories.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,51 @@ namespace cudf::io::text {

/**
* @brief Creates a data source capable of producing device-buffered views of the given string.
* @param data the host data to be exposed as a data chunk source. Its lifetime must be at least as
* long as the lifetime of the returned data_chunk_source.
* @return the data chunk source for the provided host data. It copies data from the host to the
* device.
*/
std::unique_ptr<data_chunk_source> make_source(host_span<const char> data);

/**
* @brief Creates a data source capable of producing device-buffered views of the file
* @param filename the filename of the file to be exposed as a data chunk source.
* @return the data chunk source for the provided filename. It reads data from the file and copies
* it to the device.
*/
std::unique_ptr<data_chunk_source> make_source_from_file(std::string const& filename);
std::unique_ptr<data_chunk_source> make_source_from_file(std::string_view filename);

/**
* @brief Creates a data source capable of producing device-buffered views of a BGZIP compressed
* file.
* @param filename the filename of the BGZIP-compressed file to be exposed as a data chunk source.
* @return the data chunk source for the provided filename. It reads data from the file and copies
* it to the device, where it will be decompressed.
*/
std::unique_ptr<data_chunk_source> make_source_from_bgzip_file(std::string_view filename);

/**
* @brief Creates a data source capable of producing device-buffered views of a BGZIP compressed
* file with virtual record offsets.
* @param filename the filename of the BGZIP-compressed file to be exposed as a data chunk source.
* @param virtual_begin the virtual (Tabix) offset of the first byte to be read. Its upper 48 bits
* describe the offset into the compressed file, its lower 16 bits describe the
* block-local offset.
* @param virtual_end the virtual (Tabix) offset one past the last byte to be read.
* @return the data chunk source for the provided filename. It reads data from the file and copies
* it to the device, where it will be decompressed. The chunk source only returns data
* between the virtual offsets `virtual_begin` and `virtual_end`.
*/
std::unique_ptr<data_chunk_source> make_source_from_bgzip_file(std::string_view filename,
uint64_t virtual_begin,
uint64_t virtual_end);

/**
* @brief Creates a data source capable of producing views of the given device string scalar
* @param data the device data to be exposed as a data chunk source. Its lifetime must be at least
* as long as the lifetime of the returned data_chunk_source.
* @return the data chunk source for the provided host data. It does not create any copies.
*/
std::unique_ptr<data_chunk_source> make_source(cudf::string_scalar& data);

Expand Down
2 changes: 1 addition & 1 deletion cpp/libcudf_kafka/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ include(rapids-find)

project(
CUDA_KAFKA
VERSION 22.10.00
VERSION 22.12.00
LANGUAGES CXX
)

Expand Down
25 changes: 11 additions & 14 deletions cpp/src/io/comp/nvcomp_adapter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@ namespace cudf::io::nvcomp {

// Dispatcher for nvcompBatched<format>DecompressGetTempSizeEx
template <typename... Args>
nvcompStatus_t batched_decompress_get_temp_size_ex(compression_type compression, Args&&... args)
std::optional<nvcompStatus_t> batched_decompress_get_temp_size_ex(compression_type compression,
Args&&... args)
{
#if NVCOMP_HAS_TEMPSIZE_EX
switch (compression) {
Expand All @@ -78,13 +79,13 @@ nvcompStatus_t batched_decompress_get_temp_size_ex(compression_type compression,
#if NVCOMP_HAS_ZSTD_DECOMP
return nvcompBatchedZstdDecompressGetTempSizeEx(std::forward<Args>(args)...);
#else
CUDF_FAIL("Unsupported compression type");
return std::nullopt;
#endif
case compression_type::DEFLATE: [[fallthrough]];
default: CUDF_FAIL("Unsupported compression type");
default: return std::nullopt;
}
#endif
CUDF_FAIL("GetTempSizeEx is not supported in the current nvCOMP version");
return std::nullopt;
}

// Dispatcher for nvcompBatched<format>DecompressGetTempSize
Expand Down Expand Up @@ -138,16 +139,12 @@ size_t batched_decompress_temp_size(compression_type compression,
size_t max_uncomp_chunk_size,
size_t max_total_uncomp_size)
{
size_t temp_size = 0;
auto const nvcomp_status = [&]() {
try {
return batched_decompress_get_temp_size_ex(
compression, num_chunks, max_uncomp_chunk_size, &temp_size, max_total_uncomp_size);
} catch (cudf::logic_error const& err) {
return batched_decompress_get_temp_size(
compression, num_chunks, max_uncomp_chunk_size, &temp_size);
}
}();
size_t temp_size = 0;
auto const nvcomp_status =
batched_decompress_get_temp_size_ex(
compression, num_chunks, max_uncomp_chunk_size, &temp_size, max_total_uncomp_size)
.value_or(batched_decompress_get_temp_size(
compression, num_chunks, max_uncomp_chunk_size, &temp_size));

CUDF_EXPECTS(nvcomp_status == nvcompStatus_t::nvcompSuccess,
"Unable to get scratch size for decompression");
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/io/fst/dispatch_dfa.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -211,8 +211,8 @@ struct DispatchFSM : DeviceFSMPolicy {
if (CubDebug(error = dfa_simulation_config.Init<PolicyT>(dfa_kernel))) return error;

// Kernel invocation
uint32_t grid_size =
CUB_QUOTIENT_CEILING(num_chars, PolicyT::BLOCK_THREADS * PolicyT::ITEMS_PER_THREAD);
uint32_t grid_size = std::max(
1u, CUB_QUOTIENT_CEILING(num_chars, PolicyT::BLOCK_THREADS * PolicyT::ITEMS_PER_THREAD));
uint32_t block_threads = dfa_simulation_config.block_threads;

dfa_kernel<<<grid_size, block_threads, 0, stream>>>(dfa,
Expand Down Expand Up @@ -348,7 +348,7 @@ struct DispatchFSM : DeviceFSMPolicy {
NUM_SYMBOLS_PER_BLOCK = BLOCK_THREADS * SYMBOLS_PER_THREAD
};

BlockOffsetT num_blocks = CUB_QUOTIENT_CEILING(num_chars, NUM_SYMBOLS_PER_BLOCK);
BlockOffsetT num_blocks = std::max(1u, CUB_QUOTIENT_CEILING(num_chars, NUM_SYMBOLS_PER_BLOCK));
size_t num_threads = num_blocks * BLOCK_THREADS;

//------------------------------------------------------------------------------
Expand Down
9 changes: 8 additions & 1 deletion cpp/src/io/json/experimental/read_json.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,14 @@ table_with_metadata read_json(host_span<std::unique_ptr<datasource>> sources,
auto const buffer = ingest_raw_input(sources, reader_opts.get_compression());
auto data = host_span<char const>(reinterpret_cast<char const*>(buffer.data()), buffer.size());

return cudf::io::json::detail::parse_nested_json(data, reader_opts, stream, mr);
try {
return cudf::io::json::detail::device_parse_nested_json(data, reader_opts, stream, mr);
} catch (cudf::logic_error const& err) {
#ifdef NJP_DEBUG_PRINT
std::cout << "Fall back to host nested json parser" << std::endl;
#endif
return cudf::io::json::detail::host_parse_nested_json(data, reader_opts, stream, mr);
}
}

} // namespace cudf::io::detail::json::experimental
Loading

0 comments on commit d3378fc

Please sign in to comment.