Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

merge branch-22.12 #5

Merged
merged 24 commits into from
Sep 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
f3af699
DOC
raydouglass Sep 23, 2022
1a2e755
Merge pull request #11753 from rapidsai/branch-22.10
GPUtester Sep 23, 2022
ba9c43c
Merge pull request #11757 from rapidsai/branch-22.10
GPUtester Sep 23, 2022
7376f1f
Merge pull request #11758 from rapidsai/branch-22.10
GPUtester Sep 23, 2022
59847c1
Merge pull request #11763 from rapidsai/branch-22.10
GPUtester Sep 24, 2022
41474af
Merge pull request #11767 from rapidsai/branch-22.10
GPUtester Sep 26, 2022
5fb657d
Merge pull request #11773 from rapidsai/branch-22.10
GPUtester Sep 26, 2022
2b94483
Merge pull request #11774 from rapidsai/branch-22.10
GPUtester Sep 26, 2022
a945377
Add doc section for `list` & `struct` handling (#11770)
galipremsagar Sep 26, 2022
7d40b30
Merge pull request #11775 from rapidsai/branch-22.10
GPUtester Sep 26, 2022
11156cc
Fix issue with set-item incase of `list` and `struct` types (#11760)
galipremsagar Sep 26, 2022
a1cbb02
Merge pull request #11776 from rapidsai/branch-22.10
GPUtester Sep 26, 2022
e64c2da
Fix return type of `Index.isna` & `Index.notna` (#11769)
galipremsagar Sep 26, 2022
aa2ef0e
Merge pull request #11777 from rapidsai/branch-22.10
GPUtester Sep 26, 2022
d24bce5
Remove `kwargs` in `read_csv` & `to_csv` (#11762)
galipremsagar Sep 27, 2022
cc6f237
Merge pull request #11781 from rapidsai/branch-22.10
GPUtester Sep 27, 2022
0a430fa
Fix `cudf::partition*` APIs that do not return offsets for empty output
ttnghia Sep 27, 2022
cc97584
Merge pull request #11782 from rapidsai/branch-22.10
GPUtester Sep 27, 2022
c5d555a
JSON Column creation in GPU (#11714)
karthikeyann Sep 27, 2022
831ef04
Add BGZIP `data_chunk_reader` (#11652)
upsj Sep 27, 2022
35b0a52
Enable `schema_element` & `keep_quotes` support in json reader (#11746)
galipremsagar Sep 27, 2022
1d7af9e
Merge pull request #11784 from rapidsai/branch-22.10
GPUtester Sep 27, 2022
a270ae6
Add `istitle` to string UDFs (#11738)
brandon-b-miller Sep 27, 2022
b8ab576
Merge pull request #11786 from rapidsai/branch-22.10
GPUtester Sep 27, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# cuDF 22.12.00 (Date TBD)

Please see https://github.com/rapidsai/cudf/releases/tag/v22.12.00a for the latest changes to this development branch.

# cuDF 22.10.00 (Date TBD)

Please see https://github.com/rapidsai/cudf/releases/tag/v22.10.00a for the latest changes to this development branch.
Expand Down
2 changes: 1 addition & 1 deletion ci/checks/style.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ LANG=C.UTF-8
. /opt/conda/etc/profile.d/conda.sh
conda activate rapids

FORMAT_FILE_URL=https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-22.10/cmake-format-rapids-cmake.json
FORMAT_FILE_URL=https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-22.12/cmake-format-rapids-cmake.json
export RAPIDS_CMAKE_FORMAT_FILE=/tmp/rapids_cmake_ci/cmake-formats-rapids-cmake.json
mkdir -p $(dirname ${RAPIDS_CMAKE_FORMAT_FILE})
wget -O ${RAPIDS_CMAKE_FORMAT_FILE} ${FORMAT_FILE_URL}
Expand Down
2 changes: 1 addition & 1 deletion ci/gpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ unset GIT_DESCRIBE_TAG
export INSTALL_DASK_MAIN=1

# ucx-py version
export UCX_PY_VERSION='0.28.*'
export UCX_PY_VERSION='0.29.*'

################################################################################
# TRAP - Setup trap for removing jitify cache
Expand Down
2 changes: 1 addition & 1 deletion ci/gpu/java.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ export GIT_DESCRIBE_TAG=`git describe --tags`
export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'`

# ucx-py version
export UCX_PY_VERSION='0.28.*'
export UCX_PY_VERSION='0.29.*'

################################################################################
# TRAP - Setup trap for removing jitify cache
Expand Down
4 changes: 2 additions & 2 deletions conda/environments/cudf_dev_cuda11.5.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ dependencies:
- clang=11.1.0
- clang-tools=11.1.0
- cupy>=9.5.0,<12.0.0a0
- rmm=22.10.*
- rmm=22.12.*
- cmake>=3.20.1,!=3.23.0
- cmake_setuptools>=0.1.3
- scikit-build>=0.13.1
Expand Down Expand Up @@ -62,7 +62,7 @@ dependencies:
- sphinx-autobuild
- myst-nb
- scipy
- dask-cuda=22.10.*
- dask-cuda=22.12.*
- mimesis<4.1
- packaging
- protobuf>=3.20.1,<3.21.0a0
Expand Down
4 changes: 3 additions & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ rapids_cuda_init_architectures(CUDF)

project(
CUDF
VERSION 22.10.00
VERSION 22.12.00
LANGUAGES C CXX CUDA
)
if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" AND CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.5)
Expand Down Expand Up @@ -327,6 +327,7 @@ add_library(
src/io/csv/reader_impl.cu
src/io/csv/writer_impl.cu
src/io/functions.cpp
src/io/json/json_column.cu
src/io/json/json_gpu.cu
src/io/json/json_tree.cu
src/io/json/nested_json_gpu.cu
Expand Down Expand Up @@ -354,6 +355,7 @@ add_library(
src/io/statistics/parquet_column_statistics.cu
src/io/text/byte_range_info.cpp
src/io/text/data_chunk_source_factories.cpp
src/io/text/bgzip_data_chunk_source.cu
src/io/text/multibyte_split.cu
src/io/utilities/column_buffer.cpp
src/io/utilities/config_utils.cpp
Expand Down
3 changes: 2 additions & 1 deletion cpp/benchmarks/io/json/nested_json.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ void BM_NESTED_JSON(nvbench::state& state)
state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value()));
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
// Allocate device-side temporary storage & run algorithm
cudf::io::json::detail::parse_nested_json(input, default_options, cudf::default_stream_value);
cudf::io::json::detail::device_parse_nested_json(
input, default_options, cudf::default_stream_value);
});

auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
Expand Down
4 changes: 2 additions & 2 deletions cpp/doxygen/Doxyfile
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ PROJECT_NAME = "libcudf"
# could be handy for archiving the generated documentation or if some version
# control system is used.

PROJECT_NUMBER = 22.10.00
PROJECT_NUMBER = 22.12.00

# Using the PROJECT_BRIEF tag one can provide an optional one line description
# for a project that appears at the top of each page and should give viewer a
Expand Down Expand Up @@ -2162,7 +2162,7 @@ SKIP_FUNCTION_MACROS = YES
# the path). If a tag file is not located in the directory in which doxygen is
# run, you must also specify the path to the tagfile here.

TAGFILES = rmm.tag=https://docs.rapids.ai/api/librmm/22.10
TAGFILES = rmm.tag=https://docs.rapids.ai/api/librmm/22.12

# When a file name is specified after GENERATE_TAGFILE, doxygen will create a
# tag file that is based on the input files it reads. See section "Linking to
Expand Down
2 changes: 1 addition & 1 deletion cpp/examples/basic/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ file(
)
include(${CMAKE_BINARY_DIR}/cmake/get_cpm.cmake)

set(CUDF_TAG branch-22.10)
set(CUDF_TAG branch-22.12)
CPMFindPackage(
NAME cudf GIT_REPOSITORY https://github.com/rapidsai/cudf
GIT_TAG ${CUDF_TAG}
Expand Down
2 changes: 2 additions & 0 deletions cpp/include/cudf/io/detail/data_casting.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <cudf/column/column.hpp>
#include <cudf/column/column_device_view.cuh>
#include <cudf/column/column_factories.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/strings/detail/utf8.hpp>
#include <cudf/types.hpp>

Expand Down Expand Up @@ -304,6 +305,7 @@ std::unique_ptr<column> parse_data(str_tuple_it str_tuples,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
if (col_type == cudf::data_type{cudf::type_id::STRING}) {
rmm::device_uvector<size_type> offsets(col_size + 1, stream);

Expand Down
37 changes: 36 additions & 1 deletion cpp/include/cudf/io/text/data_chunk_source_factories.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,51 @@ namespace cudf::io::text {

/**
* @brief Creates a data source capable of producing device-buffered views of the given string.
* @param data the host data to be exposed as a data chunk source. Its lifetime must be at least as
* long as the lifetime of the returned data_chunk_source.
* @return the data chunk source for the provided host data. It copies data from the host to the
* device.
*/
std::unique_ptr<data_chunk_source> make_source(host_span<const char> data);

/**
* @brief Creates a data source capable of producing device-buffered views of the file
* @param filename the filename of the file to be exposed as a data chunk source.
* @return the data chunk source for the provided filename. It reads data from the file and copies
* it to the device.
*/
std::unique_ptr<data_chunk_source> make_source_from_file(std::string const& filename);
std::unique_ptr<data_chunk_source> make_source_from_file(std::string_view filename);

/**
* @brief Creates a data source capable of producing device-buffered views of a BGZIP compressed
* file.
* @param filename the filename of the BGZIP-compressed file to be exposed as a data chunk source.
* @return the data chunk source for the provided filename. It reads data from the file and copies
* it to the device, where it will be decompressed.
*/
std::unique_ptr<data_chunk_source> make_source_from_bgzip_file(std::string_view filename);

/**
* @brief Creates a data source capable of producing device-buffered views of a BGZIP compressed
* file with virtual record offsets.
* @param filename the filename of the BGZIP-compressed file to be exposed as a data chunk source.
* @param virtual_begin the virtual (Tabix) offset of the first byte to be read. Its upper 48 bits
* describe the offset into the compressed file, its lower 16 bits describe the
* block-local offset.
* @param virtual_end the virtual (Tabix) offset one past the last byte to be read.
* @return the data chunk source for the provided filename. It reads data from the file and copies
* it to the device, where it will be decompressed. The chunk source only returns data
* between the virtual offsets `virtual_begin` and `virtual_end`.
*/
std::unique_ptr<data_chunk_source> make_source_from_bgzip_file(std::string_view filename,
uint64_t virtual_begin,
uint64_t virtual_end);

/**
* @brief Creates a data source capable of producing views of the given device string scalar
* @param data the device data to be exposed as a data chunk source. Its lifetime must be at least
* as long as the lifetime of the returned data_chunk_source.
* @return the data chunk source for the provided host data. It does not create any copies.
*/
std::unique_ptr<data_chunk_source> make_source(cudf::string_scalar& data);

Expand Down
2 changes: 1 addition & 1 deletion cpp/libcudf_kafka/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ include(rapids-find)

project(
CUDA_KAFKA
VERSION 22.10.00
VERSION 22.12.00
LANGUAGES CXX
)

Expand Down
25 changes: 11 additions & 14 deletions cpp/src/io/comp/nvcomp_adapter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@ namespace cudf::io::nvcomp {

// Dispatcher for nvcompBatched<format>DecompressGetTempSizeEx
template <typename... Args>
nvcompStatus_t batched_decompress_get_temp_size_ex(compression_type compression, Args&&... args)
std::optional<nvcompStatus_t> batched_decompress_get_temp_size_ex(compression_type compression,
Args&&... args)
{
#if NVCOMP_HAS_TEMPSIZE_EX
switch (compression) {
Expand All @@ -78,13 +79,13 @@ nvcompStatus_t batched_decompress_get_temp_size_ex(compression_type compression,
#if NVCOMP_HAS_ZSTD_DECOMP
return nvcompBatchedZstdDecompressGetTempSizeEx(std::forward<Args>(args)...);
#else
CUDF_FAIL("Unsupported compression type");
return std::nullopt;
#endif
case compression_type::DEFLATE: [[fallthrough]];
default: CUDF_FAIL("Unsupported compression type");
default: return std::nullopt;
}
#endif
CUDF_FAIL("GetTempSizeEx is not supported in the current nvCOMP version");
return std::nullopt;
}

// Dispatcher for nvcompBatched<format>DecompressGetTempSize
Expand Down Expand Up @@ -138,16 +139,12 @@ size_t batched_decompress_temp_size(compression_type compression,
size_t max_uncomp_chunk_size,
size_t max_total_uncomp_size)
{
size_t temp_size = 0;
auto const nvcomp_status = [&]() {
try {
return batched_decompress_get_temp_size_ex(
compression, num_chunks, max_uncomp_chunk_size, &temp_size, max_total_uncomp_size);
} catch (cudf::logic_error const& err) {
return batched_decompress_get_temp_size(
compression, num_chunks, max_uncomp_chunk_size, &temp_size);
}
}();
size_t temp_size = 0;
auto const nvcomp_status =
batched_decompress_get_temp_size_ex(
compression, num_chunks, max_uncomp_chunk_size, &temp_size, max_total_uncomp_size)
.value_or(batched_decompress_get_temp_size(
compression, num_chunks, max_uncomp_chunk_size, &temp_size));

CUDF_EXPECTS(nvcomp_status == nvcompStatus_t::nvcompSuccess,
"Unable to get scratch size for decompression");
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/io/fst/dispatch_dfa.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -211,8 +211,8 @@ struct DispatchFSM : DeviceFSMPolicy {
if (CubDebug(error = dfa_simulation_config.Init<PolicyT>(dfa_kernel))) return error;

// Kernel invocation
uint32_t grid_size =
CUB_QUOTIENT_CEILING(num_chars, PolicyT::BLOCK_THREADS * PolicyT::ITEMS_PER_THREAD);
uint32_t grid_size = std::max(
1u, CUB_QUOTIENT_CEILING(num_chars, PolicyT::BLOCK_THREADS * PolicyT::ITEMS_PER_THREAD));
uint32_t block_threads = dfa_simulation_config.block_threads;

dfa_kernel<<<grid_size, block_threads, 0, stream>>>(dfa,
Expand Down Expand Up @@ -348,7 +348,7 @@ struct DispatchFSM : DeviceFSMPolicy {
NUM_SYMBOLS_PER_BLOCK = BLOCK_THREADS * SYMBOLS_PER_THREAD
};

BlockOffsetT num_blocks = CUB_QUOTIENT_CEILING(num_chars, NUM_SYMBOLS_PER_BLOCK);
BlockOffsetT num_blocks = std::max(1u, CUB_QUOTIENT_CEILING(num_chars, NUM_SYMBOLS_PER_BLOCK));
size_t num_threads = num_blocks * BLOCK_THREADS;

//------------------------------------------------------------------------------
Expand Down
9 changes: 8 additions & 1 deletion cpp/src/io/json/experimental/read_json.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,14 @@ table_with_metadata read_json(host_span<std::unique_ptr<datasource>> sources,
auto const buffer = ingest_raw_input(sources, reader_opts.get_compression());
auto data = host_span<char const>(reinterpret_cast<char const*>(buffer.data()), buffer.size());

return cudf::io::json::detail::parse_nested_json(data, reader_opts, stream, mr);
try {
return cudf::io::json::detail::device_parse_nested_json(data, reader_opts, stream, mr);
} catch (cudf::logic_error const& err) {
#ifdef NJP_DEBUG_PRINT
std::cout << "Fall back to host nested json parser" << std::endl;
#endif
return cudf::io::json::detail::host_parse_nested_json(data, reader_opts, stream, mr);
}
}

} // namespace cudf::io::detail::json::experimental
Loading