Skip to content

Commit

Permalink
Merge branch 'branch-24.02' into null_counts_optional
Browse files Browse the repository at this point in the history
  • Loading branch information
etseidl authored Dec 12, 2023
2 parents 41be3b1 + d1ba966 commit c896cb6
Show file tree
Hide file tree
Showing 133 changed files with 2,949 additions and 1,902 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ cpp/thirdparty/googletest/

## Doxygen
cpp/doxygen/html
cpp/doxygen/xml

#Java
target
Expand Down
2 changes: 1 addition & 1 deletion ci/build_wheel_cudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ set -euo pipefail

package_dir="python/cudf"

export SKBUILD_CONFIGURE_OPTIONS="-DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF"
export SKBUILD_CONFIGURE_OPTIONS="-DUSE_LIBARROW_FROM_PYARROW=ON"

./ci/build_wheel.sh cudf ${package_dir}

Expand Down
5 changes: 2 additions & 3 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ option(CUDA_ENABLE_LINEINFO
option(CUDA_WARNINGS_AS_ERRORS "Enable -Werror=all-warnings for all CUDA compilation" ON)
# cudart can be statically linked or dynamically linked. The python ecosystem wants dynamic linking
option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF)
option(USE_LIBARROW_FROM_PYARROW "Only use the libarrow contained in pyarrow" OFF)
mark_as_advanced(USE_LIBARROW_FROM_PYARROW)

set(DEFAULT_CUDF_BUILD_STREAMS_TEST_UTIL ON)
if(CUDA_STATIC_RUNTIME OR NOT BUILD_SHARED_LIBS)
Expand All @@ -90,9 +92,6 @@ option(
)
mark_as_advanced(CUDF_BUILD_STREAMS_TEST_UTIL)

option(USE_LIBARROW_FROM_PYARROW "Use the libarrow contained within pyarrow." OFF)
mark_as_advanced(USE_LIBARROW_FROM_PYARROW)

message(VERBOSE "CUDF: Build with NVTX support: ${USE_NVTX}")
message(VERBOSE "CUDF: Configure CMake to build tests: ${BUILD_TESTS}")
message(VERBOSE "CUDF: Configure CMake to build (google & nvbench) benchmarks: ${BUILD_BENCHMARKS}")
Expand Down
21 changes: 12 additions & 9 deletions cpp/benchmarks/common/generate_input.cu
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@
#include <thrust/transform.h>
#include <thrust/tuple.h>

#include <cuda/functional>

#include <algorithm>
#include <cstdint>
#include <memory>
Expand Down Expand Up @@ -247,12 +249,12 @@ struct random_value_fn<T, std::enable_if_t<cudf::is_chrono<T>()>> {
sec.end(),
ns.begin(),
result.begin(),
[] __device__(int64_t sec_value, int64_t nanoseconds_value) {
cuda::proclaim_return_type<T>([] __device__(int64_t sec_value, int64_t nanoseconds_value) {
auto const timestamp_ns =
cudf::duration_s{sec_value} + cudf::duration_ns{nanoseconds_value};
// Return value in the type's precision
return T(cuda::std::chrono::duration_cast<typename T::duration>(timestamp_ns));
});
}));
return result;
}
};
Expand Down Expand Up @@ -367,12 +369,13 @@ rmm::device_uvector<cudf::size_type> sample_indices_with_run_length(cudf::size_t
// This is gather.
auto avg_repeated_sample_indices_iterator = thrust::make_transform_iterator(
thrust::make_counting_iterator(0),
[rb = run_lens.begin(),
re = run_lens.end(),
samples_indices = samples_indices.begin()] __device__(cudf::size_type i) {
auto sample_idx = thrust::upper_bound(thrust::seq, rb, re, i) - rb;
return samples_indices[sample_idx];
});
cuda::proclaim_return_type<cudf::size_type>(
[rb = run_lens.begin(),
re = run_lens.end(),
samples_indices = samples_indices.begin()] __device__(cudf::size_type i) {
auto sample_idx = thrust::upper_bound(thrust::seq, rb, re, i) - rb;
return samples_indices[sample_idx];
}));
rmm::device_uvector<cudf::size_type> repeated_sample_indices(num_rows,
cudf::get_default_stream());
thrust::copy(thrust::device,
Expand Down Expand Up @@ -513,7 +516,7 @@ std::unique_ptr<cudf::column> create_random_utf8_string_column(data_profile cons
lengths.end(),
null_mask.begin(),
lengths.begin(),
[] __device__(auto) { return 0; },
cuda::proclaim_return_type<cudf::size_type>([] __device__(auto) { return 0; }),
thrust::logical_not<bool>{});
auto valid_lengths = thrust::make_transform_iterator(
thrust::make_zip_iterator(thrust::make_tuple(lengths.begin(), null_mask.begin())),
Expand Down
27 changes: 27 additions & 0 deletions cpp/benchmarks/io/cuio_common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -201,3 +201,30 @@ void try_drop_l3_cache()
[](auto& cmd) { return exec_cmd(cmd).empty(); }),
"Failed to execute the drop cache command");
}

cudf::io::io_type retrieve_io_type_enum(std::string_view io_string)
{
if (io_string == "FILEPATH") { return cudf::io::io_type::FILEPATH; }
if (io_string == "HOST_BUFFER") { return cudf::io::io_type::HOST_BUFFER; }
if (io_string == "DEVICE_BUFFER") { return cudf::io::io_type::DEVICE_BUFFER; }
if (io_string == "VOID") { return cudf::io::io_type::VOID; }
if (io_string == "USER_IMPLEMENTED") { return cudf::io::io_type::USER_IMPLEMENTED; }
CUDF_FAIL("Unsupported io_type.");
}

cudf::io::compression_type retrieve_compression_type_enum(std::string_view compression_string)
{
if (compression_string == "NONE") { return cudf::io::compression_type::NONE; }
if (compression_string == "AUTO") { return cudf::io::compression_type::AUTO; }
if (compression_string == "SNAPPY") { return cudf::io::compression_type::SNAPPY; }
if (compression_string == "GZIP") { return cudf::io::compression_type::GZIP; }
if (compression_string == "BZIP2") { return cudf::io::compression_type::BZIP2; }
if (compression_string == "BROTLI") { return cudf::io::compression_type::BROTLI; }
if (compression_string == "ZIP") { return cudf::io::compression_type::ZIP; }
if (compression_string == "XZ") { return cudf::io::compression_type::XZ; }
if (compression_string == "ZLIB") { return cudf::io::compression_type::ZLIB; }
if (compression_string == "LZ4") { return cudf::io::compression_type::LZ4; }
if (compression_string == "LZO") { return cudf::io::compression_type::LZO; }
if (compression_string == "ZSTD") { return cudf::io::compression_type::ZSTD; }
CUDF_FAIL("Unsupported compression_type.");
}
24 changes: 24 additions & 0 deletions cpp/benchmarks/io/cuio_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,3 +138,27 @@ std::vector<cudf::size_type> segments_in_chunk(int num_segments, int num_chunks,
* @throw cudf::logic_error if the environment variable is set and the command fails
*/
void try_drop_l3_cache();

/**
* @brief Convert a string to the corresponding io_type enum value.
*
* This function takes a string and returns the matching io_type enum value. It allows you to
* convert a string representation of an io_type into its corresponding enum value.
*
* @param io_string The input string representing the io_type
*
* @return The io_type enum value
*/
cudf::io::io_type retrieve_io_type_enum(std::string_view io_string);

/**
* @brief Convert a string to the corresponding compression_type enum value.
*
* This function takes a string and returns the matching compression_type enum value. It allows you
* to convert a string representation of a compression_type into its corresponding enum value.
*
* @param compression_string The input string representing the compression_type
*
* @return The compression_type enum value
*/
cudf::io::compression_type retrieve_compression_type_enum(std::string_view compression_string);
91 changes: 37 additions & 54 deletions cpp/benchmarks/io/parquet/parquet_reader_input.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,34 +56,30 @@ void parquet_read_common(cudf::io::parquet_writer_options const& write_opts,
state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size");
}

template <data_type DataType, cudf::io::io_type IOType>
void BM_parquet_read_data(
nvbench::state& state,
nvbench::type_list<nvbench::enum_type<DataType>, nvbench::enum_type<IOType>>)
template <data_type DataType>
void BM_parquet_read_data(nvbench::state& state, nvbench::type_list<nvbench::enum_type<DataType>>)
{
auto const d_type = get_type_or_group(static_cast<int32_t>(DataType));
cudf::size_type const cardinality = state.get_int64("cardinality");
cudf::size_type const run_length = state.get_int64("run_length");
auto const compression = cudf::io::compression_type::SNAPPY;
auto const d_type = get_type_or_group(static_cast<int32_t>(DataType));
auto const cardinality = static_cast<cudf::size_type>(state.get_int64("cardinality"));
auto const run_length = static_cast<cudf::size_type>(state.get_int64("run_length"));
auto const source_type = retrieve_io_type_enum(state.get_string("io_type"));
auto const compression = cudf::io::compression_type::SNAPPY;

auto const tbl =
create_random_table(cycle_dtypes(d_type, num_cols),
table_size_bytes{data_size},
data_profile_builder().cardinality(cardinality).avg_run_length(run_length));
auto const view = tbl->view();

cuio_source_sink_pair source_sink(IOType);
cuio_source_sink_pair source_sink(source_type);
cudf::io::parquet_writer_options write_opts =
cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view)
.compression(compression);

parquet_read_common(write_opts, source_sink, state);
}

template <cudf::io::io_type IOType, cudf::io::compression_type Compression>
void BM_parquet_read_io_compression(
nvbench::state& state,
nvbench::type_list<nvbench::enum_type<IOType>, nvbench::enum_type<Compression>>)
void BM_parquet_read_io_compression(nvbench::state& state)
{
auto const d_type = get_type_or_group({static_cast<int32_t>(data_type::INTEGRAL),
static_cast<int32_t>(data_type::FLOAT),
Expand All @@ -94,10 +90,10 @@ void BM_parquet_read_io_compression(
static_cast<int32_t>(data_type::LIST),
static_cast<int32_t>(data_type::STRUCT)});

cudf::size_type const cardinality = state.get_int64("cardinality");
cudf::size_type const run_length = state.get_int64("run_length");
auto const compression = Compression;
auto const source_type = IOType;
auto const cardinality = static_cast<cudf::size_type>(state.get_int64("cardinality"));
auto const run_length = static_cast<cudf::size_type>(state.get_int64("run_length"));
auto const source_type = retrieve_io_type_enum(state.get_string("io_type"));
auto const compression = retrieve_compression_type_enum(state.get_string("compression_type"));

auto const tbl =
create_random_table(cycle_dtypes(d_type, num_cols),
Expand All @@ -113,17 +109,15 @@ void BM_parquet_read_io_compression(
parquet_read_common(write_opts, source_sink, state);
}

template <cudf::io::io_type IOType>
void BM_parquet_read_io_small_mixed(nvbench::state& state,
nvbench::type_list<nvbench::enum_type<IOType>>)
void BM_parquet_read_io_small_mixed(nvbench::state& state)
{
auto const d_type =
std::pair<cudf::type_id, cudf::type_id>{cudf::type_id::STRING, cudf::type_id::INT32};

cudf::size_type const cardinality = state.get_int64("cardinality");
cudf::size_type const run_length = state.get_int64("run_length");
cudf::size_type const num_strings = state.get_int64("num_string_cols");
auto const source_type = IOType;
auto const cardinality = static_cast<cudf::size_type>(state.get_int64("cardinality"));
auto const run_length = static_cast<cudf::size_type>(state.get_int64("run_length"));
auto const num_strings = static_cast<cudf::size_type>(state.get_int64("num_string_cols"));
auto const source_type = retrieve_io_type_enum(state.get_string("io_type"));

// want 80 pages total, across 4 columns, so 20 pages per column
cudf::size_type constexpr n_col = 4;
Expand All @@ -145,24 +139,23 @@ void BM_parquet_read_io_small_mixed(nvbench::state& state,
parquet_read_common(write_opts, source_sink, state);
}

template <data_type DataType, cudf::io::io_type IOType>
void BM_parquet_read_chunks(
nvbench::state& state,
nvbench::type_list<nvbench::enum_type<DataType>, nvbench::enum_type<IOType>>)
template <data_type DataType>
void BM_parquet_read_chunks(nvbench::state& state, nvbench::type_list<nvbench::enum_type<DataType>>)
{
auto const d_type = get_type_or_group(static_cast<int32_t>(DataType));
cudf::size_type const cardinality = state.get_int64("cardinality");
cudf::size_type const run_length = state.get_int64("run_length");
cudf::size_type const byte_limit = state.get_int64("byte_limit");
auto const compression = cudf::io::compression_type::SNAPPY;
auto const d_type = get_type_or_group(static_cast<int32_t>(DataType));
auto const cardinality = static_cast<cudf::size_type>(state.get_int64("cardinality"));
auto const run_length = static_cast<cudf::size_type>(state.get_int64("run_length"));
auto const byte_limit = static_cast<cudf::size_type>(state.get_int64("byte_limit"));
auto const source_type = retrieve_io_type_enum(state.get_string("io_type"));
auto const compression = cudf::io::compression_type::SNAPPY;

auto const tbl =
create_random_table(cycle_dtypes(d_type, num_cols),
table_size_bytes{data_size},
data_profile_builder().cardinality(cardinality).avg_run_length(run_length));
auto const view = tbl->view();

cuio_source_sink_pair source_sink(IOType);
cuio_source_sink_pair source_sink(source_type);
cudf::io::parquet_writer_options write_opts =
cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view)
.compression(compression);
Expand Down Expand Up @@ -202,43 +195,33 @@ using d_type_list = nvbench::enum_type_list<data_type::INTEGRAL,
data_type::LIST,
data_type::STRUCT>;

using io_list = nvbench::enum_type_list<cudf::io::io_type::FILEPATH,
cudf::io::io_type::HOST_BUFFER,
cudf::io::io_type::DEVICE_BUFFER>;

using compression_list =
nvbench::enum_type_list<cudf::io::compression_type::SNAPPY, cudf::io::compression_type::NONE>;

NVBENCH_BENCH_TYPES(BM_parquet_read_data,
NVBENCH_TYPE_AXES(d_type_list,
nvbench::enum_type_list<cudf::io::io_type::DEVICE_BUFFER>))
NVBENCH_BENCH_TYPES(BM_parquet_read_data, NVBENCH_TYPE_AXES(d_type_list))
.set_name("parquet_read_decode")
.set_type_axes_names({"data_type", "io"})
.set_type_axes_names({"data_type"})
.add_string_axis("io_type", {"DEVICE_BUFFER"})
.set_min_samples(4)
.add_int64_axis("cardinality", {0, 1000})
.add_int64_axis("run_length", {1, 32});

NVBENCH_BENCH_TYPES(BM_parquet_read_io_compression, NVBENCH_TYPE_AXES(io_list, compression_list))
NVBENCH_BENCH(BM_parquet_read_io_compression)
.set_name("parquet_read_io_compression")
.set_type_axes_names({"io", "compression"})
.add_string_axis("io_type", {"FILEPATH", "HOST_BUFFER", "DEVICE_BUFFER"})
.add_string_axis("compression_type", {"SNAPPY", "NONE"})
.set_min_samples(4)
.add_int64_axis("cardinality", {0, 1000})
.add_int64_axis("run_length", {1, 32});

NVBENCH_BENCH_TYPES(BM_parquet_read_chunks,
NVBENCH_TYPE_AXES(d_type_list,
nvbench::enum_type_list<cudf::io::io_type::DEVICE_BUFFER>))
NVBENCH_BENCH_TYPES(BM_parquet_read_chunks, NVBENCH_TYPE_AXES(d_type_list))
.set_name("parquet_read_chunks")
.set_type_axes_names({"data_type", "io"})
.add_string_axis("io_type", {"DEVICE_BUFFER"})
.set_min_samples(4)
.add_int64_axis("cardinality", {0, 1000})
.add_int64_axis("run_length", {1, 32})
.add_int64_axis("byte_limit", {0, 500'000});

NVBENCH_BENCH_TYPES(BM_parquet_read_io_small_mixed,
NVBENCH_TYPE_AXES(nvbench::enum_type_list<cudf::io::io_type::FILEPATH>))
NVBENCH_BENCH(BM_parquet_read_io_small_mixed)
.set_name("parquet_read_io_small_mixed")
.set_type_axes_names({"io"})
.add_string_axis("io_type", {"FILEPATH"})
.set_min_samples(4)
.add_int64_axis("cardinality", {0, 1000})
.add_int64_axis("run_length", {1, 32})
Expand Down
Loading

0 comments on commit c896cb6

Please sign in to comment.