Skip to content

Commit

Permalink
Merge branch 'branch-23.12' into processed_bytes_stream_distinct_coun…
Browse files Browse the repository at this point in the history
…t_nvbench
  • Loading branch information
harrism authored Sep 26, 2023
2 parents a6323f2 + 30093c3 commit ed6d718
Show file tree
Hide file tree
Showing 82 changed files with 2,092 additions and 1,153 deletions.
1 change: 0 additions & 1 deletion .github/ops-bot.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,3 @@ auto_merger: true
branch_checker: true
label_checker: true
release_drafter: true
recently_updated: true
2 changes: 1 addition & 1 deletion ci/build_docs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ export RAPIDS_DOCS_DIR="$(mktemp -d)"

rapids-logger "Build CPP docs"
pushd cpp/doxygen
aws s3 cp s3://rapidsai-docs/librmm/${RAPIDS_VERSION_NUMBER}/html/rmm.tag . || echo "Failed to download rmm Doxygen tag"
aws s3 cp s3://rapidsai-docs/librmm/html/${RAPIDS_VERSION_NUMBER}/rmm.tag . || echo "Failed to download rmm Doxygen tag"
doxygen Doxyfile
mkdir -p "${RAPIDS_DOCS_DIR}/libcudf/html"
mv html/* "${RAPIDS_DOCS_DIR}/libcudf/html"
Expand Down
1 change: 1 addition & 0 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ channels:
- nvidia
dependencies:
- aiobotocore>=2.2.0
- aws-sdk-cpp<1.11
- benchmark==1.8.0
- boto3>=1.21.21
- botocore>=1.24.21
Expand Down
1 change: 1 addition & 0 deletions conda/environments/all_cuda-120_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ channels:
- nvidia
dependencies:
- aiobotocore>=2.2.0
- aws-sdk-cpp<1.11
- benchmark==1.8.0
- boto3>=1.21.21
- botocore>=1.24.21
Expand Down
3 changes: 3 additions & 0 deletions conda/recipes/libcudf/conda_build_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ gbench_version:
gtest_version:
- ">=1.13.0"

aws_sdk_cpp_version:
- "<1.11"

libarrow_version:
- "=12"

Expand Down
2 changes: 2 additions & 0 deletions conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ requirements:
- gtest {{ gtest_version }}
- gmock {{ gtest_version }}
- zlib {{ zlib_version }}
- aws-sdk-cpp {{ aws_sdk_cpp_version }}

outputs:
- name: libcudf
Expand Down Expand Up @@ -107,6 +108,7 @@ outputs:
- dlpack {{ dlpack_version }}
- gtest {{ gtest_version }}
- gmock {{ gtest_version }}
- aws-sdk-cpp {{ aws_sdk_cpp_version }}
test:
commands:
- test -f $PREFIX/lib/libcudf.so
Expand Down
2 changes: 1 addition & 1 deletion cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ ConfigureBench(ITERATOR_BENCH iterator/iterator.cu)
# ##################################################################################################
# * search benchmark ------------------------------------------------------------------------------
ConfigureBench(SEARCH_BENCH search/search.cpp)
ConfigureNVBench(SEARCH_NVBENCH search/contains.cpp)
ConfigureNVBench(SEARCH_NVBENCH search/contains_scalar.cpp search/contains_table.cpp)

# ##################################################################################################
# * sort benchmark --------------------------------------------------------------------------------
Expand Down
18 changes: 10 additions & 8 deletions cpp/benchmarks/io/cuio_common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/

#include <benchmarks/io/cuio_common.hpp>
#include <cudf/detail/utilities/integer_utils.hpp>
#include <cudf/detail/utilities/logger.hpp>

#include <cstdio>
Expand Down Expand Up @@ -141,17 +142,18 @@ std::vector<std::string> select_column_names(std::vector<std::string> const& col
return col_names_to_read;
}

std::vector<cudf::size_type> segments_in_chunk(int num_segments, int num_chunks, int chunk)
std::vector<cudf::size_type> segments_in_chunk(int num_segments, int num_chunks, int chunk_idx)
{
CUDF_EXPECTS(num_segments >= num_chunks,
"Number of chunks cannot be greater than the number of segments in the file");
auto start_segment = [num_segments, num_chunks](int chunk) {
return num_segments * chunk / num_chunks;
};
std::vector<cudf::size_type> selected_segments;
for (auto segment = start_segment(chunk); segment < start_segment(chunk + 1); ++segment) {
selected_segments.push_back(segment);
}
CUDF_EXPECTS(chunk_idx < num_chunks,
"Chunk index must be smaller than the number of chunks in the file");

auto const segments_in_chunk = cudf::util::div_rounding_up_unsafe(num_segments, num_chunks);
auto const begin_segment = std::min(chunk_idx * segments_in_chunk, num_segments);
auto const end_segment = std::min(begin_segment + segments_in_chunk, num_segments);
std::vector<cudf::size_type> selected_segments(end_segment - begin_segment);
std::iota(selected_segments.begin(), selected_segments.end(), begin_segment);

return selected_segments;
}
Expand Down
12 changes: 5 additions & 7 deletions cpp/benchmarks/io/orc/orc_reader_options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <benchmarks/io/cuio_common.hpp>
#include <benchmarks/io/nvbench_helpers.hpp>

#include <cudf/detail/utilities/integer_utils.hpp>
#include <cudf/io/orc.hpp>
#include <cudf/io/orc_metadata.hpp>
#include <cudf/utilities/default_stream.hpp>
Expand All @@ -30,7 +31,7 @@
constexpr int64_t data_size = 512 << 20;
// The number of separate read calls to use when reading files in multiple chunks
// Each call reads roughly equal amounts of data
constexpr int32_t chunked_read_num_chunks = 8;
constexpr int32_t chunked_read_num_chunks = 4;

std::vector<std::string> get_top_level_col_names(cudf::io::source_info const& source)
{
Expand Down Expand Up @@ -88,7 +89,7 @@ void BM_orc_read_varying_options(nvbench::state& state,

auto const num_stripes =
cudf::io::read_orc_metadata(source_sink.make_source_info()).num_stripes();
cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks;
auto const chunk_row_cnt = cudf::util::div_rounding_up_unsafe(view.num_rows(), num_chunks);

auto mem_stats_logger = cudf::memory_stats_logger();
state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
Expand All @@ -99,7 +100,6 @@ void BM_orc_read_varying_options(nvbench::state& state,
timer.start();
cudf::size_type rows_read = 0;
for (int32_t chunk = 0; chunk < num_chunks; ++chunk) {
auto const is_last_chunk = chunk == (num_chunks - 1);
switch (RowSelection) {
case row_selection::ALL: break;
case row_selection::STRIPES:
Expand All @@ -108,7 +108,6 @@ void BM_orc_read_varying_options(nvbench::state& state,
case row_selection::NROWS:
read_options.set_skip_rows(chunk * chunk_row_cnt);
read_options.set_num_rows(chunk_row_cnt);
if (is_last_chunk) read_options.set_num_rows(-1);
break;
default: CUDF_FAIL("Unsupported row selection method");
}
Expand All @@ -132,9 +131,6 @@ using col_selections = nvbench::enum_type_list<column_selection::ALL,
column_selection::ALTERNATE,
column_selection::FIRST_HALF,
column_selection::SECOND_HALF>;
using row_selections =
nvbench::enum_type_list<row_selection::ALL, row_selection::STRIPES, row_selection::NROWS>;

NVBENCH_BENCH_TYPES(BM_orc_read_varying_options,
NVBENCH_TYPE_AXES(col_selections,
nvbench::enum_type_list<row_selection::ALL>,
Expand All @@ -146,6 +142,8 @@ NVBENCH_BENCH_TYPES(BM_orc_read_varying_options,
{"column_selection", "row_selection", "uses_index", "uses_numpy_dtype", "timestamp_type"})
.set_min_samples(4);

using row_selections =
nvbench::enum_type_list<row_selection::ALL, row_selection::NROWS, row_selection::STRIPES>;
NVBENCH_BENCH_TYPES(BM_orc_read_varying_options,
NVBENCH_TYPE_AXES(nvbench::enum_type_list<column_selection::ALL>,
row_selections,
Expand Down
65 changes: 38 additions & 27 deletions cpp/benchmarks/io/parquet/parquet_reader_options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,28 +19,29 @@
#include <benchmarks/io/cuio_common.hpp>
#include <benchmarks/io/nvbench_helpers.hpp>

#include <cudf/detail/utilities/integer_utils.hpp>
#include <cudf/io/parquet.hpp>
#include <cudf/utilities/default_stream.hpp>

#include <nvbench/nvbench.cuh>

// Size of the data in the benchmark dataframe; chosen to be low enough to allow benchmarks to
// run on most GPUs, but large enough to allow highest throughput
constexpr std::size_t data_size = 512 << 20;
constexpr std::size_t row_group_size = 128 << 20;
constexpr std::size_t data_size = 512 << 20;
// The number of separate read calls to use when reading files in multiple chunks
// Each call reads roughly equal amounts of data
constexpr int32_t chunked_read_num_chunks = 4;

std::vector<std::string> get_top_level_col_names(cudf::io::source_info const& source)
{
cudf::io::parquet_reader_options const read_options =
cudf::io::parquet_reader_options::builder(source);
auto const schema = cudf::io::read_parquet(read_options).metadata.schema_info;

std::vector<std::string> names;
names.reserve(schema.size());
std::transform(schema.cbegin(), schema.cend(), std::back_inserter(names), [](auto const& c) {
return c.name;
});
return names;
auto const top_lvl_cols = cudf::io::read_parquet_metadata(source).schema().root().children();
std::vector<std::string> col_names;
std::transform(top_lvl_cols.cbegin(),
top_lvl_cols.cend(),
std::back_inserter(col_names),
[](auto const& col_meta) { return col_meta.name(); });

return col_names;
}

template <column_selection ColSelection,
Expand All @@ -55,6 +56,8 @@ void BM_parquet_read_options(nvbench::state& state,
nvbench::enum_type<UsesPandasMetadata>,
nvbench::enum_type<Timestamp>>)
{
auto const num_chunks = RowSelection == row_selection::ALL ? 1 : chunked_read_num_chunks;

auto constexpr str_to_categories = ConvertsStrings == converts_strings::YES;
auto constexpr uses_pd_metadata = UsesPandasMetadata == uses_pandas_metadata::YES;

Expand Down Expand Up @@ -87,9 +90,8 @@ void BM_parquet_read_options(nvbench::state& state,
.use_pandas_metadata(uses_pd_metadata)
.timestamp_type(ts_type);

// TODO: add read_parquet_metadata to properly calculate #row_groups
auto constexpr num_row_groups = data_size / row_group_size;
auto constexpr num_chunks = 1;
auto const num_row_groups = read_parquet_metadata(source_sink.make_source_info()).num_rowgroups();
auto const chunk_row_cnt = cudf::util::div_rounding_up_unsafe(view.num_rows(), num_chunks);

auto mem_stats_logger = cudf::memory_stats_logger();
state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
Expand All @@ -100,18 +102,15 @@ void BM_parquet_read_options(nvbench::state& state,
timer.start();
cudf::size_type rows_read = 0;
for (int32_t chunk = 0; chunk < num_chunks; ++chunk) {
auto const is_last_chunk = chunk == (num_chunks - 1);
switch (RowSelection) {
case row_selection::ALL: break;
case row_selection::ROW_GROUPS: {
auto row_groups_to_read = segments_in_chunk(num_row_groups, num_chunks, chunk);
if (is_last_chunk) {
// Need to assume that an additional "overflow" row group is present
row_groups_to_read.push_back(num_row_groups);
}
read_options.set_row_groups({row_groups_to_read});
read_options.set_row_groups({segments_in_chunk(num_row_groups, num_chunks, chunk)});
} break;
case row_selection::NROWS: [[fallthrough]];
case row_selection::NROWS:
read_options.set_skip_rows(chunk * chunk_row_cnt);
read_options.set_num_rows(chunk_row_cnt);
break;
default: CUDF_FAIL("Unsupported row selection method");
}

Expand All @@ -130,14 +129,26 @@ void BM_parquet_read_options(nvbench::state& state,
state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size");
}

using row_selections =
nvbench::enum_type_list<row_selection::ALL, row_selection::NROWS, row_selection::ROW_GROUPS>;
NVBENCH_BENCH_TYPES(BM_parquet_read_options,
NVBENCH_TYPE_AXES(nvbench::enum_type_list<column_selection::ALL>,
row_selections,
nvbench::enum_type_list<converts_strings::YES>,
nvbench::enum_type_list<uses_pandas_metadata::YES>,
nvbench::enum_type_list<cudf::type_id::EMPTY>))
.set_name("parquet_read_row_selection")
.set_type_axes_names({"column_selection",
"row_selection",
"str_to_categories",
"uses_pandas_metadata",
"timestamp_type"})
.set_min_samples(4);

using col_selections = nvbench::enum_type_list<column_selection::ALL,
column_selection::ALTERNATE,
column_selection::FIRST_HALF,
column_selection::SECOND_HALF>;

// TODO: row_selection::ROW_GROUPS disabled until we add an API to read metadata from a parquet file
// and determine num row groups. https://github.com/rapidsai/cudf/pull/9963#issuecomment-1004832863

NVBENCH_BENCH_TYPES(BM_parquet_read_options,
NVBENCH_TYPE_AXES(col_selections,
nvbench::enum_type_list<row_selection::ALL>,
Expand Down
File renamed without changes.
73 changes: 73 additions & 0 deletions cpp/benchmarks/search/contains_table.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>

#include <cudf/detail/search.hpp>
#include <cudf/lists/list_view.hpp>
#include <cudf/types.hpp>

#include <rmm/mr/device/per_device_resource.hpp>

#include <nvbench/nvbench.cuh>

// Upper bound handed to the UNIFORM distributions of the generated benchmark data (the lower
// bound is 0). NOTE(review): presumably kept small so that haystack and needles share values.
auto constexpr num_unique_elements = 1000;

/**
 * @brief Benchmarks the table-vs-table `cudf::detail::contains` on randomly generated input.
 *
 * Reads the `table_size` and `null_probability` axes from `state`, generates a single-column
 * haystack table and a single-column needles table of element type `Type`, and measures the
 * `contains` call on the stream supplied by nvbench. The peak memory usage reported by the
 * memory stats logger is attached to the benchmark output.
 *
 * @tparam Type Element type of the single column in both generated tables
 * @param state nvbench state that supplies the axis values and drives the measurement
 */
template <typename Type>
static void nvbench_contains_table(nvbench::state& state, nvbench::type_list<Type>)
{
  auto const column_type  = cudf::type_to_id<Type>();
  auto const table_bytes  = static_cast<size_t>(state.get_int64("table_size"));
  double const null_ratio = state.get_float64("null_probability");

  // Every column type gets a uniform distribution over [0, num_unique_elements].
  auto profile = data_profile_builder().null_probability(null_ratio);
  profile.distribution(column_type, distribution_id::UNIFORM, 0, num_unique_elements);
  if (column_type == cudf::type_id::LIST) {
    // LIST columns additionally configure an INT32 distribution and a list depth of 1.
    profile.distribution(cudf::type_id::INT32, distribution_id::UNIFORM, 0, num_unique_elements);
    profile.list_depth(1);
  }

  // The data_profile is deliberately rebuilt from the builder on every call, exactly once per
  // generated table. The trailing argument differs between the two tables
  // (NOTE(review): presumably the random seed -- confirm against create_random_table).
  auto const make_input = [&](auto seed) {
    return create_random_table(
      {column_type}, table_size_bytes{table_bytes}, data_profile{profile}, seed);
  };
  auto const haystack = make_input(0);
  auto const needles  = make_input(1);

  // Created after the inputs exist and before the timed region runs.
  cudf::memory_stats_logger mem_stats_logger;

  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
    rmm::cuda_stream_view const stream{launch.get_stream()};
    // The result is only needed to keep the call alive; it is never inspected.
    [[maybe_unused]] auto const found = cudf::detail::contains(haystack->view(),
                                                               needles->view(),
                                                               cudf::null_equality::EQUAL,
                                                               cudf::nan_equality::ALL_EQUAL,
                                                               stream,
                                                               rmm::mr::get_current_device_resource());
  });

  state.add_buffer_size(
    mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage");
}

// Registers nvbench_contains_table under the name "contains_table".
// Axes: element type (int32_t and cudf::list_view), null probability (0.0 and 0.1), and
// "table_size", which the benchmark body passes to the table generator as a size in bytes.
NVBENCH_BENCH_TYPES(nvbench_contains_table,
NVBENCH_TYPE_AXES(nvbench::type_list<int32_t, cudf::list_view>))
.set_name("contains_table")
.set_type_axes_names({"type"})
.add_float64_axis("null_probability", {0.0, 0.1})
.add_int64_axis("table_size", {10'000, 100'000, 1'000'000, 10'000'000});
3 changes: 2 additions & 1 deletion cpp/benchmarks/text/ngrams.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,12 @@ static void BM_ngrams(benchmark::State& state, ngrams_type nt)
cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length);
auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows}, profile);
cudf::strings_column_view input(column->view());
auto const separator = cudf::string_scalar("_");

for (auto _ : state) {
cuda_event_timer raii(state, true);
switch (nt) {
case ngrams_type::tokens: nvtext::generate_ngrams(input); break;
case ngrams_type::tokens: nvtext::generate_ngrams(input, 2, separator); break;
case ngrams_type::characters: nvtext::generate_character_ngrams(input); break;
}
}
Expand Down
7 changes: 5 additions & 2 deletions cpp/benchmarks/text/tokenize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,11 @@ static void bench_tokenize(nvbench::state& state)
auto result = nvtext::count_tokens(input, cudf::strings_column_view(delimiters));
});
} else if (tokenize_type == "ngrams") {
state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { auto result = nvtext::ngrams_tokenize(input); });
auto const delimiter = cudf::string_scalar("");
auto const separator = cudf::string_scalar("_");
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
auto result = nvtext::ngrams_tokenize(input, 2, delimiter, separator);
});
} else if (tokenize_type == "characters") {
state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { auto result = nvtext::character_tokenize(input); });
Expand Down
Loading

0 comments on commit ed6d718

Please sign in to comment.