
Commit

Merge branch 'branch-24.12' into test-branch
Matt711 authored Oct 29, 2024
2 parents 0b863e9 + 8d7b0d8 commit 496587e
Showing 5 changed files with 44 additions and 34 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/auto-assign.yml
@@ -13,5 +13,5 @@ jobs:
steps:
- uses: actions-ecosystem/action-add-assignees@v1
with:
- repo_token: "${{ secrets.GITHUB_TOKEN }}"
+ github_token: "${{ secrets.GITHUB_TOKEN }}"
assignees: ${{ github.actor }}
26 changes: 13 additions & 13 deletions cpp/benchmarks/ast/transform.cpp
@@ -52,14 +52,14 @@ enum class TreeType {
template <typename key_type, TreeType tree_type, bool reuse_columns, bool Nullable>
static void BM_ast_transform(nvbench::state& state)
{
- auto const table_size = static_cast<cudf::size_type>(state.get_int64("table_size"));
+ auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const tree_levels = static_cast<cudf::size_type>(state.get_int64("tree_levels"));

// Create table data
auto const n_cols = reuse_columns ? 1 : tree_levels + 1;
auto const source_table =
create_sequence_table(cycle_dtypes({cudf::type_to_id<key_type>()}, n_cols),
- row_count{table_size},
+ row_count{num_rows},
Nullable ? std::optional<double>{0.5} : std::nullopt);
auto table = source_table->view();

@@ -99,8 +99,8 @@ static void BM_ast_transform(nvbench::state& state)
auto const& expression_tree_root = expressions.back();

// Use the number of bytes read from global memory
- state.add_global_memory_reads<key_type>(static_cast<size_t>(table_size) * (tree_levels + 1));
- state.add_global_memory_writes<key_type>(table_size);
+ state.add_global_memory_reads<key_type>(static_cast<size_t>(num_rows) * (tree_levels + 1));
+ state.add_global_memory_writes<key_type>(num_rows);

state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch&) { cudf::compute_column(table, expression_tree_root); });
@@ -109,15 +109,15 @@ static void BM_ast_transform(nvbench::state& state)
template <cudf::ast::ast_operator cmp_op, cudf::ast::ast_operator reduce_op>
static void BM_string_compare_ast_transform(nvbench::state& state)
{
- auto const string_width = static_cast<cudf::size_type>(state.get_int64("string_width"));
- auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
- auto const num_comparisons = static_cast<cudf::size_type>(state.get_int64("num_comparisons"));
- auto const hit_rate = static_cast<cudf::size_type>(state.get_int64("hit_rate"));
+ auto const string_width = static_cast<cudf::size_type>(state.get_int64("string_width"));
+ auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
+ auto const tree_levels = static_cast<cudf::size_type>(state.get_int64("tree_levels"));
+ auto const hit_rate = static_cast<cudf::size_type>(state.get_int64("hit_rate"));

- CUDF_EXPECTS(num_comparisons > 0, "benchmarks require 1 or more comparisons");
+ CUDF_EXPECTS(tree_levels > 0, "benchmarks require 1 or more comparisons");

// Create table data
- auto const num_cols = num_comparisons * 2;
+ auto const num_cols = tree_levels * 2;
std::vector<std::unique_ptr<cudf::column>> columns;
std::for_each(
thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_cols), [&](size_t) {
@@ -150,7 +150,7 @@ static void BM_string_compare_ast_transform(nvbench::state& state)
expressions.emplace_back(cudf::ast::operation(cmp_op, column_refs[0], column_refs[1]));

std::for_each(thrust::make_counting_iterator(1),
- thrust::make_counting_iterator(num_comparisons),
+ thrust::make_counting_iterator(tree_levels),
[&](size_t idx) {
auto const& lhs = expressions.back();
auto const& rhs = expressions.emplace_back(
@@ -177,7 +177,7 @@ static void BM_string_compare_ast_transform(nvbench::state& state)
NVBENCH_BENCH(name) \
.set_name(#name) \
.add_int64_axis("tree_levels", {1, 5, 10}) \
.add_int64_axis("table_size", {100'000, 1'000'000, 10'000'000, 100'000'000})
.add_int64_axis("num_rows", {100'000, 1'000'000, 10'000'000, 100'000'000})

AST_TRANSFORM_BENCHMARK_DEFINE(
ast_int32_imbalanced_unique, int32_t, TreeType::IMBALANCED_LEFT, false, false);
@@ -202,7 +202,7 @@ AST_TRANSFORM_BENCHMARK_DEFINE(
.set_name(#name) \
.add_int64_axis("string_width", {32, 64, 128, 256}) \
.add_int64_axis("num_rows", {32768, 262144, 2097152}) \
.add_int64_axis("num_comparisons", {1, 2, 3, 4}) \
.add_int64_axis("tree_levels", {1, 2, 3, 4}) \
.add_int64_axis("hit_rate", {50, 100})

AST_STRING_COMPARE_TRANSFORM_BENCHMARK_DEFINE(ast_string_equal_logical_and,
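For reference, the renamed axes plug into the standard nvbench pattern used throughout these benchmarks. The sketch below is illustrative only and not part of this commit; the benchmark name and the int32 element type are placeholders chosen for the example.

```cpp
// Illustrative sketch only (not from this commit): a minimal nvbench benchmark
// wired to the renamed "num_rows" and "tree_levels" axes used above.
#include <nvbench/nvbench.cuh>

#include <cstddef>
#include <cstdint>

static void example_axis_usage(nvbench::state& state)
{
  // Read the renamed axes exactly as the updated benchmarks do.
  auto const num_rows    = state.get_int64("num_rows");
  auto const tree_levels = state.get_int64("tree_levels");

  // An imbalanced expression tree touches one key column per level plus one
  // leaf column, so expected traffic scales as num_rows * (tree_levels + 1) reads.
  state.add_global_memory_reads<std::int32_t>(static_cast<std::size_t>(num_rows) *
                                              (tree_levels + 1));
  state.add_global_memory_writes<std::int32_t>(num_rows);

  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) {
    // The real benchmarks call cudf::compute_column(table, expression_tree_root) here.
  });
}

NVBENCH_BENCH(example_axis_usage)
  .set_name("example_axis_usage")
  .add_int64_axis("tree_levels", {1, 5, 10})
  .add_int64_axis("num_rows", {100'000, 1'000'000});
```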
26 changes: 13 additions & 13 deletions cpp/benchmarks/binaryop/binaryop.cpp
@@ -40,18 +40,18 @@ enum class TreeType {
template <typename key_type, TreeType tree_type, bool reuse_columns>
static void BM_binaryop_transform(nvbench::state& state)
{
- auto const table_size{static_cast<cudf::size_type>(state.get_int64("table_size"))};
+ auto const num_rows{static_cast<cudf::size_type>(state.get_int64("num_rows"))};
auto const tree_levels{static_cast<cudf::size_type>(state.get_int64("tree_levels"))};

// Create table data
auto const n_cols = reuse_columns ? 1 : tree_levels + 1;
auto const source_table = create_sequence_table(
- cycle_dtypes({cudf::type_to_id<key_type>()}, n_cols), row_count{table_size});
+ cycle_dtypes({cudf::type_to_id<key_type>()}, n_cols), row_count{num_rows});
cudf::table_view table{*source_table};

// Use the number of bytes read from global memory
- state.add_global_memory_reads<key_type>(static_cast<size_t>(table_size) * (tree_levels + 1));
- state.add_global_memory_writes<key_type>(table_size);
+ state.add_global_memory_reads<key_type>(static_cast<size_t>(num_rows) * (tree_levels + 1));
+ state.add_global_memory_writes<key_type>(num_rows);

state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) {
// Execute tree that chains additions like (((a + b) + c) + d)
@@ -74,15 +74,15 @@ static void BM_binaryop_transform(nvbench::state& state)
template <cudf::binary_operator cmp_op, cudf::binary_operator reduce_op>
static void BM_string_compare_binaryop_transform(nvbench::state& state)
{
- auto const string_width = static_cast<cudf::size_type>(state.get_int64("string_width"));
- auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
- auto const num_comparisons = static_cast<cudf::size_type>(state.get_int64("num_comparisons"));
- auto const hit_rate = static_cast<cudf::size_type>(state.get_int64("hit_rate"));
+ auto const string_width = static_cast<cudf::size_type>(state.get_int64("string_width"));
+ auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
+ auto const tree_levels = static_cast<cudf::size_type>(state.get_int64("tree_levels"));
+ auto const hit_rate = static_cast<cudf::size_type>(state.get_int64("hit_rate"));

- CUDF_EXPECTS(num_comparisons > 0, "benchmarks require 1 or more comparisons");
+ CUDF_EXPECTS(tree_levels > 0, "benchmarks require 1 or more comparisons");

// Create table data
- auto const num_cols = num_comparisons * 2;
+ auto const num_cols = tree_levels * 2;
std::vector<std::unique_ptr<cudf::column>> columns;
std::for_each(
thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_cols), [&](size_t) {
@@ -113,7 +113,7 @@ static void BM_string_compare_binaryop_transform(nvbench::state& state)
cudf::binary_operation(table.get_column(0), table.get_column(1), cmp_op, bool_type, stream);
std::for_each(
thrust::make_counting_iterator(1),
- thrust::make_counting_iterator(num_comparisons),
+ thrust::make_counting_iterator(tree_levels),
[&](size_t idx) {
std::unique_ptr<cudf::column> comparison = cudf::binary_operation(
table.get_column(idx * 2), table.get_column(idx * 2 + 1), cmp_op, bool_type, stream);
@@ -133,7 +133,7 @@ static void BM_string_compare_binaryop_transform(nvbench::state& state)
} \
NVBENCH_BENCH(name) \
.add_int64_axis("tree_levels", {1, 2, 5, 10}) \
.add_int64_axis("table_size", {100'000, 1'000'000, 10'000'000, 100'000'000})
.add_int64_axis("num_rows", {100'000, 1'000'000, 10'000'000, 100'000'000})

BINARYOP_TRANSFORM_BENCHMARK_DEFINE(binaryop_int32_imbalanced_unique,
int32_t,
@@ -158,7 +158,7 @@ BINARYOP_TRANSFORM_BENCHMARK_DEFINE(binaryop_double_imbalanced_unique,
.set_name(#name) \
.add_int64_axis("string_width", {32, 64, 128, 256}) \
.add_int64_axis("num_rows", {32768, 262144, 2097152}) \
.add_int64_axis("num_comparisons", {1, 2, 3, 4}) \
.add_int64_axis("tree_levels", {1, 2, 3, 4}) \
.add_int64_axis("hit_rate", {50, 100})

STRING_COMPARE_BINARYOP_TRANSFORM_BENCHMARK_DEFINE(string_compare_binaryop_transform,
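The hunk above chains additions like (((a + b) + c) + d). As a hedged illustration of that pattern using libcudf's public binary-op API (not code from this commit; the function name and the INT32 output type are assumptions for the example):

```cpp
// Illustrative sketch only: folding columns into a chained addition,
// (((col0 + col1) + col2) + ...), analogous to what BM_binaryop_transform times.
#include <cudf/binaryop.hpp>
#include <cudf/column/column.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>

#include <memory>

std::unique_ptr<cudf::column> chain_additions(cudf::table_view const& table)
{
  // Assumed output type for the example; the benchmark uses the table's key_type.
  // Assumes the table has at least two columns.
  auto const out_type = cudf::data_type{cudf::type_id::INT32};

  // Seed the chain with (col0 + col1).
  auto result = cudf::binary_operation(
    table.column(0), table.column(1), cudf::binary_operator::ADD, out_type);

  // Fold each remaining column into the running sum: result = result + col_i.
  for (cudf::size_type i = 2; i < table.num_columns(); ++i) {
    result = cudf::binary_operation(
      result->view(), table.column(i), cudf::binary_operator::ADD, out_type);
  }
  return result;
}
```

When reuse_columns is set, the benchmark feeds the same single column into every level instead of one column per level, which is why n_cols collapses to 1 in that case.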
12 changes: 6 additions & 6 deletions cpp/benchmarks/binaryop/compiled_binaryop.cpp
@@ -23,10 +23,10 @@
template <typename TypeLhs, typename TypeRhs, typename TypeOut>
void BM_compiled_binaryop(nvbench::state& state, cudf::binary_operator binop)
{
- auto const table_size = static_cast<cudf::size_type>(state.get_int64("table_size"));
+ auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));

auto const source_table = create_random_table(
- {cudf::type_to_id<TypeLhs>(), cudf::type_to_id<TypeRhs>()}, row_count{table_size});
+ {cudf::type_to_id<TypeLhs>(), cudf::type_to_id<TypeRhs>()}, row_count{num_rows});

auto lhs = cudf::column_view(source_table->get_column(0));
auto rhs = cudf::column_view(source_table->get_column(1));
@@ -37,9 +37,9 @@ void BM_compiled_binaryop(nvbench::state& state, cudf::binary_operator binop)
cudf::binary_operation(lhs, rhs, binop, output_dtype);

// use number of bytes read and written to global memory
- state.add_global_memory_reads<TypeLhs>(table_size);
- state.add_global_memory_reads<TypeRhs>(table_size);
- state.add_global_memory_writes<TypeOut>(table_size);
+ state.add_global_memory_reads<TypeLhs>(num_rows);
+ state.add_global_memory_reads<TypeRhs>(num_rows);
+ state.add_global_memory_writes<TypeOut>(num_rows);

state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch&) { cudf::binary_operation(lhs, rhs, binop, output_dtype); });
@@ -55,7 +55,7 @@ void BM_compiled_binaryop(nvbench::state& state, cudf::binary_operator binop)
} \
NVBENCH_BENCH(name) \
.set_name("compiled_binary_op_" BM_STRINGIFY(name)) \
.add_int64_axis("table_size", {10'000, 100'000, 1'000'000, 10'000'000, 100'000'000})
.add_int64_axis("num_rows", {10'000, 100'000, 1'000'000, 10'000'000, 100'000'000})

#define build_name(a, b, c, d) a##_##b##_##c##_##d

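To make the accounting above concrete: each benchmarked operation reads one element per operand column and writes one element of the output per row, so the declared traffic is num_rows * (sizeof(TypeLhs) + sizeof(TypeRhs) + sizeof(TypeOut)) bytes. A small sketch of that arithmetic (illustrative only; the helper name and the float types are assumptions):

```cpp
// Illustrative helper only: the bytes BM_compiled_binaryop declares to nvbench
// for one pass over num_rows rows.
#include <cstddef>
#include <cstdio>

template <typename TypeLhs, typename TypeRhs, typename TypeOut>
constexpr std::size_t binaryop_bytes(std::size_t num_rows)
{
  // One read per operand element, one write per output element.
  return num_rows * (sizeof(TypeLhs) + sizeof(TypeRhs) + sizeof(TypeOut));
}

int main()
{
  // Example: float + float -> float at the largest axis value.
  constexpr std::size_t num_rows = 100'000'000;
  std::printf("bytes moved: %zu\n", binaryop_bytes<float, float, float>(num_rows));  // 1'200'000'000
  return 0;
}
```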
12 changes: 11 additions & 1 deletion java/src/main/java/ai/rapids/cudf/HostMemoryBuffer.java
@@ -1,6 +1,6 @@
/*
*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -155,6 +155,16 @@ public static HostMemoryBuffer allocate(long bytes) {
return allocate(bytes, defaultPreferPinned);
}

+ /**
+  * Allocate host memory bypassing the default allocator. This is intended to only be used by other allocators.
+  * Pinned memory will not be used for these allocations.
+  * @param bytes size in bytes to allocate
+  * @return the newly created buffer
+  */
+ public static HostMemoryBuffer allocateRaw(long bytes) {
+   return new HostMemoryBuffer(UnsafeMemoryAccessor.allocate(bytes), bytes);
+ }
+
/**
* Create a host buffer that is memory-mapped to a file.
* @param path path to the file to map into host memory