Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Convert rank to use experimental row comparators #12481

Merged
merged 22 commits into from
Feb 8, 2023
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
c4c7cf2
update to experimental row comparator
divyegala Jan 5, 2023
2723469
working through tests
divyegala Jan 6, 2023
4ef330c
Merge remote-tracking branch 'upstream/branch-23.02' into rank-row-co…
divyegala Jan 24, 2023
72cdb6e
lists tests passing
divyegala Jan 25, 2023
5253196
formatting
divyegala Jan 25, 2023
2dd66c0
adding tests for structs
divyegala Jan 31, 2023
0baac12
Merge remote-tracking branch 'upstream/branch-23.02' into rank-row-co…
divyegala Jan 31, 2023
1e86c5d
all tests passed
divyegala Jan 31, 2023
20197e8
Merge remote-tracking branch 'upstream/branch-23.02' into rank-row-co…
divyegala Jan 31, 2023
b017f54
add benchmarks
divyegala Jan 31, 2023
5f2aaa5
Merge remote-tracking branch 'upstream/branch-23.04' into rank-row-co…
divyegala Jan 31, 2023
7a86326
fix bad merge
divyegala Feb 1, 2023
095ef4b
formatting
divyegala Feb 1, 2023
8a1bedf
review comments
divyegala Feb 3, 2023
bdc2ee1
Merge remote-tracking branch 'upstream/branch-23.04' into rank-row-co…
divyegala Feb 3, 2023
3c6903b
copyright year
divyegala Feb 3, 2023
19075e6
address review
divyegala Feb 3, 2023
0093b32
Merge branch 'branch-23.04' into rank-row-comparator
divyegala Feb 6, 2023
91dce1f
Merge branch 'branch-23.04' into rank-row-comparator
jjacobelli Feb 7, 2023
945e89f
add make to dependencies of conda-java-tests CI
divyegala Feb 7, 2023
7b512b9
Revert "add make to dependencies of conda-java-tests CI"
divyegala Feb 7, 2023
df16780
Merge branch 'branch-23.04' into rank-row-comparator
divyegala Feb 7, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,10 @@ ConfigureNVBench(SEARCH_NVBENCH search/contains.cpp)
# ##################################################################################################
# * sort benchmark --------------------------------------------------------------------------------
ConfigureBench(SORT_BENCH sort/rank.cpp sort/sort.cpp sort/sort_strings.cpp)
ConfigureNVBench(SORT_NVBENCH sort/segmented_sort.cpp sort/sort_lists.cpp sort/sort_structs.cpp)
ConfigureNVBench(
SORT_NVBENCH sort/rank_lists.cpp sort/rank_structs.cpp sort/segmented_sort.cpp
sort/sort_lists.cpp sort/sort_structs.cpp
)

# ##################################################################################################
# * quantiles benchmark
Expand Down
83 changes: 83 additions & 0 deletions cpp/benchmarks/sort/nested_types_common.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/fixture/rmm_pool_raii.hpp>

#include <cudf_test/column_wrapper.hpp>

#include <nvbench/nvbench.cuh>

#include <random>

/**
 * @brief Generates the single-column LIST table shared by the list benchmarks.
 *
 * Reads three axes from the benchmark state: "size_bytes" (int64, used as the requested table
 * size in bytes), "depth" (int64, forwarded to `set_list_depth`) and "null_frequency" (float64,
 * forwarded to `set_null_probability`).
 *
 * @param state NVBench state providing the axis values
 * @return Owning pointer to the randomly generated table
 */
inline std::unique_ptr<cudf::table> create_lists_data(nvbench::state& state)
{
  auto const requested_bytes  = static_cast<size_t>(state.get_int64("size_bytes"));
  auto const nesting_depth    = static_cast<cudf::size_type>(state.get_int64("depth"));
  auto const null_probability = state.get_float64("null_frequency");

  data_profile list_profile;
  // Uniform distribution with bounds 0 and 5 for the LIST type
  // NOTE(review): presumably these bound the generated list lengths - confirm against data_profile
  list_profile.set_distribution_params(cudf::type_id::LIST, distribution_id::UNIFORM, 0, 5);
  list_profile.set_list_depth(nesting_depth);
  list_profile.set_null_probability(null_probability);

  return create_random_table(
    {cudf::type_id::LIST}, table_size_bytes{requested_bytes}, list_profile);
}

/**
 * @brief Generates the struct table shared by the struct benchmarks.
 *
 * Reads three int64 axes from the benchmark state: "NumRows" (row count), "Depth" (number of
 * struct layers wrapped around the leaf columns) and "Nulls" (non-zero attaches a validity
 * iterator to the leaf columns).
 *
 * With a depth of zero no struct layer is added and the returned table holds the leaf integer
 * columns directly. Otherwise the table holds a single struct column nested `Depth` levels deep.
 *
 * @param state  NVBench state providing the axis values
 * @param n_cols Number of leaf integer columns placed in the innermost level
 * @return Owning pointer to the generated table
 */
inline std::unique_ptr<cudf::table> create_structs_data(nvbench::state& state,
                                                        cudf::size_type const n_cols = 1)
{
  using Type           = int;
  using column_wrapper = cudf::test::fixed_width_column_wrapper<Type>;
  // Default-constructed engine: every run starts from the same default seed
  std::default_random_engine generator;
  std::uniform_int_distribution<int> distribution(0, 100);

  auto const n_rows = static_cast<cudf::size_type>(state.get_int64("NumRows"));
  auto const depth  = static_cast<cudf::size_type>(state.get_int64("Depth"));
  auto const nulls  = static_cast<bool>(state.get_int64("Nulls"));

  // Builds one leaf column whose values are drawn from the closed range [0, 100]
  auto make_leaf = [&]() {
    auto const elements = cudf::detail::make_counting_transform_iterator(
      0, [&](auto row) { return distribution(generator); });
    if (nulls) {
      // Validity iterator yields false for every index that is a multiple of 10
      auto valids =
        cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 10 != 0; });
      return column_wrapper(elements, elements + n_rows, valids);
    }
    return column_wrapper(elements, elements + n_rows);
  };

  std::vector<column_wrapper> leaf_wrappers;
  leaf_wrappers.reserve(n_cols);
  for (cudf::size_type col_idx = 0; col_idx < n_cols; ++col_idx) {
    leaf_wrappers.push_back(make_leaf());
  }

  // Take ownership of the underlying columns; these form the innermost level
  std::vector<std::unique_ptr<cudf::column>> level_cols;
  for (auto& wrapper : leaf_wrappers) {
    level_cols.push_back(wrapper.release());
  }

  // Wrap the current level in a struct column, once per requested layer
  for (int level = 0; level < depth; ++level) {
    // A row of this layer is marked valid whenever the draw is non-zero; the upper bound of the
    // draw grows with the layer index
    std::uniform_int_distribution<int> validity_distribution(0, 100 * (level + 1));
    std::vector<bool> struct_validity;
    for (cudf::size_type row_idx = 0; row_idx < n_rows; ++row_idx) {
      struct_validity.push_back(validity_distribution(generator) != 0);
    }

    cudf::test::structs_column_wrapper struct_col(std::move(level_cols), struct_validity);
    level_cols.clear();
    level_cols.push_back(struct_col.release());
  }

  // Hand the outermost level to an owning table
  return std::make_unique<cudf::table>(std::move(level_cols));
}
4 changes: 2 additions & 2 deletions cpp/benchmarks/sort/rank.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -33,7 +33,7 @@ static void BM_rank(benchmark::State& state, bool nulls)
// Create columns with values in the range [0,100)
data_profile profile = data_profile_builder().cardinality(0).distribution(
cudf::type_to_id<Type>(), distribution_id::UNIFORM, 0, 100);
profile.set_null_probability(nulls ? std::optional{0.01} : std::nullopt);
profile.set_null_probability(nulls ? std::optional{0.2} : std::nullopt);
auto keys = create_random_column(cudf::type_to_id<Type>(), row_count{n_rows}, profile);

for (auto _ : state) {
Expand Down
83 changes: 83 additions & 0 deletions cpp/benchmarks/sort/rank_lists.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "nested_types_common.hpp"

#include <cudf/sorting.hpp>

#include <nvbench/nvbench.cuh>

/**
 * @brief Benchmarks `cudf::rank` on the first column of the table from `create_lists_data`.
 *
 * The "null_frequency" axis also selects the null policy: a non-zero frequency ranks with
 * `null_policy::INCLUDE`, zero with `null_policy::EXCLUDE`.
 *
 * @param state  NVBench state supplying the axis values and driving the measurement
 * @param method Rank method to benchmark
 */
void nvbench_rank_lists(nvbench::state& state, cudf::rank_method method)
{
  cudf::rmm_pool_raii pool_raii;

  auto const table = create_lists_data(state);

  auto const null_frequency = state.get_float64("null_frequency");
  auto const null_handling =
    null_frequency ? cudf::null_policy::INCLUDE : cudf::null_policy::EXCLUDE;

  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
    cudf::rank(table->view().column(0),
               method,
               cudf::order::ASCENDING,
               null_handling,
               cudf::null_order::AFTER,
               rmm::mr::get_current_device_resource());
  });
}

// Benchmark entry point: forwards to nvbench_rank_lists with rank_method::FIRST.
void nvbench_rank_lists_first(nvbench::state& state)
{
nvbench_rank_lists(state, cudf::rank_method::FIRST);
}
divyegala marked this conversation as resolved.
Show resolved Hide resolved

// Benchmark entry point: forwards to nvbench_rank_lists with rank_method::DENSE.
void nvbench_rank_lists_dense(nvbench::state& state)
{
nvbench_rank_lists(state, cudf::rank_method::DENSE);
}

// Benchmark entry point: forwards to nvbench_rank_lists with rank_method::MIN.
void nvbench_rank_lists_min(nvbench::state& state)
{
nvbench_rank_lists(state, cudf::rank_method::MIN);
}

// Benchmark entry point: forwards to nvbench_rank_lists with rank_method::AVERAGE.
void nvbench_rank_lists_average(nvbench::state& state)
{
nvbench_rank_lists(state, cudf::rank_method::AVERAGE);
}

// Registers one NVBench benchmark per rank method. All four share the same axes, which are the
// ones create_lists_data reads from the state:
//   - "size_bytes": power-of-two axis (per the NVBench API the listed numbers are exponents,
//     i.e. 2^10 .. 2^28 - TODO confirm)
//   - "depth": 1 and 4
//   - "null_frequency": 0 and 0.2
NVBENCH_BENCH(nvbench_rank_lists_first)
.set_name("rank_lists_first")
.add_int64_power_of_two_axis("size_bytes", {10, 18, 24, 28})
.add_int64_axis("depth", {1, 4})
.add_float64_axis("null_frequency", {0, 0.2});

NVBENCH_BENCH(nvbench_rank_lists_dense)
.set_name("rank_lists_dense")
.add_int64_power_of_two_axis("size_bytes", {10, 18, 24, 28})
.add_int64_axis("depth", {1, 4})
.add_float64_axis("null_frequency", {0, 0.2});

NVBENCH_BENCH(nvbench_rank_lists_min)
.set_name("rank_lists_min")
.add_int64_power_of_two_axis("size_bytes", {10, 18, 24, 28})
.add_int64_axis("depth", {1, 4})
.add_float64_axis("null_frequency", {0, 0.2});

NVBENCH_BENCH(nvbench_rank_lists_average)
.set_name("rank_lists_average")
.add_int64_power_of_two_axis("size_bytes", {10, 18, 24, 28})
.add_int64_axis("depth", {1, 4})
.add_float64_axis("null_frequency", {0, 0.2});
83 changes: 83 additions & 0 deletions cpp/benchmarks/sort/rank_structs.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "nested_types_common.hpp"

#include <cudf/sorting.hpp>

#include <nvbench/nvbench.cuh>

/**
 * @brief Benchmarks `cudf::rank` on the first column of the table from `create_structs_data`.
 *
 * `create_structs_data` reads the int64 axes "NumRows", "Depth" and "Nulls" from `state`. The
 * same "Nulls" axis selects the null policy here: a non-zero value ranks with
 * `null_policy::INCLUDE`, zero with `null_policy::EXCLUDE`.
 *
 * @param state  NVBench state supplying the axis values and driving the measurement
 * @param method Rank method to benchmark
 */
void nvbench_rank_structs(nvbench::state& state, cudf::rank_method method)
{
  cudf::rmm_pool_raii pool_raii;

  auto const table = create_structs_data(state);

  // The struct benchmarks register an int64 "Nulls" axis and no "null_frequency" axis, so the
  // null policy must be derived from "Nulls"; asking the state for "null_frequency" cannot
  // succeed for these benchmarks.
  bool const nulls{static_cast<bool>(state.get_int64("Nulls"))};

  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
    cudf::rank(table->view().column(0),
               method,
               cudf::order::ASCENDING,
               nulls ? cudf::null_policy::INCLUDE : cudf::null_policy::EXCLUDE,
               cudf::null_order::AFTER,
               rmm::mr::get_current_device_resource());
  });
}

// Benchmark entry point: forwards to nvbench_rank_structs with rank_method::FIRST.
void nvbench_rank_structs_first(nvbench::state& state)
{
nvbench_rank_structs(state, cudf::rank_method::FIRST);
}

// Benchmark entry point: forwards to nvbench_rank_structs with rank_method::DENSE.
void nvbench_rank_structs_dense(nvbench::state& state)
{
nvbench_rank_structs(state, cudf::rank_method::DENSE);
}

// Benchmark entry point: forwards to nvbench_rank_structs with rank_method::MIN.
void nvbench_rank_structs_min(nvbench::state& state)
{
nvbench_rank_structs(state, cudf::rank_method::MIN);
}

// Benchmark entry point: forwards to nvbench_rank_structs with rank_method::AVERAGE.
void nvbench_rank_structs_average(nvbench::state& state)
{
nvbench_rank_structs(state, cudf::rank_method::AVERAGE);
}

// Registers one NVBench benchmark per rank method. All four share the same int64 axes:
//   - "NumRows": power-of-two axis (per the NVBench API the listed numbers are exponents,
//     i.e. 2^10, 2^18, 2^26 - TODO confirm)
//   - "Depth": 0, 1 and 8
//   - "Nulls": 0 and 1, used as a boolean flag
NVBENCH_BENCH(nvbench_rank_structs_first)
.set_name("rank_structs_first")
.add_int64_power_of_two_axis("NumRows", {10, 18, 26})
.add_int64_axis("Depth", {0, 1, 8})
.add_int64_axis("Nulls", {0, 1});

NVBENCH_BENCH(nvbench_rank_structs_dense)
.set_name("rank_structs_dense")
.add_int64_power_of_two_axis("NumRows", {10, 18, 26})
.add_int64_axis("Depth", {0, 1, 8})
.add_int64_axis("Nulls", {0, 1});

NVBENCH_BENCH(nvbench_rank_structs_min)
.set_name("rank_structs_min")
.add_int64_power_of_two_axis("NumRows", {10, 18, 26})
.add_int64_axis("Depth", {0, 1, 8})
.add_int64_axis("Nulls", {0, 1});

NVBENCH_BENCH(nvbench_rank_structs_average)
.set_name("rank_structs_average")
.add_int64_power_of_two_axis("NumRows", {10, 18, 26})
.add_int64_axis("Depth", {0, 1, 8})
.add_int64_axis("Nulls", {0, 1});
16 changes: 3 additions & 13 deletions cpp/benchmarks/sort/sort_lists.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -14,8 +14,7 @@
* limitations under the License.
*/

#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/fixture/rmm_pool_raii.hpp>
#include "nested_types_common.hpp"

#include <cudf/detail/sorting.hpp>

Expand All @@ -25,16 +24,7 @@ void nvbench_sort_lists(nvbench::state& state)
{
cudf::rmm_pool_raii pool_raii;

const size_t size_bytes(state.get_int64("size_bytes"));
const cudf::size_type depth{static_cast<cudf::size_type>(state.get_int64("depth"))};
auto const null_frequency{state.get_float64("null_frequency")};

data_profile table_profile;
table_profile.set_distribution_params(cudf::type_id::LIST, distribution_id::UNIFORM, 0, 5);
table_profile.set_list_depth(depth);
table_profile.set_null_probability(null_frequency);
auto const table =
create_random_table({cudf::type_id::LIST}, table_size_bytes{size_bytes}, table_profile);
auto const table = create_lists_data(state);

state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
rmm::cuda_stream_view stream_view{launch.get_stream()};
Expand Down
52 changes: 4 additions & 48 deletions cpp/benchmarks/sort/sort_structs.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -14,65 +14,21 @@
* limitations under the License.
*/

#include <benchmarks/fixture/rmm_pool_raii.hpp>

#include <cudf_test/column_wrapper.hpp>
#include "nested_types_common.hpp"

#include <cudf/detail/sorting.hpp>

#include <nvbench/nvbench.cuh>

#include <random>

void nvbench_sort_struct(nvbench::state& state)
{
cudf::rmm_pool_raii pool_raii;

using Type = int;
using column_wrapper = cudf::test::fixed_width_column_wrapper<Type>;
std::default_random_engine generator;
std::uniform_int_distribution<int> distribution(0, 100);

const cudf::size_type n_rows{static_cast<cudf::size_type>(state.get_int64("NumRows"))};
const cudf::size_type n_cols{1};
const cudf::size_type depth{static_cast<cudf::size_type>(state.get_int64("Depth"))};
const bool nulls{static_cast<bool>(state.get_int64("Nulls"))};

// Create columns with values in the range [0,100)
std::vector<column_wrapper> columns;
columns.reserve(n_cols);
std::generate_n(std::back_inserter(columns), n_cols, [&]() {
auto const elements = cudf::detail::make_counting_transform_iterator(
0, [&](auto row) { return distribution(generator); });
if (!nulls) return column_wrapper(elements, elements + n_rows);
auto valids =
cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 10 != 0; });
return column_wrapper(elements, elements + n_rows, valids);
});

std::vector<std::unique_ptr<cudf::column>> cols;
std::transform(columns.begin(), columns.end(), std::back_inserter(cols), [](column_wrapper& col) {
return col.release();
});

std::vector<std::unique_ptr<cudf::column>> child_cols = std::move(cols);
// Lets add some layers
for (int i = 0; i < depth; i++) {
std::vector<bool> struct_validity;
std::uniform_int_distribution<int> bool_distribution(0, 100 * (i + 1));
std::generate_n(
std::back_inserter(struct_validity), n_rows, [&]() { return bool_distribution(generator); });
cudf::test::structs_column_wrapper struct_col(std::move(child_cols), struct_validity);
child_cols = std::vector<std::unique_ptr<cudf::column>>{};
child_cols.push_back(struct_col.release());
}

// Create table view
auto const input = cudf::table(std::move(child_cols));
auto const input = create_structs_data(state);

state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
rmm::cuda_stream_view stream_view{launch.get_stream()};
cudf::detail::sorted_order(input, {}, {}, stream_view, rmm::mr::get_current_device_resource());
cudf::detail::sorted_order(*input, {}, {}, stream_view, rmm::mr::get_current_device_resource());
});
}

Expand Down
Loading