Skip to content

Commit

Permalink
Improve MG graph creation (#2044)
Browse files Browse the repository at this point in the history
Improve multi-node multi-GPU scalability of graph creation (especially the code computing renumber_map)
Fix an overflow bug when creating a graph with more than 2^31 vertices

Authors:
  - Seunghwa Kang (https://github.com/seunghwak)

Approvers:
  - Chuck Hastings (https://github.com/ChuckHastings)
  - Kumar Aatish (https://github.com/kaatish)

URL: #2044
  • Loading branch information
seunghwak authored Feb 9, 2022
1 parent ff88061 commit 2cbf6c5
Show file tree
Hide file tree
Showing 5 changed files with 409 additions and 341 deletions.
11 changes: 10 additions & 1 deletion cpp/include/cugraph/detail/graph_utils.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -77,5 +77,14 @@ struct compute_partition_id_from_edge_t {
}
};

// Functor marking the first element of each run of equal, consecutive vertex
// IDs in a sorted array (a functor is used instead of a device lambda as a
// workaround for device-lambda restrictions).
// Precondition: `vertices` points to at least i + 1 valid elements when
// operator()(i) is invoked.
template <typename vertex_t>
struct is_first_in_run_t {
  vertex_t const* vertices{nullptr};
  __device__ bool operator()(size_t i) const
  {
    // The very first element always starts a run.
    if (i == 0) { return true; }
    // Otherwise a run starts wherever the value differs from its predecessor.
    return vertices[i] != vertices[i - 1];
  }
};

} // namespace detail
} // namespace cugraph
Original file line number Diff line number Diff line change
Expand Up @@ -99,16 +99,6 @@ struct call_key_aggregated_e_op_t {
}
};

// a workaround for cudaErrorInvalidDeviceFunction error when device lambda is used
//
// Returns true for index i when element i begins a new run of equal,
// consecutive major vertex IDs (i.e. i == 0 or the value differs from the
// preceding element).  Precondition: `major_vertices` has at least i + 1
// valid elements when operator()(i) is invoked.
template <typename vertex_t>
struct is_first_in_run_t {
  vertex_t const* major_vertices{nullptr};
  __device__ bool operator()(size_t i) const
  {
    // The comparison already yields bool; the original `... ? true : false`
    // ternary was redundant.
    return (i == 0) || (major_vertices[i] != major_vertices[i - 1]);
  }
};

// a workaround for cudaErrorInvalidDeviceFunction error when device lambda is used
template <typename vertex_t>
struct is_valid_vertex_t {
Expand Down
68 changes: 34 additions & 34 deletions cpp/include/cugraph/utilities/shuffle_comm.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -22,6 +22,7 @@
#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/binary_search.h>
#include <thrust/distance.h>
#include <thrust/fill.h>
#include <thrust/reduce.h>
Expand All @@ -36,6 +37,23 @@ namespace cugraph {

namespace detail {

// Functor that, for group ID i, produces the pair (i, count of elements in
// [group_id_first, group_id_last) whose group ID equals i).
// Precondition: the group-ID range is sorted ascending, so the run belonging
// to each group can be located with a binary search.
template <typename GroupIdIterator>
struct compute_group_id_count_pair_t {
  GroupIdIterator group_id_first{};
  GroupIdIterator group_id_last{};

  __device__ thrust::tuple<int, size_t> operator()(size_t i) const
  {
    static_assert(
      std::is_same_v<typename thrust::iterator_traits<GroupIdIterator>::value_type, int>);
    auto const group_id = static_cast<int>(i);
    // equal_range performs the lower_bound + upper_bound pair in one call;
    // the distance between the two bounds is the size of group i's run.
    auto const range =
      thrust::equal_range(thrust::seq, group_id_first, group_id_last, group_id);
    return thrust::make_tuple(
      group_id, static_cast<size_t>(thrust::distance(range.first, range.second)));
  }
};

// inline to suppress a complaint about ODR violation
inline std::tuple<std::vector<size_t>,
std::vector<size_t>,
Expand Down Expand Up @@ -128,23 +146,14 @@ rmm::device_uvector<size_t> groupby_and_count(ValueIterator tx_value_first /* [I
[value_to_group_id_op] __device__(auto value) { return value_to_group_id_op(value); });
rmm::device_uvector<int> d_tx_dst_ranks(num_groups, stream_view);
rmm::device_uvector<size_t> d_tx_value_counts(d_tx_dst_ranks.size(), stream_view);
auto last =
thrust::reduce_by_key(rmm::exec_policy(stream_view),
group_id_first,
group_id_first + thrust::distance(tx_value_first, tx_value_last),
thrust::make_constant_iterator(size_t{1}),
d_tx_dst_ranks.begin(),
d_tx_value_counts.begin());
if (thrust::distance(d_tx_dst_ranks.begin(), thrust::get<0>(last)) < num_groups) {
rmm::device_uvector<size_t> d_counts(num_groups, stream_view);
thrust::fill(rmm::exec_policy(stream_view), d_counts.begin(), d_counts.end(), size_t{0});
thrust::scatter(rmm::exec_policy(stream_view),
d_tx_value_counts.begin(),
thrust::get<1>(last),
d_tx_dst_ranks.begin(),
d_counts.begin());
d_tx_value_counts = std::move(d_counts);
}
auto rank_count_pair_first = thrust::make_zip_iterator(
thrust::make_tuple(d_tx_dst_ranks.begin(), d_tx_value_counts.begin()));
thrust::tabulate(
rmm::exec_policy(stream_view),
rank_count_pair_first,
rank_count_pair_first + num_groups,
detail::compute_group_id_count_pair_t<decltype(group_id_first)>{
group_id_first, group_id_first + thrust::distance(tx_value_first, tx_value_last)});

return d_tx_value_counts;
}
Expand All @@ -169,22 +178,13 @@ rmm::device_uvector<size_t> groupby_and_count(VertexIterator tx_key_first /* [IN
tx_key_first, [key_to_group_id_op] __device__(auto key) { return key_to_group_id_op(key); });
rmm::device_uvector<int> d_tx_dst_ranks(num_groups, stream_view);
rmm::device_uvector<size_t> d_tx_value_counts(d_tx_dst_ranks.size(), stream_view);
auto last = thrust::reduce_by_key(rmm::exec_policy(stream_view),
group_id_first,
group_id_first + thrust::distance(tx_key_first, tx_key_last),
thrust::make_constant_iterator(size_t{1}),
d_tx_dst_ranks.begin(),
d_tx_value_counts.begin());
if (thrust::distance(d_tx_dst_ranks.begin(), thrust::get<0>(last)) < num_groups) {
rmm::device_uvector<size_t> d_counts(num_groups, stream_view);
thrust::fill(rmm::exec_policy(stream_view), d_counts.begin(), d_counts.end(), size_t{0});
thrust::scatter(rmm::exec_policy(stream_view),
d_tx_value_counts.begin(),
thrust::get<1>(last),
d_tx_dst_ranks.begin(),
d_counts.begin());
d_tx_value_counts = std::move(d_counts);
}
auto rank_count_pair_first = thrust::make_zip_iterator(
thrust::make_tuple(d_tx_dst_ranks.begin(), d_tx_value_counts.begin()));
thrust::tabulate(rmm::exec_policy(stream_view),
rank_count_pair_first,
rank_count_pair_first + num_groups,
detail::compute_group_id_count_pair_t<decltype(group_id_first)>{
group_id_first, group_id_first + thrust::distance(tx_key_first, tx_key_last)});

return d_tx_value_counts;
}
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/generators/generate_rmat_edgelist.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -94,8 +94,8 @@ std::tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>> generat
}
}
}
src += src_bit_set ? static_cast<vertex_t>(1 << bit) : 0;
dst += dst_bit_set ? static_cast<vertex_t>(1 << bit) : 0;
src += src_bit_set ? static_cast<vertex_t>(vertex_t{1} << bit) : 0;
dst += dst_bit_set ? static_cast<vertex_t>(vertex_t{1} << bit) : 0;
}
return thrust::make_tuple(src, dst);
});
Expand Down
Loading

0 comments on commit 2cbf6c5

Please sign in to comment.