Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update generation and flattening of dendrogram in Leiden #4347

Merged
27 changes: 0 additions & 27 deletions cpp/src/community/flatten_dendrogram.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,31 +60,4 @@ void partition_at_level(raft::handle_t const& handle,
});
}

/**
 * @brief Flatten a Leiden dendrogram into a single partition stored in @p d_partition.
 *
 * The level-0 assignment of the dendrogram is copied into @p d_partition, and the
 * result is then relabeled in place `(level - 1) / 2` times. Step `l` hands
 * dendrogram levels `2 * l + 1` and `2 * l + 2` to cugraph::relabel as the pair of
 * label arrays (presumably old label -> new label; confirm against cugraph::relabel).
 *
 * @tparam vertex_t   Vertex/cluster identifier type.
 * @tparam multi_gpu  Forwarded to cugraph::relabel.
 *
 * @param handle       RAFT handle; its stream is used for the initial copy.
 * @param dendrogram   Dendrogram whose levels are read through the unchecked accessors,
 *                     so the caller must guarantee that every referenced level exists.
 * @param d_partition  Output buffer with room for at least as many elements as level 0
 *                     of the dendrogram holds (assumed to be device memory -- TODO confirm).
 * @param level        Number of dendrogram levels to take into account.
 */
template <typename vertex_t, bool multi_gpu>
void leiden_partition_at_level(raft::handle_t const& handle,
                               Dendrogram<vertex_t> const& dendrogram,
                               vertex_t* d_partition,
                               size_t level)
{
  // Seed the output with the level-0 assignment.
  vertex_t local_num_verts = dendrogram.get_level_size_nocheck(0);
  raft::copy(
    d_partition, dendrogram.get_level_ptr_nocheck(0), local_num_verts, handle.get_stream());

  // `level` is unsigned: without this guard `level == 0` would wrap `(level - 1) / 2`
  // to a huge step count and read far past the levels the dendrogram actually holds.
  size_t const num_relabel_steps = (level > 0) ? (level - 1) / 2 : size_t{0};

  // No scratch buffer is needed here; relabel updates d_partition in place.
  for (size_t l = 0; l < num_relabel_steps; ++l) {
    cugraph::relabel<vertex_t, multi_gpu>(
      handle,
      std::tuple<vertex_t const*, vertex_t const*>(dendrogram.get_level_ptr_nocheck(2 * l + 1),
                                                   dendrogram.get_level_ptr_nocheck(2 * l + 2)),
      dendrogram.get_level_size_nocheck(2 * l + 1),
      d_partition,
      local_num_verts,
      false);  // NOTE(review): looks like the "expensive check" switch of relabel -- confirm
  }
}

} // namespace cugraph
131 changes: 96 additions & 35 deletions cpp/src/community/leiden_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ std::pair<std::unique_ptr<Dendrogram<vertex_t>>, weight_t> leiden(

rmm::device_uvector<vertex_t> louvain_of_refined_graph(0, handle.get_stream()); // #V

while (dendrogram->num_levels() < 2 * max_level + 1) {
while (dendrogram->num_levels() < max_level) {
//
// Initialize every cluster to reference each vertex to itself
//
Expand Down Expand Up @@ -249,8 +249,8 @@ std::pair<std::unique_ptr<Dendrogram<vertex_t>>, weight_t> leiden(
detail::timer_start<graph_view_t::is_multi_gpu>(handle, hr_timer, "update_clustering");
#endif

rmm::device_uvector<vertex_t> louvain_assignment_for_vertices =
rmm::device_uvector<vertex_t>(dendrogram->current_level_size(), handle.get_stream());
rmm::device_uvector<vertex_t> louvain_assignment_for_vertices(dendrogram->current_level_size(),
handle.get_stream());

raft::copy(louvain_assignment_for_vertices.begin(),
dendrogram->current_level_begin(),
Expand Down Expand Up @@ -427,6 +427,7 @@ std::pair<std::unique_ptr<Dendrogram<vertex_t>>, weight_t> leiden(
dst_louvain_assignment_cache,
up_down);
}

// Clear buffer and contract the graph

cluster_keys.resize(0, handle.get_stream());
Expand All @@ -452,8 +453,8 @@ std::pair<std::unique_ptr<Dendrogram<vertex_t>>, weight_t> leiden(

if (nr_unique_leiden < current_graph_view.number_of_vertices()) {
// Create aggregate graph based on refined (leiden) partition
std::optional<rmm::device_uvector<vertex_t>> cluster_assignment{std::nullopt};
std::tie(coarse_graph, coarsen_graph_edge_weight, cluster_assignment) =
std::optional<rmm::device_uvector<vertex_t>> numbering_map{std::nullopt};
std::tie(coarse_graph, coarsen_graph_edge_weight, numbering_map) =
coarsen_graph(handle,
current_graph_view,
current_edge_weight_view,
Expand All @@ -466,34 +467,99 @@ std::pair<std::unique_ptr<Dendrogram<vertex_t>>, weight_t> leiden(
std::make_optional<edge_property_view_t<edge_t, weight_t const*>>(
(*coarsen_graph_edge_weight).view());

// cluster_assignment contains leiden cluster ids of aggregated nodes.
// After the call to relabel, cluster_assignment will contain louvain cluster ids
// of the aggregated nodes
// FIXME: reconsider what's put into dendrogram->current_level_begin()
// at what point in the code. I'm just going to overwrite it here,
// so perhaps it should be in different structures until now

// New approach, mimic Louvain, we'll store the Leiden results in the dendrogram
raft::copy(dendrogram->current_level_begin(),
refined_leiden_partition.data(),
refined_leiden_partition.size(),
handle.get_stream());

louvain_of_refined_graph.resize(current_graph_view.local_vertex_partition_range_size(),
handle.get_stream());
rmm::device_uvector<vertex_t> numeric_sequence(
current_graph_view.local_vertex_partition_range_size(), handle.get_stream());

detail::sequence_fill(handle.get_stream(),
numeric_sequence.data(),
numeric_sequence.size(),
current_graph_view.local_vertex_partition_range_first());

relabel<vertex_t, multi_gpu>(
handle,
std::make_tuple(static_cast<vertex_t const*>((*numbering_map).begin()),
static_cast<vertex_t const*>(numeric_sequence.begin())),
(*numbering_map).size(),
dendrogram->current_level_begin(),
dendrogram->current_level_size(),
false);

raft::copy(louvain_of_refined_graph.begin(),
numbering_map->data(),
numbering_map->size(),
handle.get_stream());

relabel<vertex_t, multi_gpu>(
handle,
std::make_tuple(static_cast<vertex_t const*>(leiden_to_louvain_map.first.begin()),
static_cast<vertex_t const*>(leiden_to_louvain_map.second.begin())),
leiden_to_louvain_map.first.size(),
(*cluster_assignment).data(),
(*cluster_assignment).size(),
louvain_of_refined_graph.data(),
louvain_of_refined_graph.size(),
false);
// louvain assignment of aggregated graph which is necessary to flatten dendrogram
dendrogram->add_level(current_graph_view.local_vertex_partition_range_first(),
current_graph_view.local_vertex_partition_range_size(),
handle.get_stream());

raft::copy(dendrogram->current_level_begin(),
(*cluster_assignment).begin(),
(*cluster_assignment).size(),
// Relabel clusters so that each cluster is identified by the lowest vertex id
// that is assigned to it. Note that numbering_map and numeric_sequence go out
// of scope at the end of this block, so we will reuse their memory
raft::copy(numbering_map->begin(),
louvain_of_refined_graph.data(),
louvain_of_refined_graph.size(),
handle.get_stream());

louvain_of_refined_graph.resize(current_graph_view.local_vertex_partition_range_size(),
handle.get_stream());
thrust::sort(handle.get_thrust_policy(),
thrust::make_zip_iterator(numbering_map->begin(), numeric_sequence.begin()),
thrust::make_zip_iterator(numbering_map->end(), numeric_sequence.end()));

raft::copy(louvain_of_refined_graph.begin(),
(*cluster_assignment).begin(),
(*cluster_assignment).size(),
handle.get_stream());
size_t new_size = thrust::distance(numbering_map->begin(),
thrust::unique_by_key(handle.get_thrust_policy(),
numbering_map->begin(),
numbering_map->end(),
numeric_sequence.begin())
.first);

numbering_map->resize(new_size, handle.get_stream());
numeric_sequence.resize(new_size, handle.get_stream());

if constexpr (multi_gpu) {
std::tie(*numbering_map, numeric_sequence) =
shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning(
handle, std::move(*numbering_map), std::move(numeric_sequence));

thrust::sort(handle.get_thrust_policy(),
thrust::make_zip_iterator(numbering_map->begin(), numeric_sequence.begin()),
thrust::make_zip_iterator(numbering_map->end(), numeric_sequence.end()));

size_t new_size = thrust::distance(numbering_map->begin(),
thrust::unique_by_key(handle.get_thrust_policy(),
numbering_map->begin(),
numbering_map->end(),
numeric_sequence.begin())
.first);

numbering_map->resize(new_size, handle.get_stream());
numeric_sequence.resize(new_size, handle.get_stream());
}

relabel<vertex_t, multi_gpu>(
handle,
std::make_tuple(static_cast<vertex_t const*>((*numbering_map).begin()),
static_cast<vertex_t const*>(numeric_sequence.begin())),
(*numbering_map).size(),
louvain_of_refined_graph.data(),
louvain_of_refined_graph.size(),
false);
}
}

Expand Down Expand Up @@ -565,20 +631,15 @@ void flatten_leiden_dendrogram(raft::handle_t const& handle,
Dendrogram<vertex_t> const& dendrogram,
vertex_t* clustering)
{
leiden_partition_at_level<vertex_t, multi_gpu>(
handle, dendrogram, clustering, dendrogram.num_levels());
rmm::device_uvector<vertex_t> vertex_ids_v(graph_view.number_of_vertices(), handle.get_stream());

rmm::device_uvector<vertex_t> unique_cluster_ids(graph_view.local_vertex_partition_range_size(),
handle.get_stream());
thrust::copy(handle.get_thrust_policy(),
clustering,
clustering + graph_view.local_vertex_partition_range_size(),
unique_cluster_ids.begin());

remove_duplicates<vertex_t, multi_gpu>(handle, unique_cluster_ids);
detail::sequence_fill(handle.get_stream(),
vertex_ids_v.begin(),
vertex_ids_v.size(),
graph_view.local_vertex_partition_range_first());

relabel_cluster_ids<vertex_t, multi_gpu>(
handle, unique_cluster_ids, clustering, graph_view.local_vertex_partition_range_size());
partition_at_level<vertex_t, multi_gpu>(
handle, dendrogram, vertex_ids_v.data(), clustering, dendrogram.num_levels());
}

} // namespace detail
Expand Down
6 changes: 3 additions & 3 deletions python/cugraph/cugraph/tests/community/test_leiden.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2019-2023, NVIDIA CORPORATION.
# Copyright (c) 2019-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -83,8 +83,8 @@
"input_type": "CSR",
"expected_output": {
# fmt: off
"partition": [3, 3, 3, 3, 2, 2, 2, 3, 1, 3, 2, 3, 3, 3, 1, 1, 2, 3, 1, 3,
1, 3, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1],
"partition": [0, 0, 0, 0, 3, 3, 3, 0, 1, 0, 3, 0, 0, 0, 1, 1, 3, 0, 1, 0,
1, 0, 1, 2, 2, 2, 1, 2, 2, 1, 1, 2, 1, 1],
# fmt: on
"modularity_score": 0.41880345,
},
Expand Down
Loading