From c6cc992fc6c458da3e17d0ae6d5f130e5106949e Mon Sep 17 00:00:00 2001 From: Charles Hastings Date: Mon, 8 Apr 2024 09:59:59 -0700 Subject: [PATCH 1/3] check in debugging code... dgx17 is flaky --- cpp/src/community/flatten_dendrogram.hpp | 63 +++++++ cpp/src/community/leiden_impl.cuh | 200 +++++++++++++++++++++-- 2 files changed, 253 insertions(+), 10 deletions(-) diff --git a/cpp/src/community/flatten_dendrogram.hpp b/cpp/src/community/flatten_dendrogram.hpp index 83aaf389612..7404ca2af95 100644 --- a/cpp/src/community/flatten_dendrogram.hpp +++ b/cpp/src/community/flatten_dendrogram.hpp @@ -39,6 +39,9 @@ void partition_at_level(raft::handle_t const& handle, raft::copy(d_partition, d_vertex_ids, local_num_verts, handle.get_stream()); + // raft::print_device_vector("before relabeling", d_partition + 409, 4, std::cout); + raft::print_device_vector("before relabeling", d_partition + 1832, 4, std::cout); + std::for_each( thrust::make_counting_iterator(0), thrust::make_counting_iterator(level), @@ -49,6 +52,18 @@ void partition_at_level(raft::handle_t const& handle, dendrogram.get_level_size_nocheck(l), dendrogram.get_level_first_index_nocheck(l)); + char label[128]; + // snprintf(label, 128, "before relabel+409 %lu", l); + snprintf(label, 128, "before relabel+1832 %lu", l); + // raft::print_device_vector(label, d_partition, local_num_verts, std::cout); + // raft::print_device_vector(label, d_partition + 409, 4, std::cout); + raft::print_device_vector(label, d_partition + 1832, 4, std::cout); + + std::cout << "dendrogram size: " << dendrogram.get_level_size_nocheck(l) << std::endl; + if (dendrogram.get_level_size_nocheck(l) > 20) + raft::print_device_vector( + " dendrogram", dendrogram.get_level_ptr_nocheck(l) + 15, 4, std::cout); + cugraph::relabel( handle, std::tuple(local_vertex_ids_v.data(), @@ -57,6 +72,12 @@ void partition_at_level(raft::handle_t const& handle, d_partition, local_num_verts, false); + + // snprintf(label, 128, "after relabel+409 %lu", l); + 
snprintf(label, 128, "after relabel+1832 %lu", l); + // raft::print_device_vector(label, d_partition, local_num_verts, std::cout); + // raft::print_device_vector(label, d_partition + 409, 4, std::cout); + raft::print_device_vector(label, d_partition + 1832, 4, std::cout); }); } @@ -72,18 +93,60 @@ void leiden_partition_at_level(raft::handle_t const& handle, rmm::device_uvector local_vertex_ids_v(local_num_verts, handle.get_stream()); + raft::print_device_vector("before relabeling", d_partition + 409, 4, std::cout); + std::for_each( thrust::make_counting_iterator(0), thrust::make_counting_iterator((level - 1) / 2), [&handle, &dendrogram, &local_vertex_ids_v, &d_partition, local_num_verts](size_t l) { + char label[128]; + snprintf(label, 128, "before relabel+409 %lu", l); + // raft::print_device_vector(label, d_partition, local_num_verts, std::cout); + raft::print_device_vector(label, d_partition + 409, 4, std::cout); + + raft::print_device_vector(" p1", + dendrogram.get_level_ptr_nocheck(2 * l + 1), + dendrogram.get_level_size_nocheck(2 * l + 1), + std::cout); + raft::print_device_vector(" p2", + dendrogram.get_level_ptr_nocheck(2 * l + 2), + dendrogram.get_level_size_nocheck(2 * l + 2), + std::cout); + + thrust::for_each(handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(1), + [p1 = dendrogram.get_level_ptr_nocheck(2 * l + 1), + p2 = dendrogram.get_level_ptr_nocheck(2 * l + 2), + size = dendrogram.get_level_size_nocheck(2 * l + 1)] __device__(auto) { + for (size_t i = 0; i < size; ++i) { +#if 0 + if (p1[i] == 2410) { +#else + if (p2[i] == 2410) { +#endif + printf("%lu: p1 = %d, p2 = %d\n", i, (int)p1[i], (int)p2[i]); + } + } + }); + cugraph::relabel( handle, +#if 0 std::tuple(dendrogram.get_level_ptr_nocheck(2 * l + 1), dendrogram.get_level_ptr_nocheck(2 * l + 2)), +#else + std::tuple(dendrogram.get_level_ptr_nocheck(2 * l + 2), + dendrogram.get_level_ptr_nocheck(2 * l + 1)), +#endif dendrogram.get_level_size_nocheck(2 
* l + 1), d_partition, local_num_verts, false); + + snprintf(label, 128, "after relabel+409 %lu", l); + // raft::print_device_vector(label, d_partition, local_num_verts, std::cout); + raft::print_device_vector(label, d_partition + 409, 4, std::cout); }); } diff --git a/cpp/src/community/leiden_impl.cuh b/cpp/src/community/leiden_impl.cuh index 499724583a9..42757f932bd 100644 --- a/cpp/src/community/leiden_impl.cuh +++ b/cpp/src/community/leiden_impl.cuh @@ -362,6 +362,14 @@ std::pair>, weight_t> leiden( } } +#if 1 + std::cout << "level = " << dendrogram->num_levels() << std::endl; + raft::print_device_vector(" louvain output", + dendrogram->current_level_begin(), + louvain_assignment_for_vertices.size(), + std::cout); +#endif + #ifdef TIMING detail::timer_stop(handle, hr_timer); #endif @@ -427,6 +435,11 @@ std::pair>, weight_t> leiden( dst_louvain_assignment_cache, up_down); } + +#if 0 + raft::print_device_vector(" after refinement output", refined_leiden_partition.data(), refined_leiden_partition.size(), std::cout); +#endif + // Clear buffer and contract the graph cluster_keys.resize(0, handle.get_stream()); @@ -452,49 +465,182 @@ std::pair>, weight_t> leiden( if (nr_unique_leiden < current_graph_view.number_of_vertices()) { // Create aggregate graph based on refined (leiden) partition - std::optional> cluster_assignment{std::nullopt}; - std::tie(coarse_graph, coarsen_graph_edge_weight, cluster_assignment) = + std::optional> numbering_map{std::nullopt}; + std::tie(coarse_graph, coarsen_graph_edge_weight, numbering_map) = coarsen_graph(handle, current_graph_view, current_edge_weight_view, refined_leiden_partition.data(), true); + std::cout << "after coarsen_graph" << std::endl; + thrust::for_each(handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(1), + [nm = numbering_map->begin(), + size = numbering_map->size(), + old_louvain = dendrogram->current_level_begin(), + old_louvain_size = dendrogram->current_level_size(), + 
old_leiden = refined_leiden_partition.data(), + l2l_map_first = leiden_to_louvain_map.first.data(), + l2l_map_second = leiden_to_louvain_map.second.begin(), + l2l_map_size = leiden_to_louvain_map.first.size()] __device__(auto) { +#if 0 + printf("l2l\n"); + for (size_t i = 0 ; i < l2l_map_size ; ++i) { + printf(" %lu: (%d, %d)\n", i, (int) l2l_map_first[i], (int) l2l_map_second[i]); + } +#endif + printf("old louvain/leiden\n"); + for (size_t i = 0; i < old_louvain_size; ++i) { + // if (old_louvain[i] == 2410) + if (old_louvain[i] == 1833) + printf( + " %lu: (%d, %d)\n", i, (int)old_louvain[i], (int)old_leiden[i]); + } + }); + current_graph_view = coarse_graph.view(); current_edge_weight_view = std::make_optional>( (*coarsen_graph_edge_weight).view()); - // cluster_assignment contains leiden cluster ids of aggregated nodes - // After call to relabel, cluster_assignment will louvain cluster ids +#if 0 + // numbering_map contains leiden cluster ids of aggregated nodes + // After call to relabel, numbering_map will louvain cluster ids // of the aggregated nodes relabel( handle, std::make_tuple(static_cast(leiden_to_louvain_map.first.begin()), static_cast(leiden_to_louvain_map.second.begin())), leiden_to_louvain_map.first.size(), - (*cluster_assignment).data(), - (*cluster_assignment).size(), + (*numbering_map).data(), + (*numbering_map).size(), false); + // louvain assignment of aggregated graph which is necessary to flatten dendrogram dendrogram->add_level(current_graph_view.local_vertex_partition_range_first(), current_graph_view.local_vertex_partition_range_size(), handle.get_stream()); raft::copy(dendrogram->current_level_begin(), - (*cluster_assignment).begin(), - (*cluster_assignment).size(), + (*numbering_map).begin(), + (*numbering_map).size(), handle.get_stream()); louvain_of_refined_graph.resize(current_graph_view.local_vertex_partition_range_size(), handle.get_stream()); raft::copy(louvain_of_refined_graph.begin(), - (*cluster_assignment).begin(), - 
(*cluster_assignment).size(), + (*numbering_map).begin(), + (*numbering_map).size(), + handle.get_stream()); +#else + // FIXME: reconsider what's put into dendrogram->current_level_begin(), since + // I'm just going to overwrite it here... + + // New approach, mimic Louvain + // We'll store the Leiden results in the dendrogram + raft::copy(dendrogram->current_level_begin(), + refined_leiden_partition.data(), + refined_leiden_partition.size(), handle.get_stream()); + + // if (dendrogram->num_levels() == 1) raft::print_device_vector(" dendrogram before + // relabel", dendrogram->current_level_begin() + 409, 4, std::cout); + if (dendrogram->num_levels() == 1) + raft::print_device_vector( + " dendrogram before relabel", dendrogram->current_level_begin() + 1832, 4, std::cout); + + louvain_of_refined_graph.resize(current_graph_view.local_vertex_partition_range_size(), + handle.get_stream()); + + detail::sequence_fill(handle.get_stream(), + louvain_of_refined_graph.data(), + louvain_of_refined_graph.size(), + current_graph_view.local_vertex_partition_range_first()); + + relabel( + handle, + std::make_tuple(static_cast((*numbering_map).begin()), + static_cast(louvain_of_refined_graph.begin())), + current_graph_view.local_vertex_partition_range_size(), + dendrogram->current_level_begin(), + dendrogram->current_level_size(), + false); + + thrust::for_each(handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(1), + [d = dendrogram->current_level_begin(), + orig = refined_leiden_partition.data(), + size = dendrogram->current_level_size()] __device__(auto) { + for (size_t i = 0; i < size; ++i) { + if (d[i] < 0) + printf(" %lu: %d, orig = %d\n", i, (int)d[i], (int)orig[i]); + } + }); + + // if (dendrogram->num_levels() == 1) raft::print_device_vector(" dendrogram after + // relabel", dendrogram->current_level_begin() + 409, 4, std::cout); + if (dendrogram->num_levels() == 1) + raft::print_device_vector( + " dendrogram after relabel", 
dendrogram->current_level_begin() + 1832, 4, std::cout); + + relabel( + handle, + std::make_tuple(static_cast(leiden_to_louvain_map.first.begin()), + static_cast(leiden_to_louvain_map.second.begin())), + current_graph_view.local_vertex_partition_range_size(), + louvain_of_refined_graph.data(), + louvain_of_refined_graph.size(), + false); + + // + // TODO: This isn't quite right. + // + // louvain_of_refined_data is filled with values [0,n) with the new ids + // leiden_to_louvain_map.first is filled with subset of values [0,n) with old ids + // (old graph, so more vertices, n is bigger). + // leiden_to_louvain_map.second is also filled from that bigger set + // + // Can I just renumber these? I think I would need + // + // kkkk + thrust::for_each(handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(1), + [d = louvain_of_refined_graph.data(), + first = leiden_to_louvain_map.first.begin(), + second = leiden_to_louvain_map.second.begin(), + f_size = leiden_to_louvain_map.first.size(), + nm = numbering_map->data(), + nm_size = numbering_map->size(), + size = louvain_of_refined_graph.size()] __device__(auto) { + printf("NUMBER MAP:\n"); + for (size_t i = 0; i < nm_size; ++i) { + printf(" %lu: %d\n", i, (int)nm[i]); + } + + printf("MAP:\n"); + for (size_t i = 0; i < f_size; ++i) { + printf(" %lu: (%d -> %d)\n", i, (int)first[i], (int)second[i]); + } + + printf("Bad values:\n"); + for (size_t i = 0; i < size; ++i) { + if (d[i] < 0) printf(" %lu: %d\n", i, (int)d[i]); + } + }); + +#endif } + } else { + raft::print_device_vector("final dendrogram level", + dendrogram->current_level_begin(), + dendrogram->current_level_size(), + std::cout); } copied_louvain_partition.resize(0, handle.get_stream()); @@ -511,6 +657,14 @@ std::pair>, weight_t> leiden( detail::timer_display(handle, hr_timer, std::cout); #endif +#if 0 + for (size_t i = 0 ; i < dendrogram->num_levels() ; ++i) { + char tmp[128]; + snprintf(tmp, 128, "dendrogram level = 
%lu", i); + raft::print_device_vector(tmp, dendrogram->get_level_ptr_nocheck(i), dendrogram->get_level_size_nocheck(i), std::cout); + } +#endif + return std::make_pair(std::move(dendrogram), best_modularity); } @@ -565,9 +719,20 @@ void flatten_leiden_dendrogram(raft::handle_t const& handle, Dendrogram const& dendrogram, vertex_t* clustering) { +#if 0 leiden_partition_at_level( handle, dendrogram, clustering, dendrogram.num_levels()); +#if 0 + raft::print_device_vector("clustering before relabel", clustering, graph_view.number_of_vertices(), std::cout); + thrust::for_each(handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(1), + [clustering, dend = dendrogram.get_level_ptr_nocheck(0)] __device__(auto) { + printf(" clustering[410] = %d, dend[410] = %d\n", (int) clustering[410], (int) dend[410]); + }); +#endif + rmm::device_uvector unique_cluster_ids(graph_view.local_vertex_partition_range_size(), handle.get_stream()); thrust::copy(handle.get_thrust_policy(), @@ -579,6 +744,17 @@ void flatten_leiden_dendrogram(raft::handle_t const& handle, relabel_cluster_ids( handle, unique_cluster_ids, clustering, graph_view.local_vertex_partition_range_size()); +#else + rmm::device_uvector vertex_ids_v(graph_view.number_of_vertices(), handle.get_stream()); + + detail::sequence_fill(handle.get_stream(), + vertex_ids_v.begin(), + vertex_ids_v.size(), + graph_view.local_vertex_partition_range_first()); + + partition_at_level( + handle, dendrogram, vertex_ids_v.data(), clustering, dendrogram.num_levels()); +#endif } } // namespace detail @@ -634,6 +810,10 @@ std::pair leiden( detail::flatten_leiden_dendrogram(handle, graph_view, *dendrogram, clustering); +#if 0 + raft::print_device_vector("clustering", clustering, graph_view.number_of_vertices(), std::cout); +#endif + return std::make_pair(dendrogram->num_levels(), modularity); } From 84abe60d64e2d41afff595e00d273b2a75e41dd5 Mon Sep 17 00:00:00 2001 From: Charles Hastings Date: Mon, 15 Apr 
2024 17:39:01 -0700 Subject: [PATCH 2/3] Refactor how dendrogram is populated for Leiden, makes flattening deterministic --- cpp/src/community/flatten_dendrogram.hpp | 90 ----------- cpp/src/community/leiden_impl.cuh | 190 ++--------------------- 2 files changed, 10 insertions(+), 270 deletions(-) diff --git a/cpp/src/community/flatten_dendrogram.hpp b/cpp/src/community/flatten_dendrogram.hpp index 7404ca2af95..4bf6d3ed240 100644 --- a/cpp/src/community/flatten_dendrogram.hpp +++ b/cpp/src/community/flatten_dendrogram.hpp @@ -39,9 +39,6 @@ void partition_at_level(raft::handle_t const& handle, raft::copy(d_partition, d_vertex_ids, local_num_verts, handle.get_stream()); - // raft::print_device_vector("before relabeling", d_partition + 409, 4, std::cout); - raft::print_device_vector("before relabeling", d_partition + 1832, 4, std::cout); - std::for_each( thrust::make_counting_iterator(0), thrust::make_counting_iterator(level), @@ -52,18 +49,6 @@ void partition_at_level(raft::handle_t const& handle, dendrogram.get_level_size_nocheck(l), dendrogram.get_level_first_index_nocheck(l)); - char label[128]; - // snprintf(label, 128, "before relabel+409 %lu", l); - snprintf(label, 128, "before relabel+1832 %lu", l); - // raft::print_device_vector(label, d_partition, local_num_verts, std::cout); - // raft::print_device_vector(label, d_partition + 409, 4, std::cout); - raft::print_device_vector(label, d_partition + 1832, 4, std::cout); - - std::cout << "dendrogram size: " << dendrogram.get_level_size_nocheck(l) << std::endl; - if (dendrogram.get_level_size_nocheck(l) > 20) - raft::print_device_vector( - " dendrogram", dendrogram.get_level_ptr_nocheck(l) + 15, 4, std::cout); - cugraph::relabel( handle, std::tuple(local_vertex_ids_v.data(), @@ -72,81 +57,6 @@ void partition_at_level(raft::handle_t const& handle, d_partition, local_num_verts, false); - - // snprintf(label, 128, "after relabel+409 %lu", l); - snprintf(label, 128, "after relabel+1832 %lu", l); - // 
raft::print_device_vector(label, d_partition, local_num_verts, std::cout); - // raft::print_device_vector(label, d_partition + 409, 4, std::cout); - raft::print_device_vector(label, d_partition + 1832, 4, std::cout); - }); -} - -template -void leiden_partition_at_level(raft::handle_t const& handle, - Dendrogram const& dendrogram, - vertex_t* d_partition, - size_t level) -{ - vertex_t local_num_verts = dendrogram.get_level_size_nocheck(0); - raft::copy( - d_partition, dendrogram.get_level_ptr_nocheck(0), local_num_verts, handle.get_stream()); - - rmm::device_uvector local_vertex_ids_v(local_num_verts, handle.get_stream()); - - raft::print_device_vector("before relabeling", d_partition + 409, 4, std::cout); - - std::for_each( - thrust::make_counting_iterator(0), - thrust::make_counting_iterator((level - 1) / 2), - [&handle, &dendrogram, &local_vertex_ids_v, &d_partition, local_num_verts](size_t l) { - char label[128]; - snprintf(label, 128, "before relabel+409 %lu", l); - // raft::print_device_vector(label, d_partition, local_num_verts, std::cout); - raft::print_device_vector(label, d_partition + 409, 4, std::cout); - - raft::print_device_vector(" p1", - dendrogram.get_level_ptr_nocheck(2 * l + 1), - dendrogram.get_level_size_nocheck(2 * l + 1), - std::cout); - raft::print_device_vector(" p2", - dendrogram.get_level_ptr_nocheck(2 * l + 2), - dendrogram.get_level_size_nocheck(2 * l + 2), - std::cout); - - thrust::for_each(handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(1), - [p1 = dendrogram.get_level_ptr_nocheck(2 * l + 1), - p2 = dendrogram.get_level_ptr_nocheck(2 * l + 2), - size = dendrogram.get_level_size_nocheck(2 * l + 1)] __device__(auto) { - for (size_t i = 0; i < size; ++i) { -#if 0 - if (p1[i] == 2410) { -#else - if (p2[i] == 2410) { -#endif - printf("%lu: p1 = %d, p2 = %d\n", i, (int)p1[i], (int)p2[i]); - } - } - }); - - cugraph::relabel( - handle, -#if 0 - std::tuple(dendrogram.get_level_ptr_nocheck(2 * l 
+ 1), - dendrogram.get_level_ptr_nocheck(2 * l + 2)), -#else - std::tuple(dendrogram.get_level_ptr_nocheck(2 * l + 2), - dendrogram.get_level_ptr_nocheck(2 * l + 1)), -#endif - dendrogram.get_level_size_nocheck(2 * l + 1), - d_partition, - local_num_verts, - false); - - snprintf(label, 128, "after relabel+409 %lu", l); - // raft::print_device_vector(label, d_partition, local_num_verts, std::cout); - raft::print_device_vector(label, d_partition + 409, 4, std::cout); }); } diff --git a/cpp/src/community/leiden_impl.cuh b/cpp/src/community/leiden_impl.cuh index 42757f932bd..73382ae51eb 100644 --- a/cpp/src/community/leiden_impl.cuh +++ b/cpp/src/community/leiden_impl.cuh @@ -362,14 +362,6 @@ std::pair>, weight_t> leiden( } } -#if 1 - std::cout << "level = " << dendrogram->num_levels() << std::endl; - raft::print_device_vector(" louvain output", - dendrogram->current_level_begin(), - louvain_assignment_for_vertices.size(), - std::cout); -#endif - #ifdef TIMING detail::timer_stop(handle, hr_timer); #endif @@ -436,10 +428,6 @@ std::pair>, weight_t> leiden( up_down); } -#if 0 - raft::print_device_vector(" after refinement output", refined_leiden_partition.data(), refined_leiden_partition.size(), std::cout); -#endif - // Clear buffer and contract the graph cluster_keys.resize(0, handle.get_stream()); @@ -473,89 +461,27 @@ std::pair>, weight_t> leiden( refined_leiden_partition.data(), true); - std::cout << "after coarsen_graph" << std::endl; - thrust::for_each(handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(1), - [nm = numbering_map->begin(), - size = numbering_map->size(), - old_louvain = dendrogram->current_level_begin(), - old_louvain_size = dendrogram->current_level_size(), - old_leiden = refined_leiden_partition.data(), - l2l_map_first = leiden_to_louvain_map.first.data(), - l2l_map_second = leiden_to_louvain_map.second.begin(), - l2l_map_size = leiden_to_louvain_map.first.size()] __device__(auto) { -#if 0 - 
printf("l2l\n"); - for (size_t i = 0 ; i < l2l_map_size ; ++i) { - printf(" %lu: (%d, %d)\n", i, (int) l2l_map_first[i], (int) l2l_map_second[i]); - } -#endif - printf("old louvain/leiden\n"); - for (size_t i = 0; i < old_louvain_size; ++i) { - // if (old_louvain[i] == 2410) - if (old_louvain[i] == 1833) - printf( - " %lu: (%d, %d)\n", i, (int)old_louvain[i], (int)old_leiden[i]); - } - }); - current_graph_view = coarse_graph.view(); current_edge_weight_view = std::make_optional>( (*coarsen_graph_edge_weight).view()); -#if 0 - // numbering_map contains leiden cluster ids of aggregated nodes - // After call to relabel, numbering_map will louvain cluster ids - // of the aggregated nodes - relabel( - handle, - std::make_tuple(static_cast(leiden_to_louvain_map.first.begin()), - static_cast(leiden_to_louvain_map.second.begin())), - leiden_to_louvain_map.first.size(), - (*numbering_map).data(), - (*numbering_map).size(), - false); - - // louvain assignment of aggregated graph which is necessary to flatten dendrogram - dendrogram->add_level(current_graph_view.local_vertex_partition_range_first(), - current_graph_view.local_vertex_partition_range_size(), - handle.get_stream()); - - raft::copy(dendrogram->current_level_begin(), - (*numbering_map).begin(), - (*numbering_map).size(), - handle.get_stream()); - - louvain_of_refined_graph.resize(current_graph_view.local_vertex_partition_range_size(), - handle.get_stream()); - - raft::copy(louvain_of_refined_graph.begin(), - (*numbering_map).begin(), - (*numbering_map).size(), - handle.get_stream()); -#else - // FIXME: reconsider what's put into dendrogram->current_level_begin(), since - // I'm just going to overwrite it here... + // FIXME: reconsider what's put into dendrogram->current_level_begin() + // at what point in the code. 
I'm just going to overwrite it here, + // so perhaps it should be in different structures until now - // New approach, mimic Louvain - // We'll store the Leiden results in the dendrogram + // New approach, mimic Louvain, we'll store the Leiden results in the dendrogram raft::copy(dendrogram->current_level_begin(), refined_leiden_partition.data(), refined_leiden_partition.size(), handle.get_stream()); - // if (dendrogram->num_levels() == 1) raft::print_device_vector(" dendrogram before - // relabel", dendrogram->current_level_begin() + 409, 4, std::cout); - if (dendrogram->num_levels() == 1) - raft::print_device_vector( - " dendrogram before relabel", dendrogram->current_level_begin() + 1832, 4, std::cout); - louvain_of_refined_graph.resize(current_graph_view.local_vertex_partition_range_size(), handle.get_stream()); + // Temporarily use louvain_of_refined_graph to be a numeric sequence to renumber the + // dendrogram. detail::sequence_fill(handle.get_stream(), louvain_of_refined_graph.data(), louvain_of_refined_graph.size(), @@ -570,23 +496,10 @@ std::pair>, weight_t> leiden( dendrogram->current_level_size(), false); - thrust::for_each(handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(1), - [d = dendrogram->current_level_begin(), - orig = refined_leiden_partition.data(), - size = dendrogram->current_level_size()] __device__(auto) { - for (size_t i = 0; i < size; ++i) { - if (d[i] < 0) - printf(" %lu: %d, orig = %d\n", i, (int)d[i], (int)orig[i]); - } - }); - - // if (dendrogram->num_levels() == 1) raft::print_device_vector(" dendrogram after - // relabel", dendrogram->current_level_begin() + 409, 4, std::cout); - if (dendrogram->num_levels() == 1) - raft::print_device_vector( - " dendrogram after relabel", dendrogram->current_level_begin() + 1832, 4, std::cout); + raft::copy(louvain_of_refined_graph.begin(), + numbering_map->data(), + numbering_map->size(), + handle.get_stream()); relabel( handle, @@ -596,51 +509,7 @@ 
std::pair>, weight_t> leiden( louvain_of_refined_graph.data(), louvain_of_refined_graph.size(), false); - - // - // TODO: This isn't quite right. - // - // louvain_of_refined_data is filled with values [0,n) with the new ids - // leiden_to_louvain_map.first is filled with subset of values [0,n) with old ids - // (old graph, so more vertices, n is bigger). - // leiden_to_louvain_map.second is also filled from that bigger set - // - // Can I just renumber these? I think I would need - // - // kkkk - thrust::for_each(handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(1), - [d = louvain_of_refined_graph.data(), - first = leiden_to_louvain_map.first.begin(), - second = leiden_to_louvain_map.second.begin(), - f_size = leiden_to_louvain_map.first.size(), - nm = numbering_map->data(), - nm_size = numbering_map->size(), - size = louvain_of_refined_graph.size()] __device__(auto) { - printf("NUMBER MAP:\n"); - for (size_t i = 0; i < nm_size; ++i) { - printf(" %lu: %d\n", i, (int)nm[i]); - } - - printf("MAP:\n"); - for (size_t i = 0; i < f_size; ++i) { - printf(" %lu: (%d -> %d)\n", i, (int)first[i], (int)second[i]); - } - - printf("Bad values:\n"); - for (size_t i = 0; i < size; ++i) { - if (d[i] < 0) printf(" %lu: %d\n", i, (int)d[i]); - } - }); - -#endif } - } else { - raft::print_device_vector("final dendrogram level", - dendrogram->current_level_begin(), - dendrogram->current_level_size(), - std::cout); } copied_louvain_partition.resize(0, handle.get_stream()); @@ -657,14 +526,6 @@ std::pair>, weight_t> leiden( detail::timer_display(handle, hr_timer, std::cout); #endif -#if 0 - for (size_t i = 0 ; i < dendrogram->num_levels() ; ++i) { - char tmp[128]; - snprintf(tmp, 128, "dendrogram level = %lu", i); - raft::print_device_vector(tmp, dendrogram->get_level_ptr_nocheck(i), dendrogram->get_level_size_nocheck(i), std::cout); - } -#endif - return std::make_pair(std::move(dendrogram), best_modularity); } @@ -719,32 +580,6 @@ void 
flatten_leiden_dendrogram(raft::handle_t const& handle, Dendrogram const& dendrogram, vertex_t* clustering) { -#if 0 - leiden_partition_at_level( - handle, dendrogram, clustering, dendrogram.num_levels()); - -#if 0 - raft::print_device_vector("clustering before relabel", clustering, graph_view.number_of_vertices(), std::cout); - thrust::for_each(handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(1), - [clustering, dend = dendrogram.get_level_ptr_nocheck(0)] __device__(auto) { - printf(" clustering[410] = %d, dend[410] = %d\n", (int) clustering[410], (int) dend[410]); - }); -#endif - - rmm::device_uvector unique_cluster_ids(graph_view.local_vertex_partition_range_size(), - handle.get_stream()); - thrust::copy(handle.get_thrust_policy(), - clustering, - clustering + graph_view.local_vertex_partition_range_size(), - unique_cluster_ids.begin()); - - remove_duplicates(handle, unique_cluster_ids); - - relabel_cluster_ids( - handle, unique_cluster_ids, clustering, graph_view.local_vertex_partition_range_size()); -#else rmm::device_uvector vertex_ids_v(graph_view.number_of_vertices(), handle.get_stream()); detail::sequence_fill(handle.get_stream(), @@ -754,7 +589,6 @@ void flatten_leiden_dendrogram(raft::handle_t const& handle, partition_at_level( handle, dendrogram, vertex_ids_v.data(), clustering, dendrogram.num_levels()); -#endif } } // namespace detail @@ -810,10 +644,6 @@ std::pair leiden( detail::flatten_leiden_dendrogram(handle, graph_view, *dendrogram, clustering); -#if 0 - raft::print_device_vector("clustering", clustering, graph_view.number_of_vertices(), std::cout); -#endif - return std::make_pair(dendrogram->num_levels(), modularity); } From 2e060b1fabdd754a699fbe4fbe17e56186613ad8 Mon Sep 17 00:00:00 2001 From: Charles Hastings Date: Thu, 18 Apr 2024 15:48:51 -0700 Subject: [PATCH 3/3] relabel with the correct size, update pytest golden result, deterministic labeling of partitions --- 
cpp/src/community/leiden_impl.cuh | 71 ++++++++++++++++--- .../cugraph/tests/community/test_leiden.py | 6 +- 2 files changed, 64 insertions(+), 13 deletions(-) diff --git a/cpp/src/community/leiden_impl.cuh b/cpp/src/community/leiden_impl.cuh index 73382ae51eb..c07f9f6ffba 100644 --- a/cpp/src/community/leiden_impl.cuh +++ b/cpp/src/community/leiden_impl.cuh @@ -108,7 +108,7 @@ std::pair>, weight_t> leiden( rmm::device_uvector louvain_of_refined_graph(0, handle.get_stream()); // #V - while (dendrogram->num_levels() < 2 * max_level + 1) { + while (dendrogram->num_levels() < max_level) { // // Initialize every cluster to reference each vertex to itself // @@ -249,8 +249,8 @@ std::pair>, weight_t> leiden( detail::timer_start(handle, hr_timer, "update_clustering"); #endif - rmm::device_uvector louvain_assignment_for_vertices = - rmm::device_uvector(dendrogram->current_level_size(), handle.get_stream()); + rmm::device_uvector louvain_assignment_for_vertices(dendrogram->current_level_size(), + handle.get_stream()); raft::copy(louvain_assignment_for_vertices.begin(), dendrogram->current_level_begin(), @@ -479,19 +479,19 @@ std::pair>, weight_t> leiden( louvain_of_refined_graph.resize(current_graph_view.local_vertex_partition_range_size(), handle.get_stream()); + rmm::device_uvector numeric_sequence( + current_graph_view.local_vertex_partition_range_size(), handle.get_stream()); - // Temporarily use louvain_of_refined_graph to be a numeric sequence to renumber the - // dendrogram. 
detail::sequence_fill(handle.get_stream(), - louvain_of_refined_graph.data(), - louvain_of_refined_graph.size(), + numeric_sequence.data(), + numeric_sequence.size(), current_graph_view.local_vertex_partition_range_first()); relabel( handle, std::make_tuple(static_cast((*numbering_map).begin()), - static_cast(louvain_of_refined_graph.begin())), - current_graph_view.local_vertex_partition_range_size(), + static_cast(numeric_sequence.begin())), + (*numbering_map).size(), dendrogram->current_level_begin(), dendrogram->current_level_size(), false); @@ -505,7 +505,58 @@ std::pair>, weight_t> leiden( handle, std::make_tuple(static_cast(leiden_to_louvain_map.first.begin()), static_cast(leiden_to_louvain_map.second.begin())), - current_graph_view.local_vertex_partition_range_size(), + leiden_to_louvain_map.first.size(), + louvain_of_refined_graph.data(), + louvain_of_refined_graph.size(), + false); + + // Relabel clusters so that each cluster is identified by the lowest vertex id + // that is assigned to it. 
Note that numbering_map and numeric_sequence go out + // of scope at the end of this block, so we reuse their memory + raft::copy(numbering_map->begin(), + louvain_of_refined_graph.data(), + louvain_of_refined_graph.size(), + handle.get_stream()); + + thrust::sort(handle.get_thrust_policy(), + thrust::make_zip_iterator(numbering_map->begin(), numeric_sequence.begin()), + thrust::make_zip_iterator(numbering_map->end(), numeric_sequence.end())); + + size_t new_size = thrust::distance(numbering_map->begin(), + thrust::unique_by_key(handle.get_thrust_policy(), + numbering_map->begin(), + numbering_map->end(), + numeric_sequence.begin()) + .first); + + numbering_map->resize(new_size, handle.get_stream()); + numeric_sequence.resize(new_size, handle.get_stream()); + + if constexpr (multi_gpu) { + std::tie(*numbering_map, numeric_sequence) = + shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( + handle, std::move(*numbering_map), std::move(numeric_sequence)); + + thrust::sort(handle.get_thrust_policy(), + thrust::make_zip_iterator(numbering_map->begin(), numeric_sequence.begin()), + thrust::make_zip_iterator(numbering_map->end(), numeric_sequence.end())); + + size_t new_size = thrust::distance(numbering_map->begin(), + thrust::unique_by_key(handle.get_thrust_policy(), + numbering_map->begin(), + numbering_map->end(), + numeric_sequence.begin()) + .first); + + numbering_map->resize(new_size, handle.get_stream()); + numeric_sequence.resize(new_size, handle.get_stream()); + } + + relabel( + handle, + std::make_tuple(static_cast((*numbering_map).begin()), + static_cast(numeric_sequence.begin())), + (*numbering_map).size(), louvain_of_refined_graph.data(), louvain_of_refined_graph.size(), false); diff --git a/python/cugraph/cugraph/tests/community/test_leiden.py b/python/cugraph/cugraph/tests/community/test_leiden.py index 71117c4210f..48300b2201c 100644 --- a/python/cugraph/cugraph/tests/community/test_leiden.py +++
b/python/cugraph/cugraph/tests/community/test_leiden.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -83,8 +83,8 @@ "input_type": "CSR", "expected_output": { # fmt: off - "partition": [3, 3, 3, 3, 2, 2, 2, 3, 1, 3, 2, 3, 3, 3, 1, 1, 2, 3, 1, 3, - 1, 3, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1], + "partition": [0, 0, 0, 0, 3, 3, 3, 0, 1, 0, 3, 0, 0, 0, 1, 1, 3, 0, 1, 0, + 1, 0, 1, 2, 2, 2, 1, 2, 2, 1, 1, 2, 1, 1], # fmt: on "modularity_score": 0.41880345, },