Skip to content

Commit

Permalink
C++ implementation for unweighted Jaccard/Sorensen/Overlap (#2750)
Browse files Browse the repository at this point in the history
Closes #2543 

Implements unweighted similarity algorithms using the new primitive defined in #2728.

Weighted implementations will be tracked by issue #2749

Authors:
  - Chuck Hastings (https://github.com/ChuckHastings)

Approvers:
  - Naim (https://github.com/naimnv)
  - Seunghwa Kang (https://github.com/seunghwak)

URL: #2750
  • Loading branch information
ChuckHastings authored Oct 4, 2022
1 parent 2b8395a commit 4c6d038
Show file tree
Hide file tree
Showing 12 changed files with 403 additions and 166 deletions.
14 changes: 9 additions & 5 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -161,9 +161,17 @@ endif()
################################################################################
# - libcugraph library target --------------------------------------------------

# NOTE: The most expensive compilations are listed first
# since ninja will run them in parallel in this order,
# which should give us a better parallel schedule.

set(CUGRAPH_SOURCES
src/detail/utility_wrappers.cu
src/detail/shuffle_wrappers.cu
src/sampling/random_walks_mg.cu
src/community/detail/common_methods_mg.cu
src/community/detail/common_methods_sg.cu
src/detail/utility_wrappers.cu
src/structure/graph_view_mg.cu
src/utilities/cython.cu
src/utilities/path_retrieval.cu
src/utilities/graph_bcast.cpp
Expand All @@ -181,8 +189,6 @@ set(CUGRAPH_SOURCES
src/layout/legacy/force_atlas2.cu
src/converters/legacy/COOtoCSR.cu
src/community/legacy/spectral_clustering.cu
src/community/detail/common_methods_sg.cu
src/community/detail/common_methods_mg.cu
src/community/louvain_sg.cu
src/community/louvain_mg.cu
src/community/leiden_sg.cu
Expand All @@ -195,7 +201,6 @@ set(CUGRAPH_SOURCES
src/community/legacy/egonet.cu
src/sampling/random_walks.cu
src/sampling/random_walks_sg.cu
src/sampling/random_walks_mg.cu
src/sampling/detail/sampling_utils_mg.cu
src/sampling/detail/sampling_utils_sg.cu
src/sampling/uniform_neighbor_sampling_mg.cpp
Expand All @@ -215,7 +220,6 @@ set(CUGRAPH_SOURCES
src/structure/graph_sg.cu
src/structure/graph_mg.cu
src/structure/graph_view_sg.cu
src/structure/graph_view_mg.cu
src/structure/coarsen_graph_sg.cu
src/structure/coarsen_graph_mg.cu
src/structure/renumber_edgelist_sg.cu
Expand Down
19 changes: 6 additions & 13 deletions cpp/src/link_prediction/jaccard_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -25,26 +25,19 @@ namespace detail {

struct jaccard_functor_t {
template <typename weight_t>
weight_t __device__ compute_score(size_t cardinality_a,
size_t cardinality_b,
size_t cardinality_a_intersect_b,
weight_t,
weight_t,
weight_t)
weight_t __device__ compute_score(weight_t cardinality_a,
weight_t cardinality_b,
weight_t cardinality_a_intersect_b) const
{
return static_cast<weight_t>(cardinality_a_intersect_b) /
static_cast<weight_t>(cardinality_a + cardinality_b - cardinality_a_intersect_b);
// Jaccard = |A n B| / |A u B|, where |A u B| = |A| + |B| - |A n B|.
// The denominator must be parenthesized: without it, `/` binds tighter than `+`/`-`
// and the expression evaluates as (|A n B| / |A|) + |B| - |A n B|.
return cardinality_a_intersect_b / (cardinality_a + cardinality_b - cardinality_a_intersect_b);
}
};

struct weighted_jaccard_functor_t {
template <typename weight_t>
weight_t __device__ compute_score(size_t cardinality_a,
size_t cardinality_b,
size_t cardinality_a_intersect_b,
weight_t weight_a,
weight_t __device__ compute_score(weight_t weight_a,
weight_t weight_b,
weight_t min_weight_a_intersect_b)
weight_t min_weight_a_intersect_b) const
{
return min_weight_a_intersect_b / (weight_a + weight_b - min_weight_a_intersect_b);
}
Expand Down
19 changes: 6 additions & 13 deletions cpp/src/link_prediction/overlap_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -25,26 +25,19 @@ namespace detail {

struct overlap_functor_t {
template <typename weight_t>
weight_t __device__ compute_score(size_t cardinality_a,
size_t cardinality_b,
size_t cardinality_a_intersect_b,
weight_t,
weight_t,
weight_t)
weight_t __device__ compute_score(weight_t cardinality_a,
weight_t cardinality_b,
weight_t cardinality_a_intersect_b) const
{
return static_cast<weight_t>(cardinality_a_intersect_b) /
static_cast<weight_t>(std::min(cardinality_a, cardinality_b));
return cardinality_a_intersect_b / std::min(cardinality_a, cardinality_b);
}
};

struct weighted_overlap_functor_t {
template <typename weight_t>
weight_t __device__ compute_score(size_t cardinality_a,
size_t cardinality_b,
size_t cardinality_a_intersect_b,
weight_t weight_a,
weight_t __device__ compute_score(weight_t weight_a,
weight_t weight_b,
weight_t min_weight_a_intersect_b)
weight_t min_weight_a_intersect_b) const
{
return min_weight_a_intersect_b / std::min(weight_a, weight_b);
}
Expand Down
64 changes: 54 additions & 10 deletions cpp/src/link_prediction/similarity_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,16 @@
*/
#pragma once

#include <raft/core/span.hpp>
#include <prims/per_v_pair_transform_dst_nbr_intersection.cuh>
#include <prims/update_edge_src_dst_property.cuh>

#include <cugraph/graph_view.hpp>

#include <raft/core/device_span.hpp>
#include <raft/handle.hpp>

#include <rmm/device_uvector.hpp>

#include <optional>
#include <tuple>

Expand All @@ -32,15 +39,52 @@ rmm::device_uvector<weight_t> similarity(
bool use_weights,
functor_t functor)
{
CUGRAPH_FAIL("not implemented");

// Implementation, using primitives, that computes:
// For use_weights == False: cardinality of A intersect B
// For use_weights == True: sum of minimum weight in A intersect B, sum of maximum weight in A
// intersect B
//
// Then use the functor to compute the score
//
using GraphViewType = graph_view_t<vertex_t, edge_t, weight_t, false, multi_gpu>;
constexpr bool do_expensive_check = false;

CUGRAPH_EXPECTS(std::get<0>(vertex_pairs).size() == std::get<1>(vertex_pairs).size(),
"vertex pairs have mismatched sizes");

if (use_weights)
CUGRAPH_EXPECTS(graph_view.is_weighted(), "attempting to use weights on an unweighted graph");

size_t num_vertex_pairs = std::get<0>(vertex_pairs).size();
auto vertex_pairs_begin =
thrust::make_zip_iterator(std::get<0>(vertex_pairs).data(), std::get<1>(vertex_pairs).data());

if (use_weights) {
// FIXME: need implementation, similar to unweighted
// Use compute_out_weight_sums instead of compute_out_degrees
// Sum up for each common edge compute (u,a,v): min weight ((u,a), (a,v)) and
// max weight((u,a), (a,v)).
// Use these to compute weighted score
//
CUGRAPH_FAIL("weighted similarity computations are not supported in this release");
} else {
rmm::device_uvector<weight_t> similarity_score(num_vertex_pairs, handle.get_stream());

//
// Compute vertex_degree for all vertices, then distribute to each GPU.
// Need to use this instead of the dummy properties below
//
auto out_degrees = graph_view.compute_out_degrees(handle);

per_v_pair_transform_dst_nbr_intersection(
handle,
graph_view,
vertex_pairs_begin,
vertex_pairs_begin + num_vertex_pairs,
out_degrees.begin(),
[functor] __device__(auto v1, auto v2, auto v1_degree, auto v2_degree, auto intersection) {
return functor.compute_score(static_cast<weight_t>(v1_degree),
static_cast<weight_t>(v2_degree),
static_cast<weight_t>(intersection.size()));
},
similarity_score.begin(),
do_expensive_check);

return similarity_score;
}
}

} // namespace detail
Expand Down
19 changes: 6 additions & 13 deletions cpp/src/link_prediction/sorensen_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -25,26 +25,19 @@ namespace detail {

struct sorensen_functor_t {
template <typename weight_t>
weight_t __device__ compute_score(size_t cardinality_a,
size_t cardinality_b,
size_t cardinality_a_intersect_b,
weight_t,
weight_t,
weight_t)
weight_t __device__ compute_score(weight_t cardinality_a,
weight_t cardinality_b,
weight_t cardinality_a_intersect_b) const
{
return static_cast<weight_t>(2 * cardinality_a_intersect_b) /
static_cast<weight_t>(cardinality_a + cardinality_b);
return (2 * cardinality_a_intersect_b) / (cardinality_a + cardinality_b);
}
};

struct weighted_sorensen_functor_t {
template <typename weight_t>
weight_t __device__ compute_score(size_t cardinality_a,
size_t cardinality_b,
size_t cardinality_a_intersect_b,
weight_t weight_a,
weight_t __device__ compute_score(weight_t weight_a,
weight_t weight_b,
weight_t min_weight_a_intersect_b)
weight_t min_weight_a_intersect_b) const
{
return (2 * min_weight_a_intersect_b) / (weight_a + weight_b);
}
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ struct call_intersection_op_t {
auto src_offset = GraphViewType::is_storage_transposed ? minor_offset : major_offset;
auto dst_offset = GraphViewType::is_storage_transposed ? major_offset : minor_offset;
src_prop = *(vertex_property_first + src_offset);
dst_prop = *(vertex_property_first + src_offset);
dst_prop = *(vertex_property_first + dst_offset);
}
*(major_minor_pair_value_output_first + index) =
evaluate_intersection_op<GraphViewType,
Expand Down Expand Up @@ -264,7 +264,7 @@ void per_v_pair_transform_dst_nbr_intersection(
std::vector<vertex_t> h_edge_partition_major_range_lasts(
graph_view.number_of_local_edge_partitions());
for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) {
h_edge_partition_major_range_lasts[i] = graph_view.local_edge_partition_src_range_first(i);
h_edge_partition_major_range_lasts[i] = graph_view.local_edge_partition_src_range_last(i);
}
rmm::device_uvector<vertex_t> d_edge_partition_major_range_lasts(
h_edge_partition_major_range_lasts.size(), handle.get_stream());
Expand Down
Loading

0 comments on commit 4c6d038

Please sign in to comment.