Skip to content

Commit

Permalink
Update primitive to compute weighted Jaccard, Sorensen and Overlap similarity (#3728)
Browse files Browse the repository at this point in the history

This PR:
- changes `per_v_pair_transform_dst_nbr_intersection` to support computing weighted intersections
- updates the implementations of `similarity`, `jaccard_coefficients`, `sorensen_coefficients`, and `overlap_coefficients` for weighted graphs

NOTE: the current implementation doesn't support computing similarity for multi-edge graphs.

closes #2748
closes #3477

Authors:
  - Naim (https://github.com/naimnv)

Approvers:
  - Chuck Hastings (https://github.com/ChuckHastings)
  - Seunghwa Kang (https://github.com/seunghwak)
  - Joseph Nke (https://github.com/jnke2016)

URL: #3728
  • Loading branch information
naimnv authored Jul 31, 2023
1 parent be2a63e commit 14862c6
Show file tree
Hide file tree
Showing 18 changed files with 2,202 additions and 305 deletions.
2 changes: 1 addition & 1 deletion cpp/src/c_api/similarity.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down
37 changes: 11 additions & 26 deletions cpp/src/link_prediction/jaccard_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -24,22 +24,15 @@ namespace cugraph {
namespace detail {

/**
 * @brief Scoring functor for the Jaccard coefficient of two vertex neighborhoods.
 *
 * The score is intersection / union, where the union size is derived as
 * cardinality_a + cardinality_b - cardinality_a_intersect_b.
 */
struct jaccard_functor_t {
  /**
   * @brief Compute the Jaccard score from the two set sizes and the intersection size.
   *
   * Marked __host__ __device__ (a superset of the previous __device__-only qualifier) so the
   * arithmetic can also be exercised from host code; it touches no device-only state.
   *
   * @tparam weight_t Arithmetic type of the inputs and of the returned score.
   * @param cardinality_a Size of the first set.
   * @param cardinality_b Size of the second set.
   * @param cardinality_a_intersect_b Size of the intersection of the two sets.
   * @return intersection / union, or 0 when the union is empty.
   */
  template <typename weight_t>
  weight_t __host__ __device__ compute_score(weight_t cardinality_a,
                                             weight_t cardinality_b,
                                             weight_t cardinality_a_intersect_b) const
  {
    auto const cardinality_a_union_b = cardinality_a + cardinality_b - cardinality_a_intersect_b;

    // Both sets empty => empty union. Dividing would evaluate 0 / 0 (NaN for floating-point
    // weight_t), so report a score of 0 instead.
    return cardinality_a_union_b <= weight_t{0}
             ? weight_t{0}
             : cardinality_a_intersect_b / cardinality_a_union_b;
  }
};

struct weighted_jaccard_functor_t {
template <typename weight_t>
weight_t __device__ compute_score(weight_t weight_a,
weight_t weight_b,
weight_t min_weight_a_intersect_b) const
weight_t weight_a_intersect_b,
weight_t weight_a_union_b) const
{
return min_weight_a_intersect_b / (weight_a + weight_b - min_weight_a_intersect_b);
return weight_a_union_b <= std::numeric_limits<weight_t>::min()
? weight_t{0}
: weight_a_intersect_b / weight_a_union_b;
}
};

Expand All @@ -55,20 +48,12 @@ rmm::device_uvector<weight_t> jaccard_coefficients(
{
CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented.");

if (!edge_weight_view)
return detail::similarity(handle,
graph_view,
edge_weight_view,
vertex_pairs,
detail::jaccard_functor_t{},
do_expensive_check);
else
return detail::similarity(handle,
graph_view,
edge_weight_view,
vertex_pairs,
detail::weighted_jaccard_functor_t{},
do_expensive_check);
return detail::similarity(handle,
graph_view,
edge_weight_view,
vertex_pairs,
detail::jaccard_functor_t{},
do_expensive_check);
}

} // namespace cugraph
37 changes: 11 additions & 26 deletions cpp/src/link_prediction/overlap_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -24,22 +24,15 @@ namespace cugraph {
namespace detail {

/**
 * @brief Scoring functor for the Overlap coefficient of two vertex neighborhoods.
 *
 * The score is the intersection size divided by the size of the smaller of the two sets.
 */
struct overlap_functor_t {
  /**
   * @brief Compute the Overlap score from the two set sizes and the intersection size.
   *
   * Marked __host__ __device__ (a superset of the previous __device__-only qualifier) so the
   * arithmetic can also be exercised from host code; it touches no device-only state.
   *
   * @tparam weight_t Arithmetic type of the inputs and of the returned score.
   * @param cardinality_a Size of the first set.
   * @param cardinality_b Size of the second set.
   * @param cardinality_a_intersect_b Size of the intersection of the two sets.
   * @return intersection / min(cardinality_a, cardinality_b), or 0 when the smaller set is empty.
   */
  template <typename weight_t>
  weight_t __host__ __device__ compute_score(weight_t cardinality_a,
                                             weight_t cardinality_b,
                                             weight_t cardinality_a_intersect_b) const
  {
    // Same selection rule as std::min(cardinality_a, cardinality_b), spelled out so that no host
    // library function is called from this body.
    auto const smaller_cardinality = (cardinality_b < cardinality_a) ? cardinality_b : cardinality_a;

    // An empty set makes the denominator 0. Dividing would evaluate 0 / 0 (NaN for floating-point
    // weight_t), so report a score of 0 instead.
    return smaller_cardinality <= weight_t{0} ? weight_t{0}
                                              : cardinality_a_intersect_b / smaller_cardinality;
  }
};

struct weighted_overlap_functor_t {
template <typename weight_t>
weight_t __device__ compute_score(weight_t weight_a,
weight_t weight_b,
weight_t min_weight_a_intersect_b) const
weight_t weight_a_intersect_b,
weight_t weight_a_union_b) const
{
return min_weight_a_intersect_b / std::min(weight_a, weight_b);
return std::min(weight_a, weight_b) <= std::numeric_limits<weight_t>::min()
? weight_t{0}
: weight_a_intersect_b / std::min(weight_a, weight_b);
}
};

Expand All @@ -55,20 +48,12 @@ rmm::device_uvector<weight_t> overlap_coefficients(
{
CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented.");

if (!edge_weight_view)
return detail::similarity(handle,
graph_view,
edge_weight_view,
vertex_pairs,
detail::overlap_functor_t{},
do_expensive_check);
else
return detail::similarity(handle,
graph_view,
edge_weight_view,
vertex_pairs,
detail::weighted_overlap_functor_t{},
do_expensive_check);
return detail::similarity(handle,
graph_view,
edge_weight_view,
vertex_pairs,
detail::overlap_functor_t{},
do_expensive_check);
}

} // namespace cugraph
105 changes: 90 additions & 15 deletions cpp/src/link_prediction/similarity_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@
*/
#pragma once

#include <prims/count_if_e.cuh>
#include <prims/per_v_pair_transform_dst_nbr_intersection.cuh>
#include <prims/update_edge_src_dst_property.cuh>

#include <cugraph/graph_functions.hpp>
#include <cugraph/graph_view.hpp>

#include <raft/core/device_span.hpp>
Expand Down Expand Up @@ -51,33 +53,106 @@ rmm::device_uvector<weight_t> similarity(
auto vertex_pairs_begin =
thrust::make_zip_iterator(std::get<0>(vertex_pairs).data(), std::get<1>(vertex_pairs).data());

if (do_expensive_check) {
auto num_invalids = detail::count_invalid_vertex_pairs(
handle, graph_view, vertex_pairs_begin, vertex_pairs_begin + num_vertex_pairs);
CUGRAPH_EXPECTS(num_invalids == 0,
"Invalid input arguments: there are invalid input vertex pairs.");

if (edge_weight_view) {
auto num_negative_edge_weights =
count_if_e(handle,
graph_view,
edge_src_dummy_property_t{}.view(),
edge_dst_dummy_property_t{}.view(),
*edge_weight_view,
[] __device__(vertex_t, vertex_t, auto, auto, weight_t w) { return w < 0.0; });
CUGRAPH_EXPECTS(
num_negative_edge_weights == 0,
"Invalid input argument: input edge weights should have non-negative values.");
}
}

if (edge_weight_view) {
// FIXME: need implementation, similar to unweighted
// Use compute_out_weight_sums instead of compute_out_degrees
// Sum up for each common edge compute (u,a,v): min weight ((u,a), (a,v)) and
// max weight((u,a), (a,v)).
// Use these to compute weighted score
//
CUGRAPH_FAIL("weighted similarity computations are not supported in this release");
rmm::device_uvector<weight_t> similarity_score(num_vertex_pairs, handle.get_stream());
rmm::device_uvector<weight_t> weighted_out_degrees =
compute_out_weight_sums(handle, graph_view, *edge_weight_view);

per_v_pair_transform_dst_nbr_intersection(
handle,
graph_view,
*edge_weight_view,
vertex_pairs_begin,
vertex_pairs_begin + num_vertex_pairs,
weighted_out_degrees.begin(),
[functor] __device__(auto a,
auto b,
auto weight_a,
auto weight_b,
auto intersection,
auto intersected_properties_a,
auto intersected_properties_b) {
weight_t sum_of_min_weight_a_intersect_b = weight_t{0};
weight_t sum_of_max_weight_a_intersect_b = weight_t{0};
weight_t sum_of_intersected_a = weight_t{0};
weight_t sum_of_intersected_b = weight_t{0};

auto pair_first = thrust::make_zip_iterator(intersected_properties_a.data(),
intersected_properties_b.data());
thrust::tie(sum_of_min_weight_a_intersect_b,
sum_of_max_weight_a_intersect_b,
sum_of_intersected_a,
sum_of_intersected_b) =
thrust::transform_reduce(
thrust::seq,
pair_first,
pair_first + intersected_properties_a.size(),
[] __device__(auto property_pair) {
auto prop_a = thrust::get<0>(property_pair);
auto prop_b = thrust::get<1>(property_pair);
return thrust::make_tuple(min(prop_a, prop_b), max(prop_a, prop_b), prop_a, prop_b);
},
thrust::make_tuple(weight_t{0}, weight_t{0}, weight_t{0}, weight_t{0}),
[] __device__(auto lhs, auto rhs) {
return thrust::make_tuple(thrust::get<0>(lhs) + thrust::get<0>(rhs),
thrust::get<1>(lhs) + thrust::get<1>(rhs),
thrust::get<2>(lhs) + thrust::get<2>(rhs),
thrust::get<3>(lhs) + thrust::get<3>(rhs));
});

weight_t sum_of_uniq_a = weight_a - sum_of_intersected_a;
weight_t sum_of_uniq_b = weight_b - sum_of_intersected_b;

sum_of_max_weight_a_intersect_b += sum_of_uniq_a + sum_of_uniq_b;

return functor.compute_score(static_cast<weight_t>(weight_a),
static_cast<weight_t>(weight_b),
static_cast<weight_t>(sum_of_min_weight_a_intersect_b),
static_cast<weight_t>(sum_of_max_weight_a_intersect_b));
},
similarity_score.begin(),
do_expensive_check);

return similarity_score;
} else {
rmm::device_uvector<weight_t> similarity_score(num_vertex_pairs, handle.get_stream());

//
// Compute vertex_degree for all vertices, then distribute to each GPU.
// Need to use this instead of the dummy properties below
//
auto out_degrees = graph_view.compute_out_degrees(handle);

per_v_pair_transform_dst_nbr_intersection(
handle,
graph_view,
cugraph::edge_dummy_property_t{}.view(),
vertex_pairs_begin,
vertex_pairs_begin + num_vertex_pairs,
out_degrees.begin(),
[functor] __device__(auto v1, auto v2, auto v1_degree, auto v2_degree, auto intersection) {
return functor.compute_score(static_cast<weight_t>(v1_degree),
static_cast<weight_t>(v2_degree),
static_cast<weight_t>(intersection.size()));
[functor] __device__(
auto v1, auto v2, auto v1_degree, auto v2_degree, auto intersection, auto, auto) {
return functor.compute_score(
static_cast<weight_t>(v1_degree),
static_cast<weight_t>(v2_degree),
static_cast<weight_t>(intersection.size()),
static_cast<weight_t>(v1_degree + v2_degree - intersection.size()));
},
similarity_score.begin(),
do_expensive_check);
Expand Down
37 changes: 11 additions & 26 deletions cpp/src/link_prediction/sorensen_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -24,22 +24,15 @@ namespace cugraph {
namespace detail {

/**
 * @brief Scoring functor for the Sorensen coefficient of two vertex neighborhoods.
 *
 * The score is twice the intersection size divided by the sum of the two set sizes.
 */
struct sorensen_functor_t {
  /**
   * @brief Compute the Sorensen score from the two set sizes and the intersection size.
   *
   * Marked __host__ __device__ (a superset of the previous __device__-only qualifier) so the
   * arithmetic can also be exercised from host code; it touches no device-only state.
   *
   * @tparam weight_t Arithmetic type of the inputs and of the returned score.
   * @param cardinality_a Size of the first set.
   * @param cardinality_b Size of the second set.
   * @param cardinality_a_intersect_b Size of the intersection of the two sets.
   * @return (2 * intersection) / (cardinality_a + cardinality_b), or 0 when both sets are empty.
   */
  template <typename weight_t>
  weight_t __host__ __device__ compute_score(weight_t cardinality_a,
                                             weight_t cardinality_b,
                                             weight_t cardinality_a_intersect_b) const
  {
    auto const cardinality_sum = cardinality_a + cardinality_b;

    // Both sets empty => denominator 0. Dividing would evaluate 0 / 0 (NaN for floating-point
    // weight_t), so report a score of 0 instead.
    return cardinality_sum <= weight_t{0} ? weight_t{0}
                                          : (2 * cardinality_a_intersect_b) / cardinality_sum;
  }
};

struct weighted_sorensen_functor_t {
template <typename weight_t>
weight_t __device__ compute_score(weight_t weight_a,
weight_t weight_b,
weight_t min_weight_a_intersect_b) const
weight_t weight_a_intersect_b,
weight_t weight_a_union_b) const
{
return (2 * min_weight_a_intersect_b) / (weight_a + weight_b);
return (weight_a + weight_b) <= std::numeric_limits<weight_t>::min()
? weight_t{0}
: (2 * weight_a_intersect_b) / (weight_a + weight_b);
}
};

Expand All @@ -55,20 +48,12 @@ rmm::device_uvector<weight_t> sorensen_coefficients(
{
CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented.");

if (!edge_weight_view)
return detail::similarity(handle,
graph_view,
edge_weight_view,
vertex_pairs,
detail::sorensen_functor_t{},
do_expensive_check);
else
return detail::similarity(handle,
graph_view,
edge_weight_view,
vertex_pairs,
detail::weighted_sorensen_functor_t{},
do_expensive_check);
return detail::similarity(handle,
graph_view,
edge_weight_view,
vertex_pairs,
detail::sorensen_functor_t{},
do_expensive_check);
}

} // namespace cugraph
78 changes: 1 addition & 77 deletions cpp/src/prims/detail/extract_transform_v_frontier_e.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/
#pragma once

#include <prims/detail/optional_dataframe_buffer.hpp>
#include <prims/property_op_utils.cuh>

#include <cugraph/edge_partition_device_view.cuh>
Expand Down Expand Up @@ -60,83 +61,6 @@ namespace detail {

int32_t constexpr extract_transform_v_frontier_e_kernel_block_size = 512;

// Resolves the value type behind an "optional" buffer iterator.
// thrust::iterator_traits<Iterator>::value_type cannot be used directly when Iterator is void*
// (a reference to void is not allowed), so that case is special-cased here: the member alias
// `value` is void for void* and the iterator's value_type for every other iterator.
template <typename Iterator, typename Enable = void>
struct optional_dataframe_buffer_value_type_t;

// Iterator is void*: report void.
template <typename Iterator>
struct optional_dataframe_buffer_value_type_t<
  Iterator,
  typename std::enable_if<std::is_same<Iterator, void*>::value>::type> {
  using value = void;
};

// Any other iterator: forward to thrust::iterator_traits.
template <typename Iterator>
struct optional_dataframe_buffer_value_type_t<
  Iterator,
  typename std::enable_if<!std::is_same<Iterator, void*>::value>::type> {
  using value = typename thrust::iterator_traits<Iterator>::value_type;
};

// Allocates an "optional" dataframe buffer; the overload is chosen by whether T is void.

// T is not void: forwards to allocate_dataframe_buffer<T> and returns its result.
template <typename T, std::enable_if_t<!std::is_same_v<T, void>>* = nullptr>
auto allocate_optional_dataframe_buffer(size_t size, rmm::cuda_stream_view stream)
{
  return allocate_dataframe_buffer<T>(size, stream);
}

// T is void: size and stream are ignored and a zero-valued std::byte is returned as a dummy.
template <typename T, std::enable_if_t<std::is_same_v<T, void>>* = nullptr>
std::byte allocate_optional_dataframe_buffer(size_t size, rmm::cuda_stream_view stream)
{
  constexpr std::byte dummy{0};
  return dummy;
}

// Returns the begin iterator of an "optional" dataframe buffer; the overload is chosen by
// whether T is void.

// T is not void: the argument is an lvalue reference to whatever allocate_dataframe_buffer<T>
// returns, and the result comes from get_dataframe_buffer_begin.
template <typename T, std::enable_if_t<!std::is_same_v<T, void>>* = nullptr>
auto get_optional_dataframe_buffer_begin(
  typename std::add_lvalue_reference<decltype(allocate_dataframe_buffer<T>(
    size_t{0}, rmm::cuda_stream_view{}))>::type optional_dataframe_buffer)
{
  return get_dataframe_buffer_begin(optional_dataframe_buffer);
}

// T is void: the std::byte dummy is not inspected; a null void* is returned.
template <typename T, std::enable_if_t<std::is_same_v<T, void>>* = nullptr>
void* get_optional_dataframe_buffer_begin(std::byte& optional_dataframe_buffer)
{
  void* const no_buffer = nullptr;
  return no_buffer;
}

// Resizes an "optional" dataframe buffer; the overload is chosen by whether T is void.

// T is not void: forwards to resize_dataframe_buffer on the real buffer.
template <typename T, std::enable_if_t<!std::is_same_v<T, void>>* = nullptr>
void resize_optional_dataframe_buffer(
  typename std::add_lvalue_reference<decltype(allocate_dataframe_buffer<T>(
    size_t{0}, rmm::cuda_stream_view{}))>::type optional_dataframe_buffer,
  size_t new_buffer_size,
  rmm::cuda_stream_view stream_view)
{
  resize_dataframe_buffer(optional_dataframe_buffer, new_buffer_size, stream_view);
}

// T is void: the std::byte dummy has nothing to resize, so this is a no-op.
template <typename T, std::enable_if_t<std::is_same_v<T, void>>* = nullptr>
void resize_optional_dataframe_buffer(std::byte& optional_dataframe_buffer,
                                      size_t new_buffer_size,
                                      rmm::cuda_stream_view stream_view)
{
}

// Shrinks an "optional" dataframe buffer to fit; the overload is chosen by whether T is void.

// T is not void: forwards to shrink_to_fit_dataframe_buffer on the real buffer.
template <typename T, std::enable_if_t<!std::is_same_v<T, void>>* = nullptr>
void shrink_to_fit_optional_dataframe_buffer(
  typename std::add_lvalue_reference<decltype(allocate_dataframe_buffer<T>(
    size_t{0}, rmm::cuda_stream_view{}))>::type optional_dataframe_buffer,
  rmm::cuda_stream_view stream_view)
{
  shrink_to_fit_dataframe_buffer(optional_dataframe_buffer, stream_view);
}

// T is void: the std::byte dummy has nothing to shrink, so this is a no-op.
template <typename T, std::enable_if_t<std::is_same_v<T, void>>* = nullptr>
void shrink_to_fit_optional_dataframe_buffer(std::byte& optional_dataframe_buffer,
                                             rmm::cuda_stream_view stream_view)
{
}

template <typename e_op_result_t,
typename BufferKeyOutputIterator,
typename BufferValueOutputIterator>
Expand Down
Loading

0 comments on commit 14862c6

Please sign in to comment.