Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update primitive to compute weighted Jaccard, Sorensen and Overlap similarity #3728

Merged
merged 26 commits into from
Jul 31, 2023
Merged
Show file tree
Hide file tree
Changes from 23 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
bfc4ac6
Debug- pass around pointer to weights
Jun 14, 2023
f4d5e51
Changes for weighted similarity, with debug statements
Jul 13, 2023
6204701
Merge branch 'branch-23.08' of github.com:rapidsai/cugraph into weigh…
Jul 14, 2023
f4dd849
weighted jaccard, sorensen and overlap tests, with debugging statements
Jul 19, 2023
e5b241c
weighted jaccard, sorensen and overlap tests, with debugging statements
Jul 20, 2023
39ce654
Merge branch 'branch-23.08' of github.com:rapidsai/cugraph into weigh…
Jul 20, 2023
2df17e0
weighted jaccard, sorensen and overlap tests, with debugging statements
Jul 20, 2023
23e91e5
Fix test for per_v_pair_transform_dst_nbr_intersection
Jul 21, 2023
261ec7a
Change test functor to check per_v_pair_transform_dst_nbr_intersectio…
Jul 21, 2023
c5dfb4c
Style fix
Jul 21, 2023
4ee3f25
Style fix
Jul 21, 2023
9726631
Style fix
Jul 21, 2023
278cd82
Style fix
Jul 21, 2023
c0e93c9
Style fix
Jul 21, 2023
c6d1412
Address PR comments
Jul 24, 2023
d2cb64f
Address PR comments part-2
Jul 25, 2023
d35fe1b
Address PR comments part-3
Jul 25, 2023
cc2197d
Address PR comments part-3
Jul 26, 2023
be7afcd
Replace several optionals with conditionals
Jul 26, 2023
4707d79
Replace several optional variables with conditional variables
Jul 27, 2023
00d9524
Update test code to use multiple common input mtx files
Jul 27, 2023
a8a5c02
Merge branch 'branch-23.08' of github.com:rapidsai/cugraph into weigh…
Jul 27, 2023
40f33ba
Move optional dataframe buffer related code to a new file
Jul 27, 2023
232d231
Address PR comments
Jul 28, 2023
73ccaff
Address additional PR comments
Jul 29, 2023
7295838
Remove duplicate code
Jul 29, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cpp/src/c_api/similarity.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down
37 changes: 11 additions & 26 deletions cpp/src/link_prediction/jaccard_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -24,22 +24,15 @@ namespace cugraph {
namespace detail {

struct jaccard_functor_t {
template <typename weight_t>
weight_t __device__ compute_score(weight_t cardinality_a,
weight_t cardinality_b,
weight_t cardinality_a_intersect_b) const
{
return cardinality_a_intersect_b / (cardinality_a + cardinality_b - cardinality_a_intersect_b);
}
};

struct weighted_jaccard_functor_t {
template <typename weight_t>
weight_t __device__ compute_score(weight_t weight_a,
weight_t weight_b,
weight_t min_weight_a_intersect_b) const
weight_t weight_a_intersect_b,
weight_t weight_a_union_b) const
{
return min_weight_a_intersect_b / (weight_a + weight_b - min_weight_a_intersect_b);
return weight_a_union_b <= std::numeric_limits<weight_t>::min()
? weight_t{0}
: weight_a_intersect_b / weight_a_union_b;
}
};

Expand All @@ -55,20 +48,12 @@ rmm::device_uvector<weight_t> jaccard_coefficients(
{
CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented.");

if (!edge_weight_view)
return detail::similarity(handle,
graph_view,
edge_weight_view,
vertex_pairs,
detail::jaccard_functor_t{},
do_expensive_check);
else
return detail::similarity(handle,
graph_view,
edge_weight_view,
vertex_pairs,
detail::weighted_jaccard_functor_t{},
do_expensive_check);
return detail::similarity(handle,
graph_view,
edge_weight_view,
vertex_pairs,
detail::jaccard_functor_t{},
do_expensive_check);
}

} // namespace cugraph
37 changes: 11 additions & 26 deletions cpp/src/link_prediction/overlap_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -24,22 +24,15 @@ namespace cugraph {
namespace detail {

struct overlap_functor_t {
template <typename weight_t>
weight_t __device__ compute_score(weight_t cardinality_a,
weight_t cardinality_b,
weight_t cardinality_a_intersect_b) const
{
return cardinality_a_intersect_b / std::min(cardinality_a, cardinality_b);
}
};

struct weighted_overlap_functor_t {
template <typename weight_t>
weight_t __device__ compute_score(weight_t weight_a,
weight_t weight_b,
weight_t min_weight_a_intersect_b) const
weight_t weight_a_intersect_b,
weight_t weight_a_union_b) const
{
return min_weight_a_intersect_b / std::min(weight_a, weight_b);
return std::min(weight_a, weight_b) <= std::numeric_limits<weight_t>::min()
? weight_t{0}
: weight_a_intersect_b / std::min(weight_a, weight_b);
}
};

Expand All @@ -55,20 +48,12 @@ rmm::device_uvector<weight_t> overlap_coefficients(
{
CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented.");

if (!edge_weight_view)
return detail::similarity(handle,
graph_view,
edge_weight_view,
vertex_pairs,
detail::overlap_functor_t{},
do_expensive_check);
else
return detail::similarity(handle,
graph_view,
edge_weight_view,
vertex_pairs,
detail::weighted_overlap_functor_t{},
do_expensive_check);
return detail::similarity(handle,
graph_view,
edge_weight_view,
vertex_pairs,
detail::overlap_functor_t{},
do_expensive_check);
}

} // namespace cugraph
91 changes: 76 additions & 15 deletions cpp/src/link_prediction/similarity_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@
*/
#pragma once

#include <prims/count_if_e.cuh>
#include <prims/per_v_pair_transform_dst_nbr_intersection.cuh>
#include <prims/update_edge_src_dst_property.cuh>

#include <cugraph/graph_functions.hpp>
#include <cugraph/graph_view.hpp>

#include <raft/core/device_span.hpp>
Expand Down Expand Up @@ -51,33 +53,92 @@ rmm::device_uvector<weight_t> similarity(
auto vertex_pairs_begin =
thrust::make_zip_iterator(std::get<0>(vertex_pairs).data(), std::get<1>(vertex_pairs).data());

if (do_expensive_check) {
auto num_invalids = detail::count_invalid_vertex_pairs(
handle, graph_view, vertex_pairs_begin, vertex_pairs_begin + num_vertex_pairs);
CUGRAPH_EXPECTS(num_invalids == 0,
"Invalid input arguments: there are invalid input vertex pairs.");

if (edge_weight_view) {
auto num_negative_edge_weights =
count_if_e(handle,
graph_view,
edge_src_dummy_property_t{}.view(),
edge_dst_dummy_property_t{}.view(),
*edge_weight_view,
[] __device__(vertex_t, vertex_t, auto, auto, weight_t w) { return w < 0.0; });
CUGRAPH_EXPECTS(
num_negative_edge_weights == 0,
"Invalid input argument: input edge weights should have non-negative values.");
}
}

if (edge_weight_view) {
// FIXME: need implementation, similar to unweighted
// Use compute_out_weight_sums instead of compute_out_degrees
// Sum up for each common edge compute (u,a,v): min weight ((u,a), (a,v)) and
// max weight((u,a), (a,v)).
// Use these to compute weighted score
//
CUGRAPH_FAIL("weighted similarity computations are not supported in this release");
naimnv marked this conversation as resolved.
Show resolved Hide resolved
rmm::device_uvector<weight_t> similarity_score(num_vertex_pairs, handle.get_stream());
rmm::device_uvector<weight_t> weighted_out_degrees =
compute_out_weight_sums(handle, graph_view, *edge_weight_view);

per_v_pair_transform_dst_nbr_intersection(
handle,
graph_view,
*edge_weight_view,
vertex_pairs_begin,
vertex_pairs_begin + num_vertex_pairs,
weighted_out_degrees.begin(),
[functor] __device__(auto a,
auto b,
auto weight_a,
auto weight_b,
auto intersection,
auto intersected_properties_a,
auto intersected_properties_b) {
weight_t sum_of_min_weight_a_intersect_b = weight_t{0};
weight_t sum_of_max_weight_a_intersect_b = weight_t{0};
weight_t sum_of_intersected_a = weight_t{0};
weight_t sum_of_intersected_b = weight_t{0};

for (size_t k = 0; k < intersection.size(); k++) {
naimnv marked this conversation as resolved.
Show resolved Hide resolved
sum_of_min_weight_a_intersect_b +=
std::min(intersected_properties_a[k], intersected_properties_b[k]);
sum_of_max_weight_a_intersect_b +=
std::max(intersected_properties_a[k], intersected_properties_b[k]);
sum_of_intersected_a += intersected_properties_a[k];
sum_of_intersected_b += intersected_properties_b[k];
}
naimnv marked this conversation as resolved.
Show resolved Hide resolved

weight_t sum_of_uniq_a = weight_a - sum_of_intersected_a;
weight_t sum_of_uniq_b = weight_b - sum_of_intersected_b;

sum_of_max_weight_a_intersect_b += sum_of_uniq_a + sum_of_uniq_b;

return functor.compute_score(static_cast<weight_t>(weight_a),
static_cast<weight_t>(weight_b),
static_cast<weight_t>(sum_of_min_weight_a_intersect_b),
static_cast<weight_t>(sum_of_max_weight_a_intersect_b));
},
similarity_score.begin(),
do_expensive_check);

return similarity_score;
} else {
rmm::device_uvector<weight_t> similarity_score(num_vertex_pairs, handle.get_stream());

//
// Compute vertex_degree for all vertices, then distribute to each GPU.
// Need to use this instead of the dummy properties below
//
auto out_degrees = graph_view.compute_out_degrees(handle);

per_v_pair_transform_dst_nbr_intersection(
handle,
graph_view,
cugraph::edge_dummy_property_t{}.view(),
vertex_pairs_begin,
vertex_pairs_begin + num_vertex_pairs,
out_degrees.begin(),
[functor] __device__(auto v1, auto v2, auto v1_degree, auto v2_degree, auto intersection) {
return functor.compute_score(static_cast<weight_t>(v1_degree),
static_cast<weight_t>(v2_degree),
static_cast<weight_t>(intersection.size()));
[functor] __device__(
auto v1, auto v2, auto v1_degree, auto v2_degree, auto intersection, auto, auto) {
return functor.compute_score(
static_cast<weight_t>(v1_degree),
static_cast<weight_t>(v2_degree),
static_cast<weight_t>(intersection.size()),
static_cast<weight_t>(v1_degree + v2_degree - intersection.size()));
},
similarity_score.begin(),
do_expensive_check);
Expand Down
37 changes: 11 additions & 26 deletions cpp/src/link_prediction/sorensen_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -24,22 +24,15 @@ namespace cugraph {
namespace detail {

struct sorensen_functor_t {
template <typename weight_t>
weight_t __device__ compute_score(weight_t cardinality_a,
weight_t cardinality_b,
weight_t cardinality_a_intersect_b) const
{
return (2 * cardinality_a_intersect_b) / (cardinality_a + cardinality_b);
}
};

struct weighted_sorensen_functor_t {
template <typename weight_t>
weight_t __device__ compute_score(weight_t weight_a,
weight_t weight_b,
weight_t min_weight_a_intersect_b) const
weight_t weight_a_intersect_b,
weight_t weight_a_union_b) const
{
return (2 * min_weight_a_intersect_b) / (weight_a + weight_b);
return (weight_a + weight_b) <= std::numeric_limits<weight_t>::min()
? weight_t{0}
: (2 * weight_a_intersect_b) / (weight_a + weight_b);
}
};

Expand All @@ -55,20 +48,12 @@ rmm::device_uvector<weight_t> sorensen_coefficients(
{
CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented.");

if (!edge_weight_view)
return detail::similarity(handle,
graph_view,
edge_weight_view,
vertex_pairs,
detail::sorensen_functor_t{},
do_expensive_check);
else
return detail::similarity(handle,
graph_view,
edge_weight_view,
vertex_pairs,
detail::weighted_sorensen_functor_t{},
do_expensive_check);
return detail::similarity(handle,
graph_view,
edge_weight_view,
vertex_pairs,
detail::sorensen_functor_t{},
do_expensive_check);
}

} // namespace cugraph
78 changes: 1 addition & 77 deletions cpp/src/prims/detail/extract_transform_v_frontier_e.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/
#pragma once

#include <prims/detail/optional_dataframe_buffer.hpp>
#include <prims/property_op_utils.cuh>

#include <cugraph/edge_partition_device_view.cuh>
Expand Down Expand Up @@ -60,83 +61,6 @@ namespace detail {

int32_t constexpr extract_transform_v_frontier_e_kernel_block_size = 512;

// Type trait that maps the iterator type of an optional dataframe buffer to the value type stored
// in that buffer, exposed as the nested alias `value`. The primary template is only declared;
// exactly one of the two partial specializations below is selected via std::enable_if_t.
//
// we cannot use thrust::iterator_traits<Iterator>::value_type if Iterator is void* (reference to
// void is not allowed)
template <typename Iterator, typename Enable = void>
struct optional_dataframe_buffer_value_type_t;

// Iterator is anything other than void*: `value` is the iterator's
// thrust::iterator_traits<Iterator>::value_type.
template <typename Iterator>
struct optional_dataframe_buffer_value_type_t<Iterator,
std::enable_if_t<!std::is_same_v<Iterator, void*>>> {
using value = typename thrust::iterator_traits<Iterator>::value_type;
};

// Iterator is exactly void*: `value` is void, and thrust::iterator_traits is never instantiated.
template <typename Iterator>
struct optional_dataframe_buffer_value_type_t<Iterator,
std::enable_if_t<std::is_same_v<Iterator, void*>>> {
using value = void;
};

// Allocates the storage behind an optional dataframe buffer holding elements of type T.
//
// T != void: forwards to allocate_dataframe_buffer<T>(size, stream) and returns whatever that
// call returns.
template <typename T, std::enable_if_t<!std::is_same_v<T, void>>* = nullptr>
auto allocate_optional_dataframe_buffer(size_t size, rmm::cuda_stream_view stream)
{
  return allocate_dataframe_buffer<T>(size, stream);
}

// T == void: nothing is allocated. A zero-valued std::byte is returned as a placeholder so the
// caller still receives an object; `size` and `stream` are not used.
template <typename T, std::enable_if_t<std::is_same_v<T, void>>* = nullptr>
std::byte allocate_optional_dataframe_buffer(size_t size, rmm::cuda_stream_view stream)
{
  std::byte const placeholder{0};
  return placeholder;
}

template <typename T, std::enable_if_t<std::is_same_v<T, void>>* = nullptr>
void* get_optional_dataframe_buffer_begin(std::byte& optional_dataframe_buffer)
{
return static_cast<void*>(nullptr);
}

template <typename T, std::enable_if_t<!std::is_same_v<T, void>>* = nullptr>
auto get_optional_dataframe_buffer_begin(
std::add_lvalue_reference_t<decltype(allocate_dataframe_buffer<T>(
size_t{0}, rmm::cuda_stream_view{}))> optional_dataframe_buffer)
{
return get_dataframe_buffer_begin(optional_dataframe_buffer);
}

// Resizes an optional dataframe buffer to new_buffer_size elements.
//
// T != void: forwards to resize_dataframe_buffer() on the real buffer.
template <typename T, std::enable_if_t<!std::is_same_v<T, void>>* = nullptr>
void resize_optional_dataframe_buffer(
  std::add_lvalue_reference_t<decltype(allocate_dataframe_buffer<T>(
    size_t{0}, rmm::cuda_stream_view{}))> optional_dataframe_buffer,
  size_t new_buffer_size,
  rmm::cuda_stream_view stream_view)
{
  resize_dataframe_buffer(optional_dataframe_buffer, new_buffer_size, stream_view);
}

// T == void: the buffer is only a std::byte placeholder, so this is a no-op.
template <typename T, std::enable_if_t<std::is_same_v<T, void>>* = nullptr>
void resize_optional_dataframe_buffer(std::byte& optional_dataframe_buffer,
                                      size_t new_buffer_size,
                                      rmm::cuda_stream_view stream_view)
{
}

// Shrink-to-fit request for an optional dataframe buffer.
//
// T != void: forwards to shrink_to_fit_dataframe_buffer() on the real buffer.
template <typename T, std::enable_if_t<!std::is_same_v<T, void>>* = nullptr>
void shrink_to_fit_optional_dataframe_buffer(
  std::add_lvalue_reference_t<decltype(allocate_dataframe_buffer<T>(
    size_t{0}, rmm::cuda_stream_view{}))> optional_dataframe_buffer,
  rmm::cuda_stream_view stream_view)
{
  shrink_to_fit_dataframe_buffer(optional_dataframe_buffer, stream_view);
}

// T == void: the buffer is only a std::byte placeholder, so this is a no-op.
template <typename T, std::enable_if_t<std::is_same_v<T, void>>* = nullptr>
void shrink_to_fit_optional_dataframe_buffer(std::byte& optional_dataframe_buffer,
                                             rmm::cuda_stream_view stream_view)
{
}

template <typename e_op_result_t,
typename BufferKeyOutputIterator,
typename BufferValueOutputIterator>
Expand Down
Loading