From b2ae4a7899af967d5aeabb31f91c86fe8d20ddf3 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Fri, 14 Jul 2023 22:18:38 -0700 Subject: [PATCH 1/8] update docstrings reflecting that unrenumbered datasets are not supported --- python/cugraph/cugraph/link_prediction/jaccard.py | 8 ++++++++ python/cugraph/cugraph/link_prediction/overlap.py | 8 ++++++++ python/cugraph/cugraph/link_prediction/sorensen.py | 8 ++++++++ python/cugraph/cugraph/link_prediction/wjaccard.py | 4 ++++ python/cugraph/cugraph/link_prediction/woverlap.py | 4 ++++ python/cugraph/cugraph/link_prediction/wsorensen.py | 4 ++++ 6 files changed, 36 insertions(+) diff --git a/python/cugraph/cugraph/link_prediction/jaccard.py b/python/cugraph/cugraph/link_prediction/jaccard.py index 1c4fed7a8f9..d80bf40dc61 100644 --- a/python/cugraph/cugraph/link_prediction/jaccard.py +++ b/python/cugraph/cugraph/link_prediction/jaccard.py @@ -36,6 +36,10 @@ def jaccard(input_graph, vertex_pair=None): of cugraph.jaccard is different from the behavior of networkx.jaccard_coefficient. + This algorithm doesn't currently support datasets with vertices that + are not (re)numebred vertices from 0 to V-1 where V is the total number of + vertices as this creates isolated vertices. + cugraph.jaccard, in the absence of a specified vertex pair list, will use the edges of the graph to construct a vertex pair list and will return the jaccard coefficient for those vertex pairs. @@ -124,6 +128,10 @@ def jaccard_coefficient(G, ebunch=None): """ For NetworkX Compatability. See `jaccard` + NOTE: This algorithm doesn't currently support datasets with vertices that + are not (re)numebred vertices from 0 to V-1 where V is the total number of + vertices as this creates isolated vertices. + Parameters ---------- graph : cugraph.Graph diff --git a/python/cugraph/cugraph/link_prediction/overlap.py b/python/cugraph/cugraph/link_prediction/overlap.py index ba9f225062e..1151864376e 100644 --- a/python/cugraph/cugraph/link_prediction/overlap.py +++ b/python/cugraph/cugraph/link_prediction/overlap.py @@ -24,6 +24,10 @@ def overlap_coefficient(G, ebunch=None): """ For NetworkX Compatability. See `overlap` + NOTE: This algorithm doesn't currently support datasets with vertices that + are not (re)numebred vertices from 0 to V-1 where V is the total number of + vertices as this creates isolated vertices. + """ vertex_pair = None @@ -54,6 +58,10 @@ def overlap(input_graph, vertex_pair=None): neighbors. If first is specified but second is not, or vice versa, an exception will be thrown. + NOTE: This algorithm doesn't currently support datasets with vertices that + are not (re)numebred vertices from 0 to V-1 where V is the total number of + vertices as this creates isolated vertices. + Parameters ---------- input_graph : cugraph.Graph diff --git a/python/cugraph/cugraph/link_prediction/sorensen.py b/python/cugraph/cugraph/link_prediction/sorensen.py index 20238e10464..4305c9dfc86 100644 --- a/python/cugraph/cugraph/link_prediction/sorensen.py +++ b/python/cugraph/cugraph/link_prediction/sorensen.py @@ -30,6 +30,10 @@ def sorensen(input_graph, vertex_pair=None): If first is specified but second is not, or vice versa, an exception will be thrown. + NOTE: This algorithm doesn't currently support datasets with vertices that + are not (re)numebred vertices from 0 to V-1 where V is the total number of + vertices as this creates isolated vertices. + cugraph.sorensen, in the absence of a specified vertex pair list, will use the edges of the graph to construct a vertex pair list and will return the sorensen coefficient for those vertex pairs. @@ -98,6 +102,10 @@ def sorensen_coefficient(G, ebunch=None): """ For NetworkX Compatability. See `sorensen` + NOTE: This algorithm doesn't currently support datasets with vertices that + are not (re)numebred vertices from 0 to V-1 where V is the total number of + vertices as this creates isolated vertices. + Parameters ---------- G : cugraph.Graph diff --git a/python/cugraph/cugraph/link_prediction/wjaccard.py b/python/cugraph/cugraph/link_prediction/wjaccard.py index b8ef33d926f..31ce8e99a0b 100644 --- a/python/cugraph/cugraph/link_prediction/wjaccard.py +++ b/python/cugraph/cugraph/link_prediction/wjaccard.py @@ -29,6 +29,10 @@ def jaccard_w(input_graph, weights, vertex_pair=None): neighbors. If first is specified but second is not, or vice versa, an exception will be thrown. + NOTE: This algorithm doesn't currently support datasets with vertices that + are not (re)numebred vertices from 0 to V-1 where V is the total number of + vertices as this creates isolated vertices. + Parameters ---------- input_graph : cugraph.Graph diff --git a/python/cugraph/cugraph/link_prediction/woverlap.py b/python/cugraph/cugraph/link_prediction/woverlap.py index c7d4f56a428..1f5ba0e35a9 100644 --- a/python/cugraph/cugraph/link_prediction/woverlap.py +++ b/python/cugraph/cugraph/link_prediction/woverlap.py @@ -28,6 +28,10 @@ def overlap_w(input_graph, weights, vertex_pair=None): neighbors. If first is specified but second is not, or vice versa, an exception will be thrown. + NOTE: This algorithm doesn't currently support datasets with vertices that + are not (re)numebred vertices from 0 to V-1 where V is the total number of + vertices as this creates isolated vertices. + Parameters ---------- input_graph : cugraph.Graph diff --git a/python/cugraph/cugraph/link_prediction/wsorensen.py b/python/cugraph/cugraph/link_prediction/wsorensen.py index c017463a294..019693b2793 100644 --- a/python/cugraph/cugraph/link_prediction/wsorensen.py +++ b/python/cugraph/cugraph/link_prediction/wsorensen.py @@ -24,6 +24,10 @@ def sorensen_w(input_graph, weights, vertex_pair=None): the user. Sorensen coefficient is defined between two sets as the ratio of twice the volume of their intersection divided by the volume of each set. + NOTE: This algorithm doesn't currently support datasets with vertices that + are not (re)numebred vertices from 0 to V-1 where V is the total number of + vertices as this creates isolated vertices. + Parameters ---------- input_graph : cugraph.Graph From c9ec7a04de988ad65d0e65d57e9d83700e13b095 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Fri, 14 Jul 2023 23:32:36 -0700 Subject: [PATCH 2/8] add check ensuring that the vertices are renumbered --- .../cugraph/cugraph/link_prediction/jaccard.py | 16 ++++++++++++++-- .../cugraph/cugraph/link_prediction/overlap.py | 15 +++++++++++++-- .../cugraph/cugraph/link_prediction/sorensen.py | 16 ++++++++++++++-- .../cugraph/cugraph/link_prediction/wjaccard.py | 14 +++++++++++++- .../cugraph/cugraph/link_prediction/woverlap.py | 13 ++++++++++++- .../cugraph/cugraph/link_prediction/wsorensen.py | 14 +++++++++++++- 6 files changed, 79 insertions(+), 9 deletions(-) diff --git a/python/cugraph/cugraph/link_prediction/jaccard.py b/python/cugraph/cugraph/link_prediction/jaccard.py index d80bf40dc61..da32c95efc9 100644 --- a/python/cugraph/cugraph/link_prediction/jaccard.py +++ b/python/cugraph/cugraph/link_prediction/jaccard.py @@ -20,7 +20,7 @@ ) -def jaccard(input_graph, vertex_pair=None): +def jaccard(input_graph, vertex_pair=None, do_expensive_check=True): """ Compute the Jaccard similarity between each pair of vertices connected by an edge, or between arbitrary pairs of vertices specified by the user. @@ -108,6 +108,18 @@ def jaccard(input_graph, vertex_pair=None): >>> df = cugraph.jaccard(G) """ + if do_expensive_check: + if not input_graph.renumbered: + input_df = input_graph.edgelist.edgelist_df + max_vertex = input_df.max().max() + expected_nodes = cudf.Series(range(0, max_vertex + 1 ,1)).astype( + input_df.dtypes[0]) + nodes = cudf.concat( + [input_df["src"], input_df["dst"]] + ).unique().sort_values().reset_index(drop=True) + if not expected_nodes.equals(nodes): + raise RuntimeError("Unrenumbered vertices are not supported.") + if input_graph.is_directed(): raise ValueError("Input must be an undirected Graph.") if type(vertex_pair) == cudf.DataFrame: @@ -124,7 +136,7 @@ def jaccard(input_graph, vertex_pair=None): return df -def jaccard_coefficient(G, ebunch=None): +def jaccard_coefficient(G, ebunch=None, do_expensive_check=True): """ For NetworkX Compatability. See `jaccard` diff --git a/python/cugraph/cugraph/link_prediction/overlap.py b/python/cugraph/cugraph/link_prediction/overlap.py index 1151864376e..784e1302055 100644 --- a/python/cugraph/cugraph/link_prediction/overlap.py +++ b/python/cugraph/cugraph/link_prediction/overlap.py @@ -20,7 +20,7 @@ ) -def overlap_coefficient(G, ebunch=None): +def overlap_coefficient(G, ebunch=None, do_expensive_check=True): """ For NetworkX Compatability. See `overlap` @@ -46,7 +46,7 @@ def overlap_coefficient(G, ebunch=None): return df -def overlap(input_graph, vertex_pair=None): +def overlap(input_graph, vertex_pair=None, do_expensive_check=True): """ Compute the Overlap Coefficient between each pair of vertices connected by an edge, or between arbitrary pairs of vertices specified by the user. @@ -98,6 +98,17 @@ def overlap(input_graph, vertex_pair=None): >>> df = cugraph.overlap(G) """ + if do_expensive_check: + if not input_graph.renumbered: + input_df = input_graph.edgelist.edgelist_df + max_vertex = input_df.max().max() + expected_nodes = cudf.Series(range(0, max_vertex + 1 ,1)).astype( + input_df.dtypes[0]) + nodes = cudf.concat( + [input_df["src"], input_df["dst"]] + ).unique().sort_values().reset_index(drop=True) + if not expected_nodes.equals(nodes): + raise RuntimeError("Unrenumbered vertices are not supported.") if type(vertex_pair) == cudf.DataFrame: vertex_pair = renumber_vertex_pair(input_graph, vertex_pair) diff --git a/python/cugraph/cugraph/link_prediction/sorensen.py b/python/cugraph/cugraph/link_prediction/sorensen.py index 4305c9dfc86..916032ee66a 100644 --- a/python/cugraph/cugraph/link_prediction/sorensen.py +++ b/python/cugraph/cugraph/link_prediction/sorensen.py @@ -21,7 +21,7 @@ ) -def sorensen(input_graph, vertex_pair=None): +def sorensen(input_graph, vertex_pair=None, do_expensive_check=True): """ Compute the Sorensen coefficient between each pair of vertices connected by an edge, or between arbitrary pairs of vertices specified by the user. @@ -80,6 +80,18 @@ def sorensen(input_graph, vertex_pair=None): >>> df = cugraph.sorensen(G) """ + if do_expensive_check: + if not input_graph.renumbered: + input_df = input_graph.edgelist.edgelist_df + max_vertex = input_df.max().max() + expected_nodes = cudf.Series(range(0, max_vertex + 1 ,1)).astype( + input_df.dtypes[0]) + nodes = cudf.concat( + [input_df["src"], input_df["dst"]] + ).unique().sort_values().reset_index(drop=True) + if not expected_nodes.equals(nodes): + raise RuntimeError("Unrenumbered vertices are not supported.") + if type(input_graph) is not Graph: raise TypeError("input graph must a Graph") @@ -98,7 +110,7 @@ def sorensen(input_graph, vertex_pair=None): return df -def sorensen_coefficient(G, ebunch=None): +def sorensen_coefficient(G, ebunch=None, do_expensive_check=True): """ For NetworkX Compatability. See `sorensen` diff --git a/python/cugraph/cugraph/link_prediction/wjaccard.py b/python/cugraph/cugraph/link_prediction/wjaccard.py index 31ce8e99a0b..792aaaeb524 100644 --- a/python/cugraph/cugraph/link_prediction/wjaccard.py +++ b/python/cugraph/cugraph/link_prediction/wjaccard.py @@ -17,7 +17,7 @@ from cugraph.utilities import renumber_vertex_pair -def jaccard_w(input_graph, weights, vertex_pair=None): +def jaccard_w(input_graph, weights, vertex_pair=None, do_expensive_check=True): """ Compute the weighted Jaccard similarity between each pair of vertices connected by an edge, or between arbitrary pairs of vertices specified by @@ -91,6 +91,18 @@ def jaccard_w(input_graph, weights, vertex_pair=None): >>> df = cugraph.jaccard_w(G, weights) """ + if do_expensive_check: + if not input_graph.renumbered: + input_df = input_graph.edgelist.edgelist_df + max_vertex = input_df.max().max() + expected_nodes = cudf.Series(range(0, max_vertex + 1 ,1)).astype( + input_df.dtypes[0]) + nodes = cudf.concat( + [input_df["src"], input_df["dst"]] + ).unique().sort_values().reset_index(drop=True) + if not expected_nodes.equals(nodes): + raise RuntimeError("Unrenumbered vertices are not supported.") + if type(input_graph) is not Graph: raise TypeError("input graph must a Graph") diff --git a/python/cugraph/cugraph/link_prediction/woverlap.py b/python/cugraph/cugraph/link_prediction/woverlap.py index 1f5ba0e35a9..4d758cad891 100644 --- a/python/cugraph/cugraph/link_prediction/woverlap.py +++ b/python/cugraph/cugraph/link_prediction/woverlap.py @@ -16,7 +16,7 @@ from cugraph.utilities import renumber_vertex_pair -def overlap_w(input_graph, weights, vertex_pair=None): +def overlap_w(input_graph, weights, vertex_pair=None, do_expensive_check=True): """ Compute the weighted Overlap Coefficient between each pair of vertices connected by an edge, or between arbitrary pairs of vertices specified by @@ -92,6 +92,17 @@ def overlap_w(input_graph, weights, vertex_pair=None): ... len(weights['vertex']))] >>> df = cugraph.overlap_w(G, weights) """ + if do_expensive_check: + if not input_graph.renumbered: + input_df = input_graph.edgelist.edgelist_df + max_vertex = input_df.max().max() + expected_nodes = cudf.Series(range(0, max_vertex + 1 ,1)).astype( + input_df.dtypes[0]) + nodes = cudf.concat( + [input_df["src"], input_df["dst"]] + ).unique().sort_values().reset_index(drop=True) + if not expected_nodes.equals(nodes): + raise RuntimeError("Unrenumbered vertices are not supported.") if type(vertex_pair) == cudf.DataFrame: vertex_pair = renumber_vertex_pair(input_graph, vertex_pair) diff --git a/python/cugraph/cugraph/link_prediction/wsorensen.py b/python/cugraph/cugraph/link_prediction/wsorensen.py index 019693b2793..a5105eb87fe 100644 --- a/python/cugraph/cugraph/link_prediction/wsorensen.py +++ b/python/cugraph/cugraph/link_prediction/wsorensen.py @@ -17,7 +17,7 @@ from cugraph.utilities import renumber_vertex_pair -def sorensen_w(input_graph, weights, vertex_pair=None): +def sorensen_w(input_graph, weights, vertex_pair=None, do_expensive_check=True): """ Compute the weighted Sorensen similarity between each pair of vertices connected by an edge, or between arbitrary pairs of vertices specified by @@ -89,6 +89,18 @@ def sorensen_w(input_graph, weights, vertex_pair=None): >>> df = cugraph.sorensen_w(G, weights) """ + if do_expensive_check: + if not input_graph.renumbered: + input_df = input_graph.edgelist.edgelist_df + max_vertex = input_df.max().max() + expected_nodes = cudf.Series(range(0, max_vertex + 1 ,1)).astype( + input_df.dtypes[0]) + nodes = cudf.concat( + [input_df["src"], input_df["dst"]] + ).unique().sort_values().reset_index(drop=True) + if not expected_nodes.equals(nodes): + raise RuntimeError("Unrenumbered vertices are not supported.") + if type(input_graph) is not Graph: raise TypeError("input graph must a Graph") From a82307f600cc082adec8a322fcffc86f83524c6c Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Sat, 15 Jul 2023 00:15:16 -0700 Subject: [PATCH 3/8] fix style --- .../cugraph/cugraph/link_prediction/jaccard.py | 2 +- .../cugraph/cugraph/link_prediction/overlap.py | 2 +- .../cugraph/cugraph/link_prediction/sorensen.py | 16 ++++++++++------ .../cugraph/cugraph/link_prediction/wjaccard.py | 2 +- .../cugraph/cugraph/link_prediction/woverlap.py | 16 ++++++++++------ .../cugraph/cugraph/link_prediction/wsorensen.py | 16 ++++++++++------ .../tests/link_prediction/test_jaccard.py | 11 +++++++++++ .../tests/link_prediction/test_overlap.py | 11 +++++++++++ .../tests/link_prediction/test_sorensen.py | 11 +++++++++++ .../tests/link_prediction/test_wjaccard.py | 11 +++++++++++ .../tests/link_prediction/test_woverlap.py | 11 +++++++++++ .../tests/link_prediction/test_wsorensen.py | 11 +++++++++++ 12 files changed, 99 insertions(+), 21 deletions(-) diff --git a/python/cugraph/cugraph/link_prediction/jaccard.py b/python/cugraph/cugraph/link_prediction/jaccard.py index da32c95efc9..f8e0b1fda82 100644 --- a/python/cugraph/cugraph/link_prediction/jaccard.py +++ b/python/cugraph/cugraph/link_prediction/jaccard.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# Copyright (c) 2019-2023, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at diff --git a/python/cugraph/cugraph/link_prediction/overlap.py b/python/cugraph/cugraph/link_prediction/overlap.py index 784e1302055..4763c2b3382 100644 --- a/python/cugraph/cugraph/link_prediction/overlap.py +++ b/python/cugraph/cugraph/link_prediction/overlap.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# Copyright (c) 2019-2023, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at diff --git a/python/cugraph/cugraph/link_prediction/sorensen.py b/python/cugraph/cugraph/link_prediction/sorensen.py index 916032ee66a..4e6714ad21f 100644 --- a/python/cugraph/cugraph/link_prediction/sorensen.py +++ b/python/cugraph/cugraph/link_prediction/sorensen.py @@ -84,13 +84,17 @@ def sorensen(input_graph, vertex_pair=None, do_expensive_check=True): if not input_graph.renumbered: input_df = input_graph.edgelist.edgelist_df max_vertex = input_df.max().max() - expected_nodes = cudf.Series(range(0, max_vertex + 1 ,1)).astype( - input_df.dtypes[0]) - nodes = cudf.concat( - [input_df["src"], input_df["dst"]] - ).unique().sort_values().reset_index(drop=True) + expected_nodes = cudf.Series(range(0, max_vertex + 1, 1)).astype( + input_df.dtypes[0] + ) + nodes = ( + cudf.concat([input_df["src"], input_df["dst"]]) + .unique() + .sort_values() + .reset_index(drop=True) + ) if not expected_nodes.equals(nodes): - raise RuntimeError("Unrenumbered vertices are not supported.") + raise ValueError("Unrenumbered vertices are not supported.") if type(input_graph) is not Graph: raise TypeError("input graph must a Graph") diff --git a/python/cugraph/cugraph/link_prediction/wjaccard.py b/python/cugraph/cugraph/link_prediction/wjaccard.py index 792aaaeb524..9cbebcb93f2 100644 --- a/python/cugraph/cugraph/link_prediction/wjaccard.py +++ b/python/cugraph/cugraph/link_prediction/wjaccard.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# Copyright (c) 2019-2023, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at diff --git a/python/cugraph/cugraph/link_prediction/woverlap.py b/python/cugraph/cugraph/link_prediction/woverlap.py index 4d758cad891..2d50ef3a01a 100644 --- a/python/cugraph/cugraph/link_prediction/woverlap.py +++ b/python/cugraph/cugraph/link_prediction/woverlap.py @@ -96,13 +96,17 @@ def overlap_w(input_graph, weights, vertex_pair=None, do_expensive_check=True): if not input_graph.renumbered: input_df = input_graph.edgelist.edgelist_df max_vertex = input_df.max().max() - expected_nodes = cudf.Series(range(0, max_vertex + 1 ,1)).astype( - input_df.dtypes[0]) - nodes = cudf.concat( - [input_df["src"], input_df["dst"]] - ).unique().sort_values().reset_index(drop=True) + expected_nodes = cudf.Series(range(0, max_vertex + 1, 1)).astype( + input_df.dtypes[0] + ) + nodes = ( + cudf.concat([input_df["src"], input_df["dst"]]) + .unique() + .sort_values() + .reset_index(drop=True) + ) if not expected_nodes.equals(nodes): - raise RuntimeError("Unrenumbered vertices are not supported.") + raise ValueError("Unrenumbered vertices are not supported.") if type(vertex_pair) == cudf.DataFrame: vertex_pair = renumber_vertex_pair(input_graph, vertex_pair) diff --git a/python/cugraph/cugraph/link_prediction/wsorensen.py b/python/cugraph/cugraph/link_prediction/wsorensen.py index a5105eb87fe..23ad418a9a1 100644 --- a/python/cugraph/cugraph/link_prediction/wsorensen.py +++ b/python/cugraph/cugraph/link_prediction/wsorensen.py @@ -93,13 +93,17 @@ def sorensen_w(input_graph, weights, vertex_pair=None, do_expensive_check=True): if not input_graph.renumbered: input_df = input_graph.edgelist.edgelist_df max_vertex = input_df.max().max() - expected_nodes = cudf.Series(range(0, max_vertex + 1 ,1)).astype( - input_df.dtypes[0]) - nodes = cudf.concat( - [input_df["src"], input_df["dst"]] - ).unique().sort_values().reset_index(drop=True) + expected_nodes = cudf.Series(range(0, max_vertex + 1, 1)).astype( + input_df.dtypes[0] + ) + nodes = ( + cudf.concat([input_df["src"], input_df["dst"]]) + .unique() + .sort_values() + .reset_index(drop=True) + ) if not expected_nodes.equals(nodes): - raise RuntimeError("Unrenumbered vertices are not supported.") + raise ValueError("Unrenumbered vertices are not supported.") if type(input_graph) is not Graph: raise TypeError("input graph must a Graph") diff --git a/python/cugraph/cugraph/tests/link_prediction/test_jaccard.py b/python/cugraph/cugraph/tests/link_prediction/test_jaccard.py index b04c4c741b1..82c6cd7894d 100644 --- a/python/cugraph/cugraph/tests/link_prediction/test_jaccard.py +++ b/python/cugraph/cugraph/tests/link_prediction/test_jaccard.py @@ -326,3 +326,14 @@ def test_weighted_exp_jaccard(): use_weight = True with pytest.raises(ValueError): exp_jaccard(G, use_weight=use_weight) + + +@pytest.mark.sg +def test_invalid_datasets_jaccard(): + karate = DATASETS_UNDIRECTED[0] + df = karate.get_edgelist() + df = df.add(1) + G = cugraph.Graph(directed=False) + G.from_cudf_edgelist(df, source="src", destination="dst") + with pytest.raises(ValueError): + cugraph.jaccard(G) diff --git a/python/cugraph/cugraph/tests/link_prediction/test_overlap.py b/python/cugraph/cugraph/tests/link_prediction/test_overlap.py index 68f879dacdb..03bee451f3c 100644 --- a/python/cugraph/cugraph/tests/link_prediction/test_overlap.py +++ b/python/cugraph/cugraph/tests/link_prediction/test_overlap.py @@ -225,3 +225,14 @@ def test_weighted_exp_overlap(): use_weight = True with pytest.raises(ValueError): exp_overlap(G, use_weight=use_weight) + + +@pytest.mark.sg +def test_invalid_datasets_overlap(): + karate = DATASETS_UNDIRECTED[0] + df = karate.get_edgelist() + df = df.add(1) + G = cugraph.Graph(directed=False) + G.from_cudf_edgelist(df, source="src", destination="dst") + with pytest.raises(ValueError): + cugraph.overlap(G) diff --git a/python/cugraph/cugraph/tests/link_prediction/test_sorensen.py b/python/cugraph/cugraph/tests/link_prediction/test_sorensen.py index 3457627ed7d..ffb5aed5a95 100644 --- a/python/cugraph/cugraph/tests/link_prediction/test_sorensen.py +++ b/python/cugraph/cugraph/tests/link_prediction/test_sorensen.py @@ -288,3 +288,14 @@ def test_weighted_exp_sorensen(): use_weight = True with pytest.raises(ValueError): exp_sorensen(G, use_weight=use_weight) + + +@pytest.mark.sg +def test_invalid_datasets_sorensen(): + karate = DATASETS_UNDIRECTED[0] + df = karate.get_edgelist() + df = df.add(1) + G = cugraph.Graph(directed=False) + G.from_cudf_edgelist(df, source="src", destination="dst") + with pytest.raises(ValueError): + cugraph.sorensen(G) diff --git a/python/cugraph/cugraph/tests/link_prediction/test_wjaccard.py b/python/cugraph/cugraph/tests/link_prediction/test_wjaccard.py index 22ace93c0e4..7a7b3668dda 100644 --- a/python/cugraph/cugraph/tests/link_prediction/test_wjaccard.py +++ b/python/cugraph/cugraph/tests/link_prediction/test_wjaccard.py @@ -176,3 +176,14 @@ def test_wjaccard_multi_column(read_csv): actual = df_res.sort_values("0_first").reset_index() expected = df_exp.sort_values("first").reset_index() assert_series_equal(actual["jaccard_coeff"], expected["jaccard_coeff"]) + + +@pytest.mark.sg +def test_invalid_datasets_jaccard_w(): + karate = DATASETS_UNDIRECTED[0] + df = karate.get_edgelist() + df = df.add(1) + G = cugraph.Graph(directed=False) + G.from_cudf_edgelist(df, source="src", destination="dst") + with pytest.raises(ValueError): + cugraph.jaccard_w(G) diff --git a/python/cugraph/cugraph/tests/link_prediction/test_woverlap.py b/python/cugraph/cugraph/tests/link_prediction/test_woverlap.py index f4fab9d0faa..070016011bc 100644 --- a/python/cugraph/cugraph/tests/link_prediction/test_woverlap.py +++ b/python/cugraph/cugraph/tests/link_prediction/test_woverlap.py @@ -159,3 +159,14 @@ def test_woverlap_multi_column(graph_file): actual = df_res.sort_values("0_first").reset_index() expected = df_exp.sort_values("first").reset_index() assert_series_equal(actual["overlap_coeff"], expected["overlap_coeff"]) + + +@pytest.mark.sg +def test_invalid_datasets_overlap_w(): + karate = DATASETS_UNDIRECTED[0] + df = karate.get_edgelist() + df = df.add(1) + G = cugraph.Graph(directed=False) + G.from_cudf_edgelist(df, source="src", destination="dst") + with pytest.raises(ValueError): + cugraph.overlap_w(G) diff --git a/python/cugraph/cugraph/tests/link_prediction/test_wsorensen.py b/python/cugraph/cugraph/tests/link_prediction/test_wsorensen.py index 0cf775d666c..9febe318a5c 100644 --- a/python/cugraph/cugraph/tests/link_prediction/test_wsorensen.py +++ b/python/cugraph/cugraph/tests/link_prediction/test_wsorensen.py @@ -180,3 +180,14 @@ def test_wsorensen_multi_column(read_csv): actual = df_res.sort_values("0_first").reset_index() expected = df_exp.sort_values("first").reset_index() assert_series_equal(actual["sorensen_coeff"], expected["sorensen_coeff"]) + + +@pytest.mark.sg +def test_invalid_datasets_sorensen_w(): + karate = DATASETS_UNDIRECTED[0] + df = karate.get_edgelist() + df = df.add(1) + G = cugraph.Graph(directed=False) + G.from_cudf_edgelist(df, source="src", destination="dst") + with pytest.raises(ValueError): + cugraph.sorensen_w(G) From 818fd487635279f3e99ecf55c00ea6c21401eb41 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Sat, 15 Jul 2023 00:25:58 -0700 Subject: [PATCH 4/8] fix dtype error --- .../cugraph/cugraph/link_prediction/jaccard.py | 6 +++--- .../cugraph/cugraph/link_prediction/overlap.py | 5 +++-- .../cugraph/cugraph/link_prediction/sorensen.py | 17 +++++++---------- .../cugraph/cugraph/link_prediction/wjaccard.py | 5 +++-- .../cugraph/cugraph/link_prediction/woverlap.py | 17 +++++++---------- .../cugraph/link_prediction/wsorensen.py | 17 +++++++---------- 6 files changed, 30 insertions(+), 37 deletions(-) diff --git a/python/cugraph/cugraph/link_prediction/jaccard.py b/python/cugraph/cugraph/link_prediction/jaccard.py index f8e0b1fda82..f21180c69dd 100644 --- a/python/cugraph/cugraph/link_prediction/jaccard.py +++ b/python/cugraph/cugraph/link_prediction/jaccard.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -110,7 +110,7 @@ def jaccard(input_graph, vertex_pair=None, do_expensive_check=True): """ if do_expensive_check: if not input_graph.renumbered: - input_df = input_graph.edgelist.edgelist_df + input_df = input_graph.edgelist.edgelist_df[["src", "dst"]] max_vertex = input_df.max().max() expected_nodes = cudf.Series(range(0, max_vertex + 1 ,1)).astype( input_df.dtypes[0]) @@ -118,7 +118,7 @@ def jaccard(input_graph, vertex_pair=None, do_expensive_check=True): [input_df["src"], input_df["dst"]] ).unique().sort_values().reset_index(drop=True) if not expected_nodes.equals(nodes): - raise RuntimeError("Unrenumbered vertices are not supported.") + raise ValueError("Unrenumbered vertices are not supported.") if input_graph.is_directed(): raise ValueError("Input must be an undirected Graph.") diff --git a/python/cugraph/cugraph/link_prediction/overlap.py b/python/cugraph/cugraph/link_prediction/overlap.py index 4763c2b3382..85b6e9e4a48 100644 --- a/python/cugraph/cugraph/link_prediction/overlap.py +++ b/python/cugraph/cugraph/link_prediction/overlap.py @@ -100,7 +100,7 @@ def overlap(input_graph, vertex_pair=None, do_expensive_check=True): """ if do_expensive_check: if not input_graph.renumbered: - input_df = input_graph.edgelist.edgelist_df + input_df = input_graph.edgelist.edgelist_df[["src", "dst"]] max_vertex = input_df.max().max() expected_nodes = cudf.Series(range(0, max_vertex + 1 ,1)).astype( input_df.dtypes[0]) @@ -108,7 +108,8 @@ def overlap(input_graph, vertex_pair=None, do_expensive_check=True): [input_df["src"], input_df["dst"]] ).unique().sort_values().reset_index(drop=True) if not expected_nodes.equals(nodes): - raise RuntimeError("Unrenumbered vertices are not supported.") + raise ValueError("Unrenumbered vertices are not supported.") + if type(vertex_pair) == cudf.DataFrame: vertex_pair = renumber_vertex_pair(input_graph, vertex_pair) diff --git a/python/cugraph/cugraph/link_prediction/sorensen.py b/python/cugraph/cugraph/link_prediction/sorensen.py index 4e6714ad21f..e5481291e53 100644 --- a/python/cugraph/cugraph/link_prediction/sorensen.py +++ b/python/cugraph/cugraph/link_prediction/sorensen.py @@ -82,20 +82,17 @@ def sorensen(input_graph, vertex_pair=None, do_expensive_check=True): """ if do_expensive_check: if not input_graph.renumbered: - input_df = input_graph.edgelist.edgelist_df + input_df = input_graph.edgelist.edgelist_df[["src", "dst"]] max_vertex = input_df.max().max() - expected_nodes = cudf.Series(range(0, max_vertex + 1, 1)).astype( - input_df.dtypes[0] - ) - nodes = ( - cudf.concat([input_df["src"], input_df["dst"]]) - .unique() - .sort_values() - .reset_index(drop=True) - ) + expected_nodes = cudf.Series(range(0, max_vertex + 1 ,1)).astype( + input_df.dtypes[0]) + nodes = cudf.concat( + [input_df["src"], input_df["dst"]] + ).unique().sort_values().reset_index(drop=True) if not expected_nodes.equals(nodes): raise ValueError("Unrenumbered vertices are not supported.") + if type(input_graph) is not Graph: raise TypeError("input graph must a Graph") diff --git a/python/cugraph/cugraph/link_prediction/wjaccard.py b/python/cugraph/cugraph/link_prediction/wjaccard.py index 9cbebcb93f2..eabf9898e1b 100644 --- a/python/cugraph/cugraph/link_prediction/wjaccard.py +++ b/python/cugraph/cugraph/link_prediction/wjaccard.py @@ -93,7 +93,7 @@ def jaccard_w(input_graph, weights, vertex_pair=None, do_expensive_check=True): """ if do_expensive_check: if not input_graph.renumbered: - input_df = input_graph.edgelist.edgelist_df + input_df = input_graph.edgelist.edgelist_df[["src", "dst"]] max_vertex = input_df.max().max() expected_nodes = cudf.Series(range(0, max_vertex + 1 ,1)).astype( input_df.dtypes[0]) @@ -101,7 +101,8 @@ def jaccard_w(input_graph, weights, vertex_pair=None, do_expensive_check=True): [input_df["src"], input_df["dst"]] ).unique().sort_values().reset_index(drop=True) if not expected_nodes.equals(nodes): - raise RuntimeError("Unrenumbered vertices are not supported.") + raise ValueError("Unrenumbered vertices are not supported.") + if type(input_graph) is not Graph: raise TypeError("input graph must a Graph") diff --git a/python/cugraph/cugraph/link_prediction/woverlap.py b/python/cugraph/cugraph/link_prediction/woverlap.py index 2d50ef3a01a..9a19546479b 100644 --- a/python/cugraph/cugraph/link_prediction/woverlap.py +++ b/python/cugraph/cugraph/link_prediction/woverlap.py @@ -94,20 +94,17 @@ def overlap_w(input_graph, weights, vertex_pair=None, do_expensive_check=True): """ if do_expensive_check: if not input_graph.renumbered: - input_df = input_graph.edgelist.edgelist_df + input_df = input_graph.edgelist.edgelist_df[["src", "dst"]] max_vertex = input_df.max().max() - expected_nodes = cudf.Series(range(0, max_vertex + 1, 1)).astype( - input_df.dtypes[0] - ) - nodes = ( - cudf.concat([input_df["src"], input_df["dst"]]) - .unique() - .sort_values() - .reset_index(drop=True) - ) + expected_nodes = cudf.Series(range(0, max_vertex + 1 ,1)).astype( + input_df.dtypes[0]) + nodes = cudf.concat( + [input_df["src"], input_df["dst"]] + ).unique().sort_values().reset_index(drop=True) if not expected_nodes.equals(nodes): raise ValueError("Unrenumbered vertices are not supported.") + if type(vertex_pair) == cudf.DataFrame: vertex_pair = renumber_vertex_pair(input_graph, vertex_pair) elif vertex_pair is not None: diff --git a/python/cugraph/cugraph/link_prediction/wsorensen.py b/python/cugraph/cugraph/link_prediction/wsorensen.py index 23ad418a9a1..689c2b080eb 100644 --- a/python/cugraph/cugraph/link_prediction/wsorensen.py +++ b/python/cugraph/cugraph/link_prediction/wsorensen.py @@ -91,20 +91,17 @@ def sorensen_w(input_graph, weights, vertex_pair=None, do_expensive_check=True): """ if do_expensive_check: if not input_graph.renumbered: - input_df = input_graph.edgelist.edgelist_df + input_df = input_graph.edgelist.edgelist_df[["src", "dst"]] max_vertex = input_df.max().max() - expected_nodes = cudf.Series(range(0, max_vertex + 1, 1)).astype( - input_df.dtypes[0] - ) - nodes = ( - cudf.concat([input_df["src"], input_df["dst"]]) - .unique() - .sort_values() - .reset_index(drop=True) - ) + expected_nodes = cudf.Series(range(0, max_vertex + 1 ,1)).astype( + input_df.dtypes[0]) + nodes = cudf.concat( + [input_df["src"], input_df["dst"]] + ).unique().sort_values().reset_index(drop=True) if not expected_nodes.equals(nodes): raise ValueError("Unrenumbered vertices are not supported.") + if type(input_graph) is not Graph: raise TypeError("input graph must a Graph") From 536d835624b4bd878e24c96ffda67caabb2a2371 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Sat, 15 Jul 2023 00:35:50 -0700 Subject: [PATCH 5/8] update copyright, skip tests --- .../cugraph/cugraph/link_prediction/jaccard.py | 16 ++++++++++------ .../cugraph/cugraph/link_prediction/overlap.py | 15 +++++++++------ .../cugraph/cugraph/link_prediction/sorensen.py | 15 +++++++++------ .../cugraph/cugraph/link_prediction/wjaccard.py | 15 +++++++++------ .../cugraph/cugraph/link_prediction/woverlap.py | 15 +++++++++------ .../cugraph/cugraph/link_prediction/wsorensen.py | 15 +++++++++------ .../tests/link_prediction/test_jaccard.py | 1 + .../tests/link_prediction/test_sorensen.py | 1 + 8 files changed, 57 insertions(+), 36 deletions(-) diff --git a/python/cugraph/cugraph/link_prediction/jaccard.py b/python/cugraph/cugraph/link_prediction/jaccard.py index f21180c69dd..933f1c4c772 100644 --- a/python/cugraph/cugraph/link_prediction/jaccard.py +++ b/python/cugraph/cugraph/link_prediction/jaccard.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# Copyright (c) 2019-2023, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -112,11 +112,15 @@ def jaccard(input_graph, vertex_pair=None, do_expensive_check=True): if not input_graph.renumbered: input_df = input_graph.edgelist.edgelist_df[["src", "dst"]] max_vertex = input_df.max().max() - expected_nodes = cudf.Series(range(0, max_vertex + 1 ,1)).astype( - input_df.dtypes[0]) - nodes = cudf.concat( - [input_df["src"], input_df["dst"]] - ).unique().sort_values().reset_index(drop=True) + expected_nodes = cudf.Series(range(0, max_vertex + 1, 1)).astype( + input_df.dtypes[0] + ) + nodes = ( + cudf.concat([input_df["src"], input_df["dst"]]) + .unique() + .sort_values() + .reset_index(drop=True) + ) if not expected_nodes.equals(nodes): raise ValueError("Unrenumbered vertices are not supported.") diff --git a/python/cugraph/cugraph/link_prediction/overlap.py b/python/cugraph/cugraph/link_prediction/overlap.py index 85b6e9e4a48..e5ac7ceb618 100644 --- a/python/cugraph/cugraph/link_prediction/overlap.py +++ b/python/cugraph/cugraph/link_prediction/overlap.py @@ -102,15 +102,18 @@ def overlap(input_graph, vertex_pair=None, do_expensive_check=True): if not input_graph.renumbered: input_df = input_graph.edgelist.edgelist_df[["src", "dst"]] max_vertex = input_df.max().max() - expected_nodes = cudf.Series(range(0, max_vertex + 1 ,1)).astype( - input_df.dtypes[0]) - nodes = cudf.concat( - [input_df["src"], input_df["dst"]] - ).unique().sort_values().reset_index(drop=True) + expected_nodes = cudf.Series(range(0, max_vertex + 1, 1)).astype( + input_df.dtypes[0] + ) + nodes = ( + cudf.concat([input_df["src"], input_df["dst"]]) + .unique() + .sort_values() + .reset_index(drop=True) + ) if not expected_nodes.equals(nodes): raise ValueError("Unrenumbered vertices are not supported.") - if type(vertex_pair) == cudf.DataFrame: vertex_pair = renumber_vertex_pair(input_graph, vertex_pair) elif vertex_pair is not None: diff --git a/python/cugraph/cugraph/link_prediction/sorensen.py b/python/cugraph/cugraph/link_prediction/sorensen.py index e5481291e53..b46bfe12b25 100644 --- a/python/cugraph/cugraph/link_prediction/sorensen.py +++ b/python/cugraph/cugraph/link_prediction/sorensen.py @@ -84,15 +84,18 @@ def sorensen(input_graph, vertex_pair=None, do_expensive_check=True): if not input_graph.renumbered: input_df = input_graph.edgelist.edgelist_df[["src", "dst"]] max_vertex = input_df.max().max() - expected_nodes = cudf.Series(range(0, max_vertex + 1 ,1)).astype( - input_df.dtypes[0]) - nodes = cudf.concat( - [input_df["src"], input_df["dst"]] - ).unique().sort_values().reset_index(drop=True) + expected_nodes = cudf.Series(range(0, max_vertex + 1, 1)).astype( + input_df.dtypes[0] + ) + nodes = ( + cudf.concat([input_df["src"], input_df["dst"]]) + .unique() + .sort_values() + .reset_index(drop=True) + ) if not expected_nodes.equals(nodes): raise ValueError("Unrenumbered vertices are not supported.") - if type(input_graph) is not Graph: raise TypeError("input graph must a Graph") diff --git a/python/cugraph/cugraph/link_prediction/wjaccard.py b/python/cugraph/cugraph/link_prediction/wjaccard.py index eabf9898e1b..3b47ee9fc88 100644 --- a/python/cugraph/cugraph/link_prediction/wjaccard.py +++ b/python/cugraph/cugraph/link_prediction/wjaccard.py @@ -95,15 +95,18 @@ def jaccard_w(input_graph, weights, vertex_pair=None, do_expensive_check=True): if not input_graph.renumbered: input_df = input_graph.edgelist.edgelist_df[["src", "dst"]] max_vertex = input_df.max().max() - expected_nodes = cudf.Series(range(0, max_vertex + 1 ,1)).astype( - input_df.dtypes[0]) - nodes = cudf.concat( - [input_df["src"], input_df["dst"]] - ).unique().sort_values().reset_index(drop=True) + expected_nodes = cudf.Series(range(0, max_vertex + 1, 1)).astype( + input_df.dtypes[0] + ) + nodes = ( + cudf.concat([input_df["src"], input_df["dst"]]) + .unique() + .sort_values() + .reset_index(drop=True) + ) if not expected_nodes.equals(nodes): raise ValueError("Unrenumbered vertices are not supported.") - if type(input_graph) is not Graph: raise TypeError("input graph must a Graph") diff --git a/python/cugraph/cugraph/link_prediction/woverlap.py b/python/cugraph/cugraph/link_prediction/woverlap.py index 9a19546479b..cfd2d5f505e 100644 --- a/python/cugraph/cugraph/link_prediction/woverlap.py +++ b/python/cugraph/cugraph/link_prediction/woverlap.py @@ -96,15 +96,18 @@ def overlap_w(input_graph, weights, vertex_pair=None, do_expensive_check=True): if not input_graph.renumbered: input_df = input_graph.edgelist.edgelist_df[["src", "dst"]] max_vertex = input_df.max().max() - expected_nodes = cudf.Series(range(0, max_vertex + 1 ,1)).astype( - input_df.dtypes[0]) - nodes = cudf.concat( - [input_df["src"], input_df["dst"]] - ).unique().sort_values().reset_index(drop=True) + expected_nodes = cudf.Series(range(0, max_vertex + 1, 1)).astype( + input_df.dtypes[0] + ) + nodes = ( + cudf.concat([input_df["src"], input_df["dst"]]) + .unique() + .sort_values() + .reset_index(drop=True) + ) if not expected_nodes.equals(nodes): raise ValueError("Unrenumbered vertices are not supported.") - if type(vertex_pair) == cudf.DataFrame: vertex_pair = renumber_vertex_pair(input_graph, vertex_pair) elif vertex_pair is not None: diff --git a/python/cugraph/cugraph/link_prediction/wsorensen.py b/python/cugraph/cugraph/link_prediction/wsorensen.py index 689c2b080eb..aaab5699813 100644 --- a/python/cugraph/cugraph/link_prediction/wsorensen.py +++ b/python/cugraph/cugraph/link_prediction/wsorensen.py @@ -93,15 +93,18 @@ def sorensen_w(input_graph, weights, vertex_pair=None, do_expensive_check=True): if not input_graph.renumbered: input_df = input_graph.edgelist.edgelist_df[["src", "dst"]] max_vertex = input_df.max().max() - expected_nodes = cudf.Series(range(0, max_vertex + 1 ,1)).astype( - input_df.dtypes[0]) - nodes = cudf.concat( - [input_df["src"], input_df["dst"]] - ).unique().sort_values().reset_index(drop=True) + expected_nodes = cudf.Series(range(0, max_vertex + 1, 1)).astype( + input_df.dtypes[0] + ) + nodes = ( + cudf.concat([input_df["src"], input_df["dst"]]) + .unique() + .sort_values() + .reset_index(drop=True) + ) if not expected_nodes.equals(nodes): raise ValueError("Unrenumbered vertices are not supported.") - if type(input_graph) is not Graph: raise TypeError("input graph must a Graph") diff --git a/python/cugraph/cugraph/tests/link_prediction/test_jaccard.py b/python/cugraph/cugraph/tests/link_prediction/test_jaccard.py index 82c6cd7894d..43077126827 100644 --- a/python/cugraph/cugraph/tests/link_prediction/test_jaccard.py +++ b/python/cugraph/cugraph/tests/link_prediction/test_jaccard.py @@ -202,6 +202,7 @@ def test_nx_jaccard_time(read_csv, gpubenchmark): @pytest.mark.sg @pytest.mark.parametrize("graph_file", [netscience]) +@pytest.mark.skip(reason="Skipping because this datasets is unrenumbered") def test_jaccard_edgevals(gpubenchmark, graph_file): dataset_path = netscience.get_path() M = utils.read_csv_for_nx(dataset_path) diff --git a/python/cugraph/cugraph/tests/link_prediction/test_sorensen.py b/python/cugraph/cugraph/tests/link_prediction/test_sorensen.py index ffb5aed5a95..14d84784161 100644 --- a/python/cugraph/cugraph/tests/link_prediction/test_sorensen.py +++ b/python/cugraph/cugraph/tests/link_prediction/test_sorensen.py @@ -187,6 +187,7 @@ def test_nx_sorensen_time(gpubenchmark, read_csv): @pytest.mark.sg @pytest.mark.parametrize("graph_file", [netscience]) +@pytest.mark.skip(reason="Skipping because this datasets is unrenumbered") def test_sorensen_edgevals(gpubenchmark, graph_file): dataset_path = netscience.get_path() M = utils.read_csv_for_nx(dataset_path) From f5ade1e411578c9035481e9513331e0f4fd47fd6 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Sat, 15 Jul 2023 00:40:49 -0700 Subject: [PATCH 6/8] update tests --- python/cugraph/cugraph/tests/link_prediction/test_wjaccard.py | 2 +- python/cugraph/cugraph/tests/link_prediction/test_woverlap.py | 2 +- python/cugraph/cugraph/tests/link_prediction/test_wsorensen.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/cugraph/cugraph/tests/link_prediction/test_wjaccard.py b/python/cugraph/cugraph/tests/link_prediction/test_wjaccard.py index 7a7b3668dda..2bc39b877ea 100644 --- a/python/cugraph/cugraph/tests/link_prediction/test_wjaccard.py +++ b/python/cugraph/cugraph/tests/link_prediction/test_wjaccard.py @@ -186,4 +186,4 @@ def test_invalid_datasets_jaccard_w(): G = cugraph.Graph(directed=False) G.from_cudf_edgelist(df, source="src", destination="dst") with pytest.raises(ValueError): - cugraph.jaccard_w(G) + cugraph.jaccard_w(G, None) diff --git a/python/cugraph/cugraph/tests/link_prediction/test_woverlap.py b/python/cugraph/cugraph/tests/link_prediction/test_woverlap.py index 070016011bc..5e35bb66f07 100644 --- a/python/cugraph/cugraph/tests/link_prediction/test_woverlap.py +++ b/python/cugraph/cugraph/tests/link_prediction/test_woverlap.py @@ -169,4 +169,4 @@ def test_invalid_datasets_overlap_w(): G = cugraph.Graph(directed=False) G.from_cudf_edgelist(df, source="src", destination="dst") with pytest.raises(ValueError): - cugraph.overlap_w(G) + cugraph.overlap_w(G, None) diff --git a/python/cugraph/cugraph/tests/link_prediction/test_wsorensen.py b/python/cugraph/cugraph/tests/link_prediction/test_wsorensen.py index 9febe318a5c..cca2363d2d6 100644 --- a/python/cugraph/cugraph/tests/link_prediction/test_wsorensen.py +++ b/python/cugraph/cugraph/tests/link_prediction/test_wsorensen.py @@ -190,4 +190,4 @@ def test_invalid_datasets_sorensen_w(): G = cugraph.Graph(directed=False) G.from_cudf_edgelist(df, source="src", destination="dst") with pytest.raises(ValueError): - cugraph.sorensen_w(G) + cugraph.sorensen_w(G, None) From 119a6928a3c1ab83d343c553c6dc642c5393e475 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Mon, 17 Jul 2023 23:56:55 -0700 Subject: [PATCH 7/8] update docstrings --- python/cugraph/cugraph/link_prediction/jaccard.py | 4 ++++ python/cugraph/cugraph/link_prediction/overlap.py | 4 ++++ python/cugraph/cugraph/link_prediction/sorensen.py | 4 ++++ python/cugraph/cugraph/link_prediction/wjaccard.py | 4 ++++ python/cugraph/cugraph/link_prediction/woverlap.py | 4 ++++ python/cugraph/cugraph/link_prediction/wsorensen.py | 4 ++++ 6 files changed, 24 insertions(+) diff --git a/python/cugraph/cugraph/link_prediction/jaccard.py b/python/cugraph/cugraph/link_prediction/jaccard.py index 933f1c4c772..bc5bb034c9f 100644 --- a/python/cugraph/cugraph/link_prediction/jaccard.py +++ b/python/cugraph/cugraph/link_prediction/jaccard.py @@ -83,6 +83,10 @@ def jaccard(input_graph, vertex_pair=None, do_expensive_check=True): given vertex pairs. If the vertex_pair is not provided then the current implementation computes the jaccard coefficient for all adjacent vertices in the graph. + + do_expensive_check: bool (default=True) + When set to True, check if the vertices in the graph are (re)numbered + from 0 to V-1 where V is the total number of vertices. Returns ------- diff --git a/python/cugraph/cugraph/link_prediction/overlap.py b/python/cugraph/cugraph/link_prediction/overlap.py index e5ac7ceb618..6b265af156f 100644 --- a/python/cugraph/cugraph/link_prediction/overlap.py +++ b/python/cugraph/cugraph/link_prediction/overlap.py @@ -73,6 +73,10 @@ def overlap(input_graph, vertex_pair=None, do_expensive_check=True): A GPU dataframe consisting of two columns representing pairs of vertices. If provided, the overlap coefficient is computed for the given vertex pairs, else, it is computed for all vertex pairs. + + do_expensive_check: bool (default=True) + When set to True, check if the vertices in the graph are (re)numbered + from 0 to V-1 where V is the total number of vertices. Returns ------- diff --git a/python/cugraph/cugraph/link_prediction/sorensen.py b/python/cugraph/cugraph/link_prediction/sorensen.py index b46bfe12b25..12499a94109 100644 --- a/python/cugraph/cugraph/link_prediction/sorensen.py +++ b/python/cugraph/cugraph/link_prediction/sorensen.py @@ -53,6 +53,10 @@ def sorensen(input_graph, vertex_pair=None, do_expensive_check=True): given vertex pairs. If the vertex_pair is not provided then the current implementation computes the Sorensen coefficient for all adjacent vertices in the graph. + + do_expensive_check: bool (default=True) + When set to True, check if the vertices in the graph are (re)numbered + from 0 to V-1 where V is the total number of vertices. Returns ------- diff --git a/python/cugraph/cugraph/link_prediction/wjaccard.py b/python/cugraph/cugraph/link_prediction/wjaccard.py index 3b47ee9fc88..3213a24b037 100644 --- a/python/cugraph/cugraph/link_prediction/wjaccard.py +++ b/python/cugraph/cugraph/link_prediction/wjaccard.py @@ -54,6 +54,10 @@ def jaccard_w(input_graph, weights, vertex_pair=None, do_expensive_check=True): A GPU dataframe consisting of two columns representing pairs of vertices. If provided, the jaccard coefficient is computed for the given vertex pairs, else, it is computed for all vertex pairs. + + do_expensive_check: bool (default=True) + When set to True, check if the vertices in the graph are (re)numbered + from 0 to V-1 where V is the total number of vertices. Returns ------- diff --git a/python/cugraph/cugraph/link_prediction/woverlap.py b/python/cugraph/cugraph/link_prediction/woverlap.py index cfd2d5f505e..8db50b8fc7d 100644 --- a/python/cugraph/cugraph/link_prediction/woverlap.py +++ b/python/cugraph/cugraph/link_prediction/woverlap.py @@ -54,6 +54,10 @@ def overlap_w(input_graph, weights, vertex_pair=None, do_expensive_check=True): A GPU dataframe consisting of two columns representing pairs of vertices. If provided, the overlap coefficient is computed for the given vertex pairs, else, it is computed for all vertex pairs. + + do_expensive_check: bool (default=True) + When set to True, check if the vertices in the graph are (re)numbered + from 0 to V-1 where V is the total number of vertices. Returns ------- diff --git a/python/cugraph/cugraph/link_prediction/wsorensen.py b/python/cugraph/cugraph/link_prediction/wsorensen.py index aaab5699813..d5b54ee6401 100644 --- a/python/cugraph/cugraph/link_prediction/wsorensen.py +++ b/python/cugraph/cugraph/link_prediction/wsorensen.py @@ -50,6 +50,10 @@ def sorensen_w(input_graph, weights, vertex_pair=None, do_expensive_check=True): A GPU dataframe consisting of two columns representing pairs of vertices. If provided, the sorensen coefficient is computed for the given vertex pairs, else, it is computed for all vertex pairs. + + do_expensive_check: bool (default=True) + When set to True, check if the vertices in the graph are (re)numbered + from 0 to V-1 where V is the total number of vertices. Returns ------- From 890244852f491653119155d5869587d1b161be8d Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 18 Jul 2023 00:17:45 -0700 Subject: [PATCH 8/8] fix style --- python/cugraph/cugraph/link_prediction/jaccard.py | 2 +- python/cugraph/cugraph/link_prediction/overlap.py | 2 +- python/cugraph/cugraph/link_prediction/sorensen.py | 2 +- python/cugraph/cugraph/link_prediction/wjaccard.py | 2 +- python/cugraph/cugraph/link_prediction/woverlap.py | 2 +- python/cugraph/cugraph/link_prediction/wsorensen.py | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/python/cugraph/cugraph/link_prediction/jaccard.py b/python/cugraph/cugraph/link_prediction/jaccard.py index bc5bb034c9f..dd411fa889d 100644 --- a/python/cugraph/cugraph/link_prediction/jaccard.py +++ b/python/cugraph/cugraph/link_prediction/jaccard.py @@ -83,7 +83,7 @@ def jaccard(input_graph, vertex_pair=None, do_expensive_check=True): given vertex pairs. If the vertex_pair is not provided then the current implementation computes the jaccard coefficient for all adjacent vertices in the graph. - + do_expensive_check: bool (default=True) When set to True, check if the vertices in the graph are (re)numbered from 0 to V-1 where V is the total number of vertices. diff --git a/python/cugraph/cugraph/link_prediction/overlap.py b/python/cugraph/cugraph/link_prediction/overlap.py index 6b265af156f..e05e0c944fe 100644 --- a/python/cugraph/cugraph/link_prediction/overlap.py +++ b/python/cugraph/cugraph/link_prediction/overlap.py @@ -73,7 +73,7 @@ def overlap(input_graph, vertex_pair=None, do_expensive_check=True): A GPU dataframe consisting of two columns representing pairs of vertices. If provided, the overlap coefficient is computed for the given vertex pairs, else, it is computed for all vertex pairs. - + do_expensive_check: bool (default=True) When set to True, check if the vertices in the graph are (re)numbered from 0 to V-1 where V is the total number of vertices. diff --git a/python/cugraph/cugraph/link_prediction/sorensen.py b/python/cugraph/cugraph/link_prediction/sorensen.py index 12499a94109..0f35f868b7c 100644 --- a/python/cugraph/cugraph/link_prediction/sorensen.py +++ b/python/cugraph/cugraph/link_prediction/sorensen.py @@ -53,7 +53,7 @@ def sorensen(input_graph, vertex_pair=None, do_expensive_check=True): given vertex pairs. If the vertex_pair is not provided then the current implementation computes the Sorensen coefficient for all adjacent vertices in the graph. - + do_expensive_check: bool (default=True) When set to True, check if the vertices in the graph are (re)numbered from 0 to V-1 where V is the total number of vertices. diff --git a/python/cugraph/cugraph/link_prediction/wjaccard.py b/python/cugraph/cugraph/link_prediction/wjaccard.py index 3213a24b037..fc6edae8d3e 100644 --- a/python/cugraph/cugraph/link_prediction/wjaccard.py +++ b/python/cugraph/cugraph/link_prediction/wjaccard.py @@ -54,7 +54,7 @@ def jaccard_w(input_graph, weights, vertex_pair=None, do_expensive_check=True): A GPU dataframe consisting of two columns representing pairs of vertices. If provided, the jaccard coefficient is computed for the given vertex pairs, else, it is computed for all vertex pairs. - + do_expensive_check: bool (default=True) When set to True, check if the vertices in the graph are (re)numbered from 0 to V-1 where V is the total number of vertices. diff --git a/python/cugraph/cugraph/link_prediction/woverlap.py b/python/cugraph/cugraph/link_prediction/woverlap.py index 8db50b8fc7d..27fb7d608ca 100644 --- a/python/cugraph/cugraph/link_prediction/woverlap.py +++ b/python/cugraph/cugraph/link_prediction/woverlap.py @@ -54,7 +54,7 @@ def overlap_w(input_graph, weights, vertex_pair=None, do_expensive_check=True): A GPU dataframe consisting of two columns representing pairs of vertices. If provided, the overlap coefficient is computed for the given vertex pairs, else, it is computed for all vertex pairs. - + do_expensive_check: bool (default=True) When set to True, check if the vertices in the graph are (re)numbered from 0 to V-1 where V is the total number of vertices. diff --git a/python/cugraph/cugraph/link_prediction/wsorensen.py b/python/cugraph/cugraph/link_prediction/wsorensen.py index d5b54ee6401..c27e4f66a02 100644 --- a/python/cugraph/cugraph/link_prediction/wsorensen.py +++ b/python/cugraph/cugraph/link_prediction/wsorensen.py @@ -50,7 +50,7 @@ def sorensen_w(input_graph, weights, vertex_pair=None, do_expensive_check=True): A GPU dataframe consisting of two columns representing pairs of vertices. If provided, the sorensen coefficient is computed for the given vertex pairs, else, it is computed for all vertex pairs. - + do_expensive_check: bool (default=True) When set to True, check if the vertices in the graph are (re)numbered from 0 to V-1 where V is the total number of vertices.