From b2ae4a7899af967d5aeabb31f91c86fe8d20ddf3 Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke@gmail.com>
Date: Fri, 14 Jul 2023 22:18:38 -0700
Subject: [PATCH 1/8] update docstrings reflecting that unrenumbered datasets
 are not supported

---
 python/cugraph/cugraph/link_prediction/jaccard.py   | 8 ++++++++
 python/cugraph/cugraph/link_prediction/overlap.py   | 8 ++++++++
 python/cugraph/cugraph/link_prediction/sorensen.py  | 8 ++++++++
 python/cugraph/cugraph/link_prediction/wjaccard.py  | 4 ++++
 python/cugraph/cugraph/link_prediction/woverlap.py  | 4 ++++
 python/cugraph/cugraph/link_prediction/wsorensen.py | 4 ++++
 6 files changed, 36 insertions(+)

diff --git a/python/cugraph/cugraph/link_prediction/jaccard.py b/python/cugraph/cugraph/link_prediction/jaccard.py
index 1c4fed7a8f9..d80bf40dc61 100644
--- a/python/cugraph/cugraph/link_prediction/jaccard.py
+++ b/python/cugraph/cugraph/link_prediction/jaccard.py
@@ -36,6 +36,10 @@ def jaccard(input_graph, vertex_pair=None):
     of cugraph.jaccard is different from the behavior of
     networkx.jaccard_coefficient.
 
+    NOTE: This algorithm doesn't currently support datasets whose vertices
+    are not (re)numbered from 0 to V-1, where V is the total number of
+    vertices, as this creates isolated vertices.
+
     cugraph.jaccard, in the absence of a specified vertex pair list, will
     use the edges of the graph to construct a vertex pair list and will
     return the jaccard coefficient for those vertex pairs.
@@ -124,6 +128,10 @@ def jaccard_coefficient(G, ebunch=None):
     """
     For NetworkX Compatability.  See `jaccard`
 
+    NOTE: This algorithm doesn't currently support datasets whose vertices
+    are not (re)numbered from 0 to V-1, where V is the total number of
+    vertices, as this creates isolated vertices.
+
     Parameters
     ----------
     graph : cugraph.Graph
diff --git a/python/cugraph/cugraph/link_prediction/overlap.py b/python/cugraph/cugraph/link_prediction/overlap.py
index ba9f225062e..1151864376e 100644
--- a/python/cugraph/cugraph/link_prediction/overlap.py
+++ b/python/cugraph/cugraph/link_prediction/overlap.py
@@ -24,6 +24,10 @@ def overlap_coefficient(G, ebunch=None):
     """
     For NetworkX Compatability.  See `overlap`
 
+    NOTE: This algorithm doesn't currently support datasets whose vertices
+    are not (re)numbered from 0 to V-1, where V is the total number of
+    vertices, as this creates isolated vertices.
+
     """
     vertex_pair = None
 
@@ -54,6 +58,10 @@ def overlap(input_graph, vertex_pair=None):
     neighbors. If first is specified but second is not, or vice versa, an
     exception will be thrown.
 
+    NOTE: This algorithm doesn't currently support datasets whose vertices
+    are not (re)numbered from 0 to V-1, where V is the total number of
+    vertices, as this creates isolated vertices.
+
     Parameters
     ----------
     input_graph : cugraph.Graph
diff --git a/python/cugraph/cugraph/link_prediction/sorensen.py b/python/cugraph/cugraph/link_prediction/sorensen.py
index 20238e10464..4305c9dfc86 100644
--- a/python/cugraph/cugraph/link_prediction/sorensen.py
+++ b/python/cugraph/cugraph/link_prediction/sorensen.py
@@ -30,6 +30,10 @@ def sorensen(input_graph, vertex_pair=None):
     If first is specified but second is not, or vice versa, an exception will
     be thrown.
 
+    NOTE: This algorithm doesn't currently support datasets whose vertices
+    are not (re)numbered from 0 to V-1, where V is the total number of
+    vertices, as this creates isolated vertices.
+
     cugraph.sorensen, in the absence of a specified vertex pair list, will
     use the edges of the graph to construct a vertex pair list and will
     return the sorensen coefficient for those vertex pairs.
@@ -98,6 +102,10 @@ def sorensen_coefficient(G, ebunch=None):
     """
     For NetworkX Compatability.  See `sorensen`
 
+    NOTE: This algorithm doesn't currently support datasets whose vertices
+    are not (re)numbered from 0 to V-1, where V is the total number of
+    vertices, as this creates isolated vertices.
+
     Parameters
     ----------
     G : cugraph.Graph
diff --git a/python/cugraph/cugraph/link_prediction/wjaccard.py b/python/cugraph/cugraph/link_prediction/wjaccard.py
index b8ef33d926f..31ce8e99a0b 100644
--- a/python/cugraph/cugraph/link_prediction/wjaccard.py
+++ b/python/cugraph/cugraph/link_prediction/wjaccard.py
@@ -29,6 +29,10 @@ def jaccard_w(input_graph, weights, vertex_pair=None):
     neighbors. If first is specified but second is not, or vice versa, an
     exception will be thrown.
 
+    NOTE: This algorithm doesn't currently support datasets whose vertices
+    are not (re)numbered from 0 to V-1, where V is the total number of
+    vertices, as this creates isolated vertices.
+
     Parameters
     ----------
     input_graph : cugraph.Graph
diff --git a/python/cugraph/cugraph/link_prediction/woverlap.py b/python/cugraph/cugraph/link_prediction/woverlap.py
index c7d4f56a428..1f5ba0e35a9 100644
--- a/python/cugraph/cugraph/link_prediction/woverlap.py
+++ b/python/cugraph/cugraph/link_prediction/woverlap.py
@@ -28,6 +28,10 @@ def overlap_w(input_graph, weights, vertex_pair=None):
     neighbors. If first is specified but second is not, or vice versa, an
     exception will be thrown.
 
+    NOTE: This algorithm doesn't currently support datasets whose vertices
+    are not (re)numbered from 0 to V-1, where V is the total number of
+    vertices, as this creates isolated vertices.
+
     Parameters
     ----------
     input_graph : cugraph.Graph
diff --git a/python/cugraph/cugraph/link_prediction/wsorensen.py b/python/cugraph/cugraph/link_prediction/wsorensen.py
index c017463a294..019693b2793 100644
--- a/python/cugraph/cugraph/link_prediction/wsorensen.py
+++ b/python/cugraph/cugraph/link_prediction/wsorensen.py
@@ -24,6 +24,10 @@ def sorensen_w(input_graph, weights, vertex_pair=None):
     the user. Sorensen coefficient is defined between two sets as the ratio of
     twice the volume of their intersection divided by the volume of each set.
 
+    NOTE: This algorithm doesn't currently support datasets whose vertices
+    are not (re)numbered from 0 to V-1, where V is the total number of
+    vertices, as this creates isolated vertices.
+
     Parameters
     ----------
     input_graph : cugraph.Graph

From c9ec7a04de988ad65d0e65d57e9d83700e13b095 Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke@gmail.com>
Date: Fri, 14 Jul 2023 23:32:36 -0700
Subject: [PATCH 2/8] add check ensuring that the vertices are renumbered

---
 .../cugraph/cugraph/link_prediction/jaccard.py   | 16 ++++++++++++++--
 .../cugraph/cugraph/link_prediction/overlap.py   | 15 +++++++++++++--
 .../cugraph/cugraph/link_prediction/sorensen.py  | 16 ++++++++++++++--
 .../cugraph/cugraph/link_prediction/wjaccard.py  | 14 +++++++++++++-
 .../cugraph/cugraph/link_prediction/woverlap.py  | 13 ++++++++++++-
 .../cugraph/cugraph/link_prediction/wsorensen.py | 14 +++++++++++++-
 6 files changed, 79 insertions(+), 9 deletions(-)

diff --git a/python/cugraph/cugraph/link_prediction/jaccard.py b/python/cugraph/cugraph/link_prediction/jaccard.py
index d80bf40dc61..da32c95efc9 100644
--- a/python/cugraph/cugraph/link_prediction/jaccard.py
+++ b/python/cugraph/cugraph/link_prediction/jaccard.py
@@ -20,7 +20,7 @@
 )
 
 
-def jaccard(input_graph, vertex_pair=None):
+def jaccard(input_graph, vertex_pair=None, do_expensive_check=True):
     """
     Compute the Jaccard similarity between each pair of vertices connected by
     an edge, or between arbitrary pairs of vertices specified by the user.
@@ -108,6 +108,18 @@ def jaccard(input_graph, vertex_pair=None):
     >>> df = cugraph.jaccard(G)
 
     """
+    if do_expensive_check:
+        if not input_graph.renumbered:
+            input_df = input_graph.edgelist.edgelist_df
+            max_vertex = input_df.max().max()
+            expected_nodes = cudf.Series(range(0, max_vertex + 1 ,1)).astype(
+                input_df.dtypes[0])
+            nodes = cudf.concat(
+                    [input_df["src"], input_df["dst"]]
+                ).unique().sort_values().reset_index(drop=True)
+            if not expected_nodes.equals(nodes):
+                raise RuntimeError("Unrenumbered vertices are not supported.")
+
     if input_graph.is_directed():
         raise ValueError("Input must be an undirected Graph.")
     if type(vertex_pair) == cudf.DataFrame:
@@ -124,7 +136,7 @@ def jaccard(input_graph, vertex_pair=None):
     return df
 
 
-def jaccard_coefficient(G, ebunch=None):
+def jaccard_coefficient(G, ebunch=None, do_expensive_check=True):
     """
     For NetworkX Compatability.  See `jaccard`
 
diff --git a/python/cugraph/cugraph/link_prediction/overlap.py b/python/cugraph/cugraph/link_prediction/overlap.py
index 1151864376e..784e1302055 100644
--- a/python/cugraph/cugraph/link_prediction/overlap.py
+++ b/python/cugraph/cugraph/link_prediction/overlap.py
@@ -20,7 +20,7 @@
 )
 
 
-def overlap_coefficient(G, ebunch=None):
+def overlap_coefficient(G, ebunch=None, do_expensive_check=True):
     """
     For NetworkX Compatability.  See `overlap`
 
@@ -46,7 +46,7 @@ def overlap_coefficient(G, ebunch=None):
     return df
 
 
-def overlap(input_graph, vertex_pair=None):
+def overlap(input_graph, vertex_pair=None, do_expensive_check=True):
     """
     Compute the Overlap Coefficient between each pair of vertices connected by
     an edge, or between arbitrary pairs of vertices specified by the user.
@@ -98,6 +98,17 @@ def overlap(input_graph, vertex_pair=None):
     >>> df = cugraph.overlap(G)
 
     """
+    if do_expensive_check:
+        if not input_graph.renumbered:
+            input_df = input_graph.edgelist.edgelist_df
+            max_vertex = input_df.max().max()
+            expected_nodes = cudf.Series(range(0, max_vertex + 1 ,1)).astype(
+                input_df.dtypes[0])
+            nodes = cudf.concat(
+                    [input_df["src"], input_df["dst"]]
+                ).unique().sort_values().reset_index(drop=True)
+            if not expected_nodes.equals(nodes):
+                raise RuntimeError("Unrenumbered vertices are not supported.")
 
     if type(vertex_pair) == cudf.DataFrame:
         vertex_pair = renumber_vertex_pair(input_graph, vertex_pair)
diff --git a/python/cugraph/cugraph/link_prediction/sorensen.py b/python/cugraph/cugraph/link_prediction/sorensen.py
index 4305c9dfc86..916032ee66a 100644
--- a/python/cugraph/cugraph/link_prediction/sorensen.py
+++ b/python/cugraph/cugraph/link_prediction/sorensen.py
@@ -21,7 +21,7 @@
 )
 
 
-def sorensen(input_graph, vertex_pair=None):
+def sorensen(input_graph, vertex_pair=None, do_expensive_check=True):
     """
     Compute the Sorensen coefficient between each pair of vertices connected by
     an edge, or between arbitrary pairs of vertices specified by the user.
@@ -80,6 +80,18 @@ def sorensen(input_graph, vertex_pair=None):
     >>> df = cugraph.sorensen(G)
 
     """
+    if do_expensive_check:
+        if not input_graph.renumbered:
+            input_df = input_graph.edgelist.edgelist_df
+            max_vertex = input_df.max().max()
+            expected_nodes = cudf.Series(range(0, max_vertex + 1 ,1)).astype(
+                input_df.dtypes[0])
+            nodes = cudf.concat(
+                    [input_df["src"], input_df["dst"]]
+                ).unique().sort_values().reset_index(drop=True)
+            if not expected_nodes.equals(nodes):
+                raise RuntimeError("Unrenumbered vertices are not supported.")
+
     if type(input_graph) is not Graph:
         raise TypeError("input graph must a Graph")
 
@@ -98,7 +110,7 @@ def sorensen(input_graph, vertex_pair=None):
     return df
 
 
-def sorensen_coefficient(G, ebunch=None):
+def sorensen_coefficient(G, ebunch=None, do_expensive_check=True):
     """
     For NetworkX Compatability.  See `sorensen`
 
diff --git a/python/cugraph/cugraph/link_prediction/wjaccard.py b/python/cugraph/cugraph/link_prediction/wjaccard.py
index 31ce8e99a0b..792aaaeb524 100644
--- a/python/cugraph/cugraph/link_prediction/wjaccard.py
+++ b/python/cugraph/cugraph/link_prediction/wjaccard.py
@@ -17,7 +17,7 @@
 from cugraph.utilities import renumber_vertex_pair
 
 
-def jaccard_w(input_graph, weights, vertex_pair=None):
+def jaccard_w(input_graph, weights, vertex_pair=None, do_expensive_check=True):
     """
     Compute the weighted Jaccard similarity between each pair of vertices
     connected by an edge, or between arbitrary pairs of vertices specified by
@@ -91,6 +91,18 @@ def jaccard_w(input_graph, weights, vertex_pair=None):
     >>> df = cugraph.jaccard_w(G, weights)
 
     """
+    if do_expensive_check:
+        if not input_graph.renumbered:
+            input_df = input_graph.edgelist.edgelist_df
+            max_vertex = input_df.max().max()
+            expected_nodes = cudf.Series(range(0, max_vertex + 1 ,1)).astype(
+                input_df.dtypes[0])
+            nodes = cudf.concat(
+                    [input_df["src"], input_df["dst"]]
+                ).unique().sort_values().reset_index(drop=True)
+            if not expected_nodes.equals(nodes):
+                raise RuntimeError("Unrenumbered vertices are not supported.")
+
     if type(input_graph) is not Graph:
         raise TypeError("input graph must a Graph")
 
diff --git a/python/cugraph/cugraph/link_prediction/woverlap.py b/python/cugraph/cugraph/link_prediction/woverlap.py
index 1f5ba0e35a9..4d758cad891 100644
--- a/python/cugraph/cugraph/link_prediction/woverlap.py
+++ b/python/cugraph/cugraph/link_prediction/woverlap.py
@@ -16,7 +16,7 @@
 from cugraph.utilities import renumber_vertex_pair
 
 
-def overlap_w(input_graph, weights, vertex_pair=None):
+def overlap_w(input_graph, weights, vertex_pair=None, do_expensive_check=True):
     """
     Compute the weighted Overlap Coefficient between each pair of vertices
     connected by an edge, or between arbitrary pairs of vertices specified by
@@ -92,6 +92,17 @@ def overlap_w(input_graph, weights, vertex_pair=None):
     ...                      len(weights['vertex']))]
     >>> df = cugraph.overlap_w(G, weights)
     """
+    if do_expensive_check:
+        if not input_graph.renumbered:
+            input_df = input_graph.edgelist.edgelist_df
+            max_vertex = input_df.max().max()
+            expected_nodes = cudf.Series(range(0, max_vertex + 1 ,1)).astype(
+                input_df.dtypes[0])
+            nodes = cudf.concat(
+                    [input_df["src"], input_df["dst"]]
+                ).unique().sort_values().reset_index(drop=True)
+            if not expected_nodes.equals(nodes):
+                raise RuntimeError("Unrenumbered vertices are not supported.")
 
     if type(vertex_pair) == cudf.DataFrame:
         vertex_pair = renumber_vertex_pair(input_graph, vertex_pair)
diff --git a/python/cugraph/cugraph/link_prediction/wsorensen.py b/python/cugraph/cugraph/link_prediction/wsorensen.py
index 019693b2793..a5105eb87fe 100644
--- a/python/cugraph/cugraph/link_prediction/wsorensen.py
+++ b/python/cugraph/cugraph/link_prediction/wsorensen.py
@@ -17,7 +17,7 @@
 from cugraph.utilities import renumber_vertex_pair
 
 
-def sorensen_w(input_graph, weights, vertex_pair=None):
+def sorensen_w(input_graph, weights, vertex_pair=None, do_expensive_check=True):
     """
     Compute the weighted Sorensen similarity between each pair of vertices
     connected by an edge, or between arbitrary pairs of vertices specified by
@@ -89,6 +89,18 @@ def sorensen_w(input_graph, weights, vertex_pair=None):
     >>> df = cugraph.sorensen_w(G, weights)
 
     """
+    if do_expensive_check:
+        if not input_graph.renumbered:
+            input_df = input_graph.edgelist.edgelist_df
+            max_vertex = input_df.max().max()
+            expected_nodes = cudf.Series(range(0, max_vertex + 1 ,1)).astype(
+                input_df.dtypes[0])
+            nodes = cudf.concat(
+                    [input_df["src"], input_df["dst"]]
+                ).unique().sort_values().reset_index(drop=True)
+            if not expected_nodes.equals(nodes):
+                raise RuntimeError("Unrenumbered vertices are not supported.")
+
     if type(input_graph) is not Graph:
         raise TypeError("input graph must a Graph")
 

From a82307f600cc082adec8a322fcffc86f83524c6c Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke@gmail.com>
Date: Sat, 15 Jul 2023 00:15:16 -0700
Subject: [PATCH 3/8] fix style

---
 .../cugraph/cugraph/link_prediction/jaccard.py   |  2 +-
 .../cugraph/cugraph/link_prediction/overlap.py   |  2 +-
 .../cugraph/cugraph/link_prediction/sorensen.py  | 16 ++++++++++------
 .../cugraph/cugraph/link_prediction/wjaccard.py  |  2 +-
 .../cugraph/cugraph/link_prediction/woverlap.py  | 16 ++++++++++------
 .../cugraph/cugraph/link_prediction/wsorensen.py | 16 ++++++++++------
 .../tests/link_prediction/test_jaccard.py        | 11 +++++++++++
 .../tests/link_prediction/test_overlap.py        | 11 +++++++++++
 .../tests/link_prediction/test_sorensen.py       | 11 +++++++++++
 .../tests/link_prediction/test_wjaccard.py       | 11 +++++++++++
 .../tests/link_prediction/test_woverlap.py       | 11 +++++++++++
 .../tests/link_prediction/test_wsorensen.py      | 11 +++++++++++
 12 files changed, 99 insertions(+), 21 deletions(-)

diff --git a/python/cugraph/cugraph/link_prediction/jaccard.py b/python/cugraph/cugraph/link_prediction/jaccard.py
index da32c95efc9..f8e0b1fda82 100644
--- a/python/cugraph/cugraph/link_prediction/jaccard.py
+++ b/python/cugraph/cugraph/link_prediction/jaccard.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2022, NVIDIA CORPORATION.
+# Copyright (c) 2019-2023, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
diff --git a/python/cugraph/cugraph/link_prediction/overlap.py b/python/cugraph/cugraph/link_prediction/overlap.py
index 784e1302055..4763c2b3382 100644
--- a/python/cugraph/cugraph/link_prediction/overlap.py
+++ b/python/cugraph/cugraph/link_prediction/overlap.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2022, NVIDIA CORPORATION.
+# Copyright (c) 2019-2023, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
diff --git a/python/cugraph/cugraph/link_prediction/sorensen.py b/python/cugraph/cugraph/link_prediction/sorensen.py
index 916032ee66a..4e6714ad21f 100644
--- a/python/cugraph/cugraph/link_prediction/sorensen.py
+++ b/python/cugraph/cugraph/link_prediction/sorensen.py
@@ -84,13 +84,17 @@ def sorensen(input_graph, vertex_pair=None, do_expensive_check=True):
         if not input_graph.renumbered:
             input_df = input_graph.edgelist.edgelist_df
             max_vertex = input_df.max().max()
-            expected_nodes = cudf.Series(range(0, max_vertex + 1 ,1)).astype(
-                input_df.dtypes[0])
-            nodes = cudf.concat(
-                    [input_df["src"], input_df["dst"]]
-                ).unique().sort_values().reset_index(drop=True)
+            expected_nodes = cudf.Series(range(0, max_vertex + 1, 1)).astype(
+                input_df.dtypes[0]
+            )
+            nodes = (
+                cudf.concat([input_df["src"], input_df["dst"]])
+                .unique()
+                .sort_values()
+                .reset_index(drop=True)
+            )
             if not expected_nodes.equals(nodes):
-                raise RuntimeError("Unrenumbered vertices are not supported.")
+                raise ValueError("Unrenumbered vertices are not supported.")
 
     if type(input_graph) is not Graph:
         raise TypeError("input graph must a Graph")
diff --git a/python/cugraph/cugraph/link_prediction/wjaccard.py b/python/cugraph/cugraph/link_prediction/wjaccard.py
index 792aaaeb524..9cbebcb93f2 100644
--- a/python/cugraph/cugraph/link_prediction/wjaccard.py
+++ b/python/cugraph/cugraph/link_prediction/wjaccard.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2022, NVIDIA CORPORATION.
+# Copyright (c) 2019-2023, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
diff --git a/python/cugraph/cugraph/link_prediction/woverlap.py b/python/cugraph/cugraph/link_prediction/woverlap.py
index 4d758cad891..2d50ef3a01a 100644
--- a/python/cugraph/cugraph/link_prediction/woverlap.py
+++ b/python/cugraph/cugraph/link_prediction/woverlap.py
@@ -96,13 +96,17 @@ def overlap_w(input_graph, weights, vertex_pair=None, do_expensive_check=True):
         if not input_graph.renumbered:
             input_df = input_graph.edgelist.edgelist_df
             max_vertex = input_df.max().max()
-            expected_nodes = cudf.Series(range(0, max_vertex + 1 ,1)).astype(
-                input_df.dtypes[0])
-            nodes = cudf.concat(
-                    [input_df["src"], input_df["dst"]]
-                ).unique().sort_values().reset_index(drop=True)
+            expected_nodes = cudf.Series(range(0, max_vertex + 1, 1)).astype(
+                input_df.dtypes[0]
+            )
+            nodes = (
+                cudf.concat([input_df["src"], input_df["dst"]])
+                .unique()
+                .sort_values()
+                .reset_index(drop=True)
+            )
             if not expected_nodes.equals(nodes):
-                raise RuntimeError("Unrenumbered vertices are not supported.")
+                raise ValueError("Unrenumbered vertices are not supported.")
 
     if type(vertex_pair) == cudf.DataFrame:
         vertex_pair = renumber_vertex_pair(input_graph, vertex_pair)
diff --git a/python/cugraph/cugraph/link_prediction/wsorensen.py b/python/cugraph/cugraph/link_prediction/wsorensen.py
index a5105eb87fe..23ad418a9a1 100644
--- a/python/cugraph/cugraph/link_prediction/wsorensen.py
+++ b/python/cugraph/cugraph/link_prediction/wsorensen.py
@@ -93,13 +93,17 @@ def sorensen_w(input_graph, weights, vertex_pair=None, do_expensive_check=True):
         if not input_graph.renumbered:
             input_df = input_graph.edgelist.edgelist_df
             max_vertex = input_df.max().max()
-            expected_nodes = cudf.Series(range(0, max_vertex + 1 ,1)).astype(
-                input_df.dtypes[0])
-            nodes = cudf.concat(
-                    [input_df["src"], input_df["dst"]]
-                ).unique().sort_values().reset_index(drop=True)
+            expected_nodes = cudf.Series(range(0, max_vertex + 1, 1)).astype(
+                input_df.dtypes[0]
+            )
+            nodes = (
+                cudf.concat([input_df["src"], input_df["dst"]])
+                .unique()
+                .sort_values()
+                .reset_index(drop=True)
+            )
             if not expected_nodes.equals(nodes):
-                raise RuntimeError("Unrenumbered vertices are not supported.")
+                raise ValueError("Unrenumbered vertices are not supported.")
 
     if type(input_graph) is not Graph:
         raise TypeError("input graph must a Graph")
diff --git a/python/cugraph/cugraph/tests/link_prediction/test_jaccard.py b/python/cugraph/cugraph/tests/link_prediction/test_jaccard.py
index b04c4c741b1..82c6cd7894d 100644
--- a/python/cugraph/cugraph/tests/link_prediction/test_jaccard.py
+++ b/python/cugraph/cugraph/tests/link_prediction/test_jaccard.py
@@ -326,3 +326,14 @@ def test_weighted_exp_jaccard():
     use_weight = True
     with pytest.raises(ValueError):
         exp_jaccard(G, use_weight=use_weight)
+
+
+@pytest.mark.sg
+def test_invalid_datasets_jaccard():
+    karate = DATASETS_UNDIRECTED[0]
+    df = karate.get_edgelist()
+    df = df.add(1)  # +1 on every column; presumably makes vertex IDs start at 1
+    G = cugraph.Graph(directed=False)
+    G.from_cudf_edgelist(df, source="src", destination="dst")
+    with pytest.raises(ValueError):  # jaccard is expected to reject this graph
+        cugraph.jaccard(G)
diff --git a/python/cugraph/cugraph/tests/link_prediction/test_overlap.py b/python/cugraph/cugraph/tests/link_prediction/test_overlap.py
index 68f879dacdb..03bee451f3c 100644
--- a/python/cugraph/cugraph/tests/link_prediction/test_overlap.py
+++ b/python/cugraph/cugraph/tests/link_prediction/test_overlap.py
@@ -225,3 +225,14 @@ def test_weighted_exp_overlap():
     use_weight = True
     with pytest.raises(ValueError):
         exp_overlap(G, use_weight=use_weight)
+
+
+@pytest.mark.sg
+def test_invalid_datasets_overlap():
+    karate = DATASETS_UNDIRECTED[0]
+    df = karate.get_edgelist()
+    df = df.add(1)  # +1 on every column; presumably makes vertex IDs start at 1
+    G = cugraph.Graph(directed=False)
+    G.from_cudf_edgelist(df, source="src", destination="dst")
+    with pytest.raises(ValueError):  # overlap is expected to reject this graph
+        cugraph.overlap(G)
diff --git a/python/cugraph/cugraph/tests/link_prediction/test_sorensen.py b/python/cugraph/cugraph/tests/link_prediction/test_sorensen.py
index 3457627ed7d..ffb5aed5a95 100644
--- a/python/cugraph/cugraph/tests/link_prediction/test_sorensen.py
+++ b/python/cugraph/cugraph/tests/link_prediction/test_sorensen.py
@@ -288,3 +288,14 @@ def test_weighted_exp_sorensen():
     use_weight = True
     with pytest.raises(ValueError):
         exp_sorensen(G, use_weight=use_weight)
+
+
+@pytest.mark.sg
+def test_invalid_datasets_sorensen():
+    karate = DATASETS_UNDIRECTED[0]
+    df = karate.get_edgelist()
+    df = df.add(1)  # +1 on every column; presumably makes vertex IDs start at 1
+    G = cugraph.Graph(directed=False)
+    G.from_cudf_edgelist(df, source="src", destination="dst")
+    with pytest.raises(ValueError):  # sorensen is expected to reject this graph
+        cugraph.sorensen(G)
diff --git a/python/cugraph/cugraph/tests/link_prediction/test_wjaccard.py b/python/cugraph/cugraph/tests/link_prediction/test_wjaccard.py
index 22ace93c0e4..7a7b3668dda 100644
--- a/python/cugraph/cugraph/tests/link_prediction/test_wjaccard.py
+++ b/python/cugraph/cugraph/tests/link_prediction/test_wjaccard.py
@@ -176,3 +176,14 @@ def test_wjaccard_multi_column(read_csv):
     actual = df_res.sort_values("0_first").reset_index()
     expected = df_exp.sort_values("first").reset_index()
     assert_series_equal(actual["jaccard_coeff"], expected["jaccard_coeff"])
+
+
+@pytest.mark.sg
+def test_invalid_datasets_jaccard_w():
+    karate = DATASETS_UNDIRECTED[0]
+    df = karate.get_edgelist()
+    df = df.add(1)  # +1 on every column; presumably makes vertex IDs start at 1
+    G = cugraph.Graph(directed=False)
+    G.from_cudf_edgelist(df, source="src", destination="dst")
+    with pytest.raises(ValueError):
+        cugraph.jaccard_w(G, None)  # weights is a required argument
diff --git a/python/cugraph/cugraph/tests/link_prediction/test_woverlap.py b/python/cugraph/cugraph/tests/link_prediction/test_woverlap.py
index f4fab9d0faa..070016011bc 100644
--- a/python/cugraph/cugraph/tests/link_prediction/test_woverlap.py
+++ b/python/cugraph/cugraph/tests/link_prediction/test_woverlap.py
@@ -159,3 +159,14 @@ def test_woverlap_multi_column(graph_file):
     actual = df_res.sort_values("0_first").reset_index()
     expected = df_exp.sort_values("first").reset_index()
     assert_series_equal(actual["overlap_coeff"], expected["overlap_coeff"])
+
+
+@pytest.mark.sg
+def test_invalid_datasets_overlap_w():
+    karate = DATASETS_UNDIRECTED[0]
+    df = karate.get_edgelist()
+    df = df.add(1)  # +1 on every column; presumably makes vertex IDs start at 1
+    G = cugraph.Graph(directed=False)
+    G.from_cudf_edgelist(df, source="src", destination="dst")
+    with pytest.raises(ValueError):
+        cugraph.overlap_w(G, None)  # weights is a required argument
diff --git a/python/cugraph/cugraph/tests/link_prediction/test_wsorensen.py b/python/cugraph/cugraph/tests/link_prediction/test_wsorensen.py
index 0cf775d666c..9febe318a5c 100644
--- a/python/cugraph/cugraph/tests/link_prediction/test_wsorensen.py
+++ b/python/cugraph/cugraph/tests/link_prediction/test_wsorensen.py
@@ -180,3 +180,14 @@ def test_wsorensen_multi_column(read_csv):
     actual = df_res.sort_values("0_first").reset_index()
     expected = df_exp.sort_values("first").reset_index()
     assert_series_equal(actual["sorensen_coeff"], expected["sorensen_coeff"])
+
+
+@pytest.mark.sg
+def test_invalid_datasets_sorensen_w():
+    karate = DATASETS_UNDIRECTED[0]
+    df = karate.get_edgelist()
+    df = df.add(1)  # +1 on every column; presumably makes vertex IDs start at 1
+    G = cugraph.Graph(directed=False)
+    G.from_cudf_edgelist(df, source="src", destination="dst")
+    with pytest.raises(ValueError):
+        cugraph.sorensen_w(G, None)  # weights is a required argument

From 818fd487635279f3e99ecf55c00ea6c21401eb41 Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke@gmail.com>
Date: Sat, 15 Jul 2023 00:25:58 -0700
Subject: [PATCH 4/8] fix dtype error

---
 .../cugraph/cugraph/link_prediction/jaccard.py  |  6 +++---
 .../cugraph/cugraph/link_prediction/overlap.py  |  5 +++--
 .../cugraph/cugraph/link_prediction/sorensen.py | 17 +++++++----------
 .../cugraph/cugraph/link_prediction/wjaccard.py |  5 +++--
 .../cugraph/cugraph/link_prediction/woverlap.py | 17 +++++++----------
 .../cugraph/link_prediction/wsorensen.py        | 17 +++++++----------
 6 files changed, 30 insertions(+), 37 deletions(-)

diff --git a/python/cugraph/cugraph/link_prediction/jaccard.py b/python/cugraph/cugraph/link_prediction/jaccard.py
index f8e0b1fda82..f21180c69dd 100644
--- a/python/cugraph/cugraph/link_prediction/jaccard.py
+++ b/python/cugraph/cugraph/link_prediction/jaccard.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2023, NVIDIA CORPORATION.
+# Copyright (c) 2019-2022, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -110,7 +110,7 @@ def jaccard(input_graph, vertex_pair=None, do_expensive_check=True):
     """
     if do_expensive_check:
         if not input_graph.renumbered:
-            input_df = input_graph.edgelist.edgelist_df
+            input_df = input_graph.edgelist.edgelist_df[["src", "dst"]]
             max_vertex = input_df.max().max()
             expected_nodes = cudf.Series(range(0, max_vertex + 1 ,1)).astype(
                 input_df.dtypes[0])
@@ -118,7 +118,7 @@ def jaccard(input_graph, vertex_pair=None, do_expensive_check=True):
                     [input_df["src"], input_df["dst"]]
                 ).unique().sort_values().reset_index(drop=True)
             if not expected_nodes.equals(nodes):
-                raise RuntimeError("Unrenumbered vertices are not supported.")
+                raise ValueError("Unrenumbered vertices are not supported.")
 
     if input_graph.is_directed():
         raise ValueError("Input must be an undirected Graph.")
diff --git a/python/cugraph/cugraph/link_prediction/overlap.py b/python/cugraph/cugraph/link_prediction/overlap.py
index 4763c2b3382..85b6e9e4a48 100644
--- a/python/cugraph/cugraph/link_prediction/overlap.py
+++ b/python/cugraph/cugraph/link_prediction/overlap.py
@@ -100,7 +100,7 @@ def overlap(input_graph, vertex_pair=None, do_expensive_check=True):
     """
     if do_expensive_check:
         if not input_graph.renumbered:
-            input_df = input_graph.edgelist.edgelist_df
+            input_df = input_graph.edgelist.edgelist_df[["src", "dst"]]
             max_vertex = input_df.max().max()
             expected_nodes = cudf.Series(range(0, max_vertex + 1 ,1)).astype(
                 input_df.dtypes[0])
@@ -108,7 +108,8 @@ def overlap(input_graph, vertex_pair=None, do_expensive_check=True):
                     [input_df["src"], input_df["dst"]]
                 ).unique().sort_values().reset_index(drop=True)
             if not expected_nodes.equals(nodes):
-                raise RuntimeError("Unrenumbered vertices are not supported.")
+                raise ValueError("Unrenumbered vertices are not supported.")
+
 
     if type(vertex_pair) == cudf.DataFrame:
         vertex_pair = renumber_vertex_pair(input_graph, vertex_pair)
diff --git a/python/cugraph/cugraph/link_prediction/sorensen.py b/python/cugraph/cugraph/link_prediction/sorensen.py
index 4e6714ad21f..e5481291e53 100644
--- a/python/cugraph/cugraph/link_prediction/sorensen.py
+++ b/python/cugraph/cugraph/link_prediction/sorensen.py
@@ -82,20 +82,17 @@ def sorensen(input_graph, vertex_pair=None, do_expensive_check=True):
     """
     if do_expensive_check:
         if not input_graph.renumbered:
-            input_df = input_graph.edgelist.edgelist_df
+            input_df = input_graph.edgelist.edgelist_df[["src", "dst"]]
             max_vertex = input_df.max().max()
-            expected_nodes = cudf.Series(range(0, max_vertex + 1, 1)).astype(
-                input_df.dtypes[0]
-            )
-            nodes = (
-                cudf.concat([input_df["src"], input_df["dst"]])
-                .unique()
-                .sort_values()
-                .reset_index(drop=True)
-            )
+            expected_nodes = cudf.Series(range(0, max_vertex + 1 ,1)).astype(
+                input_df.dtypes[0])
+            nodes = cudf.concat(
+                    [input_df["src"], input_df["dst"]]
+                ).unique().sort_values().reset_index(drop=True)
             if not expected_nodes.equals(nodes):
                 raise ValueError("Unrenumbered vertices are not supported.")
 
+
     if type(input_graph) is not Graph:
         raise TypeError("input graph must a Graph")
 
diff --git a/python/cugraph/cugraph/link_prediction/wjaccard.py b/python/cugraph/cugraph/link_prediction/wjaccard.py
index 9cbebcb93f2..eabf9898e1b 100644
--- a/python/cugraph/cugraph/link_prediction/wjaccard.py
+++ b/python/cugraph/cugraph/link_prediction/wjaccard.py
@@ -93,7 +93,7 @@ def jaccard_w(input_graph, weights, vertex_pair=None, do_expensive_check=True):
     """
     if do_expensive_check:
         if not input_graph.renumbered:
-            input_df = input_graph.edgelist.edgelist_df
+            input_df = input_graph.edgelist.edgelist_df[["src", "dst"]]
             max_vertex = input_df.max().max()
             expected_nodes = cudf.Series(range(0, max_vertex + 1 ,1)).astype(
                 input_df.dtypes[0])
@@ -101,7 +101,8 @@ def jaccard_w(input_graph, weights, vertex_pair=None, do_expensive_check=True):
                     [input_df["src"], input_df["dst"]]
                 ).unique().sort_values().reset_index(drop=True)
             if not expected_nodes.equals(nodes):
-                raise RuntimeError("Unrenumbered vertices are not supported.")
+                raise ValueError("Unrenumbered vertices are not supported.")
+
 
     if type(input_graph) is not Graph:
         raise TypeError("input graph must a Graph")
diff --git a/python/cugraph/cugraph/link_prediction/woverlap.py b/python/cugraph/cugraph/link_prediction/woverlap.py
index 2d50ef3a01a..9a19546479b 100644
--- a/python/cugraph/cugraph/link_prediction/woverlap.py
+++ b/python/cugraph/cugraph/link_prediction/woverlap.py
@@ -94,20 +94,17 @@ def overlap_w(input_graph, weights, vertex_pair=None, do_expensive_check=True):
     """
     if do_expensive_check:
         if not input_graph.renumbered:
-            input_df = input_graph.edgelist.edgelist_df
+            input_df = input_graph.edgelist.edgelist_df[["src", "dst"]]
             max_vertex = input_df.max().max()
-            expected_nodes = cudf.Series(range(0, max_vertex + 1, 1)).astype(
-                input_df.dtypes[0]
-            )
-            nodes = (
-                cudf.concat([input_df["src"], input_df["dst"]])
-                .unique()
-                .sort_values()
-                .reset_index(drop=True)
-            )
+            expected_nodes = cudf.Series(range(0, max_vertex + 1 ,1)).astype(
+                input_df.dtypes[0])
+            nodes = cudf.concat(
+                    [input_df["src"], input_df["dst"]]
+                ).unique().sort_values().reset_index(drop=True)
             if not expected_nodes.equals(nodes):
                 raise ValueError("Unrenumbered vertices are not supported.")
 
+
     if type(vertex_pair) == cudf.DataFrame:
         vertex_pair = renumber_vertex_pair(input_graph, vertex_pair)
     elif vertex_pair is not None:
diff --git a/python/cugraph/cugraph/link_prediction/wsorensen.py b/python/cugraph/cugraph/link_prediction/wsorensen.py
index 23ad418a9a1..689c2b080eb 100644
--- a/python/cugraph/cugraph/link_prediction/wsorensen.py
+++ b/python/cugraph/cugraph/link_prediction/wsorensen.py
@@ -91,20 +91,17 @@ def sorensen_w(input_graph, weights, vertex_pair=None, do_expensive_check=True):
     """
     if do_expensive_check:
         if not input_graph.renumbered:
-            input_df = input_graph.edgelist.edgelist_df
+            input_df = input_graph.edgelist.edgelist_df[["src", "dst"]]
             max_vertex = input_df.max().max()
-            expected_nodes = cudf.Series(range(0, max_vertex + 1, 1)).astype(
-                input_df.dtypes[0]
-            )
-            nodes = (
-                cudf.concat([input_df["src"], input_df["dst"]])
-                .unique()
-                .sort_values()
-                .reset_index(drop=True)
-            )
+            expected_nodes = cudf.Series(range(0, max_vertex + 1 ,1)).astype(
+                input_df.dtypes[0])
+            nodes = cudf.concat(
+                    [input_df["src"], input_df["dst"]]
+                ).unique().sort_values().reset_index(drop=True)
             if not expected_nodes.equals(nodes):
                 raise ValueError("Unrenumbered vertices are not supported.")
 
+
     if type(input_graph) is not Graph:
         raise TypeError("input graph must a Graph")
 

From 536d835624b4bd878e24c96ffda67caabb2a2371 Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke@gmail.com>
Date: Sat, 15 Jul 2023 00:35:50 -0700
Subject: [PATCH 5/8] update copyright, skip tests

---
 .../cugraph/cugraph/link_prediction/jaccard.py   | 16 ++++++++++------
 .../cugraph/cugraph/link_prediction/overlap.py   | 15 +++++++++------
 .../cugraph/cugraph/link_prediction/sorensen.py  | 15 +++++++++------
 .../cugraph/cugraph/link_prediction/wjaccard.py  | 15 +++++++++------
 .../cugraph/cugraph/link_prediction/woverlap.py  | 15 +++++++++------
 .../cugraph/cugraph/link_prediction/wsorensen.py | 15 +++++++++------
 .../tests/link_prediction/test_jaccard.py        |  1 +
 .../tests/link_prediction/test_sorensen.py       |  1 +
 8 files changed, 57 insertions(+), 36 deletions(-)

diff --git a/python/cugraph/cugraph/link_prediction/jaccard.py b/python/cugraph/cugraph/link_prediction/jaccard.py
index f21180c69dd..933f1c4c772 100644
--- a/python/cugraph/cugraph/link_prediction/jaccard.py
+++ b/python/cugraph/cugraph/link_prediction/jaccard.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2022, NVIDIA CORPORATION.
+# Copyright (c) 2019-2023, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -112,11 +112,15 @@ def jaccard(input_graph, vertex_pair=None, do_expensive_check=True):
         if not input_graph.renumbered:
             input_df = input_graph.edgelist.edgelist_df[["src", "dst"]]
             max_vertex = input_df.max().max()
-            expected_nodes = cudf.Series(range(0, max_vertex + 1 ,1)).astype(
-                input_df.dtypes[0])
-            nodes = cudf.concat(
-                    [input_df["src"], input_df["dst"]]
-                ).unique().sort_values().reset_index(drop=True)
+            expected_nodes = cudf.Series(range(0, max_vertex + 1, 1)).astype(
+                input_df.dtypes[0]
+            )
+            nodes = (
+                cudf.concat([input_df["src"], input_df["dst"]])
+                .unique()
+                .sort_values()
+                .reset_index(drop=True)
+            )
             if not expected_nodes.equals(nodes):
                 raise ValueError("Unrenumbered vertices are not supported.")
 
diff --git a/python/cugraph/cugraph/link_prediction/overlap.py b/python/cugraph/cugraph/link_prediction/overlap.py
index 85b6e9e4a48..e5ac7ceb618 100644
--- a/python/cugraph/cugraph/link_prediction/overlap.py
+++ b/python/cugraph/cugraph/link_prediction/overlap.py
@@ -102,15 +102,18 @@ def overlap(input_graph, vertex_pair=None, do_expensive_check=True):
         if not input_graph.renumbered:
             input_df = input_graph.edgelist.edgelist_df[["src", "dst"]]
             max_vertex = input_df.max().max()
-            expected_nodes = cudf.Series(range(0, max_vertex + 1 ,1)).astype(
-                input_df.dtypes[0])
-            nodes = cudf.concat(
-                    [input_df["src"], input_df["dst"]]
-                ).unique().sort_values().reset_index(drop=True)
+            expected_nodes = cudf.Series(range(0, max_vertex + 1, 1)).astype(
+                input_df.dtypes[0]
+            )
+            nodes = (
+                cudf.concat([input_df["src"], input_df["dst"]])
+                .unique()
+                .sort_values()
+                .reset_index(drop=True)
+            )
             if not expected_nodes.equals(nodes):
                 raise ValueError("Unrenumbered vertices are not supported.")
 
-
     if type(vertex_pair) == cudf.DataFrame:
         vertex_pair = renumber_vertex_pair(input_graph, vertex_pair)
     elif vertex_pair is not None:
diff --git a/python/cugraph/cugraph/link_prediction/sorensen.py b/python/cugraph/cugraph/link_prediction/sorensen.py
index e5481291e53..b46bfe12b25 100644
--- a/python/cugraph/cugraph/link_prediction/sorensen.py
+++ b/python/cugraph/cugraph/link_prediction/sorensen.py
@@ -84,15 +84,18 @@ def sorensen(input_graph, vertex_pair=None, do_expensive_check=True):
         if not input_graph.renumbered:
             input_df = input_graph.edgelist.edgelist_df[["src", "dst"]]
             max_vertex = input_df.max().max()
-            expected_nodes = cudf.Series(range(0, max_vertex + 1 ,1)).astype(
-                input_df.dtypes[0])
-            nodes = cudf.concat(
-                    [input_df["src"], input_df["dst"]]
-                ).unique().sort_values().reset_index(drop=True)
+            expected_nodes = cudf.Series(range(0, max_vertex + 1, 1)).astype(
+                input_df.dtypes[0]
+            )
+            nodes = (
+                cudf.concat([input_df["src"], input_df["dst"]])
+                .unique()
+                .sort_values()
+                .reset_index(drop=True)
+            )
             if not expected_nodes.equals(nodes):
                 raise ValueError("Unrenumbered vertices are not supported.")
 
-
     if type(input_graph) is not Graph:
         raise TypeError("input graph must a Graph")
 
diff --git a/python/cugraph/cugraph/link_prediction/wjaccard.py b/python/cugraph/cugraph/link_prediction/wjaccard.py
index eabf9898e1b..3b47ee9fc88 100644
--- a/python/cugraph/cugraph/link_prediction/wjaccard.py
+++ b/python/cugraph/cugraph/link_prediction/wjaccard.py
@@ -95,15 +95,18 @@ def jaccard_w(input_graph, weights, vertex_pair=None, do_expensive_check=True):
         if not input_graph.renumbered:
             input_df = input_graph.edgelist.edgelist_df[["src", "dst"]]
             max_vertex = input_df.max().max()
-            expected_nodes = cudf.Series(range(0, max_vertex + 1 ,1)).astype(
-                input_df.dtypes[0])
-            nodes = cudf.concat(
-                    [input_df["src"], input_df["dst"]]
-                ).unique().sort_values().reset_index(drop=True)
+            expected_nodes = cudf.Series(range(0, max_vertex + 1, 1)).astype(
+                input_df.dtypes[0]
+            )
+            nodes = (
+                cudf.concat([input_df["src"], input_df["dst"]])
+                .unique()
+                .sort_values()
+                .reset_index(drop=True)
+            )
             if not expected_nodes.equals(nodes):
                 raise ValueError("Unrenumbered vertices are not supported.")
 
-
     if type(input_graph) is not Graph:
         raise TypeError("input graph must a Graph")
 
diff --git a/python/cugraph/cugraph/link_prediction/woverlap.py b/python/cugraph/cugraph/link_prediction/woverlap.py
index 9a19546479b..cfd2d5f505e 100644
--- a/python/cugraph/cugraph/link_prediction/woverlap.py
+++ b/python/cugraph/cugraph/link_prediction/woverlap.py
@@ -96,15 +96,18 @@ def overlap_w(input_graph, weights, vertex_pair=None, do_expensive_check=True):
         if not input_graph.renumbered:
             input_df = input_graph.edgelist.edgelist_df[["src", "dst"]]
             max_vertex = input_df.max().max()
-            expected_nodes = cudf.Series(range(0, max_vertex + 1 ,1)).astype(
-                input_df.dtypes[0])
-            nodes = cudf.concat(
-                    [input_df["src"], input_df["dst"]]
-                ).unique().sort_values().reset_index(drop=True)
+            expected_nodes = cudf.Series(range(0, max_vertex + 1, 1)).astype(
+                input_df.dtypes[0]
+            )
+            nodes = (
+                cudf.concat([input_df["src"], input_df["dst"]])
+                .unique()
+                .sort_values()
+                .reset_index(drop=True)
+            )
             if not expected_nodes.equals(nodes):
                 raise ValueError("Unrenumbered vertices are not supported.")
 
-
     if type(vertex_pair) == cudf.DataFrame:
         vertex_pair = renumber_vertex_pair(input_graph, vertex_pair)
     elif vertex_pair is not None:
diff --git a/python/cugraph/cugraph/link_prediction/wsorensen.py b/python/cugraph/cugraph/link_prediction/wsorensen.py
index 689c2b080eb..aaab5699813 100644
--- a/python/cugraph/cugraph/link_prediction/wsorensen.py
+++ b/python/cugraph/cugraph/link_prediction/wsorensen.py
@@ -93,15 +93,18 @@ def sorensen_w(input_graph, weights, vertex_pair=None, do_expensive_check=True):
         if not input_graph.renumbered:
             input_df = input_graph.edgelist.edgelist_df[["src", "dst"]]
             max_vertex = input_df.max().max()
-            expected_nodes = cudf.Series(range(0, max_vertex + 1 ,1)).astype(
-                input_df.dtypes[0])
-            nodes = cudf.concat(
-                    [input_df["src"], input_df["dst"]]
-                ).unique().sort_values().reset_index(drop=True)
+            expected_nodes = cudf.Series(range(0, max_vertex + 1, 1)).astype(
+                input_df.dtypes[0]
+            )
+            nodes = (
+                cudf.concat([input_df["src"], input_df["dst"]])
+                .unique()
+                .sort_values()
+                .reset_index(drop=True)
+            )
             if not expected_nodes.equals(nodes):
                 raise ValueError("Unrenumbered vertices are not supported.")
 
-
     if type(input_graph) is not Graph:
         raise TypeError("input graph must a Graph")
 
diff --git a/python/cugraph/cugraph/tests/link_prediction/test_jaccard.py b/python/cugraph/cugraph/tests/link_prediction/test_jaccard.py
index 82c6cd7894d..43077126827 100644
--- a/python/cugraph/cugraph/tests/link_prediction/test_jaccard.py
+++ b/python/cugraph/cugraph/tests/link_prediction/test_jaccard.py
@@ -202,6 +202,7 @@ def test_nx_jaccard_time(read_csv, gpubenchmark):
 
 @pytest.mark.sg
 @pytest.mark.parametrize("graph_file", [netscience])
+@pytest.mark.skip(reason="Skipping because this dataset is unrenumbered")
 def test_jaccard_edgevals(gpubenchmark, graph_file):
     dataset_path = netscience.get_path()
     M = utils.read_csv_for_nx(dataset_path)
diff --git a/python/cugraph/cugraph/tests/link_prediction/test_sorensen.py b/python/cugraph/cugraph/tests/link_prediction/test_sorensen.py
index ffb5aed5a95..14d84784161 100644
--- a/python/cugraph/cugraph/tests/link_prediction/test_sorensen.py
+++ b/python/cugraph/cugraph/tests/link_prediction/test_sorensen.py
@@ -187,6 +187,7 @@ def test_nx_sorensen_time(gpubenchmark, read_csv):
 
 @pytest.mark.sg
 @pytest.mark.parametrize("graph_file", [netscience])
+@pytest.mark.skip(reason="Skipping because this dataset is unrenumbered")
 def test_sorensen_edgevals(gpubenchmark, graph_file):
     dataset_path = netscience.get_path()
     M = utils.read_csv_for_nx(dataset_path)

From f5ade1e411578c9035481e9513331e0f4fd47fd6 Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke@gmail.com>
Date: Sat, 15 Jul 2023 00:40:49 -0700
Subject: [PATCH 6/8] update tests

---
 python/cugraph/cugraph/tests/link_prediction/test_wjaccard.py  | 2 +-
 python/cugraph/cugraph/tests/link_prediction/test_woverlap.py  | 2 +-
 python/cugraph/cugraph/tests/link_prediction/test_wsorensen.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/cugraph/cugraph/tests/link_prediction/test_wjaccard.py b/python/cugraph/cugraph/tests/link_prediction/test_wjaccard.py
index 7a7b3668dda..2bc39b877ea 100644
--- a/python/cugraph/cugraph/tests/link_prediction/test_wjaccard.py
+++ b/python/cugraph/cugraph/tests/link_prediction/test_wjaccard.py
@@ -186,4 +186,4 @@ def test_invalid_datasets_jaccard_w():
     G = cugraph.Graph(directed=False)
     G.from_cudf_edgelist(df, source="src", destination="dst")
     with pytest.raises(ValueError):
-        cugraph.jaccard_w(G)
+        cugraph.jaccard_w(G, None)
diff --git a/python/cugraph/cugraph/tests/link_prediction/test_woverlap.py b/python/cugraph/cugraph/tests/link_prediction/test_woverlap.py
index 070016011bc..5e35bb66f07 100644
--- a/python/cugraph/cugraph/tests/link_prediction/test_woverlap.py
+++ b/python/cugraph/cugraph/tests/link_prediction/test_woverlap.py
@@ -169,4 +169,4 @@ def test_invalid_datasets_overlap_w():
     G = cugraph.Graph(directed=False)
     G.from_cudf_edgelist(df, source="src", destination="dst")
     with pytest.raises(ValueError):
-        cugraph.overlap_w(G)
+        cugraph.overlap_w(G, None)
diff --git a/python/cugraph/cugraph/tests/link_prediction/test_wsorensen.py b/python/cugraph/cugraph/tests/link_prediction/test_wsorensen.py
index 9febe318a5c..cca2363d2d6 100644
--- a/python/cugraph/cugraph/tests/link_prediction/test_wsorensen.py
+++ b/python/cugraph/cugraph/tests/link_prediction/test_wsorensen.py
@@ -190,4 +190,4 @@ def test_invalid_datasets_sorensen_w():
     G = cugraph.Graph(directed=False)
     G.from_cudf_edgelist(df, source="src", destination="dst")
     with pytest.raises(ValueError):
-        cugraph.sorensen_w(G)
+        cugraph.sorensen_w(G, None)

From 119a6928a3c1ab83d343c553c6dc642c5393e475 Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke@gmail.com>
Date: Mon, 17 Jul 2023 23:56:55 -0700
Subject: [PATCH 7/8] update docstrings

---
 python/cugraph/cugraph/link_prediction/jaccard.py   | 4 ++++
 python/cugraph/cugraph/link_prediction/overlap.py   | 4 ++++
 python/cugraph/cugraph/link_prediction/sorensen.py  | 4 ++++
 python/cugraph/cugraph/link_prediction/wjaccard.py  | 4 ++++
 python/cugraph/cugraph/link_prediction/woverlap.py  | 4 ++++
 python/cugraph/cugraph/link_prediction/wsorensen.py | 4 ++++
 6 files changed, 24 insertions(+)

diff --git a/python/cugraph/cugraph/link_prediction/jaccard.py b/python/cugraph/cugraph/link_prediction/jaccard.py
index 933f1c4c772..bc5bb034c9f 100644
--- a/python/cugraph/cugraph/link_prediction/jaccard.py
+++ b/python/cugraph/cugraph/link_prediction/jaccard.py
@@ -83,6 +83,10 @@ def jaccard(input_graph, vertex_pair=None, do_expensive_check=True):
         given vertex pairs.  If the vertex_pair is not provided then the
         current implementation computes the jaccard coefficient for all
         adjacent vertices in the graph.
+    
+    do_expensive_check: bool (default=True)
+        When set to True, check if the vertices in the graph are (re)numbered
+        from 0 to V-1 where V is the total number of vertices.
 
     Returns
     -------
diff --git a/python/cugraph/cugraph/link_prediction/overlap.py b/python/cugraph/cugraph/link_prediction/overlap.py
index e5ac7ceb618..6b265af156f 100644
--- a/python/cugraph/cugraph/link_prediction/overlap.py
+++ b/python/cugraph/cugraph/link_prediction/overlap.py
@@ -73,6 +73,10 @@ def overlap(input_graph, vertex_pair=None, do_expensive_check=True):
         A GPU dataframe consisting of two columns representing pairs of
         vertices. If provided, the overlap coefficient is computed for the
         given vertex pairs, else, it is computed for all vertex pairs.
+    
+    do_expensive_check: bool (default=True)
+        When set to True, check if the vertices in the graph are (re)numbered
+        from 0 to V-1 where V is the total number of vertices.
 
     Returns
     -------
diff --git a/python/cugraph/cugraph/link_prediction/sorensen.py b/python/cugraph/cugraph/link_prediction/sorensen.py
index b46bfe12b25..12499a94109 100644
--- a/python/cugraph/cugraph/link_prediction/sorensen.py
+++ b/python/cugraph/cugraph/link_prediction/sorensen.py
@@ -53,6 +53,10 @@ def sorensen(input_graph, vertex_pair=None, do_expensive_check=True):
         given vertex pairs.  If the vertex_pair is not provided then the
         current implementation computes the Sorensen coefficient for all
         adjacent vertices in the graph.
+    
+    do_expensive_check: bool (default=True)
+        When set to True, check if the vertices in the graph are (re)numbered
+        from 0 to V-1 where V is the total number of vertices.
 
     Returns
     -------
diff --git a/python/cugraph/cugraph/link_prediction/wjaccard.py b/python/cugraph/cugraph/link_prediction/wjaccard.py
index 3b47ee9fc88..3213a24b037 100644
--- a/python/cugraph/cugraph/link_prediction/wjaccard.py
+++ b/python/cugraph/cugraph/link_prediction/wjaccard.py
@@ -54,6 +54,10 @@ def jaccard_w(input_graph, weights, vertex_pair=None, do_expensive_check=True):
         A GPU dataframe consisting of two columns representing pairs of
         vertices. If provided, the jaccard coefficient is computed for the
         given vertex pairs, else, it is computed for all vertex pairs.
+    
+    do_expensive_check: bool (default=True)
+        When set to True, check if the vertices in the graph are (re)numbered
+        from 0 to V-1 where V is the total number of vertices.
 
     Returns
     -------
diff --git a/python/cugraph/cugraph/link_prediction/woverlap.py b/python/cugraph/cugraph/link_prediction/woverlap.py
index cfd2d5f505e..8db50b8fc7d 100644
--- a/python/cugraph/cugraph/link_prediction/woverlap.py
+++ b/python/cugraph/cugraph/link_prediction/woverlap.py
@@ -54,6 +54,10 @@ def overlap_w(input_graph, weights, vertex_pair=None, do_expensive_check=True):
         A GPU dataframe consisting of two columns representing pairs of
         vertices. If provided, the overlap coefficient is computed for the
         given vertex pairs, else, it is computed for all vertex pairs.
+    
+    do_expensive_check: bool (default=True)
+        When set to True, check if the vertices in the graph are (re)numbered
+        from 0 to V-1 where V is the total number of vertices.
 
     Returns
     -------
diff --git a/python/cugraph/cugraph/link_prediction/wsorensen.py b/python/cugraph/cugraph/link_prediction/wsorensen.py
index aaab5699813..d5b54ee6401 100644
--- a/python/cugraph/cugraph/link_prediction/wsorensen.py
+++ b/python/cugraph/cugraph/link_prediction/wsorensen.py
@@ -50,6 +50,10 @@ def sorensen_w(input_graph, weights, vertex_pair=None, do_expensive_check=True):
         A GPU dataframe consisting of two columns representing pairs of
         vertices. If provided, the sorensen coefficient is computed for the
         given vertex pairs, else, it is computed for all vertex pairs.
+    
+    do_expensive_check: bool (default=True)
+        When set to True, check if the vertices in the graph are (re)numbered
+        from 0 to V-1 where V is the total number of vertices.
 
     Returns
     -------

From 890244852f491653119155d5869587d1b161be8d Mon Sep 17 00:00:00 2001
From: jnke2016 <jnke@gmail.com>
Date: Tue, 18 Jul 2023 00:17:45 -0700
Subject: [PATCH 8/8] fix style

---
 python/cugraph/cugraph/link_prediction/jaccard.py   | 2 +-
 python/cugraph/cugraph/link_prediction/overlap.py   | 2 +-
 python/cugraph/cugraph/link_prediction/sorensen.py  | 2 +-
 python/cugraph/cugraph/link_prediction/wjaccard.py  | 2 +-
 python/cugraph/cugraph/link_prediction/woverlap.py  | 2 +-
 python/cugraph/cugraph/link_prediction/wsorensen.py | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/python/cugraph/cugraph/link_prediction/jaccard.py b/python/cugraph/cugraph/link_prediction/jaccard.py
index bc5bb034c9f..dd411fa889d 100644
--- a/python/cugraph/cugraph/link_prediction/jaccard.py
+++ b/python/cugraph/cugraph/link_prediction/jaccard.py
@@ -83,7 +83,7 @@ def jaccard(input_graph, vertex_pair=None, do_expensive_check=True):
         given vertex pairs.  If the vertex_pair is not provided then the
         current implementation computes the jaccard coefficient for all
         adjacent vertices in the graph.
-    
+
     do_expensive_check: bool (default=True)
         When set to True, check if the vertices in the graph are (re)numbered
         from 0 to V-1 where V is the total number of vertices.
diff --git a/python/cugraph/cugraph/link_prediction/overlap.py b/python/cugraph/cugraph/link_prediction/overlap.py
index 6b265af156f..e05e0c944fe 100644
--- a/python/cugraph/cugraph/link_prediction/overlap.py
+++ b/python/cugraph/cugraph/link_prediction/overlap.py
@@ -73,7 +73,7 @@ def overlap(input_graph, vertex_pair=None, do_expensive_check=True):
         A GPU dataframe consisting of two columns representing pairs of
         vertices. If provided, the overlap coefficient is computed for the
         given vertex pairs, else, it is computed for all vertex pairs.
-    
+
     do_expensive_check: bool (default=True)
         When set to True, check if the vertices in the graph are (re)numbered
         from 0 to V-1 where V is the total number of vertices.
diff --git a/python/cugraph/cugraph/link_prediction/sorensen.py b/python/cugraph/cugraph/link_prediction/sorensen.py
index 12499a94109..0f35f868b7c 100644
--- a/python/cugraph/cugraph/link_prediction/sorensen.py
+++ b/python/cugraph/cugraph/link_prediction/sorensen.py
@@ -53,7 +53,7 @@ def sorensen(input_graph, vertex_pair=None, do_expensive_check=True):
         given vertex pairs.  If the vertex_pair is not provided then the
         current implementation computes the Sorensen coefficient for all
         adjacent vertices in the graph.
-    
+
     do_expensive_check: bool (default=True)
         When set to True, check if the vertices in the graph are (re)numbered
         from 0 to V-1 where V is the total number of vertices.
diff --git a/python/cugraph/cugraph/link_prediction/wjaccard.py b/python/cugraph/cugraph/link_prediction/wjaccard.py
index 3213a24b037..fc6edae8d3e 100644
--- a/python/cugraph/cugraph/link_prediction/wjaccard.py
+++ b/python/cugraph/cugraph/link_prediction/wjaccard.py
@@ -54,7 +54,7 @@ def jaccard_w(input_graph, weights, vertex_pair=None, do_expensive_check=True):
         A GPU dataframe consisting of two columns representing pairs of
         vertices. If provided, the jaccard coefficient is computed for the
         given vertex pairs, else, it is computed for all vertex pairs.
-    
+
     do_expensive_check: bool (default=True)
         When set to True, check if the vertices in the graph are (re)numbered
         from 0 to V-1 where V is the total number of vertices.
diff --git a/python/cugraph/cugraph/link_prediction/woverlap.py b/python/cugraph/cugraph/link_prediction/woverlap.py
index 8db50b8fc7d..27fb7d608ca 100644
--- a/python/cugraph/cugraph/link_prediction/woverlap.py
+++ b/python/cugraph/cugraph/link_prediction/woverlap.py
@@ -54,7 +54,7 @@ def overlap_w(input_graph, weights, vertex_pair=None, do_expensive_check=True):
         A GPU dataframe consisting of two columns representing pairs of
         vertices. If provided, the overlap coefficient is computed for the
         given vertex pairs, else, it is computed for all vertex pairs.
-    
+
     do_expensive_check: bool (default=True)
         When set to True, check if the vertices in the graph are (re)numbered
         from 0 to V-1 where V is the total number of vertices.
diff --git a/python/cugraph/cugraph/link_prediction/wsorensen.py b/python/cugraph/cugraph/link_prediction/wsorensen.py
index d5b54ee6401..c27e4f66a02 100644
--- a/python/cugraph/cugraph/link_prediction/wsorensen.py
+++ b/python/cugraph/cugraph/link_prediction/wsorensen.py
@@ -50,7 +50,7 @@ def sorensen_w(input_graph, weights, vertex_pair=None, do_expensive_check=True):
         A GPU dataframe consisting of two columns representing pairs of
         vertices. If provided, the sorensen coefficient is computed for the
         given vertex pairs, else, it is computed for all vertex pairs.
-    
+
     do_expensive_check: bool (default=True)
         When set to True, check if the vertices in the graph are (re)numbered
         from 0 to V-1 where V is the total number of vertices.