From 81481237b445e66f47f6218b802d94f731acb2ae Mon Sep 17 00:00:00 2001
From: Joseph Nke <76006812+jnke2016@users.noreply.github.com>
Date: Wed, 30 Nov 2022 11:29:05 -0600
Subject: [PATCH] Shuffle the vertex pair (#3002)

An illegal memory access occurs when running the MG similarity algos at
certain scales. This is caused by vertex pairs not being shuffled
appropriately. This PR:

1. Shuffle the vertex pairs based on the edge partitioning
2. Update the vertex pair column names, which are not necessarily edge lists
3. Update the docstrings, tests and notebooks accordingly

closes #3001

Authors:
  - Joseph Nke (https://github.com/jnke2016)
  - Chuck Hastings (https://github.com/ChuckHastings)

Approvers:
  - Rick Ratzel (https://github.com/rlratzel)
  - Chuck Hastings (https://github.com/ChuckHastings)

URL: https://github.com/rapidsai/cugraph/pull/3002
---
 cpp/src/c_api/graph_functions.cpp             |  8 ++-
 .../link_prediction/Jaccard-Similarity.ipynb  | 26 ++++-----
 .../link_prediction/Overlap-Similarity.ipynb  | 56 +++++++++----------
 notebooks/algorithms/structure/Renumber.ipynb | 10 ++--
 .../cugraph/dask/link_prediction/jaccard.py   | 24 ++++----
 .../cugraph/dask/link_prediction/overlap.py   | 24 ++++----
 .../cugraph/dask/link_prediction/sorensen.py  | 24 ++++----
 .../experimental/link_prediction/jaccard.py   | 49 ++++++++--------
 .../experimental/link_prediction/overlap.py   | 49 ++++++++--------
 .../experimental/link_prediction/sorensen.py  | 45 +++++++--------
 .../cugraph/link_prediction/jaccard.py        | 26 ++++-----
 .../link_prediction/jaccard_wrapper.pyx       | 12 ++--
 .../cugraph/link_prediction/overlap.py        | 18 +++---
 .../link_prediction/overlap_wrapper.pyx       | 12 ++--
 .../cugraph/link_prediction/sorensen.py       | 22 ++++----
 .../cugraph/link_prediction/wjaccard.py       | 16 +++---
 .../cugraph/link_prediction/woverlap.py       | 16 +++---
 .../cugraph/link_prediction/wsorensen.py      | 16 +++---
 .../cugraph/tests/mg/test_mg_jaccard.py       |  4 +-
 .../cugraph/tests/mg/test_mg_overlap.py       |  4 +-
 .../cugraph/tests/mg/test_mg_sorensen.py      |  4 +-
 python/cugraph/cugraph/tests/test_jaccard.py  | 14 ++---
 python/cugraph/cugraph/tests/test_overlap.py  |  8 +--
 python/cugraph/cugraph/tests/test_sorensen.py | 14 ++---
 python/cugraph/cugraph/tests/test_wjaccard.py |  6 +-
 python/cugraph/cugraph/tests/test_woverlap.py |  6 +-
 .../cugraph/cugraph/tests/test_wsorensen.py   |  6 +-
 .../pylibcugraph/jaccard_coefficients.pyx     | 14 ++++-
 .../pylibcugraph/overlap_coefficients.pyx     | 14 ++++-
 .../pylibcugraph/sorensen_coefficients.pyx    | 14 ++++-
 30 files changed, 294 insertions(+), 267 deletions(-)

diff --git a/cpp/src/c_api/graph_functions.cpp b/cpp/src/c_api/graph_functions.cpp
index 6c813191233..bcb9fdf761f 100644
--- a/cpp/src/c_api/graph_functions.cpp
+++ b/cpp/src/c_api/graph_functions.cpp
@@ -71,7 +71,13 @@ struct create_vertex_pairs_functor : public cugraph::c_api::abstract_functor {
       second_copy.data(), second_->as_type(), second_->size_, handle_.get_stream());
 
     if constexpr (multi_gpu) {
-      // FIXME: shuffle first_copy/second_copy
+      std::tie(first_copy, second_copy, std::ignore) =
+        cugraph::detail::shuffle_edgelist_by_gpu_id(
+          handle_,
+          std::move(first_copy),
+          std::move(second_copy),
+          std::nullopt);  // vertex pairs should be shuffled based on the edge partitioning, so we
+                          // can use this edge shuffling function to shuffle vertex pairs.
} result_ = new cugraph::c_api::cugraph_vertex_pairs_t{ diff --git a/notebooks/algorithms/link_prediction/Jaccard-Similarity.ipynb b/notebooks/algorithms/link_prediction/Jaccard-Similarity.ipynb index 4f555753f94..146a17af58f 100755 --- a/notebooks/algorithms/link_prediction/Jaccard-Similarity.ipynb +++ b/notebooks/algorithms/link_prediction/Jaccard-Similarity.ipynb @@ -59,9 +59,9 @@ "Returns:\n", "\n", " df: cudf.DataFrame with three names columns:\n", - " df[\"source\"]: The source vertex id.\n", - " df[\"destination\"]: The destination vertex id.\n", - " df[\"jaccard_coeff\"]: The jaccard coefficient computed between the source and destination vertex.\n", + " df[\"first\"]: The first vertex id of each pair.\n", + " df[\"second\"]: The second vertex i of each pair.\n", + " df[\"jaccard_coeff\"]: The jaccard coefficient computed between the vertex pairs.\n", "
\n", "\n", "__References__ \n", @@ -102,9 +102,9 @@ "Returns: \n", "\n", " df: cudf.DataFrame with three names columns:\n", - " df['source']: The source vertex id.\n", - " df['destination']: The destination vertex id.\n", - " df['jaccard_coeff']: The weighted jaccard coefficient computed between the source and destination vertex.\n", + " df['first']: The first vertex id of each pair.\n", + " df['second']: The second vertex id of each pair.\n", + " df['jaccard_coeff']: The weighted jaccard coefficient computed between the vertex pairs.\n", " \n", "\n", "__Note:__ For this example we will be using PageRank as the edge weights. Please review the PageRank notebook if you have any questions about running PageRank\n" @@ -187,8 +187,8 @@ " \n", " #find the best\n", " for i in range(len(dm)): \n", - " print(\"Vertices \" + str(dm['source'].iloc[i]) + \" and \" + \n", - " str(dm['destination'].iloc[i]) + \" are most similar with score: \" \n", + " print(\"Vertices \" + str(dm['first'].iloc[i]) + \" and \" + \n", + " str(dm['second'].iloc[i]) + \" are most similar with score: \" \n", " + str(dm['jaccard_coeff'].iloc[i]))\n", " del jmax\n", " del dm" @@ -206,8 +206,8 @@ " filtered = _d.query('jaccard_coeff > @limit')\n", " \n", " for i in range(len(filtered)):\n", - " print(\"Vertices \" + str(filtered['source'].iloc[i]) + \" and \" + \n", - " str(filtered['destination'].iloc[i]) + \" are similar with score: \" + \n", + " print(\"Vertices \" + str(filtered['first'].iloc[i]) + \" and \" + \n", + " str(filtered['second'].iloc[i]) + \" are similar with score: \" + \n", " str(filtered['jaccard_coeff'].iloc[i]))" ] }, @@ -322,7 +322,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The Most similar shoul be 33 and 34.\n", + "The Most similar should be 33 and 34.\n", "Vertex 33 has 12 neighbors, vertex 34 has 17 neighbors. They share 10 neighbors in common:\n", "$jaccard = 10 / (10 + (12 -10) + (17-10)) = 10 / 19 = 0.526$" ] @@ -348,7 +348,7 @@ "# Before printing, let's get rid of the duplicates (x compared to y is the same as y compared to x). We will do that\n", "# by performing a query. 
Then let's sort the data by score\n", "\n", - "jdf_s = jdf.query('source < destination').sort_values(by='jaccard_coeff', ascending=False)\n", + "jdf_s = jdf.query('first < second').sort_values(by='jaccard_coeff', ascending=False)\n", "\n", "print_jaccard_threshold(jdf_s, 0.0)" ] @@ -500,4 +500,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file diff --git a/notebooks/algorithms/link_prediction/Overlap-Similarity.ipynb b/notebooks/algorithms/link_prediction/Overlap-Similarity.ipynb index 20ad406ae4e..8ed7aca1f24 100755 --- a/notebooks/algorithms/link_prediction/Overlap-Similarity.ipynb +++ b/notebooks/algorithms/link_prediction/Overlap-Similarity.ipynb @@ -59,9 +59,9 @@ "Returns:\n", "\n", " df: cudf.DataFrame with three names columns:\n", - " df[\"source\"]: The source vertex id.\n", - " df[\"destination\"]: The destination vertex id.\n", - " df[\"overlap_coeff\"]: The overlap coefficient computed between the source and destination vertex.\n", + " df[\"first\"]: The first vertex id of each pair.\n", + " df[\"second\"]: The second vertex id of each pair.\n", + " df[\"overlap_coeff\"]: The overlap coefficient computed between the vertex pairs.\n", "\n", "__References__\n", "- https://en.wikipedia.org/wiki/Overlap_coefficient\n" @@ -87,9 +87,9 @@ "Returns:\n", "\n", " df: cudf.DataFrame with three names columns:\n", - " df[\"source\"]: The source vertex id.\n", - " df[\"destination\"]: The destination vertex id.\n", - " df[\"jaccard_coeff\"]: The jaccard coefficient computed between the source and destination vertex.\n", + " df[\"first\"]: The first vertex id of each pair.\n", + " df[\"second\"]: The second vertex id of each pair.\n", + " df[\"jaccard_coeff\"]: The jaccard coefficient computed between the vertex pairs.\n", "
\n", "\n", "See the Jaccard notebook for additional information and background" @@ -174,8 +174,8 @@ " \n", " #find the best\n", " for i in range(len(dm)): \n", - " print(\"Vertices \" + str(dm['source'].iloc[i]) + \" and \" + \n", - " str(dm['destination'].iloc[i]) + \" are most similar with score: \" \n", + " print(\"Vertices \" + str(dm['first'].iloc[i]) + \" and \" + \n", + " str(dm['second'].iloc[i]) + \" are most similar with score: \" \n", " + str(dm['jaccard_coeff'].iloc[i]))\n", " del jmax\n", " del dm" @@ -191,11 +191,11 @@ "def print_most_similar_overlap(df):\n", " \n", " smax = df['overlap_coeff'].max()\n", - " dm = df.query('overlap_coeff >= @smax and source < destination') \n", + " dm = df.query('overlap_coeff >= @smax and first < second') \n", " \n", " for i in range(len(dm)):\n", - " print(\"Vertices \" + str(dm['source'].iloc[i]) + \" and \" + \n", - " str(dm['destination'].iloc[i]) + \" are most similar with score: \" \n", + " print(\"Vertices \" + str(dm['first'].iloc[i]) + \" and \" + \n", + " str(dm['second'].iloc[i]) + \" are most similar with score: \" \n", " + str(dm['overlap_coeff'].iloc[i]))\n", " \n", " del smax\n", @@ -214,8 +214,8 @@ " filtered = _d.query('jaccard_coeff > @limit')\n", " \n", " for i in range(len(filtered)):\n", - " print(\"Vertices \" + str(filtered['source'].iloc[i]) + \" and \" + \n", - " str(filtered['destination'].iloc[i]) + \" are similar with score: \" + \n", + " print(\"Vertices \" + str(filtered['first'].iloc[i]) + \" and \" + \n", + " str(filtered['second'].iloc[i]) + \" are similar with score: \" + \n", " str(filtered['jaccard_coeff'].iloc[i]))" ] }, @@ -231,9 +231,9 @@ " filtered = _d.query('overlap_coeff > @limit')\n", " \n", " for i in range(len(filtered)):\n", - " if filtered['source'].iloc[i] != filtered['destination'].iloc[i] :\n", - " print(\"Vertices \" + str(filtered['source'].iloc[i]) + \" and \" + \n", - " str(filtered['destination'].iloc[i]) + \" are similar with score: \" + \n", + " if filtered['first'].iloc[i] != filtered['second'].iloc[i] :\n", + " print(\"Vertices \" + str(filtered['first'].iloc[i]) + \" and \" + \n", + " str(filtered['second'].iloc[i]) + \" are similar with score: \" + \n", " str(filtered['overlap_coeff'].iloc[i]))" ] }, @@ -361,7 +361,7 @@ "# Before printing, let's get rid of the duplicates (x compared to y is the same as y compared to x). We will do that\n", "# by performing a query. 
Then let's sort the data by score\n", "\n", - "jdf_s = jdf.query('source < destination').sort_values(by='jaccard_coeff', ascending=False)\n", + "jdf_s = jdf.query('first < second').sort_values(by='jaccard_coeff', ascending=False)\n", "\n", "print_jaccard_threshold(jdf_s, 0.0)" ] @@ -410,8 +410,8 @@ "outputs": [], "source": [ "# print all similarities over a threshold, in this case 0.5\n", - "#also, drop duplicates\n", - "odf_s = odf.query('source < destination').sort_values(by='overlap_coeff', ascending=False)\n", + "# also, drop duplicates\n", + "odf_s = odf.query('first < second').sort_values(by='overlap_coeff', ascending=False)\n", "\n", "print_overlap_threshold(odf_s, 0.5)" ] @@ -467,7 +467,7 @@ "source": [ "# print all similarities over a threshold, in this case 0.5\n", "# also, drop duplicates\n", - "odf_s2 = ol2.query('source < destination').sort_values(by='overlap_coeff', ascending=False)\n", + "odf_s2 = ol2.query('first < second').sort_values(by='overlap_coeff', ascending=False)\n", "\n", "print_overlap_threshold(odf_s2, 0.74)" ] @@ -513,7 +513,7 @@ "outputs": [], "source": [ "# Let's combine the Jaccard and Overlap scores\n", - "mdf = jdf.merge(odf, on=['source','destination'])" + "mdf = jdf.merge(odf, on=['first','second'])" ] }, { @@ -532,8 +532,8 @@ "metadata": {}, "outputs": [], "source": [ - "dS = degree.rename(columns={'vertex':'source','degree': 'src_degree'})\n", - "dD = degree.rename(columns={'vertex':'destination','degree': 'dst_degree'})" + "dS = degree.rename(columns={'vertex':'first','degree': 'src_degree'})\n", + "dD = degree.rename(columns={'vertex':'second','degree': 'dst_degree'})" ] }, { @@ -542,8 +542,8 @@ "metadata": {}, "outputs": [], "source": [ - "m = mdf.merge(dS, how=\"left\", on='source')\n", - "m = m.merge(dD, how=\"left\", on='destination')" + "m = mdf.merge(dS, how=\"left\", on='first')\n", + "m = m.merge(dD, how=\"left\", on='second')" ] }, { @@ -552,7 +552,7 @@ "metadata": {}, "outputs": [], "source": [ - "m.query('source < destination').sort_values(by='jaccard_coeff', ascending=False).head(20)" + "m.query('first < second').sort_values(by='jaccard_coeff', ascending=False).head(20)" ] }, { @@ -562,7 +562,7 @@ "outputs": [], "source": [ "# Now sort on the overlap\n", - "m.query('source < destination').sort_values(by='overlap_coeff', ascending=False).head(20)" + "m.query('first < second').sort_values(by='overlap_coeff', ascending=False).head(20)" ] }, { @@ -605,4 +605,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file diff --git a/notebooks/algorithms/structure/Renumber.ipynb b/notebooks/algorithms/structure/Renumber.ipynb index cc4d6901824..13b1eeba074 100755 --- a/notebooks/algorithms/structure/Renumber.ipynb +++ b/notebooks/algorithms/structure/Renumber.ipynb @@ -273,16 +273,16 @@ "source": [ "jac = cugraph.jaccard(G)\n", "\n", - "jac = numbering.unrenumber(jac, 'source')\n", - "jac = numbering.unrenumber(jac, 'destination')\n", + "jac = numbering.unrenumber(jac, 'first')\n", + "jac = numbering.unrenumber(jac, 'second')\n", "\n", "jac.insert(len(jac.columns),\n", " \"original_source\",\n", - " [ socket.inet_ntoa(struct.pack('!L', x)) for x in jac['source'].values_host ])\n", + " [ socket.inet_ntoa(struct.pack('!L', x)) for x in jac['first'].values_host ])\n", "\n", "jac.insert(len(jac.columns),\n", " \"original_destination\",\n", - " [ socket.inet_ntoa(struct.pack('!L', x)) for x in jac['destination'].values_host ])\n", + " [ socket.inet_ntoa(struct.pack('!L', x)) for x in jac['second'].values_host ])\n", "\n", 
"jac.to_pandas()\n" ] @@ -358,4 +358,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file diff --git a/python/cugraph/cugraph/dask/link_prediction/jaccard.py b/python/cugraph/cugraph/dask/link_prediction/jaccard.py index 62cb9a14a86..a04b8baa2be 100644 --- a/python/cugraph/cugraph/dask/link_prediction/jaccard.py +++ b/python/cugraph/cugraph/dask/link_prediction/jaccard.py @@ -31,11 +31,11 @@ def convert_to_cudf(cp_arrays): Creates a cudf DataFrame from cupy arrays from pylibcugraph wrapper """ - cupy_source, cupy_destination, cupy_similarity = cp_arrays + cupy_first, cupy_second, cupy_similarity = cp_arrays df = cudf.DataFrame() - df["source"] = cupy_source - df["destination"] = cupy_destination + df["first"] = cupy_first + df["second"] = cupy_second df["jaccard_coeff"] = cupy_similarity return df @@ -125,14 +125,14 @@ def jaccard(input_graph, vertex_pair=None, use_weight=False): result : dask_cudf.DataFrame GPU distributed data frame containing 2 dask_cudf.Series - ddf['source']: dask_cudf.Series - The source vertex ID (will be identical to first if specified) - ddf['destination']: dask_cudf.Series - The destination vertex ID (will be identical to second if - specified) + ddf['first']: dask_cudf.Series + The first vertex ID of each pair (will be identical to first if specified). + ddf['second']: dask_cudf.Series + The second vertex ID of each pair (will be identical to second if + specified). ddf['jaccard_coeff']: dask_cudf.Series - The computed Jaccard coefficient between the source and destination - vertices + The computed jaccard coefficient between the first and the second + vertex ID. """ if input_graph.is_directed(): @@ -198,7 +198,7 @@ def jaccard(input_graph, vertex_pair=None, use_weight=False): wait([(r.release(), c_r.release()) for r, c_r in zip(result, cudf_result)]) if input_graph.renumbered: - ddf = input_graph.unrenumber(ddf, "source") - ddf = input_graph.unrenumber(ddf, "destination") + ddf = input_graph.unrenumber(ddf, "first") + ddf = input_graph.unrenumber(ddf, "second") return ddf diff --git a/python/cugraph/cugraph/dask/link_prediction/overlap.py b/python/cugraph/cugraph/dask/link_prediction/overlap.py index def274cf974..4e3e844737e 100644 --- a/python/cugraph/cugraph/dask/link_prediction/overlap.py +++ b/python/cugraph/cugraph/dask/link_prediction/overlap.py @@ -31,11 +31,11 @@ def convert_to_cudf(cp_arrays): Creates a cudf DataFrame from cupy arrays from pylibcugraph wrapper """ - cupy_source, cupy_destination, cupy_similarity = cp_arrays + cupy_first, cupy_second, cupy_similarity = cp_arrays df = cudf.DataFrame() - df["source"] = cupy_source - df["destination"] = cupy_destination + df["first"] = cupy_first + df["second"] = cupy_second df["overlap_coeff"] = cupy_similarity return df @@ -103,14 +103,14 @@ def overlap(input_graph, vertex_pair=None, use_weight=False): result : dask_cudf.DataFrame GPU distributed data frame containing 2 dask_cudf.Series - ddf['source']: dask_cudf.Series - The source vertex ID (will be identical to first if specified) - ddf['destination']: dask_cudf.Series - The destination vertex ID (will be identical to second if - specified) + ddf['first']: dask_cudf.Series + The first vertex ID of each pair(will be identical to first if specified). + ddf['second']: dask_cudf.Series + The second vertex ID of each pair(will be identical to second if + specified). 
ddf['overlap_coeff']: dask_cudf.Series - The computed Overlap coefficient between the source and destination - vertices + The computed overlap coefficient between the first and the second + vertex ID. """ if input_graph.is_directed(): @@ -176,7 +176,7 @@ def overlap(input_graph, vertex_pair=None, use_weight=False): wait([(r.release(), c_r.release()) for r, c_r in zip(result, cudf_result)]) if input_graph.renumbered: - ddf = input_graph.unrenumber(ddf, "source") - ddf = input_graph.unrenumber(ddf, "destination") + ddf = input_graph.unrenumber(ddf, "first") + ddf = input_graph.unrenumber(ddf, "second") return ddf diff --git a/python/cugraph/cugraph/dask/link_prediction/sorensen.py b/python/cugraph/cugraph/dask/link_prediction/sorensen.py index de1116fa6ce..ad150a5116e 100644 --- a/python/cugraph/cugraph/dask/link_prediction/sorensen.py +++ b/python/cugraph/cugraph/dask/link_prediction/sorensen.py @@ -31,11 +31,11 @@ def convert_to_cudf(cp_arrays): Creates a cudf DataFrame from cupy arrays from pylibcugraph wrapper """ - cupy_source, cupy_destination, cupy_similarity = cp_arrays + cupy_first, cupy_second, cupy_similarity = cp_arrays df = cudf.DataFrame() - df["source"] = cupy_source - df["destination"] = cupy_destination + df["first"] = cupy_first + df["second"] = cupy_second df["sorensen_coeff"] = cupy_similarity return df @@ -99,14 +99,14 @@ def sorensen(input_graph, vertex_pair=None, use_weight=False): result : dask_cudf.DataFrame GPU distributed data frame containing 2 dask_cudf.Series - ddf['source']: dask_cudf.Series - The source vertex ID (will be identical to first if specified) - ddf['destination']: dask_cudf.Series - The destination vertex ID (will be identical to second if - specified) + ddf['first']: dask_cudf.Series + The first vertex ID of each pair(will be identical to first if specified). + ddf['second']: dask_cudf.Series + The second vertex ID of each pair(will be identical to second if + specified). ddf['sorensen_coeff']: dask_cudf.Series - The computed sorensen coefficient between the source and destination - vertices + The computed sorensen coefficient between the first and the second + vertex ID. """ if input_graph.is_directed(): @@ -172,7 +172,7 @@ def sorensen(input_graph, vertex_pair=None, use_weight=False): wait([(r.release(), c_r.release()) for r, c_r in zip(result, cudf_result)]) if input_graph.renumbered: - ddf = input_graph.unrenumber(ddf, "source") - ddf = input_graph.unrenumber(ddf, "destination") + ddf = input_graph.unrenumber(ddf, "first") + ddf = input_graph.unrenumber(ddf, "second") return ddf diff --git a/python/cugraph/cugraph/experimental/link_prediction/jaccard.py b/python/cugraph/cugraph/experimental/link_prediction/jaccard.py index 84aee3561d3..eba487eb8e5 100644 --- a/python/cugraph/cugraph/experimental/link_prediction/jaccard.py +++ b/python/cugraph/cugraph/experimental/link_prediction/jaccard.py @@ -74,7 +74,7 @@ def EXPERIMENTAL__jaccard(G, vertex_pair=None, use_weight=False): ---------- G : cugraph.Graph cuGraph Graph instance, should contain the connectivity information - as an edge list (edge weights are not used for this algorithm). The + as an edge list (edge weights are not supported yet for this algorithm). The graph should be undirected where an undirected edge is represented by a directed edge in both direction. The adjacency list will be computed if not already present. @@ -99,14 +99,14 @@ def EXPERIMENTAL__jaccard(G, vertex_pair=None, use_weight=False): relative to the adjacency list, or that given by the specified vertex pairs. 
- df['source'] : cudf.Series - The source vertex ID (will be identical to first if specified) - df['destination'] : cudf.Series - The destination vertex ID (will be identical to second if - specified) + df['first'] : cudf.Series + The first vertex ID of each pair (will be identical to first if specified). + df['second'] : cudf.Series + The second vertex ID of each pair (will be identical to second if + specified). df['jaccard_coeff'] : cudf.Series - The computed Jaccard coefficient between the source and destination - vertices + The computed jaccard coefficient between the first and the second + vertex ID. Examples -------- @@ -119,16 +119,11 @@ def EXPERIMENTAL__jaccard(G, vertex_pair=None, use_weight=False): if G.is_directed(): raise ValueError("Input must be an undirected Graph.") - if G.edgelist.weights: - raise ValueError( - "Weighted graphs are currently not supported " - "but will be in the next release." - ) + if G.is_weighted(): + raise ValueError("Weighted graphs are currently not supported.") if use_weight: - raise ValueError( - "'use_weight' is currently not supported but will " "be in the next release" - ) + raise ValueError("'use_weight' is currently not supported.") if vertex_pair is None: # Call two_hop neighbor of the entire graph @@ -163,7 +158,7 @@ def EXPERIMENTAL__jaccard(G, vertex_pair=None, use_weight=False): if v_p_num_col == 2: # single column vertex vertex_pair = vertex_pair.rename( - columns={src_col_name: "source", dst_col_name: "destination"} + columns={src_col_name: "first", dst_col_name: "second"} ) df = vertex_pair @@ -180,7 +175,7 @@ def EXPERIMENTAL__jaccard_coefficient(G, ebunch=None, use_weight=False): ---------- graph : cugraph.Graph cuGraph Graph instance, should contain the connectivity information - as an edge list (edge weights are not used for this algorithm). The + as an edge list (edge weights are not supported yet for this algorithm). The graph should be undirected where an undirected edge is represented by a directed edge in both direction. The adjacency list will be computed if not already present. @@ -203,14 +198,14 @@ def EXPERIMENTAL__jaccard_coefficient(G, ebunch=None, use_weight=False): relative to the adjacency list, or that given by the specified vertex pairs. - df['source'] : cudf.Series - The source vertex ID (will be identical to first if specified) - df['destination'] : cudf.Series - The destination vertex ID (will be identical to second if - specified) - df['jaccard_coeff'] : cudf.Series - The computed Jaccard coefficient between the source and destination - vertices + ddf['first']: dask_cudf.Series + The first vertex ID of each pair (will be identical to first if specified). + ddf['second']: dask_cudf.Series + The second vertex ID of each pair (will be identical to second if + specified). + ddf['jaccard_coeff']: dask_cudf.Series + The computed jaccard coefficient between the first and the second + vertex ID. 
Examples -------- @@ -233,7 +228,7 @@ def EXPERIMENTAL__jaccard_coefficient(G, ebunch=None, use_weight=False): if isNx is True: df = df_edge_score_to_dictionary( - df, k="jaccard_coeff", src="source", dst="destination" + df, k="jaccard_coeff", src="first", dst="second" ) return df diff --git a/python/cugraph/cugraph/experimental/link_prediction/overlap.py b/python/cugraph/cugraph/experimental/link_prediction/overlap.py index 372cd93e727..b587dc3bfed 100644 --- a/python/cugraph/cugraph/experimental/link_prediction/overlap.py +++ b/python/cugraph/cugraph/experimental/link_prediction/overlap.py @@ -30,9 +30,9 @@ def EXPERIMENTAL__overlap_coefficient(G, ebunch=None, use_weight=False): Parameters ---------- - graph : cugraph.Graph + G : cugraph.Graph cuGraph Graph instance, should contain the connectivity information - as an edge list (edge weights are not used for this algorithm). The + as an edge list (edge weights are not supported yet for this algorithm). The graph should be undirected where an undirected edge is represented by a directed edge in both direction. The adjacency list will be computed if not already present. @@ -55,14 +55,14 @@ def EXPERIMENTAL__overlap_coefficient(G, ebunch=None, use_weight=False): relative to the adjacency list, or that given by the specified vertex pairs. - df['source'] : cudf.Series - The source vertex ID (will be identical to first if specified) - df['destination'] : cudf.Series - The destination vertex ID (will be identical to second if - specified) - df['overlap_coeff'] : cudf.Series - The computed Overlap coefficient between the source and destination - vertices + ddf['first']: dask_cudf.Series + The first vertex ID of each pair (will be identical to first if specified). + ddf['second']: dask_cudf.Series + The second vertex ID of each pair (will be identical to second if + specified). + ddf['overlap_coeff']: dask_cudf.Series + The computed overlap coefficient between the first and the second + vertex ID. Examples -------- @@ -84,7 +84,7 @@ def EXPERIMENTAL__overlap_coefficient(G, ebunch=None, use_weight=False): if isNx is True: df = df_edge_score_to_dictionary( - df, k="overlap_coeff", src="source", dst="destination" + df, k="overlap_coeff", src="first", dst="second" ) return df @@ -112,7 +112,7 @@ def EXPERIMENTAL__overlap(G, vertex_pair=None, use_weight=False): ---------- G : cugraph.Graph cuGraph Graph instance, should contain the connectivity information - as an edge list (edge weights are not used for this algorithm). The + as an edge list (edge weights are not supported yet for this algorithm). The adjacency list will be computed if not already present. This implementation only supports undirected, unweighted Graph. @@ -133,14 +133,14 @@ def EXPERIMENTAL__overlap(G, vertex_pair=None, use_weight=False): relative to the adjacency list, or that given by the specified vertex pairs. - df['source'] : cudf.Series - The source vertex ID (will be identical to first if specified). - df['destination'] : cudf.Series - The destination vertex ID (will be identical to second if + df['first'] : cudf.Series + The first vertex ID of each pair (will be identical to first if specified). + df['second'] : cudf.Series + The second vertex ID of each pair (will be identical to second if specified). df['overlap_coeff'] : cudf.Series - The computed Overlap coefficient between the source and destination - vertices. + The computed overlap coefficient between the first and the second + vertex ID. 
Examples -------- @@ -154,16 +154,11 @@ def EXPERIMENTAL__overlap(G, vertex_pair=None, use_weight=False): if G.is_directed(): raise ValueError("Input must be an undirected Graph.") - if G.edgelist.weights: - raise ValueError( - "Weighted graphs are currently not supported " - "but will be in the next release." - ) + if G.is_weighted(): + raise ValueError("Weighted graphs are currently not supported.") if use_weight: - raise ValueError( - "'use_weight' is currently not supported but will " "be in the next release" - ) + raise ValueError("'use_weight' is currently not supported.") if vertex_pair is None: # Call two_hop neighbor of the entire graph @@ -198,7 +193,7 @@ def EXPERIMENTAL__overlap(G, vertex_pair=None, use_weight=False): if v_p_num_col == 2: # single column vertex vertex_pair = vertex_pair.rename( - columns={src_col_name: "source", dst_col_name: "destination"} + columns={src_col_name: "first", dst_col_name: "second"} ) df = vertex_pair diff --git a/python/cugraph/cugraph/experimental/link_prediction/sorensen.py b/python/cugraph/cugraph/experimental/link_prediction/sorensen.py index ca2069ec44c..7908127cac5 100644 --- a/python/cugraph/cugraph/experimental/link_prediction/sorensen.py +++ b/python/cugraph/cugraph/experimental/link_prediction/sorensen.py @@ -42,7 +42,7 @@ def EXPERIMENTAL__sorensen(G, vertex_pair=None, use_weight=False): ---------- G : cugraph.Graph cuGraph Graph instance, should contain the connectivity information - as an edge list (edge weights are not used for this algorithm). The + as an edge list (edge weights are not supported yet for this algorithm). The graph should be undirected where an undirected edge is represented by a directed edge in both direction. The adjacency list will be computed if not already present. @@ -67,14 +67,14 @@ def EXPERIMENTAL__sorensen(G, vertex_pair=None, use_weight=False): relative to the adjacency list, or that given by the specified vertex pairs. - df['source'] : cudf.Series - The source vertex ID (will be identical to first if specified) - df['destination'] : cudf.Series - The destination vertex ID (will be identical to second if - specified) + df['first'] : cudf.Series + The first vertex ID of each pair (will be identical to first if specified). + df['second'] : cudf.Series + The second vertex ID of each pair (will be identical to second if + specified). df['sorensen_coeff'] : cudf.Series - The computed Sorensen coefficient between the source and - destination vertices + The computed sorensen coefficient between the first and the second + vertex ID. Examples -------- @@ -87,16 +87,11 @@ def EXPERIMENTAL__sorensen(G, vertex_pair=None, use_weight=False): if G.is_directed(): raise ValueError("Input must be an undirected Graph.") - if G.edgelist.weights: - raise ValueError( - "Weighted graphs are currently not supported " - "but will be in the next release." 
- ) + if G.is_weighted(): + raise ValueError("Weighted graphs are currently not supported.") if use_weight: - raise ValueError( - "'use_weight' is currently not supported but will " "be in the next release" - ) + raise ValueError("'use_weight' is currently not supported.") if vertex_pair is None: # Call two_hop neighbor of the entire graph @@ -131,7 +126,7 @@ def EXPERIMENTAL__sorensen(G, vertex_pair=None, use_weight=False): if v_p_num_col == 2: # single column vertex vertex_pair = vertex_pair.rename( - columns={src_col_name: "source", dst_col_name: "destination"} + columns={src_col_name: "first", dst_col_name: "second"} ) df = vertex_pair @@ -169,14 +164,14 @@ def EXPERIMENTAL__sorensen_coefficient(G, ebunch=None, use_weight=False): relative to the adjacency list, or that given by the specified vertex pairs. - df['source'] : cudf.Series - The source vertex ID (will be identical to first if specified) - df['destination'] : cudf.Series - The destination vertex ID (will be identical to second if - specified) + df['first'] : cudf.Series + The first vertex ID of each pair (will be identical to first if specified). + df['second'] : cudf.Series + The second vertex ID of each pair (will be identical to second if + specified). df['sorensen_coeff'] : cudf.Series - The computed sorensen coefficient between the source and - destination vertices + The computed sorensen coefficient between the first and the second + vertex ID. Examples -------- @@ -199,7 +194,7 @@ def EXPERIMENTAL__sorensen_coefficient(G, ebunch=None, use_weight=False): if isNx is True: df = df_edge_score_to_dictionary( - df, k="sorensen_coeff", src="source", dst="destination" + df, k="sorensen_coeff", src="first", dst="second" ) return df diff --git a/python/cugraph/cugraph/link_prediction/jaccard.py b/python/cugraph/cugraph/link_prediction/jaccard.py index 2e5590a0cca..1c4fed7a8f9 100644 --- a/python/cugraph/cugraph/link_prediction/jaccard.py +++ b/python/cugraph/cugraph/link_prediction/jaccard.py @@ -89,13 +89,13 @@ def jaccard(input_graph, vertex_pair=None): pairs. df['source'] : cudf.Series - The source vertex ID (will be identical to first if specified) + The source vertex ID (will be identical to first if specified). df['destination'] : cudf.Series The destination vertex ID (will be identical to second if - specified) + specified). df['jaccard_coeff'] : cudf.Series - The computed Jaccard coefficient between the source and destination - vertices + The computed jaccard coefficient between the first and the second + vertex ID. Examples -------- @@ -114,8 +114,8 @@ def jaccard(input_graph, vertex_pair=None): df = jaccard_wrapper.jaccard(input_graph, None, vertex_pair) if input_graph.renumbered: - df = input_graph.unrenumber(df, "source") - df = input_graph.unrenumber(df, "destination") + df = input_graph.unrenumber(df, "first") + df = input_graph.unrenumber(df, "second") return df @@ -148,14 +148,14 @@ def jaccard_coefficient(G, ebunch=None): relative to the adjacency list, or that given by the specified vertex pairs. - df['source'] : cudf.Series - The source vertex ID (will be identical to first if specified) - df['destination'] : cudf.Series - The destination vertex ID (will be identical to second if - specified) + df['first'] : cudf.Series + The first vertex ID of each pair (will be identical to first if specified). + df['second'] : cudf.Series + the second vertex ID of each pair (will be identical to second if + specified). 
df['jaccard_coeff'] : cudf.Series The computed Jaccard coefficient between the source and destination - vertices + vertices. Examples -------- @@ -175,7 +175,7 @@ def jaccard_coefficient(G, ebunch=None): if isNx is True: df = df_edge_score_to_dictionary( - df, k="jaccard_coeff", src="source", dst="destination" + df, k="jaccard_coeff", src="first", dst="second" ) return df diff --git a/python/cugraph/cugraph/link_prediction/jaccard_wrapper.pyx b/python/cugraph/cugraph/link_prediction/jaccard_wrapper.pyx index 8d236c60ee2..e66d8bf0b5c 100644 --- a/python/cugraph/cugraph/link_prediction/jaccard_wrapper.pyx +++ b/python/cugraph/cugraph/link_prediction/jaccard_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2021, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -86,8 +86,8 @@ def jaccard(input_graph, weights_arr=None, vertex_pair=None): second = vertex_pair[cols[1]].astype(np.int32) # FIXME: multi column support - df['source'] = first - df['destination'] = second + df['first'] = first + df['second'] = second c_first_col = first.__cuda_array_interface__['data'][0] c_second_col = second.__cuda_array_interface__['data'][0] @@ -116,10 +116,10 @@ def jaccard(input_graph, weights_arr=None, vertex_pair=None): assert vertex_pair is None df = cudf.DataFrame() - df['source'] = cudf.Series(np.zeros(num_edges, indices.dtype)) - df['destination'] = indices + df['first'] = cudf.Series(np.zeros(num_edges, indices.dtype)) + df['second'] = indices - c_src_index_col = df['source'].__cuda_array_interface__['data'][0] + c_src_index_col = df['first'].__cuda_array_interface__['data'][0] if weight_type == np.float32: df['jaccard_coeff'] = cudf.Series(np.ones(num_edges, dtype=np.float32), diff --git a/python/cugraph/cugraph/link_prediction/overlap.py b/python/cugraph/cugraph/link_prediction/overlap.py index 161632a08b4..ba9f225062e 100644 --- a/python/cugraph/cugraph/link_prediction/overlap.py +++ b/python/cugraph/cugraph/link_prediction/overlap.py @@ -36,7 +36,7 @@ def overlap_coefficient(G, ebunch=None): if isNx is True: df = df_edge_score_to_dictionary( - df, k="overlap_coeff", src="source", dst="destination" + df, k="overlap_coeff", src="first", dst="second" ) return df @@ -74,14 +74,14 @@ def overlap(input_graph, vertex_pair=None): relative to the adjacency list, or that given by the specified vertex pairs. - df['source'] : cudf.Series - The source vertex ID (will be identical to first if specified). - df['destination'] : cudf.Series - The destination vertex ID (will be identical to second if + df['first'] : cudf.Series + The first vertex ID of each pair (will be identical to first if specified). + df['second'] : cudf.Series + The second vertex ID of each pair (will be identical to second if specified). df['overlap_coeff'] : cudf.Series - The computed Overlap coefficient between the source and destination - vertices. + The computed overlap coefficient between the first and the second + vertex ID. 
Examples -------- @@ -99,7 +99,7 @@ def overlap(input_graph, vertex_pair=None): df = overlap_wrapper.overlap(input_graph, None, vertex_pair) if input_graph.renumbered: - df = input_graph.unrenumber(df, "source") - df = input_graph.unrenumber(df, "destination") + df = input_graph.unrenumber(df, "first") + df = input_graph.unrenumber(df, "second") return df diff --git a/python/cugraph/cugraph/link_prediction/overlap_wrapper.pyx b/python/cugraph/cugraph/link_prediction/overlap_wrapper.pyx index ec0274716fb..0f61460a72f 100644 --- a/python/cugraph/cugraph/link_prediction/overlap_wrapper.pyx +++ b/python/cugraph/cugraph/link_prediction/overlap_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2021, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -73,8 +73,8 @@ def overlap(input_graph, weights_arr=None, vertex_pair=None): second = vertex_pair[cols[1]] # FIXME: multi column support - df['source'] = first - df['destination'] = second + df['first'] = first + df['second'] = second c_first_col = first.__cuda_array_interface__['data'][0] c_second_col = second.__cuda_array_interface__['data'][0] @@ -103,10 +103,10 @@ def overlap(input_graph, weights_arr=None, vertex_pair=None): assert vertex_pair is None df = cudf.DataFrame() - df['source'] = cudf.Series(np.zeros(num_edges, indices.dtype)) - df['destination'] = indices + df['first'] = cudf.Series(np.zeros(num_edges, indices.dtype)) + df['second'] = indices - c_src_index_col = df['source'].__cuda_array_interface__['data'][0] + c_src_index_col = df['first'].__cuda_array_interface__['data'][0] if weight_type == np.float32: df['overlap_coeff'] = cudf.Series(np.ones(num_edges, dtype=np.float32), diff --git a/python/cugraph/cugraph/link_prediction/sorensen.py b/python/cugraph/cugraph/link_prediction/sorensen.py index 4269cd2fa1a..03db9b74db0 100644 --- a/python/cugraph/cugraph/link_prediction/sorensen.py +++ b/python/cugraph/cugraph/link_prediction/sorensen.py @@ -58,10 +58,10 @@ def sorensen(input_graph, vertex_pair=None): relative to the adjacency list, or that given by the specified vertex pairs. - df['source'] : cudf.Series - The source vertex ID (will be identical to first if specified) - df['destination'] : cudf.Series - The destination vertex ID (will be identical to second if + df['first'] : cudf.Series + The first vertex ID of each pair (will be identical to first if specified) + df['second'] : cudf.Series + The second vertex ID of each pair (will be identical to second if specified) df['sorensen_coeff'] : cudf.Series The computed Sorensen coefficient between the source and @@ -86,8 +86,8 @@ def sorensen(input_graph, vertex_pair=None): df.jaccard_coeff = (2 * df.jaccard_coeff) / (1 + df.jaccard_coeff) df.rename({"jaccard_coeff": "sorensen_coeff"}, axis=1, inplace=True) if input_graph.renumbered: - df = input_graph.unrenumber(df, "source") - df = input_graph.unrenumber(df, "destination") + df = input_graph.unrenumber(df, "first") + df = input_graph.unrenumber(df, "second") return df @@ -120,13 +120,13 @@ def sorensen_coefficient(G, ebunch=None): pairs. df['source'] : cudf.Series - The source vertex ID (will be identical to first if specified) + The source vertex ID (will be identical to first if specified). df['destination'] : cudf.Series The destination vertex ID (will be identical to second if - specified) + specified). 
df['sorensen_coeff'] : cudf.Series - The computed sorensen coefficient between the source and - destination vertices + The computed sorensen coefficient between the first and the second + vertex ID. Examples -------- @@ -146,7 +146,7 @@ def sorensen_coefficient(G, ebunch=None): if isNx is True: df = df_edge_score_to_dictionary( - df, k="sorensen_coeff", src="source", dst="destination" + df, k="sorensen_coeff", src="first", dst="second" ) return df diff --git a/python/cugraph/cugraph/link_prediction/wjaccard.py b/python/cugraph/cugraph/link_prediction/wjaccard.py index d155428f778..b8ef33d926f 100644 --- a/python/cugraph/cugraph/link_prediction/wjaccard.py +++ b/python/cugraph/cugraph/link_prediction/wjaccard.py @@ -59,13 +59,13 @@ def jaccard_w(input_graph, weights, vertex_pair=None): relative to the adjacency list, or that given by the specified vertex pairs. - df['source'] : cudf.Series - The source vertex ID - df['destination'] : cudf.Series - The destination vertex ID + df['first'] : cudf.Series + The first vertex ID of each pair. + df['second'] : cudf.Series + The second vertex ID of each pair. df['jaccard_coeff'] : cudf.Series - The computed weighted Jaccard coefficient between the source and - destination vertices. + The computed weighted Jaccard coefficient between the first and the + second vertex ID. Examples -------- @@ -111,7 +111,7 @@ def jaccard_w(input_graph, weights, vertex_pair=None): df = jaccard_wrapper.jaccard(input_graph, jaccard_weights, vertex_pair) if input_graph.renumbered: - df = input_graph.unrenumber(df, "source") - df = input_graph.unrenumber(df, "destination") + df = input_graph.unrenumber(df, "first") + df = input_graph.unrenumber(df, "second") return df diff --git a/python/cugraph/cugraph/link_prediction/woverlap.py b/python/cugraph/cugraph/link_prediction/woverlap.py index f894512b99f..87498c72e51 100644 --- a/python/cugraph/cugraph/link_prediction/woverlap.py +++ b/python/cugraph/cugraph/link_prediction/woverlap.py @@ -58,13 +58,13 @@ def overlap_w(input_graph, weights, vertex_pair=None): relative to the adjacency list, or that given by the specified vertex pairs. - df['source'] : cudf.Series - The source vertex ID - df['destination'] : cudf.Series - The destination vertex ID + df['first'] : cudf.Series + The first vertex ID of each pair. + df['second'] : cudf.Series + The second vertex ID of each pair. df['overlap_coeff'] : cudf.Series - The computed weighted Overlap coefficient between the source and - destination vertices. + The computed weighted Overlap coefficient between the first and the + second vertex ID. Examples -------- @@ -107,7 +107,7 @@ def overlap_w(input_graph, weights, vertex_pair=None): df = overlap_wrapper.overlap(input_graph, overlap_weights, vertex_pair) if input_graph.renumbered: - df = input_graph.unrenumber(df, "source") - df = input_graph.unrenumber(df, "destination") + df = input_graph.unrenumber(df, "first") + df = input_graph.unrenumber(df, "second") return df diff --git a/python/cugraph/cugraph/link_prediction/wsorensen.py b/python/cugraph/cugraph/link_prediction/wsorensen.py index 01949be4690..00c89370106 100644 --- a/python/cugraph/cugraph/link_prediction/wsorensen.py +++ b/python/cugraph/cugraph/link_prediction/wsorensen.py @@ -54,13 +54,13 @@ def sorensen_w(input_graph, weights, vertex_pair=None): relative to the adjacency list, or that given by the specified vertex pairs. 
- df['source'] : cudf.Series - The source vertex ID - df['destination'] : cudf.Series - The destination vertex ID + df['first'] : cudf.Series + The first vertex ID of each pair. + df['second'] : cudf.Series + The second vertex ID of each pair. df['sorensen_coeff'] : cudf.Series - The computed weighted Sorensen coefficient between the source and - destination vertices. + The computed weighted Sorensen coefficient between the first and the + second vertex ID. Examples -------- @@ -103,7 +103,7 @@ def sorensen_w(input_graph, weights, vertex_pair=None): df.rename({"jaccard_coeff": "sorensen_coeff"}, axis=1, inplace=True) if input_graph.renumbered: - df = input_graph.unrenumber(df, "source") - df = input_graph.unrenumber(df, "destination") + df = input_graph.unrenumber(df, "first") + df = input_graph.unrenumber(df, "second") return df diff --git a/python/cugraph/cugraph/tests/mg/test_mg_jaccard.py b/python/cugraph/cugraph/tests/mg/test_mg_jaccard.py index 4a0e3b66465..e7b09ee72fc 100644 --- a/python/cugraph/cugraph/tests/mg/test_mg_jaccard.py +++ b/python/cugraph/cugraph/tests/mg/test_mg_jaccard.py @@ -129,14 +129,14 @@ def test_dask_jaccard(dask_client, benchmark, input_expected_output): result_jaccard = ( result_jaccard.compute() - .sort_values(["source", "destination"]) + .sort_values(["first", "second"]) .reset_index(drop=True) .rename(columns={"jaccard_coeff": "mg_cugraph_jaccard_coeff"}) ) expected_output = ( input_expected_output["sg_cugraph_results"] - .sort_values(["source", "destination"]) + .sort_values(["first", "second"]) .reset_index(drop=True) ) diff --git a/python/cugraph/cugraph/tests/mg/test_mg_overlap.py b/python/cugraph/cugraph/tests/mg/test_mg_overlap.py index 08fc7c27bac..ade24b31d64 100644 --- a/python/cugraph/cugraph/tests/mg/test_mg_overlap.py +++ b/python/cugraph/cugraph/tests/mg/test_mg_overlap.py @@ -129,14 +129,14 @@ def test_dask_overlap(dask_client, benchmark, input_expected_output): result_overlap = ( result_overlap.compute() - .sort_values(["source", "destination"]) + .sort_values(["first", "second"]) .reset_index(drop=True) .rename(columns={"overlap_coeff": "mg_cugraph_overlap_coeff"}) ) expected_output = ( input_expected_output["sg_cugraph_results"] - .sort_values(["source", "destination"]) + .sort_values(["first", "second"]) .reset_index(drop=True) ) diff --git a/python/cugraph/cugraph/tests/mg/test_mg_sorensen.py b/python/cugraph/cugraph/tests/mg/test_mg_sorensen.py index b26cbc12d56..5b25e5a54ca 100644 --- a/python/cugraph/cugraph/tests/mg/test_mg_sorensen.py +++ b/python/cugraph/cugraph/tests/mg/test_mg_sorensen.py @@ -129,14 +129,14 @@ def test_dask_sorensen(dask_client, benchmark, input_expected_output): result_sorensen = ( result_sorensen.compute() - .sort_values(["source", "destination"]) + .sort_values(["first", "second"]) .reset_index(drop=True) .rename(columns={"sorensen_coeff": "mg_cugraph_sorensen_coeff"}) ) expected_output = ( input_expected_output["sg_cugraph_results"] - .sort_values(["source", "destination"]) + .sort_values(["first", "second"]) .reset_index(drop=True) ) diff --git a/python/cugraph/cugraph/tests/test_jaccard.py b/python/cugraph/cugraph/tests/test_jaccard.py index 10d0206bd84..3839a2b8cce 100644 --- a/python/cugraph/cugraph/tests/test_jaccard.py +++ b/python/cugraph/cugraph/tests/test_jaccard.py @@ -66,11 +66,11 @@ def compare_jaccard_two_hop(G, Gnx, edgevals=True): # print(u, " ", v, " ", p) nx_coeff.append(p) df = cugraph.jaccard(G, pairs) - df = df.sort_values(by=["source", "destination"]).reset_index(drop=True) + df = 
df.sort_values(by=["first", "second"]).reset_index(drop=True) if not edgevals: # experimental jaccard currently only supports unweighted graphs df_exp = exp_jaccard(G, pairs) - df_exp = df_exp.sort_values(by=["source", "destination"]).reset_index(drop=True) + df_exp = df_exp.sort_values(by=["first", "second"]).reset_index(drop=True) assert_frame_equal(df, df_exp, check_dtype=False, check_like=True) assert len(nx_coeff) == len(df) @@ -94,11 +94,11 @@ def cugraph_call(benchmark_callable, graph_file, edgevals=False, input_df=None): # cugraph Jaccard Call df = benchmark_callable(cugraph.jaccard, G, vertex_pair=vertex_pair) - df = df.sort_values(["source", "destination"]).reset_index(drop=True) + df = df.sort_values(["first", "second"]).reset_index(drop=True) return ( - df["source"].to_numpy(), - df["destination"].to_numpy(), + df["first"].to_numpy(), + df["second"].to_numpy(), df["jaccard_coeff"].to_numpy(), ) @@ -302,8 +302,8 @@ def test_jaccard_multi_column(read_csv): df_exp = cugraph.jaccard(G2, vertex_pair[["src_0", "dst_0"]]) # Calculating mismatch - actual = df_res.sort_values("0_source").reset_index() - expected = df_exp.sort_values("source").reset_index() + actual = df_res.sort_values("0_first").reset_index() + expected = df_exp.sort_values("first").reset_index() assert_series_equal(actual["jaccard_coeff"], expected["jaccard_coeff"]) diff --git a/python/cugraph/cugraph/tests/test_overlap.py b/python/cugraph/cugraph/tests/test_overlap.py index 3b940d8b39c..7dcdb9afb2f 100644 --- a/python/cugraph/cugraph/tests/test_overlap.py +++ b/python/cugraph/cugraph/tests/test_overlap.py @@ -56,11 +56,11 @@ def cugraph_call(benchmark_callable, graph_file, pairs, edgevals=False): ) # cugraph Overlap Call df = benchmark_callable(cugraph.overlap, G, pairs) - df = df.sort_values(by=["source", "destination"]).reset_index(drop=True) + df = df.sort_values(by=["first", "second"]).reset_index(drop=True) if not edgevals: # experimental overlap currently only supports unweighted graphs df_exp = exp_overlap(G, pairs) - df_exp = df_exp.sort_values(by=["source", "destination"]).reset_index(drop=True) + df_exp = df_exp.sort_values(by=["first", "second"]).reset_index(drop=True) assert_frame_equal(df, df_exp, check_dtype=False, check_like=True) return df["overlap_coeff"].to_numpy() @@ -206,8 +206,8 @@ def test_overlap_multi_column(graph_file): df_exp = cugraph.overlap(G2, vertex_pair[["src_0", "dst_0"]]) # Calculating mismatch - actual = df_res.sort_values("0_source").reset_index() - expected = df_exp.sort_values("source").reset_index() + actual = df_res.sort_values("0_first").reset_index() + expected = df_exp.sort_values("first").reset_index() assert_series_equal(actual["overlap_coeff"], expected["overlap_coeff"]) diff --git a/python/cugraph/cugraph/tests/test_sorensen.py b/python/cugraph/cugraph/tests/test_sorensen.py index b827cf32665..9c61fa06a03 100644 --- a/python/cugraph/cugraph/tests/test_sorensen.py +++ b/python/cugraph/cugraph/tests/test_sorensen.py @@ -69,11 +69,11 @@ def compare_sorensen_two_hop(G, Gnx, edgevals=False): # No networkX equivalent nx_coeff.append((2 * p) / (1 + p)) df = cugraph.sorensen(G, pairs) - df = df.sort_values(by=["source", "destination"]).reset_index(drop=True) + df = df.sort_values(by=["first", "second"]).reset_index(drop=True) if not edgevals: # experimental sorensen currently only supports unweighted graphs df_exp = exp_sorensen(G, pairs) - df_exp = df_exp.sort_values(by=["source", "destination"]).reset_index(drop=True) + df_exp = df_exp.sort_values(by=["first", 
"second"]).reset_index(drop=True) assert_frame_equal(df, df_exp, check_dtype=False, check_like=True) assert len(nx_coeff) == len(df) for i in range(len(df)): @@ -96,11 +96,11 @@ def cugraph_call(benchmark_callable, graph_file, edgevals=False, input_df=None): # cugraph Sorensen Call df = benchmark_callable(cugraph.sorensen, G, vertex_pair=vertex_pair) - df = df.sort_values(["source", "destination"]).reset_index(drop=True) + df = df.sort_values(["first", "second"]).reset_index(drop=True) return ( - df["source"].to_numpy(), - df["destination"].to_numpy(), + df["first"].to_numpy(), + df["second"].to_numpy(), df["sorensen_coeff"].to_numpy(), ) @@ -266,8 +266,8 @@ def test_sorensen_multi_column(read_csv): df_exp = cugraph.sorensen(G2, vertex_pair[["src_0", "dst_0"]]) # Calculating mismatch - actual = df_res.sort_values("0_source").reset_index() - expected = df_exp.sort_values("source").reset_index() + actual = df_res.sort_values("0_first").reset_index() + expected = df_exp.sort_values("first").reset_index() assert_series_equal(actual["sorensen_coeff"], expected["sorensen_coeff"]) diff --git a/python/cugraph/cugraph/tests/test_wjaccard.py b/python/cugraph/cugraph/tests/test_wjaccard.py index cb67038f8e2..00ba7e9cd51 100644 --- a/python/cugraph/cugraph/tests/test_wjaccard.py +++ b/python/cugraph/cugraph/tests/test_wjaccard.py @@ -60,7 +60,7 @@ def cugraph_call(benchmark_callable, graph_file): # cugraph Jaccard Call df = benchmark_callable(cugraph.jaccard_w, G, weights) - df = df.sort_values(["source", "destination"]).reset_index(drop=True) + df = df.sort_values(["first", "second"]).reset_index(drop=True) return df["jaccard_coeff"] @@ -169,6 +169,6 @@ def test_wjaccard_multi_column(read_csv): df_exp = cugraph.jaccard_w(G2, weights, vertex_pair[["src_0", "dst_0"]]) # Calculating mismatch - actual = df_res.sort_values("0_source").reset_index() - expected = df_exp.sort_values("source").reset_index() + actual = df_res.sort_values("0_first").reset_index() + expected = df_exp.sort_values("first").reset_index() assert_series_equal(actual["jaccard_coeff"], expected["jaccard_coeff"]) diff --git a/python/cugraph/cugraph/tests/test_woverlap.py b/python/cugraph/cugraph/tests/test_woverlap.py index 6bea4e12888..f8022b08cb1 100644 --- a/python/cugraph/cugraph/tests/test_woverlap.py +++ b/python/cugraph/cugraph/tests/test_woverlap.py @@ -46,7 +46,7 @@ def cugraph_call(benchmark_callable, graph_file, pairs): # cugraph Overlap Call df = benchmark_callable(cugraph.overlap_w, G, weights, pairs) - df = df.sort_values(by=["source", "destination"]) + df = df.sort_values(by=["first", "second"]) return df["overlap_coeff"].to_numpy() @@ -154,6 +154,6 @@ def test_woverlap_multi_column(graph_file): df_exp = cugraph.overlap_w(G2, weights, vertex_pair[["src_0", "dst_0"]]) # Calculating mismatch - actual = df_res.sort_values("0_source").reset_index() - expected = df_exp.sort_values("source").reset_index() + actual = df_res.sort_values("0_first").reset_index() + expected = df_exp.sort_values("first").reset_index() assert_series_equal(actual["overlap_coeff"], expected["overlap_coeff"]) diff --git a/python/cugraph/cugraph/tests/test_wsorensen.py b/python/cugraph/cugraph/tests/test_wsorensen.py index 067e82fdf3b..acd877693c4 100644 --- a/python/cugraph/cugraph/tests/test_wsorensen.py +++ b/python/cugraph/cugraph/tests/test_wsorensen.py @@ -60,7 +60,7 @@ def cugraph_call(benchmark_callable, graph_file): # cugraph Sorensen Call df = benchmark_callable(cugraph.sorensen_w, G, weights) - df = df.sort_values(["source", 
"destination"]).reset_index(drop=True) + df = df.sort_values(["first", "second"]).reset_index(drop=True) return df["sorensen_coeff"] @@ -173,6 +173,6 @@ def test_wsorensen_multi_column(read_csv): df_exp = cugraph.sorensen_w(G2, weights, vertex_pair[["src_0", "dst_0"]]) # Calculating mismatch - actual = df_res.sort_values("0_source").reset_index() - expected = df_exp.sort_values("source").reset_index() + actual = df_res.sort_values("0_first").reset_index() + expected = df_exp.sort_values("first").reset_index() assert_series_equal(actual["sorensen_coeff"], expected["sorensen_coeff"]) diff --git a/python/pylibcugraph/pylibcugraph/jaccard_coefficients.pyx b/python/pylibcugraph/pylibcugraph/jaccard_coefficients.pyx index c7f8e2368b4..805ee821eab 100644 --- a/python/pylibcugraph/pylibcugraph/jaccard_coefficients.pyx +++ b/python/pylibcugraph/pylibcugraph/jaccard_coefficients.pyx @@ -30,6 +30,8 @@ from pylibcugraph._cugraph_c.array cimport ( ) from pylibcugraph._cugraph_c.graph_functions cimport ( cugraph_vertex_pairs_t, + cugraph_vertex_pairs_get_first, + cugraph_vertex_pairs_get_second, cugraph_vertex_pairs_free, cugraph_create_vertex_pairs ) @@ -145,6 +147,16 @@ def EXPERIMENTAL__jaccard_coefficients(ResourceHandle resource_handle, cupy_similarity = copy_to_cupy_array(c_resource_handle_ptr, similarity_ptr) + cdef cugraph_type_erased_device_array_view_t* first_ptr = \ + cugraph_vertex_pairs_get_first(vertex_pairs_ptr) + + cupy_first = copy_to_cupy_array(c_resource_handle_ptr, first_ptr) + + cdef cugraph_type_erased_device_array_view_t* second_ptr = \ + cugraph_vertex_pairs_get_second(vertex_pairs_ptr) + + cupy_second = copy_to_cupy_array(c_resource_handle_ptr, second_ptr) + # Free all pointers cugraph_similarity_result_free(result_ptr) cugraph_vertex_pairs_free(vertex_pairs_ptr) @@ -152,4 +164,4 @@ def EXPERIMENTAL__jaccard_coefficients(ResourceHandle resource_handle, cugraph_type_erased_device_array_view_free(first_view_ptr) cugraph_type_erased_device_array_view_free(second_view_ptr) - return first, second, cupy_similarity + return cupy_first, cupy_second, cupy_similarity diff --git a/python/pylibcugraph/pylibcugraph/overlap_coefficients.pyx b/python/pylibcugraph/pylibcugraph/overlap_coefficients.pyx index 4910cc15c65..6af71116469 100644 --- a/python/pylibcugraph/pylibcugraph/overlap_coefficients.pyx +++ b/python/pylibcugraph/pylibcugraph/overlap_coefficients.pyx @@ -30,6 +30,8 @@ from pylibcugraph._cugraph_c.array cimport ( ) from pylibcugraph._cugraph_c.graph_functions cimport ( cugraph_vertex_pairs_t, + cugraph_vertex_pairs_get_first, + cugraph_vertex_pairs_get_second, cugraph_vertex_pairs_free, cugraph_create_vertex_pairs ) @@ -146,6 +148,16 @@ def EXPERIMENTAL__overlap_coefficients(ResourceHandle resource_handle, cupy_similarity = copy_to_cupy_array(c_resource_handle_ptr, similarity_ptr) + cdef cugraph_type_erased_device_array_view_t* first_ptr = \ + cugraph_vertex_pairs_get_first(vertex_pairs_ptr) + + cupy_first = copy_to_cupy_array(c_resource_handle_ptr, first_ptr) + + cdef cugraph_type_erased_device_array_view_t* second_ptr = \ + cugraph_vertex_pairs_get_second(vertex_pairs_ptr) + + cupy_second = copy_to_cupy_array(c_resource_handle_ptr, second_ptr) + # Free all pointers cugraph_similarity_result_free(result_ptr) cugraph_vertex_pairs_free(vertex_pairs_ptr) @@ -153,4 +165,4 @@ def EXPERIMENTAL__overlap_coefficients(ResourceHandle resource_handle, cugraph_type_erased_device_array_view_free(first_view_ptr) cugraph_type_erased_device_array_view_free(second_view_ptr) - return first, second, 
cupy_similarity + return cupy_first, cupy_second, cupy_similarity diff --git a/python/pylibcugraph/pylibcugraph/sorensen_coefficients.pyx b/python/pylibcugraph/pylibcugraph/sorensen_coefficients.pyx index 8c4755f10ee..12647baccb2 100644 --- a/python/pylibcugraph/pylibcugraph/sorensen_coefficients.pyx +++ b/python/pylibcugraph/pylibcugraph/sorensen_coefficients.pyx @@ -30,6 +30,8 @@ from pylibcugraph._cugraph_c.array cimport ( ) from pylibcugraph._cugraph_c.graph_functions cimport ( cugraph_vertex_pairs_t, + cugraph_vertex_pairs_get_first, + cugraph_vertex_pairs_get_second, cugraph_vertex_pairs_free, cugraph_create_vertex_pairs ) @@ -145,6 +147,16 @@ def EXPERIMENTAL__sorensen_coefficients(ResourceHandle resource_handle, cupy_similarity = copy_to_cupy_array(c_resource_handle_ptr, similarity_ptr) + cdef cugraph_type_erased_device_array_view_t* first_ptr = \ + cugraph_vertex_pairs_get_first(vertex_pairs_ptr) + + cupy_first = copy_to_cupy_array(c_resource_handle_ptr, first_ptr) + + cdef cugraph_type_erased_device_array_view_t* second_ptr = \ + cugraph_vertex_pairs_get_second(vertex_pairs_ptr) + + cupy_second = copy_to_cupy_array(c_resource_handle_ptr, second_ptr) + # Free all pointers cugraph_similarity_result_free(result_ptr) cugraph_vertex_pairs_free(vertex_pairs_ptr) @@ -152,4 +164,4 @@ def EXPERIMENTAL__sorensen_coefficients(ResourceHandle resource_handle, cugraph_type_erased_device_array_view_free(first_view_ptr) cugraph_type_erased_device_array_view_free(second_view_ptr) - return first, second, cupy_similarity + return cupy_first, cupy_second, cupy_similarity
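
After this change, every similarity result carries "first"/"second" columns instead of "source"/"destination", in both the single-GPU and the dask (MG) code paths, and MG vertex pairs are shuffled with the same routine used for edge lists so that each pair lands on the GPU owning the matching edge partition. A minimal usage sketch follows; it is illustrative rather than part of the commit, assumes a space-delimited karate.csv edge list on disk (the path and the src/dst/wgt column names are placeholders), and mirrors the updated notebook cells and MG test setup above.

    # Illustrative sketch only; file path and column names are placeholders.
    # Single-GPU: result columns are now "first"/"second".
    import cudf
    import cugraph

    gdf = cudf.read_csv("karate.csv", delimiter=" ",
                        names=["src", "dst", "wgt"],
                        dtype=["int32", "int32", "float32"])
    G = cugraph.Graph()
    G.from_cudf_edgelist(gdf, source="src", destination="dst")

    jdf = cugraph.jaccard(G)
    # Drop the symmetric (y, x) duplicates and rank by score, as in the notebooks.
    print(jdf.query("first < second")
             .sort_values(by="jaccard_coeff", ascending=False)
             .head())

    # Multi-GPU: same column names; the vertex pairs are shuffled internally by
    # the edge-partitioning scheme, which is what this patch fixes.
    from dask.distributed import Client
    from dask_cuda import LocalCUDACluster
    import dask_cudf
    import cugraph.dask as dcg
    from cugraph.dask.comms import comms as Comms

    cluster = LocalCUDACluster()
    client = Client(cluster)
    Comms.initialize(p2p=True)

    ddf = dask_cudf.read_csv("karate.csv", delimiter=" ",
                             names=["src", "dst", "wgt"],
                             dtype=["int32", "int32", "float32"])
    MG = cugraph.Graph(directed=False)
    MG.from_dask_cudf_edgelist(ddf, source="src", destination="dst")

    mg_jdf = dcg.jaccard(MG).compute()  # gather to a single cudf DataFrame
    print(mg_jdf.sort_values(["first", "second"]).head())

    Comms.destroy()
    client.close()
    cluster.close()

The compute() call at the end gathers the distributed result into one cudf DataFrame for inspection; at larger scales the result would normally stay distributed and be processed as a dask_cudf frame.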