Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor the python function symmetrizing the edgelist #4649

Merged
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
f2f3a04
add new symmetrize file
jnke2016 Sep 10, 2024
0c8b927
implement symmetrize_edgelist in the CAPI
jnke2016 Sep 10, 2024
c01456a
deprecated the python symmetrize function
jnke2016 Sep 19, 2024
fb115da
expose flag symmetrizing the edgelist
jnke2016 Sep 19, 2024
47b0677
Remove python symmetrize from the SG graph creation
jnke2016 Sep 22, 2024
f09fd41
update API to symmetrize the edgelist and update docstring
jnke2016 Sep 23, 2024
b737b02
deprecate python method to symmetrize the edgelist
jnke2016 Sep 23, 2024
ea19b62
fix style
jnke2016 Sep 23, 2024
bd7eede
fix style
jnke2016 Sep 23, 2024
6e433ab
Merge remote-tracking branch 'upstream/branch-24.10' into branch-24.1…
jnke2016 Sep 23, 2024
e5c94f7
fix typo
jnke2016 Sep 23, 2024
0470299
update docstrings and remove unused function declaration
jnke2016 Sep 26, 2024
2df98f5
ensure the graph properties match when symmetrizing
jnke2016 Sep 26, 2024
fabcb37
update docstrings
jnke2016 Sep 26, 2024
3b1c1d0
update docstrings
jnke2016 Sep 26, 2024
0462529
update error reporting
jnke2016 Sep 28, 2024
e8beb72
support the symmetrization of edgelist for legacy algorithms
jnke2016 Sep 28, 2024
e3e413b
fix typo
jnke2016 Sep 29, 2024
5336a91
fix property_graph tests
jnke2016 Sep 29, 2024
3b40fc5
update graph creation for the C tests
jnke2016 Sep 29, 2024
cbbfb88
update the test to call the deprecated symmetrize
jnke2016 Oct 1, 2024
8891b1f
fix typo
jnke2016 Oct 1, 2024
4e9e85d
fix typo
jnke2016 Oct 1, 2024
d135ac1
undo changes to symmetrize
jnke2016 Oct 1, 2024
a9c7064
fix typo
jnke2016 Oct 1, 2024
1c70046
use the deprecated symmetrize call
jnke2016 Oct 1, 2024
2cfb0fb
update SG graph creation
jnke2016 Oct 1, 2024
fec091f
Merge remote-tracking branch 'upstream/branch-24.10' into branch-24.1…
jnke2016 Oct 1, 2024
efbdb38
fix style
jnke2016 Oct 1, 2024
35c25db
fix style
jnke2016 Oct 1, 2024
f91bb81
remove debug print and pass kwarg name
jnke2016 Oct 2, 2024
0389f27
call deprecated symmetrize call
jnke2016 Oct 2, 2024
97472cd
update flag passed to the deprecated symmetrize function
jnke2016 Oct 2, 2024
ec7da8d
fix style
jnke2016 Oct 2, 2024
0e5a9ba
update docstrings
jnke2016 Oct 2, 2024
2a78356
remove redundant assignments
jnke2016 Oct 2, 2024
43ba0cd
ignore deprecation warning for symmetrizing
jnke2016 Oct 2, 2024
cfecd90
symmetrize
jnke2016 Oct 2, 2024
96d2e20
fix style
jnke2016 Oct 2, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions cpp/src/c_api/graph_mg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -378,12 +378,12 @@ extern "C" cugraph_error_code_t cugraph_graph_create_mg(
if (weight_type == cugraph_data_type_id_t::NTYPES) weight_type = p_weights[i]->type_;
}

if (symmetrize == TRUE){
CAPI_EXPECTS(
(properties->is_symmetric == TRUE),
CUGRAPH_INVALID_INPUT,
"Invalid input arguments: The graph property must be symmetric if 'symmetrize' is set to True.",
*error);
if (symmetrize == TRUE) {
CAPI_EXPECTS((properties->is_symmetric == TRUE),
CUGRAPH_INVALID_INPUT,
"Invalid input arguments: The graph property must be symmetric if 'symmetrize' "
"is set to True.",
*error);
}

CAPI_EXPECTS(p_src[i]->type_ == vertex_type,
Expand Down
24 changes: 12 additions & 12 deletions cpp/src/c_api/graph_sg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -581,12 +581,12 @@ extern "C" cugraph_error_code_t cugraph_graph_create_sg(
auto p_edge_type_ids =
reinterpret_cast<cugraph::c_api::cugraph_type_erased_device_array_view_t const*>(edge_type_ids);

if (symmetrize == TRUE){
CAPI_EXPECTS(
(properties->is_symmetric == TRUE),
CUGRAPH_INVALID_INPUT,
"Invalid input arguments: The graph property must be symmetric if 'symmetrize' is set to True.",
*error);
if (symmetrize == TRUE) {
CAPI_EXPECTS((properties->is_symmetric == TRUE),
CUGRAPH_INVALID_INPUT,
"Invalid input arguments: The graph property must be symmetric if 'symmetrize' is "
"set to True.",
*error);
}

CAPI_EXPECTS(p_src->size_ == p_dst->size_,
Expand Down Expand Up @@ -757,12 +757,12 @@ cugraph_error_code_t cugraph_graph_create_sg_from_csr(
weight_type = cugraph_data_type_id_t::FLOAT32;
}

if (symmetrize == TRUE){
CAPI_EXPECTS(
(properties->is_symmetric == TRUE),
CUGRAPH_INVALID_INPUT,
"Invalid input arguments: The graph property must be symmetric if 'symmetrize' is set to True.",
*error);
if (symmetrize == TRUE) {
CAPI_EXPECTS((properties->is_symmetric == TRUE),
CUGRAPH_INVALID_INPUT,
"Invalid input arguments: The graph property must be symmetric if 'symmetrize' is "
"set to True.",
*error);
}

CAPI_EXPECTS(
Expand Down
2 changes: 0 additions & 2 deletions python/cugraph/cugraph/components/connectivity.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,8 +287,6 @@ def strongly_connected_components(
"strongly_connected_components", G, directed, connection, return_labels
)

print("G = ", G)

# FIXME: allow nx_weight_attr to be specified
(G, input_type) = ensure_cugraph_obj(
G, nx_weight_attr="weight", matrix_graph_type=Graph(directed=directed)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ def __from_edgelist(
"If the edges are already symmetric, set the 'symmetrize' "
"flag to False"
)

if symmetrize is None:
# default behavior
symmetrize = not self.properties.directed
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

from cugraph.structure import graph_primtypes_wrapper
from cugraph.structure.replicate_edgelist import replicate_cudf_dataframe
from cugraph.structure.symmetrize import symmetrize as symmetrize
from cugraph.structure.symmetrize import symmetrize as symmetrize_df
from cugraph.structure.number_map import NumberMap
import cugraph.dask.common.mg_utils as mg_utils
import cudf
Expand Down Expand Up @@ -148,7 +148,7 @@ def __from_edgelist(
raise ValueError(
"The edgelist can only be symmetrized for undirected graphs."
)

if self.properties.directed:
if symmetrize:
raise ValueError(
Expand All @@ -168,7 +168,7 @@ def __from_edgelist(
"If the edges are already symmetric, set the 'symmetrize' "
"flag to False"
)

if symmetrize is None:
# default behavior
symmetrize = not self.properties.directed
Expand Down Expand Up @@ -1171,6 +1171,7 @@ def _make_plc_graph(
drop_multi_edges: bool = False,
symmetrize: bool = False,
):
print("in PLC symmetrize = ", symmetrize)
jnke2016 marked this conversation as resolved.
Show resolved Hide resolved
"""
Parameters
----------
Expand Down Expand Up @@ -1301,14 +1302,14 @@ def to_undirected(self, G, store_transposed=False):
else:
df = self.edgelist.edgelist_df
if self.edgelist.weights:
source_col, dest_col, value_col = symmetrize(
source_col, dest_col, value_col = symmetrize_df(
df,
simpleGraphImpl.srcCol,
simpleGraphImpl.dstCol,
simpleGraphImpl.edgeWeightCol,
)
else:
source_col, dest_col = symmetrize(
source_col, dest_col = symmetrize_df(
df, simpleGraphImpl.srcCol, simpleGraphImpl.dstCol
)
value_col = None
Expand Down Expand Up @@ -1343,6 +1344,24 @@ def has_edge(self, u, v):
v = tmp["id"][1]

df = self.edgelist.edgelist_df

if self.edgelist.weights:
# FIXME: Update this function to not call the deprecated
# symmetrize function.
source_col, dest_col, value_col = symmetrize_df(
df,
simpleGraphImpl.srcCol,
simpleGraphImpl.dstCol,
simpleGraphImpl.edgeWeightCol,
)
else:
source_col, dest_col = symmetrize_df(
df, simpleGraphImpl.srcCol, simpleGraphImpl.dstCol
)
value_col = None

self.edgelist = simpleGraphImpl.EdgeList(source_col, dest_col, value_col)

return (
(df[simpleGraphImpl.srcCol] == u) & (df[simpleGraphImpl.dstCol] == v)
).any()
Expand Down
27 changes: 27 additions & 0 deletions python/cugraph/cugraph/structure/hypergraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import cudf
import numpy as np
from cugraph.structure.graph_classes import Graph
from cugraph.structure.symmetrize import symmetrize


def hypergraph(
Expand Down Expand Up @@ -277,6 +278,32 @@ def hypergraph(
renumber=True,
)

df = cudf.DataFrame()

# FIXME: Refactor this code so that it no longer relies on the
# deprecated symmetrize call.
if "weights" in graph.edgelist.edgelist_df:
source_col, dest_col, value_col = symmetrize(
graph.edgelist.edgelist_df,
"src",
"dst",
"weights",
symmetrize=not graph.is_directed(),
)

df["src"] = source_col
df["dst"] = dest_col
df["weights"] = value_col
else:
source_col, dest_col = symmetrize(
graph.edgelist.edgelist_df, "src", "dst", symmetrize=not graph.is_directed()
)

df["src"] = source_col
df["dst"] = dest_col

graph.edgelist.edgelist_df = df

return {
"nodes": nodes,
"edges": edges,
Expand Down
26 changes: 17 additions & 9 deletions python/cugraph/cugraph/structure/property_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -2005,23 +2005,31 @@ def edge_props_to_graph(
G.from_cudf_edgelist(edge_prop_df.reset_index(), **create_args)
else:
G.from_pandas_edgelist(edge_prop_df.reset_index(), **create_args)

# FIXME: Property_graph does not fully leverage the PLC API yet.
# It still relies on the edges being symmetrized by the deprecated
# symmetrize function.

# Symmetrize the internal representation of the edgelists
source_col, dest_col, value_col = symmetrize(
G.edgelist.edgelist_df,
"src",
"dst",
"weights",
symmetrize=not G.is_directed())


if edge_attr is not None:
source_col, dest_col, value_col = symmetrize(
G.edgelist.edgelist_df,
"src",
"dst",
"weights",
symmetrize=not G.is_directed(),
)
else:
source_col, dest_col = symmetrize(
G.edgelist.edgelist_df, "src", "dst", symmetrize=not G.is_directed()
)

renumbered_edge_prop_df = cudf.DataFrame()
renumbered_edge_prop_df["src"] = source_col
renumbered_edge_prop_df["dst"] = dest_col
renumbered_edge_prop_df["weights"] = value_col
if edge_attr:
renumbered_edge_prop_df["weights"] = value_col

G.edgelist.edgelist_df = renumbered_edge_prop_df

Expand Down
1 change: 0 additions & 1 deletion python/cugraph/cugraph/structure/symmetrize.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,6 @@ def symmetrize(
>>> df['values'] = cudf.Series(M['2'])
>>> src, dst, val = symmetrize(df, 'sources', 'destinations', 'values', multi=True)
"""

warnings.warn(
"This method is deprecated and will no longer be supported. The symmetrization "
"of the edges are only supported by setting the 'symmetrize' flag to 'True'",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from cugraph import uniform_neighbor_sample
from cugraph.testing import UNDIRECTED_DATASETS
from cugraph.datasets import email_Eu_core, small_tree
from cugraph.structure.symmetrize import symmetrize
from pylibcugraph.testing.utils import gen_fixture_params_product


Expand Down Expand Up @@ -148,6 +149,15 @@ def test_uniform_neighbor_sample_simple(input_combo):
# should be 'None' if the dataset was never renumbered
input_df = G.edgelist.edgelist_df

# FIXME: Uses the deprecated implementation of symmetrize.
source_col, dest_col = symmetrize(
input_df, "src", "dst", symmetrize=not G.is_directed()
)

input_df = cudf.DataFrame()
input_df["src"] = source_col
input_df["dst"] = dest_col

result_nbr = uniform_neighbor_sample(
G,
input_combo["start_list"],
Expand Down
13 changes: 13 additions & 0 deletions python/cugraph/cugraph/tests/structure/test_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from cugraph.testing import utils
from cudf.testing import assert_series_equal
from cudf.testing.testing import assert_frame_equal
from cugraph.structure.symmetrize import symmetrize

# MG
import dask_cudf
Expand Down Expand Up @@ -534,6 +535,18 @@ def test_to_directed(graph_file):
# cugraph add_edge_list
G = cugraph.Graph()
G.from_cudf_edgelist(cu_M, source="0", destination="1")

# FIXME: Uses the deprecated implementation of symmetrize.
source_col, dest_col = symmetrize(
G.edgelist.edgelist_df, "src", "dst", symmetrize=not G.is_directed()
)

input_df = cudf.DataFrame()
input_df["src"] = source_col
input_df["dst"] = dest_col

G.edgelist.edgelist_df = input_df

Gnx = nx.from_pandas_edgelist(M, source="0", target="1", create_using=nx.Graph())

DiG = G.to_directed()
Expand Down
4 changes: 2 additions & 2 deletions python/cugraph/cugraph/tests/structure/test_multigraph.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -76,7 +76,7 @@ def test_Graph_from_MultiGraph(graph_file):

G = cugraph.Graph(GM)
Gnx = nx.Graph(GnxM)
assert Gnx.number_of_edges() == G.number_of_edges()
assert Gnx.number_of_edges() == G.number_of_edges(True)
GdM = graph_file.get_graph(create_using=cugraph.MultiGraph(directed=True))
jnke2016 marked this conversation as resolved.
Show resolved Hide resolved
GnxdM = nx.from_pandas_edgelist(
nxM,
Expand Down
33 changes: 11 additions & 22 deletions python/cugraph/cugraph/utilities/nx_factory.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -119,10 +119,8 @@ def convert_from_nx(
"""

if isinstance(nxG, nx.classes.digraph.DiGraph):
print("creating a directed graph")
G = cugraph.Graph(directed=True)
elif isinstance(nxG, nx.classes.graph.Graph):
print("creating an undirected graph")
G = cugraph.Graph()
else:
raise TypeError(
Expand All @@ -137,11 +135,9 @@ def convert_from_nx(
# hence, the symmetrization cannot be performed at the graph
# creation. Use the deprecated 'symmetrize' function for now.
source_col, dest_col = symmetrize(
_gdf,
"src",
"dst",
symmetrize=not G.is_directed())

_gdf, "src", "dst", symmetrize=not G.is_directed()
)

_gdf = cudf.DataFrame()

_gdf["src"] = source_col
Expand All @@ -153,20 +149,17 @@ def convert_from_nx(
edge_attr=None,
renumber=do_renumber,
store_transposed=store_transposed,
)
)
else:
if weight is None:
_gdf = convert_weighted_unnamed_to_gdf(nxG, vertex_type)
# FIXME: The legacy algorithms do not support the PLC graph;
# hence, the symmetrization cannot be performed at graph
# creation. Use the deprecated 'symmetrize' function for now.
source_col, dest_col, value_col = symmetrize(
_gdf,
"src",
"target",
"weight",
symmetrize=not G.is_directed())

_gdf, "src", "target", "weight", symmetrize=not G.is_directed()
)

_gdf = cudf.DataFrame()

_gdf["src"] = source_col
Expand All @@ -187,12 +180,9 @@ def convert_from_nx(
# hence, the symmetrization cannot be performed at the graph
# creation. Use the deprecated 'symmetrize' function for now.
source_col, dest_col, value_col = symmetrize(
_gdf,
"src",
"dst",
"weight",
symmetrize=not G.is_directed())

_gdf, "src", "dst", "weight", symmetrize=not G.is_directed()
)

_gdf = cudf.DataFrame()

_gdf["src"] = source_col
Expand All @@ -207,7 +197,6 @@ def convert_from_nx(
renumber=do_renumber,
store_transposed=store_transposed,
)


return G

Expand Down
2 changes: 1 addition & 1 deletion python/pylibcugraph/pylibcugraph/graphs.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -243,8 +243,8 @@ cdef class SGGraph(_GPUGraph):
self.edge_id_view_ptr,
edge_type_view_ptr,
store_transposed,
symmetrize,
renumber,
symmetrize,
# drop_self_loops, #FIXME: Not supported yet
# drop_multi_edges, #FIXME: Not supported yet
do_expensive_check,
Expand Down
Loading