Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Forward-merge branch-23.10 to branch-23.12 #3899

Merged
merged 1 commit into from
Sep 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 73 additions & 18 deletions python/cugraph/cugraph/community/louvain.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Union, Tuple
from cugraph.structure import Graph
from cugraph.utilities import (
is_nx_graph_type,
ensure_cugraph_obj_for_nx,
df_score_to_dictionary,
)
Expand All @@ -21,9 +24,26 @@
from pylibcugraph import louvain as pylibcugraph_louvain
from pylibcugraph import ResourceHandle

from cugraph.utilities.utils import import_optional

# FIXME: the networkx.Graph type used in type annotations is specified
# using a string literal to avoid depending on and importing networkx.
# Instead, networkx is imported optionally, which may cause a problem
# for a type checker if run in an environment where networkx is not installed.
networkx = import_optional("networkx")

VERTEX_COL_NAME = "vertex"
CLUSTER_ID_COL_NAME = "partition"


# FIXME: max_level should default to 100 once max_iter is removed
def louvain(G, max_level=None, max_iter=None, resolution=1.0, threshold=1e-7):
def louvain(
G: Union[Graph, "networkx.Graph"],
max_level: Union[int, None] = None,
max_iter: Union[int, None] = None,
resolution: float = 1.0,
threshold: float = 1e-7,
) -> Tuple[Union[cudf.DataFrame, dict], float]:
"""
Compute the modularity optimizing partition of the input graph using the
Louvain method
Expand All @@ -48,6 +68,9 @@ def louvain(G, max_level=None, max_iter=None, resolution=1.0, threshold=1e-7):
than the specified number of levels. No error occurs when the
algorithm terminates early in this manner.

If max_level > 500, it will be set to 500 and a warning is emitted
in order to prevent excessive runtime.

max_iter : integer, optional (default=None)
This parameter is deprecated in favor of max_level. Previously
it was used to control the maximum number of levels of the Louvain
Expand All @@ -68,18 +91,21 @@ def louvain(G, max_level=None, max_iter=None, resolution=1.0, threshold=1e-7):

Returns
-------
parts : cudf.DataFrame
GPU data frame of size V containing two columns the vertex id and the
partition id it is assigned to.
result: cudf.DataFrame or dict
If input graph G is of type cugraph.Graph, a GPU dataframe
with two columns.

result[VERTEX_COL_NAME] : cudf.Series
Contains the vertex identifiers
result[CLUSTER_ID_COL_NAME] : cudf.Series
Contains the partition assigned to the vertices

df['vertex'] : cudf.Series
Contains the vertex identifiers
df['partition'] : cudf.Series
Contains the partition assigned to the vertices
If input graph G is of type networkx.Graph, a dict
Dictionary of vertices and their partition ids.

modularity_score : float
a floating point number containing the global modularity score of the
partitioning.
A floating point number containing the global modularity score
of the partitioning.

Examples
--------
Expand All @@ -89,6 +115,17 @@ def louvain(G, max_level=None, max_iter=None, resolution=1.0, threshold=1e-7):

"""

# FIXME: Once the graph construction calls support isolated vertices through
# the C API (the C++ interface already supports this) then there will be
# no need to compute isolated vertices here.

isolated_vertices = list()
if is_nx_graph_type(type(G)):
isolated_vertices = [v for v in range(G.number_of_nodes()) if G.degree[v] == 0]
else:
# FIXME: Gather isolated vertices of G
pass

G, isNx = ensure_cugraph_obj_for_nx(G)

if G.is_directed():
Expand All @@ -112,7 +149,12 @@ def louvain(G, max_level=None, max_iter=None, resolution=1.0, threshold=1e-7):
if max_level is None:
max_level = 100

vertex, partition, mod_score = pylibcugraph_louvain(
if max_level > 500:
w_msg = "max_level is set too high, clamping it down to 500."
warnings.warn(w_msg)
max_level = 500

vertex, partition, modularity_score = pylibcugraph_louvain(
resource_handle=ResourceHandle(),
graph=G._plc_graph,
max_level=max_level,
Expand All @@ -121,14 +163,27 @@ def louvain(G, max_level=None, max_iter=None, resolution=1.0, threshold=1e-7):
do_expensive_check=False,
)

df = cudf.DataFrame()
df["vertex"] = vertex
df["partition"] = partition
result = cudf.DataFrame()
result[VERTEX_COL_NAME] = vertex
result[CLUSTER_ID_COL_NAME] = partition

if len(isolated_vertices) > 0:
unique_cids = result[CLUSTER_ID_COL_NAME].unique()
max_cluster_id = -1 if len(result) == 0 else unique_cids.max()

isolated_vtx_and_cids = cudf.DataFrame()
isolated_vtx_and_cids[VERTEX_COL_NAME] = isolated_vertices
isolated_vtx_and_cids[CLUSTER_ID_COL_NAME] = [
(max_cluster_id + i + 1) for i in range(len(isolated_vertices))
]
result = cudf.concat(
[result, isolated_vtx_and_cids], ignore_index=True, sort=False
)

if G.renumbered:
df = G.unrenumber(df, "vertex")
if G.renumbered and len(G.input_df) > 0:
result = G.unrenumber(result, VERTEX_COL_NAME)

if isNx is True:
df = df_score_to_dictionary(df, "partition")
result = df_score_to_dictionary(result, CLUSTER_ID_COL_NAME)

return df, mod_score
return result, modularity_score
16 changes: 16 additions & 0 deletions python/cugraph/cugraph/tests/community/test_louvain.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,3 +142,19 @@ def test_louvain_csr_graph(is_weighted):

assert len(parition_diffs) == 0
assert mod_csr == mod_coo


@pytest.mark.sg
def test_louvain_nx_graph_with_isolated_nodes():
    """Run louvain on NetworkX graphs that contain isolated nodes.

    Checks that every node of the input graph appears as a key of the
    returned mapping and that the number of distinct cluster ids matches
    the expected count for each graph.
    """
    graph = nx.Graph()
    graph.add_nodes_from(range(5))

    # Cluster IDs are expected to be unique if all nodes are isolated.
    partition_of, _ = cugraph.louvain(graph)
    assert set(partition_of.keys()) == set(graph.nodes)
    distinct_clusters = set(partition_of.values())
    assert len(distinct_clusters) == graph.number_of_nodes()

    # Same 5 nodes, now with one edge: 3 of the nodes remain isolated,
    # and one fewer distinct cluster id than nodes is expected.
    graph.add_edge(1, 2)
    partition_of, _ = cugraph.louvain(graph)
    assert set(partition_of.keys()) == set(graph.nodes)
    distinct_clusters = set(partition_of.values())
    assert len(distinct_clusters) == graph.number_of_nodes() - 1
4 changes: 2 additions & 2 deletions python/cugraph/cugraph/utilities/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,8 +364,8 @@ def is_matrix_type(m):
return is_cp_matrix_type(m) or is_sp_matrix_type(m)


def is_nx_graph_type(g):
return g in __nx_graph_types
def is_nx_graph_type(graph_type):
    """Return True if ``graph_type`` is one of the module's known nx types.

    Parameters
    ----------
    graph_type : object
        The value to look up. The visible caller passes ``type(G)``, i.e.
        the class of the graph object rather than the graph itself.

    Returns
    -------
    bool
        Result of the membership test against the module-level
        ``__nx_graph_types`` collection.
    """
    # NOTE(review): __nx_graph_types is defined elsewhere in this module;
    # presumably it holds the NetworkX graph classes -- confirm there.
    return graph_type in __nx_graph_types


def is_cugraph_graph_type(g):
Expand Down