Skip to content

Commit

Permalink
Merge 30d55ac into 008273b
Browse files Browse the repository at this point in the history
  • Loading branch information
jnke2016 authored May 31, 2022
2 parents 008273b + 30d55ac commit 6caaa2b
Show file tree
Hide file tree
Showing 16 changed files with 908 additions and 65 deletions.
2 changes: 1 addition & 1 deletion benchmarks/python_e2e/cugraph_dask_funcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import cudf

import cugraph
from cugraph.comms import comms as Comms
from cugraph.dask.comms import comms as Comms
from cugraph.dask.common.mg_utils import get_visible_devices
from cugraph.generators import rmat
import tempfile
Expand Down
4 changes: 4 additions & 0 deletions python/cugraph/cugraph/community/triangle_count.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

from cugraph.community import triangle_count_wrapper
from cugraph.utilities import ensure_cugraph_obj_for_nx
import warnings


def triangles(G):
Expand Down Expand Up @@ -47,6 +48,9 @@ def triangles(G):
>>> count = cugraph.triangles(G)
"""
warning_msg = ("This call is deprecated and will be refactored "
"in the next release")
warnings.warn(warning_msg, PendingDeprecationWarning)

G, _ = ensure_cugraph_obj_for_nx(G)

Expand Down
1 change: 1 addition & 0 deletions python/cugraph/cugraph/dask/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from .traversal.sssp import sssp
from .common.read_utils import get_chunksize
from .community.louvain import louvain
from .community.triangle_count import triangle_count
from .centrality.katz_centrality import katz_centrality
from .components.connectivity import weakly_connected_components
from .centrality.eigenvector_centrality import eigenvector_centrality
3 changes: 2 additions & 1 deletion python/cugraph/cugraph/dask/community/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand All @@ -12,3 +12,4 @@
# limitations under the License.

from .louvain import louvain
from .triangle_count import triangle_count
180 changes: 180 additions & 0 deletions python/cugraph/cugraph/dask/community/triangle_count.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
# Copyright (c) 2022, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from dask.distributed import wait, default_client
from cugraph.dask.common.input_utils import get_distributed_data

import cugraph.dask.comms.comms as Comms
import dask_cudf
import cudf

from pylibcugraph.experimental import triangle_count as \
pylibcugraph_triangle_count

from pylibcugraph import (ResourceHandle,
GraphProperties,
MGGraph
)


def call_triangles(sID,
data,
src_col_name,
dst_col_name,
graph_properties,
store_transposed,
num_edges,
do_expensive_check,
start_list
):

handle = Comms.get_handle(sID)
h = ResourceHandle(handle.getHandle())
srcs = data[0][src_col_name]
dsts = data[0][dst_col_name]
weights = None
if "value" in data[0].columns:
weights = data[0]['value']

mg = MGGraph(h,
graph_properties,
srcs,
dsts,
weights,
store_transposed,
num_edges,
do_expensive_check)

result = pylibcugraph_triangle_count(h,
mg,
start_list,
do_expensive_check)

return result


def convert_to_cudf(cp_arrays):
"""
Creates a cudf DataFrame from cupy arrays from pylibcugraph wrapper
"""
cupy_vertices, cupy_counts = cp_arrays
df = cudf.DataFrame()
df["vertex"] = cupy_vertices
df["counts"] = cupy_counts

return df


def triangle_count(input_graph,
start_list=None):
"""
Computes the number of triangles (cycles of length three) and the number
per vertex in the input graph.
Parameters
----------
input_graph : cugraph.graph
cuGraph graph descriptor, should contain the connectivity information,
(edge weights are not used in this algorithm).
The current implementation only supports undirected graphs.
start_list : not supported
list of vertices for triangle count. if None the entire set of vertices
in the graph is processed
Returns
-------
result : dask_cudf.DataFrame
GPU distributed data frame containing 2 dask_cudf.Series
ddf['vertex']: dask_cudf.Series
Contains the triangle counting vertices
ddf['counts']: dask_cudf.Series
Contains the triangle counting counts
"""
# FIXME: start_list is disabled
start_list = None
if input_graph.is_directed():
raise ValueError("input graph must be undirected")
# Initialize dask client
client = default_client()
# In the future, once all the algos follow the C/Pylibcugraph path,
# compute_renumber_edge_list will only be used for multicolumn and
# string vertices since the renumbering will be done in pylibcugraph
input_graph.compute_renumber_edge_list(
transposed=False, legacy_renum_only=True)

if start_list is not None:
if isinstance(start_list, int):
start_list = [start_list]
if isinstance(start_list, list):
start_list = cudf.Series(start_list)
if start_list.dtype != 'int32':
raise ValueError(f"'start_list' must have int32 values, "
f"got: {start_list.dtype}")
if not isinstance(start_list, cudf.Series):
raise TypeError(
f"'start_list' must be either a list or a cudf.Series,"
f"got: {start_list.dtype}")

# start_list uses "external" vertex IDs, but since the graph has been
# renumbered, the start vertex IDs must also be renumbered.
if input_graph.renumbered:
start_list = input_graph.lookup_internal_vertex_id(
start_list).compute()

ddf = input_graph.edgelist.edgelist_df

# FIXME: The parameter is_multigraph, store_transposed and
# do_expensive_check must be derived from the input_graph.
# For now, they are hardcoded.
graph_properties = GraphProperties(
is_symmetric=True, is_multigraph=False)
store_transposed = False
do_expensive_check = True

num_edges = len(ddf)
data = get_distributed_data(ddf)

src_col_name = input_graph.renumber_map.renumbered_src_col_name
dst_col_name = input_graph.renumber_map.renumbered_dst_col_name

result = [client.submit(call_triangles,
Comms.get_session_id(),
wf[1],
src_col_name,
dst_col_name,
graph_properties,
store_transposed,
num_edges,
do_expensive_check,
start_list,
workers=[wf[0]])
for idx, wf in enumerate(data.worker_to_parts.items())]

wait(result)

cudf_result = [client.submit(convert_to_cudf,
cp_arrays)
for cp_arrays in result]

wait(cudf_result)

ddf = dask_cudf.from_delayed(cudf_result)
if input_graph.renumbered:
ddf = input_graph.unrenumber(ddf, "vertex")

return ddf
4 changes: 2 additions & 2 deletions python/cugraph/cugraph/dask/link_analysis/hits.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,8 @@ def hits(input_graph, tol=1.0e-5, max_iter=100, nstart=None, normalized=True):
Returns
-------
HubsAndAuthorities : dask_cudf.DataFrame
GPU data frame containing three cudf.Series of size V: the vertex
identifiers and the corresponding hubs values and the corresponding
GPU distributed data frame containing three dask_cudf.Series of
size V: the vertex identifiers and the corresponding hubs and
authorities values.
df['vertex'] : dask_cudf.Series
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def EXPERIMENTAL__uniform_neighborhood(input_graph,
Returns
-------
result : dask_cudf.DataFrame
GPU data frame containing two dask_cudf.Series
GPU distributed data frame containing 4 dask_cudf.Series
ddf['sources']: dask_cudf.Series
Contains the source vertices from the sampling result
Expand Down
4 changes: 4 additions & 0 deletions python/cugraph/cugraph/experimental/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,7 @@

from cugraph.structure.property_graph import EXPERIMENTAL__PropertySelection
PropertySelection = experimental_warning_wrapper(EXPERIMENTAL__PropertySelection)

from cugraph.experimental.community.triangle_count import \
EXPERIMENTAL__triangle_count
triangle_count = experimental_warning_wrapper(EXPERIMENTAL__triangle_count)
Empty file.
122 changes: 122 additions & 0 deletions python/cugraph/cugraph/experimental/community/triangle_count.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
# Copyright (c) 2019-2022, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from cugraph.utilities import ensure_cugraph_obj_for_nx
import cudf

from pylibcugraph.experimental import triangle_count as \
pylibcugraph_triangle_count

from pylibcugraph import (ResourceHandle,
GraphProperties,
SGGraph
)


# FIXME: rename this to triangle_conut to match the MG implmentation
def EXPERIMENTAL__triangle_count(G, start_list=None):
"""
Compute the number of triangles (cycles of length three) in the
input graph.
Parameters
----------
G : cugraph.graph or networkx.Graph
cuGraph graph descriptor, should contain the connectivity information,
(edge weights are not used in this algorithm).
The current implementation only supports undirected graphs.
start_list : not supported
list of vertices for triangle count. if None the entire set of vertices
in the graph is processed
Returns
-------
result : cudf.DataFrame
GPU data frame containing 2 cudf.Series
ddf['vertex']: cudf.Series
Contains the triangle counting vertices
ddf['counts']: cudf.Series
Contains the triangle counting counts
Examples
--------
>>> gdf = cudf.read_csv(datasets_path / 'karate.csv',
... delimiter = ' ',
... dtype=['int32', 'int32', 'float32'],
... header=None)
>>> G = cugraph.Graph()
>>> G.from_cudf_edgelist(gdf, source='0', destination='1', edge_attr='2')
>>> count = cugraph.experimental.triangle_count(G)
"""
# FIXME: start_list is disabled
start_list = None
G, _ = ensure_cugraph_obj_for_nx(G)

if G.is_directed():
raise ValueError("input graph must be undirected")

if start_list is not None:
if isinstance(start_list, int):
start_list = [start_list]
if isinstance(start_list, list):
start_list = cudf.Series(start_list)
if start_list.dtype != 'int32':
raise ValueError(f"'start_list' must have int32 values, "
f"got: {start_list.dtype}")
if not isinstance(start_list, cudf.Series):
raise TypeError(
f"'start_list' must be either a list or a cudf.Series,"
f"got: {start_list.dtype}")

if G.renumbered is True:
if isinstance(start_list, cudf.DataFrame):
start_list = G.lookup_internal_vertex_id(
start_list, start_list.columns)
else:
start_list = G.lookup_internal_vertex_id(start_list)

srcs = G.edgelist.edgelist_df['src']
dsts = G.edgelist.edgelist_df['dst']
weights = G.edgelist.edgelist_df['weights']

if srcs.dtype != 'int32':
raise ValueError(f"Graph vertices must have int32 values, "
f"got: {srcs.dtype}")

resource_handle = ResourceHandle()
graph_props = GraphProperties(
is_symmetric=True, is_multigraph=G.is_multigraph())
store_transposed = False

# FIXME: This should be based on the renumber parameter set when creating
# the graph
renumber = False
do_expensive_check = False

sg = SGGraph(resource_handle, graph_props, srcs, dsts, weights,
store_transposed, renumber, do_expensive_check)

vertex, counts = pylibcugraph_triangle_count(
resource_handle, sg, start_list, do_expensive_check)

df = cudf.DataFrame()
df["vertex"] = vertex
df["counts"] = counts

if G.renumbered:
df = G.unrenumber(df, "vertex")

return df
Loading

0 comments on commit 6caaa2b

Please sign in to comment.