Skip to content

Commit

Permalink
Add core number to the python API (rapidsai#2414)
Browse files Browse the repository at this point in the history
This PR extends the `core number` functionality to `pylibcugraph` and python `cugraph`

Authors:
  - Joseph Nke (https://github.com/jnke2016)
  - Dylan Chima-Sanchez (https://github.com/betochimas)
  - Alex Barghi (https://github.com/alexbarghi-nv)
  - Chuck Hastings (https://github.com/ChuckHastings)

Approvers:
  - Brad Rees (https://github.com/BradReesWork)
  - Rick Ratzel (https://github.com/rlratzel)

URL: rapidsai#2414
  • Loading branch information
jnke2016 authored Jul 22, 2022
1 parent 5d0744c commit 3cea60d
Show file tree
Hide file tree
Showing 13 changed files with 474 additions and 158 deletions.
42 changes: 39 additions & 3 deletions python/cugraph/cugraph/cores/core_number.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,18 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from cugraph.cores import core_number_wrapper
from cugraph.utilities import (ensure_cugraph_obj_for_nx,
df_score_to_dictionary,
)
import cudf
import warnings

from pylibcugraph import (core_number as pylibcugraph_core_number,
ResourceHandle
)

def core_number(G):

def core_number(G, degree_type=None):
"""
Compute the core numbers for the nodes of the graph G. A k-core of a graph
is a maximal subgraph that contains nodes of degree k or more.
Expand All @@ -33,6 +38,13 @@ def core_number(G):
can contain edge weights, they don't participate in the calculation
of the core numbers.
degree_type: str
This option determines if the core number computation should be based
on input, output, or both directed edges, with valid values being
"incoming", "outgoing", and "bidirectional" respectively.
This option is currently ignored in this release, and setting it will
result in a warning.
Returns
-------
df : cudf.DataFrame or python dictionary (in NetworkX input)
Expand All @@ -56,7 +68,31 @@ def core_number(G):

G, isNx = ensure_cugraph_obj_for_nx(G)

df = core_number_wrapper.core_number(G)
if degree_type is not None:
warning_msg = (
"The 'degree_type' parameter is ignored in this release.")
warnings.warn(warning_msg, Warning)

if G.is_directed():
raise ValueError("input graph must be undirected")

# FIXME: enable this check once 'degree_type' is supported
"""
if degree_type not in ["incoming", "outgoing", "bidirectional"]:
raise ValueError(f"'degree_type' must be either incoming, "
f"outgoing or bidirectional, got: {degree_type}")
"""
vertex, core_number = \
pylibcugraph_core_number(
resource_handle=ResourceHandle(),
graph=G._plc_graph,
degree_type=degree_type,
do_expensive_check=False
)

df = cudf.DataFrame()
df["vertex"] = vertex
df["core_number"] = core_number

if G.renumbered:
df = G.unrenumber(df, "vertex")
Expand Down
53 changes: 0 additions & 53 deletions python/cugraph/cugraph/cores/core_number_wrapper.pyx

This file was deleted.

23 changes: 21 additions & 2 deletions python/cugraph/cugraph/cores/k_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,32 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from cugraph.cores import k_core_wrapper, core_number_wrapper
from cugraph.cores import k_core_wrapper
import cudf
from pylibcugraph import (core_number as pylibcugraph_core_number,
ResourceHandle
)
from cugraph.utilities import (ensure_cugraph_obj_for_nx,
cugraph_to_nx,
)
from cugraph.structure.graph_classes import Graph


def _call_plc_core_number(G):
    """
    Run the pylibcugraph core number algorithm on ``G`` and return the
    result as a cudf DataFrame.

    Parameters
    ----------
    G : graph object exposing a ``_plc_graph`` attribute
        The graph whose ``_plc_graph`` is handed to pylibcugraph.

    Returns
    -------
    cudf.DataFrame
        Two columns: "vertex" and "core_number", filled from the pair
        returned by pylibcugraph.
    """
    # 'degree_type' is passed as None and the expensive input check is
    # disabled for this internal call.
    plc_result = pylibcugraph_core_number(
        resource_handle=ResourceHandle(),
        graph=G._plc_graph,
        degree_type=None,
        do_expensive_check=False,
    )
    vertices, core_numbers = plc_result

    result_df = cudf.DataFrame()
    result_df["vertex"] = vertices
    result_df["core_number"] = core_numbers
    return result_df


def k_core(G, k=None, core_number=None):
"""
Compute the k-core of the graph G based on the out degree of its nodes. A
Expand Down Expand Up @@ -81,7 +100,7 @@ def k_core(G, k=None, core_number=None):
cols)

else:
core_number = core_number_wrapper.core_number(G)
core_number = _call_plc_core_number(G)
core_number = core_number.rename(
columns={"core_number": "values"}, copy=False
)
Expand Down
1 change: 1 addition & 0 deletions python/cugraph/cugraph/dask/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,4 @@
from .components.connectivity import weakly_connected_components
from .sampling.uniform_neighbor_sample import uniform_neighbor_sample
from .centrality.eigenvector_centrality import eigenvector_centrality
from .cores.core_number import core_number
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2019-2021, NVIDIA CORPORATION.
# Copyright (c) 2022, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand All @@ -11,15 +11,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# cython: profile=False
# distutils: language = c++
# cython: embedsignature = True
# cython: language_level = 3

from cugraph.structure.graph_primtypes cimport *

cdef extern from "cugraph/algorithms.hpp" namespace "cugraph":

cdef void core_number[VT,ET,WT](
const GraphCSRView[VT,ET,WT] &graph,
VT *core_number) except +
from .core_number import core_number
133 changes: 133 additions & 0 deletions python/cugraph/cugraph/dask/cores/core_number.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
# Copyright (c) 2022, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from dask.distributed import wait
import cugraph.dask.comms.comms as Comms
import dask_cudf
import cudf
import warnings

from pylibcugraph import (ResourceHandle,
core_number as pylibcugraph_core_number
)


def convert_to_cudf(cp_arrays):
    """
    Build a cudf DataFrame from the pair of arrays produced by the
    pylibcugraph core number wrapper.

    Parameters
    ----------
    cp_arrays : two-element sequence
        ``(vertices, core_numbers)``; presumably cupy arrays, as returned
        by pylibcugraph -- TODO confirm.

    Returns
    -------
    cudf.DataFrame
        Two columns: "vertex" and "core_number".
    """
    vertices, core_numbers = cp_arrays

    result_df = cudf.DataFrame()
    result_df["vertex"] = vertices
    result_df["core_number"] = core_numbers
    return result_df


def _call_plc_core_number(sID, mg_graph_x, dt_x, do_expensive_check):
    """
    Invoke the pylibcugraph core number algorithm for one graph partition.

    Parameters
    ----------
    sID : session id
        Comms session id used to look up the communication handle.
    mg_graph_x : pylibcugraph graph
        The graph object passed through as ``graph``.
    dt_x : str or None
        Forwarded unchanged as ``degree_type``.
    do_expensive_check : bool
        Forwarded unchanged to pylibcugraph.

    Returns
    -------
    Whatever ``pylibcugraph.core_number`` returns (unpacked by
    ``convert_to_cudf`` as a ``(vertices, core_numbers)`` pair).
    """
    # Wrap the handle registered for this comms session in a ResourceHandle.
    session_handle = Comms.get_handle(sID).getHandle()
    handle = ResourceHandle(session_handle)

    return pylibcugraph_core_number(
        resource_handle=handle,
        graph=mg_graph_x,
        degree_type=dt_x,
        do_expensive_check=do_expensive_check,
    )


def core_number(input_graph,
                degree_type=None):
    """
    Compute the core numbers for the nodes of the graph G. A k-core of a graph
    is a maximal subgraph that contains nodes of degree k or more.
    A node has a core number of k if it belongs to a k-core but not to a
    k+1-core. This call does not support a graph with self-loops and parallel
    edges.

    Parameters
    ----------
    input_graph : cugraph.graph
        cuGraph graph descriptor, should contain the connectivity information,
        (edge weights are not used in this algorithm).
        The current implementation only supports undirected graphs.

    degree_type: str
        This option determines if the core number computation should be based
        on input, output, or both directed edges, with valid values being
        "incoming", "outgoing", and "bidirectional" respectively.
        This option is currently ignored in this release, and setting it will
        result in a warning.

    Returns
    -------
    result : dask_cudf.DataFrame
        GPU distributed data frame containing 2 dask_cudf.Series

        ddf['vertex']: dask_cudf.Series
            Contains the core number vertices
        ddf['core_number']: dask_cudf.Series
            Contains the core number of vertices

    Raises
    ------
    ValueError
        If ``input_graph`` is directed.
    """
    # Reject directed graphs before doing anything else.
    if input_graph.is_directed():
        raise ValueError("input graph must be undirected")

    if degree_type is not None:
        warnings.warn(
            "The 'degree_type' parameter is ignored in this release.",
            Warning)

    # FIXME: enable this check once 'degree_type' is supported
    #
    # if degree_type not in ["incoming", "outgoing", "bidirectional"]:
    #     raise ValueError(f"'degree_type' must be either incoming, "
    #                      f"outgoing or bidirectional, got: {degree_type}")

    # Dask client attached to the graph.
    client = input_graph._client

    do_expensive_check = False

    # Submit one pylibcugraph call per worker, each pinned to the worker
    # whose graph partition it receives.
    plc_futures = []
    for w in Comms.get_workers():
        plc_futures.append(
            client.submit(
                _call_plc_core_number,
                Comms.get_session_id(),
                input_graph._plc_graph[w],
                degree_type,
                do_expensive_check,
                workers=[w],
            )
        )
    wait(plc_futures)

    # Turn each worker's array pair into a cudf DataFrame.
    frame_futures = []
    for arrays_future in plc_futures:
        frame_futures.append(client.submit(convert_to_cudf, arrays_future))
    wait(frame_futures)

    ddf = dask_cudf.from_delayed(frame_futures)

    # Map internal vertex ids back to the original ids when the graph
    # was renumbered.
    if input_graph.renumbered:
        ddf = input_graph.unrenumber(ddf, "vertex")

    return ddf
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
)


# FIXME: rename this to triangle_count to match the MG implementation
def EXPERIMENTAL__triangle_count(G, start_list=None):
"""
Compute the number of triangles (cycles of length three) in the
Expand Down
Loading

0 comments on commit 3cea60d

Please sign in to comment.