From 3cea60dc37ec2348844111f70467719de77e89d2 Mon Sep 17 00:00:00 2001 From: Joseph Nke <76006812+jnke2016@users.noreply.github.com> Date: Fri, 22 Jul 2022 08:23:45 -0500 Subject: [PATCH] Add core number to the python API (#2414) This PR extends the `core number` functionality to `pylibcugraph` and python `cugraph` Authors: - Joseph Nke (https://github.com/jnke2016) - Dylan Chima-Sanchez (https://github.com/betochimas) - Alex Barghi (https://github.com/alexbarghi-nv) - Chuck Hastings (https://github.com/ChuckHastings) Approvers: - Brad Rees (https://github.com/BradReesWork) - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/2414 --- python/cugraph/cugraph/cores/core_number.py | 42 ++++- .../cugraph/cores/core_number_wrapper.pyx | 53 ------ python/cugraph/cugraph/cores/k_core.py | 23 ++- python/cugraph/cugraph/dask/__init__.py | 1 + .../cores/__init__.py} | 15 +- .../cugraph/cugraph/dask/cores/core_number.py | 133 ++++++++++++++ .../experimental/community/triangle_count.py | 1 - .../cugraph/tests/mg/test_mg_core_number.py | 165 ++++++++++++++++++ .../cugraph/cugraph/tests/test_core_number.py | 144 ++++++++------- python/pylibcugraph/pylibcugraph/__init__.py | 2 + .../_cugraph_c/core_algorithms.pxd | 4 +- .../pylibcugraph/pylibcugraph/core_number.pyx | 45 +++-- .../pylibcugraph/experimental/__init__.py | 4 - 13 files changed, 474 insertions(+), 158 deletions(-) delete mode 100644 python/cugraph/cugraph/cores/core_number_wrapper.pyx rename python/cugraph/cugraph/{cores/core_number.pxd => dask/cores/__init__.py} (58%) create mode 100644 python/cugraph/cugraph/dask/cores/core_number.py create mode 100644 python/cugraph/cugraph/tests/mg/test_mg_core_number.py diff --git a/python/cugraph/cugraph/cores/core_number.py b/python/cugraph/cugraph/cores/core_number.py index 34464e024ae..f5a1e00de9f 100644 --- a/python/cugraph/cugraph/cores/core_number.py +++ b/python/cugraph/cugraph/cores/core_number.py @@ -11,13 +11,18 @@ # See the License 
for the specific language governing permissions and # limitations under the License. -from cugraph.cores import core_number_wrapper from cugraph.utilities import (ensure_cugraph_obj_for_nx, df_score_to_dictionary, ) +import cudf +import warnings +from pylibcugraph import (core_number as pylibcugraph_core_number, + ResourceHandle + ) -def core_number(G): + +def core_number(G, degree_type=None): """ Compute the core numbers for the nodes of the graph G. A k-core of a graph is a maximal subgraph that contains nodes of degree k or more. @@ -33,6 +38,13 @@ def core_number(G): can contain edge weights, they don't participate in the calculation of the core numbers. + degree_type: str + This option determines if the core number computation should be based + on input, output, or both directed edges, with valid values being + "incoming", "outgoing", and "bidirectional" respectively. + This option is currently ignored in this release, and setting it will + result in a warning. + Returns ------- df : cudf.DataFrame or python dictionary (in NetworkX input) @@ -56,7 +68,31 @@ def core_number(G): G, isNx = ensure_cugraph_obj_for_nx(G) - df = core_number_wrapper.core_number(G) + if degree_type is not None: + warning_msg = ( + "The 'degree_type' parameter is ignored in this release.") + warnings.warn(warning_msg, Warning) + + if G.is_directed(): + raise ValueError("input graph must be undirected") + + # FIXME: enable this check once 'degree_type' is supported + """ + if degree_type not in ["incoming", "outgoing", "bidirectional"]: + raise ValueError(f"'degree_type' must be either incoming, " + f"outgoing or bidirectional, got: {degree_type}") + """ + vertex, core_number = \ + pylibcugraph_core_number( + resource_handle=ResourceHandle(), + graph=G._plc_graph, + degree_type=degree_type, + do_expensive_check=False + ) + + df = cudf.DataFrame() + df["vertex"] = vertex + df["core_number"] = core_number if G.renumbered: df = G.unrenumber(df, "vertex") diff --git 
a/python/cugraph/cugraph/cores/core_number_wrapper.pyx b/python/cugraph/cugraph/cores/core_number_wrapper.pyx deleted file mode 100644 index 9fcc3b4746c..00000000000 --- a/python/cugraph/cugraph/cores/core_number_wrapper.pyx +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) 2019-2021, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# cython: profile=False -# distutils: language = c++ -# cython: embedsignature = True -# cython: language_level = 3 - -cimport cugraph.cores.core_number as c_core -from cugraph.structure.graph_primtypes cimport * -from cugraph.structure import graph_primtypes_wrapper -from libc.stdint cimport uintptr_t -import cudf -import numpy as np - - -def core_number(input_graph): - """ - Call core_number - """ - if not input_graph.adjlist: - input_graph.view_adj_list() - - [offsets, indices] = graph_primtypes_wrapper.datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32]) - - num_verts = input_graph.number_of_vertices() - num_edges = input_graph.number_of_edges(directed_edges=True) - - df = cudf.DataFrame() - df['vertex'] = cudf.Series(np.zeros(num_verts, dtype=np.int32)) - df['core_number'] = cudf.Series(np.zeros(num_verts, dtype=np.int32)) - - cdef uintptr_t c_offsets = offsets.__cuda_array_interface__['data'][0] - cdef uintptr_t c_indices = indices.__cuda_array_interface__['data'][0] - cdef uintptr_t c_identifier = df['vertex'].__cuda_array_interface__['data'][0]; - cdef uintptr_t c_core_number = 
df['core_number'].__cuda_array_interface__['data'][0]; - - cdef GraphCSRView[int,int,float] graph = GraphCSRView[int,int,float](c_offsets, c_indices, NULL, num_verts, num_edges) - - graph.get_vertex_identifiers(c_identifier) - c_core.core_number(graph, c_core_number) - - return df diff --git a/python/cugraph/cugraph/cores/k_core.py b/python/cugraph/cugraph/cores/k_core.py index 9fbce5a3ce4..7e935c55558 100644 --- a/python/cugraph/cugraph/cores/k_core.py +++ b/python/cugraph/cugraph/cores/k_core.py @@ -11,13 +11,32 @@ # See the License for the specific language governing permissions and # limitations under the License. -from cugraph.cores import k_core_wrapper, core_number_wrapper +from cugraph.cores import k_core_wrapper +import cudf +from pylibcugraph import (core_number as pylibcugraph_core_number, + ResourceHandle + ) from cugraph.utilities import (ensure_cugraph_obj_for_nx, cugraph_to_nx, ) from cugraph.structure.graph_classes import Graph +def _call_plc_core_number(G): + vertex, core_number = \ + pylibcugraph_core_number( + resource_handle=ResourceHandle(), + graph=G._plc_graph, + degree_type=None, + do_expensive_check=False + ) + + df = cudf.DataFrame() + df["vertex"] = vertex + df["core_number"] = core_number + return df + + def k_core(G, k=None, core_number=None): """ Compute the k-core of the graph G based on the out degree of its nodes. 
A @@ -81,7 +100,7 @@ def k_core(G, k=None, core_number=None): cols) else: - core_number = core_number_wrapper.core_number(G) + core_number = _call_plc_core_number(G) core_number = core_number.rename( columns={"core_number": "values"}, copy=False ) diff --git a/python/cugraph/cugraph/dask/__init__.py b/python/cugraph/cugraph/dask/__init__.py index ca5d3e27cc6..ec0873923c9 100644 --- a/python/cugraph/cugraph/dask/__init__.py +++ b/python/cugraph/cugraph/dask/__init__.py @@ -23,3 +23,4 @@ from .components.connectivity import weakly_connected_components from .sampling.uniform_neighbor_sample import uniform_neighbor_sample from .centrality.eigenvector_centrality import eigenvector_centrality +from .cores.core_number import core_number diff --git a/python/cugraph/cugraph/cores/core_number.pxd b/python/cugraph/cugraph/dask/cores/__init__.py similarity index 58% rename from python/cugraph/cugraph/cores/core_number.pxd rename to python/cugraph/cugraph/dask/cores/__init__.py index 17dc1118a5e..4dff071ce91 100644 --- a/python/cugraph/cugraph/cores/core_number.pxd +++ b/python/cugraph/cugraph/dask/cores/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2021, NVIDIA CORPORATION. +# Copyright (c) 2022, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -11,15 +11,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-# cython: profile=False -# distutils: language = c++ -# cython: embedsignature = True -# cython: language_level = 3 - -from cugraph.structure.graph_primtypes cimport * - -cdef extern from "cugraph/algorithms.hpp" namespace "cugraph": - - cdef void core_number[VT,ET,WT]( - const GraphCSRView[VT,ET,WT] &graph, - VT *core_number) except + +from .core_number import core_number diff --git a/python/cugraph/cugraph/dask/cores/core_number.py b/python/cugraph/cugraph/dask/cores/core_number.py new file mode 100644 index 00000000000..b02a1878fb6 --- /dev/null +++ b/python/cugraph/cugraph/dask/cores/core_number.py @@ -0,0 +1,133 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from dask.distributed import wait +import cugraph.dask.comms.comms as Comms +import dask_cudf +import cudf +import warnings + +from pylibcugraph import (ResourceHandle, + core_number as pylibcugraph_core_number + ) + + +def convert_to_cudf(cp_arrays): + """ + Creates a cudf DataFrame from cupy arrays from pylibcugraph wrapper + """ + cupy_vertices, cupy_core_number = cp_arrays + df = cudf.DataFrame() + df["vertex"] = cupy_vertices + df["core_number"] = cupy_core_number + + return df + + +def _call_plc_core_number(sID, + mg_graph_x, + dt_x, + do_expensive_check + ): + return pylibcugraph_core_number( + resource_handle=ResourceHandle( + Comms.get_handle(sID).getHandle() + ), + graph=mg_graph_x, + degree_type=dt_x, + do_expensive_check=do_expensive_check + ) + + +def core_number(input_graph, + degree_type=None): + """ + Compute the core numbers for the nodes of the graph G. A k-core of a graph + is a maximal subgraph that contains nodes of degree k or more. + A node has a core number of k if it belongs to a k-core but not to a (k+1)-core. + This call does not support a graph with self-loops and parallel + edges. + + Parameters + ---------- + input_graph : cugraph.graph + cuGraph graph descriptor, should contain the connectivity information, + (edge weights are not used in this algorithm). + The current implementation only supports undirected graphs. + + degree_type: str + This option determines if the core number computation should be based + on input, output, or both directed edges, with valid values being + "incoming", "outgoing", and "bidirectional" respectively. + This option is currently ignored in this release, and setting it will + result in a warning. 
+ + + Returns + ------- + result : dask_cudf.DataFrame + GPU distributed data frame containing 2 dask_cudf.Series + + ddf['vertex']: dask_cudf.Series + Contains the core number vertices + ddf['core_number']: dask_cudf.Series + Contains the core number of vertices + """ + + if input_graph.is_directed(): + raise ValueError("input graph must be undirected") + + if degree_type is not None: + warning_msg = ( + "The 'degree_type' parameter is ignored in this release.") + warnings.warn(warning_msg, Warning) + + # FIXME: enable this check once 'degree_type' is supported + """ + if degree_type not in ["incoming", "outgoing", "bidirectional"]: + raise ValueError(f"'degree_type' must be either incoming, " + f"outgoing or bidirectional, got: {degree_type}") + """ + + # Initialize dask client + client = input_graph._client + + do_expensive_check = False + + result = [ + client.submit( + _call_plc_core_number, + Comms.get_session_id(), + input_graph._plc_graph[w], + degree_type, + do_expensive_check, + workers=[w], + ) + for w in Comms.get_workers() + ] + + wait(result) + + cudf_result = [client.submit(convert_to_cudf, + cp_arrays) + for cp_arrays in result] + + wait(cudf_result) + + ddf = dask_cudf.from_delayed(cudf_result) + if input_graph.renumbered: + ddf = input_graph.unrenumber(ddf, "vertex") + + return ddf diff --git a/python/cugraph/cugraph/experimental/community/triangle_count.py b/python/cugraph/cugraph/experimental/community/triangle_count.py index a974111223d..e21833f06ec 100644 --- a/python/cugraph/cugraph/experimental/community/triangle_count.py +++ b/python/cugraph/cugraph/experimental/community/triangle_count.py @@ -23,7 +23,6 @@ ) -# FIXME: rename this to triangle_conut to match the MG implmentation def EXPERIMENTAL__triangle_count(G, start_list=None): """ Compute the number of triangles (cycles of length three) in the diff --git a/python/cugraph/cugraph/tests/mg/test_mg_core_number.py b/python/cugraph/cugraph/tests/mg/test_mg_core_number.py new file mode 100644 
index 00000000000..4d01bf85018 --- /dev/null +++ b/python/cugraph/cugraph/tests/mg/test_mg_core_number.py @@ -0,0 +1,165 @@ +# Copyright (c) 2022, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import gc + +import pytest + +import cugraph +from cugraph.testing import utils +import cugraph.dask as dcg +import dask_cudf + + +# ============================================================================= +# Pytest Setup / Teardown - called for each test function +# ============================================================================= +def setup_function(): + gc.collect() + + +# ============================================================================= +# Pytest fixtures +# ============================================================================= +datasets = utils.DATASETS_UNDIRECTED +degree_type = ["incoming", "outgoing"] + +fixture_params = utils.genFixtureParamsProduct((datasets, "graph_file"), + (degree_type, "degree_type"), + ) + + +@pytest.fixture(scope="module", params=fixture_params) +def input_combo(request): + """ + Simply return the current combination of params as a dictionary for use in + tests or other parameterized fixtures. + """ + parameters = dict(zip(("graph_file", + "degree_type"), request.param)) + + return parameters + + +@pytest.fixture(scope="module") +def input_expected_output(dask_client, input_combo): + """ + This fixture returns the inputs and expected results from the Core number + algo. 
+ """ + degree_type = input_combo["degree_type"] + input_data_path = input_combo["graph_file"] + G = utils.generate_cugraph_graph_from_file( + input_data_path, directed=False, edgevals=True) + + input_combo["SGGraph"] = G + + sg_core_number_results = cugraph.core_number(G, degree_type) + sg_core_number_results = sg_core_number_results.sort_values( + "vertex").reset_index(drop=True) + + input_combo["sg_core_number_results"] = sg_core_number_results + input_combo["degree_type"] = degree_type + + # Creating an edgelist from a dask cudf dataframe + chunksize = dcg.get_chunksize(input_data_path) + ddf = dask_cudf.read_csv( + input_data_path, + chunksize=chunksize, + delimiter=" ", + names=["src", "dst", "value"], + dtype=["int32", "int32", "float32"], + ) + + dg = cugraph.Graph(directed=False) + dg.from_dask_cudf_edgelist( + ddf, source='src', destination='dst', + edge_attr="value", renumber=True, legacy_renum_only=True) + + input_combo["MGGraph"] = dg + + return input_combo + + +# ============================================================================= +# Tests +# ============================================================================= +def test_sg_core_number(dask_client, benchmark, input_expected_output): + # This test is only for benchmark purposes. 
+ sg_core_number_results = None + G = input_expected_output["SGGraph"] + degree_type = input_expected_output["degree_type"] + warning_msg = ( + "The 'degree_type' parameter is ignored in this release.") + + # FIXME: Remove this warning test once 'degree_type' is supported + with pytest.warns(Warning, match=warning_msg): + sg_core_number_results = benchmark( + cugraph.core_number, G, degree_type) + assert sg_core_number_results is not None + + +def test_core_number(dask_client, benchmark, input_expected_output): + + dg = input_expected_output["MGGraph"] + degree_type = input_expected_output["degree_type"] + + warning_msg = ( + "The 'degree_type' parameter is ignored in this release.") + + # FIXME: Remove this warning test once 'degree_type' is supported + with pytest.warns(Warning, match=warning_msg): + result_core_number = benchmark(dcg.core_number, dg, degree_type) + + result_core_number = result_core_number.drop_duplicates().compute(). \ + sort_values("vertex").reset_index(drop=True).rename( + columns={"core_number": "mg_core_number"}) + + expected_output = input_expected_output["sg_core_number_results"] + + # Update the mg core number with sg core number results + # for easy comparison using cuDF DataFrame methods. 
+ result_core_number["sg_core_number"] = expected_output['core_number'] + counts_diffs = result_core_number.query('mg_core_number != sg_core_number') + + assert len(counts_diffs) == 0 + + +def test_core_number_invalid_input(input_expected_output): + input_data_path = (utils.RAPIDS_DATASET_ROOT_DIR_PATH / + "karate-asymmetric.csv").as_posix() + + chunksize = dcg.get_chunksize(input_data_path) + ddf = dask_cudf.read_csv( + input_data_path, + chunksize=chunksize, + delimiter=" ", + names=["src", "dst", "value"], + dtype=["int32", "int32", "float32"], + ) + + dg = cugraph.Graph(directed=True) + dg.from_dask_cudf_edgelist( + ddf, source='src', destination='dst', + edge_attr="value", renumber=True) + + with pytest.raises(ValueError): + dcg.core_number(dg) + + # FIXME: enable this check once 'degree_type' is supported + """ + invalid_degree_type = 3 + dg = input_expected_output["MGGraph"] + with pytest.raises(ValueError): + dcg.core_number(dg, invalid_degree_type) + """ diff --git a/python/cugraph/cugraph/tests/test_core_number.py b/python/cugraph/cugraph/tests/test_core_number.py index 4b304d85f93..49b2f76664e 100644 --- a/python/cugraph/cugraph/tests/test_core_number.py +++ b/python/cugraph/cugraph/tests/test_core_number.py @@ -12,94 +12,110 @@ # limitations under the License. import gc + import pytest + +import cudf import cugraph +import networkx as nx from cugraph.testing import utils -from cugraph.utilities import df_score_to_dictionary -# Temporarily suppress warnings till networkX fixes deprecation warnings -# (Using or importing the ABCs from 'collections' instead of from -# 'collections.abc' is deprecated, and in 3.8 it will stop working) for -# python 3.7. Also, this import networkx needs to be relocated in the -# third-party group once this gets fixed. 
-import warnings -with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=DeprecationWarning) - import networkx as nx +# ============================================================================= +# Pytest Setup / Teardown - called for each test function +# ============================================================================= +def setup_function(): + gc.collect() -print("Networkx version : {} ".format(nx.__version__)) +# ============================================================================= +# Pytest fixtures +# ============================================================================= +datasets = utils.DATASETS_UNDIRECTED +degree_type = ["incoming", "outgoing"] +fixture_params = utils.genFixtureParamsProduct((datasets, "graph_file"), + (degree_type, "degree_type"), + ) -def calc_nx_core_number(graph_file): - NM = utils.read_csv_for_nx(graph_file) - Gnx = nx.from_pandas_edgelist( - NM, source="0", target="1", create_using=nx.Graph() - ) - nc = nx.core_number(Gnx) - return nc +@pytest.fixture(scope="module", params=fixture_params) +def input_combo(request): + """ + This fixture returns a dictionary containing all input params required to + run a Core number algo + """ + parameters = dict( + zip(("graph_file", "degree_type"), request.param)) -def calc_cg_core_number(graph_file): - M = utils.read_csv_file(graph_file) - G = cugraph.Graph() - G.from_cudf_edgelist(M, source="0", destination="1") + input_data_path = parameters["graph_file"] - cn = cugraph.core_number(G) - return cn + G = utils.generate_cugraph_graph_from_file( + input_data_path, directed=False, edgevals=True) + Gnx = utils.generate_nx_graph_from_file( + input_data_path, directed=False, edgevals=True) -def calc_core_number(graph_file): - NM = utils.read_csv_for_nx(graph_file) - Gnx = nx.from_pandas_edgelist( - NM, source="0", target="1", create_using=nx.Graph() - ) - nc = nx.core_number(Gnx) + parameters["G"] = G + parameters["Gnx"] = Gnx - M = 
utils.read_csv_file(graph_file) - G = cugraph.Graph() - G.from_cudf_edgelist(M, source="0", destination="1") + return parameters - cn = cugraph.core_number(G) - cn = cn.sort_values("vertex").reset_index(drop=True) - pdf = [nc[k] for k in sorted(nc.keys())] - cn["nx_core_number"] = pdf - cn = cn.rename(columns={"core_number": "cu_core_number"}, copy=False) - return cn +# ============================================================================= +# Tests +# ============================================================================= +def test_core_number(input_combo): + G = input_combo["G"] + Gnx = input_combo["Gnx"] + degree_type = input_combo["degree_type"] + nx_core_number_results = cudf.DataFrame() + dic_results = nx.core_number(Gnx) + nx_core_number_results["vertex"] = dic_results.keys() + nx_core_number_results["core_number"] = dic_results.values() + nx_core_number_results = nx_core_number_results.sort_values( + "vertex").reset_index(drop=True) -# FIXME: the default set of datasets includes an asymmetric directed graph -# (email-EU-core.csv), which currently causes an error with NetworkX: -# "networkx.exception.NetworkXError: Input graph has self loops which is not -# permitted; Consider using G.remove_edges_from(nx.selfloop_edges(G))" -# -# https://github.com/rapidsai/cugraph/issues/1045 -# -# @pytest.mark.parametrize("graph_file", utils.DATASETS) -@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) -def test_core_number(graph_file): - gc.collect() + warning_msg = ( + "The 'degree_type' parameter is ignored in this release.") - nx_num = calc_nx_core_number(graph_file) - cg_num = calc_cg_core_number(graph_file) + # FIXME: Remove this warning test once 'degree_type' is supported" + with pytest.warns(Warning, match=warning_msg): + core_number_results = cugraph.core_number(G, degree_type).sort_values( + "vertex").reset_index(drop=True).rename(columns={ + "core_number": "cugraph_core_number"}) - # convert cugraph dataframe to a dictionary - 
cg_num_dic = df_score_to_dictionary(cg_num, k="core_number") + # Compare the nx core number results with cugraph + core_number_results["nx_core_number"] = \ + nx_core_number_results["core_number"] - assert cg_num_dic == nx_num + counts_diff = core_number_results.query( + 'nx_core_number != cugraph_core_number') + assert len(counts_diff) == 0 -@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) -def test_core_number_nx(graph_file): - gc.collect() +def test_core_number_invalid_input(input_combo): + input_data_path = (utils.RAPIDS_DATASET_ROOT_DIR_PATH / + "karate-asymmetric.csv").as_posix() + M = utils.read_csv_for_nx(input_data_path) + G = cugraph.Graph(directed=True) + cu_M = cudf.DataFrame() + cu_M["src"] = cudf.Series(M["0"]) + cu_M["dst"] = cudf.Series(M["1"]) - NM = utils.read_csv_for_nx(graph_file) - Gnx = nx.from_pandas_edgelist( - NM, source="0", target="1", create_using=nx.Graph() + cu_M["weights"] = cudf.Series(M["weight"]) + G.from_cudf_edgelist( + cu_M, source="src", destination="dst", edge_attr="weights" ) - nc = nx.core_number(Gnx) - cc = cugraph.core_number(Gnx) - assert nc == cc + with pytest.raises(ValueError): + cugraph.core_number(G) + + # FIXME: enable this check once 'degree_type' is supported + """ + invalid_degree_type = "invalid" + G = input_combo["G"] + with pytest.raises(ValueError): + cugraph.core_number(G, invalid_degree_type) + """ diff --git a/python/pylibcugraph/pylibcugraph/__init__.py b/python/pylibcugraph/pylibcugraph/__init__.py index 41b8cdab885..7d604bf2dbb 100644 --- a/python/pylibcugraph/pylibcugraph/__init__.py +++ b/python/pylibcugraph/pylibcugraph/__init__.py @@ -42,3 +42,5 @@ from pylibcugraph.bfs import bfs from pylibcugraph.uniform_neighbor_sample import uniform_neighbor_sample + +from pylibcugraph.core_number import core_number diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/core_algorithms.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/core_algorithms.pxd index be0c96dc977..1830cc71a49 
100644 --- a/python/pylibcugraph/pylibcugraph/_cugraph_c/core_algorithms.pxd +++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/core_algorithms.pxd @@ -53,7 +53,9 @@ cdef extern from "cugraph_c/core_algorithms.h": ) ctypedef enum cugraph_k_core_degree_type_t: - pass + K_CORE_DEGREE_TYPE_IN=0, + K_CORE_DEGREE_TYPE_OUT=1, + K_CORE_DEGREE_TYPE_INOUT=2 cdef cugraph_error_code_t \ cugraph_core_number( diff --git a/python/pylibcugraph/pylibcugraph/core_number.pyx b/python/pylibcugraph/pylibcugraph/core_number.pyx index ea585d7374a..ee065b6b9dd 100644 --- a/python/pylibcugraph/pylibcugraph/core_number.pyx +++ b/python/pylibcugraph/pylibcugraph/core_number.pyx @@ -15,6 +15,7 @@ # cython: language_level = 3 from libc.stdint cimport uintptr_t +import warnings from pylibcugraph._cugraph_c.resource_handle cimport ( bool_t, @@ -36,6 +37,7 @@ from pylibcugraph._cugraph_c.graph cimport ( from pylibcugraph._cugraph_c.core_algorithms cimport ( cugraph_core_result_t, cugraph_core_number, + cugraph_k_core_degree_type_t, cugraph_core_result_get_vertices, cugraph_core_result_get_core_numbers, cugraph_core_result_free, @@ -53,10 +55,10 @@ from pylibcugraph.utils cimport ( get_c_type_from_numpy_type, ) -def EXPERIMENTAL__core_number(ResourceHandle resource_handle, - _GPUGraph graph, - degree_type, - bool_t do_expensive_check): +def core_number(ResourceHandle resource_handle, + _GPUGraph graph, + degree_type, + bool_t do_expensive_check): """ Computes core number. @@ -69,8 +71,14 @@ def EXPERIMENTAL__core_number(ResourceHandle resource_handle, graph: MGGraph The input graph, for Multi-GPU operations. - degree_type: device array type - Device array containing the degree type as a character. + degree_type: str + This option determines if the core number computation should be based + on input, output, or both directed edges, with valid values being + "incoming", "outgoing", and "bidirectional" respectively. 
+ This option is currently ignored in this release, and setting it will + result in a warning. + + This implementation only supports bidirectional edges. do_expensive_check: bool If True, performs more extensive tests on the inputs to ensure @@ -84,16 +92,7 @@ def EXPERIMENTAL__core_number(ResourceHandle resource_handle, Examples -------- - >>> import pylibcugraph, cupy, numpy - >>> srcs = cupy.asarray([0, 1, 2], dtype=numpy.int32) - >>> dsts = cupy.asarray([1, 2, 3], dtype=numpy.int32) - >>> weights = cupy.asarray([1.0, 1.0, 1.0], dtype=numpy.float32) - >>> resource_handle = pylibcugraph.ResourceHandle() - >>> graph_props = pylibcugraph.GraphProperties( - ... is_symmetric=False, is_multigraph=False) - >>> G = pylibcugraph.SGGraph( - ... resource_handle, graph_props, srcs, dsts, weights, - ... store_transposed=True, renumber=False, do_expensive_check=False) + # FIXME: No example yet """ cdef cugraph_resource_handle_t* c_resource_handle_ptr = \ @@ -104,9 +103,21 @@ def EXPERIMENTAL__core_number(ResourceHandle resource_handle, cdef cugraph_error_code_t error_code cdef cugraph_error_t* error_ptr + if degree_type is not None: + warning_msg = ( + "The 'degree_type' parameter is ignored in this release.") + warnings.warn(warning_msg, Warning) + + degree_type = "bidirectional" + + degree_type_map = { + "incoming": cugraph_k_core_degree_type_t.K_CORE_DEGREE_TYPE_IN, + "outgoing": cugraph_k_core_degree_type_t.K_CORE_DEGREE_TYPE_OUT, + "bidirectional": cugraph_k_core_degree_type_t.K_CORE_DEGREE_TYPE_INOUT} + error_code = cugraph_core_number(c_resource_handle_ptr, c_graph_ptr, - degree_type, + degree_type_map[degree_type], do_expensive_check, &result_ptr, &error_ptr) diff --git a/python/pylibcugraph/pylibcugraph/experimental/__init__.py b/python/pylibcugraph/pylibcugraph/experimental/__init__.py index eb1e9fcfa7d..1876599466f 100644 --- a/python/pylibcugraph/pylibcugraph/experimental/__init__.py +++ b/python/pylibcugraph/pylibcugraph/experimental/__init__.py @@ -68,7 +68,3 @@ 
from pylibcugraph.triangle_count import EXPERIMENTAL__triangle_count triangle_count = experimental_warning_wrapper(EXPERIMENTAL__triangle_count) - -from pylibcugraph.core_number import EXPERIMENTAL__core_number -core_number = experimental_warning_wrapper(EXPERIMENTAL__core_number) -