From 83f9f0c9f01450a2ad5bfc8af27377c15485c39d Mon Sep 17 00:00:00 2001 From: betochimas <97180625+betochimas@users.noreply.github.com> Date: Wed, 1 Jun 2022 06:52:19 -0700 Subject: [PATCH] Refactored SG `hits` and MG `katz_centrality` (#2276) This PR: 1. Refactors SG `hits` with the updated pylibcugraph implementation 2. Refactors MG `katz_centrality` with the updated pylibcugraph implementation, adding support for multiple arguments 3. Improves support within `test_doctests.py` to ignore certain docstring examples based on the build architecture (such as ktruss in CUDA 11.4) 4. Passing MG `katz_centrality` testing This PR also closes #2025. Authors: - https://github.com/betochimas Approvers: - Alex Barghi (https://github.com/alexbarghi-nv) - Joseph Nke (https://github.com/jnke2016) - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/2276 --- docs/cugraph/source/api_docs/pylibcugraph.rst | 7 + .../cugraph/community/ktruss_subgraph.py | 2 + .../dask/centrality/katz_centrality.py | 185 ++++++++++++------ .../cugraph/dask/link_analysis/hits.py | 10 +- .../cugraph/cugraph/link_analysis/__init__.py | 2 +- python/cugraph/cugraph/link_analysis/hits.pxd | 29 --- python/cugraph/cugraph/link_analysis/hits.py | 84 +++++--- .../cugraph/link_analysis/hits_wrapper.pyx | 108 ---------- .../tests/mg/test_mg_katz_centrality.py | 55 ++++++ python/cugraph/cugraph/utilities/api_tools.py | 16 +- python/pylibcugraph/pylibcugraph/hits.pyx | 5 +- .../pylibcugraph/katz_centrality.pyx | 6 + .../tests/test_eigenvector_centrality.py | 2 +- .../tests/test_katz_centrality.py | 3 +- .../pylibcugraph/utilities/api_tools.py | 49 +++++ 15 files changed, 316 insertions(+), 247 deletions(-) delete mode 100644 python/cugraph/cugraph/link_analysis/hits.pxd delete mode 100644 python/cugraph/cugraph/link_analysis/hits_wrapper.pyx diff --git a/docs/cugraph/source/api_docs/pylibcugraph.rst b/docs/cugraph/source/api_docs/pylibcugraph.rst index 9317274cf8c..93efec37f03 
100644 --- a/docs/cugraph/source/api_docs/pylibcugraph.rst +++ b/docs/cugraph/source/api_docs/pylibcugraph.rst @@ -11,5 +11,12 @@ Methods .. autosummary:: :toctree: api/ + pylibcugraph.eigenvector_centrality + pylibcugraph.katz_centrality pylibcugraph.strongly_connected_components pylibcugraph.weakly_connected_components + pylibcugraph.pagerank + pylibcugraph.hits + pylibcugraph.node2vec + pylibcugraph.bfs + pylibcugraph.sssp diff --git a/python/cugraph/cugraph/community/ktruss_subgraph.py b/python/cugraph/cugraph/community/ktruss_subgraph.py index 203dc0c327e..c32c6ce177c 100644 --- a/python/cugraph/cugraph/community/ktruss_subgraph.py +++ b/python/cugraph/cugraph/community/ktruss_subgraph.py @@ -67,6 +67,7 @@ def k_truss(G, k): Examples -------- + >>> import cudf # k_truss does not run on CUDA 11.5 >>> gdf = cudf.read_csv(datasets_path / 'karate.csv', delimiter=' ', ... dtype=['int32', 'int32', 'float32'], header=None) >>> G = cugraph.Graph() @@ -149,6 +150,7 @@ def ktruss_subgraph(G, k, use_weights=True): Examples -------- + >>> import cudf # ktruss_subgraph does not run on CUDA 11.5 >>> gdf = cudf.read_csv(datasets_path / 'karate.csv', delimiter=' ', ... dtype=['int32', 'int32', 'float32'], header=None) >>> G = cugraph.Graph() diff --git a/python/cugraph/cugraph/dask/centrality/katz_centrality.py b/python/cugraph/cugraph/dask/centrality/katz_centrality.py index 03f8b0bc4be..dca8c6637f5 100644 --- a/python/cugraph/cugraph/dask/centrality/katz_centrality.py +++ b/python/cugraph/cugraph/dask/centrality/katz_centrality.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# Copyright (c) 2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -16,14 +16,22 @@ from dask.distributed import wait, default_client from cugraph.dask.common.input_utils import (get_distributed_data, get_vertex_partition_offsets) -from cugraph.dask.centrality import\ - mg_katz_centrality_wrapper as mg_katz_centrality +from pylibcugraph import (ResourceHandle, + GraphProperties, + MGGraph, + katz_centrality as pylibcugraph_katz + ) import cugraph.dask.comms.comms as Comms import dask_cudf +import cudf +import cupy def call_katz_centrality(sID, data, + graph_properties, + store_transposed, + do_expensive_check, src_col_name, dst_col_name, num_verts, @@ -34,37 +42,52 @@ def call_katz_centrality(sID, beta, max_iter, tol, - nstart, + initial_hubs_guess_values, normalized): - wid = Comms.get_worker_id(sID) handle = Comms.get_handle(sID) - local_size = len(aggregate_segment_offsets) // Comms.get_n_workers(sID) - segment_offsets = \ - aggregate_segment_offsets[local_size * wid: local_size * (wid + 1)] - return mg_katz_centrality.mg_katz_centrality(data[0], - src_col_name, - dst_col_name, - num_verts, - num_edges, - vertex_partition_offsets, - wid, - handle, - segment_offsets, - alpha, - beta, - max_iter, - tol, - nstart, - normalized) - - -def katz_centrality(input_graph, - alpha=None, - beta=None, - max_iter=100, - tol=1.0e-5, - nstart=None, - normalized=True): + h = ResourceHandle(handle.getHandle()) + srcs = data[0][src_col_name] + dsts = data[0][dst_col_name] + weights = cudf.Series(cupy.ones(srcs.size, dtype="float32")) + + if "value" in data[0].columns: + weights = data[0]['value'] + + mg = MGGraph(h, + graph_properties, + srcs, + dsts, + weights, + store_transposed, + num_edges, + do_expensive_check) + + result = pylibcugraph_katz(h, + mg, + initial_hubs_guess_values, + alpha, + beta, + tol, + max_iter, + do_expensive_check) + return result + + +def convert_to_cudf(cp_arrays): + """ + create a cudf DataFrame from cupy arrays + """ + cupy_vertices, cupy_values = cp_arrays + df = cudf.DataFrame() + df["vertex"] = cupy_vertices + 
df["katz_centrality"] = cupy_values + return df + + +def katz_centrality( + input_graph, alpha=None, beta=1.0, max_iter=100, tol=1.0e-6, + nstart=None, normalized=True +): """ Compute the Katz centrality for the nodes of the graph G. @@ -89,8 +112,9 @@ def katz_centrality(input_graph, guarantee that it will never exceed alpha_max thus in turn fulfilling the requirement for convergence. - beta : None - A weight scalar - currently Not Supported + beta : float, optional (default=1.0) + Weight scalar added to each vertex's new Katz Centrality score in every + iteration. If beta is not specified then it is set as 1.0. max_iter : int, optional (default=100) The maximum number of iterations before an answer is returned. This can @@ -109,7 +133,8 @@ def katz_centrality(input_graph, acceptable. nstart : dask_cudf.Dataframe, optional (default=None) - GPU Dataframe containing the initial guess for katz centrality + Distributed GPU Dataframe containing the initial guess for katz + centrality. nstart['vertex'] : dask_cudf.Series Contains the vertex identifiers @@ -122,8 +147,8 @@ def katz_centrality(input_graph, Returns ------- katz_centrality : dask_cudf.DataFrame - GPU data frame containing two dask_cudf.Series of size V: the - vertex identifiers and the corresponding katz centrality values. + GPU distributed data frame containing two dask_cudf.Series of size V: + the vertex identifiers and the corresponding katz centrality values. 
ddf['vertex'] : dask_cudf.Series Contains the vertex identifiers @@ -147,39 +172,73 @@ def katz_centrality(input_graph, >>> pr = dcg.katz_centrality(dg) """ - nstart = None - client = default_client() - input_graph.compute_renumber_edge_list(transposed=True) + graph_properties = GraphProperties( + is_multigraph=False) + + store_transposed = False + do_expensive_check = False + + src_col_name = input_graph.renumber_map.renumbered_src_col_name + dst_col_name = input_graph.renumber_map.renumbered_dst_col_name + ddf = input_graph.edgelist.edgelist_df - vertex_partition_offsets = get_vertex_partition_offsets(input_graph) - num_verts = vertex_partition_offsets.iloc[-1] + num_edges = len(ddf) data = get_distributed_data(ddf) - src_col_name = input_graph.renumber_map.renumbered_src_col_name - dst_col_name = input_graph.renumber_map.renumbered_dst_col_name + # FIXME: Incorporate legacy_renum_only=True to only trigger the python + # renumbering when more support is added in the C/C++ API + input_graph.compute_renumber_edge_list(transposed=True, + legacy_renum_only=False) + vertex_partition_offsets = get_vertex_partition_offsets(input_graph) + num_verts = vertex_partition_offsets.iloc[-1] - result = [client.submit(call_katz_centrality, - Comms.get_session_id(), - wf[1], - src_col_name, - dst_col_name, - num_verts, - num_edges, - vertex_partition_offsets, - input_graph.aggregate_segment_offsets, - alpha, - beta, - max_iter, - tol, - nstart, - normalized, - workers=[wf[0]]) - for idx, wf in enumerate(data.worker_to_parts.items())] - wait(result) - ddf = dask_cudf.from_delayed(result) + initial_hubs_guess_values = None + if nstart: + if input_graph.renumbered: + if len(input_graph.renumber_map.implementation.col_names) > 1: + cols = nstart.columns[:-1].to_list() + else: + cols = 'vertex' + nstart = input_graph.add_internal_vertex_id(nstart, 'vertex', cols) + initial_hubs_guess_values = nstart[nstart.columns[0]].compute() + else: + initial_hubs_guess_values = 
nstart["values"].compute() + + cupy_result = [client.submit(call_katz_centrality, + Comms.get_session_id(), + wf[1], + graph_properties, + store_transposed, + do_expensive_check, + src_col_name, + dst_col_name, + num_verts, + num_edges, + vertex_partition_offsets, + input_graph.aggregate_segment_offsets, + alpha, + beta, + max_iter, + tol, + initial_hubs_guess_values, + normalized, + workers=[wf[0]]) + for idx, wf in enumerate(data.worker_to_parts.items())] + + wait(cupy_result) + + cudf_result = [client.submit(convert_to_cudf, + cp_arrays, + workers=client.who_has( + cp_arrays)[cp_arrays.key]) + for cp_arrays in cupy_result] + + wait(cudf_result) + + ddf = dask_cudf.from_delayed(cudf_result) if input_graph.renumbered: return input_graph.unrenumber(ddf, 'vertex') diff --git a/python/cugraph/cugraph/dask/link_analysis/hits.py b/python/cugraph/cugraph/dask/link_analysis/hits.py index 1ee200d121d..d8b3e6ac3a5 100644 --- a/python/cugraph/cugraph/dask/link_analysis/hits.py +++ b/python/cugraph/cugraph/dask/link_analysis/hits.py @@ -20,11 +20,11 @@ import dask_cudf import cudf -from pylibcugraph.experimental import (ResourceHandle, - GraphProperties, - MGGraph, - hits as pylibcugraph_hits - ) +from pylibcugraph import (ResourceHandle, + GraphProperties, + MGGraph, + hits as pylibcugraph_hits + ) def call_hits(sID, diff --git a/python/cugraph/cugraph/link_analysis/__init__.py b/python/cugraph/cugraph/link_analysis/__init__.py index 606435fe226..764c530a335 100644 --- a/python/cugraph/cugraph/link_analysis/__init__.py +++ b/python/cugraph/cugraph/link_analysis/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2021, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at diff --git a/python/cugraph/cugraph/link_analysis/hits.pxd b/python/cugraph/cugraph/link_analysis/hits.pxd deleted file mode 100644 index 0f170fc4753..00000000000 --- a/python/cugraph/cugraph/link_analysis/hits.pxd +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from libcpp cimport bool - -from cugraph.structure.graph_utilities cimport graph_container_t -from raft.common.handle cimport handle_t - - -cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": - cdef void call_hits[vertex_t,weight_t]( - const handle_t &handle, - const graph_container_t &g, - weight_t *hubs, - weight_t *authorities, - int max_iter, - weight_t tolerance, - const weight_t *starting_value, - bool normalized) except + diff --git a/python/cugraph/cugraph/link_analysis/hits.py b/python/cugraph/cugraph/link_analysis/hits.py index 3deffee3bac..820f7d6aba1 100644 --- a/python/cugraph/cugraph/link_analysis/hits.py +++ b/python/cugraph/cugraph/link_analysis/hits.py @@ -1,4 +1,5 @@ -# Copyright (c) 2019-2022, NVIDIA CORPORATION. +# Copyright (c) 2022, NVIDIA CORPORATION. +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -10,14 +11,22 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. +# -from cugraph.link_analysis import hits_wrapper from cugraph.utilities import (ensure_cugraph_obj_for_nx, df_score_to_dictionary, ) +from pylibcugraph import (ResourceHandle, + GraphProperties, + SGGraph, + hits as pylibcugraph_hits + ) +import cudf -def hits(G, max_iter=100, tol=1.0e-5, nstart=None, normalized=True): +def hits( + G, max_iter=100, tol=1.0e-5, nstart=None, normalized=True +): """ Compute HITS hubs and authorities values for each vertex @@ -25,35 +34,33 @@ def hits(G, max_iter=100, tol=1.0e-5, nstart=None, normalized=True): estimates the node value based on the incoming links. Hubs estimates the node value based on outgoing links. - The cuGraph implementation of HITS is a wrapper around the gunrock - implementation of HITS. - - Note that the gunrock implementation uses a 2-norm, while networkx - uses a 1-norm. The raw scores will be different, but the rank ordering - should be comparable with networkx. + Both cuGraph and networkx implementations use a 1-norm. Parameters ---------- - graph : cugraph.Graph + G : cugraph.Graph cuGraph graph descriptor, should contain the connectivity information as an edge list (edge weights are not used for this algorithm). The adjacency list will be computed if not already present. max_iter : int, optional (default=100) The maximum number of iterations before an answer is returned. - The gunrock implementation does not currently support tolerance, - so this will in fact be the number of iterations the HITS algorithm - executes. tol : float, optional (default=1.0e-5) Set the tolerance the approximation, this parameter should be a small - magnitude value. This parameter is not currently supported. + magnitude value. 
nstart : cudf.Dataframe, optional (default=None) - Not currently supported + The initial hubs guess vertices along with their initial hubs guess + value + + nstart['vertex'] : cudf.Series + Initial hubs guess vertices + nstart['values'] : cudf.Series + Initial hubs guess values normalized : bool, optional (default=True) - Not currently supported, always used as True + A flag to normalize the results Returns ------- @@ -82,15 +89,44 @@ def hits(G, max_iter=100, tol=1.0e-5, nstart=None, normalized=True): G, isNx = ensure_cugraph_obj_for_nx(G) - df = hits_wrapper.hits(G, max_iter, tol) - - if G.renumbered: - df = G.unrenumber(df, "vertex") + srcs = G.edgelist.edgelist_df['src'] + dsts = G.edgelist.edgelist_df['dst'] + # edge weights are not used for this algorithm + weights = G.edgelist.edgelist_df['src'] * 0.0 + + resource_handle = ResourceHandle() + graph_props = GraphProperties(is_multigraph=G.is_multigraph()) + store_transposed = False + renumber = False + do_expensive_check = False + init_hubs_guess_vertices = None + init_hubs_guess_values = None + + if nstart is not None: + init_hubs_guess_vertices = nstart['vertex'] + init_hubs_guess_values = nstart['values'] + + sg = SGGraph(resource_handle, graph_props, srcs, dsts, weights, + store_transposed, renumber, do_expensive_check) + + vertices, hubs, authorities = pylibcugraph_hits(resource_handle, sg, tol, + max_iter, + init_hubs_guess_vertices, + init_hubs_guess_values, + normalized, + do_expensive_check) + results = cudf.DataFrame() + results["vertex"] = cudf.Series(vertices) + results["hubs"] = cudf.Series(hubs) + results["authorities"] = cudf.Series(authorities) if isNx is True: - d1 = df_score_to_dictionary(df[["vertex", "hubs"]], "hubs") - d2 = df_score_to_dictionary(df[["vertex", "authorities"]], + d1 = df_score_to_dictionary(results[["vertex", "hubs"]], "hubs") + d2 = df_score_to_dictionary(results[["vertex", "authorities"]], "authorities") - df = (d1, d2) + results = (d1, d2) + + if G.renumbered: + results 
= G.unrenumber(results, "vertex") - return df + return results diff --git a/python/cugraph/cugraph/link_analysis/hits_wrapper.pyx b/python/cugraph/cugraph/link_analysis/hits_wrapper.pyx deleted file mode 100644 index b86b0aa6e3d..00000000000 --- a/python/cugraph/cugraph/link_analysis/hits_wrapper.pyx +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# cython: profile=False -# distutils: language = c++ -# cython: embedsignature = True -# cython: language_level = 3 - -from libc.stdint cimport uintptr_t -from libcpp.memory cimport unique_ptr - -import cudf -import numpy as np - -from cugraph.structure import graph_primtypes_wrapper -from cugraph.structure.graph_utilities cimport (graph_container_t, - numberTypeEnum, - populate_graph_container, - ) -from raft.common.handle cimport handle_t -from cugraph.link_analysis cimport hits as c_hits - - -def hits(input_graph, max_iter=100, tol=1.0e-5, nstart=None, normalized=True): - """ - Call HITS, return a DataFrame containing the hubs and authorities for each - vertex. 
- """ - cdef graph_container_t graph_container - - numberTypeMap = {np.dtype("int32") : numberTypeEnum.int32Type, - np.dtype("int64") : numberTypeEnum.int64Type, - np.dtype("float32") : numberTypeEnum.floatType, - np.dtype("double") : numberTypeEnum.doubleType} - - - if nstart is not None: - raise ValueError('nstart is not currently supported') - - # Inputs - vertex_t = np.dtype("int32") - edge_t = np.dtype("int32") - weight_t = np.dtype("float32") - - [src, dst] = graph_primtypes_wrapper.datatype_cast( - [input_graph.edgelist.edgelist_df['src'], - input_graph.edgelist.edgelist_df['dst']], - [np.int32]) - weights = None - cdef uintptr_t c_src_vertices = src.__cuda_array_interface__['data'][0] - cdef uintptr_t c_dst_vertices = dst.__cuda_array_interface__['data'][0] - cdef uintptr_t c_edge_weights = NULL - - num_verts = input_graph.number_of_vertices() - num_edges = input_graph.number_of_edges(directed_edges=True) - is_symmetric = not input_graph.is_directed() - - cdef unique_ptr[handle_t] handle_ptr - handle_ptr.reset(new handle_t()) - handle_ = handle_ptr.get(); - - populate_graph_container(graph_container, - handle_[0], - c_src_vertices, c_dst_vertices, c_edge_weights, - NULL, - NULL, - 0, - ((numberTypeMap[vertex_t])), - ((numberTypeMap[edge_t])), - ((numberTypeMap[weight_t])), - num_edges, - num_verts, num_edges, - False, - is_symmetric, - False, - False) - - # Outputs - df = cudf.DataFrame() - df['hubs'] = cudf.Series(np.zeros(num_verts, dtype=np.float32)) - df['authorities'] = cudf.Series(np.zeros(num_verts, dtype=np.float32)) - # The vertex Series is simply the renumbered vertex IDs, which is just 0 to (num_verts-1) - df['vertex'] = cudf.Series(np.arange(num_verts, dtype=np.int32)) - - cdef uintptr_t c_hubs_ptr = df['hubs'].__cuda_array_interface__['data'][0]; - cdef uintptr_t c_authorities_ptr = df['authorities'].__cuda_array_interface__['data'][0]; - - # Call HITS - c_hits.call_hits[int, float](handle_ptr.get()[0], - graph_container, - c_hubs_ptr, - 
c_authorities_ptr, - max_iter, - tol, - NULL, - normalized) - - return df diff --git a/python/cugraph/cugraph/tests/mg/test_mg_katz_centrality.py b/python/cugraph/cugraph/tests/mg/test_mg_katz_centrality.py index cd93f257db0..f258cca7e54 100644 --- a/python/cugraph/cugraph/tests/mg/test_mg_katz_centrality.py +++ b/python/cugraph/cugraph/tests/mg/test_mg_katz_centrality.py @@ -94,3 +94,58 @@ def test_dask_katz_centrality(dask_client, directed): if diff > tol * 1.1: err = err + 1 assert err == 0 + + +@pytest.mark.skipif( + is_single_gpu(), reason="skipping MG testing on Single GPU system" +) +@pytest.mark.parametrize("directed", IS_DIRECTED) +def test_dask_katz_centrality_nstart(dask_client, directed): + input_data_path = (RAPIDS_DATASET_ROOT_DIR_PATH / + "karate.csv").as_posix() + print(f"dataset={input_data_path}") + chunksize = dcg.get_chunksize(input_data_path) + + ddf = dask_cudf.read_csv( + input_data_path, + chunksize=chunksize, + delimiter=" ", + names=["src", "dst", "value"], + dtype=["int32", "int32", "float32"], + ) + + dg = cugraph.Graph(directed=True) + dg.from_dask_cudf_edgelist(ddf, "src", "dst") + + largest_out_degree = dg.out_degree().compute().\ + nlargest(n=1, columns="degree") + largest_out_degree = largest_out_degree["degree"].iloc[0] + katz_alpha = 1 / (largest_out_degree + 1) + + mg_res = dcg.katz_centrality(dg, alpha=katz_alpha, max_iter=50, tol=1e-6) + mg_res = mg_res.compute() + + estimate = mg_res.copy() + estimate = estimate.rename(columns={"vertex": "vertex", + "katz_centrality": "values"}) + estimate["values"] = 0.5 + + mg_estimate_res = dcg.katz_centrality(dg, alpha=katz_alpha, + nstart=estimate, + max_iter=50, tol=1e-6) + mg_estimate_res = mg_estimate_res.compute() + + err = 0 + tol = 1.0e-05 + compare_res = mg_res.merge( + mg_estimate_res, on="vertex", suffixes=["_dask", "_nstart"] + ) + + for i in range(len(compare_res)): + diff = abs( + compare_res["katz_centrality_dask"].iloc[i] + - compare_res["katz_centrality_nstart"].iloc[i] + ) 
+ if diff > tol * 1.1: + err = err + 1 + assert err == 0 diff --git a/python/cugraph/cugraph/utilities/api_tools.py b/python/cugraph/cugraph/utilities/api_tools.py index e57ad45defa..195a5885818 100644 --- a/python/cugraph/cugraph/utilities/api_tools.py +++ b/python/cugraph/cugraph/utilities/api_tools.py @@ -17,20 +17,12 @@ def experimental_warning_wrapper(obj): - """ - Wrap obj in a function or class that prints a warning about it being - "experimental" (ie. it is in the public API but subject to change or - removal), prior to calling obj and returning its value. - - The object's name used in the warning message also has any leading __ - and/or EXPERIMENTAL string are removed from the name used in warning - messages. This allows an object to be named with a "private" name in the - public API so it can remain hidden while it is still experimental, but - have a public name within the experimental namespace so it can be easily - discovered and used. - """ return api_tools.experimental_warning_wrapper(obj) def promoted_experimental_warning_wrapper(obj): return api_tools.promoted_experimental_warning_wrapper(obj) + + +def deprecated_warning_wrapper(obj): + return api_tools.deprecated_warning_wrapper(obj) diff --git a/python/pylibcugraph/pylibcugraph/hits.pyx b/python/pylibcugraph/pylibcugraph/hits.pyx index c1bf0b52ccb..7c472f54866 100644 --- a/python/pylibcugraph/pylibcugraph/hits.pyx +++ b/python/pylibcugraph/pylibcugraph/hits.pyx @@ -93,8 +93,8 @@ def hits(ResourceHandle resource_handle, Device array containing the pointer to the array of initial hub guess values normalized : bool, optional (default=True) - - + + do_expensive_check : bool If True, performs more extensive tests on the inputs to ensure validitity, at the expense of increased run time. 
@@ -154,7 +154,6 @@ def hits(ResourceHandle resource_handle, cdef cugraph_error_code_t error_code cdef cugraph_error_t* error_ptr - error_code = cugraph_hits(c_resource_handle_ptr, c_graph_ptr, tol, diff --git a/python/pylibcugraph/pylibcugraph/katz_centrality.pyx b/python/pylibcugraph/pylibcugraph/katz_centrality.pyx index 5cc111b05ed..f491e9fd293 100644 --- a/python/pylibcugraph/pylibcugraph/katz_centrality.pyx +++ b/python/pylibcugraph/pylibcugraph/katz_centrality.pyx @@ -97,6 +97,12 @@ def katz_centrality(ResourceHandle resource_handle, do_expensive_check : bool_t A flag to run expensive checks for input arguments if True. + + Returns + ------- + + Examples + -------- """ diff --git a/python/pylibcugraph/pylibcugraph/tests/test_eigenvector_centrality.py b/python/pylibcugraph/pylibcugraph/tests/test_eigenvector_centrality.py index 47b9165ef2e..daaa9159b3d 100644 --- a/python/pylibcugraph/pylibcugraph/tests/test_eigenvector_centrality.py +++ b/python/pylibcugraph/pylibcugraph/tests/test_eigenvector_centrality.py @@ -18,7 +18,7 @@ GraphProperties, SGGraph, eigenvector_centrality) -from cugraph.testing import utils +from pylibcugraph.testing import utils TOY = utils.RAPIDS_DATASET_ROOT_DIR_PATH/'toy_graph.csv' diff --git a/python/pylibcugraph/pylibcugraph/tests/test_katz_centrality.py b/python/pylibcugraph/pylibcugraph/tests/test_katz_centrality.py index 3e46f4d406c..bbf182c9e30 100644 --- a/python/pylibcugraph/pylibcugraph/tests/test_katz_centrality.py +++ b/python/pylibcugraph/pylibcugraph/tests/test_katz_centrality.py @@ -17,7 +17,8 @@ from pylibcugraph import (ResourceHandle, GraphProperties, SGGraph, - katz_centrality) + katz_centrality + ) from pylibcugraph.testing import utils diff --git a/python/pylibcugraph/pylibcugraph/utilities/api_tools.py b/python/pylibcugraph/pylibcugraph/utilities/api_tools.py index cdabde6905f..0cee609c730 100644 --- a/python/pylibcugraph/pylibcugraph/utilities/api_tools.py +++ 
b/python/pylibcugraph/pylibcugraph/utilities/api_tools.py @@ -145,3 +145,52 @@ def warning_wrapper_function(*args, **kwargs): warning_wrapper_function.__name__ = obj_name return warning_wrapper_function + + +def deprecated_warning_wrapper(obj): + """ + Wrap obj in a function or class that prints a warning about it being + deprecated (ie. it is in the public API but will be removed or replaced + by a refactored version), prior to calling obj and returning its value. + """ + obj_type = type(obj) + if obj_type not in [type, types.FunctionType, types.BuiltinFunctionType]: + raise TypeError("obj must be a class or a function type, got " + f"{obj_type}") + + obj_name = obj.__name__ + call_stack = inspect.stack() + calling_frame = call_stack[1].frame + ns_name = calling_frame.f_locals.get("__name__") + dot = "." if ns_name is not None else "" + + warning_msg = (f"{ns_name}{dot}{obj_name} has been deprecated and will " + "be removed next release. If an experimental version " + "exists, it may replace this version in a future release.") + + if obj_type is type: + class WarningWrapperClass(obj): + def __init__(self, *args, **kwargs): + warnings.warn(warning_msg, DeprecationWarning) + # call base class __init__ for python, but cython classes do + # not have a standard callable __init__ and assigning to self + # works instead. + if isinstance(obj.__init__, types.FunctionType): + super(WarningWrapperClass, self).__init__(*args, **kwargs) + else: + self = obj(*args, **kwargs) + WarningWrapperClass.__module__ = ns_name + WarningWrapperClass.__qualname__ = obj_name + WarningWrapperClass.__name__ = obj_name + + return WarningWrapperClass + + @functools.wraps(obj) + def warning_wrapper_function(*args, **kwargs): + warnings.warn(warning_msg, DeprecationWarning) + return obj(*args, **kwargs) + warning_wrapper_function.__module__ = ns_name + warning_wrapper_function.__qualname__ = obj_name + warning_wrapper_function.__name__ = obj_name + + return warning_wrapper_function