diff --git a/benchmarks/python_e2e/cugraph_dask_funcs.py b/benchmarks/python_e2e/cugraph_dask_funcs.py index b9d993795ae..84d75a92a11 100644 --- a/benchmarks/python_e2e/cugraph_dask_funcs.py +++ b/benchmarks/python_e2e/cugraph_dask_funcs.py @@ -138,7 +138,7 @@ def bfs(G, start): def sssp(G, start): - return cugraph.dask.sssp(G, source=start, check_start=False) + return cugraph.dask.sssp(G, source=start, check_source=False) def wcc(G): diff --git a/python/cugraph/cugraph/dask/centrality/eigenvector_centrality.py b/python/cugraph/cugraph/dask/centrality/eigenvector_centrality.py index 02ee00ebdd8..1226ea0452f 100644 --- a/python/cugraph/cugraph/dask/centrality/eigenvector_centrality.py +++ b/python/cugraph/cugraph/dask/centrality/eigenvector_centrality.py @@ -14,8 +14,7 @@ # from dask.distributed import wait, default_client -from cugraph.dask.common.input_utils import (get_distributed_data, - get_vertex_partition_offsets) +from cugraph.dask.common.input_utils import get_distributed_data from pylibcugraph import (ResourceHandle, GraphProperties, MGGraph, @@ -34,10 +33,7 @@ def call_eigenvector_centrality(sID, do_expensive_check, src_col_name, dst_col_name, - num_verts, num_edges, - vertex_partition_offsets, - aggregate_segment_offsets, max_iter, tol, normalized): @@ -140,7 +136,7 @@ def eigenvector_centrality( """ client = default_client() - # Calling renumbering results in data that is sorted by degree + input_graph.compute_renumber_edge_list( transposed=False, legacy_renum_only=True) @@ -148,25 +144,17 @@ def eigenvector_centrality( is_multigraph=False) store_transposed = False + # FIXME: should we add this parameter as an option? do_expensive_check = False src_col_name = input_graph.renumber_map.renumbered_src_col_name dst_col_name = input_graph.renumber_map.renumbered_dst_col_name - # FIXME Move this call to the function creating a directed - # graph from a dask dataframe because duplicated edges need - # to be dropped ddf = input_graph.edgelist.edgelist_df - ddf = ddf.map_partitions( - lambda df: df.drop_duplicates(subset=[src_col_name, dst_col_name])) num_edges = len(ddf) data = get_distributed_data(ddf) - input_graph.compute_renumber_edge_list(transposed=True) - vertex_partition_offsets = get_vertex_partition_offsets(input_graph) - num_verts = vertex_partition_offsets.iloc[-1] - cupy_result = [client.submit(call_eigenvector_centrality, Comms.get_session_id(), wf[1], @@ -175,10 +163,7 @@ def eigenvector_centrality( do_expensive_check, src_col_name, dst_col_name, - num_verts, num_edges, - vertex_partition_offsets, - input_graph.aggregate_segment_offsets, max_iter, tol, normalized, @@ -197,4 +182,6 @@ def eigenvector_centrality( ddf = dask_cudf.from_delayed(cudf_result) if input_graph.renumbered: - return input_graph.unrenumber(ddf, 'vertex') + ddf = input_graph.unrenumber(ddf, "vertex") + + return ddf diff --git a/python/cugraph/cugraph/dask/centrality/katz_centrality.py b/python/cugraph/cugraph/dask/centrality/katz_centrality.py index e3549685ba2..186b59aabad 100644 --- a/python/cugraph/cugraph/dask/centrality/katz_centrality.py +++ b/python/cugraph/cugraph/dask/centrality/katz_centrality.py @@ -14,8 +14,7 @@ # from dask.distributed import wait, default_client -from cugraph.dask.common.input_utils import (get_distributed_data, - get_vertex_partition_offsets) +from cugraph.dask.common.input_utils import get_distributed_data from pylibcugraph import (ResourceHandle, GraphProperties, MGGraph, @@ -34,10 +33,7 @@ def call_katz_centrality(sID, do_expensive_check, src_col_name, dst_col_name, - num_verts, num_edges, - vertex_partition_offsets, - aggregate_segment_offsets, alpha, beta, max_iter, @@ -187,12 +183,13 @@ def katz_centrality( # compute_renumber_edge_list will only be used for multicolumn and # string vertices since the renumbering will be done in pylibcugraph input_graph.compute_renumber_edge_list(transposed=True, - legacy_renum_only=False) + legacy_renum_only=True) graph_properties = GraphProperties( is_multigraph=False) store_transposed = False + # FIXME: should we add this parameter as an option? do_expensive_check = False src_col_name = input_graph.renumber_map.renumbered_src_col_name @@ -203,9 +200,6 @@ def katz_centrality( num_edges = len(ddf) data = get_distributed_data(ddf) - vertex_partition_offsets = get_vertex_partition_offsets(input_graph) - num_verts = vertex_partition_offsets.iloc[-1] - initial_hubs_guess_values = None if nstart: if input_graph.renumbered: @@ -216,7 +210,9 @@ def katz_centrality( nstart = input_graph.add_internal_vertex_id(nstart, 'vertex', cols) initial_hubs_guess_values = nstart[nstart.columns[0]].compute() else: - initial_hubs_guess_values = nstart["values"].compute() + initial_hubs_guess_values = nstart["values"] + if isinstance(nstart, dask_cudf.DataFrame): + initial_hubs_guess_values = initial_hubs_guess_values.compute() cupy_result = [client.submit(call_katz_centrality, Comms.get_session_id(), @@ -226,10 +222,7 @@ def katz_centrality( do_expensive_check, src_col_name, dst_col_name, - num_verts, num_edges, - vertex_partition_offsets, - input_graph.aggregate_segment_offsets, alpha, beta, max_iter, diff --git a/python/cugraph/cugraph/dask/community/triangle_count.py b/python/cugraph/cugraph/dask/community/triangle_count.py index 0f7e141d176..f8da3b84d49 100644 --- a/python/cugraph/cugraph/dask/community/triangle_count.py +++ b/python/cugraph/cugraph/dask/community/triangle_count.py @@ -144,7 +144,8 @@ def triangle_count(input_graph, graph_properties = GraphProperties( is_symmetric=True, is_multigraph=False) store_transposed = False - do_expensive_check = True + # FIXME: should we add this parameter as an option? + do_expensive_check = False num_edges = len(ddf) data = get_distributed_data(ddf) diff --git a/python/cugraph/cugraph/dask/sampling/uniform_neighbor_sample.py b/python/cugraph/cugraph/dask/sampling/uniform_neighbor_sample.py index f9bceea5b79..8a745b15262 100644 --- a/python/cugraph/cugraph/dask/sampling/uniform_neighbor_sample.py +++ b/python/cugraph/cugraph/dask/sampling/uniform_neighbor_sample.py @@ -171,7 +171,8 @@ def uniform_neighbor_sample(input_graph, if input_graph.renumbered: start_list = input_graph.lookup_internal_vertex_id( start_list).compute() - do_expensive_check = True + # FIXME: should we add this parameter as an option? + do_expensive_check = False result = [client.submit(call_nbr_sampling, Comms.get_session_id(), diff --git a/python/cugraph/cugraph/tests/test_betweenness_centrality.py b/python/cugraph/cugraph/tests/test_betweenness_centrality.py index d723a4ae934..6f879e40017 100755 --- a/python/cugraph/cugraph/tests/test_betweenness_centrality.py +++ b/python/cugraph/cugraph/tests/test_betweenness_centrality.py @@ -502,6 +502,7 @@ def test_betweenness_centrality_nx( and cugraph_bc[i][0] == networkx_bc[i][0] ): err = err + 1 - print(f"{cugraph_bc[i][1]} and {cugraph_bc[i][1]}") + print(f"{cugraph_bc[i][1]} and {networkx_bc[i][1]}") + print(f"{cugraph_bc[i][0]} and {networkx_bc[i][0]}") print("Mismatches:", err) assert err < (0.01 * len(cugraph_bc))