Skip to content

Commit

Permalink
add debug print for betweenness centrality, fix typo (rapidsai#2369)
Browse files Browse the repository at this point in the history
This PR:
1. adds/updates the debug print for betweenness centrality
2. removes the legacy path for `katz` and `eigenvector` centrality

Authors:
  - Joseph Nke (https://github.com/jnke2016)

Approvers:
  - Rick Ratzel (https://github.com/rlratzel)
  - Brad Rees (https://github.com/BradReesWork)

URL: rapidsai#2369
  • Loading branch information
jnke2016 authored Jul 5, 2022
1 parent d4e8e2d commit 3385189
Show file tree
Hide file tree
Showing 6 changed files with 19 additions and 36 deletions.
2 changes: 1 addition & 1 deletion benchmarks/python_e2e/cugraph_dask_funcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def bfs(G, start):


def sssp(G, start):
return cugraph.dask.sssp(G, source=start, check_start=False)
return cugraph.dask.sssp(G, source=start, check_source=False)


def wcc(G):
Expand Down
25 changes: 6 additions & 19 deletions python/cugraph/cugraph/dask/centrality/eigenvector_centrality.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,7 @@
#

from dask.distributed import wait, default_client
from cugraph.dask.common.input_utils import (get_distributed_data,
get_vertex_partition_offsets)
from cugraph.dask.common.input_utils import get_distributed_data
from pylibcugraph import (ResourceHandle,
GraphProperties,
MGGraph,
Expand All @@ -34,10 +33,7 @@ def call_eigenvector_centrality(sID,
do_expensive_check,
src_col_name,
dst_col_name,
num_verts,
num_edges,
vertex_partition_offsets,
aggregate_segment_offsets,
max_iter,
tol,
normalized):
Expand Down Expand Up @@ -140,33 +136,25 @@ def eigenvector_centrality(
"""
client = default_client()
# Calling renumbering results in data that is sorted by degree

input_graph.compute_renumber_edge_list(
transposed=False, legacy_renum_only=True)

graph_properties = GraphProperties(
is_multigraph=False)

store_transposed = False
# FIXME: should we add this parameter as an option?
do_expensive_check = False

src_col_name = input_graph.renumber_map.renumbered_src_col_name
dst_col_name = input_graph.renumber_map.renumbered_dst_col_name

# FIXME Move this call to the function creating a directed
# graph from a dask dataframe because duplicated edges need
# to be dropped
ddf = input_graph.edgelist.edgelist_df
ddf = ddf.map_partitions(
lambda df: df.drop_duplicates(subset=[src_col_name, dst_col_name]))

num_edges = len(ddf)
data = get_distributed_data(ddf)

input_graph.compute_renumber_edge_list(transposed=True)
vertex_partition_offsets = get_vertex_partition_offsets(input_graph)
num_verts = vertex_partition_offsets.iloc[-1]

cupy_result = [client.submit(call_eigenvector_centrality,
Comms.get_session_id(),
wf[1],
Expand All @@ -175,10 +163,7 @@ def eigenvector_centrality(
do_expensive_check,
src_col_name,
dst_col_name,
num_verts,
num_edges,
vertex_partition_offsets,
input_graph.aggregate_segment_offsets,
max_iter,
tol,
normalized,
Expand All @@ -197,4 +182,6 @@ def eigenvector_centrality(

ddf = dask_cudf.from_delayed(cudf_result)
if input_graph.renumbered:
return input_graph.unrenumber(ddf, 'vertex')
ddf = input_graph.unrenumber(ddf, "vertex")

return ddf
19 changes: 6 additions & 13 deletions python/cugraph/cugraph/dask/centrality/katz_centrality.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,7 @@
#

from dask.distributed import wait, default_client
from cugraph.dask.common.input_utils import (get_distributed_data,
get_vertex_partition_offsets)
from cugraph.dask.common.input_utils import get_distributed_data
from pylibcugraph import (ResourceHandle,
GraphProperties,
MGGraph,
Expand All @@ -34,10 +33,7 @@ def call_katz_centrality(sID,
do_expensive_check,
src_col_name,
dst_col_name,
num_verts,
num_edges,
vertex_partition_offsets,
aggregate_segment_offsets,
alpha,
beta,
max_iter,
Expand Down Expand Up @@ -187,12 +183,13 @@ def katz_centrality(
# compute_renumber_edge_list will only be used for multicolumn and
# string vertices since the renumbering will be done in pylibcugraph
input_graph.compute_renumber_edge_list(transposed=True,
legacy_renum_only=False)
legacy_renum_only=True)

graph_properties = GraphProperties(
is_multigraph=False)

store_transposed = False
# FIXME: should we add this parameter as an option?
do_expensive_check = False

src_col_name = input_graph.renumber_map.renumbered_src_col_name
Expand All @@ -203,9 +200,6 @@ def katz_centrality(
num_edges = len(ddf)
data = get_distributed_data(ddf)

vertex_partition_offsets = get_vertex_partition_offsets(input_graph)
num_verts = vertex_partition_offsets.iloc[-1]

initial_hubs_guess_values = None
if nstart:
if input_graph.renumbered:
Expand All @@ -216,7 +210,9 @@ def katz_centrality(
nstart = input_graph.add_internal_vertex_id(nstart, 'vertex', cols)
initial_hubs_guess_values = nstart[nstart.columns[0]].compute()
else:
initial_hubs_guess_values = nstart["values"].compute()
initial_hubs_guess_values = nstart["values"]
if isinstance(nstart, dask_cudf.DataFrame):
initial_hubs_guess_values = initial_hubs_guess_values.compute()

cupy_result = [client.submit(call_katz_centrality,
Comms.get_session_id(),
Expand All @@ -226,10 +222,7 @@ def katz_centrality(
do_expensive_check,
src_col_name,
dst_col_name,
num_verts,
num_edges,
vertex_partition_offsets,
input_graph.aggregate_segment_offsets,
alpha,
beta,
max_iter,
Expand Down
3 changes: 2 additions & 1 deletion python/cugraph/cugraph/dask/community/triangle_count.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,8 @@ def triangle_count(input_graph,
graph_properties = GraphProperties(
is_symmetric=True, is_multigraph=False)
store_transposed = False
do_expensive_check = True
# FIXME: should we add this parameter as an option?
do_expensive_check = False

num_edges = len(ddf)
data = get_distributed_data(ddf)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,8 @@ def uniform_neighbor_sample(input_graph,
if input_graph.renumbered:
start_list = input_graph.lookup_internal_vertex_id(
start_list).compute()
do_expensive_check = True
# FIXME: should we add this parameter as an option?
do_expensive_check = False

result = [client.submit(call_nbr_sampling,
Comms.get_session_id(),
Expand Down
3 changes: 2 additions & 1 deletion python/cugraph/cugraph/tests/test_betweenness_centrality.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,7 @@ def test_betweenness_centrality_nx(
and cugraph_bc[i][0] == networkx_bc[i][0]
):
err = err + 1
print(f"{cugraph_bc[i][1]} and {cugraph_bc[i][1]}")
print(f"{cugraph_bc[i][1]} and {networkx_bc[i][1]}")
print(f"{cugraph_bc[i][0]} and {networkx_bc[i][0]}")
print("Mismatches:", err)
assert err < (0.01 * len(cugraph_bc))

0 comments on commit 3385189

Please sign in to comment.