Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enforce matching type #4161

Merged
32 changes: 32 additions & 0 deletions cpp/src/c_api/random_walks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,14 @@ cugraph_error_code_t cugraph_node2vec(const cugraph_resource_handle_t* handle,
cugraph_random_walk_result_t** result,
cugraph_error_t** error)
{
CAPI_EXPECTS(reinterpret_cast<cugraph::c_api::cugraph_graph_t*>(graph)->vertex_type_ ==
reinterpret_cast<cugraph::c_api::cugraph_type_erased_device_array_view_t const*>(
start_vertices)
->type_,
CUGRAPH_INVALID_INPUT,
"vertex type of graph and start_vertices must match",
*error);

cugraph::c_api::node2vec_functor functor(
handle, graph, start_vertices, max_length, compress_results, p, q);

Expand Down Expand Up @@ -528,6 +536,14 @@ cugraph_error_code_t cugraph_uniform_random_walks(
cugraph_random_walk_result_t** result,
cugraph_error_t** error)
{
CAPI_EXPECTS(reinterpret_cast<cugraph::c_api::cugraph_graph_t*>(graph)->vertex_type_ ==
reinterpret_cast<cugraph::c_api::cugraph_type_erased_device_array_view_t const*>(
start_vertices)
->type_,
CUGRAPH_INVALID_INPUT,
"vertex type of graph and start_vertices must match",
*error);

uniform_random_walks_functor functor(handle, graph, start_vertices, max_length);

return cugraph::c_api::run_algorithm(graph, functor, result, error);
Expand All @@ -541,6 +557,14 @@ cugraph_error_code_t cugraph_biased_random_walks(
cugraph_random_walk_result_t** result,
cugraph_error_t** error)
{
CAPI_EXPECTS(reinterpret_cast<cugraph::c_api::cugraph_graph_t*>(graph)->vertex_type_ ==
reinterpret_cast<cugraph::c_api::cugraph_type_erased_device_array_view_t const*>(
start_vertices)
->type_,
CUGRAPH_INVALID_INPUT,
"vertex type of graph and start_vertices must match",
*error);

biased_random_walks_functor functor(handle, graph, start_vertices, max_length);

return cugraph::c_api::run_algorithm(graph, functor, result, error);
Expand All @@ -556,6 +580,14 @@ cugraph_error_code_t cugraph_node2vec_random_walks(
cugraph_random_walk_result_t** result,
cugraph_error_t** error)
{
CAPI_EXPECTS(reinterpret_cast<cugraph::c_api::cugraph_graph_t*>(graph)->vertex_type_ ==
reinterpret_cast<cugraph::c_api::cugraph_type_erased_device_array_view_t const*>(
start_vertices)
->type_,
CUGRAPH_INVALID_INPUT,
"vertex type of graph and start_vertices must match",
*error);

node2vec_random_walks_functor functor(handle, graph, start_vertices, max_length, p, q);

return cugraph::c_api::run_algorithm(graph, functor, result, error);
Expand Down
26 changes: 25 additions & 1 deletion python/cugraph/cugraph/sampling/node2vec.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand All @@ -16,10 +16,32 @@
node2vec as pylibcugraph_node2vec,
)
from cugraph.utilities import ensure_cugraph_obj_for_nx
import warnings

import cudf


# FIXME: Move this function to the utility module so that it can be
# shared by other algos
def ensure_valid_dtype(input_graph, start_vertices):
    """
    Return 'start_vertices' with the same dtype as the graph's vertices.

    The reference dtype is taken from the first column of
    ``input_graph.edgelist.edgelist_df`` (presumably the source vertex
    column; confirm against the graph implementation). If the dtype of
    'start_vertices' differs, a ``UserWarning`` is emitted and the values
    are cast to the reference dtype.

    Parameters
    ----------
    input_graph : cugraph.Graph
        Graph whose edge list provides the reference vertex dtype.

    start_vertices : cudf.Series or cudf.DataFrame
        Start vertices to validate. For anything other than a
        ``cudf.Series`` the dtype of the first column is compared.

    Returns
    -------
    start_vertices : cudf.Series or cudf.DataFrame
        The input unchanged when the dtypes already match, otherwise the
        result of ``start_vertices.astype(vertex_dtype)``.
    """
    # 'dtypes' is indexed by column label, so the first entry is read with
    # an explicit positional lookup: integer keys passed to [] on a
    # label-indexed Series are deprecated in pandas.
    vertex_dtype = input_graph.edgelist.edgelist_df.dtypes.iloc[0]
    if isinstance(start_vertices, cudf.Series):
        start_vertices_dtypes = start_vertices.dtype
    else:
        start_vertices_dtypes = start_vertices.dtypes.iloc[0]

    if start_vertices_dtypes != vertex_dtype:
        # The message text is matched by the test suite; keep it unchanged.
        warning_msg = (
            "Node2vec requires 'start_vertices' to match the graph's "
            f"'vertex' type. input graph's vertex type is: {vertex_dtype} and got "
            f"'start_vertices' of type: {start_vertices_dtypes}."
        )
        warnings.warn(warning_msg, UserWarning)
        start_vertices = start_vertices.astype(vertex_dtype)

    return start_vertices


def node2vec(G, start_vertices, max_depth=1, compress_result=True, p=1.0, q=1.0):
"""
Computes random walks for each node in 'start_vertices', under the
Expand Down Expand Up @@ -120,6 +142,8 @@ def node2vec(G, start_vertices, max_depth=1, compress_result=True, p=1.0, q=1.0)
else:
start_vertices = G.lookup_internal_vertex_id(start_vertices)

start_vertices = ensure_valid_dtype(G, start_vertices)

vertex_set, edge_set, sizes = pylibcugraph_node2vec(
resource_handle=ResourceHandle(),
graph=G._plc_graph,
Expand Down
26 changes: 19 additions & 7 deletions python/cugraph/cugraph/tests/sampling/test_node2vec.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -27,6 +27,7 @@
# =============================================================================
DIRECTED_GRAPH_OPTIONS = [False, True]
COMPRESSED = [False, True]
START_VERTICES_TYPE = ["int32", "int64"]
LINE = small_line
KARATE = karate

Expand Down Expand Up @@ -150,11 +151,8 @@ def test_node2vec_line(graph_file, directed):
@pytest.mark.parametrize(*_get_param_args("graph_file", SMALL_DATASETS))
@pytest.mark.parametrize(*_get_param_args("directed", DIRECTED_GRAPH_OPTIONS))
@pytest.mark.parametrize(*_get_param_args("compress", COMPRESSED))
def test_node2vec(
graph_file,
directed,
compress,
):
@pytest.mark.parametrize(*_get_param_args("start_vertices_type", START_VERTICES_TYPE))
def test_node2vec(graph_file, directed, compress, start_vertices_type):
dataset_path = graph_file.get_path()
cu_M = utils.read_csv_file(dataset_path)

Expand All @@ -165,8 +163,22 @@ def test_node2vec(
)
num_verts = G.number_of_vertices()
k = random.randint(6, 12)
start_vertices = cudf.Series(random.sample(range(num_verts), k), dtype="int32")
start_vertices = cudf.Series(
random.sample(range(num_verts), k), dtype=start_vertices_type
)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe this is a FIXME, but if we're picking samples at random it could make it hard to repro a test failure. We should somehow make it so tests with specific samples can be re-run in the event that the test fails.

Copy link
Contributor Author

@jnke2016 jnke2016 Mar 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right. The samples are random so that the test doesn't cover only a fixed set of start vertices, but I do agree it makes it hard to debug. Perhaps we can print those start_vertices for debugging purposes? I added a FIXME regarding this.

max_depth = 5

if start_vertices_type == "int64":
warning_msg = (
"Node2vec requires 'start_vertices' to match the graph's "
"'vertex' type. input graph's vertex type is: int32 and "
"got 'start_vertices' of type: int64."
)
naimnv marked this conversation as resolved.
Show resolved Hide resolved
with pytest.warns(UserWarning, match=warning_msg):
calc_node2vec(
G, start_vertices, max_depth, compress_result=compress, p=0.8, q=0.5
)

result, seeds = calc_node2vec(
G, start_vertices, max_depth, compress_result=compress, p=0.8, q=0.5
)
Expand Down
Loading