From f87afba62586583497201bb6134b8f768fe79fde Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Fri, 9 Feb 2024 10:05:27 -0800 Subject: [PATCH 1/7] Throw a warning message for dtype mismatch --- python/cugraph/cugraph/sampling/node2vec.py | 24 +++++++++++++++++++ .../cugraph/tests/sampling/test_node2vec.py | 23 +++++++++++++++--- 2 files changed, 44 insertions(+), 3 deletions(-) diff --git a/python/cugraph/cugraph/sampling/node2vec.py b/python/cugraph/cugraph/sampling/node2vec.py index bc9b88250af..7a4d5f5735b 100644 --- a/python/cugraph/cugraph/sampling/node2vec.py +++ b/python/cugraph/cugraph/sampling/node2vec.py @@ -16,10 +16,32 @@ node2vec as pylibcugraph_node2vec, ) from cugraph.utilities import ensure_cugraph_obj_for_nx +import warnings import cudf +# FIXME: Move this function to the utility module so that it can be +# shared by other algos +def ensure_valid_dtype(input_graph, start_vertices): + vertex_dtype = input_graph.edgelist.edgelist_df.dtypes[0] + if isinstance(start_vertices, cudf.Series): + start_vertices_dtypes = start_vertices.dtype + else: + start_vertices_dtypes = start_vertices.dtypes[0] + + if start_vertices_dtypes != vertex_dtype: + warning_msg = ( + "Node2vec requires 'start_vertices' to match the graph's " + f"'vertex' type. input graph's vertex type is: {vertex_dtype} and got " + f"'start_vertices' of type: {start_vertices_dtypes}." + ) + warnings.warn(warning_msg, UserWarning) + start_vertices = start_vertices.astype(vertex_dtype) + + return start_vertices + + def node2vec(G, start_vertices, max_depth=1, compress_result=True, p=1.0, q=1.0): """ Computes random walks for each node in 'start_vertices', under the @@ -119,6 +141,8 @@ def node2vec(G, start_vertices, max_depth=1, compress_result=True, p=1.0, q=1.0) ) else: start_vertices = G.lookup_internal_vertex_id(start_vertices) + + start_vertices = ensure_valid_dtype(G, start_vertices) vertex_set, edge_set, sizes = pylibcugraph_node2vec( resource_handle=ResourceHandle(), diff --git a/python/cugraph/cugraph/tests/sampling/test_node2vec.py b/python/cugraph/cugraph/tests/sampling/test_node2vec.py index 0bfdd460cae..261542733ab 100644 --- a/python/cugraph/cugraph/tests/sampling/test_node2vec.py +++ b/python/cugraph/cugraph/tests/sampling/test_node2vec.py @@ -27,6 +27,7 @@ # ============================================================================= DIRECTED_GRAPH_OPTIONS = [False, True] COMPRESSED = [False, True] +START_VERTICES_TYPE = ["int32", "int64"] LINE = small_line KARATE = karate @@ -147,13 +148,15 @@ def test_node2vec_line(graph_file, directed): @pytest.mark.sg -@pytest.mark.parametrize(*_get_param_args("graph_file", SMALL_DATASETS)) +@pytest.mark.parametrize(*_get_param_args("graph_file", [SMALL_DATASETS[0]])) @pytest.mark.parametrize(*_get_param_args("directed", DIRECTED_GRAPH_OPTIONS)) @pytest.mark.parametrize(*_get_param_args("compress", COMPRESSED)) -def test_node2vec( +@pytest.mark.parametrize(*_get_param_args("start_vertices_type", START_VERTICES_TYPE)) +def test_node2vec_0( graph_file, directed, compress, + start_vertices_type ): dataset_path = graph_file.get_path() cu_M = utils.read_csv_file(dataset_path) @@ -165,8 +168,22 @@ def test_node2vec( ) num_verts = G.number_of_vertices() k = random.randint(6, 12) - start_vertices = cudf.Series(random.sample(range(num_verts), k), dtype="int32") + start_vertices = cudf.Series( + random.sample(range(num_verts), k), dtype=start_vertices_type) max_depth = 5 + + if start_vertices_type == "int64": + warning_msg = ( + "Node2vec requires 'start_vertices' to match the graph's " + "'vertex' type. input graph's vertex type is: int32 and " + "got 'start_vertices' of type: int64." + ) + with pytest.warns(UserWarning, match=warning_msg): + calc_node2vec( + G, start_vertices, max_depth, compress_result=compress, p=0.8, q=0.5 + ) + + result, seeds = calc_node2vec( G, start_vertices, max_depth, compress_result=compress, p=0.8, q=0.5 ) From e96ed71892440951f4aa0bf79d304690f1b11df4 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Fri, 9 Feb 2024 10:07:03 -0800 Subject: [PATCH 2/7] fix style --- python/cugraph/cugraph/sampling/node2vec.py | 2 +- python/cugraph/cugraph/tests/sampling/test_node2vec.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cugraph/cugraph/sampling/node2vec.py b/python/cugraph/cugraph/sampling/node2vec.py index 7a4d5f5735b..ae8566526d8 100644 --- a/python/cugraph/cugraph/sampling/node2vec.py +++ b/python/cugraph/cugraph/sampling/node2vec.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at diff --git a/python/cugraph/cugraph/tests/sampling/test_node2vec.py b/python/cugraph/cugraph/tests/sampling/test_node2vec.py index 261542733ab..61ea8d27e4e 100644 --- a/python/cugraph/cugraph/tests/sampling/test_node2vec.py +++ b/python/cugraph/cugraph/tests/sampling/test_node2vec.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at From f561ab858b21578baeb29763140637635716637d Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Sat, 10 Feb 2024 00:47:47 -0800 Subject: [PATCH 3/7] add type check --- cpp/src/c_api/random_walks.cpp | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/cpp/src/c_api/random_walks.cpp b/cpp/src/c_api/random_walks.cpp index 1af933c86e6..439cc4bb706 100644 --- a/cpp/src/c_api/random_walks.cpp +++ b/cpp/src/c_api/random_walks.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -475,6 +475,14 @@ cugraph_error_code_t cugraph_node2vec(const cugraph_resource_handle_t* handle, cugraph_random_walk_result_t** result, cugraph_error_t** error) { + CAPI_EXPECTS( + reinterpret_cast(graph)->vertex_type_ == + reinterpret_cast(start_vertices) + ->type_, + CUGRAPH_INVALID_INPUT, + "vertex type of graph and start_vertices must match", + *error); + cugraph::c_api::node2vec_functor functor( handle, graph, start_vertices, max_length, compress_results, p, q); @@ -528,6 +536,14 @@ cugraph_error_code_t cugraph_uniform_random_walks( cugraph_random_walk_result_t** result, cugraph_error_t** error) { + CAPI_EXPECTS( + reinterpret_cast(graph)->vertex_type_ == + reinterpret_cast(start_vertices) + ->type_, + CUGRAPH_INVALID_INPUT, + "vertex type of graph and start_vertices must match", + *error); + uniform_random_walks_functor functor(handle, graph, start_vertices, max_length); return cugraph::c_api::run_algorithm(graph, functor, result, error); @@ -541,6 +557,14 @@ cugraph_error_code_t cugraph_biased_random_walks( cugraph_random_walk_result_t** result, cugraph_error_t** error) { + CAPI_EXPECTS( + reinterpret_cast(graph)->vertex_type_ == + reinterpret_cast(start_vertices) + ->type_, + CUGRAPH_INVALID_INPUT, + "vertex type of graph and start_vertices must match", + *error); + biased_random_walks_functor functor(handle, graph, start_vertices, max_length); return cugraph::c_api::run_algorithm(graph, functor, result, error); @@ -556,6 +580,14 @@ cugraph_error_code_t cugraph_node2vec_random_walks( cugraph_random_walk_result_t** result, cugraph_error_t** error) { + CAPI_EXPECTS( + reinterpret_cast(graph)->vertex_type_ == + reinterpret_cast(start_vertices) + ->type_, + CUGRAPH_INVALID_INPUT, + "vertex type of graph and start_vertices must match", + *error); + node2vec_random_walks_functor functor(handle, graph, start_vertices, max_length, p, q); return cugraph::c_api::run_algorithm(graph, functor, result, error); From 3322f6ae90bd70199018a7b77329ddea5fa71be7 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Sat, 10 Feb 2024 00:49:37 -0800 Subject: [PATCH 4/7] fix style --- cpp/src/c_api/random_walks.cpp | 56 +++++++++---------- python/cugraph/cugraph/sampling/node2vec.py | 2 +- .../cugraph/tests/sampling/test_node2vec.py | 13 ++--- 3 files changed, 33 insertions(+), 38 deletions(-) diff --git a/cpp/src/c_api/random_walks.cpp b/cpp/src/c_api/random_walks.cpp index 439cc4bb706..0f0b2e907f8 100644 --- a/cpp/src/c_api/random_walks.cpp +++ b/cpp/src/c_api/random_walks.cpp @@ -475,13 +475,13 @@ cugraph_error_code_t cugraph_node2vec(const cugraph_resource_handle_t* handle, cugraph_random_walk_result_t** result, cugraph_error_t** error) { - CAPI_EXPECTS( - reinterpret_cast(graph)->vertex_type_ == - reinterpret_cast(start_vertices) - ->type_, - CUGRAPH_INVALID_INPUT, - "vertex type of graph and start_vertices must match", - *error); + CAPI_EXPECTS(reinterpret_cast(graph)->vertex_type_ == + reinterpret_cast( + start_vertices) + ->type_, + CUGRAPH_INVALID_INPUT, + "vertex type of graph and start_vertices must match", + *error); cugraph::c_api::node2vec_functor functor( handle, graph, start_vertices, max_length, compress_results, p, q); @@ -536,13 +536,13 @@ cugraph_error_code_t cugraph_uniform_random_walks( cugraph_random_walk_result_t** result, cugraph_error_t** error) { - CAPI_EXPECTS( - reinterpret_cast(graph)->vertex_type_ == - reinterpret_cast(start_vertices) - ->type_, - CUGRAPH_INVALID_INPUT, - "vertex type of graph and start_vertices must match", - *error); + CAPI_EXPECTS(reinterpret_cast(graph)->vertex_type_ == + reinterpret_cast( + start_vertices) + ->type_, + CUGRAPH_INVALID_INPUT, + "vertex type of graph and start_vertices must match", + *error); uniform_random_walks_functor functor(handle, graph, start_vertices, max_length); @@ -557,13 +557,13 @@ cugraph_error_code_t cugraph_biased_random_walks( cugraph_random_walk_result_t** result, cugraph_error_t** error) { - CAPI_EXPECTS( - reinterpret_cast(graph)->vertex_type_ == - reinterpret_cast(start_vertices) - ->type_, - CUGRAPH_INVALID_INPUT, - "vertex type of graph and start_vertices must match", - *error); + CAPI_EXPECTS(reinterpret_cast(graph)->vertex_type_ == + reinterpret_cast( + start_vertices) + ->type_, + CUGRAPH_INVALID_INPUT, + "vertex type of graph and start_vertices must match", + *error); biased_random_walks_functor functor(handle, graph, start_vertices, max_length); @@ -580,13 +580,13 @@ cugraph_error_code_t cugraph_node2vec_random_walks( cugraph_random_walk_result_t** result, cugraph_error_t** error) { - CAPI_EXPECTS( - reinterpret_cast(graph)->vertex_type_ == - reinterpret_cast(start_vertices) - ->type_, - CUGRAPH_INVALID_INPUT, - "vertex type of graph and start_vertices must match", - *error); + CAPI_EXPECTS(reinterpret_cast(graph)->vertex_type_ == + reinterpret_cast( + start_vertices) + ->type_, + CUGRAPH_INVALID_INPUT, + "vertex type of graph and start_vertices must match", + *error); node2vec_random_walks_functor functor(handle, graph, start_vertices, max_length, p, q); diff --git a/python/cugraph/cugraph/sampling/node2vec.py b/python/cugraph/cugraph/sampling/node2vec.py index ae8566526d8..4479e8b3d62 100644 --- a/python/cugraph/cugraph/sampling/node2vec.py +++ b/python/cugraph/cugraph/sampling/node2vec.py @@ -141,7 +141,7 @@ def node2vec(G, start_vertices, max_depth=1, compress_result=True, p=1.0, q=1.0) ) else: start_vertices = G.lookup_internal_vertex_id(start_vertices) - + start_vertices = ensure_valid_dtype(G, start_vertices) vertex_set, edge_set, sizes = pylibcugraph_node2vec( diff --git a/python/cugraph/cugraph/tests/sampling/test_node2vec.py b/python/cugraph/cugraph/tests/sampling/test_node2vec.py index 61ea8d27e4e..c1d571ddd1f 100644 --- a/python/cugraph/cugraph/tests/sampling/test_node2vec.py +++ b/python/cugraph/cugraph/tests/sampling/test_node2vec.py @@ -148,16 +148,11 @@ def test_node2vec_line(graph_file, directed): @pytest.mark.sg -@pytest.mark.parametrize(*_get_param_args("graph_file", [SMALL_DATASETS[0]])) +@pytest.mark.parametrize(*_get_param_args("graph_file", SMALL_DATASETS) @pytest.mark.parametrize(*_get_param_args("directed", DIRECTED_GRAPH_OPTIONS)) @pytest.mark.parametrize(*_get_param_args("compress", COMPRESSED)) @pytest.mark.parametrize(*_get_param_args("start_vertices_type", START_VERTICES_TYPE)) -def test_node2vec_0( - graph_file, - directed, - compress, - start_vertices_type -): +def test_node2vec(graph_file, directed, compress, start_vertices_type): dataset_path = graph_file.get_path() cu_M = utils.read_csv_file(dataset_path) @@ -169,7 +164,8 @@ def test_node2vec_0( num_verts = G.number_of_vertices() k = random.randint(6, 12) start_vertices = cudf.Series( - random.sample(range(num_verts), k), dtype=start_vertices_type) + random.sample(range(num_verts), k), dtype=start_vertices_type + ) max_depth = 5 if start_vertices_type == "int64": @@ -183,7 +179,6 @@ def test_node2vec_0( G, start_vertices, max_depth, compress_result=compress, p=0.8, q=0.5 ) - result, seeds = calc_node2vec( G, start_vertices, max_depth, compress_result=compress, p=0.8, q=0.5 ) From 5c13578026059cb064638a4ddf777ce343eb7e0b Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Sat, 10 Feb 2024 00:50:23 -0800 Subject: [PATCH 5/7] fix typo --- python/cugraph/cugraph/tests/sampling/test_node2vec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cugraph/cugraph/tests/sampling/test_node2vec.py b/python/cugraph/cugraph/tests/sampling/test_node2vec.py index c1d571ddd1f..1ba1c36639d 100644 --- a/python/cugraph/cugraph/tests/sampling/test_node2vec.py +++ b/python/cugraph/cugraph/tests/sampling/test_node2vec.py @@ -148,7 +148,7 @@ def test_node2vec_line(graph_file, directed): @pytest.mark.sg -@pytest.mark.parametrize(*_get_param_args("graph_file", SMALL_DATASETS) +@pytest.mark.parametrize(*_get_param_args("graph_file", SMALL_DATASETS)) @pytest.mark.parametrize(*_get_param_args("directed", DIRECTED_GRAPH_OPTIONS)) @pytest.mark.parametrize(*_get_param_args("compress", COMPRESSED)) @pytest.mark.parametrize(*_get_param_args("start_vertices_type", START_VERTICES_TYPE)) From 6a44ca682c3acaa5bac85635bfd6fed482a5f55a Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 12 Mar 2024 17:23:42 -0700 Subject: [PATCH 6/7] remove deprecated call and add fixme --- python/cugraph/cugraph/sampling/node2vec.py | 10 +++++----- python/cugraph/cugraph/tests/sampling/test_node2vec.py | 1 + 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/python/cugraph/cugraph/sampling/node2vec.py b/python/cugraph/cugraph/sampling/node2vec.py index 4479e8b3d62..7bdac2a2abe 100644 --- a/python/cugraph/cugraph/sampling/node2vec.py +++ b/python/cugraph/cugraph/sampling/node2vec.py @@ -24,17 +24,17 @@ # FIXME: Move this function to the utility module so that it can be # shared by other algos def ensure_valid_dtype(input_graph, start_vertices): - vertex_dtype = input_graph.edgelist.edgelist_df.dtypes[0] + vertex_dtype = input_graph.edgelist.edgelist_df.iloc[0] if isinstance(start_vertices, cudf.Series): - start_vertices_dtypes = start_vertices.dtype + start_vertices_dtype = start_vertices.dtype else: - start_vertices_dtypes = start_vertices.dtypes[0] + start_vertices_dtype = start_vertices.iloc[0] - if start_vertices_dtypes != vertex_dtype: + if start_vertices_dtype != vertex_dtype: warning_msg = ( "Node2vec requires 'start_vertices' to match the graph's " f"'vertex' type. input graph's vertex type is: {vertex_dtype} and got " - f"'start_vertices' of type: {start_vertices_dtypes}." + f"'start_vertices' of type: {start_vertices_dtype}." ) warnings.warn(warning_msg, UserWarning) start_vertices = start_vertices.astype(vertex_dtype) diff --git a/python/cugraph/cugraph/tests/sampling/test_node2vec.py b/python/cugraph/cugraph/tests/sampling/test_node2vec.py index 1ba1c36639d..00c32705338 100644 --- a/python/cugraph/cugraph/tests/sampling/test_node2vec.py +++ b/python/cugraph/cugraph/tests/sampling/test_node2vec.py @@ -163,6 +163,7 @@ def test_node2vec(graph_file, directed, compress, start_vertices_type): ) num_verts = G.number_of_vertices() k = random.randint(6, 12) + # FIXME: Random sample can make it hard to debug start_vertices = cudf.Series( random.sample(range(num_verts), k), dtype=start_vertices_type ) From 6bd59d1e5e494f63f1cf729c8bbd30007a91eef9 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 12 Mar 2024 17:28:13 -0700 Subject: [PATCH 7/7] fix typo --- python/cugraph/cugraph/sampling/node2vec.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cugraph/cugraph/sampling/node2vec.py b/python/cugraph/cugraph/sampling/node2vec.py index 7bdac2a2abe..71fc2969f86 100644 --- a/python/cugraph/cugraph/sampling/node2vec.py +++ b/python/cugraph/cugraph/sampling/node2vec.py @@ -24,11 +24,11 @@ # FIXME: Move this function to the utility module so that it can be # shared by other algos def ensure_valid_dtype(input_graph, start_vertices): - vertex_dtype = input_graph.edgelist.edgelist_df.iloc[0] + vertex_dtype = input_graph.edgelist.edgelist_df.dtypes.iloc[0] if isinstance(start_vertices, cudf.Series): start_vertices_dtype = start_vertices.dtype else: - start_vertices_dtype = start_vertices.iloc[0] + start_vertices_dtype = start_vertices.dtypes.iloc[0] if start_vertices_dtype != vertex_dtype: warning_msg = (