From 73e22bee8a4ab05135634ad68cb34b46c26381c1 Mon Sep 17 00:00:00 2001 From: Charles Hastings Date: Fri, 12 Jul 2024 10:02:29 -0700 Subject: [PATCH 1/5] Define C API for biased sampling --- cpp/include/cugraph_c/properties.h | 147 ++++++++++++++++++++ cpp/include/cugraph_c/sampling_algorithms.h | 60 ++++++++ 2 files changed, 207 insertions(+) create mode 100644 cpp/include/cugraph_c/properties.h diff --git a/cpp/include/cugraph_c/properties.h b/cpp/include/cugraph_c/properties.h new file mode 100644 index 00000000000..e7ed1557113 --- /dev/null +++ b/cpp/include/cugraph_c/properties.h @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +// +// Speculative description of handling generic vertex and edge properties. +// +// If we have vertex properties and edge properties that we want to apply to an existing graph +// (after it was created) we could use these methods to construct C++ objects to represent these +// properties. +// +// These assume the use of external vertex ids and external edge ids as the mechanism for +// correlating a property to a particular vertex or edge. 
+// + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + int32_t align_; +} cugraph_vertex_property_t; + +typedef struct { + int32_t align_; +} cugraph_edge_property_t; + +typedef struct { + int32_t align_; +} cugraph_vertex_property_view_t; + +typedef struct { + int32_t align_; +} cugraph_edge_property_view_t; + +#if 0 +// Blocking out definition of these since this is speculative work. + +/** + * @brief Create a vertex property + * + * @param [in] handle Handle for accessing resources + * @param [in] graph Pointer to graph. + * @param [in] vertex_ids Device array of vertex ids + * @param [in] property Device array of vertex property + * @param [out] result Pointer to the location to store the pointer to the vertex property object + * @param [out] error Pointer to an error object storing details of any error. Will + * be populated if error code is not CUGRAPH_SUCCESS + * @return error code + */ +cugraph_error_code_t cugraph_vertex_property_create( + const cugraph_resource_handle_t* handle, + const cugraph_graph_t * graph, + const cugraph_type_erased_device_array_t* vertex_ids, + const cugraph_type_erased_device_array_t* properties, + cugraph_vertex_property_t** result, + cugraph_error_t** error); + +/** + * @brief Create a edge property + * + * @param [in] handle Handle for accessing resources + * @param [in] graph Pointer to graph. + * @param [in] lookup_container Lookup map + * @param [in] edge_ids Device array of edge ids + * @param [in] property Device array of edge property + * @param [out] result Pointer to the location to store the pointer to the edge property object + * @param [out] error Pointer to an error object storing details of any error. 
Will + * be populated if error code is not CUGRAPH_SUCCESS + * @return error code + */ +cugraph_error_code_t cugraph_edge_property_create( + const cugraph_resource_handle_t* handle, + const cugraph_graph_t * graph, + const cugraph_lookup_container_t* lookup_container, + const cugraph_type_erased_device_array_t* edge_ids, + const cugraph_type_erased_device_array_t* properties, + cugraph_edge_property_t** result, + cugraph_error_t** error); + +/** + * @brief Create a vertex_property_view from a vertex property + * + * @param [in] vertex_property Pointer to the vertex property object + * @return Pointer to the view of the host array + */ +cugraph_vertex_property_view_t* cugraph_vertex_property_view( + cugraph_vertex_property_view* vertex_property); + +/** + * @brief Create a edge_property_view from a edge property + * + * @param [in] edge_property Pointer to the edge property object + * @return Pointer to the view of the host array + */ +cugraph_edge_property_view_t* cugraph_edge_property_view( + cugraph_edge_property_view* edge_property); + +/** + * @brief Destroy a vertex_property object + * + * @param [in] p Pointer to the vertex_property object + */ +void cugraph_vertex_property_free(cugraph_vertex_property_t* p); + +/** + * @brief Destroy a edge_property object + * + * @param [in] p Pointer to the edge_property object + */ +void cugraph_edge_property_free(cugraph_edge_property_t* p); + +/** + * @brief Destroy a vertex_property_view object + * + * @param [in] p Pointer to the vertex_property_view object + */ +void cugraph_vertex_property__viewfree(cugraph_vertex_property__viewt* p); + +/** + * @brief Destroy a edge_property_view object + * + * @param [in] p Pointer to the edge_property_view object + */ +void cugraph_edge_property_view_free(cugraph_edge_property_view_t* p); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/cpp/include/cugraph_c/sampling_algorithms.h b/cpp/include/cugraph_c/sampling_algorithms.h index a7490ad2c63..1a3d20b9339 100644 --- 
a/cpp/include/cugraph_c/sampling_algorithms.h +++ b/cpp/include/cugraph_c/sampling_algorithms.h @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -373,6 +374,65 @@ cugraph_error_code_t cugraph_uniform_neighbor_sample( cugraph_sample_result_t** result, cugraph_error_t** error); +/** + * @brief Biased Neighborhood Sampling + * + * Returns a sample of the neighborhood around specified start vertices. Optionally, each + * start vertex can be associated with a label, allowing the caller to specify multiple batches + * of sampling requests in the same function call - which should improve GPU utilization. + * + * If label is NULL then all start vertices will be considered part of the same batch and the + * return value will not have a label column. + * + * @param [in] handle Handle for accessing resources + * @param [in] graph Pointer to graph. NOTE: Graph might be modified if the storage + * needs to be transposed + * @param [in] edge_biases Device array of edge biases to use for sampling. If NULL + * use the edge weight as the bias. NOTE: This is a placeholder for future capability, the + * value for edge_biases should always be set to NULL at the moment. + * @param [in] start_vertices Device array of start vertices for the sampling + * @param [in] start_vertex_labels Device array of start vertex labels for the sampling. The + * labels associated with each start vertex will be included in the output associated with results + * that were derived from that start vertex. We only support label of type INT32. If label is + * NULL, the return data will not be labeled. + * @param [in] label_list Device array of the labels included in @p start_vertex_labels. If + * @p label_to_comm_rank is not specified this parameter is ignored. If specified, label_list + * must be sorted in ascending order. + * @param [in] label_to_comm_rank Device array identifying which comm rank the output for a + * particular label should be shuffled in the output. 
If not specified the data is not organized in + * output. If specified then all data from @p label_list[i] will be shuffled to rank + * @p label_to_comm_rank[i]. This cannot be specified unless @p start_vertex_labels is also + * specified. If not specified then the output data will not be shuffled between ranks. + * @param [in] label_offsets Device array of the offsets for each label in the seed list. This + * parameter is only used with the retain_seeds option. + * @param [in] fan_out Host array defining the fan out at each step in the sampling algorithm. + * We only support fan_out values of type INT32 + * @param [in,out] rng_state State of the random number generator, updated with each call + * @param [in] options + * Opaque pointer defining the sampling options. + * @param [in] do_expensive_check + * A flag to run expensive checks for input arguments (if set to true) + * @param [out] result Output from the biased_neighbor_sample call + * @param [out] error Pointer to an error object storing details of any error. 
Will + * be populated if error code is not CUGRAPH_SUCCESS + * @return error code + */ +cugraph_error_code_t cugraph_biased_neighbor_sample( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_edge_property_view_t* edge_biases, + const cugraph_type_erased_device_array_view_t* start_vertices, + const cugraph_type_erased_device_array_view_t* start_vertex_labels, + const cugraph_type_erased_device_array_view_t* label_list, + const cugraph_type_erased_device_array_view_t* label_to_comm_rank, + const cugraph_type_erased_device_array_view_t* label_offsets, + const cugraph_type_erased_host_array_view_t* fan_out, + cugraph_rng_state_t* rng_state, + const cugraph_sampling_options_t* options, + bool_t do_expensive_check, + cugraph_sample_result_t** result, + cugraph_error_t** error); + /** * @deprecated This call should be replaced with cugraph_sample_result_get_majors * @brief Get the source vertices from the sampling algorithm result From 75a9fda10e26297d29cd12a5e004a83a0facf2ac Mon Sep 17 00:00:00 2001 From: Charles Hastings Date: Tue, 16 Jul 2024 12:40:40 -0700 Subject: [PATCH 2/5] Add implementation and tests for C API (SG and MG) for biased sampling --- cpp/include/cugraph_c/properties.h | 5 +- cpp/src/c_api/uniform_neighbor_sampling.cpp | 429 +++++ cpp/tests/CMakeLists.txt | 2 + cpp/tests/c_api/biased_neighbor_sample_test.c | 973 ++++++++++++ .../c_api/mg_biased_neighbor_sample_test.c | 1398 +++++++++++++++++ 5 files changed, 2804 insertions(+), 3 deletions(-) create mode 100644 cpp/tests/c_api/biased_neighbor_sample_test.c create mode 100644 cpp/tests/c_api/mg_biased_neighbor_sample_test.c diff --git a/cpp/include/cugraph_c/properties.h b/cpp/include/cugraph_c/properties.h index e7ed1557113..e4f2a4b20a7 100644 --- a/cpp/include/cugraph_c/properties.h +++ b/cpp/include/cugraph_c/properties.h @@ -131,7 +131,7 @@ void cugraph_edge_property_free(cugraph_edge_property_t* p); * * @param [in] p Pointer to the vertex_property_view object */ 
-void cugraph_vertex_property__viewfree(cugraph_vertex_property__viewt* p); +void cugraph_vertex_property_view_free(cugraph_vertex_property__viewt* p); /** * @brief Destroy a edge_property_view object @@ -139,9 +139,8 @@ void cugraph_vertex_property__viewfree(cugraph_vertex_property__viewt* p); * @param [in] p Pointer to the edge_property_view object */ void cugraph_edge_property_view_free(cugraph_edge_property_view_t* p); +#endif #ifdef __cplusplus } #endif - -#endif diff --git a/cpp/src/c_api/uniform_neighbor_sampling.cpp b/cpp/src/c_api/uniform_neighbor_sampling.cpp index 45609fc0e01..69306806030 100644 --- a/cpp/src/c_api/uniform_neighbor_sampling.cpp +++ b/cpp/src/c_api/uniform_neighbor_sampling.cpp @@ -16,6 +16,7 @@ #include "c_api/abstract_functor.hpp" #include "c_api/graph.hpp" +#include "c_api/properties.hpp" #include "c_api/random.hpp" #include "c_api/resource_handle.hpp" #include "c_api/utils.hpp" @@ -402,6 +403,356 @@ struct uniform_neighbor_sampling_functor : public cugraph::c_api::abstract_funct } }; +struct biased_neighbor_sampling_functor : public cugraph::c_api::abstract_functor { + raft::handle_t const& handle_; + cugraph::c_api::cugraph_graph_t* graph_{nullptr}; + cugraph::c_api::cugraph_edge_property_view_t const* edge_biases_{nullptr}; + cugraph::c_api::cugraph_type_erased_device_array_view_t const* start_vertices_{nullptr}; + cugraph::c_api::cugraph_type_erased_device_array_view_t const* start_vertex_labels_{nullptr}; + cugraph::c_api::cugraph_type_erased_device_array_view_t const* label_list_{nullptr}; + cugraph::c_api::cugraph_type_erased_device_array_view_t const* label_to_comm_rank_{nullptr}; + cugraph::c_api::cugraph_type_erased_device_array_view_t const* label_offsets_{nullptr}; + cugraph::c_api::cugraph_type_erased_host_array_view_t const* fan_out_{nullptr}; + cugraph::c_api::cugraph_rng_state_t* rng_state_{nullptr}; + cugraph::c_api::cugraph_sampling_options_t options_{}; + bool do_expensive_check_{false}; + 
cugraph::c_api::cugraph_sample_result_t* result_{nullptr}; + + biased_neighbor_sampling_functor( + cugraph_resource_handle_t const* handle, + cugraph_graph_t* graph, + cugraph_edge_property_view_t const* edge_biases, + cugraph_type_erased_device_array_view_t const* start_vertices, + cugraph_type_erased_device_array_view_t const* start_vertex_labels, + cugraph_type_erased_device_array_view_t const* label_list, + cugraph_type_erased_device_array_view_t const* label_to_comm_rank, + cugraph_type_erased_device_array_view_t const* label_offsets, + cugraph_type_erased_host_array_view_t const* fan_out, + cugraph_rng_state_t* rng_state, + cugraph::c_api::cugraph_sampling_options_t options, + bool do_expensive_check) + : abstract_functor(), + handle_(*reinterpret_cast(handle)->handle_), + graph_(reinterpret_cast(graph)), + edge_biases_( + reinterpret_cast(edge_biases)), + start_vertices_( + reinterpret_cast( + start_vertices)), + start_vertex_labels_( + reinterpret_cast( + start_vertex_labels)), + label_list_(reinterpret_cast( + label_list)), + label_to_comm_rank_( + reinterpret_cast( + label_to_comm_rank)), + label_offsets_( + reinterpret_cast( + label_offsets)), + fan_out_( + reinterpret_cast(fan_out)), + rng_state_(reinterpret_cast(rng_state)), + options_(options), + do_expensive_check_(do_expensive_check) + { + } + + template + void operator()() + { + using label_t = int32_t; + + // FIXME: Think about how to handle SG vice MG + if constexpr (!cugraph::is_candidate::value) { + unsupported(); + } else { + // uniform_nbr_sample expects store_transposed == false + if constexpr (store_transposed) { + error_code_ = cugraph::c_api:: + transpose_storage( + handle_, graph_, error_.get()); + if (error_code_ != CUGRAPH_SUCCESS) return; + } + + auto graph = + reinterpret_cast*>(graph_->graph_); + + auto graph_view = graph->view(); + + auto edge_weights = reinterpret_cast< + cugraph::edge_property_t, + weight_t>*>(graph_->edge_weights_); + + auto edge_ids = reinterpret_cast< + 
cugraph::edge_property_t, + edge_t>*>(graph_->edge_ids_); + + auto edge_types = reinterpret_cast< + cugraph::edge_property_t, + edge_type_t>*>(graph_->edge_types_); + + auto number_map = reinterpret_cast*>(graph_->number_map_); + + auto edge_biases = + edge_biases_ ? reinterpret_cast*>( + edge_biases_->edge_property_) + : nullptr; + + rmm::device_uvector start_vertices(start_vertices_->size_, handle_.get_stream()); + raft::copy(start_vertices.data(), + start_vertices_->as_type(), + start_vertices.size(), + handle_.get_stream()); + + std::optional> start_vertex_labels{std::nullopt}; + + if (start_vertex_labels_ != nullptr) { + start_vertex_labels = + rmm::device_uvector{start_vertex_labels_->size_, handle_.get_stream()}; + raft::copy(start_vertex_labels->data(), + start_vertex_labels_->as_type(), + start_vertex_labels_->size_, + handle_.get_stream()); + } + + if constexpr (multi_gpu) { + if (start_vertex_labels) { + std::tie(start_vertices, *start_vertex_labels) = + cugraph::detail::shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( + handle_, std::move(start_vertices), std::move(*start_vertex_labels)); + } else { + start_vertices = + cugraph::detail::shuffle_ext_vertices_to_local_gpu_by_vertex_partitioning( + handle_, std::move(start_vertices)); + } + } + + // + // Need to renumber start_vertices + // + cugraph::renumber_local_ext_vertices( + handle_, + start_vertices.data(), + start_vertices.size(), + number_map->data(), + graph_view.local_vertex_partition_range_first(), + graph_view.local_vertex_partition_range_last(), + do_expensive_check_); + + auto&& [src, dst, wgt, edge_id, edge_type, hop, edge_label, offsets] = + cugraph::biased_neighbor_sample( + handle_, + graph_view, + (edge_weights != nullptr) ? std::make_optional(edge_weights->view()) : std::nullopt, + (edge_ids != nullptr) ? std::make_optional(edge_ids->view()) : std::nullopt, + (edge_types != nullptr) ? 
std::make_optional(edge_types->view()) : std::nullopt, + (edge_biases != nullptr) ? *edge_biases : edge_weights->view(), + raft::device_span{start_vertices.data(), start_vertices.size()}, + (start_vertex_labels_ != nullptr) + ? std::make_optional>(start_vertex_labels->data(), + start_vertex_labels->size()) + : std::nullopt, + (label_list_ != nullptr) + ? std::make_optional(std::make_tuple( + raft::device_span{label_list_->as_type(), + label_list_->size_}, + raft::device_span{label_to_comm_rank_->as_type(), + label_to_comm_rank_->size_})) + : std::nullopt, + raft::host_span(fan_out_->as_type(), fan_out_->size_), + rng_state_->rng_state_, + options_.return_hops_, + options_.with_replacement_, + options_.prior_sources_behavior_, + options_.dedupe_sources_, + do_expensive_check_); + + std::vector vertex_partition_lasts = graph_view.vertex_partition_range_lasts(); + + cugraph::unrenumber_int_vertices(handle_, + src.data(), + src.size(), + number_map->data(), + vertex_partition_lasts, + do_expensive_check_); + + cugraph::unrenumber_int_vertices(handle_, + dst.data(), + dst.size(), + number_map->data(), + vertex_partition_lasts, + do_expensive_check_); + + std::optional> majors{std::nullopt}; + rmm::device_uvector minors(0, handle_.get_stream()); + std::optional> major_offsets{std::nullopt}; + + std::optional> label_hop_offsets{std::nullopt}; + + std::optional> renumber_map{std::nullopt}; + std::optional> renumber_map_offsets{std::nullopt}; + + bool src_is_major = (options_.compression_type_ == cugraph_compression_type_t::CSR) || + (options_.compression_type_ == cugraph_compression_type_t::DCSR) || + (options_.compression_type_ == cugraph_compression_type_t::COO); + + if (options_.renumber_results_) { + if (options_.compression_type_ == cugraph_compression_type_t::COO) { + // COO + + rmm::device_uvector output_majors(0, handle_.get_stream()); + rmm::device_uvector output_renumber_map(0, handle_.get_stream()); + std::tie(output_majors, + minors, + wgt, + edge_id, + 
edge_type, + label_hop_offsets, + output_renumber_map, + renumber_map_offsets) = + cugraph::renumber_and_sort_sampled_edgelist( + handle_, + std::move(src), + std::move(dst), + std::move(wgt), + std::move(edge_id), + std::move(edge_type), + std::move(hop), + options_.retain_seeds_ + ? std::make_optional(raft::device_span{ + start_vertices_->as_type(), start_vertices_->size_}) + : std::nullopt, + options_.retain_seeds_ ? std::make_optional(raft::device_span{ + label_offsets_->as_type(), label_offsets_->size_}) + : std::nullopt, + offsets ? std::make_optional( + raft::device_span{offsets->data(), offsets->size()}) + : std::nullopt, + edge_label ? edge_label->size() : size_t{1}, + hop ? fan_out_->size_ : size_t{1}, + src_is_major, + do_expensive_check_); + + majors.emplace(std::move(output_majors)); + renumber_map.emplace(std::move(output_renumber_map)); + } else { + // (D)CSC, (D)CSR + + bool doubly_compress = (options_.compression_type_ == cugraph_compression_type_t::DCSR) || + (options_.compression_type_ == cugraph_compression_type_t::DCSC); + + rmm::device_uvector output_major_offsets(0, handle_.get_stream()); + rmm::device_uvector output_renumber_map(0, handle_.get_stream()); + std::tie(majors, + output_major_offsets, + minors, + wgt, + edge_id, + edge_type, + label_hop_offsets, + output_renumber_map, + renumber_map_offsets) = + cugraph::renumber_and_compress_sampled_edgelist( + handle_, + std::move(src), + std::move(dst), + std::move(wgt), + std::move(edge_id), + std::move(edge_type), + std::move(hop), + options_.retain_seeds_ + ? std::make_optional(raft::device_span{ + start_vertices_->as_type(), start_vertices_->size_}) + : std::nullopt, + options_.retain_seeds_ ? std::make_optional(raft::device_span{ + label_offsets_->as_type(), label_offsets_->size_}) + : std::nullopt, + offsets ? std::make_optional( + raft::device_span{offsets->data(), offsets->size()}) + : std::nullopt, + edge_label ? edge_label->size() : size_t{1}, + hop ? 
fan_out_->size_ : size_t{1}, + src_is_major, + options_.compress_per_hop_, + doubly_compress, + do_expensive_check_); + + renumber_map.emplace(std::move(output_renumber_map)); + major_offsets.emplace(std::move(output_major_offsets)); + } + + // These are now represented by label_hop_offsets + hop.reset(); + offsets.reset(); + } else { + if (options_.compression_type_ != cugraph_compression_type_t::COO) { + CUGRAPH_FAIL("Can only use COO format if not renumbering"); + } + + std::tie(src, dst, wgt, edge_id, edge_type, label_hop_offsets) = + cugraph::sort_sampled_edgelist(handle_, + std::move(src), + std::move(dst), + std::move(wgt), + std::move(edge_id), + std::move(edge_type), + std::move(hop), + offsets + ? std::make_optional(raft::device_span{ + offsets->data(), offsets->size()}) + : std::nullopt, + edge_label ? edge_label->size() : size_t{1}, + hop ? fan_out_->size_ : size_t{1}, + src_is_major, + do_expensive_check_); + + majors.emplace(std::move(src)); + minors = std::move(dst); + + hop.reset(); + offsets.reset(); + } + + result_ = new cugraph::c_api::cugraph_sample_result_t{ + (major_offsets) + ? new cugraph::c_api::cugraph_type_erased_device_array_t(*major_offsets, SIZE_T) + : nullptr, + (majors) + ? new cugraph::c_api::cugraph_type_erased_device_array_t(*majors, graph_->vertex_type_) + : nullptr, + new cugraph::c_api::cugraph_type_erased_device_array_t(minors, graph_->vertex_type_), + (edge_id) + ? new cugraph::c_api::cugraph_type_erased_device_array_t(*edge_id, graph_->edge_type_) + : nullptr, + (edge_type) ? new cugraph::c_api::cugraph_type_erased_device_array_t( + *edge_type, graph_->edge_type_id_type_) + : nullptr, + (wgt) ? new cugraph::c_api::cugraph_type_erased_device_array_t(*wgt, graph_->weight_type_) + : nullptr, + (hop) ? new cugraph::c_api::cugraph_type_erased_device_array_t(*hop, INT32) + : nullptr, // FIXME get rid of this + (label_hop_offsets) + ? 
new cugraph::c_api::cugraph_type_erased_device_array_t(*label_hop_offsets, SIZE_T) + : nullptr, + (edge_label) + ? new cugraph::c_api::cugraph_type_erased_device_array_t(edge_label.value(), INT32) + : nullptr, + (renumber_map) ? new cugraph::c_api::cugraph_type_erased_device_array_t( + renumber_map.value(), graph_->vertex_type_) + : nullptr, + (renumber_map_offsets) ? new cugraph::c_api::cugraph_type_erased_device_array_t( + renumber_map_offsets.value(), SIZE_T) + : nullptr}; + } + } +}; + } // namespace extern "C" cugraph_error_code_t cugraph_sampling_options_create( @@ -954,3 +1305,81 @@ cugraph_error_code_t cugraph_uniform_neighbor_sample( do_expensive_check}; return cugraph::c_api::run_algorithm(graph, functor, result, error); } + +cugraph_error_code_t cugraph_biased_neighbor_sample( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_edge_property_view_t* edge_biases, + const cugraph_type_erased_device_array_view_t* start_vertices, + const cugraph_type_erased_device_array_view_t* start_vertex_labels, + const cugraph_type_erased_device_array_view_t* label_list, + const cugraph_type_erased_device_array_view_t* label_to_comm_rank, + const cugraph_type_erased_device_array_view_t* label_offsets, + const cugraph_type_erased_host_array_view_t* fan_out, + cugraph_rng_state_t* rng_state, + const cugraph_sampling_options_t* options, + bool_t do_expensive_check, + cugraph_sample_result_t** result, + cugraph_error_t** error) +{ + auto options_cpp = *reinterpret_cast(options); + + CAPI_EXPECTS( + (edge_biases != nullptr) || + (reinterpret_cast(graph)->edge_weights_ != nullptr), + CUGRAPH_INVALID_INPUT, + "edge_biases is required if the graph is not weighted", + *error); + + CAPI_EXPECTS((!options_cpp.retain_seeds_) || (label_offsets != nullptr), + CUGRAPH_INVALID_INPUT, + "must specify label_offsets if retain_seeds is true", + *error); + + CAPI_EXPECTS((start_vertex_labels == nullptr) || + (reinterpret_cast( + start_vertex_labels) + ->type_ 
== INT32), + CUGRAPH_INVALID_INPUT, + "start_vertex_labels should be of type int", + *error); + + CAPI_EXPECTS((label_to_comm_rank == nullptr) || (start_vertex_labels != nullptr), + CUGRAPH_INVALID_INPUT, + "cannot specify label_to_comm_rank unless start_vertex_labels is also specified", + *error); + + CAPI_EXPECTS((label_to_comm_rank == nullptr) || (label_list != nullptr), + CUGRAPH_INVALID_INPUT, + "cannot specify label_to_comm_rank unless label_list is also specified", + *error); + + CAPI_EXPECTS(reinterpret_cast(graph)->vertex_type_ == + reinterpret_cast( + start_vertices) + ->type_, + CUGRAPH_INVALID_INPUT, + "vertex type of graph and start_vertices must match", + *error); + + CAPI_EXPECTS( + reinterpret_cast(fan_out) + ->type_ == INT32, + CUGRAPH_INVALID_INPUT, + "fan_out should be of type int", + *error); + + biased_neighbor_sampling_functor functor{handle, + graph, + edge_biases, + start_vertices, + start_vertex_labels, + label_list, + label_to_comm_rank, + label_offsets, + fan_out, + rng_state, + std::move(options_cpp), + do_expensive_check}; + return cugraph::c_api::run_algorithm(graph, functor, result, error); +} diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 73a3104f27b..892ba91af86 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -768,6 +768,7 @@ if(BUILD_CUGRAPH_MG_TESTS) ConfigureCTestMG(MG_CAPI_EDGE_BETWEENNESS_CENTRALITY_TEST c_api/mg_edge_betweenness_centrality_test.c) ConfigureCTestMG(MG_CAPI_HITS_TEST c_api/mg_hits_test.c) ConfigureCTestMG(MG_CAPI_UNIFORM_NEIGHBOR_SAMPLE_TEST c_api/mg_uniform_neighbor_sample_test.c) + ConfigureCTestMG(MG_CAPI_BIASED_NEIGHBOR_SAMPLE_TEST c_api/mg_biased_neighbor_sample_test.c) ConfigureCTestMG(MG_CAPI_LOOKUP_SRC_DST_TEST c_api/mg_lookup_src_dst_test.c) ConfigureCTestMG(MG_CAPI_RANDOM_WALKS_TEST c_api/mg_random_walks_test.c) ConfigureCTestMG(MG_CAPI_TRIANGLE_COUNT_TEST c_api/mg_triangle_count_test.c) @@ -805,6 +806,7 @@ ConfigureCTest(CAPI_NODE2VEC_TEST 
c_api/node2vec_test.c) ConfigureCTest(CAPI_WEAKLY_CONNECTED_COMPONENTS_TEST c_api/weakly_connected_components_test.c) ConfigureCTest(CAPI_STRONGLY_CONNECTED_COMPONENTS_TEST c_api/strongly_connected_components_test.c) ConfigureCTest(CAPI_UNIFORM_NEIGHBOR_SAMPLE_TEST c_api/uniform_neighbor_sample_test.c) +ConfigureCTest(CAPI_BIASED_NEIGHBOR_SAMPLE_TEST c_api/biased_neighbor_sample_test.c) ConfigureCTest(CAPI_RANDOM_WALKS_TEST c_api/sg_random_walks_test.c) ConfigureCTest(CAPI_TRIANGLE_COUNT_TEST c_api/triangle_count_test.c) ConfigureCTest(CAPI_LOUVAIN_TEST c_api/louvain_test.c) diff --git a/cpp/tests/c_api/biased_neighbor_sample_test.c b/cpp/tests/c_api/biased_neighbor_sample_test.c new file mode 100644 index 00000000000..fe80514c825 --- /dev/null +++ b/cpp/tests/c_api/biased_neighbor_sample_test.c @@ -0,0 +1,973 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "c_test_utils.h" /* RUN_TEST */ + +#include +#include + +#include +#include +#include + +typedef int32_t vertex_t; +typedef int32_t edge_t; +typedef float weight_t; + +data_type_id_t vertex_tid = INT32; +data_type_id_t edge_tid = INT32; +data_type_id_t weight_tid = FLOAT32; +data_type_id_t edge_id_tid = INT32; +data_type_id_t edge_type_tid = INT32; + +int vertex_id_compare_function(const void* a, const void* b) +{ + if (*((vertex_t*)a) < *((vertex_t*)b)) + return -1; + else if (*((vertex_t*)a) > *((vertex_t*)b)) + return 1; + else + return 0; +} + +int generic_biased_neighbor_sample_test(const cugraph_resource_handle_t* handle, + vertex_t* h_src, + vertex_t* h_dst, + weight_t* h_wgt, + edge_t* h_edge_ids, + int32_t* h_edge_types, + size_t num_vertices, + size_t num_edges, + vertex_t* h_start, + int* h_start_labels, + size_t num_start_vertices, + size_t num_start_labels, + int* fan_out, + size_t fan_out_size, + bool_t with_replacement, + bool_t return_hops, + cugraph_prior_sources_behavior_t prior_sources_behavior, + bool_t dedupe_sources, + bool_t renumber_results) +{ + // Create graph + int test_ret_value = 0; + cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; + cugraph_error_t* ret_error = NULL; + cugraph_graph_t* graph = NULL; + cugraph_sample_result_t* result = NULL; + + ret_code = create_sg_test_graph(handle, + vertex_tid, + edge_tid, + h_src, + h_dst, + weight_tid, + h_wgt, + edge_type_tid, + h_edge_types, + edge_id_tid, + h_edge_ids, + num_edges, + FALSE, + TRUE, + FALSE, + FALSE, + &graph, + &ret_error); + + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "graph creation failed."); + + cugraph_type_erased_device_array_t* d_start = NULL; + cugraph_type_erased_device_array_view_t* d_start_view = NULL; + cugraph_type_erased_device_array_t* d_start_labels = NULL; + cugraph_type_erased_device_array_view_t* d_start_labels_view = NULL; + cugraph_type_erased_host_array_view_t* h_fan_out_view = NULL; + + ret_code = 
cugraph_type_erased_device_array_create( + handle, num_start_vertices, INT32, &d_start, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "d_start create failed."); + + d_start_view = cugraph_type_erased_device_array_view(d_start); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, d_start_view, (byte_t*)h_start, &ret_error); + + ret_code = cugraph_type_erased_device_array_create( + handle, num_start_vertices, INT32, &d_start_labels, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "d_start_labels create failed."); + + d_start_labels_view = cugraph_type_erased_device_array_view(d_start_labels); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, d_start_labels_view, (byte_t*)h_start_labels, &ret_error); + + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "start_labels copy_from_host failed."); + + h_fan_out_view = cugraph_type_erased_host_array_view_create(fan_out, fan_out_size, INT32); + + cugraph_rng_state_t* rng_state; + ret_code = cugraph_rng_state_create(handle, 0, &rng_state, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "rng_state create failed."); + + cugraph_sampling_options_t* sampling_options; + + ret_code = cugraph_sampling_options_create(&sampling_options, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "sampling_options create failed."); + + cugraph_sampling_set_with_replacement(sampling_options, with_replacement); + cugraph_sampling_set_return_hops(sampling_options, return_hops); + cugraph_sampling_set_prior_sources_behavior(sampling_options, prior_sources_behavior); + cugraph_sampling_set_dedupe_sources(sampling_options, dedupe_sources); + cugraph_sampling_set_renumber_results(sampling_options, renumber_results); + + ret_code = cugraph_biased_neighbor_sample(handle, + graph, + NULL, + d_start_view, + d_start_labels_view, + NULL, + NULL, + NULL, + h_fan_out_view, + rng_state, + 
sampling_options, + FALSE, + &result, + &ret_error); + +#ifdef NO_CUGRAPH_OPS + TEST_ASSERT( + test_ret_value, ret_code != CUGRAPH_SUCCESS, "biased_neighbor_sample should have failed") +#else + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "biased_neighbor_sample failed."); + + cugraph_sampling_options_free(sampling_options); + + cugraph_type_erased_device_array_view_t* result_srcs; + cugraph_type_erased_device_array_view_t* result_dsts; + cugraph_type_erased_device_array_view_t* result_edge_id; + cugraph_type_erased_device_array_view_t* result_weights; + cugraph_type_erased_device_array_view_t* result_edge_types; + cugraph_type_erased_device_array_view_t* result_hops; + cugraph_type_erased_device_array_view_t* result_offsets; + cugraph_type_erased_device_array_view_t* result_labels; + cugraph_type_erased_device_array_view_t* result_renumber_map; + cugraph_type_erased_device_array_view_t* result_renumber_map_offsets; + + result_srcs = cugraph_sample_result_get_sources(result); + result_dsts = cugraph_sample_result_get_destinations(result); + result_edge_id = cugraph_sample_result_get_edge_id(result); + result_weights = cugraph_sample_result_get_edge_weight(result); + result_edge_types = cugraph_sample_result_get_edge_type(result); + result_hops = cugraph_sample_result_get_hop(result); + result_hops = cugraph_sample_result_get_hop(result); + result_offsets = cugraph_sample_result_get_offsets(result); + result_labels = cugraph_sample_result_get_start_labels(result); + result_renumber_map = cugraph_sample_result_get_renumber_map(result); + result_renumber_map_offsets = cugraph_sample_result_get_renumber_map_offsets(result); + + size_t result_size = cugraph_type_erased_device_array_view_size(result_srcs); + size_t result_offsets_size = cugraph_type_erased_device_array_view_size(result_offsets); + size_t renumber_map_size = 0; + + if (renumber_results) { + 
renumber_map_size = cugraph_type_erased_device_array_view_size(result_renumber_map); + } + + vertex_t h_result_srcs[result_size]; + vertex_t h_result_dsts[result_size]; + edge_t h_result_edge_id[result_size]; + weight_t h_result_weight[result_size]; + int32_t h_result_edge_types[result_size]; + int32_t h_result_hops[result_size]; + size_t h_result_offsets[result_offsets_size]; + int h_result_labels[num_start_labels]; + vertex_t h_renumber_map[renumber_map_size]; + size_t h_renumber_map_offsets[result_offsets_size]; + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_result_srcs, result_srcs, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_result_dsts, result_dsts, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_result_edge_id, result_edge_id, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_result_weight, result_weights, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_result_edge_types, result_edge_types, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + TEST_ASSERT(test_ret_value, result_hops == NULL, "hops was not empty"); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_result_offsets, result_offsets, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_result_labels, 
result_labels, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + for (int k = 0; k < result_offsets_size - 1; k += fan_out_size) { + for (int h = 0; h < fan_out_size; ++h) { + int hop_start = h_result_offsets[k + h]; + int hop_end = h_result_offsets[k + h + 1]; + for (int i = hop_start; i < hop_end; ++i) { + h_result_hops[i] = h; + } + } + } + + for (int k = 0; k < num_start_labels + 1; ++k) { + h_result_offsets[k] = h_result_offsets[k * fan_out_size]; + } + result_offsets_size = num_start_labels + 1; + + if (renumber_results) { + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_renumber_map, result_renumber_map, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_renumber_map_offsets, result_renumber_map_offsets, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + } + + // First, check that all edges are actually part of the graph + weight_t M_w[num_vertices][num_vertices]; + edge_t M_edge_id[num_vertices][num_vertices]; + int32_t M_edge_type[num_vertices][num_vertices]; + + for (int i = 0; i < num_vertices; ++i) + for (int j = 0; j < num_vertices; ++j) { + M_w[i][j] = 0.0; + M_edge_id[i][j] = -1; + M_edge_type[i][j] = -1; + } + + for (int i = 0; i < num_edges; ++i) { + M_w[h_src[i]][h_dst[i]] = h_wgt[i]; + M_edge_id[h_src[i]][h_dst[i]] = h_edge_ids[i]; + M_edge_type[h_src[i]][h_dst[i]] = h_edge_types[i]; + } + + if (renumber_results) { + for (int label_id = 0; label_id < (result_offsets_size - 1); ++label_id) { + for (size_t i = h_result_offsets[label_id]; + (i < h_result_offsets[label_id + 1]) && (test_ret_value == 0); + ++i) { + vertex_t src = h_renumber_map[h_renumber_map_offsets[label_id] + h_result_srcs[i]]; + vertex_t dst = h_renumber_map[h_renumber_map_offsets[label_id] + h_result_dsts[i]]; + + 
TEST_ASSERT(test_ret_value, + M_w[src][dst] == h_result_weight[i], + "biased_neighbor_sample got edge that doesn't exist"); + TEST_ASSERT(test_ret_value, + M_edge_id[src][dst] == h_result_edge_id[i], + "biased_neighbor_sample got edge that doesn't exist"); + TEST_ASSERT(test_ret_value, + M_edge_type[src][dst] == h_result_edge_types[i], + "biased_neighbor_sample got edge that doesn't exist"); + } + } + } else { + for (int i = 0; (i < result_size) && (test_ret_value == 0); ++i) { + TEST_ASSERT(test_ret_value, + M_w[h_result_srcs[i]][h_result_dsts[i]] == h_result_weight[i], + "biased_neighbor_sample got edge that doesn't exist"); + TEST_ASSERT(test_ret_value, + M_edge_id[h_result_srcs[i]][h_result_dsts[i]] == h_result_edge_id[i], + "biased_neighbor_sample got edge that doesn't exist"); + TEST_ASSERT(test_ret_value, + M_edge_type[h_result_srcs[i]][h_result_dsts[i]] == h_result_edge_types[i], + "biased_neighbor_sample got edge that doesn't exist"); + } + } + + // + // For the sampling result to make sense, all sources in hop 0 must be in the seeds, + // all sources in hop 1 must be a result from hop 0, etc. 
+ // + vertex_t check_v1[result_size]; + vertex_t check_v2[result_size]; + vertex_t* check_sources = check_v1; + vertex_t* check_destinations = check_v2; + + size_t degree[num_vertices]; + for (size_t i = 0; i < num_vertices; ++i) + degree[i] = 0; + + for (size_t i = 0; i < num_edges; ++i) { + degree[h_src[i]]++; + } + + for (int label_id = 0; label_id < (result_offsets_size - 1); ++label_id) { + size_t sources_size = 0; + size_t destinations_size = 0; + + // Fill sources with the input sources + for (size_t i = 0; i < num_start_vertices; ++i) { + if (h_start_labels[i] == h_result_labels[label_id]) { + check_sources[sources_size] = h_start[i]; + ++sources_size; + } + } + + if (renumber_results) { + size_t num_vertex_ids = 2 * (h_result_offsets[label_id + 1] - h_result_offsets[label_id]); + vertex_t vertex_ids[num_vertex_ids]; + + for (size_t i = 0; (i < (h_result_offsets[label_id + 1] - h_result_offsets[label_id])) && + (test_ret_value == 0); + ++i) { + vertex_ids[2 * i] = h_result_srcs[h_result_offsets[label_id] + i]; + vertex_ids[2 * i + 1] = h_result_dsts[h_result_offsets[label_id] + i]; + } + + qsort(vertex_ids, num_vertex_ids, sizeof(vertex_t), vertex_id_compare_function); + + vertex_t current_v = 0; + for (size_t i = 0; (i < num_vertex_ids) && (test_ret_value == 0); ++i) { + if (vertex_ids[i] == current_v) + ++current_v; + else + TEST_ASSERT(test_ret_value, + vertex_ids[i] == (current_v - 1), + "vertices are not properly renumbered"); + } + } + + for (int hop = 0; hop < fan_out_size; ++hop) { + if (prior_sources_behavior == CARRY_OVER) { + destinations_size = sources_size; + for (size_t i = 0; i < sources_size; ++i) { + check_destinations[i] = check_sources[i]; + } + } + + for (size_t i = h_result_offsets[label_id]; + (i < h_result_offsets[label_id + 1]) && (test_ret_value == 0); + ++i) { + if (h_result_hops[i] == hop) { + bool found = false; + for (size_t j = 0; (!found) && (j < sources_size); ++j) { + found = renumber_results + ? 
(h_renumber_map[h_renumber_map_offsets[label_id] + h_result_srcs[i]] == + check_sources[j]) + : (h_result_srcs[i] == check_sources[j]); + } + + TEST_ASSERT(test_ret_value, + found, + "encountered source vertex that was not part of previous frontier"); + } + + if (prior_sources_behavior == CARRY_OVER) { + // Make sure destination isn't already in the source list + bool found = false; + for (size_t j = 0; (!found) && (j < destinations_size); ++j) { + found = renumber_results + ? (h_renumber_map[h_renumber_map_offsets[label_id] + h_result_dsts[i]] == + check_destinations[j]) + : (h_result_dsts[i] == check_destinations[j]); + } + + if (!found) { + check_destinations[destinations_size] = + renumber_results ? h_renumber_map[h_renumber_map_offsets[label_id] + h_result_dsts[i]] + : h_result_dsts[i]; + ++destinations_size; + } + } else { + check_destinations[destinations_size] = + renumber_results ? h_renumber_map[h_renumber_map_offsets[label_id] + h_result_dsts[i]] + : h_result_dsts[i]; + ++destinations_size; + } + } + + vertex_t* tmp = check_sources; + check_sources = check_destinations; + check_destinations = tmp; + sources_size = destinations_size; + destinations_size = 0; + } + + if (prior_sources_behavior == EXCLUDE) { + // Make sure vertex v only appears as source in the first hop after it is encountered + for (size_t i = h_result_offsets[label_id]; + (i < h_result_offsets[label_id + 1]) && (test_ret_value == 0); + ++i) { + for (size_t j = i + 1; (j < h_result_offsets[label_id + 1]) && (test_ret_value == 0); ++j) { + if (h_result_srcs[i] == h_result_srcs[j]) { + TEST_ASSERT(test_ret_value, + h_result_hops[i] == h_result_hops[j], + "source vertex should not have been used in diferent hops"); + } + } + } + } + + if (dedupe_sources) { + // Make sure vertex v only appears as source once for each edge after it appears as + // destination Externally test this by verifying that vertex v only appears in <= hop + // size/degree + for (size_t i = h_result_offsets[label_id]; + 
(i < h_result_offsets[label_id + 1]) && (test_ret_value == 0);
+           ++i) {
+        if (h_result_hops[i] > 0) {
+          size_t num_occurrences = 1;
+          for (size_t j = i + 1; j < h_result_offsets[label_id + 1]; ++j) {
+            if ((h_result_srcs[j] == h_result_srcs[i]) && (h_result_hops[j] == h_result_hops[i]))
+              num_occurrences++;
+          }
+
+          if (fan_out[h_result_hops[i]] < 0) {
+            TEST_ASSERT(test_ret_value,
+                        num_occurrences <= degree[h_result_srcs[i]],
+                        "source vertex used in too many return edges");
+          } else {
+            TEST_ASSERT(test_ret_value,
+                        num_occurrences < fan_out[h_result_hops[i]],
+                        "source vertex used in too many return edges");
+          }
+        }
+      }
+    }
+  }
+
+  cugraph_sample_result_free(result);
+#endif
+
+  cugraph_sg_graph_free(graph);
+  cugraph_error_free(ret_error);
+  return test_ret_value;
+}
+
+// Runs cugraph_biased_neighbor_sample on a 6-vertex, 8-edge graph with two labeled start
+// vertices and a single hop (fan_out = {-1}), copies the sampled edges back to the host and
+// checks that every returned (src, dst) pair carries the weight, edge id and edge type of an
+// edge from the input edge list.
+//
+// Returns test_ret_value, the status variable handed to every TEST_ASSERT (presumably 0 when
+// all checks pass -- confirm against the macro definition).
+int test_biased_neighbor_sample_with_labels(const cugraph_resource_handle_t* handle)
+{
+  data_type_id_t vertex_tid = INT32;
+  data_type_id_t edge_tid = INT32;
+  data_type_id_t weight_tid = FLOAT32;
+  data_type_id_t edge_id_tid = INT32;
+  data_type_id_t edge_type_tid = INT32;
+
+  size_t num_edges = 8;
+  size_t num_vertices = 6;
+  size_t fan_out_size = 1;
+  size_t num_starts = 2;
+
+  vertex_t src[] = {0, 1, 1, 2, 2, 2, 3, 4};
+  vertex_t dst[] = {1, 3, 4, 0, 1, 3, 5, 5};
+  edge_t edge_ids[] = {0, 1, 2, 3, 4, 5, 6, 7};
+  weight_t weight[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8};
+  int32_t edge_types[] = {7, 6, 5, 4, 3, 2, 1, 0};
+  vertex_t start[] = {2, 3};
+  // The labels are copied into an INT32 device array of num_starts elements below, so the
+  // host elements have to be 32 bits wide as well; a size_t array would hand over the wrong
+  // bytes.
+  int32_t start_labels[] = {6, 12};
+  int fan_out[] = {-1};
+
+  // Create graph
+  int test_ret_value = 0;
+  cugraph_error_code_t ret_code = CUGRAPH_SUCCESS;
+  cugraph_error_t* ret_error = NULL;
+  cugraph_graph_t* graph = NULL;
+  cugraph_sample_result_t* result = NULL;
+
+  bool_t with_replacement = TRUE;
+  bool_t return_hops = TRUE;
+  cugraph_prior_sources_behavior_t prior_sources_behavior = DEFAULT;
+  bool_t dedupe_sources = FALSE;
+  bool_t renumber_results = FALSE;
+  cugraph_compression_type_t compression = COO;
+  bool_t compress_per_hop = FALSE;
+
+  ret_code = create_sg_test_graph(handle,
+                                  vertex_tid,
+                                  edge_tid,
+                                  src,
+                                  dst,
+                                  weight_tid,
+                                  weight,
+                                  edge_type_tid,
+                                  edge_types,
+                                  edge_id_tid,
+                                  edge_ids,
+                                  num_edges,
+                                  FALSE,
+                                  TRUE,
+                                  FALSE,
+                                  FALSE,
+                                  &graph,
+                                  &ret_error);
+
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "graph creation failed.");
+
+  cugraph_type_erased_device_array_t* d_start = NULL;
+  cugraph_type_erased_device_array_view_t* d_start_view = NULL;
+  cugraph_type_erased_device_array_t* d_start_labels = NULL;
+  cugraph_type_erased_device_array_view_t* d_start_labels_view = NULL;
+  cugraph_type_erased_host_array_view_t* h_fan_out_view = NULL;
+  cugraph_rng_state_t* rng_state = NULL;
+  cugraph_sampling_options_t* sampling_options = NULL;
+
+  ret_code =
+    cugraph_type_erased_device_array_create(handle, num_starts, INT32, &d_start, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "d_start create failed.");
+
+  d_start_view = cugraph_type_erased_device_array_view(d_start);
+
+  // Check this copy before ret_code is reused by the next call.
+  ret_code = cugraph_type_erased_device_array_view_copy_from_host(
+    handle, d_start_view, (byte_t*)start, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "start copy_from_host failed.");
+
+  ret_code =
+    cugraph_type_erased_device_array_create(handle, num_starts, INT32, &d_start_labels, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "d_start_labels create failed.");
+
+  d_start_labels_view = cugraph_type_erased_device_array_view(d_start_labels);
+
+  ret_code = cugraph_type_erased_device_array_view_copy_from_host(
+    handle, d_start_labels_view, (byte_t*)start_labels, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "start_labels copy_from_host failed.");
+
+  h_fan_out_view = cugraph_type_erased_host_array_view_create(fan_out, fan_out_size, INT32);
+
+  ret_code = cugraph_rng_state_create(handle, 0, &rng_state, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "rng_state create failed.");
+
+  ret_code = cugraph_sampling_options_create(&sampling_options, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "sampling_options create failed.");
+
+  cugraph_sampling_set_with_replacement(sampling_options, with_replacement);
+  cugraph_sampling_set_return_hops(sampling_options, return_hops);
+  cugraph_sampling_set_prior_sources_behavior(sampling_options, prior_sources_behavior);
+  cugraph_sampling_set_dedupe_sources(sampling_options, dedupe_sources);
+  cugraph_sampling_set_renumber_results(sampling_options, renumber_results);
+  cugraph_sampling_set_compression_type(sampling_options, compression);
+  cugraph_sampling_set_compress_per_hop(sampling_options, compress_per_hop);
+
+  ret_code = cugraph_biased_neighbor_sample(handle,
+                                            graph,
+                                            NULL,
+                                            d_start_view,
+                                            d_start_labels_view,
+                                            NULL,
+                                            NULL,
+                                            NULL,
+                                            h_fan_out_view,
+                                            rng_state,
+                                            sampling_options,
+                                            FALSE,
+                                            &result,
+                                            &ret_error);
+
+#ifdef NO_CUGRAPH_OPS
+  TEST_ASSERT(
+    test_ret_value, ret_code != CUGRAPH_SUCCESS, "biased_neighbor_sample should have failed")
+#else
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error));
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "biased_neighbor_sample failed.");
+
+  cugraph_type_erased_device_array_view_t* result_srcs = cugraph_sample_result_get_sources(result);
+  cugraph_type_erased_device_array_view_t* result_dsts =
+    cugraph_sample_result_get_destinations(result);
+  cugraph_type_erased_device_array_view_t* result_edge_id =
+    cugraph_sample_result_get_edge_id(result);
+  cugraph_type_erased_device_array_view_t* result_weights =
+    cugraph_sample_result_get_edge_weight(result);
+  cugraph_type_erased_device_array_view_t* result_edge_types =
+    cugraph_sample_result_get_edge_type(result);
+  cugraph_type_erased_device_array_view_t* result_hops = cugraph_sample_result_get_hop(result);
+  cugraph_type_erased_device_array_view_t* result_offsets =
+    cugraph_sample_result_get_offsets(result);
+
+  size_t result_size = cugraph_type_erased_device_array_view_size(result_srcs);
+  size_t result_offsets_size = cugraph_type_erased_device_array_view_size(result_offsets);
+
+  vertex_t h_srcs[result_size];
+  vertex_t h_dsts[result_size];
+  edge_t h_edge_id[result_size];
+  weight_t h_weight[result_size];
+  int32_t h_edge_types[result_size];
+  size_t h_result_offsets[result_offsets_size];
+
+  ret_code = cugraph_type_erased_device_array_view_copy_to_host(
+    handle, (byte_t*)h_srcs, result_srcs, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed.");
+
+  ret_code = cugraph_type_erased_device_array_view_copy_to_host(
+    handle, (byte_t*)h_dsts, result_dsts, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed.");
+
+  ret_code = cugraph_type_erased_device_array_view_copy_to_host(
+    handle, (byte_t*)h_edge_id, result_edge_id, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed.");
+
+  ret_code = cugraph_type_erased_device_array_view_copy_to_host(
+    handle, (byte_t*)h_weight, result_weights, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed.");
+
+  ret_code = cugraph_type_erased_device_array_view_copy_to_host(
+    handle, (byte_t*)h_edge_types, result_edge_types, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed.");
+
+  TEST_ASSERT(test_ret_value, result_hops == NULL, "hops was not empty");
+
+  ret_code = cugraph_type_erased_device_array_view_copy_to_host(
+    handle, (byte_t*)h_result_offsets, result_offsets, &ret_error);
+  TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed.");
+
+  // NOTE: The C++ tester does a more thorough validation. For our purposes
+  // here we will do a simpler validation, merely checking that all edges
+  // are actually part of the graph
+  weight_t M_w[num_vertices][num_vertices];
+  edge_t M_edge_id[num_vertices][num_vertices];
+  int32_t M_edge_type[num_vertices][num_vertices];
+
+  // Dense lookup tables indexed by [src][dst]; -1 marks "no such edge" for ids and types.
+  for (size_t i = 0; i < num_vertices; ++i) {
+    for (size_t j = 0; j < num_vertices; ++j) {
+      M_w[i][j] = 0.0;
+      M_edge_id[i][j] = -1;
+      M_edge_type[i][j] = -1;
+    }
+  }
+
+  for (size_t i = 0; i < num_edges; ++i) {
+    M_w[src[i]][dst[i]] = weight[i];
+    M_edge_id[src[i]][dst[i]] = edge_ids[i];
+    M_edge_type[src[i]][dst[i]] = edge_types[i];
+  }
+
+  // Stop at the first mismatch: the loop condition also watches test_ret_value.
+  for (size_t i = 0; (i < result_size) && (test_ret_value == 0); ++i) {
+    TEST_ASSERT(test_ret_value,
+                M_w[h_srcs[i]][h_dsts[i]] == h_weight[i],
+                "biased_neighbor_sample got edge that doesn't exist");
+    TEST_ASSERT(test_ret_value,
+                M_edge_id[h_srcs[i]][h_dsts[i]] == h_edge_id[i],
+                "biased_neighbor_sample got edge that doesn't exist");
+    TEST_ASSERT(test_ret_value,
+                M_edge_type[h_srcs[i]][h_dsts[i]] == h_edge_types[i],
+                "biased_neighbor_sample got edge that doesn't exist");
+  }
+
+  cugraph_sample_result_free(result);
+#endif
+
+  // Release everything this test created, regardless of which #ifdef branch was compiled.
+  cugraph_sampling_options_free(sampling_options);
+  cugraph_rng_state_free(rng_state);
+  cugraph_type_erased_host_array_view_free(h_fan_out_view);
+  cugraph_type_erased_device_array_view_free(d_start_labels_view);
+  cugraph_type_erased_device_array_free(d_start_labels);
+  cugraph_type_erased_device_array_view_free(d_start_view);
+  cugraph_type_erased_device_array_free(d_start);
+  cugraph_sg_graph_free(graph);
+  cugraph_error_free(ret_error);
+
+  // The function is declared int; hand the accumulated status back to the caller.
+  return test_ret_value;
+}
+
+// Baseline case: forwards a 6-vertex, 9-edge graph, two labeled seeds and a three-hop
+// fan-out of {-1, -1, -1} to generic_biased_neighbor_sample_test with no replacement,
+// DEFAULT prior-sources behavior, no source dedupe and no renumbering.
+int test_biased_neighbor_sample_clean(const cugraph_resource_handle_t* handle)
+{
+  size_t num_edges = 9;
+  size_t num_vertices = 6;
+  size_t fan_out_size = 3;
+  size_t num_starts = 2;
+  size_t num_start_labels = 2;
+
+  vertex_t src[] = {0, 0, 1, 1, 2, 2, 2, 3, 4};
+  vertex_t dst[] = {1, 3, 3, 4, 0, 1, 3, 5, 5};
+  edge_t edge_ids[] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
+  weight_t weight[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9};
+  int32_t edge_types[] = {8, 7, 6, 5, 4, 3, 2, 1, 0};
+  vertex_t start[] = {2, 3};
+  int start_labels[] = {6, 12};
+  int fan_out[] = {-1, -1, -1};
+
+  bool_t with_replacement = FALSE;
+  bool_t return_hops = TRUE;
+  cugraph_prior_sources_behavior_t prior_sources_behavior = DEFAULT;
+  bool_t dedupe_sources = FALSE;
+  bool_t renumber_results = FALSE;
+
+  return generic_biased_neighbor_sample_test(handle,
+                                             src,
+                                             dst,
+                                             weight,
+                                             edge_ids,
+                                             edge_types,
+                                             num_vertices,
+                                             num_edges,
+                                             start,
+                                             start_labels,
+                                             num_starts,
+                                             num_start_labels,
+                                             fan_out,
+                                             fan_out_size,
+                                             with_replacement,
+                                             return_hops,
+                                             prior_sources_behavior,
+                                             dedupe_sources,
+                                             renumber_results);
+}
+
+// Dedupe case: 6-vertex, 9-edge graph, two labeled seeds, three-hop fan-out of
+// {-1, -1, -1}; the only option that differs from the defaults is dedupe_sources = TRUE.
+// Returns whatever generic_biased_neighbor_sample_test returns.
+int test_biased_neighbor_sample_dedupe_sources(const cugraph_resource_handle_t* handle)
+{
+  size_t num_edges = 9;
+  size_t num_vertices = 6;
+  size_t fan_out_size = 3;
+  size_t num_starts = 2;
+  size_t num_start_labels = 2;
+
+  vertex_t src[] = {0, 0, 1, 1, 2, 2, 2, 3, 4};
+  vertex_t dst[] = {1, 3, 3, 4, 0, 1, 3, 5, 5};
+  edge_t edge_ids[] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
+  weight_t weight[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9};
+  int32_t edge_types[] = {8, 7, 6, 5, 4, 3, 2, 1, 0};
+  vertex_t start[] = {2, 3};
+  int start_labels[] = {6, 12};
+  int fan_out[] = {-1, -1, -1};
+
+  bool_t with_replacement = FALSE;
+  bool_t return_hops = TRUE;
+  cugraph_prior_sources_behavior_t prior_sources_behavior = DEFAULT;
+  bool_t dedupe_sources = TRUE;
+  bool_t renumber_results = FALSE;
+
+  return generic_biased_neighbor_sample_test(handle,
+                                             src,
+                                             dst,
+                                             weight,
+                                             edge_ids,
+                                             edge_types,
+                                             num_vertices,
+                                             num_edges,
+                                             start,
+                                             start_labels,
+                                             num_starts,
+                                             num_start_labels,
+                                             fan_out,
+                                             fan_out_size,
+                                             with_replacement,
+                                             return_hops,
+                                             prior_sources_behavior,
+                                             dedupe_sources,
+                                             renumber_results);
+}
+
+// Unique-sources case: 6-vertex, 9-edge graph (edge 0 -> 2 instead of the second 0 -> 3
+// style edge used by the dedupe case), two labeled seeds, three-hop fan-out of
+// {-1, -1, -1}; prior_sources_behavior is EXCLUDE, every other option is off.
+// Returns whatever generic_biased_neighbor_sample_test returns.
+int test_biased_neighbor_sample_unique_sources(const cugraph_resource_handle_t* handle)
+{
+  size_t num_edges = 9;
+  size_t num_vertices = 6;
+  size_t fan_out_size = 3;
+  size_t num_starts = 2;
+  size_t num_start_labels = 2;
+
+  vertex_t src[] = {0, 0, 1, 1, 2, 2, 2, 3, 4};
+  vertex_t dst[] = {1, 2, 3, 4, 0, 1, 3, 5, 5};
+  edge_t edge_ids[] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
+  weight_t weight[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9};
+  int32_t edge_types[] = {8, 7, 6, 5, 4, 3, 2, 1, 0};
+  vertex_t start[] = {2, 3};
+  int start_labels[] = {6, 12};
+  int fan_out[] = {-1, -1, -1};
+
+  bool_t with_replacement = FALSE;
+  bool_t return_hops = TRUE;
+  cugraph_prior_sources_behavior_t prior_sources_behavior = EXCLUDE;
+  bool_t dedupe_sources = FALSE;
+  bool_t renumber_results = FALSE;
+
+  return generic_biased_neighbor_sample_test(handle,
+                                             src,
+                                             dst,
+                                             weight,
+                                             edge_ids,
+                                             edge_types,
+                                             num_vertices,
+                                             num_edges,
+                                             start,
+                                             start_labels,
+                                             num_starts,
+                                             num_start_labels,
+                                             fan_out,
+                                             fan_out_size,
+                                             with_replacement,
+                                             return_hops,
+                                             prior_sources_behavior,
+                                             dedupe_sources,
+                                             renumber_results);
+}
+
+// Carry-over case: same edge list, seeds and fan-out as the unique-sources case, but with
+// prior_sources_behavior set to CARRY_OVER.
+// Returns whatever generic_biased_neighbor_sample_test returns.
+int test_biased_neighbor_sample_carry_over_sources(const cugraph_resource_handle_t* handle)
+{
+  size_t num_edges = 9;
+  size_t num_vertices = 6;
+  size_t fan_out_size = 3;
+  size_t num_starts = 2;
+  size_t num_start_labels = 2;
+
+  vertex_t src[] = {0, 0, 1, 1, 2, 2, 2, 3, 4};
+  vertex_t dst[] = {1, 2, 3, 4, 0, 1, 3, 5, 5};
+  edge_t edge_ids[] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
+  weight_t weight[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9};
+  int32_t edge_types[] = {8, 7, 6, 5, 4, 3, 2, 1, 0};
+  vertex_t start[] = {2, 3};
+  int start_labels[] = {6, 12};
+  int fan_out[] = {-1, -1, -1};
+
+  bool_t with_replacement = FALSE;
+  bool_t return_hops = TRUE;
+  cugraph_prior_sources_behavior_t prior_sources_behavior = CARRY_OVER;
+  bool_t dedupe_sources = FALSE;
+  bool_t renumber_results = FALSE;
+
+  return generic_biased_neighbor_sample_test(handle,
+                                             src,
+                                             dst,
+                                             weight,
+                                             edge_ids,
+                                             edge_types,
+                                             num_vertices,
+                                             num_edges,
+                                             start,
+                                             start_labels,
+                                             num_starts,
+                                             num_start_labels,
+                                             fan_out,
+                                             fan_out_size,
+                                             with_replacement,
+                                             return_hops,
+                                             prior_sources_behavior,
+                                             dedupe_sources,
+                                             renumber_results);
+}
+
+// Renumbering case: same edge list, seeds and fan-out as the carry-over case, with
+// DEFAULT prior-sources behavior and renumber_results = TRUE.
+// Returns whatever generic_biased_neighbor_sample_test returns.
+int test_biased_neighbor_sample_renumber_results(const cugraph_resource_handle_t* handle)
+{
+  size_t num_edges = 9;
+  size_t num_vertices = 6;
+  size_t fan_out_size = 3;
+  size_t num_starts = 2;
+  size_t num_start_labels = 2;
+
+  vertex_t src[] = {0, 0, 1, 1, 2, 2, 2, 3, 4};
+  vertex_t dst[] = {1, 2, 3, 4, 0, 1, 3, 5, 5};
+  edge_t edge_ids[] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
+  weight_t weight[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9};
+  int32_t edge_types[] = {8, 7, 6, 5, 4, 3, 2, 1, 0};
+  vertex_t start[] = {2, 3};
+  int start_labels[] = {6, 12};
+  int fan_out[] = {-1, -1, -1};
+
+  bool_t with_replacement = FALSE;
+  bool_t return_hops = TRUE;
+  cugraph_prior_sources_behavior_t prior_sources_behavior = DEFAULT;
+  bool_t dedupe_sources = FALSE;
+  bool_t renumber_results = TRUE;
+
+  return generic_biased_neighbor_sample_test(handle,
+                                             src,
+                                             dst,
+                                             weight,
+                                             edge_ids,
+                                             edge_types,
+                                             num_vertices,
+                                             num_edges,
+                                             start,
+                                             start_labels,
+                                             num_starts,
+                                             num_start_labels,
+                                             fan_out,
+                                             fan_out_size,
+                                             with_replacement,
+                                             return_hops,
+                                             prior_sources_behavior,
+                                             dedupe_sources,
+                                             renumber_results);
+}
+
+// Test driver: creates one resource handle, runs each registered test through
+// RUN_TEST_NEW, ORs the individual results together and returns the combined value as the
+// process exit status.
+//
+// NOTE(review): test_biased_neighbor_sample_with_labels is defined in this file but is not
+// registered below -- confirm whether leaving it out is intentional.
+int main(int argc, char** argv)
+{
+  cugraph_resource_handle_t* handle = NULL;
+
+  handle = cugraph_create_resource_handle(NULL);
+
+  int result = 0;
+  result |= RUN_TEST_NEW(test_biased_neighbor_sample_clean, handle);
+  result |= RUN_TEST_NEW(test_biased_neighbor_sample_dedupe_sources, handle);
+  result |= RUN_TEST_NEW(test_biased_neighbor_sample_unique_sources, handle);
+  result |= RUN_TEST_NEW(test_biased_neighbor_sample_carry_over_sources, handle);
+  result |= RUN_TEST_NEW(test_biased_neighbor_sample_renumber_results, handle);
+
+  cugraph_free_resource_handle(handle);
+
+  return result;
+}
diff --git a/cpp/tests/c_api/mg_biased_neighbor_sample_test.c b/cpp/tests/c_api/mg_biased_neighbor_sample_test.c
new file mode 100644
index 00000000000..ce96fc6f5f7
--- /dev/null
+++ b/cpp/tests/c_api/mg_biased_neighbor_sample_test.c
@@ -0,0 +1,1398 @@
+/*
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include "mg_test_utils.h" /* RUN_MG_TEST */ + +#include +#include + +#include +#include +#include + +typedef int32_t vertex_t; +typedef int32_t edge_t; +typedef float weight_t; + +data_type_id_t vertex_tid = INT32; +data_type_id_t edge_tid = INT32; +data_type_id_t weight_tid = FLOAT32; +data_type_id_t edge_id_tid = INT32; +data_type_id_t edge_type_tid = INT32; + +int generic_biased_neighbor_sample_test(const cugraph_resource_handle_t* handle, + vertex_t* h_src, + vertex_t* h_dst, + weight_t* h_wgt, + edge_t* h_edge_ids, + int32_t* h_edge_types, + size_t num_vertices, + size_t num_edges, + vertex_t* h_start, + int* h_start_labels, + size_t num_start_vertices, + int* fan_out, + size_t fan_out_size, + bool_t with_replacement, + bool_t return_hops, + cugraph_prior_sources_behavior_t prior_sources_behavior, + bool_t dedupe_sources) +{ + // Create graph + int test_ret_value = 0; + cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; + cugraph_error_t* ret_error = NULL; + cugraph_graph_t* graph = NULL; + cugraph_sample_result_t* result = NULL; + + int rank = cugraph_resource_handle_get_rank(handle); + + ret_code = create_mg_test_graph_new(handle, + vertex_tid, + edge_tid, + h_src, + h_dst, + weight_tid, + h_wgt, + edge_type_tid, + h_edge_types, + edge_id_tid, + h_edge_ids, + num_edges, + FALSE, + TRUE, + FALSE, + FALSE, + &graph, + &ret_error); + + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "graph creation failed."); + + cugraph_type_erased_device_array_t* d_start = NULL; + cugraph_type_erased_device_array_view_t* d_start_view = NULL; + cugraph_type_erased_device_array_t* d_start_labels = NULL; + cugraph_type_erased_device_array_view_t* d_start_labels_view = NULL; + cugraph_type_erased_host_array_view_t* h_fan_out_view = NULL; + + if (rank > 0) num_start_vertices = 0; + + ret_code = cugraph_type_erased_device_array_create( + handle, num_start_vertices, INT32, &d_start, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "d_start 
create failed."); + + d_start_view = cugraph_type_erased_device_array_view(d_start); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, d_start_view, (byte_t*)h_start, &ret_error); + + if (h_start_labels != NULL) { + ret_code = cugraph_type_erased_device_array_create( + handle, num_start_vertices, INT32, &d_start_labels, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "d_start_labels create failed."); + + d_start_labels_view = cugraph_type_erased_device_array_view(d_start_labels); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, d_start_labels_view, (byte_t*)h_start_labels, &ret_error); + + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "start_labels copy_from_host failed."); + } + + h_fan_out_view = cugraph_type_erased_host_array_view_create(fan_out, fan_out_size, INT32); + + cugraph_rng_state_t* rng_state; + ret_code = cugraph_rng_state_create(handle, rank, &rng_state, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "rng_state create failed."); + + cugraph_sampling_options_t* sampling_options; + + ret_code = cugraph_sampling_options_create(&sampling_options, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "sampling_options create failed."); + + cugraph_sampling_set_with_replacement(sampling_options, with_replacement); + cugraph_sampling_set_return_hops(sampling_options, return_hops); + cugraph_sampling_set_prior_sources_behavior(sampling_options, prior_sources_behavior); + cugraph_sampling_set_dedupe_sources(sampling_options, dedupe_sources); + + ret_code = cugraph_biased_neighbor_sample(handle, + graph, + NULL, + d_start_view, + d_start_labels_view, + NULL, + NULL, + NULL, + h_fan_out_view, + rng_state, + sampling_options, + FALSE, + &result, + &ret_error); + +#ifdef NO_CUGRAPH_OPS + TEST_ASSERT( + test_ret_value, ret_code != CUGRAPH_SUCCESS, "biased_neighbor_sample should have failed") +#else + TEST_ASSERT(test_ret_value, 
ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "biased_neighbor_sample failed."); + + cugraph_sampling_options_free(sampling_options); + + cugraph_type_erased_device_array_view_t* result_srcs; + cugraph_type_erased_device_array_view_t* result_dsts; + cugraph_type_erased_device_array_view_t* result_edge_id; + cugraph_type_erased_device_array_view_t* result_weights; + cugraph_type_erased_device_array_view_t* result_edge_types; + cugraph_type_erased_device_array_view_t* result_hops; + cugraph_type_erased_device_array_view_t* result_offsets = NULL; + cugraph_type_erased_device_array_view_t* result_labels = NULL; + + result_srcs = cugraph_sample_result_get_sources(result); + result_dsts = cugraph_sample_result_get_destinations(result); + result_edge_id = cugraph_sample_result_get_edge_id(result); + result_weights = cugraph_sample_result_get_edge_weight(result); + result_edge_types = cugraph_sample_result_get_edge_type(result); + result_hops = cugraph_sample_result_get_hop(result); + result_hops = cugraph_sample_result_get_hop(result); + + size_t result_offsets_size = 2; + + if (d_start_labels != NULL) { + result_offsets = cugraph_sample_result_get_offsets(result); + result_labels = cugraph_sample_result_get_start_labels(result); + result_offsets_size = + 1 + cugraph_test_scalar_reduce( + handle, cugraph_type_erased_device_array_view_size(result_offsets) - 1); + } + + size_t result_size = cugraph_test_device_gatherv_size(handle, result_srcs); + + vertex_t h_result_srcs[result_size]; + vertex_t h_result_dsts[result_size]; + edge_t h_result_edge_id[result_size]; + weight_t h_result_weight[result_size]; + int32_t h_result_edge_types[result_size]; + int32_t h_result_hops[result_size]; + size_t h_result_offsets[result_offsets_size]; + int h_result_labels[result_offsets_size - 1]; + + if (result_offsets_size == 2) { + h_result_offsets[0] = 0; + h_result_offsets[1] = result_size; + } + + ret_code = 
cugraph_test_device_gatherv_fill(handle, result_srcs, h_result_srcs); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "gatherv_fill failed."); + + ret_code = cugraph_test_device_gatherv_fill(handle, result_dsts, h_result_dsts); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "gatherv_fill failed."); + + if (h_edge_ids != NULL) { + ret_code = cugraph_test_device_gatherv_fill(handle, result_edge_id, h_result_edge_id); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "gatherv_fill failed."); + } + + if (h_wgt != NULL) { + ret_code = cugraph_test_device_gatherv_fill(handle, result_weights, h_result_weight); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "gatherv_fill failed."); + } + + if (h_edge_types != NULL) { + ret_code = cugraph_test_device_gatherv_fill(handle, result_edge_types, h_result_edge_types); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "gatherv_fill failed."); + } + + if (d_start_labels != NULL) { + size_t sz = cugraph_type_erased_device_array_view_size(result_offsets); + + ret_code = cugraph_test_device_gatherv_fill(handle, result_labels, h_result_labels); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "gatherv_fill failed."); + + size_t tmp_result_offsets[sz]; + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)tmp_result_offsets, result_offsets, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + // convert to size + for (size_t i = 1; i < sz; ++i) { + tmp_result_offsets[i - 1] = tmp_result_offsets[i] - tmp_result_offsets[i - 1]; + } + + cugraph_test_host_gatherv_fill( + handle, tmp_result_offsets, sz - 1, SIZE_T, h_result_offsets + 1); + + h_result_offsets[0] = 0; + for (size_t i = 1; i < result_offsets_size; ++i) { + h_result_offsets[i] += h_result_offsets[i - 1]; + } + } + + // First, check that all edges are actually part of the graph + weight_t M_w[num_vertices][num_vertices]; + edge_t 
M_edge_id[num_vertices][num_vertices]; + int32_t M_edge_type[num_vertices][num_vertices]; + + for (int i = 0; i < num_vertices; ++i) + for (int j = 0; j < num_vertices; ++j) { + M_w[i][j] = 0.0; + M_edge_id[i][j] = -1; + M_edge_type[i][j] = -1; + } + + for (int i = 0; i < num_edges; ++i) { + if (h_wgt != NULL) + M_w[h_src[i]][h_dst[i]] = h_wgt[i]; + else + M_w[h_src[i]][h_dst[i]] = 1.0; + + if (h_edge_ids != NULL) M_edge_id[h_src[i]][h_dst[i]] = h_edge_ids[i]; + if (h_edge_types != NULL) M_edge_type[h_src[i]][h_dst[i]] = h_edge_types[i]; + } + + for (int i = 0; (i < result_size) && (test_ret_value == 0); ++i) { + if (h_wgt != NULL) { + TEST_ASSERT(test_ret_value, + M_w[h_result_srcs[i]][h_result_dsts[i]] == h_result_weight[i], + "biased_neighbor_sample got edge that doesn't exist"); + } else { + TEST_ASSERT(test_ret_value, + M_w[h_result_srcs[i]][h_result_dsts[i]] == 1.0, + "biased_neighbor_sample got edge that doesn't exist"); + } + + if (h_edge_ids != NULL) + TEST_ASSERT(test_ret_value, + M_edge_id[h_result_srcs[i]][h_result_dsts[i]] == h_result_edge_id[i], + "biased_neighbor_sample got edge that doesn't exist"); + if (h_edge_types != NULL) + TEST_ASSERT(test_ret_value, + M_edge_type[h_result_srcs[i]][h_result_dsts[i]] == h_result_edge_types[i], + "biased_neighbor_sample got edge that doesn't exist"); + } + + if ((return_hops) && (d_start_labels != NULL) && (result_offsets_size > 0)) { + // + // For the sampling result to make sense, all sources in hop 0 must be in the seeds, + // all sources in hop 1 must be a result from hop 0, etc. 
+ // + vertex_t check_v1[result_size]; + vertex_t check_v2[result_size]; + vertex_t* check_sources = check_v1; + vertex_t* check_destinations = check_v2; + + size_t degree[num_vertices]; + for (size_t i = 0; i < num_vertices; ++i) + degree[i] = 0; + + for (size_t i = 0; i < num_edges; ++i) { + degree[h_src[i]]++; + } + + for (size_t label_id = 0; label_id < (result_offsets_size - 1); ++label_id) { + // Skip any labels we already processed + bool already_processed = false; + for (size_t i = 0; (i < label_id) && !already_processed; ++i) + already_processed = (h_result_labels[label_id] == h_result_labels[i]); + + if (already_processed) continue; + + size_t sources_size = 0; + size_t destinations_size = 0; + + // Fill sources with the input sources + for (size_t i = 0; i < num_start_vertices; ++i) { + if (h_start_labels[i] == h_result_labels[label_id]) { + check_sources[sources_size] = h_start[i]; + ++sources_size; + } + } + + for (int hop = 0; hop < fan_out_size; ++hop) { + if (prior_sources_behavior == CARRY_OVER) { + destinations_size = sources_size; + for (size_t i = 0; i < sources_size; ++i) { + check_destinations[i] = check_sources[i]; + } + } + + for (size_t current_label_id = label_id; current_label_id < (result_offsets_size - 1); + ++current_label_id) { + if (h_result_labels[current_label_id] == h_result_labels[label_id]) { + for (size_t i = h_result_offsets[current_label_id]; + (i < h_result_offsets[current_label_id + 1]) && (test_ret_value == 0); + ++i) { + if (h_result_hops[i] == hop) { + bool found = false; + for (size_t j = 0; (!found) && (j < sources_size); ++j) { + found = (h_result_srcs[i] == check_sources[j]); + } + + TEST_ASSERT(test_ret_value, + found, + "encountered source vertex that was not part of previous frontier"); + } + + if (prior_sources_behavior == CARRY_OVER) { + // Make sure destination isn't already in the source list + bool found = false; + for (size_t j = 0; (!found) && (j < destinations_size); ++j) { + found = (h_result_dsts[i] == 
check_destinations[j]); + } + + if (!found) { + check_destinations[destinations_size] = h_result_dsts[i]; + ++destinations_size; + } + } else { + check_destinations[destinations_size] = h_result_dsts[i]; + ++destinations_size; + } + } + } + } + + vertex_t* tmp = check_sources; + check_sources = check_destinations; + check_destinations = tmp; + sources_size = destinations_size; + destinations_size = 0; + } + + if (prior_sources_behavior == EXCLUDE) { + // Make sure vertex v only appears as source in the first hop after it is encountered + for (size_t current_label_id = label_id; current_label_id < (result_offsets_size - 1); + ++current_label_id) { + if (h_result_labels[current_label_id] == h_result_labels[label_id]) { + for (size_t i = h_result_offsets[current_label_id]; + (i < h_result_offsets[current_label_id + 1]) && (test_ret_value == 0); + ++i) { + for (size_t j = i + 1; + (j < h_result_offsets[current_label_id + 1]) && (test_ret_value == 0); + ++j) { + if (h_result_srcs[i] == h_result_srcs[j]) { + TEST_ASSERT(test_ret_value, + h_result_hops[i] == h_result_hops[j], + "source vertex should not have been used in diferent hops"); + } + } + } + } + } + } + + if (dedupe_sources) { + // Make sure vertex v only appears as source once for each edge after it appears as + // destination Externally test this by verifying that vertex v only appears in <= hop + // size/degree + for (size_t current_label_id = label_id; current_label_id < (result_offsets_size - 1); + ++current_label_id) { + if (h_result_labels[current_label_id] == h_result_labels[label_id]) { + for (size_t i = h_result_offsets[current_label_id]; + (i < h_result_offsets[current_label_id + 1]) && (test_ret_value == 0); + ++i) { + if (h_result_hops[i] > 0) { + size_t num_occurrences = 1; + for (size_t j = i + 1; j < h_result_offsets[current_label_id + 1]; ++j) { + if ((h_result_srcs[j] == h_result_srcs[i]) && + (h_result_hops[j] == h_result_hops[i])) + num_occurrences++; + } + + if (fan_out[h_result_hops[i]] < 
0) { + TEST_ASSERT(test_ret_value, + num_occurrences <= degree[h_result_srcs[i]], + "source vertex used in too many return edges"); + } else { + TEST_ASSERT(test_ret_value, + num_occurrences < fan_out[h_result_hops[i]], + "source vertex used in too many return edges"); + } + } + } + } + } + } + } + } + + cugraph_sample_result_free(result); +#endif + + cugraph_mg_graph_free(graph); + cugraph_error_free(ret_error); + return test_ret_value; +} + +int test_biased_neighbor_sample(const cugraph_resource_handle_t* handle) +{ + size_t num_edges = 8; + size_t num_vertices = 6; + size_t fan_out_size = 2; + size_t num_starts = 2; + + vertex_t src[] = {0, 1, 1, 2, 2, 2, 3, 4}; + vertex_t dst[] = {1, 3, 4, 0, 1, 3, 5, 5}; + weight_t wgt[] = {1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7}; + vertex_t start[] = {2, 2}; + int fan_out[] = {1, 2}; + + bool_t with_replacement = FALSE; + bool_t return_hops = TRUE; + cugraph_prior_sources_behavior_t prior_sources_behavior = DEFAULT; + bool_t dedupe_sources = FALSE; + + return generic_biased_neighbor_sample_test(handle, + src, + dst, + wgt, + NULL, + NULL, + num_vertices, + num_edges, + start, + NULL, + num_starts, + fan_out, + fan_out_size, + with_replacement, + return_hops, + prior_sources_behavior, + dedupe_sources); +} + +int test_biased_neighbor_from_alex(const cugraph_resource_handle_t* handle) +{ + size_t num_edges = 12; + size_t num_vertices = 5; + size_t fan_out_size = 2; + size_t num_starts = 2; + size_t num_start_labels = 2; + + vertex_t src[] = {0, 1, 2, 3, 4, 3, 4, 2, 0, 1, 0, 2}; + vertex_t dst[] = {1, 2, 4, 2, 3, 4, 1, 1, 2, 3, 4, 4}; + edge_t idx[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; + int32_t typ[] = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 0}; + weight_t wgt[] = {0.0, 0.1, 0.2, 3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.10, 0.11}; + vertex_t start[] = {0, 4}; + int32_t batch[] = {0, 1}; + int fan_out[] = {2, 2}; + + bool_t store_transposed = FALSE; + + int test_ret_value = 0; + cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; + 
cugraph_error_t* ret_error = NULL; + + cugraph_graph_t* graph = NULL; + cugraph_sample_result_t* result = NULL; + + bool_t with_replacement = FALSE; + bool_t return_hops = TRUE; + cugraph_prior_sources_behavior_t prior_sources_behavior = DEFAULT; + bool_t dedupe_sources = FALSE; + bool_t renumber_results = FALSE; + cugraph_compression_type_t compression = COO; + bool_t compress_per_hop = FALSE; + + cugraph_type_erased_device_array_t* d_start = NULL; + cugraph_type_erased_device_array_t* d_label = NULL; + cugraph_type_erased_device_array_view_t* d_start_view = NULL; + cugraph_type_erased_device_array_view_t* d_label_view = NULL; + cugraph_type_erased_host_array_view_t* h_fan_out_view = NULL; + + int rank = cugraph_resource_handle_get_rank(handle); + + cugraph_rng_state_t* rng_state; + ret_code = cugraph_rng_state_create(handle, rank, &rng_state, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "rng_state create failed."); + TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + + ret_code = create_mg_test_graph_with_properties( + handle, src, dst, idx, typ, wgt, num_edges, store_transposed, FALSE, &graph, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "graph creation failed."); + TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + + ret_code = + cugraph_type_erased_device_array_create(handle, num_starts, INT32, &d_start, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "d_start create failed."); + + d_start_view = cugraph_type_erased_device_array_view(d_start); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, d_start_view, (byte_t*)start, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "start copy_from_host failed."); + + ret_code = + cugraph_type_erased_device_array_create(handle, num_starts, INT32, &d_label, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, 
"d_label create failed."); + + d_label_view = cugraph_type_erased_device_array_view(d_label); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, d_label_view, (byte_t*)batch, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "start copy_from_host failed."); + + h_fan_out_view = cugraph_type_erased_host_array_view_create(fan_out, fan_out_size, INT32); + + cugraph_sampling_options_t* sampling_options; + + ret_code = cugraph_sampling_options_create(&sampling_options, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "sampling_options create failed."); + + cugraph_sampling_set_with_replacement(sampling_options, with_replacement); + cugraph_sampling_set_return_hops(sampling_options, return_hops); + cugraph_sampling_set_prior_sources_behavior(sampling_options, prior_sources_behavior); + cugraph_sampling_set_dedupe_sources(sampling_options, dedupe_sources); + cugraph_sampling_set_renumber_results(sampling_options, renumber_results); + cugraph_sampling_set_compression_type(sampling_options, compression); + cugraph_sampling_set_compress_per_hop(sampling_options, compress_per_hop); + + ret_code = cugraph_biased_neighbor_sample(handle, + graph, + NULL, + d_start_view, + d_label_view, + NULL, + NULL, + NULL, + h_fan_out_view, + rng_state, + sampling_options, + FALSE, + &result, + &ret_error); + +#ifdef NO_CUGRAPH_OPS + TEST_ASSERT( + test_ret_value, ret_code != CUGRAPH_SUCCESS, "biased_neighbor_sample should have failed"); +#else + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "biased_neighbor_sample failed."); + + cugraph_type_erased_device_array_view_t* result_src; + cugraph_type_erased_device_array_view_t* result_dst; + cugraph_type_erased_device_array_view_t* result_index; + cugraph_type_erased_device_array_view_t* result_type; + cugraph_type_erased_device_array_view_t* result_weight; + 
cugraph_type_erased_device_array_view_t* result_labels; + cugraph_type_erased_device_array_view_t* result_hops; + cugraph_type_erased_device_array_view_t* result_offsets; + + result_src = cugraph_sample_result_get_sources(result); + result_dst = cugraph_sample_result_get_destinations(result); + result_index = cugraph_sample_result_get_edge_id(result); + result_type = cugraph_sample_result_get_edge_type(result); + result_weight = cugraph_sample_result_get_edge_weight(result); + result_labels = cugraph_sample_result_get_start_labels(result); + result_hops = cugraph_sample_result_get_hop(result); + result_offsets = cugraph_sample_result_get_offsets(result); + + size_t result_size = cugraph_type_erased_device_array_view_size(result_src); + size_t offsets_size = cugraph_type_erased_device_array_view_size(result_offsets); + + vertex_t h_srcs[result_size]; + vertex_t h_dsts[result_size]; + edge_t h_index[result_size]; + int h_type[result_size]; + weight_t h_wgt[result_size]; + int h_labels[result_size]; + int h_hop[result_size]; + int h_offsets[offsets_size]; + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_srcs, result_src, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_dsts, result_dst, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_index, result_index, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_type, result_type, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_wgt, result_weight, 
&ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_labels, result_labels, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_offsets, result_offsets, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + for (int k = 0; k < offsets_size - 1; k += fan_out_size) { + for (int h = 0; h < fan_out_size; ++h) { + int hop_start = h_offsets[k + h]; + int hop_end = h_offsets[k + h + 1]; + for (int i = hop_start; i < hop_end; ++i) { + h_hop[i] = h; + } + } + } + + for (int k = 0; k < num_start_labels + 1; ++k) { + h_offsets[k] = h_offsets[k * fan_out_size]; + } + offsets_size = num_start_labels + 1; + + // NOTE: The C++ tester does a more thorough validation. For our purposes + // here we will do a simpler validation, merely checking that all edges + // are actually part of the graph + edge_t M[num_vertices][num_vertices]; + + for (int i = 0; i < num_vertices; ++i) + for (int j = 0; j < num_vertices; ++j) + M[i][j] = -1; + + for (int i = 0; i < num_edges; ++i) + M[src[i]][dst[i]] = idx[i]; + + for (int i = 0; (i < result_size) && (test_ret_value == 0); ++i) { + TEST_ASSERT(test_ret_value, + M[h_srcs[i]][h_dsts[i]] >= 0, + "biased_neighbor_sample got edge that doesn't exist"); + } +#endif + + cugraph_sample_result_free(result); + + cugraph_type_erased_host_array_view_free(h_fan_out_view); + cugraph_mg_graph_free(graph); + cugraph_error_free(ret_error); + cugraph_sampling_options_free(sampling_options); + + return test_ret_value; +} + +int test_biased_neighbor_sample_alex_bug(const cugraph_resource_handle_t* handle) +{ + size_t num_edges = 156; + size_t num_vertices = 34; + size_t fan_out_size = 2; + size_t num_starts = 4; + size_t num_labels = 3; + + vertex_t src[] = 
{1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 17, 19, 21, 31, 2, 3, 7, 13, + 17, 19, 21, 30, 3, 7, 8, 9, 13, 27, 28, 32, 7, 12, 13, 6, 10, 6, 10, 16, + 16, 30, 32, 33, 33, 33, 32, 33, 32, 33, 32, 33, 33, 32, 33, 32, 33, 25, 27, 29, + 32, 33, 25, 27, 31, 31, 29, 33, 33, 31, 33, 32, 33, 32, 33, 32, 33, 33, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, + 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5, 5, 5, 6, 8, + 8, 8, 9, 13, 14, 14, 15, 15, 18, 18, 19, 20, 20, 22, 22, 23, 23, 23, 23, 23, + 24, 24, 24, 25, 26, 26, 27, 28, 28, 29, 29, 30, 30, 31, 31, 32}; + vertex_t dst[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, + 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5, 5, 5, + 6, 8, 8, 8, 9, 13, 14, 14, 15, 15, 18, 18, 19, 20, 20, 22, 22, 23, 23, 23, + 23, 23, 24, 24, 24, 25, 26, 26, 27, 28, 28, 29, 29, 30, 30, 31, 31, 32, 1, 2, + 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 17, 19, 21, 31, 2, 3, 7, 13, 17, 19, + 21, 30, 3, 7, 8, 9, 13, 27, 28, 32, 7, 12, 13, 6, 10, 6, 10, 16, 16, 30, + 32, 33, 33, 33, 32, 33, 32, 33, 32, 33, 33, 32, 33, 32, 33, 25, 27, 29, 32, 33, + 25, 27, 31, 31, 29, 33, 33, 31, 33, 32, 33, 32, 33, 32, 33, 33}; + weight_t wgt[] = { + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 
1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; + + edge_t edge_ids[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, + 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, + 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, + 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, + 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, + 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, + 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155}; + + vertex_t start[] = {0, 1, 2, 5}; + int32_t start_labels[] = {0, 0, 1, 2}; + int32_t label_list[] = {0, 1, 2}; + int32_t label_to_output_comm_rank[] = {0, 0, 1}; + int fan_out[] = {2, 3}; + + size_t expected_size[] = {3, 2, 1, 1, 1, 1, 1, 1}; + + bool_t with_replacement = FALSE; + bool_t return_hops = TRUE; + cugraph_prior_sources_behavior_t prior_sources_behavior = CARRY_OVER; + bool_t dedupe_sources = TRUE; + bool_t renumber_results = FALSE; + cugraph_compression_type_t compression = COO; + bool_t compress_per_hop = FALSE; + + // Create graph + int test_ret_value = 0; + cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; + cugraph_error_t* ret_error = NULL; + cugraph_graph_t* graph = NULL; + cugraph_sample_result_t* result = NULL; + + ret_code = create_mg_test_graph_with_properties( + handle, src, dst, edge_ids, NULL, wgt, num_edges, FALSE, TRUE, &graph, &ret_error); + + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "graph creation failed."); + + cugraph_type_erased_device_array_t* d_start = NULL; + cugraph_type_erased_device_array_view_t* d_start_view = NULL; + cugraph_type_erased_device_array_t* 
d_start_labels = NULL; + cugraph_type_erased_device_array_view_t* d_start_labels_view = NULL; + cugraph_type_erased_device_array_t* d_label_list = NULL; + cugraph_type_erased_device_array_view_t* d_label_list_view = NULL; + cugraph_type_erased_device_array_t* d_label_to_output_comm_rank = NULL; + cugraph_type_erased_device_array_view_t* d_label_to_output_comm_rank_view = NULL; + cugraph_type_erased_host_array_view_t* h_fan_out_view = NULL; + + int rank = cugraph_resource_handle_get_rank(handle); + + if (rank > 0) { num_starts = 0; } + + cugraph_rng_state_t* rng_state; + ret_code = cugraph_rng_state_create(handle, rank, &rng_state, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "rng_state create failed."); + TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + + ret_code = + cugraph_type_erased_device_array_create(handle, num_starts, INT32, &d_start, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "d_start create failed."); + + d_start_view = cugraph_type_erased_device_array_view(d_start); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, d_start_view, (byte_t*)start, &ret_error); + + ret_code = + cugraph_type_erased_device_array_create(handle, num_starts, INT32, &d_start_labels, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "d_start_labels create failed."); + + d_start_labels_view = cugraph_type_erased_device_array_view(d_start_labels); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, d_start_labels_view, (byte_t*)start_labels, &ret_error); + + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "start_labels copy_from_host failed."); + + ret_code = + cugraph_type_erased_device_array_create(handle, num_labels, INT32, &d_label_list, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "d_label_list create failed."); + + d_label_list_view = 
cugraph_type_erased_device_array_view(d_label_list); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, d_label_list_view, (byte_t*)label_list, &ret_error); + + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "label_list copy_from_host failed."); + + ret_code = cugraph_type_erased_device_array_create( + handle, num_labels, INT32, &d_label_to_output_comm_rank, &ret_error); + TEST_ASSERT( + test_ret_value, ret_code == CUGRAPH_SUCCESS, "d_label_to_output_comm_rank create failed."); + + d_label_to_output_comm_rank_view = + cugraph_type_erased_device_array_view(d_label_to_output_comm_rank); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, d_label_to_output_comm_rank_view, (byte_t*)label_to_output_comm_rank, &ret_error); + + TEST_ASSERT(test_ret_value, + ret_code == CUGRAPH_SUCCESS, + "label_to_output_comm_rank copy_from_host failed."); + + h_fan_out_view = cugraph_type_erased_host_array_view_create(fan_out, fan_out_size, INT32); + + cugraph_sampling_options_t* sampling_options; + ret_code = cugraph_sampling_options_create(&sampling_options, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "sampling_options create failed."); + + cugraph_sampling_set_with_replacement(sampling_options, with_replacement); + cugraph_sampling_set_return_hops(sampling_options, return_hops); + cugraph_sampling_set_prior_sources_behavior(sampling_options, prior_sources_behavior); + cugraph_sampling_set_dedupe_sources(sampling_options, dedupe_sources); + cugraph_sampling_set_renumber_results(sampling_options, renumber_results); + cugraph_sampling_set_compression_type(sampling_options, compression); + cugraph_sampling_set_compress_per_hop(sampling_options, compress_per_hop); + + ret_code = cugraph_biased_neighbor_sample(handle, + graph, + NULL, + d_start_view, + d_start_labels_view, + d_label_list_view, + d_label_to_output_comm_rank_view, + NULL, + h_fan_out_view, + rng_state, + sampling_options, + FALSE, + 
&result, + &ret_error); + +#ifdef NO_CUGRAPH_OPS + TEST_ASSERT( + test_ret_value, ret_code != CUGRAPH_SUCCESS, "biased_neighbor_sample should have failed") +#else + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "biased_neighbor_sample failed."); + + cugraph_type_erased_device_array_view_t* result_srcs = NULL; + cugraph_type_erased_device_array_view_t* result_dsts = NULL; + cugraph_type_erased_device_array_view_t* result_edge_id = NULL; + cugraph_type_erased_device_array_view_t* result_weights = NULL; + cugraph_type_erased_device_array_view_t* result_hops = NULL; + cugraph_type_erased_device_array_view_t* result_offsets = NULL; + + result_srcs = cugraph_sample_result_get_sources(result); + result_dsts = cugraph_sample_result_get_destinations(result); + result_edge_id = cugraph_sample_result_get_edge_id(result); + result_weights = cugraph_sample_result_get_edge_weight(result); + result_hops = cugraph_sample_result_get_hop(result); + result_offsets = cugraph_sample_result_get_offsets(result); + + size_t result_size = cugraph_type_erased_device_array_view_size(result_srcs); + size_t result_offsets_size = cugraph_type_erased_device_array_view_size(result_offsets); + + vertex_t h_srcs[result_size]; + vertex_t h_dsts[result_size]; + edge_t h_edge_id[result_size]; + weight_t h_weight[result_size]; + int32_t h_hops[result_size]; + size_t h_result_offsets[result_offsets_size]; + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_srcs, result_srcs, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_dsts, result_dsts, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_edge_id, 
result_edge_id, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_weight, result_weights, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_hops, result_hops, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_result_offsets, result_offsets, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + // NOTE: The C++ tester does a more thorough validation. For our purposes + // here we will do a simpler validation, merely checking that all edges + // are actually part of the graph + weight_t M_w[num_vertices][num_vertices]; + edge_t M_edge_id[num_vertices][num_vertices]; + + for (int i = 0; i < num_vertices; ++i) + for (int j = 0; j < num_vertices; ++j) { + M_w[i][j] = 0.0; + M_edge_id[i][j] = -1; + } + + for (int i = 0; i < num_edges; ++i) { + M_w[src[i]][dst[i]] = wgt[i]; + M_edge_id[src[i]][dst[i]] = edge_ids[i]; + } + + for (int i = 0; (i < result_size) && (test_ret_value == 0); ++i) { + TEST_ASSERT(test_ret_value, + M_w[h_srcs[i]][h_dsts[i]] == h_weight[i], + "biased_neighbor_sample got edge that doesn't exist"); + TEST_ASSERT(test_ret_value, + M_edge_id[h_srcs[i]][h_dsts[i]] == h_edge_id[i], + "biased_neighbor_sample got edge that doesn't exist"); + } + + TEST_ASSERT( + test_ret_value, result_offsets_size == expected_size[rank], "incorrect number of results"); + + cugraph_sample_result_free(result); +#endif + + cugraph_sg_graph_free(graph); + cugraph_error_free(ret_error); +} + +int test_biased_neighbor_sample_sort_by_hop(const cugraph_resource_handle_t* handle) +{ + size_t num_edges = 156; + size_t num_vertices = 34; + 
size_t fan_out_size = 2; + size_t num_starts = 4; + size_t num_labels = 3; + + vertex_t src[] = {1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 17, 19, 21, 31, 2, 3, 7, 13, + 17, 19, 21, 30, 3, 7, 8, 9, 13, 27, 28, 32, 7, 12, 13, 6, 10, 6, 10, 16, + 16, 30, 32, 33, 33, 33, 32, 33, 32, 33, 32, 33, 33, 32, 33, 32, 33, 25, 27, 29, + 32, 33, 25, 27, 31, 31, 29, 33, 33, 31, 33, 32, 33, 32, 33, 32, 33, 33, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, + 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5, 5, 5, 6, 8, + 8, 8, 9, 13, 14, 14, 15, 15, 18, 18, 19, 20, 20, 22, 22, 23, 23, 23, 23, 23, + 24, 24, 24, 25, 26, 26, 27, 28, 28, 29, 29, 30, 30, 31, 31, 32}; + vertex_t dst[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, + 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5, 5, 5, + 6, 8, 8, 8, 9, 13, 14, 14, 15, 15, 18, 18, 19, 20, 20, 22, 22, 23, 23, 23, + 23, 23, 24, 24, 24, 25, 26, 26, 27, 28, 28, 29, 29, 30, 30, 31, 31, 32, 1, 2, + 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 17, 19, 21, 31, 2, 3, 7, 13, 17, 19, + 21, 30, 3, 7, 8, 9, 13, 27, 28, 32, 7, 12, 13, 6, 10, 6, 10, 16, 16, 30, + 32, 33, 33, 33, 32, 33, 32, 33, 32, 33, 33, 32, 33, 32, 33, 25, 27, 29, 32, 33, + 25, 27, 31, 31, 29, 33, 33, 31, 33, 32, 33, 32, 33, 32, 33, 33}; + weight_t wgt[] = { + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 
1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; + + edge_t edge_ids[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, + 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, + 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, + 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, + 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, + 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, + 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155}; + + vertex_t start[] = {0, 1, 2, 5}; + int32_t start_labels[] = {0, 0, 1, 2}; + int32_t label_list[] = {0, 1, 2}; + int32_t label_to_output_comm_rank[] = {0, 0, 1}; + int fan_out[] = {2, 3}; + + size_t expected_size[] = {3, 2, 1, 1, 1, 1, 1, 1}; + + bool_t with_replacement = FALSE; + bool_t return_hops = TRUE; + cugraph_prior_sources_behavior_t prior_sources_behavior = CARRY_OVER; + bool_t dedupe_sources = TRUE; + bool_t renumber_results = FALSE; + cugraph_compression_type_t compression = COO; + bool_t compress_per_hop = FALSE; + + // Create graph + int test_ret_value = 0; + cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; + cugraph_error_t* ret_error = NULL; + cugraph_graph_t* graph = NULL; + cugraph_sample_result_t* result = NULL; + + ret_code = create_mg_test_graph_with_properties( + handle, src, dst, edge_ids, NULL, wgt, num_edges, FALSE, TRUE, &graph, &ret_error); + + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "graph creation failed."); + + cugraph_type_erased_device_array_t* d_start = NULL; + 
cugraph_type_erased_device_array_view_t* d_start_view = NULL; + cugraph_type_erased_device_array_t* d_start_labels = NULL; + cugraph_type_erased_device_array_view_t* d_start_labels_view = NULL; + cugraph_type_erased_device_array_t* d_label_list = NULL; + cugraph_type_erased_device_array_view_t* d_label_list_view = NULL; + cugraph_type_erased_device_array_t* d_label_to_output_comm_rank = NULL; + cugraph_type_erased_device_array_view_t* d_label_to_output_comm_rank_view = NULL; + cugraph_type_erased_host_array_view_t* h_fan_out_view = NULL; + + int rank = cugraph_resource_handle_get_rank(handle); + + if (rank > 0) { num_starts = 0; } + + cugraph_rng_state_t* rng_state; + ret_code = cugraph_rng_state_create(handle, rank, &rng_state, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "rng_state create failed."); + TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + + ret_code = + cugraph_type_erased_device_array_create(handle, num_starts, INT32, &d_start, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "d_start create failed."); + + d_start_view = cugraph_type_erased_device_array_view(d_start); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, d_start_view, (byte_t*)start, &ret_error); + + ret_code = + cugraph_type_erased_device_array_create(handle, num_starts, INT32, &d_start_labels, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "d_start_labels create failed."); + + d_start_labels_view = cugraph_type_erased_device_array_view(d_start_labels); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, d_start_labels_view, (byte_t*)start_labels, &ret_error); + + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "start_labels copy_from_host failed."); + + ret_code = + cugraph_type_erased_device_array_create(handle, num_labels, INT32, &d_label_list, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == 
CUGRAPH_SUCCESS, "d_label_list create failed."); + + d_label_list_view = cugraph_type_erased_device_array_view(d_label_list); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, d_label_list_view, (byte_t*)label_list, &ret_error); + + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "label_list copy_from_host failed."); + + ret_code = cugraph_type_erased_device_array_create( + handle, num_labels, INT32, &d_label_to_output_comm_rank, &ret_error); + TEST_ASSERT( + test_ret_value, ret_code == CUGRAPH_SUCCESS, "d_label_to_output_comm_rank create failed."); + + d_label_to_output_comm_rank_view = + cugraph_type_erased_device_array_view(d_label_to_output_comm_rank); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, d_label_to_output_comm_rank_view, (byte_t*)label_to_output_comm_rank, &ret_error); + + TEST_ASSERT(test_ret_value, + ret_code == CUGRAPH_SUCCESS, + "label_to_output_comm_rank copy_from_host failed."); + + h_fan_out_view = cugraph_type_erased_host_array_view_create(fan_out, fan_out_size, INT32); + + cugraph_sampling_options_t* sampling_options; + ret_code = cugraph_sampling_options_create(&sampling_options, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "sampling_options create failed."); + + cugraph_sampling_set_with_replacement(sampling_options, with_replacement); + cugraph_sampling_set_return_hops(sampling_options, return_hops); + cugraph_sampling_set_prior_sources_behavior(sampling_options, prior_sources_behavior); + cugraph_sampling_set_dedupe_sources(sampling_options, dedupe_sources); + cugraph_sampling_set_renumber_results(sampling_options, renumber_results); + cugraph_sampling_set_compression_type(sampling_options, compression); + cugraph_sampling_set_compress_per_hop(sampling_options, compress_per_hop); + + ret_code = cugraph_biased_neighbor_sample(handle, + graph, + NULL, + d_start_view, + d_start_labels_view, + d_label_list_view, + 
d_label_to_output_comm_rank_view, + NULL, + h_fan_out_view, + rng_state, + sampling_options, + FALSE, + &result, + &ret_error); + +#ifdef NO_CUGRAPH_OPS + TEST_ASSERT( + test_ret_value, ret_code != CUGRAPH_SUCCESS, "biased_neighbor_sample should have failed") +#else + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "biased_neighbor_sample failed."); + + cugraph_type_erased_device_array_view_t* result_srcs = NULL; + cugraph_type_erased_device_array_view_t* result_dsts = NULL; + cugraph_type_erased_device_array_view_t* result_edge_id = NULL; + cugraph_type_erased_device_array_view_t* result_weights = NULL; + cugraph_type_erased_device_array_view_t* result_hops = NULL; + cugraph_type_erased_device_array_view_t* result_offsets = NULL; + + result_srcs = cugraph_sample_result_get_sources(result); + result_dsts = cugraph_sample_result_get_destinations(result); + result_edge_id = cugraph_sample_result_get_edge_id(result); + result_weights = cugraph_sample_result_get_edge_weight(result); + result_hops = cugraph_sample_result_get_hop(result); + result_offsets = cugraph_sample_result_get_offsets(result); + + size_t result_size = cugraph_type_erased_device_array_view_size(result_srcs); + size_t result_offsets_size = cugraph_type_erased_device_array_view_size(result_offsets); + + vertex_t h_srcs[result_size]; + vertex_t h_dsts[result_size]; + edge_t h_edge_id[result_size]; + weight_t h_weight[result_size]; + int32_t h_hops[result_size]; + size_t h_result_offsets[result_offsets_size]; + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_srcs, result_srcs, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_dsts, result_dsts, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host 
failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_edge_id, result_edge_id, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_weight, result_weights, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_result_offsets, result_offsets, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + for (int k = 0; k < result_offsets_size - 1; k += fan_out_size) { + for (int h = 0; h < fan_out_size; ++h) { + int hop_start = h_result_offsets[k + h]; + int hop_end = h_result_offsets[k + h + 1]; + for (int i = hop_start; i < hop_end; ++i) { + h_hops[i] = h; + } + } + } + + size_t num_local_labels = (result_offsets_size - 1) / fan_out_size; + + for (int k = 0; k < num_local_labels + 1; ++k) { + h_result_offsets[k] = h_result_offsets[k * fan_out_size]; + } + result_offsets_size = num_local_labels + 1; + + // NOTE: The C++ tester does a more thorough validation. 
For our purposes + // here we will do a simpler validation, merely checking that all edges + // are actually part of the graph + weight_t M_w[num_vertices][num_vertices]; + edge_t M_edge_id[num_vertices][num_vertices]; + + for (int i = 0; i < num_vertices; ++i) + for (int j = 0; j < num_vertices; ++j) { + M_w[i][j] = 0.0; + M_edge_id[i][j] = -1; + } + + for (int i = 0; i < num_edges; ++i) { + M_w[src[i]][dst[i]] = wgt[i]; + M_edge_id[src[i]][dst[i]] = edge_ids[i]; + } + + for (int i = 0; (i < result_size) && (test_ret_value == 0); ++i) { + TEST_ASSERT(test_ret_value, + M_w[h_srcs[i]][h_dsts[i]] == h_weight[i], + "biased_neighbor_sample got edge that doesn't exist"); + TEST_ASSERT(test_ret_value, + M_edge_id[h_srcs[i]][h_dsts[i]] == h_edge_id[i], + "biased_neighbor_sample got edge that doesn't exist"); + } + + TEST_ASSERT( + test_ret_value, result_offsets_size == expected_size[rank], "incorrect number of results"); + + for (int i = 0; i < (result_offsets_size - 1) && (test_ret_value == 0); ++i) { + for (int j = h_result_offsets[i]; j < (h_result_offsets[i + 1] - 1) && (test_ret_value == 0); + ++j) { + TEST_ASSERT(test_ret_value, h_hops[j] <= h_hops[j + 1], "Results not sorted by hop id"); + } + } + + cugraph_sample_result_free(result); +#endif + + cugraph_sg_graph_free(graph); + cugraph_error_free(ret_error); +} + +int test_biased_neighbor_sample_dedupe_sources(const cugraph_resource_handle_t* handle) +{ + data_type_id_t vertex_tid = INT32; + data_type_id_t edge_tid = INT32; + data_type_id_t weight_tid = FLOAT32; + data_type_id_t edge_id_tid = INT32; + data_type_id_t edge_type_tid = INT32; + + size_t num_edges = 9; + size_t num_vertices = 6; + size_t fan_out_size = 3; + size_t num_starts = 2; + + vertex_t src[] = {0, 0, 1, 1, 2, 2, 2, 3, 4}; + vertex_t dst[] = {1, 3, 3, 4, 0, 1, 3, 5, 5}; + edge_t edge_ids[] = {0, 1, 2, 3, 4, 5, 6, 7, 8}; + weight_t weight[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9}; + int32_t edge_types[] = {8, 7, 6, 5, 4, 3, 2, 1, 0}; + 
vertex_t start[] = {2, 3}; + int start_labels[] = {6, 12}; + int fan_out[] = {-1, -1, -1}; + + int test_ret_value = 0; + cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; + cugraph_error_t* ret_error = NULL; + + bool_t with_replacement = FALSE; + bool_t return_hops = TRUE; + cugraph_prior_sources_behavior_t prior_sources_behavior = DEFAULT; + bool_t dedupe_sources = TRUE; + + return generic_biased_neighbor_sample_test(handle, + src, + dst, + weight, + edge_ids, + edge_types, + num_vertices, + num_edges, + start, + start_labels, + num_starts, + fan_out, + fan_out_size, + with_replacement, + return_hops, + prior_sources_behavior, + dedupe_sources); +} + +int test_biased_neighbor_sample_unique_sources(const cugraph_resource_handle_t* handle) +{ + data_type_id_t vertex_tid = INT32; + data_type_id_t edge_tid = INT32; + data_type_id_t weight_tid = FLOAT32; + data_type_id_t edge_id_tid = INT32; + data_type_id_t edge_type_tid = INT32; + + size_t num_edges = 9; + size_t num_vertices = 6; + size_t fan_out_size = 3; + size_t num_starts = 2; + + vertex_t src[] = {0, 0, 1, 1, 2, 2, 2, 3, 4}; + vertex_t dst[] = {1, 2, 3, 4, 0, 1, 3, 5, 5}; + edge_t edge_ids[] = {0, 1, 2, 3, 4, 5, 6, 7, 8}; + weight_t weight[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9}; + int32_t edge_types[] = {8, 7, 6, 5, 4, 3, 2, 1, 0}; + vertex_t start[] = {2, 3}; + int start_labels[] = {6, 12}; + int fan_out[] = {-1, -1, -1}; + + int test_ret_value = 0; + cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; + cugraph_error_t* ret_error = NULL; + + bool_t with_replacement = FALSE; + bool_t return_hops = TRUE; + cugraph_prior_sources_behavior_t prior_sources_behavior = EXCLUDE; + bool_t dedupe_sources = FALSE; + + return generic_biased_neighbor_sample_test(handle, + src, + dst, + weight, + edge_ids, + edge_types, + num_vertices, + num_edges, + start, + start_labels, + num_starts, + fan_out, + fan_out_size, + with_replacement, + return_hops, + prior_sources_behavior, + dedupe_sources); +} + +int 
test_biased_neighbor_sample_carry_over_sources(const cugraph_resource_handle_t* handle) +{ + data_type_id_t vertex_tid = INT32; + data_type_id_t edge_tid = INT32; + data_type_id_t weight_tid = FLOAT32; + data_type_id_t edge_id_tid = INT32; + data_type_id_t edge_type_tid = INT32; + + size_t num_edges = 9; + size_t num_vertices = 6; + size_t fan_out_size = 3; + size_t num_starts = 2; + + vertex_t src[] = {0, 0, 1, 1, 2, 2, 2, 3, 4}; + vertex_t dst[] = {1, 2, 3, 4, 0, 1, 3, 5, 5}; + edge_t edge_ids[] = {0, 1, 2, 3, 4, 5, 6, 7, 8}; + weight_t weight[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9}; + int32_t edge_types[] = {8, 7, 6, 5, 4, 3, 2, 1, 0}; + vertex_t start[] = {2, 3}; + int start_labels[] = {6, 12}; + int fan_out[] = {-1, -1, -1}; + + int test_ret_value = 0; + cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; + cugraph_error_t* ret_error = NULL; + + bool_t with_replacement = FALSE; + bool_t return_hops = TRUE; + cugraph_prior_sources_behavior_t prior_sources_behavior = CARRY_OVER; + bool_t dedupe_sources = FALSE; + + return generic_biased_neighbor_sample_test(handle, + src, + dst, + weight, + edge_ids, + edge_types, + num_vertices, + num_edges, + start, + start_labels, + num_starts, + fan_out, + fan_out_size, + with_replacement, + return_hops, + prior_sources_behavior, + dedupe_sources); +} + +/******************************************************************************/ + +int main(int argc, char** argv) +{ + void* raft_handle = create_mg_raft_handle(argc, argv); + cugraph_resource_handle_t* handle = cugraph_create_resource_handle(raft_handle); + + int result = 0; + result |= RUN_MG_TEST(test_biased_neighbor_sample, handle); + result |= RUN_MG_TEST(test_biased_neighbor_from_alex, handle); + // result |= RUN_MG_TEST(test_biased_neighbor_sample_alex_bug, handle); + result |= RUN_MG_TEST(test_biased_neighbor_sample_sort_by_hop, handle); + // result |= RUN_MG_TEST(test_biased_neighbor_sample_dedupe_sources, handle); + // result |= 
RUN_MG_TEST(test_biased_neighbor_sample_unique_sources, handle); + // result |= RUN_MG_TEST(test_biased_neighbor_sample_carry_over_sources, handle); + + cugraph_free_resource_handle(handle); + free_mg_raft_handle(raft_handle); + + return result; +} From dd5be663e4bedd73ede9f68c93cd92ea8bdefefe Mon Sep 17 00:00:00 2001 From: Charles Hastings Date: Tue, 16 Jul 2024 12:51:55 -0700 Subject: [PATCH 3/5] rename uniform_neighbor_sampling.cpp, since both uniform and biased sampling are in same file --- cpp/CMakeLists.txt | 2 +- .../{uniform_neighbor_sampling.cpp => neighbor_sampling.cpp} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename cpp/src/c_api/{uniform_neighbor_sampling.cpp => neighbor_sampling.cpp} (100%) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index cf511b1e08a..61ec34c3319 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -652,7 +652,7 @@ add_library(cugraph_c src/c_api/lookup_src_dst.cpp src/c_api/louvain.cpp src/c_api/triangle_count.cpp - src/c_api/uniform_neighbor_sampling.cpp + src/c_api/neighbor_sampling.cpp src/c_api/labeling_result.cpp src/c_api/weakly_connected_components.cpp src/c_api/strongly_connected_components.cpp diff --git a/cpp/src/c_api/uniform_neighbor_sampling.cpp b/cpp/src/c_api/neighbor_sampling.cpp similarity index 100% rename from cpp/src/c_api/uniform_neighbor_sampling.cpp rename to cpp/src/c_api/neighbor_sampling.cpp From d3ec01627af90b604ed44b864e7955a3a6165776 Mon Sep 17 00:00:00 2001 From: Charles Hastings Date: Tue, 16 Jul 2024 13:07:04 -0700 Subject: [PATCH 4/5] missed new header file --- cpp/src/c_api/properties.hpp | 44 ++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 cpp/src/c_api/properties.hpp diff --git a/cpp/src/c_api/properties.hpp b/cpp/src/c_api/properties.hpp new file mode 100644 index 00000000000..971607586bb --- /dev/null +++ b/cpp/src/c_api/properties.hpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include + +namespace cugraph { +namespace c_api { + +typedef struct { + cugraph_data_type_id_t property_type_; + void* vertex_property_; +} cugraph_vertex_property_t; + +typedef struct { + cugraph_data_type_id_t property_type_; + void* edge_property_; +} cugraph_edge_property_t; + +typedef struct { + cugraph_data_type_id_t property_type_; + void* vertex_property_; +} cugraph_vertex_property_view_t; + +typedef struct { + cugraph_data_type_id_t property_type_; + void* edge_property_; +} cugraph_edge_property_view_t; + +} // namespace c_api +} // namespace cugraph From aae235785445b7792d444fa7ea551378a5519133 Mon Sep 17 00:00:00 2001 From: Charles Hastings Date: Wed, 17 Jul 2024 12:12:50 -0700 Subject: [PATCH 5/5] add PLC bindings for biased sampling --- cpp/include/cugraph_c/properties.h | 79 +++++++++++++++---- .../pylibcugraph/_cugraph_c/properties.pxd | 29 +++++++ .../_cugraph_c/sampling_algorithms.pxd | 21 +++++ 3 files changed, 113 insertions(+), 16 deletions(-) create mode 100644 python/pylibcugraph/pylibcugraph/_cugraph_c/properties.pxd diff --git a/cpp/include/cugraph_c/properties.h b/cpp/include/cugraph_c/properties.h index e4f2a4b20a7..d7775bbf783 100644 --- a/cpp/include/cugraph_c/properties.h +++ b/cpp/include/cugraph_c/properties.h @@ -55,13 +55,14 @@ typedef struct { /** * @brief Create a vertex property * - * @param [in] handle Handle for accessing resources - * @param [in] 
graph Pointer to graph. - * @param [in] vertex_ids Device array of vertex ids - * @param [in] property Device array of vertex property - * @param [out] result Pointer to the location to store the pointer to the vertex property object - * @param [out] error Pointer to an error object storing details of any error. Will - * be populated if error code is not CUGRAPH_SUCCESS + * @param [in] handle Handle for accessing resources + * @param [in] graph Pointer to graph. + * @param [in] vertex_ids Device array of vertex ids + * @param [in] properties Device array of vertex property values + * @param [in] default_property Type-erased scalar holding the default vertex property value + * @param [out] result Pointer to the location to store the pointer to the vertex property object + * @param [out] error Pointer to an error object storing details of any error. Will + * be populated if error code is not CUGRAPH_SUCCESS * @return error code */ cugraph_error_code_t cugraph_vertex_property_create( @@ -69,20 +70,22 @@ cugraph_error_code_t cugraph_vertex_property_create( const cugraph_graph_t * graph, const cugraph_type_erased_device_array_t* vertex_ids, const cugraph_type_erased_device_array_t* properties, + const cugraph_type_erased_scalar_t* default_property, cugraph_vertex_property_t** result, cugraph_error_t** error); /** - * @brief Create a edge property + * @brief Create an edge property * - * @param [in] handle Handle for accessing resources - * @param [in] graph Pointer to graph. - * @param [in] lookup_container Lookup map - * @param [in] edge_ids Device array of edge ids - * @param [in] property Device array of edge property - * @param [out] result Pointer to the location to store the pointer to the edge property object - * @param [out] error Pointer to an error object storing details of any error. Will - * be populated if error code is not CUGRAPH_SUCCESS + * @param [in] handle Handle for accessing resources + * @param [in] graph Pointer to graph.
+ * @param [in] lookup_container Lookup map + * @param [in] edge_ids Device array of edge ids + * @param [in] properties Device array of edge property values + * @param [in] default_property Type-erased scalar holding the default edge property value + * @param [out] result Pointer to the location to store the pointer to the edge property object + * @param [out] error Pointer to an error object storing details of any error. Will + * be populated if error code is not CUGRAPH_SUCCESS * @return error code */ cugraph_error_code_t cugraph_edge_property_create( @@ -91,9 +94,53 @@ cugraph_error_code_t cugraph_edge_property_create( const cugraph_lookup_container_t* lookup_container, const cugraph_type_erased_device_array_t* edge_ids, const cugraph_type_erased_device_array_t* properties, + const cugraph_type_erased_scalar_t* default_property, cugraph_edge_property_t** result, cugraph_error_t** error); +/** + * @brief Update an existing vertex property + * + * @param [in] handle Handle for accessing resources + * @param [in] graph Pointer to graph. + * @param [in] vertex_ids Device array of vertex ids to update + * @param [in] properties Device array of vertex properties to update + * @param [in/out] result Pointer to the vertex property object to update + * @param [out] error Pointer to an error object storing details of any error. Will + * be populated if error code is not CUGRAPH_SUCCESS + * @return error code + */ +cugraph_error_code_t cugraph_vertex_property_update( + const cugraph_resource_handle_t* handle, + const cugraph_graph_t * graph, + const cugraph_type_erased_device_array_t* vertex_ids, + const cugraph_type_erased_device_array_t* properties, + const cugraph_type_erased_scalar_t* default_property, + cugraph_vertex_property_view_t* result, + cugraph_error_t** error); + +/** + * @brief Update an existing edge property + * + * @param [in] handle Handle for accessing resources + * @param [in] graph Pointer to graph.
+ * @param [in] lookup_container Lookup map + * @param [in] edge_ids Device array of edge ids to update + * @param [in] properties Device array of edge properties to update + * @param [in/out] result Pointer to the edge property object to update + * @param [out] error Pointer to an error object storing details of any error. Will + * be populated if error code is not CUGRAPH_SUCCESS + * @return error code + */ +cugraph_error_code_t cugraph_edge_property_create( + const cugraph_resource_handle_t* handle, + const cugraph_graph_t * graph, + const cugraph_lookup_container_t* lookup_container, + const cugraph_type_erased_device_array_t* edge_ids, + const cugraph_type_erased_device_array_t* properties, + cugraph_edge_property_view_t* result, + cugraph_error_t** error); + /** * @brief Create a vertex_property_view from a vertex property * diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/properties.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/properties.pxd new file mode 100644 index 00000000000..2838de3d0ab --- /dev/null +++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/properties.pxd @@ -0,0 +1,29 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+ +# Have cython use python 3 syntax +# cython: language_level = 3 + +cdef extern from "cugraph_c/properties.h": + + ctypedef struct cugraph_vertex_property_t: + pass + + ctypedef struct cugraph_edge_property_t: + pass + + ctypedef struct cugraph_vertex_property_view_t: + pass + + ctypedef struct cugraph_edge_property_view_t: + pass diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/sampling_algorithms.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/sampling_algorithms.pxd index dbd3ef4b7e1..0f852d9cecd 100644 --- a/python/pylibcugraph/pylibcugraph/_cugraph_c/sampling_algorithms.pxd +++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/sampling_algorithms.pxd @@ -40,6 +40,10 @@ from pylibcugraph._cugraph_c.array cimport ( cugraph_type_erased_device_array_t, ) +from pylibcugraph._cugraph_c.properties cimport ( + cugraph_edge_property_view_t, +) + cdef extern from "cugraph_c/sampling_algorithms.h": ########################################################################### @@ -59,6 +63,23 @@ cdef extern from "cugraph_c/sampling_algorithms.h": cugraph_error_t** error ) + cdef cugraph_error_code_t cugraph_biased_neighbor_sample( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_edge_property_view_t* edge_biases, + const cugraph_type_erased_device_array_view_t* start_vertices, + const cugraph_type_erased_device_array_view_t* start_vertex_labels, + const cugraph_type_erased_device_array_view_t* label_list, + const cugraph_type_erased_device_array_view_t* label_to_comm_rank, + const cugraph_type_erased_device_array_view_t* label_offsets, + const cugraph_type_erased_host_array_view_t* fan_out, + cugraph_rng_state_t* rng_state, + const cugraph_sampling_options_t* options, + bool_t do_expensive_check, + cugraph_sample_result_t** result, + cugraph_error_t** error + ) + cdef cugraph_error_code_t cugraph_test_uniform_neighborhood_sample_result_create( const cugraph_resource_handle_t* handle, const cugraph_type_erased_device_array_view_t* 
srcs,