From eae4585e617bf82613069ded641d8de735bff36e Mon Sep 17 00:00:00 2001 From: Chuck Hastings <45364586+ChuckHastings@users.noreply.github.com> Date: Fri, 8 Jul 2022 13:26:33 -0400 Subject: [PATCH] Delete old nbr sampling software (#2371) Time ran out during 22.06 development to replace the old code. This PR renames the experimental version of uniform neighborhood sampling to the regular name and eliminates the original implementation. Tagged as `breaking` since we eliminate a function from the C++ API. Authors: - Chuck Hastings (https://github.com/ChuckHastings) Approvers: - Seunghwa Kang (https://github.com/seunghwak) - Brad Rees (https://github.com/BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/2371 --- cpp/CMakeLists.txt | 2 - cpp/include/cugraph/algorithms.hpp | 36 - .../detail/decompress_edge_partition.cuh | 16 +- .../cugraph/detail/graph_functions.cuh | 243 ------ cpp/include/cugraph_c/sampling_algorithms.h | 33 - cpp/src/c_api/uniform_neighbor_sampling.cpp | 137 +--- cpp/src/sampling/detail/gather_utils_impl.cu | 382 --------- cpp/src/sampling/detail/gather_utils_impl.cuh | 775 ------------------ cpp/src/sampling/detail/graph_functions.hpp | 3 +- .../sampling/detail/sampling_utils_impl.cuh | 108 +-- cpp/src/sampling/detail/sampling_utils_mg.cu | 54 +- cpp/src/sampling/detail/sampling_utils_sg.cu | 54 +- cpp/src/sampling/nbr_sampling_impl.cuh | 556 ------------- cpp/src/sampling/nbr_sampling_mg.cu | 106 --- .../uniform_neighbor_sampling_impl.hpp | 7 +- cpp/tests/CMakeLists.txt | 4 - .../c_api/mg_uniform_neighbor_sample_test.c | 202 +---- .../c_api/uniform_neighbor_sample_test.c | 213 +---- .../sampling/detail/mg_gather_one_hop.cu | 152 ++-- cpp/tests/sampling/detail/mg_gather_utils.cu | 258 +++--- .../sampling/detail/nbr_sampling_utils.cuh | 1 - .../sampling/mg_uniform_neighbor_sampling.cu | 31 +- .../sampling/sg_uniform_neighbor_sampling.cu | 4 +- cpp/tests/utilities/device_comm_wrapper.cu | 58 +- cpp/tests/utilities/device_comm_wrapper.hpp | 25 +- .../pylibcugraph/_cugraph_c/algorithms.pxd | 2 +- .../pylibcugraph/uniform_neighbor_sample.pyx | 4 +- 27 files changed, 421 insertions(+), 3045 deletions(-) delete mode 100644 cpp/include/cugraph/detail/graph_functions.cuh delete mode 100644 cpp/src/sampling/detail/gather_utils_impl.cu delete mode 100644 cpp/src/sampling/detail/gather_utils_impl.cuh delete mode 100644 cpp/src/sampling/nbr_sampling_impl.cuh delete mode 100644 cpp/src/sampling/nbr_sampling_mg.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index d0faeafca76..5231cc7dd04 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -186,10 +186,8 @@ add_library(cugraph src/community/legacy/egonet.cu src/sampling/neighborhood.cu src/sampling/random_walks.cu - src/sampling/detail/gather_utils_impl.cu src/sampling/detail/sampling_utils_mg.cu src/sampling/detail/sampling_utils_sg.cu - src/sampling/nbr_sampling_mg.cu src/sampling/uniform_neighbor_sampling_mg.cpp src/sampling/uniform_neighbor_sampling_sg.cpp src/cores/legacy/core_number.cu diff --git a/cpp/include/cugraph/algorithms.hpp b/cpp/include/cugraph/algorithms.hpp index 6664978a3a1..bbfffe09466 100644 --- a/cpp/include/cugraph/algorithms.hpp +++ b/cpp/include/cugraph/algorithms.hpp @@ -1468,42 +1468,6 @@ void core_number(raft::handle_t const& handle, size_t k_last = std::numeric_limits::max(), bool do_expensive_check = false); -/** - * @brief Multi-GPU Uniform Neighborhood Sampling. - * @deprecated will be removed later in this release (22.06) - * - * @tparam graph_view_t Type of graph view. - * @tparam gpu_t Type of rank (GPU) indices; - * @tparam index_t Type used for indexing; typically edge_t - * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and - * handles to various CUDA libraries) to run graph algorithms. - * @param graph_view Graph View object to generate NBR Sampling on. - * @param ptr_d_starting_vertices Device array of starting vertex IDs for the NBR Sampling. - * @param ptr_d_ranks Device array of: rank IDs (GPU IDs) for the NBR Sampling. - * @param num_starting_vertices size of starting vertex set - * @param h_fan_out vector of branching out (fan-out) degree per source vertex for each level - * parameter used for obtaining local out-degree information - * @param with_replacement boolean flag specifying if random sampling is done with replacement - * (true); or, without replacement (false); default = true; - * @return tuple of tuple of device vectors and counts: - * ((vertex_t source_vertex, vertex_t destination_vertex, int rank, edge_t index), rx_counts) - */ -template -std::tuple, - rmm::device_uvector, - rmm::device_uvector, - rmm::device_uvector>, - std::vector> -uniform_nbr_sample(raft::handle_t const& handle, - graph_view_t const& graph_view, - typename graph_view_t::vertex_type const* ptr_d_starting_vertices, - gpu_t const* ptr_d_ranks, - size_t num_starting_vertices, - std::vector const& h_fan_out, - bool with_replacement = true); - /** * @brief Uniform Neighborhood Sampling. * diff --git a/cpp/include/cugraph/detail/decompress_edge_partition.cuh b/cpp/include/cugraph/detail/decompress_edge_partition.cuh index afe841e31cb..520c4272bce 100644 --- a/cpp/include/cugraph/detail/decompress_edge_partition.cuh +++ b/cpp/include/cugraph/detail/decompress_edge_partition.cuh @@ -255,7 +255,7 @@ __global__ void partially_decompress_to_edgelist_mid_degree( edge_partition.local_edges(major_partition_offset); auto major_offset = input_major_start_offsets[idx]; - for (edge_t i = threadIdx.x; i < local_degree; i += blockDim.x) { + for (edge_t i = lane_id; i < local_degree; i += raft::warp_size()) { output_majors[major_offset + i] = major; output_minors[major_offset + i] = indices[i]; @@ -290,7 +290,10 @@ void partially_decompress_edge_partition_to_fill_edgelist( vertex_t* minors, thrust::optional weights, thrust::optional> property, - thrust::optional> global_edge_index) + thrust::optional> global_edge_index, + // FIXME: Once PR 2356 is merged, this parameter could go away because + // major_hypersparse_first will be part of edge_partition + std::optional> local_edge_partition_segment_offsets) { auto execution_policy = handle.get_thrust_policy(); static_assert(detail::num_sparse_segments_per_vertex_partition == 3); @@ -408,6 +411,10 @@ void partially_decompress_edge_partition_to_fill_edgelist( ? thrust::make_optional(thrust::make_tuple( thrust::get<0>(*property) + segment_offsets[3], thrust::get<1>(*property))) : thrust::nullopt, + // FIXME: Once PR 2356 is merged, this parameter could go away because + // major_hypersparse_first will be part of edge_partition + segment_offsets_last = + (*local_edge_partition_segment_offsets)[detail::num_sparse_segments_per_vertex_partition], global_edge_index] __device__(auto idx) { auto major = input_majors[idx]; auto major_offset = input_major_start_offsets[idx]; @@ -416,7 +423,10 @@ void partially_decompress_edge_partition_to_fill_edgelist( vertex_t const* indices{nullptr}; thrust::optional weights{thrust::nullopt}; edge_t local_degree{}; - thrust::tie(indices, weights, local_degree) = edge_partition.local_edges(*major_idx); + // FIXME: Once PR 2356 is merged, this computation should be changed to use + // major_hypersparse_first which will be part of edge_partition + thrust::tie(indices, weights, local_degree) = + edge_partition.local_edges(segment_offsets_last + *major_idx); thrust::fill( thrust::seq, majors + major_offset, majors + major_offset + local_degree, major); thrust::copy(thrust::seq, indices, indices + local_degree, minors + major_offset); diff --git a/cpp/include/cugraph/detail/graph_functions.cuh b/cpp/include/cugraph/detail/graph_functions.cuh deleted file mode 100644 index 049c7ba2a05..00000000000 --- a/cpp/include/cugraph/detail/graph_functions.cuh +++ /dev/null @@ -1,243 +0,0 @@ -/* - * Copyright (c) 2022, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include - -namespace cugraph { -namespace detail { -namespace original { - -/** - * @brief Compute local out degrees of the majors belonging to the adjacency matrices - * stored on each gpu - * - * Iterate through partitions and store their local degrees - * - * @tparam GraphViewType Type of the passed non-owning graph object. - * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and - * handles to various CUDA libraries) to run graph algorithms. - * @param graph_view Non-owning graph object. - * @return A single vector containing the local out degrees of the majors belong to the adjacency - * matrices - */ -template -rmm::device_uvector compute_local_major_degrees( - raft::handle_t const& handle, GraphViewType const& graph_view); - -/** - * @brief Calculate global degree information for all vertices represented by current gpu - * - * Calculate local degree and perform row wise exclusive scan over all gpus in column - * communicator. - * - * @tparam GraphViewType Type of the passed non-owning graph object. - * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and - * handles to various CUDA libraries) to run graph algorithms. - * @param graph_view Non-owning graph object. - * @return Tuple of two device vectors. The first one contains per source edge-count encountered - * by gpus in the column communicator before current gpu. The second device vector contains the - * global out degree for every source represented by current gpu - */ -template -std::tuple, - rmm::device_uvector> -get_global_degree_information(raft::handle_t const& handle, GraphViewType const& graph_view); - -/** - * @brief Calculate global adjacency offset for all majors represented by current gpu - * - * @tparam GraphViewType Type of the passed non-owning graph object. - * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and - * handles to various CUDA libraries) to run graph algorithms. - * @param graph_view Non-owning graph object. - * @param[in] global_degree_offsets Global degree offset to local adjacency list for every major - * represented by current gpu - * @param global_out_degrees Global out degrees for every source represented by current gpu - * @return Device vector containing the number of edges that are prior to the adjacency list of - * every major that can be represented by the current gpu - */ -template -rmm::device_uvector get_global_adjacency_offset( - raft::handle_t const& handle, - GraphViewType const& graph_view, - const rmm::device_uvector& global_degree_offsets, - const rmm::device_uvector& global_out_degrees); - -/** - * @brief Gather active majors and associated client gpu ids across gpus in a - * column communicator - * - * Collect all the vertex ids and client gpu ids to be processed by every gpu in - * the column communicator and call sort on the list. - * - * @tparam vertex_t Type of vertex indices. - * @tparam VertexIterator Type of the iterator for vertex identifiers. - * @tparam GPUIdIterator Type of the iterator for gpu id identifiers. - * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and - * handles to various CUDA libraries) to run graph algorithms. - * @param vertex_input_first Iterator pointing to the first vertex id to be processed - * @param vertex_input_last Iterator pointing to the last (exclusive) vertex id to be processed - * @param gpu_id_first Iterator pointing to the first gpu id to be processed - * @return Device vector containing all the vertices that are to be processed by every gpu - * in the column communicator - */ -template -std::tuple, - rmm::device_uvector::value_type>> -gather_active_majors(raft::handle_t const& handle, - GraphViewType const& graph_view, - VertexIterator vertex_input_first, - VertexIterator vertex_input_last, - GPUIdIterator gpu_id_first); - -/** - * @brief Return global out degrees of active majors - * - * Get partition information of all graph partitions on the gpu and select - * global degrees of all active majors - * - * @tparam GraphViewType Type of the passed non-owning graph object. - * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and - * handles to various CUDA libraries) to run graph algorithms. - * @param graph_view Non-owning graph object. - * @param active_majors Device vector containing all the vertex id that are processed by - * gpus in the column communicator - * @param global_out_degrees Global out degrees for every source represented by current gpu - * @return Global out degrees of all majors in active_majors - */ -template -rmm::device_uvector get_active_major_global_degrees( - raft::handle_t const& handle, - GraphViewType const& graph_view, - const rmm::device_uvector& active_majors, - const rmm::device_uvector& global_out_degrees); - -/** - * @brief Return partition information of all vertex ids of all the partitions belonging to a gpu - * - * Iterate through partitions and store the starting vertex ids, exclusive scan of vertex counts, - * offsets and indices of the partitions csr structure - * - * @tparam GraphViewType Type of the passed non-owning graph object. - * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and - * handles to various CUDA libraries) to run graph algorithms. - * @param graph_view Non-owning graph object. - * @return Tuple of device vectors. The first vector contains all the partitions related to the - * gpu. The second and third vectors contain starting and ending vertex ids of all the partitions - * belonging to the gpu. The fourth vector contains the starting vertex id of the hypersparse - * region in each partition. The fifth vector denotes the vertex count offset (how many vertices - * are dealt with by the previous partitions. - */ -template -std::tuple>, - rmm::device_uvector, - rmm::device_uvector, - rmm::device_uvector, - rmm::device_uvector> -partition_information(raft::handle_t const& handle, GraphViewType const& graph_view); - -/** - * @brief Gather valid edges present on the current gpu - * - * Collect all the edges that are present in the adjacency lists on the current gpu - * - * @tparam GraphViewType Type of the passed non-owning graph object. - * @tparam gpu_t Type of gpu id identifiers. - * @param[in] handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, - * and handles to various CUDA libraries) to run graph algorithms. - * @param[in] graph_view Non-owning graph object. - * @param[in] active_majors Device vector containing all the vertex id that are processed by - * gpus in the column communicator - * @param[in] active_major_gpu_ids Device vector containing the gpu id associated by every vertex - * present in active_majors - * @param[in] minor_map Device vector of minor indices (modifiable in-place) corresponding to - * vertex IDs being returned - * @param[in] indices_per_major Number of indices supplied for every major in the range - * [vertex_input_first, vertex_input_last) - * @param[in] global_degree_offsets Global degree offset to local adjacency list for every major - * represented by current gpu - * @return A tuple of device vector containing the majors, minors, gpu_ids and indices gathered - * locally - */ -template -std::tuple, - rmm::device_uvector, - rmm::device_uvector, - rmm::device_uvector> -gather_local_edges( - raft::handle_t const& handle, - GraphViewType const& graph_view, - const rmm::device_uvector& active_majors, - const rmm::device_uvector& active_major_gpu_ids, - rmm::device_uvector&& minor_map, - typename GraphViewType::edge_type indices_per_major, - const rmm::device_uvector& global_degree_offsets, - const rmm::device_uvector& global_adjacency_list_offsets); - -/** - * @brief Gather edge list for specified vertices - * - * Collect all the edges that are present in the adjacency lists on the current gpu - * - * @tparam GraphViewType Type of the passed non-owning graph object. - * @tparam prop_t Type of the property associated with the majors. - * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and - * handles to various CUDA libraries) to run graph algorithms. - * @param graph_view Non-owning graph object. - * @param active_majors Device vector containing all the vertex id that are processed by - * gpus in the column communicator - * @param active_major_property Device vector containing the property values associated by every - * vertex present in active_majors - * @return A tuple of device vector containing the majors, minors and properties gathered locally - */ -template -std::tuple, - rmm::device_uvector, - rmm::device_uvector, - rmm::device_uvector> -gather_one_hop_edgelist( - raft::handle_t const& handle, - GraphViewType const& graph_view, - const rmm::device_uvector& active_majors, - const rmm::device_uvector& active_major_property, - const rmm::device_uvector& global_adjacency_list_offsets); - -} // namespace original -} // namespace detail -} // namespace cugraph diff --git a/cpp/include/cugraph_c/sampling_algorithms.h b/cpp/include/cugraph_c/sampling_algorithms.h index 16c1a9011d4..dbefac81742 100644 --- a/cpp/include/cugraph_c/sampling_algorithms.h +++ b/cpp/include/cugraph_c/sampling_algorithms.h @@ -122,9 +122,6 @@ typedef struct { * @param [in] graph Pointer to graph. NOTE: Graph might be modified if the storage * needs to be transposed * @param [in] start Device array of start vertices for the sampling - * @param [in] start_label Device array of start labels. These labels will propagate to the - * results so that the result can be properly organized when the input needs to be sent back to - * different callers (different processes or different gpus). * @param [in] fanout Host array defining the fan out at each step in the sampling algorithm * @param [in] with_replacement * Boolean value. If true selection of edges is done with @@ -136,37 +133,7 @@ typedef struct { * be populated if error code is not CUGRAPH_SUCCESS * @return error code */ -// FIXME: This older API will be phased out this release in favor of the experimental one below cugraph_error_code_t cugraph_uniform_neighbor_sample( - const cugraph_resource_handle_t* handle, - cugraph_graph_t* graph, - const cugraph_type_erased_device_array_view_t* start, - const cugraph_type_erased_device_array_view_t* start_label, - const cugraph_type_erased_host_array_view_t* fan_out, - bool_t with_replacement, - bool_t do_expensive_check, - cugraph_sample_result_t** result, - cugraph_error_t** error); - -/** - * @brief Uniform Neighborhood Sampling - * - * @param [in] handle Handle for accessing resources - * @param [in] graph Pointer to graph. NOTE: Graph might be modified if the storage - * needs to be transposed - * @param [in] start Device array of start vertices for the sampling - * @param [in] fanout Host array defining the fan out at each step in the sampling algorithm - * @param [in] with_replacement - * Boolean value. If true selection of edges is done with - * replacement. If false selection is done without replacement. - * @param [in] do_expensive_check - * A flag to run expensive checks for input arguments (if set to true) - * @param [in] result Output from the uniform_neighbor_sample call - * @param [out] error Pointer to an error object storing details of any error. Will - * be populated if error code is not CUGRAPH_SUCCESS - * @return error code - */ -cugraph_error_code_t cugraph_experimental_uniform_neighbor_sample( const cugraph_resource_handle_t* handle, cugraph_graph_t* graph, const cugraph_type_erased_device_array_view_t* start, diff --git a/cpp/src/c_api/uniform_neighbor_sampling.cpp b/cpp/src/c_api/uniform_neighbor_sampling.cpp index 612284c93c8..ed458eaf1cd 100644 --- a/cpp/src/c_api/uniform_neighbor_sampling.cpp +++ b/cpp/src/c_api/uniform_neighbor_sampling.cpp @@ -32,10 +32,11 @@ namespace cugraph { namespace c_api { struct cugraph_sample_result_t { - bool experimental_{true}; cugraph_type_erased_device_array_t* src_{nullptr}; cugraph_type_erased_device_array_t* dst_{nullptr}; // FIXME: Will be deleted once experimental replaces current + // NOTE: Leaving in place while we discuss some future changes, although + // not currently used. cugraph_type_erased_device_array_t* label_{nullptr}; cugraph_type_erased_device_array_t* index_{nullptr}; // FIXME: Will be deleted once experimental replaces current @@ -53,7 +54,6 @@ struct uniform_neighbor_sampling_functor : public cugraph::c_api::abstract_funct raft::handle_t const& handle_; cugraph::c_api::cugraph_graph_t* graph_{nullptr}; cugraph::c_api::cugraph_type_erased_device_array_view_t const* start_{nullptr}; - cugraph::c_api::cugraph_type_erased_device_array_view_t const* start_label_{nullptr}; cugraph::c_api::cugraph_type_erased_host_array_view_t const* fan_out_{nullptr}; bool with_replacement_{false}; bool do_expensive_check_{false}; @@ -62,125 +62,9 @@ struct uniform_neighbor_sampling_functor : public cugraph::c_api::abstract_funct uniform_neighbor_sampling_functor(cugraph_resource_handle_t const* handle, cugraph_graph_t* graph, cugraph_type_erased_device_array_view_t const* start, - cugraph_type_erased_device_array_view_t const* start_label, cugraph_type_erased_host_array_view_t const* fan_out, bool with_replacement, bool do_expensive_check) - : abstract_functor(), - handle_(*reinterpret_cast(handle)->handle_), - graph_(reinterpret_cast(graph)), - start_( - reinterpret_cast(start)), - start_label_(reinterpret_cast( - start_label)), - fan_out_( - reinterpret_cast(fan_out)), - with_replacement_(with_replacement), - do_expensive_check_(do_expensive_check) - { - } - - template - void operator()() - { - // FIXME: Think about how to handle SG vice MG - if constexpr (!cugraph::is_candidate::value) { - unsupported(); - } else if constexpr (!multi_gpu) { - unsupported(); - } else { - // uniform_nbr_sample expects store_transposed == false - if constexpr (store_transposed) { - error_code_ = cugraph::c_api:: - transpose_storage( - handle_, graph_, error_.get()); - if (error_code_ != CUGRAPH_SUCCESS) return; - } - - auto graph = - reinterpret_cast*>( - graph_->graph_); - - auto graph_view = graph->view(); - - auto number_map = reinterpret_cast*>(graph_->number_map_); - - rmm::device_uvector start(start_->size_, handle_.get_stream()); - raft::copy(start.data(), start_->as_type(), start.size(), handle_.get_stream()); - - // - // Need to renumber sources - // - cugraph::renumber_ext_vertices( - handle_, - start.data(), - start.size(), - number_map->data(), - graph_view.local_vertex_partition_range_first(), - graph_view.local_vertex_partition_range_last(), - false); - - // C++ API wants an std::vector - std::vector fan_out(fan_out_->size_); - std::copy_n(fan_out_->as_type(), fan_out_->size_, fan_out.data()); - - auto&& [tmp_tuple, counts] = cugraph::uniform_nbr_sample(handle_, - graph_view, - start.data(), - start_label_->as_type(), - start.size(), - fan_out, - with_replacement_); - - auto&& [srcs, dsts, labels, indices] = tmp_tuple; - - std::vector vertex_partition_lasts = graph_view.vertex_partition_range_lasts(); - - cugraph::unrenumber_int_vertices(handle_, - srcs.data(), - srcs.size(), - number_map->data(), - vertex_partition_lasts, - do_expensive_check_); - - cugraph::unrenumber_int_vertices(handle_, - dsts.data(), - dsts.size(), - number_map->data(), - vertex_partition_lasts, - do_expensive_check_); - - result_ = new cugraph::c_api::cugraph_sample_result_t{ - false, - new cugraph::c_api::cugraph_type_erased_device_array_t(srcs, graph_->vertex_type_), - new cugraph::c_api::cugraph_type_erased_device_array_t(dsts, graph_->vertex_type_), - new cugraph::c_api::cugraph_type_erased_device_array_t(labels, start_label_->type_), - new cugraph::c_api::cugraph_type_erased_device_array_t(indices, graph_->edge_type_), - new cugraph::c_api::cugraph_type_erased_host_array_t(counts, graph_->vertex_type_)}; - } - } -}; - -struct experimental_uniform_neighbor_sampling_functor : public cugraph::c_api::abstract_functor { - raft::handle_t const& handle_; - cugraph::c_api::cugraph_graph_t* graph_{nullptr}; - cugraph::c_api::cugraph_type_erased_device_array_view_t const* start_{nullptr}; - cugraph::c_api::cugraph_type_erased_host_array_view_t const* fan_out_{nullptr}; - bool with_replacement_{false}; - bool do_expensive_check_{false}; - cugraph::c_api::cugraph_sample_result_t* result_{nullptr}; - - experimental_uniform_neighbor_sampling_functor( - cugraph_resource_handle_t const* handle, - cugraph_graph_t* graph, - cugraph_type_erased_device_array_view_t const* start, - cugraph_type_erased_host_array_view_t const* fan_out, - bool with_replacement, - bool do_expensive_check) : abstract_functor(), handle_(*reinterpret_cast(handle)->handle_), graph_(reinterpret_cast(graph)), @@ -259,7 +143,6 @@ struct experimental_uniform_neighbor_sampling_functor : public cugraph::c_api::a do_expensive_check_); result_ = new cugraph::c_api::cugraph_sample_result_t{ - true, new cugraph::c_api::cugraph_type_erased_device_array_t(srcs, graph_->vertex_type_), new cugraph::c_api::cugraph_type_erased_device_array_t(dsts, graph_->vertex_type_), nullptr, @@ -276,7 +159,6 @@ extern "C" cugraph_error_code_t cugraph_uniform_neighbor_sample( const cugraph_resource_handle_t* handle, cugraph_graph_t* graph, const cugraph_type_erased_device_array_view_t* start, - const cugraph_type_erased_device_array_view_t* start_labels, const cugraph_type_erased_host_array_view_t* fan_out, bool_t with_replacement, bool_t do_expensive_check, @@ -284,21 +166,6 @@ extern "C" cugraph_error_code_t cugraph_uniform_neighbor_sample( cugraph_error_t** error) { uniform_neighbor_sampling_functor functor{ - handle, graph, start, start_labels, fan_out, with_replacement, do_expensive_check}; - return cugraph::c_api::run_algorithm(graph, functor, result, error); -} - -extern "C" cugraph_error_code_t cugraph_experimental_uniform_neighbor_sample( - const cugraph_resource_handle_t* handle, - cugraph_graph_t* graph, - const cugraph_type_erased_device_array_view_t* start, - const cugraph_type_erased_host_array_view_t* fan_out, - bool_t with_replacement, - bool_t do_expensive_check, - cugraph_sample_result_t** result, - cugraph_error_t** error) -{ - experimental_uniform_neighbor_sampling_functor functor{ handle, graph, start, fan_out, with_replacement, do_expensive_check}; return cugraph::c_api::run_algorithm(graph, functor, result, error); } diff --git a/cpp/src/sampling/detail/gather_utils_impl.cu b/cpp/src/sampling/detail/gather_utils_impl.cu deleted file mode 100644 index 3c8a7e2d16b..00000000000 --- a/cpp/src/sampling/detail/gather_utils_impl.cu +++ /dev/null @@ -1,382 +0,0 @@ -/* - * Copyright (c) 2022, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -namespace cugraph { -namespace detail { -namespace original { - -template rmm::device_uvector compute_local_major_degrees( - raft::handle_t const& handle, - graph_view_t const& graph_view); - -template rmm::device_uvector compute_local_major_degrees( - raft::handle_t const& handle, - graph_view_t const& graph_view); - -template rmm::device_uvector compute_local_major_degrees( - raft::handle_t const& handle, - graph_view_t const& graph_view); - -template rmm::device_uvector compute_local_major_degrees( - raft::handle_t const& handle, - graph_view_t const& graph_view); - -template rmm::device_uvector compute_local_major_degrees( - raft::handle_t const& handle, - graph_view_t const& graph_view); - -template rmm::device_uvector compute_local_major_degrees( - raft::handle_t const& handle, - graph_view_t const& graph_view); - -template std::tuple, rmm::device_uvector> -get_global_degree_information(raft::handle_t const& handle, - graph_view_t const& graph_view); - -template std::tuple, rmm::device_uvector> -get_global_degree_information( - raft::handle_t const& handle, - graph_view_t const& graph_view); - -template std::tuple, rmm::device_uvector> -get_global_degree_information(raft::handle_t const& handle, - graph_view_t const& graph_view); - -template std::tuple, rmm::device_uvector> -get_global_degree_information( - raft::handle_t const& handle, - graph_view_t const& graph_view); - -template std::tuple, rmm::device_uvector> -get_global_degree_information(raft::handle_t const& handle, - graph_view_t const& graph_view); - -template std::tuple, rmm::device_uvector> -get_global_degree_information( - raft::handle_t const& handle, - graph_view_t const& graph_view); - -template rmm::device_uvector get_global_adjacency_offset( - raft::handle_t const& handle, - graph_view_t const& graph_view, - rmm::device_uvector const& global_degree_offsets, - rmm::device_uvector const& global_out_degrees); - -template rmm::device_uvector get_global_adjacency_offset( - raft::handle_t const& handle, - graph_view_t const& graph_view, - rmm::device_uvector const& global_degree_offsets, - rmm::device_uvector const& global_out_degrees); - -template rmm::device_uvector get_global_adjacency_offset( - raft::handle_t const& handle, - graph_view_t const& graph_view, - rmm::device_uvector const& global_degree_offsets, - rmm::device_uvector const& global_out_degrees); - -template rmm::device_uvector get_global_adjacency_offset( - raft::handle_t const& handle, - graph_view_t const& graph_view, - rmm::device_uvector const& global_degree_offsets, - rmm::device_uvector const& global_out_degrees); - -template rmm::device_uvector get_global_adjacency_offset( - raft::handle_t const& handle, - graph_view_t const& graph_view, - rmm::device_uvector const& global_degree_offsets, - rmm::device_uvector const& global_out_degrees); - -template rmm::device_uvector get_global_adjacency_offset( - raft::handle_t const& handle, - graph_view_t const& graph_view, - rmm::device_uvector const& global_degree_offsets, - rmm::device_uvector const& global_out_degrees); - -template std::tuple, rmm::device_uvector> -gather_active_majors(raft::handle_t const& handle, - graph_view_t const& graph_view, - int32_t const* vertex_input_first, - int32_t const* vertex_input_last, - int32_t const* gpu_id_first); - -template std::tuple, rmm::device_uvector> -gather_active_majors(raft::handle_t const& handle, - graph_view_t const& graph_view, - int32_t const* vertex_input_first, - int32_t const* vertex_input_last, - int32_t const* gpu_id_first); - -template std::tuple, rmm::device_uvector> -gather_active_majors(raft::handle_t const& handle, - graph_view_t const& graph_view, - int32_t const* vertex_input_first, - int32_t const* vertex_input_last, - int32_t const* gpu_id_first); - -template std::tuple, rmm::device_uvector> -gather_active_majors(raft::handle_t const& handle, - graph_view_t const& graph_view, - int32_t const* vertex_input_first, - int32_t const* vertex_input_last, - int32_t const* gpu_id_first); - -template std::tuple, rmm::device_uvector> -gather_active_majors(raft::handle_t const& handle, - graph_view_t const& graph_view, - int64_t const* vertex_input_first, - int64_t const* vertex_input_last, - int32_t const* gpu_id_first); - -template std::tuple, rmm::device_uvector> -gather_active_majors(raft::handle_t const& handle, - graph_view_t const& graph_view, - int64_t const* vertex_input_first, - int64_t const* vertex_input_last, - int32_t const* gpu_id_first); - -template rmm::device_uvector get_active_major_global_degrees( - raft::handle_t const& handle, - graph_view_t const& graph_view, - const rmm::device_uvector& active_majors, - const rmm::device_uvector& global_out_degrees); - -template rmm::device_uvector get_active_major_global_degrees( - raft::handle_t const& handle, - graph_view_t const& graph_view, - const rmm::device_uvector& active_majors, - const rmm::device_uvector& global_out_degrees); - -template rmm::device_uvector get_active_major_global_degrees( - raft::handle_t const& handle, - graph_view_t const& graph_view, - const rmm::device_uvector& active_majors, - const rmm::device_uvector& global_out_degrees); - -template rmm::device_uvector get_active_major_global_degrees( - raft::handle_t const& handle, - graph_view_t const& graph_view, - const rmm::device_uvector& active_majors, - const rmm::device_uvector& global_out_degrees); - -template rmm::device_uvector get_active_major_global_degrees( - raft::handle_t const& handle, - graph_view_t const& graph_view, - const rmm::device_uvector& active_majors, - const rmm::device_uvector& global_out_degrees); - -template rmm::device_uvector get_active_major_global_degrees( - raft::handle_t const& handle, - graph_view_t const& graph_view, - const rmm::device_uvector& active_majors, - const rmm::device_uvector& global_out_degrees); - -template std::tuple< - rmm::device_uvector>, - rmm::device_uvector, - rmm::device_uvector, - rmm::device_uvector, - rmm::device_uvector> -partition_information(raft::handle_t const& handle, - graph_view_t const& graph_view); - -template std::tuple< - rmm::device_uvector>, - rmm::device_uvector, - rmm::device_uvector, - rmm::device_uvector, - rmm::device_uvector> -partition_information(raft::handle_t const& handle, - graph_view_t const& graph_view); - -template std::tuple< - rmm::device_uvector>, - rmm::device_uvector, - rmm::device_uvector, - rmm::device_uvector, - rmm::device_uvector> -partition_information(raft::handle_t const& handle, - graph_view_t const& graph_view); - -template std::tuple< - rmm::device_uvector>, - rmm::device_uvector, - rmm::device_uvector, - rmm::device_uvector, - rmm::device_uvector> -partition_information(raft::handle_t const& handle, - graph_view_t const& graph_view); - -template std::tuple< - rmm::device_uvector>, - rmm::device_uvector, - rmm::device_uvector, - rmm::device_uvector, - rmm::device_uvector> -partition_information(raft::handle_t const& handle, - graph_view_t const& graph_view); - -template std::tuple< - rmm::device_uvector>, - rmm::device_uvector, - rmm::device_uvector, - rmm::device_uvector, - rmm::device_uvector> -partition_information(raft::handle_t const& handle, - graph_view_t const& graph_view); - -template std::tuple, - rmm::device_uvector, - rmm::device_uvector, - rmm::device_uvector> -gather_local_edges(raft::handle_t const& handle, - graph_view_t const& graph_view, - const rmm::device_uvector& active_majors, - const rmm::device_uvector& active_major_gpu_ids, - rmm::device_uvector&& minor_map, - int32_t indices_per_major, - const rmm::device_uvector& global_degree_offsets, - const rmm::device_uvector& global_adjacency_list_offsets); - -template std::tuple, - rmm::device_uvector, - rmm::device_uvector, - rmm::device_uvector> -gather_local_edges(raft::handle_t const& handle, - graph_view_t const& graph_view, - const rmm::device_uvector& active_majors, - const rmm::device_uvector& active_major_gpu_ids, - rmm::device_uvector&& minor_map, - int32_t indices_per_major, - const rmm::device_uvector& global_degree_offsets, - const rmm::device_uvector& global_adjacency_list_offsets); - -template std::tuple, - rmm::device_uvector, - rmm::device_uvector, - rmm::device_uvector> -gather_local_edges(raft::handle_t const& handle, - graph_view_t const& graph_view, - const rmm::device_uvector& active_majors, - const rmm::device_uvector& active_major_gpu_ids, - rmm::device_uvector&& minor_map, - int64_t indices_per_major, - const rmm::device_uvector& global_degree_offsets, - const rmm::device_uvector& global_adjacency_list_offsets); - -template std::tuple, - rmm::device_uvector, - rmm::device_uvector, - rmm::device_uvector> -gather_local_edges(raft::handle_t const& handle, - graph_view_t const& graph_view, - const rmm::device_uvector& active_majors, - const rmm::device_uvector& active_major_gpu_ids, - rmm::device_uvector&& minor_map, - int64_t indices_per_major, - const rmm::device_uvector& global_degree_offsets, - const rmm::device_uvector& global_adjacency_list_offsets); - -template std::tuple, - rmm::device_uvector, - rmm::device_uvector, - rmm::device_uvector> -gather_local_edges(raft::handle_t const& handle, - graph_view_t const& graph_view, - const rmm::device_uvector& active_majors, - const rmm::device_uvector& active_major_gpu_ids, - rmm::device_uvector&& minor_map, - int64_t indices_per_major, - const rmm::device_uvector& global_degree_offsets, - const rmm::device_uvector& global_adjacency_list_offsets); - -template std::tuple, - rmm::device_uvector, - rmm::device_uvector, - rmm::device_uvector> -gather_local_edges(raft::handle_t const& handle, - graph_view_t const& graph_view, - const rmm::device_uvector& active_majors, - const rmm::device_uvector& active_major_gpu_ids, - rmm::device_uvector&& minor_map, - int64_t indices_per_major, - const rmm::device_uvector& global_degree_offsets, - const rmm::device_uvector& global_adjacency_list_offsets); - -template std::tuple, - rmm::device_uvector, - rmm::device_uvector, - rmm::device_uvector> -gather_one_hop_edgelist(raft::handle_t const& handle, - graph_view_t const& graph_view, - const rmm::device_uvector& active_majors, - const rmm::device_uvector& active_major_gpu_ids, - const rmm::device_uvector& global_adjacency_list_offsets); - -template std::tuple, - rmm::device_uvector, - rmm::device_uvector, - rmm::device_uvector> -gather_one_hop_edgelist(raft::handle_t const& handle, - graph_view_t const& graph_view, - const rmm::device_uvector& active_majors, - const rmm::device_uvector& active_major_gpu_ids, - const rmm::device_uvector& global_adjacency_list_offsets); - -template std::tuple, - rmm::device_uvector, - rmm::device_uvector, - rmm::device_uvector> -gather_one_hop_edgelist(raft::handle_t const& handle, - graph_view_t const& graph_view, - const rmm::device_uvector& active_majors, - const rmm::device_uvector& active_major_gpu_ids, - const rmm::device_uvector& global_adjacency_list_offsets); - -template std::tuple, - rmm::device_uvector, - rmm::device_uvector, - rmm::device_uvector> -gather_one_hop_edgelist(raft::handle_t const& handle, - graph_view_t const& graph_view, - const rmm::device_uvector& active_majors, - const rmm::device_uvector& active_major_gpu_ids, - const rmm::device_uvector& global_adjacency_list_offsets); - -template std::tuple, - rmm::device_uvector, - rmm::device_uvector, - rmm::device_uvector> -gather_one_hop_edgelist(raft::handle_t const& handle, - graph_view_t const& graph_view, - const rmm::device_uvector& active_majors, - const rmm::device_uvector& active_major_gpu_ids, - const rmm::device_uvector& global_adjacency_list_offsets); - -template std::tuple, - rmm::device_uvector, - rmm::device_uvector, - rmm::device_uvector> -gather_one_hop_edgelist(raft::handle_t const& handle, - graph_view_t const& graph_view, - const rmm::device_uvector& active_majors, - const rmm::device_uvector& active_major_gpu_ids, - const rmm::device_uvector& global_adjacency_list_offsets); - -} // namespace original -} // namespace detail -} // namespace cugraph diff --git a/cpp/src/sampling/detail/gather_utils_impl.cuh b/cpp/src/sampling/detail/gather_utils_impl.cuh deleted file mode 100644 index fe8f04adcbb..00000000000 --- a/cpp/src/sampling/detail/gather_utils_impl.cuh +++ /dev/null @@ -1,775 +0,0 @@ -/* - * Copyright (c) 2022, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include - -namespace cugraph { -namespace detail { -namespace original { - -template -rmm::device_uvector compute_local_major_degrees( - raft::handle_t const& handle, GraphViewType const& graph_view) -{ - static_assert(GraphViewType::is_storage_transposed == false); - using vertex_t = typename GraphViewType::vertex_type; - using edge_t = typename GraphViewType::edge_type; - using weight_t = typename GraphViewType::weight_type; - - rmm::device_uvector local_degrees(GraphViewType::is_storage_transposed - ? graph_view.local_edge_partition_dst_range_size() - : graph_view.local_edge_partition_src_range_size(), - handle.get_stream()); - - // FIXME optimize for communication - // local_edge_partition_src_range_size == summation of major_range_size() of all partitions - // belonging to the gpu - vertex_t partial_offset{0}; - for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { - auto edge_partition = - edge_partition_device_view_t( - graph_view.local_edge_partition_view(i)); - - // Check if hypersparse segment is present in the partition - auto segment_offsets = graph_view.local_edge_partition_segment_offsets(i); - auto use_dcs = segment_offsets - ? ((*segment_offsets).size() > (num_sparse_segments_per_vertex_partition + 1)) - : false; - - if (use_dcs) { - auto major_hypersparse_first = edge_partition.major_range_first() + - (*segment_offsets)[num_sparse_segments_per_vertex_partition]; - // Calculate degrees in sparse region - auto sparse_begin = local_degrees.begin() + partial_offset; - auto sparse_end = local_degrees.begin() + partial_offset + - (major_hypersparse_first - edge_partition.major_range_first()); - ; - - thrust::tabulate(handle.get_thrust_policy(), - sparse_begin, - sparse_end, - [offsets = edge_partition.offsets()] __device__(auto i) { - return offsets[i + 1] - offsets[i]; - }); - - // Calculate degrees in hypersparse region - auto dcs_nzd_vertex_count = *(edge_partition.dcs_nzd_vertex_count()); - // Initialize hypersparse region degrees as 0 - thrust::fill(handle.get_thrust_policy(), - sparse_end, - sparse_begin + edge_partition.major_range_size(), - edge_t{0}); - thrust::for_each(handle.get_thrust_policy(), - thrust::make_counting_iterator(vertex_t{0}), - thrust::make_counting_iterator(dcs_nzd_vertex_count), - [major_hypersparse_first, - major_range_first = edge_partition.major_range_first(), - vertex_ids = *(edge_partition.dcs_nzd_vertices()), - offsets = edge_partition.offsets(), - local_degrees = thrust::raw_pointer_cast(sparse_begin)] __device__(auto i) { - auto d = offsets[(major_hypersparse_first - major_range_first) + i + 1] - - offsets[(major_hypersparse_first - major_range_first) + i]; - auto v = vertex_ids[i]; - local_degrees[v - major_range_first] = d; - }); - } else { - auto sparse_begin = local_degrees.begin() + partial_offset; - auto sparse_end = local_degrees.begin() + partial_offset + edge_partition.major_range_size(); - thrust::tabulate(handle.get_thrust_policy(), - sparse_begin, - sparse_end, - [offsets = edge_partition.offsets()] __device__(auto i) { - return offsets[i + 1] - offsets[i]; - }); - } - partial_offset += edge_partition.major_range_size(); - } - return local_degrees; -} - -template -rmm::device_uvector get_global_adjacency_offset( - raft::handle_t const& handle, - GraphViewType const& graph_view, - const rmm::device_uvector& global_degree_offsets, - const rmm::device_uvector& global_out_degrees) -{ - static_assert(GraphViewType::is_multi_gpu == true); - using vertex_t = typename GraphViewType::vertex_type; - using edge_t = typename GraphViewType::edge_type; - using weight_t = typename GraphViewType::weight_type; - // auto const& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - - auto& comm = handle.get_comms(); - auto const comm_size = comm.get_size(); - auto const comm_rank = comm.get_rank(); - auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - auto const col_size = col_comm.get_size(); - auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto const row_rank = row_comm.get_rank(); - auto const row_size = row_comm.get_size(); - - rmm::device_uvector global_adjacency_list_offsets(global_degree_offsets.size(), - handle.get_stream()); - - edge_t edge_count_in_all_previous_partitions{0}; - vertex_t vertex_offset{0}; - for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { - auto edge_partition = - edge_partition_device_view_t( - graph_view.local_edge_partition_view(i)); - auto edge_counts = - cugraph::host_scalar_allgather(comm, edge_partition.number_of_edges(), handle.get_stream()); - edge_t partial_edge_count{0}; - for (int r = 0; r < row_rank; ++r) { - for (int c = 0; c < col_size; ++c) { - partial_edge_count += edge_counts[r + c * row_size]; - } - } - thrust::exclusive_scan( - handle.get_thrust_policy(), - global_out_degrees.cbegin() + vertex_offset, - global_out_degrees.cbegin() + vertex_offset + edge_partition.major_range_size(), - global_adjacency_list_offsets.begin() + vertex_offset); - - thrust::transform( - handle.get_thrust_policy(), - global_adjacency_list_offsets.cbegin() + vertex_offset, - global_adjacency_list_offsets.cbegin() + vertex_offset + edge_partition.major_range_size(), - global_degree_offsets.cbegin() + vertex_offset, - global_adjacency_list_offsets.begin() + vertex_offset, - [offset = edge_count_in_all_previous_partitions + partial_edge_count] __device__( - auto val0, auto val1) { return val0 + val1 + offset; }); - - edge_count_in_all_previous_partitions += - std::accumulate(edge_counts.begin(), edge_counts.end(), edge_t{0}); - vertex_offset += edge_partition.major_range_size(); - } - return global_adjacency_list_offsets; -} - -template -std::tuple, - rmm::device_uvector> -get_global_degree_information(raft::handle_t const& handle, GraphViewType const& graph_view) -{ - static_assert(GraphViewType::is_multi_gpu == true); - using edge_t = typename GraphViewType::edge_type; - auto local_degrees = compute_local_major_degrees(handle, graph_view); - - auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - auto const col_size = col_comm.get_size(); - auto const col_rank = col_comm.get_rank(); - auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto const row_size = row_comm.get_size(); - - auto& comm = handle.get_comms(); - auto const comm_size = comm.get_size(); - auto const comm_rank = comm.get_rank(); - - rmm::device_uvector temp_input(local_degrees.size(), handle.get_stream()); - raft::update_device( - temp_input.data(), local_degrees.data(), local_degrees.size(), handle.get_stream()); - - rmm::device_uvector recv_data(local_degrees.size(), handle.get_stream()); - if (col_rank == 0) { - thrust::fill(handle.get_thrust_policy(), recv_data.begin(), recv_data.end(), edge_t{0}); - } - for (int i = 0; i < col_size - 1; ++i) { - if (col_rank == i) { - comm.device_send( - temp_input.begin(), temp_input.size(), comm_rank + row_size, handle.get_stream()); - } - if (col_rank == i + 1) { - comm.device_recv( - recv_data.begin(), recv_data.size(), comm_rank - row_size, handle.get_stream()); - thrust::transform(handle.get_thrust_policy(), - temp_input.begin(), - temp_input.end(), - recv_data.begin(), - temp_input.begin(), - thrust::plus()); - } - col_comm.barrier(); - } - // Get global degrees - device_bcast(col_comm, - temp_input.begin(), - temp_input.begin(), - temp_input.size(), - col_size - 1, - handle.get_stream()); - - return std::make_tuple(std::move(recv_data), std::move(temp_input)); -} - -template -std::tuple, - rmm::device_uvector::value_type>> -gather_active_majors(raft::handle_t const& handle, - GraphViewType const& graph_view, - VertexIterator vertex_input_first, - VertexIterator vertex_input_last, - GPUIdIterator gpu_id_first) -{ - static_assert(GraphViewType::is_multi_gpu == true); - static_assert(GraphViewType::is_storage_transposed == false); - using gpu_t = typename std::iterator_traits::value_type; - using vertex_t = typename GraphViewType::vertex_type; - - auto const& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - size_t source_count = thrust::distance(vertex_input_first, vertex_input_last); - auto external_source_counts = - cugraph::host_scalar_allgather(col_comm, source_count, handle.get_stream()); - auto total_external_source_count = - std::accumulate(external_source_counts.begin(), external_source_counts.end(), size_t{0}); - std::vector displacements(external_source_counts.size(), size_t{0}); - std::exclusive_scan( - external_source_counts.begin(), external_source_counts.end(), displacements.begin(), size_t{0}); - - rmm::device_uvector active_majors(total_external_source_count, handle.get_stream()); - rmm::device_uvector active_major_gpu_ids(total_external_source_count, handle.get_stream()); - // Get the sources other gpus on the same row are working on - // FIXME : replace with device_bcast for better scaling - device_allgatherv(col_comm, - vertex_input_first, - active_majors.data(), - external_source_counts, - displacements, - handle.get_stream()); - device_allgatherv(col_comm, - gpu_id_first, - active_major_gpu_ids.data(), - external_source_counts, - displacements, - handle.get_stream()); - thrust::sort_by_key(handle.get_thrust_policy(), - active_majors.begin(), - active_majors.end(), - active_major_gpu_ids.begin()); - return std::make_tuple(std::move(active_majors), std::move(active_major_gpu_ids)); -} - -template -rmm::device_uvector get_active_major_global_degrees( - raft::handle_t const& handle, - GraphViewType const& graph_view, - const rmm::device_uvector& active_majors, - const rmm::device_uvector& global_out_degrees) -{ - using vertex_t = typename GraphViewType::vertex_type; - using edge_t = typename GraphViewType::edge_type; - using partition_t = edge_partition_device_view_t; - rmm::device_uvector active_major_degrees(active_majors.size(), handle.get_stream()); - - std::vector id_begin; - std::vector id_end; - std::vector count_offsets; - id_begin.reserve(graph_view.number_of_local_edge_partitions()); - id_end.reserve(graph_view.number_of_local_edge_partitions()); - count_offsets.reserve(graph_view.number_of_local_edge_partitions()); - vertex_t counter{0}; - for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { - auto edge_partition = partition_t(graph_view.local_edge_partition_view(i)); - // Starting vertex ids of each partition - id_begin.push_back(edge_partition.major_range_first()); - id_end.push_back(edge_partition.major_range_last()); - count_offsets.push_back(counter); - counter += edge_partition.major_range_size(); - } - rmm::device_uvector vertex_id_begin(id_begin.size(), handle.get_stream()); - rmm::device_uvector vertex_id_end(id_end.size(), handle.get_stream()); - rmm::device_uvector vertex_count_offsets(count_offsets.size(), handle.get_stream()); - raft::update_device( - vertex_id_begin.data(), id_begin.data(), id_begin.size(), handle.get_stream()); - raft::update_device(vertex_id_end.data(), id_end.data(), id_end.size(), handle.get_stream()); - raft::update_device( - vertex_count_offsets.data(), count_offsets.data(), count_offsets.size(), handle.get_stream()); - - thrust::transform(handle.get_thrust_policy(), - active_majors.begin(), - active_majors.end(), - active_major_degrees.begin(), - [id_begin = vertex_id_begin.data(), - id_end = vertex_id_end.data(), - global_out_degrees = global_out_degrees.data(), - vertex_count_offsets = vertex_count_offsets.data(), - count = vertex_id_end.size()] __device__(auto v) { - // Find which partition id did the vertex belong to - auto partition_id = thrust::distance( - id_end, thrust::upper_bound(thrust::seq, id_end, id_end + count, v)); - // starting position of the segment within global_degree_offset - // where the information for partition (partition_id) starts - // vertex_count_offsets[partition_id] - // The relative location of offset information for vertex id v within - // the segment - // v - id_end[partition_id] - auto location_in_segment = v - id_begin[partition_id]; - // read location of global_degree_offset needs to take into account the - // partition offsets because it is a concatenation of all the offsets - // across all partitions - auto location = location_in_segment + vertex_count_offsets[partition_id]; - return global_out_degrees[location]; - }); - return active_major_degrees; -} - -template -std::tuple>, - rmm::device_uvector, - rmm::device_uvector, - rmm::device_uvector, - rmm::device_uvector> -partition_information(raft::handle_t const& handle, GraphViewType const& graph_view) -{ - using vertex_t = typename GraphViewType::vertex_type; - using edge_t = typename GraphViewType::edge_type; - using partition_t = edge_partition_device_view_t; - - std::vector partitions; - std::vector id_begin; - std::vector id_end; - std::vector hypersparse_begin; - std::vector vertex_count_offsets; - - partitions.reserve(graph_view.number_of_local_edge_partitions()); - id_begin.reserve(graph_view.number_of_local_edge_partitions()); - id_end.reserve(graph_view.number_of_local_edge_partitions()); - hypersparse_begin.reserve(graph_view.number_of_local_edge_partitions()); - vertex_count_offsets.reserve(graph_view.number_of_local_edge_partitions()); - - vertex_t counter{0}; - for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { - partitions.emplace_back(graph_view.local_edge_partition_view(i)); - auto& edge_partition = partitions.back(); - - // Starting vertex ids of each partition - id_begin.push_back(edge_partition.major_range_first()); - id_end.push_back(edge_partition.major_range_last()); - - auto segment_offsets = graph_view.local_edge_partition_segment_offsets(i); - auto use_dcs = segment_offsets - ? ((*segment_offsets).size() > (num_sparse_segments_per_vertex_partition + 1)) - : false; - if (use_dcs) { - auto major_hypersparse_first = edge_partition.major_range_first() + - (*segment_offsets)[num_sparse_segments_per_vertex_partition]; - hypersparse_begin.push_back(major_hypersparse_first); - } else { - hypersparse_begin.push_back(edge_partition.major_range_last()); - } - - // Count of relative position of the vertices - vertex_count_offsets.push_back(counter); - - counter += edge_partition.major_range_size(); - } - - // Allocate device memory for transfer - rmm::device_uvector edge_partitions(graph_view.number_of_local_edge_partitions(), - handle.get_stream()); - - rmm::device_uvector major_begin(id_begin.size(), handle.get_stream()); - rmm::device_uvector minor_end(id_end.size(), handle.get_stream()); - rmm::device_uvector hs_begin(hypersparse_begin.size(), handle.get_stream()); - rmm::device_uvector vc_offsets(vertex_count_offsets.size(), handle.get_stream()); - - // Transfer data - raft::update_device( - edge_partitions.data(), partitions.data(), partitions.size(), handle.get_stream()); - raft::update_device(major_begin.data(), id_begin.data(), id_begin.size(), handle.get_stream()); - raft::update_device(minor_end.data(), id_end.data(), id_end.size(), handle.get_stream()); - raft::update_device(vc_offsets.data(), - vertex_count_offsets.data(), - vertex_count_offsets.size(), - handle.get_stream()); - raft::update_device( - hs_begin.data(), hypersparse_begin.data(), hypersparse_begin.size(), handle.get_stream()); - - return std::make_tuple(std::move(edge_partitions), - std::move(major_begin), - std::move(minor_end), - std::move(hs_begin), - std::move(vc_offsets)); -} - -template -std::tuple, - rmm::device_uvector, - rmm::device_uvector, - rmm::device_uvector> -gather_local_edges( - raft::handle_t const& handle, - GraphViewType const& graph_view, - const rmm::device_uvector& active_majors, - const rmm::device_uvector& active_major_gpu_ids, - rmm::device_uvector&& minor_map, - typename GraphViewType::edge_type indices_per_major, - const rmm::device_uvector& global_degree_offsets, - const rmm::device_uvector& global_adjacency_list_offsets) -{ - static_assert(GraphViewType::is_multi_gpu == true); - using vertex_t = typename GraphViewType::vertex_type; - using edge_t = typename GraphViewType::edge_type; - auto edge_count = active_majors.size() * indices_per_major; - rmm::device_uvector majors(edge_count, handle.get_stream()); - rmm::device_uvector minors(edge_count, handle.get_stream()); - rmm::device_uvector minor_gpu_ids(edge_count, handle.get_stream()); - vertex_t invalid_vertex_id = graph_view.number_of_vertices(); - - auto [partitions, id_begin, id_end, hypersparse_begin, vertex_count_offsets] = - partition_information(handle, graph_view); - - thrust::for_each( - handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(edge_count), - [edge_index_first = minor_map.begin(), - active_majors = active_majors.data(), - active_major_gpu_ids = active_major_gpu_ids.data(), - id_begin = id_begin.data(), - id_end = id_end.data(), - id_seg_count = id_begin.size(), - vertex_count_offsets = vertex_count_offsets.data(), - glbl_degree_offsets = global_degree_offsets.data(), - glbl_adj_list_offsets = global_adjacency_list_offsets.data(), - majors = majors.data(), - minors = minors.data(), - dst_gpu_ids = minor_gpu_ids.data(), - partitions = partitions.data(), - hypersparse_begin = hypersparse_begin.data(), - invalid_vertex_id, - indices_per_major] __device__(auto index) { - // major which this edge index refers to - auto loc = index / indices_per_major; - auto major = active_majors[loc]; - majors[index] = major; - dst_gpu_ids[index] = active_major_gpu_ids[loc]; - - // Find which partition id did the major belong to - auto partition_id = thrust::distance( - id_end, thrust::upper_bound(thrust::seq, id_end, id_end + id_seg_count, major)); - // starting position of the segment within global_degree_offset - // where the information for partition (partition_id) starts - // vertex_count_offsets[partition_id] - // The relative location of offset information for vertex id v within - // the segment - // v - seg[partition_id] - vertex_t location_in_segment; - if (major < hypersparse_begin[partition_id]) { - location_in_segment = major - id_begin[partition_id]; - } else { - auto row_hypersparse_idx = - partitions[partition_id].major_hypersparse_idx_from_major_nocheck(major); - if (row_hypersparse_idx) { - location_in_segment = *(row_hypersparse_idx)-id_begin[partition_id]; - } else { - minors[index] = invalid_vertex_id; - return; - } - } - - // csr offset value for vertex v that belongs to partition (partition_id) - auto offset_ptr = partitions[partition_id].offsets(); - auto sparse_offset = offset_ptr[location_in_segment]; - auto local_out_degree = offset_ptr[location_in_segment + 1] - sparse_offset; - vertex_t const* adjacency_list = partitions[partition_id].indices() + sparse_offset; - // read location of global_degree_offset needs to take into account the - // partition offsets because it is a concatenation of all the offsets - // across all partitions - auto location = location_in_segment + vertex_count_offsets[partition_id]; - auto g_degree_offset = glbl_degree_offsets[location]; - auto g_dst_index = edge_index_first[index]; - if ((g_dst_index >= g_degree_offset) && (g_dst_index < g_degree_offset + local_out_degree)) { - minors[index] = adjacency_list[g_dst_index - g_degree_offset]; - edge_index_first[index] = g_dst_index - g_degree_offset + glbl_adj_list_offsets[location]; - } else { - minors[index] = invalid_vertex_id; - } - }); - auto input_iter = thrust::make_zip_iterator( - thrust::make_tuple(majors.begin(), minors.begin(), minor_gpu_ids.begin(), minor_map.begin())); - - auto compacted_length = thrust::distance( - input_iter, - thrust::remove_if( - handle.get_thrust_policy(), - input_iter, - input_iter + minors.size(), - minors.begin(), - [invalid_vertex_id] __device__(auto dst) { return (dst == invalid_vertex_id); })); - majors.resize(compacted_length, handle.get_stream()); - minors.resize(compacted_length, handle.get_stream()); - minor_gpu_ids.resize(compacted_length, handle.get_stream()); - minor_map.resize(compacted_length, handle.get_stream()); - return std::make_tuple( - std::move(majors), std::move(minors), std::move(minor_gpu_ids), std::move(minor_map)); -} - -template -typename GraphViewType::edge_type edgelist_count(raft::handle_t const& handle, - GraphViewType const& graph_view, - VertexIterator vertex_input_first, - VertexIterator vertex_input_last) -{ - using edge_t = typename GraphViewType::edge_type; - // Expect that vertex input list is sorted - auto [partitions, id_begin, id_end, hypersparse_begin, vertex_count_offsets] = - partition_information(handle, graph_view); - return thrust::transform_reduce( - handle.get_thrust_policy(), - vertex_input_first, - vertex_input_last, - [partitions = partitions.data(), - id_begin = id_begin.data(), - id_end = id_end.data(), - id_seg_count = id_begin.size(), - hypersparse_begin = hypersparse_begin.data(), - vertex_count_offsets = vertex_count_offsets.data()] __device__(auto major) { - // Find which partition id did the vertex belong to - auto partition_id = thrust::distance( - id_end, thrust::upper_bound(thrust::seq, id_end, id_end + id_seg_count, major)); - auto edge_partition = partitions[partition_id]; - auto major_hypersparse_first = hypersparse_begin[partition_id]; - if (major < major_hypersparse_first) { - auto major_offset = edge_partition.major_offset_from_major_nocheck(major); - return edge_partition.local_degree(major_offset); - } else { - auto major_hypersparse_idx = edge_partition.major_hypersparse_idx_from_major_nocheck(major); - return major_hypersparse_idx - ? edge_partition.local_degree( - edge_partition.major_offset_from_major_nocheck(major_hypersparse_first) + - *major_hypersparse_idx) - : edge_t{0}; - } - }, - edge_t{0}, - thrust::plus()); -} - -template -std::vector get_active_major_segments(raft::handle_t const& handle, - vertex_t major_range_first, - vertex_t major_range_last, - std::vector const& partition_segments, - const rmm::device_uvector& active_majors) -{ - std::vector segments(partition_segments.size()); - std::transform(partition_segments.begin(), - partition_segments.end(), - segments.begin(), - [major_range_first](auto s) { return s + major_range_first; }); - segments.push_back(major_range_last); - - rmm::device_uvector p_segments(segments.size(), handle.get_stream()); - raft::update_device(p_segments.data(), segments.data(), segments.size(), handle.get_stream()); - rmm::device_uvector majors_segments(segments.size(), handle.get_stream()); - thrust::lower_bound(handle.get_thrust_policy(), - active_majors.cbegin(), - active_majors.cend(), - p_segments.begin(), - p_segments.end(), - majors_segments.begin()); - std::vector active_majors_segments(majors_segments.size()); - raft::update_host(active_majors_segments.data(), - majors_segments.data(), - majors_segments.size(), - handle.get_stream()); - return active_majors_segments; -} - -template -void local_major_degree( - raft::handle_t const& handle, - edge_partition_device_view_t partition, - rmm::device_uvector const& active_majors, - std::vector const& majors_segments, - std::vector const& partition_segments, - edge_t* out_degrees) -{ - auto active_major_count = majors_segments.back() - majors_segments.front(); - // Sparse region - if (majors_segments[3] - majors_segments[0] > 0) { - thrust::transform(handle.get_thrust_policy(), - active_majors.cbegin() + majors_segments[0], - active_majors.cbegin() + majors_segments[3], - out_degrees, - [partition] __device__(auto major) { - auto major_offset = partition.major_offset_from_major_nocheck(major); - return partition.local_degree(major_offset); - }); - } - // Hypersparse region - if (majors_segments[4] - majors_segments[3] > 0) { - auto major_hypersparse_first = - partition.major_range_first() + - partition_segments[detail::num_sparse_segments_per_vertex_partition]; - auto major_offset = - static_cast(major_hypersparse_first - partition.major_range_first()); - thrust::transform(handle.get_thrust_policy(), - active_majors.cbegin() + majors_segments[3], - active_majors.cbegin() + majors_segments[4], - out_degrees + majors_segments[3] - majors_segments[0], - [partition, major_offset] __device__(auto major) { - auto major_idx = partition.major_hypersparse_idx_from_major_nocheck(major); - if (major_idx) { - return partition.local_degree(major_offset + *major_idx); - } else { - return edge_t{0}; - } - }); - } -} - -template -std::tuple, - rmm::device_uvector, - rmm::device_uvector, - rmm::device_uvector> -gather_one_hop_edgelist( - raft::handle_t const& handle, - GraphViewType const& graph_view, - const rmm::device_uvector& active_majors, - const rmm::device_uvector& active_major_property, - const rmm::device_uvector& global_adjacency_list_offsets) -{ - // Assumes active_majors is sorted - - static_assert(GraphViewType::is_multi_gpu == true); - using vertex_t = typename GraphViewType::vertex_type; - using edge_t = typename GraphViewType::edge_type; - using weight_t = typename GraphViewType::weight_type; - - std::vector> active_majors_segments; - vertex_t max_active_major_count{0}; - for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { - auto partition = - edge_partition_device_view_t( - graph_view.local_edge_partition_view(i)); - // Identify segments of active_majors - active_majors_segments.emplace_back( - get_active_major_segments(handle, - partition.major_range_first(), - partition.major_range_last(), - *(graph_view.local_edge_partition_segment_offsets(i)), - active_majors)); - auto& majors_segments = active_majors_segments.back(); - // Count of active majors belonging to this partition - max_active_major_count = - std::max(max_active_major_count, majors_segments.back() - majors_segments.front()); - } - - auto& comm = handle.get_comms(); - auto const comm_rank = comm.get_rank(); - rmm::device_uvector active_majors_out_offsets(1 + max_active_major_count, - handle.get_stream()); - auto edge_count = edgelist_count(handle, graph_view, active_majors.begin(), active_majors.end()); - rmm::device_uvector majors(edge_count, handle.get_stream()); - rmm::device_uvector minors(edge_count, handle.get_stream()); - rmm::device_uvector minor_prop_ids(edge_count, handle.get_stream()); - rmm::device_uvector minor_map(edge_count, handle.get_stream()); - - edge_t output_offset = 0; - vertex_t vertex_offset{0}; - for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { - auto partition = - edge_partition_device_view_t( - graph_view.local_edge_partition_view(i)); - auto& majors_segments = active_majors_segments[i]; - // Calculate local degree offsets - auto active_major_count = majors_segments.back() - majors_segments.front(); - active_majors_out_offsets.set_element_to_zero_async(0, handle.get_stream()); - local_major_degree(handle, - partition, - active_majors, - majors_segments, - *(graph_view.local_edge_partition_segment_offsets(i)), - 1 + active_majors_out_offsets.data()); - thrust::inclusive_scan(handle.get_thrust_policy(), - active_majors_out_offsets.begin() + 1, - active_majors_out_offsets.begin() + 1 + active_major_count, - active_majors_out_offsets.begin() + 1); - active_majors_out_offsets.resize(1 + active_major_count, handle.get_stream()); - partially_decompress_edge_partition_to_fill_edgelist( - handle, - partition, - active_majors.cbegin(), - active_majors_out_offsets.cbegin(), - majors_segments, - output_offset + majors.data(), - output_offset + minors.data(), - thrust::nullopt, - thrust::make_optional( - thrust::make_tuple(active_major_property.cbegin(), output_offset + minor_prop_ids.data())), - thrust::make_optional( - thrust::make_tuple(global_adjacency_list_offsets.cbegin() + vertex_offset, - output_offset + minor_map.begin()))); - output_offset += active_majors_out_offsets.back_element(handle.get_stream()); - vertex_offset += partition.major_range_size(); - } - - return std::make_tuple( - std::move(majors), std::move(minors), std::move(minor_prop_ids), std::move(minor_map)); -} - -} // namespace original -} // namespace detail -} // namespace cugraph diff --git a/cpp/src/sampling/detail/graph_functions.hpp b/cpp/src/sampling/detail/graph_functions.hpp index f0b1580b88e..8eef9c83d61 100644 --- a/cpp/src/sampling/detail/graph_functions.hpp +++ b/cpp/src/sampling/detail/graph_functions.hpp @@ -150,8 +150,7 @@ gather_local_edges( const rmm::device_uvector& active_majors, rmm::device_uvector&& minor_map, typename GraphViewType::edge_type indices_per_major, - const rmm::device_uvector& global_degree_offsets, - const rmm::device_uvector& global_adjacency_list_offsets); + const rmm::device_uvector& global_degree_offsets); /** * @brief Gather edge list for specified vertices diff --git a/cpp/src/sampling/detail/sampling_utils_impl.cuh b/cpp/src/sampling/detail/sampling_utils_impl.cuh index 478f75095c1..793df64a8d6 100644 --- a/cpp/src/sampling/detail/sampling_utils_impl.cuh +++ b/cpp/src/sampling/detail/sampling_utils_impl.cuh @@ -135,73 +135,6 @@ rmm::device_uvector compute_local_major_degre return local_degrees; } -template -rmm::device_uvector get_global_adjacency_offset( - raft::handle_t const& handle, - GraphViewType const& graph_view, - const rmm::device_uvector& global_degree_offsets, - const rmm::device_uvector& global_out_degrees) -{ - using vertex_t = typename GraphViewType::vertex_type; - using edge_t = typename GraphViewType::edge_type; - using weight_t = typename GraphViewType::weight_type; - - rmm::device_uvector global_adjacency_list_offsets(global_degree_offsets.size(), - handle.get_stream()); - - if constexpr (GraphViewType::is_multi_gpu) { - auto& comm = handle.get_comms(); - auto const comm_size = comm.get_size(); - auto const comm_rank = comm.get_rank(); - auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - auto const col_size = col_comm.get_size(); - auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto const row_rank = row_comm.get_rank(); - auto const row_size = row_comm.get_size(); - - edge_t edge_count_in_all_previous_partitions{0}; - vertex_t vertex_offset{0}; - for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { - auto edge_partition = - edge_partition_device_view_t( - graph_view.local_edge_partition_view(i)); - auto edge_counts = - cugraph::host_scalar_allgather(comm, edge_partition.number_of_edges(), handle.get_stream()); - edge_t partial_edge_count{0}; - for (int r = 0; r < row_rank; ++r) { - for (int c = 0; c < col_size; ++c) { - partial_edge_count += edge_counts[r + c * row_size]; - } - } - thrust::exclusive_scan( - handle.get_thrust_policy(), - global_out_degrees.cbegin() + vertex_offset, - global_out_degrees.cbegin() + vertex_offset + edge_partition.major_range_size(), - global_adjacency_list_offsets.begin() + vertex_offset); - - thrust::transform( - handle.get_thrust_policy(), - global_adjacency_list_offsets.cbegin() + vertex_offset, - global_adjacency_list_offsets.cbegin() + vertex_offset + edge_partition.major_range_size(), - global_degree_offsets.cbegin() + vertex_offset, - global_adjacency_list_offsets.begin() + vertex_offset, - [offset = edge_count_in_all_previous_partitions + partial_edge_count] __device__( - auto val0, auto val1) { return val0 + val1 + offset; }); - - edge_count_in_all_previous_partitions += - std::accumulate(edge_counts.begin(), edge_counts.end(), edge_t{0}); - vertex_offset += edge_partition.major_range_size(); - } - } else { - thrust::fill(handle.get_thrust_policy(), - global_adjacency_list_offsets.begin(), - global_adjacency_list_offsets.end(), - edge_t{0}); - } - - return global_adjacency_list_offsets; -} - template std::tuple, rmm::device_uvector> @@ -268,8 +201,10 @@ rmm::device_uvector allgather_active_majors(raft::handle_t const& hand { auto const& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); size_t source_count = d_in.size(); + auto external_source_counts = cugraph::host_scalar_allgather(col_comm, source_count, handle.get_stream()); + auto total_external_source_count = std::accumulate(external_source_counts.begin(), external_source_counts.end(), size_t{0}); std::vector displacements(external_source_counts.size(), size_t{0}); @@ -453,8 +388,7 @@ gather_local_edges( const rmm::device_uvector& active_majors, rmm::device_uvector&& minor_map, typename GraphViewType::edge_type indices_per_major, - const rmm::device_uvector& global_degree_offsets, - const rmm::device_uvector& global_adjacency_list_offsets) + const rmm::device_uvector& global_degree_offsets) { using vertex_t = typename GraphViewType::vertex_type; using edge_t = typename GraphViewType::edge_type; @@ -479,19 +413,18 @@ gather_local_edges( handle.get_thrust_policy(), thrust::make_counting_iterator(0), thrust::make_counting_iterator(edge_count), - [edge_index_first = minor_map.begin(), - active_majors = active_majors.data(), - id_begin = id_begin.data(), - id_end = id_end.data(), - id_seg_count = id_begin.size(), - vertex_count_offsets = vertex_count_offsets.data(), - glbl_degree_offsets = global_degree_offsets.data(), - glbl_adj_list_offsets = global_adjacency_list_offsets.data(), - majors = majors.data(), - minors = minors.data(), - weights = weights ? weights->data() : nullptr, - partitions = partitions.data(), - hypersparse_begin = hypersparse_begin.data(), + [edge_index_first = minor_map.begin(), + active_majors = active_majors.data(), + id_begin = id_begin.data(), + id_end = id_end.data(), + id_seg_count = id_begin.size(), + vertex_count_offsets = vertex_count_offsets.data(), + glbl_degree_offsets = global_degree_offsets.data(), + majors = majors.data(), + minors = minors.data(), + weights = weights ? weights->data() : nullptr, + partitions = partitions.data(), + hypersparse_begin = hypersparse_begin.data(), invalid_vertex_id, indices_per_major] __device__(auto index) { // major which this edge index refers to @@ -510,7 +443,6 @@ gather_local_edges( if (major < hypersparse_begin[partition_id]) { location_in_segment = major - id_begin[partition_id]; local_out_degree = offset_ptr[location_in_segment + 1] - offset_ptr[location_in_segment]; - ; } else { auto row_hypersparse_idx = partitions[partition_id].major_hypersparse_idx_from_major_nocheck(major); @@ -520,7 +452,6 @@ gather_local_edges( (hypersparse_begin[partition_id] - id_begin[partition_id]) + *row_hypersparse_idx; local_out_degree = offset_ptr[location_in_segment + 1] - offset_ptr[location_in_segment]; - ; } } @@ -855,7 +786,10 @@ gather_one_hop_edgelist( output_offset + minors.data(), weights ? thrust::make_optional(output_offset + weights->data()) : thrust::nullopt, thrust::nullopt, - thrust::nullopt); + thrust::nullopt, + // FIXME: When PR 2365 is merged, this parameter can be removed + graph_view.local_edge_partition_segment_offsets(i)); + output_offset += active_majors_out_offsets.back_element(handle.get_stream()); vertex_offset += partition.major_range_size(); } @@ -911,7 +845,9 @@ gather_one_hop_edgelist( minors.data(), weights ? thrust::make_optional(weights->data()) : thrust::nullopt, thrust::nullopt, - thrust::nullopt); + thrust::nullopt, + // FIXME: When PR 2365 is merged, this parameter can be removed + std::nullopt); } return std::make_tuple(std::move(majors), std::move(minors), std::move(weights)); diff --git a/cpp/src/sampling/detail/sampling_utils_mg.cu b/cpp/src/sampling/detail/sampling_utils_mg.cu index ffcead02cf9..726309e5370 100644 --- a/cpp/src/sampling/detail/sampling_utils_mg.cu +++ b/cpp/src/sampling/detail/sampling_utils_mg.cu @@ -46,42 +46,6 @@ get_global_degree_information( raft::handle_t const& handle, graph_view_t const& graph_view); -template rmm::device_uvector get_global_adjacency_offset( - raft::handle_t const& handle, - graph_view_t const& graph_view, - rmm::device_uvector const& global_degree_offsets, - rmm::device_uvector const& global_out_degrees); - -template rmm::device_uvector get_global_adjacency_offset( - raft::handle_t const& handle, - graph_view_t const& graph_view, - rmm::device_uvector const& global_degree_offsets, - rmm::device_uvector const& global_out_degrees); - -template rmm::device_uvector get_global_adjacency_offset( - raft::handle_t const& handle, - graph_view_t const& graph_view, - rmm::device_uvector const& global_degree_offsets, - rmm::device_uvector const& global_out_degrees); - -template rmm::device_uvector get_global_adjacency_offset( - raft::handle_t const& handle, - graph_view_t const& graph_view, - rmm::device_uvector const& global_degree_offsets, - rmm::device_uvector const& global_out_degrees); - -template rmm::device_uvector get_global_adjacency_offset( - raft::handle_t const& handle, - graph_view_t const& graph_view, - rmm::device_uvector const& global_degree_offsets, - rmm::device_uvector const& global_out_degrees); - -template rmm::device_uvector get_global_adjacency_offset( - raft::handle_t const& handle, - graph_view_t const& graph_view, - rmm::device_uvector const& global_degree_offsets, - rmm::device_uvector const& global_out_degrees); - template rmm::device_uvector allgather_active_majors(raft::handle_t const& handle, rmm::device_uvector&& d_in); @@ -186,8 +150,7 @@ gather_local_edges(raft::handle_t const& handle, const rmm::device_uvector& active_majors, rmm::device_uvector&& minor_map, int32_t indices_per_major, - const rmm::device_uvector& global_degree_offsets, - const rmm::device_uvector& global_adjacency_list_offsets); + const rmm::device_uvector& global_degree_offsets); template std::tuple, rmm::device_uvector, @@ -197,8 +160,7 @@ gather_local_edges(raft::handle_t const& handle, const rmm::device_uvector& active_majors, rmm::device_uvector&& minor_map, int64_t indices_per_major, - const rmm::device_uvector& global_degree_offsets, - const rmm::device_uvector& global_adjacency_list_offsets); + const rmm::device_uvector& global_degree_offsets); template std::tuple, rmm::device_uvector, @@ -208,8 +170,7 @@ gather_local_edges(raft::handle_t const& handle, const rmm::device_uvector& active_majors, rmm::device_uvector&& minor_map, int64_t indices_per_major, - const rmm::device_uvector& global_degree_offsets, - const rmm::device_uvector& global_adjacency_list_offsets); + const rmm::device_uvector& global_degree_offsets); template std::tuple, rmm::device_uvector, @@ -219,8 +180,7 @@ gather_local_edges(raft::handle_t const& handle, const rmm::device_uvector& active_majors, rmm::device_uvector&& minor_map, int32_t indices_per_major, - const rmm::device_uvector& global_degree_offsets, - const rmm::device_uvector& global_adjacency_list_offsets); + const rmm::device_uvector& global_degree_offsets); template std::tuple, rmm::device_uvector, @@ -230,8 +190,7 @@ gather_local_edges(raft::handle_t const& handle, const rmm::device_uvector& active_majors, rmm::device_uvector&& minor_map, int64_t indices_per_major, - const rmm::device_uvector& global_degree_offsets, - const rmm::device_uvector& global_adjacency_list_offsets); + const rmm::device_uvector& global_degree_offsets); template std::tuple, rmm::device_uvector, @@ -241,8 +200,7 @@ gather_local_edges(raft::handle_t const& handle, const rmm::device_uvector& active_majors, rmm::device_uvector&& minor_map, int64_t indices_per_major, - const rmm::device_uvector& global_degree_offsets, - const rmm::device_uvector& global_adjacency_list_offsets); + const rmm::device_uvector& global_degree_offsets); template std::tuple, rmm::device_uvector, diff --git a/cpp/src/sampling/detail/sampling_utils_sg.cu b/cpp/src/sampling/detail/sampling_utils_sg.cu index 52f2f9245b9..ae2980e5f10 100644 --- a/cpp/src/sampling/detail/sampling_utils_sg.cu +++ b/cpp/src/sampling/detail/sampling_utils_sg.cu @@ -49,42 +49,6 @@ get_global_degree_information( raft::handle_t const& handle, graph_view_t const& graph_view); -template rmm::device_uvector get_global_adjacency_offset( - raft::handle_t const& handle, - graph_view_t const& graph_view, - rmm::device_uvector const& global_degree_offsets, - rmm::device_uvector const& global_out_degrees); - -template rmm::device_uvector get_global_adjacency_offset( - raft::handle_t const& handle, - graph_view_t const& graph_view, - rmm::device_uvector const& global_degree_offsets, - rmm::device_uvector const& global_out_degrees); - -template rmm::device_uvector get_global_adjacency_offset( - raft::handle_t const& handle, - graph_view_t const& graph_view, - rmm::device_uvector const& global_degree_offsets, - rmm::device_uvector const& global_out_degrees); - -template rmm::device_uvector get_global_adjacency_offset( - raft::handle_t const& handle, - graph_view_t const& graph_view, - rmm::device_uvector const& global_degree_offsets, - rmm::device_uvector const& global_out_degrees); - -template rmm::device_uvector get_global_adjacency_offset( - raft::handle_t const& handle, - graph_view_t const& graph_view, - rmm::device_uvector const& global_degree_offsets, - rmm::device_uvector const& global_out_degrees); - -template rmm::device_uvector get_global_adjacency_offset( - raft::handle_t const& handle, - graph_view_t const& graph_view, - rmm::device_uvector const& global_degree_offsets, - rmm::device_uvector const& global_out_degrees); - template rmm::device_uvector get_active_major_global_degrees( raft::handle_t const& handle, graph_view_t const& graph_view, @@ -129,8 +93,7 @@ gather_local_edges(raft::handle_t const& handle, const rmm::device_uvector& active_majors, rmm::device_uvector&& minor_map, int32_t indices_per_major, - const rmm::device_uvector& global_degree_offsets, - const rmm::device_uvector& global_adjacency_list_offsets); + const rmm::device_uvector& global_degree_offsets); template std::tuple, rmm::device_uvector, @@ -140,8 +103,7 @@ gather_local_edges(raft::handle_t const& handle, const rmm::device_uvector& active_majors, rmm::device_uvector&& minor_map, int64_t indices_per_major, - const rmm::device_uvector& global_degree_offsets, - const rmm::device_uvector& global_adjacency_list_offsets); + const rmm::device_uvector& global_degree_offsets); template std::tuple, rmm::device_uvector, @@ -151,8 +113,7 @@ gather_local_edges(raft::handle_t const& handle, const rmm::device_uvector& active_majors, rmm::device_uvector&& minor_map, int64_t indices_per_major, - const rmm::device_uvector& global_degree_offsets, - const rmm::device_uvector& global_adjacency_list_offsets); + const rmm::device_uvector& global_degree_offsets); template std::tuple, rmm::device_uvector, @@ -162,8 +123,7 @@ gather_local_edges(raft::handle_t const& handle, const rmm::device_uvector& active_majors, rmm::device_uvector&& minor_map, int32_t indices_per_major, - const rmm::device_uvector& global_degree_offsets, - const rmm::device_uvector& global_adjacency_list_offsets); + const rmm::device_uvector& global_degree_offsets); template std::tuple, rmm::device_uvector, @@ -173,8 +133,7 @@ gather_local_edges(raft::handle_t const& handle, const rmm::device_uvector& active_majors, rmm::device_uvector&& minor_map, int64_t indices_per_major, - const rmm::device_uvector& global_degree_offsets, - const rmm::device_uvector& global_adjacency_list_offsets); + const rmm::device_uvector& global_degree_offsets); template std::tuple, rmm::device_uvector, @@ -184,8 +143,7 @@ gather_local_edges(raft::handle_t const& handle, const rmm::device_uvector& active_majors, rmm::device_uvector&& minor_map, int64_t indices_per_major, - const rmm::device_uvector& global_degree_offsets, - const rmm::device_uvector& global_adjacency_list_offsets); + const rmm::device_uvector& global_degree_offsets); template std::tuple, rmm::device_uvector, diff --git a/cpp/src/sampling/nbr_sampling_impl.cuh b/cpp/src/sampling/nbr_sampling_impl.cuh deleted file mode 100644 index 7b0402057ad..00000000000 --- a/cpp/src/sampling/nbr_sampling_impl.cuh +++ /dev/null @@ -1,556 +0,0 @@ -/* - * Copyright (c) 2022, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Andrei Schaffer, aschaffer@nvidia.com -// -#pragma once - -#include -#include -#include - -#include - -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "rw_traversals.hpp" - -#include - -#include -#include -#include -#include -#include - -namespace cugraph { -namespace detail { -namespace original { - -/** - * @brief Projects zip input onto the lower dim zip output, where lower dimension components are - * specified by tuple indices; e.g., extracts the (destination_vertex_id, rank_to_send_it_to) - * components from the quadruplet (vertex_t source_vertex, vertex_t destination_vertex, int rank, - * edge_t index) via indices {1,2}; - * @tparam vertex_index non-type template parameter specifying index in the input tuple where vertex - * IDs are stored; - * @tparam rank_index non-type template parameter specifying index in the input tuple where rank IDs - * are stored; - * @tparam zip_in_it_t zip Type for the input tuple; - * @tparam zip_out_it_t zip Type for the output tuple; - * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and - * handles to various CUDA libraries) to run graph algorithms. - * @param[in] begin zip begin iterator of quadruplets from which new input is extracted; typically - * (vertex_t source_vertex, vertex_t destination_vertex, int rank, edge_t index) - * @param[in] end zip end iterator of quadruplets from which new input is extracted; - * @param[out] result begin of result zip iterator of pairs for next iteration; typically - * (vertex_t source_vertex, int rank) - */ -template -void project(raft::handle_t const& handle, zip_in_it_t begin, zip_in_it_t end, zip_out_it_t result) -{ - thrust::transform(handle.get_thrust_policy(), begin, end, result, [] __device__(auto const& tpl) { - return thrust::make_tuple(thrust::get(tpl), thrust::get(tpl)); - }); -} - -/** - * @brief Shuffles zipped pairs of vertex IDs and ranks IDs to the GPU's that the vertex IDs belong - * to. The assumption is that the return provides a per-GPU coalesced set of pairs, with - * corresponding counts vector. To limit the result to the self-GPU one needs additional filtering - * to extract the corresponding set from the coalesced set of sets and using the corresponding - * counts entry. - * @tparam graph_view_t Type of graph view. - * @tparam zip_iterator_t zip Type for the zipped tuple (vertexID, rank); - * @tparam gpu_t Type used for storing GPU rank IDs; - * @param[in] handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, - * and handles to various CUDA libraries) to run graph algorithms. - * @param[in] graph_view Graph View object to generate NBR Sampling on. - * @param[in] begin zip begin iterator of (vertexID, rank) pairs. - * @param[in] end zip end iterator of (vertexID, rank) pairs. - * @param[in] unnamed tag used for template tag dispatching - * @return tuple pair of coalesced pairs and counts - */ -template -std::tuple, device_vec_t>, - std::vector> -shuffle_to_gpus(raft::handle_t const& handle, - graph_view_t const& graph_view, - zip_iterator_t begin, - zip_iterator_t end, - gpu_t) -{ - using vertex_t = typename graph_view_t::vertex_type; - using edge_t = typename graph_view_t::edge_type; - - auto vertex_partition_range_lasts = graph_view.vertex_partition_range_lasts(); - device_vec_t d_vertex_partition_range_lasts(vertex_partition_range_lasts.size(), - handle.get_stream()); - raft::update_device(d_vertex_partition_range_lasts.data(), - vertex_partition_range_lasts.data(), - vertex_partition_range_lasts.size(), - handle.get_stream()); - - return groupby_gpu_id_and_shuffle_values( - handle.get_comms(), - begin, - end, - [vertex_partition_range_lasts = d_vertex_partition_range_lasts.data(), - num_vertex_partitions = d_vertex_partition_range_lasts.size()] __device__(auto tpl_v_r) { - return static_cast( - thrust::distance(vertex_partition_range_lasts, - thrust::lower_bound(thrust::seq, - vertex_partition_range_lasts, - vertex_partition_range_lasts + num_vertex_partitions, - thrust::get<0>(tpl_v_r)))); - }, - handle.get_stream()); -} - -/** - * @brief Updates pair of vertex IDs and ranks IDs to the GPU's that the vertex IDs belong - * to. - * @tparam graph_view_t Type of graph view. - * @tparam zip_iterator_t zip Type for the zipped tuple (vertexID, rank). - * @tparam gpu_t Type used for storing GPU rank IDs; - * @param[in] handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, - * and handles to various CUDA libraries) to run graph algorithms. - * @param[in] graph_view Graph View object to generate NBR Sampling on. - * @param[in] begin zip begin iterator of (vertexID, rank) pairs. - * @param[in] end zip end iterator of (vertexID, rank) pairs. - * @param[in] rank for which data is to be extracted. - * @param[out] d_in vertex set to be updated. - * @param[out] d_ranks corresponding rank set to be updated. - * @param[in] unnamed tag used for template tag dispatching. - */ -template -void update_input_by_rank(raft::handle_t const& handle, - graph_view_t const& graph_view, - zip_iterator_t begin, - zip_iterator_t end, - size_t rank, - device_vec_t& d_in, - device_vec_t& d_ranks, - gpu_t) -{ - auto&& [rx_tpl_v_r, rx_counts] = - detail::original::shuffle_to_gpus(handle, graph_view, begin, end, gpu_t{}); - - // filter rx_tpl_v_r and rx_counts vector by rank: - // - decltype(rx_counts) rx_offsets(rx_counts.size()); - std::exclusive_scan(rx_counts.begin(), rx_counts.end(), rx_offsets.begin(), 0); - - // resize d_in, d_ranks: - // - auto new_in_sz = rx_counts.at(rank); - d_in.resize(new_in_sz, handle.get_stream()); - d_ranks.resize(new_in_sz, handle.get_stream()); - - // project output onto input: - // zip d_in, d_ranks - // - auto new_in_zip = thrust::make_zip_iterator( - thrust::make_tuple(d_in.begin(), d_ranks.begin())); // result start_zip - - auto&& d_new_dests = std::get<0>(rx_tpl_v_r); - auto&& d_new_ranks = std::get<1>(rx_tpl_v_r); - auto offset = rx_offsets.at(rank); - - auto tpl_in_it_begin = thrust::make_zip_iterator( - thrust::make_tuple(d_new_dests.begin() + offset, d_new_ranks.begin() + offset)); - project<0, 1>(handle, tpl_in_it_begin, tpl_in_it_begin + new_in_sz, new_in_zip); -} - -/** - * @brief Shuffles zipped tuples of (vertex_t source_vertex, vertex_t destination_vertex, int rank, - * index_t index) to specified target GPU's. - * @tparam vertex_t Type of vertex IDs. - * @tparam gpu_t Type used for storing GPU rank IDs. - * @tparam index_t Type used for indexing; typically edge_t. - * @param[in] handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, - * and handles to various CUDA libraries) to run graph algorithms. - * @param[in] d_src source vertex IDs; shuffle prims require it be mutable. - * @param[in] d_dst destination vertex IDs; must be mutable. - * @param[in] d_gpu_id_keys target GPU IDs (ranks); must be mutable. - * @param[in] d_indices indices of destination vertices; must be mutable. - * @return tuple of tuple of device vectors and counts: - * ((vertex_t source_vertex, vertex_t destination_vertex, int rank, edge_t index), rx_counts) - */ -template -std::tuple, - device_vec_t, - device_vec_t, - device_vec_t>, - std::vector> -shuffle_to_target_gpu_ids(raft::handle_t const& handle, - device_vec_t& d_src, - device_vec_t& d_dst, - device_vec_t& d_gpu_id_keys, - device_vec_t& d_indices) -{ - auto zip_it_begin = - thrust::make_zip_iterator(thrust::make_tuple(d_src.begin(), d_dst.begin(), d_indices.begin())); - - thrust::sort_by_key( - handle.get_thrust_policy(), d_gpu_id_keys.begin(), d_gpu_id_keys.end(), zip_it_begin); - - rmm::device_uvector tx_counts(handle.get_comms().get_size(), handle.get_stream()); - - thrust::tabulate( - handle.get_thrust_policy(), - tx_counts.begin(), - tx_counts.end(), - [gpu_id_first = d_gpu_id_keys.begin(), gpu_id_last = d_gpu_id_keys.end()] __device__(size_t i) { - return static_cast(thrust::distance( - gpu_id_first, - thrust::upper_bound(thrust::seq, gpu_id_first, gpu_id_last, static_cast(i)))); - }); - - thrust::adjacent_difference( - handle.get_thrust_policy(), tx_counts.begin(), tx_counts.end(), tx_counts.begin()); - - std::vector h_tx_counts(tx_counts.size()); - raft::update_host(h_tx_counts.data(), tx_counts.data(), tx_counts.size(), handle.get_stream()); - - handle.sync_stream(); - - return // [rx_tuple, rx_counts] - shuffle_values(handle.get_comms(), - thrust::make_zip_iterator(thrust::make_tuple( - d_src.begin(), d_dst.begin(), d_gpu_id_keys.begin(), d_indices.begin())), - h_tx_counts, - handle.get_stream()); -} - -/** - * @brief Multi-GPU Uniform Neighborhood Sampling. The outline of the algorithm: - * - * uniform_nbr_sample(J[p][], L, K[], flag_unique) { - * Out[p][] = {}; // initialize output result - * (empty) - * - * loop level in {0,…, L-1} { // 1 tree level / iteration - * n_per_level = |J| * L^ (level+1); // size of output per level - * - * J[] = union(J[], {J[partition_row], - * for partition_row same as `p`}; - * - * for each pair (s, _) in J[] { // cache out-degrees of src_v - * set; d_out_deg[s] = mnmg_get_out_deg(graph, s); - * } - * - * d_indices[] = segmented_random_generator(d_out_degs[], // sizes[] to define range to - * // sample from; - * K[level], // fanout per-level - * flag_unique); - * // for each (s, _) in J[]{ - * // generate {0,…,out-deg(s)};} - * - * d_out[] = gather_nbr(J[], d_indices[], level, K[level]); // {(s, d, r),…} MNMG prim that - * // gathers the NBR for current - * // level of each src_v; - * // output is set of triplets - * // (src_v, dst_v, - * rank_to_send_to) Out[p][] = union(Out[p][], d_out[]); // append local - * output to result d_out[] = shuffle(d_out[]); // reshuffle output - * to - * // corresponding rank - * J[] = project(d_out[], []((s,d,r)){ return (d,r);}); // extract the (d, r) from (s,d, - * r) - * // for next iter - * } - * return Out[p][]; - * } - * - * @tparam graph_view_t Type of graph view. - * @tparam gpu_t Type used for storing GPU rank IDs; - * @tparam index_t Type used for indexing; typically edge_t. - * @tparam seeder_t Type for generating random engine seeds. - * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and - * handles to various CUDA libraries) to run graph algorithms. - * @param graph_view Graph View object to generate NBR Sampling on. - * @param d_in Device vector of starting vertex IDs for the NBR Sampling. Must be non-const for - * shuffling. - * @param d_ranks Device vector of ranks for which corresponding vertex ID data must be sent to. The - * pairs (vertex_ID, rank) must be shuffled together. Must be non-const for shuffling. - * @param h_fan_out vector of branching out (fan-out) degree per source vertex for each level - * @param global_degree_offsets local partition of global out-degree cache; pass-through - * parameter used for obtaining local out-degree information - * @param flag_replacement boolean flag specifying if random sampling is done without replacement - * (true); or, with replacement (false); default = true; - * @return tuple of device vectors: - * (vertex_t source_vertex, vertex_t destination_vertex, int rank, edge_t index) - */ -template > -std::tuple, - device_vec_t, - device_vec_t, - device_vec_t> -uniform_nbr_sample_impl( - raft::handle_t const& handle, - graph_view_t const& graph_view, - device_vec_t& d_in, - device_vec_t& d_ranks, - std::vector const& h_fan_out, - device_vec_t const& global_out_degrees, - device_vec_t const& global_degree_offsets, - device_vec_t const& global_adjacency_list_offsets, - bool flag_replacement) -{ - using vertex_t = typename graph_view_t::vertex_type; - using edge_t = typename graph_view_t::edge_type; - using return_t = std::tuple, - device_vec_t, - device_vec_t, - device_vec_t>; - namespace cugraph_ops = cugraph::ops::gnn::graph; - - if constexpr (graph_view_t::is_multi_gpu) { - size_t num_starting_vs = d_in.size(); - - CUGRAPH_EXPECTS(num_starting_vs == d_ranks.size(), - "Sets of input vertices and ranks must have same sizes."); - - auto num_levels = h_fan_out.size(); - - CUGRAPH_EXPECTS(num_levels > 0, "Invalid input argument: number of levels must be non-zero."); - - // Output quad of accumulators to collect results into: - // (all start as empty) - // - device_vec_t d_acc_src(0, handle.get_stream()); - device_vec_t d_acc_dst(0, handle.get_stream()); - device_vec_t d_acc_ranks(0, handle.get_stream()); - device_vec_t d_acc_indices(0, handle.get_stream()); - - auto&& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto&& row_rank = row_comm.get_rank(); - - auto&& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - auto&& col_rank = col_comm.get_rank(); - - auto const self_rank = handle.get_comms().get_rank(); - - size_t level{0l}; - for (auto&& k_level : h_fan_out) { - // prep step for extracting out-degs(sources): - // - auto&& [d_new_in, d_new_rank] = - gather_active_majors(handle, graph_view, d_in.cbegin(), d_in.cend(), d_ranks.cbegin()); - - rmm::device_uvector d_out_src(0, handle.get_stream()); - rmm::device_uvector d_out_dst(0, handle.get_stream()); - rmm::device_uvector d_out_ranks(0, handle.get_stream()); - rmm::device_uvector d_indices(0, handle.get_stream()); - - if (k_level != 0) { - // extract out-degs(sources): - // - auto&& d_out_degs = - get_active_major_global_degrees(handle, graph_view, d_new_in, global_out_degrees); - - // segemented-random-generation of indices: - // - device_vec_t d_rnd_indices(d_new_in.size() * k_level, handle.get_stream()); - - raft::random::RngState rng_state(row_rank + level); - cugraph_ops::get_sampling_index(detail::original::raw_ptr(d_rnd_indices), - rng_state, - detail::original::raw_const_ptr(d_out_degs), - static_cast(d_out_degs.size()), - static_cast(k_level), - flag_replacement, - handle.get_stream()); - - // gather edges step: - // invalid entries (not found, etc.) filtered out in result; - // d_indices[] filtered out in-place (to avoid copies+moves); - // - auto&& [temp_d_out_src, temp_d_out_dst, temp_d_out_ranks, temp_d_indices] = - gather_local_edges(handle, - graph_view, - d_new_in, - d_new_rank, - std::move(d_rnd_indices), - static_cast(k_level), - global_degree_offsets, - global_adjacency_list_offsets); - d_out_src = std::move(temp_d_out_src); - d_out_dst = std::move(temp_d_out_dst); - d_out_ranks = std::move(temp_d_out_ranks); - d_indices = std::move(temp_d_indices); - } else { - auto&& [temp_d_out_src, temp_d_out_dst, temp_d_out_ranks, temp_d_indices] = - gather_one_hop_edgelist( - handle, graph_view, d_new_in, d_new_rank, global_adjacency_list_offsets); - d_out_src = std::move(temp_d_out_src); - d_out_dst = std::move(temp_d_out_dst); - d_out_ranks = std::move(temp_d_out_ranks); - d_indices = std::move(temp_d_indices); - } - - // resize accumulators: - // - auto old_sz = d_acc_dst.size(); - auto add_sz = d_out_dst.size(); - auto new_sz = old_sz + add_sz; - - d_acc_src.resize(new_sz, handle.get_stream()); - d_acc_dst.resize(new_sz, handle.get_stream()); - d_acc_ranks.resize(new_sz, handle.get_stream()); - d_acc_indices.resize(new_sz, handle.get_stream()); - - // zip quad; must be done after resizing, - // because they grow from one iteration to another, - // so iterators could be invalidated: - // - auto acc_zip_it = - thrust::make_zip_iterator(thrust::make_tuple(d_acc_src.begin() + old_sz, - d_acc_dst.begin() + old_sz, - d_acc_ranks.begin() + old_sz, - d_acc_indices.begin() + old_sz)); - - // union step: - // - auto out_zip_it = thrust::make_zip_iterator(thrust::make_tuple( - d_out_src.begin(), d_out_dst.begin(), d_out_ranks.begin(), d_indices.begin())); - - thrust::copy_n(handle.get_thrust_policy(), out_zip_it, add_sz, acc_zip_it); - - // shuffle step: update input for self_rank - // zipping is necessary to preserve rank info during shuffle! - // - auto next_in_zip_begin = - thrust::make_zip_iterator(thrust::make_tuple(d_out_dst.begin(), d_out_ranks.begin())); - auto next_in_zip_end = - thrust::make_zip_iterator(thrust::make_tuple(d_out_dst.end(), d_out_ranks.end())); - - update_input_by_rank(handle, - graph_view, - next_in_zip_begin, - next_in_zip_end, - static_cast(self_rank), - d_in, - d_ranks, - gpu_t{}); - - ++level; - } - - return std::make_tuple( - std::move(d_acc_src), std::move(d_acc_dst), std::move(d_acc_ranks), std::move(d_acc_indices)); - } else { - CUGRAPH_FAIL("Neighborhood sampling functionality is supported only for the multi-gpu case."); - } -} - -} // namespace original -} // namespace detail - -template -std::tuple, - rmm::device_uvector, - rmm::device_uvector, - rmm::device_uvector>, - std::vector> -uniform_nbr_sample(raft::handle_t const& handle, - graph_view_t const& graph_view, - typename graph_view_t::vertex_type const* ptr_d_start, - gpu_t const* ptr_d_ranks, - size_t num_starting_vs, - std::vector const& h_fan_out, - bool flag_replacement) -{ - using vertex_t = typename graph_view_t::vertex_type; - using edge_t = typename graph_view_t::edge_type; - - size_t const self_rank = handle.get_comms().get_rank(); - - // shuffle input data to its corresponding rank; - // (Note: shuffle prims require mutable iterators) - // - detail::original::device_vec_t d_start_vs(num_starting_vs, handle.get_stream()); - detail::original::device_vec_t d_ranks(num_starting_vs, handle.get_stream()); - // ...hence copy required: - // - thrust::copy_n(handle.get_thrust_policy(), ptr_d_start, num_starting_vs, d_start_vs.begin()); - thrust::copy_n(handle.get_thrust_policy(), ptr_d_ranks, num_starting_vs, d_ranks.begin()); - - // shuffle data to local rank: - // - auto next_in_zip_begin = - thrust::make_zip_iterator(thrust::make_tuple(d_start_vs.begin(), d_ranks.begin())); - - auto next_in_zip_end = - thrust::make_zip_iterator(thrust::make_tuple(d_start_vs.end(), d_ranks.end())); - - detail::original::update_input_by_rank(handle, - graph_view, - next_in_zip_begin, - next_in_zip_end, - self_rank, - d_start_vs, - d_ranks, - gpu_t{}); - - // preamble step for out-degree info: - // - auto&& [global_degree_offsets, global_out_degrees] = - detail::original::get_global_degree_information(handle, graph_view); - auto&& global_adjacency_list_offsets = detail::original::get_global_adjacency_offset( - handle, graph_view, global_degree_offsets, global_out_degrees); - - // extract output quad SOA: - // - auto&& [d_src, d_dst, d_gpus, d_indices] = - detail::original::uniform_nbr_sample_impl(handle, - graph_view, - d_start_vs, - d_ranks, - h_fan_out, - global_out_degrees, - global_degree_offsets, - global_adjacency_list_offsets, - flag_replacement); - - // shuffle quad SOA by d_gpus: - // - return detail::original::shuffle_to_target_gpu_ids(handle, d_src, d_dst, d_gpus, d_indices); -} - -} // namespace cugraph diff --git a/cpp/src/sampling/nbr_sampling_mg.cu b/cpp/src/sampling/nbr_sampling_mg.cu deleted file mode 100644 index efb79d3995a..00000000000 --- a/cpp/src/sampling/nbr_sampling_mg.cu +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2022, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -#include "nbr_sampling_impl.cuh" - -namespace cugraph { -// template explicit instantiation directives (EIDir's): -// -// SG FP32{ -template std::tuple, - rmm::device_uvector, - rmm::device_uvector, - rmm::device_uvector>, - std::vector> -uniform_nbr_sample(raft::handle_t const& handle, - graph_view_t const& gview, - int32_t const* ptr_d_start, - int32_t const* ptr_d_ranks, - size_t num_starting_vs, - std::vector const& h_fan_out, - bool flag_replacement); - -template std::tuple, - rmm::device_uvector, - rmm::device_uvector, - rmm::device_uvector>, - std::vector> -uniform_nbr_sample(raft::handle_t const& handle, - graph_view_t const& gview, - int32_t const* ptr_d_start, - int32_t const* ptr_d_ranks, - size_t num_starting_vs, - std::vector const& h_fan_out, - bool flag_replacement); - -template std::tuple, - rmm::device_uvector, - rmm::device_uvector, - rmm::device_uvector>, - std::vector> -uniform_nbr_sample(raft::handle_t const& handle, - graph_view_t const& gview, - int64_t const* ptr_d_start, - int32_t const* ptr_d_ranks, - size_t num_starting_vs, - std::vector const& h_fan_out, - bool flag_replacement); -//} -// -// SG FP64{ -template std::tuple, - rmm::device_uvector, - rmm::device_uvector, - rmm::device_uvector>, - std::vector> -uniform_nbr_sample(raft::handle_t const& handle, - graph_view_t const& gview, - int32_t const* ptr_d_start, - int32_t const* ptr_d_ranks, - size_t num_starting_vs, - std::vector const& h_fan_out, - bool flag_replacement); - -template std::tuple, - rmm::device_uvector, - rmm::device_uvector, - rmm::device_uvector>, - std::vector> -uniform_nbr_sample(raft::handle_t const& handle, - graph_view_t const& gview, - int32_t const* ptr_d_start, - int32_t const* ptr_d_ranks, - size_t num_starting_vs, - std::vector const& h_fan_out, - bool flag_replacement); - -template std::tuple, - rmm::device_uvector, - rmm::device_uvector, - rmm::device_uvector>, - std::vector> -uniform_nbr_sample(raft::handle_t const& handle, - graph_view_t const& gview, - int64_t const* ptr_d_start, - int32_t const* ptr_d_ranks, - size_t num_starting_vs, - std::vector const& h_fan_out, - bool flag_replacement); -//} - -} // namespace cugraph diff --git a/cpp/src/sampling/uniform_neighbor_sampling_impl.hpp b/cpp/src/sampling/uniform_neighbor_sampling_impl.hpp index 2748d75d2fc..9fe22d8a8d5 100644 --- a/cpp/src/sampling/uniform_neighbor_sampling_impl.hpp +++ b/cpp/src/sampling/uniform_neighbor_sampling_impl.hpp @@ -52,7 +52,6 @@ uniform_nbr_sample_impl( raft::host_span h_fan_out, rmm::device_uvector const& global_out_degrees, rmm::device_uvector const& global_degree_offsets, - rmm::device_uvector const& global_adjacency_list_offsets, bool with_replacement, uint64_t seed) { @@ -123,8 +122,7 @@ uniform_nbr_sample_impl( d_in, std::move(d_rnd_indices), static_cast(k_level), - global_degree_offsets, - global_adjacency_list_offsets); + global_degree_offsets); } else { std::tie(d_out_src, d_out_dst, d_out_indices) = gather_one_hop_edgelist(handle, graph_view, d_in); @@ -183,8 +181,6 @@ uniform_nbr_sample( // auto&& [global_degree_offsets, global_out_degrees] = detail::get_global_degree_information(handle, graph_view); - auto&& global_adjacency_list_offsets = detail::get_global_adjacency_offset( - handle, graph_view, global_degree_offsets, global_out_degrees); return detail::uniform_nbr_sample_impl(handle, graph_view, @@ -192,7 +188,6 @@ uniform_nbr_sample( fan_out, global_out_degrees, global_degree_offsets, - global_adjacency_list_offsets, with_replacement, seed); } diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 26fc60e2cd6..a9841403723 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -610,10 +610,6 @@ if(BUILD_CUGRAPH_MG_TESTS) # - MG GATHER_ONE_HOP tests --------------------------------------------------------------- ConfigureTestMG(MG_GATHER_ONE_HOP_TEST sampling/detail/mg_gather_one_hop.cu) - ########################################################################################### - # - MG NBR SAMPLING tests ----------------------------------------------------------------- - ConfigureTestMG(MG_NBR_SAMPLING_TEST sampling/detail/mg_nbr_sampling.cu) - ########################################################################################### # - MG NBR SAMPLING tests ----------------------------------------------------------------- ConfigureTestMG(MG_UNIFORM_NEIGHBOR_SAMPLING_TEST sampling/mg_uniform_neighbor_sampling.cu) diff --git a/cpp/tests/c_api/mg_uniform_neighbor_sample_test.c b/cpp/tests/c_api/mg_uniform_neighbor_sample_test.c index 8fbd80a90c0..46a54b74cc3 100644 --- a/cpp/tests/c_api/mg_uniform_neighbor_sample_test.c +++ b/cpp/tests/c_api/mg_uniform_neighbor_sample_test.c @@ -25,18 +25,18 @@ typedef int32_t vertex_t; typedef int32_t edge_t; typedef float weight_t; -int generic_experimental_uniform_neighbor_sample_test(const cugraph_resource_handle_t* handle, - vertex_t* h_src, - vertex_t* h_dst, - edge_t* h_idx, - size_t num_vertices, - size_t num_edges, - vertex_t* h_start, - size_t num_starts, - int* fan_out, - size_t max_depth, - bool_t with_replacement, - bool_t store_transposed) +int generic_uniform_neighbor_sample_test(const cugraph_resource_handle_t* handle, + vertex_t* h_src, + vertex_t* h_dst, + edge_t* h_idx, + size_t num_vertices, + size_t num_edges, + vertex_t* h_start, + size_t num_starts, + int* fan_out, + size_t max_depth, + bool_t with_replacement, + bool_t store_transposed) { int test_ret_value = 0; @@ -66,7 +66,7 @@ int generic_experimental_uniform_neighbor_sample_test(const cugraph_resource_han h_fan_out_view = cugraph_type_erased_host_array_view_create(fan_out, max_depth, INT32); - ret_code = cugraph_experimental_uniform_neighbor_sample( + ret_code = cugraph_uniform_neighbor_sample( handle, graph, d_start_view, h_fan_out_view, with_replacement, FALSE, &result, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); @@ -123,140 +123,6 @@ int generic_experimental_uniform_neighbor_sample_test(const cugraph_resource_han return test_ret_value; } -int generic_uniform_neighbor_sample_test(const cugraph_resource_handle_t* handle, - vertex_t* h_src, - vertex_t* h_dst, - weight_t* h_wgt, - size_t num_vertices, - size_t num_edges, - vertex_t* h_start, - int* h_start_label, - size_t num_starts, - int* fan_out, - size_t max_depth, - bool_t with_replacement, - bool_t store_transposed) -{ - int test_ret_value = 0; - - cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; - cugraph_error_t* ret_error = NULL; - - cugraph_graph_t* graph = NULL; - cugraph_sample_result_t* result = NULL; - - cugraph_type_erased_device_array_t* d_start = NULL; - cugraph_type_erased_device_array_view_t* d_start_view = NULL; - cugraph_type_erased_device_array_t* d_start_label = NULL; - cugraph_type_erased_device_array_view_t* d_start_label_view = NULL; - cugraph_type_erased_host_array_view_t* h_fan_out_view = NULL; - - ret_code = create_mg_test_graph( - handle, h_src, h_dst, h_wgt, num_edges, store_transposed, FALSE, &graph, &ret_error); - TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "graph creation failed."); - - ret_code = - cugraph_type_erased_device_array_create(handle, num_starts, INT32, &d_start, &ret_error); - TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "d_start create failed."); - - d_start_view = cugraph_type_erased_device_array_view(d_start); - - ret_code = cugraph_type_erased_device_array_view_copy_from_host( - handle, d_start_view, (byte_t*)h_start, &ret_error); - TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "start copy_from_host failed."); - - ret_code = - cugraph_type_erased_device_array_create(handle, num_starts, INT32, &d_start_label, &ret_error); - TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "d_start_label create failed."); - - d_start_label_view = cugraph_type_erased_device_array_view(d_start_label); - - ret_code = cugraph_type_erased_device_array_view_copy_from_host( - handle, d_start_label_view, (byte_t*)h_start_label, &ret_error); - TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "start copy_from_host failed."); - - h_fan_out_view = cugraph_type_erased_host_array_view_create(fan_out, max_depth, INT32); - - ret_code = cugraph_uniform_neighbor_sample(handle, - graph, - d_start_view, - d_start_label_view, - h_fan_out_view, - with_replacement, - FALSE, - &result, - &ret_error); - - TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); - TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "uniform_neighbor_sample failed."); - - cugraph_type_erased_device_array_view_t* srcs; - cugraph_type_erased_device_array_view_t* dsts; - cugraph_type_erased_device_array_view_t* labels; - cugraph_type_erased_device_array_view_t* index; - cugraph_type_erased_host_array_view_t* counts; - - srcs = cugraph_sample_result_get_sources(result); - dsts = cugraph_sample_result_get_destinations(result); - labels = cugraph_sample_result_get_start_labels(result); - index = cugraph_sample_result_get_index(result); - counts = cugraph_sample_result_get_counts(result); - - size_t result_size = cugraph_type_erased_device_array_view_size(srcs); - - vertex_t h_srcs[result_size]; - vertex_t h_dsts[result_size]; - int h_labels[result_size]; - edge_t h_index[result_size]; - size_t* h_counts; - - ret_code = - cugraph_type_erased_device_array_view_copy_to_host(handle, (byte_t*)h_srcs, srcs, &ret_error); - TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); - - ret_code = - cugraph_type_erased_device_array_view_copy_to_host(handle, (byte_t*)h_dsts, dsts, &ret_error); - TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); - - ret_code = cugraph_type_erased_device_array_view_copy_to_host( - handle, (byte_t*)h_labels, labels, &ret_error); - TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); - - ret_code = - cugraph_type_erased_device_array_view_copy_to_host(handle, (byte_t*)h_index, index, &ret_error); - TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); - - h_counts = (size_t*)cugraph_type_erased_host_array_pointer(counts); - - // NOTE: The C++ tester does a more thorough validation. For our purposes - // here we will do a simpler validation, merely checking that all edges - // are actually part of the graph - weight_t M[num_vertices][num_vertices]; - - for (int i = 0; i < num_vertices; ++i) - for (int j = 0; j < num_vertices; ++j) - M[i][j] = 0.0; - - for (int i = 0; i < num_edges; ++i) - M[h_src[i]][h_dst[i]] = h_wgt[i]; - - for (int i = 0; (i < result_size) && (test_ret_value == 0); ++i) { - TEST_ASSERT(test_ret_value, - M[h_srcs[i]][h_dsts[i]] > 0.0, - "uniform_neighbor_sample got edge that doesn't exist"); - - bool_t found = FALSE; - for (int j = 0; j < num_starts; ++j) - found = found || (h_labels[i] == h_start_label[j]); - - TEST_ASSERT(test_ret_value, found, "invalid label"); - } - - cugraph_type_erased_host_array_view_free(h_fan_out_view); - - return test_ret_value; -} - int test_uniform_neighbor_sample(const cugraph_resource_handle_t* handle) { size_t num_edges = 8; @@ -264,21 +130,19 @@ int test_uniform_neighbor_sample(const cugraph_resource_handle_t* handle) size_t fan_out_size = 2; size_t num_starts = 2; - vertex_t src[] = {0, 1, 1, 2, 2, 2, 3, 4}; - vertex_t dst[] = {1, 3, 4, 0, 1, 3, 5, 5}; - weight_t wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; - vertex_t start[] = {2, 2}; - vertex_t start_labels[] = {0, 1}; - int fan_out[] = {1, 2}; + vertex_t src[] = {0, 1, 1, 2, 2, 2, 3, 4}; + vertex_t dst[] = {1, 3, 4, 0, 1, 3, 5, 5}; + edge_t idx[] = {0, 1, 2, 3, 4, 5, 6, 7}; + vertex_t start[] = {2, 2}; + int fan_out[] = {1, 2}; return generic_uniform_neighbor_sample_test(handle, src, dst, - wgt, + idx, num_vertices, num_edges, start, - start_labels, num_starts, fan_out, fan_out_size, @@ -286,33 +150,6 @@ int test_uniform_neighbor_sample(const cugraph_resource_handle_t* handle) FALSE); } -int test_experimental_uniform_neighbor_sample(const cugraph_resource_handle_t* handle) -{ - size_t num_edges = 8; - size_t num_vertices = 6; - size_t fan_out_size = 2; - size_t num_starts = 2; - - vertex_t src[] = {0, 1, 1, 2, 2, 2, 3, 4}; - vertex_t dst[] = {1, 3, 4, 0, 1, 3, 5, 5}; - edge_t idx[] = {0, 1, 2, 3, 4, 5, 6, 7}; - vertex_t start[] = {2, 2}; - int fan_out[] = {1, 2}; - - return generic_experimental_uniform_neighbor_sample_test(handle, - src, - dst, - idx, - num_vertices, - num_edges, - start, - num_starts, - fan_out, - fan_out_size, - TRUE, - FALSE); -} - /******************************************************************************/ int main(int argc, char** argv) @@ -340,7 +177,6 @@ int main(int argc, char** argv) if (result == 0) { result |= RUN_MG_TEST(test_uniform_neighbor_sample, handle); - result |= RUN_MG_TEST(test_experimental_uniform_neighbor_sample, handle); cugraph_free_resource_handle(handle); } diff --git a/cpp/tests/c_api/uniform_neighbor_sample_test.c b/cpp/tests/c_api/uniform_neighbor_sample_test.c index 428ccbec7a9..180ab96566a 100644 --- a/cpp/tests/c_api/uniform_neighbor_sample_test.c +++ b/cpp/tests/c_api/uniform_neighbor_sample_test.c @@ -110,18 +110,18 @@ int create_test_graph_with_edge_ids(const cugraph_resource_handle_t* p_handle, return test_ret_value; } -int generic_experimental_uniform_neighbor_sample_test(vertex_t* h_src, - vertex_t* h_dst, - edge_t* h_ids, - size_t num_vertices, - size_t num_edges, - vertex_t* h_start, - size_t num_starts, - int* fan_out, - size_t max_depth, - bool_t with_replacement, - bool_t renumber, - bool_t store_transposed) +int generic_uniform_neighbor_sample_test(vertex_t* h_src, + vertex_t* h_dst, + edge_t* h_ids, + size_t num_vertices, + size_t num_edges, + vertex_t* h_start, + size_t num_starts, + int* fan_out, + size_t max_depth, + bool_t with_replacement, + bool_t renumber, + bool_t store_transposed) { int test_ret_value = 0; @@ -155,7 +155,7 @@ int generic_experimental_uniform_neighbor_sample_test(vertex_t* h_src, h_fan_out_view = cugraph_type_erased_host_array_view_create(fan_out, max_depth, INT32); - ret_code = cugraph_experimental_uniform_neighbor_sample( + ret_code = cugraph_uniform_neighbor_sample( handle, graph, d_start_view, h_fan_out_view, with_replacement, FALSE, &result, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); @@ -210,151 +210,6 @@ int generic_experimental_uniform_neighbor_sample_test(vertex_t* h_src, return test_ret_value; } -int generic_uniform_neighbor_sample_test(vertex_t* h_src, - vertex_t* h_dst, - weight_t* h_wgt, - size_t num_vertices, - size_t num_edges, - vertex_t* h_start, - int* h_start_label, - size_t num_starts, - int* fan_out, - size_t max_depth, - bool_t with_replacement, - bool_t renumber, - bool_t store_transposed) -{ - int test_ret_value = 0; - - cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; - cugraph_error_t* ret_error = NULL; - - cugraph_resource_handle_t* handle = NULL; - cugraph_graph_t* graph = NULL; - cugraph_sample_result_t* result = NULL; - - cugraph_type_erased_device_array_t* d_start = NULL; - cugraph_type_erased_device_array_view_t* d_start_view = NULL; - cugraph_type_erased_device_array_t* d_start_label = NULL; - cugraph_type_erased_device_array_view_t* d_start_label_view = NULL; - cugraph_type_erased_host_array_view_t* h_fan_out_view = NULL; - - handle = cugraph_create_resource_handle(NULL); - TEST_ASSERT(test_ret_value, handle != NULL, "resource handle creation failed."); - - ret_code = create_test_graph( - handle, h_src, h_dst, h_wgt, num_edges, store_transposed, renumber, FALSE, &graph, &ret_error); - TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "graph creation failed."); - - ret_code = - cugraph_type_erased_device_array_create(handle, num_starts, INT32, &d_start, &ret_error); - TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "d_start create failed."); - - d_start_view = cugraph_type_erased_device_array_view(d_start); - - ret_code = cugraph_type_erased_device_array_view_copy_from_host( - handle, d_start_view, (byte_t*)h_start, &ret_error); - TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "start copy_from_host failed."); - - ret_code = - cugraph_type_erased_device_array_create(handle, num_starts, INT32, &d_start_label, &ret_error); - TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "d_start_label create failed."); - - d_start_label_view = cugraph_type_erased_device_array_view(d_start_label); - - ret_code = cugraph_type_erased_device_array_view_copy_from_host( - handle, d_start_label_view, (byte_t*)h_start_label, &ret_error); - TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "start_label copy_from_host failed."); - - h_fan_out_view = cugraph_type_erased_host_array_view_create(fan_out, max_depth, INT32); - - ret_code = cugraph_uniform_neighbor_sample(handle, - graph, - d_start_view, - d_start_label_view, - h_fan_out_view, - with_replacement, - FALSE, - &result, - &ret_error); - - TEST_ASSERT(test_ret_value, - ret_code != CUGRAPH_SUCCESS, - "cugraph_uniform_neighbor_sample expected to fail in SG test"); - -#if 0 - // FIXME: cugraph_uniform_neighbor_sample does not support SG - TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); - TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "uniform_neighbor_sample failed."); - - cugraph_type_erased_device_array_view_t* srcs; - cugraph_type_erased_device_array_view_t* dsts; - cugraph_type_erased_device_array_view_t* labels; - cugraph_type_erased_device_array_view_t* index; - cugraph_type_erased_host_array_view_t* counts; - - srcs = cugraph_sample_result_get_sources(result); - dsts = cugraph_sample_result_get_destinations(result); - labels = cugraph_sample_result_get_start_labels(result); - index = cugraph_sample_result_get_index(result); - counts = cugraph_sample_result_get_counts(result); - - size_t result_size = cugraph_type_erased_device_array_view_size(srcs); - - vertex_t h_srcs[result_size]; - vertex_t h_dsts[result_size]; - int h_labels[result_size]; - edge_t h_index[result_size]; - size_t* h_counts; - - ret_code = - cugraph_type_erased_device_array_view_copy_to_host(handle, (byte_t*)h_srcs, srcs, &ret_error); - TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); - - ret_code = - cugraph_type_erased_device_array_view_copy_to_host(handle, (byte_t*)h_dsts, dsts, &ret_error); - TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); - - ret_code = cugraph_type_erased_device_array_view_copy_to_host( - handle, (byte_t*)h_labels, labels, &ret_error); - TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); - - ret_code = - cugraph_type_erased_device_array_view_copy_to_host(handle, (byte_t*)h_index, index, &ret_error); - TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); - - h_counts = (size_t*)cugraph_type_erased_host_array_pointer(counts); - - // NOTE: The C++ tester does a more thorough validation. For our purposes - // here we will do a simpler validation, merely checking that all edges - // are actually part of the graph - weight_t M[num_vertices][num_vertices]; - - for (int i = 0; i < num_vertices; ++i) - for (int j = 0; j < num_vertices; ++j) - M[i][j] = 0.0; - - for (int i = 0; i < num_edges; ++i) - M[h_src[i]][h_dst[i]] = h_wgt[i]; - - for (int i = 0; (i < result_size) && (test_ret_value == 0); ++i) { - TEST_ASSERT(test_ret_value, - M[h_srcs[i]][h_dsts[i]] > 0.0, - "uniform_neighbor_sample got edge that doesn't exist"); - - bool_t found = FALSE; - for (int j = 0; j < num_starts; ++j) - found = found || (h_labels[i] == h_start_label[j]); - - TEST_ASSERT(test_ret_value, found, "invalid label"); - } - - cugraph_type_erased_host_array_view_free(h_fan_out_view); -#endif - - return test_ret_value; -} - int test_uniform_neighbor_sample() { size_t num_edges = 8; @@ -362,20 +217,18 @@ int test_uniform_neighbor_sample() size_t fan_out_size = 2; size_t num_starts = 2; - vertex_t src[] = {0, 1, 1, 2, 2, 2, 3, 4}; - vertex_t dst[] = {1, 3, 4, 0, 1, 3, 5, 5}; - weight_t wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; - vertex_t start[] = {2, 2}; - vertex_t start_labels[] = {0, 1}; - int fan_out[] = {1, 2}; + vertex_t src[] = {0, 1, 1, 2, 2, 2, 3, 4}; + vertex_t dst[] = {1, 3, 4, 0, 1, 3, 5, 5}; + edge_t edge_ids[] = {0, 1, 2, 3, 4, 5, 6, 7}; + vertex_t start[] = {2, 2}; + int fan_out[] = {1, 2}; return generic_uniform_neighbor_sample_test(src, dst, - wgt, + edge_ids, num_vertices, num_edges, start, - start_labels, num_starts, fan_out, fan_out_size, @@ -384,37 +237,9 @@ int test_uniform_neighbor_sample() FALSE); } -int test_experimental_uniform_neighbor_sample() -{ - size_t num_edges = 8; - size_t num_vertices = 6; - size_t fan_out_size = 2; - size_t num_starts = 2; - - vertex_t src[] = {0, 1, 1, 2, 2, 2, 3, 4}; - vertex_t dst[] = {1, 3, 4, 0, 1, 3, 5, 5}; - edge_t edge_ids[] = {0, 1, 2, 3, 4, 5, 6, 7}; - vertex_t start[] = {2, 2}; - int fan_out[] = {1, 2}; - - return generic_experimental_uniform_neighbor_sample_test(src, - dst, - edge_ids, - num_vertices, - num_edges, - start, - num_starts, - fan_out, - fan_out_size, - TRUE, - FALSE, - FALSE); -} - int main(int argc, char** argv) { int result = 0; result |= RUN_TEST(test_uniform_neighbor_sample); - result |= RUN_TEST(test_experimental_uniform_neighbor_sample); return result; } diff --git a/cpp/tests/sampling/detail/mg_gather_one_hop.cu b/cpp/tests/sampling/detail/mg_gather_one_hop.cu index 7e7dc50bb09..11e3df78f8e 100644 --- a/cpp/tests/sampling/detail/mg_gather_one_hop.cu +++ b/cpp/tests/sampling/detail/mg_gather_one_hop.cu @@ -15,6 +15,9 @@ */ #include "nbr_sampling_utils.cuh" + +#include + #include #include @@ -92,49 +95,43 @@ class Tests_MG_GatherEdges // Generate random vertex ids in the range of current gpu auto [global_degree_offsets, global_out_degrees] = - cugraph::detail::original::get_global_degree_information(handle, mg_graph_view); - auto global_adjacency_list_offsets = cugraph::detail::original::get_global_adjacency_offset( - handle, mg_graph_view, global_degree_offsets, global_out_degrees); + cugraph::detail::get_global_degree_information(handle, mg_graph_view); // Generate random sources to gather on auto random_sources = cugraph::test::random_vertex_ids(handle, mg_graph_view.local_vertex_partition_range_first(), mg_graph_view.local_vertex_partition_range_last(), - source_sample_count, + std::min(mg_graph_view.local_vertex_partition_range_size() * + (repetitions_per_vertex + vertex_t{1}), + source_sample_count), repetitions_per_vertex); - rmm::device_uvector random_source_gpu_ids(random_sources.size(), handle.get_stream()); - thrust::fill(handle.get_thrust_policy(), - random_source_gpu_ids.begin(), - random_source_gpu_ids.end(), - comm_rank); - - auto [active_sources, active_source_gpu_ids] = - cugraph::detail::original::gather_active_majors(handle, - mg_graph_view, - random_sources.cbegin(), - random_sources.cend(), - random_source_gpu_ids.cbegin()); - - auto [src, dst, gpu_ids, edge_ids] = cugraph::detail::original::gather_one_hop_edgelist( - handle, mg_graph_view, active_sources, active_source_gpu_ids, global_adjacency_list_offsets); + + // FIXME: allgather is probably a poor name for this function. + // It's really an allgather across the row communicator + auto active_sources = + cugraph::detail::allgather_active_majors(handle, std::move(random_sources)); + + auto [src, dst, edge_ids] = + cugraph::detail::gather_one_hop_edgelist(handle, mg_graph_view, active_sources); if (prims_usecase.check_correctness) { - // Gather outputs - auto mg_out_srcs = cugraph::test::device_gatherv(handle, src.data(), src.size()); - auto mg_out_dsts = cugraph::test::device_gatherv(handle, dst.data(), dst.size()); - auto mg_out_prop = cugraph::test::device_gatherv(handle, gpu_ids.data(), gpu_ids.size()); - - auto mg_out_edge_ids = - cugraph::test::device_gatherv(handle, edge_ids.data(), edge_ids.size()); - - // Gather inputs - auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - auto const col_rank = col_comm.get_rank(); - auto sg_active_sources = cugraph::test::device_gatherv( - handle, active_sources.data(), col_rank == 0 ? active_sources.size() : 0); - auto sg_active_sources_gpu_ids = cugraph::test::device_gatherv( - handle, active_source_gpu_ids.data(), col_rank == 0 ? active_source_gpu_ids.size() : 0); + // Gather outputs to gpu 0 + auto mg_out_srcs = cugraph::test::device_gatherv( + handle, raft::device_span{src.data(), src.size()}); + auto mg_out_dsts = cugraph::test::device_gatherv( + handle, raft::device_span{dst.data(), dst.size()}); + + // Gather relevant edges from graph + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_rank = col_comm.get_rank(); + auto all_active_sources = cugraph::test::device_allgatherv( + handle, + raft::device_span{active_sources.data(), + col_rank == 0 ? active_sources.size() : 0}); + + thrust::sort( + handle.get_thrust_policy(), all_active_sources.begin(), all_active_sources.end()); // Gather input graph edgelist rmm::device_uvector sg_src(0, handle.get_stream()); @@ -142,42 +139,57 @@ class Tests_MG_GatherEdges std::tie(sg_src, sg_dst, std::ignore) = mg_graph_view.decompress_to_edgelist(handle, std::nullopt); - auto aggregated_sg_src = cugraph::test::device_gatherv(handle, sg_src.begin(), sg_src.size()); - auto aggregated_sg_dst = cugraph::test::device_gatherv(handle, sg_dst.begin(), sg_dst.size()); - - sort_coo(handle, mg_out_srcs, mg_out_prop, mg_out_dsts); - - if (handle.get_comms().get_rank() == int{0}) { - cugraph::graph_t sg_graph(handle); - auto aggregated_edge_iter = thrust::make_zip_iterator( - thrust::make_tuple(aggregated_sg_src.begin(), aggregated_sg_dst.begin())); - thrust::sort(handle.get_thrust_policy(), - aggregated_edge_iter, - aggregated_edge_iter + aggregated_sg_src.size()); - auto sg_graph_properties = - cugraph::graph_properties_t{mg_graph_view.is_symmetric(), mg_graph_view.is_multigraph()}; - - std::tie(sg_graph, std::ignore) = - cugraph::create_graph_from_edgelist( - handle, - std::nullopt, - std::move(aggregated_sg_src), - std::move(aggregated_sg_dst), - std::nullopt, - sg_graph_properties, - false); - auto sg_graph_view = sg_graph.view(); - // Call single gpu gather - auto [sg_out_srcs, sg_out_dsts, sg_out_prop] = - sg_gather_edges(handle, sg_graph_view, sg_active_sources, sg_active_sources_gpu_ids); - sort_coo(handle, sg_out_srcs, sg_out_prop, sg_out_dsts); - - auto passed = thrust::equal( - handle.get_thrust_policy(), sg_out_srcs.begin(), sg_out_srcs.end(), mg_out_srcs.begin()); - passed &= thrust::equal( - handle.get_thrust_policy(), sg_out_dsts.begin(), sg_out_dsts.end(), mg_out_dsts.begin()); - ASSERT_TRUE(passed); - } + auto begin_iter = thrust::make_zip_iterator(sg_src.begin(), sg_dst.begin()); + auto new_end = thrust::remove_if( + handle.get_thrust_policy(), + begin_iter, + begin_iter + sg_src.size(), + [sources = all_active_sources.data(), size = all_active_sources.size()] __device__(auto t) { + auto src = thrust::get<0>(t); + return !thrust::binary_search(thrust::seq, sources, sources + size, src); + }); + + sg_src.resize(thrust::distance(begin_iter, new_end), handle.get_stream()); + sg_dst.resize(thrust::distance(begin_iter, new_end), handle.get_stream()); + + auto aggregated_sg_src = cugraph::test::device_gatherv( + handle, raft::device_span{sg_src.begin(), sg_src.size()}); + auto aggregated_sg_dst = cugraph::test::device_gatherv( + handle, raft::device_span{sg_dst.begin(), sg_dst.size()}); + + thrust::sort(handle.get_thrust_policy(), + thrust::make_zip_iterator(mg_out_srcs.begin(), mg_out_dsts.begin()), + thrust::make_zip_iterator(mg_out_srcs.end(), mg_out_dsts.end())); + + thrust::sort(handle.get_thrust_policy(), + thrust::make_zip_iterator(aggregated_sg_src.begin(), aggregated_sg_dst.begin()), + thrust::make_zip_iterator(aggregated_sg_src.end(), aggregated_sg_dst.end())); + + // FIXME: This is ignoring the case of the same seed being specified multiple + // times. Not sure that's worth worrying about, so taking the easy way out here. + auto unique_end = + thrust::unique(handle.get_thrust_policy(), + thrust::make_zip_iterator(mg_out_srcs.begin(), mg_out_dsts.begin()), + thrust::make_zip_iterator(mg_out_srcs.end(), mg_out_dsts.end())); + + mg_out_srcs.resize( + thrust::distance(thrust::make_zip_iterator(mg_out_srcs.begin(), mg_out_dsts.begin()), + unique_end), + handle.get_stream()); + mg_out_dsts.resize( + thrust::distance(thrust::make_zip_iterator(mg_out_srcs.begin(), mg_out_dsts.begin()), + unique_end), + handle.get_stream()); + + auto passed = thrust::equal(handle.get_thrust_policy(), + mg_out_srcs.begin(), + mg_out_srcs.end(), + aggregated_sg_src.begin()); + passed &= thrust::equal(handle.get_thrust_policy(), + mg_out_dsts.begin(), + mg_out_dsts.end(), + aggregated_sg_dst.begin()); + ASSERT_TRUE(passed); } } }; diff --git a/cpp/tests/sampling/detail/mg_gather_utils.cu b/cpp/tests/sampling/detail/mg_gather_utils.cu index 6ea0e40c60a..dc0a2fb2cc1 100644 --- a/cpp/tests/sampling/detail/mg_gather_utils.cu +++ b/cpp/tests/sampling/detail/mg_gather_utils.cu @@ -15,9 +15,14 @@ */ #include "nbr_sampling_utils.cuh" + +#include + #include #include +#include + #include #include @@ -30,6 +35,102 @@ struct Prims_Usecase { bool check_correctness{true}; }; +template +std::tuple, std::vector> test_gather_local_edges( + raft::handle_t const& handle, + cugraph::graph_view_t const& mg_graph_view, + rmm::device_uvector const& sources, + rmm::device_uvector const& destination_offsets, + edge_t indices_per_source) +{ + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_rank = col_comm.get_rank(); + + // logic relies on gather_one_hop not having duplicates + rmm::device_uvector sources_copy(sources.size(), handle.get_stream()); + raft::copy(sources_copy.data(), sources.data(), sources.size(), handle.get_stream()); + thrust::sort(handle.get_thrust_policy(), sources_copy.begin(), sources_copy.end()); + auto sources_copy_end = + thrust::unique(handle.get_thrust_policy(), sources_copy.begin(), sources_copy.end()); + sources_copy.resize(thrust::distance(sources_copy.begin(), sources_copy_end), + handle.get_stream()); + + auto [one_hop_src, one_hop_dst, one_hop_edge_ids] = + cugraph::detail::gather_one_hop_edgelist(handle, mg_graph_view, sources_copy); + + rmm::device_uvector one_hop_gpu_id(one_hop_src.size(), handle.get_stream()); + thrust::fill(handle.get_thrust_policy(), + one_hop_gpu_id.begin(), + one_hop_gpu_id.end(), + handle.get_comms().get_rank()); + + // Pull everything to rank 0 + auto sg_src = cugraph::test::device_gatherv( + handle, raft::device_span{one_hop_src.data(), one_hop_src.size()}); + auto sg_dst = cugraph::test::device_gatherv( + handle, raft::device_span{one_hop_dst.data(), one_hop_dst.size()}); + auto sg_gpu_id = cugraph::test::device_gatherv( + handle, raft::device_span{one_hop_gpu_id.data(), one_hop_gpu_id.size()}); + auto sg_sources = cugraph::test::device_gatherv( + handle, raft::device_span{sources.data(), col_rank == 0 ? sources.size() : 0}); + auto sg_destination_offsets = cugraph::test::device_gatherv( + handle, + raft::device_span{destination_offsets.data(), + col_rank == 0 ? destination_offsets.size() : 0}); + + thrust::sort(handle.get_thrust_policy(), + thrust::make_zip_iterator(sg_src.begin(), sg_gpu_id.begin(), sg_dst.begin()), + thrust::make_zip_iterator(sg_src.end(), sg_gpu_id.end(), sg_dst.end())); + + std::vector h_sources(sg_sources.size()); + std::vector h_src(sg_src.size()); + std::vector h_dst(sg_dst.size()); + std::vector h_result_src(sg_destination_offsets.size()); + std::vector h_result_dst(sg_destination_offsets.size()); + std::vector h_destination_offsets(sg_destination_offsets.size()); + + raft::update_host(h_sources.data(), sg_sources.data(), sg_sources.size(), handle.get_stream()); + raft::update_host(h_src.data(), sg_src.data(), sg_src.size(), handle.get_stream()); + raft::update_host(h_dst.data(), sg_dst.data(), sg_dst.size(), handle.get_stream()); + raft::update_host(h_destination_offsets.data(), + sg_destination_offsets.data(), + sg_destination_offsets.size(), + handle.get_stream()); + + thrust::for_each(thrust::host, + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(sg_destination_offsets.size()), + [&] __host__(auto i) { + h_result_src[i] = h_sources[i / indices_per_source]; + h_result_dst[i] = mg_graph_view.number_of_vertices(); + edge_t offset = h_destination_offsets[i]; + + for (size_t j = 0; j < h_src.size(); ++j) { + if (h_result_src[i] == h_src[j]) { + if (offset == 0) { + h_result_dst[i] = h_dst[j]; + break; + } + --offset; + } + } + }); + + auto new_end = + thrust::remove_if(thrust::host, + thrust::make_zip_iterator(h_result_src.begin(), h_result_dst.begin()), + thrust::make_zip_iterator(h_result_src.end(), h_result_dst.end()), + [invalid_vertex = mg_graph_view.number_of_vertices()] __host__(auto p) { + return (thrust::get<1>(p) == invalid_vertex); + }); + + h_result_src.resize(thrust::distance( + thrust::make_zip_iterator(h_result_src.begin(), h_result_dst.begin()), new_end)); + h_result_dst.resize(h_result_src.size()); + + return std::make_tuple(std::move(h_result_src), std::move(h_result_dst)); +} + template class Tests_MG_GatherEdges : public ::testing::TestWithParam> { @@ -93,118 +194,77 @@ class Tests_MG_GatherEdges // Generate random vertex ids in the range of current gpu auto [global_degree_offsets, global_out_degrees] = - cugraph::detail::original::get_global_degree_information(handle, mg_graph_view); - auto global_adjacency_list_offsets = cugraph::detail::original::get_global_adjacency_offset( - handle, mg_graph_view, global_degree_offsets, global_out_degrees); + cugraph::detail::get_global_degree_information(handle, mg_graph_view); // Generate random sources to gather on - auto random_sources = random_vertex_ids(handle, - mg_graph_view.local_vertex_partition_range_first(), - mg_graph_view.local_vertex_partition_range_last(), - source_sample_count, - repetitions_per_vertex); - rmm::device_uvector random_source_gpu_ids(random_sources.size(), handle.get_stream()); - thrust::fill(handle.get_thrust_policy(), - random_source_gpu_ids.begin(), - random_source_gpu_ids.end(), - comm_rank); - - auto [active_sources, active_source_gpu_ids] = - cugraph::detail::original::gather_active_majors(handle, - mg_graph_view, - random_sources.cbegin(), - random_sources.cend(), - random_source_gpu_ids.cbegin()); + auto random_sources = + random_vertex_ids(handle, + mg_graph_view.local_vertex_partition_range_first(), + mg_graph_view.local_vertex_partition_range_last(), + std::min(mg_graph_view.local_vertex_partition_range_size() * + (repetitions_per_vertex + vertex_t{1}), + source_sample_count), + repetitions_per_vertex); + + // FIXME: allgather is probably a poor name for this function. + // It's really an allgather across the row communicator + auto active_sources = + cugraph::detail::allgather_active_majors(handle, std::move(random_sources)); // get source global out degrees to generate indices - auto active_source_degrees = cugraph::detail::original::get_active_major_global_degrees( + auto active_source_degrees = cugraph::detail::get_active_major_global_degrees( handle, mg_graph_view, active_sources, global_out_degrees); - auto random_destination_indices = + auto random_destination_offsets = generate_random_destination_indices(handle, active_source_degrees, mg_graph_view.number_of_vertices(), - mg_graph_view.number_of_edges(), + edge_t{-1}, indices_per_source); - rmm::device_uvector input_destination_indices(random_destination_indices.size(), + + rmm::device_uvector input_destination_offsets(random_destination_offsets.size(), handle.get_stream()); - raft::update_device(input_destination_indices.data(), - random_destination_indices.data(), - random_destination_indices.size(), - handle.get_stream()); - - auto [src, dst, gpu_ids, dst_map] = - cugraph::detail::original::gather_local_edges(handle, - mg_graph_view, - active_sources, - active_source_gpu_ids, - std::move(input_destination_indices), - indices_per_source, - global_degree_offsets, - global_adjacency_list_offsets); + raft::copy(input_destination_offsets.data(), + random_destination_offsets.data(), + random_destination_offsets.size(), + handle.get_stream()); + + auto [src, dst, dst_map] = + cugraph::detail::gather_local_edges(handle, + mg_graph_view, + active_sources, + std::move(random_destination_offsets), + indices_per_source, + global_degree_offsets); if (prims_usecase.check_correctness) { - // Gather outputs - auto mg_out_srcs = cugraph::test::device_gatherv(handle, src.data(), src.size()); - auto mg_out_dsts = cugraph::test::device_gatherv(handle, dst.data(), dst.size()); - - // Gather inputs - auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - auto const col_rank = col_comm.get_rank(); - auto sg_random_srcs = cugraph::test::device_gatherv( - handle, active_sources.data(), col_rank == 0 ? active_sources.size() : 0); - auto sg_random_dst_indices = - cugraph::test::device_gatherv(handle, - random_destination_indices.data(), - col_rank == 0 ? random_destination_indices.size() : 0); - - // Gather input graph edgelist - rmm::device_uvector sg_src(0, handle.get_stream()); - rmm::device_uvector sg_dst(0, handle.get_stream()); - std::tie(sg_src, sg_dst, std::ignore) = - mg_graph_view.decompress_to_edgelist(handle, std::nullopt); - - auto aggregated_sg_src = cugraph::test::device_gatherv(handle, sg_src.begin(), sg_src.size()); - auto aggregated_sg_dst = cugraph::test::device_gatherv(handle, sg_dst.begin(), sg_dst.size()); - - sort_coo(handle, mg_out_srcs, mg_out_dsts); - - if (handle.get_comms().get_rank() == int{0}) { - cugraph::graph_t sg_graph(handle); - auto aggregated_edge_iter = thrust::make_zip_iterator( - thrust::make_tuple(aggregated_sg_src.begin(), aggregated_sg_dst.begin())); - thrust::sort(handle.get_thrust_policy(), - aggregated_edge_iter, - aggregated_edge_iter + aggregated_sg_src.size()); - auto sg_graph_properties = - cugraph::graph_properties_t{mg_graph_view.is_symmetric(), mg_graph_view.is_multigraph()}; - - std::tie(sg_graph, std::ignore) = - cugraph::create_graph_from_edgelist( - handle, - std::nullopt, - std::move(aggregated_sg_src), - std::move(aggregated_sg_dst), - std::nullopt, - sg_graph_properties, - false); - auto sg_graph_view = sg_graph.view(); - // Call single gpu gather - auto [sg_out_srcs, sg_out_dsts] = sg_gather_edges(handle, - sg_graph_view, - sg_random_srcs.begin(), - sg_random_srcs.end(), - sg_random_dst_indices.begin(), - sg_graph_view.number_of_vertices(), - indices_per_source); - sort_coo(handle, sg_out_srcs, sg_out_dsts); - - auto passed = thrust::equal( - handle.get_thrust_policy(), sg_out_srcs.begin(), sg_out_srcs.end(), mg_out_srcs.begin()); - passed &= thrust::equal( - handle.get_thrust_policy(), sg_out_dsts.begin(), sg_out_dsts.end(), mg_out_dsts.begin()); - ASSERT_TRUE(passed); - } + // NOTE: This test assumes that edgea within the data structure are sorted + // We'll use gather_one_hop_edgelist to pull out the relevant edges + auto [h_src, h_dst] = test_gather_local_edges( + handle, mg_graph_view, active_sources, input_destination_offsets, indices_per_source); + + auto agg_src = cugraph::test::device_gatherv( + handle, raft::device_span{src.data(), src.size()}); + auto agg_dst = cugraph::test::device_gatherv( + handle, raft::device_span{dst.data(), dst.size()}); + + thrust::sort(handle.get_thrust_policy(), + thrust::make_zip_iterator(agg_src.begin(), agg_dst.begin()), + thrust::make_zip_iterator(agg_src.end(), agg_dst.end())); + thrust::sort(thrust::host, + thrust::make_zip_iterator(h_src.begin(), h_dst.begin()), + thrust::make_zip_iterator(h_src.end(), h_dst.end())); + + std::vector h_agg_src(agg_src.size()); + std::vector h_agg_dst(agg_dst.size()); + raft::update_host(h_agg_src.data(), agg_src.data(), agg_src.size(), handle.get_stream()); + raft::update_host(h_agg_dst.data(), agg_dst.data(), agg_dst.size(), handle.get_stream()); + + // FIXME: Why are the randomly selected vertices on each GPU so similar?? + + auto passed = thrust::equal(thrust::host, h_src.begin(), h_src.end(), h_agg_src.begin()); + passed &= thrust::equal(thrust::host, h_dst.begin(), h_dst.end(), h_agg_dst.begin()); + ASSERT_TRUE(passed); } } }; diff --git a/cpp/tests/sampling/detail/nbr_sampling_utils.cuh b/cpp/tests/sampling/detail/nbr_sampling_utils.cuh index 96e6d129b24..093c231dfce 100644 --- a/cpp/tests/sampling/detail/nbr_sampling_utils.cuh +++ b/cpp/tests/sampling/detail/nbr_sampling_utils.cuh @@ -19,7 +19,6 @@ #pragma once #include -#include #include #include #include diff --git a/cpp/tests/sampling/mg_uniform_neighbor_sampling.cu b/cpp/tests/sampling/mg_uniform_neighbor_sampling.cu index a036dda5fb1..12a387d4589 100644 --- a/cpp/tests/sampling/mg_uniform_neighbor_sampling.cu +++ b/cpp/tests/sampling/mg_uniform_neighbor_sampling.cu @@ -90,12 +90,15 @@ class Tests_MG_Nbr_Sampling constexpr vertex_t source_sample_count = 3; // Generate random vertex ids in the range of current gpu - auto random_sources = random_vertex_ids(handle, - mg_graph_view.local_vertex_partition_range_first(), - mg_graph_view.local_vertex_partition_range_last(), - source_sample_count, - repetitions_per_vertex, - comm_rank); + auto random_sources = + random_vertex_ids(handle, + mg_graph_view.local_vertex_partition_range_first(), + mg_graph_view.local_vertex_partition_range_last(), + std::min(mg_graph_view.local_vertex_partition_range_size() * + (repetitions_per_vertex + vertex_t{1}), + source_sample_count), + repetitions_per_vertex, + comm_rank); std::vector h_fan_out{indices_per_source}; // depth = 1 @@ -108,14 +111,14 @@ class Tests_MG_Nbr_Sampling if (prims_usecase.check_correctness) { // Consolidate results on GPU 0 - auto d_mg_start_src = - cugraph::test::device_gatherv(handle, random_sources.data(), random_sources.size()); - auto d_mg_aggregate_src = - cugraph::test::device_gatherv(handle, d_src_out.data(), d_src_out.size()); - auto d_mg_aggregate_dst = - cugraph::test::device_gatherv(handle, d_dst_out.data(), d_dst_out.size()); - auto d_mg_aggregate_indices = - cugraph::test::device_gatherv(handle, d_indices.data(), d_indices.size()); + auto d_mg_start_src = cugraph::test::device_gatherv( + handle, raft::device_span{random_sources.data(), random_sources.size()}); + auto d_mg_aggregate_src = cugraph::test::device_gatherv( + handle, raft::device_span{d_src_out.data(), d_src_out.size()}); + auto d_mg_aggregate_dst = cugraph::test::device_gatherv( + handle, raft::device_span{d_dst_out.data(), d_dst_out.size()}); + auto d_mg_aggregate_indices = cugraph::test::device_gatherv( + handle, raft::device_span{d_indices.data(), d_indices.size()}); #if 0 // FIXME: extract_induced_subgraphs not currently support MG, so we'll skip this validation diff --git a/cpp/tests/sampling/sg_uniform_neighbor_sampling.cu b/cpp/tests/sampling/sg_uniform_neighbor_sampling.cu index 241de0bf747..346c6e1d449 100644 --- a/cpp/tests/sampling/sg_uniform_neighbor_sampling.cu +++ b/cpp/tests/sampling/sg_uniform_neighbor_sampling.cu @@ -72,7 +72,9 @@ class Tests_Uniform_Neighbor_Sampling cugraph::test::random_vertex_ids(handle, graph_view.local_vertex_partition_range_first(), graph_view.local_vertex_partition_range_last(), - source_sample_count, + std::min(graph_view.local_vertex_partition_range_size() * + (repetitions_per_vertex + vertex_t{1}), + source_sample_count), repetitions_per_vertex, uint64_t{0}); diff --git a/cpp/tests/utilities/device_comm_wrapper.cu b/cpp/tests/utilities/device_comm_wrapper.cu index 9937e59273d..9ce1f35a33c 100644 --- a/cpp/tests/utilities/device_comm_wrapper.cu +++ b/cpp/tests/utilities/device_comm_wrapper.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,11 +26,13 @@ namespace cugraph { namespace test { template -rmm::device_uvector device_gatherv(raft::handle_t const& handle, T const* d_input, size_t size) +rmm::device_uvector device_gatherv(raft::handle_t const& handle, + raft::device_span d_input) + { bool is_root = handle.get_comms().get_rank() == int{0}; auto rx_sizes = - cugraph::host_scalar_gather(handle.get_comms(), size, int{0}, handle.get_stream()); + cugraph::host_scalar_gather(handle.get_comms(), d_input.size(), int{0}, handle.get_stream()); std::vector rx_displs(is_root ? static_cast(handle.get_comms().get_size()) : size_t{0}); if (is_root) { std::partial_sum(rx_sizes.begin(), rx_sizes.end() - 1, rx_displs.begin() + 1); } @@ -39,9 +41,9 @@ rmm::device_uvector device_gatherv(raft::handle_t const& handle, T const* d_i is_root ? std::reduce(rx_sizes.begin(), rx_sizes.end()) : size_t{0}, handle.get_stream()); cugraph::device_gatherv(handle.get_comms(), - d_input, + d_input.data(), gathered_v.data(), - size, + d_input.size(), rx_sizes, rx_displs, int{0}, @@ -50,23 +52,53 @@ rmm::device_uvector device_gatherv(raft::handle_t const& handle, T const* d_i return gathered_v; } +template +rmm::device_uvector device_allgatherv(raft::handle_t const& handle, + raft::device_span d_input) +{ + auto rx_sizes = + cugraph::host_scalar_allgather(handle.get_comms(), d_input.size(), handle.get_stream()); + std::vector rx_displs(static_cast(handle.get_comms().get_size())); + std::partial_sum(rx_sizes.begin(), rx_sizes.end() - 1, rx_displs.begin() + 1); + + rmm::device_uvector gathered_v(std::reduce(rx_sizes.begin(), rx_sizes.end()), + handle.get_stream()); + + cugraph::device_allgatherv(handle.get_comms(), + d_input.data(), + gathered_v.data(), + rx_sizes, + rx_displs, + handle.get_stream()); + + return gathered_v; +} + // explicit instantiation template rmm::device_uvector device_gatherv(raft::handle_t const& handle, - int32_t const* d_input, - size_t size); + raft::device_span d_input); template rmm::device_uvector device_gatherv(raft::handle_t const& handle, - int64_t const* d_input, - size_t size); + raft::device_span d_input); template rmm::device_uvector device_gatherv(raft::handle_t const& handle, - float const* d_input, - size_t size); + raft::device_span d_input); template rmm::device_uvector device_gatherv(raft::handle_t const& handle, - double const* d_input, - size_t size); + raft::device_span d_input); + +template rmm::device_uvector device_allgatherv(raft::handle_t const& handle, + raft::device_span d_input); + +template rmm::device_uvector device_allgatherv(raft::handle_t const& handle, + raft::device_span d_input); + +template rmm::device_uvector device_allgatherv(raft::handle_t const& handle, + raft::device_span d_input); + +template rmm::device_uvector device_allgatherv(raft::handle_t const& handle, + raft::device_span d_input); } // namespace test } // namespace cugraph diff --git a/cpp/tests/utilities/device_comm_wrapper.hpp b/cpp/tests/utilities/device_comm_wrapper.hpp index 55145edd71b..c1d7b6b8250 100644 --- a/cpp/tests/utilities/device_comm_wrapper.hpp +++ b/cpp/tests/utilities/device_comm_wrapper.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,14 +16,35 @@ #pragma once +#include #include + #include namespace cugraph { namespace test { template -rmm::device_uvector device_gatherv(raft::handle_t const& handle, T const* d_input, size_t size); +rmm::device_uvector device_gatherv(raft::handle_t const& handle, + raft::device_span d_input); + +template +rmm::device_uvector device_gatherv(raft::handle_t const& handle, T const* d_input, size_t size) +{ + return device_gatherv(handle, raft::device_span{d_input, size}); +} + +template +rmm::device_uvector device_allgatherv(raft::handle_t const& handle, + raft::device_span d_input); + +template +rmm::device_uvector device_allgatherv(raft::handle_t const& handle, + T const* d_input, + size_t size) +{ + return device_allgatherv(handle, raft::device_span{d_input, size}); +} } // namespace test } // namespace cugraph diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd index 396b73afee5..cea48f5c420 100644 --- a/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd +++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd @@ -183,7 +183,7 @@ cdef extern from "cugraph_c/algorithms.h": # uniform neighborhood sampling cdef cugraph_error_code_t \ - cugraph_experimental_uniform_neighbor_sample( + cugraph_uniform_neighbor_sample( const cugraph_resource_handle_t* handle, cugraph_graph_t* graph, const cugraph_type_erased_device_array_view_t* start, diff --git a/python/pylibcugraph/pylibcugraph/uniform_neighbor_sample.pyx b/python/pylibcugraph/pylibcugraph/uniform_neighbor_sample.pyx index 8dfea32d821..e23a35396fb 100644 --- a/python/pylibcugraph/pylibcugraph/uniform_neighbor_sample.pyx +++ b/python/pylibcugraph/pylibcugraph/uniform_neighbor_sample.pyx @@ -37,7 +37,7 @@ from pylibcugraph._cugraph_c.graph cimport ( cugraph_graph_t, ) from pylibcugraph._cugraph_c.algorithms cimport ( - cugraph_experimental_uniform_neighbor_sample, + cugraph_uniform_neighbor_sample, cugraph_sample_result_t, cugraph_sample_result_get_sources, cugraph_sample_result_get_destinations, @@ -130,7 +130,7 @@ def uniform_neighbor_sample(ResourceHandle resource_handle, len(h_fan_out), get_c_type_from_numpy_type(h_fan_out.dtype)) - error_code = cugraph_experimental_uniform_neighbor_sample( + error_code = cugraph_uniform_neighbor_sample( c_resource_handle_ptr, c_graph_ptr, start_ptr,