fix merge conflict
Joseph Nke committed May 16, 2022
2 parents 8c89335 + 4a6263a commit 3f1d7ef
Showing 139 changed files with 4,236 additions and 880 deletions.
4 changes: 2 additions & 2 deletions benchmarks/python_pytest_based/bench_algos.py
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -31,7 +31,7 @@ def setFixtureParamNames(*args, **kwargs):

import cugraph
from cugraph.structure.number_map import NumberMap
from cugraph.tests import utils
from cugraph.testing import utils
from cugraph.utilities.utils import is_device_version_less_than
import rmm

7 changes: 3 additions & 4 deletions benchmarks/python_pytest_based/params.py
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
# Copyright (c) 2020-2022, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -13,8 +13,7 @@

import pytest

from cugraph.tests.utils import genFixtureParamsProduct
from cugraph.tests import utils
from cugraph.testing import utils
from pathlib import PurePath

# FIXME: write and use mechanism described here for specifying datasets:
@@ -52,7 +51,7 @@
marks=[pytest.mark.poolallocator_off]),
]

FIXTURE_PARAMS = genFixtureParamsProduct(
FIXTURE_PARAMS = utils.genFixtureParamsProduct(
(DIRECTED_DATASETS + UNDIRECTED_DATASETS, "ds"),
(MANAGED_MEMORY, "mm"),
(POOL_ALLOCATOR, "pa"))
15 changes: 8 additions & 7 deletions ci/gpu/build.sh
@@ -38,6 +38,7 @@ export HOME=$WORKSPACE
cd $WORKSPACE
export GIT_DESCRIBE_TAG=`git describe --tags`
export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'`
unset GIT_DESCRIBE_TAG

# ucx-py version
export UCX_PY_VERSION='0.26.*'
@@ -102,15 +103,15 @@ if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then
gpuci_logger "Build from source"
$WORKSPACE/build.sh -v clean libcugraph pylibcugraph cugraph
else
echo "Installing libcugraph-tests"
gpuci_logger "Installing libcugraph-tests"
gpuci_mamba_retry install -c ${CONDA_ARTIFACT_PATH} libcugraph libcugraph_etl libcugraph-tests

gpuci_logger "Install the master version of dask and distributed"
pip install "git+https://github.com/dask/distributed.git" --upgrade --no-deps
pip install "git+https://github.com/dask/dask.git" --upgrade --no-deps

echo "Build pylibcugraph and cugraph..."
$WORKSPACE/build.sh pylibcugraph cugraph
gpuci_logger "Building and installing pylibcugraph and cugraph..."
export CONDA_BLD_DIR="${WORKSPACE}/.conda-bld"
export VERSION_SUFFIX=""
gpuci_conda_retry build conda/recipes/pylibcugraph --no-build-id --croot ${CONDA_BLD_DIR} -c ${CONDA_ARTIFACT_PATH} --python=${PYTHON}
gpuci_conda_retry build conda/recipes/cugraph --no-build-id --croot ${CONDA_BLD_DIR} -c ${CONDA_ARTIFACT_PATH} --python=${PYTHON}
gpuci_mamba_retry install cugraph pylibcugraph -c ${CONDA_BLD_DIR} -c ${CONDA_ARTIFACT_PATH}
fi

################################################################################
7 changes: 5 additions & 2 deletions cpp/CMakeLists.txt
@@ -188,7 +188,11 @@ add_library(cugraph
src/sampling/neighborhood.cu
src/sampling/random_walks.cu
src/sampling/detail/gather_utils_impl.cu
src/sampling/detail/sampling_utils_mg.cu
src/sampling/detail/sampling_utils_sg.cu
src/sampling/nbr_sampling_mg.cu
src/sampling/uniform_neighbor_sampling_mg.cpp
src/sampling/uniform_neighbor_sampling_sg.cpp
src/cores/legacy/core_number.cu
src/cores/core_number_sg.cu
src/cores/core_number_mg.cu
@@ -213,8 +217,7 @@ add_library(cugraph
src/structure/relabel_sg.cu
src/structure/relabel_mg.cu
src/structure/induced_subgraph_sg.cu
## FIXME: Not currently supported
##src/structure/induced_subgraph_mg.cu
src/structure/induced_subgraph_mg.cu
src/traversal/extract_bfs_paths_sg.cu
src/traversal/extract_bfs_paths_mg.cu
src/traversal/bfs_sg.cu
43 changes: 39 additions & 4 deletions cpp/include/cugraph/algorithms.hpp
@@ -27,6 +27,7 @@
#include <cugraph-ops/graph/sampling.hpp>

#include <raft/handle.hpp>
#include <raft/random/rng_state.hpp>
#include <raft/span.hpp>

namespace cugraph {
@@ -1389,7 +1390,7 @@ random_walks(raft::handle_t const& handle,
* single-gpu).
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
* handles to various CUDA libraries) to run graph algorithms.
* @param rng The Rng (stateful) instance holding pseudo-random number generator state.
* @param rng_state The RngState instance holding pseudo-random number generator state.
* @param graph Graph (view )object to sub-sample.
* @param ptr_d_start Device pointer to set of starting vertex indices for the sub-sampling.
* @param num_start_vertices = number(vertices) to use for sub-sampling.
@@ -1404,7 +1405,7 @@ template <typename graph_t>
std::tuple<rmm::device_uvector<typename graph_t::edge_type>,
rmm::device_uvector<typename graph_t::vertex_type>>
sample_neighbors_adjacency_list(raft::handle_t const& handle,
ops::gnn::graph::Rng& rng,
raft::random::RngState& rng_state,
graph_t const& graph,
typename graph_t::vertex_type const* ptr_d_start,
size_t num_start_vertices,
@@ -1420,7 +1421,7 @@ sample_neighbors_adjacency_list(raft::handle_t const& handle,
* single-gpu).
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
* handles to various CUDA libraries) to run graph algorithms.
* @param rng The Rng (stateful) instance holding pseudo-random number generator state.
* @param rng_state The RngState instance holding pseudo-random number generator state.
* @param graph Graph (view )object to sub-sample.
* @param ptr_d_start Device pointer to set of starting vertex indices for the sub-sampling.
* @param num_start_vertices = number(vertices) to use for sub-sampling.
@@ -1435,7 +1436,7 @@ template <typename graph_t>
std::tuple<rmm::device_uvector<typename graph_t::vertex_type>,
rmm::device_uvector<typename graph_t::vertex_type>>
sample_neighbors_edgelist(raft::handle_t const& handle,
ops::gnn::graph::Rng& rng,
raft::random::RngState& rng_state,
graph_t const& graph,
typename graph_t::vertex_type const* ptr_d_start,
size_t num_start_vertices,
@@ -1502,6 +1503,7 @@ void core_number(raft::handle_t const& handle,

/**
* @brief Multi-GPU Uniform Neighborhood Sampling.
* @deprecated will be removed later in this release (22.06)
*
* @tparam graph_view_t Type of graph view.
* @tparam gpu_t Type of rank (GPU) indices;
@@ -1535,6 +1537,39 @@ uniform_nbr_sample(raft::handle_t const& handle,
std::vector<int> const& h_fan_out,
bool with_replacement = true);

/**
* @brief Uniform Neighborhood Sampling.
*
* @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
* @tparam edge_t Type of edge identifiers. Needs to be an integral type.
* @tparam weight_t Type of edge weights. Needs to be a floating point type.
* @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false)
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
* handles to various CUDA libraries) to run graph algorithms.
* @param graph_view Graph View object to generate NBR Sampling on.
* @param starting_vertices Device span of starting vertex IDs for the NBR Sampling.
* @param fan_out Host span defining branching out (fan-out) degree per source vertex for each
* level
* @param with_replacement boolean flag specifying if random sampling is done with replacement
* (true); or, without replacement (false); default = true;
* @param seed A seed to initialize the random number generator
* @return tuple device vectors (vertex_t source_vertex, vertex_t destination_vertex, weight_t wgt)
*/
template <typename vertex_t,
typename edge_t,
typename weight_t,
bool store_transposed,
bool multi_gpu>
std::
tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>, rmm::device_uvector<weight_t>>
uniform_nbr_sample(
raft::handle_t const& handle,
graph_view_t<vertex_t, edge_t, weight_t, store_transposed, multi_gpu> const& graph_view,
raft::device_span<vertex_t> starting_vertices,
raft::host_span<const int> fan_out,
bool with_replacement = true,
uint64_t seed = 0);
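
Editorial note: the following is a minimal, hedged sketch of how a caller might invoke the new uniform_nbr_sample overload declared above. The names handle, graph_view, and d_starts are hypothetical (not part of this commit), and the exact span construction may differ from the released API.

// Sketch only: invoking the new single/multi-GPU uniform_nbr_sample overload.
// `handle`, `graph_view`, and `d_starts` are assumed to already exist.
#include <cugraph/algorithms.hpp>
#include <raft/handle.hpp>
#include <raft/span.hpp>
#include <rmm/device_uvector.hpp>
#include <vector>

template <typename vertex_t, typename edge_t, typename weight_t>
void sample_two_hops(raft::handle_t const& handle,
                     cugraph::graph_view_t<vertex_t, edge_t, weight_t, false, false> const& graph_view,
                     rmm::device_uvector<vertex_t>& d_starts)
{
  std::vector<int> fan_out{10, 5};  // 10 neighbors in hop 1, then 5 per sampled vertex in hop 2

  auto [srcs, dsts, wgts] = cugraph::uniform_nbr_sample(
    handle,
    graph_view,
    raft::device_span<vertex_t>{d_starts.data(), d_starts.size()},
    raft::host_span<const int>{fan_out.data(), fan_out.size()},
    true /* with_replacement */,
    42 /* seed */);

  // srcs/dsts/wgts are device vectors holding the sampled edge list.
}
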

/*
* @brief Compute triangle counts.
*
38 changes: 32 additions & 6 deletions cpp/include/cugraph/detail/decompress_edge_partition.cuh
@@ -188,6 +188,7 @@ __global__ void partially_decompress_to_edgelist_high_degree(
vertex_t input_major_count,
vertex_t* output_majors,
vertex_t* output_minors,
thrust::optional<weight_t*> output_weights,
thrust::optional<thrust::tuple<prop_t const*, prop_t*>> property,
thrust::optional<thrust::tuple<edge_t const*, edge_t*>> global_edge_index)
{
@@ -204,6 +205,8 @@
for (edge_t i = threadIdx.x; i < local_degree; i += blockDim.x) {
output_majors[major_offset + i] = major;
output_minors[major_offset + i] = indices[i];

if (output_weights) (*output_weights)[major_offset + i] = (*weights)[i];
}
if (property) {
auto input_property = thrust::get<0>(*property)[idx];
@@ -231,6 +234,7 @@ __global__ void partially_decompress_to_edgelist_mid_degree(
vertex_t input_major_count,
vertex_t* output_majors,
vertex_t* output_minors,
thrust::optional<weight_t*> output_weights,
thrust::optional<thrust::tuple<prop_t const*, prop_t*>> property,
thrust::optional<thrust::tuple<edge_t const*, edge_t*>> global_edge_index)
{
@@ -242,11 +246,18 @@
auto major = input_majors[idx];
auto major_partition_offset = static_cast<size_t>(major - edge_partition.major_range_first());
vertex_t const* indices{nullptr};
thrust::optional<weight_t const*> weights{thrust::nullopt};
edge_t local_degree{};

thrust::tie(indices, weights, local_degree) =
edge_partition.local_edges(major_partition_offset);

auto major_offset = input_major_start_offsets[idx];
for (edge_t i = threadIdx.x; i < local_degree; i += blockDim.x) {
output_majors[major_offset + i] = major;
output_minors[major_offset + i] = indices[i];

if (output_weights) (*output_weights)[major_offset + i] = (*weights)[i];
}
if (property) {
auto input_property = thrust::get<0>(*property)[idx];
@@ -275,6 +286,7 @@ void partially_decompress_edge_partition_to_fill_edgelist(
std::vector<vertex_t> const& segment_offsets,
vertex_t* majors,
vertex_t* minors,
thrust::optional<weight_t*> weights,
thrust::optional<thrust::tuple<prop_t const*, prop_t*>> property,
thrust::optional<thrust::tuple<edge_t const*, edge_t*>> global_edge_index)
{
@@ -297,6 +309,7 @@
segment_offsets[1],
majors,
minors,
weights,
property ? thrust::make_optional(thrust::make_tuple(
thrust::get<0>(*property) + segment_offsets[0], thrust::get<1>(*property)))
: thrust::nullopt,
@@ -317,6 +330,7 @@
segment_offsets[2] - segment_offsets[1],
majors,
minors,
weights,
property ? thrust::make_optional(thrust::make_tuple(
thrust::get<0>(*property) + segment_offsets[1], thrust::get<1>(*property)))
: thrust::nullopt,
@@ -333,10 +347,11 @@
input_major_start_offsets + segment_offsets[2] - segment_offsets[0],
majors,
minors,
property = property
? thrust::make_optional(thrust::make_tuple(
output_weights = weights,
property = property
? thrust::make_optional(thrust::make_tuple(
thrust::get<0>(*property) + segment_offsets[2], thrust::get<1>(*property)))
: thrust::nullopt,
: thrust::nullopt,
global_edge_index] __device__(auto idx) {
auto major = input_majors[idx];
auto major_offset = input_major_start_offsets[idx];
Expand All @@ -347,9 +362,16 @@ void partially_decompress_edge_partition_to_fill_edgelist(
edge_t local_degree{};
thrust::tie(indices, weights, local_degree) =
edge_partition.local_edges(major_partition_offset);

// FIXME: This can lead to thread divergence if local_degree varies significantly
// within threads in this warp
thrust::fill(
thrust::seq, majors + major_offset, majors + major_offset + local_degree, major);
thrust::copy(thrust::seq, indices, indices + local_degree, minors + major_offset);
if (weights)
thrust::copy(
thrust::seq, *weights, *weights + local_degree, *output_weights + major_offset);

if (property) {
auto major_input_property = thrust::get<0>(*property)[idx];
auto minor_output_property = thrust::get<1>(*property);
@@ -379,10 +401,11 @@
input_major_start_offsets + segment_offsets[3] - segment_offsets[0],
majors,
minors,
property = property
? thrust::make_optional(thrust::make_tuple(
output_weights = weights,
property = property
? thrust::make_optional(thrust::make_tuple(
thrust::get<0>(*property) + segment_offsets[3], thrust::get<1>(*property)))
: thrust::nullopt,
: thrust::nullopt,
global_edge_index] __device__(auto idx) {
auto major = input_majors[idx];
auto major_offset = input_major_start_offsets[idx];
Expand All @@ -395,6 +418,9 @@ void partially_decompress_edge_partition_to_fill_edgelist(
thrust::fill(
thrust::seq, majors + major_offset, majors + major_offset + local_degree, major);
thrust::copy(thrust::seq, indices, indices + local_degree, minors + major_offset);
if (output_weights)
thrust::copy(
thrust::seq, *weights, *weights + local_degree, *output_weights + major_offset);
if (property) {
auto major_input_property = thrust::get<0>(*property)[idx];
auto minor_output_property = thrust::get<1>(*property);
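
Editorial note: the recurring change in this file is an optional output-weights pointer threaded alongside the majors/minors outputs; weights are written only when a weight buffer was supplied. A condensed, hedged sketch of that pattern follows (simplified names, host-side for illustration; the real code runs inside the kernels and device lambdas above).

// Condensed illustration (not the actual kernel code) of the optional-weight
// copy pattern: emit weights only when both the partition's weights and an
// output buffer are present.
#include <thrust/copy.h>
#include <thrust/execution_policy.h>
#include <thrust/optional.h>

template <typename weight_t, typename edge_t>
void copy_local_edge_weights(thrust::optional<weight_t const*> weights,   // from local_edges()
                             thrust::optional<weight_t*> output_weights,  // caller-provided, may be nullopt
                             edge_t local_degree,
                             edge_t major_offset)
{
  if (output_weights && weights) {
    thrust::copy(thrust::seq,
                 *weights,
                 *weights + local_degree,
                 *output_weights + major_offset);
  }
}
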
4 changes: 2 additions & 2 deletions cpp/include/cugraph/detail/graph_functions.cuh
@@ -37,8 +37,8 @@
#include <vector>

namespace cugraph {

namespace detail {
namespace original {

/**
* @brief Compute local out degrees of the majors belonging to the adjacency matrices
@@ -238,6 +238,6 @@ gather_one_hop_edgelist(
const rmm::device_uvector<prop_t>& active_major_property,
const rmm::device_uvector<typename GraphViewType::edge_type>& global_adjacency_list_offsets);

} // namespace original
} // namespace detail

} // namespace cugraph
20 changes: 19 additions & 1 deletion cpp/include/cugraph/detail/graph_utils.cuh
@@ -21,10 +21,13 @@
#include <cugraph/utilities/device_comm.cuh>

#include <raft/handle.hpp>
#include <raft/span.hpp>

#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>

#include <cuco/detail/hash_functions.cuh>
#include <thrust/binary_search.h>
#include <thrust/sort.h>
#include <thrust/tabulate.h>
#include <thrust/transform.h>
@@ -37,7 +40,7 @@ namespace cugraph {
namespace detail {

template <typename vertex_t>
struct compute_gpu_id_from_vertex_t {
struct compute_gpu_id_from_ext_vertex_t {
int comm_size{0};

__device__ int operator()(vertex_t v) const
@@ -47,6 +50,21 @@ struct compute_gpu_id_from_vertex_t {
}
};

template <typename vertex_t>
struct compute_gpu_id_from_int_vertex_t {
raft::device_span<vertex_t> vertex_partition_range_lasts_span;

__device__ int operator()(vertex_t v) const
{
return static_cast<int>(
thrust::distance(vertex_partition_range_lasts_span.begin(),
thrust::upper_bound(thrust::seq,
vertex_partition_range_lasts_span.begin(),
vertex_partition_range_lasts_span.end(),
v)));
}
};
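
Editorial note: for intuition, the new compute_gpu_id_from_int_vertex_t functor maps an internal (renumbered) vertex to its owning rank by binary-searching the exclusive partition boundaries. Below is a minimal host-side analogue with made-up boundary values; the device version above does the same with thrust::upper_bound over a raft::device_span.

// Host-side analogue of compute_gpu_id_from_int_vertex_t, for illustration only.
// range_lasts[i] is one past the last vertex owned by rank i, so the owning rank
// is the index of the first boundary strictly greater than v.
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

int gpu_id_from_int_vertex(std::vector<int64_t> const& range_lasts, int64_t v)
{
  return static_cast<int>(std::distance(
    range_lasts.begin(),
    std::upper_bound(range_lasts.begin(), range_lasts.end(), v)));
}

int main()
{
  std::vector<int64_t> range_lasts{100, 250, 400};  // hypothetical 3-rank partitioning
  assert(gpu_id_from_int_vertex(range_lasts, 0) == 0);    // [0, 100)   -> rank 0
  assert(gpu_id_from_int_vertex(range_lasts, 100) == 1);  // [100, 250) -> rank 1
  assert(gpu_id_from_int_vertex(range_lasts, 399) == 2);  // [250, 400) -> rank 2
  return 0;
}
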

template <typename vertex_t>
struct compute_gpu_id_from_edge_t {
int comm_size{0};

0 comments on commit 3f1d7ef
