Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FEA] [REVIEW] RMAT rectangular graph generator #738

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
eeb69b1
initial version of the generalized RMAT generation logic
teju85 May 31, 2022
8a78059
added the wrapper public API for the rmat generator logic
teju85 May 31, 2022
67a87a3
add the thread-global-idx to the base_subsequence itself instead of p…
teju85 May 31, 2022
d827cd8
added an option to generate AoS and SoA like edgelist output formats …
teju85 Jun 8, 2022
f84b4b3
support for passing same a,b,c,d values at all scales, like it is cur…
teju85 Jun 8, 2022
2ee1b40
added unit-tests for the rmat generator
teju85 Jun 8, 2022
340b421
added ability to use ccache to speedup local builds
teju85 Jun 30, 2022
6e009b7
Merge branch 'fea-ext-enable-ccache' into fea-ext-rmat-rectangular-ge…
teju85 Jul 1, 2022
81d61d8
used a --ccache-tool option instead of separate --sccache and --ccach…
teju85 Jul 4, 2022
fee23b0
Merge branch 'branch-22.08' of github.com:rapidsai/raft into fea-ext-…
teju85 Jul 7, 2022
117c502
fixed the hang in shfl-logic due to divergence
teju85 Jul 8, 2022
c23b711
fixed the indexing and probability distribution related issues for th…
teju85 Jul 11, 2022
fed6f0a
updated the doxygen doc of the rmat generator API to explain how the …
teju85 Jul 11, 2022
c8a5ebe
fixed clang format
teju85 Jul 11, 2022
87b762c
Merge branch 'branch-22.08' of github.com:rapidsai/raft into fea-ext-…
teju85 Jul 11, 2022
e6dcb85
used a simpler smem based cdf generation logic to fix test failures o…
teju85 Jul 26, 2022
4e436af
clang format fixes
teju85 Jul 26, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions cpp/include/raft/cuda_utils.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -630,6 +630,25 @@ DI T shfl(T val, int srcLane, int width = WarpSize, uint32_t mask = 0xffffffffu)
#endif
}

/**
* @brief Shuffle the data inside a warp from lower lane IDs
* @tparam T the data type (currently assumed to be 4B)
* @param val value to be shuffled
* @param delta lower lane ID delta from where to shuffle
* @param width lane width
* @param mask mask of participating threads (Volta+)
* @return the shuffled data
*/
template <typename T>
DI T shfl_up(T val, int delta, int width = WarpSize, uint32_t mask = 0xffffffffu)
{
#if CUDART_VERSION >= 9000
return __shfl_up_sync(mask, val, delta, width);
#else
return __shfl_up(val, delta, width);
#endif
}

/**
* @brief Shuffle the data inside a warp
* @tparam T the data type (currently assumed to be 4B)
Expand Down
187 changes: 187 additions & 0 deletions cpp/include/raft/random/detail/rmat_rectangular_generator.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <raft/cuda_utils.cuh>
#include <raft/cudart_utils.h>
#include <raft/random/rng_device.cuh>
#include <raft/random/rng_state.hpp>

namespace raft {
namespace random {
namespace detail {

template <typename IdxT, typename ProbT>
DI void gen_and_update_bits(IdxT& src_id,
IdxT& dst_id,
ProbT a,
ProbT ab,
ProbT abc,
IdxT r_scale,
IdxT c_scale,
IdxT curr_depth,
raft::random::PCGenerator& gen)
{
bool src_bit, dst_bit;
ProbT val;
gen.next(val);
if (val <= a) {
src_bit = dst_bit = false;
} else if (val <= ab) {
src_bit = false;
dst_bit = true;
} else if (val <= abc) {
src_bit = true;
dst_bit = false;
} else {
src_bit = dst_bit = true;
}
if (curr_depth < r_scale) { src_id |= (IdxT(src_bit) << (r_scale - curr_depth - 1)); }
if (curr_depth < c_scale) { dst_id |= (IdxT(dst_bit) << (c_scale - curr_depth - 1)); }
}

template <typename IdxT>
DI void store_ids(
IdxT* out, IdxT* out_src, IdxT* out_dst, IdxT src_id, IdxT dst_id, IdxT idx, IdxT n_edges)
{
if (idx < n_edges) {
if (out != nullptr) {
// uncoalesced gmem accesses!
out[idx * 2] = src_id;
out[idx * 2 + 1] = dst_id;
}
if (out_src != nullptr) { out_src[idx] = src_id; }
if (out_dst != nullptr) { out_dst[idx] = dst_id; }
}
}

template <typename IdxT, typename ProbT>
__global__ void rmat_gen_kernel(IdxT* out,
IdxT* out_src,
IdxT* out_dst,
const ProbT* theta,
IdxT r_scale,
IdxT c_scale,
IdxT n_edges,
IdxT max_scale,
raft::random::RngState r)
{
IdxT idx = threadIdx.x + ((IdxT)blockIdx.x * blockDim.x);
extern __shared__ ProbT s_theta[];
auto theta_len = max_scale * 2 * 2;
// load the probabilities into shared memory and then convert them into cdf's
// currently there are smem bank conflicts due to the way these are accessed
for (int i = threadIdx.x; i < theta_len; i += blockDim.x) {
s_theta[i] = theta[i];
}
__syncthreads();
for (int i = threadIdx.x; i < max_scale; i += blockDim.x) {
auto a = s_theta[4 * i];
auto b = s_theta[4 * i + 1];
auto c = s_theta[4 * i + 2];
s_theta[4 * i + 1] = a + b;
s_theta[4 * i + 2] = a + b + c;
s_theta[4 * i + 3] += a + b + c;
}
__syncthreads();
IdxT src_id{0}, dst_id{0};
raft::random::PCGenerator gen{r.seed, r.base_subsequence + idx, 0};
for (IdxT i = 0; i < max_scale; ++i) {
auto a = s_theta[i * 4], ab = s_theta[i * 4 + 1], abc = s_theta[i * 4 + 2];
gen_and_update_bits(src_id, dst_id, a, ab, abc, r_scale, c_scale, i, gen);
}
store_ids(out, out_src, out_dst, src_id, dst_id, idx, n_edges);
}

template <typename IdxT, typename ProbT>
void rmat_rectangular_gen_caller(IdxT* out,
IdxT* out_src,
IdxT* out_dst,
const ProbT* theta,
IdxT r_scale,
IdxT c_scale,
IdxT n_edges,
cudaStream_t stream,
raft::random::RngState& r)
{
if (n_edges <= 0) return;
static constexpr int N_THREADS = 512;
auto max_scale = max(r_scale, c_scale);
size_t smem_size = sizeof(ProbT) * max_scale * 2 * 2;
auto n_blks = raft::ceildiv<IdxT>(n_edges, N_THREADS);
rmat_gen_kernel<<<n_blks, N_THREADS, smem_size, stream>>>(
out, out_src, out_dst, theta, r_scale, c_scale, n_edges, max_scale, r);
RAFT_CUDA_TRY(cudaGetLastError());
r.advance(n_edges, max_scale);
}

template <typename IdxT, typename ProbT>
__global__ void rmat_gen_kernel(IdxT* out,
IdxT* out_src,
IdxT* out_dst,
ProbT a,
ProbT b,
ProbT c,
IdxT r_scale,
IdxT c_scale,
IdxT n_edges,
IdxT max_scale,
raft::random::RngState r)
{
IdxT idx = threadIdx.x + ((IdxT)blockIdx.x * blockDim.x);
IdxT src_id{0}, dst_id{0};
raft::random::PCGenerator gen{r.seed, r.base_subsequence + idx, 0};
auto min_scale = min(r_scale, c_scale);
IdxT i = 0;
for (; i < min_scale; ++i) {
gen_and_update_bits(src_id, dst_id, a, a + b, a + b + c, r_scale, c_scale, i, gen);
}
for (; i < r_scale; ++i) {
gen_and_update_bits(src_id, dst_id, a + b, a + b, ProbT(1), r_scale, c_scale, i, gen);
}
for (; i < c_scale; ++i) {
gen_and_update_bits(src_id, dst_id, a + c, ProbT(1), ProbT(1), r_scale, c_scale, i, gen);
}
store_ids(out, out_src, out_dst, src_id, dst_id, idx, n_edges);
}

template <typename IdxT, typename ProbT>
void rmat_rectangular_gen_caller(IdxT* out,
IdxT* out_src,
IdxT* out_dst,
ProbT a,
ProbT b,
ProbT c,
IdxT r_scale,
IdxT c_scale,
IdxT n_edges,
cudaStream_t stream,
raft::random::RngState& r)
{
if (n_edges <= 0) return;
static constexpr int N_THREADS = 512;
auto max_scale = max(r_scale, c_scale);
auto n_blks = raft::ceildiv<IdxT>(n_edges, N_THREADS);
rmat_gen_kernel<<<n_blks, N_THREADS, 0, stream>>>(
out, out_src, out_dst, a, b, c, r_scale, c_scale, n_edges, max_scale, r);
RAFT_CUDA_TRY(cudaGetLastError());
r.advance(n_edges, max_scale);
}

} // end namespace detail
} // end namespace random
} // end namespace raft
104 changes: 104 additions & 0 deletions cpp/include/raft/random/rmat_rectangular_generator.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include "detail/rmat_rectangular_generator.cuh"

namespace raft::random {

/**
* @brief Generate RMAT for a rectangular shaped adjacency matrices (useful when
* graphs to be generated are bipartite)
*
* @tparam IdxT node indices type
* @tparam ProbT data type used for probability distributions (either fp32 or fp64)
*
* @param[out] out generated edgelist [on device] [dim = n_edges x 2]. On each row
* the first element corresponds to the source node id while the
* second, the destination node id. If you don't need this output
* then pass a `nullptr` in its place.
* @param[out] out_src list of source node id's [on device] [len = n_edges]. If you
* don't need this output then pass a `nullptr` in its place.
* @param[out] out_dst list of destination node id's [on device] [len = n_edges]. If
* you don't need this output then pass a `nullptr` in its place.
* @param[in] theta distribution of each quadrant at each level of resolution.
* Since these are probabilities, each of the 2x2 matrix for
* each level of the RMAT must sum to one. [on device]
* [dim = max(r_scale, c_scale) x 2 x 2]. Of course, it is assumed
* that each of the group of 2 x 2 numbers all sum up to 1.
* @param[in] r_scale 2^r_scale represents the number of source nodes
* @param[in] c_scale 2^c_scale represents the number of destination nodes
* @param[in] n_edges number of edges to generate
* @param[in] stream cuda stream to schedule the work on
* @param[in] r underlying state of the random generator. Especially useful when
* one wants to call this API for multiple times in order to generate
* a larger graph. For that case, just create this object with the
* initial seed once and after every call continue to pass the same
* object for the successive calls.
*
* When `r_scale != c_scale` it is referred to as rectangular adjacency matrix case (IOW generating
* bipartite graphs). In this case, at `depth >= r_scale`, the distribution is assumed to be:
* `[theta[4 * depth] + theta[4 * depth + 2], theta[4 * depth + 1] + theta[4 * depth + 3]; 0, 0]`.
* Then for the `depth >= c_scale`, the distribution is assumed to be:
* `[theta[4 * depth] + theta[4 * depth + 1], 0; theta[4 * depth + 2] + theta[4 * depth + 3], 0]`.
*
* @note This can generate duplicate edges and self-loops. It is the responsibility of the
* caller to clean them up accordingly.

* @note This also only generates directed graphs. If undirected graphs are needed, then a
* separate post-processing step is expected to be done by the caller.
*
* @{
*/
template <typename IdxT, typename ProbT>
void rmat_rectangular_gen(IdxT* out,
IdxT* out_src,
IdxT* out_dst,
const ProbT* theta,
IdxT r_scale,
IdxT c_scale,
IdxT n_edges,
cudaStream_t stream,
raft::random::RngState& r)
{
detail::rmat_rectangular_gen_caller(
out, out_src, out_dst, theta, r_scale, c_scale, n_edges, stream, r);
}

/**
* This is the same as the previous method but assumes the same a, b, c, d probability
* distributions across all the scales
*/
template <typename IdxT, typename ProbT>
void rmat_rectangular_gen(IdxT* out,
IdxT* out_src,
IdxT* out_dst,
ProbT a,
ProbT b,
ProbT c,
IdxT r_scale,
IdxT c_scale,
IdxT n_edges,
cudaStream_t stream,
raft::random::RngState& r)
{
detail::rmat_rectangular_gen_caller(
out, out_src, out_dst, a, b, c, r_scale, c_scale, n_edges, stream, r);
}
/** @} */

} // end namespace raft::random
1 change: 1 addition & 0 deletions cpp/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ add_executable(test_raft
test/random/permute.cu
test/random/rng.cu
test/random/rng_int.cu
test/random/rmat_rectangular_generator.cu
test/random/sample_without_replacement.cu
test/span.cpp
test/span.cu
Expand Down
Loading