diff --git a/cpp/include/raft/cuda_utils.cuh b/cpp/include/raft/cuda_utils.cuh
index 362dba66c5..62f5cdcccf 100644
--- a/cpp/include/raft/cuda_utils.cuh
+++ b/cpp/include/raft/cuda_utils.cuh
@@ -630,6 +630,25 @@ DI T shfl(T val, int srcLane, int width = WarpSize, uint32_t mask = 0xffffffffu)
 #endif
 }
 
+/**
+ * @brief Shuffle the data inside a warp from lower lane IDs
+ * @tparam T the data type (currently assumed to be 4B)
+ * @param val value to be shuffled
+ * @param delta lower lane ID delta from where to shuffle
+ * @param width lane width
+ * @param mask mask of participating threads (Volta+)
+ * @return the shuffled data
+ */
+template <typename T>
+DI T shfl_up(T val, int delta, int width = WarpSize, uint32_t mask = 0xffffffffu)
+{
+#if CUDART_VERSION >= 9000
+  return __shfl_up_sync(mask, val, delta, width);
+#else
+  return __shfl_up(val, delta, width);
+#endif
+}
+
 /**
  * @brief Shuffle the data inside a warp
  * @tparam T the data type (currently assumed to be 4B)
diff --git a/cpp/include/raft/random/detail/rmat_rectangular_generator.cuh b/cpp/include/raft/random/detail/rmat_rectangular_generator.cuh
new file mode 100644
index 0000000000..8a1f23e785
--- /dev/null
+++ b/cpp/include/raft/random/detail/rmat_rectangular_generator.cuh
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <raft/cuda_utils.cuh>
+#include <raft/cudart_utils.h>
+#include <raft/random/rng_device.cuh>
+#include <raft/random/rng_state.hpp>
+
+namespace raft {
+namespace random {
+namespace detail {
+
+template <typename IdxT, typename ProbT>
+DI void gen_and_update_bits(IdxT& src_id,
+                            IdxT& dst_id,
+                            ProbT a,
+                            ProbT ab,
+                            ProbT abc,
+                            IdxT r_scale,
+                            IdxT c_scale,
+                            IdxT curr_depth,
+                            raft::random::PCGenerator& gen)
+{
+  bool src_bit, dst_bit;
+  ProbT val;
+  gen.next(val);
+  if (val <= a) {
+    src_bit = dst_bit = false;
+  } else if (val <= ab) {
+    src_bit = false;
+    dst_bit = true;
+  } else if (val <= abc) {
+    src_bit = true;
+    dst_bit = false;
+  } else {
+    src_bit = dst_bit = true;
+  }
+  if (curr_depth < r_scale) { src_id |= (IdxT(src_bit) << (r_scale - curr_depth - 1)); }
+  if (curr_depth < c_scale) { dst_id |= (IdxT(dst_bit) << (c_scale - curr_depth - 1)); }
+}
+
+template <typename IdxT>
+DI void store_ids(
+  IdxT* out, IdxT* out_src, IdxT* out_dst, IdxT src_id, IdxT dst_id, IdxT idx, IdxT n_edges)
+{
+  if (idx < n_edges) {
+    if (out != nullptr) {
+      // uncoalesced gmem accesses!
+      out[idx * 2] = src_id;
+      out[idx * 2 + 1] = dst_id;
+    }
+    if (out_src != nullptr) { out_src[idx] = src_id; }
+    if (out_dst != nullptr) { out_dst[idx] = dst_id; }
+  }
+}
+
+template <typename IdxT, typename ProbT>
+__global__ void rmat_gen_kernel(IdxT* out,
+                                IdxT* out_src,
+                                IdxT* out_dst,
+                                const ProbT* theta,
+                                IdxT r_scale,
+                                IdxT c_scale,
+                                IdxT n_edges,
+                                IdxT max_scale,
+                                raft::random::RngState r)
+{
+  IdxT idx = threadIdx.x + ((IdxT)blockIdx.x * blockDim.x);
+  extern __shared__ ProbT s_theta[];
+  auto theta_len = max_scale * 2 * 2;
+  // load the probabilities into shared memory and then convert them into cdf's
+  // currently there are smem bank conflicts due to the way these are accessed
+  for (int i = threadIdx.x; i < theta_len; i += blockDim.x) {
+    s_theta[i] = theta[i];
+  }
+  __syncthreads();
+  for (int i = threadIdx.x; i < max_scale; i += blockDim.x) {
+    auto a = s_theta[4 * i];
+    auto b = s_theta[4 * i + 1];
+    auto c = s_theta[4 * i + 2];
+    s_theta[4 * i + 1] = a + b;
+    s_theta[4 * i + 2] = a + b + c;
+    s_theta[4 * i + 3] += a + b + c;
+  }
+  __syncthreads();
+  IdxT src_id{0}, dst_id{0};
+  raft::random::PCGenerator gen{r.seed, r.base_subsequence + idx, 0};
+  for (IdxT i = 0; i < max_scale; ++i) {
+    auto a = s_theta[i * 4], ab = s_theta[i * 4 + 1], abc = s_theta[i * 4 + 2];
+    gen_and_update_bits(src_id, dst_id, a, ab, abc, r_scale, c_scale, i, gen);
+  }
+  store_ids(out, out_src, out_dst, src_id, dst_id, idx, n_edges);
+}
+
+template <typename IdxT, typename ProbT>
+void rmat_rectangular_gen_caller(IdxT* out,
+                                 IdxT* out_src,
+                                 IdxT* out_dst,
+                                 const ProbT* theta,
+                                 IdxT r_scale,
+                                 IdxT c_scale,
+                                 IdxT n_edges,
+                                 cudaStream_t stream,
+                                 raft::random::RngState& r)
+{
+  if (n_edges <= 0) return;
+  static constexpr int N_THREADS = 512;
+  auto max_scale = max(r_scale, c_scale);
+  size_t smem_size = sizeof(ProbT) * max_scale * 2 * 2;
+  auto n_blks = raft::ceildiv<IdxT>(n_edges, N_THREADS);
+  rmat_gen_kernel<<<n_blks, N_THREADS, smem_size, stream>>>(
+    out, out_src, out_dst, theta, r_scale, c_scale, n_edges, max_scale, r);
+  RAFT_CUDA_TRY(cudaGetLastError());
+  r.advance(n_edges, max_scale);
+}
+
+template <typename IdxT, typename ProbT>
+__global__ void rmat_gen_kernel(IdxT* out,
+                                IdxT* out_src,
+                                IdxT* out_dst,
+                                ProbT a,
+                                ProbT b,
+                                ProbT c,
+                                IdxT r_scale,
+                                IdxT c_scale,
+                                IdxT n_edges,
+                                IdxT max_scale,
+                                raft::random::RngState r)
+{
+  IdxT idx = threadIdx.x + ((IdxT)blockIdx.x * blockDim.x);
+  IdxT src_id{0}, dst_id{0};
+  raft::random::PCGenerator gen{r.seed, r.base_subsequence + idx, 0};
+  auto min_scale = min(r_scale, c_scale);
+  IdxT i = 0;
+  for (; i < min_scale; ++i) {
+    gen_and_update_bits(src_id, dst_id, a, a + b, a + b + c, r_scale, c_scale, i, gen);
+  }
+  for (; i < r_scale; ++i) {
+    gen_and_update_bits(src_id, dst_id, a + b, a + b, ProbT(1), r_scale, c_scale, i, gen);
+  }
+  for (; i < c_scale; ++i) {
+    gen_and_update_bits(src_id, dst_id, a + c, ProbT(1), ProbT(1), r_scale, c_scale, i, gen);
+  }
+  store_ids(out, out_src, out_dst, src_id, dst_id, idx, n_edges);
+}
+
+template <typename IdxT, typename ProbT>
+void rmat_rectangular_gen_caller(IdxT* out,
+                                 IdxT* out_src,
+                                 IdxT* out_dst,
+                                 ProbT a,
+                                 ProbT b,
+                                 ProbT c,
+                                 IdxT r_scale,
+                                 IdxT c_scale,
+                                 IdxT n_edges,
+                                 cudaStream_t stream,
+                                 raft::random::RngState& r)
+{
+  if (n_edges <= 0) return;
+  static constexpr int N_THREADS = 512;
+  auto max_scale = max(r_scale, c_scale);
+  auto n_blks = raft::ceildiv<IdxT>(n_edges, N_THREADS);
+  rmat_gen_kernel<<<n_blks, N_THREADS, 0, stream>>>(
+    out, out_src, out_dst, a, b, c, r_scale, c_scale, n_edges, max_scale, r);
+  RAFT_CUDA_TRY(cudaGetLastError());
+  r.advance(n_edges, max_scale);
+}
+
+}  // end namespace detail
+}  // end namespace random
+}  // end namespace raft
diff --git a/cpp/include/raft/random/rmat_rectangular_generator.cuh b/cpp/include/raft/random/rmat_rectangular_generator.cuh
new file mode 100644
index 0000000000..aad1cf0c88
--- /dev/null
+++ b/cpp/include/raft/random/rmat_rectangular_generator.cuh
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "detail/rmat_rectangular_generator.cuh"
+
+namespace raft::random {
+
+/**
+ * @brief Generate an RMAT graph for a rectangular adjacency matrix (useful when the
+ *        graphs to be generated are bipartite)
+ *
+ * @tparam IdxT  node indices type
+ * @tparam ProbT data type used for probability distributions (either fp32 or fp64)
+ *
+ * @param[out] out     generated edgelist [on device] [dim = n_edges x 2]. On each row
+ *                     the first element is the source node id and the second is the
+ *                     destination node id. If you don't need this output then pass a
+ *                     `nullptr` in its place.
+ * @param[out] out_src list of source node id's [on device] [len = n_edges]. If you
+ *                     don't need this output then pass a `nullptr` in its place.
+ * @param[out] out_dst list of destination node id's [on device] [len = n_edges]. If
+ *                     you don't need this output then pass a `nullptr` in its place.
+ * @param[in]  theta   distribution of each quadrant at each level of resolution.
+ *                     Since these are probabilities, each 2x2 matrix (one per level
+ *                     of the RMAT) must sum to one. [on device]
+ *                     [dim = max(r_scale, c_scale) x 2 x 2]
+ * @param[in]  r_scale 2^r_scale represents the number of source nodes
+ * @param[in]  c_scale 2^c_scale represents the number of destination nodes
+ * @param[in]  n_edges number of edges to generate
+ * @param[in]  stream  cuda stream on which to schedule the work
+ * @param[in]  r       underlying state of the random generator. Especially useful when
+ *                     one wants to call this API multiple times in order to generate a
+ *                     larger graph. In that case, create this object with the initial
+ *                     seed once and keep passing the same object to the successive calls.
+ *
+ * When `r_scale != c_scale`, this is referred to as the rectangular adjacency matrix case
+ * (in other words, generating bipartite graphs). In this case, at `depth >= r_scale` the
+ * distribution is assumed to be:
+ * `[theta[4 * depth] + theta[4 * depth + 2], theta[4 * depth + 1] + theta[4 * depth + 3]; 0, 0]`.
+ * Then, for `depth >= c_scale`, the distribution is assumed to be:
+ * `[theta[4 * depth] + theta[4 * depth + 1], 0; theta[4 * depth + 2] + theta[4 * depth + 3], 0]`.
+ *
+ * @note This can generate duplicate edges and self-loops. It is the responsibility of the
+ *       caller to clean them up accordingly.
+ *
+ * @note This also only generates directed graphs. If undirected graphs are needed, then a
+ *       separate post-processing step is expected to be done by the caller.
+ *
+ * @{
+ */
+template <typename IdxT, typename ProbT>
+void rmat_rectangular_gen(IdxT* out,
+                          IdxT* out_src,
+                          IdxT* out_dst,
+                          const ProbT* theta,
+                          IdxT r_scale,
+                          IdxT c_scale,
+                          IdxT n_edges,
+                          cudaStream_t stream,
+                          raft::random::RngState& r)
+{
+  detail::rmat_rectangular_gen_caller(
+    out, out_src, out_dst, theta, r_scale, c_scale, n_edges, stream, r);
+}
+
+/**
+ * This is the same as the previous method, but assumes the same a, b, c, d probability
+ * distribution across all the scales.
+ */
+template <typename IdxT, typename ProbT>
+void rmat_rectangular_gen(IdxT* out,
+                          IdxT* out_src,
+                          IdxT* out_dst,
+                          ProbT a,
+                          ProbT b,
+                          ProbT c,
+                          IdxT r_scale,
+                          IdxT c_scale,
+                          IdxT n_edges,
+                          cudaStream_t stream,
+                          raft::random::RngState& r)
+{
+  detail::rmat_rectangular_gen_caller(
+    out, out_src, out_dst, a, b, c, r_scale, c_scale, n_edges, stream, r);
+}
+/** @} */
+
+}  // end namespace raft::random
diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt
index 43c6257966..10ed08c0b5 100644
--- a/cpp/test/CMakeLists.txt
+++ b/cpp/test/CMakeLists.txt
@@ -83,6 +83,7 @@ add_executable(test_raft
   test/random/permute.cu
   test/random/rng.cu
   test/random/rng_int.cu
+  test/random/rmat_rectangular_generator.cu
   test/random/sample_without_replacement.cu
   test/span.cpp
   test/span.cu
diff --git a/cpp/test/random/rmat_rectangular_generator.cu b/cpp/test/random/rmat_rectangular_generator.cu
new file mode 100644
index 0000000000..4ccda9c1fa
--- /dev/null
+++ b/cpp/test/random/rmat_rectangular_generator.cu
@@ -0,0 +1,291 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <raft/cuda_utils.cuh>
+#include <raft/cudart_utils.h>
+#include <raft/handle.hpp>
+
+#include "../test_utils.h"
+
+#include <raft/random/rmat_rectangular_generator.cuh>
+#include <raft/random/rng.cuh>
+#include <rmm/device_uvector.hpp>
+#include <vector>
+
+namespace raft {
+namespace random {
+
+// Courtesy: cuGraph unit-tests
+
+struct RmatInputs {
+  size_t r_scale;
+  size_t c_scale;
+  size_t n_edges;
+  bool theta_array;
+  uint64_t seed;
+  float eps;
+};
+
+template <typename OutT, typename InT>
+__global__ void normalize_kernel(
+  OutT* theta, const InT* in_vals, size_t max_scale, size_t r_scale, size_t c_scale)
+{
+  size_t idx = threadIdx.x;
+  if (idx < max_scale) {
+    auto a = OutT(in_vals[4 * idx]);
+    auto b = OutT(in_vals[4 * idx + 1]);
+    auto c = OutT(in_vals[4 * idx + 2]);
+    auto d = OutT(in_vals[4 * idx + 3]);
+    auto sum = a + b + c + d;
+    a /= sum;
+    b /= sum;
+    c /= sum;
+    d /= sum;
+    theta[4 * idx] = a;
+    theta[4 * idx + 1] = b;
+    theta[4 * idx + 2] = c;
+    theta[4 * idx + 3] = d;
+  }
+}
+
+// handle rectangular cases correctly
+template <typename OutT>
+__global__ void handle_rect_kernel(OutT* theta, size_t max_scale, size_t r_scale, size_t c_scale)
+{
+  size_t idx = threadIdx.x;
+  if (idx < max_scale) {
+    auto a = theta[4 * idx];
+    auto b = theta[4 * idx + 1];
+    auto c = theta[4 * idx + 2];
+    auto d = theta[4 * idx + 3];
+    if (idx >= r_scale) {
+      a += c;
+      c = OutT(0);
+      b += d;
+      d = OutT(0);
+    }
+    if (idx >= c_scale) {
+      a += b;
+      b = OutT(0);
+      c += d;
+      d = OutT(0);
+    }
+    theta[4 * idx] = a;
+    theta[4 * idx + 1] = b;
+    theta[4 * idx + 2] = c;
+    theta[4 * idx + 3] = d;
+  }
+}
+
+// for a single probability distribution across depths, just replicate the theta's!
+// this will keep the test code simpler
+template <typename OutT>
+__global__ void theta_kernel(OutT* theta, size_t max_scale, size_t r_scale, size_t c_scale)
+{
+  size_t idx = threadIdx.x;
+  if (idx != 0 && idx < max_scale) {
+    auto a = theta[0];
+    auto b = theta[1];
+    auto c = theta[2];
+    auto d = theta[3];
+    if (idx >= r_scale) {
+      a += c;
+      c = OutT(0);
+      b += d;
+      d = OutT(0);
+    }
+    if (idx >= c_scale) {
+      a += b;
+      b = OutT(0);
+      c += d;
+      d = OutT(0);
+    }
+    theta[4 * idx] = a;
+    theta[4 * idx + 1] = b;
+    theta[4 * idx + 2] = c;
+    theta[4 * idx + 3] = d;
+  }
+}
+
+template <typename OutT, typename InT>
+void normalize(OutT* theta,
+               const InT* in_vals,
+               size_t max_scale,
+               size_t r_scale,
+               size_t c_scale,
+               bool handle_rect,
+               bool theta_array,
+               cudaStream_t stream)
+{
+  // one threadblock with 256 threads is more than enough as the 'scale' parameters
+  // won't be that large!
+  normalize_kernel<<<1, 256, 0, stream>>>(theta, in_vals, max_scale, r_scale, c_scale);
+  RAFT_CUDA_TRY(cudaGetLastError());
+  if (handle_rect) {
+    handle_rect_kernel<<<1, 256, 0, stream>>>(theta, max_scale, r_scale, c_scale);
+    RAFT_CUDA_TRY(cudaGetLastError());
+  }
+  if (!theta_array) {
+    theta_kernel<<<1, 256, 0, stream>>>(theta, max_scale, r_scale, c_scale);
+    RAFT_CUDA_TRY(cudaGetLastError());
+  }
+}
+
+__global__ void compute_hist(
+  int* hist, const size_t* out, size_t len, size_t max_scale, size_t r_scale, size_t c_scale)
+{
+  size_t idx = (threadIdx.x + blockIdx.x * blockDim.x) * 2;
+  if (idx + 1 < len) {
+    auto src = out[idx], dst = out[idx + 1];
+    for (size_t j = 0; j < max_scale; ++j) {
+      bool src_bit = j < r_scale ? src & (1 << (r_scale - j - 1)) : 0;
+      bool dst_bit = j < c_scale ? dst & (1 << (c_scale - j - 1)) : 0;
+      auto idx = j * 4 + src_bit * 2 + dst_bit;
+      atomicAdd(hist + idx, 1);
+    }
+  }
+}
+
+class RmatGenTest : public ::testing::TestWithParam<RmatInputs> {
+ public:
+  RmatGenTest()
+    : handle{},
+      stream{handle.get_stream()},
+      params{::testing::TestWithParam<RmatInputs>::GetParam()},
+      out{params.n_edges * 2, stream},
+      out_src{params.n_edges, stream},
+      out_dst{params.n_edges, stream},
+      theta{0, stream},
+      h_theta{},
+      state{params.seed, GeneratorType::GenPC},
+      max_scale{std::max(params.r_scale, params.c_scale)}
+  {
+    theta.resize(4 * max_scale, stream);
+    uniform(state, theta.data(), theta.size(), 0.0f, 1.0f, stream);
+    normalize(theta.data(),
+              theta.data(),
+              max_scale,
+              params.r_scale,
+              params.c_scale,
+              params.r_scale != params.c_scale,
+              params.theta_array,
+              stream);
+    h_theta.resize(theta.size());
+    RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
+    raft::update_host(h_theta.data(), theta.data(), theta.size(), stream);
+    RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
+  }
+
+ protected:
+  void SetUp() override
+  {
+    if (params.theta_array) {
+      rmat_rectangular_gen(out.data(),
+                           out_src.data(),
+                           out_dst.data(),
+                           theta.data(),
+                           params.r_scale,
+                           params.c_scale,
+                           params.n_edges,
+                           stream,
+                           state);
+    } else {
+      rmat_rectangular_gen(out.data(),
+                           out_src.data(),
+                           out_dst.data(),
+                           h_theta[0],
+                           h_theta[1],
+                           h_theta[2],
+                           params.r_scale,
+                           params.c_scale,
+                           params.n_edges,
+                           stream,
+                           state);
+    }
+    RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
+  }
+
+  void validate()
+  {
+    rmm::device_uvector<int> hist{theta.size(), stream};
+    RAFT_CUDA_TRY(cudaMemsetAsync(hist.data(), 0, hist.size() * sizeof(int), stream));
+    compute_hist<<<raft::ceildiv<size_t>(out.size() / 2, 256), 256, 0, stream>>>(
+      hist.data(), out.data(), out.size(), max_scale, params.r_scale, params.c_scale);
+    RAFT_CUDA_TRY(cudaGetLastError());
+    rmm::device_uvector<float> computed_theta{theta.size(), stream};
+    normalize(computed_theta.data(),
+              hist.data(),
+              max_scale,
+              params.r_scale,
+              params.c_scale,
+              false,
+              true,
+              stream);
+    RAFT_CUDA_TRY(cudaGetLastError());
+    RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
+    ASSERT_TRUE(devArrMatchHost(
+      h_theta.data(), computed_theta.data(), theta.size(), CompareApprox<float>(params.eps)));
+  }
+
+ protected:
+  raft::handle_t handle;
+  cudaStream_t stream;
+
+  RmatInputs params;
+  rmm::device_uvector<size_t> out, out_src, out_dst;
+  rmm::device_uvector<float> theta;
+  std::vector<float> h_theta;
+  RngState state;
+  size_t max_scale;
+};
+
+static const float TOLERANCE = 0.01f;
+
+const std::vector<RmatInputs> inputs = {
+  // square adjacency
+  {16, 16, 100000, false, 123456ULL, TOLERANCE},
+  {16, 16, 100000, true, 123456ULL, TOLERANCE},
+  {16, 16, 200000, false, 123456ULL, TOLERANCE},
+  {16, 16, 200000, true, 123456ULL, TOLERANCE},
+  {18, 18, 100000, false, 123456ULL, TOLERANCE},
+  {18, 18, 100000, true, 123456ULL, TOLERANCE},
+  {18, 18, 200000, false, 123456ULL, TOLERANCE},
+  {18, 18, 200000, true, 123456ULL, TOLERANCE},
+  {16, 16, 100000, false, 456789ULL, TOLERANCE},
+  {16, 16, 100000, true, 456789ULL, TOLERANCE},
+  {16, 16, 200000, false, 456789ULL, TOLERANCE},
+  {16, 16, 200000, true, 456789ULL, TOLERANCE},
+  {18, 18, 100000, false, 456789ULL, TOLERANCE},
+  {18, 18, 100000, true, 456789ULL, TOLERANCE},
+  {18, 18, 200000, false, 456789ULL, TOLERANCE},
+  {18, 18, 200000, true, 456789ULL, TOLERANCE},
+
+  // rectangular adjacency
+  {16, 18, 200000, false, 123456ULL, TOLERANCE},
+  {16, 18, 200000, true, 123456ULL, TOLERANCE},
+  {18, 16, 200000, false, 123456ULL, TOLERANCE},
+  {18, 16, 200000, true, 123456ULL, TOLERANCE},
+  {16, 18, 200000, false, 456789ULL, TOLERANCE},
+  {16, 18, 200000, true, 456789ULL, TOLERANCE},
+  {18, 16, 200000, false, 456789ULL, TOLERANCE},
+  {18, 16, 200000, true, 456789ULL, TOLERANCE}};
+
+TEST_P(RmatGenTest, Result) { validate(); }
+INSTANTIATE_TEST_SUITE_P(RmatGenTests, RmatGenTest, ::testing::ValuesIn(inputs));
+
+}  // namespace random
+}  // namespace raft
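
Usage sketch (illustrative only, not part of the diff): the snippet below drives the new public `rmat_rectangular_gen` overload that takes a single `(a, b, c)` distribution, mirroring how the unit test above calls it. The include paths follow the test file, and the handle parameter, scales, edge count, seed, and probability values are assumed for illustration.

```cpp
// Minimal sketch of calling the new API; values below are illustrative assumptions.
#include <raft/handle.hpp>
#include <raft/random/rmat_rectangular_generator.cuh>
#include <raft/random/rng.cuh>
#include <rmm/device_uvector.hpp>

void example_rmat(raft::handle_t const& handle)
{
  cudaStream_t stream = handle.get_stream();
  size_t r_scale = 16, c_scale = 18, n_edges = 100000;

  // Interleaved edge list plus separate src/dst lists; any of these three outputs
  // can instead be skipped by passing nullptr for it.
  rmm::device_uvector<size_t> out(n_edges * 2, stream);
  rmm::device_uvector<size_t> out_src(n_edges, stream);
  rmm::device_uvector<size_t> out_dst(n_edges, stream);

  // One (a, b, c) distribution shared across all scales; d is implicitly 1 - a - b - c.
  raft::random::RngState state{137ULL, raft::random::GeneratorType::GenPC};
  raft::random::rmat_rectangular_gen(out.data(),
                                     out_src.data(),
                                     out_dst.data(),
                                     0.57f,
                                     0.19f,
                                     0.19f,
                                     r_scale,
                                     c_scale,
                                     n_edges,
                                     stream,
                                     state);
}
```

Note that the generated edges may contain duplicates and self-loops, and r_scale != c_scale (as here) produces a bipartite, rectangular adjacency; any deduplication or symmetrization is left to the caller, as stated in the header documentation.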