From ddd9c19f41d91be5e0cd223c5214b51803590365 Mon Sep 17 00:00:00 2001
From: James Lamb <jlamb@nvidia.com>
Date: Thu, 27 Jun 2024 07:07:23 -0700
Subject: [PATCH 1/2] remove openmpi ceiling (#4496)

fixes #4474

#4496 and related PRs introduced a ceiling on `openmpi`, a dependency that's only pulled in at test time, because `cugraph`'s builds were struggling to find it.

This proposes removing that pin, as the fixes in https://github.com/conda-forge/openmpi-feedstock/pull/159 should allow the package to again be found by e.g. `find_package(MPI)` in CMake scripts.

Authors:
  - James Lamb (https://github.com/jameslamb)

Approvers:
  - Chuck Hastings (https://github.com/ChuckHastings)
  - Ray Douglass (https://github.com/raydouglass)

URL: https://github.com/rapidsai/cugraph/pull/4496
---
 conda/environments/all_cuda-118_arch-x86_64.yaml | 3 ++-
 conda/environments/all_cuda-122_arch-x86_64.yaml | 3 ++-
 dependencies.yaml                                | 5 ++++-
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index 4a235eac7c4..40aaef5b6ed 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -42,7 +42,7 @@ dependencies:
 - numpy>=1.23,<2.0a0
 - numpydoc
 - nvcc_linux-64=11.8
-- openmpi<5.0.3
+- openmpi
 - packaging>=21
 - pandas
 - pre-commit
@@ -70,6 +70,7 @@ dependencies:
 - sphinx-markdown-tables
 - sphinx<6
 - sphinxcontrib-websupport
+- thriftpy2<=0.5.0
 - ucx-proc=*=gpu
 - ucx-py==0.39.*
 - wget
diff --git a/conda/environments/all_cuda-122_arch-x86_64.yaml b/conda/environments/all_cuda-122_arch-x86_64.yaml
index 8275634e55b..1c42ad39fb1 100644
--- a/conda/environments/all_cuda-122_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-122_arch-x86_64.yaml
@@ -47,7 +47,7 @@ dependencies:
 - numba>=0.57
 - numpy>=1.23,<2.0a0
 - numpydoc
-- openmpi<5.0.3
+- openmpi
 - packaging>=21
 - pandas
 - pre-commit
@@ -75,6 +75,7 @@ dependencies:
 - sphinx-markdown-tables
 - sphinx<6
 - sphinxcontrib-websupport
+- thriftpy2<=0.5.0
 - ucx-proc=*=gpu
 - ucx-py==0.39.*
 - wget
diff --git a/dependencies.yaml b/dependencies.yaml
index 91593bf9168..c37d2080771 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -366,7 +366,7 @@ dependencies:
           - libraft-headers==24.8.*
           - libraft==24.8.*
           - librmm==24.8.*
-          - openmpi<5.0.3 # Required for building cpp-mgtests (multi-GPU tests)
+          - openmpi # Required for building cpp-mgtests (multi-GPU tests)
     specific:
       - output_types: [conda]
         matrices:
@@ -545,6 +545,9 @@ dependencies:
       - output_types: [conda]
         packages:
           - pylibwholegraph==24.8.*
+          # this thriftpy2 entry can be removed entirely (or switched to a '!=')
+          # once a new release of that project resolves https://github.com/Thriftpy/thriftpy2/issues/281
+          - thriftpy2<=0.5.0
   test_python_pylibcugraph:
     common:
       - output_types: [conda, pyproject]

From ece789dd27a4e745ff41242206248fd0b6072e31 Mon Sep 17 00:00:00 2001
From: Chuck Hastings <45364586+ChuckHastings@users.noreply.github.com>
Date: Thu, 27 Jun 2024 16:47:23 -0400
Subject: [PATCH 2/2] Tweak rmm configuration for C++ unit tests (#4503)

We are seeing intermittent failures in CI caused by trouble allocating the RMM memory pool.

Dropping the memory usage by default from 1/6 to 1/10.

Added an option `maxpool` that will use 1/2 of the available memory, since we use the unit tests in larger configurations to do scale testing of algorithms.

Authors:
  - Chuck Hastings (https://github.com/ChuckHastings)

Approvers:
  - Seunghwa Kang (https://github.com/seunghwak)
  - James Lamb (https://github.com/jameslamb)

URL: https://github.com/rapidsai/cugraph/pull/4503
---
 cpp/tests/utilities/base_fixture.hpp | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/cpp/tests/utilities/base_fixture.hpp b/cpp/tests/utilities/base_fixture.hpp
index cb302674a25..25011c0c97a 100644
--- a/cpp/tests/utilities/base_fixture.hpp
+++ b/cpp/tests/utilities/base_fixture.hpp
@@ -68,14 +68,18 @@ inline auto make_cuda() { return std::make_shared<rmm::mr::cuda_memory_resource>
 
 inline auto make_managed() { return std::make_shared<rmm::mr::managed_memory_resource>(); }
 
-inline auto make_pool()
+// use_max set to true will use up to half of total GPU memory for RMM (capped at
+// the amount currently free), otherwise we'll use up to 1/10.
+inline auto make_pool(bool use_max = false)
 {
-  // Reduce the default pool allocation to 1/6th of the GPU memory so that we can
+  // Reduce the default pool allocation to 1/10 of GPU memory so that we can
   // run more than 2 tests in parallel at the same time. Changes to this value could
   // effect the maximum amount of parallel tests, and therefore `tests/CMakeLists.txt`
   // `_CUGRAPH_TEST_PERCENT` default value will need to be audited.
   auto const [free, total] = rmm::available_device_memory();
-  auto const min_alloc = rmm::align_down(std::min(free, total / 6), rmm::CUDA_ALLOCATION_ALIGNMENT);
+  auto const min_alloc =
+    use_max ? rmm::align_down(std::min(free, total / 2), rmm::CUDA_ALLOCATION_ALIGNMENT)
+            : rmm::align_down(std::min(free, total / 10), rmm::CUDA_ALLOCATION_ALIGNMENT);
   return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(make_cuda(), min_alloc);
 }
 
@@ -99,7 +103,8 @@ inline auto make_binning()
  * @throw cugraph::logic_error if the `allocation_mode` is unsupported.
  *
  * @param allocation_mode String identifies which resource type.
- *        Accepted types are "pool", "cuda", and "managed" only.
+ *        Accepted types are "binning", "cuda", "pool", "maxpool",
+ *        and "managed" only.
  * @return Memory resource instance
  */
 inline std::shared_ptr<rmm::mr::device_memory_resource> create_memory_resource(
@@ -108,6 +113,7 @@ inline std::shared_ptr<rmm::mr::device_memory_resource> create_memory_resource(
   if (allocation_mode == "binning") return make_binning();
   if (allocation_mode == "cuda") return make_cuda();
   if (allocation_mode == "pool") return make_pool();
+  if (allocation_mode == "maxpool") return make_pool(true);
   if (allocation_mode == "managed") return make_managed();
   CUGRAPH_FAIL("Invalid RMM allocation mode");
 }