Skip to content

Commit

Permalink
Remove memory access flags from cuda_async_memory_resource (#1754)
Browse files Browse the repository at this point in the history
Closes #1753
It is a follow up from #1743

I would like for rapidsai/cudf#17553 to merge first, that way I don't break the build.

I've learned that I was using `cudaMemPoolSetAccess` incorrectly. This API should only be used from a `peer` device, not from the device that created the pool. This is the reason why calling `cudaMemPoolSetAccess` with none throws an error as documented here #1753. I have tested that I can still export the fabric handles and import them using UCX in a peer device with the default access that pool owner device gets (read+write is the default). Note that this read+write default access cannot be revoked from the owner, as it wouldn't make sense to have memory that nobody has access to, but peers can call `cudaMemPoolSetAccess` to gain read+write access or to stop accessing (none) a peer's pool memory.

Authors:
  - Alessandro Bellina (https://github.com/abellina)

Approvers:
  - Bradley Dice (https://github.com/bdice)

URL: #1754
  • Loading branch information
abellina authored Dec 9, 2024
1 parent ff59ea4 commit 8d41610
Show file tree
Hide file tree
Showing 2 changed files with 1 addition and 38 deletions.
26 changes: 1 addition & 25 deletions include/rmm/mr/device/cuda_async_memory_resource.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,21 +65,6 @@ class cuda_async_memory_resource final : public device_memory_resource {
fabric = 0x8 ///< Allows a fabric handle to be used for exporting. (cudaMemFabricHandle_t)
};

/**
* @brief Flags for specifying the memory pool accessibility from other devices.
*
* @note These values are exact copies from `cudaMemAccessFlags`. See the `cudaMemAccessFlags`
* docs at https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html and ensure the
* enum values are kept in sync with the CUDA documentation. The default, `none`, marks the pool's
* memory as private to the device in which it was created. `read_write` should only be used if
* memory sharing among devices is required. Note that there is a `cudaMemAccessFlagsProtRead`
* documented, but memory pools don't support read-only access, so it has been omitted.
*/
enum class access_flags {
none = 0, ///< Default, make pool not accessible.
read_write = 3 ///< Make pool read-write accessible.
};

/**
* @brief Constructs a cuda_async_memory_resource with the optionally specified initial pool size
* and release threshold.
Expand All @@ -96,14 +81,11 @@ class cuda_async_memory_resource final : public device_memory_resource {
* @param export_handle_type Optional `cudaMemAllocationHandleType` that allocations from this
* resource should support interprocess communication (IPC). Default is `cudaMemHandleTypeNone`
* for no IPC support.
* @param access_flag Optional `cudaMemAccessFlags` that controls pool memory accessibility
* from other devices. Default is `cudaMemAccessFlagsProtNone` for no accessibility.
*/
// NOLINTNEXTLINE(bugprone-easily-swappable-parameters)
cuda_async_memory_resource(std::optional<std::size_t> initial_pool_size = {},
std::optional<std::size_t> release_threshold = {},
std::optional<allocation_handle_type> export_handle_type = {},
std::optional<access_flags> access_flag = {})
std::optional<allocation_handle_type> export_handle_type = {})
{
// Check if cudaMallocAsync Memory pool supported
RMM_EXPECTS(rmm::detail::runtime_async_alloc::is_supported(),
Expand Down Expand Up @@ -135,12 +117,6 @@ class cuda_async_memory_resource final : public device_memory_resource {
cudaMemPoolSetAttribute(pool_handle(), cudaMemPoolReuseAllowOpportunistic, &disabled));
}

if (access_flag) {
cudaMemAccessDesc desc = {.location = pool_props.location,
.flags = static_cast<cudaMemAccessFlags>(*access_flag)};
RMM_CUDA_TRY(cudaMemPoolSetAccess(pool_handle(), &desc, 1));
}

auto const [free, total] = rmm::available_device_memory();

// Need an l-value to take address to pass to cudaMemPoolSetAttribute
Expand Down
13 changes: 0 additions & 13 deletions tests/mr/device/cuda_async_mr_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,18 +91,5 @@ TEST_F(AsyncMRFabricTest, FabricHandlesSupport)
RMM_CUDA_TRY(cudaDeviceSynchronize());
}

TEST_F(AsyncMRFabricTest, FabricHandlesSupportReadWriteShareable)
{
const auto pool_init_size{100};
const auto pool_release_threshold{1000};
cuda_async_mr mr{pool_init_size,
pool_release_threshold,
rmm::mr::cuda_async_memory_resource::allocation_handle_type::fabric,
rmm::mr::cuda_async_memory_resource::access_flags::read_write};
void* ptr = mr.allocate(pool_init_size);
mr.deallocate(ptr, pool_init_size);
RMM_CUDA_TRY(cudaDeviceSynchronize());
}

} // namespace
} // namespace rmm::test

0 comments on commit 8d41610

Please sign in to comment.