Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[REVIEW] Allow construction of cuda_async_memory_resource from existing pool #889

Merged
merged 10 commits into from
Mar 23, 2022
59 changes: 49 additions & 10 deletions include/rmm/mr/device/cuda_async_memory_resource.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <rmm/detail/cuda_util.hpp>
#include <rmm/detail/error.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>
#include <rmm/mr/device/cuda_pool_wrapper.hpp>

#include <thrust/optional.h>

Expand Down Expand Up @@ -73,14 +74,18 @@ class cuda_async_memory_resource final : public device_memory_resource {
pool_props.handleTypes = cudaMemHandleTypePosixFileDescriptor;
pool_props.location.type = cudaMemLocationTypeDevice;
pool_props.location.id = device.value();
RMM_CUDA_TRY(cudaMemPoolCreate(&cuda_pool_handle_, &pool_props));

cudaMemPool_t cuda_pool_handle{};
RMM_CUDA_TRY(cudaMemPoolCreate(&cuda_pool_handle, &pool_props));

auto const [free, total] = rmm::detail::available_device_memory();

// Need an l-value to take address to pass to cudaMemPoolSetAttribute
uint64_t threshold = release_threshold.value_or(total);
RMM_CUDA_TRY(
cudaMemPoolSetAttribute(cuda_pool_handle_, cudaMemPoolAttrReleaseThreshold, &threshold));
cudaMemPoolSetAttribute(cuda_pool_handle, cudaMemPoolAttrReleaseThreshold, &threshold));

pool_ = cuda_pool_wrapper{cuda_pool_handle};

// Allocate and immediately deallocate the initial_pool_size to prime the pool with the
// specified size
Expand All @@ -94,12 +99,47 @@ class cuda_async_memory_resource final : public device_memory_resource {
#endif
}

#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
/**
* @brief Constructs a cuda_async_memory_resource which uses an existing CUDA memory pool.
* cuda_async_memory_resource takes ownership of this pool.
*
* @throws rmm::runtime_error if the CUDA version does not support `cudaMallocAsync`
* @throws rmm::logic_error if the pool is null
* @throws rmm::logic_error if the pool is the default memory pool of the current device
*
* @param valid_pool_handle Handle to a CUDA memory pool which will be used to
* serve allocation requests.
*/
cuda_async_memory_resource(cudaMemPool_t valid_pool_handle)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we still need this ctor with the cuda_pool_wrapper?

: pool_{[valid_pool_handle]() {
RMM_EXPECTS(nullptr != valid_pool_handle, "Unexpected null pool handle.");
return valid_pool_handle;
}()}
{
// Check if cudaMallocAsync Memory pool supported
auto const device = rmm::detail::current_device();
int cuda_pool_supported{};
auto result =
cudaDeviceGetAttribute(&cuda_pool_supported, cudaDevAttrMemoryPoolsSupported, device.value());
RMM_EXPECTS(result == cudaSuccess && cuda_pool_supported,
"cudaMallocAsync not supported with this CUDA driver/runtime version");

// Check that valid_pool_handle is not the device's default memory pool
cudaMemPool_t default_pool{};
result = cudaDeviceGetDefaultMemPool(&default_pool, device.value());

RMM_EXPECTS(result == cudaSuccess && default_pool != pool_handle(),
"Cannot take ownership of the default memory pool");
}
#endif

#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
/**
* @brief Returns the underlying native handle to the CUDA pool
*
*/
[[nodiscard]] cudaMemPool_t pool_handle() const noexcept { return cuda_pool_handle_; }
[[nodiscard]] cudaMemPool_t pool_handle() const noexcept { return pool_.pool_handle(); }
#endif

~cuda_async_memory_resource() override
Expand Down Expand Up @@ -129,8 +169,9 @@ class cuda_async_memory_resource final : public device_memory_resource {
[[nodiscard]] bool supports_get_mem_info() const noexcept override { return false; }

private:

#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
cudaMemPool_t cuda_pool_handle_{};
cuda_pool_wrapper pool_{};
#endif

/**
Expand All @@ -147,10 +188,7 @@ class cuda_async_memory_resource final : public device_memory_resource {
{
void* ptr{nullptr};
#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
if (bytes > 0) {
RMM_CUDA_TRY(cudaMallocFromPoolAsync(&ptr, bytes, pool_handle(), stream.value()),
rmm::bad_alloc);
}
ptr = pool_.allocate(bytes, stream);
#else
(void)bytes;
(void)stream;
Expand All @@ -165,12 +203,13 @@ class cuda_async_memory_resource final : public device_memory_resource {
*
* @param p Pointer to be deallocated
*/
void do_deallocate(void* ptr, std::size_t, rmm::cuda_stream_view stream) override
void do_deallocate(void* ptr, std::size_t size, rmm::cuda_stream_view stream) override
{
#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
if (ptr != nullptr) { RMM_ASSERT_CUDA_SUCCESS(cudaFreeAsync(ptr, stream.value())); }
if (ptr != nullptr) { pool_.deallocate(ptr, size, stream); }
fkallen marked this conversation as resolved.
Show resolved Hide resolved
#else
(void)ptr;
(void)size;
(void)stream;
#endif
}
Expand Down
175 changes: 175 additions & 0 deletions include/rmm/mr/device/cuda_pool_wrapper.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <rmm/cuda_device.hpp>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/detail/cuda_util.hpp>
#include <rmm/detail/error.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>

#include <thrust/optional.h>

#include <cuda_runtime_api.h>

#include <cstddef>
#include <limits>

#if CUDART_VERSION >= 11020 // 11.2 introduced cudaMallocAsync
#define RMM_CUDA_MALLOC_ASYNC_SUPPORT
#endif

namespace rmm::mr {

/**
 * @brief `device_memory_resource` derived class that serves allocations from an
 * existing, externally-created CUDA memory pool using
 * `cudaMallocFromPoolAsync`/`cudaFreeAsync`.
 */
class cuda_pool_wrapper final : public device_memory_resource {
 public:
#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
  /**
   * @brief Constructs a cuda_pool_wrapper which uses an existing CUDA memory pool.
   * The provided pool is not owned by cuda_pool_wrapper and must remain valid
   * during the lifetime of the memory resource.
   *
   * @throws rmm::logic_error if the pool is null
   * @throws rmm::logic_error if the CUDA version does not support `cudaMallocAsync`
   *
   * @param valid_pool_handle Handle to a CUDA memory pool which will be used to
   * serve allocation requests.
   */
  explicit cuda_pool_wrapper(cudaMemPool_t valid_pool_handle)
    : cuda_pool_handle_{[valid_pool_handle]() {
        RMM_EXPECTS(nullptr != valid_pool_handle, "Unexpected null pool handle.");
        return valid_pool_handle;
      }()}
  {
    // Verify the driver/runtime/device combination supports cudaMallocAsync pools.
    auto const device = rmm::detail::current_device();
    int cuda_pool_supported{};
    auto result =
      cudaDeviceGetAttribute(&cuda_pool_supported, cudaDevAttrMemoryPoolsSupported, device.value());
    RMM_EXPECTS(result == cudaSuccess && cuda_pool_supported,
                "cudaMallocAsync not supported with this CUDA driver/runtime version");
  }
#endif

#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
  /**
   * @brief Returns the underlying native handle to the CUDA pool
   */
  [[nodiscard]] cudaMemPool_t pool_handle() const noexcept { return cuda_pool_handle_; }
#endif

  // A default-constructed wrapper holds a null handle; it is only useful as a
  // placeholder to be assigned to later. Copies/moves share the non-owned handle.
  cuda_pool_wrapper()                         = default;
  cuda_pool_wrapper(cuda_pool_wrapper const&) = default;
  cuda_pool_wrapper(cuda_pool_wrapper&&)      = default;
  cuda_pool_wrapper& operator=(cuda_pool_wrapper const&) = default;
  cuda_pool_wrapper& operator=(cuda_pool_wrapper&&) = default;

  /**
   * @brief Query whether the resource supports use of non-null CUDA streams for
   * allocation/deallocation. Pool allocations are stream-ordered, so streams are
   * supported.
   *
   * @returns bool true
   */
  [[nodiscard]] bool supports_streams() const noexcept override { return true; }

  /**
   * @brief Query whether the resource supports the get_mem_info API.
   *
   * @return false
   */
  [[nodiscard]] bool supports_get_mem_info() const noexcept override { return false; }

 private:
#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
  cudaMemPool_t cuda_pool_handle_{};  // non-owning handle to the wrapped pool
#endif

  /**
   * @brief Allocates memory of size at least `bytes` from the wrapped pool using
   * `cudaMallocFromPoolAsync`.
   *
   * The returned pointer has at least 256B alignment.
   *
   * @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled
   *
   * @param bytes The size, in bytes, of the allocation
   * @param stream Stream on which the allocation is ordered
   * @return void* Pointer to the newly allocated memory
   */
  void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override
  {
    void* ptr{nullptr};
#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
    if (bytes > 0) {
      RMM_CUDA_TRY(cudaMallocFromPoolAsync(&ptr, bytes, pool_handle(), stream.value()),
                   rmm::bad_alloc);
    }
#else
    (void)bytes;
    (void)stream;
#endif
    return ptr;
  }

  /**
   * @brief Deallocate memory pointed to by \p ptr using `cudaFreeAsync`.
   *
   * @throws Nothing.
   *
   * @param ptr Pointer to be deallocated
   * @param stream Stream on which the deallocation is ordered
   */
  void do_deallocate(void* ptr, std::size_t, rmm::cuda_stream_view stream) override
  {
#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
    if (ptr != nullptr) { RMM_ASSERT_CUDA_SUCCESS(cudaFreeAsync(ptr, stream.value())); }
#else
    (void)ptr;
    (void)stream;
#endif
  }

  /**
   * @brief Compare this resource to another.
   *
   * Two wrappers are only interchangeable when they wrap the SAME pool; memory
   * allocated from one pool cannot be deallocated through a resource wrapping a
   * different pool.
   *
   * @throws Nothing.
   *
   * @param other The other resource to compare to
   * @return true If the two resources are equivalent
   * @return false If the two resources are not equal
   */
  [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override
  {
#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
    auto const* cast = dynamic_cast<cuda_pool_wrapper const*>(&other);
    return cast != nullptr && cast->pool_handle() == pool_handle();
#else
    return dynamic_cast<cuda_pool_wrapper const*>(&other) != nullptr;
#endif
  }

  /**
   * @brief Get free and available memory for memory resource.
   *
   * Not supported by this resource; always reports zero.
   *
   * @return std::pair containing free_size and total_size of memory
   */
  [[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(
    rmm::cuda_stream_view) const override
  {
    return std::make_pair(0, 0);
  }
};

} // namespace rmm::mr
53 changes: 53 additions & 0 deletions tests/mr/device/cuda_async_mr_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
* limitations under the License.
*/

#include <rmm/cuda_device.hpp>
#include <rmm/detail/error.hpp>
#include <rmm/mr/device/cuda_async_memory_resource.hpp>

Expand Down Expand Up @@ -54,6 +55,58 @@ TEST(PoolTest, ExplicitReleaseThreshold)
RMM_CUDA_TRY(cudaDeviceSynchronize());
}

TEST(PoolTest, TakingOwnershipOfPool)
{
  // Build a standalone CUDA pool to hand over to the resource.
  cudaMemPoolProps pool_props{};
  pool_props.allocType     = cudaMemAllocationTypePinned;
  pool_props.location.id   = rmm::detail::current_device().value();
  pool_props.location.type = cudaMemLocationTypeDevice;

  cudaMemPool_t owned_pool{};
  RMM_CUDA_TRY(cudaMemPoolCreate(&owned_pool, &pool_props));

  {
    // The resource takes ownership of the pool; exercise one allocate/deallocate
    // round trip through it.
    const auto allocation_size{100};
    cuda_async_mr mr{owned_pool};
    void* ptr = mr.allocate(allocation_size);
    mr.deallocate(ptr, allocation_size);
    RMM_CUDA_TRY(cudaDeviceSynchronize());
  }

  // The resource's destructor should already have destroyed the pool, so a second
  // destroy must fail with cudaErrorInvalidValue.
  auto destroy_invalid_pool = [&]() {
    auto result = cudaMemPoolDestroy(owned_pool);
    RMM_EXPECTS(result == cudaErrorInvalidValue, "Owning mr did not destroy owned pool");
  };

  EXPECT_NO_THROW(destroy_invalid_pool());
}

TEST(PoolTest, ThrowIfNullptrPool)
{
  // Constructing a resource from a null pool handle must be rejected.
  auto make_resource_from_null_pool = []() {
    cudaMemPool_t null_pool{nullptr};
    cuda_async_mr mr{null_pool};
  };

  EXPECT_THROW(make_resource_from_null_pool(), rmm::logic_error);
}

TEST(PoolTest, ThrowIfDefaultPool)
{
  // Taking ownership of the device's default memory pool must be rejected.
  auto make_resource_from_default_pool = []() {
    cudaMemPool_t default_pool{};
    RMM_CUDA_TRY(
      cudaDeviceGetDefaultMemPool(&default_pool, rmm::detail::current_device().value()));

    cuda_async_mr mr{default_pool};
  };

  EXPECT_THROW(make_resource_from_default_pool(), rmm::logic_error);
}


fkallen marked this conversation as resolved.
Show resolved Hide resolved
#endif

} // namespace
Expand Down
Loading