Implement cuda::mr::cuda_memory_resource

Fixes #1512
NVIDIA · Apr 11, 2024 · 83a2dd5 · 83a2dd5
1 parent 0e71865
commit 83a2dd5
Show file tree

Hide file tree

Showing 6 changed files with 428 additions and 0 deletions.
diff --git a/libcudacxx/include/cuda/__memory_resource/cuda_memory_resource.h b/libcudacxx/include/cuda/__memory_resource/cuda_memory_resource.h
@@ -0,0 +1,164 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _CUDA__MEMORY_RESOURCE_CUDA_MEMORY_RESOURCE_H
+#define _CUDA__MEMORY_RESOURCE_CUDA_MEMORY_RESOURCE_H
+
+#include <cuda/std/detail/__config>
+
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
+
+#if !defined(_CCCL_COMPILER_MSVC_2017)
+
+#  if !defined(_CCCL_CUDA_COMPILER_NVCC) && !defined(_CCCL_CUDA_COMPILER_NVHPC)
+#    include <cuda_runtime_api.h>
+#  endif // !_CCCL_CUDA_COMPILER_NVCC && !_CCCL_CUDA_COMPILER_NVHPC
+
+#  include <cuda/__memory_resource/get_property.h>
+#  include <cuda/__memory_resource/properties.h>
+#  include <cuda/__memory_resource/resource.h>
+#  include <cuda/__memory_resource/resource_ref.h>
+#  include <cuda/std/__cuda/api_wrapper.h>
+#  include <cuda/std/detail/libcxx/include/__new/bad_alloc.h>
+
+#  if _CCCL_STD_VER >= 2014
+
+_LIBCUDACXX_BEGIN_NAMESPACE_CUDA_MR
+
+/**
+ * @brief `cuda_memory_resource` uses cudaMalloc / cudaFree for allocation/deallocation.
+ *
+ * The resource holds no state: every instance compares equal to every other instance, and it advertises the
+ * `device_accessible` property.
+ */
+struct cuda_memory_resource
+{
+  /**
+   * @brief Allocate device memory of size at least \p __bytes.
+   * @param __bytes The size in bytes of the allocation.
+   * @param __alignment The requested alignment of the allocation. Must be non-zero and evenly divide
+   * `default_cuda_malloc_alignment`.
+   * @throw cuda::std::bad_alloc if \p __alignment is not a valid alignment.
+   * @throw cuda::cuda_error if `cudaMalloc` fails, carrying the returned error code.
+   * @return Pointer to the newly allocated memory
+   */
+  void* allocate(const size_t __bytes, const size_t __alignment = default_cuda_malloc_alignment) const
+  {
+    // cudaMalloc takes no alignment argument, so only alignments already implied by its default alignment are accepted
+    if (!__is_valid_alignment(__alignment))
+    {
+      _CUDA_VSTD_NOVERSION::__throw_bad_alloc();
+    }
+
+    void* __ptr{nullptr};
+    _CCCL_TRY_CUDA_API(::cudaMalloc, "Failed to allocate memory with cudaMalloc.", &__ptr, __bytes);
+    return __ptr;
+  }
+
+  /**
+   * @brief Deallocate memory pointed to by \p __ptr.
+   * @param __ptr Pointer to be deallocated. Must have been allocated through a call to `allocate`
+   * @param __bytes The number of bytes that was passed to the `allocate` call that returned \p __ptr. Unused.
+   * @param __alignment The alignment that was passed to the `allocate` call that returned \p __ptr.
+   */
+  void deallocate(void* __ptr, const size_t, const size_t __alignment = default_cuda_malloc_alignment) const
+  {
+    // An alignment that `allocate` would have rejected cannot belong to a pointer obtained from this resource
+    _LIBCUDACXX_ASSERT(__is_valid_alignment(__alignment),
+                       "Invalid alignment passed to cuda_memory_resource::deallocate.");
+    _CCCL_ASSERT_CUDA_API(::cudaFree, "cuda_memory_resource::deallocate failed", __ptr);
+    // Keeps __alignment "used" in case the assertion above expands to nothing
+    (void) __alignment;
+  }
+
+  /**
+   * @brief Equality comparison with another cuda_memory_resource
+   * @return true
+   */
+  _LIBCUDACXX_NODISCARD_ATTRIBUTE constexpr bool operator==(cuda_memory_resource const&) const noexcept
+  {
+    return true;
+  }
+#    if _CCCL_STD_VER <= 2017
+  /**
+   * @brief Inequality comparison with another cuda_memory_resource
+   * @return false
+   */
+  _LIBCUDACXX_NODISCARD_ATTRIBUTE constexpr bool operator!=(cuda_memory_resource const&) const noexcept
+  {
+    return false;
+  }
+#    endif // _CCCL_STD_VER <= 2017
+
+  /**
+   * @brief Equality comparison between a cuda_memory_resource and another resource
+   * @param __lhs The cuda_memory_resource
+   * @param __rhs The resource to compare to
+   * @return The result of comparing type-erased `resource_ref<>` views of both resources for equality.
+   */
+  template <class _Resource>
+  _LIBCUDACXX_NODISCARD_FRIEND auto operator==(cuda_memory_resource const& __lhs, _Resource const& __rhs) noexcept
+    _LIBCUDACXX_TRAILING_REQUIRES(bool)(__different_resource<cuda_memory_resource, _Resource>)
+  {
+    return resource_ref<>{const_cast<cuda_memory_resource&>(__lhs)} == resource_ref<>{const_cast<_Resource&>(__rhs)};
+  }
+#    if _CCCL_STD_VER <= 2017
+  /**
+   * @brief Equality comparison between another resource and a cuda_memory_resource (reversed operand order)
+   * @param __lhs The resource to compare to
+   * @param __rhs The cuda_memory_resource
+   * @return The result of comparing type-erased `resource_ref<>` views of both resources for equality.
+   */
+  template <class _Resource>
+  _LIBCUDACXX_NODISCARD_FRIEND auto operator==(_Resource const& __lhs, cuda_memory_resource const& __rhs) noexcept
+    _LIBCUDACXX_TRAILING_REQUIRES(bool)(__different_resource<cuda_memory_resource, _Resource>)
+  {
+    // The cuda_memory_resource stays on the left of the resource_ref comparison, as in the overload above
+    return resource_ref<>{const_cast<cuda_memory_resource&>(__rhs)} == resource_ref<>{const_cast<_Resource&>(__lhs)};
+  }
+  /**
+   * @brief Inequality comparison between a cuda_memory_resource and another resource
+   * @param __lhs The cuda_memory_resource
+   * @param __rhs The resource to compare to
+   * @return The result of comparing type-erased `resource_ref<>` views of both resources for inequality.
+   */
+  template <class _Resource>
+  _LIBCUDACXX_NODISCARD_FRIEND auto operator!=(cuda_memory_resource const& __lhs, _Resource const& __rhs) noexcept
+    _LIBCUDACXX_TRAILING_REQUIRES(bool)(__different_resource<cuda_memory_resource, _Resource>)
+  {
+    return resource_ref<>{const_cast<cuda_memory_resource&>(__lhs)} != resource_ref<>{const_cast<_Resource&>(__rhs)};
+  }
+  /**
+   * @brief Inequality comparison between another resource and a cuda_memory_resource (reversed operand order)
+   * @param __lhs The resource to compare to
+   * @param __rhs The cuda_memory_resource
+   * @return The result of comparing type-erased `resource_ref<>` views of both resources for inequality.
+   */
+  template <class _Resource>
+  _LIBCUDACXX_NODISCARD_FRIEND auto operator!=(_Resource const& __lhs, cuda_memory_resource const& __rhs) noexcept
+    _LIBCUDACXX_TRAILING_REQUIRES(bool)(__different_resource<cuda_memory_resource, _Resource>)
+  {
+    // The cuda_memory_resource stays on the left of the resource_ref comparison, as in the overload above
+    return resource_ref<>{const_cast<cuda_memory_resource&>(__rhs)} != resource_ref<>{const_cast<_Resource&>(__lhs)};
+  }
+#    endif // _CCCL_STD_VER <= 2017
+
+  /**
+   * @brief Enables the `device_accessible` property
+   */
+  friend constexpr void get_property(cuda_memory_resource const&, device_accessible) noexcept {}
+
+  /**
+   * @brief Checks whether the passed in alignment is valid
+   * @param __alignment The alignment to validate.
+   * @return true if \p __alignment is non-zero, does not exceed `default_cuda_malloc_alignment` and evenly divides
+   * it; false otherwise.
+   */
+  static constexpr bool __is_valid_alignment(const size_t __alignment) noexcept
+  {
+    // Reject zero first: evaluating `default_cuda_malloc_alignment % 0` would be undefined behavior
+    return __alignment != 0 && __alignment <= default_cuda_malloc_alignment
+        && (default_cuda_malloc_alignment % __alignment == 0);
+  }
+};
+// Compile-time check that the type models a resource exposing the device_accessible property
+static_assert(resource_with<cuda_memory_resource, device_accessible>, "");
+
+_LIBCUDACXX_END_NAMESPACE_CUDA_MR
+
+#  endif // _CCCL_STD_VER >= 2014
+
+#endif // !_CCCL_COMPILER_MSVC_2017
+
+#endif //_CUDA__MEMORY_RESOURCE_CUDA_MEMORY_RESOURCE_H
diff --git a/libcudacxx/include/cuda/__memory_resource/properties.h b/libcudacxx/include/cuda/__memory_resource/properties.h
@@ -27,6 +27,11 @@
 
 _LIBCUDACXX_BEGIN_NAMESPACE_CUDA_MR
 
+/**
+ * @brief The default alignment (in bytes) of memory returned by a cudaMalloc{...} call
+ */
+_LIBCUDACXX_INLINE_VAR constexpr size_t default_cuda_malloc_alignment = 256;
+
 /// \struct device_accessible
 /// \brief The \c device_accessible property signals that the allocated memory is device accessible
 struct device_accessible

diff --git a/libcudacxx/include/cuda/memory_resource b/libcudacxx/include/cuda/memory_resource
@@ -92,6 +92,7 @@ class resource_ref {
 #    pragma system_header
 #  endif // no system header
 
+#include <cuda/__memory_resource/cuda_memory_resource.h>
 #include <cuda/__memory_resource/get_property.h>
 #include <cuda/__memory_resource/properties.h>
 #include <cuda/__memory_resource/resource.h>

diff --git a/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_memory_resource/allocate.pass.cpp b/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_memory_resource/allocate.pass.cpp
@@ -0,0 +1,94 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11
+// UNSUPPORTED: msvc-19.16
+// UNSUPPORTED: nvrtc
+
+#include <cuda/memory_resource>
+#include <cuda/std/cassert>
+#include <cuda/std/cstdint>
+#include <cuda/stream_ref>
+
+#include "test_macros.h"
+
+// Asserts that `ptr` is non-null and that the CUDA runtime classifies it as device memory.
+void ensure_device_ptr(void* ptr)
+{
+  assert(ptr != nullptr);
+
+  // Ask the runtime for the attributes of the allocation behind `ptr`
+  cudaPointerAttributes ptr_attrs;
+  const cudaError_t query_result = cudaPointerGetAttributes(&ptr_attrs, ptr);
+  assert(query_result == cudaSuccess);
+  assert(ptr_attrs.type == cudaMemoryTypeDevice);
+}
+
+// Exercises cuda_memory_resource::allocate / deallocate, including alignment validation.
+void test()
+{
+  cuda::mr::cuda_memory_resource res{};
+
+  { // allocate / deallocate
+    auto* ptr = res.allocate(42);
+    static_assert(cuda::std::is_same<decltype(ptr), void*>::value, "");
+    ensure_device_ptr(ptr);
+
+    res.deallocate(ptr, 42);
+  }
+
+  { // allocate / deallocate with alignment
+    constexpr size_t desired_alignment = 64;
+    auto* ptr                          = res.allocate(42, desired_alignment);
+    static_assert(cuda::std::is_same<decltype(ptr), void*>::value, "");
+    ensure_device_ptr(ptr);
+
+    // also check the alignment: the address must be a multiple of the requested alignment
+    const auto address = reinterpret_cast<cuda::std::uintptr_t>(ptr);
+    assert(address % desired_alignment == 0);
+    res.deallocate(ptr, 42, desired_alignment);
+  }
+
+#ifndef TEST_HAS_NO_EXCEPTIONS
+  { // allocate with an alignment that does not evenly divide the default alignment
+    // The loop only exists so the handler can `break` past the assert(false) that flags a missing exception
+    while (true)
+    {
+      try
+      {
+        auto* ptr = res.allocate(5, 42);
+        unused(ptr);
+      }
+      catch (const cuda::std::bad_alloc&)
+      {
+        break;
+      }
+      assert(false);
+    }
+  }
+
+  { // allocate with an alignment larger than the default alignment
+    // The loop only exists so the handler can `break` past the assert(false) that flags a missing exception
+    while (true)
+    {
+      try
+      {
+        auto* ptr = res.allocate(5, 1337);
+        unused(ptr);
+      }
+      catch (const cuda::std::bad_alloc&)
+      {
+        break;
+      }
+      assert(false);
+    }
+  }
+#endif // TEST_HAS_NO_EXCEPTIONS
+}
+
+// Entry point: invokes test() only when compiling for the host target.
+int main(int, char**)
+{
+  NV_IF_TARGET(NV_IS_HOST, (test();))
+  return 0;
+}