From aae814fbe997b7ec0ce02f192ccd66c805a0fda6 Mon Sep 17 00:00:00 2001
From: Michael Schellenberger Costa <miscco@nvidia.com>
Date: Thu, 7 Mar 2024 18:31:58 +0100
Subject: [PATCH] Add `cuda::mr::cuda_memory_resource`

Fixes #1512
---
 .../__memory_resource/cuda_memory_resource.h  | 196 ++++++++++++++++++
 libcudacxx/include/cuda/memory_resource       |   1 +
 .../cuda_memory_resource/allocate.pass.cpp    |  72 +++++++
 .../cuda_memory_resource/equality.pass.cpp    |  46 ++++
 .../cuda_memory_resource/traits.pass.cpp      |  31 +++
 5 files changed, 346 insertions(+)
 create mode 100644 libcudacxx/include/cuda/__memory_resource/cuda_memory_resource.h
 create mode 100644 libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_memory_resource/allocate.pass.cpp
 create mode 100644 libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_memory_resource/equality.pass.cpp
 create mode 100644 libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_memory_resource/traits.pass.cpp

diff --git a/libcudacxx/include/cuda/__memory_resource/cuda_memory_resource.h b/libcudacxx/include/cuda/__memory_resource/cuda_memory_resource.h
new file mode 100644
index 00000000000..e3e8d9d3d73
--- /dev/null
+++ b/libcudacxx/include/cuda/__memory_resource/cuda_memory_resource.h
@@ -0,0 +1,196 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _CUDA__MEMORY_RESOURCE_CUDA_MEMORY_RESOURCE_H
+#define _CUDA__MEMORY_RESOURCE_CUDA_MEMORY_RESOURCE_H
+
+#include <cuda/__cccl_config>
+
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
+
+#include <cuda_runtime_api.h>
+
+#include <cuda/__memory_resource/get_property.h>
+#include <cuda/__memory_resource/properties.h>
+#include <cuda/__memory_resource/resource_ref.h>
+#include <cuda/stream_ref>
+
+#if _CCCL_STD_VER >= 2014
+
+_LIBCUDACXX_BEGIN_NAMESPACE_CUDA_MR
+
+/**
+ * @brief `cuda_memory_resource` uses cudaMalloc/Free for allocation/deallocation.
+ */
+struct cuda_memory_resource
+{
+  /**
+   * @brief Allocate device memory of size at least \p __bytes.
+   * @param __bytes The size in bytes of the allocation.
+   * @param __alignment The requested alignment of the allocation. Is ignored!
+   * @return void* Pointer to the newly allocated memory
+   */
+  void* allocate(const size_t __bytes, size_t) const
+  {
+    return allocate(__bytes);
+  }
+
+  /**
+   * @brief Allocate device memory of size at least \p __bytes.
+   * @param __bytes The size in bytes of the allocation.
+   * @return void* Pointer to the newly allocated memory
+   */
+  void* allocate(const size_t __bytes) const
+  {
+    void* __ptr{nullptr};
+    const ::cudaError_t __status = ::cudaMalloc(&__ptr, __bytes);
+    _LIBCUDACXX_ASSERT(__status == cudaSuccess, "cuda_memory_resource::allocate failed");
+    return __ptr;
+  }
+
+  /**
+   * @brief Deallocate memory pointed to by \p __ptr.
+   * @param __ptr Pointer to be deallocated
+   * @param __bytes The size in bytes of the allocation.
+   * @param __alignment The alignment that was passed to the `allocate` call that returned \p __ptr. Is ignored!
+   */
+  void deallocate(void* __ptr, size_t __bytes, size_t) const
+  {
+    deallocate(__ptr, __bytes);
+  }
+
+  /**
+   * @brief Deallocate memory pointed to by \p __ptr.
+   * @param __ptr Pointer to be deallocated
+   * @param __bytes The size in bytes of the allocation.
+   */
+  void deallocate(void* __ptr, size_t) const
+  {
+    const ::cudaError_t __status = ::cudaFree(__ptr);
+    _LIBCUDACXX_ASSERT(__status == cudaSuccess, "cuda_memory_resource::deallocate failed");
+  }
+
+  /**
+   * @brief Allocate device memory of size at least \p __bytes.
+   * @param __bytes The size in bytes of the allocation.
+   * @param __alignment The requested alignment of the allocation. Is ignored!
+   * @param __stream Stream on which to perform allocation. Is ignored!
+   * @return void* Pointer to the newly allocated memory
+   */
+  void* allocate_async(const size_t __bytes, size_t, ::cuda::stream_ref __stream) const
+  {
+    return allocate_async(__bytes, __stream);
+  }
+
+  /**
+   * @brief Allocate device memory of size at least \p __bytes.
+   * @param __bytes The size in bytes of the allocation.
+   * @param __stream Stream on which to perform allocation. Is ignored!
+   * @return void* Pointer to the newly allocated memory
+   */
+  void* allocate_async(const size_t __bytes, ::cuda::stream_ref) const
+  {
+    void* __ptr{nullptr};
+    const ::cudaError_t __status = ::cudaMalloc(&__ptr, __bytes);
+    _LIBCUDACXX_ASSERT(__status == cudaSuccess, "cuda_memory_resource::allocate_async failed");
+    return __ptr;
+  }
+
+  /**
+   * @brief Deallocate memory pointed to by \p __ptr.
+   * @param __ptr Pointer to be deallocated
+   * @param __bytes The size in bytes of the allocation.
+   * @param __alignment The alignment that was passed to the `allocate` call that returned \p __ptr. Is ignored!
+   * @param __stream Stream on which to perform deallocation. Is ignored!
+   */
+  void deallocate_async(void* __ptr, size_t __bytes, size_t, ::cuda::stream_ref __stream) const
+  {
+    deallocate_async(__ptr, __bytes, __stream);
+  }
+
+  /**
+   * @brief Deallocate memory pointed to by \p __ptr.
+   * @param __ptr Pointer to be deallocated
+   * @param __bytes The size in bytes of the allocation.
+   * @param __stream Stream on which to perform deallocation. Is ignored!
+   */
+  void deallocate_async(void* __ptr, size_t, ::cuda::stream_ref) const
+  {
+    const ::cudaError_t __status = ::cudaFree(__ptr);
+    _LIBCUDACXX_ASSERT(__status == cudaSuccess, "cuda_memory_resource::deallocate_async failed");
+  }
+    /**
+   * @brief Comparison operator between a cuda_memory_resource and another cuda_memory_resource
+   *
+   * @param __lhs The cuda_memory_resource
+   * @param __rhs The other cuda_memory_resource to compare to
+   * @return Whether the two resources are equivalent
+   */
+  _LIBCUDACXX_NODISCARD_FRIEND constexpr bool
+  operator==(cuda_memory_resource const&, cuda_memory_resource const&) noexcept
+  {
+    return true;
+  }
+#  if _CCCL_STD_VER <= 2017
+  _LIBCUDACXX_NODISCARD_FRIEND constexpr bool
+  operator!=(cuda_memory_resource const&, cuda_memory_resource const&) noexcept
+  {
+    return false;
+  }
+#  endif // _CCCL_STD_VER <= 2017
+
+  /**
+   * @brief Comparison operator between a cuda_memory_resource and another device acccessible resource
+   *
+   * @param __lhs The cuda_memory_resource
+   * @param __rhs The other resource to compare to
+   * @return Whether the two resources are equivalent
+   */
+  _LIBCUDACXX_NODISCARD_FRIEND bool
+  operator==(cuda_memory_resource __lhs, async_resource_ref<device_accessible> const& __rhs) noexcept
+  {
+    return async_resource_ref<device_accessible>{__lhs} == __rhs;
+  }
+#  if _CCCL_STD_VER <= 2017
+  _LIBCUDACXX_NODISCARD_FRIEND bool
+  operator==(async_resource_ref<device_accessible> const& __lhs, cuda_memory_resource __rhs) noexcept
+  {
+    return __lhs == async_resource_ref<device_accessible>{&__rhs};
+  }
+  _LIBCUDACXX_NODISCARD_FRIEND bool
+  operator!=(cuda_memory_resource __lhs, async_resource_ref<device_accessible> const& __rhs) noexcept
+  {
+    return async_resource_ref<device_accessible>{__lhs} != __rhs;
+  }
+  _LIBCUDACXX_NODISCARD_FRIEND bool
+  operator!=(async_resource_ref<device_accessible> const& __lhs, cuda_memory_resource __rhs) noexcept
+  {
+    return __lhs != async_resource_ref<device_accessible>{__rhs};
+  }
+#  endif // _CCCL_STD_VER <= 2017
+
+  /**
+   * @brief Enables the `device_accessible` property
+   */
+  friend constexpr void get_property(cuda_memory_resource const&, device_accessible) noexcept {}
+};
+static_assert(async_resource_with<device_accessible>, "");
+
+_LIBCUDACXX_END_NAMESPACE_CUDA_MR
+
+#endif // _CCCL_STD_VER >= 2014
+
+#endif //_CUDA__MEMORY_RESOURCE_CUDA_MEMORY_RESOURCE_H
diff --git a/libcudacxx/include/cuda/memory_resource b/libcudacxx/include/cuda/memory_resource
index 5bd158cc14a..14e776d157b 100644
--- a/libcudacxx/include/cuda/memory_resource
+++ b/libcudacxx/include/cuda/memory_resource
@@ -92,6 +92,7 @@ class resource_ref {
 #    pragma system_header
 #  endif // no system header
 
+#include <cuda/__memory_resource/cuda_memory_resource.h>
 #include <cuda/__memory_resource/get_property.h>
 #include <cuda/__memory_resource/properties.h>
 #include <cuda/__memory_resource/resource.h>
diff --git a/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_memory_resource/allocate.pass.cpp b/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_memory_resource/allocate.pass.cpp
new file mode 100644
index 00000000000..1880b6402b0
--- /dev/null
+++ b/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_memory_resource/allocate.pass.cpp
@@ -0,0 +1,72 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11
+// UNSUPPORTED: nvrtc
+
+// cuda::mr::resource_ref construction
+
+#include <cuda/memory_resource>
+#include <cuda/std/cassert>
+#include <cuda/std/cstdint>
+#include <cuda/stream_ref>
+
+void ensure_device_ptr(void* ptr) {
+    assert(ptr != nullptr);
+    cudaPointerAttributes attributes;
+    cudaError_t status = cudaPointerGetAttributes (&attributes, ptr);
+    assert(status == cudaSuccess);
+    assert(attributes.type == cudaMemoryTypeDevice );
+}
+
+void test() {
+  cuda::mr::cuda_memory_resource res{};
+
+  { // allocate / deallocate
+    auto* ptr = res.allocate(42);
+    static_assert(cuda::std::is_same<decltype(ptr), void*>::value, "");
+    ensure_device_ptr(ptr);
+
+    res.deallocate(ptr, 42);
+  }
+
+  { // allocate / deallocate with alignment
+    auto* ptr = res.allocate(42, 4);
+    static_assert(cuda::std::is_same<decltype(ptr), void*>::value, "");
+    ensure_device_ptr(ptr);
+
+    res.deallocate(ptr, 42, 4);
+  }
+
+  { // allocate_async / deallocate_async
+    auto* ptr = res.allocate_async(42, cuda::stream_ref{});
+    static_assert(cuda::std::is_same<decltype(ptr), void*>::value, "");
+    // No async allocation, so we do not need to synchronize
+    ensure_device_ptr(ptr);
+
+    res.deallocate_async(ptr, 42, cuda::stream_ref{});
+  }
+
+  { // allocate_async / deallocate_async / deallocate with alignment
+    auto* ptr = res.allocate_async(42, 4, cuda::stream_ref{});
+    static_assert(cuda::std::is_same<decltype(ptr), void*>::value, "");
+    // No async allocation, so we do not need to synchronize
+    ensure_device_ptr(ptr);
+
+    res.deallocate_async(ptr, 42, 4, cuda::stream_ref{});
+  }
+}
+
+int main(int, char**) {
+  NV_IF_TARGET(NV_IS_HOST,
+    test();
+  )
+  return 0;
+}
diff --git a/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_memory_resource/equality.pass.cpp b/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_memory_resource/equality.pass.cpp
new file mode 100644
index 00000000000..6edea188c44
--- /dev/null
+++ b/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_memory_resource/equality.pass.cpp
@@ -0,0 +1,46 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11
+// UNSUPPORTED: nvrtc
+
+// cuda::mr::resource_ref construction
+
+#include <cuda/memory_resource>
+#include <cuda/std/cstdint>
+#include <cuda/stream_ref>
+
+template <bool IsDeviceAccessible>
+struct async_resource {
+  void* allocate(size_t, size_t) { return nullptr; }
+
+  void deallocate(void* ptr, size_t, size_t) {}
+
+  void* allocate_async(size_t, size_t, cuda::stream_ref) { return &_val; }
+
+  void deallocate_async(void* ptr, size_t, size_t, cuda::stream_ref) {
+    _val = *static_cast<int*>(ptr);
+  }
+
+  bool operator==(const async_resource& other) const {
+    return _val == other._val;
+  }
+  bool operator!=(const async_resource& other) const {
+    return _val != other._val;
+  }
+
+  int _val = 0;
+
+  template <bool IsDeviceAccessible2 = IsDeviceAccessible,
+            cuda::std::enable_if_t<IsDeviceAccessible2, int> = 0>
+  friend void get_property(const async_resource&, cuda::mr::device_accessible) noexcept {}
+};
+
+int main(int, char**) { return 0; }
diff --git a/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_memory_resource/traits.pass.cpp b/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_memory_resource/traits.pass.cpp
new file mode 100644
index 00000000000..5e18076c1dc
--- /dev/null
+++ b/libcudacxx/test/libcudacxx/cuda/memory_resource/cuda_memory_resource/traits.pass.cpp
@@ -0,0 +1,31 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11
+// UNSUPPORTED: nvrtc
+
+// cuda::mr::resource_ref construction
+
+#include <cuda/memory_resource>
+#include <cuda/std/type_traits>
+
+using resource = cuda::mr::cuda_memory_resource;
+static_assert(cuda::std::is_trivial<resource>::value, "");
+static_assert(cuda::std::is_trivially_default_constructible<resource>::value, "");
+static_assert(cuda::std::is_trivially_copy_constructible<resource>::value, "");
+static_assert(cuda::std::is_trivially_move_constructible<resource>::value, "");
+static_assert(cuda::std::is_trivially_copy_assignable<resource>::value, "");
+static_assert(cuda::std::is_trivially_move_assignable<resource>::value, "");
+static_assert(cuda::std::is_trivially_destructible<resource>::value, "");
+static_assert(cuda::std::is_empty<resource>::value, "");
+
+int main(int, char**) {
+  return 0;
+}