ANN bench options to specify CAGRA graph and dataset locations #1896

Merged (91 commits) on Nov 16, 2023
Changes from 69 commits
Commits
d1b53b1
Separate cagra index type from internal idx type
tfeher Jul 21, 2023
a617988
Merge branch 'branch-23.08' into cagra_matrix_idx
tfeher Jul 23, 2023
69effcd
Merge remote-tracking branch 'origin/branch-23.08' into cagra_matrix_idx
tfeher Jul 25, 2023
2a67251
wip
tfeher Jul 26, 2023
01a12aa
Merge remote-tracking branch 'origin/branch-23.08' into cagra_orig_bench
tfeher Jul 30, 2023
c3ce61e
Orig CAGRA benchmark works
tfeher Jul 31, 2023
49ca646
cagra pin dataset
tfeher Jul 31, 2023
bded674
Adding FAISS cpu to raft-ann-bench
cjnolet Sep 11, 2023
f0e3c8f
Adding faiss cpu indexes and build
cjnolet Sep 12, 2023
f66fd21
Docs updates
cjnolet Sep 12, 2023
20b793a
Merge branch 'branch-23.10' into enh-ann-bench-faiss-cpu
cjnolet Sep 12, 2023
ad255fd
Merge branch 'branch-23.10' into enh-ann-bench-faiss-cpu
cjnolet Sep 12, 2023
6d7f390
Resetting build all gpu arch to 0
cjnolet Sep 12, 2023
491d090
Merge branch 'enh-ann-bench-faiss-cpu' of github.com:cjnolet/raft int…
cjnolet Sep 12, 2023
c9569a5
Doc updates
cjnolet Sep 12, 2023
28bee2b
More updates
cjnolet Sep 12, 2023
1e7ba4f
Cleaning up includes
cjnolet Sep 12, 2023
563b386
Explicitly adding spdlog and fmt
cjnolet Sep 13, 2023
9585f20
Using selectors for faiss
cjnolet Sep 13, 2023
87e3be0
Adding ability to link against faiss avx lib (only if arch supports it)
cjnolet Sep 13, 2023
74e6a5d
Removing some legacy get_faiss cmake bits
cjnolet Sep 13, 2023
fcd029f
Updating faiss cpu to override search params
cjnolet Sep 13, 2023
a56227e
Trying again.
cjnolet Sep 14, 2023
3fcd1e9
Making libfaiss installs either or
cjnolet Sep 14, 2023
208fe0e
Merge remote-tracking branch 'origin/branch-23.10' into cagra_pin_dat…
tfeher Sep 24, 2023
929005b
Enable orig CAGRA benchmark
tfeher Sep 24, 2023
c63cbcd
update readme
tfeher Sep 24, 2023
1ec75ba
Using consistent naming for faiss algos
cjnolet Sep 25, 2023
a5585fa
Merge remote-tracking branch 'origin/branch-23.10' into enh-ann-bench…
cjnolet Sep 25, 2023
7d21375
Updating faiss version
cjnolet Sep 25, 2023
001c224
Pringing raft_faiss_targets
cjnolet Sep 25, 2023
c430bb8
Using faiss from pytorch
cjnolet Sep 25, 2023
02cb915
Use huge_page_resource, memmap input graph
tfeher Sep 28, 2023
30428fd
Building faiss statically each time. Will slow down CI but alleviate …
cjnolet Sep 28, 2023
b5606c1
Merge branch 'branch-23.10' into enh-ann-bench-faiss-cpu
cjnolet Sep 28, 2023
db2d210
Updates
cjnolet Sep 28, 2023
cb2eef8
Reverting
cjnolet Sep 28, 2023
375c38e
Using https for faiss github repo
cjnolet Sep 28, 2023
c4fb53c
Trying again
cjnolet Oct 2, 2023
f38031a
Merge branch 'branch-23.10' into enh-ann-bench-faiss-cpu
cjnolet Oct 2, 2023
8bb273c
Using corey's fork for now
cjnolet Oct 2, 2023
d539316
More updates
cjnolet Oct 2, 2023
fce179b
CHecking cudatoolkit library dir
cjnolet Oct 3, 2023
f54a757
iTerminating string
cjnolet Oct 3, 2023
385b4f4
Teach faiss about conda [hacky]
robertmaynard Oct 4, 2023
95c12db
Adding thread pool to overlap faiss queries
cjnolet Oct 4, 2023
7b67e89
Merge branch 'branch-23.12' into enh-ann-bench-faiss-cpu
cjnolet Oct 5, 2023
419d994
Merge branch 'branch-23.12' into enh-ann-bench-faiss-cpu
cjnolet Oct 5, 2023
1e7b5c8
Seeing if this fixes the devcontainers
cjnolet Oct 6, 2023
36d4dd3
Merge branch 'branch-23.12' into enh-ann-bench-faiss-cpu
cjnolet Oct 6, 2023
667b95c
Fixing dependencies.yml
cjnolet Oct 6, 2023
daffaf4
Adding openblas to nn_bench deps
cjnolet Oct 7, 2023
eb167e3
Merge branch 'raft_cagra_pin_huge_page' into huge_page
cjnolet Oct 7, 2023
284897c
Merge branch 'enh-ann-bench-faiss-cpu' into huge_page
cjnolet Oct 7, 2023
072d43d
improve benchmark settings
tfeher Oct 9, 2023
6d9082d
Merge remote-tracking branch 'tfeher_gitlab/raft_cagra_pin_huge_page'…
cjnolet Oct 9, 2023
c4eee56
Merge branch 'branch-23.12' into huge_page
cjnolet Oct 12, 2023
3e9079d
Fixing style
cjnolet Oct 12, 2023
15539ea
Merge branch 'branch-23.12' into huge_page
cjnolet Oct 18, 2023
f9aad90
Merge branch 'branch-23.12' into huge_page
tfeher Oct 18, 2023
b33363e
remove unnecessary changes
tfeher Oct 18, 2023
32e7b31
Do specific cagra graph/dataset memory allocation in the benchmark
tfeher Oct 20, 2023
07c9b55
Merge remote-tracking branch 'origin/branch-23.12' into huge_page
tfeher Oct 20, 2023
dec8c53
Merge branch 'branch-23.12' into huge_page
tfeher Nov 6, 2023
b223dae
Remove debug printouts and improve docstrings
tfeher Nov 6, 2023
be139f5
Merge branch 'branch-23.12' into huge_page
tfeher Nov 6, 2023
68bd922
Merge branch 'branch-23.12' into huge_page
tfeher Nov 7, 2023
53c6ded
Improve comments, errors, naming
tfeher Nov 8, 2023
4ec2576
Merge remote-tracking branch 'origin/branch-23.12' into huge_page
tfeher Nov 8, 2023
e355ff0
update tuning guide
tfeher Nov 8, 2023
64705d3
Merge remote-tracking branch 'origin/branch-23.12' into huge_page
tfeher Nov 8, 2023
0cb632a
corret tuning guide
tfeher Nov 8, 2023
7aded1b
Merge branch 'branch-23.12' into huge_page
tfeher Nov 9, 2023
8f0f78b
Merge branch 'branch-23.12' into huge_page
tfeher Nov 9, 2023
8ad652c
Using _memory_type for consistencyy
cjnolet Nov 9, 2023
313876b
Adding correct link for hnsw params
cjnolet Nov 9, 2023
abc4343
Merge branch 'branch-23.12' into huge_page
cjnolet Nov 9, 2023
69f8d88
Merge branch 'branch-23.12' into huge_page
tfeher Nov 9, 2023
ef6d8aa
Merge branch 'branch-23.12' into huge_page
tfeher Nov 9, 2023
e3b2726
Merge branch 'branch-23.12' into huge_page
tfeher Nov 14, 2023
ca5ab3a
Merge branch 'branch-23.12' into huge_page
tfeher Nov 14, 2023
bb0f9e8
Merge branch 'branch-23.12' into huge_page
tfeher Nov 14, 2023
ca2f17a
Fix merge error
tfeher Nov 14, 2023
6f9a39c
Resolve CAGAR bench parameter name conflict
tfeher Nov 14, 2023
6ca2a3d
Update dataset memory allocation according to changed order of set_se…
tfeher Nov 15, 2023
0c44d84
Add benchmark arg to control log level
tfeher Nov 15, 2023
fb7847c
Add raft_log_level arg to python wrapper
tfeher Nov 15, 2023
a1f1e97
Merge branch 'branch-23.12' into huge_page
cjnolet Nov 16, 2023
36cef0d
Fixing log level docs and option
cjnolet Nov 16, 2023
918cd35
More robust index file
cjnolet Nov 16, 2023
61339e0
Use "force" instead of "overwrite"
cjnolet Nov 16, 2023
134 changes: 134 additions & 0 deletions cpp/bench/ann/src/common/cuda_huge_page_resource.hpp
@@ -0,0 +1,134 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

// #include <raft/core/logger.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/detail/error.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>
#include <sys/mman.h>

#include <cstddef>

namespace raft::mr {
/**
* @brief `device_memory_resource` derived class that uses mmap to allocate memory.
* This class enables memory allocation using huge pages.
* It is assumed that the allocated memory is directly accessible on device. This currently only
* works on GH (Grace Hopper) systems.
*
* TODO(tfeher): consider improving or removing this helper once we made progress with
* https://github.com/rapidsai/raft/issues/1819
*/
class cuda_huge_page_resource final : public rmm::mr::device_memory_resource {
public:
cuda_huge_page_resource() = default;
~cuda_huge_page_resource() override = default;
cuda_huge_page_resource(cuda_huge_page_resource const&) = default;
cuda_huge_page_resource(cuda_huge_page_resource&&) = default;
cuda_huge_page_resource& operator=(cuda_huge_page_resource const&) = default;
cuda_huge_page_resource& operator=(cuda_huge_page_resource&&) = default;

/**
* @brief Query whether the resource supports use of non-null CUDA streams for
* allocation/deallocation. `cuda_huge_page_resource` does not support streams.
*
* @returns bool false
*/
[[nodiscard]] bool supports_streams() const noexcept override { return false; }

/**
* @brief Query whether the resource supports the get_mem_info API.
*
* @return true
*/
[[nodiscard]] bool supports_get_mem_info() const noexcept override { return true; }

private:
/**
* @brief Allocates memory of size at least `bytes` using mmap, and advises the kernel
* to back the mapping with huge pages.
*
* The returned pointer is page aligned.
*
* @note Stream argument is ignored
*
* @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled
*
* @param bytes The size, in bytes, of the allocation
* @return void* Pointer to the newly allocated memory
*/
void* do_allocate(std::size_t bytes, rmm::cuda_stream_view) override
{
void* _addr{nullptr};
_addr = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
Contributor (tfeher) commented:

There is a request to provide alternative methods for allocation (without mmap) so that we can use Transparent Huge Pages. I will submit a follow-up PR with these changes. I am thinking about the following design:

enum class AllocatorType { HostPinned, HostMmap, HostMalloc };

class host_device_allocator final : public rmm::mr::device_memory_resource {
 public:
  host_device_allocator(AllocatorType type) : type_(type) {}
  // ...
  void* do_allocate(std::size_t bytes, rmm::cuda_stream_view) override
  {
    void* ptr{nullptr};
    switch (type_) {
      case AllocatorType::HostPinned:
        cudaMallocHost(&ptr, bytes);
        return ptr;
      case AllocatorType::HostMmap:
        ptr = mmap(...);
        // ...
        return ptr;
      case AllocatorType::HostMalloc:
        // ...
    }
  }

 private:
  AllocatorType type_;
  // ...
};

In this case we would have a single custom mr class that we would configure according to the benchmark input parameter. Let me know what you think.

Member Author (cjnolet) commented:

Hmm, I'll have to think about this. I don't think I generally mind the approach of having a composite_memory_resource or delegating_memory_resource, but I think we should continue using RMM's abstraction layers everywhere possible instead of, for example, calling cudaMallocHost directly. My concern has a pretty easy fix, though: just store the corresponding memory_resource instances inside the delegating_memory_resource and delegate to them as necessary. That would maintain compatibility with RMM (this could, for example, still be set as the current_device_resource if we so wanted).
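
For illustration, a minimal sketch of that delegating approach, assuming hypothetical names (delegating_memory_resource and the AllocatorType values) and pre-constructed RMM resources supplied by the caller; depending on the RMM version, additional virtual overrides (e.g. supports_streams) may also be needed:

#include <rmm/cuda_stream_view.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>

#include <cstddef>

enum class AllocatorType { Device, HostPinned, HostHugePage };

// Dispatches every allocation to one of several pre-constructed RMM resources,
// selected by a benchmark parameter, instead of calling cudaMallocHost or mmap directly.
class delegating_memory_resource final : public rmm::mr::device_memory_resource {
 public:
  delegating_memory_resource(AllocatorType type,
                             rmm::mr::device_memory_resource* device_mr,
                             rmm::mr::device_memory_resource* pinned_mr,
                             rmm::mr::device_memory_resource* huge_page_mr)
    : type_(type), device_mr_(device_mr), pinned_mr_(pinned_mr), huge_page_mr_(huge_page_mr)
  {
  }

 private:
  rmm::mr::device_memory_resource* upstream() const
  {
    switch (type_) {
      case AllocatorType::HostPinned: return pinned_mr_;
      case AllocatorType::HostHugePage: return huge_page_mr_;
      default: return device_mr_;
    }
  }

  void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override
  {
    // Delegate to the stored resource rather than hard-coding an allocation call here.
    return upstream()->allocate(bytes, stream);
  }

  void do_deallocate(void* ptr, std::size_t bytes, rmm::cuda_stream_view stream) override
  {
    upstream()->deallocate(ptr, bytes, stream);
  }

  [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override
  {
    return this == &other;
  }

  AllocatorType type_;
  rmm::mr::device_memory_resource* device_mr_;
  rmm::mr::device_memory_resource* pinned_mr_;
  rmm::mr::device_memory_resource* huge_page_mr_;
};

Because the class is still a device_memory_resource, an instance could be passed anywhere RMM expects one, e.g. rmm::mr::set_current_device_resource(&mr).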

Member Author (cjnolet) commented:

Also cc'ing @harrism for his thoughts.

Contributor (tfeher) commented:

The class above is a subclass of rmm::mr::device_memory_resource. We are using it to allocate mdarrays that are accessible from both host and device. It is a workaround to unblock some tests.

I feel that RAFT's distinction between host_mdarray and device_mdarray stands in the way. What our algorithm needs is an mdarray that we can configure with any allocator (host or device). In a few places our algorithm does not care whether the input is a host_mdarray or a device_mdarray; it just passes the pointer to cudaMemcpy to fetch a chunk of the array we are working on.

How we allocate an mdarray is described by a ContainerPolicy class. So probably the only thing we need is to implement a container policy that can be initialized with either a device_memory_resource or a host_memory_resource. Afterwards we can simply use the existing RMM memory resources.

The only thing we are missing on the RMM side is a memory resource that uses mmap and madvise.

Member (harrism) commented:

Look, it's fine to provide your own base-level memory_resource classes that derive from RMM resource base classes. The problem today is that there is no common base for host and device MRs. We are starting to refactor on top of cuda::memory_resource which abstracts memory kind as a template parameter, and adds resource_ref and async_resource_ref which have template parameters to specify memory kind. This means you can have a function that accepts any memory resource that is device accessible, host accessible, or both, or whatever kind you can define. And we can start to reuse all the MR and adaptor types in RMM for different kinds of memory.

The problem with @tfeher 's suggestion at the top is that it appears to mix host- and device-accessible memory kinds under device_memory_resource, which is supposed to allocate only device-accessible memory.

As to @cjnolet 's comment, I agree that you should have separate resources for different kinds of allocation, and then have a higher level MR that takes pointers/refs to MRs for each kind you want to dispatch to, rather than hard-coding different allocation calls within a single MR.

if (_addr == MAP_FAILED) { RAFT_FAIL("huge_page_resource::MAP FAILED"); }
if (madvise(_addr, bytes, MADV_HUGEPAGE) == -1) {
munmap(_addr, bytes);
RAFT_FAIL("huge_page_resource::madvise MADV_HUGEPAGE");
}
memset(_addr, 0, bytes);
return _addr;
}

/**
* @brief Deallocate memory pointed to by \p ptr.
*
* @note Stream argument is ignored.
*
* @throws Nothing.
*
* @param ptr Pointer to be deallocated
* @param size Size of the allocation in bytes
*/
void do_deallocate(void* ptr, std::size_t size, rmm::cuda_stream_view) override
{
if (munmap(ptr, size) == -1) { RAFT_FAIL("huge_page_resource::munmap"); }
}

/**
* @brief Compare this resource to another.
*
* Two cuda_huge_page_resources always compare equal, because they can each
* deallocate memory allocated by the other.
*
* @throws Nothing.
*
* @param other The other resource to compare to
* @return true If the two resources are equivalent
* @return false If the two resources are not equal
*/
[[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override
{
return dynamic_cast<cuda_huge_page_resource const*>(&other) != nullptr;
}

/**
* @brief Get free and available memory for memory resource
*
* @throws `rmm::cuda_error` if unable to retrieve memory info.
*
* @return std::pair containing free_size and total_size of memory
*/
[[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(
rmm::cuda_stream_view) const override
{
std::size_t free_size{};
std::size_t total_size{};
RMM_CUDA_TRY(cudaMemGetInfo(&free_size, &total_size));
return std::make_pair(free_size, total_size);
}
};
} // namespace raft::mr
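
A minimal usage sketch (not part of the diff) for the resource above; the 1 GiB size and the assumption that the buffer is also touched from device code on a coherent (Grace Hopper-class) system are illustrative only:

#include "cuda_huge_page_resource.hpp"

#include <cstddef>

int main()
{
  raft::mr::cuda_huge_page_resource huge_mr;

  std::size_t bytes = std::size_t{1} << 30;  // 1 GiB
  // do_allocate mmaps anonymous memory, advises the kernel to use huge pages and zero-fills it.
  void* ptr = huge_mr.allocate(bytes);

  // ... fill the buffer on the host, read it from device code on coherent systems ...

  huge_mr.deallocate(ptr, bytes);
  return 0;
}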
130 changes: 130 additions & 0 deletions cpp/bench/ann/src/common/cuda_pinned_resource.hpp
@@ -0,0 +1,130 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <rmm/mr/device/device_memory_resource.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/detail/error.hpp>

#include <cstddef>

namespace raft::mr {
/**
* @brief `device_memory_resource` derived class that uses cudaMallocHost/Free for
* allocation/deallocation.
*
* This is almost the same as rmm::mr::pinned_memory_resource, but it has
* device_memory_resource as base class. Pinned memory can be accessed from device,
* and using this allocator we can create device_mdarray backed by pinned allocator.
*
* TODO(tfeher): it would be preferred to just rely on the existing allocator from rmm
* (pinned_memory_resource), but that is incompatible with the container_policy class
* for device matrix, because the latter expects a device_memory_resource. We shall
* revise this once we progress with Issue https://github.com/rapidsai/raft/issues/1819
*/
class cuda_pinned_resource final : public rmm::mr::device_memory_resource {
Member Author (cjnolet) commented:

It would be nice to see these new memory resources worked back into RMM eventually.

Contributor @tfeher commented on Nov 9, 2023:

RMM has a pinned memory resource. But it is a subclass of host_memory_resource. The problem is on our side: our API for managing allocators for mdspan requires a device_memory_resource. We would need to enable host allocator there, and then we could use rmm::mr::host_memory_resource.
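
A small sketch of the mismatch described above (build_index is a hypothetical placeholder for an API that, like the mdarray container policy, expects a device_memory_resource):

#include <rmm/mr/device/device_memory_resource.hpp>
#include <rmm/mr/host/pinned_memory_resource.hpp>

// Hypothetical API modeled on the container policy: it wants a device memory resource.
void build_index(rmm::mr::device_memory_resource* mr);

void example()
{
  rmm::mr::pinned_memory_resource pinned_mr;  // derives from host_memory_resource
  // build_index(&pinned_mr);  // does not compile: host_memory_resource is unrelated to device_memory_resource
}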

Member @harrism commented on Nov 13, 2023:

I have a plan to add a device-accessible pinned memory resource. Ideally using the new cuda::memory_resource refactoring.

public:
cuda_pinned_resource() = default;
~cuda_pinned_resource() override = default;
cuda_pinned_resource(cuda_pinned_resource const&) = default;
cuda_pinned_resource(cuda_pinned_resource&&) = default;
cuda_pinned_resource& operator=(cuda_pinned_resource const&) = default;
cuda_pinned_resource& operator=(cuda_pinned_resource&&) = default;

/**
* @brief Query whether the resource supports use of non-null CUDA streams for
* allocation/deallocation. `cuda_pinned_resource` does not support streams.
*
* @returns bool false
*/
[[nodiscard]] bool supports_streams() const noexcept override { return false; }

/**
* @brief Query whether the resource supports the get_mem_info API.
*
* @return true
*/
[[nodiscard]] bool supports_get_mem_info() const noexcept override { return true; }

private:
/**
* @brief Allocates memory of size at least `bytes` using cudaMalloc.
*
* The returned pointer has at least 256B alignment.
*
* @note Stream argument is ignored
*
* @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled
*
* @param bytes The size, in bytes, of the allocation
* @return void* Pointer to the newly allocated memory
*/
void* do_allocate(std::size_t bytes, rmm::cuda_stream_view) override
{
void* ptr{nullptr};
RMM_CUDA_TRY_ALLOC(cudaMallocHost(&ptr, bytes));
return ptr;
}

/**
* @brief Deallocate memory pointed to by \p ptr.
*
* @note Stream argument is ignored.
*
* @throws Nothing.
*
* @param ptr Pointer to be deallocated
*/
void do_deallocate(void* ptr, std::size_t, rmm::cuda_stream_view) override
{
RMM_ASSERT_CUDA_SUCCESS(cudaFreeHost(ptr));
}

/**
* @brief Compare this resource to another.
*
* Two cuda_pinned_resources always compare equal, because they can each
* deallocate memory allocated by the other.
*
* @throws Nothing.
*
* @param other The other resource to compare to
* @return true If the two resources are equivalent
* @return false If the two resources are not equal
*/
[[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override
{
return dynamic_cast<cuda_pinned_resource const*>(&other) != nullptr;
}

/**
* @brief Get free and available memory for memory resource
*
* @throws `rmm::cuda_error` if unable to retrieve memory info.
*
* @return std::pair containing free_size and total_size of memory
*/
[[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(
rmm::cuda_stream_view) const override
{
std::size_t free_size{};
std::size_t total_size{};
RMM_CUDA_TRY(cudaMemGetInfo(&free_size, &total_size));
return std::make_pair(free_size, total_size);
}
};
} // namespace raft::mr
17 changes: 17 additions & 0 deletions cpp/bench/ann/src/raft/raft_benchmark.cu
@@ -215,6 +215,21 @@ void parse_build_param(const nlohmann::json& conf,
}
}

AllocatorType parse_allocator(std::string mem_type)
{
if (mem_type == "device") {
return AllocatorType::Device;
} else if (mem_type == "host_pinned") {
return AllocatorType::HostPinned;
} else if (mem_type == "host_huge_page") {
return AllocatorType::HostHugePage;
}
THROW(
"Invalid value for memory type %s, must be one of [\"device\", \"host_pinned\", "
"\"host_huge_page\"",
mem_type.c_str());
}

template <typename T, typename IdxT>
void parse_search_param(const nlohmann::json& conf,
typename raft::bench::ann::RaftCagra<T, IdxT>::SearchParam& param)
@@ -236,6 +251,8 @@ void parse_search_param(const nlohmann::json& conf,
THROW("Invalid value for algo: %s", tmp.c_str());
}
}
if (conf.contains("graph_mem")) { param.graph_mem = parse_allocator(conf.at("graph_mem")); }
Member Author (cjnolet) commented:

Can we spell out *_memory_type to match the other properties that we parse for other algorithms? Will get confusing to set these if we use different spellings.

if (conf.contains("dataset_mem")) { param.dataset_mem = parse_allocator(conf.at("dataset_mem")); }
}
#endif
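
For context, a hedged sketch (not part of the diff) of how the new keys are consumed at this revision; the JSON fragment is illustrative only, and the review above suggests spelling the keys as graph_memory_type/dataset_memory_type instead:

#include <nlohmann/json.hpp>

// Example raft_cagra search-param entry using the keys parsed above.
nlohmann::json const search_conf = nlohmann::json::parse(R"({
  "graph_mem": "host_huge_page",
  "dataset_mem": "host_pinned"
})");

// parse_search_param(search_conf, param) would then set
//   param.graph_mem   = AllocatorType::HostHugePage;
//   param.dataset_mem = AllocatorType::HostPinned;
// while parse_allocator throws on any other string value.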
