From 73f78c6b418311ac4b6f075c99c11635517fc2ed Mon Sep 17 00:00:00 2001 From: viclafargue Date: Wed, 24 Nov 2021 11:09:41 +0100 Subject: [PATCH 1/6] Remove RAFT memory management --- cpp/include/raft/cudart_utils.h | 32 ---- cpp/include/raft/mr/allocator.hpp | 70 -------- cpp/include/raft/mr/buffer_base.hpp | 210 ----------------------- cpp/include/raft/mr/device/allocator.hpp | 54 ------ cpp/include/raft/mr/device/buffer.hpp | 68 -------- cpp/include/raft/mr/host/allocator.hpp | 58 ------- cpp/include/raft/mr/host/buffer.hpp | 84 --------- cpp/include/raft/spatial/knn/knn.hpp | 2 - python/raft/common/handle.pxd | 5 - 9 files changed, 583 deletions(-) delete mode 100644 cpp/include/raft/mr/allocator.hpp delete mode 100644 cpp/include/raft/mr/buffer_base.hpp delete mode 100644 cpp/include/raft/mr/device/allocator.hpp delete mode 100644 cpp/include/raft/mr/device/buffer.hpp delete mode 100644 cpp/include/raft/mr/host/allocator.hpp delete mode 100644 cpp/include/raft/mr/host/buffer.hpp diff --git a/cpp/include/raft/cudart_utils.h b/cpp/include/raft/cudart_utils.h index 486103dedb..85b1e7cb94 100644 --- a/cpp/include/raft/cudart_utils.h +++ b/cpp/include/raft/cudart_utils.h @@ -277,38 +277,6 @@ void print_device_vector(const char* variable_name, const T* devMem, } /** @} */ -static std::mutex mutex_; -static std::unordered_map allocations; - -template -void allocate(Type*& ptr, size_t len, rmm::cuda_stream_view stream, - bool setZero = false) { - size_t size = len * sizeof(Type); - ptr = (Type*)rmm::mr::get_current_device_resource()->allocate(size, stream); - if (setZero) CUDA_CHECK(cudaMemsetAsync((void*)ptr, 0, size, stream)); - - std::lock_guard _(mutex_); - allocations[ptr] = size; -} - -template -void deallocate(Type*& ptr, rmm::cuda_stream_view stream) { - std::lock_guard _(mutex_); - size_t size = allocations[ptr]; - allocations.erase(ptr); - rmm::mr::get_current_device_resource()->deallocate((void*)ptr, size, stream); -} - -inline void deallocate_all(rmm::cuda_stream_view stream) { - std::lock_guard _(mutex_); - for (auto& alloc : allocations) { - void* ptr = alloc.first; - size_t size = alloc.second; - rmm::mr::get_current_device_resource()->deallocate(ptr, size, stream); - } - allocations.clear(); -} - /** helper method to get max usable shared mem per block parameter */ inline int getSharedMemPerBlock() { int devId; diff --git a/cpp/include/raft/mr/allocator.hpp b/cpp/include/raft/mr/allocator.hpp deleted file mode 100644 index 08a4987c91..0000000000 --- a/cpp/include/raft/mr/allocator.hpp +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -#include - -namespace raft { -namespace mr { - -/** - * @brief Interface for an asynchronous device/host allocator. - * - * An implementation of this interface can make the following assumptions: - * - It does not need to be but it can allow async allocate and deallocate. - * - * @note This interface does NOT support RAII. 
Thus, if you need RAII-enabled - * interface, better to use `device_buffer` or `host_buffer`. - */ -class base_allocator { - public: - /** - * @brief Asynchronously allocates a memory region. - * - * An implementation of this need to return a allocation of n bytes properly - * align bytes on the configured device. The allocation can optionally be - * asynchronous in the sense that it is only save to use after all work - * submitted to the passed in stream prior to the call to allocate has - * completed. If the allocation is used before, e.g. in another stream the - * behaviour may be undefined. - * @todo: Add alignment requirments. - * - * @param[in] n number of bytes to allocate - * @param[in] stream stream to issue the possible asynchronous allocation in - */ - virtual void* allocate(std::size_t n, cudaStream_t stream) = 0; - - /** - * @brief Asynchronously deallocates device memory - * - * An implementation of this need to ensure that the allocation that the - * passed in pointer points to remains usable until all work sheduled in - * stream prior to the call to deallocate has completed. - * - * @param[inout] p pointer to the buffer to deallocte - * @param[in] n size of the buffer to deallocte in bytes - * @param[in] stream stream in which the allocation might be still in use - */ - virtual void deallocate(void* p, std::size_t n, cudaStream_t stream) = 0; - - virtual ~base_allocator() = default; -}; // class base_allocator - -}; // namespace mr -}; // namespace raft diff --git a/cpp/include/raft/mr/buffer_base.hpp b/cpp/include/raft/mr/buffer_base.hpp deleted file mode 100644 index 4a2362bf97..0000000000 --- a/cpp/include/raft/mr/buffer_base.hpp +++ /dev/null @@ -1,210 +0,0 @@ -/* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -#include - -#include -#include -#include - -namespace raft { -namespace mr { - -/** - * @brief Base for all RAII-based owning of temporary memory allocations. This - * class should ideally not be used by users directly, but instead via - * the child classes `device_buffer` and `host_buffer`. 
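The buffer classes being deleted here map directly onto RMM's stream-ordered containers; a minimal sketch of the replacement pattern, assuming only the rmm::device_uvector API that later patches in this series already use:

    #include <rmm/cuda_stream_view.hpp>
    #include <rmm/device_uvector.hpp>

    template <typename T>
    void foo(rmm::cuda_stream_view stream, std::size_t n)
    {
      // was: raft::mr::device::buffer<T> temp(allocator, stream, 0); temp.resize(n, stream);
      rmm::device_uvector<T> temp(n, stream);  // allocation is ordered on `stream`
      // kernelA<<<grid, blk, 0, stream>>>(..., temp.data(), ...);
      temp.resize(2 * n, stream);  // stream-ordered grow; existing contents preserved
      temp.release();              // explicit early release; otherwise freed in the dtor
    }
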
- * - * @tparam T data type - * @tparam AllocatorT The underly allocator object - */ -template -class buffer_base { - public: - using size_type = std::size_t; - using value_type = T; - using iterator = value_type*; - using const_iterator = const value_type*; - using reference = T&; - using const_reference = const T&; - - buffer_base() = delete; - - buffer_base(const buffer_base& other) = delete; - - buffer_base& operator=(const buffer_base& other) = delete; - - /** - * @brief Main ctor - * - * @param[in] allocator asynchronous allocator used for managing buffer life - * @param[in] stream cuda stream where this allocation operations are async - * @param[in] n size of the buffer (in number of elements) - */ - buffer_base(std::shared_ptr allocator, cudaStream_t stream, - size_type n = 0) - : data_(nullptr), - size_(n), - capacity_(n), - stream_(stream), - allocator_(std::move(allocator)) { - if (capacity_ > 0) { - data_ = static_cast( - allocator_->allocate(capacity_ * sizeof(value_type), stream_)); - CUDA_CHECK(cudaStreamSynchronize(stream_)); - } - } - - ~buffer_base() { release(); } - - value_type* data() { return data_; } - - const value_type* data() const { return data_; } - - size_type size() const { return size_; } - - void clear() { size_ = 0; } - - iterator begin() { return data_; } - - const_iterator begin() const { return data_; } - - iterator end() { return data_ + size_; } - - const_iterator end() const { return data_ + size_; } - - /** - * @brief Reserve new memory size for this buffer. - * - * It re-allocates a fresh buffer if the new requested capacity is more than - * the current one, copies the old buffer contents to this new buffer and - * removes the old one. - * - * @param[in] new_capacity new capacity (in number of elements) - * @{ - */ - void reserve(size_type new_capacity) { - if (new_capacity > capacity_) { - auto* new_data = static_cast( - allocator_->allocate(new_capacity * sizeof(value_type), stream_)); - if (size_ > 0) { - raft::copy(new_data, data_, size_, stream_); - } - // Only deallocate if we have allocated a pointer - if (nullptr != data_) { - allocator_->deallocate(data_, capacity_ * sizeof(value_type), stream_); - } - data_ = new_data; - capacity_ = new_capacity; - } - } - - void reserve(size_type new_capacity, cudaStream_t stream) { - set_stream(stream); - reserve(new_capacity); - } - /** @} */ - - /** - * @brief Resize the underlying buffer (uses `reserve` method internally) - * - * @param[in] new_size new buffer size - * @{ - */ - void resize(const size_type new_size) { - reserve(new_size); - size_ = new_size; - } - - void resize(const size_type new_size, cudaStream_t stream) { - set_stream(stream); - resize(new_size); - } - /** @} */ - - /** - * @brief Deletes the underlying buffer - * - * If this method is not explicitly called, it will be during the destructor - * @{ - */ - void release() { - if (nullptr != data_) { - allocator_->deallocate(data_, capacity_ * sizeof(value_type), stream_); - } - data_ = nullptr; - capacity_ = 0; - size_ = 0; - } - - void release(cudaStream_t stream) { - set_stream(stream); - release(); - } - /** @} */ - - /** - * @brief returns the underlying allocator used - * - * @return the allocator pointer - */ - std::shared_ptr get_allocator() const { return allocator_; } - - /** - * @brief returns the underlying stream used - * - * @return the cuda stream - */ - cudaStream_t get_stream() const { return stream_; } - - protected: - value_type* data_; - - private: - size_type size_; - size_type capacity_; - cudaStream_t stream_; - 
std::shared_ptr allocator_; - - /** - * @brief Sets a new cuda stream where the future operations will be queued - * - * This method makes sure that the inter-stream dependencies are met and taken - * care of, before setting the input stream as a new stream for this buffer. - * Ideally, the same cuda stream passed during constructor is expected to be - * used throughout this buffer's lifetime, for performance. - * - * @param[in] stream new cuda stream to be set. If it is the same as the - * current one, then this method will be a no-op. - */ - void set_stream(cudaStream_t stream) { - if (stream_ != stream) { - cudaEvent_t event; - CUDA_CHECK(cudaEventCreateWithFlags(&event, cudaEventDisableTiming)); - CUDA_CHECK(cudaEventRecord(event, stream_)); - CUDA_CHECK(cudaStreamWaitEvent(stream, event, 0)); - stream_ = stream; - CUDA_CHECK(cudaEventDestroy(event)); - } - } -}; // class buffer_base - -}; // namespace mr -}; // namespace raft diff --git a/cpp/include/raft/mr/device/allocator.hpp b/cpp/include/raft/mr/device/allocator.hpp deleted file mode 100644 index 3d1ce38c31..0000000000 --- a/cpp/include/raft/mr/device/allocator.hpp +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -#include - -#include - -namespace raft { -namespace mr { -namespace device { - -/** - * @brief An explicit interface for an asynchronous device allocator. - * - * This is mostly done in order to reduce work needed in cuML codebase. - * An implementation of this interface can make the following assumptions, - * further to the ones listed in `Allocator`: - * - Allocations may be always on the device that was specified on construction. - */ -class allocator : public base_allocator {}; - -/** Default device allocator based on the one provided by RMM */ -class default_allocator : public allocator { - public: - void* allocate(std::size_t n, cudaStream_t stream) override { - void* ptr = rmm::mr::get_current_device_resource()->allocate(n, stream); - return ptr; - } - - void deallocate(void* p, std::size_t n, cudaStream_t stream) override { - rmm::mr::get_current_device_resource()->deallocate(p, n, stream); - } -}; // class default_allocator - -}; // namespace device -}; // namespace mr -}; // namespace raft diff --git a/cpp/include/raft/mr/device/buffer.hpp b/cpp/include/raft/mr/device/buffer.hpp deleted file mode 100644 index 39b5674ce4..0000000000 --- a/cpp/include/raft/mr/device/buffer.hpp +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
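The set_stream handoff above is the standard CUDA idiom for re-ordering work between two streams: record an event on the old stream and make the new stream wait on it. As a standalone sketch (CUDA_CHECK being the error macro from raft/cudart_utils.h):

    // Make all work previously enqueued on `src` a prerequisite of `dst`.
    inline void chain_streams(cudaStream_t src, cudaStream_t dst)
    {
      cudaEvent_t event;
      CUDA_CHECK(cudaEventCreateWithFlags(&event, cudaEventDisableTiming));
      CUDA_CHECK(cudaEventRecord(event, src));         // mark the current tail of `src`
      CUDA_CHECK(cudaStreamWaitEvent(dst, event, 0));  // `dst` waits for that marker
      CUDA_CHECK(cudaEventDestroy(event));  // safe: destruction defers until the event fires
    }
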
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include "allocator.hpp" - -namespace raft { -namespace mr { -namespace device { - -/** - * @brief RAII object owning a contiguous typed device buffer. The passed in - * allocator supports asynchronous allocation and deallocation so this - * can also be used for temporary memory - * - * @code{.cpp} - * template - * void foo(..., cudaStream_t stream) { - * ... - * raft::mr::device::buffer temp(stream, 0); - * ... - * temp.resize(n); - * kernelA<<>>(...,temp.data(),...); - * kernelB<<>>(...,temp.data(),...); - * temp.release(); - * ... - * } - * @endcode - */ -template -class buffer : public buffer_base { - public: - using size_type = typename buffer_base::size_type; - using value_type = typename buffer_base::value_type; - using iterator = typename buffer_base::iterator; - using const_iterator = typename buffer_base::const_iterator; - using reference = typename buffer_base::reference; - using const_reference = typename buffer_base::const_reference; - - buffer() = delete; - - buffer(const buffer& other) = delete; - - buffer& operator=(const buffer& other) = delete; - - buffer(std::shared_ptr alloc, cudaStream_t stream, size_type n = 0) - : buffer_base(alloc, stream, n) {} -}; // class buffer - -}; // namespace device -}; // namespace mr -}; // namespace raft diff --git a/cpp/include/raft/mr/host/allocator.hpp b/cpp/include/raft/mr/host/allocator.hpp deleted file mode 100644 index e5b3da24eb..0000000000 --- a/cpp/include/raft/mr/host/allocator.hpp +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include - -#include - -#include - -namespace raft { -namespace mr { -namespace host { - -/** - * @brief An explicit interface for an asynchronous host allocations. - * - * This is mostly done in order to reduce work needed in cuML codebase. - * An implementation of this interface can make the following assumptions, - * further to the ones listed in `Allocator`: - * - Allocations don't need to be zero copy accessible form a device. 
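With this host-side allocator and its pinned `host::buffer` companion both slated for removal, callers that still need page-locked staging memory can wrap the same cudaMallocHost/cudaFreeHost pair in a small RAII type. An illustrative stand-in only, not part of this series (RMM's pinned memory resource is another option):

    #include <cstddef>
    #include <new>
    #include <cuda_runtime.h>

    template <typename T>
    class pinned_buffer {
     public:
      explicit pinned_buffer(std::size_t n) : size_(n)
      {
        if (cudaMallocHost(reinterpret_cast<void**>(&data_), n * sizeof(T)) != cudaSuccess) {
          throw std::bad_alloc();
        }
      }
      ~pinned_buffer() { cudaFreeHost(data_); }  // no-throw: error intentionally ignored
      pinned_buffer(const pinned_buffer&) = delete;
      pinned_buffer& operator=(const pinned_buffer&) = delete;

      T* data() { return data_; }
      std::size_t size() const { return size_; }

     private:
      T* data_{nullptr};
      std::size_t size_{0};
    };
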
- */ -class allocator : public base_allocator {}; - -/** Default cudaMallocHost/cudaFreeHost based host allocator */ -class default_allocator : public allocator { - public: - void* allocate(std::size_t n, cudaStream_t stream) override { - void* ptr = nullptr; - CUDA_CHECK(cudaMallocHost(&ptr, n)); - return ptr; - } - - void deallocate(void* p, std::size_t n, cudaStream_t stream) override { - //Must call _NO_THROW here since this is called frequently from object - //destructors which are "nothrow" by default - CUDA_CHECK_NO_THROW(cudaFreeHost(p)); - } -}; // class default_allocator - -}; // namespace host -}; // namespace mr -}; // namespace raft diff --git a/cpp/include/raft/mr/host/buffer.hpp b/cpp/include/raft/mr/host/buffer.hpp deleted file mode 100644 index 3c505bf2ed..0000000000 --- a/cpp/include/raft/mr/host/buffer.hpp +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include -#include "allocator.hpp" - -namespace raft { -namespace mr { -namespace host { - -/** - * @brief RAII object owning a contigous typed host buffer (aka pinned memory). - * The passed in allocator supports asynchronus allocation and - * deallocation so this can also be used for temporary memory - * - * @code{.cpp} - * template - * void foo(const T* in_d , T* out_d, ..., cudaStream_t stream) { - * ... - * raft::mr::host::buffer temp(stream, 0); - * ... - * temp.resize(n); - * raft::copy(temp.data(), in_d, temp.size()); - * ... - * raft::copy(out_d, temp.data(), temp.size()); - * temp.release(stream); - * ... 
- * } - * @endcode - */ -template -class buffer : public buffer_base { - public: - using size_type = typename buffer_base::size_type; - using value_type = typename buffer_base::value_type; - using iterator = typename buffer_base::iterator; - using const_iterator = typename buffer_base::const_iterator; - using reference = typename buffer_base::reference; - using const_reference = typename buffer_base::const_reference; - - buffer() = delete; - - buffer(const buffer& other) = delete; - - buffer& operator=(const buffer& other) = delete; - - buffer(std::shared_ptr alloc, const device::buffer& other) - : buffer_base(alloc, other.get_stream(), other.size()) { - if (other.size() > 0) { - raft::copy(data_, other.data(), other.size(), other.get_stream()); - } - } - - buffer(std::shared_ptr alloc, cudaStream_t stream, size_type n = 0) - : buffer_base(alloc, stream, n) {} - - reference operator[](size_type pos) { return data_[pos]; } - - const_reference operator[](size_type pos) const { return data_[pos]; } - - private: - using buffer_base::data_; -}; - -}; // namespace host -}; // namespace mr -}; // namespace raft diff --git a/cpp/include/raft/spatial/knn/knn.hpp b/cpp/include/raft/spatial/knn/knn.hpp index 6472eaa80b..4716b51c98 100644 --- a/cpp/include/raft/spatial/knn/knn.hpp +++ b/cpp/include/raft/spatial/knn/knn.hpp @@ -25,8 +25,6 @@ namespace raft { namespace spatial { namespace knn { -using deviceAllocator = raft::mr::device::allocator; - /** * Performs a k-select across row partitioned index/distance * matrices formatted like the following: diff --git a/python/raft/common/handle.pxd b/python/raft/common/handle.pxd index 884d81bed1..11ebb30fd6 100644 --- a/python/raft/common/handle.pxd +++ b/python/raft/common/handle.pxd @@ -24,11 +24,6 @@ from libcpp.memory cimport shared_ptr from .cuda cimport _Stream -cdef extern from "raft/mr/device/allocator.hpp" \ - namespace "raft::mr::device" nogil: - cdef cppclass allocator: - pass - cdef extern from "raft/handle.hpp" namespace "raft" nogil: cdef cppclass handle_t: handle_t() except + From 1c0f3c3cb5d44173e13faae3e9cdc265a83c1df0 Mon Sep 17 00:00:00 2001 From: viclafargue Date: Wed, 8 Dec 2021 15:20:07 +0100 Subject: [PATCH 2/6] Update fusedL2KNN test --- cpp/test/spatial/fused_l2_knn.cu | 89 +++++++++++++++++--------------- 1 file changed, 47 insertions(+), 42 deletions(-) diff --git a/cpp/test/spatial/fused_l2_knn.cu b/cpp/test/spatial/fused_l2_knn.cu index e48a3c6657..441e72d64b 100644 --- a/cpp/test/spatial/fused_l2_knn.cu +++ b/cpp/test/spatial/fused_l2_knn.cu @@ -99,65 +99,69 @@ testing::AssertionResult devArrMatchKnnPair(const T* expected_idx, template class FusedL2KNNTest : public ::testing::TestWithParam { + public: + FusedL2KNNTest() + : stream_(handle_.get_stream()), + params_(::testing::TestWithParam::GetParam()), + database(params_.num_db_vecs * params_.dim, stream_), + search_queries(params_.num_queries * params_.dim, stream_), + raft_indices_(params_.num_queries * params_.k, stream_), + raft_distances_(params_.num_queries * params_.k, stream_), + faiss_indices_(params_.num_queries * params_.k, stream_), + faiss_distances_(params_.num_queries * params_.k, stream_) + { + CUDA_CHECK(cudaMemsetAsync(database.data(), 0, database.size() * sizeof(T), stream_)); + CUDA_CHECK( + cudaMemsetAsync(search_queries.data(), 0, search_queries.size() * sizeof(T), stream_)); + CUDA_CHECK(cudaMemsetAsync(raft_indices_.data(), 0, raft_indices_.size() * sizeof(T), stream_)); + CUDA_CHECK(cudaMemsetAsync( + raft_distances_.data(), 0, raft_distances_.size() * 
sizeof(int64_t), stream_)); + CUDA_CHECK( + cudaMemsetAsync(faiss_indices_.data(), 0, faiss_indices_.size() * sizeof(int64_t), stream_)); + CUDA_CHECK( + cudaMemsetAsync(faiss_distances_.data(), 0, faiss_distances_.size() * sizeof(T), stream_)); + } + protected: void testBruteForce() { - cudaStream_t stream = handle_.get_stream(); - launchFaissBfknn(); detail::fusedL2Knn(dim, - raft_indices_, - raft_distances_, - database, - search_queries, + raft_indices_.data(), + raft_distances_.data(), + database.data(), + search_queries.data(), num_db_vecs, num_queries, k_, true, true, - stream, + stream_, metric); // verify. - devArrMatchKnnPair(faiss_indices_, - raft_indices_, - faiss_distances_, - raft_distances_, + devArrMatchKnnPair(faiss_indices_.data(), + raft_indices_.data(), + faiss_distances_.data(), + raft_distances_.data(), num_queries, k_, float(0.001), - stream); + stream_); } void SetUp() override { - params_ = ::testing::TestWithParam::GetParam(); num_queries = params_.num_queries; num_db_vecs = params_.num_db_vecs; dim = params_.dim; k_ = params_.k; metric = params_.metric_; - cudaStream_t stream = handle_.get_stream(); - - raft::allocate(database, num_db_vecs * dim, stream, true); - raft::allocate(search_queries, num_queries * dim, stream, true); - unsigned long long int seed = 1234ULL; raft::random::Rng r(seed); - r.uniform(database, num_db_vecs * dim, T(-1.0), T(1.0), stream); - r.uniform(search_queries, num_queries * dim, T(-1.0), T(1.0), stream); - - raft::allocate(raft_indices_, num_queries * k_, stream, true); - raft::allocate(raft_distances_, num_queries * k_, stream, true); - raft::allocate(faiss_indices_, num_queries * k_, stream, true); - raft::allocate(faiss_distances_, num_queries * k_, stream, true); - } - - void TearDown() override - { - cudaStream_t stream = handle_.get_stream(); - raft::deallocate_all(stream); + r.uniform(database.data(), num_db_vecs * dim, T(-1.0), T(1.0), stream_); + r.uniform(search_queries.data(), num_queries * dim, T(-1.0), T(1.0), stream_); } void launchFaissBfknn() @@ -169,37 +173,38 @@ class FusedL2KNNTest : public ::testing::TestWithParam { gpu_res.noTempMemory(); int device; CUDA_CHECK(cudaGetDevice(&device)); - gpu_res.setDefaultStream(device, handle_.get_stream()); + gpu_res.setDefaultStream(device, stream_); faiss::gpu::GpuDistanceParams args; args.metric = m; args.metricArg = 0; args.k = k_; args.dims = dim; - args.vectors = database; + args.vectors = database.data(); args.vectorsRowMajor = true; args.numVectors = num_db_vecs; - args.queries = search_queries; + args.queries = search_queries.data(); args.queriesRowMajor = true; args.numQueries = num_queries; - args.outDistances = faiss_distances_; - args.outIndices = faiss_indices_; + args.outDistances = faiss_distances_.data(); + args.outIndices = faiss_indices_.data(); bfKnn(&gpu_res, args); } private: raft::handle_t handle_; + cudaStream_t stream_ = 0; FusedL2KNNInputs params_; int num_queries; int num_db_vecs; int dim; - T* database; - T* search_queries; - int64_t* raft_indices_; - T* raft_distances_; - int64_t* faiss_indices_; - T* faiss_distances_; + rmm::device_uvector database; + rmm::device_uvector search_queries; + rmm::device_uvector raft_indices_; + rmm::device_uvector raft_distances_; + rmm::device_uvector faiss_indices_; + rmm::device_uvector faiss_distances_; int k_; raft::distance::DistanceType metric; }; From 3d9fc94d8b107e4f6e84bfe16eb916601bd9ddc5 Mon Sep 17 00:00:00 2001 From: viclafargue Date: Tue, 25 Jan 2022 18:29:05 +0100 Subject: [PATCH 3/6] Completing RAFT memory 
management removal --- cpp/include/raft/comms/helper.hpp | 1 - cpp/include/raft/lap/lap_kernels.cuh | 1 - cpp/include/raft/mr/buffer_base.hpp | 211 ------------------ cpp/include/raft/mr/device/allocator.hpp | 57 ----- cpp/include/raft/mr/device/buffer.hpp | 70 ------ cpp/include/raft/mr/host/allocator.hpp | 61 ----- cpp/include/raft/mr/host/buffer.hpp | 85 ------- .../raft/sparse/distance/detail/coo_spmv.cuh | 1 - .../raft/sparse/distance/detail/utils.cuh | 3 - cpp/include/raft/sparse/distance/distance.hpp | 1 - .../sparse/hierarchy/detail/agglomerative.cuh | 1 - .../hierarchy/detail/connectivities.cuh | 1 - .../raft/sparse/hierarchy/detail/mst.cuh | 1 - cpp/include/raft/sparse/op/detail/reduce.cuh | 1 - .../selection/detail/connect_components.cuh | 1 - .../raft/sparse/selection/detail/knn.cuh | 1 - cpp/include/raft/spatial/knn/ann.hpp | 2 - cpp/include/raft/spatial/knn/knn.hpp | 2 - cpp/test/CMakeLists.txt | 2 - cpp/test/mr/device/buffer.cpp | 92 -------- cpp/test/mr/host/buffer.cpp | 71 ------ python/raft/common/handle.pxd | 2 - python/raft/common/handle.pyx | 7 +- 23 files changed, 5 insertions(+), 670 deletions(-) delete mode 100644 cpp/include/raft/mr/buffer_base.hpp delete mode 100644 cpp/include/raft/mr/device/allocator.hpp delete mode 100644 cpp/include/raft/mr/device/buffer.hpp delete mode 100644 cpp/include/raft/mr/host/allocator.hpp delete mode 100644 cpp/include/raft/mr/host/buffer.hpp delete mode 100644 cpp/test/mr/device/buffer.cpp delete mode 100644 cpp/test/mr/host/buffer.cpp diff --git a/cpp/include/raft/comms/helper.hpp b/cpp/include/raft/comms/helper.hpp index 09a767bea7..d83e8f4d4f 100644 --- a/cpp/include/raft/comms/helper.hpp +++ b/cpp/include/raft/comms/helper.hpp @@ -18,7 +18,6 @@ #include #include -#include #include #include diff --git a/cpp/include/raft/lap/lap_kernels.cuh b/cpp/include/raft/lap/lap_kernels.cuh index 328cbf3e74..b61d0bd269 100644 --- a/cpp/include/raft/lap/lap_kernels.cuh +++ b/cpp/include/raft/lap/lap_kernels.cuh @@ -28,7 +28,6 @@ #include #include -#include #include diff --git a/cpp/include/raft/mr/buffer_base.hpp b/cpp/include/raft/mr/buffer_base.hpp deleted file mode 100644 index 6998c1f186..0000000000 --- a/cpp/include/raft/mr/buffer_base.hpp +++ /dev/null @@ -1,211 +0,0 @@ -/* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -#include - -#include -#include -#include - -namespace raft { -namespace mr { - -/** - * @brief Base for all RAII-based owning of temporary memory allocations. This - * class should ideally not be used by users directly, but instead via - * the child classes `device_buffer` and `host_buffer`. 
- * - * @tparam T data type - * @tparam AllocatorT The underly allocator object - */ -template -class buffer_base { - public: - using size_type = std::size_t; - using value_type = T; - using iterator = value_type*; - using const_iterator = const value_type*; - using reference = T&; - using const_reference = const T&; - - buffer_base() = delete; - - buffer_base(const buffer_base& other) = delete; - - buffer_base& operator=(const buffer_base& other) = delete; - - /** - * @brief Main ctor - * - * @param[in] allocator asynchronous allocator used for managing buffer life - * @param[in] stream cuda stream where this allocation operations are async - * @param[in] n size of the buffer (in number of elements) - */ - buffer_base(std::shared_ptr allocator, cudaStream_t stream, size_type n = 0) - : data_(nullptr), size_(n), capacity_(n), stream_(stream), allocator_(std::move(allocator)) - { - if (capacity_ > 0) { - data_ = - static_cast(allocator_->allocate(capacity_ * sizeof(value_type), stream_)); - RAFT_CUDA_TRY(cudaStreamSynchronize(stream_)); - } - } - - ~buffer_base() { release(); } - - value_type* data() { return data_; } - - const value_type* data() const { return data_; } - - size_type size() const { return size_; } - - void clear() { size_ = 0; } - - iterator begin() { return data_; } - - const_iterator begin() const { return data_; } - - iterator end() { return data_ + size_; } - - const_iterator end() const { return data_ + size_; } - - /** - * @brief Reserve new memory size for this buffer. - * - * It re-allocates a fresh buffer if the new requested capacity is more than - * the current one, copies the old buffer contents to this new buffer and - * removes the old one. - * - * @param[in] new_capacity new capacity (in number of elements) - * @{ - */ - void reserve(size_type new_capacity) - { - if (new_capacity > capacity_) { - auto* new_data = - static_cast(allocator_->allocate(new_capacity * sizeof(value_type), stream_)); - if (size_ > 0) { raft::copy(new_data, data_, size_, stream_); } - // Only deallocate if we have allocated a pointer - if (nullptr != data_) { - allocator_->deallocate(data_, capacity_ * sizeof(value_type), stream_); - } - data_ = new_data; - capacity_ = new_capacity; - } - } - - void reserve(size_type new_capacity, cudaStream_t stream) - { - set_stream(stream); - reserve(new_capacity); - } - /** @} */ - - /** - * @brief Resize the underlying buffer (uses `reserve` method internally) - * - * @param[in] new_size new buffer size - * @{ - */ - void resize(const size_type new_size) - { - reserve(new_size); - size_ = new_size; - } - - void resize(const size_type new_size, cudaStream_t stream) - { - set_stream(stream); - resize(new_size); - } - /** @} */ - - /** - * @brief Deletes the underlying buffer - * - * If this method is not explicitly called, it will be during the destructor - * @{ - */ - void release() - { - if (nullptr != data_) { - allocator_->deallocate(data_, capacity_ * sizeof(value_type), stream_); - } - data_ = nullptr; - capacity_ = 0; - size_ = 0; - } - - void release(cudaStream_t stream) - { - set_stream(stream); - release(); - } - /** @} */ - - /** - * @brief returns the underlying allocator used - * - * @return the allocator pointer - */ - std::shared_ptr get_allocator() const { return allocator_; } - - /** - * @brief returns the underlying stream used - * - * @return the cuda stream - */ - cudaStream_t get_stream() const { return stream_; } - - protected: - value_type* data_; - - private: - size_type size_; - size_type capacity_; - cudaStream_t stream_; 
- std::shared_ptr allocator_; - - /** - * @brief Sets a new cuda stream where the future operations will be queued - * - * This method makes sure that the inter-stream dependencies are met and taken - * care of, before setting the input stream as a new stream for this buffer. - * Ideally, the same cuda stream passed during constructor is expected to be - * used throughout this buffer's lifetime, for performance. - * - * @param[in] stream new cuda stream to be set. If it is the same as the - * current one, then this method will be a no-op. - */ - void set_stream(cudaStream_t stream) - { - if (stream_ != stream) { - cudaEvent_t event; - RAFT_CUDA_TRY(cudaEventCreateWithFlags(&event, cudaEventDisableTiming)); - RAFT_CUDA_TRY(cudaEventRecord(event, stream_)); - RAFT_CUDA_TRY(cudaStreamWaitEvent(stream, event, 0)); - stream_ = stream; - RAFT_CUDA_TRY(cudaEventDestroy(event)); - } - } -}; // class buffer_base - -}; // namespace mr -}; // namespace raft diff --git a/cpp/include/raft/mr/device/allocator.hpp b/cpp/include/raft/mr/device/allocator.hpp deleted file mode 100644 index 8d306a199f..0000000000 --- a/cpp/include/raft/mr/device/allocator.hpp +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -#include - -#include - -namespace raft { -namespace mr { -namespace device { - -/** - * @brief An explicit interface for an asynchronous device allocator. - * - * This is mostly done in order to reduce work needed in cuML codebase. - * An implementation of this interface can make the following assumptions, - * further to the ones listed in `Allocator`: - * - Allocations may be always on the device that was specified on construction. - */ -class allocator : public base_allocator { -}; - -/** Default device allocator based on the one provided by RMM */ -class default_allocator : public allocator { - public: - void* allocate(std::size_t n, cudaStream_t stream) override - { - void* ptr = rmm::mr::get_current_device_resource()->allocate(n, stream); - return ptr; - } - - void deallocate(void* p, std::size_t n, cudaStream_t stream) override - { - rmm::mr::get_current_device_resource()->deallocate(p, n, stream); - } -}; // class default_allocator - -}; // namespace device -}; // namespace mr -}; // namespace raft diff --git a/cpp/include/raft/mr/device/buffer.hpp b/cpp/include/raft/mr/device/buffer.hpp deleted file mode 100644 index 9b5ff11c50..0000000000 --- a/cpp/include/raft/mr/device/buffer.hpp +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "allocator.hpp" -#include -#include - -namespace raft { -namespace mr { -namespace device { - -/** - * @brief RAII object owning a contiguous typed device buffer. The passed in - * allocator supports asynchronous allocation and deallocation so this - * can also be used for temporary memory - * - * @code{.cpp} - * template - * void foo(..., cudaStream_t stream) { - * ... - * raft::mr::device::buffer temp(stream, 0); - * ... - * temp.resize(n); - * kernelA<<>>(...,temp.data(),...); - * kernelB<<>>(...,temp.data(),...); - * temp.release(); - * ... - * } - * @endcode - */ -template -class buffer : public buffer_base { - public: - using size_type = typename buffer_base::size_type; - using value_type = typename buffer_base::value_type; - using iterator = typename buffer_base::iterator; - using const_iterator = typename buffer_base::const_iterator; - using reference = typename buffer_base::reference; - using const_reference = typename buffer_base::const_reference; - - buffer() = delete; - - buffer(const buffer& other) = delete; - - buffer& operator=(const buffer& other) = delete; - - buffer(std::shared_ptr alloc, cudaStream_t stream, size_type n = 0) - : buffer_base(alloc, stream, n) - { - } -}; // class buffer - -}; // namespace device -}; // namespace mr -}; // namespace raft diff --git a/cpp/include/raft/mr/host/allocator.hpp b/cpp/include/raft/mr/host/allocator.hpp deleted file mode 100644 index 71b5465451..0000000000 --- a/cpp/include/raft/mr/host/allocator.hpp +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include - -#include - -#include - -namespace raft { -namespace mr { -namespace host { - -/** - * @brief An explicit interface for an asynchronous host allocations. - * - * This is mostly done in order to reduce work needed in cuML codebase. - * An implementation of this interface can make the following assumptions, - * further to the ones listed in `Allocator`: - * - Allocations don't need to be zero copy accessible form a device. 
- */ -class allocator : public base_allocator { -}; - -/** Default cudaMallocHost/cudaFreeHost based host allocator */ -class default_allocator : public allocator { - public: - void* allocate(std::size_t n, cudaStream_t stream) override - { - void* ptr = nullptr; - RAFT_CUDA_TRY(cudaMallocHost(&ptr, n)); - return ptr; - } - - void deallocate(void* p, std::size_t n, cudaStream_t stream) override - { - // Must call _NO_THROW here since this is called frequently from object - // destructors which are "nothrow" by default - RAFT_CUDA_TRY_NO_THROW(cudaFreeHost(p)); - } -}; // class default_allocator - -}; // namespace host -}; // namespace mr -}; // namespace raft diff --git a/cpp/include/raft/mr/host/buffer.hpp b/cpp/include/raft/mr/host/buffer.hpp deleted file mode 100644 index 204b384719..0000000000 --- a/cpp/include/raft/mr/host/buffer.hpp +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "allocator.hpp" -#include -#include -#include - -namespace raft { -namespace mr { -namespace host { - -/** - * @brief RAII object owning a contigous typed host buffer (aka pinned memory). - * The passed in allocator supports asynchronus allocation and - * deallocation so this can also be used for temporary memory - * - * @code{.cpp} - * template - * void foo(const T* in_d , T* out_d, ..., cudaStream_t stream) { - * ... - * raft::mr::host::buffer temp(stream, 0); - * ... - * temp.resize(n); - * raft::copy(temp.data(), in_d, temp.size()); - * ... - * raft::copy(out_d, temp.data(), temp.size()); - * temp.release(stream); - * ... 
- * } - * @endcode - */ -template -class buffer : public buffer_base { - public: - using size_type = typename buffer_base::size_type; - using value_type = typename buffer_base::value_type; - using iterator = typename buffer_base::iterator; - using const_iterator = typename buffer_base::const_iterator; - using reference = typename buffer_base::reference; - using const_reference = typename buffer_base::const_reference; - - buffer() = delete; - - buffer(const buffer& other) = delete; - - buffer& operator=(const buffer& other) = delete; - - buffer(std::shared_ptr alloc, const device::buffer& other) - : buffer_base(alloc, other.get_stream(), other.size()) - { - if (other.size() > 0) { raft::copy(data_, other.data(), other.size(), other.get_stream()); } - } - - buffer(std::shared_ptr alloc, cudaStream_t stream, size_type n = 0) - : buffer_base(alloc, stream, n) - { - } - - reference operator[](size_type pos) { return data_[pos]; } - - const_reference operator[](size_type pos) const { return data_[pos]; } - - private: - using buffer_base::data_; -}; - -}; // namespace host -}; // namespace mr -}; // namespace raft diff --git a/cpp/include/raft/sparse/distance/detail/coo_spmv.cuh b/cpp/include/raft/sparse/distance/detail/coo_spmv.cuh index c23a2b1537..cb9e67dd6b 100644 --- a/cpp/include/raft/sparse/distance/detail/coo_spmv.cuh +++ b/cpp/include/raft/sparse/distance/detail/coo_spmv.cuh @@ -21,7 +21,6 @@ #include #include -#include #include #include "../../csr.hpp" diff --git a/cpp/include/raft/sparse/distance/detail/utils.cuh b/cpp/include/raft/sparse/distance/detail/utils.cuh index 8c01b33c1e..6af7867f63 100644 --- a/cpp/include/raft/sparse/distance/detail/utils.cuh +++ b/cpp/include/raft/sparse/distance/detail/utils.cuh @@ -16,9 +16,6 @@ #pragma once -#include -#include - #include namespace raft { diff --git a/cpp/include/raft/sparse/distance/distance.hpp b/cpp/include/raft/sparse/distance/distance.hpp index 2f121dce33..aee53ac37a 100644 --- a/cpp/include/raft/sparse/distance/distance.hpp +++ b/cpp/include/raft/sparse/distance/distance.hpp @@ -21,7 +21,6 @@ #include #include -#include #include #include diff --git a/cpp/include/raft/sparse/hierarchy/detail/agglomerative.cuh b/cpp/include/raft/sparse/hierarchy/detail/agglomerative.cuh index 4e78494e6b..593702b4a7 100644 --- a/cpp/include/raft/sparse/hierarchy/detail/agglomerative.cuh +++ b/cpp/include/raft/sparse/hierarchy/detail/agglomerative.cuh @@ -19,7 +19,6 @@ #include #include #include -#include #include diff --git a/cpp/include/raft/sparse/hierarchy/detail/connectivities.cuh b/cpp/include/raft/sparse/hierarchy/detail/connectivities.cuh index 5d4640f4a6..548ca00c93 100644 --- a/cpp/include/raft/sparse/hierarchy/detail/connectivities.cuh +++ b/cpp/include/raft/sparse/hierarchy/detail/connectivities.cuh @@ -24,7 +24,6 @@ #include #include -#include #include #include #include diff --git a/cpp/include/raft/sparse/hierarchy/detail/mst.cuh b/cpp/include/raft/sparse/hierarchy/detail/mst.cuh index 7173c76c08..10e9d04c0d 100644 --- a/cpp/include/raft/sparse/hierarchy/detail/mst.cuh +++ b/cpp/include/raft/sparse/hierarchy/detail/mst.cuh @@ -19,7 +19,6 @@ #include #include -#include #include #include #include diff --git a/cpp/include/raft/sparse/op/detail/reduce.cuh b/cpp/include/raft/sparse/op/detail/reduce.cuh index 074a139ba9..715bbe6deb 100644 --- a/cpp/include/raft/sparse/op/detail/reduce.cuh +++ b/cpp/include/raft/sparse/op/detail/reduce.cuh @@ -20,7 +20,6 @@ #include #include -#include #include #include diff --git 
a/cpp/include/raft/sparse/selection/detail/connect_components.cuh b/cpp/include/raft/sparse/selection/detail/connect_components.cuh index 817b9782f2..a7364dec4c 100644 --- a/cpp/include/raft/sparse/selection/detail/connect_components.cuh +++ b/cpp/include/raft/sparse/selection/detail/connect_components.cuh @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include diff --git a/cpp/include/raft/sparse/selection/detail/knn.cuh b/cpp/include/raft/sparse/selection/detail/knn.cuh index de0a15c029..b8dc25053b 100644 --- a/cpp/include/raft/sparse/selection/detail/knn.cuh +++ b/cpp/include/raft/sparse/selection/detail/knn.cuh @@ -23,7 +23,6 @@ #include #include #include -#include #include #include diff --git a/cpp/include/raft/spatial/knn/ann.hpp b/cpp/include/raft/spatial/knn/ann.hpp index e8cc85256d..a63af32603 100644 --- a/cpp/include/raft/spatial/knn/ann.hpp +++ b/cpp/include/raft/spatial/knn/ann.hpp @@ -22,8 +22,6 @@ #include #include -#include - namespace raft { namespace spatial { namespace knn { diff --git a/cpp/include/raft/spatial/knn/knn.hpp b/cpp/include/raft/spatial/knn/knn.hpp index ecee562795..59df75ba36 100644 --- a/cpp/include/raft/spatial/knn/knn.hpp +++ b/cpp/include/raft/spatial/knn/knn.hpp @@ -19,8 +19,6 @@ #include "detail/knn_brute_force_faiss.cuh" #include "detail/selection_faiss.cuh" -#include - namespace raft { namespace spatial { namespace knn { diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index b37c671525..dac555c1d3 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -63,8 +63,6 @@ add_executable(test_raft test/linalg/unary_op.cu test/matrix/math.cu test/matrix/matrix.cu - test/mr/device/buffer.cpp - test/mr/host/buffer.cpp test/mst.cu test/random/rng.cu test/random/rng_int.cu diff --git a/cpp/test/mr/device/buffer.cpp b/cpp/test/mr/device/buffer.cpp deleted file mode 100644 index 4861a4ca1f..0000000000 --- a/cpp/test/mr/device/buffer.cpp +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2020, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include -#include - -namespace raft { -namespace mr { -namespace device { - -TEST(Raft, DeviceBufferAlloc) -{ - cudaStream_t stream; - RAFT_CUDA_TRY(cudaStreamCreate(&stream)); - // no allocation at construction - rmm::device_uvector buff(0, stream); - ASSERT_EQ(0, buff.size()); - // explicit allocation after construction - buff.resize(20, stream); - ASSERT_EQ(20, buff.size()); - // resizing to a smaller buffer size - buff.resize(10, stream); - ASSERT_EQ(10, buff.size()); - // explicit deallocation - buff.release(); - ASSERT_EQ(0, buff.size()); - // use these methods without the explicit stream parameter - buff.resize(20, stream); - ASSERT_EQ(20, buff.size()); - buff.resize(10, stream); - ASSERT_EQ(10, buff.size()); - buff.release(); - ASSERT_EQ(0, buff.size()); - RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); - RAFT_CUDA_TRY(cudaStreamDestroy(stream)); -} - -TEST(Raft, DeviceBufferZeroResize) -{ - // Create a limiting_resource_adaptor to track allocations - auto curr_mr = - dynamic_cast(rmm::mr::get_current_device_resource()); - auto limit_mr = - std::make_shared>(curr_mr, - 1000); - - rmm::mr::set_current_device_resource(limit_mr.get()); - - cudaStream_t stream; - RAFT_CUDA_TRY(cudaStreamCreate(&stream)); - // no allocation at construction - rmm::device_uvector buff(10, stream); - ASSERT_EQ(10, buff.size()); - // explicit allocation after construction - buff.resize(0, stream); - ASSERT_EQ(0, buff.size()); - // resizing to a smaller buffer size - buff.resize(20, stream); - ASSERT_EQ(20, buff.size()); - // explicit deallocation - buff.release(); - ASSERT_EQ(0, buff.size()); - - // Now check that there is no memory left. (Used to not be true) - ASSERT_EQ(0, limit_mr->get_allocated_bytes()); - - rmm::mr::set_current_device_resource(curr_mr); - - RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); - RAFT_CUDA_TRY(cudaStreamDestroy(stream)); -} - -} // namespace device -} // namespace mr -} // namespace raft diff --git a/cpp/test/mr/host/buffer.cpp b/cpp/test/mr/host/buffer.cpp deleted file mode 100644 index d645ffa0e0..0000000000 --- a/cpp/test/mr/host/buffer.cpp +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2020, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
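The DeviceBufferZeroResize test above exercises an RMM adaptor to prove that release() returns every byte; condensed, the tracking pattern it relies on looks like this (a sketch assuming only the RMM calls already visible in that test):

    #include <cassert>
    #include <rmm/mr/device/device_memory_resource.hpp>
    #include <rmm/mr/device/limiting_resource_adaptor.hpp>
    #include <rmm/mr/device/per_device_resource.hpp>

    void check_no_leaks()
    {
      auto* upstream = rmm::mr::get_current_device_resource();
      // Route allocations through an adaptor that caps and counts them.
      rmm::mr::limiting_resource_adaptor<rmm::mr::device_memory_resource> limited(upstream, 1000);
      rmm::mr::set_current_device_resource(&limited);

      // ... perform stream-ordered allocations and free them ...

      assert(limited.get_allocated_bytes() == 0);      // everything was returned
      rmm::mr::set_current_device_resource(upstream);  // restore the original resource
    }
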
- */ - -#include -#include -#include -#include -#include - -namespace raft { -namespace mr { -namespace host { - -TEST(Raft, HostBuffer) -{ - auto alloc = std::make_shared(); - cudaStream_t stream; - RAFT_CUDA_TRY(cudaStreamCreate(&stream)); - // no allocation at construction - buffer buff(alloc, stream); - ASSERT_EQ(0, buff.size()); - // explicit allocation after construction - buff.resize(20, stream); - ASSERT_EQ(20, buff.size()); - // resizing to a smaller buffer size - buff.resize(10, stream); - ASSERT_EQ(10, buff.size()); - // explicit deallocation - buff.release(stream); - ASSERT_EQ(0, buff.size()); - // use these methods without the explicit stream parameter - buff.resize(20); - ASSERT_EQ(20, buff.size()); - buff.resize(10); - ASSERT_EQ(10, buff.size()); - buff.release(); - ASSERT_EQ(0, buff.size()); - RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); - RAFT_CUDA_TRY(cudaStreamDestroy(stream)); -} - -TEST(Raft, DeviceToHostBuffer) -{ - auto d_alloc = std::make_shared(); - auto h_alloc = std::make_shared(); - cudaStream_t stream; - RAFT_CUDA_TRY(cudaStreamCreate(&stream)); - device::buffer d_buff(d_alloc, stream, 32); - RAFT_CUDA_TRY(cudaMemsetAsync(d_buff.data(), 0, sizeof(char) * d_buff.size(), stream)); - buffer h_buff(h_alloc, d_buff); - ASSERT_EQ(d_buff.size(), h_buff.size()); - RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); - RAFT_CUDA_TRY(cudaStreamDestroy(stream)); -} - -} // namespace host -} // namespace mr -} // namespace raft diff --git a/python/raft/common/handle.pxd b/python/raft/common/handle.pxd index 077b6d62b6..dc38ac47ec 100644 --- a/python/raft/common/handle.pxd +++ b/python/raft/common/handle.pxd @@ -33,8 +33,6 @@ cdef extern from "raft/handle.hpp" namespace "raft" nogil: handle_t(cuda_stream_view stream_view) except + handle_t(cuda_stream_view stream_view, shared_ptr[cuda_stream_pool] stream_pool) except + - void set_device_allocator(shared_ptr[allocator] a) except + - shared_ptr[allocator] get_device_allocator() except + cuda_stream_view get_stream() except + void sync_stream() except + diff --git a/python/raft/common/handle.pyx b/python/raft/common/handle.pyx index 1accf9e679..b4a681931a 100644 --- a/python/raft/common/handle.pyx +++ b/python/raft/common/handle.pyx @@ -20,13 +20,16 @@ # cython: language_level = 3 # import raft -from libcpp.memory cimport shared_ptr from rmm._lib.cuda_stream_view cimport cuda_stream_per_thread from rmm._lib.cuda_stream_view cimport cuda_stream_view from .cuda cimport _Stream, _Error, cudaStreamSynchronize from .cuda import CudaRuntimeError +cdef extern from * nogil: + ctypedef void* cudaStream_t "cudaStream_t" + + cdef class Handle: """ Handle is a lightweight python wrapper around the corresponding C++ class @@ -64,7 +67,7 @@ cdef class Handle: self.stream_pool)) else: # this constructor constructs a handle on user stream - c_stream = cuda_stream_view(<_Stream> stream.getStream()) + c_stream = cuda_stream_view(stream.getStream()) self.c_obj.reset(new handle_t(c_stream, self.stream_pool)) From 41d1326b3c65da6e8d42eeb9803a56c85476e0f7 Mon Sep 17 00:00:00 2001 From: viclafargue Date: Mon, 14 Feb 2022 16:49:25 +0100 Subject: [PATCH 4/6] Use of RAFT_CUDA_TRY --- cpp/include/raft/random/detail/rng_impl.cuh | 2 +- cpp/test/linalg/rsvd.cu | 10 +++++----- cpp/test/span.cu | 8 ++++---- cpp/test/spatial/fused_l2_knn.cu | 15 ++++++++------- 4 files changed, 18 insertions(+), 17 deletions(-) diff --git a/cpp/include/raft/random/detail/rng_impl.cuh b/cpp/include/raft/random/detail/rng_impl.cuh index 2406456404..b77e84284a 100644 --- 
a/cpp/include/raft/random/detail/rng_impl.cuh +++ b/cpp/include/raft/random/detail/rng_impl.cuh @@ -846,7 +846,7 @@ class RngImpl { rmm::device_uvector workspace(0, stream); sortPairs(workspace, expWts.data(), sortedWts.data(), inIdxPtr, outIdxPtr, (int)len, stream); if (outIdx != nullptr) { - CUDA_CHECK(cudaMemcpyAsync( + RAFT_CUDA_TRY(cudaMemcpyAsync( outIdx, outIdxPtr, sizeof(IdxT) * sampledLen, cudaMemcpyDeviceToDevice, stream)); } scatter(out, in, outIdxPtr, sampledLen, stream); diff --git a/cpp/test/linalg/rsvd.cu b/cpp/test/linalg/rsvd.cu index da38464bf7..8aef3e77a4 100644 --- a/cpp/test/linalg/rsvd.cu +++ b/cpp/test/linalg/rsvd.cu @@ -100,11 +100,11 @@ class RsvdTest : public ::testing::TestWithParam> { int len_redundant = m * n_redundant; r.normal(A.data(), len_informative, mu, sigma, stream); - CUDA_CHECK(cudaMemcpyAsync(A.data() + len_informative, - A.data(), - len_redundant * sizeof(T), - cudaMemcpyDeviceToDevice, - stream)); + RAFT_CUDA_TRY(cudaMemcpyAsync(A.data() + len_informative, + A.data(), + len_redundant * sizeof(T), + cudaMemcpyDeviceToDevice, + stream)); } std::vector A_backup_cpu(m * n); // Backup A matrix as svdJacobi will destroy the content of A diff --git a/cpp/test/span.cu b/cpp/test/span.cu index e121cea108..abecfc677c 100644 --- a/cpp/test/span.cu +++ b/cpp/test/span.cu @@ -30,16 +30,16 @@ struct TestStatus { public: TestStatus() { - CUDA_CHECK(cudaMalloc(&status_, sizeof(int))); + RAFT_CUDA_TRY(cudaMalloc(&status_, sizeof(int))); int h_status = 1; - CUDA_CHECK(cudaMemcpy(status_, &h_status, sizeof(int), cudaMemcpyHostToDevice)); + RAFT_CUDA_TRY(cudaMemcpy(status_, &h_status, sizeof(int), cudaMemcpyHostToDevice)); } - ~TestStatus() noexcept(false) { CUDA_CHECK(cudaFree(status_)); } + ~TestStatus() noexcept(false) { RAFT_CUDA_TRY(cudaFree(status_)); } int Get() { int h_status; - CUDA_CHECK(cudaMemcpy(&h_status, status_, sizeof(int), cudaMemcpyDeviceToHost)); + RAFT_CUDA_TRY(cudaMemcpy(&h_status, status_, sizeof(int), cudaMemcpyDeviceToHost)); return h_status; } diff --git a/cpp/test/spatial/fused_l2_knn.cu b/cpp/test/spatial/fused_l2_knn.cu index 57d7126fba..db84c9d47b 100644 --- a/cpp/test/spatial/fused_l2_knn.cu +++ b/cpp/test/spatial/fused_l2_knn.cu @@ -110,15 +110,16 @@ class FusedL2KNNTest : public ::testing::TestWithParam { faiss_indices_(params_.num_queries * params_.k, stream_), faiss_distances_(params_.num_queries * params_.k, stream_) { - CUDA_CHECK(cudaMemsetAsync(database.data(), 0, database.size() * sizeof(T), stream_)); - CUDA_CHECK( + RAFT_CUDA_TRY(cudaMemsetAsync(database.data(), 0, database.size() * sizeof(T), stream_)); + RAFT_CUDA_TRY( cudaMemsetAsync(search_queries.data(), 0, search_queries.size() * sizeof(T), stream_)); - CUDA_CHECK(cudaMemsetAsync(raft_indices_.data(), 0, raft_indices_.size() * sizeof(T), stream_)); - CUDA_CHECK(cudaMemsetAsync( + RAFT_CUDA_TRY( + cudaMemsetAsync(raft_indices_.data(), 0, raft_indices_.size() * sizeof(T), stream_)); + RAFT_CUDA_TRY(cudaMemsetAsync( raft_distances_.data(), 0, raft_distances_.size() * sizeof(int64_t), stream_)); - CUDA_CHECK( + RAFT_CUDA_TRY( cudaMemsetAsync(faiss_indices_.data(), 0, faiss_indices_.size() * sizeof(int64_t), stream_)); - CUDA_CHECK( + RAFT_CUDA_TRY( cudaMemsetAsync(faiss_distances_.data(), 0, faiss_distances_.size() * sizeof(T), stream_)); } @@ -172,7 +173,7 @@ class FusedL2KNNTest : public ::testing::TestWithParam { gpu_res.noTempMemory(); int device; - CUDA_CHECK(cudaGetDevice(&device)); + RAFT_CUDA_TRY(cudaGetDevice(&device)); gpu_res.setDefaultStream(device, stream_); 
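    // Note on the macro swap in this patch: RAFT_CUDA_TRY is the newer spelling of
    // CUDA_CHECK from raft/cudart_utils.h; it runs a CUDA runtime call and throws a
    // RAFT exception when the call does not return cudaSuccess. The general shape
    // of such a macro, as an illustrative sketch only (not RAFT's exact definition):
    //
    //   #define CUDA_TRY(call)                                       \
    //     do {                                                       \
    //       cudaError_t const status = (call);                       \
    //       if (status != cudaSuccess) {                             \
    //         throw std::runtime_error(cudaGetErrorString(status));  \
    //       }                                                        \
    //     } while (0)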
faiss::gpu::GpuDistanceParams args; From 8ec45817ba14fa1eb1240b5c41ce8f37f3d8f9f8 Mon Sep 17 00:00:00 2001 From: viclafargue Date: Thu, 24 Feb 2022 16:48:13 +0100 Subject: [PATCH 5/6] Update copyright header --- cpp/include/raft/cudart_utils.h | 2 +- cpp/include/raft/sparse/distance/detail/utils.cuh | 2 +- cpp/test/CMakeLists.txt | 2 +- cpp/test/spatial/fused_l2_knn.cu | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/include/raft/cudart_utils.h b/cpp/include/raft/cudart_utils.h index 3a063be77e..1940fcea51 100644 --- a/cpp/include/raft/cudart_utils.h +++ b/cpp/include/raft/cudart_utils.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/raft/sparse/distance/detail/utils.cuh b/cpp/include/raft/sparse/distance/detail/utils.cuh index 6af7867f63..ed2b414c70 100644 --- a/cpp/include/raft/sparse/distance/detail/utils.cuh +++ b/cpp/include/raft/sparse/distance/detail/utils.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 8b7c39669b..8b52ea6158 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -1,5 +1,5 @@ #============================================================================= -# Copyright (c) 2021, NVIDIA CORPORATION. +# Copyright (c) 2021-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/cpp/test/spatial/fused_l2_knn.cu b/cpp/test/spatial/fused_l2_knn.cu index db84c9d47b..1b6c8390ed 100644 --- a/cpp/test/spatial/fused_l2_knn.cu +++ b/cpp/test/spatial/fused_l2_knn.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
From 0243630b13a375b06298aef620f01a72d1812b34 Mon Sep 17 00:00:00 2001
From: viclafargue
Date: Tue, 15 Mar 2022 19:35:48 +0100
Subject: [PATCH 6/6] Resolve remaining issues

---
 .../raft/random/detail/make_regression.cuh  |  1 -
 .../raft/stats/detail/homogeneity_score.cuh |  1 -
 cpp/test/spatial/fused_l2_knn.cu            |  6 +--
 cpp/test/stats/information_criterion.cu     | 53 ++++++++++---------
 4 files changed, 30 insertions(+), 31 deletions(-)

diff --git a/cpp/include/raft/random/detail/make_regression.cuh b/cpp/include/raft/random/detail/make_regression.cuh
index 8bab85e485..42c1319889 100644
--- a/cpp/include/raft/random/detail/make_regression.cuh
+++ b/cpp/include/raft/random/detail/make_regression.cuh
@@ -30,7 +30,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
diff --git a/cpp/include/raft/stats/detail/homogeneity_score.cuh b/cpp/include/raft/stats/detail/homogeneity_score.cuh
index 4c78553258..e781b58875 100644
--- a/cpp/include/raft/stats/detail/homogeneity_score.cuh
+++ b/cpp/include/raft/stats/detail/homogeneity_score.cuh
@@ -22,7 +22,6 @@

 #pragma once

-#include
 #include
 #include
diff --git a/cpp/test/spatial/fused_l2_knn.cu b/cpp/test/spatial/fused_l2_knn.cu
index 3be4b47256..2ec4e86d1f 100644
--- a/cpp/test/spatial/fused_l2_knn.cu
+++ b/cpp/test/spatial/fused_l2_knn.cu
@@ -114,9 +114,9 @@ class FusedL2KNNTest : public ::testing::TestWithParam<FusedL2KNNInputs> {
     RAFT_CUDA_TRY(
       cudaMemsetAsync(search_queries.data(), 0, search_queries.size() * sizeof(T), stream_));
     RAFT_CUDA_TRY(
-      cudaMemsetAsync(raft_indices_.data(), 0, raft_indices_.size() * sizeof(T), stream_));
-    RAFT_CUDA_TRY(cudaMemsetAsync(
-      raft_distances_.data(), 0, raft_distances_.size() * sizeof(int64_t), stream_));
+      cudaMemsetAsync(raft_indices_.data(), 0, raft_indices_.size() * sizeof(int64_t), stream_));
+    RAFT_CUDA_TRY(
+      cudaMemsetAsync(raft_distances_.data(), 0, raft_distances_.size() * sizeof(T), stream_));
     RAFT_CUDA_TRY(
       cudaMemsetAsync(faiss_indices_.data(), 0, faiss_indices_.size() * sizeof(int64_t), stream_));
     RAFT_CUDA_TRY(
diff --git a/cpp/test/stats/information_criterion.cu b/cpp/test/stats/information_criterion.cu
index 802e3fee23..d61f8591a5 100644
--- a/cpp/test/stats/information_criterion.cu
+++ b/cpp/test/stats/information_criterion.cu
@@ -19,7 +19,8 @@
 #include
 #include
-#include <raft/mr/device/allocator.hpp>
+#include <raft/handle.hpp>
+#include <rmm/device_uvector.hpp>
 #include

@@ -59,21 +60,23 @@ struct BatchedICInputs {

 template <typename T>
 class BatchedICTest : public ::testing::TestWithParam<BatchedICInputs<T>> {
+ public:
+  BatchedICTest()
+    : params(::testing::TestWithParam<BatchedICInputs<T>>::GetParam()),
+      stream(handle.get_stream()),
+      res_d(params.batch_size, stream)
+  {
+  }
+
 protected:
   void SetUp() override
   {
     using std::vector;
-    params = ::testing::TestWithParam<BatchedICInputs<T>>::GetParam();
-
-    // Create stream and allocator
-    RAFT_CUDA_TRY(cudaStreamCreate(&stream));
-    allocator = std::make_shared<raft::mr::device::default_allocator>();

     // Create arrays
     std::vector<T> loglike_h = std::vector<T>(params.batch_size);
     res_h.resize(params.batch_size);
-    T* loglike_d = (T*)allocator->allocate(sizeof(T) * params.batch_size, stream);
-    res_d        = (T*)allocator->allocate(sizeof(T) * params.batch_size, stream);
+    rmm::device_uvector<T> loglike_d(params.batch_size, stream);

     // Generate random data
     std::random_device rd;
@@ -83,11 +86,11 @@ class BatchedICTest : public ::testing::TestWithParam<BatchedICInputs<T>> {
       loglike_h[i] = std::log(udis(gen));

     // Copy the data to the device
-    raft::update_device(loglike_d, loglike_h.data(), params.batch_size, stream);
+    raft::update_device(loglike_d.data(), loglike_h.data(), params.batch_size, stream);

     // Compute the tested results
-    information_criterion_batched(res_d,
-                                  loglike_d,
+    information_criterion_batched(res_d.data(),
+                                  loglike_d.data(),
                                   params.ic_type,
                                   params.n_params,
                                   params.batch_size,
@@ -103,22 +106,14 @@ class BatchedICTest : public ::testing::TestWithParam<BatchedICInputs<T>> {
                                   params.n_samples);
     RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
-
-    allocator->deallocate(loglike_d, sizeof(T) * params.batch_size, stream);
-  }
-
-  void TearDown() override
-  {
-    allocator->deallocate(res_d, sizeof(T) * params.batch_size, stream);
-    RAFT_CUDA_TRY(cudaStreamDestroy(stream));
   }

 protected:
-  std::shared_ptr<raft::mr::device::allocator> allocator;
+  raft::handle_t handle;
+  cudaStream_t stream = 0;
   BatchedICInputs<T> params;
-  T* res_d;
+  rmm::device_uvector<T> res_d;
   std::vector<T> res_h;
-  cudaStream_t stream = 0;
 };

 // Test parameters (op, n_batches, m, n, p, q, tolerance)
@@ -133,13 +128,19 @@ using BatchedICTestD = BatchedICTest<double>;
 using BatchedICTestF = BatchedICTest<float>;
 TEST_P(BatchedICTestD, Result)
 {
-  ASSERT_TRUE(devArrMatchHost(
-    res_h.data(), res_d, params.batch_size, raft::CompareApprox<double>(params.tolerance), stream));
+  ASSERT_TRUE(devArrMatchHost(res_h.data(),
+                              res_d.data(),
+                              params.batch_size,
+                              raft::CompareApprox<double>(params.tolerance),
+                              stream));
 }
 TEST_P(BatchedICTestF, Result)
 {
-  ASSERT_TRUE(devArrMatchHost(
-    res_h.data(), res_d, params.batch_size, raft::CompareApprox<float>(params.tolerance), stream));
+  ASSERT_TRUE(devArrMatchHost(res_h.data(),
+                              res_d.data(),
+                              params.batch_size,
+                              raft::CompareApprox<float>(params.tolerance),
+                              stream));
 }

 INSTANTIATE_TEST_CASE_P(BatchedICTests, BatchedICTestD, ::testing::ValuesIn(inputsd));
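The information_criterion.cu rewrite above is the template for the whole series: delete the hand-rolled stream/allocator setup and the matching TearDown, let a raft::handle_t provide the stream, and let an RAII container own the memory. A minimal before/after sketch follows, assuming only the raft::handle_t and rmm::device_uvector APIs used in the patch; the function and buffer names are placeholders, and note that device_uvector is sized in elements, not bytes.

    // Illustrative sketch of the allocator-to-RAII migration; not code from the patch.
    #include <cstddef>
    #include <raft/cudart_utils.h>  // assumed source of RAFT_CUDA_TRY
    #include <raft/handle.hpp>
    #include <rmm/device_uvector.hpp>

    void scratch_buffer_example(std::size_t n_elems)
    {
      raft::handle_t handle;
      cudaStream_t stream = handle.get_stream();

      // Before: ptr = (T*)allocator->allocate(sizeof(T) * n_elems, stream);
      //         ... allocator->deallocate(ptr, sizeof(T) * n_elems, stream);
      // After: the buffer is stream-ordered, sized in elements, and frees
      // itself on scope exit, so no TearDown-style cleanup is needed.
      rmm::device_uvector<float> buf(n_elems, stream);
      RAFT_CUDA_TRY(cudaMemsetAsync(buf.data(), 0, buf.size() * sizeof(float), stream));
      RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
    }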