rapidsai · rapids-bot · Dec 14, 2023 · Dec 12, 2023 · Dec 13, 2023 · Dec 13, 2023
diff --git a/.gitignore b/.gitignore
@@ -22,6 +22,7 @@ rmm.egg-info/
 python/build
 python/*/build
 python/rmm/**/_lib/**/*.cpp
+!python/rmm/_lib/_torch_allocator.cpp
 python/rmm/**/_lib/**/*.h
 python/rmm/**/_lib/.nfs*
 python/rmm/_cuda/*.cpp

@@ -12,12 +12,17 @@
 # the License.
 # =============================================================================
 
-set(cython_sources device_buffer.pyx lib.pyx logger.pyx memory_resource.pyx cuda_stream.pyx
-                   torch_allocator.pyx)
+set(cython_sources device_buffer.pyx lib.pyx logger.pyx memory_resource.pyx cuda_stream.pyx)
 set(linked_libraries rmm::rmm)
 
 # Build all of the Cython targets
 rapids_cython_create_modules(SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}"
                                                                                CXX)
-# The cdef public functions in this file need to have a C ABI
-target_compile_definitions(torch_allocator PRIVATE CYTHON_EXTERN_C=extern\ "C")
+
+add_library(_torch_allocator SHARED _torch_allocator.cpp)
+target_link_libraries(_torch_allocator PRIVATE rmm::rmm)
+# No prefix and a fixed suffix, so the built file is named exactly _torch_allocator.so
+set_target_properties(_torch_allocator PROPERTIES SUFFIX ".so" PREFIX "")
+cmake_path(RELATIVE_PATH CMAKE_CURRENT_SOURCE_DIR BASE_DIRECTORY "${PROJECT_SOURCE_DIR}"
+           OUTPUT_VARIABLE _torch_allocator_location)
+install(TARGETS _torch_allocator DESTINATION "${_torch_allocator_location}")
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cuda_runtime_api.h>
+
+#include <rmm/cuda_device.hpp>
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/mr/device/per_device_resource.hpp>
+
+// These signatures must match those required by CUDAPluggableAllocator in
+// github.com/pytorch/pytorch/blob/main/torch/csrc/cuda/CUDAPluggableAllocator.h
+// Since the loading is done at runtime via dlopen, no error checking
+// can be performed for mismatching signatures.
+
+/**
+ * @brief Hand out at least \p size bytes from the RMM resource of \p device.
+ *
+ * @throws rmm::bad_alloc If the request cannot be satisfied.
+ *
+ * @param size Requested allocation size, in bytes
+ * @param device ID of the device whose per-device memory resource is used
+ * @param stream Opaque CUDA stream handle (cast to cudaStream_t) to allocate on
+ * @return void* Address of the newly allocated memory
+ */
+extern "C" void* allocate(std::size_t size, int device, void* stream)
+{
+  rmm::cuda_device_id const target_device{device};
+  rmm::cuda_set_device_raii scoped_device{target_device};
+  rmm::cuda_stream_view const stream_view{static_cast<cudaStream_t>(stream)};
+  return rmm::mr::get_per_device_resource(target_device)->allocate(size, stream_view);
+}
+
+/**
+ * @brief Return the allocation at \p ptr to the RMM resource of \p device.
+ *
+ * @param ptr Address of the memory to release
+ * @param size Size of the allocation, in bytes
+ * @param device ID of the device whose per-device memory resource is used
+ * @param stream Opaque CUDA stream handle (cast to cudaStream_t) to deallocate on
+ */
+extern "C" void deallocate(void* ptr, std::size_t size, int device, void* stream)
+{
+  rmm::cuda_device_id const target_device{device};
+  rmm::cuda_set_device_raii scoped_device{target_device};
+  rmm::cuda_stream_view const stream_view{static_cast<cudaStream_t>(stream)};
+  rmm::mr::get_per_device_resource(target_device)->deallocate(ptr, size, stream_view);
+}
@@ -16,11 +16,14 @@
 except ImportError:
     rmm_torch_allocator = None
 else:
-    import rmm._lib.torch_allocator
+    import pathlib
 
-    _alloc_free_lib_path = rmm._lib.torch_allocator.__file__
+    sofile = (  # <parent of this file's directory>/_lib/_torch_allocator.so
+        pathlib.Path(__file__).parent.parent / "_lib" / "_torch_allocator.so"
+    )
     rmm_torch_allocator = CUDAPluggableAllocator(
-        _alloc_free_lib_path,
+        str(sofile.absolute()),  # absolute path, passed as a plain str
         alloc_fn_name="allocate",
         free_fn_name="deallocate",
     )
+    del pathlib, sofile  # unbind the helper names once the allocator exists