Skip to content

Commit

Permalink
Add available_device_memory to fetch free amount of memory on a GPU (
Browse files Browse the repository at this point in the history
…#1567)

This PR adds `available_device_memory`, which returns the free and total GPU memory; it is needed for rapidsai/cudf#15628.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Mark Harris (https://github.com/harrism)
  - Bradley Dice (https://github.com/bdice)
  - Lawrence Mitchell (https://github.com/wence-)

URL: #1567
  • Loading branch information
galipremsagar authored Jun 4, 2024
1 parent 805bcf1 commit 8597c22
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 6 deletions.
7 changes: 6 additions & 1 deletion python/rmm/rmm/_lib/memory_resource.pxd
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2022, NVIDIA CORPORATION.
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -14,6 +14,7 @@

from libc.stdint cimport int8_t
from libcpp.memory cimport shared_ptr
from libcpp.pair cimport pair
from libcpp.string cimport string
from libcpp.vector cimport vector

Expand All @@ -32,6 +33,10 @@ cdef extern from "rmm/mr/device/device_memory_resource.hpp" \
cuda_stream_view stream
) except +

# C++ functions declared from the header rmm/cuda_device.hpp (namespace rmm).
# The block is marked `nogil`, and each declaration uses `except +` so that a
# C++ exception is surfaced to Python as an exception instead of aborting.
cdef extern from "rmm/cuda_device.hpp" namespace "rmm" nogil:
# Takes an integer percentage and returns a size_t.
# NOTE(review): presumably the amount of free device memory that corresponds
# to `percent` -- confirm against the C++ header.
size_t percent_of_free_device_memory(int percent) except +
# Returns a pair of size_t values. The Python wrapper of the same name in
# memory_resource.pyx documents the pair as (free, total) device memory.
pair[size_t, size_t] available_device_memory() except +

cdef class DeviceMemoryResource:
cdef shared_ptr[device_memory_resource] c_obj
cdef device_memory_resource* get_mr(self) noexcept nogil
Expand Down
22 changes: 18 additions & 4 deletions python/rmm/rmm/_lib/memory_resource.pyx
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2022, NVIDIA CORPORATION.
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -32,9 +32,16 @@ from libcpp.string cimport string
from cuda.cudart import cudaError_t

from rmm._cuda.gpu import CUDARuntimeError, getDevice, setDevice

from rmm._cuda.stream cimport Stream

from rmm._cuda.stream import DEFAULT_STREAM

from rmm._lib.cuda_stream_view cimport cuda_stream_view
from rmm._lib.memory_resource cimport (
available_device_memory as c_available_device_memory,
percent_of_free_device_memory as c_percent_of_free_device_memory,
)
from rmm._lib.per_device_resource cimport (
cuda_device_id,
set_per_device_resource as cpp_set_per_device_resource,
Expand Down Expand Up @@ -109,8 +116,6 @@ cdef extern from "rmm/mr/device/cuda_async_memory_resource.hpp" \
win32
win32_kmt

# Declares rmm::percent_of_free_device_memory from rmm/cuda_device.hpp
# (nogil; `except +` maps C++ exceptions to Python exceptions).
# NOTE(review): the same declaration also appears in memory_resource.pxd,
# from which this module cimports it under the name
# c_percent_of_free_device_memory -- this copy looks like the removed side of
# the diff; confirm before relying on it.
cdef extern from "rmm/cuda_device.hpp" namespace "rmm" nogil:
size_t percent_of_free_device_memory(int percent) except +

cdef extern from "rmm/mr/device/pool_memory_resource.hpp" \
namespace "rmm::mr" nogil:
Expand Down Expand Up @@ -368,7 +373,7 @@ cdef class PoolMemoryResource(UpstreamResourceAdaptor):
cdef size_t c_initial_pool_size
cdef optional[size_t] c_maximum_pool_size
c_initial_pool_size = (
percent_of_free_device_memory(50) if
c_percent_of_free_device_memory(50) if
initial_pool_size is None
else initial_pool_size
)
Expand Down Expand Up @@ -1188,3 +1193,12 @@ def get_log_filenames():
else None
for i, each_mr in _per_device_mrs.items()
}


def available_device_memory():
    """
    Query the free and total device memory.

    Thin Python wrapper around the C++ ``rmm::available_device_memory``
    function declared in ``rmm/cuda_device.hpp``.

    Returns
    -------
    tuple
        A two-element tuple ``(free, total)``: the first element is the
        free device memory and the second is the total device memory, as
        reported by the underlying C++ call.
    """
    cdef pair[size_t, size_t] free_and_total = c_available_device_memory()
    # ``first``/``second`` of the C++ pair map to free/total respectively.
    free_memory = free_and_total.first
    total_memory = free_and_total.second
    return (free_memory, total_memory)
4 changes: 3 additions & 1 deletion python/rmm/rmm/mr.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -28,6 +28,7 @@
UpstreamResourceAdaptor,
_flush_logs,
_initialize,
available_device_memory,
disable_logging,
enable_logging,
get_current_device_resource,
Expand Down Expand Up @@ -57,6 +58,7 @@
"UpstreamResourceAdaptor",
"_flush_logs",
"_initialize",
"available_device_memory",
"set_per_device_resource",
"enable_logging",
"disable_logging",
Expand Down
13 changes: 13 additions & 0 deletions python/rmm/rmm/tests/test_rmm.py
Original file line number Diff line number Diff line change
Expand Up @@ -1002,3 +1002,16 @@ def test_invalid_logging_level(level):
rmm.set_flush_level(level)
with pytest.raises(TypeError):
rmm.should_log(level)


def test_available_device_memory():
    """Check ``available_device_memory`` before and after a device allocation.

    The total device memory must be the same across both queries, and the
    free amount reported by each query must be positive.
    """
    from rmm.mr import available_device_memory

    free_before, total_before = available_device_memory()
    # Bind the buffer to a name so it is still referenced when the second
    # query runs; the name itself is intentionally unused.
    device_buffer = rmm.DeviceBuffer.to_device(  # noqa: F841
        np.zeros(10000, dtype="u1")
    )
    free_after, total_after = available_device_memory()

    assert total_before == total_after
    assert free_before > 0
    assert free_after > 0

0 comments on commit 8597c22

Please sign in to comment.