Skip to content

Commit

Permalink
Add ability to enable rmm pool on cudf.pandas import (#15628)
Browse files Browse the repository at this point in the history
This PR enables allocating an RMM memory pool on `cudf.pandas` import by setting the following environment variable:

```
export CUDF_PANDAS_RMM_MODE="pool"
```

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Mark Harris (https://github.com/harrism)
  - Mads R. B. Kristensen (https://github.com/madsbk)
  - Bradley Dice (https://github.com/bdice)

URL: #15628
  • Loading branch information
galipremsagar authored Jun 13, 2024
1 parent cb564da commit 3cb3df3
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 0 deletions.
43 changes: 43 additions & 0 deletions python/cudf/cudf/pandas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0


import warnings

from .fast_slow_proxy import is_proxy_object
from .magics import load_ipython_extension
from .profiler import Profiler
Expand All @@ -19,6 +22,46 @@ def install():
loader = ModuleAccelerator.install("pandas", "cudf", "pandas")
global LOADED
LOADED = loader is not None
import os

if (rmm_mode := os.getenv("CUDF_PANDAS_RMM_MODE", None)) is not None:
    # Configure the RMM device memory resource requested through the
    # CUDF_PANDAS_RMM_MODE environment variable. Recognised values are
    # "cuda", "pool", "async", "managed" and "managed_pool"; any other
    # value raises TypeError.
    import rmm.mr
    from rmm.mr import available_device_memory

    # Check if a non-default memory resource is set
    current_mr = rmm.mr.get_current_device_resource()
    if not isinstance(current_mr, rmm.mr.CudaMemoryResource):
        # Something other than the default resource is already installed.
        # Honour the warning text and really leave it alone: previously the
        # code fell through and replaced (or, in "pool" mode, wrapped) the
        # existing resource even though it announced it was ignoring the
        # variable.
        warnings.warn(
            "cudf.pandas detected an already configured memory "
            f"resource, ignoring 'CUDF_PANDAS_RMM_MODE'={rmm_mode!s}",
            UserWarning,
        )
    else:
        # Size pools at 80% of the currently free device memory, rounded to
        # a multiple of 256 bytes.
        # NOTE(review): presumably 256 matches RMM's pool-size alignment
        # requirement -- confirm against the RMM documentation.
        free_memory, _ = available_device_memory()
        free_memory = int(round(float(free_memory) * 0.80 / 256) * 256)

        if rmm_mode == "cuda":
            rmm.mr.set_current_device_resource(rmm.mr.CudaMemoryResource())
        elif rmm_mode == "pool":
            # current_mr is the default CudaMemoryResource here, so the pool
            # is layered directly on top of it.
            rmm.mr.set_current_device_resource(
                rmm.mr.PoolMemoryResource(
                    current_mr,
                    initial_pool_size=free_memory,
                )
            )
        elif rmm_mode == "async":
            rmm.mr.set_current_device_resource(
                rmm.mr.CudaAsyncMemoryResource(initial_pool_size=free_memory)
            )
        elif rmm_mode == "managed":
            rmm.mr.set_current_device_resource(rmm.mr.ManagedMemoryResource())
        elif rmm_mode == "managed_pool":
            rmm.reinitialize(
                managed_memory=True,
                pool_allocator=True,
                initial_pool_size=free_memory,
            )
        else:
            raise TypeError(f"Unsupported rmm mode: {rmm_mode}")


def pytest_load_initial_conftests(early_config, parser, args):
Expand Down
28 changes: 28 additions & 0 deletions python/cudf/cudf_pandas_tests/test_cudf_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import os
import pathlib
import pickle
import subprocess
import tempfile
import types
from io import BytesIO, StringIO
Expand Down Expand Up @@ -1425,6 +1426,33 @@ def test_holidays_within_dates(holiday, start, expected):
) == [utc.localize(dt) for dt in expected]


@pytest.mark.parametrize(
    "env_value",
    ["", "cuda", "pool", "async", "managed", "managed_pool", "abc"],
)
def test_rmm_option_on_import(env_value):
    """Check the exit status of ``python -m cudf.pandas`` per RMM mode.

    The script ``data/profile_basic.py`` (located next to this test file) is
    run in a subprocess with ``CUDF_PANDAS_RMM_MODE`` set to ``env_value``.
    The recognised modes are expected to exit with status 0; the empty
    string and the unknown value ``"abc"`` are expected to exit with
    status 1.
    """
    # Local import so this test does not depend on a module-level
    # ``import sys`` being present in the file.
    import sys

    data_directory = os.path.dirname(os.path.abspath(__file__))
    # Create a copy of the current environment variables
    env = os.environ.copy()
    env["CUDF_PANDAS_RMM_MODE"] = env_value

    sp_completed = subprocess.run(
        [
            # Use the interpreter running the tests; a bare "python" may
            # resolve to a different installation (or none) on PATH.
            sys.executable,
            "-m",
            "cudf.pandas",
            os.path.join(data_directory, "data", "profile_basic.py"),
        ],
        capture_output=True,
        text=True,
        env=env,
    )
    # Attach the child's stderr so a failure shows why it exited as it did.
    if env_value in {"cuda", "pool", "async", "managed", "managed_pool"}:
        assert sp_completed.returncode == 0, sp_completed.stderr
    else:
        assert sp_completed.returncode == 1, sp_completed.stderr


def test_cudf_pandas_debugging_different_results(monkeypatch):
cudf_mean = cudf.Series.mean

Expand Down

0 comments on commit 3cb3df3

Please sign in to comment.