From 3cb3df3255efaec4a5ebb6cb7606067f753e3554 Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR
Date: Thu, 13 Jun 2024 11:54:55 -0500
Subject: [PATCH] Add ability to enable rmm pool on `cudf.pandas` import (#15628)

This PR enables allocating an RMM memory pool on `cudf.pandas` import via
the following environment variable:

```
export CUDF_PANDAS_RMM_MODE="pool"
```

Supported values are `cuda`, `pool`, `async`, `managed` and `managed_pool`;
any other value raises a `TypeError`. If a non-default memory resource is
already configured, a warning is emitted and the variable is ignored.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Mark Harris (https://github.com/harrism)
  - Mads R. B. Kristensen (https://github.com/madsbk)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/15628
---
 python/cudf/cudf/pandas/__init__.py       | 46 +++++++++++++++++++
 .../cudf_pandas_tests/test_cudf_pandas.py | 28 +++++++++++
 2 files changed, 74 insertions(+)

diff --git a/python/cudf/cudf/pandas/__init__.py b/python/cudf/cudf/pandas/__init__.py
index 5b3785531d3..59a88f85dda 100644
--- a/python/cudf/cudf/pandas/__init__.py
+++ b/python/cudf/cudf/pandas/__init__.py
@@ -2,6 +2,9 @@
 # All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
+
+import warnings
+
 from .fast_slow_proxy import is_proxy_object
 from .magics import load_ipython_extension
 from .profiler import Profiler
@@ -19,6 +22,49 @@ def install():
     loader = ModuleAccelerator.install("pandas", "cudf", "pandas")
     global LOADED
     LOADED = loader is not None
+    import os
+
+    if (rmm_mode := os.getenv("CUDF_PANDAS_RMM_MODE", None)) is not None:
+        import rmm.mr
+        from rmm.mr import available_device_memory
+
+        # Check if a non-default memory resource is set
+        current_mr = rmm.mr.get_current_device_resource()
+        if not isinstance(current_mr, rmm.mr.CudaMemoryResource):
+            warnings.warn(
+                f"cudf.pandas detected an already configured memory resource, ignoring 'CUDF_PANDAS_RMM_MODE'={str(rmm_mode)}",
+                UserWarning,
+            )
+            # Honor the warning: leave the configured resource untouched.
+            return
+        # Use 80% of the free device memory, rounded to a multiple of 256.
+        free_memory, _ = available_device_memory()
+        free_memory = int(round(float(free_memory) * 0.80 / 256) * 256)
+
+        if rmm_mode == "cuda":
+            mr = rmm.mr.CudaMemoryResource()
+            rmm.mr.set_current_device_resource(mr)
+        elif rmm_mode == "pool":
+            rmm.mr.set_current_device_resource(
+                rmm.mr.PoolMemoryResource(
+                    rmm.mr.get_current_device_resource(),
+                    initial_pool_size=free_memory,
+                )
+            )
+        elif rmm_mode == "async":
+            mr = rmm.mr.CudaAsyncMemoryResource(initial_pool_size=free_memory)
+            rmm.mr.set_current_device_resource(mr)
+        elif rmm_mode == "managed":
+            mr = rmm.mr.ManagedMemoryResource()
+            rmm.mr.set_current_device_resource(mr)
+        elif rmm_mode == "managed_pool":
+            rmm.reinitialize(
+                managed_memory=True,
+                pool_allocator=True,
+                initial_pool_size=free_memory,
+            )
+        else:
+            raise TypeError(f"Unsupported rmm mode: {rmm_mode}")
 
 
 def pytest_load_initial_conftests(early_config, parser, args):
diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py
index 515a4714a5a..c251e4a197e 100644
--- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py
+++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py
@@ -9,6 +9,7 @@
 import os
 import pathlib
 import pickle
+import subprocess
 import tempfile
 import types
 from io import BytesIO, StringIO
@@ -1425,6 +1426,33 @@ def test_holidays_within_dates(holiday, start, expected):
     ) == [utc.localize(dt) for dt in expected]
 
 
+@pytest.mark.parametrize(
+    "env_value",
+    ["", "cuda", "pool", "async", "managed", "managed_pool", "abc"],
+)
+def test_rmm_option_on_import(env_value):
+    data_directory = os.path.dirname(os.path.abspath(__file__))
+    # Create a copy of the current environment variables
+    env = os.environ.copy()
+    env["CUDF_PANDAS_RMM_MODE"] = env_value
+
+    sp_completed = subprocess.run(
+        [
+            "python",
+            "-m",
+            "cudf.pandas",
+            data_directory + "/data/profile_basic.py",
+        ],
+        capture_output=True,
+        text=True,
+        env=env,
+    )
+    if env_value in {"cuda", "pool", "async", "managed", "managed_pool"}:
+        assert sp_completed.returncode == 0
+    else:
+        assert sp_completed.returncode == 1
+
+
 def test_cudf_pandas_debugging_different_results(monkeypatch):
     cudf_mean = cudf.Series.mean
 