From ed73c186dd406e903c0b461f16574d0c3cd759b4 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Wed, 1 May 2024 15:02:06 +0000 Subject: [PATCH 1/6] Add ability to enable rmm pool on import --- python/cudf/cudf/pandas/__init__.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/pandas/__init__.py b/python/cudf/cudf/pandas/__init__.py index f2e855ae55c..1f872d010ae 100644 --- a/python/cudf/cudf/pandas/__init__.py +++ b/python/cudf/cudf/pandas/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES. # All rights reserved. # SPDX-License-Identifier: Apache-2.0 @@ -18,6 +18,25 @@ def install(): loader = ModuleAccelerator.install("pandas", "cudf", "pandas") global LOADED LOADED = loader is not None + import os + + cudf_pandas_mr = os.getenv("CUDF_PANDAS_MEMORY_RESOURCE", None) + if cudf_pandas_mr is not None: + import rmm + import rmm.mr + + cudf_pandas_mr = getattr(rmm.mr, cudf_pandas_mr, None) + if cudf_pandas_mr is not None: + from rmm.mr import PoolMemoryResource + + mr = PoolMemoryResource( + cudf_pandas_mr(), + initial_pool_size=os.getenv( + "CUDF_PANDAS_INITIAL_POOL_SIZE", None + ), + maximum_pool_size=os.getenv("CUDF_PANDAS_MAX_POOL_SIZE", None), + ) + rmm.mr.set_current_device_resource(mr) def pytest_load_initial_conftests(early_config, parser, args): From 583c87f6871487ef463656e390b5fb85b393e4e9 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Wed, 1 May 2024 10:22:34 -0500 Subject: [PATCH 2/6] Apply suggestions from code review Co-authored-by: Bradley Dice --- python/cudf/cudf/pandas/__init__.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/python/cudf/cudf/pandas/__init__.py b/python/cudf/cudf/pandas/__init__.py index 1f872d010ae..6567fcc27d5 100644 --- a/python/cudf/cudf/pandas/__init__.py +++ b/python/cudf/cudf/pandas/__init__.py @@ -20,13 +20,10 @@ def install(): LOADED = loader is not None import os - cudf_pandas_mr = os.getenv("CUDF_PANDAS_MEMORY_RESOURCE", None) - if cudf_pandas_mr is not None: - import rmm + if cudf_pandas_mr := os.getenv("CUDF_PANDAS_MEMORY_RESOURCE", None) is not None: import rmm.mr - cudf_pandas_mr = getattr(rmm.mr, cudf_pandas_mr, None) - if cudf_pandas_mr is not None: + if cudf_pandas_mr := getattr(rmm.mr, cudf_pandas_mr, None) is not None: from rmm.mr import PoolMemoryResource mr = PoolMemoryResource( From 1f2132b3c3014680732000fdefdce8c33036681c Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 21 May 2024 06:00:21 +0000 Subject: [PATCH 3/6] address reviews --- python/cudf/cudf/pandas/__init__.py | 44 +++++++++++++++---- .../cudf_pandas_tests/test_cudf_pandas.py | 31 +++++++++++++ 2 files changed, 66 insertions(+), 9 deletions(-) diff --git a/python/cudf/cudf/pandas/__init__.py b/python/cudf/cudf/pandas/__init__.py index 6567fcc27d5..4f153e838a4 100644 --- a/python/cudf/cudf/pandas/__init__.py +++ b/python/cudf/cudf/pandas/__init__.py @@ -2,6 +2,8 @@ # All rights reserved. # SPDX-License-Identifier: Apache-2.0 +import warnings + from .magics import load_ipython_extension from .profiler import Profiler @@ -20,20 +22,44 @@ def install(): LOADED = loader is not None import os - if cudf_pandas_mr := os.getenv("CUDF_PANDAS_MEMORY_RESOURCE", None) is not None: + if (rmm_mode := os.getenv("CUDF_PANDAS_RMM_MODE", None)) is not None: import rmm.mr + from rmm._lib.memory_resource import get_free_device_memory - if cudf_pandas_mr := getattr(rmm.mr, cudf_pandas_mr, None) is not None: - from rmm.mr import PoolMemoryResource + # Check if a non-default memory resource is set + current_mr = rmm.mr.get_current_device_resource() + if not isinstance(current_mr, rmm.mr.CudaMemoryResource): + warnings.warn( + f"cudf.pandas detected an already configured memory resource, ignoring 'CUDF_PANDAS_RMM_MODE'={str(rmm_mode)}", + UserWarning, + ) - mr = PoolMemoryResource( - cudf_pandas_mr(), - initial_pool_size=os.getenv( - "CUDF_PANDAS_INITIAL_POOL_SIZE", None - ), - maximum_pool_size=os.getenv("CUDF_PANDAS_MAX_POOL_SIZE", None), + if rmm_mode == "cuda": + mr = rmm.mr.CudaMemoryResource() + rmm.mr.set_current_device_resource(mr) + elif rmm_mode == "pool": + rmm.mr.set_current_device_resource( + rmm.mr.PoolMemoryResource( + rmm.mr.get_current_device_resource(), + initial_pool_size=get_free_device_memory(80), + ) ) + elif rmm_mode == "async": + mr = rmm.mr.CudaAsyncMemoryResource( + initial_pool_size=get_free_device_memory(80) + ) + rmm.mr.set_current_device_resource(mr) + elif rmm_mode == "managed": + mr = rmm.mr.ManagedMemoryResource() rmm.mr.set_current_device_resource(mr) + elif rmm_mode == "managed_pool": + rmm.reinitialize( + managed_memory=True, + pool_allocator=True, + initial_pool_size=get_free_device_memory(80), + ) + else: + raise TypeError(f"Unsupported rmm mode: {rmm_mode}") def pytest_load_initial_conftests(early_config, parser, args): diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index 75bceea3034..4120fd0ae00 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -6,8 +6,10 @@ import copy import datetime import operator +import os import pathlib import pickle +import subprocess import tempfile import types from io import BytesIO, StringIO @@ -1421,3 +1423,32 @@ def test_holidays_within_dates(holiday, start, expected): utc.localize(xpd.Timestamp(start)), ) ) == [utc.localize(dt) for dt in expected] + + +@pytest.mark.parametrize( + "env_value", + ["", "cuda", "pool", "async", "managed", "managed_pool", "abc"], +) +def test_rmm_option_on_import(env_value): + data_directory = os.path.dirname(os.path.abspath(__file__)) + # Create a copy of the current environment variables + env = os.environ.copy() + env["CUDF_PANDAS_RMM_MODE"] = env_value + + sp_completed = subprocess.run( + [ + "python", + "-m", + "cudf.pandas", + data_directory + "/data/profile_basic.py", + ], + capture_output=True, + text=True, + env=env, + ) + if env_value in {"cuda", "pool", "async", "managed", "managed_pool"}: + assert sp_completed.returncode == 0 + else: + assert sp_completed.returncode == 1 + + del env["CUDF_PANDAS_RMM_MODE"] From 5ca4602ef36631da9f9510a026e1ad0457680fea Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Wed, 12 Jun 2024 09:21:40 -0500 Subject: [PATCH 4/6] Update python/cudf/cudf_pandas_tests/test_cudf_pandas.py Co-authored-by: Mads R. B. Kristensen --- python/cudf/cudf_pandas_tests/test_cudf_pandas.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index 4120fd0ae00..da6a62e07ac 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -1450,5 +1450,3 @@ def test_rmm_option_on_import(env_value): assert sp_completed.returncode == 0 else: assert sp_completed.returncode == 1 - - del env["CUDF_PANDAS_RMM_MODE"] From 919fe0eb7164f9b113b337b3ca9a2240768ed67d Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Wed, 12 Jun 2024 14:47:36 +0000 Subject: [PATCH 5/6] Fix percentage calculation --- python/cudf/cudf/pandas/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/pandas/__init__.py b/python/cudf/cudf/pandas/__init__.py index 8d685702872..f168032d141 100644 --- a/python/cudf/cudf/pandas/__init__.py +++ b/python/cudf/cudf/pandas/__init__.py @@ -36,7 +36,8 @@ def install(): UserWarning, ) free_memory, _ = available_device_memory() - free_memory = int(float(free_memory) / 80.0) + free_memory = int(float(free_memory) * 0.80) + if rmm_mode == "cuda": mr = rmm.mr.CudaMemoryResource() rmm.mr.set_current_device_resource(mr) From 531a5858d8b171c5047efb6e1cb1383b0fbf2975 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Wed, 12 Jun 2024 15:24:30 +0000 Subject: [PATCH 6/6] closest to 256 --- python/cudf/cudf/pandas/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/pandas/__init__.py b/python/cudf/cudf/pandas/__init__.py index f168032d141..59a88f85dda 100644 --- a/python/cudf/cudf/pandas/__init__.py +++ b/python/cudf/cudf/pandas/__init__.py @@ -36,7 +36,7 @@ def install(): UserWarning, ) free_memory, _ = available_device_memory() - free_memory = int(float(free_memory) * 0.80) + free_memory = int(round(float(free_memory) * 0.80 / 256) * 256) if rmm_mode == "cuda": mr = rmm.mr.CudaMemoryResource()