Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ability to prefetch in cudf.pandas and change default to managed pool #16296

Merged
merged 23 commits into from
Jul 25, 2024
Merged
Changes from 11 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
35f1723
Add ability to prefetch
galipremsagar Jul 17, 2024
2424867
Merge branch 'branch-24.08' into prefetch_mr
galipremsagar Jul 17, 2024
7047b77
Prefetch the managed pool, too.
bdice Jul 23, 2024
8e70f7d
Merge remote-tracking branch 'upstream/branch-24.08' into prefetch_mr
galipremsagar Jul 23, 2024
42762d6
enable by default and add prefetch
galipremsagar Jul 23, 2024
2cd93d0
simplify
galipremsagar Jul 23, 2024
664da44
Apply suggestions from code review
galipremsagar Jul 23, 2024
02bb3dd
update
galipremsagar Jul 23, 2024
82b5c9e
Address reviews
galipremsagar Jul 23, 2024
cd4d156
Merge branch 'branch-24.08' into prefetch_mr
galipremsagar Jul 23, 2024
a6ebd3d
Update python/cudf/cudf/pandas/__init__.py
galipremsagar Jul 23, 2024
caaa2f1
Apply suggestions from code review
galipremsagar Jul 23, 2024
c7b6d56
move enable prefetching order to after module is run
galipremsagar Jul 23, 2024
eb6acb3
Merge branch 'prefetch_mr' of https://github.com/galipremsagar/cudf i…
galipremsagar Jul 23, 2024
3050661
return rmm_mode
galipremsagar Jul 23, 2024
c2e97e4
update
galipremsagar Jul 23, 2024
276d8cf
Merge branch 'branch-24.08' into prefetch_mr
galipremsagar Jul 24, 2024
34d35ba
Merge remote-tracking branch 'upstream/branch-24.08' into prefetch_mr
galipremsagar Jul 24, 2024
4457575
Merge branch 'branch-24.08' into prefetch_mr
galipremsagar Jul 24, 2024
cad780c
reduce parallelism
galipremsagar Jul 24, 2024
0170ffa
Merge branch 'prefetch_mr' of https://github.com/galipremsagar/cudf i…
galipremsagar Jul 24, 2024
89f50e9
Merge branch 'branch-24.08' into prefetch_mr
galipremsagar Jul 24, 2024
2de3886
Update python/cudf/cudf/pandas/scripts/run-pandas-tests.sh
galipremsagar Jul 25, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 45 additions & 30 deletions python/cudf/cudf/pandas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

import rmm.mr

from cudf._lib import pylibcudf

from .fast_slow_proxy import is_proxy_object
from .magics import load_ipython_extension
from .profiler import Profiler
Expand All @@ -25,41 +27,54 @@ def install():
global LOADED
LOADED = loader is not None

if (rmm_mode := os.getenv("CUDF_PANDAS_RMM_MODE", None)) is not None:
# Check if a non-default memory resource is set
current_mr = rmm.mr.get_current_device_resource()
if not isinstance(current_mr, rmm.mr.CudaMemoryResource):
warnings.warn(
f"cudf.pandas detected an already configured memory resource, ignoring 'CUDF_PANDAS_RMM_MODE'={str(rmm_mode)}",
UserWarning,
)
free_memory, _ = rmm.mr.available_device_memory()
free_memory = int(round(float(free_memory) * 0.80 / 256) * 256)
rmm_mode = os.getenv("CUDF_PANDAS_RMM_MODE", "managed_pool")
# Check if a non-default memory resource is set
current_mr = rmm.mr.get_current_device_resource()
if not isinstance(current_mr, rmm.mr.CudaMemoryResource):
warnings.warn(
f"cudf.pandas detected an already configured memory resource, ignoring 'CUDF_PANDAS_RMM_MODE'={str(rmm_mode)}",
UserWarning,
)
return
enable_prefetching = "managed" in rmm_mode
free_memory, _ = rmm.mr.available_device_memory()
free_memory = int(round(float(free_memory) * 0.80 / 256) * 256)

if rmm_mode == "cuda":
mr = rmm.mr.CudaMemoryResource()
rmm.mr.set_current_device_resource(mr)
elif rmm_mode == "pool":
rmm.mr.set_current_device_resource(
rmm.mr.PoolMemoryResource(
rmm.mr.get_current_device_resource(),
initial_pool_size=free_memory,
)
if rmm_mode == "cuda":
current_mr = rmm.mr.CudaMemoryResource()
elif rmm_mode == "pool":
galipremsagar marked this conversation as resolved.
Show resolved Hide resolved
current_mr = rmm.mr.set_current_device_resource(
rmm.mr.PoolMemoryResource(
current_mr,
initial_pool_size=free_memory,
)
elif rmm_mode == "async":
mr = rmm.mr.CudaAsyncMemoryResource(initial_pool_size=free_memory)
rmm.mr.set_current_device_resource(mr)
elif rmm_mode == "managed":
mr = rmm.mr.ManagedMemoryResource()
rmm.mr.set_current_device_resource(mr)
elif rmm_mode == "managed_pool":
mr = rmm.mr.PoolMemoryResource(
)
elif rmm_mode == "async":
current_mr = rmm.mr.CudaAsyncMemoryResource(
initial_pool_size=free_memory
)
elif rmm_mode == "managed":
current_mr = rmm.mr.PrefetchResourceAdaptor(
rmm.mr.ManagedMemoryResource()
)
elif rmm_mode == "managed_pool":
current_mr = rmm.mr.PrefetchResourceAdaptor(
rmm.mr.PoolMemoryResource(
rmm.mr.ManagedMemoryResource(),
initial_pool_size=free_memory,
)
rmm.mr.set_current_device_resource(mr)
else:
raise ValueError(f"Unsupported rmm mode: {rmm_mode}")
)
else:
galipremsagar marked this conversation as resolved.
Show resolved Hide resolved
raise ValueError(f"Unsupported rmm mode: {rmm_mode}")
galipremsagar marked this conversation as resolved.
Show resolved Hide resolved
rmm.mr.set_current_device_resource(current_mr)
galipremsagar marked this conversation as resolved.
Show resolved Hide resolved
if enable_prefetching:
for key in {
"column_view::get_data",
"mutable_column_view::get_data",
"gather",
"hash_join",
}:
pylibcudf.experimental.enable_prefetching(key)


def pytest_load_initial_conftests(early_config, parser, args):
Expand Down
Loading