From 40285e73ed28170642ee42035f5d55642c4ace45 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 21 Sep 2023 02:59:31 -0700 Subject: [PATCH 1/4] Do not reset CUDA context after UCX tests Resetting CUDA contexts during a running process may have unintended consequences on third-party libraries -- e.g., CuPy -- that store state based on the context. Therefore, prevent destroying CUDA context for now. --- distributed/utils_test.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/distributed/utils_test.py b/distributed/utils_test.py index 69e4d11346f..92aa0089e75 100644 --- a/distributed/utils_test.py +++ b/distributed/utils_test.py @@ -2156,17 +2156,6 @@ def ucx_loop(): import distributed.comm.ucx distributed.comm.ucx.ucp = None - # If the test created a context, clean it up. - # TODO: should we check if there's already a context _before_ the test runs? - # I think that would be useful. - from distributed.diagnostics.nvml import has_cuda_context - - ctx = has_cuda_context() - if ctx.has_context: - import numba.cuda - - ctx = numba.cuda.current_context() - ctx.device.reset() def wait_for_log_line( From 89aa5529ae816568f4c1d8a114fdd429df25aeda Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 21 Sep 2023 07:11:08 -0700 Subject: [PATCH 2/4] Prevent cuDF `FutureWarning` on empty series datatype --- distributed/comm/tests/test_ucx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/distributed/comm/tests/test_ucx.py b/distributed/comm/tests/test_ucx.py index 04d4cd1833f..5ecefcd6093 100644 --- a/distributed/comm/tests/test_ucx.py +++ b/distributed/comm/tests/test_ucx.py @@ -179,7 +179,7 @@ async def test_ucx_deserialize(ucx_loop): "g", [ lambda cudf: cudf.Series([1, 2, 3]), - lambda cudf: cudf.Series([]), + lambda cudf: cudf.Series([], dtype=object), lambda cudf: cudf.DataFrame([]), lambda cudf: cudf.DataFrame([1]).head(0), lambda cudf: cudf.DataFrame([1.0]).head(0), From 0f1c4daf6502e8ae5ce87205c81e954dda9a3db7 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Fri, 22 Sep 2023 04:46:50 -0700 Subject: [PATCH 3/4] Downgrade to aws-sdk-cpp<1.11 --- continuous_integration/gpuci/build.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/continuous_integration/gpuci/build.sh b/continuous_integration/gpuci/build.sh index 37b7ab43706..1a885a99aad 100644 --- a/continuous_integration/gpuci/build.sh +++ b/continuous_integration/gpuci/build.sh @@ -40,6 +40,8 @@ gpuci_logger "Activate conda env" . /opt/conda/etc/profile.d/conda.sh conda activate dask +mamba install -y 'aws-sdk-cpp<1.11' + gpuci_logger "Install distributed" python -m pip install -e . From 119dc1417ae2826e560096d8873aef1a5e82dd54 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Sat, 23 Sep 2023 02:23:39 -0700 Subject: [PATCH 4/4] Revert "Downgrade to aws-sdk-cpp<1.11" This reverts commit 0f1c4daf6502e8ae5ce87205c81e954dda9a3db7. --- continuous_integration/gpuci/build.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/continuous_integration/gpuci/build.sh b/continuous_integration/gpuci/build.sh index 1a885a99aad..37b7ab43706 100644 --- a/continuous_integration/gpuci/build.sh +++ b/continuous_integration/gpuci/build.sh @@ -40,8 +40,6 @@ gpuci_logger "Activate conda env" . /opt/conda/etc/profile.d/conda.sh conda activate dask -mamba install -y 'aws-sdk-cpp<1.11' - gpuci_logger "Install distributed" python -m pip install -e .