From cb3aa9b76da2ff53231011b9c892c94fef2d04e3 Mon Sep 17 00:00:00 2001 From: Vibhu Jawa Date: Thu, 29 Jun 2023 16:30:28 -0700 Subject: [PATCH 1/9] test on CI --- python/raft-dask/raft_dask/test/test_comms.py | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/python/raft-dask/raft_dask/test/test_comms.py b/python/raft-dask/raft_dask/test/test_comms.py index 5c69a94fd8..9aa2402096 100644 --- a/python/raft-dask/raft_dask/test/test_comms.py +++ b/python/raft-dask/raft_dask/test/test_comms.py @@ -19,7 +19,7 @@ from dask.distributed import get_worker, wait -from .conftest import create_client +#from .conftest import create_client try: from raft_dask.common import ( @@ -43,6 +43,28 @@ except ImportError: pytestmark = pytest.mark.skip +def create_client(cluster): + """ + Create a Dask distributed client for a specified cluster. + + Parameters + ---------- + cluster : LocalCUDACluster instance or str + If a LocalCUDACluster instance is provided, a client will be created + for it directly. If a string is provided, it should specify the path to + a Dask scheduler file. A client will then be created for the cluster + referenced by this scheduler file. + + Returns + ------- + dask.distributed.Client + A client connected to the specified cluster. + """ + if isinstance(cluster, LocalCUDACluster): + return Client(cluster) + else: + return Client(scheduler_file=cluster) + def test_comms_init_no_p2p(cluster): client = create_client(cluster) From 77cdb03074c1f42b726e5f81656af5e74873d8a4 Mon Sep 17 00:00:00 2001 From: Vibhu Jawa Date: Thu, 29 Jun 2023 16:35:08 -0700 Subject: [PATCH 2/9] test on CI --- python/raft-dask/raft_dask/test/test_comms.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/python/raft-dask/raft_dask/test/test_comms.py b/python/raft-dask/raft_dask/test/test_comms.py index 9aa2402096..eb29469ca4 100644 --- a/python/raft-dask/raft_dask/test/test_comms.py +++ b/python/raft-dask/raft_dask/test/test_comms.py @@ -18,6 +18,9 @@ import pytest from dask.distributed import get_worker, wait +from dask.distributed import Client +from dask_cuda import LocalCUDACluster + #from .conftest import create_client From 38d41bbc4ba2e15f190686065efd8b5917564950 Mon Sep 17 00:00:00 2001 From: Vibhu Jawa Date: Thu, 29 Jun 2023 16:37:09 -0700 Subject: [PATCH 3/9] style fixes --- python/raft-dask/raft_dask/test/test_comms.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/python/raft-dask/raft_dask/test/test_comms.py b/python/raft-dask/raft_dask/test/test_comms.py index eb29469ca4..7135fe6ff6 100644 --- a/python/raft-dask/raft_dask/test/test_comms.py +++ b/python/raft-dask/raft_dask/test/test_comms.py @@ -17,12 +17,10 @@ import pytest -from dask.distributed import get_worker, wait -from dask.distributed import Client +from dask.distributed import Client, get_worker, wait from dask_cuda import LocalCUDACluster - -#from .conftest import create_client +# from .conftest import create_client try: from raft_dask.common import ( @@ -46,6 +44,7 @@ except ImportError: pytestmark = pytest.mark.skip + def create_client(cluster): """ Create a Dask distributed client for a specified cluster. From 4bf5bbad4a40015ca374890d8bc382caf5773b6f Mon Sep 17 00:00:00 2001 From: Vibhu Jawa Date: Thu, 29 Jun 2023 18:27:19 -0700 Subject: [PATCH 4/9] switch back ucx cluster --- python/raft-dask/raft_dask/test/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/raft-dask/raft_dask/test/conftest.py b/python/raft-dask/raft_dask/test/conftest.py index d1baa684d4..043c63c704 100644 --- a/python/raft-dask/raft_dask/test/conftest.py +++ b/python/raft-dask/raft_dask/test/conftest.py @@ -43,7 +43,7 @@ def client(cluster): @pytest.fixture() def ucx_client(ucx_cluster): - client = create_client(ucx_cluster) + client = create_client(cluster) yield client client.close() From bd4e45755c91b5706a9a52eb3ffb4dde7b46fc1c Mon Sep 17 00:00:00 2001 From: Vibhu Jawa Date: Fri, 30 Jun 2023 08:43:16 -0700 Subject: [PATCH 5/9] Try to triage CI issues by removing pytest skipping --- python/raft-dask/raft_dask/test/test_comms.py | 39 +++++++++---------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/python/raft-dask/raft_dask/test/test_comms.py b/python/raft-dask/raft_dask/test/test_comms.py index 7135fe6ff6..8a8529439b 100644 --- a/python/raft-dask/raft_dask/test/test_comms.py +++ b/python/raft-dask/raft_dask/test/test_comms.py @@ -20,29 +20,26 @@ from dask.distributed import Client, get_worker, wait from dask_cuda import LocalCUDACluster -# from .conftest import create_client +from raft_dask.common import ( + Comms, + local_handle, + perform_test_comm_split, + perform_test_comms_allgather, + perform_test_comms_allreduce, + perform_test_comms_bcast, + perform_test_comms_device_multicast_sendrecv, + perform_test_comms_device_send_or_recv, + perform_test_comms_device_sendrecv, + perform_test_comms_gather, + perform_test_comms_gatherv, + perform_test_comms_reduce, + perform_test_comms_reducescatter, + perform_test_comms_send_recv, +) -try: - from raft_dask.common import ( - Comms, - local_handle, - perform_test_comm_split, - perform_test_comms_allgather, - perform_test_comms_allreduce, - perform_test_comms_bcast, - perform_test_comms_device_multicast_sendrecv, - perform_test_comms_device_send_or_recv, - perform_test_comms_device_sendrecv, - perform_test_comms_gather, - perform_test_comms_gatherv, - perform_test_comms_reduce, - perform_test_comms_reducescatter, - perform_test_comms_send_recv, - ) +# from .conftest import create_client - pytestmark = pytest.mark.mg -except ImportError: - pytestmark = pytest.mark.skip +pytestmark = pytest.mark.mg def create_client(cluster): From 4ca25c9813871be7a60990c8e4d82c99613dff7d Mon Sep 17 00:00:00 2001 From: Vibhu Jawa Date: Fri, 30 Jun 2023 10:20:40 -0700 Subject: [PATCH 6/9] removing __init__.py --- python/raft-dask/raft_dask/test/__init__.py | 13 ------------- 1 file changed, 13 deletions(-) delete mode 100644 python/raft-dask/raft_dask/test/__init__.py diff --git a/python/raft-dask/raft_dask/test/__init__.py b/python/raft-dask/raft_dask/test/__init__.py deleted file mode 100644 index 764e0f32fd..0000000000 --- a/python/raft-dask/raft_dask/test/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. From f46224ffb20d93e1725ba46f2d0a003699beeab3 Mon Sep 17 00:00:00 2001 From: Vibhu Jawa Date: Fri, 30 Jun 2023 12:46:58 -0700 Subject: [PATCH 7/9] Update test_comms.py --- python/raft-dask/raft_dask/test/test_comms.py | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/python/raft-dask/raft_dask/test/test_comms.py b/python/raft-dask/raft_dask/test/test_comms.py index 8a8529439b..eb08a014c5 100644 --- a/python/raft-dask/raft_dask/test/test_comms.py +++ b/python/raft-dask/raft_dask/test/test_comms.py @@ -20,27 +20,27 @@ from dask.distributed import Client, get_worker, wait from dask_cuda import LocalCUDACluster -from raft_dask.common import ( - Comms, - local_handle, - perform_test_comm_split, - perform_test_comms_allgather, - perform_test_comms_allreduce, - perform_test_comms_bcast, - perform_test_comms_device_multicast_sendrecv, - perform_test_comms_device_send_or_recv, - perform_test_comms_device_sendrecv, - perform_test_comms_gather, - perform_test_comms_gatherv, - perform_test_comms_reduce, - perform_test_comms_reducescatter, - perform_test_comms_send_recv, -) - -# from .conftest import create_client - -pytestmark = pytest.mark.mg +try: + from raft_dask.common import ( + Comms, + local_handle, + perform_test_comm_split, + perform_test_comms_allgather, + perform_test_comms_allreduce, + perform_test_comms_bcast, + perform_test_comms_device_multicast_sendrecv, + perform_test_comms_device_send_or_recv, + perform_test_comms_device_sendrecv, + perform_test_comms_gather, + perform_test_comms_gatherv, + perform_test_comms_reduce, + perform_test_comms_reducescatter, + perform_test_comms_send_recv, + ) + pytestmark = pytest.mark.mg +except ImportError: + pytestmark = pytest.mark.skip def create_client(cluster): """ From 76f69833e97ae9e50da04409b7ceb95a3be3a3f5 Mon Sep 17 00:00:00 2001 From: Vibhu Jawa Date: Fri, 30 Jun 2023 12:47:24 -0700 Subject: [PATCH 8/9] Update conftest.py --- python/raft-dask/raft_dask/test/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/raft-dask/raft_dask/test/conftest.py b/python/raft-dask/raft_dask/test/conftest.py index 043c63c704..d1baa684d4 100644 --- a/python/raft-dask/raft_dask/test/conftest.py +++ b/python/raft-dask/raft_dask/test/conftest.py @@ -43,7 +43,7 @@ def client(cluster): @pytest.fixture() def ucx_client(ucx_cluster): - client = create_client(cluster) + client = create_client(ucx_cluster) yield client client.close() From 536e9026245163c1b7aaf3d00274b97780174286 Mon Sep 17 00:00:00 2001 From: Vibhu Jawa Date: Fri, 30 Jun 2023 12:49:51 -0700 Subject: [PATCH 9/9] Update test_comms.py --- python/raft-dask/raft_dask/test/test_comms.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/raft-dask/raft_dask/test/test_comms.py b/python/raft-dask/raft_dask/test/test_comms.py index eb08a014c5..68c9fee556 100644 --- a/python/raft-dask/raft_dask/test/test_comms.py +++ b/python/raft-dask/raft_dask/test/test_comms.py @@ -42,6 +42,7 @@ except ImportError: pytestmark = pytest.mark.skip + def create_client(cluster): """ Create a Dask distributed client for a specified cluster.