Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable Dask tests with UCX-Py/UCXX in CI #5697

Merged
merged 37 commits into from
May 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
a076e1e
Add UCX/UCXX cluster/client fixtures to Dask tests
pentschev Dec 12, 2023
348407e
Add options to select running UCX/UCXX Dask tests
pentschev Dec 12, 2023
d4988c2
Add UCX/UCXX Dask NN tests
pentschev Dec 12, 2023
b88b7bd
Enable UCX/UCXX tests in CI
pentschev Dec 12, 2023
b07197f
Merge remote-tracking branch 'upstream/branch-24.02' into ucxx-tests
pentschev Dec 12, 2023
f60d58a
Fix CI command line typos
pentschev Dec 12, 2023
55ad2da
Merge remote-tracking branch 'upstream/branch-24.02' into ucxx-tests
pentschev Jan 5, 2024
3a06e29
Merge remote-tracking branch 'upstream/branch-24.06' into ucxx-tests
pentschev Apr 9, 2024
c049eb3
Add RAFT Python channel to test before RAFT merges
pentschev Apr 9, 2024
2fa180f
Update copyright
pentschev Apr 9, 2024
b4e22fb
Point CMake to RAFT/cumlprims_mg UCXX PRs
pentschev Apr 9, 2024
cee0d25
Move UCX-Py/UCXX to `run_cuml_dask_pytests.sh`
pentschev Apr 9, 2024
a695651
Disable `test_fil_skl_classification`
pentschev Apr 10, 2024
13f1d4c
Add missing `distributed-ucxx` dependency
pentschev Apr 10, 2024
5b3d468
Adjust `distributed-ucxx` in `build_wheel.sh`
pentschev Apr 10, 2024
8ed59fe
Update `distributed-ucxx` version in release script
pentschev Apr 10, 2024
4e20acf
Merge remote-tracking branch 'upstream/branch-24.06' into ucxx-tests
pentschev Apr 10, 2024
8359624
Merge remote-tracking branch 'upstream/branch-24.06' into ucxx-tests
pentschev May 3, 2024
57c5f4f
Disable non-UCXX Dask wheel tests in CI
pentschev May 3, 2024
6d0819f
Pull wheels from raft-dask PR
pentschev May 3, 2024
b7c31d1
Switch to raft-dask PR also for wheel build
pentschev May 3, 2024
ea613f0
Add set -x
pentschev May 4, 2024
2d39085
Fix `raft_dask_wheelhouse`
pentschev May 6, 2024
764bba9
Undo raft testing changes
vyasr May 7, 2024
f5fd345
Fix copyright
vyasr May 7, 2024
f4d14f4
Merge remote-tracking branch 'upstream/branch-24.06' into ucxx-tests
pentschev May 7, 2024
055beb8
Revert "Add set -x"
pentschev May 7, 2024
2cb76dd
Formatting fix
pentschev May 7, 2024
d28568c
Fix comment
pentschev May 7, 2024
364dcb6
Remove direct `distributed-ucxx` dependency
pentschev May 13, 2024
6be59e8
Re-enable Dask wheel tests
pentschev May 13, 2024
89484c3
Add timeouts to Dask tests
pentschev May 14, 2024
1f29e03
Remove `distributed-ucxx` from build and release scripts
pentschev May 15, 2024
06c6c7a
Remove unnecessary UCX-Py version parsing
pentschev May 15, 2024
13862f2
Remove `--find-links dist/`
pentschev May 15, 2024
31fb4a6
Remove `pytest.ini`
pentschev May 16, 2024
ce9590a
Remove debug print statements
pentschev May 17, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion ci/release/update-version.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ CURRENT_SHORT_TAG=${CURRENT_MAJOR}.${CURRENT_MINOR}
NEXT_MAJOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[1]}')
NEXT_MINOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[2]}')
NEXT_SHORT_TAG=${NEXT_MAJOR}.${NEXT_MINOR}
NEXT_UCX_PY_VERSION="$(curl -sL https://version.gpuci.io/rapids/${NEXT_SHORT_TAG}).*"

# Need to distutils-normalize the original version
NEXT_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${NEXT_SHORT_TAG}'))")
Expand Down
9 changes: 8 additions & 1 deletion ci/run_cuml_dask_pytests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,11 @@
# Support invoking run_cuml_dask_pytests.sh outside the script directory
cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/cuml/tests/dask

python -m pytest --cache-clear "$@" .
rapids-logger "pytest cuml-dask (No UCX-Py/UCXX)"
timeout 2h python -m pytest --cache-clear "$@" .

rapids-logger "pytest cuml-dask (UCX-Py only)"
timeout 5m python -m pytest --cache-clear --run_ucx "$@" .

rapids-logger "pytest cuml-dask (UCXX only)"
timeout 5m python -m pytest --cache-clear --run_ucxx "$@" .
2 changes: 1 addition & 1 deletion ci/test_python_dask.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ EXITCODE=0
trap "EXITCODE=1" ERR
set +e

rapids-logger "pytest cuml-dask"
# Run tests
./ci/run_cuml_dask_pytests.sh \
--junitxml="${RAPIDS_TESTS_DIR}/junit-cuml-dask.xml" \
--cov-config=../../../.coveragerc \
Expand Down
71 changes: 60 additions & 11 deletions python/cuml/tests/dask/conftest.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
# Copyright (c) 2020-2024, NVIDIA CORPORATION.

import pytest

Expand Down Expand Up @@ -34,18 +34,8 @@ def client(cluster):

@pytest.fixture(scope="module")
def ucx_cluster():
initialize.initialize(
create_cuda_context=True,
enable_tcp_over_ucx=enable_tcp_over_ucx,
enable_nvlink=enable_nvlink,
enable_infiniband=enable_infiniband,
)
cluster = LocalCUDACluster(
protocol="ucx",
enable_tcp_over_ucx=enable_tcp_over_ucx,
enable_nvlink=enable_nvlink,
enable_infiniband=enable_infiniband,
worker_class=IncreasedCloseTimeoutNanny,
)
yield cluster
cluster.close()
Expand All @@ -57,3 +47,62 @@ def ucx_client(ucx_cluster):
client = Client(ucx_cluster)
yield client
client.close()


@pytest.fixture(scope="module")
def ucxx_cluster():
    """Provide a module-scoped ``LocalCUDACluster`` using the ``"ucxx"`` protocol.

    Tests requesting this fixture are skipped when ``distributed_ucxx``
    cannot be imported.

    Yields
    ------
    LocalCUDACluster
        The running cluster; it is closed once the module's tests finish.
    """
    # pytest sets up fixture dependencies before the body of the fixture that
    # requests them, so a guard placed only in a dependent client fixture
    # would run after the cluster has already been created. Guard here so a
    # missing package yields a skip instead of a setup error.
    pytest.importorskip("distributed_ucxx")

    cluster = LocalCUDACluster(
        protocol="ucxx",
        worker_class=IncreasedCloseTimeoutNanny,
    )
    yield cluster
    cluster.close()


@pytest.fixture(scope="function")
def ucxx_client(ucxx_cluster):
    """Provide a per-test Dask ``Client`` connected to ``ucxx_cluster``.

    The test is skipped when ``distributed_ucxx`` cannot be imported.

    Parameters
    ----------
    ucxx_cluster
        Cluster fixture the client connects to.

    Yields
    ------
    Client
        A connected client; it is closed after the test finishes.
    """
    pytest.importorskip("distributed_ucxx")

    dask_client = Client(ucxx_cluster)
    yield dask_client
    dask_client.close()


def pytest_addoption(parser):
    """Register the command-line flags that select UCX-Py or UCXX tests.

    Parameters
    ----------
    parser
        The pytest option parser; options are added to its
        "Dask cuML Custom Options" group.
    """
    option_group = parser.getgroup("Dask cuML Custom Options")

    # (flag, help text) pairs, registered in this order.
    flag_specs = (
        ("--run_ucx", "run _only_ UCX-Py tests"),
        ("--run_ucxx", "run _only_ UCXX tests"),
    )
    for flag, help_text in flag_specs:
        option_group.addoption(flag, action="store_true", help=help_text)


def pytest_collection_modifyitems(config, items):
    """Skip collected tests according to ``--run_ucx`` / ``--run_ucxx``.

    For each of the ``ucx`` and ``ucxx`` keywords, handled in that order:

    * when the matching flag is given, every item *without* the keyword
      gets a skip marker;
    * otherwise, every item *with* the keyword gets a skip marker.

    Parameters
    ----------
    config
        The pytest config object, queried with ``getoption``.
    items
        The collected test items; skip markers are added in place.
    """
    # (keyword, flag, reason when flag is given, reason when it is not)
    selection_rules = (
        (
            "ucx",
            "--run_ucx",
            "only runs when --run_ucx is not specified",
            "requires --run_ucx to run",
        ),
        (
            "ucxx",
            "--run_ucxx",
            "only runs when --run_ucxx is not specified",
            "requires --run_ucxx to run",
        ),
    )

    for keyword, flag, reason_if_set, reason_if_unset in selection_rules:
        flag_given = bool(config.getoption(flag))
        skip_marker = pytest.mark.skip(
            reason=reason_if_set if flag_given else reason_if_unset
        )
        for test_item in items:
            has_keyword = keyword in test_item.keywords
            # Flag given -> skip items lacking the keyword;
            # flag absent -> skip items carrying it.
            if has_keyword != flag_given:
                test_item.add_marker(skip_marker)
Loading
Loading