From cd02a8f2b8dc22ec2115e80e559a4ab98d402c6e Mon Sep 17 00:00:00 2001 From: Ray Douglass <3107146+raydouglass@users.noreply.github.com> Date: Fri, 20 Sep 2024 13:05:44 -0400 Subject: [PATCH 1/6] Fix sed syntax [skip-ci] (#2441) --- ci/release/update-version.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 075eb896f2..5bb98511cf 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -58,7 +58,7 @@ for FILE in dependencies.yaml conda/environments/*.yaml; do sed_runner "/-.* ${DEP}\(-cu[[:digit:]]\{2\}\)\{0,1\}==/ s/==.*/==${NEXT_SHORT_TAG_PEP440}.*,>=0.0.0a0/g" "${FILE}" done for DEP in "${UCXX_DEPENDENCIES[@]}"; do - sed_runner "/-.* ${DEP}\(-cu[[:digit:]]\{2\}\)\{0,1\}==/ s/==.*/==${NEXT_UCXX_SHORT_TAG_PEP440}.*/,>=0.0.0a0/g" "${FILE}" + sed_runner "/-.* ${DEP}\(-cu[[:digit:]]\{2\}\)\{0,1\}==/ s/==.*/==${NEXT_UCXX_SHORT_TAG_PEP440}.*,>=0.0.0a0/g" "${FILE}" done done for FILE in python/*/pyproject.toml; do From f49567e1fb9f722f8864bc84880745b9b776eb61 Mon Sep 17 00:00:00 2001 From: Micka Date: Sat, 21 Sep 2024 01:09:00 +0200 Subject: [PATCH 2/6] Use runtime check of cudart version for eig (#2430) Authors: - Micka (https://github.com/lowener) Approvers: - Bradley Dice (https://github.com/bdice) - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/2430 --- cpp/include/raft/linalg/detail/eig.cuh | 31 ++++++++++++++------------ 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/cpp/include/raft/linalg/detail/eig.cuh b/cpp/include/raft/linalg/detail/eig.cuh index ba7ed3dcdf..561187178c 100644 --- a/cpp/include/raft/linalg/detail/eig.cuh +++ b/cpp/include/raft/linalg/detail/eig.cuh @@ -95,16 +95,19 @@ void eigDC(raft::resources const& handle, return; #endif -#if CUDART_VERSION <= 12040 - // Use a new stream instead of `cudaStreamPerThread` to avoid cusolver bug # 4580093. + int cudart_version = 0; + RAFT_CUDA_TRY(cudaRuntimeGetVersion(&cudart_version)); + cudaStream_t stream_new; + cudaEvent_t sync_event = resource::detail::get_cuda_stream_sync_event(handle); rmm::cuda_stream stream_new_wrapper; - cudaStream_t stream_new = stream_new_wrapper.value(); - cudaEvent_t sync_event = resource::detail::get_cuda_stream_sync_event(handle); - RAFT_CUDA_TRY(cudaEventRecord(sync_event, stream)); - RAFT_CUDA_TRY(cudaStreamWaitEvent(stream_new, sync_event)); -#else - cudaStream_t stream_new = stream; -#endif + if (cudart_version < 12050) { + // Use a new stream instead of `cudaStreamPerThread` to avoid cusolver bug # 4580093. + stream_new = stream_new_wrapper.value(); + RAFT_CUDA_TRY(cudaEventRecord(sync_event, stream)); + RAFT_CUDA_TRY(cudaStreamWaitEvent(stream_new, sync_event)); + } else { + stream_new = stream; + } cusolverDnHandle_t cusolverH = resource::get_cusolver_dn_handle(handle); cusolverDnParams_t dn_params = nullptr; @@ -152,11 +155,11 @@ void eigDC(raft::resources const& handle, "eig.cuh: eigensolver couldn't converge to a solution. " "This usually occurs when some of the features do not vary enough."); -#if CUDART_VERSION <= 12040 - // Synchronize the created stream with the original stream before return - RAFT_CUDA_TRY(cudaEventRecord(sync_event, stream_new)); - RAFT_CUDA_TRY(cudaStreamWaitEvent(stream, sync_event)); -#endif + if (cudart_version < 12050) { + // Synchronize the created stream with the original stream before return + RAFT_CUDA_TRY(cudaEventRecord(sync_event, stream_new)); + RAFT_CUDA_TRY(cudaStreamWaitEvent(stream, sync_event)); + } } enum EigVecMemUsage { OVERWRITE_INPUT, COPY_INPUT }; From b79f15d2f229849bc02425b2e4ffd7bd3db89d4c Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 23 Sep 2024 16:09:01 -0500 Subject: [PATCH 3/6] Update fmt (to 11.0.2) and spdlog (to 1.14.1). (#2433) * Update fmt (to 11.0.2) and spdlog (to 1.14.1). * use rmm and ucxx CI artifacts * try using librmm wheels * try again to use rmm wheels * rapids-get-pr-wheel-artifact was missing RAPIDS_PY_WHEEL_NAME * ok you do not need to provide the Python slug yourself for rapids-get-pr-wheel-artifact * constraints need 'file://' protocol * try suppressing unreachable-code diagnostics from nvcc (this should be narrowed down / upstreamed before merging) * fix checks * Revert "try suppressing unreachable-code diagnostics from nvcc (this should be narrowed down / upstreamed before merging)" This reverts commit 3ba2201d678cc8befdfcaa0e89630a95a00a78b2. * copyright * move rapids-cmake overrides [skip ci] * kick off a build * fix dependency graph * devcontainer * run all CI * remove testing-only changes [skip ci] --- conda/recipes/libraft/conda_build_config.yaml | 4 ++-- .../raft-ann-bench-cpu/conda_build_config.yaml | 4 ++-- cpp/cmake/thirdparty/get_spdlog.cmake | 11 +---------- 3 files changed, 5 insertions(+), 14 deletions(-) diff --git a/conda/recipes/libraft/conda_build_config.yaml b/conda/recipes/libraft/conda_build_config.yaml index 00b133c821..5c0047fb9c 100644 --- a/conda/recipes/libraft/conda_build_config.yaml +++ b/conda/recipes/libraft/conda_build_config.yaml @@ -73,7 +73,7 @@ cuda11_cuda_profiler_api_run_version: - ">=11.4.240,<12" spdlog_version: - - ">=1.12.0,<1.13" + - ">=1.14.1,<1.15" fmt_version: - - ">=10.1.1,<11" + - ">=11.0.2,<12" diff --git a/conda/recipes/raft-ann-bench-cpu/conda_build_config.yaml b/conda/recipes/raft-ann-bench-cpu/conda_build_config.yaml index 70d1f0490e..ed6f708e14 100644 --- a/conda/recipes/raft-ann-bench-cpu/conda_build_config.yaml +++ b/conda/recipes/raft-ann-bench-cpu/conda_build_config.yaml @@ -23,7 +23,7 @@ nlohmann_json_version: - ">=3.11.2" spdlog_version: - - ">=1.12.0,<1.13" + - ">=1.14.1,<1.15" fmt_version: - - ">=10.1.1,<11" + - ">=11.0.2,<12" diff --git a/cpp/cmake/thirdparty/get_spdlog.cmake b/cpp/cmake/thirdparty/get_spdlog.cmake index 7be7804c7e..57e38c2638 100644 --- a/cpp/cmake/thirdparty/get_spdlog.cmake +++ b/cpp/cmake/thirdparty/get_spdlog.cmake @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -19,15 +19,6 @@ function(find_and_configure_spdlog) rapids_cpm_spdlog(FMT_OPTION "EXTERNAL_FMT_HO" INSTALL_EXPORT_SET rmm-exports) rapids_export_package(BUILD spdlog rmm-exports) - if(spdlog_ADDED) - rapids_export( - BUILD spdlog - EXPORT_SET spdlog - GLOBAL_TARGETS spdlog spdlog_header_only - NAMESPACE spdlog::) - include("${rapids-cmake-dir}/export/find_package_root.cmake") - rapids_export_find_package_root(BUILD spdlog [=[${CMAKE_CURRENT_LIST_DIR}]=] EXPORT_SET rmm-exports) - endif() endfunction() find_and_configure_spdlog() \ No newline at end of file From 878cefc923f25df07fc362126d1c269bdff30c0c Mon Sep 17 00:00:00 2001 From: Jake Awe <50372925+AyodeAwe@users.noreply.github.com> Date: Tue, 24 Sep 2024 14:10:57 -0500 Subject: [PATCH 4/6] update update-version.sh to use packaging lib (#2447) --- ci/release/update-version.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 5bb98511cf..032b88b4aa 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -25,8 +25,8 @@ NEXT_SHORT_TAG=${NEXT_MAJOR}.${NEXT_MINOR} NEXT_UCXX_SHORT_TAG="$(curl -sL https://version.gpuci.io/rapids/${NEXT_SHORT_TAG})" # Need to distutils-normalize the original version -NEXT_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${NEXT_SHORT_TAG}'))") -NEXT_UCXX_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${NEXT_UCXX_SHORT_TAG}'))") +NEXT_SHORT_TAG_PEP440=$(python -c "from packaging.version import Version; print(Version('${NEXT_SHORT_TAG}'))") +NEXT_UCXX_SHORT_TAG_PEP440=$(python -c "from packaging.version import Version; print(Version('${NEXT_UCXX_SHORT_TAG}'))") echo "Preparing release $CURRENT_TAG => $NEXT_FULL_TAG" From 17757b84c38dfa671abe5e8794fd2bd9bc636772 Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Wed, 25 Sep 2024 13:49:36 -0500 Subject: [PATCH 5/6] Switch traceback to `native` (#2446) In cudf we have observed a ~10% speed up of pytest suite execution by switching pytest traceback to `--native`: ``` currently: 102474 passed, 2117 skipped, 902 xfailed in 892.16s (0:14:52) --tb=short: 102474 passed, 2117 skipped, 902 xfailed in 898.99s (0:14:58) --tb=no: 102474 passed, 2117 skipped, 902 xfailed in 815.98s (0:13:35) --tb=native: 102474 passed, 2117 skipped, 902 xfailed in 820.92s (0:13:40) ``` This PR makes a similar change to `raft` repo. xref: https://github.com/rapidsai/cudf/pull/16851 Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/2446 --- python/pylibraft/pylibraft/test/pytest.ini | 5 +++++ python/raft-dask/pytest.ini | 1 + python/raft-dask/raft_dask/test/pytest.ini | 5 +++++ 3 files changed, 11 insertions(+) create mode 100644 python/pylibraft/pylibraft/test/pytest.ini create mode 100644 python/raft-dask/raft_dask/test/pytest.ini diff --git a/python/pylibraft/pylibraft/test/pytest.ini b/python/pylibraft/pylibraft/test/pytest.ini new file mode 100644 index 0000000000..bf70c06f84 --- /dev/null +++ b/python/pylibraft/pylibraft/test/pytest.ini @@ -0,0 +1,5 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +[pytest] +addopts = --tb=native + diff --git a/python/raft-dask/pytest.ini b/python/raft-dask/pytest.ini index 2467e2089a..e09c2b173d 100644 --- a/python/raft-dask/pytest.ini +++ b/python/raft-dask/pytest.ini @@ -10,3 +10,4 @@ markers = nccl: marks a test as using NCCL ucx: marks a test as using UCX-Py ucxx: marks a test as using UCXX +addopts = --tb=native diff --git a/python/raft-dask/raft_dask/test/pytest.ini b/python/raft-dask/raft_dask/test/pytest.ini new file mode 100644 index 0000000000..bf70c06f84 --- /dev/null +++ b/python/raft-dask/raft_dask/test/pytest.ini @@ -0,0 +1,5 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +[pytest] +addopts = --tb=native + From 0f8b09720054e5cf7a65dddfa02a2f06e7ba8adb Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Wed, 25 Sep 2024 20:53:54 -0400 Subject: [PATCH 6/6] Disable NN Descent Batch tests temporarily (#2453) Linked issue https://github.com/rapidsai/raft/issues/2450 Authors: - Divye Gala (https://github.com/divyegala) Approvers: - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/2453 --- cpp/test/CMakeLists.txt | 4 +++- cpp/test/neighbors/ann_nn_descent.cuh | 18 ++++++++++-------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index a497e6d3ba..5d504d2100 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -440,7 +440,9 @@ if(BUILD_TESTS) neighbors/ann_nn_descent/test_float_uint32_t.cu neighbors/ann_nn_descent/test_int8_t_uint32_t.cu neighbors/ann_nn_descent/test_uint8_t_uint32_t.cu - neighbors/ann_nn_descent/test_batch_float_uint32_t.cu + # TODO: Investigate why this test is failing Reference issue + # https://github.com/rapidsai/raft/issues/2450 + # neighbors/ann_nn_descent/test_batch_float_uint32_t.cu LIB EXPLICIT_INSTANTIATE_ONLY GPUS diff --git a/cpp/test/neighbors/ann_nn_descent.cuh b/cpp/test/neighbors/ann_nn_descent.cuh index 2f9d4e252b..5070d83b15 100644 --- a/cpp/test/neighbors/ann_nn_descent.cuh +++ b/cpp/test/neighbors/ann_nn_descent.cuh @@ -318,13 +318,15 @@ const std::vector inputs = raft::util::itertools::product inputsBatch = - raft::util::itertools::product( - {std::make_pair(0.9, 3lu), std::make_pair(0.9, 2lu)}, // min_recall, n_clusters - {4000, 5000}, // n_rows - {192, 512}, // dim - {32, 64}, // graph_degree - {raft::distance::DistanceType::L2Expanded}, - {false, true}); +// TODO: Investigate why this test is failing +// Reference issue https://github.com/rapidsai/raft/issues/2450 +// const std::vector inputsBatch = +// raft::util::itertools::product( +// {std::make_pair(0.9, 3lu), std::make_pair(0.9, 2lu)}, // min_recall, n_clusters +// {4000, 5000}, // n_rows +// {192, 512}, // dim +// {32, 64}, // graph_degree +// {raft::distance::DistanceType::L2Expanded}, +// {false, true}); } // namespace raft::neighbors::experimental::nn_descent