From 1e4961e2354afba116e3479c5ec9041937b9922e Mon Sep 17 00:00:00 2001 From: JiefengWang <146178560+JieFengWang@users.noreply.github.com> Date: Wed, 17 Jan 2024 20:14:57 +0800 Subject: [PATCH] Fix compile failure on RTX 4090 (#2076) [bug] Fix compile failure on RTX 4090. Related issue: #2073. Authors: - JiefengWang (https://github.com/JieFengWang) - William Hicks (https://github.com/wphicks) - Corey J. Nolet (https://github.com/cjnolet) Approvers: - William Hicks (https://github.com/wphicks) - Corey J. Nolet (https://github.com/cjnolet) URL: https://github.com/rapidsai/raft/pull/2076 --- cpp/include/raft/neighbors/detail/nn_descent.cuh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/include/raft/neighbors/detail/nn_descent.cuh b/cpp/include/raft/neighbors/detail/nn_descent.cuh index 6e0636c37a..f624a6015b 100644 --- a/cpp/include/raft/neighbors/detail/nn_descent.cuh +++ b/cpp/include/raft/neighbors/detail/nn_descent.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -686,12 +686,12 @@ __device__ __forceinline__ void remove_duplicates( // Per // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#features-and-technical-specifications, // MAX_RESIDENT_THREAD_PER_SM = BLOCK_SIZE * BLOCKS_PER_SM = 2048 -// For architectures 750 and 860, the values for MAX_RESIDENT_THREAD_PER_SM +// For architectures 750 and 860 (890), the values for MAX_RESIDENT_THREAD_PER_SM // is 1024 and 1536 respectively, which means the bounds don't work anymore template > RAFT_KERNEL #ifdef __CUDA_ARCH__ -#if (__CUDA_ARCH__) == 750 || (__CUDA_ARCH__) == 860 +#if (__CUDA_ARCH__) == 750 || ((__CUDA_ARCH__) >= 860 && (__CUDA_ARCH__) <= 890) __launch_bounds__(BLOCK_SIZE) #else __launch_bounds__(BLOCK_SIZE, 4)