Skip to content

Commit

Permalink
This PR adds support for __half and nv_bfloat16 to myAtomicReduce (#1585)
Browse files Browse the repository at this point in the history
Authors:
  - Nicolas Blin (https://github.com/Kh4ster)

Approvers:
  - Louis Sugy (https://github.com/Nyrio)
  - Corey J. Nolet (https://github.com/cjnolet)

URL: #1585
  • Loading branch information
Kh4ster authored Jun 10, 2023
1 parent 681008e commit ad0c1c1
Showing 1 changed file with 34 additions and 0 deletions.
34 changes: 34 additions & 0 deletions cpp/include/raft/util/cuda_utils.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@
#include <stdint.h>
#include <type_traits>

#if defined(_RAFT_HAS_CUDA)
#include <cuda_bf16.h>
#include <cuda_fp16.h>
#endif

#include <raft/core/cudart_utils.hpp>
#include <raft/core/math.hpp>
#include <raft/core/operators.hpp>
Expand Down Expand Up @@ -79,6 +84,35 @@ DI void myAtomicReduce(float* address, float val, ReduceLambda op)
} while (assumed != old);
}

// The 16-bit (unsigned short) overload of atomicCAS requires compute capability 7.0 or higher
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 700)
/**
 * @brief Atomically combines `val` into the `__half` stored at `address`.
 *
 * The stored value is accessed through its 16-bit pattern so that the
 * `unsigned short` overload of `atomicCAS` can be used. The update is retried
 * until the compare-and-swap observes the same bits the reduction was
 * computed from.
 *
 * @tparam ReduceLambda callable invoked as `op(val, current)`; its result is
 *                      converted to `__half` before being stored
 * @param address location of the value to update
 * @param val     operand passed as the first argument of `op`
 * @param op      reduction operation; may be invoked more than once when the
 *                compare-and-swap has to be retried
 */
template <typename ReduceLambda>
DI void myAtomicReduce(__half* address, __half val, ReduceLambda op)
{
  // View the half-precision slot as raw 16-bit storage for atomicCAS.
  auto* bits_ptr = reinterpret_cast<unsigned short int*>(address);

  unsigned short int observed = *bits_ptr;
  for (;;) {
    const unsigned short int expected = observed;
    // Compute the candidate value from the bits we last saw.
    const __half reduced = op(val, __ushort_as_half(expected));
    // atomicCAS returns the bits that were in memory before the attempt.
    observed = atomicCAS(bits_ptr, expected, __half_as_ushort(reduced));
    // The swap happened only if memory still held what we expected.
    if (observed == expected) { break; }
  }
}
#endif

// Needed for nv_bfloat16 support
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 800)
/**
 * @brief Atomically combines `val` into the `nv_bfloat16` stored at `address`.
 *
 * The stored value is accessed through its 16-bit pattern so that the
 * `unsigned short` overload of `atomicCAS` can be used. The update is retried
 * until the compare-and-swap observes the same bits the reduction was
 * computed from.
 *
 * @tparam ReduceLambda callable invoked as `op(val, current)`; its result is
 *                      converted to `nv_bfloat16` before being stored
 * @param address location of the value to update
 * @param val     operand passed as the first argument of `op`
 * @param op      reduction operation; may be invoked more than once when the
 *                compare-and-swap has to be retried
 */
template <typename ReduceLambda>
DI void myAtomicReduce(nv_bfloat16* address, nv_bfloat16 val, ReduceLambda op)
{
  // View the bfloat16 slot as raw 16-bit storage for atomicCAS.
  auto* bits_ptr = reinterpret_cast<unsigned short int*>(address);

  unsigned short int observed = *bits_ptr;
  for (;;) {
    const unsigned short int expected = observed;
    // Compute the candidate value from the bits we last saw.
    const nv_bfloat16 reduced = op(val, __ushort_as_bfloat16(expected));
    // atomicCAS returns the bits that were in memory before the attempt.
    observed = atomicCAS(bits_ptr, expected, __bfloat16_as_ushort(reduced));
    // The swap happened only if memory still held what we expected.
    if (observed == expected) { break; }
  }
}
#endif

template <typename ReduceLambda>
DI void myAtomicReduce(int* address, int val, ReduceLambda op)
{
Expand Down

0 comments on commit ad0c1c1

Please sign in to comment.