From fa24d7e3e0ea67d53b7057e07d5bc7e75ffda9ba Mon Sep 17 00:00:00 2001
From: Sven van Haastregt
Date: Fri, 11 Feb 2022 10:14:14 +0000
Subject: [PATCH] [OpenCL] Add OpenCL 3.0 atomics to -fdeclare-opencl-builtins

Add the atomic overloads for the `global` and `local` address spaces,
which are new in OpenCL 3.0. Ensure the preexisting `generic`
overloads are guarded by the generic address space feature macro.

Ensure a subset of the atomic builtins are guarded by the
`__opencl_c_atomic_order_seq_cst` and `__opencl_c_atomic_scope_device`
feature macros, and enable those macros for SPIR/SPIR-V targets in
`opencl-c-base.h`.

Also guard the `cl_ext_float_atomics` builtins with the atomic order
and scope feature macros.

Differential Revision: https://reviews.llvm.org/D119420

(cherry picked from commit 50f8abb9f40a6c4974ec71e760773a711732648f)
---
 clang/lib/Headers/opencl-c-base.h             |   2 +
 clang/lib/Sema/OpenCLBuiltins.td              | 142 +++++++++++-------
 .../SemaOpenCL/fdeclare-opencl-builtins.cl    |  28 ++++
 3 files changed, 118 insertions(+), 54 deletions(-)

diff --git a/clang/lib/Headers/opencl-c-base.h b/clang/lib/Headers/opencl-c-base.h
index ad276dc0f6aae8..5191c41bcd0573 100644
--- a/clang/lib/Headers/opencl-c-base.h
+++ b/clang/lib/Headers/opencl-c-base.h
@@ -67,6 +67,8 @@
 #if (__OPENCL_CPP_VERSION__ == 202100 || __OPENCL_C_VERSION__ == 300)
 // For the SPIR and SPIR-V target all features are supported.
 #if defined(__SPIR__) || defined(__SPIRV__)
+#define __opencl_c_atomic_order_seq_cst 1
+#define __opencl_c_atomic_scope_device 1
 #define __opencl_c_atomic_scope_all_devices 1
 #define __opencl_c_read_write_images 1
 #endif // defined(__SPIR__)
diff --git a/clang/lib/Sema/OpenCLBuiltins.td b/clang/lib/Sema/OpenCLBuiltins.td
index 4d36df352d5ec5..ab30553005729d 100644
--- a/clang/lib/Sema/OpenCLBuiltins.td
+++ b/clang/lib/Sema/OpenCLBuiltins.td
@@ -57,6 +57,23 @@ class FunctionExtension<string _Ext> : AbstractExtension<_Ext>;
 // disabled.
 class TypeExtension<string _Ext> : AbstractExtension<_Ext>;
 
+// Concatenate zero or more space-separated extensions in NewExts to Base and
+// return the resulting FunctionExtension in ret.
+class concatExtension<FunctionExtension Base, string NewExts> {
+  FunctionExtension ret = FunctionExtension<
+    !cond(
+      // Return Base extension if NewExts is empty,
+      !empty(NewExts) : Base.ExtName,
+
+      // otherwise, return NewExts if Base extension is empty,
+      !empty(Base.ExtName) : NewExts,
+
+      // otherwise, concatenate NewExts to Base.
+      true : Base.ExtName # " " # NewExts
+    )
+  >;
+}
+
 // TypeExtension definitions.
 def NoTypeExt   : TypeExtension<"">;
 def Fp16TypeExt : TypeExtension<"cl_khr_fp16">;
@@ -1043,40 +1060,57 @@ let Extension = FuncExtOpenCLCxx in {
 // OpenCL v2.0 s6.13.11 - Atomic Functions.
 
 // An atomic builtin with 2 additional _explicit variants.
-multiclass BuiltinAtomicExplicit<string Name, list<Type> Types> {
+multiclass BuiltinAtomicExplicit<string Name, list<Type> Types, FunctionExtension BaseExt> {
   // Without explicit MemoryOrder or MemoryScope.
-  def : Builtin<Name, Types>;
+  let Extension = concatExtension<BaseExt, "__opencl_c_atomic_order_seq_cst __opencl_c_atomic_scope_device">.ret in {
+    def : Builtin<Name, Types>;
+  }
 
   // With an explicit MemoryOrder argument.
-  def : Builtin<Name # "_explicit", !listconcat(Types, [MemoryOrder])>;
+  let Extension = concatExtension<BaseExt, "__opencl_c_atomic_scope_device">.ret in {
+    def : Builtin<Name # "_explicit", !listconcat(Types, [MemoryOrder])>;
+  }
 
   // With explicit MemoryOrder and MemoryScope arguments.
-  def : Builtin<Name # "_explicit", !listconcat(Types, [MemoryOrder, MemoryScope])>;
+  let Extension = BaseExt in {
+    def : Builtin<Name # "_explicit", !listconcat(Types, [MemoryOrder, MemoryScope])>;
+  }
 }
 
 // OpenCL 2.0 atomic functions that have a pointer argument in a given address space.
-multiclass OpenCL2Atomics<AddressSpace addrspace> {
+multiclass OpenCL2Atomics<AddressSpace addrspace, FunctionExtension BaseExt> {
   foreach TypePair = [[AtomicInt, Int], [AtomicUInt, UInt],
                       [AtomicLong, Long], [AtomicULong, ULong],
                       [AtomicFloat, Float], [AtomicDouble, Double]] in {
     def : Builtin<"atomic_init",
         [Void, PointerType<Volatile<TypePair[0]>, addrspace>, TypePair[1]]>;
     defm : BuiltinAtomicExplicit<"atomic_store",
-        [Void, PointerType<Volatile<TypePair[0]>, addrspace>, TypePair[1]]>;
+        [Void, PointerType<Volatile<TypePair[0]>, addrspace>, TypePair[1]], BaseExt>;
     defm : BuiltinAtomicExplicit<"atomic_load",
-        [TypePair[1], PointerType<Volatile<TypePair[0]>, addrspace>]>;
+        [TypePair[1], PointerType<Volatile<TypePair[0]>, addrspace>], BaseExt>;
     defm : BuiltinAtomicExplicit<"atomic_exchange",
-        [TypePair[1], PointerType<Volatile<TypePair[0]>, addrspace>, TypePair[1]]>;
+        [TypePair[1], PointerType<Volatile<TypePair[0]>, addrspace>, TypePair[1]], BaseExt>;
     foreach Variant = ["weak", "strong"] in {
-      def : Builtin<"atomic_compare_exchange_" # Variant,
-          [Bool, PointerType<Volatile<TypePair[0]>, addrspace>,
-           PointerType<TypePair[1], GenericAS>, TypePair[1]]>;
-      def : Builtin<"atomic_compare_exchange_" # Variant # "_explicit",
-          [Bool, PointerType<Volatile<TypePair[0]>, addrspace>,
-           PointerType<TypePair[1], GenericAS>, TypePair[1], MemoryOrder, MemoryOrder]>;
-      def : Builtin<"atomic_compare_exchange_" # Variant # "_explicit",
-          [Bool, PointerType<Volatile<TypePair[0]>, addrspace>,
-           PointerType<TypePair[1], GenericAS>, TypePair[1], MemoryOrder, MemoryOrder, MemoryScope]>;
+      foreach exp_ptr_addrspace = !cond(
+            !eq(BaseExt, FuncExtOpenCLCGenericAddressSpace): [GenericAS],
+            !eq(BaseExt, FuncExtOpenCLCNamedAddressSpaceBuiltins): [GlobalAS, LocalAS, PrivateAS])
+          in {
+        let Extension = concatExtension<BaseExt, "__opencl_c_atomic_order_seq_cst __opencl_c_atomic_scope_device">.ret in {
+          def : Builtin<"atomic_compare_exchange_" # Variant,
+              [Bool, PointerType<Volatile<TypePair[0]>, addrspace>,
+               PointerType<TypePair[1], exp_ptr_addrspace>, TypePair[1]]>;
+        }
+        let Extension = concatExtension<BaseExt, "__opencl_c_atomic_scope_device">.ret in {
+          def : Builtin<"atomic_compare_exchange_" # Variant # "_explicit",
+              [Bool, PointerType<Volatile<TypePair[0]>, addrspace>,
+               PointerType<TypePair[1], exp_ptr_addrspace>, TypePair[1], MemoryOrder, MemoryOrder]>;
+        }
+        let Extension = BaseExt in {
+          def : Builtin<"atomic_compare_exchange_" # Variant # "_explicit",
+              [Bool, PointerType<Volatile<TypePair[0]>, addrspace>,
+               PointerType<TypePair[1], exp_ptr_addrspace>, TypePair[1], MemoryOrder, MemoryOrder, MemoryScope]>;
+        }
+      }
     }
   }
 
@@ -1085,68 +1119,68 @@ multiclass OpenCL2Atomics<AddressSpace addrspace> {
                       [AtomicUIntPtr, UIntPtr, PtrDiff]] in {
     foreach ModOp = ["add", "sub"] in {
       defm : BuiltinAtomicExplicit<"atomic_fetch_" # ModOp,
-          [TypePair[1], PointerType<Volatile<TypePair[0]>, addrspace>, TypePair[2]]>;
+          [TypePair[1], PointerType<Volatile<TypePair[0]>, addrspace>, TypePair[2]], BaseExt>;
     }
   }
   foreach TypePair = [[AtomicInt, Int, Int], [AtomicUInt, UInt, UInt],
                       [AtomicLong, Long, Long], [AtomicULong, ULong, ULong]] in {
     foreach ModOp = ["or", "xor", "and", "min", "max"] in {
       defm : BuiltinAtomicExplicit<"atomic_fetch_" # ModOp,
-          [TypePair[1], PointerType<Volatile<TypePair[0]>, addrspace>, TypePair[2]]>;
+          [TypePair[1], PointerType<Volatile<TypePair[0]>, addrspace>, TypePair[2]], BaseExt>;
     }
   }
 
   defm : BuiltinAtomicExplicit<"atomic_flag_clear",
-      [Void, PointerType<Volatile<AtomicFlag>, addrspace>]>;
+      [Void, PointerType<Volatile<AtomicFlag>, addrspace>], BaseExt>;
 
   defm : BuiltinAtomicExplicit<"atomic_flag_test_and_set",
-      [Bool, PointerType<Volatile<AtomicFlag>, addrspace>]>;
+      [Bool, PointerType<Volatile<AtomicFlag>, addrspace>], BaseExt>;
 }
 
 let MinVersion = CL20 in {
   def : Builtin<"atomic_work_item_fence", [Void, MemFenceFlags, MemoryOrder, MemoryScope]>;
 
-  defm : OpenCL2Atomics<GenericAS>;
+  defm : OpenCL2Atomics<GenericAS, FuncExtOpenCLCGenericAddressSpace>;
+  defm : OpenCL2Atomics<GlobalAS, FuncExtOpenCLCNamedAddressSpaceBuiltins>;
+  defm : OpenCL2Atomics<LocalAS, FuncExtOpenCLCNamedAddressSpaceBuiltins>;
 }
 
 // The functionality added by cl_ext_float_atomics extension
 let MinVersion = CL20 in {
   foreach addrspace = [GlobalAS, LocalAS, GenericAS] in {
-    let Extension = !cast<FunctionExtension>("FuncExtFloatAtomicsFp16" # addrspace # "LoadStore") in {
-      defm : BuiltinAtomicExplicit<"atomic_store",
-          [Void, PointerType<Volatile<AtomicHalf>, addrspace>, AtomicHalf]>;
-      defm : BuiltinAtomicExplicit<"atomic_load",
-          [Half, PointerType<Volatile<AtomicHalf>, addrspace>]>;
-      defm : BuiltinAtomicExplicit<"atomic_exchange",
-          [Half, PointerType<Volatile<AtomicHalf>, addrspace>, Half]>;
-    }
+    defvar extension_fp16 = !cast<FunctionExtension>("FuncExtFloatAtomicsFp16" # addrspace # "LoadStore");
+
+    defm : BuiltinAtomicExplicit<"atomic_store",
+        [Void, PointerType<Volatile<AtomicHalf>, addrspace>, AtomicHalf], extension_fp16>;
+    defm : BuiltinAtomicExplicit<"atomic_load",
+        [Half, PointerType<Volatile<AtomicHalf>, addrspace>], extension_fp16>;
+    defm : BuiltinAtomicExplicit<"atomic_exchange",
+        [Half, PointerType<Volatile<AtomicHalf>, addrspace>, Half], extension_fp16>;
+
     foreach ModOp = ["add", "sub"] in {
-      let Extension = !cast<FunctionExtension>("FuncExtFloatAtomicsFp16" # addrspace # "Add") in {
-        defm : BuiltinAtomicExplicit<"atomic_fetch_" # ModOp,
-            [Half, PointerType<Volatile<AtomicHalf>, addrspace>, Half]>;
-      }
-      let Extension = !cast<FunctionExtension>("FuncExtFloatAtomicsFp32" # addrspace # "Add") in {
-        defm : BuiltinAtomicExplicit<"atomic_fetch_" # ModOp,
-            [Float, PointerType<Volatile<AtomicFloat>, addrspace>, Float]>;
-      }
-      let Extension = !cast<FunctionExtension>("FuncExtFloatAtomicsFp64" # addrspace # "Add") in {
-        defm : BuiltinAtomicExplicit<"atomic_fetch_" # ModOp,
-            [Double, PointerType<Volatile<AtomicDouble>, addrspace>, Double]>;
-      }
+      defvar extension_fp16 = !cast<FunctionExtension>("FuncExtFloatAtomicsFp16" # addrspace # "Add");
+      defvar extension_fp32 = !cast<FunctionExtension>("FuncExtFloatAtomicsFp32" # addrspace # "Add");
+      defvar extension_fp64 = !cast<FunctionExtension>("FuncExtFloatAtomicsFp64" # addrspace # "Add");
+
+      defm : BuiltinAtomicExplicit<"atomic_fetch_" # ModOp,
+          [Half, PointerType<Volatile<AtomicHalf>, addrspace>, Half], extension_fp16>;
+      defm : BuiltinAtomicExplicit<"atomic_fetch_" # ModOp,
+          [Float, PointerType<Volatile<AtomicFloat>, addrspace>, Float], extension_fp32>;
+      defm : BuiltinAtomicExplicit<"atomic_fetch_" # ModOp,
+          [Double, PointerType<Volatile<AtomicDouble>, addrspace>, Double], extension_fp64>;
     }
+
     foreach ModOp = ["min", "max"] in {
-      let Extension = !cast<FunctionExtension>("FuncExtFloatAtomicsFp16" # addrspace # "MinMax") in {
-        defm : BuiltinAtomicExplicit<"atomic_fetch_" # ModOp,
-            [Half, PointerType<Volatile<AtomicHalf>, addrspace>, Half]>;
-      }
-      let Extension = !cast<FunctionExtension>("FuncExtFloatAtomicsFp32" # addrspace # "MinMax") in {
-        defm : BuiltinAtomicExplicit<"atomic_fetch_" # ModOp,
-            [Float, PointerType<Volatile<AtomicFloat>, addrspace>, Float]>;
-      }
-      let Extension = !cast<FunctionExtension>("FuncExtFloatAtomicsFp64" # addrspace # "MinMax") in {
-        defm : BuiltinAtomicExplicit<"atomic_fetch_" # ModOp,
-            [Double, PointerType<Volatile<AtomicDouble>, addrspace>, Double]>;
-      }
+      defvar extension_fp16 = !cast<FunctionExtension>("FuncExtFloatAtomicsFp16" # addrspace # "MinMax");
+      defvar extension_fp32 = !cast<FunctionExtension>("FuncExtFloatAtomicsFp32" # addrspace # "MinMax");
+      defvar extension_fp64 = !cast<FunctionExtension>("FuncExtFloatAtomicsFp64" # addrspace # "MinMax");
+
+      defm : BuiltinAtomicExplicit<"atomic_fetch_" # ModOp,
+          [Half, PointerType<Volatile<AtomicHalf>, addrspace>, Half], extension_fp16>;
+      defm : BuiltinAtomicExplicit<"atomic_fetch_" # ModOp,
+          [Float, PointerType<Volatile<AtomicFloat>, addrspace>, Float], extension_fp32>;
+      defm : BuiltinAtomicExplicit<"atomic_fetch_" # ModOp,
+          [Double, PointerType<Volatile<AtomicDouble>, addrspace>, Double], extension_fp64>;
     }
   }
 }
diff --git a/clang/test/SemaOpenCL/fdeclare-opencl-builtins.cl b/clang/test/SemaOpenCL/fdeclare-opencl-builtins.cl
index d1a4f728372604..d526c32d65a920 100644
--- a/clang/test/SemaOpenCL/fdeclare-opencl-builtins.cl
+++ b/clang/test/SemaOpenCL/fdeclare-opencl-builtins.cl
@@ -9,6 +9,7 @@
 // RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CLC++ -fdeclare-opencl-builtins -finclude-default-header
 // RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CLC++2021 -fdeclare-opencl-builtins -finclude-default-header
 // RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CL2.0 -fdeclare-opencl-builtins -finclude-default-header -cl-ext=-cl_khr_fp64 -DNO_FP64
+// RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CL3.0 -fdeclare-opencl-builtins -finclude-default-header -DNO_ATOMSCOPE
 
 // Test the -fdeclare-opencl-builtins option. This is not a completeness
 // test, so it should not test for all builtins defined by OpenCL. Instead
@@ -80,6 +81,11 @@ typedef struct {int a;} ndrange_t;
 #define __opencl_c_read_write_images 1
 #endif
 
+#if (__OPENCL_CPP_VERSION__ == 100 || __OPENCL_C_VERSION__ == 200)
+#define __opencl_c_atomic_order_seq_cst 1
+#define __opencl_c_atomic_scope_device 1
+#endif
+
 #define __opencl_c_named_address_space_builtins 1
 #endif
 
@@ -98,6 +104,7 @@ kernel void test_pointers(volatile global void *global_p, global const int4 *a)
 #if !defined(NO_HEADER) && (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
 kernel void test_enum_args(volatile global atomic_int *global_p, global int *expected) {
   int desired;
+  atomic_work_item_fence(CLK_GLOBAL_MEM_FENCE, memory_order_acq_rel, memory_scope_device);
   atomic_compare_exchange_strong_explicit(global_p, expected, desired,
                                           memory_order_acq_rel,
                                           memory_order_relaxed,
@@ -156,6 +163,27 @@ void test_atomic_fetch_with_address_space(volatile __generic atomic_float *a_flo
 }
 #endif // !defined(NO_HEADER) && __OPENCL_C_VERSION__ >= 200
 
+#if defined(NO_ATOMSCOPE) && __OPENCL_C_VERSION__ >= 300
+// Disable the feature by undefining the feature macro.
+#undef __opencl_c_atomic_scope_device
+
+// Test that only the overload with explicit order and scope arguments is
+// available when the __opencl_c_atomic_scope_device feature is disabled.
+void test_atomics_without_scope_device(volatile __generic atomic_int *a_int) {
+  int d;
+
+  atomic_exchange(a_int, d);
+  // expected-error@-1{{implicit declaration of function 'atomic_exchange' is invalid in OpenCL}}
+
+  atomic_exchange_explicit(a_int, d, memory_order_seq_cst);
+  // expected-error@-1{{no matching function for call to 'atomic_exchange_explicit'}}
+  // expected-note@-2 + {{candidate function not viable}}
+
+  atomic_exchange_explicit(a_int, d, memory_order_seq_cst, memory_scope_work_group);
+}
+
+#endif
+
 // Test old atomic overloaded with generic address space in C++ for OpenCL.
 #if __OPENCL_C_VERSION__ >= 200
 void test_legacy_atomics_cpp(__generic volatile unsigned int *a) {