diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 1de80e0460752..192a62b0035b4 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -1127,9 +1127,7 @@ def OpenMP : DiagGroup<"openmp", [ ]>; // SYCL warnings -def Sycl2017Compat : DiagGroup<"sycl-2017-compat">; -def Sycl2020Compat : DiagGroup<"sycl-2020-compat">; -def SyclStrict : DiagGroup<"sycl-strict", [ Sycl2017Compat, Sycl2020Compat]>; +def SyclStrict : DiagGroup<"sycl-strict">; def SyclTarget : DiagGroup<"sycl-target">; // Backend warnings. diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index f900d226c374f..5a2f8810b59a5 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -10979,12 +10979,6 @@ def warn_boolean_attribute_argument_is_not_valid: Warning< def err_sycl_attibute_cannot_be_applied_here : Error<"%0 attribute cannot be applied to a " "static function or function in an anonymous namespace">; -def warn_sycl_pass_by_value_deprecated - : Warning<"Passing kernel functions by value is deprecated in SYCL 2020">, - InGroup; -def warn_sycl_pass_by_reference_future - : Warning<"Passing of kernel functions by reference is a SYCL 2020 extension">, - InGroup; def warn_sycl_attibute_function_raw_ptr : Warning<"SYCL 1.2.1 specification does not allow %0 attribute applied " "to a function with a raw pointer " diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index e3b6adaaf7b4c..b64c7105a142f 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -2587,7 +2587,6 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, if (const Arg *A = Args.getLastArg(OPT_sycl_std_EQ)) { Opts.SYCLVersion = llvm::StringSwitch(A->getValue()) .Cases("2017", "1.2.1", "121", "sycl-1.2.1", 2017) - .Case("2020", 2020) .Default(0U); if (Opts.SYCLVersion == 0U) { diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index c8fd21d3eccc6..ad5a3768846c7 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -698,38 +698,7 @@ getKernelInvocationKind(FunctionDecl *KernelCallerFunc) { } static const CXXRecordDecl *getKernelObjectType(FunctionDecl *Caller) { - QualType KernelParamTy = (*Caller->param_begin())->getType(); - // In SYCL 2020 kernels are now passed by reference. - if (KernelParamTy->isReferenceType()) - return KernelParamTy->getPointeeCXXRecordDecl(); - - // SYCL 1.2.1 - return KernelParamTy->getAsCXXRecordDecl(); -} - -static void checkKernelAndCaller(Sema &SemaRef, FunctionDecl *Caller, - const CXXRecordDecl *KernelObj) { - // check captures - if (KernelObj->isLambda()) { - for (const LambdaCapture &LC : KernelObj->captures()) - if (LC.capturesThis() && LC.isImplicit()) - SemaRef.Diag(LC.getLocation(), diag::err_implicit_this_capture); - } - - // check that calling kernel conforms to spec - assert(Caller->param_size() >= 1 && "missing kernel function argument."); - QualType KernelParamTy = (*Caller->param_begin())->getType(); - if (KernelParamTy->isReferenceType()) { - // passing by reference, so emit warning if not using SYCL 2020 - if (SemaRef.LangOpts.SYCLVersion < 2020) - SemaRef.Diag(Caller->getLocation(), - diag::warn_sycl_pass_by_reference_future); - } else { - // passing by value. emit warning if using SYCL 2020 or greater - if (SemaRef.LangOpts.SYCLVersion > 2017) - SemaRef.Diag(Caller->getLocation(), - diag::warn_sycl_pass_by_value_deprecated); - } + return (*Caller->param_begin())->getType()->getAsCXXRecordDecl(); } /// Creates a kernel parameter descriptor @@ -1944,8 +1913,11 @@ void Sema::ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, constructKernelName(*this, KernelCallerFunc, MC); StringRef KernelName(getLangOpts().SYCLUnnamedLambda ? StableName : CalculatedName); - - checkKernelAndCaller(*this, KernelCallerFunc, KernelObj); + if (KernelObj->isLambda()) { + for (const LambdaCapture &LC : KernelObj->captures()) + if (LC.capturesThis() && LC.isImplicit()) + Diag(LC.getLocation(), diag::err_implicit_this_capture); + } SyclKernelFieldChecker checker(*this); SyclKernelDeclCreator kernel_decl( *this, checker, KernelName, KernelObj->getLocation(), diff --git a/clang/test/CodeGenSYCL/Inputs/sycl.hpp b/clang/test/CodeGenSYCL/Inputs/sycl.hpp index 6d2c08b4ce31e..3184c58edcbfc 100644 --- a/clang/test/CodeGenSYCL/Inputs/sycl.hpp +++ b/clang/test/CodeGenSYCL/Inputs/sycl.hpp @@ -260,26 +260,26 @@ class spec_constant { #define ATTR_SYCL_KERNEL __attribute__((sycl_kernel)) template -ATTR_SYCL_KERNEL void kernel_single_task(const KernelType &kernelFunc) { +ATTR_SYCL_KERNEL void kernel_single_task(KernelType kernelFunc) { kernelFunc(); } template ATTR_SYCL_KERNEL void -kernel_parallel_for(const KernelType &KernelFunc) { +kernel_parallel_for(KernelType KernelFunc) { KernelFunc(id()); } template ATTR_SYCL_KERNEL void -kernel_parallel_for_work_group(const KernelType &KernelFunc) { +kernel_parallel_for_work_group(KernelType KernelFunc) { KernelFunc(group()); } class handler { public: template - void parallel_for(range numWorkItems, const KernelType &kernelFunc) { + void parallel_for(range numWorkItems, KernelType kernelFunc) { using NameT = typename get_kernel_name_t::name; #ifdef __SYCL_DEVICE_ONLY__ kernel_parallel_for(kernelFunc); @@ -289,7 +289,7 @@ class handler { } template - void parallel_for_work_group(range numWorkGroups, range WorkGroupSize, const KernelType &kernelFunc) { + void parallel_for_work_group(range numWorkGroups, range WorkGroupSize, KernelType kernelFunc) { using NameT = typename get_kernel_name_t::name; #ifdef __SYCL_DEVICE_ONLY__ kernel_parallel_for_work_group(kernelFunc); @@ -300,7 +300,7 @@ class handler { } template - void single_task(const KernelType &kernelFunc) { + void single_task(KernelType kernelFunc) { using NameT = typename get_kernel_name_t::name; #ifdef __SYCL_DEVICE_ONLY__ kernel_single_task(kernelFunc); diff --git a/clang/test/CodeGenSYCL/address-space-cond-op.cpp b/clang/test/CodeGenSYCL/address-space-cond-op.cpp index b2428dca72dfd..b43ed4e380f49 100644 --- a/clang/test/CodeGenSYCL/address-space-cond-op.cpp +++ b/clang/test/CodeGenSYCL/address-space-cond-op.cpp @@ -24,7 +24,7 @@ S foo(bool cond, S &lhs, S rhs) { } template -__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/CodeGenSYCL/address-space-new.cpp b/clang/test/CodeGenSYCL/address-space-new.cpp index 1caf5d49dd206..827215cec8ef4 100644 --- a/clang/test/CodeGenSYCL/address-space-new.cpp +++ b/clang/test/CodeGenSYCL/address-space-new.cpp @@ -110,11 +110,13 @@ void test() { // CHECK: call spir_func void @{{.*}}bar{{.*}}(%struct.{{.*}}.HasX addrspace(4)* align 4 dereferenceable(4) %[[SECOND]]) } + template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } + int main() { kernel_single_task([]() { test(); }); return 0; diff --git a/clang/test/CodeGenSYCL/address-space-of-returns.cpp b/clang/test/CodeGenSYCL/address-space-of-returns.cpp index 24bd762bb28d8..69104c261ddf7 100644 --- a/clang/test/CodeGenSYCL/address-space-of-returns.cpp +++ b/clang/test/CodeGenSYCL/address-space-of-returns.cpp @@ -29,7 +29,7 @@ A ret_agg() { // CHECK: define spir_func void @{{.*}}ret_agg{{.*}}(%struct.{{.*}}.A addrspace(4)* {{.*}} %agg.result) template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/CodeGenSYCL/address-space-parameter-conversions.cpp b/clang/test/CodeGenSYCL/address-space-parameter-conversions.cpp index cbb645009bb34..632453486c2f9 100644 --- a/clang/test/CodeGenSYCL/address-space-parameter-conversions.cpp +++ b/clang/test/CodeGenSYCL/address-space-parameter-conversions.cpp @@ -195,7 +195,7 @@ void usages2() { } template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } int main() { diff --git a/clang/test/CodeGenSYCL/basic-kernel-wrapper.cpp b/clang/test/CodeGenSYCL/basic-kernel-wrapper.cpp index 7c89a0154ec27..31795fc73b776 100644 --- a/clang/test/CodeGenSYCL/basic-kernel-wrapper.cpp +++ b/clang/test/CodeGenSYCL/basic-kernel-wrapper.cpp @@ -6,7 +6,7 @@ #include "sycl.hpp" template -__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/CodeGenSYCL/const-wg-init.cpp b/clang/test/CodeGenSYCL/const-wg-init.cpp index a31a008b17520..07a2a746dffbd 100644 --- a/clang/test/CodeGenSYCL/const-wg-init.cpp +++ b/clang/test/CodeGenSYCL/const-wg-init.cpp @@ -4,7 +4,7 @@ template __attribute__((sycl_kernel)) void -kernel_parallel_for_work_group(const KernelType &KernelFunc) { +kernel_parallel_for_work_group(KernelType KernelFunc) { cl::sycl::group<1> G; KernelFunc(G); } diff --git a/clang/test/CodeGenSYCL/debug-info-srcpos-kernel.cpp b/clang/test/CodeGenSYCL/debug-info-srcpos-kernel.cpp index 5cfd9a939913d..fcff6aae4f763 100644 --- a/clang/test/CodeGenSYCL/debug-info-srcpos-kernel.cpp +++ b/clang/test/CodeGenSYCL/debug-info-srcpos-kernel.cpp @@ -11,7 +11,7 @@ #include template -__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/CodeGenSYCL/device-functions.cpp b/clang/test/CodeGenSYCL/device-functions.cpp index f2c180a80ccf3..d52ba4c13a7f7 100644 --- a/clang/test/CodeGenSYCL/device-functions.cpp +++ b/clang/test/CodeGenSYCL/device-functions.cpp @@ -13,7 +13,7 @@ T bar(T arg) { } template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/CodeGenSYCL/device-variables.cpp b/clang/test/CodeGenSYCL/device-variables.cpp index e02ac0f6ae42b..83d6c4258995c 100644 --- a/clang/test/CodeGenSYCL/device-variables.cpp +++ b/clang/test/CodeGenSYCL/device-variables.cpp @@ -11,7 +11,7 @@ static constexpr int my_array[1] = {42}; void foo(const test_type &) {} template -__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/CodeGenSYCL/emit-kernel-in-virtual-func.cpp b/clang/test/CodeGenSYCL/emit-kernel-in-virtual-func.cpp index 2c4734b509c60..081d6b5d93c70 100644 --- a/clang/test/CodeGenSYCL/emit-kernel-in-virtual-func.cpp +++ b/clang/test/CodeGenSYCL/emit-kernel-in-virtual-func.cpp @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -fsycl -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -emit-llvm %s -o - | FileCheck %s template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/CodeGenSYCL/emit-kernel-in-virtual-func2.cpp b/clang/test/CodeGenSYCL/emit-kernel-in-virtual-func2.cpp index 15d11b379fa2d..2120176c5cd3a 100644 --- a/clang/test/CodeGenSYCL/emit-kernel-in-virtual-func2.cpp +++ b/clang/test/CodeGenSYCL/emit-kernel-in-virtual-func2.cpp @@ -1,8 +1,8 @@ // RUN: %clang_cc1 -fsycl -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -emit-llvm %s -o - | FileCheck %s template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { - kernelFunc(); +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { + kernelFunc(); } template diff --git a/clang/test/CodeGenSYCL/esimd_metadata1.cpp b/clang/test/CodeGenSYCL/esimd_metadata1.cpp index 0f776e9b43ddf..554f508ef4f32 100644 --- a/clang/test/CodeGenSYCL/esimd_metadata1.cpp +++ b/clang/test/CodeGenSYCL/esimd_metadata1.cpp @@ -9,7 +9,7 @@ // 3. Proper module !spirv.Source metadata is generated template -void kernel(const Func &f) __attribute__((sycl_kernel)) { +void kernel(Func f) __attribute__((sycl_kernel)) { f(); } diff --git a/clang/test/CodeGenSYCL/fpga_pipes.cpp b/clang/test/CodeGenSYCL/fpga_pipes.cpp index 237f9988f5893..bf1ed38afe24f 100644 --- a/clang/test/CodeGenSYCL/fpga_pipes.cpp +++ b/clang/test/CodeGenSYCL/fpga_pipes.cpp @@ -45,7 +45,7 @@ class pipe { }; template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/CodeGenSYCL/inheritance.cpp b/clang/test/CodeGenSYCL/inheritance.cpp index f648b244e552d..4ac785336fb39 100644 --- a/clang/test/CodeGenSYCL/inheritance.cpp +++ b/clang/test/CodeGenSYCL/inheritance.cpp @@ -24,7 +24,7 @@ struct base { struct derived : base, second_base { int a; - void operator()() const { + void operator()() { } }; diff --git a/clang/test/CodeGenSYCL/inline_asm.cpp b/clang/test/CodeGenSYCL/inline_asm.cpp index e5bb809c73952..60015f8ca6a82 100644 --- a/clang/test/CodeGenSYCL/inline_asm.cpp +++ b/clang/test/CodeGenSYCL/inline_asm.cpp @@ -3,7 +3,7 @@ class kernel; template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { // CHECK: %[[ARRAY_A:[0-9a-z]+]] = alloca [100 x i32], align 4 // CHECK: %[[IDX:.*]] = getelementptr inbounds [100 x i32], [100 x i32]* %[[ARRAY_A]], i64 0, i64 0 int a[100], i = 0; diff --git a/clang/test/CodeGenSYCL/inlining.cpp b/clang/test/CodeGenSYCL/inlining.cpp index a816d16f88a01..da01f2b70a8cb 100644 --- a/clang/test/CodeGenSYCL/inlining.cpp +++ b/clang/test/CodeGenSYCL/inlining.cpp @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -fsycl -fsycl-is-device -triple spir64-unknown-unknown-sycldevice %s -S -emit-llvm -o - | FileCheck %s template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/CodeGenSYCL/int_header1.cpp b/clang/test/CodeGenSYCL/int_header1.cpp index 76e37c4a45b5e..65b1b6cdab056 100644 --- a/clang/test/CodeGenSYCL/int_header1.cpp +++ b/clang/test/CodeGenSYCL/int_header1.cpp @@ -22,7 +22,7 @@ #include "sycl.hpp" template -__attribute__((sycl_kernel)) void kernel_single_task(const KernelType &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(KernelType kernelFunc) { kernelFunc(); } diff --git a/clang/test/CodeGenSYCL/int_header_inline_ns.cpp b/clang/test/CodeGenSYCL/int_header_inline_ns.cpp index efe4e12e98530..dedeb0345f0c5 100644 --- a/clang/test/CodeGenSYCL/int_header_inline_ns.cpp +++ b/clang/test/CodeGenSYCL/int_header_inline_ns.cpp @@ -8,7 +8,7 @@ #include "sycl.hpp" template -__attribute__((sycl_kernel)) void kernel_single_task(const KernelType &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(KernelType kernelFunc) { kernelFunc(); } diff --git a/clang/test/CodeGenSYCL/integration_header.cpp b/clang/test/CodeGenSYCL/integration_header.cpp index a0859f4925c43..c63e64a37f117 100644 --- a/clang/test/CodeGenSYCL/integration_header.cpp +++ b/clang/test/CodeGenSYCL/integration_header.cpp @@ -68,7 +68,7 @@ #include "sycl.hpp" template -__attribute__((sycl_kernel)) void kernel_single_task(const KernelType &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(KernelType kernelFunc) { kernelFunc(); } struct x {}; diff --git a/clang/test/CodeGenSYCL/intel-fpga-ivdep-array.cpp b/clang/test/CodeGenSYCL/intel-fpga-ivdep-array.cpp index a59d4733e59c3..707eeff1ea392 100644 --- a/clang/test/CodeGenSYCL/intel-fpga-ivdep-array.cpp +++ b/clang/test/CodeGenSYCL/intel-fpga-ivdep-array.cpp @@ -178,7 +178,7 @@ void ivdep_struct() { } template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/CodeGenSYCL/intel-fpga-ivdep-embedded-loops.cpp b/clang/test/CodeGenSYCL/intel-fpga-ivdep-embedded-loops.cpp index 72434fe3480af..c90671c8f04a0 100644 --- a/clang/test/CodeGenSYCL/intel-fpga-ivdep-embedded-loops.cpp +++ b/clang/test/CodeGenSYCL/intel-fpga-ivdep-embedded-loops.cpp @@ -148,7 +148,7 @@ void ivdep_embedded_multiple_dimensions() { } template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/CodeGenSYCL/intel-fpga-ivdep-global.cpp b/clang/test/CodeGenSYCL/intel-fpga-ivdep-global.cpp index c6a0298336136..df745e91d63d8 100644 --- a/clang/test/CodeGenSYCL/intel-fpga-ivdep-global.cpp +++ b/clang/test/CodeGenSYCL/intel-fpga-ivdep-global.cpp @@ -80,7 +80,7 @@ void ivdep_conflicting_safelen() { } template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/CodeGenSYCL/intel-fpga-local.cpp b/clang/test/CodeGenSYCL/intel-fpga-local.cpp index ad02d32e2aca1..41a483f33babc 100644 --- a/clang/test/CodeGenSYCL/intel-fpga-local.cpp +++ b/clang/test/CodeGenSYCL/intel-fpga-local.cpp @@ -256,7 +256,7 @@ void field_addrspace_cast() { } template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/CodeGenSYCL/intel-fpga-loops.cpp b/clang/test/CodeGenSYCL/intel-fpga-loops.cpp index ce7fb46b9c66c..0556feadcf7a2 100644 --- a/clang/test/CodeGenSYCL/intel-fpga-loops.cpp +++ b/clang/test/CodeGenSYCL/intel-fpga-loops.cpp @@ -95,7 +95,7 @@ void speculated_iterations() { } template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/CodeGenSYCL/intel-fpga-mem-builtin.cpp b/clang/test/CodeGenSYCL/intel-fpga-mem-builtin.cpp index abc75cf10dd85..e330fffb8eb65 100644 --- a/clang/test/CodeGenSYCL/intel-fpga-mem-builtin.cpp +++ b/clang/test/CodeGenSYCL/intel-fpga-mem-builtin.cpp @@ -71,7 +71,7 @@ void foo(float *A, int *B, State *C, State &D) { // CHECK-DAG: attributes [[ATT]] = { readnone } template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/CodeGenSYCL/intel-fpga-no-global-work-offset.cpp b/clang/test/CodeGenSYCL/intel-fpga-no-global-work-offset.cpp index 3b2ffd1cae5e4..d1352b190fa94 100644 --- a/clang/test/CodeGenSYCL/intel-fpga-no-global-work-offset.cpp +++ b/clang/test/CodeGenSYCL/intel-fpga-no-global-work-offset.cpp @@ -2,11 +2,11 @@ class Foo { public: - [[intelfpga::no_global_work_offset(1)]] void operator()() const {} + [[intelfpga::no_global_work_offset(1)]] void operator()() {} }; template -__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/CodeGenSYCL/intel-fpga-reg.cpp b/clang/test/CodeGenSYCL/intel-fpga-reg.cpp index 066ccc4064933..9428243813f40 100644 --- a/clang/test/CodeGenSYCL/intel-fpga-reg.cpp +++ b/clang/test/CodeGenSYCL/intel-fpga-reg.cpp @@ -143,7 +143,7 @@ void foo() { } template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/CodeGenSYCL/intel-max-global-work-dim.cpp b/clang/test/CodeGenSYCL/intel-max-global-work-dim.cpp index 0fcd1ab014864..5208db6ec3908 100644 --- a/clang/test/CodeGenSYCL/intel-max-global-work-dim.cpp +++ b/clang/test/CodeGenSYCL/intel-max-global-work-dim.cpp @@ -2,11 +2,11 @@ class Foo { public: - [[intelfpga::max_global_work_dim(1)]] void operator()() const {} + [[intelfpga::max_global_work_dim(1)]] void operator()() {} }; template -__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/CodeGenSYCL/intel-max-work-group-size.cpp b/clang/test/CodeGenSYCL/intel-max-work-group-size.cpp index e9b4360701e09..13bbb54f34198 100644 --- a/clang/test/CodeGenSYCL/intel-max-work-group-size.cpp +++ b/clang/test/CodeGenSYCL/intel-max-work-group-size.cpp @@ -2,11 +2,11 @@ class Foo { public: - [[intelfpga::max_work_group_size(1, 1, 1)]] void operator()() const {} + [[intelfpga::max_work_group_size(1, 1, 1)]] void operator()() {} }; template -__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/CodeGenSYCL/intel-restrict.cpp b/clang/test/CodeGenSYCL/intel-restrict.cpp index f3f01ec671bba..06d9d7ef4d59f 100644 --- a/clang/test/CodeGenSYCL/intel-restrict.cpp +++ b/clang/test/CodeGenSYCL/intel-restrict.cpp @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -fsycl -fsycl-is-device %s -emit-llvm -triple spir64-unknown-unknown-sycldevice -o - | FileCheck %s template -__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/CodeGenSYCL/kernel-by-reference.cpp b/clang/test/CodeGenSYCL/kernel-by-reference.cpp deleted file mode 100644 index 575ed9af6dac5..0000000000000 --- a/clang/test/CodeGenSYCL/kernel-by-reference.cpp +++ /dev/null @@ -1,42 +0,0 @@ -// RUN: %clang_cc1 -triple spir64 -fsycl -fsycl-is-device -verify -fsyntax-only -sycl-std=2017 -DSYCL2017 %s -// RUN: %clang_cc1 -triple spir64 -fsycl -fsycl-is-device -verify -fsyntax-only -sycl-std=2020 -DSYCL2020 %s -// RUN: %clang_cc1 -triple spir64 -fsycl -fsycl-is-device -verify -fsyntax-only -Wno-sycl-strict -DNODIAG %s -// RUN: %clang_cc1 -triple spir64 -fsycl -fsycl-is-device -verify -fsyntax-only -sycl-std=2020 -Wno-sycl-strict -DNODIAG %s - -// SYCL 1.2/2017 - kernel functions passed directly. (Also no const requirement, though mutable lambdas never supported) -template -#if defined(SYCL2020) -// expected-warning@+2 {{Passing kernel functions by value is deprecated in SYCL 2020}} -#endif -__attribute__((sycl_kernel)) void sycl_2017_single_task(Func kernelFunc) { - kernelFunc(); -} - -// SYCL 2020 - kernel functions are passed by reference. -template -#if defined(SYCL2017) -// expected-warning@+2 {{Passing of kernel functions by reference is a SYCL 2020 extension}} -#endif -__attribute__((sycl_kernel)) void sycl_2020_single_task(const Func &kernelFunc) { - kernelFunc(); -} - -int do_nothing(int i) { - return i + 1; -} - -// ensure both compile. -int main() { - sycl_2017_single_task([]() { - do_nothing(10); - }); - - sycl_2020_single_task([]() { - do_nothing(11); - }); - - return 0; -} -#if defined(NODIAG) -// expected-no-diagnostics -#endif \ No newline at end of file diff --git a/clang/test/CodeGenSYCL/kernel-name.cpp b/clang/test/CodeGenSYCL/kernel-name.cpp index 167203fd266a7..69298f9f8b164 100644 --- a/clang/test/CodeGenSYCL/kernel-name.cpp +++ b/clang/test/CodeGenSYCL/kernel-name.cpp @@ -9,7 +9,7 @@ inline namespace cl { using namespace cl::sycl; template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp b/clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp index 7f458efb36a57..8c2cfb2a1bd8b 100644 --- a/clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp +++ b/clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp @@ -37,7 +37,7 @@ using namespace cl::sycl; template -__attribute__((sycl_kernel)) void a_kernel(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void a_kernel(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/CodeGenSYCL/kernel-param-acc-array.cpp b/clang/test/CodeGenSYCL/kernel-param-acc-array.cpp index cf17a9d7e3e83..ec8ac8bc01f5f 100644 --- a/clang/test/CodeGenSYCL/kernel-param-acc-array.cpp +++ b/clang/test/CodeGenSYCL/kernel-param-acc-array.cpp @@ -7,7 +7,7 @@ using namespace cl::sycl; template -__attribute__((sycl_kernel)) void a_kernel(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void a_kernel(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/CodeGenSYCL/kernel-param-member-acc-array-ih.cpp b/clang/test/CodeGenSYCL/kernel-param-member-acc-array-ih.cpp index db2ffcfb13fd5..f5f679f7d3650 100644 --- a/clang/test/CodeGenSYCL/kernel-param-member-acc-array-ih.cpp +++ b/clang/test/CodeGenSYCL/kernel-param-member-acc-array-ih.cpp @@ -37,7 +37,7 @@ using namespace cl::sycl; template -__attribute__((sycl_kernel)) void a_kernel(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void a_kernel(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/CodeGenSYCL/kernel-param-member-acc-array.cpp b/clang/test/CodeGenSYCL/kernel-param-member-acc-array.cpp index b77eecfc85d68..1b1b25dcd3ff4 100644 --- a/clang/test/CodeGenSYCL/kernel-param-member-acc-array.cpp +++ b/clang/test/CodeGenSYCL/kernel-param-member-acc-array.cpp @@ -7,7 +7,7 @@ using namespace cl::sycl; template -__attribute__((sycl_kernel)) void a_kernel(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void a_kernel(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/CodeGenSYCL/kernel-param-pod-array-ih.cpp b/clang/test/CodeGenSYCL/kernel-param-pod-array-ih.cpp index b0dc2132fb06a..d4a5c8d5995a0 100755 --- a/clang/test/CodeGenSYCL/kernel-param-pod-array-ih.cpp +++ b/clang/test/CodeGenSYCL/kernel-param-pod-array-ih.cpp @@ -39,7 +39,7 @@ using namespace cl::sycl; template -__attribute__((sycl_kernel)) void a_kernel(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void a_kernel(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/CodeGenSYCL/kernel-param-pod-array.cpp b/clang/test/CodeGenSYCL/kernel-param-pod-array.cpp index 78fb63c714768..3ff1669dc4d17 100644 --- a/clang/test/CodeGenSYCL/kernel-param-pod-array.cpp +++ b/clang/test/CodeGenSYCL/kernel-param-pod-array.cpp @@ -7,7 +7,7 @@ using namespace cl::sycl; template -__attribute__((sycl_kernel)) void a_kernel(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void a_kernel(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/CodeGenSYCL/kernel_functor.cpp b/clang/test/CodeGenSYCL/kernel_functor.cpp index b75e4d465f888..8712895c20656 100644 --- a/clang/test/CodeGenSYCL/kernel_functor.cpp +++ b/clang/test/CodeGenSYCL/kernel_functor.cpp @@ -9,9 +9,32 @@ constexpr auto sycl_read_write = cl::sycl::access::mode::read_write; constexpr auto sycl_global_buffer = cl::sycl::access::target::global_buffer; // Case 1: +// - functor class is defined in an anonymous namespace +// - the '()' operator: +// * does not have parameters (to be used in 'single_task'). +// * has no 'const' qualifier +namespace { + class Functor1 { + public: + Functor1(int X_, cl::sycl::accessor &Acc_) : + X(X_), Acc(Acc_) + {} + + void operator()() { + Acc.use(X); + } + + private: + int X; + cl::sycl::accessor Acc; + }; +} + +// Case 2: // - functor class is defined in a namespace // - the '()' operator: // * does not have parameters (to be used in 'single_task'). +// * has the 'const' qualifier namespace ns { class Functor2 { public: @@ -29,10 +52,31 @@ namespace ns { }; } -// Case 2: +// Case 3: // - functor class is templated and defined in the translation unit scope // - the '()' operator: // * has a parameter of type cl::sycl::id<1> (to be used in 'parallel_for'). +// * has no 'const' qualifier +template class TmplFunctor { +public: + TmplFunctor(T X_, cl::sycl::accessor &Acc_) : + X(X_), Acc(Acc_) + {} + + void operator()(cl::sycl::id<1> id) { + Acc.use(id, X); + } + +private: + T X; + cl::sycl::accessor Acc; +}; + +// Case 4: +// - functor class is templated and defined in the translation unit scope +// - the '()' operator: +// * has a parameter of type cl::sycl::id<1> (to be used in 'parallel_for'). +// * has the 'const' qualifier template class TmplConstFunctor { public: TmplConstFunctor(T X_, cl::sycl::accessor &Acc_) : @@ -55,6 +99,12 @@ int foo(int X) { cl::sycl::queue Q; cl::sycl::buffer Buf(A, 1); + Q.submit([&](cl::sycl::handler& cgh) { + auto Acc = Buf.get_access(cgh); + Functor1 F(X, Acc); + + cgh.single_task(F); + }); Q.submit([&](cl::sycl::handler& cgh) { auto Acc = Buf.get_access(cgh); ns::Functor2 F(X, Acc); @@ -79,6 +129,13 @@ template T bar(T X) { { cl::sycl::queue Q; cl::sycl::buffer Buf(A, ARR_LEN(A)); + + Q.submit([&](cl::sycl::handler& cgh) { + auto Acc = Buf.template get_access(cgh); + TmplFunctor F(X, Acc); + + cgh.parallel_for(cl::sycl::range<1>(ARR_LEN(A)), F); + }); // Spice with lambdas to make sure functors and lambdas work together. Q.submit([&](cl::sycl::handler& cgh) { auto Acc = Buf.template get_access(cgh); @@ -108,8 +165,12 @@ int main() { const int Gold2 = 80; #ifndef __SYCL_DEVICE_ONLY__ + cl::sycl::detail::KernelInfo::getName(); + // CHECK: Functor1 cl::sycl::detail::KernelInfo::getName(); // CHECK: ns::Functor2 + cl::sycl::detail::KernelInfo>::getName(); + // CHECK: TmplFunctor cl::sycl::detail::KernelInfo>::getName(); // CHECK: TmplConstFunctor #endif // __SYCL_DEVICE_ONLY__ diff --git a/clang/test/CodeGenSYCL/kernel_name_with_typedefs.cpp b/clang/test/CodeGenSYCL/kernel_name_with_typedefs.cpp index 543b3ddfc3f5b..7a1bb7bb447d4 100644 --- a/clang/test/CodeGenSYCL/kernel_name_with_typedefs.cpp +++ b/clang/test/CodeGenSYCL/kernel_name_with_typedefs.cpp @@ -4,12 +4,12 @@ #include "sycl.hpp" template -__attribute__((sycl_kernel)) void single_task(const KernelType &kernelFunc) { +__attribute__((sycl_kernel)) void single_task(KernelType kernelFunc) { kernelFunc(); } struct dummy_functor { - void operator()() const {} + void operator()() {} }; typedef int int_t; diff --git a/clang/test/CodeGenSYCL/kernelname-enum.cpp b/clang/test/CodeGenSYCL/kernelname-enum.cpp index 3529fa1534b00..8580f1c532794 100644 --- a/clang/test/CodeGenSYCL/kernelname-enum.cpp +++ b/clang/test/CodeGenSYCL/kernelname-enum.cpp @@ -40,43 +40,43 @@ enum class no_type_set { template class dummy_functor_1 { public: - void operator()() const {} + void operator()() {} }; template class dummy_functor_2 { public: - void operator()() const {} + void operator()() {} }; template class dummy_functor_3 { public: - void operator()() const {} + void operator()() {} }; template class dummy_functor_4 { public: - void operator()() const {} + void operator()() {} }; template class dummy_functor_5 { public: - void operator()() const {} + void operator()() {} }; template class dummy_functor_6 { public: - void operator()() const {} + void operator()() {} }; template class dummy_functor_7 { public: - void operator()() const {} + void operator()() {} }; namespace type_argument_template_enum { @@ -105,7 +105,7 @@ class Baz; template class T> class dummy_functor_8 { public: - void operator()() const {} + void operator()() {} }; int main() { diff --git a/clang/test/CodeGenSYCL/loop_unroll.cpp b/clang/test/CodeGenSYCL/loop_unroll.cpp index 3b9928873a6e3..7e80c92db5231 100644 --- a/clang/test/CodeGenSYCL/loop_unroll.cpp +++ b/clang/test/CodeGenSYCL/loop_unroll.cpp @@ -30,7 +30,7 @@ void disable() { } template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/CodeGenSYCL/module-id.cpp b/clang/test/CodeGenSYCL/module-id.cpp index 08ad6178ddbf9..d120ee295c288 100644 --- a/clang/test/CodeGenSYCL/module-id.cpp +++ b/clang/test/CodeGenSYCL/module-id.cpp @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -fsycl -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/CodeGenSYCL/noexcept.cpp b/clang/test/CodeGenSYCL/noexcept.cpp index c0f4e55400a90..816f2c43ebfe2 100644 --- a/clang/test/CodeGenSYCL/noexcept.cpp +++ b/clang/test/CodeGenSYCL/noexcept.cpp @@ -48,7 +48,7 @@ void foo_cleanup() { } template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/CodeGenSYCL/num-simd-work-items.cpp b/clang/test/CodeGenSYCL/num-simd-work-items.cpp index 5613969b88a91..8b8b8ba22d0da 100644 --- a/clang/test/CodeGenSYCL/num-simd-work-items.cpp +++ b/clang/test/CodeGenSYCL/num-simd-work-items.cpp @@ -2,11 +2,11 @@ class Foo { public: - [[intelfpga::num_simd_work_items(1)]] void operator()() const {} + [[intelfpga::num_simd_work_items(1)]] void operator()() {} }; template -__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/CodeGenSYCL/remove-ur-inst.cpp b/clang/test/CodeGenSYCL/remove-ur-inst.cpp index 9764b64a4616c..7866aff7f07a3 100644 --- a/clang/test/CodeGenSYCL/remove-ur-inst.cpp +++ b/clang/test/CodeGenSYCL/remove-ur-inst.cpp @@ -4,7 +4,7 @@ SYCL_EXTERNAL void doesNotReturn() throw() __attribute__((__noreturn__)); template -__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/CodeGenSYCL/reqd-sub-group-size.cpp b/clang/test/CodeGenSYCL/reqd-sub-group-size.cpp index 1b62cfa810525..d95b97180b3f6 100644 --- a/clang/test/CodeGenSYCL/reqd-sub-group-size.cpp +++ b/clang/test/CodeGenSYCL/reqd-sub-group-size.cpp @@ -2,14 +2,14 @@ class Functor16 { public: - [[cl::intel_reqd_sub_group_size(16)]] void operator()() const {} + [[cl::intel_reqd_sub_group_size(16)]] void operator()() {} }; [[cl::intel_reqd_sub_group_size(8)]] void foo() {} class Functor { public: - void operator()() const { + void operator()() { foo(); } }; @@ -17,11 +17,11 @@ class Functor { template class Functor5 { public: - [[cl::intel_reqd_sub_group_size(SIZE)]] void operator()() const {} + [[cl::intel_reqd_sub_group_size(SIZE)]] void operator()() {} }; template -__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/CodeGenSYCL/reqd-work-group-size.cpp b/clang/test/CodeGenSYCL/reqd-work-group-size.cpp index 72ba3492d2ed0..bfb08c7ce6c2d 100644 --- a/clang/test/CodeGenSYCL/reqd-work-group-size.cpp +++ b/clang/test/CodeGenSYCL/reqd-work-group-size.cpp @@ -2,20 +2,20 @@ class Functor32x16x16 { public: - [[cl::reqd_work_group_size(32, 16, 16)]] void operator()() const {} + [[cl::reqd_work_group_size(32, 16, 16)]] void operator()() {} }; [[cl::reqd_work_group_size(8, 1, 1)]] void f8x1x1() {} class Functor { public: - void operator()() const { + void operator()() { f8x1x1(); } }; template -__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/CodeGenSYCL/sampler.cpp b/clang/test/CodeGenSYCL/sampler.cpp index 68c856f4532a3..39755aa6b9aec 100644 --- a/clang/test/CodeGenSYCL/sampler.cpp +++ b/clang/test/CodeGenSYCL/sampler.cpp @@ -43,7 +43,7 @@ struct sampler_wrapper { }; template -__attribute__((sycl_kernel)) void kernel_single_task(const KernelType &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(KernelType kernelFunc) { kernelFunc(); } diff --git a/clang/test/CodeGenSYCL/spir-calling-conv.cpp b/clang/test/CodeGenSYCL/spir-calling-conv.cpp index 35f1b280881e6..bed31dcb96e48 100644 --- a/clang/test/CodeGenSYCL/spir-calling-conv.cpp +++ b/clang/test/CodeGenSYCL/spir-calling-conv.cpp @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -fsycl -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/CodeGenSYCL/spir-enum.cpp b/clang/test/CodeGenSYCL/spir-enum.cpp index 185ef144c7dd3..738d1337e02ab 100644 --- a/clang/test/CodeGenSYCL/spir-enum.cpp +++ b/clang/test/CodeGenSYCL/spir-enum.cpp @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -fsycl -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/CodeGenSYCL/spir-opencl-version.cpp b/clang/test/CodeGenSYCL/spir-opencl-version.cpp index 5729020883565..43e187b463156 100644 --- a/clang/test/CodeGenSYCL/spir-opencl-version.cpp +++ b/clang/test/CodeGenSYCL/spir-opencl-version.cpp @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -fsycl -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -emit-llvm %s -o - | FileCheck %s template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/CodeGenSYCL/sycl-device-static-init.cpp b/clang/test/CodeGenSYCL/sycl-device-static-init.cpp index 95b220388d894..b811b1061bfd6 100644 --- a/clang/test/CodeGenSYCL/sycl-device-static-init.cpp +++ b/clang/test/CodeGenSYCL/sycl-device-static-init.cpp @@ -28,7 +28,7 @@ template const int BaseInit::var = 9; template struct BaseInit; template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } int main() { diff --git a/clang/test/CodeGenSYCL/sycl-multi-kernel-attr.cpp b/clang/test/CodeGenSYCL/sycl-multi-kernel-attr.cpp index 457766d4c5c11..3df365b3fef10 100644 --- a/clang/test/CodeGenSYCL/sycl-multi-kernel-attr.cpp +++ b/clang/test/CodeGenSYCL/sycl-multi-kernel-attr.cpp @@ -2,11 +2,12 @@ class Functor { public: - [[cl::intel_reqd_sub_group_size(4), cl::reqd_work_group_size(32, 16, 16)]] void operator()() const {} + [[cl::intel_reqd_sub_group_size(4), cl::reqd_work_group_size(32, 16, 16)]] void operator()() {} }; + template -__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/CodeGenSYCL/unique-stable-name.cpp b/clang/test/CodeGenSYCL/unique-stable-name.cpp index 66ca499e6cdae..f24bef81bb8c8 100644 --- a/clang/test/CodeGenSYCL/unique-stable-name.cpp +++ b/clang/test/CodeGenSYCL/unique-stable-name.cpp @@ -33,7 +33,7 @@ void lambda_in_dependent_function() { {DEF_IN_MACRO();}{DEF_IN_MACRO();} template -[[clang::sycl_kernel]] void kernel_single_task(const KernelType &kernelFunc) { +[[clang::sycl_kernel]] void kernel_single_task(KernelType kernelFunc) { kernelFunc(); } diff --git a/clang/test/CodeGenSYCL/usm-int-header.cpp b/clang/test/CodeGenSYCL/usm-int-header.cpp index 298a7464ac588..5b3ffa00050fa 100644 --- a/clang/test/CodeGenSYCL/usm-int-header.cpp +++ b/clang/test/CodeGenSYCL/usm-int-header.cpp @@ -17,7 +17,7 @@ #include template -__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/CodeGenSYCL/virtual-types.cpp b/clang/test/CodeGenSYCL/virtual-types.cpp index 48bb1b17456f5..b78f93712f9a0 100644 --- a/clang/test/CodeGenSYCL/virtual-types.cpp +++ b/clang/test/CodeGenSYCL/virtual-types.cpp @@ -1,6 +1,6 @@ // RUN: %clang_cc1 -fsycl -fsycl-is-device -triple spir64-unknown-linux-sycldevice -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/CodeGenSYCL/wrapped-accessor.cpp b/clang/test/CodeGenSYCL/wrapped-accessor.cpp index bf511f6ae9fa7..0355a2a17b15b 100644 --- a/clang/test/CodeGenSYCL/wrapped-accessor.cpp +++ b/clang/test/CodeGenSYCL/wrapped-accessor.cpp @@ -34,7 +34,7 @@ template struct AccWrapper { Acc accessor; }; template -__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/SemaSYCL/Inputs/sycl.hpp b/clang/test/SemaSYCL/Inputs/sycl.hpp index 4ddfe4adcdf42..9e3efc6321096 100644 --- a/clang/test/SemaSYCL/Inputs/sycl.hpp +++ b/clang/test/SemaSYCL/Inputs/sycl.hpp @@ -192,13 +192,13 @@ struct get_kernel_name_t { }; #define ATTR_SYCL_KERNEL __attribute__((sycl_kernel)) template -ATTR_SYCL_KERNEL void kernel_single_task(const KernelType &kernelFunc) { +ATTR_SYCL_KERNEL void kernel_single_task(KernelType kernelFunc) { kernelFunc(); } class handler { public: template - void single_task(const KernelType &kernelFunc) { + void single_task(KernelType kernelFunc) { using NameT = typename get_kernel_name_t::name; #ifdef __SYCL_DEVICE_ONLY__ kernel_single_task(kernelFunc); diff --git a/clang/test/SemaSYCL/accessor-type-diagnostics.cpp b/clang/test/SemaSYCL/accessor-type-diagnostics.cpp index e6b958d130caa..3a814fbf78011 100644 --- a/clang/test/SemaSYCL/accessor-type-diagnostics.cpp +++ b/clang/test/SemaSYCL/accessor-type-diagnostics.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -I %S/Inputs -fsycl -triple spir64 -fsycl-is-device -verify -fsyntax-only -Wno-sycl-2017-compat %s +// RUN: %clang_cc1 -I %S/Inputs -fsycl -triple spir64 -fsycl-is-device -verify -fsyntax-only %s // // Ensure SYCL type restrictions are applied to accessors as well. @@ -7,7 +7,7 @@ using namespace cl::sycl; template -__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/SemaSYCL/accessors-targets-image.cpp b/clang/test/SemaSYCL/accessors-targets-image.cpp index 385b1d11bf219..18fac9940cb1f 100644 --- a/clang/test/SemaSYCL/accessors-targets-image.cpp +++ b/clang/test/SemaSYCL/accessors-targets-image.cpp @@ -8,7 +8,7 @@ using namespace cl::sycl; template -__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/SemaSYCL/accessors-targets.cpp b/clang/test/SemaSYCL/accessors-targets.cpp index b589bb462c69c..dbaab2664e95c 100644 --- a/clang/test/SemaSYCL/accessors-targets.cpp +++ b/clang/test/SemaSYCL/accessors-targets.cpp @@ -8,7 +8,7 @@ using namespace cl::sycl; template -__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/SemaSYCL/allow-constexpr-recursion.cpp b/clang/test/SemaSYCL/allow-constexpr-recursion.cpp index 6b17af6351e22..fed70a02f7021 100644 --- a/clang/test/SemaSYCL/allow-constexpr-recursion.cpp +++ b/clang/test/SemaSYCL/allow-constexpr-recursion.cpp @@ -1,7 +1,7 @@ -// RUN: %clang_cc1 -fsycl -fsycl-is-device -fcxx-exceptions -Wno-return-type -Wno-sycl-2017-compat -verify -fsyntax-only -std=c++20 -Werror=vla %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -fcxx-exceptions -Wno-return-type -verify -fsyntax-only -std=c++20 -Werror=vla %s template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/SemaSYCL/array-kernel-param-neg.cpp b/clang/test/SemaSYCL/array-kernel-param-neg.cpp index 5ad8569feebe8..0618014c9fb10 100755 --- a/clang/test/SemaSYCL/array-kernel-param-neg.cpp +++ b/clang/test/SemaSYCL/array-kernel-param-neg.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl -fsycl-is-device -fcxx-exceptions -Wno-sycl-2017-compat -verify -fsyntax-only %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -fcxx-exceptions -verify -fsyntax-only %s // This test checks if compiler reports compilation error on an attempt to pass // an array of non-trivially copyable structs as SYCL kernel parameter or @@ -20,11 +20,11 @@ class E { int i[]; public: - int operator()() const { return i[0]; } + int operator()() { return i[0]; } }; template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/SemaSYCL/array-kernel-param.cpp b/clang/test/SemaSYCL/array-kernel-param.cpp index 2308d6a3360dc..548077be4f82b 100644 --- a/clang/test/SemaSYCL/array-kernel-param.cpp +++ b/clang/test/SemaSYCL/array-kernel-param.cpp @@ -8,7 +8,7 @@ using namespace cl::sycl; template -__attribute__((sycl_kernel)) void a_kernel(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void a_kernel(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/SemaSYCL/basic-kernel-wrapper.cpp b/clang/test/SemaSYCL/basic-kernel-wrapper.cpp index 24bbfcf07d504..1f500eff0a888 100644 --- a/clang/test/SemaSYCL/basic-kernel-wrapper.cpp +++ b/clang/test/SemaSYCL/basic-kernel-wrapper.cpp @@ -9,7 +9,7 @@ template struct AccWrapper { Acc accessor; }; template -__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/SemaSYCL/built-in-type-kernel-arg.cpp b/clang/test/SemaSYCL/built-in-type-kernel-arg.cpp index dd8e1db6b5ff9..2d04e0f453579 100644 --- a/clang/test/SemaSYCL/built-in-type-kernel-arg.cpp +++ b/clang/test/SemaSYCL/built-in-type-kernel-arg.cpp @@ -6,7 +6,7 @@ #include template -__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/SemaSYCL/call-to-undefined-function.cpp b/clang/test/SemaSYCL/call-to-undefined-function.cpp index 8fd8a6fd7ca73..cc30af261936e 100644 --- a/clang/test/SemaSYCL/call-to-undefined-function.cpp +++ b/clang/test/SemaSYCL/call-to-undefined-function.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl -fsycl-is-device -Wno-sycl-2017-compat -verify -fsyntax-only %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -verify -fsyntax-only %s void defined() { } @@ -9,7 +9,7 @@ void undefined(); SYCL_EXTERNAL void undefinedExternal(); template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/SemaSYCL/deferred-diagnostics-aux-builtin.cpp b/clang/test/SemaSYCL/deferred-diagnostics-aux-builtin.cpp index b4df077725393..8da7b60bcdf67 100644 --- a/clang/test/SemaSYCL/deferred-diagnostics-aux-builtin.cpp +++ b/clang/test/SemaSYCL/deferred-diagnostics-aux-builtin.cpp @@ -1,10 +1,10 @@ -// RUN: %clang_cc1 -triple spir64-unknown-unknown-sycldevice -fsycl -fsycl-is-device -aux-triple x86_64-unknown-linux-gnu -Wno-sycl-2017-compat -verify -fsyntax-only %s +// RUN: %clang_cc1 -triple spir64-unknown-unknown-sycldevice -fsycl -fsycl-is-device -aux-triple x86_64-unknown-linux-gnu -verify -fsyntax-only %s inline namespace cl { namespace sycl { template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { // expected-note@+1 {{called by 'kernel_single_task -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { // expected-note@+1 3{{called by 'kernel_single_task; template -__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/SemaSYCL/float128.cpp b/clang/test/SemaSYCL/float128.cpp index b61787a153fd0..0904e740d5083 100644 --- a/clang/test/SemaSYCL/float128.cpp +++ b/clang/test/SemaSYCL/float128.cpp @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -triple spir64 -fsycl -fsycl-is-device -Wno-sycl-2017-compat -verify -fsyntax-only %s -// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsycl -fsycl-is-device -Wno-sycl-2017-compat -fsyntax-only %s +// RUN: %clang_cc1 -triple spir64 -fsycl -fsycl-is-device -verify -fsyntax-only %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -fsycl -fsycl-is-device -fsyntax-only %s typedef __float128 BIGTY; @@ -63,7 +63,7 @@ void foo2(){}; __float128 foo(__float128 P) { return P; } template -__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { // expected-note@+1 6{{called by 'kernel}} kernelFunc(); } diff --git a/clang/test/SemaSYCL/forward-decl.cpp b/clang/test/SemaSYCL/forward-decl.cpp index 7d252af5abc89..4a7a741991edb 100644 --- a/clang/test/SemaSYCL/forward-decl.cpp +++ b/clang/test/SemaSYCL/forward-decl.cpp @@ -1,8 +1,8 @@ -// RUN: %clang_cc1 -fsycl -fsycl-is-device -fsyntax-only -Wno-sycl-2017-compat -verify -pedantic %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -fsyntax-only -verify -pedantic %s // expected-no-diagnostics template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/SemaSYCL/fpga_pipes.cpp b/clang/test/SemaSYCL/fpga_pipes.cpp index ef25438a08162..7f988c0ff49e0 100644 --- a/clang/test/SemaSYCL/fpga_pipes.cpp +++ b/clang/test/SemaSYCL/fpga_pipes.cpp @@ -34,7 +34,7 @@ template pipe_storage Storage5 __attribute__((io_pipe_id(N))); template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/SemaSYCL/implicit_kernel_type.cpp b/clang/test/SemaSYCL/implicit_kernel_type.cpp index 022d34ad59227..97a0ca90a2bc9 100644 --- a/clang/test/SemaSYCL/implicit_kernel_type.cpp +++ b/clang/test/SemaSYCL/implicit_kernel_type.cpp @@ -1,43 +1,13 @@ // RUN: %clang_cc1 -I %S/Inputs -fsycl -fsycl-is-device -fsycl-int-header=%t.h -fsyntax-only -verify %s -Werror=sycl-strict -DERROR -// RUN: %clang_cc1 -I %S/Inputs -fsycl -fsycl-is-device -fsycl-int-header=%t.h -fsyntax-only -verify %s -Wsycl-strict -DWARN -// RUN: %clang_cc1 -I %S/Inputs -fsycl -fsycl-is-device -fsycl-int-header=%t.h -fsycl-unnamed-lambda -fsyntax-only -verify %s -Werror=sycl-strict - -// SYCL 1.2 Definitions -template -__attribute__((sycl_kernel)) void sycl_121_single_task(Func kernelFunc) { - kernelFunc(); -} - -class event {}; -class queue { -public: - template - event submit(T cgf) { return event{}; } -}; -class auto_name {}; -template -struct get_kernel_name_t { - using name = Name; -}; -class handler { -public: - template - void single_task(KernelType kernelFunc) { - using NameT = typename get_kernel_name_t::name; -#ifdef __SYCL_DEVICE_ONLY__ - sycl_121_single_task(kernelFunc); -#else - kernelFunc(); -#endif - } -}; -// -- /Definitions +// RUN: %clang_cc1 -I %S/Inputs -fsycl -fsycl-is-device -fsycl-int-header=%t.h -fsyntax-only -verify %s -Wsycl-strict -DWARN +// RUN: %clang_cc1 -I %S/Inputs -fsycl -fsycl-is-device -fsycl-int-header=%t.h -fsycl-unnamed-lambda -fsyntax-only -verify %s -Werror=sycl-strict +#include #ifdef __SYCL_UNNAMED_LAMBDA__ // expected-no-diagnostics #endif -//using namespace cl::sycl; +using namespace cl::sycl; void function() { } @@ -50,11 +20,11 @@ struct myWrapper { class myWrapper2; int main() { - queue q; + cl::sycl::queue q; #ifndef __SYCL_UNNAMED_LAMBDA__ // expected-note@+1 {{InvalidKernelName1 declared here}} class InvalidKernelName1 {}; - q.submit([&](handler &h) { + q.submit([&](cl::sycl::handler &h) { // expected-error@+1 {{kernel needs to have a globally-visible name}} h.single_task([]() {}); }); @@ -66,7 +36,7 @@ int main() { // expected-error@+3 {{SYCL 1.2.1 specification requires an explicit forward declaration for a kernel type name; your program may not be portable}} // expected-note@+2 {{fake_kernel declared here}} #endif - sycl_121_single_task([]() { function(); }); + cl::sycl::kernel_single_task([]() { function(); }); #if defined(WARN) // expected-warning@+6 {{SYCL 1.2.1 specification requires an explicit forward declaration for a kernel type name; your program may not be portable}} // expected-note@+5 {{fake_kernel2 declared here}} @@ -74,10 +44,10 @@ int main() { // expected-error@+3 {{SYCL 1.2.1 specification requires an explicit forward declaration for a kernel type name; your program may not be portable}} // expected-note@+2 {{fake_kernel2 declared here}} #endif - sycl_121_single_task([]() { + cl::sycl::kernel_single_task([]() { auto l = [](auto f) { f(); }; }); - sycl_121_single_task([]() { function(); }); - sycl_121_single_task([]() { function(); }); + cl::sycl::kernel_single_task([]() { function(); }); + cl::sycl::kernel_single_task([]() { function(); }); return 0; } diff --git a/clang/test/SemaSYCL/inheritance.cpp b/clang/test/SemaSYCL/inheritance.cpp index b801c416cada2..ff0b263449a35 100644 --- a/clang/test/SemaSYCL/inheritance.cpp +++ b/clang/test/SemaSYCL/inheritance.cpp @@ -24,7 +24,7 @@ struct base { struct derived : base, second_base { int a; - void operator()() const { + void operator()() { } }; diff --git a/clang/test/SemaSYCL/inline-asm.cpp b/clang/test/SemaSYCL/inline-asm.cpp index bb8a03b7a4090..36237ee3fd38a 100644 --- a/clang/test/SemaSYCL/inline-asm.cpp +++ b/clang/test/SemaSYCL/inline-asm.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -fsycl -fsycl-is-device -fsyntax-only -Wno-sycl-2017-compat -verify %s -DLINUX_ASM -// RUN: %clang_cc1 -fsycl -fsycl-is-device -fsyntax-only -Wno-sycl-2017-compat -verify %s -DLINUX_ASM -DSPIR_CHECK -triple spir64-unknown-unknown-sycldevice -// RUN: %clang_cc1 -fsycl -fsycl-is-device -fsyntax-only -Wno-sycl-2017-compat -verify -triple x86_64-windows -fasm-blocks %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -fsyntax-only -verify %s -DLINUX_ASM +// RUN: %clang_cc1 -fsycl -fsycl-is-device -fsyntax-only -verify %s -DLINUX_ASM -DSPIR_CHECK -triple spir64-unknown-unknown-sycldevice +// RUN: %clang_cc1 -fsycl -fsycl-is-device -fsyntax-only -verify -triple x86_64-windows -fasm-blocks %s #ifndef SPIR_CHECK //expected-no-diagnostics @@ -50,7 +50,7 @@ void bar() { } template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); #ifdef LINUX_ASM __asm__("int3"); diff --git a/clang/test/SemaSYCL/intel-fpga-local.cpp b/clang/test/SemaSYCL/intel-fpga-local.cpp index 4d637a4a9213f..cb406b84f6e38 100644 --- a/clang/test/SemaSYCL/intel-fpga-local.cpp +++ b/clang/test/SemaSYCL/intel-fpga-local.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl -fsycl-is-device -Wno-return-type -fcxx-exceptions -fsyntax-only -ast-dump -Wno-sycl-2017-compat -verify -pedantic %s | FileCheck %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -Wno-return-type -fcxx-exceptions -fsyntax-only -ast-dump -verify -pedantic %s | FileCheck %s //CHECK: FunctionDecl{{.*}}check_ast void check_ast() @@ -781,7 +781,7 @@ struct templ_st { }; template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/SemaSYCL/intel-fpga-loops.cpp b/clang/test/SemaSYCL/intel-fpga-loops.cpp index 164d720eb1560..d32c3cfa4a0c8 100644 --- a/clang/test/SemaSYCL/intel-fpga-loops.cpp +++ b/clang/test/SemaSYCL/intel-fpga-loops.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl -fsycl-is-device -fsyntax-only -Wno-sycl-2017-compat -verify -pedantic %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -fsyntax-only -verify -pedantic %s // Test for Intel FPGA loop attributes applied not to a loop void foo() { @@ -371,7 +371,7 @@ void max_concurrency_dependent() { } template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/SemaSYCL/intel-fpga-mem-builtin.cpp b/clang/test/SemaSYCL/intel-fpga-mem-builtin.cpp index bfc3d803ec44f..fa3709b4eccb5 100644 --- a/clang/test/SemaSYCL/intel-fpga-mem-builtin.cpp +++ b/clang/test/SemaSYCL/intel-fpga-mem-builtin.cpp @@ -56,7 +56,7 @@ void foo(float *A, int *B, State *C) { } template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } int main() { diff --git a/clang/test/SemaSYCL/intel-fpga-reg.cpp b/clang/test/SemaSYCL/intel-fpga-reg.cpp index 213038b44a81d..5d09258d39170 100644 --- a/clang/test/SemaSYCL/intel-fpga-reg.cpp +++ b/clang/test/SemaSYCL/intel-fpga-reg.cpp @@ -57,7 +57,7 @@ void foo() { } template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } int main() { diff --git a/clang/test/SemaSYCL/intel-max-global-work-dim.cpp b/clang/test/SemaSYCL/intel-max-global-work-dim.cpp index cfde3fd4d7c3b..d391924773f1e 100644 --- a/clang/test/SemaSYCL/intel-max-global-work-dim.cpp +++ b/clang/test/SemaSYCL/intel-max-global-work-dim.cpp @@ -1,16 +1,15 @@ -// RUN: %clang_cc1 %s -fsyntax-only -fsycl -fsycl-is-device -Wno-sycl-2017-compat -triple spir64 -DTRIGGER_ERROR -verify -// RUN: %clang_cc1 %s -fsyntax-only -ast-dump -fsycl -fsycl-is-device -Wno-sycl-2017-compat -triple spir64 | FileCheck %s -// RUN: %clang_cc1 -fsycl -fsycl-is-host -Wno-sycl-2017-compat -fsyntax-only -verify %s +// RUN: %clang_cc1 %s -fsyntax-only -fsycl -fsycl-is-device -triple spir64 -DTRIGGER_ERROR -verify +// RUN: %clang_cc1 %s -fsyntax-only -ast-dump -fsycl -fsycl-is-device -triple spir64 | FileCheck %s +// RUN: %clang_cc1 -fsycl -fsycl-is-host -fsyntax-only -verify %s #ifndef __SYCL_DEVICE_ONLY__ struct FuncObj { [[intelfpga::max_global_work_dim(1)]] // expected-no-diagnostics - void - operator()() const {} + void operator()() {} }; template -void kernel(const Func &kernelFunc) { +void kernel(Func kernelFunc) { kernelFunc(); } @@ -24,21 +23,22 @@ void foo() { [[intelfpga::max_global_work_dim(2)]] void func_do_not_ignore() {} struct FuncObj { - [[intelfpga::max_global_work_dim(1)]] void operator()() const {} + [[intelfpga::max_global_work_dim(1)]] + void operator()() {} }; struct TRIFuncObjGood1 { [[intelfpga::max_global_work_dim(0)]] [[intelfpga::max_work_group_size(1, 1, 1)]] - [[cl::reqd_work_group_size(1, 1, 1)]] void - operator()() const {} + [[cl::reqd_work_group_size(1, 1, 1)]] + void operator()() {} }; struct TRIFuncObjGood2 { [[intelfpga::max_global_work_dim(3)]] [[intelfpga::max_work_group_size(8, 1, 1)]] - [[cl::reqd_work_group_size(4, 1, 1)]] void - operator()() const {} + [[cl::reqd_work_group_size(4, 1, 1)]] + void operator()() {} }; #ifdef TRIGGER_ERROR @@ -46,13 +46,12 @@ struct TRIFuncObjBad { [[intelfpga::max_global_work_dim(0)]] [[intelfpga::max_work_group_size(8, 8, 8)]] // expected-error{{'max_work_group_size' X-, Y- and Z- sizes must be 1 when 'max_global_work_dim' attribute is used with value 0}} [[cl::reqd_work_group_size(4, 4, 4)]] // expected-error{{'reqd_work_group_size' X-, Y- and Z- sizes must be 1 when 'max_global_work_dim' attribute is used with value 0}} - void - operator()() const {} + void operator()() {} }; #endif // TRIGGER_ERROR template -__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/SemaSYCL/intel-max-work-group-size.cpp b/clang/test/SemaSYCL/intel-max-work-group-size.cpp index 210944f6b2fdd..f5b2a67599e8b 100644 --- a/clang/test/SemaSYCL/intel-max-work-group-size.cpp +++ b/clang/test/SemaSYCL/intel-max-work-group-size.cpp @@ -1,16 +1,15 @@ -// RUN: %clang_cc1 %s -fsyntax-only -fsycl -fsycl-is-device -Wno-sycl-2017-compat -triple spir64 -DTRIGGER_ERROR -verify -// RUN: %clang_cc1 %s -fsyntax-only -ast-dump -fsycl -fsycl-is-device -Wno-sycl-2017-compat -triple spir64 | FileCheck %s -// RUN: %clang_cc1 -fsycl -fsycl-is-host -fsyntax-only -Wno-sycl-2017-compat -verify %s +// RUN: %clang_cc1 %s -fsyntax-only -fsycl -fsycl-is-device -triple spir64 -DTRIGGER_ERROR -verify +// RUN: %clang_cc1 %s -fsyntax-only -ast-dump -fsycl -fsycl-is-device -triple spir64 | FileCheck %s +// RUN: %clang_cc1 -fsycl -fsycl-is-host -fsyntax-only -verify %s #ifndef __SYCL_DEVICE_ONLY__ struct FuncObj { [[intelfpga::max_work_group_size(1, 1, 1)]] // expected-no-diagnostics - void - operator()() const {} + void operator()() {} }; template -void kernel(const Func &kernelFunc) { +void kernel(Func kernelFunc) { kernelFunc(); } @@ -24,20 +23,20 @@ void foo() { [[intelfpga::max_work_group_size(2, 2, 2)]] void func_do_not_ignore() {} struct FuncObj { - [[intelfpga::max_work_group_size(4, 4, 4)]] void operator()() const {} + [[intelfpga::max_work_group_size(4, 4, 4)]] + void operator()() {} }; #ifdef TRIGGER_ERROR struct DAFuncObj { [[intelfpga::max_work_group_size(4, 4, 4)]] [[cl::reqd_work_group_size(8, 8, 4)]] // expected-error{{'reqd_work_group_size' attribute conflicts with 'max_work_group_size' attribute}} - void - operator()() const {} + void operator()() {} }; #endif // TRIGGER_ERROR template -__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/SemaSYCL/intel-reqd-work-group-size.cpp b/clang/test/SemaSYCL/intel-reqd-work-group-size.cpp index 09fe0934029b2..361b3d083c0ef 100644 --- a/clang/test/SemaSYCL/intel-reqd-work-group-size.cpp +++ b/clang/test/SemaSYCL/intel-reqd-work-group-size.cpp @@ -1,16 +1,16 @@ -// RUN: %clang_cc1 -fsycl -fsycl-is-device -Wno-sycl-2017-compat -fsyntax-only -verify -DTRIGGER_ERROR %s -// RUN: %clang_cc1 -fsycl -fsycl-is-device -Wno-sycl-2017-compat -ast-dump %s | FileCheck %s -// RUN: %clang_cc1 -fsycl -fsycl-is-host -Wno-sycl-2017-compat -fsyntax-only -verify %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -fsyntax-only -verify -DTRIGGER_ERROR %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -ast-dump %s | FileCheck %s +// RUN: %clang_cc1 -fsycl -fsycl-is-host -fsyntax-only -verify %s #ifndef __SYCL_DEVICE_ONLY__ // expected-no-diagnostics class Functor { public: - [[intel::reqd_work_group_size(4)]] void operator()() const {} + [[intel::reqd_work_group_size(4)]] void operator()() {} }; template -void kernel(const Func &kernelFunc) { +void kernel(Func kernelFunc) { kernelFunc(); } @@ -32,50 +32,50 @@ void bar() { #ifdef TRIGGER_ERROR class Functor32 { public: - [[cl::reqd_work_group_size(32)]] void operator()() const {} // expected-error {{'reqd_work_group_size' attribute requires exactly 3 arguments}} + [[cl::reqd_work_group_size(32)]] void operator()() {} // expected-error {{'reqd_work_group_size' attribute requires exactly 3 arguments}} }; class Functor33 { public: - [[intel::reqd_work_group_size(32, -4)]] void operator()() const {} // expected-error {{'reqd_work_group_size' attribute requires a non-negative integral compile time constant expression}} + [[intel::reqd_work_group_size(32, -4)]] void operator()() {} // expected-error {{'reqd_work_group_size' attribute requires a non-negative integral compile time constant expression}} }; #endif // TRIGGER_ERROR class Functor16 { public: - [[intel::reqd_work_group_size(16)]] void operator()() const {} + [[intel::reqd_work_group_size(16)]] void operator()() {} }; class Functor64 { public: - [[intel::reqd_work_group_size(64, 64)]] void operator()() const {} + [[intel::reqd_work_group_size(64, 64)]] void operator()() {} }; class Functor16x16x16 { public: - [[intel::reqd_work_group_size(16, 16, 16)]] void operator()() const {} + [[intel::reqd_work_group_size(16, 16, 16)]] void operator()() {} }; class Functor8 { // expected-error {{conflicting attributes applied to a SYCL kernel}} public: - [[intel::reqd_work_group_size(8)]] void operator()() const { // expected-note {{conflicting attribute is here}} + [[intel::reqd_work_group_size(8)]] void operator()() { // expected-note {{conflicting attribute is here}} f4x1x1(); } }; class Functor { public: - void operator()() const { + void operator()() { f4x1x1(); } }; class FunctorAttr { public: - __attribute__((reqd_work_group_size(128, 128, 128))) void operator()() const {} + __attribute__((reqd_work_group_size(128, 128, 128))) void operator()() {} }; template -__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/SemaSYCL/intel-restrict.cpp b/clang/test/SemaSYCL/intel-restrict.cpp index c878cec15be3b..b998b261f4320 100644 --- a/clang/test/SemaSYCL/intel-restrict.cpp +++ b/clang/test/SemaSYCL/intel-restrict.cpp @@ -1,14 +1,15 @@ -// RUN: %clang_cc1 %s -fsyntax-only -fsycl -fsycl-is-device -Wno-sycl-2017-compat -triple spir64 -DCHECKDIAG -verify -// RUN: %clang_cc1 %s -fsyntax-only -ast-dump -fsycl -fsycl-is-device -Wno-sycl-2017-compat -triple spir64 | FileCheck %s +// RUN: %clang_cc1 %s -fsyntax-only -fsycl -fsycl-is-device -triple spir64 -DCHECKDIAG -verify +// RUN: %clang_cc1 %s -fsyntax-only -ast-dump -fsycl -fsycl-is-device -triple spir64 | FileCheck %s [[intel::kernel_args_restrict]] void func_do_not_ignore() {} struct FuncObj { - [[intel::kernel_args_restrict]] void operator()() const {} + [[intel::kernel_args_restrict]] + void operator()() {} }; template -__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { kernelFunc(); #ifdef CHECKDIAG [[intel::kernel_args_restrict]] int invalid = 42; // expected-error{{'kernel_args_restrict' attribute only applies to functions}} diff --git a/clang/test/SemaSYCL/kernel-function-type.cpp b/clang/test/SemaSYCL/kernel-function-type.cpp index 86b0399d79ed9..7b33ec5ffd07a 100644 --- a/clang/test/SemaSYCL/kernel-function-type.cpp +++ b/clang/test/SemaSYCL/kernel-function-type.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl -fsycl-is-device -fsyntax-only -Wno-sycl-2017-compat -verify %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -fsyntax-only -verify %s // expected-no-diagnostics // The kernel_single_task call is emitted as an OpenCL kernel function. The call @@ -29,7 +29,7 @@ void foo(j e, d k) { } template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/SemaSYCL/kernelname-enum.cpp b/clang/test/SemaSYCL/kernelname-enum.cpp index 8ed02e06f7473..22a9f96acc50a 100644 --- a/clang/test/SemaSYCL/kernelname-enum.cpp +++ b/clang/test/SemaSYCL/kernelname-enum.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -I %S/Inputs -fsycl -fsycl-is-device -fsycl-int-header=%t.h -fsyntax-only -Wno-sycl-2017-compat -verify %s +// RUN: %clang_cc1 -I %S/Inputs -fsycl -fsycl-is-device -fsycl-int-header=%t.h -fsyntax-only -verify %s #include "sycl.hpp" @@ -26,26 +26,26 @@ enum class scoped_enum_no_type_set { template class dummy_functor_1 { public: - void operator()() const {} + void operator()() {} }; // expected-error@+2 {{kernel name is invalid. Unscoped enum requires fixed underlying type}} template class dummy_functor_2 { public: - void operator()() const {} + void operator()() {} }; template class dummy_functor_3 { public: - void operator()() const {} + void operator()() {} }; template class dummy_functor_4 { public: - void operator()() const {} + void operator()() {} }; int main() { diff --git a/clang/test/SemaSYCL/lambda_implicit_capture_this.cpp b/clang/test/SemaSYCL/lambda_implicit_capture_this.cpp index 90827db1d4ac8..331eb1b64ae3e 100644 --- a/clang/test/SemaSYCL/lambda_implicit_capture_this.cpp +++ b/clang/test/SemaSYCL/lambda_implicit_capture_this.cpp @@ -1,7 +1,7 @@ -// RUN: %clang_cc1 -I %S/Inputs -fsycl -fsycl-is-device -fsyntax-only -Wno-sycl-2017-compat -verify %s +// RUN: %clang_cc1 -I %S/Inputs -fsycl -fsycl-is-device -fsyntax-only -verify %s template -__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/SemaSYCL/loop_unroll.cpp b/clang/test/SemaSYCL/loop_unroll.cpp index 13ecae08d7430..2e7e205532fd1 100644 --- a/clang/test/SemaSYCL/loop_unroll.cpp +++ b/clang/test/SemaSYCL/loop_unroll.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl -fsycl-is-device -fsyntax-only -Wno-sycl-2017-compat -verify -pedantic %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -fsyntax-only -verify -pedantic %s template void bar() { @@ -60,7 +60,7 @@ void foo() { } template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/SemaSYCL/mangle-kernel.cpp b/clang/test/SemaSYCL/mangle-kernel.cpp index 7b515dd508309..8cc2950019b45 100644 --- a/clang/test/SemaSYCL/mangle-kernel.cpp +++ b/clang/test/SemaSYCL/mangle-kernel.cpp @@ -5,7 +5,7 @@ #include template -__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/SemaSYCL/markfunction-astconsumer.cpp b/clang/test/SemaSYCL/markfunction-astconsumer.cpp index 8315c890fc35e..c4883f64a8c36 100644 --- a/clang/test/SemaSYCL/markfunction-astconsumer.cpp +++ b/clang/test/SemaSYCL/markfunction-astconsumer.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl -fsycl-is-device -Wno-return-type -verify -Wno-sycl-2017-compat -fsyntax-only -std=c++17 %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -Wno-return-type -verify -fsyntax-only -std=c++17 %s void bar(); template @@ -7,7 +7,7 @@ void usage(T func) { } template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/SemaSYCL/no-vtables.cpp b/clang/test/SemaSYCL/no-vtables.cpp index 68f9953bf6784..1891ed278d444 100644 --- a/clang/test/SemaSYCL/no-vtables.cpp +++ b/clang/test/SemaSYCL/no-vtables.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -verify -Wno-sycl-2017-compat -fsyntax-only -emit-llvm-only %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -verify -fsyntax-only -emit-llvm-only %s // expected-no-diagnostics // Should never fail, since the type is never used in kernel code. @@ -17,8 +17,9 @@ void always_uses() { void usage() { } + template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } int main() { diff --git a/clang/test/SemaSYCL/no-vtables2.cpp b/clang/test/SemaSYCL/no-vtables2.cpp index 92bde4db57b33..e2468121291f4 100644 --- a/clang/test/SemaSYCL/no-vtables2.cpp +++ b/clang/test/SemaSYCL/no-vtables2.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl -fsycl-is-device -Wno-return-type -verify -Wno-sycl-2017-compat -fsyntax-only %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -Wno-return-type -verify -fsyntax-only %s struct Base { virtual void f() const {} @@ -28,8 +28,9 @@ Inherit usage() { usage_child(); } + template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } int main() { diff --git a/clang/test/SemaSYCL/non-std-layout-param.cpp b/clang/test/SemaSYCL/non-std-layout-param.cpp index 0698b3b7573a3..0a8ffbf798f94 100644 --- a/clang/test/SemaSYCL/non-std-layout-param.cpp +++ b/clang/test/SemaSYCL/non-std-layout-param.cpp @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -fsycl -fsycl-is-device -fsycl-std-layout-kernel-params -verify -Wno-sycl-2017-compat -fsyntax-only %s -// RUN: %clang_cc1 -fsycl -fsycl-is-device -Wno-sycl-2017-compat -fsyntax-only %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -fsycl-std-layout-kernel-params -verify -fsyntax-only %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -fsyntax-only %s // This test checks if compiler reports compilation error on an attempt to pass // non-standard layout struct object as SYCL kernel parameter. @@ -15,10 +15,11 @@ struct C : public Base { }; template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } + void test() { C C0; C0.Y=0; @@ -29,7 +30,7 @@ void test() { } struct Kernel { - void operator()() const { + void operator()() { (void) c1; (void) c2; (void) p; diff --git a/clang/test/SemaSYCL/non-trivially-copyable-kernel-param.cpp b/clang/test/SemaSYCL/non-trivially-copyable-kernel-param.cpp index 00d1ec0bc0418..4453e5273f9a7 100644 --- a/clang/test/SemaSYCL/non-trivially-copyable-kernel-param.cpp +++ b/clang/test/SemaSYCL/non-trivially-copyable-kernel-param.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl -fsycl-is-device -verify -Wno-sycl-2017-compat -fsyntax-only %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -verify -fsyntax-only %s // This test checks if compiler reports compilation error on an attempt to pass // a struct with non-trivially copyable type as SYCL kernel parameter. @@ -22,7 +22,7 @@ struct D { }; template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/SemaSYCL/num_simd_work_items.cpp b/clang/test/SemaSYCL/num_simd_work_items.cpp index d6528ec4e665d..638539917a283 100644 --- a/clang/test/SemaSYCL/num_simd_work_items.cpp +++ b/clang/test/SemaSYCL/num_simd_work_items.cpp @@ -1,16 +1,15 @@ -// RUN: %clang_cc1 %s -fsycl -fsycl-is-device -triple spir64 -fsyntax-only -Wno-sycl-2017-compat -DTRIGGER_ERROR -verify -// RUN: %clang_cc1 %s -fsycl -fsycl-is-device -triple spir64 -fsyntax-only -Wno-sycl-2017-compat -ast-dump | FileCheck %s -// RUN: %clang_cc1 -fsycl -fsycl-is-host -fsyntax-only -Wno-sycl-2017-compat -verify %s +// RUN: %clang_cc1 %s -fsycl -fsycl-is-device -triple spir64 -fsyntax-only -DTRIGGER_ERROR -verify +// RUN: %clang_cc1 %s -fsycl -fsycl-is-device -triple spir64 -fsyntax-only -ast-dump | FileCheck %s +// RUN: %clang_cc1 -fsycl -fsycl-is-host -fsyntax-only -verify %s #ifndef __SYCL_DEVICE_ONLY__ struct FuncObj { [[intelfpga::num_simd_work_items(42)]] // expected-no-diagnostics - void - operator()() const {} + void operator()() {} }; template -void kernel(const Func &kernelFunc) { +void kernel(Func kernelFunc) { kernelFunc(); } @@ -24,11 +23,12 @@ void foo() { [[intelfpga::num_simd_work_items(2)]] void func_do_not_ignore() {} struct FuncObj { - [[intelfpga::num_simd_work_items(42)]] void operator()() const {} + [[intelfpga::num_simd_work_items(42)]] + void operator()() {} }; template -__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/SemaSYCL/prohibit-thread-local.cpp b/clang/test/SemaSYCL/prohibit-thread-local.cpp index 1cdfb4c5ea3b6..e07353e862479 100644 --- a/clang/test/SemaSYCL/prohibit-thread-local.cpp +++ b/clang/test/SemaSYCL/prohibit-thread-local.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl -fsycl-is-device -triple spir64 -verify -Wno-sycl-2017-compat -fsyntax-only %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -triple spir64 -verify -fsyntax-only %s thread_local const int prohobit_ns_scope = 0; thread_local int prohobit_ns_scope2 = 0; @@ -41,7 +41,7 @@ template __attribute__((sycl_kernel)) // expected-note@+2 2{{called by}} void -kernel_single_task(const Func &kernelFunc) { kernelFunc(); } +kernel_single_task(Func kernelFunc) { kernelFunc(); } int main() { // expected-note@+1 2{{called by}} diff --git a/clang/test/SemaSYCL/reference-kernel-param.cpp b/clang/test/SemaSYCL/reference-kernel-param.cpp index 06809a8fcd1c3..3bd548e23a6fb 100644 --- a/clang/test/SemaSYCL/reference-kernel-param.cpp +++ b/clang/test/SemaSYCL/reference-kernel-param.cpp @@ -1,10 +1,10 @@ -// RUN: %clang_cc1 -fsycl -fsycl-is-device -verify -Wno-sycl-2017-compat -fsyntax-only %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -verify -fsyntax-only %s // This test checks if compiler reports compilation error on an attempt to pass // a reference as SYCL kernel parameter. template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/SemaSYCL/reqd-sub-group-size-device.cpp b/clang/test/SemaSYCL/reqd-sub-group-size-device.cpp index 56fca9002d436..ffa4b176207a0 100644 --- a/clang/test/SemaSYCL/reqd-sub-group-size-device.cpp +++ b/clang/test/SemaSYCL/reqd-sub-group-size-device.cpp @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -fsycl -fsycl-is-device -fsyntax-only -Wno-sycl-2017-compat -verify -DTRIGGER_ERROR %s -// RUN: %clang_cc1 -fsycl -fsycl-is-device -Wno-sycl-2017-compat -ast-dump %s | FileCheck %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -fsyntax-only -verify -DTRIGGER_ERROR %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -ast-dump %s | FileCheck %s [[intel::reqd_sub_group_size(4)]] void foo() {} // expected-note {{conflicting attribute is here}} // expected-note@-1 {{conflicting attribute is here}} @@ -9,30 +9,30 @@ class Functor16 { public: // expected-warning@+2 {{attribute 'intel_reqd_sub_group_size' is deprecated}} // expected-note@+1 {{did you mean to use 'intel::reqd_sub_group_size' instead?}} - [[cl::intel_reqd_sub_group_size(16)]] void operator()() const {} + [[cl::intel_reqd_sub_group_size(16)]] void operator()() {} }; class Functor8 { // expected-error {{conflicting attributes applied to a SYCL kernel}} public: - [[intel::reqd_sub_group_size(8)]] void operator()() const { // expected-note {{conflicting attribute is here}} + [[intel::reqd_sub_group_size(8)]] void operator()() { // expected-note {{conflicting attribute is here}} foo(); } }; class Functor4 { public: - [[intel::reqd_sub_group_size(12)]] void operator()() const {} + [[intel::reqd_sub_group_size(12)]] void operator()() {} }; class Functor { public: - void operator()() const { + void operator()() { foo(); } }; template -__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/SemaSYCL/reqd-work-group-size-device.cpp b/clang/test/SemaSYCL/reqd-work-group-size-device.cpp index 65bb0b13474d7..7175322fb43c4 100644 --- a/clang/test/SemaSYCL/reqd-work-group-size-device.cpp +++ b/clang/test/SemaSYCL/reqd-work-group-size-device.cpp @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -fsycl -fsycl-is-device -fsyntax-only -Wno-sycl-2017-compat -verify -DTRIGGER_ERROR %s -// RUN: %clang_cc1 -fsycl -fsycl-is-device -Wno-sycl-2017-compat -ast-dump %s | FileCheck %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -fsyntax-only -verify -DTRIGGER_ERROR %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -ast-dump %s | FileCheck %s [[cl::reqd_work_group_size(4, 1, 1)]] void f4x1x1() {} // expected-note {{conflicting attribute is here}} // expected-note@-1 {{conflicting attribute is here}} @@ -13,7 +13,7 @@ class Functor16 { public: - [[cl::reqd_work_group_size(16, 1, 1)]] [[cl::reqd_work_group_size(16, 1, 1)]] void operator()() const {} + [[cl::reqd_work_group_size(16, 1, 1)]] [[cl::reqd_work_group_size(16, 1, 1)]] void operator()() {} }; #ifdef TRIGGER_ERROR @@ -21,35 +21,35 @@ class Functor32 { public: //expected-warning@+2{{attribute 'reqd_work_group_size' is already applied with different parameters}} // expected-error@+1{{'reqd_work_group_size' attribute conflicts with 'reqd_work_group_size' attribute}} - [[cl::reqd_work_group_size(32, 1, 1)]] [[cl::reqd_work_group_size(1, 1, 32)]] void operator()() const {} + [[cl::reqd_work_group_size(32, 1, 1)]] [[cl::reqd_work_group_size(1, 1, 32)]] void operator()() {} }; #endif class Functor16x16x16 { public: - [[cl::reqd_work_group_size(16, 16, 16)]] void operator()() const {} + [[cl::reqd_work_group_size(16, 16, 16)]] void operator()() {} }; class Functor8 { // expected-error {{conflicting attributes applied to a SYCL kernel}} public: - [[cl::reqd_work_group_size(1, 1, 8)]] void operator()() const { // expected-note {{conflicting attribute is here}} + [[cl::reqd_work_group_size(1, 1, 8)]] void operator()() { // expected-note {{conflicting attribute is here}} f4x1x1(); } }; class Functor { public: - void operator()() const { + void operator()() { f4x1x1(); } }; class FunctorAttr { public: - __attribute__((reqd_work_group_size(128, 128, 128))) void operator()() const {} + __attribute__((reqd_work_group_size(128, 128, 128))) void operator()() {} }; template -__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/SemaSYCL/restrict-recursion.cpp b/clang/test/SemaSYCL/restrict-recursion.cpp index 3e29271561fe2..3547e2f793e19 100644 --- a/clang/test/SemaSYCL/restrict-recursion.cpp +++ b/clang/test/SemaSYCL/restrict-recursion.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl -fsycl-is-device -fcxx-exceptions -Wno-return-type -verify -Wno-sycl-2017-compat -fsyntax-only -std=c++17 %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -fcxx-exceptions -Wno-return-type -verify -fsyntax-only -std=c++17 %s // This recursive function is not called from sycl kernel, // so it should not be diagnosed. @@ -57,7 +57,7 @@ bool isa_B(void) { } template -__attribute__((sycl_kernel)) void kernel(const L &l) { +__attribute__((sycl_kernel)) void kernel(L l) { l(); } @@ -85,13 +85,13 @@ int addInt(int n, int m) { } template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } template -// expected-note@+1 2{{function implemented using recursion declared here}} -__attribute__((sycl_kernel)) void kernel_single_task2(const Func &kernelFunc) { + // expected-note@+1 2{{function implemented using recursion declared here}} +__attribute__((sycl_kernel)) void kernel_single_task2(Func kernelFunc) { kernelFunc(); // expected-error@+1 2{{SYCL kernel cannot call a recursive function}} kernel_single_task2(kernelFunc); diff --git a/clang/test/SemaSYCL/restrict-recursion2.cpp b/clang/test/SemaSYCL/restrict-recursion2.cpp index b9eaffa4149b1..51688fea7b1b6 100644 --- a/clang/test/SemaSYCL/restrict-recursion2.cpp +++ b/clang/test/SemaSYCL/restrict-recursion2.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl -fsycl-is-device -fcxx-exceptions -Wno-return-type -verify -Wno-sycl-2017-compat -fsyntax-only -std=c++17 %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -fcxx-exceptions -Wno-return-type -verify -fsyntax-only -std=c++17 %s // This recursive function is not called from sycl kernel, // so it should not be diagnosed. @@ -71,7 +71,7 @@ int addInt(int n, int m) { } template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/SemaSYCL/restrict-recursion3.cpp b/clang/test/SemaSYCL/restrict-recursion3.cpp index f5289b3142e2b..1a4a671904169 100644 --- a/clang/test/SemaSYCL/restrict-recursion3.cpp +++ b/clang/test/SemaSYCL/restrict-recursion3.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl -fsycl-is-device -fcxx-exceptions -Wno-return-type -Wno-sycl-2017-compat -Wno-error=sycl-strict -verify -fsyntax-only -std=c++17 %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -fcxx-exceptions -Wno-return-type -Wno-error=sycl-strict -verify -fsyntax-only -std=c++17 %s // This recursive function is not called from sycl kernel, // so it should not be diagnosed. @@ -32,7 +32,7 @@ int addInt(int n, int m) { template // expected-note@+1 2{{function implemented using recursion declared here}} -__attribute__((sycl_kernel)) void kernel_single_task2(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task2(Func kernelFunc) { // expected-note@+1 {{called by 'kernel_single_task2}} kernelFunc(); // expected-warning@+1 2{{SYCL kernel cannot call a recursive function}} diff --git a/clang/test/SemaSYCL/restrict-recursion4.cpp b/clang/test/SemaSYCL/restrict-recursion4.cpp index dde093792cf26..c755f0eaec63a 100644 --- a/clang/test/SemaSYCL/restrict-recursion4.cpp +++ b/clang/test/SemaSYCL/restrict-recursion4.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl -fsycl-is-device -fcxx-exceptions -Wno-return-type -Wno-sycl-2017-compat -Wno-error=sycl-strict -verify -fsyntax-only -std=c++17 %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -fcxx-exceptions -Wno-return-type -Wno-error=sycl-strict -verify -fsyntax-only -std=c++17 %s // This recursive function is not called from sycl kernel, // so it should not be diagnosed. @@ -34,7 +34,7 @@ int addInt(int n, int m) { } template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { // expected-note@+1 {{called by 'kernel_single_task}} kernelFunc(); } diff --git a/clang/test/SemaSYCL/sampler.cpp b/clang/test/SemaSYCL/sampler.cpp index a1fb423b109ca..2a3171ce06f08 100644 --- a/clang/test/SemaSYCL/sampler.cpp +++ b/clang/test/SemaSYCL/sampler.cpp @@ -3,7 +3,7 @@ #include template -__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/SemaSYCL/spec_const_and_accesor_crash.cpp b/clang/test/SemaSYCL/spec_const_and_accesor_crash.cpp index 54a6f46290268..cbfba6e4be32d 100644 --- a/clang/test/SemaSYCL/spec_const_and_accesor_crash.cpp +++ b/clang/test/SemaSYCL/spec_const_and_accesor_crash.cpp @@ -5,7 +5,7 @@ #include template -__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/SemaSYCL/sycl-callstack.cpp b/clang/test/SemaSYCL/sycl-callstack.cpp index 8c8a53753743d..15da9d68255f5 100644 --- a/clang/test/SemaSYCL/sycl-callstack.cpp +++ b/clang/test/SemaSYCL/sycl-callstack.cpp @@ -1,9 +1,10 @@ -// RUN: %clang_cc1 -fsycl -fsycl-is-device -fcxx-exceptions -verify -Wno-sycl-2017-compat -fsyntax-only -std=c++17 %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -fcxx-exceptions -verify -fsyntax-only -std=c++17 %s template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) +void kernel_single_task(Func kernelFunc) { // expected-note@+1 {{called by 'kernel_single_task}} - kernelFunc(); + kernelFunc(); } void foo() { diff --git a/clang/test/SemaSYCL/sycl-cconv.cpp b/clang/test/SemaSYCL/sycl-cconv.cpp index a42d4e7622aaa..f905417027c4a 100644 --- a/clang/test/SemaSYCL/sycl-cconv.cpp +++ b/clang/test/SemaSYCL/sycl-cconv.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl -fsycl-is-device -triple spir64-unknown-windows-sycldevice -aux-triple x86_64-pc-windows-msvc -fsyntax-only -Wno-sycl-2017-compat -verify %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -triple spir64-unknown-windows-sycldevice -aux-triple x86_64-pc-windows-msvc -fsyntax-only -verify %s // expected-no-warning@+1 __inline __cdecl int printf(char const* const _Format, ...) { return 0; } @@ -13,7 +13,7 @@ void bar() { template // expected-no-warning@+1 -__cdecl __attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__cdecl __attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { // expected-error@+1{{SYCL kernel cannot call a variadic function}} printf("cannot call from here\n"); // expected-no-error@+1 diff --git a/clang/test/SemaSYCL/sycl-device-const-static.cpp b/clang/test/SemaSYCL/sycl-device-const-static.cpp index 02a97810b7e5e..0e90057c73d60 100644 --- a/clang/test/SemaSYCL/sycl-device-const-static.cpp +++ b/clang/test/SemaSYCL/sycl-device-const-static.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl -fsycl-is-device -verify -Wno-sycl-2017-compat -fsyntax-only %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -verify -fsyntax-only %s struct Base {}; struct S { @@ -37,7 +37,7 @@ void usage() { } template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { // expected-error@+1{{SYCL kernel cannot use a non-const static data variable}} static int z; // expected-note-re@+3{{called by 'kernel_single_task}} diff --git a/clang/test/SemaSYCL/sycl-device-static-restrict.cpp b/clang/test/SemaSYCL/sycl-device-static-restrict.cpp index b3517770e5290..03132b4530b88 100644 --- a/clang/test/SemaSYCL/sycl-device-static-restrict.cpp +++ b/clang/test/SemaSYCL/sycl-device-static-restrict.cpp @@ -1,8 +1,8 @@ -// RUN: %clang_cc1 -fsycl -fsycl-is-device -verify -Wno-sycl-2017-compat -fsyntax-only %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -verify -fsyntax-only %s const int glob1 = 1; int glob2 = 2; template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { // expected-note-re@+1{{called by 'kernel_single_task}} kernelFunc(); } diff --git a/clang/test/SemaSYCL/sycl-dllimport-dllexport.cpp b/clang/test/SemaSYCL/sycl-dllimport-dllexport.cpp index e03d749a09db6..b2c3fbf3aa1ff 100644 --- a/clang/test/SemaSYCL/sycl-dllimport-dllexport.cpp +++ b/clang/test/SemaSYCL/sycl-dllimport-dllexport.cpp @@ -1,19 +1,19 @@ // RUN: %clang_cc1 -triple spir64-unknown-unknown-sycldevice -fms-extensions \ // RUN: -aux-triple x86_64-unknown-linux-gnu -fsycl -fsycl-is-device \ -// RUN: -fsyntax-only -Wno-sycl-2017-compat -DWARNCHECK %s -o /dev/null 2>&1 | FileCheck %s +// RUN: -fsyntax-only -DWARNCHECK %s -o /dev/null 2>&1 | FileCheck %s // check random triple aux-triple with sycl-device -// RUN: %clang_cc1 -triple spir64-unknown-windows-sycldevice -Wno-sycl-2017-compat -fsyntax-only \ +// RUN: %clang_cc1 -triple spir64-unknown-windows-sycldevice -fsyntax-only \ // RUN: -fms-extensions -DWARNCHECK %s -o /dev/null 2>&1 | FileCheck %s // check without -aux-triple but sycl-device // RUN: %clang_cc1 -triple spir64-unknown-windows-sycldevice -fsycl \ // RUN: -fsycl-is-device -aux-triple x86_64-pc-windows-msvc -fms-extensions \ -// RUN: -fsyntax-only -Wno-sycl-2017-compat -DWARNCHECK %s -o /dev/null 2>&1 | \ +// RUN: -fsyntax-only -DWARNCHECK %s -o /dev/null 2>&1 | \ // RUN: FileCheck %s --check-prefixes CHECKALL // check -aux-tripe without sycl-device -// RUN: %clang_cc1 -triple spir64-unknown-windows-sycldevice -Wno-sycl-2017-compat -fsyntax-only \ +// RUN: %clang_cc1 -triple spir64-unknown-windows-sycldevice -fsyntax-only \ // RUN: -aux-triple x86_64-pc-windows-msvc -fsycl -fsycl-is-device \ // RUN: -fms-extensions -verify %s // check error message when dllimport function gets called in sycl-kernel code @@ -51,7 +51,7 @@ int foobar() // expected-warning {{'foobar' redeclared without 'dllimport' attr } template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/SemaSYCL/sycl-esimd.cpp b/clang/test/SemaSYCL/sycl-esimd.cpp index 68ef557818ddc..f235394b9971c 100644 --- a/clang/test/SemaSYCL/sycl-esimd.cpp +++ b/clang/test/SemaSYCL/sycl-esimd.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl -fsycl-is-device -fsycl-explicit-simd -fsyntax-only -Wno-sycl-2017-compat -verify %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -fsycl-explicit-simd -fsyntax-only -verify %s // ----------- Negative tests @@ -12,7 +12,7 @@ bar() {} // -- ESIMD kernel can't call functions with required subgroup size != 1 template -void kernel0(const F &f) __attribute__((sycl_kernel)) { +void kernel0(F f) __attribute__((sycl_kernel)) { f(); } @@ -27,7 +27,7 @@ void test0() { // -- Usual kernel can't call ESIMD function template -void kernel1(const F &f) __attribute__((sycl_kernel)) { +void kernel1(F f) __attribute__((sycl_kernel)) { f(); } @@ -43,7 +43,7 @@ void test1() { // -- Kernel-function call, both have the attribute, lambda kernel. template -void kernel2(const F &f) __attribute__((sycl_kernel)) { +void kernel2(F f) __attribute__((sycl_kernel)) { f(); } @@ -64,12 +64,12 @@ class A { // -- Functor object kernel. template -void kernel3(const F &f) __attribute__((sycl_kernel)) { +void kernel3(F f) __attribute__((sycl_kernel)) { f(); } struct Kernel3 { - void operator()() const __attribute__((sycl_explicit_simd)) {} + void operator()() __attribute__((sycl_explicit_simd)) {} }; void bar3() { diff --git a/clang/test/SemaSYCL/sycl-fptr-lambda.cpp b/clang/test/SemaSYCL/sycl-fptr-lambda.cpp index 5ec68265a2ee9..ba44c7286ec2d 100644 --- a/clang/test/SemaSYCL/sycl-fptr-lambda.cpp +++ b/clang/test/SemaSYCL/sycl-fptr-lambda.cpp @@ -1,8 +1,8 @@ -// RUN: %clang_cc1 -fsycl -fsycl-is-device -std=c++14 -verify -Wno-sycl-2017-compat -fsyntax-only %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -std=c++14 -verify -fsyntax-only %s // expected-no-diagnostics template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/SemaSYCL/sycl-pseudo-dtor.cpp b/clang/test/SemaSYCL/sycl-pseudo-dtor.cpp index cc839a18446ed..b4700cadd3f2f 100644 --- a/clang/test/SemaSYCL/sycl-pseudo-dtor.cpp +++ b/clang/test/SemaSYCL/sycl-pseudo-dtor.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl -fsycl-is-device -verify -Wno-sycl-2017-compat -fsyntax-only %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -verify -fsyntax-only %s template struct functor_wrapper{ @@ -13,7 +13,7 @@ struct S { virtual void foo(); }; struct T { virtual ~T(); }; template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { // expected-no-note@+1 using DATA_I = int; using DATA_S = S; diff --git a/clang/test/SemaSYCL/sycl-restrict.cpp b/clang/test/SemaSYCL/sycl-restrict.cpp index 2c963f95ff76d..00b215d7d459b 100644 --- a/clang/test/SemaSYCL/sycl-restrict.cpp +++ b/clang/test/SemaSYCL/sycl-restrict.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -fsycl -fsycl-is-device -fcxx-exceptions -triple spir64 -Wno-return-type -verify -Wno-sycl-2017-compat -fsyntax-only -std=c++17 %s -// RUN: %clang_cc1 -fsycl -fsycl-is-device -fcxx-exceptions -triple spir64 -fno-sycl-allow-func-ptr -Wno-return-type -verify -Wno-sycl-2017-compat -fsyntax-only -std=c++17 %s -// RUN: %clang_cc1 -fsycl -fsycl-is-device -fcxx-exceptions -triple spir64 -DALLOW_FP=1 -fsycl-allow-func-ptr -Wno-return-type -verify -Wno-sycl-2017-compat -fsyntax-only -std=c++17 %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -fcxx-exceptions -triple spir64 -Wno-return-type -verify -fsyntax-only -std=c++17 %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -fcxx-exceptions -triple spir64 -fno-sycl-allow-func-ptr -Wno-return-type -verify -fsyntax-only -std=c++17 %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -fcxx-exceptions -triple spir64 -DALLOW_FP=1 -fsycl-allow-func-ptr -Wno-return-type -verify -fsyntax-only -std=c++17 %s namespace std { class type_info; @@ -86,7 +86,7 @@ bool isa_B(A *a) { } template -__attribute__((sycl_kernel)) void kernel1(const L &l) { +__attribute__((sycl_kernel)) void kernel1(L l) { l(); //#rtti_kernel // expected-note 2{{called by 'kernel1 -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); //#call_kernelFunc // expected-note 3{{called by 'kernel_single_task -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { kernelFunc(); //expected-note 2+ {{called by 'kernel_single_task}} } diff --git a/clang/test/SemaSYCL/tls_error.cpp b/clang/test/SemaSYCL/tls_error.cpp index bacecf7827803..eb89616105955 100644 --- a/clang/test/SemaSYCL/tls_error.cpp +++ b/clang/test/SemaSYCL/tls_error.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl -fsycl-is-device -triple spir64 -verify -Wno-sycl-2017-compat -fsyntax-only %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -triple spir64 -verify -fsyntax-only %s extern __thread void* __once_callable; // expected-no-error extern __thread void (*__once_call)(); // expected-no-error @@ -14,7 +14,7 @@ void usage() { } template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { //expected-note@+1{{called by}} kernelFunc(); } diff --git a/clang/test/SemaSYCL/unevaluated-function.cpp b/clang/test/SemaSYCL/unevaluated-function.cpp index 49ffe954ab7a9..6e4f3b0fa3f57 100644 --- a/clang/test/SemaSYCL/unevaluated-function.cpp +++ b/clang/test/SemaSYCL/unevaluated-function.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl -fsycl-is-device -fcxx-exceptions -verify -Wno-sycl-2017-compat -fsyntax-only %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -fcxx-exceptions -verify -fsyntax-only %s // Check that a function used in an unevaluated context is not subject // to delayed device diagnostics. @@ -24,7 +24,7 @@ bool foo3() { } template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { // expected-note@+1 1{{called by}} kernelFunc(); } diff --git a/clang/test/SemaSYCL/unnamed-kernel.cpp b/clang/test/SemaSYCL/unnamed-kernel.cpp index 45954f325b4d6..33e5656ac8220 100644 --- a/clang/test/SemaSYCL/unnamed-kernel.cpp +++ b/clang/test/SemaSYCL/unnamed-kernel.cpp @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -I %S/Inputs -fsycl -fsycl-is-device -fsycl-int-header=%t.h -fsyntax-only -Wno-sycl-2017-compat -verify %s -// RUN: %clang_cc1 -I %S/Inputs -fsycl -fsycl-is-device -fsycl-int-header=%t.h -fsycl-unnamed-lambda -fsyntax-only -Wno-sycl-2017-compat -verify %s +// RUN: %clang_cc1 -I %S/Inputs -fsycl -fsycl-is-device -fsycl-int-header=%t.h -fsyntax-only -verify %s +// RUN: %clang_cc1 -I %S/Inputs -fsycl -fsycl-is-device -fsycl-int-header=%t.h -fsycl-unnamed-lambda -fsyntax-only -verify %s #include #ifdef __SYCL_UNNAMED_LAMBDA__ diff --git a/clang/test/SemaSYCL/unsupported_math.cpp b/clang/test/SemaSYCL/unsupported_math.cpp index 3c0de837dcd77..e93bf61283588 100644 --- a/clang/test/SemaSYCL/unsupported_math.cpp +++ b/clang/test/SemaSYCL/unsupported_math.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl -fsycl-is-device -fsyntax-only -Wno-sycl-2017-compat -verify %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -fsyntax-only -verify %s extern "C" float sinf(float); extern "C" float cosf(float); extern "C" float logf(float); @@ -6,7 +6,7 @@ extern "C" double sin(double); extern "C" double cos(double); extern "C" double log(double); template -__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { kernelFunc(); } diff --git a/clang/test/SemaSYCL/variadic-func-call.cpp b/clang/test/SemaSYCL/variadic-func-call.cpp index bdb7ace1690b6..896ba16e46b9e 100644 --- a/clang/test/SemaSYCL/variadic-func-call.cpp +++ b/clang/test/SemaSYCL/variadic-func-call.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsycl -fsycl-is-device -triple spir64-unknown-unknown -fsyntax-only -Wno-sycl-2017-compat -verify %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -triple spir64-unknown-unknown -fsyntax-only -verify %s void variadic(int, ...) {} namespace NS { @@ -18,7 +18,7 @@ void foo() { void overloaded(int, int) {} void overloaded(int, ...) {} template -__attribute__((sycl_kernel)) void task(const Func &KF) { +__attribute__((sycl_kernel)) void task(Func KF) { KF(); // expected-note 2 {{called by 'task}} } diff --git a/clang/test/SemaSYCL/wrapped-accessor.cpp b/clang/test/SemaSYCL/wrapped-accessor.cpp index efbd32dd09809..1052b4ac24e0f 100644 --- a/clang/test/SemaSYCL/wrapped-accessor.cpp +++ b/clang/test/SemaSYCL/wrapped-accessor.cpp @@ -9,7 +9,7 @@ template struct AccWrapper { Acc accessor; }; template -__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { +__attribute__((sycl_kernel)) void kernel(Func kernelFunc) { kernelFunc(); } diff --git a/llvm/lib/SYCLLowerIR/LowerWGScope.cpp b/llvm/lib/SYCLLowerIR/LowerWGScope.cpp index c424f81a84e00..14087420587e3 100644 --- a/llvm/lib/SYCLLowerIR/LowerWGScope.cpp +++ b/llvm/lib/SYCLLowerIR/LowerWGScope.cpp @@ -265,6 +265,7 @@ static void guardBlockWithIsLeaderCheck(BasicBlock *IfBB, BasicBlock *TrueBB, auto *Ty = LinearLocalID->getType(); Value *Zero = Constant::getNullValue(Ty); IRBuilder<> Builder(IfBB->getContext()); + spirv::genWGBarrier(*(IfBB->getTerminator()), TT); Builder.SetInsertPoint(IfBB->getTerminator()); Value *Cmp = Builder.CreateICmpEQ(LinearLocalID, Zero, "cmpz"); Builder.SetCurrentDebugLocation(DbgLoc); diff --git a/llvm/test/SYCLLowerIR/byval_arg.ll b/llvm/test/SYCLLowerIR/byval_arg.ll index 03c4bb2892f64..df3cc8062a796 100644 --- a/llvm/test/SYCLLowerIR/byval_arg.ll +++ b/llvm/test/SYCLLowerIR/byval_arg.ll @@ -11,6 +11,7 @@ define internal spir_func void @wibble(%struct.baz* byval(%struct.baz) %arg1) !work_group_scope !0 { ; CHECK-LABEL: @wibble( ; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64 addrspace(1)* @__spirv_BuiltInLocalInvocationIndex +; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) ; CHECK-NEXT: [[CMPZ:%.*]] = icmp eq i64 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[CMPZ]], label [[LEADER:%.*]], label [[MERGE:%.*]] ; CHECK: leader: diff --git a/llvm/test/SYCLLowerIR/byval_arg_cast.ll b/llvm/test/SYCLLowerIR/byval_arg_cast.ll index 14699c070a673..ef94ccd8e9461 100644 --- a/llvm/test/SYCLLowerIR/byval_arg_cast.ll +++ b/llvm/test/SYCLLowerIR/byval_arg_cast.ll @@ -20,6 +20,7 @@ define dso_local spir_func void @wombat(%struct.widget* byval(%struct.widget) al ; CHECK-LABEL: @wombat( ; CHECK-NEXT: bb: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, i64 addrspace(1)* @__spirv_BuiltInLocalInvocationIndex, align 4 +; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) ; CHECK-NEXT: [[CMPZ1:%.*]] = icmp eq i64 [[TMP0]], 0 ; CHECK-NEXT: br i1 [[CMPZ1]], label [[LEADER:%.*]], label [[MERGE:%.*]] ; CHECK: leader: @@ -31,6 +32,7 @@ define dso_local spir_func void @wombat(%struct.widget* byval(%struct.widget) al ; CHECK-NEXT: [[TMP2:%.*]] = bitcast %struct.widget* [[ARG]] to i8* ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p3i8.i64(i8* align 8 [[TMP2]], i8 addrspace(3)* align 16 bitcast (%struct.widget addrspace(3)* @[[SHADOW]] to i8 addrspace(3)*), i64 32, i1 false) ; CHECK-NEXT: [[TMP3:%.*]] = load i64, i64 addrspace(1)* @__spirv_BuiltInLocalInvocationIndex, align 4 +; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) ; CHECK-NEXT: [[CMPZ:%.*]] = icmp eq i64 [[TMP3]], 0 ; CHECK-NEXT: br i1 [[CMPZ]], label [[WG_LEADER:%.*]], label [[WG_CF:%.*]] ; CHECK: wg_leader: diff --git a/llvm/test/SYCLLowerIR/convergent.ll b/llvm/test/SYCLLowerIR/convergent.ll index 7c8d70c08d2a9..be9c9bd687776 100644 --- a/llvm/test/SYCLLowerIR/convergent.ll +++ b/llvm/test/SYCLLowerIR/convergent.ll @@ -19,8 +19,8 @@ define internal spir_func void @wibble(%struct.baz* byval(%struct.baz) %arg1) !w ; CHECK-PTX: declare i64 @_Z27__spirv_LocalInvocationId_zv() ; CHECK: ; Function Attrs: convergent -; CHECK: declare void @_Z22__spirv_ControlBarrierjjj(i32, i32, i32) #1 +; CHECK: declare void @_Z22__spirv_ControlBarrierjjj(i32, i32, i32) #[[ATTR_NUM:[0-9]+]] -; CHECK: attributes #1 = { convergent } +; CHECK: attributes #[[ATTR_NUM]] = { convergent } !0 = !{} diff --git a/llvm/test/SYCLLowerIR/pfwg_and_pfwi.ll b/llvm/test/SYCLLowerIR/pfwg_and_pfwi.ll index 615d11190d4cc..fdbdca4686f57 100644 --- a/llvm/test/SYCLLowerIR/pfwg_and_pfwi.ll +++ b/llvm/test/SYCLLowerIR/pfwg_and_pfwi.ll @@ -25,6 +25,7 @@ define internal spir_func void @wibble(%struct.bar addrspace(4)* %arg, %struct.z ; CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_BAR:%.*]] addrspace(4)*, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = alloca [[STRUCT_FOO_0:%.*]], align 1 ; CHECK-NEXT: [[TMP0:%.*]] = load i64, i64 addrspace(1)* @__spirv_BuiltInLocalInvocationIndex +; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) ; CHECK-NEXT: [[CMPZ3:%.*]] = icmp eq i64 [[TMP0]], 0 ; CHECK-NEXT: br i1 [[CMPZ3]], label [[LEADER:%.*]], label [[MERGE:%.*]] ; CHECK: leader: @@ -40,6 +41,7 @@ define internal spir_func void @wibble(%struct.bar addrspace(4)* %arg, %struct.z ; CHECK-NEXT: [[TMP4:%.*]] = bitcast [[STRUCT_BAR]] addrspace(4)* [[ARG]] to i8 addrspace(4)* ; CHECK-NEXT: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* align 8 [[TMP4]], i8 addrspace(3)* align 8 getelementptr inbounds (%struct.bar, [[STRUCT_BAR]] addrspace(3)* @[[PFWG_SHADOW]], i32 0, i32 0), i64 1, i1 false) ; CHECK-NEXT: [[TMP5:%.*]] = load i64, i64 addrspace(1)* @__spirv_BuiltInLocalInvocationIndex +; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) ; CHECK-NEXT: [[CMPZ:%.*]] = icmp eq i64 [[TMP5]], 0 ; CHECK-NEXT: br i1 [[CMPZ]], label [[WG_LEADER:%.*]], label [[WG_CF:%.*]] ; CHECK: wg_leader: @@ -50,6 +52,7 @@ define internal spir_func void @wibble(%struct.bar addrspace(4)* %arg, %struct.z ; CHECK-NEXT: br label [[WG_CF]] ; CHECK: wg_cf: ; CHECK-NEXT: [[TMP4:%.*]] = load i64, i64 addrspace(1)* @__spirv_BuiltInLocalInvocationIndex +; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) ; CHECK-NEXT: [[CMPZ2:%.*]] = icmp eq i64 [[TMP4]], 0 ; CHECK-NEXT: br i1 [[CMPZ2]], label [[TESTMAT:%.*]], label [[LEADERMAT:%.*]] ; CHECK: TestMat: diff --git a/llvm/test/SYCLLowerIR/wg_scope_ctor_loop.ll b/llvm/test/SYCLLowerIR/wg_scope_ctor_loop.ll new file mode 100644 index 0000000000000..84575dc77f27c --- /dev/null +++ b/llvm/test/SYCLLowerIR/wg_scope_ctor_loop.ll @@ -0,0 +1,65 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -LowerWGScope -S | FileCheck %s + +%struct.snork = type { i32 } +%struct.eggs = type { i8 } +%struct.snork.0 = type { %struct.widget, %struct.widget, %struct.widget, %struct.ham } +%struct.widget = type { %struct.wibble } +%struct.wibble = type { [3 x i64] } +%struct.ham = type { %struct.wibble } + +@global = internal addrspace(3) global [12 x %struct.snork] zeroinitializer, align 4 + +; CHECK: @[[WG_NEXT:[a-zA-Z0-9_.]+]] = internal unnamed_addr addrspace(3) global %struct.snork addrspace(4)* undef, align 8 +; CHECK: @[[WG_DONE:[a-zA-Z0-9_.]+]] = internal unnamed_addr addrspace(3) global i1 undef, align 1 + +define internal spir_func void @spam(%struct.eggs addrspace(4)* %arg, %struct.snork.0* byval(%struct.snork.0) align 8 %arg1) align 2 !work_group_scope !0 { +; CHECK: arrayctor.loop: +; CHECK-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi [[STRUCT_SNORK:%.*]] addrspace(4)* [ getelementptr inbounds ([12 x %struct.snork], [12 x %struct.snork] addrspace(4)* addrspacecast ([12 x %struct.snork] addrspace(3)* @global to [12 x %struct.snork] addrspace(4)*), i32 0, i32 0), [[WG_CF:%.*]] ], [ [[WG_VAL_ARRAYCTOR_NEXT:%.*]], [[WG_CF2:%.*]] ] +; CHECK-NEXT: [[TMP4:%.*]] = load i64, i64 addrspace(1)* @__spirv_BuiltInLocalInvocationIndex, align 4 +; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) +; CHECK-NEXT: [[CMPZ3:%.*]] = icmp eq i64 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[CMPZ3]], label [[WG_LEADER1:%.*]], label [[WG_CF2]] +; CHECK: wg_leader1: +; CHECK-NEXT: call spir_func void @bar(%struct.snork addrspace(4)* [[ARRAYCTOR_CUR]]) +; CHECK-NEXT: [[ARRAYCTOR_NEXT:%.*]] = getelementptr inbounds [[STRUCT_SNORK]], [[STRUCT_SNORK]] addrspace(4)* [[ARRAYCTOR_CUR]], i64 1 +; CHECK-NEXT: store [[STRUCT_SNORK]] addrspace(4)* [[ARRAYCTOR_NEXT]], [[STRUCT_SNORK]] addrspace(4)* addrspace(3)* @[[WG_NEXT]], align 8 +; CHECK-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq [[STRUCT_SNORK]] addrspace(4)* [[ARRAYCTOR_NEXT]], getelementptr inbounds (%struct.snork, [[STRUCT_SNORK]] addrspace(4)* getelementptr inbounds ([12 x %struct.snork], [12 x %struct.snork] addrspace(4)* addrspacecast ([12 x %struct.snork] addrspace(3)* @global to [12 x %struct.snork] addrspace(4)*), i32 0, i32 0), i64 12) +; CHECK-NEXT: store i1 [[ARRAYCTOR_DONE]], i1 addrspace(3)* @[[WG_DONE]], align 1 +; CHECK-NEXT: br label [[WG_CF2]] +; CHECK: wg_cf2: +; CHECK-NEXT: call void @_Z22__spirv_ControlBarrierjjj(i32 2, i32 2, i32 272) #0 +; CHECK-NEXT: [[WG_VAL_ARRAYCTOR_DONE:%.*]] = load i1, i1 addrspace(3)* @[[WG_DONE]], align 1 +; CHECK-NEXT: [[WG_VAL_ARRAYCTOR_NEXT]] = load [[STRUCT_SNORK]] addrspace(4)*, [[STRUCT_SNORK]] addrspace(4)* addrspace(3)* @[[WG_NEXT]], align 8 +; CHECK-NEXT: br i1 [[WG_VAL_ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP:%.*]] +; CHECK: arrayctor.cont: +; CHECK-NEXT: ret void +; +entry: + %tmp = alloca %struct.eggs addrspace(4)*, align 8 + store %struct.eggs addrspace(4)* %arg, %struct.eggs addrspace(4)** %tmp, align 8 + %tmp2 = load %struct.eggs addrspace(4)*, %struct.eggs addrspace(4)** %tmp, align 8 + br label %arrayctor.loop + +arrayctor.loop: ; preds = %arrayctor.loop, %entry + %arrayctor.cur = phi %struct.snork addrspace(4)* [ getelementptr inbounds ([12 x %struct.snork], [12 x %struct.snork] addrspace(4)* addrspacecast ([12 x %struct.snork] addrspace(3)* @global to [12 x %struct.snork] addrspace(4)*), i32 0, i32 0), %entry ], [ %arrayctor.next, %arrayctor.loop ] + call spir_func void @bar(%struct.snork addrspace(4)* %arrayctor.cur) + %arrayctor.next = getelementptr inbounds %struct.snork, %struct.snork addrspace(4)* %arrayctor.cur, i64 1 + %arrayctor.done = icmp eq %struct.snork addrspace(4)* %arrayctor.next, getelementptr inbounds (%struct.snork, %struct.snork addrspace(4)* getelementptr inbounds ([12 x %struct.snork], [12 x %struct.snork] addrspace(4)* addrspacecast ([12 x %struct.snork] addrspace(3)* @global to [12 x %struct.snork] addrspace(4)*), i32 0, i32 0), i64 12) + br i1 %arrayctor.done, label %arrayctor.cont, label %arrayctor.loop + +arrayctor.cont: ; preds = %arrayctor.loop + ret void +} + +define linkonce_odr dso_local spir_func void @bar(%struct.snork addrspace(4)* %arg) unnamed_addr align 2 { +bb: + %tmp = alloca %struct.snork addrspace(4)*, align 8 + store %struct.snork addrspace(4)* %arg, %struct.snork addrspace(4)** %tmp, align 8 + %tmp1 = load %struct.snork addrspace(4)*, %struct.snork addrspace(4)** %tmp, align 8 + %tmp2 = getelementptr inbounds %struct.snork, %struct.snork addrspace(4)* %tmp1, i32 0, i32 0 + store i32 0, i32 addrspace(4)* %tmp2, align 4 + ret void +} + +!0 = !{} diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt index d540c23ea0a64..1b7ae47f7b7fb 100644 --- a/sycl/CMakeLists.txt +++ b/sycl/CMakeLists.txt @@ -14,7 +14,7 @@ include(AddSYCLExecutable) set(SYCL_MAJOR_VERSION 2) set(SYCL_MINOR_VERSION 1) set(SYCL_PATCH_VERSION 0) -set(SYCL_DEV_ABI_VERSION 3) +set(SYCL_DEV_ABI_VERSION 4) if (SYCL_ADD_DEV_VERSION_POSTFIX) set(SYCL_VERSION_POSTFIX "-${SYCL_DEV_ABI_VERSION}") endif() @@ -326,7 +326,7 @@ set( SYCL_TOOLCHAIN_DEPLOY_COMPONENTS sycl-headers-extras sycl pi_opencl - pi_level0 + pi_level_zero libsycldevice ) if(OpenCL_INSTALL_KHRONOS_ICD_LOADER AND TARGET ocl-icd) diff --git a/sycl/ReleaseNotes.md b/sycl/ReleaseNotes.md index 70d30cf3cfd5b..7c6963777fb30 100644 --- a/sycl/ReleaseNotes.md +++ b/sycl/ReleaseNotes.md @@ -62,7 +62,7 @@ Release notes for the commit range ba404be..24726df - Added a cache for PI plugins, so subsequent calls for `sycl::device` creation should be cheaper [03dd60d] - A SYCL program will be aborted now if program linking is requested when - using L0 plugin. This is done because L0 doesn't support program linking + using Level Zero plugin. This is done because L0 doesn't support program linking [d4a5b71] - Added a diagnostic on attempt to use `sycl::program::set_spec_constant` when the program is already in compiled or linked state [e2e3d3d] diff --git a/sycl/doc/EnvironmentVariables.md b/sycl/doc/EnvironmentVariables.md index 3d8d5cb41f741..b353135ecb9db 100644 --- a/sycl/doc/EnvironmentVariables.md +++ b/sycl/doc/EnvironmentVariables.md @@ -1,6 +1,6 @@ # Environment Variables -This document describes environment variables that are having effect on DPC++ +This document describes environment variables that are having effect on DPC++ compiler and runtime. ## Controlling DPC++ RT @@ -12,7 +12,7 @@ subject to change. Do not rely on these variables in production code. | Environment variable | Values | Description | | -------------------- | ------ | ----------- | | SYCL_PI_TRACE | Described [below](#sycl_pi_trace-options) | Enable specified level of tracing for PI. | -| SYCL_BE | PI_OPENCL, PI_LEVEL0, PI_CUDA | Force SYCL RT to consider only devices of the specified backend during the device selection. | +| SYCL_BE | PI_OPENCL, PI_LEVEL_ZERO, PI_CUDA | Force SYCL RT to consider only devices of the specified backend during the device selection. | | SYCL_DEVICE_TYPE | One of: CPU, GPU, ACC, HOST | Force SYCL to use the specified device type. If unset, default selection rules are applied. If set to any unlisted value, this control has no effect. If the requested device type is not found, a `cl::sycl::runtime_error` exception is thrown. If a non-default device selector is used, a device must satisfy both the selector and this control to be chosen. This control only has effect on devices created with a selector. | | SYCL_PROGRAM_COMPILE_OPTIONS | String of valid OpenCL compile options | Override compile options for all programs. | | SYCL_PROGRAM_LINK_OPTIONS | String of valid OpenCL link options | Override link options for all programs. | diff --git a/sycl/doc/extensions/ParallelForSimpification/SYCL_INTEL_parallel_for_simplification.asciidoc b/sycl/doc/extensions/ParallelForSimplification/SYCL_INTEL_parallel_for_simplification.asciidoc similarity index 100% rename from sycl/doc/extensions/ParallelForSimpification/SYCL_INTEL_parallel_for_simplification.asciidoc rename to sycl/doc/extensions/ParallelForSimplification/SYCL_INTEL_parallel_for_simplification.asciidoc diff --git a/sycl/doc/extensions/README.md b/sycl/doc/extensions/README.md index d9d107cb7a6f6..30ff1c7c905bd 100644 --- a/sycl/doc/extensions/README.md +++ b/sycl/doc/extensions/README.md @@ -13,7 +13,7 @@ DPC++ extensions status: | [SYCL_INTEL_deduction_guides](deduction_guides/SYCL_INTEL_deduction_guides.asciidoc) | Supported | | | [SYCL_INTEL_device_specific_kernel_queries](DeviceSpecificKernelQueries/SYCL_INTEL_device_specific_kernel_queries.asciidoc) | Proposal | | | [SYCL_INTEL_enqueue_barrier](EnqueueBarrier/enqueue_barrier.asciidoc) | Supported(OpenCL, Level Zero) | | -| [SYCL_INTEL_extended_atomics](ExtendedAtomics/SYCL_INTEL_extended_atomics.asciidoc) | Partially supported(OpenCL: CPU, GPU) | Not supported: pointer types | +| [SYCL_INTEL_extended_atomics](ExtendedAtomics/SYCL_INTEL_extended_atomics.asciidoc) | Supported(OpenCL: CPU, GPU) | | | [SYCL_INTEL_group_algorithms](GroupAlgorithms/SYCL_INTEL_group_algorithms.asciidoc) | Supported(OpenCL) | | | [SYCL_INTEL_group_mask](./GroupMask/SYCL_INTEL_group_mask.asciidoc) | Proposal | | | [FPGA selector](IntelFPGA/FPGASelector.md) | Supported | | diff --git a/sycl/include/CL/sycl/backend/level_zero.hpp b/sycl/include/CL/sycl/backend/level_zero.hpp index 3dd6fa2b8677b..82de4eb4c6a78 100644 --- a/sycl/include/CL/sycl/backend/level_zero.hpp +++ b/sycl/include/CL/sycl/backend/level_zero.hpp @@ -14,37 +14,37 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { -template <> struct interop { +template <> struct interop { using type = ze_driver_handle_t; }; -template <> struct interop { +template <> struct interop { using type = ze_device_handle_t; }; -template <> struct interop { +template <> struct interop { using type = ze_command_queue_handle_t; }; -template <> struct interop { +template <> struct interop { using type = ze_module_handle_t; }; template -struct interop> { +struct interop> { using type = char *; }; template -struct interop> { +struct interop> { using type = char *; }; -namespace level0 { +namespace level_zero { // Implementation of various "make" functions resides in libsycl.so platform make_platform(pi_native_handle NativeHandle); @@ -55,7 +55,7 @@ queue make_queue(const context &Context, pi_native_handle InteropHandle); // Construction of SYCL platform. template ::value>::type * = nullptr> -T make(typename interop::type Interop) { +T make(typename interop::type Interop) { return make_platform(reinterpret_cast(Interop)); } @@ -63,7 +63,7 @@ T make(typename interop::type Interop) { template ::value>::type * = nullptr> T make(const platform &Platform, - typename interop::type Interop) { + typename interop::type Interop) { return make_device(Platform, reinterpret_cast(Interop)); } @@ -71,7 +71,7 @@ T make(const platform &Platform, template ::value>::type * = nullptr> T make(const context &Context, - typename interop::type Interop) { + typename interop::type Interop) { return make_program(Context, reinterpret_cast(Interop)); } @@ -79,10 +79,10 @@ T make(const context &Context, template ::value>::type * = nullptr> T make(const context &Context, - typename interop::type Interop) { + typename interop::type Interop) { return make_queue(Context, reinterpret_cast(Interop)); } -} // namespace level0 +} // namespace level_zero } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/backend_types.hpp b/sycl/include/CL/sycl/backend_types.hpp index 362c6d9d9b5b9..655bbf89d8d39 100644 --- a/sycl/include/CL/sycl/backend_types.hpp +++ b/sycl/include/CL/sycl/backend_types.hpp @@ -18,7 +18,7 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { -enum class backend : char { host, opencl, level0, cuda }; +enum class backend : char { host, opencl, level_zero, cuda }; template struct interop; @@ -30,7 +30,7 @@ inline std::ostream &operator<<(std::ostream &Out, backend be) { case backend::opencl: Out << std::string("opencl"); break; - case backend::level0: + case backend::level_zero: Out << std::string("level-zero"); break; case backend::cuda: diff --git a/sycl/include/CL/sycl/detail/pi.hpp b/sycl/include/CL/sycl/detail/pi.hpp index 5301dd288502a..c106c5cba35bd 100644 --- a/sycl/include/CL/sycl/detail/pi.hpp +++ b/sycl/include/CL/sycl/detail/pi.hpp @@ -57,11 +57,11 @@ bool trace(TraceLevel level); #ifdef SYCL_RT_OS_WINDOWS #define OPENCL_PLUGIN_NAME "pi_opencl.dll" -#define LEVEL0_PLUGIN_NAME "pi_level0.dll" +#define LEVEL_ZERO_PLUGIN_NAME "pi_level_zero.dll" #define CUDA_PLUGIN_NAME "pi_cuda.dll" #else #define OPENCL_PLUGIN_NAME "libpi_opencl.so" -#define LEVEL0_PLUGIN_NAME "libpi_level0.so" +#define LEVEL_ZERO_PLUGIN_NAME "libpi_level_zero.so" #define CUDA_PLUGIN_NAME "libpi_cuda.so" #endif diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index d41feaf07db0a..b4504489662c0 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -734,16 +734,14 @@ class __SYCL_EXPORT handler { // NOTE: the name of this function - "kernel_single_task" - is used by the // Front End to determine kernel invocation kind. template - __attribute__((sycl_kernel)) void - kernel_single_task(const KernelType &KernelFunc) { + __attribute__((sycl_kernel)) void kernel_single_task(KernelType KernelFunc) { KernelFunc(); } // NOTE: the name of these functions - "kernel_parallel_for" - are used by the // Front End to determine kernel invocation kind. template - __attribute__((sycl_kernel)) void - kernel_parallel_for(const KernelType &KernelFunc) { + __attribute__((sycl_kernel)) void kernel_parallel_for(KernelType KernelFunc) { KernelFunc( detail::Builder::getElement(static_cast(nullptr))); } @@ -752,7 +750,7 @@ class __SYCL_EXPORT handler { // used by the Front End to determine kernel invocation kind. template __attribute__((sycl_kernel)) void - kernel_parallel_for_work_group(const KernelType &KernelFunc) { + kernel_parallel_for_work_group(KernelType KernelFunc) { KernelFunc( detail::Builder::getElement(static_cast(nullptr))); } @@ -855,7 +853,7 @@ class __SYCL_EXPORT handler { /// /// \param KernelFunc is a SYCL kernel function. template - void single_task(const KernelType &KernelFunc) { + void single_task(KernelType KernelFunc) { throwIfActionIsCreated(); using NameT = typename detail::get_kernel_name_t::name; @@ -872,17 +870,17 @@ class __SYCL_EXPORT handler { } template - void parallel_for(range<1> NumWorkItems, const KernelType &KernelFunc) { + void parallel_for(range<1> NumWorkItems, KernelType KernelFunc) { parallel_for_lambda_impl(NumWorkItems, std::move(KernelFunc)); } template - void parallel_for(range<2> NumWorkItems, const KernelType &KernelFunc) { + void parallel_for(range<2> NumWorkItems, KernelType KernelFunc) { parallel_for_lambda_impl(NumWorkItems, std::move(KernelFunc)); } template - void parallel_for(range<3> NumWorkItems, const KernelType &KernelFunc) { + void parallel_for(range<3> NumWorkItems, KernelType KernelFunc) { parallel_for_lambda_impl(NumWorkItems, std::move(KernelFunc)); } @@ -945,7 +943,7 @@ class __SYCL_EXPORT handler { template void parallel_for(range NumWorkItems, id WorkItemOffset, - const KernelType &KernelFunc) { + KernelType KernelFunc) { throwIfActionIsCreated(); using NameT = typename detail::get_kernel_name_t::name; @@ -977,8 +975,7 @@ class __SYCL_EXPORT handler { /// \param KernelFunc is a SYCL kernel function. template - void parallel_for(nd_range ExecutionRange, - const KernelType &KernelFunc) { + void parallel_for(nd_range ExecutionRange, KernelType KernelFunc) { throwIfActionIsCreated(); using NameT = typename detail::get_kernel_name_t::name; @@ -1005,8 +1002,7 @@ class __SYCL_EXPORT handler { int Dims, typename Reduction> detail::enable_if_t - parallel_for(nd_range Range, Reduction Redu, - const KernelType &KernelFunc) { + parallel_for(nd_range Range, Reduction Redu, KernelType KernelFunc) { intel::detail::reduCGFunc(*this, KernelFunc, Range, Redu, Redu.getUserAccessor()); } @@ -1019,8 +1015,7 @@ class __SYCL_EXPORT handler { int Dims, typename Reduction> detail::enable_if_t - parallel_for(nd_range Range, Reduction Redu, - const KernelType &KernelFunc) { + parallel_for(nd_range Range, Reduction Redu, KernelType KernelFunc) { intel::detail::reduCGFunc(*this, KernelFunc, Range, Redu, Redu.getUSMPointer()); } @@ -1039,8 +1034,7 @@ class __SYCL_EXPORT handler { int Dims, typename Reduction> detail::enable_if_t - parallel_for(nd_range Range, Reduction Redu, - const KernelType &KernelFunc) { + parallel_for(nd_range Range, Reduction Redu, KernelType KernelFunc) { shared_ptr_class QueueCopy = MQueue; auto RWAcc = Redu.getReadWriteScalarAcc(*this); intel::detail::reduCGFunc(*this, KernelFunc, Range, Redu, @@ -1076,8 +1070,7 @@ class __SYCL_EXPORT handler { template detail::enable_if_t - parallel_for(nd_range Range, Reduction Redu, - const KernelType &KernelFunc) { + parallel_for(nd_range Range, Reduction Redu, KernelType KernelFunc) { // This parallel_for() is lowered to the following sequence: // 1) Call a kernel that a) call user's lambda function and b) performs // one iteration of reduction, storing the partial reductions/sums @@ -1152,7 +1145,7 @@ class __SYCL_EXPORT handler { template void parallel_for_work_group(range NumWorkGroups, - const KernelType &KernelFunc) { + KernelType KernelFunc) { throwIfActionIsCreated(); using NameT = typename detail::get_kernel_name_t::name; @@ -1185,7 +1178,7 @@ class __SYCL_EXPORT handler { int Dims> void parallel_for_work_group(range NumWorkGroups, range WorkGroupSize, - const KernelType &KernelFunc) { + KernelType KernelFunc) { throwIfActionIsCreated(); using NameT = typename detail::get_kernel_name_t::name; @@ -1287,7 +1280,7 @@ class __SYCL_EXPORT handler { /// \param KernelFunc is a lambda that is used if device, queue is bound to, /// is a host device. template - void single_task(kernel Kernel, const KernelType &KernelFunc) { + void single_task(kernel Kernel, KernelType KernelFunc) { throwIfActionIsCreated(); using NameT = typename detail::get_kernel_name_t::name; @@ -1327,7 +1320,7 @@ class __SYCL_EXPORT handler { template void parallel_for(kernel Kernel, range NumWorkItems, - const KernelType &KernelFunc) { + KernelType KernelFunc) { throwIfActionIsCreated(); using NameT = typename detail::get_kernel_name_t::name; @@ -1362,7 +1355,7 @@ class __SYCL_EXPORT handler { template void parallel_for(kernel Kernel, range NumWorkItems, - id WorkItemOffset, const KernelType &KernelFunc) { + id WorkItemOffset, KernelType KernelFunc) { throwIfActionIsCreated(); using NameT = typename detail::get_kernel_name_t::name; @@ -1399,7 +1392,7 @@ class __SYCL_EXPORT handler { template void parallel_for(kernel Kernel, nd_range NDRange, - const KernelType &KernelFunc) { + KernelType KernelFunc) { throwIfActionIsCreated(); using NameT = typename detail::get_kernel_name_t::name; @@ -1441,7 +1434,7 @@ class __SYCL_EXPORT handler { template void parallel_for_work_group(kernel Kernel, range NumWorkGroups, - const KernelType &KernelFunc) { + KernelType KernelFunc) { throwIfActionIsCreated(); using NameT = typename detail::get_kernel_name_t::name; @@ -1479,7 +1472,7 @@ class __SYCL_EXPORT handler { int Dims> void parallel_for_work_group(kernel Kernel, range NumWorkGroups, range WorkGroupSize, - const KernelType &KernelFunc) { + KernelType KernelFunc) { throwIfActionIsCreated(); using NameT = typename detail::get_kernel_name_t::name; diff --git a/sycl/include/CL/sycl/intel/atomic_ref.hpp b/sycl/include/CL/sycl/intel/atomic_ref.hpp index f6e8d4ff68616..1616727f919b8 100644 --- a/sycl/include/CL/sycl/intel/atomic_ref.hpp +++ b/sycl/include/CL/sycl/intel/atomic_ref.hpp @@ -135,8 +135,6 @@ class atomic_ref_base { static_assert(!(std::is_same::value || std::is_same::value), "intel::atomic_ref does not support short type"); - static_assert(!std::is_pointer::value, - "intel::atomic_ref does not yet support pointer types"); static_assert(detail::IsValidAtomicAddressSpace::value, "Invalid atomic address_space. Valid address spaces are: " "global_space, local_space, global_device_space"); @@ -508,12 +506,138 @@ class atomic_ref_impl< }; // Partial specialization for pointer types +// Arithmetic is emulated because target's representation of T* is unknown +// TODO: Find a way to use intptr_t or uintptr_t atomics instead template -class atomic_ref_impl::value>> - : public atomic_ref_base { - // TODO: Implement partial specialization for pointer types +class atomic_ref_impl + : public atomic_ref_base { + +private: + using base_type = + atomic_ref_base; + +public: + using value_type = T *; + using difference_type = ptrdiff_t; + static constexpr size_t required_alignment = sizeof(T *); + static constexpr bool is_always_lock_free = + detail::IsValidAtomicType::value; + static constexpr memory_order default_read_order = + detail::memory_order_traits::read_order; + static constexpr memory_order default_write_order = + detail::memory_order_traits::write_order; + static constexpr memory_order default_read_modify_write_order = DefaultOrder; + static constexpr memory_scope default_scope = DefaultScope; + + using base_type::is_lock_free; + + atomic_ref_impl(T *&ref) : base_type(reinterpret_cast(ref)) {} + + void store(T *operand, memory_order order = default_write_order, + memory_scope scope = default_scope) const noexcept { + base_type::store(reinterpret_cast(operand), order, scope); + } + + T *operator=(T *desired) const noexcept { + store(desired); + return desired; + } + + T *load(memory_order order = default_read_order, + memory_scope scope = default_scope) const noexcept { + return reinterpret_cast(base_type::load(order, scope)); + } + + operator T *() const noexcept { return load(); } + + T *exchange(T *operand, memory_order order = default_read_modify_write_order, + memory_scope scope = default_scope) const noexcept { + return reinterpret_cast(base_type::exchange( + reinterpret_cast(operand), order, scope)); + } + + T *fetch_add(difference_type operand, + memory_order order = default_read_modify_write_order, + memory_scope scope = default_scope) const noexcept { + // TODO: Find a way to avoid compare_exchange here + auto load_order = detail::getLoadOrder(order); + T *expected = load(load_order, scope); + T *desired; + do { + desired = expected + operand; + } while (!compare_exchange_weak(expected, desired, order, scope)); + return expected; + } + + T *operator+=(difference_type operand) const noexcept { + return fetch_add(operand) + operand; + } + + T *operator++(int) const noexcept { return fetch_add(difference_type(1)); } + + T *operator++() const noexcept { + return fetch_add(difference_type(1)) + difference_type(1); + } + + T *fetch_sub(difference_type operand, + memory_order order = default_read_modify_write_order, + memory_scope scope = default_scope) const noexcept { + // TODO: Find a way to avoid compare_exchange here + auto load_order = detail::getLoadOrder(order); + T *expected = load(load_order, scope); + T *desired; + do { + desired = expected - operand; + } while (!compare_exchange_weak(expected, desired, order, scope)); + return expected; + } + + T *operator-=(difference_type operand) const noexcept { + return fetch_sub(operand) - operand; + } + + T *operator--(int) const noexcept { return fetch_sub(difference_type(1)); } + + T *operator--() const noexcept { + return fetch_sub(difference_type(1)) - difference_type(1); + } + + bool + compare_exchange_strong(T *&expected, T *desired, memory_order success, + memory_order failure, + memory_scope scope = default_scope) const noexcept { + return base_type::compare_exchange_strong( + reinterpret_cast(expected), + reinterpret_cast(desired), success, failure, scope); + } + + bool + compare_exchange_strong(T *&expected, T *desired, + memory_order order = default_read_modify_write_order, + memory_scope scope = default_scope) const noexcept { + return compare_exchange_strong(expected, desired, order, order, scope); + } + + bool + compare_exchange_weak(T *&expected, T *desired, memory_order success, + memory_order failure, + memory_scope scope = default_scope) const noexcept { + return base_type::compare_exchange_weak( + reinterpret_cast(expected), + reinterpret_cast(desired), success, failure, scope); + } + + bool + compare_exchange_weak(T *&expected, T *desired, + memory_order order = default_read_modify_write_order, + memory_scope scope = default_scope) const noexcept { + return compare_exchange_weak(expected, desired, order, order, scope); + } + +private: + using base_type::ptr; }; } // namespace detail diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index a8fc3d8ef2c86..19ce5f6ace5bd 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -361,7 +361,7 @@ class __SYCL_EXPORT queue { /// \param CodeLoc contains the code location of user code template event single_task( - const KernelType &KernelFunc + KernelType KernelFunc #ifndef DISABLE_SYCL_INSTRUMENTATION_METADATA , const detail::code_location &CodeLoc = detail::code_location::current() @@ -384,7 +384,7 @@ class __SYCL_EXPORT queue { /// \param CodeLoc contains the code location of user code template event single_task( - event DepEvent, const KernelType &KernelFunc + event DepEvent, KernelType KernelFunc #ifndef DISABLE_SYCL_INSTRUMENTATION_METADATA , const detail::code_location &CodeLoc = detail::code_location::current() @@ -409,7 +409,7 @@ class __SYCL_EXPORT queue { /// \param CodeLoc contains the code location of user code template event single_task( - const vector_class &DepEvents, const KernelType &KernelFunc + const vector_class &DepEvents, KernelType KernelFunc #ifndef DISABLE_SYCL_INSTRUMENTATION_METADATA , const detail::code_location &CodeLoc = detail::code_location::current() @@ -434,7 +434,7 @@ class __SYCL_EXPORT queue { /// \param CodeLoc contains the code location of user code template event parallel_for( - range<1> NumWorkItems, const KernelType &KernelFunc + range<1> NumWorkItems, KernelType KernelFunc #ifndef DISABLE_SYCL_INSTRUMENTATION_METADATA , const detail::code_location &CodeLoc = detail::code_location::current() @@ -454,7 +454,7 @@ class __SYCL_EXPORT queue { /// \param CodeLoc contains the code location of user code template event parallel_for( - range<2> NumWorkItems, const KernelType &KernelFunc + range<2> NumWorkItems, KernelType KernelFunc #ifndef DISABLE_SYCL_INSTRUMENTATION_METADATA , const detail::code_location &CodeLoc = detail::code_location::current() @@ -474,7 +474,7 @@ class __SYCL_EXPORT queue { /// \param CodeLoc contains the code location of user code template event parallel_for( - range<3> NumWorkItems, const KernelType &KernelFunc + range<3> NumWorkItems, KernelType KernelFunc #ifndef DISABLE_SYCL_INSTRUMENTATION_METADATA , const detail::code_location &CodeLoc = detail::code_location::current() @@ -495,7 +495,7 @@ class __SYCL_EXPORT queue { /// \param CodeLoc contains the code location of user code template event parallel_for( - range<1> NumWorkItems, event DepEvent, const KernelType &KernelFunc + range<1> NumWorkItems, event DepEvent, KernelType KernelFunc #ifndef DISABLE_SYCL_INSTRUMENTATION_METADATA , const detail::code_location &CodeLoc = detail::code_location::current() @@ -517,7 +517,7 @@ class __SYCL_EXPORT queue { /// \param CodeLoc contains the code location of user code template event parallel_for( - range<2> NumWorkItems, event DepEvent, const KernelType &KernelFunc + range<2> NumWorkItems, event DepEvent, KernelType KernelFunc #ifndef DISABLE_SYCL_INSTRUMENTATION_METADATA , const detail::code_location &CodeLoc = detail::code_location::current() @@ -539,7 +539,7 @@ class __SYCL_EXPORT queue { /// \param CodeLoc contains the code location of user code template event parallel_for( - range<3> NumWorkItems, event DepEvent, const KernelType &KernelFunc + range<3> NumWorkItems, event DepEvent, KernelType KernelFunc #ifndef DISABLE_SYCL_INSTRUMENTATION_METADATA , const detail::code_location &CodeLoc = detail::code_location::current() @@ -563,7 +563,7 @@ class __SYCL_EXPORT queue { template event parallel_for( range<1> NumWorkItems, const vector_class &DepEvents, - const KernelType &KernelFunc + KernelType KernelFunc #ifndef DISABLE_SYCL_INSTRUMENTATION_METADATA , const detail::code_location &CodeLoc = detail::code_location::current() @@ -587,7 +587,7 @@ class __SYCL_EXPORT queue { template event parallel_for( range<2> NumWorkItems, const vector_class &DepEvents, - const KernelType &KernelFunc + KernelType KernelFunc #ifndef DISABLE_SYCL_INSTRUMENTATION_METADATA , const detail::code_location &CodeLoc = detail::code_location::current() @@ -611,7 +611,7 @@ class __SYCL_EXPORT queue { template event parallel_for( range<3> NumWorkItems, const vector_class &DepEvents, - const KernelType &KernelFunc + KernelType KernelFunc #ifndef DISABLE_SYCL_INSTRUMENTATION_METADATA , const detail::code_location &CodeLoc = detail::code_location::current() @@ -634,8 +634,7 @@ class __SYCL_EXPORT queue { template event parallel_for( - range NumWorkItems, id WorkItemOffset, - const KernelType &KernelFunc + range NumWorkItems, id WorkItemOffset, KernelType KernelFunc #ifndef DISABLE_SYCL_INSTRUMENTATION_METADATA , const detail::code_location &CodeLoc = detail::code_location::current() @@ -664,7 +663,7 @@ class __SYCL_EXPORT queue { int Dims> event parallel_for( range NumWorkItems, id WorkItemOffset, event DepEvent, - const KernelType &KernelFunc + KernelType KernelFunc #ifndef DISABLE_SYCL_INSTRUMENTATION_METADATA , const detail::code_location &CodeLoc = detail::code_location::current() @@ -695,7 +694,7 @@ class __SYCL_EXPORT queue { int Dims> event parallel_for( range NumWorkItems, id WorkItemOffset, - const vector_class &DepEvents, const KernelType &KernelFunc + const vector_class &DepEvents, KernelType KernelFunc #ifndef DISABLE_SYCL_INSTRUMENTATION_METADATA , const detail::code_location &CodeLoc = detail::code_location::current() @@ -723,7 +722,7 @@ class __SYCL_EXPORT queue { template event parallel_for( - nd_range ExecutionRange, const KernelType &KernelFunc + nd_range ExecutionRange, KernelType KernelFunc #ifndef DISABLE_SYCL_INSTRUMENTATION_METADATA , const detail::code_location &CodeLoc = detail::code_location::current() @@ -751,8 +750,7 @@ class __SYCL_EXPORT queue { template event parallel_for( - nd_range ExecutionRange, event DepEvent, - const KernelType &KernelFunc + nd_range ExecutionRange, event DepEvent, KernelType KernelFunc #ifndef DISABLE_SYCL_INSTRUMENTATION_METADATA , const detail::code_location &CodeLoc = detail::code_location::current() @@ -783,7 +781,7 @@ class __SYCL_EXPORT queue { int Dims> event parallel_for( nd_range ExecutionRange, const vector_class &DepEvents, - const KernelType &KernelFunc + KernelType KernelFunc #ifndef DISABLE_SYCL_INSTRUMENTATION_METADATA , const detail::code_location &CodeLoc = detail::code_location::current() diff --git a/sycl/plugins/level_zero/CMakeLists.txt b/sycl/plugins/level_zero/CMakeLists.txt index b7e89af870b48..a90dfc8ac841f 100755 --- a/sycl/plugins/level_zero/CMakeLists.txt +++ b/sycl/plugins/level_zero/CMakeLists.txt @@ -1,14 +1,14 @@ -# PI Level0 plugin library +# PI Level Zero plugin library if(MSVC) - set(L0_LOADER + set(LEVEL_ZERO_LOADER "${LLVM_LIBRARY_OUTPUT_INTDIR}/${CMAKE_STATIC_LIBRARY_PREFIX}ze_loader${CMAKE_STATIC_LIBRARY_SUFFIX}") else() - set(L0_LOADER + set(LEVEL_ZERO_LOADER "${LLVM_LIBRARY_OUTPUT_INTDIR}/${CMAKE_SHARED_LIBRARY_PREFIX}ze_loader${CMAKE_SHARED_LIBRARY_SUFFIX}") endif() -if (NOT DEFINED L0_LIBRARY OR NOT DEFINED L0_INCLUDE_DIR) +if (NOT DEFINED LEVEL_ZERO_LIBRARY OR NOT DEFINED LEVEL_ZERO_INCLUDE_DIR) message(STATUS "Download Level Zero loader and headers from github.com") if (CMAKE_C_COMPILER) list(APPEND AUX_CMAKE_FLAGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}) @@ -16,18 +16,18 @@ if (NOT DEFINED L0_LIBRARY OR NOT DEFINED L0_INCLUDE_DIR) if (CMAKE_CXX_COMPILER) list(APPEND AUX_CMAKE_FLAGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}) endif() - file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/l0_loader_build) - set(L0_LOADER_SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/Level0/l0_loader") - if (NOT DEFINED SYCL_EP_L0_LOADER_SKIP_AUTO_UPDATE) - set(SYCL_EP_L0_LOADER_SKIP_AUTO_UPDATE ${SYCL_EXTERNAL_PROJECTS_SKIP_AUTO_UPDATE}) + file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/level_zero_loader_build) + set(LEVEL_ZERO_LOADER_SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/level_zero/level_zero_loader") + if (NOT DEFINED SYCL_EP_LEVEL_ZERO_LOADER_SKIP_AUTO_UPDATE) + set(SYCL_EP_LEVEL_ZERO_LOADER_SKIP_AUTO_UPDATE ${SYCL_EXTERNAL_PROJECTS_SKIP_AUTO_UPDATE}) endif() - ExternalProject_Add(l0-loader + ExternalProject_Add(level-zero-loader GIT_REPOSITORY https://github.com/oneapi-src/level-zero.git GIT_TAG v0.91.21 - UPDATE_DISCONNECTED ${SYCL_EP_L0_LOADER_SKIP_AUTO_UPDATE} - SOURCE_DIR ${L0_LOADER_SOURCE_DIR} - BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/l0_loader_build" - INSTALL_DIR "${CMAKE_CURRENT_BINARY_DIR}/l0_loader_install" + UPDATE_DISCONNECTED ${SYCL_EP_LEVEL_ZERO_LOADER_SKIP_AUTO_UPDATE} + SOURCE_DIR ${LEVEL_ZERO_LOADER_SOURCE_DIR} + BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/level_zero_loader_build" + INSTALL_DIR "${CMAKE_CURRENT_BINARY_DIR}/level_zero_loader_install" CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM} -DOpenCL_INCLUDE_DIR=${OpenCL_INCLUDE_DIRS} @@ -36,50 +36,50 @@ if (NOT DEFINED L0_LIBRARY OR NOT DEFINED L0_INCLUDE_DIR) ${AUX_CMAKE_FLAGS} STEP_TARGETS configure,build,install DEPENDS ocl-headers - BUILD_BYPRODUCTS ${L0_LOADER} + BUILD_BYPRODUCTS ${LEVEL_ZERO_LOADER} ) - ExternalProject_Add_Step(l0-loader llvminstall + ExternalProject_Add_Step(level-zero-loader llvminstall COMMAND ${CMAKE_COMMAND} -E copy_directory / ${LLVM_BINARY_DIR} - COMMENT "Installing l0-loader into the LLVM binary directory" + COMMENT "Installing level-zero-loader into the LLVM binary directory" DEPENDEES install ) - install(DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/l0_loader_install/" + install(DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/level_zero_loader_install/" DESTINATION "." - COMPONENT l0-loader + COMPONENT level-zero-loader ) - list(APPEND SYCL_TOOLCHAIN_DEPLOY_COMPONENTS l0-loader) + list(APPEND SYCL_TOOLCHAIN_DEPLOY_COMPONENTS level-zero-loader) else() - include_directories("${L0_INCLUDE_DIR}") - file(GLOB L0_LIBRARY_SRC "${L0_LIBRARY}*") - file(COPY ${L0_LIBRARY_SRC} DESTINATION ${LLVM_LIBRARY_OUTPUT_INTDIR}) - add_custom_target(l0-loader DEPENDS ${L0_LIBRARY} COMMENT "Copying Level Zero Loader ...") + include_directories("${LEVEL_ZERO_INCLUDE_DIR}") + file(GLOB LEVEL_ZERO_LIBRARY_SRC "${LEVEL_ZERO_LIBRARY}*") + file(COPY ${LEVEL_ZERO_LIBRARY_SRC} DESTINATION ${LLVM_LIBRARY_OUTPUT_INTDIR}) + add_custom_target(level-zero-loader DEPENDS ${LEVEL_ZERO_LIBRARY} COMMENT "Copying Level Zero Loader ...") endif() -add_library (L0Loader-Headers INTERFACE) -add_library (L0Loader::Headers ALIAS L0Loader-Headers) -target_include_directories(L0Loader-Headers - INTERFACE "${L0_INCLUDE_DIR}" +add_library (LevelZeroLoader-Headers INTERFACE) +add_library (LevelZeroLoader::Headers ALIAS LevelZeroLoader-Headers) +target_include_directories(LevelZeroLoader-Headers + INTERFACE "${LEVEL_ZERO_INCLUDE_DIR}" ) include_directories("${sycl_inc_dir}") include_directories(${OPENCL_INCLUDE}) -add_library(pi_level0 SHARED +add_library(pi_level_zero SHARED "${sycl_inc_dir}/CL/sycl/detail/pi.h" - "${CMAKE_CURRENT_SOURCE_DIR}/pi_level0.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/pi_level0.hpp" + "${CMAKE_CURRENT_SOURCE_DIR}/pi_level_zero.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/pi_level_zero.hpp" ) if (MSVC) # by defining __SYCL_BUILD_SYCL_DLL, we can use __declspec(dllexport) # which are individually tagged for all pi* symbols in pi.h - target_compile_definitions(pi_level0 PRIVATE __SYCL_BUILD_SYCL_DLL) + target_compile_definitions(pi_level_zero PRIVATE __SYCL_BUILD_SYCL_DLL) else() # we set the visibility of all symbols 'hidden' by default. # In pi.h file, we set exported symbols with visibility==default individually - target_compile_options(pi_level0 PUBLIC -fvisibility=hidden) + target_compile_options(pi_level_zero PUBLIC -fvisibility=hidden) # This script file is used to allow exporting pi* symbols only. # All other symbols are regarded as local (hidden) @@ -87,23 +87,23 @@ else() # Filter symbols based on the scope defined in the script file, # and export pi* function symbols in the library. - target_link_libraries( pi_level0 + target_link_libraries( pi_level_zero PRIVATE "-Wl,--version-script=${linker_script}" ) endif() -if (TARGET l0-loader) - add_dependencies(pi_level0 l0-loader) +if (TARGET level-zero-loader) + add_dependencies(pi_level_zero level-zero-loader) endif() - add_dependencies(sycl-toolchain pi_level0) + add_dependencies(sycl-toolchain pi_level_zero) - target_link_libraries(pi_level0 PRIVATE "${L0_LOADER}") + target_link_libraries(pi_level_zero PRIVATE "${LEVEL_ZERO_LOADER}") if (UNIX) - target_link_libraries(pi_level0 PRIVATE pthread) + target_link_libraries(pi_level_zero PRIVATE pthread) endif() -add_common_options(pi_level0) +add_common_options(pi_level_zero) -install(TARGETS pi_level0 - LIBRARY DESTINATION "lib" COMPONENT pi_level0 - RUNTIME DESTINATION "bin" COMPONENT pi_level0) +install(TARGETS pi_level_zero + LIBRARY DESTINATION "lib" COMPONENT pi_level_zero + RUNTIME DESTINATION "bin" COMPONENT pi_level_zero) diff --git a/sycl/plugins/level_zero/pi_level0.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp similarity index 96% rename from sycl/plugins/level_zero/pi_level0.cpp rename to sycl/plugins/level_zero/pi_level_zero.cpp index 47042442f1c45..a77719a2220ae 100644 --- a/sycl/plugins/level_zero/pi_level0.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -1,17 +1,17 @@ -//===----------- pi_level0.cpp - Level Zero Plugin--------------------------==// +//===-------- pi_level_zero.cpp - Level Zero Plugin --------------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -//===----------------------------------------------------------------------===// +//===------------------------------------------------------------------===// -/// \file pi_level0.cpp +/// \file pi_level_zero.cpp /// Implementation of Level Zero Plugin. /// -/// \ingroup sycl_pi_level0 +/// \ingroup sycl_pi_level_zero -#include "pi_level0.hpp" +#include "pi_level_zero.hpp" #include #include #include @@ -26,8 +26,8 @@ namespace { -// Controls L0 calls serialization to w/a L0 driver being not MT ready. -// Recognized values (can be used as a bit mask): +// Controls Level Zero calls serialization to w/a Level Zero driver being not MT +// ready. Recognized values (can be used as a bit mask): enum { ZeSerializeNone = 0, // no locking or blocking (except when SYCL RT requested blocking) @@ -37,10 +37,10 @@ enum { }; static pi_uint32 ZeSerialize = 0; -// This class encapsulates actions taken along with a call to L0 API. +// This class encapsulates actions taken along with a call to Level Zero API. class ZeCall { private: - // The global mutex that is used for total serialization of L0 calls. + // The global mutex that is used for total serialization of Level Zero calls. static std::mutex GlobalLock; public: @@ -61,7 +61,7 @@ class ZeCall { }; std::mutex ZeCall::GlobalLock; -// Controls L0 calls tracing in zePrint. +// Controls Level Zero calls tracing in zePrint. static bool ZeDebug = false; static void zePrint(const char *Format, ...) { @@ -233,12 +233,12 @@ _pi_context::decrementAliveEventsInPool(ze_event_pool_handle_t ZePool) { return ZE_RESULT_SUCCESS; } -// Some opencl extensions we know are supported by all Level0 devices. +// Some opencl extensions we know are supported by all Level Zero devices. constexpr char ZE_SUPPORTED_EXTENSIONS[] = "cl_khr_il_program cl_khr_subgroups cl_intel_subgroups " "cl_intel_subgroups_short cl_intel_required_subgroup_size "; -// Map L0 runtime error code to PI error code +// Map Level Zero runtime error code to PI error code static pi_result mapError(ze_result_t ZeResult) { // TODO: these mapping need to be clarified and synced with the PI API return // values, which is TBD. @@ -373,7 +373,7 @@ pi_result _pi_device::initialize() { // Crate a new command list to be used in a PI call pi_result _pi_device::createCommandList(ze_command_list_handle_t *ZeCommandList) { - // Create the command list, because in L0 commands are added to + // Create the command list, because in Level Zero commands are added to // the command lists, and later are then added to the command queue. // // TODO: Figure out how to lower the overhead of creating a new list @@ -448,7 +448,7 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, return PI_INVALID_VALUE; } - // This is a good time to initialize L0. + // This is a good time to initialize Level Zero. // TODO: We can still safely recover if something goes wrong during the init. // Implement handling segfault using sigaction. // TODO: We should not call zeInit multiples times ever, so @@ -467,7 +467,7 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, return mapError(ZeResult); } - // L0 does not have concept of Platforms, but L0 driver is the + // Level Zero does not have concept of Platforms, but Level Zero driver is the // closest match. if (Platforms && NumEntries > 0) { uint32_t ZeDriverCount = 0; @@ -533,10 +533,10 @@ pi_result piPlatformGetInfo(pi_platform Platform, pi_platform_info ParamName, switch (ParamName) { case PI_PLATFORM_INFO_NAME: - // TODO: Query L0 driver when relevant info is added there. + // TODO: Query Level Zero driver when relevant info is added there. return ReturnValue("Intel(R) Level-Zero"); case PI_PLATFORM_INFO_VENDOR: - // TODO: Query L0 driver when relevant info is added there. + // TODO: Query Level Zero driver when relevant info is added there. return ReturnValue("Intel(R) Corporation"); case PI_PLATFORM_INFO_EXTENSIONS: // Convention adopted from OpenCL: @@ -547,7 +547,7 @@ pi_result piPlatformGetInfo(pi_platform Platform, pi_platform_info ParamName, // // TODO: Check the common extensions supported by all connected devices and // return them. For now, hardcoding some extensions we know are supported by - // all Level0 devices. + // all Level Zero devices. return ReturnValue(ZE_SUPPORTED_EXTENSIONS); case PI_PLATFORM_INFO_PROFILE: // TODO: figure out what this means and how is this used @@ -575,7 +575,7 @@ pi_result piextPlatformGetNativeHandle(pi_platform Platform, assert(NativeHandle); auto ZeDriver = pi_cast(NativeHandle); - // Extract the L0 driver handle from the given PI platform + // Extract the Level Zero driver handle from the given PI platform *ZeDriver = Platform->ZeDriver; return PI_SUCCESS; } @@ -585,7 +585,7 @@ pi_result piextPlatformCreateWithNativeHandle(pi_native_handle NativeHandle, assert(NativeHandle); assert(Platform); - // Create PI platform from the given L0 driver handle. + // Create PI platform from the given Level Zero driver handle. auto ZeDriver = pi_cast(NativeHandle); *Platform = new _pi_platform(ZeDriver); return PI_SUCCESS; @@ -598,7 +598,7 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType, assert(Platform); ze_driver_handle_t ZeDriver = Platform->ZeDriver; - // Get number of devices supporting L0 + // Get number of devices supporting Level Zero uint32_t ZeDeviceCount = 0; const bool AskingForGPU = (DeviceType & PI_DEVICE_TYPE_GPU); const bool AskingForDefault = (DeviceType == PI_DEVICE_TYPE_DEFAULT); @@ -718,7 +718,7 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, return ReturnValue(PI_DEVICE_TYPE_GPU); } case PI_DEVICE_INFO_PARENT_DEVICE: - // TODO: all L0 devices are parent ? + // TODO: all Level Zero devices are parent ? return ReturnValue(pi_device{0}); case PI_DEVICE_INFO_PLATFORM: return ReturnValue(Device->Platform); @@ -729,8 +729,8 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, // "Returns a space separated list of extension names (the extension // names themselves do not contain any spaces) supported by the device." // - // TODO: Use proper mechanism to get this information from Level0 after - // it is added to Level0. + // TODO: Use proper mechanism to get this information from Level Zero after + // it is added to Level Zero. // Hardcoding the few we know are supported by the current hardware. // // @@ -754,7 +754,8 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, // cl_khr_3d_image_writes - Extension to enable writes to 3D image memory // objects. // - // Hardcoding some extensions we know are supported by all Level0 devices. + // Hardcoding some extensions we know are supported by all Level Zero + // devices. SupportedExtensions += (ZE_SUPPORTED_EXTENSIONS); if (ZeDeviceKernelProperties.fp16Supported) SupportedExtensions += ("cl_khr_fp16 "); @@ -784,7 +785,7 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, return ReturnValue(pi_uint32{MaxComputeUnits}); } case PI_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS: - // L0 spec defines only three dimensions + // Level Zero spec defines only three dimensions return ReturnValue(pi_uint32{3}); case PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE: return ReturnValue( @@ -845,7 +846,7 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, return ReturnValue(pi_uint32{Device->RefCount}); case PI_DEVICE_INFO_PARTITION_PROPERTIES: { // It is debatable if SYCL sub-device and partitioning APIs sufficient to - // expose Level0 sub-devices? We start with support of + // expose Level Zero sub-devices? We start with support of // "partition_by_affinity_domain" and "numa" but if that doesn't seem to // be a good fit we could look at adding a more descriptive partitioning // type. @@ -913,7 +914,7 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, case PI_DEVICE_INFO_MEM_BASE_ADDR_ALIGN: // SYCL/OpenCL spec is vague on what this means exactly, but seems to // be for "alignment requirement (in bits) for sub-buffer offsets." - // An OpenCL implementation returns 8*128, but L0 can do just 8, + // An OpenCL implementation returns 8*128, but Level Zero can do just 8, // meaning unaligned access for values of types larger than 8 bits. return ReturnValue(pi_uint32{8}); case PI_DEVICE_INFO_MAX_SAMPLERS: @@ -995,24 +996,24 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, return ReturnValue(pi_uint64{DoubleFPValue}); } case PI_DEVICE_INFO_IMAGE2D_MAX_WIDTH: - // Until L0 provides needed info, hardcode default minimum values required - // by the SYCL specification. + // Until Level Zero provides needed info, hardcode default minimum values + // required by the SYCL specification. return ReturnValue(size_t{8192}); case PI_DEVICE_INFO_IMAGE2D_MAX_HEIGHT: - // Until L0 provides needed info, hardcode default minimum values required - // by the SYCL specification. + // Until Level Zero provides needed info, hardcode default minimum values + // required by the SYCL specification. return ReturnValue(size_t{8192}); case PI_DEVICE_INFO_IMAGE3D_MAX_WIDTH: - // Until L0 provides needed info, hardcode default minimum values required - // by the SYCL specification. + // Until Level Zero provides needed info, hardcode default minimum values + // required by the SYCL specification. return ReturnValue(size_t{2048}); case PI_DEVICE_INFO_IMAGE3D_MAX_HEIGHT: - // Until L0 provides needed info, hardcode default minimum values required - // by the SYCL specification. + // Until Level Zero provides needed info, hardcode default minimum values + // required by the SYCL specification. return ReturnValue(size_t{2048}); case PI_DEVICE_INFO_IMAGE3D_MAX_DEPTH: - // Until L0 provides needed info, hardcode default minimum values required - // by the SYCL specification. + // Until Level Zero provides needed info, hardcode default minimum values + // required by the SYCL specification. return ReturnValue(size_t{2048}); case PI_DEVICE_INFO_IMAGE_MAX_BUFFER_SIZE: return ReturnValue(size_t{ZeDeviceImageProperties.maxImageBufferSize}); @@ -1106,7 +1107,7 @@ pi_result piDevicePartition(pi_device Device, const pi_device_partition_property *Properties, pi_uint32 NumDevices, pi_device *OutDevices, pi_uint32 *OutNumDevices) { - // Other partitioning ways are not supported by L0 + // Other partitioning ways are not supported by Level Zero if (Properties[0] != PI_DEVICE_PARTITION_BY_AFFINITY_DOMAIN || Properties[1] != PI_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE) { return PI_INVALID_VALUE; @@ -1139,7 +1140,7 @@ pi_result piDevicePartition(pi_device Device, auto ZeSubdevices = new ze_device_handle_t[Count]; ZE_CALL(zeDeviceGetSubDevices(Device->ZeDevice, &Count, ZeSubdevices)); - // Wrap the L0 sub-devices into PI sub-devices, and write them out. + // Wrap the Level Zero sub-devices into PI sub-devices, and write them out. for (uint32_t I = 0; I < Count; ++I) { OutDevices[I] = new _pi_device(ZeSubdevices[I], Device->Platform, true /* isSubDevice */); @@ -1183,7 +1184,7 @@ pi_result piextDeviceGetNativeHandle(pi_device Device, assert(NativeHandle); auto ZeDevice = pi_cast(NativeHandle); - // Extract the L0 module handle from the given PI device + // Extract the Level Zero module handle from the given PI device *ZeDevice = Device->ZeDevice; return PI_SUCCESS; } @@ -1195,7 +1196,7 @@ pi_result piextDeviceCreateWithNativeHandle(pi_native_handle NativeHandle, assert(Device); assert(Platform); - // Create PI device from the given L0 device handle. + // Create PI device from the given Level Zero device handle. auto ZeDevice = pi_cast(NativeHandle); *Device = new _pi_device(ZeDevice, Platform); return (*Device)->initialize(); @@ -1208,7 +1209,7 @@ pi_result piContextCreate(const pi_context_properties *Properties, void *UserData), void *UserData, pi_context *RetContext) { - // L0 does not have notion of contexts. + // Level Zero does not have notion of contexts. // Return the device handle (only single device is allowed) as a context // handle. if (NumDevices != 1) { @@ -1387,7 +1388,7 @@ pi_result piextQueueGetNativeHandle(pi_queue Queue, assert(NativeHandle); auto ZeQueue = pi_cast(NativeHandle); - // Extract the L0 queue handle from the given PI queue + // Extract the Level Zero queue handle from the given PI queue *ZeQueue = Queue->ZeCommandQueue; return PI_SUCCESS; } @@ -1437,9 +1438,9 @@ pi_result piMemBufferCreate(pi_context Context, pi_mem_flags Flags, size_t Size, auto HostPtrOrNull = (Flags & PI_MEM_FLAGS_HOST_PTR_USE) ? pi_cast(HostPtr) : nullptr; try { - *RetMem = new _pi_buffer(Context->Device->Platform, - pi_cast(Ptr) /* L0 Memory Handle */, - HostPtrOrNull); + *RetMem = new _pi_buffer( + Context->Device->Platform, + pi_cast(Ptr) /* Level Zero Memory Handle */, HostPtrOrNull); } catch (const std::bad_alloc &) { return PI_OUT_OF_HOST_MEMORY; } catch (...) { @@ -1660,8 +1661,8 @@ pi_result piProgramCreate(pi_context Context, const void *IL, size_t Length, assert(Context); assert(Program); - // NOTE: the L0 module creation is also building the program, so we are - // deferring it until the program is ready to be built in piProgramBuild + // NOTE: the Level Zero module creation is also building the program, so we + // are deferring it until the program is ready to be built in piProgramBuild // and piProgramCompile. Also it is only then we know the build options. // ze_module_desc_t ZeModuleDesc = {}; @@ -1726,7 +1727,7 @@ pi_result piclProgramCreateWithSource(pi_context Context, pi_uint32 Count, const size_t *Lengths, pi_program *RetProgram) { - zePrint("piclProgramCreateWithSource: not supported in L0\n"); + zePrint("piclProgramCreateWithSource: not supported in Level Zero\n"); return PI_INVALID_OPERATION; } @@ -1740,7 +1741,7 @@ pi_result piProgramGetInfo(pi_program Program, pi_program_info ParamName, case PI_PROGRAM_INFO_REFERENCE_COUNT: return ReturnValue(pi_uint32{Program->RefCount}); case PI_PROGRAM_INFO_NUM_DEVICES: - // L0 Module is always for a single device. + // Level Zero Module is always for a single device. return ReturnValue(pi_uint32{1}); case PI_PROGRAM_INFO_DEVICES: return ReturnValue(Program->Context->Device); @@ -1764,7 +1765,7 @@ pi_result piProgramGetInfo(pi_program Program, pi_program_info ParamName, case PI_PROGRAM_INFO_KERNEL_NAMES: try { // There are extra allocations/copying here dictated by the difference - // in L0 and PI interfaces. + // in Level Zero and PI interfaces. uint32_t Count = 0; ZE_CALL(zeModuleGetKernelNames(Program->ZeModule, &Count, nullptr)); char **PNames = new char *[Count]; @@ -1795,7 +1796,9 @@ pi_result piProgramLink(pi_context Context, pi_uint32 NumDevices, const pi_program *InputPrograms, void (*PFnNotify)(pi_program Program, void *UserData), void *UserData, pi_program *RetProgram) { - // TODO: L0 does not [yet] support linking so dummy implementation here. + + // TODO: Level Zero does not [yet] support linking so dummy implementation + // here. assert(NumInputPrograms == 1 && InputPrograms); assert(RetProgram); *RetProgram = InputPrograms[0]; @@ -1812,7 +1815,7 @@ pi_result piProgramCompile( assert(NumInputHeaders == 0); assert(!InputHeaders); - // There is no support foe linking yet in L0 so "compile" actually + // There is no support for linking yet in Level Zero so "compile" actually // does the "build". return piProgramBuild(Program, NumDevices, DeviceList, Options, PFnNotify, UserData); @@ -1858,6 +1861,7 @@ pi_result piProgramBuild(pi_program Program, pi_uint32 NumDevices, ze_device_handle_t ZeDevice = Program->Context->Device->ZeDevice; ZE_CALL(zeModuleCreate(ZeDevice, &Program->ZeModuleDesc, &Program->ZeModule, &Program->ZeBuildLog)); + return PI_SUCCESS; } @@ -1868,14 +1872,14 @@ pi_result piProgramGetBuildInfo(pi_program Program, pi_device Device, ReturnHelper ReturnValue(ParamValueSize, ParamValue, ParamValueSizeRet); if (ParamName == CL_PROGRAM_BINARY_TYPE) { - // TODO: is this the only supported binary type in L0? + // TODO: is this the only supported binary type in Level Zero? // We should probably return CL_PROGRAM_BINARY_TYPE_NONE if asked // before the program was compiled. return ReturnValue( cl_program_binary_type{CL_PROGRAM_BINARY_TYPE_EXECUTABLE}); } if (ParamName == CL_PROGRAM_BUILD_OPTIONS) { - // TODO: how to get module build options out of L0? + // TODO: how to get module build options out of Level Zero? // For the programs that we compiled we can remember the options // passed with piProgramCompile/piProgramBuild, but what can we // return for programs that were built outside and registered @@ -1909,7 +1913,7 @@ pi_result piProgramRelease(pi_program Program) { delete[] Program->ZeModuleDesc.pInputModule; if (Program->ZeBuildLog) zeModuleBuildLogDestroy(Program->ZeBuildLog); - // TODO: call zeModuleDestroy for non-interop L0 modules + // TODO: call zeModuleDestroy for non-interop Level Zero modules delete Program; } return PI_SUCCESS; @@ -1921,7 +1925,7 @@ pi_result piextProgramGetNativeHandle(pi_program Program, assert(NativeHandle); auto ZeModule = pi_cast(NativeHandle); - // Extract the L0 module handle from the given PI program + // Extract the Level Zero module handle from the given PI program *ZeModule = Program->ZeModule; return PI_SUCCESS; } @@ -1935,9 +1939,9 @@ pi_result piextProgramCreateWithNativeHandle(pi_native_handle NativeHandle, auto ZeModule = pi_cast(NativeHandle); - // Create PI program from the given L0 module handle. + // Create PI program from the given Level Zero module handle. // - // TODO: We don't have the real L0 module descriptor with + // TODO: We don't have the real Level Zero module descriptor with // which it was created, but that's only needed for zeModuleCreate, // which we don't expect to be called on the interop program. // @@ -2050,9 +2054,9 @@ pi_result piKernelGetInfo(pi_kernel Kernel, pi_kernel_info ParamName, case PI_KERNEL_INFO_PROGRAM: return ReturnValue(pi_program{Kernel->Program}); case PI_KERNEL_INFO_FUNCTION_NAME: - // TODO: Replace with the line in the comment once bug in the L0 driver will - // be fixed. Problem is that currently L0 driver truncates name of the - // returned kernel if it is longer than 256 symbols. + // TODO: Replace with the line in the comment once bug in the Level Zero + // driver will be fixed. Problem is that currently Level Zero driver + // truncates name of the returned kernel if it is longer than 256 symbols. // // return ReturnValue(ZeKernelProperties.name); return ReturnValue(Kernel->KernelName.c_str()); @@ -2376,7 +2380,7 @@ pi_result piEventGetProfilingInfo(pi_event Event, pi_profiling_info ParamName, } case PI_PROFILING_INFO_COMMAND_QUEUED: case PI_PROFILING_INFO_COMMAND_SUBMIT: - // TODO: Support these when L0 supported is added. + // TODO: Support these when Level Zero supported is added. return ReturnValue(uint64_t{0}); default: zePrint("piEventGetProfilingInfo: not supported ParamName\n"); @@ -3016,8 +3020,8 @@ piEnqueueMemBufferMap(pi_queue Queue, pi_mem Buffer, pi_bool BlockingMap, ZE_CALL(zeCommandListAppendWaitOnEvents(ZeCommandList, NumEventsInWaitList, ZeEventWaitList)); - // TODO: L0 is missing the memory "mapping" capabilities, so we are left - // to doing new memory allocation and a copy (read). + // TODO: Level Zero is missing the memory "mapping" capabilities, so we are + // left to doing new memory allocation and a copy (read). // // TODO: check if the input buffer is already allocated in shared // memory and thus is accessible from the host as is. Can we get SYCL RT @@ -3087,8 +3091,8 @@ pi_result piEnqueueMemUnmap(pi_queue Queue, pi_mem MemObj, void *MappedPtr, ZE_CALL(zeCommandListAppendWaitOnEvents(ZeCommandList, NumEventsInWaitList, ZeEventWaitList)); - // TODO: L0 is missing the memory "mapping" capabilities, so we are left - // to doing copy (write back to the device). + // TODO: Level Zero is missing the memory "mapping" capabilities, so we are + // left to doing copy (write back to the device). // // NOTE: Keep this in sync with the implementation of // piEnqueueMemBufferMap/piEnqueueMemImageMap. @@ -3204,7 +3208,7 @@ enqueueMemImageCommandHelper(pi_command_type CommandType, pi_queue Queue, const ze_image_region_t ZeSrcRegion = getImageRegionHelper(SrcMem, SrcOrigin, Region); - // TODO: L0 does not support row_pitch/slice_pitch for images yet. + // TODO: Level Zero does not support row_pitch/slice_pitch for images yet. // Check that SYCL RT did not want pitch larger than default. #ifndef NDEBUG assert(SrcMem->isImage()); @@ -3229,7 +3233,7 @@ enqueueMemImageCommandHelper(pi_command_type CommandType, pi_queue Queue, const ze_image_region_t ZeDstRegion = getImageRegionHelper(DstMem, DstOrigin, Region); - // TODO: L0 does not support row_pitch/slice_pitch for images yet. + // TODO: Level Zero does not support row_pitch/slice_pitch for images yet. // Check that SYCL RT did not want pitch larger than default. #ifndef NDEBUG assert(DstMem->isImage()); @@ -3360,7 +3364,7 @@ pi_result piMemBufferPartition(pi_mem Buffer, pi_mem_flags Flags, *RetMem = new _pi_buffer(Buffer->Platform, pi_cast(Buffer->getZeHandle()) + - Region->origin /* L0 memory handle */, + Region->origin /* Level Zero memory handle */, nullptr /* Host pointer */, Buffer /* Parent buffer */, Region->origin /* Sub-buffer origin */, Region->size /*Sub-buffer size*/); @@ -3406,7 +3410,7 @@ pi_result piextUSMHostAlloc(void **ResultPtr, pi_context Context, ze_host_mem_alloc_desc_t ZeDesc = {}; ZeDesc.flags = ZE_HOST_MEM_ALLOC_FLAG_DEFAULT; - // TODO: translate PI properties to L0 flags + // TODO: translate PI properties to Level Zero flags ZE_CALL(zeDriverAllocHostMem(Context->Device->Platform->ZeDriver, &ZeDesc, Size, Alignment, ResultPtr)); @@ -3423,7 +3427,7 @@ pi_result piextUSMDeviceAlloc(void **ResultPtr, pi_context Context, // Check that incorrect bits are not set in the properties. assert(!Properties || (Properties && !(*Properties & ~PI_MEM_ALLOC_FLAGS))); - // TODO: translate PI properties to L0 flags + // TODO: translate PI properties to Level Zero flags ze_device_mem_alloc_desc_t ZeDesc = {}; ZeDesc.flags = ZE_DEVICE_MEM_ALLOC_FLAG_DEFAULT; ZeDesc.ordinal = 0; @@ -3443,7 +3447,7 @@ pi_result piextUSMSharedAlloc(void **ResultPtr, pi_context Context, // Check that incorrect bits are not set in the properties. assert(!Properties || (Properties && !(*Properties & ~PI_MEM_ALLOC_FLAGS))); - // TODO: translate PI properties to L0 flags + // TODO: translate PI properties to Level Zero flags ze_host_mem_alloc_desc_t ZeHostDesc = {}; ZeHostDesc.flags = ZE_HOST_MEM_ALLOC_FLAG_DEFAULT; ze_device_mem_alloc_desc_t ZeDevDesc = {}; @@ -3555,7 +3559,7 @@ pi_result piextUSMEnqueuePrefetch(pi_queue Queue, const void *Ptr, size_t Size, // TODO: figure out how to translate "flags" ZE_CALL(zeCommandListAppendMemoryPrefetch(ZeCommandList, Ptr, Size)); - // TODO: L0 does not have a completion "event" with the prefetch API, + // TODO: Level Zero does not have a completion "event" with the prefetch API, // so manually add command to signal our event. ZE_CALL(zeCommandListAppendSignalEvent(ZeCommandList, ZeEvent)); @@ -3638,7 +3642,7 @@ pi_result piextUSMEnqueueMemAdvise(pi_queue Queue, const void *Ptr, ZE_CALL(zeCommandListAppendMemAdvise( ZeCommandList, Queue->Context->Device->ZeDevice, Ptr, Length, ZeAdvice)); - // TODO: L0 does not have a completion "event" with the advise API, + // TODO: Level Zero does not have a completion "event" with the advise API, // so manually add command to signal our event. ZE_CALL(zeCommandListAppendSignalEvent(ZeCommandList, ZeEvent)); @@ -3699,7 +3703,7 @@ pi_result piextUSMGetMemAllocInfo(pi_context Context, const void *Ptr, } case PI_MEM_ALLOC_DEVICE: { // TODO: this wants pi_device, but we didn't remember it, and cannot - // deduct from the L0 device. + // deduct from the Level Zero device. die("piextUSMGetMemAllocInfo: PI_MEM_ALLOC_DEVICE not implemented"); break; } @@ -3760,7 +3764,8 @@ pi_result piextProgramSetSpecializationConstant(pi_program Prog, // Pass SpecValue pointer. Spec constant value is retrieved // by Level-Zero when creating the modul // - // NOTE: SpecSize is unused in L0, the size is known from SPIR-V by SpecID. + // NOTE: SpecSize is unused in Level Zero, the size is known from SPIR-V by + // SpecID. Prog->ZeSpecConstants[SpecID] = reinterpret_cast(SpecValue); return PI_SUCCESS; diff --git a/sycl/plugins/level_zero/pi_level0.hpp b/sycl/plugins/level_zero/pi_level_zero.hpp old mode 100755 new mode 100644 similarity index 86% rename from sycl/plugins/level_zero/pi_level0.hpp rename to sycl/plugins/level_zero/pi_level_zero.hpp index faf39d759c385..a3db143a55a48 --- a/sycl/plugins/level_zero/pi_level0.hpp +++ b/sycl/plugins/level_zero/pi_level_zero.hpp @@ -1,22 +1,22 @@ -//===---------- pi_level0.hpp - Level Zero Plugin -------------------------===// +//===------- pi_level_zero.hpp - Level Zero Plugin -------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -//===----------------------------------------------------------------------===// +//===-----------------------------------------------------------------===// -/// \defgroup sycl_pi_level0 Level Zero Plugin +/// \defgroup sycl_pi_level_zero Level Zero Plugin /// \ingroup sycl_pi -/// \file pi_level0.hpp +/// \file pi_level_zero.hpp /// Declarations for Level Zero Plugin. It is the interface between the /// device-agnostic SYCL runtime layer and underlying Level Zero runtime. /// -/// \ingroup sycl_pi_level0 +/// \ingroup sycl_pi_level_zero -#ifndef PI_LEVEL0_HPP -#define PI_LEVEL0_HPP +#ifndef PI_LEVEL_ZERO_HPP +#define PI_LEVEL_ZERO_HPP #include #include @@ -51,18 +51,19 @@ template <> uint32_t pi_cast(uint64_t Value) { struct _pi_object { _pi_object() : RefCount{1} {} - // L0 doesn't do the reference counting, so we have to do. + // Level Zero doesn't do the reference counting, so we have to do. // Must be atomic to prevent data race when incrementing/decrementing. std::atomic RefCount; }; -// Define the types that are opaque in pi.h in a manner suitabale for L0 plugin +// Define the types that are opaque in pi.h in a manner suitabale for Level Zero +// plugin struct _pi_platform { _pi_platform(ze_driver_handle_t Driver) : ZeDriver{Driver} {} - // L0 lacks the notion of a platform, but there is a driver, which is a - // pretty good fit to keep here. + // Level Zero lacks the notion of a platform, but there is a driver, which is + // a pretty good fit to keep here. ze_driver_handle_t ZeDriver; // Cache versions info from zeDriverGetProperties. @@ -83,14 +84,14 @@ struct _pi_device : _pi_object { // Initialize the entire PI device. pi_result initialize(); - // L0 device handle. + // Level Zero device handle. ze_device_handle_t ZeDevice; // PI platform to which this device belongs. pi_platform Platform; - // Immediate L0 command list for this device, to be used for initializations. - // To be created as: + // Immediate Level Zero command list for this device, to be used for + // initializations. To be created as: // - Immediate command list: So any command appended to it is immediately // offloaded to the device. // - Synchronous: So implicit synchronization is made inside the level-zero @@ -117,7 +118,7 @@ struct _pi_context : _pi_object { : Device{Device}, ZeEventPool{nullptr}, NumEventsAvailableInEventPool{}, NumEventsLiveInEventPool{} {} - // L0 does not have notion of contexts. + // Level Zero does not have notion of contexts. // Keep the device here (must be exactly one) to return it when PI context // is queried for devices. pi_device Device; @@ -164,7 +165,7 @@ struct _pi_queue : _pi_object { _pi_queue(ze_command_queue_handle_t Queue, pi_context Context) : ZeCommandQueue{Queue}, Context{Context} {} - // L0 command queue handle. + // Level Zero command queue handle. ze_command_queue_handle_t ZeCommandQueue; // Keeps the PI context to which this queue belongs. @@ -197,10 +198,10 @@ struct _pi_mem : _pi_object { // Interface of the _pi_mem object - // Get the L0 handle of the current memory object + // Get the Level Zero handle of the current memory object virtual void *getZeHandle() = 0; - // Get a pointer to the L0 handle of the current memory object + // Get a pointer to the Level Zero handle of the current memory object virtual void *getZeHandlePtr() = 0; // Method to get type of the derived object (image or buffer) @@ -241,7 +242,7 @@ struct _pi_buffer final : _pi_mem { bool isSubBuffer() const { return SubBuffer.Parent != nullptr; } - // L0 memory handle is really just a naked pointer. + // Level Zero memory handle is really just a naked pointer. // It is just convenient to have it char * to simplify offset arithmetics. char *ZeMem; @@ -268,7 +269,7 @@ struct _pi_image final : _pi_mem { ze_image_desc_t ZeImageDesc; #endif // !NDEBUG - // L0 image handle. + // Level Zero image handle. ze_image_handle_t ZeImage; }; @@ -278,14 +279,14 @@ struct _pi_event : _pi_object { : ZeEvent{ZeEvent}, ZeEventPool{ZeEventPool}, ZeCommandList{nullptr}, CommandType{CommandType}, Context{Context}, CommandData{nullptr} {} - // L0 event handle. + // Level Zero event handle. ze_event_handle_t ZeEvent; - // L0 event pool handle. + // Level Zero event pool handle. ze_event_pool_handle_t ZeEventPool; - // L0 command list where the command signaling this event was appended to. - // This is currently used to remember/destroy the command list after - // all commands in it are completed, i.e. this event signaled. + // Level Zero command list where the command signaling this event was appended + // to. This is currently used to remember/destroy the command list after all + // commands in it are completed, i.e. this event signaled. ze_command_list_handle_t ZeCommandList; // Keeps the command-queue and command associated with the event. @@ -300,7 +301,7 @@ struct _pi_event : _pi_object { // Opaque data to hold any data needed for CommandType. void *CommandData; - // Methods for translating PI events list into L0 events list + // Methods for translating PI events list into Level Zero events list static ze_event_handle_t *createZeEventList(pi_uint32, const pi_event *); static void deleteZeEventList(ze_event_handle_t *); }; @@ -311,16 +312,16 @@ struct _pi_program : _pi_object { : ZeModuleDesc(ModuleDesc), ZeModule{Module}, ZeBuildLog{nullptr}, Context{Context} {} - // L0 module descriptor. + // Level Zero module descriptor. ze_module_desc_t ZeModuleDesc; - // L0 module handle. + // Level Zero module handle. ze_module_handle_t ZeModule; - // L0 module specialization constants + // Level Zero module specialization constants std::mutex ZeSpecConstantsMutex; std::unordered_map ZeSpecConstants; - // L0 build log. + // Level Zero build log. ze_module_build_log_handle_t ZeBuildLog; // Keep the context of the program. @@ -332,21 +333,21 @@ struct _pi_kernel : _pi_object { const char *KernelName) : ZeKernel{Kernel}, Program{Program}, KernelName(KernelName) {} - // L0 function handle. + // Level Zero function handle. ze_kernel_handle_t ZeKernel; // Keep the program of the kernel. pi_program Program; - // TODO: remove when bug in the L0 runtime will be fixed. + // TODO: remove when bug in the Level Zero runtime will be fixed. std::string KernelName; }; struct _pi_sampler : _pi_object { _pi_sampler(ze_sampler_handle_t Sampler) : ZeSampler{Sampler} {} - // L0 sampler handle. + // Level Zero sampler handle. ze_sampler_handle_t ZeSampler; }; -#endif // PI_LEVEL0_HPP +#endif // PI_LEVEL_ZERO_HPP diff --git a/sycl/source/backend/level_zero.cpp b/sycl/source/backend/level_zero.cpp index 2e62223c0301e..6d7c7a347e89d 100644 --- a/sycl/source/backend/level_zero.cpp +++ b/sycl/source/backend/level_zero.cpp @@ -14,13 +14,13 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { -namespace level0 { +namespace level_zero { using namespace detail; //---------------------------------------------------------------------------- -// Implementation of level0::make +// Implementation of level_zero::make __SYCL_EXPORT platform make_platform(pi_native_handle NativeHandle) { - const auto &Plugin = pi::getPlugin(); + const auto &Plugin = pi::getPlugin(); // Create PI platform first. pi::PiPlatform PiPlatform; Plugin.call(NativeHandle, @@ -32,10 +32,10 @@ __SYCL_EXPORT platform make_platform(pi_native_handle NativeHandle) { } //---------------------------------------------------------------------------- -// Implementation of level0::make +// Implementation of level_zero::make __SYCL_EXPORT device make_device(const platform &Platform, pi_native_handle NativeHandle) { - const auto &Plugin = pi::getPlugin(); + const auto &Plugin = pi::getPlugin(); const auto &PlatformImpl = getSyclObjImpl(Platform); // Create PI device first. pi::PiDevice PiDevice; @@ -47,7 +47,7 @@ __SYCL_EXPORT device make_device(const platform &Platform, } //---------------------------------------------------------------------------- -// Implementation of level0::make +// Implementation of level_zero::make __SYCL_EXPORT program make_program(const context &Context, pi_native_handle NativeHandle) { // Construct the SYCL program from native program. @@ -58,10 +58,10 @@ __SYCL_EXPORT program make_program(const context &Context, } //---------------------------------------------------------------------------- -// Implementation of level0::make +// Implementation of level_zero::make __SYCL_EXPORT queue make_queue(const context &Context, pi_native_handle NativeHandle) { - const auto &Plugin = pi::getPlugin(); + const auto &Plugin = pi::getPlugin(); const auto &ContextImpl = getSyclObjImpl(Context); // Create PI queue first. pi::PiQueue PiQueue; @@ -72,6 +72,6 @@ __SYCL_EXPORT queue make_queue(const context &Context, PiQueue, ContextImpl, ContextImpl->get_async_handler())); } -} // namespace level0 +} // namespace level_zero } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/detail/config.hpp b/sycl/source/detail/config.hpp index 1559f40e5ad86..ac6fe8fbcbd2b 100644 --- a/sycl/source/detail/config.hpp +++ b/sycl/source/detail/config.hpp @@ -15,6 +15,7 @@ #include #include #include +#include #include __SYCL_INLINE_NAMESPACE(cl) { @@ -118,9 +119,10 @@ template <> class SYCLConfig { return BackendPtr; const char *ValStr = BaseT::getRawValue(); - const std::array, 3> SyclBeMap = { + const std::array, 4> SyclBeMap = { {{"PI_OPENCL", backend::opencl}, - {"PI_LEVEL0", backend::level0}, + {"PI_LEVEL_ZERO", backend::level_zero}, + {"PI_LEVEL0", backend::level_zero}, // for backward compatibility {"PI_CUDA", backend::cuda}}}; if (ValStr) { auto It = std::find_if( @@ -130,7 +132,7 @@ template <> class SYCLConfig { }); if (It == SyclBeMap.end()) pi::die("Invalid backend. " - "Valid values are PI_OPENCL/PI_LEVEL0/PI_CUDA"); + "Valid values are PI_OPENCL/PI_LEVEL_ZERO/PI_CUDA"); static backend Backend = It->second; BackendPtr = &Backend; } @@ -161,6 +163,6 @@ template <> class SYCLConfig { } }; -} // __SYCL_INLINE_NAMESPACE(cl) -} // namespace sycl } // namespace detail +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/detail/pi.cpp b/sycl/source/detail/pi.cpp index 58df2a3f7ed9f..68b9f3cf59a27 100644 --- a/sycl/source/detail/pi.cpp +++ b/sycl/source/detail/pi.cpp @@ -215,7 +215,7 @@ bool findPlugins(vector_class> &PluginNames) { // env only. // PluginNames.emplace_back(OPENCL_PLUGIN_NAME, backend::opencl); - PluginNames.emplace_back(LEVEL0_PLUGIN_NAME, backend::level0); + PluginNames.emplace_back(LEVEL_ZERO_PLUGIN_NAME, backend::level_zero); PluginNames.emplace_back(CUDA_PLUGIN_NAME, backend::cuda); return true; } @@ -319,11 +319,11 @@ static void initializePlugins(vector_class *Plugins) { PluginNames[I].first.find("cuda") != std::string::npos) { // Use the CUDA plugin as the GlobalPlugin GlobalPlugin = std::make_shared(PluginInformation, backend::cuda); - } else if (InteropBE == backend::level0 && - PluginNames[I].first.find("level0") != std::string::npos) { - // Use the LEVEL0 plugin as the GlobalPlugin + } else if (InteropBE == backend::level_zero && + PluginNames[I].first.find("level_zero") != std::string::npos) { + // Use the LEVEL_ZERO plugin as the GlobalPlugin GlobalPlugin = - std::make_shared(PluginInformation, backend::level0); + std::make_shared(PluginInformation, backend::level_zero); } Plugins->emplace_back(plugin(PluginInformation, PluginNames[I].second)); if (trace(TraceLevel::PI_TRACE_BASIC)) @@ -395,7 +395,7 @@ template const plugin &getPlugin() { } template const plugin &getPlugin(); -template const plugin &getPlugin(); +template const plugin &getPlugin(); // Report error and no return (keeps compiler from printing warnings). // TODO: Probably change that to throw a catchable exception, diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 57a25c6c93aec..a4d7f162c1ab6 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -788,11 +788,11 @@ ProgramManager::build(ProgramPtr Program, const ContextImplPtr Context, LinkOpts = LinkOptions.c_str(); } - // L0 plugin doesn't support piProgramCompile/piProgramLink commands, program - // is built during piProgramCreate. + // Level-Zero plugin doesn't support piProgramCompile/piProgramLink commands, + // program is built during piProgramCreate. // TODO: remove this check as soon as piProgramCompile/piProgramLink will be - // implemented in L0 plugin. - if (Context->getPlugin().getBackend() == backend::level0) { + // implemented in Level-Zero plugin. + if (Context->getPlugin().getBackend() == backend::level_zero) { LinkDeviceLibs = false; } diff --git a/sycl/source/device_selector.cpp b/sycl/source/device_selector.cpp index 22b3a613467ec..831ae5f124bfa 100644 --- a/sycl/source/device_selector.cpp +++ b/sycl/source/device_selector.cpp @@ -19,13 +19,13 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { // Utility function to check if device is of the preferred backend. -// Currently preference is given to the level0 backend. +// Currently preference is given to the level_zero backend. static bool isDeviceOfPreferredSyclBe(const device &Device) { if (Device.is_host()) return false; return detail::getSyclObjImpl(Device)->getPlugin().getBackend() == - backend::level0; + backend::level_zero; } device device_selector::select_device() const { diff --git a/sycl/test/CMakeLists.txt b/sycl/test/CMakeLists.txt index 4010c13d3b219..882de8263cb36 100644 --- a/sycl/test/CMakeLists.txt +++ b/sycl/test/CMakeLists.txt @@ -81,17 +81,17 @@ add_lit_testsuite(check-sycl-opencl "Running the SYCL regression tests for OpenC ) set_target_properties(check-sycl-opencl PROPERTIES FOLDER "SYCL tests") -add_lit_testsuite(check-sycl-level0 "Running the SYCL regression tests for Level Zero" +add_lit_testsuite(check-sycl-level-zero "Running the SYCL regression tests for Level Zero" ${CMAKE_CURRENT_BINARY_DIR} ARGS ${RT_TEST_ARGS} - PARAMS "SYCL_BE=PI_LEVEL0" + PARAMS "SYCL_BE=PI_LEVEL_ZERO" DEPENDS ${SYCL_TEST_DEPS} EXCLUDE_FROM_CHECK_ALL ) -set_target_properties(check-sycl-level0 PROPERTIES FOLDER "SYCL tests") +set_target_properties(check-sycl-level-zero PROPERTIES FOLDER "SYCL tests") add_custom_target(check-sycl) -add_dependencies(check-sycl check-sycl-opencl check-sycl-level0) +add_dependencies(check-sycl check-sycl-opencl check-sycl-level-zero) set_target_properties(check-sycl PROPERTIES FOLDER "SYCL tests") if(SYCL_BUILD_PI_CUDA) diff --git a/sycl/test/abi/pi_level0_symbol_check.dump b/sycl/test/abi/pi_level_zero_symbol_check.dump similarity index 98% rename from sycl/test/abi/pi_level0_symbol_check.dump rename to sycl/test/abi/pi_level_zero_symbol_check.dump index f6bb19aa16a5b..7c8c74b8cc77d 100644 --- a/sycl/test/abi/pi_level0_symbol_check.dump +++ b/sycl/test/abi/pi_level_zero_symbol_check.dump @@ -3,7 +3,7 @@ # DO NOT EDIT IT MANUALLY. Refer to sycl/docs/ABIPolicyGuide.md for more info. ################################################################################ -# RUN: env LLVM_BIN_PATH=%llvm_build_bin_dir python %sycl_tools_src_dir/abi_check.py --mode check_symbols --reference %s %sycl_libs_dir/libpi_level0.so +# RUN: env LLVM_BIN_PATH=%llvm_build_bin_dir python %sycl_tools_src_dir/abi_check.py --mode check_symbols --reference %s %sycl_libs_dir/libpi_level_zero.so # REQUIRES: linux piContextCreate diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 9ff2e1195cac9..0b336464462d3 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3589,6 +3589,10 @@ _ZN2cl10__host_std9u_sub_satEhh _ZN2cl10__host_std9u_sub_satEjj _ZN2cl10__host_std9u_sub_satEmm _ZN2cl10__host_std9u_sub_satEtt +_ZN2cl4sycl10level_zero10make_queueERKNS0_7contextEm +_ZN2cl4sycl10level_zero11make_deviceERKNS0_8platformEm +_ZN2cl4sycl10level_zero12make_programERKNS0_7contextEm +_ZN2cl4sycl10level_zero13make_platformEm _ZN2cl4sycl11malloc_hostEmRKNS0_5queueE _ZN2cl4sycl11malloc_hostEmRKNS0_7contextE _ZN2cl4sycl13aligned_allocEmmRKNS0_5queueENS0_3usm5allocE @@ -3621,8 +3625,8 @@ _ZN2cl4sycl5eventC1Ev _ZN2cl4sycl5eventC2EP9_cl_eventRKNS0_7contextE _ZN2cl4sycl5eventC2ESt10shared_ptrINS0_6detail10event_implEE _ZN2cl4sycl5eventC2Ev -_ZN2cl4sycl5intel6detail17reduComputeWGSizeEmmRm _ZN2cl4sycl5intel6detail16reduGetMaxWGSizeESt10shared_ptrINS0_6detail10queue_implEEm +_ZN2cl4sycl5intel6detail17reduComputeWGSizeEmmRm _ZN2cl4sycl5queue10mem_adviseEPKvm14_pi_mem_advice _ZN2cl4sycl5queue10wait_proxyERKNS0_6detail13code_locationE _ZN2cl4sycl5queue11submit_implESt8functionIFvRNS0_7handlerEEERKNS0_6detail13code_locationE @@ -3728,15 +3732,15 @@ _ZN2cl4sycl6detail12sampler_implD2Ev _ZN2cl4sycl6detail12split_stringERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEc _ZN2cl4sycl6detail13MemoryManager12prefetch_usmEPvSt10shared_ptrINS1_10queue_implEEmSt6vectorIP9_pi_eventSaIS9_EERS9_ _ZN2cl4sycl6detail13MemoryManager13releaseMemObjESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEPvS8_ -_ZN2cl4sycl6detail13MemoryManager19allocateImageObjectESt10shared_ptrINS1_12context_implEEPvbRK14_pi_image_descRK16_pi_image_formatRKNS0_13property_listE _ZN2cl4sycl6detail13MemoryManager16allocateMemImageESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEPvbmRK14_pi_image_descRK16_pi_image_formatRKS3_INS1_10event_implEERKS5_RKNS0_13property_listERP9_pi_event -_ZN2cl4sycl6detail13MemoryManager24allocateInteropMemObjectESt10shared_ptrINS1_12context_implEEPvRKS3_INS1_10event_implEERKS5_RKNS0_13property_listERP9_pi_event -_ZN2cl4sycl6detail13MemoryManager20allocateBufferObjectESt10shared_ptrINS1_12context_implEEPvbmRKNS0_13property_listE +_ZN2cl4sycl6detail13MemoryManager17allocateMemBufferESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEPvbmRKS3_INS1_10event_implEERKS5_RKNS0_13property_listERP9_pi_event _ZN2cl4sycl6detail13MemoryManager18allocateHostMemoryEPNS1_11SYCLMemObjIEPvbmRKNS0_13property_listE -_ZN2cl4sycl6detail13MemoryManager19wrapIntoImageBufferESt10shared_ptrINS1_12context_implEEPvPNS1_11SYCLMemObjIE _ZN2cl4sycl6detail13MemoryManager18releaseImageBufferESt10shared_ptrINS1_12context_implEEPv -_ZN2cl4sycl6detail13MemoryManager17allocateMemBufferESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEPvbmRKS3_INS1_10event_implEERKS5_RKNS0_13property_listERP9_pi_event +_ZN2cl4sycl6detail13MemoryManager19allocateImageObjectESt10shared_ptrINS1_12context_implEEPvbRK14_pi_image_descRK16_pi_image_formatRKNS0_13property_listE +_ZN2cl4sycl6detail13MemoryManager19wrapIntoImageBufferESt10shared_ptrINS1_12context_implEEPvPNS1_11SYCLMemObjIE +_ZN2cl4sycl6detail13MemoryManager20allocateBufferObjectESt10shared_ptrINS1_12context_implEEPvbmRKNS0_13property_listE _ZN2cl4sycl6detail13MemoryManager20allocateMemSubBufferESt10shared_ptrINS1_12context_implEEPvmmNS0_5rangeILi3EEESt6vectorIS3_INS1_10event_implEESaISB_EERP9_pi_event +_ZN2cl4sycl6detail13MemoryManager24allocateInteropMemObjectESt10shared_ptrINS1_12context_implEEPvRKS3_INS1_10event_implEERKS5_RKNS0_13property_listERP9_pi_event _ZN2cl4sycl6detail13MemoryManager3mapEPNS1_11SYCLMemObjIEPvSt10shared_ptrINS1_10queue_implEENS0_6access4modeEjNS0_5rangeILi3EEESC_NS0_2idILi3EEEjSt6vectorIP9_pi_eventSaISH_EERSH_ _ZN2cl4sycl6detail13MemoryManager4copyEPNS1_11SYCLMemObjIEPvSt10shared_ptrINS1_10queue_implEEjNS0_5rangeILi3EEESA_NS0_2idILi3EEEjS5_S8_jSA_SA_SC_jSt6vectorIP9_pi_eventSaISF_EERSF_ _ZN2cl4sycl6detail13MemoryManager4fillEPNS1_11SYCLMemObjIEPvSt10shared_ptrINS1_10queue_implEEmPKcjNS0_5rangeILi3EEESC_NS0_2idILi3EEEjSt6vectorIP9_pi_eventSaISH_EERSH_ @@ -3791,10 +3795,6 @@ _ZN2cl4sycl6kernelC1EP10_cl_kernelRKNS0_7contextE _ZN2cl4sycl6kernelC1ESt10shared_ptrINS0_6detail11kernel_implEE _ZN2cl4sycl6kernelC2EP10_cl_kernelRKNS0_7contextE _ZN2cl4sycl6kernelC2ESt10shared_ptrINS0_6detail11kernel_implEE -_ZN2cl4sycl6level010make_queueERKNS0_7contextEm -_ZN2cl4sycl6level011make_deviceERKNS0_8platformEm -_ZN2cl4sycl6level012make_programERKNS0_7contextEm -_ZN2cl4sycl6level013make_platformEm _ZN2cl4sycl6mallocEmRKNS0_5queueENS0_3usm5allocE _ZN2cl4sycl6mallocEmRKNS0_6deviceERKNS0_7contextENS0_3usm5allocE _ZN2cl4sycl6opencl10make_queueERKNS0_7contextEm diff --git a/sycl/test/atomic_ref/add.cpp b/sycl/test/atomic_ref/add.cpp index b152166e4f966..cfe943d176299 100644 --- a/sycl/test/atomic_ref/add.cpp +++ b/sycl/test/atomic_ref/add.cpp @@ -12,11 +12,11 @@ using namespace sycl; using namespace sycl::intel; -template +template void add_fetch_test(queue q, size_t N) { T sum = 0; std::vector output(N); - std::fill(output.begin(), output.end(), 0); + std::fill(output.begin(), output.end(), T(0)); { buffer sum_buf(&sum, 1); buffer output_buf(output.data(), output.size()); @@ -27,29 +27,29 @@ void add_fetch_test(queue q, size_t N) { cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); auto atm = atomic_ref(sum[0]); - out[gid] = atm.fetch_add(T(1)); + out[gid] = atm.fetch_add(Difference(1)); }); }); } // All work-items increment by 1, so final value should be equal to N - assert(sum == N); + assert(sum == T(N)); // Fetch returns original value: will be in [0, N-1] auto min_e = std::min_element(output.begin(), output.end()); auto max_e = std::max_element(output.begin(), output.end()); - assert(*min_e == 0 && *max_e == N - 1); + assert(*min_e == T(0) && *max_e == T(N - 1)); // Intermediate values should be unique std::sort(output.begin(), output.end()); assert(std::unique(output.begin(), output.end()) == output.end()); } -template +template void add_plus_equal_test(queue q, size_t N) { T sum = 0; std::vector output(N); - std::fill(output.begin(), output.end(), 0); + std::fill(output.begin(), output.end(), T(0)); { buffer sum_buf(&sum, 1); buffer output_buf(output.data(), output.size()); @@ -60,29 +60,29 @@ void add_plus_equal_test(queue q, size_t N) { cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); auto atm = atomic_ref(sum[0]); - out[gid] = atm += T(1); + out[gid] = atm += Difference(1); }); }); } // All work-items increment by 1, so final value should be equal to N - assert(sum == N); + assert(sum == T(N)); // += returns updated value: will be in [1, N] auto min_e = std::min_element(output.begin(), output.end()); auto max_e = std::max_element(output.begin(), output.end()); - assert(*min_e == 1 && *max_e == N); + assert(*min_e == T(1) && *max_e == T(N)); // Intermediate values should be unique std::sort(output.begin(), output.end()); assert(std::unique(output.begin(), output.end()) == output.end()); } -template +template void add_pre_inc_test(queue q, size_t N) { T sum = 0; std::vector output(N); - std::fill(output.begin(), output.end(), 0); + std::fill(output.begin(), output.end(), T(0)); { buffer sum_buf(&sum, 1); buffer output_buf(output.data(), output.size()); @@ -99,23 +99,23 @@ void add_pre_inc_test(queue q, size_t N) { } // All work-items increment by 1, so final value should be equal to N - assert(sum == N); + assert(sum == T(N)); // Pre-increment returns updated value: will be in [1, N] auto min_e = std::min_element(output.begin(), output.end()); auto max_e = std::max_element(output.begin(), output.end()); - assert(*min_e == 1 && *max_e == N); + assert(*min_e == T(1) && *max_e == T(N)); // Intermediate values should be unique std::sort(output.begin(), output.end()); assert(std::unique(output.begin(), output.end()) == output.end()); } -template +template void add_post_inc_test(queue q, size_t N) { T sum = 0; std::vector output(N); - std::fill(output.begin(), output.end(), 0); + std::fill(output.begin(), output.end(), T(0)); { buffer sum_buf(&sum, 1); buffer output_buf(output.data(), output.size()); @@ -132,24 +132,24 @@ void add_post_inc_test(queue q, size_t N) { } // All work-items increment by 1, so final value should be equal to N - assert(sum == N); + assert(sum == T(N)); // Post-increment returns original value: will be in [0, N-1] auto min_e = std::min_element(output.begin(), output.end()); auto max_e = std::max_element(output.begin(), output.end()); - assert(*min_e == 0 && *max_e == N - 1); + assert(*min_e == T(0) && *max_e == T(N - 1)); // Intermediate values should be unique std::sort(output.begin(), output.end()); assert(std::unique(output.begin(), output.end()) == output.end()); } -template +template void add_test(queue q, size_t N) { - add_fetch_test(q, N); - add_plus_equal_test(q, N); - add_pre_inc_test(q, N); - add_post_inc_test(q, N); + add_fetch_test(q, N); + add_plus_equal_test(q, N); + add_pre_inc_test(q, N); + add_post_inc_test(q, N); } // Floating-point types do not support pre- or post-increment @@ -173,8 +173,6 @@ int main() { } constexpr int N = 32; - - // TODO: Enable missing tests when supported add_test(q, N); add_test(q, N); add_test(q, N); @@ -183,7 +181,7 @@ int main() { add_test(q, N); add_test(q, N); add_test(q, N); - //add_test(q, N); + add_test(q, N); std::cout << "Test passed." << std::endl; } diff --git a/sycl/test/atomic_ref/compare_exchange.cpp b/sycl/test/atomic_ref/compare_exchange.cpp index 8f563fccb65fd..11c2caa6ef3c4 100644 --- a/sycl/test/atomic_ref/compare_exchange.cpp +++ b/sycl/test/atomic_ref/compare_exchange.cpp @@ -16,10 +16,10 @@ class compare_exchange_kernel; template void compare_exchange_test(queue q, size_t N) { - const T initial = std::numeric_limits::max(); + const T initial = T(N); T compare_exchange = initial; std::vector output(N); - std::fill(output.begin(), output.end(), 0); + std::fill(output.begin(), output.end(), T(0)); { buffer compare_exchange_buf(&compare_exchange, 1); buffer output_buf(output.data(), output.size()); @@ -27,15 +27,16 @@ void compare_exchange_test(queue q, size_t N) { q.submit([&](handler &cgh) { auto exc = compare_exchange_buf.template get_access(cgh); auto out = output_buf.template get_access(cgh); - cgh.parallel_for>(range<1>(N), [=](item<1> it) { - int gid = it.get_id(0); + cgh.parallel_for>(range<1>(N), [=](item<1> + it) { + size_t gid = it.get_id(0); auto atm = atomic_ref(exc[0]); - T result = initial; + T result = T(N); // Avoid copying pointer bool success = atm.compare_exchange_strong(result, (T)gid); if (success) { out[gid] = result; } else { - out[gid] = gid; + out[gid] = T(gid); } }); }); @@ -45,7 +46,7 @@ void compare_exchange_test(queue q, size_t N) { assert(std::count(output.begin(), output.end(), initial) == 1); // All other values should be the index itself or the sentinel value - for (int i = 0; i < N; ++i) { + for (size_t i = 0; i < N; ++i) { assert(output[i] == T(i) || output[i] == initial); } } @@ -59,8 +60,6 @@ int main() { } constexpr int N = 32; - - // TODO: Enable missing tests when supported compare_exchange_test(q, N); compare_exchange_test(q, N); compare_exchange_test(q, N); @@ -69,7 +68,7 @@ int main() { compare_exchange_test(q, N); compare_exchange_test(q, N); compare_exchange_test(q, N); - //compare_exchange_test(q, N); + compare_exchange_test(q, N); std::cout << "Test passed." << std::endl; } diff --git a/sycl/test/atomic_ref/exchange.cpp b/sycl/test/atomic_ref/exchange.cpp index 2ce1292cfdd55..b4445928ea075 100644 --- a/sycl/test/atomic_ref/exchange.cpp +++ b/sycl/test/atomic_ref/exchange.cpp @@ -16,10 +16,10 @@ class exchange_kernel; template void exchange_test(queue q, size_t N) { - const T initial = std::numeric_limits::max(); + const T initial = T(N); T exchange = initial; std::vector output(N); - std::fill(output.begin(), output.end(), 0); + std::fill(output.begin(), output.end(), T(0)); { buffer exchange_buf(&exchange, 1); buffer output_buf(output.data(), output.size()); @@ -28,9 +28,9 @@ void exchange_test(queue q, size_t N) { auto exc = exchange_buf.template get_access(cgh); auto out = output_buf.template get_access(cgh); cgh.parallel_for>(range<1>(N), [=](item<1> it) { - int gid = it.get_id(0); + size_t gid = it.get_id(0); auto atm = atomic_ref(exc[0]); - out[gid] = atm.exchange(gid); + out[gid] = atm.exchange(T(gid)); }); }); } @@ -52,8 +52,6 @@ int main() { } constexpr int N = 32; - - // TODO: Enable missing tests when supported exchange_test(q, N); exchange_test(q, N); exchange_test(q, N); @@ -62,7 +60,7 @@ int main() { exchange_test(q, N); exchange_test(q, N); exchange_test(q, N); - //exchange_test(q, N); + exchange_test(q, N); std::cout << "Test passed." << std::endl; } diff --git a/sycl/test/atomic_ref/load.cpp b/sycl/test/atomic_ref/load.cpp index 274191b9a5ac3..30ae13e16e65e 100644 --- a/sycl/test/atomic_ref/load.cpp +++ b/sycl/test/atomic_ref/load.cpp @@ -16,10 +16,10 @@ class load_kernel; template void load_test(queue q, size_t N) { - T initial = 42; + T initial = T(42); T load = initial; std::vector output(N); - std::fill(output.begin(), output.end(), 0); + std::fill(output.begin(), output.end(), T(0)); { buffer load_buf(&load, 1); buffer output_buf(output.data(), output.size()); @@ -28,7 +28,7 @@ void load_test(queue q, size_t N) { auto ld = load_buf.template get_access(cgh); auto out = output_buf.template get_access(cgh); cgh.parallel_for>(range<1>(N), [=](item<1> it) { - int gid = it.get_id(0); + size_t gid = it.get_id(0); auto atm = atomic_ref(ld[0]); out[gid] = atm.load(); }); @@ -49,8 +49,6 @@ int main() { } constexpr int N = 32; - - // TODO: Enable missing tests when supported load_test(q, N); load_test(q, N); load_test(q, N); @@ -59,7 +57,7 @@ int main() { load_test(q, N); load_test(q, N); load_test(q, N); - //load_test(q, N); + load_test(q, N); std::cout << "Test passed." << std::endl; } diff --git a/sycl/test/atomic_ref/max.cpp b/sycl/test/atomic_ref/max.cpp index c8bccf1c28067..0c95653b8219b 100644 --- a/sycl/test/atomic_ref/max.cpp +++ b/sycl/test/atomic_ref/max.cpp @@ -57,8 +57,6 @@ int main() { } constexpr int N = 32; - - // TODO: Enable missing tests when supported max_test(q, N); max_test(q, N); max_test(q, N); @@ -67,7 +65,6 @@ int main() { max_test(q, N); max_test(q, N); max_test(q, N); - //max_test(q, N); std::cout << "Test passed." << std::endl; } diff --git a/sycl/test/atomic_ref/min.cpp b/sycl/test/atomic_ref/min.cpp index 8313c4931136c..6a0e32ca14bb5 100644 --- a/sycl/test/atomic_ref/min.cpp +++ b/sycl/test/atomic_ref/min.cpp @@ -55,8 +55,6 @@ int main() { } constexpr int N = 32; - - // TODO: Enable missing tests when supported min_test(q, N); min_test(q, N); min_test(q, N); @@ -65,7 +63,6 @@ int main() { min_test(q, N); min_test(q, N); min_test(q, N); - //min_test(q, N); std::cout << "Test passed." << std::endl; } diff --git a/sycl/test/atomic_ref/store.cpp b/sycl/test/atomic_ref/store.cpp index eebdba5ced095..db076ee994a3d 100644 --- a/sycl/test/atomic_ref/store.cpp +++ b/sycl/test/atomic_ref/store.cpp @@ -16,14 +16,14 @@ class store_kernel; template void store_test(queue q, size_t N) { - T initial = std::numeric_limits::max(); + T initial = T(N); T store = initial; { buffer store_buf(&store, 1); q.submit([&](handler &cgh) { auto st = store_buf.template get_access(cgh); cgh.parallel_for>(range<1>(N), [=](item<1> it) { - int gid = it.get_id(0); + size_t gid = it.get_id(0); auto atm = atomic_ref(st[0]); atm.store(T(gid)); }); @@ -45,8 +45,6 @@ int main() { } constexpr int N = 32; - - // TODO: Enable missing tests when supported store_test(q, N); store_test(q, N); store_test(q, N); @@ -55,7 +53,7 @@ int main() { store_test(q, N); store_test(q, N); store_test(q, N); - //store_test(q, N); + store_test(q, N); std::cout << "Test passed." << std::endl; } diff --git a/sycl/test/atomic_ref/sub.cpp b/sycl/test/atomic_ref/sub.cpp index 52e338048e7be..10ed75d21da25 100644 --- a/sycl/test/atomic_ref/sub.cpp +++ b/sycl/test/atomic_ref/sub.cpp @@ -12,11 +12,11 @@ using namespace sycl; using namespace sycl::intel; -template +template void sub_fetch_test(queue q, size_t N) { - T val = N; + T val = T(N); std::vector output(N); - std::fill(output.begin(), output.end(), 0); + std::fill(output.begin(), output.end(), T(0)); { buffer val_buf(&val, 1); buffer output_buf(output.data(), output.size()); @@ -27,29 +27,29 @@ void sub_fetch_test(queue q, size_t N) { cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); auto atm = atomic_ref(val[0]); - out[gid] = atm.fetch_sub(T(1)); + out[gid] = atm.fetch_sub(Difference(1)); }); }); } // All work-items decrement by 1, so final value should be equal to 0 - assert(val == 0); + assert(val == T(0)); // Fetch returns original value: will be in [1, N] auto min_e = std::min_element(output.begin(), output.end()); auto max_e = std::max_element(output.begin(), output.end()); - assert(*min_e == 1 && *max_e == N); + assert(*min_e == T(1) && *max_e == T(N)); // Intermediate values should be unique std::sort(output.begin(), output.end()); assert(std::unique(output.begin(), output.end()) == output.end()); } -template +template void sub_plus_equal_test(queue q, size_t N) { - T val = N; + T val = T(N); std::vector output(N); - std::fill(output.begin(), output.end(), 0); + std::fill(output.begin(), output.end(), T(0)); { buffer val_buf(&val, 1); buffer output_buf(output.data(), output.size()); @@ -60,29 +60,29 @@ void sub_plus_equal_test(queue q, size_t N) { cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); auto atm = atomic_ref(val[0]); - out[gid] = atm -= T(1); + out[gid] = atm -= Difference(1); }); }); } // All work-items decrement by 1, so final value should be equal to 0 - assert(val == 0); + assert(val == T(0)); // -= returns updated value: will be in [0, N-1] auto min_e = std::min_element(output.begin(), output.end()); auto max_e = std::max_element(output.begin(), output.end()); - assert(*min_e == 0 && *max_e == N - 1); + assert(*min_e == T(0) && *max_e == T(N - 1)); // Intermediate values should be unique std::sort(output.begin(), output.end()); assert(std::unique(output.begin(), output.end()) == output.end()); } -template +template void sub_pre_dec_test(queue q, size_t N) { - T val = N; + T val = T(N); std::vector output(N); - std::fill(output.begin(), output.end(), 0); + std::fill(output.begin(), output.end(), T(0)); { buffer val_buf(&val, 1); buffer output_buf(output.data(), output.size()); @@ -99,23 +99,23 @@ void sub_pre_dec_test(queue q, size_t N) { } // All work-items decrement by 1, so final value should be equal to 0 - assert(val == 0); + assert(val == T(0)); // Pre-decrement returns updated value: will be in [0, N-1] auto min_e = std::min_element(output.begin(), output.end()); auto max_e = std::max_element(output.begin(), output.end()); - assert(*min_e == 0 && *max_e == N - 1); + assert(*min_e == T(0) && *max_e == T(N - 1)); // Intermediate values should be unique std::sort(output.begin(), output.end()); assert(std::unique(output.begin(), output.end()) == output.end()); } -template +template void sub_post_dec_test(queue q, size_t N) { - T val = N; + T val = T(N); std::vector output(N); - std::fill(output.begin(), output.end(), 0); + std::fill(output.begin(), output.end(), T(0)); { buffer val_buf(&val, 1); buffer output_buf(output.data(), output.size()); @@ -132,24 +132,24 @@ void sub_post_dec_test(queue q, size_t N) { } // All work-items decrement by 1, so final value should be equal to 0 - assert(val == 0); + assert(val == T(0)); // Post-decrement returns original value: will be in [1, N] auto min_e = std::min_element(output.begin(), output.end()); auto max_e = std::max_element(output.begin(), output.end()); - assert(*min_e == 1 && *max_e == N); + assert(*min_e == T(1) && *max_e == T(N)); // Intermediate values should be unique std::sort(output.begin(), output.end()); assert(std::unique(output.begin(), output.end()) == output.end()); } -template +template void sub_test(queue q, size_t N) { - sub_fetch_test(q, N); - sub_plus_equal_test(q, N); - sub_pre_dec_test(q, N); - sub_post_dec_test(q, N); + sub_fetch_test(q, N); + sub_plus_equal_test(q, N); + sub_pre_dec_test(q, N); + sub_post_dec_test(q, N); } // Floating-point types do not support pre- or post-decrement @@ -173,8 +173,6 @@ int main() { } constexpr int N = 32; - - // TODO: Enable missing tests when supported sub_test(q, N); sub_test(q, N); sub_test(q, N); @@ -183,7 +181,7 @@ int main() { sub_test(q, N); sub_test(q, N); sub_test(q, N); - //sub_test(q, N); + sub_test(q, N); std::cout << "Test passed." << std::endl; } diff --git a/sycl/test/basic_tests/buffer/buffer_full_copy.cpp b/sycl/test/basic_tests/buffer/buffer_full_copy.cpp index 79e69067682d5..2743557f6f971 100644 --- a/sycl/test/basic_tests/buffer/buffer_full_copy.cpp +++ b/sycl/test/basic_tests/buffer/buffer_full_copy.cpp @@ -6,7 +6,7 @@ // RUN: %GPU_RUN_PLACEHOLDER %t2.out // RUN: %ACC_RUN_PLACEHOLDER %t2.out -// XFAIL: level0 +// XFAIL: level_zero //==------------- buffer_full_copy.cpp - SYCL buffer basic test ------------==// // diff --git a/sycl/test/basic_tests/buffer/reinterpret.cpp b/sycl/test/basic_tests/buffer/reinterpret.cpp index 7fd000f165131..7288d9bfb2c97 100644 --- a/sycl/test/basic_tests/buffer/reinterpret.cpp +++ b/sycl/test/basic_tests/buffer/reinterpret.cpp @@ -3,7 +3,7 @@ // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out // -// XFAIL: level0 +// XFAIL: level_zero //==---------- reinterpret.cpp --- SYCL buffer reinterpret basic test ------==// // diff --git a/sycl/test/basic_tests/get_backend.cpp b/sycl/test/basic_tests/get_backend.cpp index de0738dd88341..a312304e0741b 100644 --- a/sycl/test/basic_tests/get_backend.cpp +++ b/sycl/test/basic_tests/get_backend.cpp @@ -3,7 +3,6 @@ // //==----------------- get_backend.cpp ------------------------==// // This is a test of get_backend(). -// Also prints handy info about the system. // Do not set SYCL_BE. We do not want the preferred backend. //==----------------------------------------------------------==// @@ -16,7 +15,7 @@ using namespace cl::sycl; bool check(backend be) { switch (be) { case backend::opencl: - case backend::level0: + case backend::level_zero: case backend::cuda: case backend::host: return true; diff --git a/sycl/test/basic_tests/image_accessor_readwrite.cpp b/sycl/test/basic_tests/image_accessor_readwrite.cpp index 13c0b175bd1fc..0451ced85dc55 100644 --- a/sycl/test/basic_tests/image_accessor_readwrite.cpp +++ b/sycl/test/basic_tests/image_accessor_readwrite.cpp @@ -6,7 +6,7 @@ // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // -// XFAIL: windows && level0 +// XFAIL: windows && level_zero //==--------------------image_accessor_readwrite.cpp ----------------------==// //==----------image_accessor read without sampler & write API test---------==// diff --git a/sycl/test/basic_tests/image_accessor_readwrite_half.cpp b/sycl/test/basic_tests/image_accessor_readwrite_half.cpp index 6d5c1960655e5..931780c6c36a6 100644 --- a/sycl/test/basic_tests/image_accessor_readwrite_half.cpp +++ b/sycl/test/basic_tests/image_accessor_readwrite_half.cpp @@ -6,7 +6,7 @@ // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // -// XFAIL: windows && level0 +// XFAIL: windows && level_zero //==--------------------image_accessor_readwrite_half.cpp -------------------==// //==-image_accessor read (without sampler)& write API test for half datatype-==// diff --git a/sycl/test/basic_tests/kernel_info.cpp b/sycl/test/basic_tests/kernel_info.cpp index 193d89a4c4bd8..68a642b476e27 100644 --- a/sycl/test/basic_tests/kernel_info.cpp +++ b/sycl/test/basic_tests/kernel_info.cpp @@ -3,8 +3,8 @@ // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out // -// Fail is flaky for level0, enable when fixed. -// UNSUPPORTED: level0 +// Fail is flaky for level_zero, enable when fixed. +// UNSUPPORTED: level_zero //==--- kernel_info.cpp - SYCL kernel info test ----------------------------==// // diff --git a/sycl/test/basic_tests/parallel_for_indexers.cpp b/sycl/test/basic_tests/parallel_for_indexers.cpp index 3d20ec3d66903..a4ec46b42bc9a 100644 --- a/sycl/test/basic_tests/parallel_for_indexers.cpp +++ b/sycl/test/basic_tests/parallel_for_indexers.cpp @@ -8,7 +8,7 @@ // TODO: Unexpected result // TODO: _indexers.cpp:37: int main(): Assertion `id == -1' failed. -// XFAIL: level0 +// XFAIL: level_zero #include diff --git a/sycl/test/basic_tests/parallel_for_range.cpp b/sycl/test/basic_tests/parallel_for_range.cpp index 3031d3d30f388..4ec9b23158239 100644 --- a/sycl/test/basic_tests/parallel_for_range.cpp +++ b/sycl/test/basic_tests/parallel_for_range.cpp @@ -1,4 +1,4 @@ -// XFAIL: cuda || level0 +// XFAIL: cuda || level_zero // CUDA exposes broken hierarchical parallelism. // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out diff --git a/sycl/test/functor/functor_inheritance.cpp b/sycl/test/functor/functor_inheritance.cpp index 436e8ce74a7d0..3a3d5218ef6ec 100644 --- a/sycl/test/functor/functor_inheritance.cpp +++ b/sycl/test/functor/functor_inheritance.cpp @@ -36,7 +36,7 @@ struct Derived : public Base, public SecondBase { int _A, int _B, int _C, int _D, int _E, cl::sycl::accessor &_Acc) : A(_A), Acc(_Acc), /*Out(_Out),*/ Base(_B, _C, _D), SecondBase(_E) {} - void operator()() const { + void operator()() { Acc[0] = this->A + this->B + this->InnerObj.C + this->InnerObj.D + this->E; } diff --git a/sycl/test/functor/kernel_functor.cpp b/sycl/test/functor/kernel_functor.cpp index 9bed0b451ca25..9dd5e0f2fecdf 100644 --- a/sycl/test/functor/kernel_functor.cpp +++ b/sycl/test/functor/kernel_functor.cpp @@ -23,7 +23,7 @@ constexpr auto sycl_global_buffer = cl::sycl::access::target::global_buffer; // - functor class is defined in an anonymous namespace // - the '()' operator: // * does not have parameters (to be used in 'single_task'). -// * has the 'const' qualifier +// * has no 'const' qualifier namespace { class Functor1 { public: @@ -32,7 +32,7 @@ class Functor1 { cl::sycl::accessor &Acc_) : X(X_), Acc(Acc_) {} - void operator()() const { Acc[0] += X; } + void operator()() { Acc[0] += X; } private: int X; @@ -66,14 +66,14 @@ class Functor2 { // - functor class is templated and defined in the translation unit scope // - the '()' operator: // * has a parameter of type cl::sycl::id<1> (to be used in 'parallel_for'). -// * has the 'const' qualifier +// * has no 'const' qualifier template class TmplFunctor { public: TmplFunctor( T X_, cl::sycl::accessor &Acc_) : X(X_), Acc(Acc_) {} - void operator()(cl::sycl::id<1> id) const { Acc[id] += X; } + void operator()(cl::sycl::id<1> id) { Acc[id] += X; } private: T X; diff --git a/sycl/test/hier_par/hier_par_basic.cpp b/sycl/test/hier_par/hier_par_basic.cpp index 5670663e2e149..d1a94ea1a7112 100644 --- a/sycl/test/hier_par/hier_par_basic.cpp +++ b/sycl/test/hier_par/hier_par_basic.cpp @@ -55,7 +55,7 @@ struct PFWIFunctor { : wg_chunk(wg_chunk), wg_size(wg_size), wg_offset(wg_offset), range_length(range_length), v(v), dev_ptr(dev_ptr) {} - void operator()(h_item<1> i) const { + void operator()(h_item<1> i) { // number of buf elements per work item: size_t wi_chunk = (wg_chunk + wg_size - 1) / wg_size; auto id = i.get_physical_local_id().get(0); @@ -82,7 +82,7 @@ struct PFWGFunctor { : wg_chunk(wg_chunk), range_length(range_length), dev_ptr(dev_ptr), addend(addend), n_iter(n_iter) {} - void operator()(group<1> g) const { + void operator()(group<1> g) { int v = addend; // to check constant initializer works too size_t wg_offset = wg_chunk * g.get_id(0); size_t wg_size = g.get_local_range(0); @@ -95,13 +95,13 @@ struct PFWGFunctor { } // Dummy operator '()' to make sure compiler can handle multiple '()' // operators/ and pick the right one for PFWG kernel code generation. - void operator()(int ind, int val) const { dev_ptr[ind] += val; } + void operator()(int ind, int val) { dev_ptr[ind] += val; } const size_t wg_chunk; const size_t range_length; const int n_iter; const int addend; - mutable AccTy dev_ptr; + AccTy dev_ptr; }; int main() { diff --git a/sycl/test/host-interop-task/host-task-dependency.cpp b/sycl/test/host-interop-task/host-task-dependency.cpp index 60a1e60883d71..2bbc059a43e44 100644 --- a/sycl/test/host-interop-task/host-task-dependency.cpp +++ b/sycl/test/host-interop-task/host-task-dependency.cpp @@ -4,7 +4,7 @@ // RUN: %ACC_RUN_PLACEHOLDER SYCL_PI_TRACE=-1 %t.out 2>&1 %ACC_CHECK_PLACEHOLDER // // TODO: Behaviour is unstable for level zero on Windows. Enable when fixed. -// UNSUPPORTED: windows && level0 +// UNSUPPORTED: windows && level_zero #include #include diff --git a/sycl/test/host-interop-task/host-task-two-queues.cpp b/sycl/test/host-interop-task/host-task-two-queues.cpp index 5157b83b60092..7644d6bcfcd53 100644 --- a/sycl/test/host-interop-task/host-task-two-queues.cpp +++ b/sycl/test/host-interop-task/host-task-two-queues.cpp @@ -4,7 +4,7 @@ // RUN: %ACC_RUN_PLACEHOLDER %t.out // // TODO: Flaky fail on Level Zero that is why mark as unsupported temporarily. -// UNSUPPORTED: level0 +// UNSUPPORTED: level_zero #include #include diff --git a/sycl/test/host-interop-task/interop-task.cpp b/sycl/test/host-interop-task/interop-task.cpp index bcd784038228a..ea4f08eee4a83 100644 --- a/sycl/test/host-interop-task/interop-task.cpp +++ b/sycl/test/host-interop-task/interop-task.cpp @@ -2,7 +2,7 @@ // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out -// UNSUPPORTED: level0, cuda +// UNSUPPORTED: level_zero, cuda // REQUIRES: opencl // REQUIRES: TEMPORARY_DISABLED diff --git a/sycl/test/lit.cfg.py b/sycl/test/lit.cfg.py index 545ebb995f60d..f2a58749a2a93 100644 --- a/sycl/test/lit.cfg.py +++ b/sycl/test/lit.cfg.py @@ -81,7 +81,7 @@ def getDeviceCount(device_type): is_cuda = False; - is_level0 = False; + is_level_zero = False; process = subprocess.Popen([get_device_count_by_type_path, device_type, backend], stdout=subprocess.PIPE) (output, err) = process.communicate() @@ -106,12 +106,12 @@ def getDeviceCount(device_type): if re.match(r".*cuda", result[1]): is_cuda = True; if re.match(r".*level zero", result[1]): - is_level0 = True; + is_level_zero = True; if err: lit_config.warning("getDeviceCount {TYPE} {BACKEND} stderr:{ERR}".format( TYPE=device_type, BACKEND=backend, ERR=err)) - return [value,is_cuda,is_level0] + return [value,is_cuda,is_level_zero] # Every SYCL implementation provides a host implementation. config.available_features.add('host') @@ -149,8 +149,8 @@ def getDeviceCount(device_type): gpu_check_on_linux_substitute = "" cuda = False -level0 = False -[gpu_count, cuda, level0] = getDeviceCount("gpu") +level_zero = False +[gpu_count, cuda, level_zero] = getDeviceCount("gpu") if gpu_count > 0: found_at_least_one_device = True @@ -160,8 +160,8 @@ def getDeviceCount(device_type): config.available_features.add('gpu') if cuda: config.available_features.add('cuda') - elif level0: - config.available_features.add('level0') + elif level_zero: + config.available_features.add('level_zero') if platform.system() == "Linux": gpu_run_on_linux_substitute = "env SYCL_DEVICE_TYPE=GPU SYCL_BE={SYCL_BE} ".format(SYCL_BE=backend) @@ -188,7 +188,7 @@ def getDeviceCount(device_type): config.substitutions.append( ('%ACC_CHECK_PLACEHOLDER', acc_check_substitute) ) # LIT testing either supports OpenCL or CUDA or Level Zero. -if not cuda and not level0 and found_at_least_one_device: +if not cuda and not level_zero and found_at_least_one_device: config.available_features.add('opencl') if cuda: diff --git a/sycl/test/plugins/sycl-ls-gpu-default.cpp b/sycl/test/plugins/sycl-ls-gpu-default.cpp index a07933f2ccff8..a57fc2a39a027 100755 --- a/sycl/test/plugins/sycl-ls-gpu-default.cpp +++ b/sycl/test/plugins/sycl-ls-gpu-default.cpp @@ -1,4 +1,4 @@ -// REQUIRES: gpu, level0 +// REQUIRES: gpu, level_zero // RUN: sycl-ls --verbose >%t.default.out // RUN: FileCheck %s --check-prefixes=CHECK-GPU-BUILTIN,CHECK-GPU-CUSTOM --input-file %t.default.out diff --git a/sycl/test/regression/fsycl-save-temps.cpp b/sycl/test/regression/fsycl-save-temps.cpp index ce9e653af2608..7f89fa54feeca 100644 --- a/sycl/test/regression/fsycl-save-temps.cpp +++ b/sycl/test/regression/fsycl-save-temps.cpp @@ -22,6 +22,6 @@ int main() { } // TODO: Address a Windows-specific issue with integration header filenames -// XFAIL: system-windows && !level0 +// XFAIL: system-windows && !level_zero // TODO: fail is flaky on Windows for Level Zero. Enable when fixed. -// UNSUPPORTED: system-windows && level0 +// UNSUPPORTED: system-windows && level_zero diff --git a/sycl/test/regression/image_access.cpp b/sycl/test/regression/image_access.cpp index f32d0a33f6b91..e779fb5b4111b 100644 --- a/sycl/test/regression/image_access.cpp +++ b/sycl/test/regression/image_access.cpp @@ -5,9 +5,9 @@ // TODO: For now PI checks are skipped for ACC device. To decide if it's good. // RUN: env %ACC_RUN_PLACEHOLDER %t.out // -// UNSUPPORTED: cuda || windows && level0 +// UNSUPPORTED: cuda || windows && level_zero // CUDA cannot support OpenCL spec conform images. -// TODO: test hangs on level0, enable when fixed. +// TODO: test hangs on level_zero, enable when fixed. //==-------------- image_access.cpp - SYCL image accessors test -----------==// // diff --git a/sycl/test/regression/static-buffer-dtor.cpp b/sycl/test/regression/static-buffer-dtor.cpp index 5899420c20454..c541c180e7d73 100644 --- a/sycl/test/regression/static-buffer-dtor.cpp +++ b/sycl/test/regression/static-buffer-dtor.cpp @@ -14,7 +14,7 @@ // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out // -// XFAIL: linux && level0 +// XFAIL: linux && level_zero #include diff --git a/sycl/test/separate-compile/same-kernel.cpp b/sycl/test/separate-compile/same-kernel.cpp index ad72decfa3604..66ca32780f3cd 100644 --- a/sycl/test/separate-compile/same-kernel.cpp +++ b/sycl/test/separate-compile/same-kernel.cpp @@ -26,7 +26,9 @@ class TestFnObj { TestFnObj(buffer &buf, handler &cgh) : data(buf.get_access(cgh)) {} accessor data; - void operator()(id<1> item) const { data[item] = item[0]; } + void operator()(id<1> item) { + data[item] = item[0]; + } }; void kernel2(); diff --git a/sycl/test/spec_const/spec_const_hw.cpp b/sycl/test/spec_const/spec_const_hw.cpp index c50550c0827ad..442121353bb73 100644 --- a/sycl/test/spec_const/spec_const_hw.cpp +++ b/sycl/test/spec_const/spec_const_hw.cpp @@ -1,4 +1,4 @@ -// UNSUPPORTED: cuda || level0 +// UNSUPPORTED: cuda || level_zero // // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: env SYCL_DEVICE_TYPE=HOST %t.out diff --git a/sycl/test/spec_const/spec_const_hw_accelerator.cpp b/sycl/test/spec_const/spec_const_hw_accelerator.cpp index d4eb754065c18..bd7df40ed378a 100644 --- a/sycl/test/spec_const/spec_const_hw_accelerator.cpp +++ b/sycl/test/spec_const/spec_const_hw_accelerator.cpp @@ -12,6 +12,6 @@ // TODO: re-enable after CI drivers are updated to newer which support spec // constants: // XFAIL: linux && opencl && accelerator -// UNSUPPORTED: cuda || level0 +// UNSUPPORTED: cuda || level_zero #include "spec_const_hw.cpp" // RUN: %ACC_RUN_PLACEHOLDER %t.out diff --git a/sycl/test/spec_const/spec_const_neg.cpp b/sycl/test/spec_const/spec_const_neg.cpp index 18fb8ed5d9d0c..7312e29ab40e1 100644 --- a/sycl/test/spec_const/spec_const_neg.cpp +++ b/sycl/test/spec_const/spec_const_neg.cpp @@ -3,7 +3,7 @@ // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // Specialization constants are not supported on FPGA h/w and emulator. -// UNSUPPORTED: cuda || level0 +// UNSUPPORTED: cuda || level_zero // //==----------- spec_const_hw.cpp ------------------------------------------==// // diff --git a/sycl/test/spec_const/spec_const_redefine.cpp b/sycl/test/spec_const/spec_const_redefine.cpp index 36a82f9c9825e..6883ce5c9d7d6 100644 --- a/sycl/test/spec_const/spec_const_redefine.cpp +++ b/sycl/test/spec_const/spec_const_redefine.cpp @@ -1,4 +1,4 @@ -// UNSUPPORTED: cuda || level0 +// UNSUPPORTED: cuda || level_zero // // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: env SYCL_DEVICE_TYPE=HOST %t.out diff --git a/sycl/test/spec_const/spec_const_redefine_accelerator.cpp b/sycl/test/spec_const/spec_const_redefine_accelerator.cpp index 00ef10f63fdfa..52293f7a953e7 100644 --- a/sycl/test/spec_const/spec_const_redefine_accelerator.cpp +++ b/sycl/test/spec_const/spec_const_redefine_accelerator.cpp @@ -14,6 +14,6 @@ // TODO: re-enable after CI drivers are updated to newer which support spec // constants: // XFAIL: linux && opencl && accelerator -// UNSUPPORTED: cuda || level0 +// UNSUPPORTED: cuda || level_zero #include "spec_const_redefine_accelerator.cpp" // RUN: env SYCL_PI_TRACE=2 %ACC_RUN_PLACEHOLDER %t.out 2>&1 %ACC_CHECK_PLACEHOLDER diff --git a/sycl/test/sub_group/attributes.cpp b/sycl/test/sub_group/attributes.cpp index 650ba23dd8c76..d8173d2d1cf72 100644 --- a/sycl/test/sub_group/attributes.cpp +++ b/sycl/test/sub_group/attributes.cpp @@ -22,7 +22,7 @@ class KernelFunctor##SIZE { \ public: \ [[cl::intel_reqd_sub_group_size(SIZE)]] void \ - operator()(cl::sycl::nd_item<1> Item) const { \ + operator()(cl::sycl::nd_item<1> Item) { \ const auto GID = Item.get_global_id(); \ } \ }; diff --git a/sycl/tools/CMakeLists.txt b/sycl/tools/CMakeLists.txt index 1e2843e0b9385..67b0c7329a10b 100644 --- a/sycl/tools/CMakeLists.txt +++ b/sycl/tools/CMakeLists.txt @@ -6,22 +6,22 @@ add_subdirectory(sycl-ls) # TODO: move each tool in its own sub-directory add_executable(get_device_count_by_type get_device_count_by_type.cpp) -add_dependencies(get_device_count_by_type ocl-headers ocl-icd l0-loader) +add_dependencies(get_device_count_by_type ocl-headers ocl-icd level-zero-loader) if(MSVC) - set(L0_LIBRARY + set(LEVEL_ZERO_LIBRARY "${LLVM_LIBRARY_OUTPUT_INTDIR}/${CMAKE_STATIC_LIBRARY_PREFIX}ze_loader${CMAKE_STATIC_LIBRARY_SUFFIX}") else() - set(L0_LIBRARY + set(LEVEL_ZERO_LIBRARY "${LLVM_LIBRARY_OUTPUT_INTDIR}/${CMAKE_SHARED_LIBRARY_PREFIX}ze_loader${CMAKE_SHARED_LIBRARY_SUFFIX}") endif() target_link_libraries(get_device_count_by_type PRIVATE OpenCL::Headers - L0Loader::Headers + LevelZeroLoader::Headers ${OpenCL_LIBRARIES} - ${L0_LIBRARY} + ${LEVEL_ZERO_LIBRARY} $<$:cudadrv> ) target_compile_definitions(get_device_count_by_type diff --git a/sycl/tools/get_device_count_by_type.cpp b/sycl/tools/get_device_count_by_type.cpp index 26a9a0a4ed6ad..abf70ce98c357 100644 --- a/sycl/tools/get_device_count_by_type.cpp +++ b/sycl/tools/get_device_count_by_type.cpp @@ -32,7 +32,7 @@ static const std::string help = " Help\n" " Example: ./get_device_count_by_type cpu opencl\n" " Supported device types: cpu/gpu/accelerator/default/all\n" - " Supported backends: PI_CUDA/PI_OPENCL/PI_LEVEL0 \n" + " Supported backends: PI_CUDA/PI_OPENCL/PI_LEVEL_ZERO \n" " Output format: :"; // Return the string with all characters translated to lower case. @@ -259,7 +259,7 @@ int main(int argc, char *argv[]) { if (backend == "opencl" || backend == "pi_opencl") { querySuccess = queryOpenCL(deviceType, deviceCount, msg); - } else if (backend == "level0" || backend == "pi_level0") { + } else if (backend == "level_zero" || backend == "pi_level_zero") { querySuccess = queryLevelZero(deviceType, deviceCount, msg); } else if (backend == "cuda" || backend == "pi_cuda") { querySuccess = queryCUDA(deviceType, deviceCount, msg); diff --git a/sycl/unittests/pi/BackendString.hpp b/sycl/unittests/pi/BackendString.hpp index cea0eee8b8338..7f051f5ab6790 100644 --- a/sycl/unittests/pi/BackendString.hpp +++ b/sycl/unittests/pi/BackendString.hpp @@ -15,7 +15,7 @@ inline const char *GetBackendString(cl::sycl::backend backend) { PI_BACKEND_STR(cuda); PI_BACKEND_STR(host); PI_BACKEND_STR(opencl); - PI_BACKEND_STR(level0); + PI_BACKEND_STR(level_zero); #undef PI_BACKEND_STR default: return "Unknown Plugin";