From ac93d6fe3d9d1b170493d9ff2870a5dd3450bfeb Mon Sep 17 00:00:00 2001 From: jinge90 <43599496+jinge90@users.noreply.github.com> Date: Thu, 17 Dec 2020 19:47:41 +0800 Subject: [PATCH 1/5] [SYCL]Link Libm FP64 SYCL device library by default (#2892) Signed-off-by: gejin --- clang/lib/Driver/Driver.cpp | 5 ++--- clang/test/Driver/sycl-device-lib-win.cpp | 26 ++++++++++++++++------- clang/test/Driver/sycl-device-lib.cpp | 25 +++++++++++++++------- 3 files changed, 37 insertions(+), 19 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 21f023b91d3ef..d7f7abe93101d 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -3972,10 +3972,9 @@ class OffloadingActionBuilder final { bool NoDeviceLibs = false; int NumOfDeviceLibLinked = 0; - // Currently, libc, libm-fp32 will be linked in by default. In order - // to use libm-fp64, -fsycl-device-lib=libm-fp64/all should be used. + // Currently, all SYCL device libraries will be linked by default llvm::StringMap devicelib_link_info = { - {"libc", true}, {"libm-fp32", true}, {"libm-fp64", false}}; + {"libc", true}, {"libm-fp32", true}, {"libm-fp64", true}}; if (Arg *A = Args.getLastArg(options::OPT_fsycl_device_lib_EQ, options::OPT_fno_sycl_device_lib_EQ)) { if (A->getValues().size() == 0) diff --git a/clang/test/Driver/sycl-device-lib-win.cpp b/clang/test/Driver/sycl-device-lib-win.cpp index e1cb1b16ea8b3..31b7dd1e59a34 100644 --- a/clang/test/Driver/sycl-device-lib-win.cpp +++ b/clang/test/Driver/sycl-device-lib-win.cpp @@ -1,6 +1,7 @@ /// /// Perform several driver tests for SYCL device libraries on Windows /// + // REQUIRES: clang-driver, windows /// ########################################################################### @@ -14,14 +15,16 @@ // RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT // RUN: %clangxx -fsycl %s -fsycl-device-lib=libc,libm-fp32 -### 2>&1 \ // RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT -// RUN: %clangxx -fsycl %s -fno-sycl-device-lib=libm-fp64 -### 2>&1 \ -// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT // SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-crt.obj" "-outputs={{.*}}libsycl-crt-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-complex.obj" "-outputs={{.*}}libsycl-complex-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-complex-fp64.obj" "-outputs={{.*}}libsycl-complex-fp64-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-cmath.obj" "-outputs={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-cmath-fp64.obj" "-outputs={{.*}}libsycl-cmath-fp64-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-cassert.obj" "-outputs={{.*}}libsycl-fallback-cassert-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-complex.obj" "-outputs={{.*}}libsycl-fallback-complex-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-complex-fp64.obj" "-outputs={{.*}}libsycl-fallback-complex-fp64-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-cmath.obj" "-outputs={{.*}}libsycl-fallback-cmath-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-cmath-fp64.obj" "-outputs={{.*}}libsycl-fallback-cmath-fp64-{{.*}}.o" "-unbundle" /// ########################################################################### /// test behavior of device library link with libm-fp64 @@ -52,17 +55,20 @@ // RUN: %clangxx -fsycl %s -fno-sycl-device-lib=libc -### 2>&1 \ // RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC // SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-complex.obj" "-outputs={{.*}}libsycl-complex-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-complex-fp64.obj" "-outputs={{.*}}libsycl-complex-fp64-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-cmath.obj" "-outputs={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-cmath-fp64.obj" "-outputs={{.*}}libsycl-cmath-fp64-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-complex.obj" "-outputs={{.*}}libsycl-fallback-complex-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-complex-fp64.obj" "-outputs={{.*}}libsycl-fallback-complex-fp64-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-cmath.obj" "-outputs={{.*}}libsycl-fallback-cmath-{{.*}}.o" "-unbundle" - +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-cmath-fp64.obj" "-outputs={{.*}}libsycl-fallback-cmath-fp64-{{.*}}.o" "-unbundle" /// ########################################################################### -/// test behavior of -fno-sycl-device-lib=libm-fp32 -// RUN: %clangxx -fsycl %s -fno-sycl-device-lib=libm-fp32 -### 2>&1 \ -// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBM_FP32 -// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBM_FP32: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-crt.obj" "-outputs={{.*}}libsycl-crt-{{.*}}.o" "-unbundle" -// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBM_FP32-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-cassert.obj" "-outputs={{.*}}libsycl-fallback-cassert-{{.*}}.o" "-unbundle" +/// test behavior of -fno-sycl-device-lib=libm-fp32,libm-fp64 +// RUN: %clangxx -fsycl %s -fno-sycl-device-lib=libm-fp32,libm-fp64 -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBM +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBM: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-crt.obj" "-outputs={{.*}}libsycl-crt-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBM-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-cassert.obj" "-outputs={{.*}}libsycl-fallback-cassert-{{.*}}.o" "-unbundle" /// ########################################################################### @@ -99,10 +105,14 @@ // SYCL_LLVM_LINK_DEVICE_LIB: llvm-link{{.*}} "{{.*}}.bc" "-o" "{{.*}}.bc" "--suppress-warnings" // SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-crt.obj" "-outputs={{.*}}libsycl-crt-{{.*}}.o" "-unbundle" // SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-complex.obj" "-outputs={{.*}}libsycl-complex-{{.*}}.o" "-unbundle" +// SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-complex-fp64.obj" "-outputs={{.*}}libsycl-complex-fp64-{{.*}}.o" "-unbundle" // SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-cmath.obj" "-outputs={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" +// SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-cmath-fp64.obj" "-outputs={{.*}}libsycl-cmath-fp64-{{.*}}.o" "-unbundle" // SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-cassert.obj" "-outputs={{.*}}libsycl-fallback-cassert-{{.*}}.o" "-unbundle" // SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-complex.obj" "-outputs={{.*}}libsycl-fallback-complex-{{.*}}.o" "-unbundle" +// SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-complex-fp64.obj" "-outputs={{.*}}libsycl-fallback-complex-fp64-{{.*}}.o" "-unbundle" // SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-cmath.obj" "-outputs={{.*}}libsycl-fallback-cmath-{{.*}}.o" "-unbundle" +// SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-cmath-fp64.obj" "-outputs={{.*}}libsycl-fallback-cmath-fp64-{{.*}}.o" "-unbundle" // SYCL_LLVM_LINK_DEVICE_LIB-NEXT: llvm-link{{.*}} "-only-needed" "{{.*}}" "-o" "{{.*}}.bc" "--suppress-warnings" /// ########################################################################### diff --git a/clang/test/Driver/sycl-device-lib.cpp b/clang/test/Driver/sycl-device-lib.cpp index c2f995c1b9b3c..2e34247e315e4 100644 --- a/clang/test/Driver/sycl-device-lib.cpp +++ b/clang/test/Driver/sycl-device-lib.cpp @@ -15,14 +15,16 @@ // RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT // RUN: %clangxx -fsycl %s -fsycl-device-lib=libc,libm-fp32 -### 2>&1 \ // RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT -// RUN: %clangxx -fsycl %s -fno-sycl-device-lib=libm-fp64 -### 2>&1 \ -// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT // SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-crt.o" "-outputs={{.*}}libsycl-crt-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-complex.o" "-outputs={{.*}}libsycl-complex-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-complex-fp64.o" "-outputs={{.*}}libsycl-complex-fp64-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-cmath.o" "-outputs={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-cmath-fp64.o" "-outputs={{.*}}libsycl-cmath-fp64-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-cassert.o" "-outputs={{.*}}libsycl-fallback-cassert-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-complex.o" "-outputs={{.*}}libsycl-fallback-complex-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-complex-fp64.o" "-outputs={{.*}}libsycl-fallback-complex-fp64-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-cmath.o" "-outputs={{.*}}libsycl-fallback-cmath-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_DEFAULT-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-cmath-fp64.o" "-outputs={{.*}}libsycl-fallback-cmath-fp64-{{.*}}.o" "-unbundle" /// ########################################################################### /// test behavior of device library link with libm-fp64 @@ -53,17 +55,20 @@ // RUN: %clangxx -fsycl %s -fno-sycl-device-lib=libc -### 2>&1 \ // RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC // SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-complex.o" "-outputs={{.*}}libsycl-complex-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-complex-fp64.o" "-outputs={{.*}}libsycl-complex-fp64-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-cmath.o" "-outputs={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-cmath-fp64.o" "-outputs={{.*}}libsycl-cmath-fp64-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-complex.o" "-outputs={{.*}}libsycl-fallback-complex-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-complex-fp64.o" "-outputs={{.*}}libsycl-fallback-complex-fp64-{{.*}}.o" "-unbundle" // SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-cmath.o" "-outputs={{.*}}libsycl-fallback-cmath-{{.*}}.o" "-unbundle" - +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBC-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-cmath-fp64.o" "-outputs={{.*}}libsycl-fallback-cmath-fp64-{{.*}}.o" "-unbundle" /// ########################################################################### -/// test behavior of -fno-sycl-device-lib=libm-fp32 -// RUN: %clangxx -fsycl %s -fno-sycl-device-lib=libm-fp32 -### 2>&1 \ -// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBM_FP32 -// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBM_FP32: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-crt.o" "-outputs={{.*}}libsycl-crt-{{.*}}.o" "-unbundle" -// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBM_FP32-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-cassert.o" "-outputs={{.*}}libsycl-fallback-cassert-{{.*}}.o" "-unbundle" +/// test behavior of -fno-sycl-device-lib=libm-fp32,libm-fp64 +// RUN: %clangxx -fsycl %s -fno-sycl-device-lib=libm-fp32,libm-fp64 -### 2>&1 \ +// RUN: | FileCheck %s -check-prefix=SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBM +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBM: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-crt.o" "-outputs={{.*}}libsycl-crt-{{.*}}.o" "-unbundle" +// SYCL_DEVICE_LIB_UNBUNDLE_NO_LIBM-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-cassert.o" "-outputs={{.*}}libsycl-fallback-cassert-{{.*}}.o" "-unbundle" /// ########################################################################### @@ -100,10 +105,14 @@ // SYCL_LLVM_LINK_DEVICE_LIB: llvm-link{{.*}} "{{.*}}.bc" "-o" "{{.*}}.bc" "--suppress-warnings" // SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-crt.o" "-outputs={{.*}}libsycl-crt-{{.*}}.o" "-unbundle" // SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-complex.o" "-outputs={{.*}}libsycl-complex-{{.*}}.o" "-unbundle" +// SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-complex-fp64.o" "-outputs={{.*}}libsycl-complex-fp64-{{.*}}.o" "-unbundle" // SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-cmath.o" "-outputs={{.*}}libsycl-cmath-{{.*}}.o" "-unbundle" +// SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-cmath-fp64.o" "-outputs={{.*}}libsycl-cmath-fp64-{{.*}}.o" "-unbundle" // SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-cassert.o" "-outputs={{.*}}libsycl-fallback-cassert-{{.*}}.o" "-unbundle" // SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-complex.o" "-outputs={{.*}}libsycl-fallback-complex-{{.*}}.o" "-unbundle" +// SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-complex-fp64.o" "-outputs={{.*}}libsycl-fallback-complex-fp64-{{.*}}.o" "-unbundle" // SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-cmath.o" "-outputs={{.*}}libsycl-fallback-cmath-{{.*}}.o" "-unbundle" +// SYCL_LLVM_LINK_DEVICE_LIB-NEXT: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64-unknown-unknown-sycldevice" "-inputs={{.*}}libsycl-fallback-cmath-fp64.o" "-outputs={{.*}}libsycl-fallback-cmath-fp64-{{.*}}.o" "-unbundle" // SYCL_LLVM_LINK_DEVICE_LIB-NEXT: llvm-link{{.*}} "-only-needed" "{{.*}}" "-o" "{{.*}}.bc" "--suppress-warnings" /// ########################################################################### From a5065ab85101f81a7296caf9b7f60501705951ba Mon Sep 17 00:00:00 2001 From: jinge90 <43599496+jinge90@users.noreply.github.com> Date: Thu, 17 Dec 2020 19:49:36 +0800 Subject: [PATCH 2/5] [SYCL] Support LLVM FP intrinsic in llvm-spirv and FE (#2880) Support LLVM FP intrinsic in llvm-spirv and enable the corresponding builtin in FE. Signed-off-by: gejin --- clang/lib/Sema/SemaSYCL.cpp | 24 --------- clang/test/SemaSYCL/supported_math.cpp | 64 ++++++++++++++++++++++++ clang/test/SemaSYCL/unsupported_math.cpp | 24 ++------- 3 files changed, 67 insertions(+), 45 deletions(-) create mode 100644 clang/test/SemaSYCL/supported_math.cpp diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index bde9632b19e7d..41f0006fcde73 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -165,38 +165,14 @@ static bool IsSyclMathFunc(unsigned BuiltinID) { case Builtin::BI__builtin_truncl: case Builtin::BIlroundl: case Builtin::BI__builtin_lroundl: - case Builtin::BIcopysign: - case Builtin::BI__builtin_copysign: - case Builtin::BIfloor: - case Builtin::BI__builtin_floor: case Builtin::BIfmax: case Builtin::BI__builtin_fmax: case Builtin::BIfmin: case Builtin::BI__builtin_fmin: - case Builtin::BInearbyint: - case Builtin::BI__builtin_nearbyint: - case Builtin::BIrint: - case Builtin::BI__builtin_rint: - case Builtin::BIround: - case Builtin::BI__builtin_round: - case Builtin::BItrunc: - case Builtin::BI__builtin_trunc: - case Builtin::BIcopysignf: - case Builtin::BI__builtin_copysignf: - case Builtin::BIfloorf: - case Builtin::BI__builtin_floorf: case Builtin::BIfmaxf: case Builtin::BI__builtin_fmaxf: case Builtin::BIfminf: case Builtin::BI__builtin_fminf: - case Builtin::BInearbyintf: - case Builtin::BI__builtin_nearbyintf: - case Builtin::BIrintf: - case Builtin::BI__builtin_rintf: - case Builtin::BIroundf: - case Builtin::BI__builtin_roundf: - case Builtin::BItruncf: - case Builtin::BI__builtin_truncf: case Builtin::BIlroundf: case Builtin::BI__builtin_lroundf: case Builtin::BI__builtin_fpclassify: diff --git a/clang/test/SemaSYCL/supported_math.cpp b/clang/test/SemaSYCL/supported_math.cpp new file mode 100644 index 0000000000000..aada7829ef781 --- /dev/null +++ b/clang/test/SemaSYCL/supported_math.cpp @@ -0,0 +1,64 @@ +// RUN: %clang_cc1 -fsycl -fsycl-is-device -fsyntax-only -Wno-sycl-strict -verify %s +extern "C" float sinf(float); +extern "C" float cosf(float); +extern "C" float floorf(float); +extern "C" float logf(float); +extern "C" float nearbyintf(float); +extern "C" float rintf(float); +extern "C" float roundf(float); +extern "C" float truncf(float); +extern "C" float copysignf(float, float); +extern "C" double sin(double); +extern "C" double cos(double); +extern "C" double floor(double); +extern "C" double log(double); +extern "C" double nearbyint(double); +extern "C" double rint(double); +extern "C" double round(double); +extern "C" double trunc(double); +extern "C" double copysign(double, double); +template +__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { + kernelFunc(); +} + +int main() { + kernel([=]() { + int acc[1] = {5}; + acc[0] *= 2; + acc[0] += (int)truncf(1.0f); // expected-no-diagnostics + acc[0] += (int)trunc(1.0); // expected-no-diagnostics + acc[0] += (int)roundf(1.0f); // expected-no-diagnostics + acc[0] += (int)round(1.0); // expected-no-diagnostics + acc[0] += (int)rintf(1.0f); // expected-no-diagnostics + acc[0] += (int)rint(1.0); // expected-no-diagnostics + acc[0] += (int)nearbyintf(0.5f); // expected-no-diagnostics + acc[0] += (int)nearbyint(0.5); // expected-no-diagnostics + acc[0] += (int)floorf(0.5f); // expected-no-diagnostics + acc[0] += (int)floor(0.5); // expected-no-diagnostics + acc[0] += (int)copysignf(1.0f, -0.5f); // expected-no-diagnostics + acc[0] += (int)copysign(1.0, -0.5); // expected-no-diagnostics + acc[0] += (int)sinf(1.0f); // expected-no-diagnostics + acc[0] += (int)sin(1.0); // expected-no-diagnostics + acc[0] += (int)__builtin_sinf(1.0f); // expected-no-diagnostics + acc[0] += (int)__builtin_sin(1.0); // expected-no-diagnostics + acc[0] += (int)cosf(1.0f); // expected-no-diagnostics + acc[0] += (int)cos(1.0); // expected-no-diagnostics + acc[0] += (int)__builtin_cosf(1.0f); // expected-no-diagnostics + acc[0] += (int)__builtin_cos(1.0); // expected-no-diagnostics + acc[0] += (int)logf(1.0f); // expected-no-diagnostics + acc[0] += (int)log(1.0); // expected-no-diagnostics + acc[0] += (int)__builtin_truncf(1.0f); // expected-no-diagnostics + acc[0] += (int)__builtin_trunc(1.0); // expected-no-diagnostics + acc[0] += (int)__builtin_rintf(1.0f); // expected-no-diagnostics + acc[0] += (int)__builtin_rint(1.0); // expected-no-diagnostics + acc[0] += (int)__builtin_nearbyintf(0.5f); // expected-no-diagnostics + acc[0] += (int)__builtin_nearbyint(0.5); // expected-no-diagnostics + acc[0] += (int)__builtin_floorf(0.5f); // expected-no-diagnostics + acc[0] += (int)__builtin_floor(0.5); // expected-no-diagnostics + acc[0] += (int)__builtin_copysignf(1.0f, -0.5f); // expected-no-diagnostics + acc[0] += (int)__builtin_logf(1.0f); // expected-no-diagnostics + acc[0] += (int)__builtin_log(1.0); // expected-no-diagnostics + }); + return 0; +} diff --git a/clang/test/SemaSYCL/unsupported_math.cpp b/clang/test/SemaSYCL/unsupported_math.cpp index 3c0de837dcd77..e0663b708785a 100644 --- a/clang/test/SemaSYCL/unsupported_math.cpp +++ b/clang/test/SemaSYCL/unsupported_math.cpp @@ -1,10 +1,4 @@ // RUN: %clang_cc1 -fsycl -fsycl-is-device -fsyntax-only -Wno-sycl-2017-compat -verify %s -extern "C" float sinf(float); -extern "C" float cosf(float); -extern "C" float logf(float); -extern "C" double sin(double); -extern "C" double cos(double); -extern "C" double log(double); template __attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { kernelFunc(); @@ -14,21 +8,9 @@ int main() { kernel([=]() { int acc[1] = {5}; acc[0] *= 2; - acc[0] += (int)sinf(1.0f); // expected-no-error - acc[0] += (int)sin(1.0); // expected-no-error - acc[0] += (int)__builtin_sinf(1.0f); // expected-no-error - acc[0] += (int)__builtin_sin(1.0); // expected-no-error - acc[0] += (int)cosf(1.0f); // expected-no-error - acc[0] += (int)cos(1.0); // expected-no-error - acc[0] += (int)__builtin_cosf(1.0f); // expected-no-error - acc[0] += (int)__builtin_cos(1.0); // expected-no-error - acc[0] += (int)logf(1.0f); // expected-no-error - acc[0] += (int)log(1.0); // expected-no-error - acc[0] += (int)__builtin_logf(1.0f); // expected-no-error - acc[0] += (int)__builtin_log(1.0); // expected-no-error - acc[0] += (int)__builtin_fabsl(-1.0); // expected-error{{builtin is not supported on this target}} - acc[0] += (int)__builtin_cosl(-1.0); // expected-error{{builtin is not supported on this target}} - acc[0] += (int)__builtin_powl(-1.0, 10.0); // expected-error{{builtin is not supported on this target}} + acc[0] += (int)__builtin_fabsl(-1.0); // expected-error{{builtin is not supported on this target}} + acc[0] += (int)__builtin_cosl(-1.0); // expected-error{{builtin is not supported on this target}} + acc[0] += (int)__builtin_powl(-1.0, 10.0); // expected-error{{builtin is not supported on this target}} }); return 0; } From a5fde5a924acf527f9194a89268f11de6702efc0 Mon Sep 17 00:00:00 2001 From: smanna12 <52675209+smanna12@users.noreply.github.com> Date: Thu, 17 Dec 2020 06:52:40 -0500 Subject: [PATCH 3/5] [SYCL] Add template parameter support for no_global_work_offset attribute (#2839) This patch adds support for template parameter on [[intel:: no_global_work_offset())]] attribute where valid values are 0 and 1 and attribute parameter is optional, so [[intelfpga::no_global_work_offset]] means the same as [[intelfpga::no_global_work_offset(1)]]. updates sema/codegen tests with mock headers on device. uses existing function "sema::addIntelSYCLSingleArgFunctionAttr" from other single argument function attributes such as num_simd_work_items, max_global_work_dim, and intel_reqd_sub_group_size to avoid source codes duplication and reuse for the template parameter support. Signed-off-by: Soumi Manna --- clang/include/clang/Basic/Attr.td | 2 +- clang/include/clang/Basic/DiagnosticGroups.td | 4 +- .../clang/Basic/DiagnosticSemaKinds.td | 3 - clang/lib/CodeGen/CodeGenFunction.cpp | 7 +- clang/lib/Sema/SemaDeclAttr.cpp | 20 ++--- .../lib/Sema/SemaTemplateInstantiateDecl.cpp | 6 ++ .../intel-fpga-no-global-work-offset.cpp | 51 ++++++++---- .../check-notdirect-attribute-propagation.cpp | 2 +- .../intel-fpga-no-global-work-offset.cpp | 81 +++++++++---------- .../redeclaration-attribute-propagation.cpp | 40 +++++---- ...evice-intel-fpga-no-global-work-offset.cpp | 60 ++++++++++++++ 11 files changed, 181 insertions(+), 95 deletions(-) create mode 100644 clang/test/SemaSYCL/sycl-device-intel-fpga-no-global-work-offset.cpp diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 51820842e7e62..04b3f5e88e394 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1306,7 +1306,7 @@ def SYCLIntelMaxGlobalWorkDim : InheritableAttr { def SYCLIntelNoGlobalWorkOffset : InheritableAttr { let Spellings = [CXX11<"intelfpga","no_global_work_offset">, CXX11<"intel","no_global_work_offset">]; - let Args = [BoolArgument<"Enabled", 1>]; + let Args = [ExprArgument<"Value", /*optional*/1>]; let LangOpts = [SYCLIsDevice, SYCLIsHost]; let Subjects = SubjectList<[Function], ErrorDiag>; let Documentation = [SYCLIntelNoGlobalWorkOffsetAttrDocs]; diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index e2894e764dba9..4b42a37fd33aa 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -667,10 +667,8 @@ def NSReturnsMismatch : DiagGroup<"nsreturns-mismatch">; def IndependentClassAttribute : DiagGroup<"IndependentClass-attribute">; def UnknownAttributes : DiagGroup<"unknown-attributes">; def IgnoredAttributes : DiagGroup<"ignored-attributes">; -def AdjustedAttributes : DiagGroup<"adjusted-attributes">; def Attributes : DiagGroup<"attributes", [UnknownAttributes, - IgnoredAttributes, - AdjustedAttributes]>; + IgnoredAttributes]>; def UnknownSanitizers : DiagGroup<"unknown-sanitizers">; def UnnamedTypeTemplateArgs : DiagGroup<"unnamed-type-template-args", [CXX98CompatUnnamedTypeTemplateArgs]>; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 5f332eda2f083..b67c28a77c27a 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11143,9 +11143,6 @@ def err_sycl_function_attribute_mismatch : Error< "SYCL kernel without %0 attribute can't call a function with this attribute">; def err_sycl_x_y_z_arguments_must_be_one : Error< "%0 X-, Y- and Z- sizes must be 1 when %1 attribute is used with value 0">; -def warn_boolean_attribute_argument_is_not_valid: Warning< - "The value of %0 attribute should be 0 or 1. Adjusted to 1">, - InGroup; def err_sycl_attibute_cannot_be_applied_here : Error<"%0 attribute cannot be applied to a " "static function or function in an anonymous namespace">; diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index f94a6a6973b6a..5c947cc66b42d 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -694,7 +694,12 @@ void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD, if (const SYCLIntelNoGlobalWorkOffsetAttr *A = FD->getAttr()) { - if (A->getEnabled()) + const Expr *Arg = A->getValue(); + assert(Arg && "Got an unexpected null argument"); + Optional ArgVal = + Arg->getIntegerConstantExpr(FD->getASTContext()); + assert(ArgVal.hasValue() && "Not an integer constant expression"); + if (ArgVal->getBoolValue()) Fn->setMetadata("no_global_work_offset", llvm::MDNode::get(Context, {})); } diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index ecac963bdb004..07f67ad0d84e9 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -5276,24 +5276,18 @@ static void handleNoGlobalWorkOffsetAttr(Sema &S, Decl *D, checkForDuplicateAttribute(S, D, Attr); - uint32_t Enabled = 1; - if (Attr.getNumArgs()) { - const Expr *E = Attr.getArgAsExpr(0); - if (!checkUInt32Argument(S, Attr, E, Enabled, 0, - /*StrictlyUnsigned=*/true)) - return; - } - if (Enabled > 1) - S.Diag(Attr.getLoc(), diag::warn_boolean_attribute_argument_is_not_valid) - << Attr; - if (Attr.getKind() == ParsedAttr::AT_SYCLIntelNoGlobalWorkOffset && checkDeprecatedSYCLAttributeSpelling(S, Attr)) S.Diag(Attr.getLoc(), diag::note_spelling_suggestion) << "'intel::no_global_work_offset'"; - D->addAttr(::new (S.Context) - SYCLIntelNoGlobalWorkOffsetAttr(S.Context, Attr, Enabled)); + // If no attribute argument is specified, set to default value '1'. + Expr *E = Attr.isArgExpr(0) + ? Attr.getArgAsExpr(0) + : IntegerLiteral::Create(S.Context, llvm::APInt(32, 1), + S.Context.IntTy, Attr.getLoc()); + S.addIntelSYCLSingleArgFunctionAttr(D, Attr, + E); } /// Handle the [[intelfpga::doublepump]] and [[intelfpga::singlepump]] attributes. diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index 5130bad1f7b5c..85fd57c459bf3 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -775,6 +775,12 @@ void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs, *this, TemplateArgs, SYCLIntelMaxGlobalWorkDim, New); continue; } + if (const auto *SYCLIntelNoGlobalWorkOffset = + dyn_cast(TmplAttr)) { + instantiateIntelSYCLFunctionAttr( + *this, TemplateArgs, SYCLIntelNoGlobalWorkOffset, New); + continue; + } // Existing DLL attribute on the instantiation takes precedence. if (TmplAttr->getKind() == attr::DLLExport || TmplAttr->getKind() == attr::DLLImport) { diff --git a/clang/test/CodeGenSYCL/intel-fpga-no-global-work-offset.cpp b/clang/test/CodeGenSYCL/intel-fpga-no-global-work-offset.cpp index a4d93478134a1..a2d33c2ac2932 100644 --- a/clang/test/CodeGenSYCL/intel-fpga-no-global-work-offset.cpp +++ b/clang/test/CodeGenSYCL/intel-fpga-no-global-work-offset.cpp @@ -1,28 +1,49 @@ -// RUN: %clang_cc1 -fsycl -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown-sycldevice -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s + +#include "sycl.hpp" + +using namespace cl::sycl; +queue q; class Foo { public: [[intel::no_global_work_offset(1)]] void operator()() const {} }; -template -__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { - kernelFunc(); -} +template +class Functor { +public: + [[intel::no_global_work_offset(SIZE)]] void operator()() const {} +}; + +template +[[intel::no_global_work_offset(N)]] void func() {} + +int main() { + q.submit([&](handler &h) { + Foo boo; + h.single_task(boo); + + h.single_task( + []() [[intel::no_global_work_offset]]{}); -void bar() { - Foo boo; - kernel(boo); + h.single_task( + []() [[intel::no_global_work_offset(0)]]{}); - kernel( - []() [[intel::no_global_work_offset]]{}); + Functor<1> f; + h.single_task(f); - kernel( - []() [[intel::no_global_work_offset(0)]]{}); + h.single_task([]() { + func<1>(); + }); + }); + return 0; } -// CHECK: define spir_kernel void @{{.*}}kernel_name1() {{.*}} !no_global_work_offset ![[NUM5:[0-9]+]] -// CHECK: define spir_kernel void @{{.*}}kernel_name2() {{.*}} !no_global_work_offset ![[NUM5]] -// CHECK: define spir_kernel void @{{.*}}kernel_name3() {{.*}} ![[NUM4:[0-9]+]] +// CHECK: define spir_kernel void @{{.*}}kernel_name1"() #0 {{.*}} !no_global_work_offset ![[NUM5:[0-9]+]] +// CHECK: define spir_kernel void @{{.*}}kernel_name2"() #0 {{.*}} !no_global_work_offset ![[NUM5]] +// CHECK: define spir_kernel void @{{.*}}kernel_name3"() #0 {{.*}} ![[NUM4:[0-9]+]] +// CHECK: define spir_kernel void @{{.*}}kernel_name4"() #0 {{.*}} !no_global_work_offset ![[NUM5]] +// CHECK: define spir_kernel void @{{.*}}kernel_name5"() #0 {{.*}} !no_global_work_offset ![[NUM5]] // CHECK-NOT: ![[NUM4]] = !{i32 0} // CHECK: ![[NUM5]] = !{} diff --git a/clang/test/SemaSYCL/check-notdirect-attribute-propagation.cpp b/clang/test/SemaSYCL/check-notdirect-attribute-propagation.cpp index 777ef128123eb..d2ade080f86cc 100644 --- a/clang/test/SemaSYCL/check-notdirect-attribute-propagation.cpp +++ b/clang/test/SemaSYCL/check-notdirect-attribute-propagation.cpp @@ -46,7 +46,7 @@ void invoke_foo2() { // CHECK-LABEL: FunctionDecl {{.*}} invoke_foo2 'void ()' // CHECK: `-FunctionDecl {{.*}}KernelName 'void ()' // CHECK: -IntelReqdSubGroupSizeAttr {{.*}} - // CHECK: `-SYCLIntelNoGlobalWorkOffsetAttr {{.*}} Enabled + // CHECK: `-SYCLIntelNoGlobalWorkOffsetAttr {{.*}} parallel_for([]() {}); #else parallel_for([]() {}); // expected-error 2 {{conflicting attributes applied to a SYCL kernel or SYCL_EXTERNAL function}} diff --git a/clang/test/SemaSYCL/intel-fpga-no-global-work-offset.cpp b/clang/test/SemaSYCL/intel-fpga-no-global-work-offset.cpp index ff816237d6fb1..c8c54a6913587 100644 --- a/clang/test/SemaSYCL/intel-fpga-no-global-work-offset.cpp +++ b/clang/test/SemaSYCL/intel-fpga-no-global-work-offset.cpp @@ -1,51 +1,50 @@ -// RUN: %clang_cc1 -fsycl -fsycl-is-device -Wno-return-type -fcxx-exceptions -fsyntax-only -ast-dump -verify -pedantic %s | FileCheck %s +// RUN: %clang_cc1 -fsycl -fsycl-is-device -internal-isystem %S/Inputs -Wno-return-type -Wno-sycl-2017-compat -fcxx-exceptions -fsyntax-only -ast-dump -verify -pedantic %s | FileCheck %s + +#include "sycl.hpp" + +using namespace cl::sycl; +queue q; struct FuncObj { //expected-warning@+2 {{attribute 'intelfpga::no_global_work_offset' is deprecated}} //expected-note@+1 {{did you mean to use 'intel::no_global_work_offset' instead?}} - [[intelfpga::no_global_work_offset]] void operator()() {} + [[intelfpga::no_global_work_offset]] void operator()() const {} }; -template -void kernel(Func kernelFunc) { - kernelFunc(); -} - int main() { - // CHECK: SYCLIntelNoGlobalWorkOffsetAttr{{.*}}Enabled - kernel([]() { - FuncObj(); - }); - - // CHECK: SYCLIntelNoGlobalWorkOffsetAttr - // CHECK-NOT: Enabled - kernel( - []() [[intel::no_global_work_offset(0)]]{}); - - // CHECK: SYCLIntelNoGlobalWorkOffsetAttr{{.*}}Enabled - // expected-warning@+2{{'no_global_work_offset' attribute should be 0 or 1. Adjusted to 1}} - kernel( - []() [[intel::no_global_work_offset(42)]]{}); - - // expected-error@+2{{'no_global_work_offset' attribute requires a non-negative integral compile time constant expression}} - kernel( - []() [[intel::no_global_work_offset(-1)]]{}); - - // expected-error@+2{{'no_global_work_offset' attribute requires parameter 0 to be an integer constant}} - kernel( - []() [[intel::no_global_work_offset("foo")]]{}); - - kernel([]() { - // expected-error@+1{{'no_global_work_offset' attribute only applies to functions}} - [[intel::no_global_work_offset(1)]] int a; + q.submit([&](handler &h) { + // CHECK: SYCLIntelNoGlobalWorkOffsetAttr {{.*}} + // CHECK-NEXT: IntegerLiteral{{.*}}1{{$}} + h.single_task(FuncObj()); + + // CHECK: SYCLIntelNoGlobalWorkOffsetAttr {{.*}} + // CHECK-NEXT: IntegerLiteral{{.*}}0{{$}} + h.single_task( + []() [[intel::no_global_work_offset(0)]]{}); + + // CHECK: SYCLIntelNoGlobalWorkOffsetAttr{{.*}} + // CHECK-NEXT: IntegerLiteral{{.*}}42{{$}} + h.single_task( + []() [[intel::no_global_work_offset(42)]]{}); + + // CHECK: SYCLIntelNoGlobalWorkOffsetAttr{{.*}} + // CHECK-NEXT: UnaryOperator{{.*}} 'int' prefix '-' + // CHECK-NEXT-NEXT: IntegerLiteral{{.*}}1{{$}} + h.single_task( + []() [[intel::no_global_work_offset(-1)]]{}); + + // expected-error@+2{{'no_global_work_offset' attribute requires an integer constant}} + h.single_task( + []() [[intel::no_global_work_offset("foo")]]{}); + + h.single_task([]() { + // expected-error@+1{{'no_global_work_offset' attribute only applies to functions}} + [[intel::no_global_work_offset(1)]] int a; + }); + + // expected-warning@+2{{attribute 'no_global_work_offset' is already applied}} + h.single_task( + []() [[intel::no_global_work_offset(0), intel::no_global_work_offset(1)]]{}); }); - - // CHECK: SYCLIntelNoGlobalWorkOffsetAttr{{.*}} - // CHECK-NOT: Enabled - // CHECK: SYCLIntelNoGlobalWorkOffsetAttr{{.*}}Enabled - // expected-warning@+2{{attribute 'no_global_work_offset' is already applied}} - kernel( - []() [[intel::no_global_work_offset(0), intel::no_global_work_offset(1)]]{}); - return 0; } diff --git a/clang/test/SemaSYCL/redeclaration-attribute-propagation.cpp b/clang/test/SemaSYCL/redeclaration-attribute-propagation.cpp index 4b2777c1bb9dd..436f9b3186de2 100644 --- a/clang/test/SemaSYCL/redeclaration-attribute-propagation.cpp +++ b/clang/test/SemaSYCL/redeclaration-attribute-propagation.cpp @@ -1,8 +1,11 @@ -// RUN: %clang_cc1 %s -fsyntax-only -fsycl -fsycl-is-device -triple spir64 -Wno-sycl-2017-compat -verify -// RUN: %clang_cc1 %s -fsyntax-only -fsycl -fsycl-is-device -triple spir64 -DTRIGGER_ERROR -Wno-sycl-2017-compat -verify -// RUN: %clang_cc1 %s -fsyntax-only -ast-dump -fsycl -fsycl-is-device -triple spir64 -Wno-sycl-2017-compat | FileCheck %s +// RUN: %clang_cc1 %s -fsyntax-only -fsycl -fsycl-is-device -internal-isystem %S/Inputs -triple spir64 -Wno-sycl-2017-compat -verify +// RUN: %clang_cc1 %s -fsyntax-only -fsycl -fsycl-is-device -internal-isystem %S/Inputs -triple spir64 -DTRIGGER_ERROR -Wno-sycl-2017-compat -verify +// RUN: %clang_cc1 %s -fsyntax-only -ast-dump -fsycl -fsycl-is-device -internal-isystem %S/Inputs -triple spir64 -Wno-sycl-2017-compat | FileCheck %s -#include "Inputs/sycl.hpp" +#include "sycl.hpp" + +using namespace cl::sycl; +queue q; #ifndef TRIGGER_ERROR //first case - good case @@ -46,23 +49,26 @@ func4() {} // expected-error {{'max_work_group_size' attribute conflicts with '' #endif int main() { + q.submit([&](handler &h) { #ifndef TRIGGER_ERROR - // CHECK-LABEL: FunctionDecl {{.*}} main 'int ()' - // CHECK: `-FunctionDecl {{.*}}test_kernel1 'void ()' - // CHECK: -SYCLIntelMaxWorkGroupSizeAttr {{.*}} Inherited 4 4 4 - // CHECK: -SYCLIntelNoGlobalWorkOffsetAttr {{.*}} Inherited Enabled - // CHECK: `-ReqdWorkGroupSizeAttr {{.*}} 2 2 2 - cl::sycl::kernel_single_task( - []() { func1(); }); + // CHECK-LABEL: FunctionDecl {{.*}} main 'int ()' + // CHECK: `-FunctionDecl {{.*}}test_kernel1 'void ()' + // CHECK: -SYCLIntelMaxWorkGroupSizeAttr {{.*}} Inherited 4 4 4 + // CHECK: -SYCLIntelNoGlobalWorkOffsetAttr {{.*}} + // CHECK: `-ReqdWorkGroupSizeAttr {{.*}} 2 2 2 + h.single_task( + []() { func1(); }); #else - cl::sycl::kernel_single_task( - []() { func2(); }); // expected-error {{conflicting attributes applied to a SYCL kernel or SYCL_EXTERNAL function}} + h.single_task( + []() { func2(); }); // expected-error {{conflicting attributes applied to a SYCL kernel or SYCL_EXTERNAL function}} - cl::sycl::kernel_single_task( - []() { func3(); }); + h.single_task( + []() { func3(); }); - cl::sycl::kernel_single_task( - []() { func4(); }); + h.single_task( + []() { func4(); }); #endif + }); + return 0; } diff --git a/clang/test/SemaSYCL/sycl-device-intel-fpga-no-global-work-offset.cpp b/clang/test/SemaSYCL/sycl-device-intel-fpga-no-global-work-offset.cpp new file mode 100644 index 0000000000000..c6e2bb0475d28 --- /dev/null +++ b/clang/test/SemaSYCL/sycl-device-intel-fpga-no-global-work-offset.cpp @@ -0,0 +1,60 @@ +// RUN: %clang_cc1 -fsycl -fsycl-is-device -fsyntax-only -ast-dump -verify -pedantic %s | FileCheck %s + +// Test that checks template parameter support for 'no_global_work_offset' attribute on sycl device. + +// Test that checks wrong function template instantiation and ensures that the type +// is checked properly when instantiating from the template definition. +template +// expected-error@+1{{'no_global_work_offset' attribute requires an integer constant}} +[[intel::no_global_work_offset(Ty{})]] void func() {} + +struct S {}; +void var() { + //expected-note@+1{{in instantiation of function template specialization 'func' requested here}} + func(); +} + +// Test that checks expression is not a constant expression. +int foo(); +// expected-error@+1{{'no_global_work_offset' attribute requires an integer constant}} +[[intel::no_global_work_offset(foo() + 12)]] void func1(); + +// Test that checks expression is a constant expression. +constexpr int bar() { return 0; } +[[intel::no_global_work_offset(bar() + 12)]] void func2(); // OK + +// Test that checks template parameter suppport on member function of class template. +template +class KernelFunctor { +public: + [[intel::no_global_work_offset(SIZE)]] void operator()() {} +}; + +int main() { + KernelFunctor<1>(); +} + +// CHECK: ClassTemplateDecl {{.*}} {{.*}} KernelFunctor +// CHECK: ClassTemplateSpecializationDecl {{.*}} {{.*}} class KernelFunctor definition +// CHECK: CXXRecordDecl {{.*}} {{.*}} implicit class KernelFunctor +// CHECK: SYCLIntelNoGlobalWorkOffsetAttr {{.*}} +// CHECK: SubstNonTypeTemplateParmExpr {{.*}} +// CHECK-NEXT: NonTypeTemplateParmDecl {{.*}} +// CHECK-NEXT: IntegerLiteral{{.*}}1{{$}} + +// Test that checks template parameter suppport on function. +template +[[intel::no_global_work_offset(N)]] void func3() {} + +int check() { + func3<1>(); + return 0; +} + +// CHECK: FunctionTemplateDecl {{.*}} {{.*}} func3 +// CHECK: NonTypeTemplateParmDecl {{.*}} {{.*}} referenced 'int' depth 0 index 0 N +// CHECK: FunctionDecl {{.*}} {{.*}} func3 'void ()' +// CHECK: SYCLIntelNoGlobalWorkOffsetAttr {{.*}} +// CHECK: SubstNonTypeTemplateParmExpr {{.*}} +// CHECK-NEXT: NonTypeTemplateParmDecl {{.*}} +// CHECK-NEXT: IntegerLiteral{{.*}}1{{$}} From 74a68b7da4e78d13f2b95c7b12189bf038790161 Mon Sep 17 00:00:00 2001 From: rdeodhar Date: Thu, 17 Dec 2020 08:56:44 -0700 Subject: [PATCH 4/5] [SYCL] Adjust parallel-for range global size to improve group size selection (#2703) This change rounds up a parallel-for range to be a multiple of 32. This value can be changed later when we have better strategies for selecting work-group sizes. It works well for now. The rounding-up improves performance by 8-10x for the odd cases when the original range is a prime number. It has negligible performance impact cases where the range is already a multiple of 32. Signed-off-by: rdeodhar rajiv.deodhar@intel.com --- clang/include/clang/Sema/Sema.h | 6 + clang/lib/Sema/SemaSYCL.cpp | 128 ++++++++++-- clang/test/CodeGenSYCL/Inputs/sycl.hpp | 12 ++ .../test/CodeGenSYCL/kernel-by-reference.cpp | 4 +- .../CodeGenSYCL/parallel_for_this_item.cpp | 114 +++++++++++ sycl/doc/EnvironmentVariables.md | 2 + sycl/include/CL/sycl/detail/kernel_desc.hpp | 2 + sycl/include/CL/sycl/handler.hpp | 101 +++++++-- sycl/include/CL/sycl/id.hpp | 4 + sycl/include/CL/sycl/item.hpp | 3 + sycl/include/CL/sycl/range.hpp | 8 + .../parallel_for_range_roundup.cpp | 192 ++++++++++++++++++ 12 files changed, 550 insertions(+), 26 deletions(-) create mode 100755 clang/test/CodeGenSYCL/parallel_for_this_item.cpp create mode 100755 sycl/test/basic_tests/parallel_for_range_roundup.cpp diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 9d6c5981bdf0b..d0a82c982c521 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -346,6 +346,9 @@ class SYCLIntegrationHeader { /// Registers a specialization constant to emit info for it into the header. void addSpecConstant(StringRef IDName, QualType IDType); + /// Notes that this_item is called within the kernel. + void setCallsThisItem(bool B); + private: // Kernel actual parameter descriptor. struct KernelParamDesc { @@ -382,6 +385,9 @@ class SYCLIntegrationHeader { /// Descriptor of kernel actual parameters. SmallVector Params; + // Whether kernel calls this_item() + bool CallsThisItem; + KernelDesc() = default; }; diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 41f0006fcde73..5c66ceee82128 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -99,10 +99,23 @@ class Util { /// \param Tmpl whether the class is template instantiation or simple record static bool isSyclType(const QualType &Ty, StringRef Name, bool Tmpl = false); + /// Checks whether given function is a standard SYCL API function with given + /// name. + /// \param FD the function being checked. + /// \param Name the function name to be checked against. + static bool isSyclFunction(const FunctionDecl *FD, StringRef Name); + /// Checks whether given clang type is a full specialization of the SYCL /// specialization constant class. static bool isSyclSpecConstantType(const QualType &Ty); + // Checks declaration context hierarchy. + /// \param DC the context of the item to be checked. + /// \param Scopes the declaration scopes leading from the item context to the + /// translation unit (excluding the latter) + static bool matchContext(const DeclContext *DC, + ArrayRef Scopes); + /// Checks whether given clang type is declared in the given hierarchy of /// declaration contexts. /// \param Ty the clang type being checked @@ -487,6 +500,21 @@ class MarkDeviceFunction : public RecursiveASTVisitor { FunctionDecl *FD = WorkList.back().first; FunctionDecl *ParentFD = WorkList.back().second; + // To implement rounding-up of a parallel-for range the + // SYCL header implementation modifies the kernel call like this: + // auto Wrapper = [=](TransformedArgType Arg) { + // if (Arg[0] >= NumWorkItems[0]) + // return; + // Arg.set_allowed_range(NumWorkItems); + // KernelFunc(Arg); + // }; + // + // This transformation leads to a condition where a kernel body + // function becomes callable from a new kernel body function. + // Hence this test. + if ((ParentFD == KernelBody) && isSYCLKernelBodyFunction(FD)) + KernelBody = FD; + if ((ParentFD == SYCLKernel) && isSYCLKernelBodyFunction(FD)) { assert(!KernelBody && "inconsistent call graph - only one kernel body " "function can be called"); @@ -2667,15 +2695,63 @@ class SyclKernelIntHeaderCreator : public SyclKernelFieldHandler { return !SemaRef.getASTContext().hasSameType(FD->getType(), Ty); } + // Sets a flag if the kernel is a parallel_for that calls the + // free function API "this_item". + void setThisItemIsCalled(const CXXRecordDecl *KernelObj, + FunctionDecl *KernelFunc) { + if (getKernelInvocationKind(KernelFunc) != InvokeParallelFor) + return; + + const CXXMethodDecl *WGLambdaFn = getOperatorParens(KernelObj); + if (!WGLambdaFn) + return; + + // The call graph for this translation unit. + CallGraph SYCLCG; + SYCLCG.addToCallGraph(SemaRef.getASTContext().getTranslationUnitDecl()); + using ChildParentPair = + std::pair; + llvm::SmallPtrSet Visited; + llvm::SmallVector WorkList; + WorkList.push_back({WGLambdaFn, nullptr}); + + while (!WorkList.empty()) { + const FunctionDecl *FD = WorkList.back().first; + WorkList.pop_back(); + if (!Visited.insert(FD).second) + continue; // We've already seen this Decl + + // Check whether this call is to sycl::this_item(). + if (Util::isSyclFunction(FD, "this_item")) { + Header.setCallsThisItem(true); + return; + } + + CallGraphNode *N = SYCLCG.getNode(FD); + if (!N) + continue; + + for (const CallGraphNode *CI : *N) { + if (auto *Callee = dyn_cast(CI->getDecl())) { + Callee = Callee->getMostRecentDecl(); + if (!Visited.count(Callee)) + WorkList.push_back({Callee, FD}); + } + } + } + } + public: static constexpr const bool VisitInsideSimpleContainers = false; SyclKernelIntHeaderCreator(Sema &S, SYCLIntegrationHeader &H, const CXXRecordDecl *KernelObj, QualType NameType, - StringRef Name, StringRef StableName) + StringRef Name, StringRef StableName, + FunctionDecl *KernelFunc) : SyclKernelFieldHandler(S), Header(H) { bool IsSIMDKernel = isESIMDKernelType(KernelObj); Header.startKernel(Name, NameType, StableName, KernelObj->getLocation(), IsSIMDKernel); + setThisItemIsCalled(KernelObj, KernelFunc); } bool handleSyclAccessorType(const CXXRecordDecl *RD, @@ -3123,7 +3199,7 @@ void Sema::ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, SyclKernelIntHeaderCreator int_header( *this, getSyclIntegrationHeader(), KernelObj, calculateKernelNameType(Context, KernelCallerFunc), KernelName, - StableName); + StableName, KernelCallerFunc); KernelObjVisitor Visitor{*this}; Visitor.VisitRecordBases(KernelObj, kernel_decl, kernel_body, int_header); @@ -3842,6 +3918,9 @@ void SYCLIntegrationHeader::emit(raw_ostream &O) { O << " __SYCL_DLL_LOCAL\n"; O << " static constexpr bool isESIMD() { return " << K.IsESIMDKernel << "; }\n"; + O << " __SYCL_DLL_LOCAL\n"; + O << " static constexpr bool callsThisItem() { return "; + O << K.CallsThisItem << "; }\n"; O << "};\n"; CurStart += N; } @@ -3900,6 +3979,12 @@ void SYCLIntegrationHeader::addSpecConstant(StringRef IDName, QualType IDType) { SpecConsts.emplace_back(std::make_pair(IDType, IDName.str())); } +void SYCLIntegrationHeader::setCallsThisItem(bool B) { + KernelDesc *K = getCurKernelDesc(); + assert(K && "no kernels"); + K->CallsThisItem = B; +} + SYCLIntegrationHeader::SYCLIntegrationHeader(DiagnosticsEngine &_Diag, bool _UnnamedLambdaSupport, Sema &_S) @@ -3967,6 +4052,21 @@ bool Util::isSyclType(const QualType &Ty, StringRef Name, bool Tmpl) { return matchQualifiedTypeName(Ty, Scopes); } +bool Util::isSyclFunction(const FunctionDecl *FD, StringRef Name) { + if (!FD->isFunctionOrMethod() || !FD->getIdentifier() || + FD->getName().empty() || Name != FD->getName()) + return false; + + const DeclContext *DC = FD->getDeclContext(); + if (DC->isTranslationUnit()) + return false; + + std::array Scopes = { + Util::DeclContextDesc{clang::Decl::Kind::Namespace, "cl"}, + Util::DeclContextDesc{clang::Decl::Kind::Namespace, "sycl"}}; + return matchContext(DC, Scopes); +} + bool Util::isAccessorPropertyListType(const QualType &Ty) { const StringRef &Name = "accessor_property_list"; std::array Scopes = { @@ -3977,21 +4077,15 @@ bool Util::isAccessorPropertyListType(const QualType &Ty) { return matchQualifiedTypeName(Ty, Scopes); } -bool Util::matchQualifiedTypeName(const QualType &Ty, - ArrayRef Scopes) { - // The idea: check the declaration context chain starting from the type +bool Util::matchContext(const DeclContext *Ctx, + ArrayRef Scopes) { + // The idea: check the declaration context chain starting from the item // itself. At each step check the context is of expected kind // (namespace) and name. - const CXXRecordDecl *RecTy = Ty->getAsCXXRecordDecl(); - - if (!RecTy) - return false; // only classes/structs supported - const auto *Ctx = cast(RecTy); StringRef Name = ""; for (const auto &Scope : llvm::reverse(Scopes)) { clang::Decl::Kind DK = Ctx->getDeclKind(); - if (DK != Scope.first) return false; @@ -4005,7 +4099,7 @@ bool Util::matchQualifiedTypeName(const QualType &Ty, Name = cast(Ctx)->getName(); break; default: - llvm_unreachable("matchQualifiedTypeName: decl kind not supported"); + llvm_unreachable("matchContext: decl kind not supported"); } if (Name != Scope.second) return false; @@ -4013,3 +4107,13 @@ bool Util::matchQualifiedTypeName(const QualType &Ty, } return Ctx->isTranslationUnit(); } + +bool Util::matchQualifiedTypeName(const QualType &Ty, + ArrayRef Scopes) { + const CXXRecordDecl *RecTy = Ty->getAsCXXRecordDecl(); + + if (!RecTy) + return false; // only classes/structs supported + const auto *Ctx = cast(RecTy); + return Util::matchContext(Ctx, Scopes); +} diff --git a/clang/test/CodeGenSYCL/Inputs/sycl.hpp b/clang/test/CodeGenSYCL/Inputs/sycl.hpp index 72d1c284b39f2..0f71db428018a 100644 --- a/clang/test/CodeGenSYCL/Inputs/sycl.hpp +++ b/clang/test/CodeGenSYCL/Inputs/sycl.hpp @@ -118,6 +118,18 @@ struct id { int Data; }; +template struct item { + template + item(T... args) {} // fake constructor +private: + // Some fake field added to see using of item arguments in the + // kernel wrapper + int Data; +}; + +template item +this_item() { return item{}; } + template struct range { template diff --git a/clang/test/CodeGenSYCL/kernel-by-reference.cpp b/clang/test/CodeGenSYCL/kernel-by-reference.cpp index 6502cddf602d8..f5bbac0e75730 100644 --- a/clang/test/CodeGenSYCL/kernel-by-reference.cpp +++ b/clang/test/CodeGenSYCL/kernel-by-reference.cpp @@ -15,7 +15,7 @@ int simple_add(int i) { int main() { queue q; #if defined(SYCL2020) - // expected-warning@Inputs/sycl.hpp:286 {{Passing kernel functions by value is deprecated in SYCL 2020}} + // expected-warning@Inputs/sycl.hpp:298 {{Passing kernel functions by value is deprecated in SYCL 2020}} // expected-note@+3 {{in instantiation of function template specialization}} #endif q.submit([&](handler &h) { @@ -23,7 +23,7 @@ int main() { }); #if defined(SYCL2017) - // expected-warning@Inputs/sycl.hpp:281 {{Passing of kernel functions by reference is a SYCL 2020 extension}} + // expected-warning@Inputs/sycl.hpp:293 {{Passing of kernel functions by reference is a SYCL 2020 extension}} // expected-note@+3 {{in instantiation of function template specialization}} #endif q.submit([&](handler &h) { diff --git a/clang/test/CodeGenSYCL/parallel_for_this_item.cpp b/clang/test/CodeGenSYCL/parallel_for_this_item.cpp new file mode 100755 index 0000000000000..422a1bad33373 --- /dev/null +++ b/clang/test/CodeGenSYCL/parallel_for_this_item.cpp @@ -0,0 +1,114 @@ +// RUN: %clang_cc1 -fsycl -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown-sycldevice -fsycl-int-header=%t.h %s -fsyntax-only +// RUN: FileCheck -input-file=%t.h %s + +// This test checks that compiler generates correct kernel description +// for parallel_for kernels that use the this_item API. + +// CHECK: __SYCL_INLINE_NAMESPACE(cl) { +// CHECK-NEXT: namespace sycl { +// CHECK-NEXT: namespace detail { + +// CHECK: static constexpr +// CHECK-NEXT: const char* const kernel_names[] = { +// CHECK-NEXT: "_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE3GNU", +// CHECK-NEXT: "_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE3EMU", +// CHECK-NEXT: "_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE3OWL", +// CHECK-NEXT: "_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE3RAT" +// CHECK-NEXT: }; + +// CHECK:template <> struct KernelInfo { +// CHECK-NEXT: __SYCL_DLL_LOCAL +// CHECK-NEXT: static constexpr const char* getName() { return "_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE3GNU"; } +// CHECK-NEXT: __SYCL_DLL_LOCAL +// CHECK-NEXT: static constexpr unsigned getNumParams() { return 0; } +// CHECK-NEXT: __SYCL_DLL_LOCAL +// CHECK-NEXT: static constexpr const kernel_param_desc_t& getParamDesc(unsigned i) { +// CHECK-NEXT: return kernel_signatures[i+0]; +// CHECK-NEXT: } +// CHECK-NEXT: __SYCL_DLL_LOCAL +// CHECK-NEXT: static constexpr bool isESIMD() { return 0; } +// CHECK-NEXT: __SYCL_DLL_LOCAL +// CHECK-NEXT: static constexpr bool callsThisItem() { return 0; } +// CHECK-NEXT:}; +// CHECK-NEXT:template <> struct KernelInfo { +// CHECK-NEXT: __SYCL_DLL_LOCAL +// CHECK-NEXT: static constexpr const char* getName() { return "_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE3EMU"; } +// CHECK-NEXT: __SYCL_DLL_LOCAL +// CHECK-NEXT: static constexpr unsigned getNumParams() { return 0; } +// CHECK-NEXT: __SYCL_DLL_LOCAL +// CHECK-NEXT: static constexpr const kernel_param_desc_t& getParamDesc(unsigned i) { +// CHECK-NEXT: return kernel_signatures[i+0]; +// CHECK-NEXT: } +// CHECK-NEXT: __SYCL_DLL_LOCAL +// CHECK-NEXT: static constexpr bool isESIMD() { return 0; } +// CHECK-NEXT: __SYCL_DLL_LOCAL +// CHECK-NEXT: static constexpr bool callsThisItem() { return 1; } +// CHECK-NEXT:}; +// CHECK-NEXT:template <> struct KernelInfo { +// CHECK-NEXT: __SYCL_DLL_LOCAL +// CHECK-NEXT: static constexpr const char* getName() { return "_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE3OWL"; } +// CHECK-NEXT: __SYCL_DLL_LOCAL +// CHECK-NEXT: static constexpr unsigned getNumParams() { return 0; } +// CHECK-NEXT: __SYCL_DLL_LOCAL +// CHECK-NEXT: static constexpr const kernel_param_desc_t& getParamDesc(unsigned i) { +// CHECK-NEXT: return kernel_signatures[i+0]; +// CHECK-NEXT: } +// CHECK-NEXT: __SYCL_DLL_LOCAL +// CHECK-NEXT: static constexpr bool isESIMD() { return 0; } +// CHECK-NEXT: __SYCL_DLL_LOCAL +// CHECK-NEXT: static constexpr bool callsThisItem() { return 0; } +// CHECK-NEXT:}; +// CHECK-NEXT:template <> struct KernelInfo { +// CHECK-NEXT: __SYCL_DLL_LOCAL +// CHECK-NEXT: static constexpr const char* getName() { return "_ZTSZZ4mainENK3$_0clERN2cl4sycl7handlerEE3RAT"; } +// CHECK-NEXT: __SYCL_DLL_LOCAL +// CHECK-NEXT: static constexpr unsigned getNumParams() { return 0; } +// CHECK-NEXT: __SYCL_DLL_LOCAL +// CHECK-NEXT: static constexpr const kernel_param_desc_t& getParamDesc(unsigned i) { +// CHECK-NEXT: return kernel_signatures[i+0]; +// CHECK-NEXT: } +// CHECK-NEXT: __SYCL_DLL_LOCAL +// CHECK-NEXT: static constexpr bool isESIMD() { return 0; } +// CHECK-NEXT: __SYCL_DLL_LOCAL +// CHECK-NEXT: static constexpr bool callsThisItem() { return 1; } +// CHECK-NEXT:}; + +#include "sycl.hpp" + +using namespace cl::sycl; + +SYCL_EXTERNAL item<1> g() { return this_item<1>(); } +SYCL_EXTERNAL item<1> f() { return g(); } + +// This is a similar-looking this_item function but not the real one. +template item this_item(int i) { return item<1>{i}; } + +// This is a method named this_item but not the real one. +class C { +public: + template item this_item() { return item<1>{66}; }; +}; + +int main() { + queue myQueue; + myQueue.submit([&](::handler &cgh) { + // This kernel does not call sycl::this_item + cgh.parallel_for(range<1>(1), + [=](item<1> I) { this_item<1>(55); }); + + // This kernel calls sycl::this_item + cgh.parallel_for(range<1>(1), + [=](::item<1> I) { this_item<1>(); }); + + // This kernel does not call sycl::this_item + cgh.parallel_for(range<1>(1), [=](id<1> I) { + class C c; + c.this_item<1>(); + }); + + // This kernel calls sycl::this_item + cgh.parallel_for(range<1>(1), [=](id<1> I) { f(); }); + }); + + return 0; +} diff --git a/sycl/doc/EnvironmentVariables.md b/sycl/doc/EnvironmentVariables.md index adc9e6d0ab89f..2b3e7d6577879 100644 --- a/sycl/doc/EnvironmentVariables.md +++ b/sycl/doc/EnvironmentVariables.md @@ -29,6 +29,8 @@ subject to change. Do not rely on these variables in production code. | SYCL_PI_LEVEL_ZERO_MAX_COMMAND_LIST_CACHE | Positive integer | Maximum number of oneAPI Level Zero Command lists that can be allocated with no reuse before throwing an "out of resources" error. Default is 20000, threshold may be increased based on resource availabilty and workload demand. | | SYCL_PI_LEVEL_ZERO_DISABLE_USM_ALLOCATOR | Any(\*) | Disable USM allocator in Level Zero plugin (each memory request will go directly to Level Zero runtime) | | SYCL_PI_LEVEL_ZERO_BATCH_SIZE | Integer | Sets a preferred number of commands to batch into a command list before executing the command list. A value of 0 causes the batch size to be adjusted dynamically. A value greater than 0 specifies fixed size batching, with the batch size set to the specified value. The default is 0. | +| SYCL_PARALLEL_FOR_RANGE_ROUNDING_TRACE | Any(\*) | Enables tracing of parallel_for invocations with rounded-up ranges. | +| SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING | Any(\*) | Disables automatic rounding-up of parallel_for invocation ranges. | `(*) Note: Any means this environment variable is effective when set to any non-null value.` diff --git a/sycl/include/CL/sycl/detail/kernel_desc.hpp b/sycl/include/CL/sycl/detail/kernel_desc.hpp index 5c53b67f46b93..f3bf02b1b1492 100644 --- a/sycl/include/CL/sycl/detail/kernel_desc.hpp +++ b/sycl/include/CL/sycl/detail/kernel_desc.hpp @@ -57,6 +57,7 @@ template struct KernelInfo { } static constexpr const char *getName() { return ""; } static constexpr bool isESIMD() { return 0; } + static constexpr bool callsThisItem() { return false; } }; #else template struct KernelInfoData { @@ -67,6 +68,7 @@ template struct KernelInfoData { } static constexpr const char *getName() { return ""; } static constexpr bool isESIMD() { return 0; } + static constexpr bool callsThisItem() { return false; } }; // C++14 like index_sequence and make_index_sequence diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index 50169dbd116e2..117d700596072 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -123,6 +123,14 @@ template struct get_kernel_name_t { using name = Type; }; +// Used when parallel_for range is rounded-up. +template class __pf_kernel_wrapper; + +template struct get_kernel_wrapper_name_t { + using name = __pf_kernel_wrapper< + typename get_kernel_name_t::name>; +}; + template struct check_fn_signature { static_assert(std::integral_constant::value, "Second template parameter is required to be of function type"); @@ -745,23 +753,92 @@ class __SYCL_EXPORT handler { void parallel_for_lambda_impl(range NumWorkItems, KernelType KernelFunc) { throwIfActionIsCreated(); - using NameT = - typename detail::get_kernel_name_t::name; using LambdaArgType = sycl::detail::lambda_arg_type>; + + // If 1D kernel argument is an integral type, convert it to sycl::item<1> using TransformedArgType = - typename detail::conditional_t::value && - Dims == 1, - item, LambdaArgType>; + typename std::conditional::value && + Dims == 1, + item, LambdaArgType>::type; + using NameT = + typename detail::get_kernel_name_t::name; + + // The work group size preferred by this device. + // A reasonable choice for rounding up the range is 32. + constexpr size_t GoodLocalSizeX = 32; + + // Disable the rounding-up optimizations under these conditions: + // 1. The env var SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING is set. + // 2. The string SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING is in + // the kernel name. + // 3. The kernel is provided via an interoperability method. + // 4. The API "this_item" is used inside the kernel. + // 5. The range is already a multiple of the rounding factor. + // + // Cases 3 and 4 could be supported with extra effort. + // As an optimization for the common case it is an + // implementation choice to not support those scenarios. + // Note that "this_item" is a free function, i.e. not tied to any + // specific id or item. When concurrent parallel_fors are executing + // on a device it is difficult to tell which parallel_for the call is + // being made from. One could replicate portions of the + // call-graph to make this_item calls kernel-specific but this is + // not considered worthwhile. + + // Get the kernal name to check condition 3. + std::string KName = typeid(NameT *).name(); + using KI = detail::KernelInfo; + bool DisableRounding = + (getenv("SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING") != nullptr) || + (KName.find("SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING") != + std::string::npos) || + (KI::getName() == nullptr || KI::getName()[0] == '\0') || + (KI::callsThisItem()); + + // Perform range rounding if rounding-up is enabled + // and the user-specified range is not a multiple of a "good" value. + if (!DisableRounding && NumWorkItems[0] % GoodLocalSizeX != 0) { + // It is sufficient to round up just the first dimension. + // Multiplying the rounded-up value of the first dimension + // by the values of the remaining dimensions (if any) + // will yield a rounded-up value for the total range. + size_t NewValX = + ((NumWorkItems[0] + GoodLocalSizeX - 1) / GoodLocalSizeX) * + GoodLocalSizeX; + using NameWT = typename detail::get_kernel_wrapper_name_t::name; + if (getenv("SYCL_PARALLEL_FOR_RANGE_ROUNDING_TRACE") != nullptr) + std::cout << "parallel_for range adjusted from " << NumWorkItems[0] + << " to " << NewValX << std::endl; + auto Wrapper = [=](TransformedArgType Arg) { + if (Arg[0] >= NumWorkItems[0]) + return; + Arg.set_allowed_range(NumWorkItems); + KernelFunc(Arg); + }; + + range AdjustedRange = NumWorkItems; + AdjustedRange.set_range_dim0(NewValX); #ifdef __SYCL_DEVICE_ONLY__ - (void)NumWorkItems; - kernel_parallel_for(KernelFunc); + kernel_parallel_for(Wrapper); #else - detail::checkValueRange(NumWorkItems); - MNDRDesc.set(std::move(NumWorkItems)); - StoreLambda( - std::move(KernelFunc)); - MCGType = detail::CG::KERNEL; + detail::checkValueRange(AdjustedRange); + MNDRDesc.set(std::move(AdjustedRange)); + StoreLambda( + std::move(Wrapper)); + MCGType = detail::CG::KERNEL; #endif + } else { +#ifdef __SYCL_DEVICE_ONLY__ + (void)NumWorkItems; + kernel_parallel_for(KernelFunc); +#else + detail::checkValueRange(NumWorkItems); + MNDRDesc.set(std::move(NumWorkItems)); + StoreLambda( + std::move(KernelFunc)); + MCGType = detail::CG::KERNEL; +#endif + } } /// Defines and invokes a SYCL kernel function for the specified range. diff --git a/sycl/include/CL/sycl/id.hpp b/sycl/include/CL/sycl/id.hpp index 16d176b8b698d..151657aa661e8 100644 --- a/sycl/include/CL/sycl/id.hpp +++ b/sycl/include/CL/sycl/id.hpp @@ -239,6 +239,10 @@ template class id : public detail::array { __SYCL_GEN_OPT(^=) #undef __SYCL_GEN_OPT + +private: + friend class handler; + void set_allowed_range(range rnwi) { (void)rnwi[0]; } }; namespace detail { diff --git a/sycl/include/CL/sycl/item.hpp b/sycl/include/CL/sycl/item.hpp index 9d9a879815294..a8aa9c8ef09f5 100644 --- a/sycl/include/CL/sycl/item.hpp +++ b/sycl/include/CL/sycl/item.hpp @@ -118,6 +118,9 @@ template class item { friend class detail::Builder; private: + friend class handler; + void set_allowed_range(const range rnwi) { MImpl.MExtent = rnwi; } + detail::ItemBase MImpl; }; diff --git a/sycl/include/CL/sycl/range.hpp b/sycl/include/CL/sycl/range.hpp index 0fdfa3cb9c494..32337109f97a9 100644 --- a/sycl/include/CL/sycl/range.hpp +++ b/sycl/include/CL/sycl/range.hpp @@ -8,6 +8,7 @@ #pragma once #include +#include #include #include @@ -141,6 +142,13 @@ template class range : public detail::array { __SYCL_GEN_OPT(^=) #undef __SYCL_GEN_OPT + +private: + friend class handler; + friend class detail::Builder; + + // Adjust the first dim of the range + void set_range_dim0(const size_t dim0) { this->common_array[0] = dim0; } }; #ifdef __cpp_deduction_guides diff --git a/sycl/test/basic_tests/parallel_for_range_roundup.cpp b/sycl/test/basic_tests/parallel_for_range_roundup.cpp new file mode 100755 index 0000000000000..a4a8f45c2ae92 --- /dev/null +++ b/sycl/test/basic_tests/parallel_for_range_roundup.cpp @@ -0,0 +1,192 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: env SYCL_PARALLEL_FOR_RANGE_ROUNDING_TRACE=1 %GPU_RUN_PLACEHOLDER %t.out %GPU_CHECK_PLACEHOLDER + +#include + +using namespace sycl; + +range<1> Range1 = {0}; +range<2> Range2 = {0, 0}; +range<3> Range3 = {0, 0, 0}; + +void check(const char *msg, size_t v, size_t ref) { + std::cout << msg << v << std::endl; + assert(v == ref); +} + +int try_item1(size_t size) { + range<1> Size{size}; + int Counter = 0; + { + buffer, 1> BufRange(&Range1, 1); + buffer BufCounter(&Counter, 1); + queue myQueue; + + myQueue.submit([&](handler &cgh) { + auto AccRange = BufRange.get_access(cgh); + auto AccCounter = BufCounter.get_access(cgh); + cgh.parallel_for(Size, [=](item<1> ITEM) { + AccCounter[0].fetch_add(1); + AccRange[0] = ITEM.get_range(0); + }); + }); + myQueue.wait(); + } + check("Size seen by user = ", Range1.get(0), size); + check("Counter = ", Counter, size); + return 0; +} + +void try_item2(size_t size) { + range<2> Size{size, size}; + int Counter = 0; + { + buffer, 1> BufRange(&Range2, 1); + buffer BufCounter(&Counter, 1); + queue myQueue; + + myQueue.submit([&](handler &cgh) { + auto AccRange = BufRange.get_access(cgh); + auto AccCounter = BufCounter.get_access(cgh); + cgh.parallel_for(Size, [=](item<2> ITEM) { + AccCounter[0].fetch_add(1); + AccRange[0][0] = ITEM.get_range(0); + }); + }); + myQueue.wait(); + } + check("Size seen by user = ", Range2.get(0), size); + check("Counter = ", Counter, size * size); +} + +void try_item3(size_t size) { + range<3> Size{size, size, size}; + int Counter = 0; + { + buffer, 1> BufRange(&Range3, 1); + buffer BufCounter(&Counter, 1); + queue myQueue; + + myQueue.submit([&](handler &cgh) { + auto AccRange = BufRange.get_access(cgh); + auto AccCounter = BufCounter.get_access(cgh); + cgh.parallel_for(Size, [=](item<3> ITEM) { + AccCounter[0].fetch_add(1); + AccRange[0][0] = ITEM.get_range(0); + }); + }); + myQueue.wait(); + } + check("Size seen by user = ", Range3.get(0), size); + check("Counter = ", Counter, size * size * size); +} + +void try_id1(size_t size) { + range<1> Size{size}; + int Counter = 0; + { + buffer, 1> BufRange(&Range1, 1); + buffer BufCounter(&Counter, 1); + queue myQueue; + + myQueue.submit([&](handler &cgh) { + auto AccRange = BufRange.get_access(cgh); + auto AccCounter = BufCounter.get_access(cgh); + cgh.parallel_for(Size, [=](id<1> ID) { + AccCounter[0].fetch_add(1); + AccRange[0] = ID[0]; + }); + }); + myQueue.wait(); + } + check("Counter = ", Counter, size); +} + +void try_id2(size_t size) { + range<2> Size{size, size}; + int Counter = 0; + { + buffer, 1> BufRange(&Range2, 1); + buffer BufCounter(&Counter, 1); + queue myQueue; + + myQueue.submit([&](handler &cgh) { + auto AccRange = BufRange.get_access(cgh); + auto AccCounter = BufCounter.get_access(cgh); + cgh.parallel_for(Size, [=](id<2> ID) { + AccCounter[0].fetch_add(1); + AccRange[0][0] = ID[0]; + }); + }); + myQueue.wait(); + } + check("Counter = ", Counter, size * size); +} + +void try_id3(size_t size) { + range<3> Size{size, size, size}; + int Counter = 0; + { + buffer, 1> BufRange(&Range3, 1); + buffer BufCounter(&Counter, 1); + queue myQueue; + + myQueue.submit([&](handler &cgh) { + auto AccRange = BufRange.get_access(cgh); + auto AccCounter = BufCounter.get_access(cgh); + cgh.parallel_for(Size, [=](id<3> ID) { + AccCounter[0].fetch_add(1); + AccRange[0][0] = ID[0]; + }); + }); + myQueue.wait(); + } + check("Counter = ", Counter, size * size * size); +} + +int main() { + int x; + + x = 10; + try_item1(x); + try_item2(x); + try_item3(x); + try_id1(x); + try_id2(x); + try_id3(x); + + x = 256; + try_item1(x); + try_item2(x); + try_item3(x); + try_id1(x); + try_id2(x); + try_id3(x); + + return 0; +} + +// CHECK: parallel_for range adjusted from 10 to 32 +// CHECK-NEXT: Size seen by user = 10 +// CHECK-NEXT: Counter = 10 +// CHECK-NEXT: parallel_for range adjusted from 10 to 32 +// CHECK-NEXT: Size seen by user = 10 +// CHECK-NEXT: Counter = 100 +// CHECK-NEXT: parallel_for range adjusted from 10 to 32 +// CHECK-NEXT: Size seen by user = 10 +// CHECK-NEXT: Counter = 1000 +// CHECK-NEXT: parallel_for range adjusted from 10 to 32 +// CHECK-NEXT: Counter = 10 +// CHECK-NEXT: parallel_for range adjusted from 10 to 32 +// CHECK-NEXT: Counter = 100 +// CHECK-NEXT: parallel_for range adjusted from 10 to 32 +// CHECK-NEXT: Counter = 1000 +// CHECK-NEXT: Size seen by user = 256 +// CHECK-NEXT: Counter = 256 +// CHECK-NEXT: Size seen by user = 256 +// CHECK-NEXT: Counter = 65536 +// CHECK-NEXT: Size seen by user = 256 +// CHECK-NEXT: Counter = 16777216 +// CHECK-NEXT: Counter = 256 +// CHECK-NEXT: Counter = 65536 +// CHECK-NEXT: Counter = 16777216 From d4251e3c55e7b285950e6cf6eba6e874498376b7 Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Thu, 17 Dec 2020 20:08:17 +0300 Subject: [PATCH 5/5] [SYCL] Fix handling of multiple usages of composite spec constants (#2894) Fixed the issue that instead of re-using previously assigned IDs for elements of a composite spec constant, all elements used the same ID, which was taken from the last element of the composite. --- .../multiple-composite-spec-const-usages-2.ll | 78 +++++++++++++++++++ llvm/tools/sycl-post-link/SpecConstants.cpp | 30 +++---- .../multiple-usages-of-composite.cpp | 76 ++++++++++++++++++ 3 files changed, 164 insertions(+), 20 deletions(-) create mode 100644 llvm/test/tools/sycl-post-link/multiple-composite-spec-const-usages-2.ll create mode 100644 sycl/test/on-device/spec_const/multiple-usages-of-composite.cpp diff --git a/llvm/test/tools/sycl-post-link/multiple-composite-spec-const-usages-2.ll b/llvm/test/tools/sycl-post-link/multiple-composite-spec-const-usages-2.ll new file mode 100644 index 0000000000000..f07c9b5c47a1c --- /dev/null +++ b/llvm/test/tools/sycl-post-link/multiple-composite-spec-const-usages-2.ll @@ -0,0 +1,78 @@ +; RUN: sycl-post-link -spec-const=rt --ir-output-only %s -S -o - \ +; RUN: | FileCheck %s --implicit-check-not __sycl_getCompositeSpecConstantValue +; +; This test is intended to check that sycl-post-link tool is capable of handling +; situations when the same composite specialization constants is used more than +; once. Unlike multiple-composite-spec-const-usages.ll test, this is a real life +; LLVM IR example +; +; CHECK-LABEL: @_ZTSN4test8kernel_tIfEE +; CHECK: %[[#X1:]] = call float @_Z20__spirv_SpecConstantif(i32 0, float 0 +; CHECK: %[[#Y1:]] = call float @_Z20__spirv_SpecConstantif(i32 1, float 0 +; CHECK: call {{.*}} @_Z29__spirv_SpecConstantCompositeff(float %[[#X1]], float %[[#Y1]]), !SYCL_SPEC_CONST_SYM_ID ![[#ID:]] +; CHECK-LABEL: @_ZTSN4test8kernel_tIiEE +; CHECK: %[[#X2:]] = call float @_Z20__spirv_SpecConstantif(i32 0, float 0 +; CHECK: %[[#Y2:]] = call float @_Z20__spirv_SpecConstantif(i32 1, float 0 +; CHECK: call {{.*}} @_Z29__spirv_SpecConstantCompositeff(float %[[#X2]], float %[[#Y2]]), !SYCL_SPEC_CONST_SYM_ID ![[#ID]] +; CHECK: ![[#ID]] = !{!"_ZTS11sc_kernel_t", i32 0, i32 1} + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown-sycldevice" + +%"struct._ZTSN4test5pod_tE.test::pod_t" = type { float, float } + +$_ZTSN4test8kernel_tIfEE = comdat any + +$_ZTSN4test8kernel_tIiEE = comdat any + +@__builtin_unique_stable_name._ZNK2cl4sycl6ONEAPI12experimental13spec_constantIN4test5pod_tE11sc_kernel_tE3getIS5_EENSt9enable_ifIXaasr3std8is_classIT_EE5valuesr3std6is_podISA_EE5valueESA_E4typeEv = private unnamed_addr addrspace(1) constant [18 x i8] c"_ZTS11sc_kernel_t\00", align 1 + +; Function Attrs: convergent norecurse +define weak_odr dso_local spir_kernel void @_ZTSN4test8kernel_tIfEE() local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 { +entry: + %ref.tmp.i = alloca %"struct._ZTSN4test5pod_tE.test::pod_t", align 4 + %0 = bitcast %"struct._ZTSN4test5pod_tE.test::pod_t"* %ref.tmp.i to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %0) #3 + %1 = addrspacecast %"struct._ZTSN4test5pod_tE.test::pod_t"* %ref.tmp.i to %"struct._ZTSN4test5pod_tE.test::pod_t" addrspace(4)* + call spir_func void @_Z36__sycl_getCompositeSpecConstantValueIN4test5pod_tEET_PKc(%"struct._ZTSN4test5pod_tE.test::pod_t" addrspace(4)* sret(%"struct._ZTSN4test5pod_tE.test::pod_t") align 4 %1, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([18 x i8], [18 x i8] addrspace(1)* @__builtin_unique_stable_name._ZNK2cl4sycl6ONEAPI12experimental13spec_constantIN4test5pod_tE11sc_kernel_tE3getIS5_EENSt9enable_ifIXaasr3std8is_classIT_EE5valuesr3std6is_podISA_EE5valueESA_E4typeEv, i64 0, i64 0) to i8 addrspace(4)*)) #4 + call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %0) #3 + ret void +} + +; Function Attrs: argmemonly nofree nosync nounwind willreturn +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1 + +; Function Attrs: argmemonly nofree nosync nounwind willreturn +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1 + +; Function Attrs: convergent +declare dso_local spir_func void @_Z36__sycl_getCompositeSpecConstantValueIN4test5pod_tEET_PKc(%"struct._ZTSN4test5pod_tE.test::pod_t" addrspace(4)* sret(%"struct._ZTSN4test5pod_tE.test::pod_t") align 4, i8 addrspace(4)*) local_unnamed_addr #2 + +; Function Attrs: convergent norecurse +define weak_odr dso_local spir_kernel void @_ZTSN4test8kernel_tIiEE() local_unnamed_addr #0 comdat !kernel_arg_buffer_location !4 { +entry: + %ref.tmp.i = alloca %"struct._ZTSN4test5pod_tE.test::pod_t", align 4 + %0 = bitcast %"struct._ZTSN4test5pod_tE.test::pod_t"* %ref.tmp.i to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %0) #3 + %1 = addrspacecast %"struct._ZTSN4test5pod_tE.test::pod_t"* %ref.tmp.i to %"struct._ZTSN4test5pod_tE.test::pod_t" addrspace(4)* + call spir_func void @_Z36__sycl_getCompositeSpecConstantValueIN4test5pod_tEET_PKc(%"struct._ZTSN4test5pod_tE.test::pod_t" addrspace(4)* sret(%"struct._ZTSN4test5pod_tE.test::pod_t") align 4 %1, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([18 x i8], [18 x i8] addrspace(1)* @__builtin_unique_stable_name._ZNK2cl4sycl6ONEAPI12experimental13spec_constantIN4test5pod_tE11sc_kernel_tE3getIS5_EENSt9enable_ifIXaasr3std8is_classIT_EE5valuesr3std6is_podISA_EE5valueESA_E4typeEv, i64 0, i64 0) to i8 addrspace(4)*)) #4 + call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %0) #3 + ret void +} + +attributes #0 = { convergent norecurse "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="repro-1.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nofree nosync nounwind willreturn } +attributes #2 = { convergent "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind } +attributes #4 = { convergent } + +!llvm.module.flags = !{!0} +!opencl.spir.version = !{!1} +!spirv.Source = !{!2} +!llvm.ident = !{!3} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, i32 2} +!2 = !{i32 4, i32 100000} +!3 = !{!"clang version 12.0.0 (/data/github.com/intel/llvm/clang 9b7086f7cef079b80ac5e137394f8d77d5d49c3e)"} +!4 = !{} diff --git a/llvm/tools/sycl-post-link/SpecConstants.cpp b/llvm/tools/sycl-post-link/SpecConstants.cpp index 6bdd4997fe241..d9a3168799c80 100644 --- a/llvm/tools/sycl-post-link/SpecConstants.cpp +++ b/llvm/tools/sycl-post-link/SpecConstants.cpp @@ -318,12 +318,7 @@ Instruction *emitSpecConstantComposite(Type *Ty, /// first ID. If \c IsNewSpecConstant is false, this vector is expected to /// contain enough elements to assign ID to each scalar element encountered in /// the specified composite type. -/// @param IsNewSpecConstant [in] Flag to specify whether \c IDs vector should -/// be filled with new IDs or it should be used as-is to replicate an existing -/// spec constant -/// @param [in,out] IsFirstElement Flag indicating whether this function is -/// handling the first scalar element encountered in the specified composite -/// type \c Ty or not. +/// @param [in,out] Index Index of scalar element within a composite type /// /// @returns Instruction* representing specialization constant in LLVM IR, which /// is in SPIR-V friendly LLVM IR form. @@ -335,22 +330,20 @@ Instruction *emitSpecConstantComposite(Type *Ty, /// encountered scalars and assigns them IDs (or re-uses existing ones). Instruction *emitSpecConstantRecursiveImpl(Type *Ty, Instruction *InsertBefore, SmallVectorImpl &IDs, - bool IsNewSpecConstant, - bool &IsFirstElement) { + unsigned &Index) { if (!Ty->isArrayTy() && !Ty->isStructTy() && !Ty->isVectorTy()) { // Scalar - if (IsNewSpecConstant && !IsFirstElement) { + if (Index >= IDs.size()) { // If it is a new specialization constant, we need to generate IDs for // scalar elements, starting with the second one. IDs.push_back(IDs.back() + 1); } - IsFirstElement = false; - return emitSpecConstant(IDs.back(), Ty, InsertBefore); + return emitSpecConstant(IDs[Index++], Ty, InsertBefore); } SmallVector Elements; auto LoopIteration = [&](Type *Ty) { - Elements.push_back(emitSpecConstantRecursiveImpl( - Ty, InsertBefore, IDs, IsNewSpecConstant, IsFirstElement)); + Elements.push_back( + emitSpecConstantRecursiveImpl(Ty, InsertBefore, IDs, Index)); }; if (auto *ArrTy = dyn_cast(Ty)) { @@ -374,11 +367,9 @@ Instruction *emitSpecConstantRecursiveImpl(Type *Ty, Instruction *InsertBefore, /// Wrapper intended to hide IsFirstElement argument from the caller Instruction *emitSpecConstantRecursive(Type *Ty, Instruction *InsertBefore, - SmallVectorImpl &IDs, - bool IsNewSpecConstant) { - bool IsFirstElement = true; - return emitSpecConstantRecursiveImpl(Ty, InsertBefore, IDs, IsNewSpecConstant, - IsFirstElement); + SmallVectorImpl &IDs) { + unsigned Index = 0; + return emitSpecConstantRecursiveImpl(Ty, InsertBefore, IDs, Index); } } // namespace @@ -446,8 +437,7 @@ PreservedAnalyses SpecConstantsPass::run(Module &M, // 3. Transform to spirv intrinsic _Z*__spirv_SpecConstant* or // _Z*__spirv_SpecConstantComposite - auto *SPIRVCall = - emitSpecConstantRecursive(SCTy, CI, IDs, IsNewSpecConstant); + auto *SPIRVCall = emitSpecConstantRecursive(SCTy, CI, IDs); if (IsNewSpecConstant) { // emitSpecConstantRecursive might emit more than one spec constant // (because of composite types) and therefore, we need to ajudst diff --git a/sycl/test/on-device/spec_const/multiple-usages-of-composite.cpp b/sycl/test/on-device/spec_const/multiple-usages-of-composite.cpp new file mode 100644 index 0000000000000..6589e00f3f6e0 --- /dev/null +++ b/sycl/test/on-device/spec_const/multiple-usages-of-composite.cpp @@ -0,0 +1,76 @@ +// UNSUPPORTED: cuda +// +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %RUN_ON_HOST %t.out | FileCheck %s +// RUN: %CPU_RUN_PLACEHOLDER %t.out %CPU_CHECK_PLACEHOLDER +// RUN: %GPU_RUN_PLACEHOLDER %t.out %GPU_CHECK_PLACEHOLDER +// +// The test checks that multiple usages of the same specialization constant +// works correctly: toolchain processes them correctly and runtime can +// correctly execute the program. +// +// CHECK: --------> 1 + +#include + +using namespace cl::sycl; + +class sc_kernel_t; + +namespace test { + +struct pod_t { + float x; + float y; +}; + +template class kernel_t { +public: + using sc_t = sycl::ONEAPI::experimental::spec_constant; + + kernel_t(const sc_t &sc, cl::sycl::stream &strm) : sc_(sc), strm_(strm) {} + + void operator()(cl::sycl::id<1> i) const { + strm_ << "--------> " << sc_.get().x << sycl::endl; + } + + sc_t sc_; + cl::sycl::stream strm_; +}; + +template class kernel_driver_t { +public: + void execute(const pod_t &pod) { + device dev = sycl::device(default_selector{}); + context ctx = context(dev); + queue q(dev); + + cl::sycl::program p(q.get_context()); + auto sc = p.set_spec_constant(pod); + p.build_with_kernel_type>(); + + q.submit([&](cl::sycl::handler &cgh) { + cl::sycl::stream strm(1024, 256, cgh); + kernel_t func(sc, strm); + + auto sycl_kernel = p.get_kernel>(); + cgh.parallel_for(sycl_kernel, cl::sycl::range<1>(1), func); + }); + q.wait(); + } +}; + +template class kernel_driver_t; + +// The line below instantiates the second use of the spec constant named +// `sc_kernel_t`, which used to corrupt the spec constant content +template class kernel_driver_t; +} // namespace test + +int main() { + test::pod_t pod = {1, 2}; + test::kernel_driver_t kd_float; + kd_float.execute(pod); + + return 0; +}