AcademySoftwareFoundation · lgritz · Mar 8, 2022 · Mar 5, 2022 · Mar 5, 2022 · Mar 5, 2022
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -542,6 +542,63 @@ jobs:
             build/*.cmake
             build/testsuite/*/*.*
 
+  icx-vp2022:
+    name: "Linux VFXP-2022 icx/C++17 llvm13 py3.9 boost1.76 exr3.1 oiio2.3 avx2"
+    runs-on: ubuntu-latest
+    container:
+      image: aswftesting/ci-osl:2022-clang13
+    env:
+      CXX: icpx
+      CC: icx
+      CMAKE_CXX_STANDARD: 17
+      OPENIMAGEIO_VERSION: master
+      PYBIND11_VERSION: v2.9.0
+      PYTHON_VERSION: 3.9
+      USE_BATCHED: b8_AVX2_noFMA
+      USE_SIMD: avx2,f16c
+      OSL_CMAKE_FLAGS: -DSTOP_ON_WARNING=OFF
+      USE_OPENVDB: 0
+
+    steps:
+      - uses: actions/checkout@v2
+      - name: Prepare ccache timestamp
+        id: ccache_cache_keys
+        shell: bash
+        run: |
+          echo "::set-output name=date::`date -u +'%Y-%m-%dT%H:%M:%SZ'`"
+      - name: ccache
+        id: ccache
+        uses: actions/cache@v2
+        with:
+          path: /tmp/ccache
+          key: ${{ github.job }}-${{ steps.ccache_cache_keys.outputs.date }}
+          restore-keys: |
+            ${{ github.job }}-
+      - name: Build setup
+        run: |
+            src/build-scripts/ci-startup.bash
+      - name: Dependencies
+        run: |
+            sudo rm -rf /usr/local/include/OpenImageIO
+            sudo rm -rf /usr/local/lib*/cmake/OpenImageIO
+            sudo rm -rf /usr/local/lib*/libOpenImageIO*
+            sudo rm -rf /usr/local/lib*/python3.9/site-packages/OpenImageIO*
+            src/build-scripts/gh-installdeps.bash
+      - name: Build
+        run: |
+            src/build-scripts/ci-build.bash
+      - name: Testsuite
+        run: |
+            src/build-scripts/ci-test.bash
+      - uses: actions/upload-artifact@v2
+        if: failure()
+        with:
+          name: osl-${{github.job}}
+          path: |
+            build/CMake*
+            build/*.cmake
+            build/testsuite/*/*.*
+
   linux-latest-release:
     # Test against development master for relevant dependencies, latest
     # supported releases of everything else.

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -175,6 +175,21 @@ include_directories (
   )
 
 
+# Make sure our math calculations are consistent,
+# especially division.  Different compilers may have choose to use
+# reciprocal division losing precision causing slightly different
+# results which can lead to aliasing differences when running the testsuite
+# Disable reciprocal division
+if (CMAKE_COMPILER_IS_INTEL)
+    if (MSVC)
+        add_compile_options("/Qprec-div")
+    else ()
+        add_compile_options("-prec-div")
+    endif ()
+elseif (CMAKE_COMPILER_IS_CLANG OR CMAKE_COMPILER_IS_APPLECLANG OR CMAKE_COMPILER_IS_INTELCLANG)
+    add_compile_options("-fno-reciprocal-math")
+endif ()
+
 # Tell CMake to process the sub-directories
 add_subdirectory (src/include)
 add_subdirectory (src/liboslcomp)

diff --git a/src/build-scripts/gh-installdeps.bash b/src/build-scripts/gh-installdeps.bash
@@ -40,11 +40,18 @@ if [[ "$ASWF_ORG" != ""  ]] ; then
         popd
     fi
 
-    if [[ "$CXX" == "icpc" || "$CC" == "icc" || "$USE_ICC" != "" ]] ; then
+    if [[ "$CXX" == "icpc" || "$CC" == "icc" || "$USE_ICC" != "" || "$CXX" == "icpx" || "$CC" == "icx" || "$USE_ICX" != "" ]] ; then
         sudo cp src/build-scripts/oneAPI.repo /etc/yum.repos.d
         sudo yum install -y intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic
         set +e; source /opt/intel/oneapi/setvars.sh; set -e
-        icpc --version
+        if [[ "$CXX" == "icpc" || "$CC" == "icc" || "$USE_ICC" != "" ]] ; then
+            echo "Verifying installation of Intel(r) C++ Compiler:"
+            icpc --version
+        fi
+        if [[ "$CXX" == "icpx" || "$CC" == "icx" || "$USE_ICX" != "" ]] ; then
+            echo "Verifying installation of Intel(r) oneAPI DPC++/C++ Compiler:"
+            icpx --version
+        fi
     fi
 
 else
@@ -80,14 +87,19 @@ else
         time sudo apt-get install -y g++-11
     fi
 
-    if [[ "$CXX" == "icpc" || "$CC" == "icc" || "$USE_ICC" != "" ]] ; then
+    if [[ "$CXX" == "icpc" || "$CC" == "icc" || "$USE_ICC" != "" || "$CXX" == "icpx" || "$CC" == "icx" || "$USE_ICX" != "" ]] ; then
         wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB
         sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB
         echo "deb https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list
         time sudo apt-get update
         time sudo apt-get install -y intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic
         set +e; source /opt/intel/oneapi/setvars.sh; set -e
-        icpc --version
+        if [[ "$CXX" == "icpc" || "$CC" == "icc" || "$USE_ICC" != "" ]] ; then
+            icpc --version
+        fi
+        if [[ "$CXX" == "icpx" || "$CC" == "icx" || "$USE_ICX" != "" ]] ; then
+            icpx --version
+        fi
     fi
 
     # Nonstandard python versions

diff --git a/src/cmake/compiler.cmake b/src/cmake/compiler.cmake
@@ -51,7 +51,8 @@ else ()
     set (GCC_VERSION 0)
 endif ()
 
-if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER MATCHES "[Cc]lang")
+if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER MATCHES "[Cc]lang"
+    OR CMAKE_CXX_COMPILER_ID MATCHES "IntelLLVM")
     # If using any flavor of clang, set CMAKE_COMPILER_IS_CLANG. If it's
     # Apple's variety, set CMAKE_COMPILER_IS_APPLECLANG and
     # APPLECLANG_VERSION_STRING, otherwise for generic clang set
@@ -65,6 +66,12 @@ if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER MATCHES "[Cc]lan
         if (VERBOSE)
             message (STATUS "The compiler is Clang: ${CMAKE_CXX_COMPILER_ID} version ${APPLECLANG_VERSION_STRING}")
         endif ()
+    elseif (CMAKE_CXX_COMPILER_ID MATCHES "IntelLLVM")
+        set (CMAKE_COMPILER_IS_INTELCLANG 1)
+        string (REGEX REPLACE ".* version ([0-9]+\\.[0-9]+).*" "\\1" CLANG_VERSION_STRING ${clang_full_version_string})
+        if (VERBOSE)
+            message (STATUS "The compiler is Intel Clang: ${CMAKE_CXX_COMPILER_ID} version ${CLANG_VERSION_STRING}")
+        endif ()
     else ()
         string (REGEX REPLACE ".* version ([0-9]+\\.[0-9]+).*" "\\1" CLANG_VERSION_STRING ${clang_full_version_string})
         if (VERBOSE)

diff --git a/src/include/OSL/Imathx/Imathx.h b/src/include/OSL/Imathx/Imathx.h
@@ -246,7 +246,7 @@ affineInverse(const Matrix44 &m)
 // differently than the LLVM IR version.
 // NOTE:  only using "inline" to get ODR (One Definition Rule) behavior
 static inline OSL_HOSTDEVICE Matrix44
-#if !OSL_INTEL_COMPILER
+#if !OSL_INTEL_CLASSIC_COMPILER_VERSION
     OSL_GNUC_ATTRIBUTE(optimize("fp-contract=off"))
 #endif
 nonAffineInverse(const Matrix44 &source);

diff --git a/src/include/OSL/mask.h b/src/include/OSL/mask.h
@@ -21,7 +21,7 @@ OSL_NAMESPACE_ENTER
 using std::popcount;
 using std::countr_zero;
 
-#elif OSL_INTEL_COMPILER
+#elif OSL_INTEL_CLASSIC_COMPILER_VERSION
 
 OSL_FORCEINLINE int popcount(uint32_t x) noexcept { return _mm_popcnt_u32(x);}
 OSL_FORCEINLINE int popcount(uint64_t x) noexcept { return _mm_popcnt_u64(x); }

diff --git a/src/include/OSL/oslnoise.h b/src/include/OSL/oslnoise.h
@@ -640,7 +640,7 @@ OSL_FORCEINLINE OSL_HOSTDEVICE Dual2<float> select(const bool b, const Dual2<flo
     // versus requiring a stack location.
     // Without this work per component, gathers & scatters were being emitted
     // when used inside SIMD loops.
-#if OSL_NON_INTEL_CLANG
+#if OSL_ANY_CLANG && !OSL_INTEL_CLASSIC_COMPILER_VERSION
     // Clang's vectorizor was really insistent that a select operation could not be replaced
     // with control flow, so had to re-introduce the ? operator to make it happy
     return Dual2<float> (
@@ -2247,7 +2247,7 @@ OSL_FORCEINLINE OSL_HOSTDEVICE void perlin (Dual2<Vec3> &result, const H &hash,
 
     // With Dual2<Vec3> data types, a lot of code is generated below
     // which caused some runaway compiler memory consumption when vectorizing
-#if !OSL_INTEL_COMPILER
+#if !OSL_INTEL_CLASSIC_COMPILER_VERSION
     auto l_result = OIIO::lerp (
                OIIO::trilerp (grad (hash (X  , Y  , Z  , W  ), fx     , fy     , fz     , fw     ),
                               grad (hash (X+1, Y  , Z  , W  ), fx-1.0f, fy     , fz     , fw     ),

diff --git a/src/include/OSL/platform.h b/src/include/OSL/platform.h
@@ -28,9 +28,10 @@
 
 // Notes:
 //   __GNUC__ is defined for gcc and all clang varieties
-//   __clang__ is defined for all clang varieties (generic and Apple)
+//   __clang__ is defined for all clang varieties (generic, Apple, and Intel)
 //   __apple_build_version__ is only defined for Apple clang
 //   __INTEL_COMPILER is defined only for icc
+//   __INTEL_LLVM_COMPILER is defined only for icx
 //   _MSC_VER is defined for MSVS compiler (not gcc/clang/icc even on Windows)
 //   _WIN32 is defined on Windows regardless of compiler
 //   __CUDACC__   is defined any time we are compiling a module for Cuda
@@ -51,7 +52,7 @@
 // 30402 for clang 3.4.2), or 0 if not a generic Clang release.
 // N.B. This will be 0 for the clang Apple distributes (which has different
 // version numbers entirely).
-#if defined(__clang__) && !defined(__apple_build_version__)
+#if defined(__clang__) && !defined(__apple_build_version__) && !defined(__INTEL_LLVM_COMPILER)
 #  define OSL_CLANG_VERSION (10000*__clang_major__ + 100*__clang_minor__ + __clang_patchlevel__)
 #else
 #  define OSL_CLANG_VERSION 0
@@ -60,27 +61,45 @@
 // Define OSL_APPLE_CLANG_VERSION to hold an encoded Apple Clang version
 // (e.g. 70002 for clang 7.0.2), or 0 if not an Apple Clang release.
 #if defined(__clang__) && defined(__apple_build_version__)
+#  if defined(__INTEL_LLVM_COMPILER)
+#    error Not expected for __INTEL_LLVM_COMPILER to be defined with an __apple_build_version__
+#  endif
 #  define OSL_APPLE_CLANG_VERSION (10000*__clang_major__ + 100*__clang_minor__ + __clang_patchlevel__)
 #else
 #  define OSL_APPLE_CLANG_VERSION 0
 #endif
+// The classic Intel(r) C++ Compiler on OSX may still define __clang__
+// combine with testing OSL_INTEL_CLASSIC_COMPILER_VERSION to further differentiate if needed.
 
-// Define OSL_INTEL_COMPILER to hold an encoded Intel compiler version
-// (e.g. 1900), or 0 if not an Intel compiler.
+// Define OSL_INTEL_CLASSIC_COMPILER_VERSION to hold an encoded Intel(r) C++ Compiler version
+// (e.g. 1900), or 0 if not an Intel(r) C++ Compiler.
 #if defined(__INTEL_COMPILER)
-#  define OSL_INTEL_COMPILER __INTEL_COMPILER
+#  define OSL_INTEL_CLASSIC_COMPILER_VERSION __INTEL_COMPILER
+#else
+#  define OSL_INTEL_CLASSIC_COMPILER_VERSION 0
+#endif
+
+// Define OSL_INTEL_LLVM_COMPILER_VERSION to hold an encoded Intel(r) LLVM Compiler version
+// (e.g. 20220000), or 0 if not an Intel(r) LLVM Compiler.
+// Define OSL_INTEL_CLANG_VERSION to hold the encoded Clang version the
+// Intel(r) LLVM Compiler is based on (e.g. 140000),
+// or 0 if not an Intel(r) LLVM compiler.
+#if defined(__INTEL_LLVM_COMPILER)
+#  define OSL_INTEL_LLVM_COMPILER_VERSION __INTEL_LLVM_COMPILER
+#  define OSL_INTEL_CLANG_VERSION (10000*__clang_major__ + 100*__clang_minor__ + __clang_patchlevel__)
 #else
-#  define OSL_INTEL_COMPILER 0
+#  define OSL_INTEL_LLVM_COMPILER_VERSION 0
+#  define OSL_INTEL_CLANG_VERSION 0
 #endif
 
-// Intel's compiler on OSX may still define __clang__
-// and we have need to know when using a true clang compiler
-#if !defined(__INTEL_COMPILER) && defined(__clang__)
-    #define OSL_NON_INTEL_CLANG  __clang__
+// Define OSL_ANY_CLANG to 0 or 1 to indicate if any Clang based compiler is in use
+#if defined(__clang__)
+#  define OSL_ANY_CLANG 1
 #else
-	#define OSL_NON_INTEL_CLANG  0
+#  define OSL_ANY_CLANG 0
 #endif
 
+
 // Tests for MSVS versions, always 0 if not MSVS at all.
 #if defined(_MSC_VER)
 #  define OSL_MSVS_AT_LEAST_2013 (_MSC_VER >= 1800)
@@ -243,6 +262,15 @@
     #define OSL_OMP_PRAGMA(aUnQuotedPragma)
 #endif
 
+#define OSL_OMP_SIMD_LOOP(...) OSL_OMP_PRAGMA(omp simd __VA_ARGS__)
+#if (OSL_GCCVERSION || OSL_INTEL_CLASSIC_COMPILER_VERSION)
+#   define OSL_OMP_COMPLEX_SIMD_LOOP(...) OSL_OMP_SIMD_LOOP(__VA_ARGS__)
+#else
+    // Ignore requests to vectorize complex/nested SIMD loops for certain
+    // compilers/versions that are not currently vectorizing complex loops.
+#   define OSL_OMP_COMPLEX_SIMD_LOOP(...)
+#endif
+
 // OSL_FORCEINLINE is a function attribute that attempts to make the
 // function always inline. On many compilers regular 'inline' is only
 // advisory. Put this attribute before the function return type, just like

diff --git a/src/include/OSL/sfmath.h b/src/include/OSL/sfmath.h
@@ -68,7 +68,7 @@ namespace sfm
        OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     */
 
-#if OSL_INTEL_COMPILER
+#if OSL_INTEL_CLASSIC_COMPILER_VERSION
     // std::isinf wasn't vectorizing and was branchy. This slightly
     // perturbed version fairs better and is branch free when vectorized
     // with the Intel compiler.
@@ -263,7 +263,7 @@ namespace sfm
         }
     }
 
-#if OSL_CLANG_VERSION && !OSL_INTEL_COMPILER
+#if OSL_ANY_CLANG && !OSL_INTEL_CLASSIC_COMPILER_VERSION
 
     // To make clang's loop vectorizor happy
     // we need to make sure result of min and max

diff --git a/src/include/OSL/wide.h b/src/include/OSL/wide.h
@@ -1853,7 +1853,7 @@ struct WideImpl<const Dual2<ElementT>[], WidthT, true /*IsConstT */> {
 }  // namespace pvt
 
 
-#if OSL_INTEL_COMPILER || OSL_GNUC_VERSION
+#if OSL_INTEL_CLASSIC_COMPILER_VERSION || OSL_GNUC_VERSION
 // Workaround for error #3466: inheriting constructors must be inherited from a direct base class
 #    define __OSL_INHERIT_BASE_CTORS(DERIVED, BASE) \
         using Base = typename DERIVED::BASE;        \
@@ -3139,7 +3139,8 @@ template<typename DataT, int WidthT>
 OSL_FORCEINLINE bool
 testIfAnyLaneIsNonZero(const Wide<DataT, WidthT>& wvalues)
 {
-#if OSL_NON_INTEL_CLANG
+#if OSL_ANY_CLANG && !OSL_INTEL_CLASSIC_COMPILER_VERSION \
+    && !OSL_INTEL_LLVM_COMPILER_VERSION
     int anyLaneIsOn = 0;
     OSL_OMP_PRAGMA(omp simd simdlen(WidthT) reduction(max : anyLaneIsOn))
     for (int i = 0; i < WidthT; ++i) {
@@ -3149,7 +3150,8 @@ testIfAnyLaneIsNonZero(const Wide<DataT, WidthT>& wvalues)
     return anyLaneIsOn;
 #else
     // NOTE: do not explicitly vectorize as it would require a
-    // reduction.  Instead let compiler optimize this itself.
+    // reduction.  Instead let compiler optimize/auto-vectorize this by itself
+    // which can produce less/better code than a full blown openmp reduction.
     bool anyLaneIsOn = false;
     for (int i = 0; i < WidthT; ++i) {
         if (wvalues[i] != DataT(0))

diff --git a/src/liboslexec/CMakeLists.txt b/src/liboslexec/CMakeLists.txt
@@ -97,6 +97,12 @@ set ( liboslexec_override_limits
    ../liboslnoise/wide/wide_gabor3_isotropic_enabled   
     )
 
+set ( liboslexec_require_INF_NaN
+    shadingsys.cpp
+    wide/wide_shadingsys
+    wide/wide_optest_float
+    )
+
 set (local_lib oslexec)
 set (lib_src
           shadingsys.cpp closure.cpp
@@ -144,6 +150,31 @@ FLEX_BISON ( ../liboslcomp/osllex.l ../liboslcomp/oslgram.y osl lib_src compiler
 
 set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS" )
 
+
+macro ( REQUIRE_INF_NAN SRC )
+    if (CMAKE_COMPILER_IS_INTELCLANG)
+        # if -ffast-math is enabled to to the underlying compiler, 
+        # inf's and NaN's may not be handled properly (by design)
+        if (MSVC)
+            set_property(SOURCE ${SRC} APPEND PROPERTY COMPILE_OPTIONS "/fp:precise")
+            #message (STATUS "SOURCE ${SRC} APPEND PROPERTY COMPILE_OPTIONS /fp:precise")
+        else ()
+            #set_property(SOURCE $SRC} APPEND PROPERTY COMPILE_OPTIONS "-fp-model=precise")
+            # Disabling fast-math should be enough to enable proper support of INF and NaN
+            set_property(SOURCE ${SRC} APPEND PROPERTY COMPILE_OPTIONS "-fno-fast-math")
+            #message (STATUS "SOURCE ${SRC} APPEND PROPERTY COMPILE_OPTIONS -fno-fast-math")
+        endif()
+    endif()
+endmacro ( )
+
+# Apply complilation options as needed
+foreach(src ${lib_src})
+    if (${src} IN_LIST liboslexec_require_INF_NaN)
+        set(SRC "${CMAKE_CURRENT_SOURCE_DIR}/${src}")
+        REQUIRE_INF_NAN ( ${SRC} )  
+    endif()
+endforeach(src)
+
 macro ( LLVM_COMPILE llvm_src srclist )
     get_filename_component ( llvmsrc_we ${llvm_src} NAME_WE )
     set ( llvm_asm "${CMAKE_CURRENT_BINARY_DIR}/${llvmsrc_we}.s" )
@@ -422,7 +453,11 @@ foreach(batched_target ${BATCHED_TARGET_LIST})
         # it as soon as clang addresses this issue.
         list (APPEND TARGET_CXX_OPTS "-fno-unroll-loops")
 
-        list (APPEND TARGET_CXX_OPTS "-fopenmp")
+        if (CMAKE_COMPILER_IS_INTELCLANG)
+            list (APPEND TARGET_CXX_OPTS "-qopenmp-simd")
+        else ()
+            list (APPEND TARGET_CXX_OPTS "-fopenmp")
+        endif ()
 
         # Uncomment next section to investigate failures in vectorization
         #list (APPEND TARGET_CXX_OPTS 
@@ -484,6 +519,11 @@ foreach(batched_target ${BATCHED_TARGET_LIST})
                 endif()
             endif()
         endif()
+
+        if (${target_src} IN_LIST liboslexec_require_INF_NaN)
+            REQUIRE_INF_NAN ( ${TARGET_SRC} ) 
+        endif()
+
     endforeach(target_src)
 
     add_library ( ${batched_target_lib} MODULE ${TARGET_LIB_SOURCES} )