Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pr/ci add icx #1473

Merged
merged 7 commits into from
Mar 8, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -542,6 +542,63 @@ jobs:
build/*.cmake
build/testsuite/*/*.*

icx-vp2022:
name: "Linux VFXP-2022 icx/C++17 llvm13 py3.9 boost1.76 exr3.1 oiio2.3 avx2"
runs-on: ubuntu-latest
container:
image: aswftesting/ci-osl:2022-clang13
env:
CXX: icpx
CC: icx
CMAKE_CXX_STANDARD: 17
OPENIMAGEIO_VERSION: master
PYBIND11_VERSION: v2.9.0
PYTHON_VERSION: 3.9
USE_BATCHED: b8_AVX2_noFMA
USE_SIMD: avx2,f16c
OSL_CMAKE_FLAGS: -DSTOP_ON_WARNING=OFF
USE_OPENVDB: 0

steps:
- uses: actions/checkout@v2
- name: Prepare ccache timestamp
id: ccache_cache_keys
shell: bash
run: |
echo "::set-output name=date::`date -u +'%Y-%m-%dT%H:%M:%SZ'`"
- name: ccache
id: ccache
uses: actions/cache@v2
with:
path: /tmp/ccache
key: ${{ github.job }}-${{ steps.ccache_cache_keys.outputs.date }}
restore-keys: |
${{ github.job }}-
- name: Build setup
run: |
src/build-scripts/ci-startup.bash
- name: Dependencies
run: |
sudo rm -rf /usr/local/include/OpenImageIO
sudo rm -rf /usr/local/lib*/cmake/OpenImageIO
sudo rm -rf /usr/local/lib*/libOpenImageIO*
sudo rm -rf /usr/local/lib*/python3.9/site-packages/OpenImageIO*
src/build-scripts/gh-installdeps.bash
- name: Build
run: |
src/build-scripts/ci-build.bash
- name: Testsuite
run: |
src/build-scripts/ci-test.bash
- uses: actions/upload-artifact@v2
if: failure()
with:
name: osl-${{github.job}}
path: |
build/CMake*
build/*.cmake
build/testsuite/*/*.*

linux-latest-release:
# Test against development master for relevant dependencies, latest
# supported releases of everything else.
Expand Down
15 changes: 15 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,21 @@ include_directories (
)


# Make sure our math calculations are consistent,
# especially division. Different compilers may have choose to use
# reciprocal division losing precision causing slightly different
# results which can lead to aliasing differences when running the testsuite
# Disable reciprocal division
if (CMAKE_COMPILER_IS_INTEL)
if (MSVC)
add_compile_options("/Qprec-div")
else ()
add_compile_options("-prec-div")
endif ()
elseif (CMAKE_COMPILER_IS_CLANG OR CMAKE_COMPILER_IS_APPLECLANG OR CMAKE_COMPILER_IS_INTELCLANG)
add_compile_options("-fno-reciprocal-math")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this have performance implications we should be aware of? Is this something we should do all the time, or just for CI tests for the sake of uniformity of results across compilers?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes it has performance implications (as a reciprocal divide instruction combined with a multiply is faster).
Yes its just for sake of CI tests and the sake of uniformity of results across compilers!
There is a difference in default behavior among compilers, and hardware availability for the reciprocal divide.

For performance, I would do the reverse, explicitly enable reciprocal divides. However precision will be lost, perhaps more than a renderer would want.

As a programmer, I feel that the programmer probably should of written (1/divisor)*number instead of number/divisor if their algorithm would be OK with the loss in precision.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also tests themselves could be updated to be more resilient (example is using --center) and alternate results could be added as well. FMA's have similar issue but actually increase precision/correctness of results vs. a reciprocal divide which looses precision.

endif ()

# Tell CMake to process the sub-directories
add_subdirectory (src/include)
add_subdirectory (src/liboslcomp)
Expand Down
20 changes: 16 additions & 4 deletions src/build-scripts/gh-installdeps.bash
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,18 @@ if [[ "$ASWF_ORG" != "" ]] ; then
popd
fi

if [[ "$CXX" == "icpc" || "$CC" == "icc" || "$USE_ICC" != "" ]] ; then
if [[ "$CXX" == "icpc" || "$CC" == "icc" || "$USE_ICC" != "" || "$CXX" == "icpx" || "$CC" == "icx" || "$USE_ICX" != "" ]] ; then
sudo cp src/build-scripts/oneAPI.repo /etc/yum.repos.d
sudo yum install -y intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic
set +e; source /opt/intel/oneapi/setvars.sh; set -e
icpc --version
if [[ "$CXX" == "icpc" || "$CC" == "icc" || "$USE_ICC" != "" ]] ; then
echo "Verifying installation of Intel(r) C++ Compiler:"
icpc --version
fi
if [[ "$CXX" == "icpx" || "$CC" == "icx" || "$USE_ICX" != "" ]] ; then
echo "Verifying installation of Intel(r) oneAPI DPC++/C++ Compiler:"
icpx --version
fi
fi

else
Expand Down Expand Up @@ -80,14 +87,19 @@ else
time sudo apt-get install -y g++-11
fi

if [[ "$CXX" == "icpc" || "$CC" == "icc" || "$USE_ICC" != "" ]] ; then
if [[ "$CXX" == "icpc" || "$CC" == "icc" || "$USE_ICC" != "" || "$CXX" == "icpx" || "$CC" == "icx" || "$USE_ICX" != "" ]] ; then
wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB
sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB
echo "deb https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list
time sudo apt-get update
time sudo apt-get install -y intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic
set +e; source /opt/intel/oneapi/setvars.sh; set -e
icpc --version
if [[ "$CXX" == "icpc" || "$CC" == "icc" || "$USE_ICC" != "" ]] ; then
icpc --version
fi
if [[ "$CXX" == "icpx" || "$CC" == "icx" || "$USE_ICX" != "" ]] ; then
icpx --version
fi
Comment on lines +97 to +102
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unnecessary -- see other note

Comment on lines +97 to +102
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unnecessary -- see other note

fi

# Nonstandard python versions
Expand Down
9 changes: 8 additions & 1 deletion src/cmake/compiler.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@ else ()
set (GCC_VERSION 0)
endif ()

if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER MATCHES "[Cc]lang")
if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER MATCHES "[Cc]lang"
OR CMAKE_CXX_COMPILER_ID MATCHES "IntelLLVM")
# If using any flavor of clang, set CMAKE_COMPILER_IS_CLANG. If it's
# Apple's variety, set CMAKE_COMPILER_IS_APPLECLANG and
# APPLECLANG_VERSION_STRING, otherwise for generic clang set
Expand All @@ -65,6 +66,12 @@ if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER MATCHES "[Cc]lan
if (VERBOSE)
message (STATUS "The compiler is Clang: ${CMAKE_CXX_COMPILER_ID} version ${APPLECLANG_VERSION_STRING}")
endif ()
elseif (CMAKE_CXX_COMPILER_ID MATCHES "IntelLLVM")
set (CMAKE_COMPILER_IS_INTELCLANG 1)
string (REGEX REPLACE ".* version ([0-9]+\\.[0-9]+).*" "\\1" CLANG_VERSION_STRING ${clang_full_version_string})
if (VERBOSE)
message (STATUS "The compiler is Intel Clang: ${CMAKE_CXX_COMPILER_ID} version ${CLANG_VERSION_STRING}")
endif ()
else ()
string (REGEX REPLACE ".* version ([0-9]+\\.[0-9]+).*" "\\1" CLANG_VERSION_STRING ${clang_full_version_string})
if (VERBOSE)
Expand Down
2 changes: 1 addition & 1 deletion src/include/OSL/Imathx/Imathx.h
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ affineInverse(const Matrix44 &m)
// differently than the LLVM IR version.
// NOTE: only using "inline" to get ODR (One Definition Rule) behavior
static inline OSL_HOSTDEVICE Matrix44
#if !OSL_INTEL_COMPILER
#if !OSL_INTEL_CLASSIC_COMPILER_VERSION
OSL_GNUC_ATTRIBUTE(optimize("fp-contract=off"))
#endif
nonAffineInverse(const Matrix44 &source);
Expand Down
2 changes: 1 addition & 1 deletion src/include/OSL/mask.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ OSL_NAMESPACE_ENTER
using std::popcount;
using std::countr_zero;

#elif OSL_INTEL_COMPILER
#elif OSL_INTEL_CLASSIC_COMPILER_VERSION

OSL_FORCEINLINE int popcount(uint32_t x) noexcept { return _mm_popcnt_u32(x);}
OSL_FORCEINLINE int popcount(uint64_t x) noexcept { return _mm_popcnt_u64(x); }
Expand Down
4 changes: 2 additions & 2 deletions src/include/OSL/oslnoise.h
Original file line number Diff line number Diff line change
Expand Up @@ -640,7 +640,7 @@ OSL_FORCEINLINE OSL_HOSTDEVICE Dual2<float> select(const bool b, const Dual2<flo
// versus requiring a stack location.
// Without this work per component, gathers & scatters were being emitted
// when used inside SIMD loops.
#if OSL_NON_INTEL_CLANG
#if OSL_ANY_CLANG && !OSL_INTEL_CLASSIC_COMPILER_VERSION
// Clang's vectorizor was really insistent that a select operation could not be replaced
// with control flow, so had to re-introduce the ? operator to make it happy
return Dual2<float> (
Expand Down Expand Up @@ -2247,7 +2247,7 @@ OSL_FORCEINLINE OSL_HOSTDEVICE void perlin (Dual2<Vec3> &result, const H &hash,

// With Dual2<Vec3> data types, a lot of code is generated below
// which caused some runaway compiler memory consumption when vectorizing
#if !OSL_INTEL_COMPILER
#if !OSL_INTEL_CLASSIC_COMPILER_VERSION
auto l_result = OIIO::lerp (
OIIO::trilerp (grad (hash (X , Y , Z , W ), fx , fy , fz , fw ),
grad (hash (X+1, Y , Z , W ), fx-1.0f, fy , fz , fw ),
Expand Down
50 changes: 39 additions & 11 deletions src/include/OSL/platform.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,10 @@

// Notes:
// __GNUC__ is defined for gcc and all clang varieties
// __clang__ is defined for all clang varieties (generic and Apple)
// __clang__ is defined for all clang varieties (generic, Apple, and Intel)
// __apple_build_version__ is only defined for Apple clang
// __INTEL_COMPILER is defined only for icc
// __INTEL_LLVM_COMPILER is defined only for icx
// _MSC_VER is defined for MSVS compiler (not gcc/clang/icc even on Windows)
// _WIN32 is defined on Windows regardless of compiler
// __CUDACC__ is defined any time we are compiling a module for Cuda
Expand All @@ -51,7 +52,7 @@
// 30402 for clang 3.4.2), or 0 if not a generic Clang release.
// N.B. This will be 0 for the clang Apple distributes (which has different
// version numbers entirely).
#if defined(__clang__) && !defined(__apple_build_version__)
#if defined(__clang__) && !defined(__apple_build_version__) && !defined(__INTEL_LLVM_COMPILER)
# define OSL_CLANG_VERSION (10000*__clang_major__ + 100*__clang_minor__ + __clang_patchlevel__)
#else
# define OSL_CLANG_VERSION 0
Expand All @@ -60,27 +61,45 @@
// Define OSL_APPLE_CLANG_VERSION to hold an encoded Apple Clang version
// (e.g. 70002 for clang 7.0.2), or 0 if not an Apple Clang release.
#if defined(__clang__) && defined(__apple_build_version__)
# if defined(__INTEL_LLVM_COMPILER)
# error Not expected for __INTEL_LLVM_COMPILER to be defined with an __apple_build_version__
# endif
# define OSL_APPLE_CLANG_VERSION (10000*__clang_major__ + 100*__clang_minor__ + __clang_patchlevel__)
#else
# define OSL_APPLE_CLANG_VERSION 0
#endif
// The classic Intel(r) C++ Compiler on OSX may still define __clang__
// combine with testing OSL_INTEL_CLASSIC_COMPILER_VERSION to further differentiate if needed.

// Define OSL_INTEL_COMPILER to hold an encoded Intel compiler version
// (e.g. 1900), or 0 if not an Intel compiler.
// Define OSL_INTEL_CLASSIC_COMPILER_VERSION to hold an encoded Intel(r) C++ Compiler version
// (e.g. 1900), or 0 if not an Intel(r) C++ Compiler.
#if defined(__INTEL_COMPILER)
# define OSL_INTEL_COMPILER __INTEL_COMPILER
# define OSL_INTEL_CLASSIC_COMPILER_VERSION __INTEL_COMPILER
#else
# define OSL_INTEL_CLASSIC_COMPILER_VERSION 0
#endif

// Define OSL_INTEL_LLVM_COMPILER_VERSION to hold an encoded Intel(r) LLVM Compiler version
// (e.g. 20220000), or 0 if not an Intel(r) LLVM Compiler.
// Define OSL_INTEL_CLANG_VERSION to hold the encoded Clang version the
// Intel(r) LLVM Compiler is based on (e.g. 140000),
// or 0 if not an Intel(r) LLVM compiler.
#if defined(__INTEL_LLVM_COMPILER)
# define OSL_INTEL_LLVM_COMPILER_VERSION __INTEL_LLVM_COMPILER
# define OSL_INTEL_CLANG_VERSION (10000*__clang_major__ + 100*__clang_minor__ + __clang_patchlevel__)
#else
# define OSL_INTEL_COMPILER 0
# define OSL_INTEL_LLVM_COMPILER_VERSION 0
# define OSL_INTEL_CLANG_VERSION 0
#endif

// Intel's compiler on OSX may still define __clang__
// and we have need to know when using a true clang compiler
#if !defined(__INTEL_COMPILER) && defined(__clang__)
#define OSL_NON_INTEL_CLANG __clang__
// Define OSL_ANY_CLANG to 0 or 1 to indicate if any Clang based compiler is in use
#if defined(__clang__)
# define OSL_ANY_CLANG 1
#else
#define OSL_NON_INTEL_CLANG 0
# define OSL_ANY_CLANG 0
#endif


// Tests for MSVS versions, always 0 if not MSVS at all.
#if defined(_MSC_VER)
# define OSL_MSVS_AT_LEAST_2013 (_MSC_VER >= 1800)
Expand Down Expand Up @@ -243,6 +262,15 @@
#define OSL_OMP_PRAGMA(aUnQuotedPragma)
#endif

#define OSL_OMP_SIMD_LOOP(...) OSL_OMP_PRAGMA(omp simd __VA_ARGS__)
#if (OSL_GCCVERSION || OSL_INTEL_CLASSIC_COMPILER_VERSION)
# define OSL_OMP_COMPLEX_SIMD_LOOP(...) OSL_OMP_SIMD_LOOP(__VA_ARGS__)
#else
// Ignore requests to vectorize complex/nested SIMD loops for certain
// compilers/versions that are not currently vectorizing complex loops.
# define OSL_OMP_COMPLEX_SIMD_LOOP(...)
#endif

// OSL_FORCEINLINE is a function attribute that attempts to make the
// function always inline. On many compilers regular 'inline' is only
// advisory. Put this attribute before the function return type, just like
Expand Down
4 changes: 2 additions & 2 deletions src/include/OSL/sfmath.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ namespace sfm
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#if OSL_INTEL_COMPILER
#if OSL_INTEL_CLASSIC_COMPILER_VERSION
// std::isinf wasn't vectorizing and was branchy. This slightly
// perturbed version fairs better and is branch free when vectorized
// with the Intel compiler.
Expand Down Expand Up @@ -263,7 +263,7 @@ namespace sfm
}
}

#if OSL_CLANG_VERSION && !OSL_INTEL_COMPILER
#if OSL_ANY_CLANG && !OSL_INTEL_CLASSIC_COMPILER_VERSION

// To make clang's loop vectorizor happy
// we need to make sure result of min and max
Expand Down
8 changes: 5 additions & 3 deletions src/include/OSL/wide.h
Original file line number Diff line number Diff line change
Expand Up @@ -1853,7 +1853,7 @@ struct WideImpl<const Dual2<ElementT>[], WidthT, true /*IsConstT */> {
} // namespace pvt


#if OSL_INTEL_COMPILER || OSL_GNUC_VERSION
#if OSL_INTEL_CLASSIC_COMPILER_VERSION || OSL_GNUC_VERSION
// Workaround for error #3466: inheriting constructors must be inherited from a direct base class
# define __OSL_INHERIT_BASE_CTORS(DERIVED, BASE) \
using Base = typename DERIVED::BASE; \
Expand Down Expand Up @@ -3139,7 +3139,8 @@ template<typename DataT, int WidthT>
OSL_FORCEINLINE bool
testIfAnyLaneIsNonZero(const Wide<DataT, WidthT>& wvalues)
{
#if OSL_NON_INTEL_CLANG
#if OSL_ANY_CLANG && !OSL_INTEL_CLASSIC_COMPILER_VERSION \
&& !OSL_INTEL_LLVM_COMPILER_VERSION
int anyLaneIsOn = 0;
OSL_OMP_PRAGMA(omp simd simdlen(WidthT) reduction(max : anyLaneIsOn))
for (int i = 0; i < WidthT; ++i) {
Expand All @@ -3149,7 +3150,8 @@ testIfAnyLaneIsNonZero(const Wide<DataT, WidthT>& wvalues)
return anyLaneIsOn;
#else
// NOTE: do not explicitly vectorize as it would require a
// reduction. Instead let compiler optimize this itself.
// reduction. Instead let compiler optimize/auto-vectorize this by itself
// which can produce less/better code than a full blown openmp reduction.
bool anyLaneIsOn = false;
for (int i = 0; i < WidthT; ++i) {
if (wvalues[i] != DataT(0))
Expand Down
42 changes: 41 additions & 1 deletion src/liboslexec/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,12 @@ set ( liboslexec_override_limits
../liboslnoise/wide/wide_gabor3_isotropic_enabled
)

set ( liboslexec_require_INF_NaN
shadingsys.cpp
wide/wide_shadingsys
wide/wide_optest_float
)

set (local_lib oslexec)
set (lib_src
shadingsys.cpp closure.cpp
Expand Down Expand Up @@ -144,6 +150,31 @@ FLEX_BISON ( ../liboslcomp/osllex.l ../liboslcomp/oslgram.y osl lib_src compiler

set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS" )


macro ( REQUIRE_INF_NAN SRC )
if (CMAKE_COMPILER_IS_INTELCLANG)
# if -ffast-math is enabled to to the underlying compiler,
# inf's and NaN's may not be handled properly (by design)
if (MSVC)
set_property(SOURCE ${SRC} APPEND PROPERTY COMPILE_OPTIONS "/fp:precise")
#message (STATUS "SOURCE ${SRC} APPEND PROPERTY COMPILE_OPTIONS /fp:precise")
else ()
#set_property(SOURCE $SRC} APPEND PROPERTY COMPILE_OPTIONS "-fp-model=precise")
# Disabling fast-math should be enough to enable proper support of INF and NaN
set_property(SOURCE ${SRC} APPEND PROPERTY COMPILE_OPTIONS "-fno-fast-math")
#message (STATUS "SOURCE ${SRC} APPEND PROPERTY COMPILE_OPTIONS -fno-fast-math")
endif()
endif()
endmacro ( )

# Apply complilation options as needed
foreach(src ${lib_src})
if (${src} IN_LIST liboslexec_require_INF_NaN)
set(SRC "${CMAKE_CURRENT_SOURCE_DIR}/${src}")
REQUIRE_INF_NAN ( ${SRC} )
endif()
endforeach(src)

macro ( LLVM_COMPILE llvm_src srclist )
get_filename_component ( llvmsrc_we ${llvm_src} NAME_WE )
set ( llvm_asm "${CMAKE_CURRENT_BINARY_DIR}/${llvmsrc_we}.s" )
Expand Down Expand Up @@ -422,7 +453,11 @@ foreach(batched_target ${BATCHED_TARGET_LIST})
# it as soon as clang addresses this issue.
list (APPEND TARGET_CXX_OPTS "-fno-unroll-loops")

list (APPEND TARGET_CXX_OPTS "-fopenmp")
if (CMAKE_COMPILER_IS_INTELCLANG)
list (APPEND TARGET_CXX_OPTS "-qopenmp-simd")
else ()
list (APPEND TARGET_CXX_OPTS "-fopenmp")
endif ()

# Uncomment next section to investigate failures in vectorization
#list (APPEND TARGET_CXX_OPTS
Expand Down Expand Up @@ -484,6 +519,11 @@ foreach(batched_target ${BATCHED_TARGET_LIST})
endif()
endif()
endif()

if (${target_src} IN_LIST liboslexec_require_INF_NaN)
REQUIRE_INF_NAN ( ${TARGET_SRC} )
endif()

endforeach(target_src)

add_library ( ${batched_target_lib} MODULE ${TARGET_LIB_SOURCES} )
Expand Down
Loading