Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Disable numpy AVX512 when compiled with Intel classic #486

Open
wants to merge 11 commits into
base: spack-stack-dev
Choose a base branch
from
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
--- numpy-1.25.0/numpy/core/src/common/simd/avx512/conversion.h.org 2021-02-04 17:38:42.000000000 +0900
+++ numpy-1.25.0/numpy/core/src/common/simd/avx512/conversion.h 2021-02-04 18:29:21.000000000 +0900
@@ -131,20 +131,44 @@ npyv_pack_b8_b64(npyv_b64 a, npyv_b64 b, npyv_b64 c, npyv_b64 d,
__mmask16 gh = _mm512_kunpackb((__mmask16)h, (__mmask16)g);
return npyv_pack_b8_b32(ab, cd, ef, gh);
}
-
+/*
+ * A compiler bug workaround on Intel Compiler Classic.
+ * The bug manifests specifically when the
+ * scalar result of _cvtmask64_u64 is compared against the constant -1. This
+ * comparison uniquely triggers a bug under conditions of equality (==) or
+ * inequality (!=) checks, which are typically used in reduction operations like
+ * np.logical_or.
+ *
+ * The underlying issue arises from the compiler's optimizer. When the last
+ * vector comparison instruction operates on zmm, the optimizer erroneously
+ * emits a duplicate of this instruction but on the lower half register ymm. It
+ * then performs a bitwise XOR operation between the mask produced by this
+ * duplicated instruction and the mask from the original comparison instruction.
+ * This erroneous behavior leads to incorrect results.
+ *
+ * See https://github.com/numpy/numpy/issues/26197#issuecomment-2056750975
+ */
+#ifdef __INTEL_COMPILER
+#define NPYV__VOLATILE_CVTMASK64 volatile
+#else
+#define NPYV__VOLATILE_CVTMASK64
+#endif
// convert boolean vectors to integer bitfield
-NPY_FINLINE npy_uint64 npyv_tobits_b8(npyv_b8 a)
-{
+NPY_FINLINE npy_uint64 npyv_tobits_b8(npyv_b8 a) {
#ifdef NPY_HAVE_AVX512BW_MASK
- return (npy_uint64)_cvtmask64_u64(a);
+ npy_uint64 NPYV__VOLATILE_CVTMASK64 t = (npy_uint64)_cvtmask64_u64(a);
+ return t;
#elif defined(NPY_HAVE_AVX512BW)
- return (npy_uint64)a;
+ npy_uint64 NPYV__VOLATILE_CVTMASK64 t = (npy_uint64)a;
+ return t;
#else
int mask_lo = _mm256_movemask_epi8(npyv512_lower_si256(a));
int mask_hi = _mm256_movemask_epi8(npyv512_higher_si256(a));
return (unsigned)mask_lo | ((npy_uint64)(unsigned)mask_hi << 32);
#endif
}
+#undef NPYV__VOLATILE_CVTMASK64
+
NPY_FINLINE npy_uint64 npyv_tobits_b16(npyv_b16 a)
{
#ifdef NPY_HAVE_AVX512BW_MASK
36 changes: 31 additions & 5 deletions var/spack/repos/builtin/packages/py-numpy/package.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,8 @@ class PyNumpy(PythonPackage):
patch("check_executables2.patch", when="@1.19.0:1.19.5")
patch("check_executables3.patch", when="@1.16.0:1.18.5")

patch("intel_mask_conversion.patch", when="@1.25.0:1.26.5")

# Backport bug fix for f2py's define for threading when building with Mingw
patch(
"https://github.com/numpy/numpy/pull/20881.patch?full_index=1",
Expand Down Expand Up @@ -281,21 +283,45 @@ def blas_lapack_pkg_config(self) -> Tuple[str, str]:

return blas, lapack

@when("@1.25:")
def config_settings(self, spec, prefix):
    """Return the config settings passed to the build backend.

    The result always carries the build directory and the parallel
    compile arguments.  For ``@1.26:`` the BLAS/LAPACK setup arguments
    from ``blas_config_settings`` are merged in.  When building with the
    Intel Classic compiler for a target matching one of ``archs`` below,
    the AVX512 feature groups are removed from the CPU dispatch list.

    Args:
        spec: unused here; the method reads ``self.spec`` instead.
        prefix: unused here.

    Returns:
        dict: settings with ``"builddir"``, ``"compile-args"`` and,
        where applicable, a nested ``"setup-args"`` dict.
    """
    settings = {"builddir": "build", "compile-args": f"-j{make_jobs}"}

    if self.spec.satisfies("@1.26:"):
        settings.update(self.blas_config_settings())

    # Disable AVX512 features for Intel Classic compilers
    # https://numpy.org/doc/stable/reference/simd/build-options.html
    # https://github.com/numpy/numpy/issues/27840
    # https://github.com/matplotlib/matplotlib/issues/28762
    archs = ("x86_64_v4:", "cannonlake:", "mic_knl")
    if any(self.spec.satisfies(f"target={arch} %intel") for arch in archs):
        no_avx512 = (
            "MAX -AVX512F -AVX512CD -AVX512_KNL -AVX512_KNM -AVX512_SKX "
            "-AVX512_CLX -AVX512_CNL -AVX512_ICL -AVX512_SPR"
        )
        # Merge into a copy of the existing "setup-args" instead of
        # replacing it: a plain settings.update() with a fresh
        # "setup-args" dict would discard -Dblas / -Dlapack.
        setup_args = dict(settings.get("setup-args", {}))
        previous = setup_args.get("-Dcpu-dispatch", "")
        # strip() avoids a leading blank when nothing was set before.
        setup_args["-Dcpu-dispatch"] = f"{previous} {no_avx512}".strip()
        settings["setup-args"] = setup_args

    return settings

def blas_config_settings(self):
    """Return the BLAS/LAPACK setup arguments for the build.

    Only the ``"setup-args"`` entry is returned; ``config_settings``
    already provides ``"builddir"`` and ``"compile-args"`` before merging
    this result in.

    Returns:
        dict: ``{"setup-args": {"-Dblas": ..., "-Dlapack": ...}}`` using
        the pair returned by ``self.blas_lapack_pkg_config()``.
    """
    blas, lapack = self.blas_lapack_pkg_config()
    return {
        "setup-args": {
            # https://scipy.github.io/devdocs/building/blas_lapack.html
            "-Dblas": blas,
            "-Dlapack": lapack,
            # https://numpy.org/doc/stable/reference/simd/build-options.html
            # TODO: get this working in CI
            # "-Dcpu-baseline": "native",
            # "-Dcpu-dispatch": "none",
        }
    }

def blas_lapack_site_cfg(self) -> None:
Expand Down
Loading