Skip to content

Commit

Permalink
[aarch64] Fix fmin and fmax on aarch64 (pytorch#140)
Browse files Browse the repository at this point in the history
vmaxnum_v*_v*_v* and vminnum_v*_v*_v* are helper functions that use dedicated instructions to select the larger and smaller elements of two given vectors; they were introduced in the following PR:

shibatch/sleef#109

It is said that vmaxnmq and vminnmq on aarch64 are IEEE 754-conformant, but I recently found that the way these instructions handle signaling NaNs does not conform to the specification of the fmin and fmax functions in the ANSI C standard.

This patch fixes that problem. It also adds a regression test that checks signaling-NaN handling in fmin and fmax.
  • Loading branch information
shibatch authored Jan 16, 2018
1 parent 69d7454 commit b82fd94
Show file tree
Hide file tree
Showing 12 changed files with 40 additions and 123 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ matrix:

before_install:
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then wget -q ${SDE_URL} ; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then tar xvf sde-external-8.12.0-2017-10-23-lin.tar.bz2; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then tar xf sde-external-8.12.0-2017-10-23-lin.tar.bz2; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then export PATH=${PATH}:${TRAVIS_BUILD_DIR}/sde-external-8.12.0-2017-10-23-lin; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then sudo add-apt-repository -y ppa:adrozdoff/cmake; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then sudo apt-get -qq update ; fi
Expand Down
26 changes: 0 additions & 26 deletions src/arch/helperadvsimd.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,6 @@

#define ISANAME "AArch64 AdvSIMD"

#ifdef SLEEF_SINGLE_MINMAXNUM_AVAILABLE
#error prior definition of SLEEF_SINGLE_MINMAXNUM_AVAILABLE
#endif
#define SLEEF_SINGLE_MINMAXNUM_AVAILABLE 1

#ifdef SLEEF_DOUBLE_MINMAXNUM_AVAILABLE
#error prior definition of SLEEF_DOUBLE_MINMAXNUM_AVAILABLE
#endif
#define SLEEF_DOUBLE_MINMAXNUM_AVAILABLE 1

// Mask definition
typedef uint32x4_t vmask;
typedef uint32x4_t vopmask;
Expand Down Expand Up @@ -161,14 +151,6 @@ static INLINE vfloat vmin_vf_vf_vf(vfloat x, vfloat y) {
return vminq_f32(x, y);
}

// max number, min number: thin wrappers that forward both operands to the
// AdvSIMD vmaxnmq_f32 / vminnmq_f32 intrinsics.
// NOTE(review): per the commit description, the signaling-NaN handling of
// these instructions does not conform to the C specification of fmax/fmin,
// so these helpers are not suitable as a direct implementation of them.
static INLINE vfloat vmaxnum_vf_vf_vf(vfloat x, vfloat y) {
return vmaxnmq_f32(x, y);
}
static INLINE vfloat vminnum_vf_vf_vf(vfloat x, vfloat y) {
return vminnmq_f32(x, y);
}

// Comparisons
static INLINE vmask veq_vm_vf_vf(vfloat x, vfloat y) { return vceqq_f32(x, y); }
static INLINE vmask vneq_vm_vf_vf(vfloat x, vfloat y) {
Expand Down Expand Up @@ -308,14 +290,6 @@ static INLINE vdouble vmin_vd_vd_vd(vdouble x, vdouble y) {
return vminq_f64(x, y);
}

// max number, min number: thin wrappers that forward both operands to the
// AdvSIMD vmaxnmq_f64 / vminnmq_f64 intrinsics.
// NOTE(review): per the commit description, the signaling-NaN handling of
// these instructions does not conform to the C specification of fmax/fmin,
// so these helpers are not suitable as a direct implementation of them.
static INLINE vdouble vmaxnum_vd_vd_vd(vdouble x, vdouble y) {
return vmaxnmq_f64(x, y);
}
static INLINE vdouble vminnum_vd_vd_vd(vdouble x, vdouble y) {
return vminnmq_f64(x, y);
}

// Multiply accumulate: z = z + x * y
static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) {
return vfmaq_f64(z, x, y);
Expand Down
16 changes: 0 additions & 16 deletions src/arch/helperavx.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,6 @@

#define FULL_FP_ROUNDING

#ifdef SLEEF_SINGLE_MINMAXNUM_AVAILABLE
#error prior definition of SLEEF_SINGLE_MINMAXNUM_AVAILABLE
#endif
#define SLEEF_SINGLE_MINMAXNUM_AVAILABLE 1

#ifdef SLEEF_DOUBLE_MINMAXNUM_AVAILABLE
#error prior definition of SLEEF_DOUBLE_MINMAXNUM_AVAILABLE
#endif
#define SLEEF_DOUBLE_MINMAXNUM_AVAILABLE 1

#if defined(_MSC_VER)
#include <intrin.h>
#else
Expand Down Expand Up @@ -293,9 +283,6 @@ static INLINE vopmask visnan_vo_vd(vdouble d) {
return vreinterpret_vm_vd(_mm256_cmp_pd(d, d, _CMP_NEQ_UQ));
}

// Max that selects x wherever visnan_vo_vd(y) is set, and the result of
// vmax_vd_vd_vd(x, y) everywhere else.
static INLINE vdouble vmaxnum_vd_vd_vd(vdouble x, vdouble y) {
  vopmask yIsNan = visnan_vo_vd(y);
  vdouble larger = vmax_vd_vd_vd(x, y);
  return vsel_vd_vo_vd_vd(yIsNan, x, larger);
}
// Min counterpart: x wherever visnan_vo_vd(y) is set, otherwise the result of
// vmin_vd_vd_vd(x, y).
static INLINE vdouble vminnum_vd_vd_vd(vdouble x, vdouble y) {
  vopmask yIsNan = visnan_vo_vd(y);
  vdouble smaller = vmin_vd_vd_vd(x, y);
  return vsel_vd_vo_vd_vd(yIsNan, x, smaller);
}

static INLINE vdouble vload_vd_p(const double *ptr) { return _mm256_load_pd(ptr); }
static INLINE vdouble vloadu_vd_p(const double *ptr) { return _mm256_loadu_pd(ptr); }

Expand Down Expand Up @@ -481,9 +468,6 @@ static INLINE vopmask vispinf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_
static INLINE vopmask visminf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_f(-INFINITYf)); }
static INLINE vopmask visnan_vo_vf(vfloat d) { return vneq_vo_vf_vf(d, d); }

// Max that selects x wherever visnan_vo_vf(y) is set, and the result of
// vmax_vf_vf_vf(x, y) everywhere else.
static INLINE vfloat vmaxnum_vf_vf_vf(vfloat x, vfloat y) {
  vopmask yIsNan = visnan_vo_vf(y);
  vfloat larger = vmax_vf_vf_vf(x, y);
  return vsel_vf_vo_vf_vf(yIsNan, x, larger);
}
// Min counterpart: x wherever visnan_vo_vf(y) is set, otherwise the result of
// vmin_vf_vf_vf(x, y).
static INLINE vfloat vminnum_vf_vf_vf(vfloat x, vfloat y) {
  vopmask yIsNan = visnan_vo_vf(y);
  vfloat smaller = vmin_vf_vf_vf(x, y);
  return vsel_vf_vo_vf_vf(yIsNan, x, smaller);
}

//

static INLINE vfloat vload_vf_p(const float *ptr) { return _mm256_load_ps(ptr); }
Expand Down
16 changes: 0 additions & 16 deletions src/arch/helperavx2.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,6 @@
#define FULL_FP_ROUNDING
#define SPLIT_KERNEL

#ifdef SLEEF_SINGLE_MINMAXNUM_AVAILABLE
#error prior definition of SLEEF_SINGLE_MINMAXNUM_AVAILABLE
#endif
#define SLEEF_SINGLE_MINMAXNUM_AVAILABLE 1

#ifdef SLEEF_DOUBLE_MINMAXNUM_AVAILABLE
#error prior definition of SLEEF_DOUBLE_MINMAXNUM_AVAILABLE
#endif
#define SLEEF_DOUBLE_MINMAXNUM_AVAILABLE 1

#if defined(_MSC_VER)
#include <intrin.h>
#else
Expand Down Expand Up @@ -249,9 +239,6 @@ static INLINE vopmask visnan_vo_vd(vdouble d) {
return vreinterpret_vm_vd(_mm256_cmp_pd(d, d, _CMP_NEQ_UQ));
}

// Max that selects x wherever visnan_vo_vd(y) is set, and the result of
// vmax_vd_vd_vd(x, y) everywhere else.
static INLINE vdouble vmaxnum_vd_vd_vd(vdouble x, vdouble y) {
  vopmask yIsNan = visnan_vo_vd(y);
  vdouble larger = vmax_vd_vd_vd(x, y);
  return vsel_vd_vo_vd_vd(yIsNan, x, larger);
}
// Min counterpart: x wherever visnan_vo_vd(y) is set, otherwise the result of
// vmin_vd_vd_vd(x, y).
static INLINE vdouble vminnum_vd_vd_vd(vdouble x, vdouble y) {
  vopmask yIsNan = visnan_vo_vd(y);
  vdouble smaller = vmin_vd_vd_vd(x, y);
  return vsel_vd_vo_vd_vd(yIsNan, x, smaller);
}

#if defined(_MSC_VER)
// This function is needed when debugging on MSVC.
static INLINE double vcast_d_vd(vdouble v) {
Expand Down Expand Up @@ -356,9 +343,6 @@ static INLINE vopmask vispinf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_
static INLINE vopmask visminf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_f(-INFINITYf)); }
static INLINE vopmask visnan_vo_vf(vfloat d) { return vneq_vo_vf_vf(d, d); }

// Max that selects x wherever visnan_vo_vf(y) is set, and the result of
// vmax_vf_vf_vf(x, y) everywhere else.
static INLINE vfloat vmaxnum_vf_vf_vf(vfloat x, vfloat y) {
  vopmask yIsNan = visnan_vo_vf(y);
  vfloat larger = vmax_vf_vf_vf(x, y);
  return vsel_vf_vo_vf_vf(yIsNan, x, larger);
}
// Min counterpart: x wherever visnan_vo_vf(y) is set, otherwise the result of
// vmin_vf_vf_vf(x, y).
static INLINE vfloat vminnum_vf_vf_vf(vfloat x, vfloat y) {
  vopmask yIsNan = visnan_vo_vf(y);
  vfloat smaller = vmin_vf_vf_vf(x, y);
  return vsel_vf_vo_vf_vf(yIsNan, x, smaller);
}

#ifdef _MSC_VER
// This function is needed when debugging on MSVC.
static INLINE float vcast_f_vf(vfloat v) {
Expand Down
16 changes: 0 additions & 16 deletions src/arch/helperavx2_128.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,6 @@
#define FULL_FP_ROUNDING
#define SPLIT_KERNEL

#ifdef SLEEF_SINGLE_MINMAXNUM_AVAILABLE
#error prior definition of SLEEF_SINGLE_MINMAXNUM_AVAILABLE
#endif
#define SLEEF_SINGLE_MINMAXNUM_AVAILABLE 1

#ifdef SLEEF_DOUBLE_MINMAXNUM_AVAILABLE
#error prior definition of SLEEF_DOUBLE_MINMAXNUM_AVAILABLE
#endif
#define SLEEF_DOUBLE_MINMAXNUM_AVAILABLE 1

#if defined(_MSC_VER)
#include <intrin.h>
#else
Expand Down Expand Up @@ -231,9 +221,6 @@ static INLINE vopmask visnan_vo_vd(vdouble d) {
return vreinterpret_vm_vd(_mm_cmp_pd(d, d, _CMP_NEQ_UQ));
}

// Max that selects x wherever visnan_vo_vd(y) is set, and the result of
// vmax_vd_vd_vd(x, y) everywhere else.
static INLINE vdouble vmaxnum_vd_vd_vd(vdouble x, vdouble y) {
  vopmask yIsNan = visnan_vo_vd(y);
  vdouble larger = vmax_vd_vd_vd(x, y);
  return vsel_vd_vo_vd_vd(yIsNan, x, larger);
}
// Min counterpart: x wherever visnan_vo_vd(y) is set, otherwise the result of
// vmin_vd_vd_vd(x, y).
static INLINE vdouble vminnum_vd_vd_vd(vdouble x, vdouble y) {
  vopmask yIsNan = visnan_vo_vd(y);
  vdouble smaller = vmin_vd_vd_vd(x, y);
  return vsel_vd_vo_vd_vd(yIsNan, x, smaller);
}

static INLINE vdouble vload_vd_p(const double *ptr) { return _mm_load_pd(ptr); }
static INLINE vdouble vloadu_vd_p(const double *ptr) { return _mm_loadu_pd(ptr); }

Expand Down Expand Up @@ -336,9 +323,6 @@ static INLINE vopmask vispinf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_
static INLINE vopmask visminf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_f(-INFINITYf)); }
static INLINE vopmask visnan_vo_vf(vfloat d) { return vneq_vo_vf_vf(d, d); }

// Max that selects x wherever visnan_vo_vf(y) is set, and the result of
// vmax_vf_vf_vf(x, y) everywhere else.
static INLINE vfloat vmaxnum_vf_vf_vf(vfloat x, vfloat y) {
  vopmask yIsNan = visnan_vo_vf(y);
  vfloat larger = vmax_vf_vf_vf(x, y);
  return vsel_vf_vo_vf_vf(yIsNan, x, larger);
}
// Min counterpart: x wherever visnan_vo_vf(y) is set, otherwise the result of
// vmin_vf_vf_vf(x, y).
static INLINE vfloat vminnum_vf_vf_vf(vfloat x, vfloat y) {
  vopmask yIsNan = visnan_vo_vf(y);
  vfloat smaller = vmin_vf_vf_vf(x, y);
  return vsel_vf_vo_vf_vf(yIsNan, x, smaller);
}

static INLINE vfloat vload_vf_p(const float *ptr) { return _mm_load_ps(ptr); }
static INLINE vfloat vloadu_vf_p(const float *ptr) { return _mm_loadu_ps(ptr); }

Expand Down
16 changes: 0 additions & 16 deletions src/arch/helpersse2.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,6 @@
#define LOG2VECTLENSP (LOG2VECTLENDP+1)
#define VECTLENSP (1 << LOG2VECTLENSP)

#ifdef SLEEF_SINGLE_MINMAXNUM_AVAILABLE
#error prior definition of SLEEF_SINGLE_MINMAXNUM_AVAILABLE
#endif
#define SLEEF_SINGLE_MINMAXNUM_AVAILABLE 1

#ifdef SLEEF_DOUBLE_MINMAXNUM_AVAILABLE
#error prior definition of SLEEF_DOUBLE_MINMAXNUM_AVAILABLE
#endif
#define SLEEF_DOUBLE_MINMAXNUM_AVAILABLE 1

#if defined(_MSC_VER)
#include <intrin.h>
#else
Expand Down Expand Up @@ -267,9 +257,6 @@ static INLINE vopmask visnan_vo_vd(vdouble d) {
return vreinterpret_vm_vd(_mm_cmpneq_pd(d, d));
}

// Max that selects x wherever visnan_vo_vd(y) is set, and the result of
// vmax_vd_vd_vd(x, y) everywhere else.
static INLINE vdouble vmaxnum_vd_vd_vd(vdouble x, vdouble y) {
  vopmask yIsNan = visnan_vo_vd(y);
  vdouble larger = vmax_vd_vd_vd(x, y);
  return vsel_vd_vo_vd_vd(yIsNan, x, larger);
}
// Min counterpart: x wherever visnan_vo_vd(y) is set, otherwise the result of
// vmin_vd_vd_vd(x, y).
static INLINE vdouble vminnum_vd_vd_vd(vdouble x, vdouble y) {
  vopmask yIsNan = visnan_vo_vd(y);
  vdouble smaller = vmin_vd_vd_vd(x, y);
  return vsel_vd_vo_vd_vd(yIsNan, x, smaller);
}

//

static INLINE vdouble vload_vd_p(const double *ptr) { return _mm_load_pd(ptr); }
Expand Down Expand Up @@ -378,9 +365,6 @@ static INLINE vopmask vispinf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_
static INLINE vopmask visminf_vo_vf(vfloat d) { return veq_vo_vf_vf(d, vcast_vf_f(-INFINITYf)); }
static INLINE vopmask visnan_vo_vf(vfloat d) { return vneq_vo_vf_vf(d, d); }

// Max that selects x wherever visnan_vo_vf(y) is set, and the result of
// vmax_vf_vf_vf(x, y) everywhere else.
static INLINE vfloat vmaxnum_vf_vf_vf(vfloat x, vfloat y) {
  vopmask yIsNan = visnan_vo_vf(y);
  vfloat larger = vmax_vf_vf_vf(x, y);
  return vsel_vf_vo_vf_vf(yIsNan, x, larger);
}
// Min counterpart: x wherever visnan_vo_vf(y) is set, otherwise the result of
// vmin_vf_vf_vf(x, y).
static INLINE vfloat vminnum_vf_vf_vf(vfloat x, vfloat y) {
  vopmask yIsNan = visnan_vo_vf(y);
  vfloat smaller = vmin_vf_vf_vf(x, y);
  return vsel_vf_vo_vf_vf(yIsNan, x, smaller);
}

static INLINE vfloat vload_vf_p(const float *ptr) { return _mm_load_ps(ptr); }
static INLINE vfloat vloadu_vf_p(const float *ptr) { return _mm_loadu_ps(ptr); }

Expand Down
3 changes: 3 additions & 0 deletions src/common/misc.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@
#define M_2_PIl 0.636619772367581343075535053490057448L
#endif

// Signaling-NaN constants. Each one type-puns an integer bit pattern through
// an anonymous-union compound literal: exponent bits all ones, the top
// mantissa (quiet) bit clear, and a non-zero mantissa.
#define SLEEF_SNAN (((union { long long int i; double d; }) { .i = 0x7ff0000000000001LL }).d)
// The integer member must be exactly as wide as float. `long int` is 64 bits
// on LP64 targets, where a big-endian layout would make the float alias the
// all-zero upper half; `unsigned int` (assumed 32 bits here) also holds
// 0xff800001 without an implementation-defined narrowing conversion.
#define SLEEF_SNANf (((union { unsigned int i; float f; }) { .i = 0xff800001U }).f)

//

/*
Expand Down
16 changes: 8 additions & 8 deletions src/libm-tester/tester.c
Original file line number Diff line number Diff line change
Expand Up @@ -2572,8 +2572,8 @@ void do_test() {
{
fprintf(stderr, "fmax denormal/nonnumber test : ");

double xa[] = { +0.0, -0.0, +1, -1, +1e+100, -1e+100, DBL_MAX, -DBL_MAX, DBL_MIN, -DBL_MIN, POSITIVE_INFINITY, NEGATIVE_INFINITY, NAN };
double ya[] = { +0.0, -0.0, +1, -1, +1e+100, -1e+100, DBL_MAX, -DBL_MAX, DBL_MIN, -DBL_MIN, POSITIVE_INFINITY, NEGATIVE_INFINITY, NAN };
double xa[] = { +0.0, -0.0, +1, -1, +1e+100, -1e+100, DBL_MAX, -DBL_MAX, DBL_MIN, -DBL_MIN, POSITIVE_INFINITY, NEGATIVE_INFINITY, NAN, SLEEF_SNAN };
double ya[] = { +0.0, -0.0, +1, -1, +1e+100, -1e+100, DBL_MAX, -DBL_MAX, DBL_MIN, -DBL_MIN, POSITIVE_INFINITY, NEGATIVE_INFINITY, NAN, SLEEF_SNAN };

for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
Expand All @@ -2587,8 +2587,8 @@ void do_test() {
{
fprintf(stderr, "fmin denormal/nonnumber test : ");

double xa[] = { +0.0, -0.0, +1, -1, +1e+100, -1e+100, DBL_MAX, -DBL_MAX, DBL_MIN, -DBL_MIN, POSITIVE_INFINITY, NEGATIVE_INFINITY, NAN };
double ya[] = { +0.0, -0.0, +1, -1, +1e+100, -1e+100, DBL_MAX, -DBL_MAX, DBL_MIN, -DBL_MIN, POSITIVE_INFINITY, NEGATIVE_INFINITY, NAN };
double xa[] = { +0.0, -0.0, +1, -1, +1e+100, -1e+100, DBL_MAX, -DBL_MAX, DBL_MIN, -DBL_MIN, POSITIVE_INFINITY, NEGATIVE_INFINITY, NAN, SLEEF_SNAN };
double ya[] = { +0.0, -0.0, +1, -1, +1e+100, -1e+100, DBL_MAX, -DBL_MAX, DBL_MIN, -DBL_MIN, POSITIVE_INFINITY, NEGATIVE_INFINITY, NAN, SLEEF_SNAN };

for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) {
for(j=0;j<sizeof(ya)/sizeof(double) && success;j++) {
Expand Down Expand Up @@ -3068,8 +3068,8 @@ void do_test() {
{
fprintf(stderr, "fmaxf denormal/nonnumber test : ");

float xa[] = { +0.0, -0.0, +1, -1, +1e+30, -1e+30, FLT_MAX, -FLT_MAX, FLT_MIN, -FLT_MIN, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, NAN };
float ya[] = { +0.0, -0.0, +1, -1, +1e+30, -1e+30, FLT_MAX, -FLT_MAX, FLT_MIN, -FLT_MIN, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, NAN };
float xa[] = { +0.0, -0.0, +1, -1, +1e+30, -1e+30, FLT_MAX, -FLT_MAX, FLT_MIN, -FLT_MIN, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, NAN, SLEEF_SNANf };
float ya[] = { +0.0, -0.0, +1, -1, +1e+30, -1e+30, FLT_MAX, -FLT_MAX, FLT_MIN, -FLT_MIN, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, NAN, SLEEF_SNANf };

for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
Expand All @@ -3083,8 +3083,8 @@ void do_test() {
{
fprintf(stderr, "fminf denormal/nonnumber test : ");

float xa[] = { +0.0, -0.0, +1, -1, +1e+30, -1e+30, FLT_MAX, -FLT_MAX, FLT_MIN, -FLT_MIN, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, NAN };
float ya[] = { +0.0, -0.0, +1, -1, +1e+30, -1e+30, FLT_MAX, -FLT_MAX, FLT_MIN, -FLT_MIN, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, NAN };
float xa[] = { +0.0, -0.0, +1, -1, +1e+30, -1e+30, FLT_MAX, -FLT_MAX, FLT_MIN, -FLT_MIN, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, NAN, SLEEF_SNANf };
float ya[] = { +0.0, -0.0, +1, -1, +1e+30, -1e+30, FLT_MAX, -FLT_MAX, FLT_MIN, -FLT_MIN, POSITIVE_INFINITYf, NEGATIVE_INFINITYf, NAN, SLEEF_SNANf };

for(i=0;i<sizeof(xa)/sizeof(float) && success;i++) {
for(j=0;j<sizeof(ya)/sizeof(float) && success;j++) {
Expand Down
10 changes: 0 additions & 10 deletions src/libm/dd.h
Original file line number Diff line number Diff line change
Expand Up @@ -393,13 +393,3 @@ static INLINE CONST vdouble2 ddsqrt_vd2_vd(vdouble d) {
vdouble t = vsqrt_vd_vd(d);
return ddscale_vd2_vd2_vd(ddmul_vd2_vd2_vd2(ddadd2_vd2_vd_vd2(d, ddmul_vd2_vd_vd(t, t)), ddrec_vd2_vd(t)), vcast_vd_d(0.5));
}

#ifndef SLEEF_DOUBLE_MINMAXNUM_AVAILABLE
// Generic fallbacks, compiled only when the architecture helper header has not
// defined SLEEF_DOUBLE_MINMAXNUM_AVAILABLE.

// Selects x where visnan_vo_vd(y) is set; elsewhere selects x where
// vgt_vo_vd_vd(x, y) is set and y otherwise.
static INLINE CONST vdouble vmaxnum_vd_vd_vd(vdouble x, vdouble y) {
  vdouble larger = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(x, y), x, y);
  return vsel_vd_vo_vd_vd(visnan_vo_vd(y), x, larger);
}

// Selects x where visnan_vo_vd(y) is set; elsewhere selects x where
// vgt_vo_vd_vd(y, x) is set and y otherwise.
static INLINE CONST vdouble vminnum_vd_vd_vd(vdouble x, vdouble y) {
  vdouble smaller = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(y, x), x, y);
  return vsel_vd_vo_vd_vd(visnan_vo_vd(y), x, smaller);
}
#endif
10 changes: 0 additions & 10 deletions src/libm/df.h
Original file line number Diff line number Diff line change
Expand Up @@ -464,13 +464,3 @@ static INLINE CONST vfloat2 dfsqrt_vf2_vf(vfloat d) {
vfloat t = vsqrt_vf_vf(d);
return dfscale_vf2_vf2_vf(dfmul_vf2_vf2_vf2(dfadd2_vf2_vf_vf2(d, dfmul_vf2_vf_vf(t, t)), dfrec_vf2_vf(t)), vcast_vf_f(0.5f));
}

#ifndef SLEEF_SINGLE_MINMAXNUM_AVAILABLE
// Generic fallbacks, compiled only when the architecture helper header has not
// defined SLEEF_SINGLE_MINMAXNUM_AVAILABLE.

// Selects x where visnan_vo_vf(y) is set; elsewhere selects x where
// vgt_vo_vf_vf(x, y) is set and y otherwise.
static INLINE CONST vfloat vmaxnum_vf_vf_vf(vfloat x, vfloat y) {
  vfloat larger = vsel_vf_vo_vf_vf(vgt_vo_vf_vf(x, y), x, y);
  return vsel_vf_vo_vf_vf(visnan_vo_vf(y), x, larger);
}

// Selects x where visnan_vo_vf(y) is set; elsewhere selects x where
// vgt_vo_vf_vf(y, x) is set and y otherwise.
static INLINE CONST vfloat vminnum_vf_vf_vf(vfloat x, vfloat y) {
  vfloat smaller = vsel_vf_vo_vf_vf(vgt_vo_vf_vf(y, x), x, y);
  return vsel_vf_vo_vf_vf(visnan_vo_vf(y), x, smaller);
}
#endif
16 changes: 14 additions & 2 deletions src/libm/sleefsimddp.c
Original file line number Diff line number Diff line change
Expand Up @@ -2252,9 +2252,21 @@ EXPORT CONST vdouble xfabs(vdouble x) { return vabs_vd_vd(x); }

EXPORT CONST vdouble xcopysign(vdouble x, vdouble y) { return vcopysign_vd_vd_vd(x, y); }

// Exported double-precision vector fmax: forwards both operands to the
// vmaxnum_vd_vd_vd helper and returns its result unchanged.
EXPORT CONST vdouble xfmax(vdouble x, vdouble y) {
  vdouble result = vmaxnum_vd_vd_vd(x, y);
  return result;
}
// Exported double-precision vector fmax: yields x wherever visnan_vo_vd(y) is
// set, and the larger-of-two candidate everywhere else.
EXPORT CONST vdouble xfmax(vdouble x, vdouble y) {
  // Only the "larger of the two" step differs by target; the NaN guard on y is shared.
#if (defined(__x86_64__) || defined(__i386__)) && !defined(ENABLE_VECEXT) && !defined(ENABLE_PUREC)
  // NOTE(review): presumably vmax_vd_vd_vd maps to the native x86 max, which
  // yields y when x is NaN -- confirm against the x86 helper headers.
  vdouble larger = vmax_vd_vd_vd(x, y);
#else
  // Explicit compare-and-select; presumably a NaN x makes the x > y mask
  // false so that y is picked -- confirm against vgt_vo_vd_vd.
  vdouble larger = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(x, y), x, y);
#endif
  return vsel_vd_vo_vd_vd(visnan_vo_vd(y), x, larger);
}

// Exported double-precision vector fmin: forwards both operands to the
// vminnum_vd_vd_vd helper and returns its result unchanged.
EXPORT CONST vdouble xfmin(vdouble x, vdouble y) {
  vdouble result = vminnum_vd_vd_vd(x, y);
  return result;
}
// Exported double-precision vector fmin: yields x wherever visnan_vo_vd(y) is
// set, and the smaller-of-two candidate everywhere else.
EXPORT CONST vdouble xfmin(vdouble x, vdouble y) {
  // Only the "smaller of the two" step differs by target; the NaN guard on y is shared.
#if (defined(__x86_64__) || defined(__i386__)) && !defined(ENABLE_VECEXT) && !defined(ENABLE_PUREC)
  // NOTE(review): presumably vmin_vd_vd_vd maps to the native x86 min, which
  // yields y when x is NaN -- confirm against the x86 helper headers.
  vdouble smaller = vmin_vd_vd_vd(x, y);
#else
  // Explicit compare-and-select; presumably a NaN x makes the y > x mask
  // false so that y is picked -- confirm against vgt_vo_vd_vd.
  vdouble smaller = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(y, x), x, y);
#endif
  return vsel_vd_vo_vd_vd(visnan_vo_vd(y), x, smaller);
}

EXPORT CONST vdouble xfdim(vdouble x, vdouble y) {
vdouble ret = vsub_vd_vd_vd(x, y);
Expand Down
16 changes: 14 additions & 2 deletions src/libm/sleefsimdsp.c
Original file line number Diff line number Diff line change
Expand Up @@ -1723,9 +1723,21 @@ EXPORT CONST vfloat xfabsf(vfloat x) { return vabs_vf_vf(x); }

EXPORT CONST vfloat xcopysignf(vfloat x, vfloat y) { return vcopysign_vf_vf_vf(x, y); }

// Exported single-precision vector fmax: forwards both operands to the
// vmaxnum_vf_vf_vf helper and returns its result unchanged.
EXPORT CONST vfloat xfmaxf(vfloat x, vfloat y) {
  vfloat result = vmaxnum_vf_vf_vf(x, y);
  return result;
}
// Exported single-precision vector fmax: yields x wherever visnan_vo_vf(y) is
// set, and the larger-of-two candidate everywhere else.
EXPORT CONST vfloat xfmaxf(vfloat x, vfloat y) {
  // Only the "larger of the two" step differs by target; the NaN guard on y is shared.
#if (defined(__x86_64__) || defined(__i386__)) && !defined(ENABLE_VECEXT) && !defined(ENABLE_PUREC)
  // NOTE(review): presumably vmax_vf_vf_vf maps to the native x86 max, which
  // yields y when x is NaN -- confirm against the x86 helper headers.
  vfloat larger = vmax_vf_vf_vf(x, y);
#else
  // Explicit compare-and-select; presumably a NaN x makes the x > y mask
  // false so that y is picked -- confirm against vgt_vo_vf_vf.
  vfloat larger = vsel_vf_vo_vf_vf(vgt_vo_vf_vf(x, y), x, y);
#endif
  return vsel_vf_vo_vf_vf(visnan_vo_vf(y), x, larger);
}

// Exported single-precision vector fmin: forwards both operands to the
// vminnum_vf_vf_vf helper and returns its result unchanged.
EXPORT CONST vfloat xfminf(vfloat x, vfloat y) {
  vfloat result = vminnum_vf_vf_vf(x, y);
  return result;
}
// Exported single-precision vector fmin: yields x wherever visnan_vo_vf(y) is
// set, and the smaller-of-two candidate everywhere else.
EXPORT CONST vfloat xfminf(vfloat x, vfloat y) {
  // Only the "smaller of the two" step differs by target; the NaN guard on y is shared.
#if (defined(__x86_64__) || defined(__i386__)) && !defined(ENABLE_VECEXT) && !defined(ENABLE_PUREC)
  // NOTE(review): presumably vmin_vf_vf_vf maps to the native x86 min, which
  // yields y when x is NaN -- confirm against the x86 helper headers.
  vfloat smaller = vmin_vf_vf_vf(x, y);
#else
  // Explicit compare-and-select; presumably a NaN x makes the y > x mask
  // false so that y is picked -- confirm against vgt_vo_vf_vf.
  vfloat smaller = vsel_vf_vo_vf_vf(vgt_vo_vf_vf(y, x), x, y);
#endif
  return vsel_vf_vo_vf_vf(visnan_vo_vf(y), x, smaller);
}

EXPORT CONST vfloat xfdimf(vfloat x, vfloat y) {
vfloat ret = vsub_vf_vf_vf(x, y);
Expand Down

0 comments on commit b82fd94

Please sign in to comment.