Skip to content

Commit

Permalink
neon/cge: Improve some of the SSE2 fallbacks
Browse files: browse the repository at this point in the history
Fixes #900
  • Loading branch information
Glitch18 authored and nemequ committed Aug 22, 2021
1 parent e1bc968 commit 647dc12
Showing 1 changed file with 26 additions and 5 deletions.
31 changes: 26 additions & 5 deletions simde/arm/neon/cge.h
Original file line number Diff line number Diff line change
Expand Up @@ -290,8 +290,11 @@ simde_vcgeq_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
b_ = simde_uint8x16_to_private(b);

#if defined(SIMDE_X86_SSE2_NATIVE)
- __m128i sign_bits = _mm_set1_epi8(INT8_MIN);
- r_.m128i = _mm_or_si128(_mm_cmpgt_epi8(_mm_xor_si128(a_.m128i, sign_bits), _mm_xor_si128(b_.m128i, sign_bits)), _mm_cmpeq_epi8(a_.m128i, b_.m128i));
+ r_.m128i =
+ _mm_cmpeq_epi8(
+ _mm_min_epu8(b_.m128i, a_.m128i),
+ b_.m128i
+ );
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
r_.v128 = wasm_u8x16_ge(a_.v128, b_.v128);
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
Expand Down Expand Up @@ -324,7 +327,13 @@ simde_vcgeq_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
a_ = simde_uint16x8_to_private(a),
b_ = simde_uint16x8_to_private(b);

- #if defined(SIMDE_X86_SSE2_NATIVE)
+ #if defined(SIMDE_X86_SSE4_1_NATIVE)
+ r_.m128i =
+ _mm_cmpeq_epi16(
+ _mm_min_epu16(b_.m128i, a_.m128i),
+ b_.m128i
+ );
+ #elif defined(SIMDE_X86_SSE2_NATIVE)
__m128i sign_bits = _mm_set1_epi16(INT16_MIN);
r_.m128i = _mm_or_si128(_mm_cmpgt_epi16(_mm_xor_si128(a_.m128i, sign_bits), _mm_xor_si128(b_.m128i, sign_bits)), _mm_cmpeq_epi16(a_.m128i, b_.m128i));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
Expand Down Expand Up @@ -359,7 +368,13 @@ simde_vcgeq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
a_ = simde_uint32x4_to_private(a),
b_ = simde_uint32x4_to_private(b);

- #if defined(SIMDE_X86_SSE2_NATIVE)
+ #if defined(SIMDE_X86_SSE4_1_NATIVE)
+ r_.m128i =
+ _mm_cmpeq_epi32(
+ _mm_min_epu32(b_.m128i, a_.m128i),
+ b_.m128i
+ );
+ #elif defined(SIMDE_X86_SSE2_NATIVE)
__m128i sign_bits = _mm_set1_epi32(INT32_MIN);
r_.m128i = _mm_or_si128(_mm_cmpgt_epi32(_mm_xor_si128(a_.m128i, sign_bits), _mm_xor_si128(b_.m128i, sign_bits)), _mm_cmpeq_epi32(a_.m128i, b_.m128i));
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
Expand Down Expand Up @@ -394,7 +409,13 @@ simde_vcgeq_u64(simde_uint64x2_t a, simde_uint64x2_t b) {
a_ = simde_uint64x2_to_private(a),
b_ = simde_uint64x2_to_private(b);

- #if defined(SIMDE_X86_SSE4_2_NATIVE)
+ #if defined(SIMDE_X86_AVX512VL_NATIVE)
+ r_.m128i =
+ _mm_cmpeq_epi64(
+ _mm_min_epu64(b_.m128i, a_.m128i),
+ b_.m128i
+ );
+ #elif defined(SIMDE_X86_SSE4_2_NATIVE)
__m128i sign_bits = _mm_set1_epi64x(INT64_MIN);
r_.m128i = _mm_or_si128(_mm_cmpgt_epi64(_mm_xor_si128(a_.m128i, sign_bits), _mm_xor_si128(b_.m128i, sign_bits)), _mm_cmpeq_epi64(a_.m128i, b_.m128i));
#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
Expand Down

0 comments on commit 647dc12

Please sign in to comment.