Skip to content

Commit

Permalink
feat(mask): implement mask_not
Browse files Browse the repository at this point in the history
  • Loading branch information
nfrechette committed Jul 5, 2024
1 parent 47c0c22 commit 38fca19
Show file tree
Hide file tree
Showing 5 changed files with 123 additions and 0 deletions.
28 changes: 28 additions & 0 deletions includes/rtm/mask4d.h
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,34 @@ namespace rtm
#endif
}

//////////////////////////////////////////////////////////////////////////
// Per component bitwise NOT of the input: ~input
// Every bit of each of the four 64-bit components is flipped.
//////////////////////////////////////////////////////////////////////////
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE mask4d RTM_SIMD_CALL mask_not(mask4d_arg0 input) RTM_NO_EXCEPT
{
#if defined(RTM_SSE2_INTRINSICS)
	// All bits set. Built from a signed -1 so that no unsigned literal has to be
	// implicitly converted to the signed parameter type of the intrinsic.
	const __m128d true_mask = _mm_castsi128_pd(_mm_set1_epi32(-1));

	// andnot computes (~a) & b, with b being all ones this yields ~a
	const __m128d xy = _mm_andnot_pd(input.xy, true_mask);
	const __m128d zw = _mm_andnot_pd(input.zw, true_mask);
	return mask4d{ xy, zw };
#else
	// Scalar fallback: read the input as four 64-bit integers and flip each one
	const uint64_t* input_ = rtm_impl::bit_cast<const uint64_t*>(&input);

	union
	{
		mask4d vector;
		uint64_t scalar[4];
	} result;

	result.scalar[0] = ~input_[0];
	result.scalar[1] = ~input_[1];
	result.scalar[2] = ~input_[2];
	result.scalar[3] = ~input_[3];

	return result.vector;
#endif
}

RTM_IMPL_VERSION_NAMESPACE_END
}

Expand Down
28 changes: 28 additions & 0 deletions includes/rtm/mask4f.h
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,34 @@ namespace rtm
#endif
}

//////////////////////////////////////////////////////////////////////////
// Per component bitwise NOT of the input: ~input
// Every bit of each of the four 32-bit components is flipped.
//////////////////////////////////////////////////////////////////////////
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE mask4f RTM_SIMD_CALL mask_not(mask4f_arg0 input) RTM_NO_EXCEPT
{
#if defined(RTM_SSE2_INTRINSICS)
	// All bits set. Built from a signed -1: the intrinsic takes 'int' arguments and
	// a 0xFFFFFFFFULL literal would be narrowed from a 64-bit unsigned value.
	const __m128 true_mask = _mm_castsi128_ps(_mm_set1_epi32(-1));

	// andnot computes (~a) & b, with b being all ones this yields ~a
	return _mm_andnot_ps(input, true_mask);
#elif defined(RTM_NEON_INTRINSICS)
	return vmvnq_u32(input);
#else
	// Scalar fallback: read the input as four 32-bit integers and flip each one
	const uint32_t* input_ = rtm_impl::bit_cast<const uint32_t*>(&input);

	union
	{
		mask4f vector;
		uint32_t scalar[4];
	} result;

	result.scalar[0] = ~input_[0];
	result.scalar[1] = ~input_[1];
	result.scalar[2] = ~input_[2];
	result.scalar[3] = ~input_[3];

	return result.vector;
#endif
}

RTM_IMPL_VERSION_NAMESPACE_END
}

Expand Down
15 changes: 15 additions & 0 deletions includes/rtm/mask4i.h
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,21 @@ namespace rtm
#endif
}

//////////////////////////////////////////////////////////////////////////
// Per component bitwise NOT of the input: ~input
// Every bit of each of the four components is flipped.
//////////////////////////////////////////////////////////////////////////
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE mask4i RTM_SIMD_CALL mask_not(mask4i_arg0 input) RTM_NO_EXCEPT
{
#if defined(RTM_SSE2_INTRINSICS)
	// All bits set. Built from a signed -1: the intrinsic takes 'int' arguments and
	// a 0xFFFFFFFFULL literal would be narrowed from a 64-bit unsigned value.
	const __m128i true_mask = _mm_set1_epi32(-1);

	// andnot computes (~a) & b, with b being all ones this yields ~a
	return _mm_andnot_si128(input, true_mask);
#elif defined(RTM_NEON_INTRINSICS)
	return RTM_IMPL_MASK4i_SET(vmvnq_u32(RTM_IMPL_MASK4i_GET(input)));
#else
	// Scalar fallback: flip each component individually
	return mask4i{ ~input.x, ~input.y, ~input.z, ~input.w };
#endif
}

RTM_IMPL_VERSION_NAMESPACE_END
}

Expand Down
28 changes: 28 additions & 0 deletions includes/rtm/mask4q.h
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,34 @@ namespace rtm
#endif
}

//////////////////////////////////////////////////////////////////////////
// Per component bitwise NOT of the input: ~input
// Every bit of each of the four 64-bit components is flipped.
//////////////////////////////////////////////////////////////////////////
RTM_DISABLE_SECURITY_COOKIE_CHECK RTM_FORCE_INLINE mask4q RTM_SIMD_CALL mask_not(mask4q_arg0 input) RTM_NO_EXCEPT
{
#if defined(RTM_SSE2_INTRINSICS)
	// All bits set. Built from a signed -1 so that no unsigned literal has to be
	// implicitly converted to the signed parameter type of the intrinsic.
	const __m128i true_mask = _mm_set1_epi32(-1);

	// andnot computes (~a) & b, with b being all ones this yields ~a
	const __m128i xy = _mm_andnot_si128(input.xy, true_mask);
	const __m128i zw = _mm_andnot_si128(input.zw, true_mask);
	return mask4q{ xy, zw };
#else
	// Scalar fallback: read the input as four 64-bit integers and flip each one
	const uint64_t* input_ = rtm_impl::bit_cast<const uint64_t*>(&input);

	union
	{
		mask4q vector;
		uint64_t scalar[4];
	} result;

	result.scalar[0] = ~input_[0];
	result.scalar[1] = ~input_[1];
	result.scalar[2] = ~input_[2];
	result.scalar[3] = ~input_[3];

	return result.vector;
#endif
}

RTM_IMPL_VERSION_NAMESPACE_END
}

Expand Down
24 changes: 24 additions & 0 deletions tests/sources/test_mask4.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,26 @@ inline Mask4Type reference_mask_xor(const Mask4Type& input0, const Mask4Type& in
return result;
}

// Reference implementation of a per component bitwise NOT.
// The mask is copied into four IntType lanes, every lane is inverted,
// and the lanes are copied back into a mask of the same type.
template<typename IntType, typename Mask4Type>
inline Mask4Type reference_mask_not(const Mask4Type& input)
{
	IntType lanes[4];
	static_assert(sizeof(Mask4Type) == sizeof(lanes), "Unexpected size");

	// Copy the raw mask bytes into the integer lanes
	std::memcpy(&lanes[0], &input, sizeof(Mask4Type));

	// Invert each lane in place
	for (IntType& lane : lanes)
		lane = static_cast<IntType>(~lane);

	// Copy the inverted lanes back out as a mask
	Mask4Type flipped;
	std::memcpy(&flipped, &lanes[0], sizeof(Mask4Type));
	return flipped;
}

template<typename MaskType, typename IntType>
static void test_mask_impl()
{
Expand Down Expand Up @@ -298,6 +318,10 @@ static void test_mask_impl()
CHECK(mask_all_equal(mask_xor(mask0, mask1), reference_mask_xor<IntType>(mask0, mask1)));
CHECK(mask_all_equal(mask_xor(mask0, mask2), reference_mask_xor<IntType>(mask0, mask2)));
CHECK(mask_all_equal(mask_xor(mask1, mask2), reference_mask_xor<IntType>(mask1, mask2)));

CHECK(mask_all_equal(mask_not(mask0), reference_mask_not<IntType>(mask0)));
CHECK(mask_all_equal(mask_not(mask1), reference_mask_not<IntType>(mask1)));
CHECK(mask_all_equal(mask_not(mask2), reference_mask_not<IntType>(mask2)));
}
}

Expand Down

0 comments on commit 38fca19

Please sign in to comment.