Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[X86] Align other variants to use void * as 512 variants. #66310

Merged
merged 4 commits into from
Sep 20, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion clang/lib/Headers/avx2intrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -2979,7 +2979,7 @@ _mm256_xor_si256(__m256i __a, __m256i __b)
/// A pointer to the 32-byte aligned memory containing the vector to load.
/// \returns A 256-bit integer vector loaded from memory.
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_stream_load_si256(__m256i const *__V)
_mm256_stream_load_si256(const void *__V)
{
typedef __v4di __v4di_aligned __attribute__((aligned(32)));
return (__m256i)__builtin_nontemporal_load((const __v4di_aligned *)__V);
Expand Down
6 changes: 3 additions & 3 deletions clang/lib/Headers/avxintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -3563,7 +3563,7 @@ _mm_maskstore_ps(float *__p, __m128i __m, __m128 __a)
/// \param __b
/// A 256-bit integer vector containing the values to be moved.
static __inline void __DEFAULT_FN_ATTRS
_mm256_stream_si256(__m256i *__a, __m256i __b)
_mm256_stream_si256(void *__a, __m256i __b)
{
typedef __v4di __v4di_aligned __attribute__((aligned(32)));
__builtin_nontemporal_store((__v4di_aligned)__b, (__v4di_aligned*)__a);
Expand All @@ -3583,7 +3583,7 @@ _mm256_stream_si256(__m256i *__a, __m256i __b)
/// \param __b
/// A 256-bit vector of [4 x double] containing the values to be moved.
static __inline void __DEFAULT_FN_ATTRS
_mm256_stream_pd(double *__a, __m256d __b)
_mm256_stream_pd(void *__a, __m256d __b)
{
typedef __v4df __v4df_aligned __attribute__((aligned(32)));
__builtin_nontemporal_store((__v4df_aligned)__b, (__v4df_aligned*)__a);
Expand All @@ -3604,7 +3604,7 @@ _mm256_stream_pd(double *__a, __m256d __b)
/// \param __a
/// A 256-bit vector of [8 x float] containing the values to be moved.
static __inline void __DEFAULT_FN_ATTRS
_mm256_stream_ps(float *__p, __m256 __a)
_mm256_stream_ps(void *__p, __m256 __a)
{
typedef __v8sf __v8sf_aligned __attribute__((aligned(32)));
__builtin_nontemporal_store((__v8sf_aligned)__a, (__v8sf_aligned*)__p);
Expand Down
8 changes: 4 additions & 4 deletions clang/lib/Headers/emmintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -3945,7 +3945,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_epi64(__m128i_u *__p,
/// A pointer to the 128-bit aligned memory location used to store the value.
/// \param __a
/// A vector of [2 x double] containing the 64-bit values to be stored.
static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(double *__p,
static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(void *__p,
__m128d __a) {
__builtin_nontemporal_store((__v2df)__a, (__v2df *)__p);
}
Expand All @@ -3963,7 +3963,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(double *__p,
/// A pointer to the 128-bit aligned memory location used to store the value.
/// \param __a
/// A 128-bit integer vector containing the values to be stored.
static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(__m128i *__p,
static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(void *__p,
__m128i __a) {
__builtin_nontemporal_store((__v2di)__a, (__v2di *)__p);
}
Expand All @@ -3983,7 +3983,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(__m128i *__p,
/// A 32-bit integer containing the value to be stored.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("sse2")))
_mm_stream_si32(int *__p, int __a) {
_mm_stream_si32(void *__p, int __a) {
__builtin_ia32_movnti(__p, __a);
}

Expand All @@ -4003,7 +4003,7 @@ static __inline__ void
/// A 64-bit integer containing the value to be stored.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("sse2")))
_mm_stream_si64(long long *__p, long long __a) {
_mm_stream_si64(void *__p, long long __a) {
__builtin_ia32_movnti64(__p, __a);
}
#endif
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/Headers/smmintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -645,7 +645,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epi32(__m128i __V1,
/// \returns A 128-bit integer vector containing the data stored at the
/// specified memory location.
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_stream_load_si128(__m128i const *__V) {
_mm_stream_load_si128(const void *__V) {
return (__m128i)__builtin_nontemporal_load((const __v2di *)__V);
}

Expand Down
2 changes: 1 addition & 1 deletion clang/lib/Headers/xmmintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -2140,7 +2140,7 @@ _mm_stream_pi(__m64 *__p, __m64 __a)
/// \param __a
/// A 128-bit vector of [4 x float] containing the values to be moved.
static __inline__ void __DEFAULT_FN_ATTRS
_mm_stream_ps(float *__p, __m128 __a)
_mm_stream_ps(void *__p, __m128 __a)
{
__builtin_nontemporal_store((__v4sf)__a, (__v4sf*)__p);
}
Expand Down
18 changes: 18 additions & 0 deletions clang/test/CodeGen/X86/avx-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -1940,18 +1940,36 @@ void test_mm256_stream_pd(double* A, __m256d B) {
_mm256_stream_pd(A, B);
}

// Calls _mm256_stream_pd with a void * destination. The expected IR is a
// nontemporal store of <4 x double> with 32-byte alignment.
void test_mm256_stream_pd_void(void *A, __m256d B) {
// CHECK-LABEL: test_mm256_stream_pd_void
// CHECK: store <4 x double> %{{.*}}, ptr %{{.*}}, align 32, !nontemporal
_mm256_stream_pd(A, B);
}

// Calls _mm256_stream_ps with a float * destination. The expected IR is a
// nontemporal store of <8 x float> with 32-byte alignment.
void test_mm256_stream_ps(float* A, __m256 B) {
// CHECK-LABEL: test_mm256_stream_ps
// CHECK: store <8 x float> %{{.*}}, ptr %{{.*}}, align 32, !nontemporal
_mm256_stream_ps(A, B);
}

// Calls _mm256_stream_ps with a void * destination. The expected IR is a
// nontemporal store of <8 x float> with 32-byte alignment.
void test_mm256_stream_ps_void(void *A, __m256 B) {
// CHECK-LABEL: test_mm256_stream_ps_void
// CHECK: store <8 x float> %{{.*}}, ptr %{{.*}}, align 32, !nontemporal
_mm256_stream_ps(A, B);
}

// Calls _mm256_stream_si256 with a __m256i * destination. The expected IR is a
// nontemporal store of <4 x i64> with 32-byte alignment.
void test_mm256_stream_si256(__m256i* A, __m256i B) {
// CHECK-LABEL: test_mm256_stream_si256
// CHECK: store <4 x i64> %{{.*}}, ptr %{{.*}}, align 32, !nontemporal
_mm256_stream_si256(A, B);
}

// Calls _mm256_stream_si256 with a void * destination. The expected IR is a
// nontemporal store of <4 x i64> with 32-byte alignment.
void test_mm256_stream_si256_void(void *A, __m256i B) {
// CHECK-LABEL: test_mm256_stream_si256_void
// CHECK: store <4 x i64> %{{.*}}, ptr %{{.*}}, align 32, !nontemporal
_mm256_stream_si256(A, B);
}

__m256d test_mm256_sub_pd(__m256d A, __m256d B) {
// CHECK-LABEL: test_mm256_sub_pd
// CHECK: fsub <4 x double>
Expand Down
6 changes: 6 additions & 0 deletions clang/test/CodeGen/X86/avx2-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -1223,6 +1223,12 @@ __m256i test_mm256_stream_load_si256(__m256i const *a) {
return _mm256_stream_load_si256(a);
}

// Calls _mm256_stream_load_si256 with a const void * source and returns the
// result. The expected IR is a nontemporal load of <4 x i64> with 32-byte
// alignment.
__m256i test_mm256_stream_load_si256_void(const void *a) {
// CHECK-LABEL: test_mm256_stream_load_si256_void
// CHECK: load <4 x i64>, ptr %{{.*}}, align 32, !nontemporal
return _mm256_stream_load_si256(a);
}

__m256i test_mm256_sub_epi8(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_sub_epi8
// CHECK: sub <32 x i8>
Expand Down
6 changes: 6 additions & 0 deletions clang/test/CodeGen/X86/sse-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -720,6 +720,12 @@ void test_mm_stream_ps(float*A, __m128 B) {
_mm_stream_ps(A, B);
}

// Calls _mm_stream_ps with a void * destination. The expected IR is a
// nontemporal store of <4 x float> with 16-byte alignment.
void test_mm_stream_ps_void(void *A, __m128 B) {
// CHECK-LABEL: test_mm_stream_ps_void
// CHECK: store <4 x float> %{{.*}}, ptr %{{.*}}, align 16, !nontemporal
_mm_stream_ps(A, B);
}

__m128 test_mm_sub_ps(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_sub_ps
// CHECK: fsub <4 x float>
Expand Down
24 changes: 24 additions & 0 deletions clang/test/CodeGen/X86/sse2-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -1488,18 +1488,36 @@ void test_mm_stream_pd(double *A, __m128d B) {
_mm_stream_pd(A, B);
}

// Calls _mm_stream_pd with a void * destination. The expected IR is a
// nontemporal store of <2 x double> with 16-byte alignment.
void test_mm_stream_pd_void(void *A, __m128d B) {
// CHECK-LABEL: test_mm_stream_pd_void
// CHECK: store <2 x double> %{{.*}}, ptr %{{.*}}, align 16, !nontemporal
_mm_stream_pd(A, B);
}

// Calls _mm_stream_si32 with an int * destination. The expected IR is a
// nontemporal i32 store; note the expected alignment here is 1, not the
// natural alignment of int.
void test_mm_stream_si32(int *A, int B) {
// CHECK-LABEL: test_mm_stream_si32
// CHECK: store i32 %{{.*}}, ptr %{{.*}}, align 1, !nontemporal
_mm_stream_si32(A, B);
}

// Calls _mm_stream_si32 with a void * destination. The expected IR is a
// nontemporal i32 store with alignment 1.
void test_mm_stream_si32_void(void *A, int B) {
// CHECK-LABEL: test_mm_stream_si32_void
// CHECK: store i32 %{{.*}}, ptr %{{.*}}, align 1, !nontemporal
_mm_stream_si32(A, B);
}

#ifdef __x86_64__
// Calls _mm_stream_si64 with a long long * destination. Only compiled when
// __x86_64__ is defined (enclosing #ifdef), and its expectations use a
// separate check prefix from the other tests in this file. The expected IR is
// a nontemporal i64 store with alignment 1.
void test_mm_stream_si64(long long *A, long long B) {
// X64-LABEL: test_mm_stream_si64
// X64: store i64 %{{.*}}, ptr %{{.*}}, align 1, !nontemporal
_mm_stream_si64(A, B);
}

// Calls _mm_stream_si64 with a void * destination. Only compiled when
// __x86_64__ is defined (enclosing #ifdef). The expected IR is a nontemporal
// i64 store with alignment 1.
void test_mm_stream_si64_void(void *A, long long B) {
// X64-LABEL: test_mm_stream_si64_void
// X64: store i64 %{{.*}}, ptr %{{.*}}, align 1, !nontemporal
_mm_stream_si64(A, B);
}
#endif

void test_mm_stream_si128(__m128i *A, __m128i B) {
Expand All @@ -1508,6 +1526,12 @@ void test_mm_stream_si128(__m128i *A, __m128i B) {
_mm_stream_si128(A, B);
}

// Calls _mm_stream_si128 with a void * destination. The expected IR is a
// nontemporal store of <2 x i64> with 16-byte alignment.
void test_mm_stream_si128_void(void *A, __m128i B) {
// CHECK-LABEL: test_mm_stream_si128_void
// CHECK: store <2 x i64> %{{.*}}, ptr %{{.*}}, align 16, !nontemporal
_mm_stream_si128(A, B);
}

__m128i test_mm_sub_epi8(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_sub_epi8
// CHECK: sub <16 x i8>
Expand Down
6 changes: 6 additions & 0 deletions clang/test/CodeGen/X86/sse41-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,12 @@ __m128i test_mm_stream_load_si128(__m128i const *a) {
return _mm_stream_load_si128(a);
}

// Calls _mm_stream_load_si128 with a const void * source and returns the
// result. The expected IR is a nontemporal load of <2 x i64> with 16-byte
// alignment.
__m128i test_mm_stream_load_si128_void(const void *a) {
// CHECK-LABEL: test_mm_stream_load_si128_void
// CHECK: load <2 x i64>, ptr %{{.*}}, align 16, !nontemporal
return _mm_stream_load_si128(a);
}

int test_mm_test_all_ones(__m128i x) {
// CHECK-LABEL: test_mm_test_all_ones
// CHECK: call i32 @llvm.x86.sse41.ptestc(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
Expand Down