-
Notifications
You must be signed in to change notification settings - Fork 13.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Loading status checks…
Consistently use the most significant bit of vector masks
This improves the codegen for vector `select`, `gather`, `scatter` and boolean reduction intrinsics and fixes rust-lang/portable-simd#316. The current behavior of mask operations during llvm codegen is to truncate the mask vector to <N x i1>, telling llvm to use the least significant bit. Since sse/avx instructions are defined to use the most significant bit, llvm has to insert a left shift before the mask can actually be used. Similarly on aarch64, mask operations like blend work bit by bit, so repeating the least significant bit across the whole lane involves shifting it into the sign position and then comparing against zero. By shifting before truncating to <N x i1>, we tell llvm that we only consider the most significant bit, removing the need for additional shift instructions in the assembly.
1 parent
6b1e5d9
commit 7252870
Showing
13 changed files
with
282 additions
and
170 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
65 changes: 65 additions & 0 deletions
65
tests/codegen/simd-intrinsic/simd-intrinsic-mask-reduce.rs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
//@ compile-flags: -C no-prepopulate-passes | ||
// | ||
|
||
#![crate_type = "lib"] | ||
#![feature(repr_simd, intrinsics)] | ||
#![allow(non_camel_case_types)] | ||
|
||
// SIMD mask type with two `i32` lanes; argument type of the `*_32x2` tests. | ||
#[repr(simd)] | ||
#[derive(Copy, Clone)] | ||
pub struct mask32x2(i32, i32); | ||
|
||
// SIMD mask type with sixteen `i8` lanes; argument type of the `*_8x16` tests. | ||
#[repr(simd)] | ||
#[derive(Copy, Clone)] | ||
pub struct mask8x16(i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8); | ||
|
||
// Declarations of the boolean reduction intrinsics exercised by this file. | ||
// Each takes a vector `x` and returns a single `bool`. The CHECK lines in | ||
// this file expect `simd_reduce_all` to lower to an `and` reduction and | ||
// `simd_reduce_any` to an `or` reduction over `<N x i1>`. | ||
extern "rust-intrinsic" { | ||
fn simd_reduce_all<T>(x: T) -> bool; | ||
fn simd_reduce_any<T>(x: T) -> bool; | ||
} | ||
|
||
// NOTE(eddyb) `%{{x|1}}` is used because on some targets (e.g. WASM) | ||
// SIMD vectors are passed directly, resulting in `%x` being a vector, | ||
// while on others they're passed indirectly, resulting in `%x` being | ||
// a pointer to a vector, and `%1` a vector loaded from that pointer. | ||
// This is controlled by the target spec option `simd_types_indirect`. | ||
|
||
// Verifies the IR emitted for `simd_reduce_any` on a `<2 x i32>` mask. | ||
// Expected sequence: shift each lane right by 31 (moving its most significant | ||
// bit into bit 0), truncate to `<2 x i1>`, `or`-reduce the lanes, and | ||
// zero-extend the resulting `i1` to `i8`. | ||
// CHECK-LABEL: @reduce_any_32x2 | ||
#[no_mangle] | ||
pub unsafe fn reduce_any_32x2(x: mask32x2) -> bool { | ||
// CHECK: [[A:%[0-9]+]] = lshr <2 x i32> %{{x|1}}, <i32 31, i32 31> | ||
// CHECK: [[B:%[0-9]+]] = trunc <2 x i32> [[A]] to <2 x i1> | ||
// CHECK: [[C:%[0-9]+]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[B]]) | ||
// CHECK: %{{[0-9]+}} = zext i1 [[C]] to i8 | ||
simd_reduce_any(x) | ||
} | ||
|
||
// Verifies the IR emitted for `simd_reduce_all` on a `<2 x i32>` mask. | ||
// Expected sequence: shift each lane right by 31 (moving its most significant | ||
// bit into bit 0), truncate to `<2 x i1>`, `and`-reduce the lanes, and | ||
// zero-extend the resulting `i1` to `i8`. | ||
// CHECK-LABEL: @reduce_all_32x2 | ||
#[no_mangle] | ||
pub unsafe fn reduce_all_32x2(x: mask32x2) -> bool { | ||
// CHECK: [[A:%[0-9]+]] = lshr <2 x i32> %{{x|1}}, <i32 31, i32 31> | ||
// CHECK: [[B:%[0-9]+]] = trunc <2 x i32> [[A]] to <2 x i1> | ||
// CHECK: [[C:%[0-9]+]] = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> [[B]]) | ||
// CHECK: %{{[0-9]+}} = zext i1 [[C]] to i8 | ||
simd_reduce_all(x) | ||
} | ||
|
||
// Verifies the IR emitted for `simd_reduce_any` on a `<16 x i8>` mask. | ||
// Expected sequence: shift each lane right by 7 (moving its most significant | ||
// bit into bit 0), truncate to `<16 x i1>`, `or`-reduce the lanes, and | ||
// zero-extend the resulting `i1` to `i8`. | ||
// CHECK-LABEL: @reduce_any_8x16 | ||
#[no_mangle] | ||
pub unsafe fn reduce_any_8x16(x: mask8x16) -> bool { | ||
// CHECK: [[A:%[0-9]+]] = lshr <16 x i8> %{{x|1}}, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> | ||
// CHECK: [[B:%[0-9]+]] = trunc <16 x i8> [[A]] to <16 x i1> | ||
// CHECK: [[C:%[0-9]+]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[B]]) | ||
// CHECK: %{{[0-9]+}} = zext i1 [[C]] to i8 | ||
simd_reduce_any(x) | ||
} | ||
|
||
// Verifies the IR emitted for `simd_reduce_all` on a `<16 x i8>` mask. | ||
// Expected sequence: shift each lane right by 7 (moving its most significant | ||
// bit into bit 0), truncate to `<16 x i1>`, `and`-reduce the lanes, and | ||
// zero-extend the resulting `i1` to `i8`. | ||
// CHECK-LABEL: @reduce_all_8x16 | ||
#[no_mangle] | ||
pub unsafe fn reduce_all_8x16(x: mask8x16) -> bool { | ||
// CHECK: [[A:%[0-9]+]] = lshr <16 x i8> %{{x|1}}, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> | ||
// CHECK: [[B:%[0-9]+]] = trunc <16 x i8> [[A]] to <16 x i1> | ||
// CHECK: [[C:%[0-9]+]] = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> [[B]]) | ||
// CHECK: %{{[0-9]+}} = zext i1 [[C]] to i8 | ||
simd_reduce_all(x) | ||
} |