Skip to content

Commit

Permalink
aarch64 neon intrinsics: vmaxq_f32, vminq_f32, vaddvq_f32, vrndnq_f32 (
Browse files Browse the repository at this point in the history
  • Loading branch information
tjamaan authored Sep 19, 2024
1 parent 5349365 commit 753271c
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 0 deletions.
43 changes: 43 additions & 0 deletions example/neon.rs
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,44 @@ unsafe fn test_vqadd_u8() {
assert_eq!(r, e);
}

/// Checks `vmaxq_f32` against a hand-computed lane-wise maximum of two vectors.
#[cfg(target_arch = "aarch64")]
unsafe fn test_vmaxq_f32() {
    // AArch64 llvm intrinsic: llvm.aarch64.neon.fmax.v4f32
    let lhs = f32x4::from([0., -1., 2., -3.]);
    let rhs = f32x4::from([-4., 5., -6., 7.]);
    let expected = f32x4::from([0., 5., 2., 7.]);

    // Call the intrinsic on the raw NEON vectors, then view the result as `f32x4`
    // so it can be compared with `assert_eq!`.
    let raw = vmaxq_f32(transmute(lhs), transmute(rhs));
    let actual: f32x4 = transmute(raw);
    assert_eq!(actual, expected);
}

/// Checks `vminq_f32` against a hand-computed lane-wise minimum of two vectors.
#[cfg(target_arch = "aarch64")]
unsafe fn test_vminq_f32() {
    // AArch64 llvm intrinsic: llvm.aarch64.neon.fmin.v4f32
    let lhs = f32x4::from([0., -1., 2., -3.]);
    let rhs = f32x4::from([-4., 5., -6., 7.]);
    let expected = f32x4::from([-4., -1., -6., -3.]);

    // Call the intrinsic on the raw NEON vectors, then view the result as `f32x4`
    // so it can be compared with `assert_eq!`.
    let raw = vminq_f32(transmute(lhs), transmute(rhs));
    let actual: f32x4 = transmute(raw);
    assert_eq!(actual, expected);
}

/// Checks that `vaddvq_f32` sums all four lanes of its input into one scalar.
#[cfg(target_arch = "aarch64")]
unsafe fn test_vaddvq_f32() {
    // AArch64 llvm intrinsic: llvm.aarch64.neon.faddv.f32.v4f32
    let input = f32x4::from([0., 1., 2., 3.]);
    let expected = 6f32;

    // The intrinsic already returns a plain `f32`, so no transmute of the result is needed.
    let actual = vaddvq_f32(transmute(input));
    assert_eq!(actual, expected);
}

/// Checks `vrndnq_f32` lane-wise rounding, including the two tie cases 4.5 and 5.5.
#[cfg(target_arch = "aarch64")]
unsafe fn test_vrndnq_f32() {
    // AArch64 llvm intrinsic: llvm.aarch64.neon.frintn.v4f32
    let input = f32x4::from([0.1, -1.9, 4.5, 5.5]);
    // The ties are expected to land on the even neighbour: 4.5 -> 4 and 5.5 -> 6.
    let expected = f32x4::from([0., -2., 4., 6.]);

    let raw = vrndnq_f32(transmute(input));
    let actual: f32x4 = transmute(raw);
    assert_eq!(actual, expected);
}

#[cfg(target_arch = "aarch64")]
fn main() {
unsafe {
Expand Down Expand Up @@ -229,6 +267,11 @@ fn main() {

test_vqsub_u8();
test_vqadd_u8();

test_vmaxq_f32();
test_vminq_f32();
test_vaddvq_f32();
test_vrndnq_f32();
}
}

Expand Down
38 changes: 38 additions & 0 deletions src/intrinsics/llvm_aarch64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,44 @@ pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>(
);
}

// Lane-wise floating-point maximum. Handles every intrinsic whose name starts with
// `llvm.aarch64.neon.fmax.v` (e.g. `...fmax.v4f32`, which `vmaxq_f32` lowers to).
_ if intrinsic.starts_with("llvm.aarch64.neon.fmax.v") => {
// Unpack the two vector operands.
intrinsic_args!(fx, args => (x, y); intrinsic);

// Emit one `fmax` per pair of corresponding lanes of `x` and `y`; the results go to `ret`.
// NOTE(review): presumably `fmax`'s NaN and signed-zero handling matches the
// AArch64 FMAX instruction - confirm against the Cranelift instruction reference.
simd_pair_for_each_lane(
fx,
x,
y,
ret,
&|fx, _lane_ty, _res_lane_ty, x_lane, y_lane| fx.bcx.ins().fmax(x_lane, y_lane),
);
}

// Lane-wise floating-point minimum. Handles every intrinsic whose name starts with
// `llvm.aarch64.neon.fmin.v` (e.g. `...fmin.v4f32`, which `vminq_f32` lowers to).
_ if intrinsic.starts_with("llvm.aarch64.neon.fmin.v") => {
// Unpack the two vector operands.
intrinsic_args!(fx, args => (x, y); intrinsic);

// Emit one `fmin` per pair of corresponding lanes of `x` and `y`; the results go to `ret`.
// NOTE(review): presumably `fmin`'s NaN and signed-zero handling matches the
// AArch64 FMIN instruction - confirm against the Cranelift instruction reference.
simd_pair_for_each_lane(
fx,
x,
y,
ret,
&|fx, _lane_ty, _res_lane_ty, x_lane, y_lane| fx.bcx.ins().fmin(x_lane, y_lane),
);
}

// Horizontal floating-point add: folds the lanes of `v` into a single value with `fadd`.
// The prefix only matches the `f32`-returning variants (e.g. `...faddv.f32.v4f32`, which
// `vaddvq_f32` lowers to).
_ if intrinsic.starts_with("llvm.aarch64.neon.faddv.f32.v") => {
// Unpack the single vector operand.
intrinsic_args!(fx, args => (v); intrinsic);

// NOTE(review): the order in which `simd_reduce` combines lanes is not visible here.
// Float addition is not associative, so if that order differs from the hardware's,
// results could differ in the last bits for some inputs - confirm.
// The `None` argument is presumably "no initial accumulator" - verify against `simd_reduce`.
simd_reduce(fx, v, None, ret, &|fx, _ty, a, b| fx.bcx.ins().fadd(a, b));
}

// Lane-wise rounding to an integral floating-point value. Handles every intrinsic whose
// name starts with `llvm.aarch64.neon.frintn.v` (e.g. `...frintn.v4f32`, which
// `vrndnq_f32` lowers to).
_ if intrinsic.starts_with("llvm.aarch64.neon.frintn.v") => {
// Unpack the single vector operand.
intrinsic_args!(fx, args => (v); intrinsic);

// Emit one `nearest` per lane of `v`; the results go to `ret`.
// The test in example/neon.rs expects ties to round to even (4.5 -> 4, 5.5 -> 6).
simd_for_each_lane(fx, v, ret, &|fx, _lane_ty, _res_lane_ty, lane| {
fx.bcx.ins().nearest(lane)
});
}

_ if intrinsic.starts_with("llvm.aarch64.neon.smaxv.i") => {
intrinsic_args!(fx, args => (v); intrinsic);

Expand Down

0 comments on commit 753271c

Please sign in to comment.