From 22c3ba4bb519e12395c676ffe436ea4b8400234a Mon Sep 17 00:00:00 2001
From: lizhijin
Date: Sun, 19 Mar 2023 13:43:01 +0800
Subject: [PATCH] [SVE] Add patterns for shift intrinsics with FalseLanesZero
 mode

This patch adds patterns to reduce redundant mov and sel instructions
for shift intrinsics with FalseLanesZero mode, when
FeatureExperimentalZeroingPseudos is supported.

For example, before:

  mov z1.b, #0
  sel z0.b, p0, z0.b, z1.b
  asr z0.b, p0/m, z0.b, #7

After:

  movprfx z0.b, p0/z, z0.b
  asr z0.b, p0/m, z0.b, #7

Reviewed By: paulwalker-arm

Differential Revision: https://reviews.llvm.org/D145551
---
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td |   4 +
 llvm/lib/Target/AArch64/SVEInstrFormats.td    |  20 ++
 .../sve-intrinsics-int-arith-imm-zero.ll      | 176 ++++++++++++++++++
 3 files changed, 200 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm-zero.ll

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 278f74ef341f88..858b352c8c72e7 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2060,6 +2060,10 @@ let Predicates = [HasSVEorSME, UseExperimentalZeroingPseudos] in {
   defm LSR_ZPZZ  : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsr>;
   defm LSL_ZPZZ  : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsl>;
   defm ASRD_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<AArch64asrd_m1>;
+
+  defm ASR_ZPZI : sve_int_bin_pred_imm_zeroing_bhsd<int_aarch64_sve_asr, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>;
+  defm LSR_ZPZI : sve_int_bin_pred_imm_zeroing_bhsd<int_aarch64_sve_lsr, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>;
+  defm LSL_ZPZI : sve_int_bin_pred_imm_zeroing_bhsd<int_aarch64_sve_lsl, SVEShiftImmL8, SVEShiftImmL16, SVEShiftImmL32, SVEShiftImmL64>;
 } // End HasSVEorSME, UseExperimentalZeroingPseudos
 
 let Predicates = [HasSVEorSME] in {
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 4c97ae88f192a6..1d3bf9150ca412 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -571,6 +571,12 @@ class SVE_Shift_DupImm_Any_Predicate_Pat<ValueType vt, SDPatternOperator op,
 : Pat<(vt (op (pt (SVEAnyPredicate)), vt:$Rn, (vt (splat_vector (it (cast i32:$imm)))))),
       (inst $Rn, i32:$imm)>;
 
+class SVE_2_Op_Imm_Pat_Zero<ValueType vt, SDPatternOperator op, ValueType pt,
+                            ValueType it, ComplexPattern cpx, Instruction inst>
+: Pat<(vt (op pt:$Pg, (vselect pt:$Pg, vt:$Op1, (SVEDup0)),
+          (vt (splat_vector (it (cpx i32:$imm)))))),
+      (inst $Pg, $Op1, i32:$imm)>;
+
 class SVE_2_Op_Fp_Imm_Pat<ValueType vt, SDPatternOperator op,
                           ValueType pt, ValueType it,
                           FPImmLeaf immL, int imm,
@@ ... @@ multiclass sve_int_bin_pred_zeroing_bhsd<SDPatternOperator op> {
   def : SVE_3_Op_Pat_SelZero<nxv2i64, op, nxv2i1, nxv2i64, !cast<Pseudo>(NAME # _ZERO_D)>;
 }
 
+multiclass sve_int_bin_pred_imm_zeroing_bhsd<SDPatternOperator op,
+                                             ComplexPattern imm_b, ComplexPattern imm_h,
+                                             ComplexPattern imm_s, ComplexPattern imm_d> {
+  def _ZERO_B : PredTwoOpImmPseudo<NAME # _B, ZPR8,  Operand<i32>, FalseLanesZero>;
+  def _ZERO_H : PredTwoOpImmPseudo<NAME # _H, ZPR16, Operand<i32>, FalseLanesZero>;
+  def _ZERO_S : PredTwoOpImmPseudo<NAME # _S, ZPR32, Operand<i32>, FalseLanesZero>;
+  def _ZERO_D : PredTwoOpImmPseudo<NAME # _D, ZPR64, Operand<i32>, FalseLanesZero>;
+
+  def : SVE_2_Op_Imm_Pat_Zero<nxv16i8, op, nxv16i1, i32, imm_b, !cast<Pseudo>(NAME # _ZERO_B)>;
+  def : SVE_2_Op_Imm_Pat_Zero<nxv8i16, op, nxv8i1,  i32, imm_h, !cast<Pseudo>(NAME # _ZERO_H)>;
+  def : SVE_2_Op_Imm_Pat_Zero<nxv4i32, op, nxv4i1,  i32, imm_s, !cast<Pseudo>(NAME # _ZERO_S)>;
+  def : SVE_2_Op_Imm_Pat_Zero<nxv2i64, op, nxv2i1,  i32, imm_d, !cast<Pseudo>(NAME # _ZERO_D)>;
+}
+
 multiclass sve_int_bin_pred_shift_wide<bits<3> opc, string asm,
                                        SDPatternOperator op> {
   def _B : sve_int_bin_pred_shift<0b00, 0b1, opc, asm, ZPR8, ZPR64>;
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm-zero.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm-zero.ll
new file mode 100644
index 00000000000000..6593978b03d41e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm-zero.ll
@@ -0,0 +1,176 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+use-experimental-zeroing-pseudos < %s | FileCheck %s
+
+;; ASR
+define <vscale x 16 x i8> @asr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg) {
+; CHECK-LABEL: asr_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.b, p0/z, z0.b
+; CHECK-NEXT:    asr z0.b, p0/m, z0.b, #8
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
+  %ele = insertelement <vscale x 16 x i8> poison, i8 8, i32 0
+  %shuffle = shufflevector <vscale x 16 x i8> %ele, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %vsel, <vscale x 16 x i8> %shuffle)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @asr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg) {
+; CHECK-LABEL: asr_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    asr z0.h, p0/m, z0.h, #16
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
+  %ele = insertelement <vscale x 8 x i16> poison, i16 16, i32 0
+  %shuffle = shufflevector <vscale x 8 x i16> %ele, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %vsel, <vscale x 8 x i16> %shuffle)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @asr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg) local_unnamed_addr #0 {
+; CHECK-LABEL: asr_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    asr z0.s, p0/m, z0.s, #32
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
+  %ele = insertelement <vscale x 4 x i32> poison, i32 32, i32 0
+  %shuffle = shufflevector <vscale x 4 x i32> %ele, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %vsel, <vscale x 4 x i32> %shuffle)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @asr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg) {
+; CHECK-LABEL: asr_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    asr z0.d, p0/m, z0.d, #64
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
+  %ele = insertelement <vscale x 2 x i64> poison, i64 64, i32 0
+  %shuffle = shufflevector <vscale x 2 x i64> %ele, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %vsel, <vscale x 2 x i64> %shuffle)
+  ret <vscale x 2 x i64> %res
+}
+
+;; LSL
+define <vscale x 16 x i8> @lsl_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg) {
+; CHECK-LABEL: lsl_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.b, p0/z, z0.b
+; CHECK-NEXT:    lsl z0.b, p0/m, z0.b, #7
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
+  %ele = insertelement <vscale x 16 x i8> poison, i8 7, i32 0
+  %shuffle = shufflevector <vscale x 16 x i8> %ele, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %vsel, <vscale x 16 x i8> %shuffle)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @lsl_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg) {
+; CHECK-LABEL: lsl_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    lsl z0.h, p0/m, z0.h, #15
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
+  %ele = insertelement <vscale x 8 x i16> poison, i16 15, i32 0
+  %shuffle = shufflevector <vscale x 8 x i16> %ele, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %vsel, <vscale x 8 x i16> %shuffle)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @lsl_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg) local_unnamed_addr #0 {
+; CHECK-LABEL: lsl_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    lsl z0.s, p0/m, z0.s, #31
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
+  %ele = insertelement <vscale x 4 x i32> poison, i32 31, i32 0
+  %shuffle = shufflevector <vscale x 4 x i32> %ele, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %vsel, <vscale x 4 x i32> %shuffle)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @lsl_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg) {
+; CHECK-LABEL: lsl_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    lsl z0.d, p0/m, z0.d, #63
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
+  %ele = insertelement <vscale x 2 x i64> poison, i64 63, i32 0
+  %shuffle = shufflevector <vscale x 2 x i64> %ele, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %vsel, <vscale x 2 x i64> %shuffle)
+  ret <vscale x 2 x i64> %res
+}
+
+;; LSR
+define <vscale x 16 x i8> @lsr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg) {
+; CHECK-LABEL: lsr_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.b, p0/z, z0.b
+; CHECK-NEXT:    lsr z0.b, p0/m, z0.b, #8
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
+  %ele = insertelement <vscale x 16 x i8> poison, i8 8, i32 0
+  %shuffle = shufflevector <vscale x 16 x i8> %ele, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %vsel, <vscale x 16 x i8> %shuffle)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @lsr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg) {
+; CHECK-LABEL: lsr_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    lsr z0.h, p0/m, z0.h, #16
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
+  %ele = insertelement <vscale x 8 x i16> poison, i16 16, i32 0
+  %shuffle = shufflevector <vscale x 8 x i16> %ele, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %vsel, <vscale x 8 x i16> %shuffle)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @lsr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg) local_unnamed_addr #0 {
+; CHECK-LABEL: lsr_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    lsr z0.s, p0/m, z0.s, #32
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
+  %ele = insertelement <vscale x 4 x i32> poison, i32 32, i32 0
+  %shuffle = shufflevector <vscale x 4 x i32> %ele, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %vsel, <vscale x 4 x i32> %shuffle)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @lsr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg) {
+; CHECK-LABEL: lsr_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    lsr z0.d, p0/m, z0.d, #64
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
+  %ele = insertelement <vscale x 2 x i64> poison, i64 64, i32 0
+  %shuffle = shufflevector <vscale x 2 x i64> %ele, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %vsel, <vscale x 2 x i64> %shuffle)
+  ret <vscale x 2 x i64> %res
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
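
Note (not part of the patch): the IR shape that the new SVE_2_Op_Imm_Pat_Zero pattern matches, a select of the inactive lanes to zero feeding a merging shift intrinsic whose second operand is a splatted immediate, is what clang typically emits for the zeroing (_z) forms of the ACLE shift intrinsics. Below is a minimal C-level sketch of such a source; it assumes a clang with SVE ACLE support (arm_sve.h), and the function name shift_right_z is made up for illustration.

  #include <arm_sve.h>

  // Zeroing-predicated arithmetic shift right by an immediate: inactive
  // lanes of the result are zero, which clang models as a select against
  // zero feeding the merging llvm.aarch64.sve.asr intrinsic.
  svint8_t shift_right_z(svbool_t pg, svint8_t a) {
    return svasr_n_s8_z(pg, a, 7);
  }

Feeding the resulting IR to llc with the RUN-line flags from the new test (-mattr=+sve -mattr=+use-experimental-zeroing-pseudos) should select the movprfx + asr sequence shown in the commit message instead of the mov + sel + asr sequence.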