From 67d72a18e580736ac6162b203ea22500f4c06cdc Mon Sep 17 00:00:00 2001 From: Tuan Chuong Goh Date: Tue, 16 Jul 2024 08:56:18 +0000 Subject: [PATCH] [AArch64][GlobalISel] Reland Make G_DUP immediate 32-bits or larger (#96780) The immediate operand gets extended in RegBankSelect to at least 32 bits to allow for better pattern matching in TableGen. The previous patch was erasing a constant without checking whether it had more than one use. Changes: - Does not erase the constant. - Added the @v_dup16_const test. --- .../GISel/AArch64InstructionSelector.cpp | 8 +- .../AArch64/GISel/AArch64RegisterBankInfo.cpp | 35 ++++++++- .../AArch64/GlobalISel/regbank-dup.mir | 75 +++++++++++-------- .../CodeGen/AArch64/GlobalISel/select-dup.mir | 19 +++++ llvm/test/CodeGen/AArch64/arm64-dup.ll | 67 ++++++++++++----- llvm/test/CodeGen/AArch64/neon-mov.ll | 73 ++++-------------- 6 files changed, 160 insertions(+), 117 deletions(-) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index 24d65624e09e9a9..bf7c773708071ee 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -2279,8 +2279,9 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) { Register Dst = I.getOperand(0).getReg(); auto *CV = ConstantDataVector::getSplat( MRI.getType(Dst).getNumElements(), - ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()), - ValAndVReg->Value)); + ConstantInt::get( + Type::getIntNTy(Ctx, MRI.getType(Dst).getScalarSizeInBits()), + ValAndVReg->Value.trunc(MRI.getType(Dst).getScalarSizeInBits()))); if (!emitConstantVector(Dst, CV, MIB, MRI)) return false; I.eraseFromParent(); @@ -5559,7 +5560,8 @@ AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV, } if (CV->getSplatValue()) { - APInt DefBits = APInt::getSplat(DstSize, CV->getUniqueInteger()); + APInt DefBits = APInt::getSplat( + DstSize, 
CV->getUniqueInteger().trunc(DstTy.getScalarSizeInBits())); auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * { MachineInstr *NewOp; bool Inv = false; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp index 5616d063f70bcc0..220ddbf10a1c235 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp @@ -42,6 +42,7 @@ #include "AArch64GenRegisterBankInfo.def" using namespace llvm; +static const unsigned CustomMappingID = 1; AArch64RegisterBankInfo::AArch64RegisterBankInfo( const TargetRegisterInfo &TRI) { @@ -424,6 +425,26 @@ void AArch64RegisterBankInfo::applyMappingImpl( MI.getOperand(2).setReg(Ext.getReg(0)); return applyDefaultMapping(OpdMapper); } + case AArch64::G_DUP: { + // Extend smaller gpr to 32-bits + assert(MRI.getType(MI.getOperand(1).getReg()).getSizeInBits() < 32 && + "Expected sources smaller than 32-bits"); + Builder.setInsertPt(*MI.getParent(), MI.getIterator()); + + Register ConstReg; + auto ConstMI = MRI.getVRegDef(MI.getOperand(1).getReg()); + if (ConstMI->getOpcode() == TargetOpcode::G_CONSTANT) { + auto CstVal = ConstMI->getOperand(1).getCImm()->getValue(); + ConstReg = + Builder.buildConstant(LLT::scalar(32), CstVal.sext(32)).getReg(0); + } else { + ConstReg = Builder.buildAnyExt(LLT::scalar(32), MI.getOperand(1).getReg()) + .getReg(0); + } + MRI.setRegBank(ConstReg, getRegBank(AArch64::GPRRegBankID)); + MI.getOperand(1).setReg(ConstReg); + return applyDefaultMapping(OpdMapper); + } default: llvm_unreachable("Don't know how to handle that operation"); } @@ -792,8 +813,14 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { (getRegBank(ScalarReg, MRI, TRI) == &AArch64::FPRRegBank || onlyDefinesFP(*ScalarDef, MRI, TRI))) OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR}; - else + else { + if (ScalarTy.getSizeInBits() < 32 && + getRegBank(ScalarReg, MRI, TRI) == 
&AArch64::GPRRegBank) { + // Calls applyMappingImpl() + MappingID = CustomMappingID; + } OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR}; + } break; } case TargetOpcode::G_TRUNC: { @@ -1014,8 +1041,10 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { // If the type is i8/i16, and the regank will be GPR, then we change the // type to i32 in applyMappingImpl. LLT Ty = MRI.getType(MI.getOperand(2).getReg()); - if (Ty.getSizeInBits() == 8 || Ty.getSizeInBits() == 16) - MappingID = 1; + if (Ty.getSizeInBits() == 8 || Ty.getSizeInBits() == 16) { + // Calls applyMappingImpl() + MappingID = CustomMappingID; + } OpRegBankIdx[2] = PMI_FirstGPR; } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-dup.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-dup.mir index 4cd6eef531ce085..66c8c2efda9bc14 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-dup.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-dup.mir @@ -16,10 +16,11 @@ body: | ; CHECK-LABEL: name: v4s32_gpr ; CHECK: liveins: $w0 - ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 - ; CHECK: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32) - ; CHECK: $q0 = COPY [[DUP]](<4 x s32>) - ; CHECK: RET_ReallyLR implicit $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 + ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32) + ; CHECK-NEXT: $q0 = COPY [[DUP]](<4 x s32>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(s32) = COPY $w0 %4:_(<4 x s32>) = G_DUP %0(s32) $q0 = COPY %4(<4 x s32>) @@ -37,10 +38,11 @@ body: | ; CHECK-LABEL: name: v4s64_gpr ; CHECK: liveins: $x0 - ; CHECK: [[COPY:%[0-9]+]]:gpr(s64) = COPY $x0 - ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64) - ; CHECK: $q0 = COPY [[DUP]](<2 x s64>) - ; CHECK: RET_ReallyLR implicit $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s64) = COPY $x0 + ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64) + ; CHECK-NEXT: $q0 = COPY [[DUP]](<2 x s64>) + ; 
CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(s64) = COPY $x0 %4:_(<2 x s64>) = G_DUP %0(s64) $q0 = COPY %4(<2 x s64>) @@ -58,10 +60,11 @@ body: | ; CHECK-LABEL: name: v2s32_gpr ; CHECK: liveins: $w0 - ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 - ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32) - ; CHECK: $d0 = COPY [[DUP]](<2 x s32>) - ; CHECK: RET_ReallyLR implicit $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 + ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32) + ; CHECK-NEXT: $d0 = COPY [[DUP]](<2 x s32>) + ; CHECK-NEXT: RET_ReallyLR implicit $d0 %0:_(s32) = COPY $w0 %4:_(<2 x s32>) = G_DUP %0(s32) $d0 = COPY %4(<2 x s32>) @@ -79,10 +82,11 @@ body: | ; CHECK-LABEL: name: v4s32_fpr ; CHECK: liveins: $s0 - ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0 - ; CHECK: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32) - ; CHECK: $q0 = COPY [[DUP]](<4 x s32>) - ; CHECK: RET_ReallyLR implicit $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0 + ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32) + ; CHECK-NEXT: $q0 = COPY [[DUP]](<4 x s32>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(s32) = COPY $s0 %4:_(<4 x s32>) = G_DUP %0(s32) $q0 = COPY %4(<4 x s32>) @@ -100,10 +104,11 @@ body: | ; CHECK-LABEL: name: v2s64_fpr ; CHECK: liveins: $d0 - ; CHECK: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0 - ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64) - ; CHECK: $q0 = COPY [[DUP]](<2 x s64>) - ; CHECK: RET_ReallyLR implicit $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0 + ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64) + ; CHECK-NEXT: $q0 = COPY [[DUP]](<2 x s64>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(s64) = COPY $d0 %4:_(<2 x s64>) = G_DUP %0(s64) $q0 = COPY %4(<2 x s64>) @@ -121,10 +126,11 @@ body: | ; CHECK-LABEL: name: v2s32_fpr ; CHECK: liveins: $s0 - ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0 - ; 
CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32) - ; CHECK: $d0 = COPY [[DUP]](<2 x s32>) - ; CHECK: RET_ReallyLR implicit $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0 + ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32) + ; CHECK-NEXT: $d0 = COPY [[DUP]](<2 x s32>) + ; CHECK-NEXT: RET_ReallyLR implicit $d0 %0:_(s32) = COPY $s0 %4:_(<2 x s32>) = G_DUP %0(s32) $d0 = COPY %4(<2 x s32>) @@ -142,10 +148,11 @@ body: | ; CHECK-LABEL: name: v2s64_fpr_copy ; CHECK: liveins: $d0 - ; CHECK: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0 - ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64) - ; CHECK: $q0 = COPY [[DUP]](<2 x s64>) - ; CHECK: RET_ReallyLR implicit $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0 + ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64) + ; CHECK-NEXT: $q0 = COPY [[DUP]](<2 x s64>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(s64) = COPY $d0 %6:_(<2 x s64>) = G_DUP %0(s64) $q0 = COPY %6(<2 x s64>) @@ -163,11 +170,13 @@ body: | ; CHECK-LABEL: name: v416s8_gpr ; CHECK: liveins: $w0 - ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 - ; CHECK: %trunc:gpr(s8) = G_TRUNC [[COPY]](s32) - ; CHECK: [[DUP:%[0-9]+]]:fpr(<16 x s8>) = G_DUP %trunc(s8) - ; CHECK: $q0 = COPY [[DUP]](<16 x s8>) - ; CHECK: RET_ReallyLR implicit $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 + ; CHECK-NEXT: %trunc:gpr(s8) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:gpr(s32) = G_ANYEXT %trunc(s8) + ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<16 x s8>) = G_DUP [[ANYEXT]](s32) + ; CHECK-NEXT: $q0 = COPY [[DUP]](<16 x s8>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(s32) = COPY $w0 %trunc:_(s8) = G_TRUNC %0(s32) %1:_(<16 x s8>) = G_DUP %trunc(s8) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir index 809bdceb4aa25e1..cf2bab78fe5a632 100644 --- 
a/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-dup.mir @@ -453,3 +453,22 @@ body: | %dup:fpr(<2 x p0>) = G_DUP %cst(p0) $q0 = COPY %dup(<2 x p0>) RET_ReallyLR implicit $q0 +... +--- +name: cstv4i16gpri32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: + ; CHECK-LABEL: name: cstv4i16gpri32 + ; CHECK: %dup:fpr64 = MOVIv4i16 3, 0 + ; CHECK-NEXT: $d0 = COPY %dup + ; CHECK-NEXT: RET_ReallyLR implicit $d0 + %cst:gpr(s32) = G_CONSTANT i32 3 + %dup:fpr(<4 x s16>) = G_DUP %cst(s32) + $d0 = COPY %dup(<4 x s16>) + RET_ReallyLR implicit $d0 + +... diff --git a/llvm/test/CodeGen/AArch64/arm64-dup.ll b/llvm/test/CodeGen/AArch64/arm64-dup.ll index 2bf5419e54830bd..0291f8c91230474 100644 --- a/llvm/test/CodeGen/AArch64/arm64-dup.ll +++ b/llvm/test/CodeGen/AArch64/arm64-dup.ll @@ -103,6 +103,19 @@ define <4 x i32> @v_dupQ32(i32 %A) nounwind { ret <4 x i32> %tmp4 } +define <4 x i16> @v_dup16_const(i16 %y, ptr %p) { +; CHECK-LABEL: v_dup16_const: +; CHECK: // %bb.0: +; CHECK-NEXT: movi.4h v0, #10 +; CHECK-NEXT: mov w8, #10 // =0xa +; CHECK-NEXT: strh w8, [x1] +; CHECK-NEXT: ret + %i = insertelement <4 x i16> undef, i16 10, i32 0 + %lo = shufflevector <4 x i16> %i, <4 x i16> undef, <4 x i32> zeroinitializer + store i16 10, ptr %p + ret <4 x i16> %lo +} + define <4 x float> @v_dupQfloat(float %A) nounwind { ; CHECK-LABEL: v_dupQfloat: ; CHECK: // %bb.0: @@ -420,9 +433,9 @@ define <4 x i16> @test_perfectshuffle_dupext_v4i16(<4 x i16> %a, <4 x i16> %b) n ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-GI-NEXT: adrp x8, .LCPI33_0 +; CHECK-GI-NEXT: adrp x8, .LCPI34_0 ; CHECK-GI-NEXT: mov.d v0[1], v1[0] -; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI33_0] +; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI34_0] ; CHECK-GI-NEXT: tbl.16b v0, { v0 }, v1 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; 
CHECK-GI-NEXT: ret @@ -443,9 +456,9 @@ define <4 x half> @test_perfectshuffle_dupext_v4f16(<4 x half> %a, <4 x half> %b ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-GI-NEXT: adrp x8, .LCPI34_0 +; CHECK-GI-NEXT: adrp x8, .LCPI35_0 ; CHECK-GI-NEXT: mov.d v0[1], v1[0] -; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI34_0] +; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI35_0] ; CHECK-GI-NEXT: tbl.16b v0, { v0 }, v1 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret @@ -462,9 +475,9 @@ define <4 x i32> @test_perfectshuffle_dupext_v4i32(<4 x i32> %a, <4 x i32> %b) n ; ; CHECK-GI-LABEL: test_perfectshuffle_dupext_v4i32: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: adrp x8, .LCPI35_0 +; CHECK-GI-NEXT: adrp x8, .LCPI36_0 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI35_0] +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI36_0] ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2 ; CHECK-GI-NEXT: ret @@ -481,9 +494,9 @@ define <4 x float> @test_perfectshuffle_dupext_v4f32(<4 x float> %a, <4 x float> ; ; CHECK-GI-LABEL: test_perfectshuffle_dupext_v4f32: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: adrp x8, .LCPI36_0 +; CHECK-GI-NEXT: adrp x8, .LCPI37_0 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI36_0] +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI37_0] ; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2 ; CHECK-GI-NEXT: ret @@ -503,12 +516,12 @@ define void @disguised_dup(<4 x float> %x, ptr %p1, ptr %p2) { ; ; CHECK-GI-LABEL: disguised_dup: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: adrp x8, .LCPI37_1 +; CHECK-GI-NEXT: adrp x8, .LCPI38_1 ; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q0_q1 -; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI37_1] -; 
CHECK-GI-NEXT: adrp x8, .LCPI37_0 +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI38_1] +; CHECK-GI-NEXT: adrp x8, .LCPI38_0 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2 -; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI37_0] +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI38_0] ; CHECK-GI-NEXT: tbl.16b v2, { v0, v1 }, v2 ; CHECK-GI-NEXT: str q0, [x0] ; CHECK-GI-NEXT: str q2, [x1] @@ -531,8 +544,8 @@ define <2 x i32> @dup_const2(<2 x i32> %A) nounwind { ; ; CHECK-GI-LABEL: dup_const2: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: adrp x8, .LCPI38_0 -; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI38_0] +; CHECK-GI-NEXT: adrp x8, .LCPI39_0 +; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI39_0] ; CHECK-GI-NEXT: add.2s v0, v0, v1 ; CHECK-GI-NEXT: ret %tmp2 = add <2 x i32> %A, @@ -550,8 +563,8 @@ define <2 x i32> @dup_const4_ext(<4 x i32> %A) nounwind { ; ; CHECK-GI-LABEL: dup_const4_ext: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: adrp x8, .LCPI39_0 -; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI39_0] +; CHECK-GI-NEXT: adrp x8, .LCPI40_0 +; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI40_0] ; CHECK-GI-NEXT: add.4s v0, v0, v1 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-GI-NEXT: ret @@ -575,12 +588,12 @@ define <4 x i32> @dup_const24(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C) nounwind ; ; CHECK-GI-LABEL: dup_const24: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: adrp x8, .LCPI40_1 +; CHECK-GI-NEXT: adrp x8, .LCPI41_1 ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-GI-NEXT: ldr d3, [x8, :lo12:.LCPI40_1] -; CHECK-GI-NEXT: adrp x8, .LCPI40_0 +; CHECK-GI-NEXT: ldr d3, [x8, :lo12:.LCPI41_1] +; CHECK-GI-NEXT: adrp x8, .LCPI41_0 ; CHECK-GI-NEXT: add.2s v0, v0, v3 -; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI40_0] +; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI41_0] ; CHECK-GI-NEXT: mov.d v0[1], v1[0] ; CHECK-GI-NEXT: add.4s v1, v2, v3 ; CHECK-GI-NEXT: eor.16b v0, v1, v0 @@ -687,3 +700,17 @@ define <8 x i16> @bitcast_v2f64_v8i16(<2 x i64> %a) { ret <8 x i16> %r } +define <4 x i16> @dup_i16_v4i16_constant() { +; 
CHECK-SD-LABEL: dup_i16_v4i16_constant: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov w8, #9211 // =0x23fb +; CHECK-SD-NEXT: dup.4h v0, w8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: dup_i16_v4i16_constant: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI50_0 +; CHECK-GI-NEXT: ldr d0, [x8, :lo12:.LCPI50_0] +; CHECK-GI-NEXT: ret + ret <4 x i16> +} diff --git a/llvm/test/CodeGen/AArch64/neon-mov.ll b/llvm/test/CodeGen/AArch64/neon-mov.ll index 170ba7292ae608a..ff80ff097b28f73 100644 --- a/llvm/test/CodeGen/AArch64/neon-mov.ll +++ b/llvm/test/CodeGen/AArch64/neon-mov.ll @@ -109,29 +109,11 @@ define <4 x i32> @movi4s_lsl16() { } define <4 x i32> @movi4s_fneg() { -; CHECK-NOFP16-SD-LABEL: movi4s_fneg: -; CHECK-NOFP16-SD: // %bb.0: -; CHECK-NOFP16-SD-NEXT: movi v0.4s, #240, lsl #8 -; CHECK-NOFP16-SD-NEXT: fneg v0.4s, v0.4s -; CHECK-NOFP16-SD-NEXT: ret -; -; CHECK-FP16-SD-LABEL: movi4s_fneg: -; CHECK-FP16-SD: // %bb.0: -; CHECK-FP16-SD-NEXT: movi v0.4s, #240, lsl #8 -; CHECK-FP16-SD-NEXT: fneg v0.4s, v0.4s -; CHECK-FP16-SD-NEXT: ret -; -; CHECK-NOFP16-GI-LABEL: movi4s_fneg: -; CHECK-NOFP16-GI: // %bb.0: -; CHECK-NOFP16-GI-NEXT: movi v0.4s, #240, lsl #8 -; CHECK-NOFP16-GI-NEXT: fneg v0.4s, v0.4s -; CHECK-NOFP16-GI-NEXT: ret -; -; CHECK-FP16-GI-LABEL: movi4s_fneg: -; CHECK-FP16-GI: // %bb.0: -; CHECK-FP16-GI-NEXT: movi v0.4s, #240, lsl #8 -; CHECK-FP16-GI-NEXT: fneg v0.4s, v0.4s -; CHECK-FP16-GI-NEXT: ret +; CHECK-LABEL: movi4s_fneg: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v0.4s, #240, lsl #8 +; CHECK-NEXT: fneg v0.4s, v0.4s +; CHECK-NEXT: ret ret <4 x i32> } @@ -308,23 +290,17 @@ define <8 x i16> @mvni8h_neg() { ; CHECK-NOFP16-SD-NEXT: dup v0.8h, w8 ; CHECK-NOFP16-SD-NEXT: ret ; -; CHECK-FP16-SD-LABEL: mvni8h_neg: -; CHECK-FP16-SD: // %bb.0: -; CHECK-FP16-SD-NEXT: movi v0.8h, #240 -; CHECK-FP16-SD-NEXT: fneg v0.8h, v0.8h -; CHECK-FP16-SD-NEXT: ret +; CHECK-FP16-LABEL: mvni8h_neg: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: movi v0.8h, #240 +; CHECK-FP16-NEXT: fneg 
v0.8h, v0.8h +; CHECK-FP16-NEXT: ret ; ; CHECK-NOFP16-GI-LABEL: mvni8h_neg: ; CHECK-NOFP16-GI: // %bb.0: ; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI32_0 ; CHECK-NOFP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI32_0] ; CHECK-NOFP16-GI-NEXT: ret -; -; CHECK-FP16-GI-LABEL: mvni8h_neg: -; CHECK-FP16-GI: // %bb.0: -; CHECK-FP16-GI-NEXT: movi v0.8h, #240 -; CHECK-FP16-GI-NEXT: fneg v0.8h, v0.8h -; CHECK-FP16-GI-NEXT: ret ret <8 x i16> } @@ -494,29 +470,11 @@ define <2 x double> @fmov2d() { } define <2 x double> @fmov2d_neg0() { -; CHECK-NOFP16-SD-LABEL: fmov2d_neg0: -; CHECK-NOFP16-SD: // %bb.0: -; CHECK-NOFP16-SD-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NOFP16-SD-NEXT: fneg v0.2d, v0.2d -; CHECK-NOFP16-SD-NEXT: ret -; -; CHECK-FP16-SD-LABEL: fmov2d_neg0: -; CHECK-FP16-SD: // %bb.0: -; CHECK-FP16-SD-NEXT: movi v0.2d, #0000000000000000 -; CHECK-FP16-SD-NEXT: fneg v0.2d, v0.2d -; CHECK-FP16-SD-NEXT: ret -; -; CHECK-NOFP16-GI-LABEL: fmov2d_neg0: -; CHECK-NOFP16-GI: // %bb.0: -; CHECK-NOFP16-GI-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NOFP16-GI-NEXT: fneg v0.2d, v0.2d -; CHECK-NOFP16-GI-NEXT: ret -; -; CHECK-FP16-GI-LABEL: fmov2d_neg0: -; CHECK-FP16-GI: // %bb.0: -; CHECK-FP16-GI-NEXT: movi v0.2d, #0000000000000000 -; CHECK-FP16-GI-NEXT: fneg v0.2d, v0.2d -; CHECK-FP16-GI-NEXT: ret +; CHECK-LABEL: fmov2d_neg0: +; CHECK: // %bb.0: +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: fneg v0.2d, v0.2d +; CHECK-NEXT: ret ret <2 x double> } @@ -581,5 +539,4 @@ define <2 x i32> @movi1d() { ret <2 x i32> %1 } ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; CHECK-FP16: {{.*}} ; CHECK-NOFP16: {{.*}}