-
Notifications
You must be signed in to change notification settings - Fork 12.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AArch64][GlobalISel] Make G_DUP immediate 32-bits or larger #96780
Conversation
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-aarch64 Author: None (chuongg3) Changes: G_DUP's immediate operand gets extended in RegBankSelect to allow for better pattern matching in TableGen. Full diff: https://github.com/llvm/llvm-project/pull/96780.diff 6 Files Affected:
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 4a7c82b393c10..df342c8beef19 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -5536,7 +5536,8 @@ AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
}
if (CV->getSplatValue()) {
- APInt DefBits = APInt::getSplat(DstSize, CV->getUniqueInteger());
+ APInt DefBits = APInt::getSplat(
+ DstSize, CV->getUniqueInteger().trunc(DstTy.getScalarSizeInBits()));
auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * {
MachineInstr *NewOp;
bool Inv = false;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 4aa6999d1d3ca..6bbf7cc689abb 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -420,6 +420,25 @@ void AArch64RegisterBankInfo::applyMappingImpl(
MI.getOperand(2).setReg(Ext.getReg(0));
return applyDefaultMapping(OpdMapper);
}
+ case AArch64::G_DUP: {
+ // Extend smaller gpr to 32-bits
+ Builder.setInsertPt(*MI.getParent(), MI.getIterator());
+
+ Register ConstReg;
+ auto ConstMI = MRI.getVRegDef(MI.getOperand(1).getReg());
+ if (ConstMI->getOpcode() == TargetOpcode::G_CONSTANT) {
+ auto CstVal = ConstMI->getOperand(1).getCImm()->getValue();
+ ConstReg =
+ Builder.buildConstant(LLT::scalar(32), CstVal.sext(32)).getReg(0);
+ ConstMI->eraseFromParent();
+ } else {
+ ConstReg = Builder.buildAnyExt(LLT::scalar(32), MI.getOperand(1).getReg())
+ .getReg(0);
+ }
+ MRI.setRegBank(ConstReg, getRegBank(AArch64::GPRRegBankID));
+ MI.getOperand(1).setReg(ConstReg);
+ return applyDefaultMapping(OpdMapper);
+ }
default:
llvm_unreachable("Don't know how to handle that operation");
}
@@ -774,8 +793,12 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
(getRegBank(ScalarReg, MRI, TRI) == &AArch64::FPRRegBank ||
onlyDefinesFP(*ScalarDef, MRI, TRI)))
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
- else
+ else {
+ if (ScalarTy.getSizeInBits() < 32 &&
+ getRegBank(ScalarReg, MRI, TRI) == &AArch64::GPRRegBank)
+ MappingID = 1;
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstGPR};
+ }
break;
}
case TargetOpcode::G_TRUNC: {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
index c97a00ccdd455..2b9ef7acd4a4d 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
@@ -18,8 +18,8 @@ define <8 x i16> @combine_vec_udiv_uniform(<8 x i16> %x) {
;
; GISEL-LABEL: combine_vec_udiv_uniform:
; GISEL: // %bb.0:
-; GISEL-NEXT: adrp x8, .LCPI0_0
-; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI0_0]
+; GISEL-NEXT: mov w8, #25645 // =0x642d
+; GISEL-NEXT: dup v1.8h, w8
; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h
; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h
; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-dup.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-dup.mir
index 4cd6eef531ce0..66c8c2efda9bc 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-dup.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-dup.mir
@@ -16,10 +16,11 @@ body: |
; CHECK-LABEL: name: v4s32_gpr
; CHECK: liveins: $w0
- ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
- ; CHECK: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32)
- ; CHECK: $q0 = COPY [[DUP]](<4 x s32>)
- ; CHECK: RET_ReallyLR implicit $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+ ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32)
+ ; CHECK-NEXT: $q0 = COPY [[DUP]](<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(s32) = COPY $w0
%4:_(<4 x s32>) = G_DUP %0(s32)
$q0 = COPY %4(<4 x s32>)
@@ -37,10 +38,11 @@ body: |
; CHECK-LABEL: name: v4s64_gpr
; CHECK: liveins: $x0
- ; CHECK: [[COPY:%[0-9]+]]:gpr(s64) = COPY $x0
- ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
- ; CHECK: $q0 = COPY [[DUP]](<2 x s64>)
- ; CHECK: RET_ReallyLR implicit $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s64) = COPY $x0
+ ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
+ ; CHECK-NEXT: $q0 = COPY [[DUP]](<2 x s64>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(s64) = COPY $x0
%4:_(<2 x s64>) = G_DUP %0(s64)
$q0 = COPY %4(<2 x s64>)
@@ -58,10 +60,11 @@ body: |
; CHECK-LABEL: name: v2s32_gpr
; CHECK: liveins: $w0
- ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
- ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32)
- ; CHECK: $d0 = COPY [[DUP]](<2 x s32>)
- ; CHECK: RET_ReallyLR implicit $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+ ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32)
+ ; CHECK-NEXT: $d0 = COPY [[DUP]](<2 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
%0:_(s32) = COPY $w0
%4:_(<2 x s32>) = G_DUP %0(s32)
$d0 = COPY %4(<2 x s32>)
@@ -79,10 +82,11 @@ body: |
; CHECK-LABEL: name: v4s32_fpr
; CHECK: liveins: $s0
- ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0
- ; CHECK: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32)
- ; CHECK: $q0 = COPY [[DUP]](<4 x s32>)
- ; CHECK: RET_ReallyLR implicit $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0
+ ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<4 x s32>) = G_DUP [[COPY]](s32)
+ ; CHECK-NEXT: $q0 = COPY [[DUP]](<4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(s32) = COPY $s0
%4:_(<4 x s32>) = G_DUP %0(s32)
$q0 = COPY %4(<4 x s32>)
@@ -100,10 +104,11 @@ body: |
; CHECK-LABEL: name: v2s64_fpr
; CHECK: liveins: $d0
- ; CHECK: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0
- ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
- ; CHECK: $q0 = COPY [[DUP]](<2 x s64>)
- ; CHECK: RET_ReallyLR implicit $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0
+ ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
+ ; CHECK-NEXT: $q0 = COPY [[DUP]](<2 x s64>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(s64) = COPY $d0
%4:_(<2 x s64>) = G_DUP %0(s64)
$q0 = COPY %4(<2 x s64>)
@@ -121,10 +126,11 @@ body: |
; CHECK-LABEL: name: v2s32_fpr
; CHECK: liveins: $s0
- ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0
- ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32)
- ; CHECK: $d0 = COPY [[DUP]](<2 x s32>)
- ; CHECK: RET_ReallyLR implicit $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s32) = COPY $s0
+ ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s32>) = G_DUP [[COPY]](s32)
+ ; CHECK-NEXT: $d0 = COPY [[DUP]](<2 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
%0:_(s32) = COPY $s0
%4:_(<2 x s32>) = G_DUP %0(s32)
$d0 = COPY %4(<2 x s32>)
@@ -142,10 +148,11 @@ body: |
; CHECK-LABEL: name: v2s64_fpr_copy
; CHECK: liveins: $d0
- ; CHECK: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0
- ; CHECK: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
- ; CHECK: $q0 = COPY [[DUP]](<2 x s64>)
- ; CHECK: RET_ReallyLR implicit $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s64) = COPY $d0
+ ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<2 x s64>) = G_DUP [[COPY]](s64)
+ ; CHECK-NEXT: $q0 = COPY [[DUP]](<2 x s64>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(s64) = COPY $d0
%6:_(<2 x s64>) = G_DUP %0(s64)
$q0 = COPY %6(<2 x s64>)
@@ -163,11 +170,13 @@ body: |
; CHECK-LABEL: name: v416s8_gpr
; CHECK: liveins: $w0
- ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
- ; CHECK: %trunc:gpr(s8) = G_TRUNC [[COPY]](s32)
- ; CHECK: [[DUP:%[0-9]+]]:fpr(<16 x s8>) = G_DUP %trunc(s8)
- ; CHECK: $q0 = COPY [[DUP]](<16 x s8>)
- ; CHECK: RET_ReallyLR implicit $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0
+ ; CHECK-NEXT: %trunc:gpr(s8) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:gpr(s32) = G_ANYEXT %trunc(s8)
+ ; CHECK-NEXT: [[DUP:%[0-9]+]]:fpr(<16 x s8>) = G_DUP [[ANYEXT]](s32)
+ ; CHECK-NEXT: $q0 = COPY [[DUP]](<16 x s8>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
%0:_(s32) = COPY $w0
%trunc:_(s8) = G_TRUNC %0(s32)
%1:_(<16 x s8>) = G_DUP %trunc(s8)
diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
index 307aa397eabbb..5aff8e0351487 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
@@ -994,9 +994,9 @@ define <8 x i16> @smull_noextvec_v8i8_v8i16(<8 x i8> %arg) nounwind {
;
; CHECK-GI-LABEL: smull_noextvec_v8i8_v8i16:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI34_0
+; CHECK-GI-NEXT: mov w8, #-999 // =0xfffffc19
; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI34_0]
+; CHECK-GI-NEXT: dup v1.8h, w8
; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT: ret
%tmp3 = sext <8 x i8> %arg to <8 x i16>
@@ -1088,29 +1088,13 @@ define <8 x i16> @umull_extvec_v8i8_v8i16(<8 x i8> %arg) nounwind {
define <8 x i16> @umull_noextvec_v8i8_v8i16(<8 x i8> %arg) nounwind {
; Do not use SMULL if the BUILD_VECTOR element values are too big.
-; CHECK-NEON-LABEL: umull_noextvec_v8i8_v8i16:
-; CHECK-NEON: // %bb.0:
-; CHECK-NEON-NEXT: mov w8, #999 // =0x3e7
-; CHECK-NEON-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEON-NEXT: dup v1.8h, w8
-; CHECK-NEON-NEXT: mul v0.8h, v0.8h, v1.8h
-; CHECK-NEON-NEXT: ret
-;
-; CHECK-SVE-LABEL: umull_noextvec_v8i8_v8i16:
-; CHECK-SVE: // %bb.0:
-; CHECK-SVE-NEXT: mov w8, #999 // =0x3e7
-; CHECK-SVE-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-SVE-NEXT: dup v1.8h, w8
-; CHECK-SVE-NEXT: mul v0.8h, v0.8h, v1.8h
-; CHECK-SVE-NEXT: ret
-;
-; CHECK-GI-LABEL: umull_noextvec_v8i8_v8i16:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI38_0
-; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI38_0]
-; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: umull_noextvec_v8i8_v8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #999 // =0x3e7
+; CHECK-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-NEXT: dup v1.8h, w8
+; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ret
%tmp3 = zext <8 x i8> %arg to <8 x i16>
%tmp4 = mul <8 x i16> %tmp3, <i16 999, i16 999, i16 999, i16 999, i16 999, i16 999, i16 999, i16 999>
ret <8 x i16> %tmp4
diff --git a/llvm/test/CodeGen/AArch64/neon-mov.ll b/llvm/test/CodeGen/AArch64/neon-mov.ll
index 170ba7292ae60..cb85bbda80a80 100644
--- a/llvm/test/CodeGen/AArch64/neon-mov.ll
+++ b/llvm/test/CodeGen/AArch64/neon-mov.ll
@@ -109,29 +109,11 @@ define <4 x i32> @movi4s_lsl16() {
}
define <4 x i32> @movi4s_fneg() {
-; CHECK-NOFP16-SD-LABEL: movi4s_fneg:
-; CHECK-NOFP16-SD: // %bb.0:
-; CHECK-NOFP16-SD-NEXT: movi v0.4s, #240, lsl #8
-; CHECK-NOFP16-SD-NEXT: fneg v0.4s, v0.4s
-; CHECK-NOFP16-SD-NEXT: ret
-;
-; CHECK-FP16-SD-LABEL: movi4s_fneg:
-; CHECK-FP16-SD: // %bb.0:
-; CHECK-FP16-SD-NEXT: movi v0.4s, #240, lsl #8
-; CHECK-FP16-SD-NEXT: fneg v0.4s, v0.4s
-; CHECK-FP16-SD-NEXT: ret
-;
-; CHECK-NOFP16-GI-LABEL: movi4s_fneg:
-; CHECK-NOFP16-GI: // %bb.0:
-; CHECK-NOFP16-GI-NEXT: movi v0.4s, #240, lsl #8
-; CHECK-NOFP16-GI-NEXT: fneg v0.4s, v0.4s
-; CHECK-NOFP16-GI-NEXT: ret
-;
-; CHECK-FP16-GI-LABEL: movi4s_fneg:
-; CHECK-FP16-GI: // %bb.0:
-; CHECK-FP16-GI-NEXT: movi v0.4s, #240, lsl #8
-; CHECK-FP16-GI-NEXT: fneg v0.4s, v0.4s
-; CHECK-FP16-GI-NEXT: ret
+; CHECK-LABEL: movi4s_fneg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.4s, #240, lsl #8
+; CHECK-NEXT: fneg v0.4s, v0.4s
+; CHECK-NEXT: ret
ret <4 x i32> <i32 2147545088, i32 2147545088, i32 2147545088, i32 2147545088>
}
@@ -308,23 +290,17 @@ define <8 x i16> @mvni8h_neg() {
; CHECK-NOFP16-SD-NEXT: dup v0.8h, w8
; CHECK-NOFP16-SD-NEXT: ret
;
-; CHECK-FP16-SD-LABEL: mvni8h_neg:
-; CHECK-FP16-SD: // %bb.0:
-; CHECK-FP16-SD-NEXT: movi v0.8h, #240
-; CHECK-FP16-SD-NEXT: fneg v0.8h, v0.8h
-; CHECK-FP16-SD-NEXT: ret
+; CHECK-FP16-LABEL: mvni8h_neg:
+; CHECK-FP16: // %bb.0:
+; CHECK-FP16-NEXT: movi v0.8h, #240
+; CHECK-FP16-NEXT: fneg v0.8h, v0.8h
+; CHECK-FP16-NEXT: ret
;
; CHECK-NOFP16-GI-LABEL: mvni8h_neg:
; CHECK-NOFP16-GI: // %bb.0:
-; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI32_0
-; CHECK-NOFP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI32_0]
+; CHECK-NOFP16-GI-NEXT: mov w8, #-32528 // =0xffff80f0
+; CHECK-NOFP16-GI-NEXT: dup v0.8h, w8
; CHECK-NOFP16-GI-NEXT: ret
-;
-; CHECK-FP16-GI-LABEL: mvni8h_neg:
-; CHECK-FP16-GI: // %bb.0:
-; CHECK-FP16-GI-NEXT: movi v0.8h, #240
-; CHECK-FP16-GI-NEXT: fneg v0.8h, v0.8h
-; CHECK-FP16-GI-NEXT: ret
ret <8 x i16> <i16 33008, i16 33008, i16 33008, i16 33008, i16 33008, i16 33008, i16 33008, i16 33008>
}
@@ -494,29 +470,11 @@ define <2 x double> @fmov2d() {
}
define <2 x double> @fmov2d_neg0() {
-; CHECK-NOFP16-SD-LABEL: fmov2d_neg0:
-; CHECK-NOFP16-SD: // %bb.0:
-; CHECK-NOFP16-SD-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NOFP16-SD-NEXT: fneg v0.2d, v0.2d
-; CHECK-NOFP16-SD-NEXT: ret
-;
-; CHECK-FP16-SD-LABEL: fmov2d_neg0:
-; CHECK-FP16-SD: // %bb.0:
-; CHECK-FP16-SD-NEXT: movi v0.2d, #0000000000000000
-; CHECK-FP16-SD-NEXT: fneg v0.2d, v0.2d
-; CHECK-FP16-SD-NEXT: ret
-;
-; CHECK-NOFP16-GI-LABEL: fmov2d_neg0:
-; CHECK-NOFP16-GI: // %bb.0:
-; CHECK-NOFP16-GI-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NOFP16-GI-NEXT: fneg v0.2d, v0.2d
-; CHECK-NOFP16-GI-NEXT: ret
-;
-; CHECK-FP16-GI-LABEL: fmov2d_neg0:
-; CHECK-FP16-GI: // %bb.0:
-; CHECK-FP16-GI-NEXT: movi v0.2d, #0000000000000000
-; CHECK-FP16-GI-NEXT: fneg v0.2d, v0.2d
-; CHECK-FP16-GI-NEXT: ret
+; CHECK-LABEL: fmov2d_neg0:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: fneg v0.2d, v0.2d
+; CHECK-NEXT: ret
ret <2 x double> <double -0.0, double -0.0>
}
@@ -581,5 +539,4 @@ define <2 x i32> @movi1d() {
ret <2 x i32> %1
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK-FP16: {{.*}}
; CHECK-NOFP16: {{.*}}
|
case AArch64::G_DUP: { | ||
// Extend smaller gpr to 32-bits | ||
Builder.setInsertPt(*MI.getParent(), MI.getIterator()); | ||
|
||
Register ConstReg; | ||
auto ConstMI = MRI.getVRegDef(MI.getOperand(1).getReg()); | ||
if (ConstMI->getOpcode() == TargetOpcode::G_CONSTANT) { | ||
auto CstVal = ConstMI->getOperand(1).getCImm()->getValue(); | ||
ConstReg = | ||
Builder.buildConstant(LLT::scalar(32), CstVal.sext(32)).getReg(0); | ||
ConstMI->eraseFromParent(); | ||
} else { | ||
ConstReg = Builder.buildAnyExt(LLT::scalar(32), MI.getOperand(1).getReg()) | ||
.getReg(0); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What if it's an i64 G_CONSTANT?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
i32 and i64 constants are not changed; only constants that are smaller than 32 bits get extended.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It might be good to add an assert that the type isn't larger than we expect
else { | ||
if (ScalarTy.getSizeInBits() < 32 && | ||
getRegBank(ScalarReg, MRI, TRI) == &AArch64::GPRRegBank) | ||
MappingID = 1; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you make `1` here a `const unsigned` with a name like `CustomID` so it's clearer? Also add a comment here so the reader knows it causes `applyMappingImpl` to be called.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with suggestions.
Immediate operand gets extended in RegBankSelect to allow for better pattern matching in TableGen
2563474
to
0e3f18e
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
…96780)" This reverts commit 5a5cd3f. Due to test suite failures on AArch64: https://lab.llvm.org/buildbot/#/builders/125/builds/541
This PR caused some of the test suite builds to crash the compiler: https://lab.llvm.org/buildbot/#/builders/125/builds/541 I'll dig out a reproducer, but you can also compile using the files in llvm-test-suite. |
|
Thanks for the info @DavidSpickett. Sorry we didn't reply here. @chuong is looking into it! |
…lvm#96780)" This reverts commit 5a5cd3f. Due to test suite failures on AArch64: https://lab.llvm.org/buildbot/#/builders/125/builds/541
…lvm#96780) Immediate operand gets extended in RegBankSelect to at least 32 bits to allow for better pattern matching in TableGen. The previous patch was erasing a constant without checking if it has more than one use. Changes: - Does not erase the constant - Added @v_dup16_const test
…lvm#96780) Immediate operand gets extended in RegBankSelect to at least 32 bits to allow for better pattern matching in TableGen. The previous patch was erasing a constant without checking if it has more than one use. Changes: - Does not erase the constant - Added @v_dup16_const test
…lvm#96780) Immediate operand gets extended in RegBankSelect to at least 32 bits to allow for better pattern matching in TableGen. The previous patch was erasing a constant without checking if it has more than one use. Changes: - Does not erase the constant - Added @v_dup16_const test
…lvm#96780) Immediate operand gets extended in RegBankSelect to at least 32 bits to allow for better pattern matching in TableGen. The previous patch was erasing a constant without checking if it has more than one use. Changes: - Does not erase the constant - Added @v_dup16_const test
…96780) (#99014) Summary: Immediate operand gets extended in RegBankSelect to at least 32 bits to allow for better pattern matching in TableGen. The previous patch was erasing a constant without checking if it has more than one use. Changes: - Does not erase the constant - Added @v_dup16_const test Test Plan: Reviewers: Subscribers: Tasks: Tags: Differential Revision: https://phabricator.intern.facebook.com/D60250559
…llvm#96782) First commit's PR is llvm#96780 Combines the following instructions: `ushll r0, r0, #0` `shl r0, r0, #3` Into: `ushll r0, r0, #3`
…llvm#96782) First commit's PR is llvm#96780 Combines the following instructions: `ushll r0, r0, #0` `shl r0, r0, #3` Into: `ushll r0, r0, #3`
G_DUP's immediate operand gets extended in RegBankSelect to allow for better pattern matching in TableGen for #96782