-
Notifications
You must be signed in to change notification settings - Fork 12.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RISCV] Split TuneShiftedZExtFusion #76032
Conversation
@llvm/pr-subscribers-backend-risc-v Author: Wang Pengcheng (wangpc-pp) ChangesWe split The zexth/zextw fusions can be reused by XiangShan[2] and others Besides,
References: Full diff: https://github.com/llvm/llvm-project/pull/76032.diff 5 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 2095446c694bde..a66dd135ae5f89 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -977,9 +977,19 @@ def TuneLUIADDIFusion
def TuneAUIPCADDIFusion
: SubtargetFeature<"auipc-addi-fusion", "HasAUIPCADDIFusion",
"true", "Enable AUIPC+ADDI macrofusion">;
-def TuneShiftedZExtFusion
- : SubtargetFeature<"shifted-zext-fusion", "HasShiftedZExtFusion",
- "true", "Enable SLLI+SRLI to be fused when computing (shifted) zero extension">;
+
+def TuneZExtHFusion
+ : SubtargetFeature<"zexth-fusion", "HasZExtHFusion",
+ "true", "Enable SLLI+SRLI to be fused to zero extension of halfword">;
+
+def TuneZExtWFusion
+ : SubtargetFeature<"zextw-fusion", "HasZExtWFusion",
+ "true", "Enable SLLI+SRLI to be fused to zero extension of word">;
+
+def TuneShiftedZExtWFusion
+ : SubtargetFeature<"shifted-zextw-fusion", "HasShiftedZExtWFusion",
+ "true", "Enable SLLI+SRLI to be fused when computing (shifted) zero extension of word">;
+
def TuneLDADDFusion
: SubtargetFeature<"ld-add-fusion", "HasLDADDFusion",
"true", "Enable LD+ADD macrofusion.">;
@@ -1001,12 +1011,8 @@ def TuneSiFive7 : SubtargetFeature<"sifive7", "RISCVProcFamily", "SiFive7",
[TuneNoDefaultUnroll,
TuneShortForwardBranchOpt]>;
-def TuneVeyronFusions : SubtargetFeature<"ventana-veyron", "RISCVProcFamily", "VentanaVeyron",
- "Ventana Veyron-Series processors",
- [TuneLUIADDIFusion,
- TuneAUIPCADDIFusion,
- TuneShiftedZExtFusion,
- TuneLDADDFusion]>;
+def TuneVentanaVeyron : SubtargetFeature<"ventana-veyron", "RISCVProcFamily", "VentanaVeyron",
+ "Ventana Veyron-Series processors">;
// Assume that lock-free native-width atomics are available, even if the target
// and operating system combination would not usually provide them. The user
diff --git a/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp b/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
index 02ea5270823d8d..f948f05b22f772 100644
--- a/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
@@ -58,18 +58,66 @@ static bool isLDADD(const MachineInstr *FirstMI, const MachineInstr &SecondMI) {
return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
}
-// Fuse these patterns:
-//
-// slli rd, rs1, 32
-// srli rd, rd, x
-// where 0 <= x <= 32
-//
-// and
-//
+// Fuse zero extension of halfword:
// slli rd, rs1, 48
+// srli rd, rd, 48
+static bool isZExtH(const MachineInstr *FirstMI, const MachineInstr &SecondMI) {
+ if (SecondMI.getOpcode() != RISCV::SRLI)
+ return false;
+
+ if (!SecondMI.getOperand(2).isImm())
+ return false;
+
+ if (SecondMI.getOperand(2).getImm() != 48)
+ return false;
+
+ // Given SecondMI, when FirstMI is unspecified, we must return
+ // if SecondMI may be part of a fused pair at all.
+ if (!FirstMI)
+ return true;
+
+ if (FirstMI->getOpcode() != RISCV::SLLI)
+ return false;
+
+ if (FirstMI->getOperand(2).getImm() != 48)
+ return false;
+
+ return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
+}
+
+// Fuse zero extension of word:
+// slli rd, rs1, 32
+// srli rd, rd, 32
+static bool isZExtW(const MachineInstr *FirstMI, const MachineInstr &SecondMI) {
+ if (SecondMI.getOpcode() != RISCV::SRLI)
+ return false;
+
+ if (!SecondMI.getOperand(2).isImm())
+ return false;
+
+ if (SecondMI.getOperand(2).getImm() != 32)
+ return false;
+
+ // Given SecondMI, when FirstMI is unspecified, we must return
+ // if SecondMI may be part of a fused pair at all.
+ if (!FirstMI)
+ return true;
+
+ if (FirstMI->getOpcode() != RISCV::SLLI)
+ return false;
+
+ if (FirstMI->getOperand(2).getImm() != 32)
+ return false;
+
+ return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
+}
+
+// Fuse shifted zero extension of word:
+// slli rd, rs1, 32
// srli rd, rd, x
-static bool isShiftedZExt(const MachineInstr *FirstMI,
- const MachineInstr &SecondMI) {
+// where 0 <= x < 32
+static bool isShiftedZExtW(const MachineInstr *FirstMI,
+ const MachineInstr &SecondMI) {
if (SecondMI.getOpcode() != RISCV::SRLI)
return false;
@@ -77,8 +125,7 @@ static bool isShiftedZExt(const MachineInstr *FirstMI,
return false;
unsigned SRLIImm = SecondMI.getOperand(2).getImm();
- bool IsShiftBy48 = SRLIImm == 48;
- if (SRLIImm > 32 && !IsShiftBy48)
+ if (SRLIImm >= 32)
return false;
// Given SecondMI, when FirstMI is unspecified, we must return
@@ -89,8 +136,7 @@ static bool isShiftedZExt(const MachineInstr *FirstMI,
if (FirstMI->getOpcode() != RISCV::SLLI)
return false;
- unsigned SLLIImm = FirstMI->getOperand(2).getImm();
- if (IsShiftBy48 ? (SLLIImm != 48) : (SLLIImm != 32))
+ if (FirstMI->getOperand(2).getImm() != 32)
return false;
return checkRegisters(FirstMI->getOperand(0).getReg(), SecondMI);
@@ -144,7 +190,13 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
if (ST.hasAUIPCADDIFusion() && isAUIPCADDI(FirstMI, SecondMI))
return true;
- if (ST.hasShiftedZExtFusion() && isShiftedZExt(FirstMI, SecondMI))
+ if (ST.hasZExtHFusion() && isZExtH(FirstMI, SecondMI))
+ return true;
+
+ if (ST.hasZExtWFusion() && isZExtW(FirstMI, SecondMI))
+ return true;
+
+ if (ST.hasShiftedZExtWFusion() && isShiftedZExtW(FirstMI, SecondMI))
return true;
if (ST.hasLDADDFusion() && isLDADD(FirstMI, SecondMI))
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index 58989fd716fa0e..0fc831d261c7eb 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -254,7 +254,13 @@ def VENTANA_VEYRON_V1 : RISCVProcessorModel<"veyron-v1",
FeatureStdExtZicbop,
FeatureStdExtZicboz,
FeatureVendorXVentanaCondOps],
- [TuneVeyronFusions]>;
+ [TuneVentanaVeyron,
+ TuneLUIADDIFusion,
+ TuneAUIPCADDIFusion,
+ TuneZExtHFusion,
+ TuneZExtWFusion,
+ TuneShiftedZExtWFusion,
+ TuneLDADDFusion]>;
def XIANGSHAN_NANHU : RISCVProcessorModel<"xiangshan-nanhu",
NoSchedModel,
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 7540218633bfcb..26320b05d9be29 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -190,8 +190,8 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
}
bool hasMacroFusion() const {
- return hasLUIADDIFusion() || hasAUIPCADDIFusion() ||
- hasShiftedZExtFusion() || hasLDADDFusion();
+ return hasLUIADDIFusion() || hasAUIPCADDIFusion() || hasZExtHFusion() ||
+ hasZExtWFusion() || hasShiftedZExtWFusion() || hasLDADDFusion();
}
// Vector codegen related methods.
diff --git a/llvm/test/CodeGen/RISCV/macro-fusions-veyron-v1.mir b/llvm/test/CodeGen/RISCV/macro-fusions.mir
similarity index 94%
rename from llvm/test/CodeGen/RISCV/macro-fusions-veyron-v1.mir
rename to llvm/test/CodeGen/RISCV/macro-fusions.mir
index 6d1e92e997b324..dfcd53e36d66dc 100644
--- a/llvm/test/CodeGen/RISCV/macro-fusions-veyron-v1.mir
+++ b/llvm/test/CodeGen/RISCV/macro-fusions.mir
@@ -1,7 +1,7 @@
# REQUIRES: asserts
-# RUN: llc -mtriple=riscv64-linux-gnu -mcpu=veyron-v1 -x=mir < %s \
+# RUN: llc -mtriple=riscv64-linux-gnu -x=mir < %s \
# RUN: -debug-only=machine-scheduler -start-before=machine-scheduler 2>&1 \
-# RUN: -mattr=+lui-addi-fusion,+auipc-addi-fusion,+shifted-zext-fusion,+ld-add-fusion \
+# RUN: -mattr=+lui-addi-fusion,+auipc-addi-fusion,+zexth-fusion,+zextw-fusion,+shifted-zextw-fusion,+ld-add-fusion \
# RUN: | FileCheck %s
# CHECK: lui_addi:%bb.0
|
def TuneShiftedZExtFusion | ||
: SubtargetFeature<"shifted-zext-fusion", "HasShiftedZExtFusion", | ||
"true", "Enable SLLI+SRLI to be fused when computing (shifted) zero extension">; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think "word" is the right word here ) Machine word on riscv64 is 64 bits. How about ZExt16
and ZExt32
instead?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Or even if "word" is the right terminology here, I'd still prefer numeric suffixes because they are more explicit
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
zextw and zexth are consistent with the names of the instructions that were added in Zba and Zbb to replace these shift patterns. But I don't have a strong preference.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I stand for names zexth
and zextw
, as @topperc said, they are the names for equivalent existed instructions. I think people may be much more familiar with these. WDYT?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, sounds good to me. I didn't know about these.
@@ -1,7 +1,7 @@ | |||
# REQUIRES: asserts |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's rename the slli_srli_...
test to zext16...
and zext32
and shifted_zext32_...
to make it more consistent with other tests. I should have done this myself, but I've missed that :-<
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It would be nice to add tests that exercise logic from checkRegisters
, but we can leave it to a different PR
LGTM |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Minor comments only
TuneAUIPCADDIFusion, | ||
TuneShiftedZExtFusion, | ||
TuneLDADDFusion]>; | ||
def TuneVentanaVeyron : SubtargetFeature<"ventana-veyron", "RISCVProcFamily", "VentanaVeyron", |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please land this rename and split as a separate change, then rebase this on top of it. These are conceptually different changes.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Committed as 90f816e.
// slli rd, rs1, 48 | ||
// srli rd, rd, 48 | ||
static bool isZExtH(const MachineInstr *FirstMI, const MachineInstr &SecondMI) { | ||
if (SecondMI.getOpcode() != RISCV::SRLI) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Minor, but a utility which matches a SRLI and a separate one which matches a SRLI and returns the immediate might reduce some code duplication through this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can I just leave it just like this? This whole file will be removed in #72224.
We split `TuneShiftedZExtFusion` into three fusions to make them reusable and match the GCC implementation[1]. The zexth/zextw fusions can be reused by XiangShan[2] and others commercial processors, but shifted zero extension is not so common. Besides, `TuneVeyronFusions` is changed back to `TuneVentanaVeyron` and fusion features are added to processor definition. `macro-fusions-veyron-v1.mir` is renamed so it's not relevant to specific processor. References: [1] https://gcc.gnu.org/pipermail/gcc-patches/2023-November/637303.html [2] https://xiangshan-doc.readthedocs.io/zh_CN/latest/frontend/decode
ea2500f
to
65473de
Compare
Sorry for buildbot failures, I should have fixed it in 59eebb4. |
We split
TuneShiftedZExtFusion
into three fusions to make themreusable and match the GCC implementation[1].
The zexth/zextw fusions can be reused by XiangShan[2] and other
commercial processors, but shifted zero extension is not so common.
macro-fusions-veyron-v1.mir
is renamed so it's not relevant tospecific processor.
References:
[1] https://gcc.gnu.org/pipermail/gcc-patches/2023-November/637303.html
[2] https://xiangshan-doc.readthedocs.io/zh_CN/latest/frontend/decode