[AArch64][GlobalISel] Avoid running the shl(zext(a), C) -> zext(shl(a, C)) combine. #67045
Conversation
[AArch64][GlobalISel] Avoid running the shl(zext(a), C) -> zext(shl(a, C)) combine.

This combine moves shifts inwards which narrows them. For AArch64 however doing so prevents us from using our extended-register operands, since they only work with shifts.

Gives some code size savings on -Os CTMark:

Program                              size.__text
                                     before      after       diff
SPASS/SPASS                          410616.00   410616.00    0.0%
kimwitu++/kc                         453636.00   453636.00    0.0%
tramp3d-v4/tramp3d-v4                393808.00   393808.00    0.0%
mafft/pairlocalalign                 244284.00   244280.00   -0.0%
sqlite3/sqlite3                      287832.00   287800.00   -0.0%
Bullet/bullet                        461144.00   461092.00   -0.0%
consumer-typeset/consumer-typeset    412220.00   412164.00   -0.0%
7zip/7zip-benchmark                  593512.00   593364.00   -0.0%
ClamAV/clamscan                      381964.00   381836.00   -0.0%
lencod/lencod                        428060.00   427836.00   -0.1%
Geomean difference                                           -0.0%
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-llvm-globalisel

Changes

This combine moves shifts inwards which narrows them. For AArch64 however doing so prevents us from using our extended-register operands, since they only work with shifts. Gives some code size savings on -Os CTMark (table above).

Full diff: https://github.com/llvm/llvm-project/pull/67045.diff

4 Files Affected:
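To make the trade-off concrete, here is a scalar C++ model of the two shapes (an illustrative sketch, not code from this patch; the names are made up). The rewrite is only sound when the combiner can prove, via known bits, that the narrow shift loses no set bits, which the mask below guarantees:

#include <cstdint>

// shl (zext x), C: extend first, then shift in the wide type. On AArch64
// the extend can fold into an extended-register operand of the user, e.g.
// "add x8, x1, w8, uxtw #1".
uint64_t ext_outside(uint32_t x) { return uint64_t(x & 511) << 1; }

// zext (shl x, C): the combine's output, shifting in the narrow type.
// Equivalent here because (x & 511) << 1 fits in 32 bits, but the folding
// opportunity above is lost.
uint64_t ext_inside(uint32_t x) { return uint64_t((x & 511) << 1); }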
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index bd802bd4b173a0b..477b31cb776e68a 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -4129,6 +4129,12 @@ class TargetLowering : public TargetLoweringBase {
return true;
}
+ /// GlobalISel - return true if it's profitable to perform the combine:
+ /// shl ([sza]ext x), y => zext (shl x, y)
+ virtual bool isDesirableToPullExtFromShl(const MachineInstr &MI) const {
+ return true;
+ }
+
// Return AndOrSETCCFoldKind::{AddAnd, ABS} if its desirable to try and
// optimize LogicOp(SETCC0, SETCC1). An example (what is implemented as of
// writing this) is:
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 2ce68950424095e..f79944e824575a1 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1719,6 +1719,8 @@ void CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
RegisterImmPair &MatchData) {
assert(MI.getOpcode() == TargetOpcode::G_SHL && KB);
+ if (!getTargetLowering().isDesirableToPullExtFromShl(MI))
+ return false;
Register LHS = MI.getOperand(1).getReg();
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index e015f68dabc6977..bdde4b5e8e00f87 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -690,6 +690,10 @@ class AArch64TargetLowering : public TargetLowering {
bool isDesirableToCommuteWithShift(const SDNode *N,
CombineLevel Level) const override;
+ bool isDesirableToPullExtFromShl(const MachineInstr &MI) const override {
+ return false;
+ }
+
/// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
bool isDesirableToCommuteXorWithShift(const SDNode *N) const override;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/no-reduce-shl-of-ext.ll b/llvm/test/CodeGen/AArch64/GlobalISel/no-reduce-shl-of-ext.ll
new file mode 100644
index 000000000000000..ab009cb7cc0e305
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/no-reduce-shl-of-ext.ll
@@ -0,0 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc %s -verify-machineinstrs -mtriple aarch64-apple-darwin -global-isel -o - | FileCheck %s
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+%struct.mszip_stream = type { i32, i32, i8, i32, ptr, i32, i32, i32, i32, ptr, ptr, ptr, ptr, ptr, i32, i32, i32, [288 x i8], [32 x i8], [1152 x i16], [128 x i16], [32768 x i8], ptr, ptr }
+
+define i16 @test(i32 %bit_buffer.6.lcssa, ptr %zip, ptr %.out) {
+; CHECK-LABEL: test:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: and w8, w0, #0x1ff
+; CHECK-NEXT: add x8, x1, w8, uxtw #1
+; CHECK-NEXT: ldrh w0, [x8, #412]
+; CHECK-NEXT: ret
+  %and274 = and i32 %bit_buffer.6.lcssa, 511
+  %idxprom275 = zext i32 %and274 to i64
+  %arrayidx276 = getelementptr inbounds %struct.mszip_stream, ptr %zip, i64 0, i32 19, i64 %idxprom275
+  %ld = load i16, ptr %arrayidx276, align 2
+  ret i16 %ld
+}
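For context on what the extended-register operand buys, here is a C-level rendering of the test above (an assumed translation with illustrative names; the struct is replaced by a flat table):

#include <cstdint>

// The index is masked to nine bits, zero-extended, and scaled by
// sizeof(uint16_t). With the extend kept outside the shift, both the
// extend and the scale-by-2 fold into the addressing, e.g.
//   ldrh w0, [x1, w8, uxtw #1]
// (the test's version goes through an add because the array sits at
// offset 412 inside the struct).
uint16_t lookup(uint32_t bit_buffer, const uint16_t *table) {
  uint32_t idx = bit_buffer & 511;
  return table[idx];
}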
LGTM
@@ -690,6 +690,10 @@ class AArch64TargetLowering : public TargetLowering {
   bool isDesirableToCommuteWithShift(const SDNode *N,
                                      CombineLevel Level) const override;

+  bool isDesirableToPullExtFromShl(const MachineInstr &MI) const override {
+    return false;
A quick comment on the "why" would be good. Could be just the second sentence from the commit message.
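One possible wording, taking the second sentence of the commit message as the reviewer suggests (a sketch, not necessarily what was committed):

  // Doing this combine prevents us from using our extended-register
  // operands, since they only work with shifts.
  bool isDesirableToPullExtFromShl(const MachineInstr &MI) const override {
    return false;
  }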