Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RISCV] Improve constant materialization to end with 'not' if the cons… #66950

Merged
merged 1 commit into from
Sep 20, 2023

Conversation

topperc
Copy link
Collaborator

@topperc topperc commented Sep 20, 2023

…tant has leading ones.

We can invert the value and treat it as if it had leading zeroes.

…tant has leading ones.

We can invert the value and treat it as if it had leading zeroes.
@llvmbot
Copy link
Member

llvmbot commented Sep 20, 2023

@llvm/pr-subscribers-backend-risc-v

Changes

…tant has leading ones.

We can invert the value and treat it as if it had leading zeroes.


Full diff: https://github.com/llvm/llvm-project/pull/66950.diff

2 Files Affected:

  • (modified) llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp (+62-36)
  • (modified) llvm/test/CodeGen/RISCV/imm.ll (+61-79)
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
index f659779e9772055..6c5d0ad5b092708 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
@@ -171,6 +171,57 @@ static unsigned extractRotateInfo(int64_t Val) {
   return 0;
 }
 
+static void generateInstSeqLeadingZeros(int64_t Val,
+                                        const FeatureBitset &ActiveFeatures,
+                                        RISCVMatInt::InstSeq &Res) {
+  assert(Val > 0 && "Expected postive val");
+
+  unsigned LeadingZeros = llvm::countl_zero((uint64_t)Val);
+  uint64_t ShiftedVal = (uint64_t)Val << LeadingZeros;
+  // Fill in the bits that will be shifted out with 1s. An example where this
+  // helps is trailing one masks with 32 or more ones. This will generate
+  // ADDI -1 and an SRLI.
+  ShiftedVal |= maskTrailingOnes<uint64_t>(LeadingZeros);
+
+  RISCVMatInt::InstSeq TmpSeq;
+  generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
+
+  // Keep the new sequence if it is an improvement or the original is empty.
+  if ((TmpSeq.size() + 1) < Res.size() ||
+      (Res.empty() && TmpSeq.size() < 8)) {
+    TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros);
+    Res = TmpSeq;
+  }
+
+  // Some cases can benefit from filling the lower bits with zeros instead.
+  ShiftedVal &= maskTrailingZeros<uint64_t>(LeadingZeros);
+  TmpSeq.clear();
+  generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
+
+  // Keep the new sequence if it is an improvement or the original is empty.
+  if ((TmpSeq.size() + 1) < Res.size() ||
+      (Res.empty() && TmpSeq.size() < 8)) {
+    TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros);
+    Res = TmpSeq;
+  }
+
+  // If we have exactly 32 leading zeros and Zba, we can try using zext.w at
+  // the end of the sequence.
+  if (LeadingZeros == 32 && ActiveFeatures[RISCV::FeatureStdExtZba]) {
+    // Try replacing upper bits with 1.
+    uint64_t LeadingOnesVal = Val | maskLeadingOnes<uint64_t>(LeadingZeros);
+    TmpSeq.clear();
+    generateInstSeqImpl(LeadingOnesVal, ActiveFeatures, TmpSeq);
+
+    // Keep the new sequence if it is an improvement.
+    if ((TmpSeq.size() + 1) < Res.size() ||
+        (Res.empty() && TmpSeq.size() < 8)) {
+      TmpSeq.emplace_back(RISCV::ADD_UW, 0);
+      Res = TmpSeq;
+    }
+  }
+}
+
 namespace llvm::RISCVMatInt {
 InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
   RISCVMatInt::InstSeq Res;
@@ -210,47 +261,21 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
   // with no leading zeros and use a final SRLI to restore them.
   if (Val > 0) {
     assert(Res.size() > 2 && "Expected longer sequence");
-    unsigned LeadingZeros = llvm::countl_zero((uint64_t)Val);
-    uint64_t ShiftedVal = (uint64_t)Val << LeadingZeros;
-    // Fill in the bits that will be shifted out with 1s. An example where this
-    // helps is trailing one masks with 32 or more ones. This will generate
-    // ADDI -1 and an SRLI.
-    ShiftedVal |= maskTrailingOnes<uint64_t>(LeadingZeros);
+    generateInstSeqLeadingZeros(Val, ActiveFeatures, Res);
+  }
 
+  // If the constant is negative, trying inverting and using our trailing zero
+  // optimizations. Use an xori to invert the final value.
+  if (Val < 0 && Res.size() > 3) {
+    uint64_t InvertedVal = ~(uint64_t)Val;
     RISCVMatInt::InstSeq TmpSeq;
-    generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
+    generateInstSeqLeadingZeros(InvertedVal, ActiveFeatures, TmpSeq);
 
-    // Keep the new sequence if it is an improvement.
-    if ((TmpSeq.size() + 1) < Res.size()) {
-      TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros);
-      Res = TmpSeq;
-    }
-
-    // Some cases can benefit from filling the lower bits with zeros instead.
-    ShiftedVal &= maskTrailingZeros<uint64_t>(LeadingZeros);
-    TmpSeq.clear();
-    generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
-
-    // Keep the new sequence if it is an improvement.
-    if ((TmpSeq.size() + 1) < Res.size()) {
-      TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros);
+    // Keep it if we found a sequence that is smaller after inverting.
+    if (!TmpSeq.empty() && (TmpSeq.size() + 1) < Res.size()) {
+      TmpSeq.emplace_back(RISCV::XORI, -1);
       Res = TmpSeq;
     }
-
-    // If we have exactly 32 leading zeros and Zba, we can try using zext.w at
-    // the end of the sequence.
-    if (LeadingZeros == 32 && ActiveFeatures[RISCV::FeatureStdExtZba]) {
-      // Try replacing upper bits with 1.
-      uint64_t LeadingOnesVal = Val | maskLeadingOnes<uint64_t>(LeadingZeros);
-      TmpSeq.clear();
-      generateInstSeqImpl(LeadingOnesVal, ActiveFeatures, TmpSeq);
-
-      // Keep the new sequence if it is an improvement.
-      if ((TmpSeq.size() + 1) < Res.size()) {
-        TmpSeq.emplace_back(RISCV::ADD_UW, 0);
-        Res = TmpSeq;
-      }
-    }
   }
 
   // If the Low and High halves are the same, use pack. The pack instruction
@@ -429,6 +454,7 @@ OpndKind Inst::getOpndKind() const {
     return RISCVMatInt::RegReg;
   case RISCV::ADDI:
   case RISCV::ADDIW:
+  case RISCV::XORI:
   case RISCV::SLLI:
   case RISCV::SRLI:
   case RISCV::SLLI_UW:
diff --git a/llvm/test/CodeGen/RISCV/imm.ll b/llvm/test/CodeGen/RISCV/imm.ll
index 4f9cf1d947d5c35..307d19690bff0ec 100644
--- a/llvm/test/CodeGen/RISCV/imm.ll
+++ b/llvm/test/CodeGen/RISCV/imm.ll
@@ -1058,47 +1058,37 @@ define i64 @imm_end_xori_1() nounwind {
 ;
 ; RV64I-LABEL: imm_end_xori_1:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    li a0, -1
-; RV64I-NEXT:    slli a0, a0, 36
-; RV64I-NEXT:    addi a0, a0, 1
-; RV64I-NEXT:    slli a0, a0, 25
-; RV64I-NEXT:    addi a0, a0, -1
+; RV64I-NEXT:    lui a0, 983040
+; RV64I-NEXT:    srli a0, a0, 3
+; RV64I-NEXT:    not a0, a0
 ; RV64I-NEXT:    ret
 ;
 ; RV64IZBA-LABEL: imm_end_xori_1:
 ; RV64IZBA:       # %bb.0:
-; RV64IZBA-NEXT:    li a0, -1
-; RV64IZBA-NEXT:    slli a0, a0, 36
-; RV64IZBA-NEXT:    addi a0, a0, 1
-; RV64IZBA-NEXT:    slli a0, a0, 25
-; RV64IZBA-NEXT:    addi a0, a0, -1
+; RV64IZBA-NEXT:    lui a0, 983040
+; RV64IZBA-NEXT:    srli a0, a0, 3
+; RV64IZBA-NEXT:    not a0, a0
 ; RV64IZBA-NEXT:    ret
 ;
 ; RV64IZBB-LABEL: imm_end_xori_1:
 ; RV64IZBB:       # %bb.0:
-; RV64IZBB-NEXT:    li a0, -1
-; RV64IZBB-NEXT:    slli a0, a0, 36
-; RV64IZBB-NEXT:    addi a0, a0, 1
-; RV64IZBB-NEXT:    slli a0, a0, 25
-; RV64IZBB-NEXT:    addi a0, a0, -1
+; RV64IZBB-NEXT:    lui a0, 983040
+; RV64IZBB-NEXT:    srli a0, a0, 3
+; RV64IZBB-NEXT:    not a0, a0
 ; RV64IZBB-NEXT:    ret
 ;
 ; RV64IZBS-LABEL: imm_end_xori_1:
 ; RV64IZBS:       # %bb.0:
-; RV64IZBS-NEXT:    li a0, -1
-; RV64IZBS-NEXT:    slli a0, a0, 36
-; RV64IZBS-NEXT:    addi a0, a0, 1
-; RV64IZBS-NEXT:    slli a0, a0, 25
-; RV64IZBS-NEXT:    addi a0, a0, -1
+; RV64IZBS-NEXT:    lui a0, 983040
+; RV64IZBS-NEXT:    srli a0, a0, 3
+; RV64IZBS-NEXT:    not a0, a0
 ; RV64IZBS-NEXT:    ret
 ;
 ; RV64IXTHEADBB-LABEL: imm_end_xori_1:
 ; RV64IXTHEADBB:       # %bb.0:
-; RV64IXTHEADBB-NEXT:    li a0, -1
-; RV64IXTHEADBB-NEXT:    slli a0, a0, 36
-; RV64IXTHEADBB-NEXT:    addi a0, a0, 1
-; RV64IXTHEADBB-NEXT:    slli a0, a0, 25
-; RV64IXTHEADBB-NEXT:    addi a0, a0, -1
+; RV64IXTHEADBB-NEXT:    lui a0, 983040
+; RV64IXTHEADBB-NEXT:    srli a0, a0, 3
+; RV64IXTHEADBB-NEXT:    not a0, a0
 ; RV64IXTHEADBB-NEXT:    ret
   ret i64 -2305843009180139521 ; 0xE000_0000_01FF_FFFF
 }
@@ -1174,13 +1164,12 @@ define i64 @imm_2reg_1() nounwind {
 ;
 ; RV64-NOPOOL-LABEL: imm_2reg_1:
 ; RV64-NOPOOL:       # %bb.0:
-; RV64-NOPOOL-NEXT:    li a0, -1
-; RV64-NOPOOL-NEXT:    slli a0, a0, 35
-; RV64-NOPOOL-NEXT:    addi a0, a0, 9
+; RV64-NOPOOL-NEXT:    lui a0, 1048430
+; RV64-NOPOOL-NEXT:    addiw a0, a0, 1493
 ; RV64-NOPOOL-NEXT:    slli a0, a0, 13
-; RV64-NOPOOL-NEXT:    addi a0, a0, 837
-; RV64-NOPOOL-NEXT:    slli a0, a0, 12
-; RV64-NOPOOL-NEXT:    addi a0, a0, 1656
+; RV64-NOPOOL-NEXT:    addi a0, a0, -1921
+; RV64-NOPOOL-NEXT:    srli a0, a0, 4
+; RV64-NOPOOL-NEXT:    not a0, a0
 ; RV64-NOPOOL-NEXT:    ret
 ;
 ; RV64I-POOL-LABEL: imm_2reg_1:
@@ -1191,45 +1180,42 @@ define i64 @imm_2reg_1() nounwind {
 ;
 ; RV64IZBA-LABEL: imm_2reg_1:
 ; RV64IZBA:       # %bb.0:
-; RV64IZBA-NEXT:    li a0, -1
-; RV64IZBA-NEXT:    slli a0, a0, 35
-; RV64IZBA-NEXT:    addi a0, a0, 9
+; RV64IZBA-NEXT:    lui a0, 1048430
+; RV64IZBA-NEXT:    addiw a0, a0, 1493
 ; RV64IZBA-NEXT:    slli a0, a0, 13
-; RV64IZBA-NEXT:    addi a0, a0, 837
-; RV64IZBA-NEXT:    slli a0, a0, 12
-; RV64IZBA-NEXT:    addi a0, a0, 1656
+; RV64IZBA-NEXT:    addi a0, a0, -1921
+; RV64IZBA-NEXT:    srli a0, a0, 4
+; RV64IZBA-NEXT:    not a0, a0
 ; RV64IZBA-NEXT:    ret
 ;
 ; RV64IZBB-LABEL: imm_2reg_1:
 ; RV64IZBB:       # %bb.0:
-; RV64IZBB-NEXT:    li a0, -1
-; RV64IZBB-NEXT:    slli a0, a0, 35
-; RV64IZBB-NEXT:    addi a0, a0, 9
+; RV64IZBB-NEXT:    lui a0, 1048430
+; RV64IZBB-NEXT:    addiw a0, a0, 1493
 ; RV64IZBB-NEXT:    slli a0, a0, 13
-; RV64IZBB-NEXT:    addi a0, a0, 837
-; RV64IZBB-NEXT:    slli a0, a0, 12
-; RV64IZBB-NEXT:    addi a0, a0, 1656
+; RV64IZBB-NEXT:    addi a0, a0, -1921
+; RV64IZBB-NEXT:    srli a0, a0, 4
+; RV64IZBB-NEXT:    not a0, a0
 ; RV64IZBB-NEXT:    ret
 ;
 ; RV64IZBS-LABEL: imm_2reg_1:
 ; RV64IZBS:       # %bb.0:
-; RV64IZBS-NEXT:    lui a0, 74565
-; RV64IZBS-NEXT:    addiw a0, a0, 1656
-; RV64IZBS-NEXT:    bseti a0, a0, 60
-; RV64IZBS-NEXT:    bseti a0, a0, 61
-; RV64IZBS-NEXT:    bseti a0, a0, 62
-; RV64IZBS-NEXT:    bseti a0, a0, 63
+; RV64IZBS-NEXT:    lui a0, 1048430
+; RV64IZBS-NEXT:    addiw a0, a0, 1493
+; RV64IZBS-NEXT:    slli a0, a0, 13
+; RV64IZBS-NEXT:    addi a0, a0, -1921
+; RV64IZBS-NEXT:    srli a0, a0, 4
+; RV64IZBS-NEXT:    not a0, a0
 ; RV64IZBS-NEXT:    ret
 ;
 ; RV64IXTHEADBB-LABEL: imm_2reg_1:
 ; RV64IXTHEADBB:       # %bb.0:
-; RV64IXTHEADBB-NEXT:    li a0, -1
-; RV64IXTHEADBB-NEXT:    slli a0, a0, 35
-; RV64IXTHEADBB-NEXT:    addi a0, a0, 9
+; RV64IXTHEADBB-NEXT:    lui a0, 1048430
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, 1493
 ; RV64IXTHEADBB-NEXT:    slli a0, a0, 13
-; RV64IXTHEADBB-NEXT:    addi a0, a0, 837
-; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
-; RV64IXTHEADBB-NEXT:    addi a0, a0, 1656
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -1921
+; RV64IXTHEADBB-NEXT:    srli a0, a0, 4
+; RV64IXTHEADBB-NEXT:    not a0, a0
 ; RV64IXTHEADBB-NEXT:    ret
   ret i64 -1152921504301427080 ; 0xF000_0000_1234_5678
 }
@@ -1724,13 +1710,12 @@ define i64 @imm_neg_9223372034778874949() {
 ;
 ; RV64-NOPOOL-LABEL: imm_neg_9223372034778874949:
 ; RV64-NOPOOL:       # %bb.0:
-; RV64-NOPOOL-NEXT:    li a0, -1
-; RV64-NOPOOL-NEXT:    slli a0, a0, 37
-; RV64-NOPOOL-NEXT:    addi a0, a0, 31
+; RV64-NOPOOL-NEXT:    lui a0, 1048329
+; RV64-NOPOOL-NEXT:    addiw a0, a0, -1911
 ; RV64-NOPOOL-NEXT:    slli a0, a0, 12
-; RV64-NOPOOL-NEXT:    addi a0, a0, -273
-; RV64-NOPOOL-NEXT:    slli a0, a0, 14
-; RV64-NOPOOL-NEXT:    addi a0, a0, -1093
+; RV64-NOPOOL-NEXT:    addi a0, a0, -1911
+; RV64-NOPOOL-NEXT:    srli a0, a0, 1
+; RV64-NOPOOL-NEXT:    not a0, a0
 ; RV64-NOPOOL-NEXT:    ret
 ;
 ; RV64I-POOL-LABEL: imm_neg_9223372034778874949:
@@ -1741,24 +1726,22 @@ define i64 @imm_neg_9223372034778874949() {
 ;
 ; RV64IZBA-LABEL: imm_neg_9223372034778874949:
 ; RV64IZBA:       # %bb.0:
-; RV64IZBA-NEXT:    li a0, -1
-; RV64IZBA-NEXT:    slli a0, a0, 37
-; RV64IZBA-NEXT:    addi a0, a0, 31
+; RV64IZBA-NEXT:    lui a0, 1048329
+; RV64IZBA-NEXT:    addiw a0, a0, -1911
 ; RV64IZBA-NEXT:    slli a0, a0, 12
-; RV64IZBA-NEXT:    addi a0, a0, -273
-; RV64IZBA-NEXT:    slli a0, a0, 14
-; RV64IZBA-NEXT:    addi a0, a0, -1093
+; RV64IZBA-NEXT:    addi a0, a0, -1911
+; RV64IZBA-NEXT:    srli a0, a0, 1
+; RV64IZBA-NEXT:    not a0, a0
 ; RV64IZBA-NEXT:    ret
 ;
 ; RV64IZBB-LABEL: imm_neg_9223372034778874949:
 ; RV64IZBB:       # %bb.0:
-; RV64IZBB-NEXT:    li a0, -1
-; RV64IZBB-NEXT:    slli a0, a0, 37
-; RV64IZBB-NEXT:    addi a0, a0, 31
+; RV64IZBB-NEXT:    lui a0, 1048329
+; RV64IZBB-NEXT:    addiw a0, a0, -1911
 ; RV64IZBB-NEXT:    slli a0, a0, 12
-; RV64IZBB-NEXT:    addi a0, a0, -273
-; RV64IZBB-NEXT:    slli a0, a0, 14
-; RV64IZBB-NEXT:    addi a0, a0, -1093
+; RV64IZBB-NEXT:    addi a0, a0, -1911
+; RV64IZBB-NEXT:    srli a0, a0, 1
+; RV64IZBB-NEXT:    not a0, a0
 ; RV64IZBB-NEXT:    ret
 ;
 ; RV64IZBS-LABEL: imm_neg_9223372034778874949:
@@ -1770,13 +1753,12 @@ define i64 @imm_neg_9223372034778874949() {
 ;
 ; RV64IXTHEADBB-LABEL: imm_neg_9223372034778874949:
 ; RV64IXTHEADBB:       # %bb.0:
-; RV64IXTHEADBB-NEXT:    li a0, -1
-; RV64IXTHEADBB-NEXT:    slli a0, a0, 37
-; RV64IXTHEADBB-NEXT:    addi a0, a0, 31
+; RV64IXTHEADBB-NEXT:    lui a0, 1048329
+; RV64IXTHEADBB-NEXT:    addiw a0, a0, -1911
 ; RV64IXTHEADBB-NEXT:    slli a0, a0, 12
-; RV64IXTHEADBB-NEXT:    addi a0, a0, -273
-; RV64IXTHEADBB-NEXT:    slli a0, a0, 14
-; RV64IXTHEADBB-NEXT:    addi a0, a0, -1093
+; RV64IXTHEADBB-NEXT:    addi a0, a0, -1911
+; RV64IXTHEADBB-NEXT:    srli a0, a0, 1
+; RV64IXTHEADBB-NEXT:    not a0, a0
 ; RV64IXTHEADBB-NEXT:    ret
   ret i64 -9223372034778874949 ; 0x800000007bbbbbbb
 }

Copy link
Collaborator

@preames preames left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM - trying the negate seems like a reasonable idea.

As a follow up, could we use a sra to sign extend a leading one bit? Would that ever be better than inverting?

@topperc topperc merged commit a8b8e94 into llvm:main Sep 20, 2023
topperc added a commit that referenced this pull request Sep 21, 2023
…the cons… (#66950)"

This reverts commit a8b8e94.

Forgot to update MC tests.
topperc added a commit that referenced this pull request Sep 21, 2023
…if the cons… (#66950)"

With MC test updates.

Original commit message:

We can invert the value and treat it as if it had leading zeroes.
@topperc topperc deleted the pr/constant-mat3 branch September 22, 2023 18:57
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants