-
Notifications
You must be signed in to change notification settings - Fork 12.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RISCV] Use experimental.vp.splat to splat specific vector length elements. #101329
Conversation
@llvm/pr-subscribers-backend-risc-v Author: Yeting Kuo (yetingk) ChangesPreviously, llvm IR is hard to create a scalable vector splat with a specific vector length, so we use riscv.vmv.v.x and riscv.vmv.v.f to do this work. But the two rvv intrinsics needs strict type constraint which can not support fixed vector types and illegal vector types. Using vp.splat could preserve old functionality and also generate more optimized code for vector types and illegal vectors. Full diff: https://github.com/llvm/llvm-project/pull/101329.diff 3 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
index 0a66a38f6d5ab..be2e880ecd3a9 100644
--- a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
+++ b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
@@ -187,25 +187,10 @@ bool RISCVCodeGenPrepare::expandVPStrideLoad(IntrinsicInst &II) {
auto *VTy = cast<VectorType>(II.getType());
IRBuilder<> Builder(&II);
-
- // Extend VL from i32 to XLen if needed.
- if (ST->is64Bit())
- VL = Builder.CreateZExt(VL, Builder.getInt64Ty());
-
Type *STy = VTy->getElementType();
Value *Val = Builder.CreateLoad(STy, BasePtr);
- const auto &TLI = *ST->getTargetLowering();
- Value *Res;
-
- // TODO: Also support fixed/illegal vector types to splat with evl = vl.
- if (isa<ScalableVectorType>(VTy) && TLI.isTypeLegal(EVT::getEVT(VTy))) {
- unsigned VMVOp = STy->isFloatingPointTy() ? Intrinsic::riscv_vfmv_v_f
- : Intrinsic::riscv_vmv_v_x;
- Res = Builder.CreateIntrinsic(VMVOp, {VTy, VL->getType()},
- {PoisonValue::get(VTy), Val, VL});
- } else {
- Res = Builder.CreateVectorSplat(VTy->getElementCount(), Val);
- }
+ Value *Res = Builder.CreateIntrinsic(Intrinsic::experimental_vp_splat, {VTy},
+ {Val, II.getOperand(2), VL});
II.replaceAllUsesWith(Res);
II.eraseFromParent();
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
index b8c7037580c46..849f98c26f459 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
@@ -638,14 +638,14 @@ declare <33 x double> @llvm.experimental.vp.strided.load.v33f64.p0.i64(ptr, i64,
define <4 x i8> @zero_strided_unmasked_vpload_4i8_i8(ptr %ptr) {
; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_4i8_i8:
; CHECK-OPT: # %bb.0:
-; CHECK-OPT-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-OPT-NEXT: vsetivli zero, 3, e8, mf4, ta, ma
; CHECK-OPT-NEXT: vlse8.v v8, (a0), zero
; CHECK-OPT-NEXT: ret
;
; CHECK-NO-OPT-LABEL: zero_strided_unmasked_vpload_4i8_i8:
; CHECK-NO-OPT: # %bb.0:
; CHECK-NO-OPT-NEXT: lbu a0, 0(a0)
-; CHECK-NO-OPT-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NO-OPT-NEXT: vsetivli zero, 3, e8, mf4, ta, ma
; CHECK-NO-OPT-NEXT: vmv.v.x v8, a0
; CHECK-NO-OPT-NEXT: ret
%load = call <4 x i8> @llvm.experimental.vp.strided.load.4i8.p0.i8(ptr %ptr, i8 0, <4 x i1> splat (i1 true), i32 3)
@@ -657,14 +657,14 @@ define <4 x i8> @zero_strided_unmasked_vpload_4i8_i8(ptr %ptr) {
define <4 x half> @zero_strided_unmasked_vpload_4f16(ptr %ptr) {
; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_4f16:
; CHECK-OPT: # %bb.0:
-; CHECK-OPT-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-OPT-NEXT: vsetivli zero, 3, e16, mf2, ta, ma
; CHECK-OPT-NEXT: vlse16.v v8, (a0), zero
; CHECK-OPT-NEXT: ret
;
; CHECK-NO-OPT-LABEL: zero_strided_unmasked_vpload_4f16:
; CHECK-NO-OPT: # %bb.0:
; CHECK-NO-OPT-NEXT: flh fa5, 0(a0)
-; CHECK-NO-OPT-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NO-OPT-NEXT: vsetivli zero, 3, e16, mf2, ta, ma
; CHECK-NO-OPT-NEXT: vfmv.v.f v8, fa5
; CHECK-NO-OPT-NEXT: ret
%load = call <4 x half> @llvm.experimental.vp.strided.load.4f16.p0.i32(ptr %ptr, i32 0, <4 x i1> splat (i1 true), i32 3)
diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
index 0010f64a93fd6..e9f092b23b336 100644
--- a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
@@ -823,15 +823,15 @@ define <vscale x 1 x half> @zero_strided_unmasked_vpload_nxv1f16(ptr %ptr) {
ret <vscale x 1 x half> %load
}
-define <vscale x 1 x i64> @zero_strided_vadd.vx(<vscale x 1 x i64> %v, ptr %ptr) {
-; CHECK-RV32-LABEL: zero_strided_vadd.vx:
+define <vscale x 1 x i64> @zero_strided_vadd_nxv1i64(<vscale x 1 x i64> %v, ptr %ptr) {
+; CHECK-RV32-LABEL: zero_strided_vadd_nxv1i64:
; CHECK-RV32: # %bb.0:
; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; CHECK-RV32-NEXT: vlse64.v v9, (a0), zero
; CHECK-RV32-NEXT: vadd.vv v8, v8, v9
; CHECK-RV32-NEXT: ret
;
-; CHECK-RV64-LABEL: zero_strided_vadd.vx:
+; CHECK-RV64-LABEL: zero_strided_vadd_nxv1i64:
; CHECK-RV64: # %bb.0:
; CHECK-RV64-NEXT: ld a0, 0(a0)
; CHECK-RV64-NEXT: vsetvli a1, zero, e64, m1, ta, ma
@@ -842,3 +842,38 @@ define <vscale x 1 x i64> @zero_strided_vadd.vx(<vscale x 1 x i64> %v, ptr %ptr)
%w = add <vscale x 1 x i64> %v, %load
ret <vscale x 1 x i64> %w
}
+
+define <vscale x 16 x i64> @zero_strided_vadd_nxv16i64(<vscale x 16 x i64> %v, ptr %ptr) {
+; CHECK-RV32-LABEL: zero_strided_vadd_nxv16i64:
+; CHECK-RV32: # %bb.0:
+; CHECK-RV32-NEXT: csrr a1, vlenb
+; CHECK-RV32-NEXT: srli a2, a1, 3
+; CHECK-RV32-NEXT: sub a3, a2, a1
+; CHECK-RV32-NEXT: sltu a4, a2, a3
+; CHECK-RV32-NEXT: addi a4, a4, -1
+; CHECK-RV32-NEXT: and a3, a4, a3
+; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma
+; CHECK-RV32-NEXT: vlse64.v v24, (a0), zero
+; CHECK-RV32-NEXT: bltu a2, a1, .LBB55_2
+; CHECK-RV32-NEXT: # %bb.1:
+; CHECK-RV32-NEXT: mv a2, a1
+; CHECK-RV32-NEXT: .LBB55_2:
+; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; CHECK-RV32-NEXT: vlse64.v v0, (a0), zero
+; CHECK-RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-RV32-NEXT: vadd.vv v16, v16, v24
+; CHECK-RV32-NEXT: vadd.vv v8, v8, v0
+; CHECK-RV32-NEXT: ret
+;
+; CHECK-RV64-LABEL: zero_strided_vadd_nxv16i64:
+; CHECK-RV64: # %bb.0:
+; CHECK-RV64-NEXT: ld a0, 0(a0)
+; CHECK-RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; CHECK-RV64-NEXT: vadd.vx v8, v8, a0
+; CHECK-RV64-NEXT: vadd.vx v16, v16, a0
+; CHECK-RV64-NEXT: ret
+ %vscale = call i32 @llvm.vscale()
+ %load = call <vscale x 16 x i64> @llvm.experimental.vp.strided.load.nxv16i64.p0.i32(ptr %ptr, i32 0, <vscale x 16 x i1> splat (i1 true), i32 %vscale)
+ %w = add <vscale x 16 x i64> %v, %load
+ ret <vscale x 16 x i64> %w
+}
|
Doesn't this fix a crash if the loaded elements have pointer type? Pointer types are not supported by getEVT or the the riscv_vmv_v_x intrinsic from the original code. |
…ments. Previously, llvm IR is hard to create a scalable vector splat with a specific vector lenght, so we use riscv.vmv.v.x and riscv.vmv.v.f to do this work. But the two rvv intrinsics needs strict type constraint which not support fixed vector types and illegal vector types. Using vp.splat could preserve old functionality and also generate more optimized code for vector types and illegal vectors. This patch also fixes crash for getEVT not serving ptr types.
Add ptr vector test case. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
#98579 causes crash when the type of vp.stride is pointer vector type, since pointer types are unexpected for getEVT. |
Oh woops I missed the comments above about the pointer type crash. In that case then I think we should backport this given that #98579 is in. Since this is independent of whether or not RISCVGatherScatterLowering emits vp_strided_load |
…ments. (llvm#101329) Previously, llvm IR is hard to create a scalable vector splat with a specific vector length, so we use riscv.vmv.v.x and riscv.vmv.v.f to do this work. But the two rvv intrinsics needs strict type constraint which can not support fixed vector types and illegal vector types. Using vp.splat could preserve old functionality and also generate more optimized code for vector types and illegal vectors. This patch also fixes crash for getEVT not serving ptr types. (cherry picked from commit 87af9ee)
…ments. (llvm#101329) Previously, llvm IR is hard to create a scalable vector splat with a specific vector length, so we use riscv.vmv.v.x and riscv.vmv.v.f to do this work. But the two rvv intrinsics needs strict type constraint which can not support fixed vector types and illegal vector types. Using vp.splat could preserve old functionality and also generate more optimized code for vector types and illegal vectors. This patch also fixes crash for getEVT not serving ptr types. (cherry picked from commit 87af9ee)
…ments. (llvm#101329) Previously, llvm IR is hard to create a scalable vector splat with a specific vector length, so we use riscv.vmv.v.x and riscv.vmv.v.f to do this work. But the two rvv intrinsics needs strict type constraint which can not support fixed vector types and illegal vector types. Using vp.splat could preserve old functionality and also generate more optimized code for vector types and illegal vectors. This patch also fixes crash for getEVT not serving ptr types.
…ments. (llvm#101329) Previously, llvm IR is hard to create a scalable vector splat with a specific vector length, so we use riscv.vmv.v.x and riscv.vmv.v.f to do this work. But the two rvv intrinsics needs strict type constraint which can not support fixed vector types and illegal vector types. Using vp.splat could preserve old functionality and also generate more optimized code for vector types and illegal vectors. This patch also fixes crash for getEVT not serving ptr types. (cherry picked from commit 87af9ee)
…ments. (llvm#101329) Previously, llvm IR is hard to create a scalable vector splat with a specific vector length, so we use riscv.vmv.v.x and riscv.vmv.v.f to do this work. But the two rvv intrinsics needs strict type constraint which can not support fixed vector types and illegal vector types. Using vp.splat could preserve old functionality and also generate more optimized code for vector types and illegal vectors. This patch also fixes crash for getEVT not serving ptr types. (cherry picked from commit 87af9ee)
This was done in llvm#101329
Previously, llvm IR is hard to create a scalable vector splat with a specific vector length, so we use riscv.vmv.v.x and riscv.vmv.v.f to do this work. But the two rvv intrinsics needs strict type constraint which can not support fixed vector types and illegal vector types. Using vp.splat could preserve old functionality and also generate more optimized code for vector types and illegal vectors.
This patch also fixes crash for getEVT not serving ptr types.