From 8efaaba6715b2b1a66aadcd4254ad26332c0d7de Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 5 Aug 2024 18:42:55 +0800 Subject: [PATCH] [TTI] Use element alignment in vp.strided.{load,store} cost (#101940) In SelectionDAGBuilder, a vp.strided.{load,store} uses the datalayout alignment of the element type if an align attribute isn't specified on the pointer operand. This updates BasicTTIImpl to use said element alignment instead of defaulting to one when costing it. This mainly just fixes the output of the cost model tests on RISC-V, since as far as I'm aware nothing uses the cost of these, e.g. SLP calls getStridedMemoryOpCost directly. --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 8 +++-- .../CostModel/RISCV/rvv-intrinsics.ll | 32 +++++++++---------- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 5b9cc5dfeeadb86..17f99e79c002414 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1628,7 +1628,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> { const Value *Mask = Args[3]; const Value *EVL = Args[4]; bool VarMask = !isa<Constant>(Mask) || !isa<Constant>(EVL); - Align Alignment = I->getParamAlign(1).valueOrOne(); + Type *EltTy = cast<VectorType>(Data->getType())->getElementType(); + Align Alignment = + I->getParamAlign(1).value_or(thisT()->DL.getABITypeAlign(EltTy)); return thisT()->getStridedMemoryOpCost(Instruction::Store, Data->getType(), Ptr, VarMask, Alignment, CostKind, I); @@ -1638,7 +1640,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> { const Value *Mask = Args[2]; const Value *EVL = Args[3]; bool VarMask = !isa<Constant>(Mask) || !isa<Constant>(EVL); - Align Alignment = I->getParamAlign(0).valueOrOne(); + Type *EltTy = cast<VectorType>(RetTy)->getElementType(); + Align Alignment = + I->getParamAlign(0).value_or(thisT()->DL.getABITypeAlign(EltTy)); return thisT()->getStridedMemoryOpCost(Instruction::Load, RetTy, Ptr, 
VarMask, Alignment, CostKind, I); } diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll index 67c081ba5d3c696..40aad95e715afd9 100644 --- a/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll +++ b/llvm/test/Analysis/CostModel/RISCV/rvv-intrinsics.ll @@ -1037,18 +1037,18 @@ define void @strided_load() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t10.a = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i64(ptr align 8 undef, i64 undef, <4 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t13.a = call <8 x i64> @llvm.experimental.vp.strided.load.v8i64.p0.i64(ptr align 8 undef, i64 undef, <8 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t15.a = call <16 x i64> @llvm.experimental.vp.strided.load.v16i64.p0.i64(ptr align 8 undef, i64 undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t8 = call <2 x i64> @llvm.experimental.vp.strided.load.v2i64.p0.i64(ptr undef, i64 undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %t10 = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i64(ptr undef, i64 undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %t13 = call <8 x i64> @llvm.experimental.vp.strided.load.v8i64.p0.i64(ptr undef, i64 undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %t15 = call <16 x i64> @llvm.experimental.vp.strided.load.v16i64.p0.i64(ptr undef, i64 undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %t8 = call <2 x i64> @llvm.experimental.vp.strided.load.v2i64.p0.i64(ptr undef, i64 undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 
4 for instruction: %t10 = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i64(ptr undef, i64 undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t13 = call <8 x i64> @llvm.experimental.vp.strided.load.v8i64.p0.i64(ptr undef, i64 undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t15 = call <16 x i64> @llvm.experimental.vp.strided.load.v16i64.p0.i64(ptr undef, i64 undef, <16 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t17 = call <vscale x 2 x i8> @llvm.experimental.vp.strided.load.nxv2i8.p0.i64(ptr undef, i64 undef, <vscale x 2 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t19 = call <vscale x 4 x i8> @llvm.experimental.vp.strided.load.nxv4i8.p0.i64(ptr undef, i64 undef, <vscale x 4 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t21 = call <vscale x 8 x i8> @llvm.experimental.vp.strided.load.nxv8i8.p0.i64(ptr undef, i64 undef, <vscale x 8 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %t23 = call <vscale x 16 x i8> @llvm.experimental.vp.strided.load.nxv16i8.p0.i64(ptr undef, i64 undef, <vscale x 16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %t25 = call <vscale x 2 x i64> @llvm.experimental.vp.strided.load.nxv2i64.p0.i64(ptr undef, i64 undef, <vscale x 2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %t27 = call <vscale x 4 x i64> @llvm.experimental.vp.strided.load.nxv4i64.p0.i64(ptr undef, i64 undef, <vscale x 4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %t29 = call <vscale x 8 x i64> @llvm.experimental.vp.strided.load.nxv8i64.p0.i64(ptr undef, i64 undef, <vscale x 8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: %t31 = call <vscale x 16 x i64> @llvm.experimental.vp.strided.load.nxv16i64.p0.i64(ptr undef, i64 undef, <vscale x 16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %t25 = call <vscale x 2 x i64> @llvm.experimental.vp.strided.load.nxv2i64.p0.i64(ptr 
undef, i64 undef, <vscale x 2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %t27 = call <vscale x 4 x i64> @llvm.experimental.vp.strided.load.nxv4i64.p0.i64(ptr undef, i64 undef, <vscale x 4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %t29 = call <vscale x 8 x i64> @llvm.experimental.vp.strided.load.nxv8i64.p0.i64(ptr undef, i64 undef, <vscale x 8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %t31 = call <vscale x 16 x i64> @llvm.experimental.vp.strided.load.nxv16i64.p0.i64(ptr undef, i64 undef, <vscale x 16 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; TYPEBASED-LABEL: 'strided_load' @@ -1103,10 +1103,10 @@ define void @strided_store() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.experimental.vp.strided.store.v4i8.p0.i64(<4 x i8> undef, ptr undef, i64 undef, <4 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.experimental.vp.strided.store.v8i8.p0.i64(<8 x i8> undef, ptr undef, i64 undef, <8 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.experimental.vp.strided.store.v16i8.p0.i64(<16 x i8> undef, ptr undef, i64 undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.experimental.vp.strided.store.v2i64.p0.i64(<2 x i64> undef, ptr undef, i64 undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.experimental.vp.strided.store.v4i64.p0.i64(<4 x i64> undef, ptr undef, i64 undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 38 for instruction: call void @llvm.experimental.vp.strided.store.v8i64.p0.i64(<8 x i64> undef, ptr undef, i64 undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 78 for instruction: call 
void @llvm.experimental.vp.strided.store.v16i64.p0.i64(<16 x i64> undef, ptr undef, i64 undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.experimental.vp.strided.store.v2i64.p0.i64(<2 x i64> undef, ptr undef, i64 undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.experimental.vp.strided.store.v4i64.p0.i64(<4 x i64> undef, ptr undef, i64 undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.experimental.vp.strided.store.v8i64.p0.i64(<8 x i64> undef, ptr undef, i64 undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.experimental.vp.strided.store.v16i64.p0.i64(<16 x i64> undef, ptr undef, i64 undef, <16 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.experimental.vp.strided.store.v2i64.p0.i64(<2 x i64> undef, ptr align 8 undef, i64 undef, <2 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.experimental.vp.strided.store.v4i64.p0.i64(<4 x i64> undef, ptr align 8 undef, i64 undef, <4 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.experimental.vp.strided.store.v8i64.p0.i64(<8 x i64> undef, ptr align 8 undef, i64 undef, <8 x i1> undef, i32 undef) @@ -1115,10 +1115,10 @@ define void @strided_store() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.experimental.vp.strided.store.nxv4i8.p0.i64(<vscale x 4 x i8> undef, ptr undef, i64 undef, <vscale x 4 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.experimental.vp.strided.store.nxv8i8.p0.i64(<vscale x 8 x i8> undef, ptr undef, i64 undef, <vscale x 8 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost 
of 32 for instruction: call void @llvm.experimental.vp.strided.store.nxv16i8.p0.i64(<vscale x 16 x i8> undef, ptr undef, i64 undef, <vscale x 16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vp.strided.store.nxv2i64.p0.i64(<vscale x 2 x i64> undef, ptr undef, i64 undef, <vscale x 2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vp.strided.store.nxv4i64.p0.i64(<vscale x 4 x i64> undef, ptr undef, i64 undef, <vscale x 4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vp.strided.store.nxv8i64.p0.i64(<vscale x 8 x i64> undef, ptr undef, i64 undef, <vscale x 8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.experimental.vp.strided.store.nxv16i64.p0.i64(<vscale x 16 x i64> undef, ptr undef, i64 undef, <vscale x 16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.experimental.vp.strided.store.nxv2i64.p0.i64(<vscale x 2 x i64> undef, ptr undef, i64 undef, <vscale x 2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.experimental.vp.strided.store.nxv4i64.p0.i64(<vscale x 4 x i64> undef, ptr undef, i64 undef, <vscale x 4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.experimental.vp.strided.store.nxv8i64.p0.i64(<vscale x 8 x i64> undef, ptr undef, i64 undef, <vscale x 8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.experimental.vp.strided.store.nxv16i64.p0.i64(<vscale x 16 x i64> undef, ptr undef, i64 undef, <vscale x 16 x i1> undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; TYPEBASED-LABEL: 'strided_store'