[Local] Only intersect llvm.access.group metadata if instr moves. (llvm#115868)

Preserve llvm.access.group metadata on the replacement instruction if it
does not move. In that case, the program would already be UB if the
parallel property encoded in the metadata did not hold.

This matches the LangRef, which was recently updated in llvm#116220.

PR llvm#115868
fhahn authored and pull[bot] committed Nov 20, 2024
1 parent eddab3f commit 3578223
Showing 4 changed files with 58 additions and 106 deletions.
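For illustration, here is a hypothetical, reduced IR example of the situation the commit message describes; the function, value names, and metadata IDs below are invented and do not come from this patch. Two identical loads sit in a loop that is marked parallel via llvm.loop.parallel_accesses; when the second load is replaced by the first, the first load does not move, so under this change it may keep its !llvm.access.group rather than having it intersected with the other load's group.

; Hypothetical reduced example (not from this patch): %b is redundant with %a
; and would be replaced by it. %a stays in place, so keeping !0 on %a is safe:
; if %a were not actually parallel in this loop, the input was already UB.
define double @sum_twice(ptr %p, i64 %n) {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %acc = phi double [ 0.000000e+00, %entry ], [ %acc.next, %loop ]
  %gep = getelementptr inbounds double, ptr %p, i64 %iv
  %a = load double, ptr %gep, align 8, !llvm.access.group !0
  %b = load double, ptr %gep, align 8, !llvm.access.group !1
  %sum = fadd double %a, %b
  %acc.next = fadd double %acc, %sum
  %iv.next = add nuw nsw i64 %iv, 1
  %ec = icmp eq i64 %iv.next, %n
  br i1 %ec, label %exit, label %loop, !llvm.loop !2

exit:
  ret double %acc.next
}

!0 = distinct !{}
!1 = distinct !{}
!2 = distinct !{!2, !3}
!3 = !{!"llvm.loop.parallel_accesses", !0, !1}

Previously, combineMetadata would always intersect the access groups of the two instructions when replacing one with the other; with this change the intersection is only performed when the kept instruction actually moves (DoesKMove), and a non-moving replacement simply keeps its own metadata.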
5 changes: 3 additions & 2 deletions llvm/lib/Transforms/Utils/Local.cpp
@@ -3336,8 +3336,9 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J,
       K->setMetadata(Kind, MDNode::intersect(JMD, KMD));
       break;
     case LLVMContext::MD_access_group:
-      K->setMetadata(LLVMContext::MD_access_group,
-                     intersectAccessGroups(K, J));
+      if (DoesKMove)
+        K->setMetadata(LLVMContext::MD_access_group,
+                       intersectAccessGroups(K, J));
       break;
     case LLVMContext::MD_range:
       if (DoesKMove || !K->hasMetadata(LLVMContext::MD_noundef))
15 changes: 7 additions & 8 deletions llvm/test/Transforms/InstCombine/intersect-accessgroup.ll
@@ -12,12 +12,9 @@
 ; }
 ; }
 ;
-; Check for correctly merging access group metadata for instcombine
-; (only common loops are parallel == intersection)
-; Note that combined load would be parallel to loop !16 since both
-; origin loads are parallel to it, but it references two access groups
-; (!8 and !9), neither of which contain both loads. As such, the
-; information that the combined load is parallel to !16 is lost.
+; Check that the original access group on %0 is preserved when replacing uses
+; of %1 with it, as %0 is not moved and if %0 would not be parallel in the
+; original loop it would be UB.
 ;
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

@@ -107,7 +104,9 @@ for.end32:
 ; CHECK: load double, {{.*}} !llvm.access.group ![[ACCESSGROUP_0:[0-9]+]]
 ; CHECK: br label %for.cond14, !llvm.loop ![[LOOP_4:[0-9]+]]

-; CHECK: ![[ACCESSGROUP_0]] = distinct !{}
+; CHECK: ![[ACCESSGROUP_0]] = !{![[G1:[0-9]+]], ![[G2:[0-9]+]]}
+; CHECK: ![[G1]] = distinct !{}
+; CHECK: ![[G2]] = distinct !{}

 ; CHECK: ![[LOOP_4]] = distinct !{![[LOOP_4]], ![[PARALLEL_ACCESSES_5:[0-9]+]]}
-; CHECK: ![[PARALLEL_ACCESSES_5]] = !{!"llvm.loop.parallel_accesses", ![[ACCESSGROUP_0]]}
+; CHECK: ![[PARALLEL_ACCESSES_5]] = !{!"llvm.loop.parallel_accesses", ![[G1]]}
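The updated checks keep an access-group list on the preserved load, referencing two distinct groups (G1 and G2), while the loop's llvm.loop.parallel_accesses names only one of them. As a made-up illustration of that metadata shape (the function, names, and metadata numbers below are invented, not taken from the test):

; Hypothetical fragment: a load that belongs to two access groups; only the
; first group is declared parallel for the enclosing loop.
define void @copy(ptr noalias %src, ptr noalias %dst, i64 %n) {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep.s = getelementptr inbounds double, ptr %src, i64 %iv
  %gep.d = getelementptr inbounds double, ptr %dst, i64 %iv
  ; The operand is a list node (!2) because the access is in both groups.
  %v = load double, ptr %gep.s, align 8, !llvm.access.group !2
  store double %v, ptr %gep.d, align 8, !llvm.access.group !0
  %iv.next = add nuw nsw i64 %iv, 1
  %ec = icmp eq i64 %iv.next, %n
  br i1 %ec, label %exit, label %loop, !llvm.loop !3

exit:
  ret void
}

!0 = distinct !{}                          ; access group A
!1 = distinct !{}                          ; access group B
!2 = !{!0, !1}                             ; the load is in both A and B
!3 = distinct !{!3, !4}
!4 = !{!"llvm.loop.parallel_accesses", !0} ; only group A is parallel here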
25 changes: 11 additions & 14 deletions llvm/test/Transforms/InstCombine/loadstore-metadata.ll
@@ -201,12 +201,11 @@ entry:
 ret ptr %l.sel
 }

-; FIXME: Should preserve metadata on loads.
 define double @preserve_load_metadata_after_select_transform2(ptr %a, ptr %b) {
 ; CHECK-LABEL: @preserve_load_metadata_after_select_transform2(
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: [[L_A:%.*]] = load double, ptr [[A:%.*]], align 8
-; CHECK-NEXT: [[L_B:%.*]] = load double, ptr [[B:%.*]], align 8
+; CHECK-NEXT: [[L_A:%.*]] = load double, ptr [[A:%.*]], align 8, !llvm.access.group [[META6]]
+; CHECK-NEXT: [[L_B:%.*]] = load double, ptr [[B:%.*]], align 8, !llvm.access.group [[META6]]
 ; CHECK-NEXT: [[CMP_I:%.*]] = fcmp fast olt double [[L_A]], [[L_B]]
 ; CHECK-NEXT: [[L_SEL:%.*]] = select i1 [[CMP_I]], double [[L_B]], double [[L_A]]
 ; CHECK-NEXT: ret double [[L_SEL]]
@@ -220,12 +219,11 @@ entry:
 ret double %l.sel
 }

-; FIXME: Should preserve metadata on loads.
 define double @preserve_load_metadata_after_select_transform_metadata_missing_1(ptr %a, ptr %b) {
 ; CHECK-LABEL: @preserve_load_metadata_after_select_transform_metadata_missing_1(
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: [[L_A:%.*]] = load double, ptr [[A:%.*]], align 8
-; CHECK-NEXT: [[L_B:%.*]] = load double, ptr [[B:%.*]], align 8
+; CHECK-NEXT: [[L_A:%.*]] = load double, ptr [[A:%.*]], align 8, !llvm.access.group [[META6]]
+; CHECK-NEXT: [[L_B:%.*]] = load double, ptr [[B:%.*]], align 8, !llvm.access.group [[META6]]
 ; CHECK-NEXT: [[CMP_I:%.*]] = fcmp fast olt double [[L_A]], [[L_B]]
 ; CHECK-NEXT: [[L_SEL:%.*]] = select i1 [[CMP_I]], double [[L_B]], double [[L_A]]
 ; CHECK-NEXT: ret double [[L_SEL]]
@@ -242,8 +240,8 @@ entry:
 define double @preserve_load_metadata_after_select_transform_metadata_missing_2(ptr %a, ptr %b) {
 ; CHECK-LABEL: @preserve_load_metadata_after_select_transform_metadata_missing_2(
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: [[L_A:%.*]] = load double, ptr [[A:%.*]], align 8
-; CHECK-NEXT: [[L_B:%.*]] = load double, ptr [[B:%.*]], align 8
+; CHECK-NEXT: [[L_A:%.*]] = load double, ptr [[A:%.*]], align 8, !llvm.access.group [[META6]]
+; CHECK-NEXT: [[L_B:%.*]] = load double, ptr [[B:%.*]], align 8, !llvm.access.group [[META6]]
 ; CHECK-NEXT: [[CMP_I:%.*]] = fcmp fast olt double [[L_A]], [[L_B]]
 ; CHECK-NEXT: [[L_SEL:%.*]] = select i1 [[CMP_I]], double [[L_B]], double [[L_A]]
 ; CHECK-NEXT: ret double [[L_SEL]]
@@ -257,12 +255,11 @@ entry:
 ret double %l.sel
 }

-; FIXME: Should preserve metadata on loads.
 define double @preserve_load_metadata_after_select_transform_metadata_missing_3(ptr %a, ptr %b) {
 ; CHECK-LABEL: @preserve_load_metadata_after_select_transform_metadata_missing_3(
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: [[L_A:%.*]] = load double, ptr [[A:%.*]], align 8
-; CHECK-NEXT: [[L_B:%.*]] = load double, ptr [[B:%.*]], align 8
+; CHECK-NEXT: [[L_A:%.*]] = load double, ptr [[A:%.*]], align 8, !llvm.access.group [[META6]]
+; CHECK-NEXT: [[L_B:%.*]] = load double, ptr [[B:%.*]], align 8, !llvm.access.group [[META6]]
 ; CHECK-NEXT: [[CMP_I:%.*]] = fcmp fast olt double [[L_A]], [[L_B]]
 ; CHECK-NEXT: [[L_SEL:%.*]] = select i1 [[CMP_I]], double [[L_B]], double [[L_A]]
 ; CHECK-NEXT: ret double [[L_SEL]]
@@ -276,14 +273,13 @@ entry:
 ret double %l.sel
 }

-; FIXME: Should preserve metadata on loads.
 ; Like preserve_load_metadata_after_select_transform_metadata_missing_3, but
 ; with different access groups on all loads.
 define double @preserve_load_metadata_after_select_transform_metadata_missing_4(ptr %a, ptr %b) {
 ; CHECK-LABEL: @preserve_load_metadata_after_select_transform_metadata_missing_4(
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: [[L_A:%.*]] = load double, ptr [[A:%.*]], align 8
-; CHECK-NEXT: [[L_B:%.*]] = load double, ptr [[B:%.*]], align 8
+; CHECK-NEXT: [[L_A:%.*]] = load double, ptr [[A:%.*]], align 8, !llvm.access.group [[META6]]
+; CHECK-NEXT: [[L_B:%.*]] = load double, ptr [[B:%.*]], align 8, !llvm.access.group [[ACC_GRP10:![0-9]+]]
 ; CHECK-NEXT: [[CMP_I:%.*]] = fcmp fast olt double [[L_A]], [[L_B]]
 ; CHECK-NEXT: [[L_SEL:%.*]] = select i1 [[CMP_I]], double [[L_B]], double [[L_A]]
 ; CHECK-NEXT: ret double [[L_SEL]]
@@ -322,4 +318,5 @@ entry:
 ; CHECK: [[META7]] = !{i32 1}
 ; CHECK: [[META8]] = !{i64 8}
 ; CHECK: [[ACC_GRP9]] = distinct !{}
+; CHECK: [[ACC_GRP10]] = distinct !{}
 ;.
119 changes: 37 additions & 82 deletions llvm/test/Transforms/PhaseOrdering/X86/preserve-access-group.ll
@@ -15,83 +15,39 @@ define void @test(i32 noundef %nface, i32 noundef %ncell, ptr noalias noundef %f
; CHECK: [[FOR_BODY_PREHEADER]]:
; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i32 [[NFACE]] to i64
; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr inbounds i32, ptr [[FACE_CELL]], i64 [[TMP0]]
; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP0]], 3
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[NFACE]], 4
; CHECK-NEXT: br i1 [[TMP1]], label %[[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA:.*]], label %[[FOR_BODY_PREHEADER_NEW:.*]]
; CHECK: [[FOR_BODY_PREHEADER_NEW]]:
; CHECK-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[TMP0]], 2147483644
; CHECK-NEXT: br i1 [[TMP1]], label %[[FOR_BODY_PREHEADER14:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[FOR_BODY_PREHEADER14]]:
; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[UNROLL_ITER:%.*]], %[[MIDDLE_BLOCK:.*]] ]
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]]:
; CHECK-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT_3:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 0
; CHECK-NEXT: br i1 [[LCMP_MOD_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY_EPIL:.*]]
; CHECK: [[FOR_BODY_EPIL]]:
; CHECK-NEXT: [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], %[[FOR_BODY_EPIL]] ], [ [[INDVARS_IV_UNR]], %[[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]] ]
; CHECK-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_NEXT:%.*]], %[[FOR_BODY_EPIL]] ], [ 0, %[[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]] ]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[UNROLL_ITER]] = and i64 [[TMP0]], 2147483644
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDVARS_IV_EPIL:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, ptr [[FACE_CELL]], i64 [[INDVARS_IV_EPIL]]
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[TBAA0:![0-9]+]], !llvm.access.group [[ACC_GRP4:![0-9]+]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[TBAA0:![0-9]+]], !llvm.access.group [[ACC_GRP4:![0-9]+]]
; CHECK-NEXT: [[GEP_EPIL:%.*]] = getelementptr inbounds i32, ptr [[INVARIANT_GEP]], i64 [[INDVARS_IV_EPIL]]
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[GEP_EPIL]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
; CHECK-NEXT: [[IDXPROM3_EPIL:%.*]] = sext i32 [[TMP2]] to i64
; CHECK-NEXT: [[ARRAYIDX4_EPIL:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[IDXPROM3_EPIL]]
; CHECK-NEXT: [[IDXPROM5_EPIL:%.*]] = sext i32 [[TMP3]] to i64
; CHECK-NEXT: [[ARRAYIDX6_EPIL:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[IDXPROM5_EPIL]]
; CHECK-NEXT: [[TMP4:%.*]] = load double, ptr [[ARRAYIDX4_EPIL]], align 8
; CHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[ARRAYIDX6_EPIL]], align 8
; CHECK-NEXT: [[CMP_I_EPIL:%.*]] = fcmp fast olt double [[TMP4]], [[TMP5]]
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[CMP_I_EPIL]], double [[TMP5]], double [[TMP4]]
; CHECK-NEXT: store double [[TMP6]], ptr [[ARRAYIDX4_EPIL]], align 8, !tbaa [[TBAA5:![0-9]+]], !llvm.access.group [[ACC_GRP4]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_EPIL]] = add nuw nsw i64 [[INDVARS_IV_EPIL]], 1
; CHECK-NEXT: [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1
; CHECK-NEXT: [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[EPIL_ITER_NEXT]], [[XTRAITER]]
; CHECK-NEXT: br i1 [[EPIL_ITER_CMP_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY_EPIL]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x i32>, ptr [[GEP_EPIL]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
; CHECK-NEXT: [[TMP3:%.*]] = sext <4 x i32> [[WIDE_LOAD]] to <4 x i64>
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[Y]], <4 x i64> [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i32> [[WIDE_LOAD12]] to <4 x i64>
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[X]], <4 x i64> [[TMP5]]
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = tail call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> [[TMP4]], i32 8, <4 x i1> splat (i1 true), <4 x double> poison), !llvm.access.group [[ACC_GRP4]]
; CHECK-NEXT: [[WIDE_MASKED_GATHER13:%.*]] = tail call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> [[TMP6]], i32 8, <4 x i1> splat (i1 true), <4 x double> poison), !llvm.access.group [[ACC_GRP4]]
; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast olt <4 x double> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER13]]
; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x double> [[WIDE_MASKED_GATHER13]], <4 x double> [[WIDE_MASKED_GATHER]]
; CHECK-NEXT: tail call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> [[TMP8]], <4 x ptr> [[TMP4]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA5:![0-9]+]], !llvm.access.group [[ACC_GRP4]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDVARS_IV_EPIL]], 4
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[UNROLL_ITER]]
; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UNROLL_ITER]], [[TMP0]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY_PREHEADER14]]
; CHECK: [[FOR_COND_CLEANUP]]:
; CHECK-NEXT: ret void
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER_NEW]] ], [ [[INDVARS_IV_NEXT_3]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER_NEW]] ], [ [[NITER_NEXT_3:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[FACE_CELL]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[INVARIANT_GEP]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[GEP]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
; CHECK-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP7]] to i64
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[IDXPROM3]]
; CHECK-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP8]] to i64
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[IDXPROM5]]
; CHECK-NEXT: [[TMP9:%.*]] = load double, ptr [[ARRAYIDX4]], align 8
; CHECK-NEXT: [[TMP10:%.*]] = load double, ptr [[ARRAYIDX6]], align 8
; CHECK-NEXT: [[CMP_I:%.*]] = fcmp fast olt double [[TMP9]], [[TMP10]]
; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[CMP_I]], double [[TMP10]], double [[TMP9]]
; CHECK-NEXT: store double [[TMP11]], ptr [[ARRAYIDX4]], align 8, !tbaa [[TBAA5]], !llvm.access.group [[ACC_GRP4]]
; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = or disjoint i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[FACE_CELL]], i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, ptr [[INVARIANT_GEP]], i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[GEP_1]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
; CHECK-NEXT: [[IDXPROM3_1:%.*]] = sext i32 [[TMP12]] to i64
; CHECK-NEXT: [[ARRAYIDX4_1:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[IDXPROM3_1]]
; CHECK-NEXT: [[IDXPROM5_1:%.*]] = sext i32 [[TMP13]] to i64
; CHECK-NEXT: [[ARRAYIDX6_1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[IDXPROM5_1]]
; CHECK-NEXT: [[TMP14:%.*]] = load double, ptr [[ARRAYIDX4_1]], align 8
; CHECK-NEXT: [[TMP15:%.*]] = load double, ptr [[ARRAYIDX6_1]], align 8
; CHECK-NEXT: [[CMP_I_1:%.*]] = fcmp fast olt double [[TMP14]], [[TMP15]]
; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[CMP_I_1]], double [[TMP15]], double [[TMP14]]
; CHECK-NEXT: store double [[TMP16]], ptr [[ARRAYIDX4_1]], align 8, !tbaa [[TBAA5]], !llvm.access.group [[ACC_GRP4]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = or disjoint i64 [[INDVARS_IV]], 2
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[FACE_CELL]], i64 [[INDVARS_IV_NEXT_1]]
; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, ptr [[INVARIANT_GEP]], i64 [[INDVARS_IV_NEXT_1]]
; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[GEP_2]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
; CHECK-NEXT: [[IDXPROM3_2:%.*]] = sext i32 [[TMP17]] to i64
; CHECK-NEXT: [[ARRAYIDX4_2:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[IDXPROM3_2]]
; CHECK-NEXT: [[IDXPROM5_2:%.*]] = sext i32 [[TMP18]] to i64
; CHECK-NEXT: [[ARRAYIDX6_2:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[IDXPROM5_2]]
; CHECK-NEXT: [[TMP19:%.*]] = load double, ptr [[ARRAYIDX4_2]], align 8
; CHECK-NEXT: [[TMP20:%.*]] = load double, ptr [[ARRAYIDX6_2]], align 8
; CHECK-NEXT: [[CMP_I_2:%.*]] = fcmp fast olt double [[TMP19]], [[TMP20]]
; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[CMP_I_2]], double [[TMP20]], double [[TMP19]]
; CHECK-NEXT: store double [[TMP21]], ptr [[ARRAYIDX4_2]], align 8, !tbaa [[TBAA5]], !llvm.access.group [[ACC_GRP4]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = or disjoint i64 [[INDVARS_IV]], 3
; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[INDVARS_IV_PH]], %[[FOR_BODY_PREHEADER14]] ]
; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[FACE_CELL]], i64 [[INDVARS_IV_NEXT_2]]
; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, ptr [[INVARIANT_GEP]], i64 [[INDVARS_IV_NEXT_2]]
@@ -100,15 +56,14 @@ define void @test(i32 noundef %nface, i32 noundef %ncell, ptr noalias noundef %f
 ; CHECK-NEXT: [[ARRAYIDX4_3:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[IDXPROM3_3]]
 ; CHECK-NEXT: [[IDXPROM5_3:%.*]] = sext i32 [[TMP23]] to i64
 ; CHECK-NEXT: [[ARRAYIDX6_3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[IDXPROM5_3]]
-; CHECK-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX4_3]], align 8
-; CHECK-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6_3]], align 8
+; CHECK-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX4_3]], align 8, !llvm.access.group [[ACC_GRP4]]
+; CHECK-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6_3]], align 8, !llvm.access.group [[ACC_GRP4]]
 ; CHECK-NEXT: [[CMP_I_3:%.*]] = fcmp fast olt double [[TMP24]], [[TMP25]]
 ; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[CMP_I_3]], double [[TMP25]], double [[TMP24]]
 ; CHECK-NEXT: store double [[TMP26]], ptr [[ARRAYIDX4_3]], align 8, !tbaa [[TBAA5]], !llvm.access.group [[ACC_GRP4]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_3]] = add nuw nsw i64 [[INDVARS_IV]], 4
-; CHECK-NEXT: [[NITER_NEXT_3]] = add i64 [[NITER]], 4
-; CHECK-NEXT: [[NITER_NCMP_3:%.*]] = icmp eq i64 [[NITER_NEXT_3]], [[UNROLL_ITER]]
-; CHECK-NEXT: br i1 [[NITER_NCMP_3]], label %[[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]], label %[[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV_NEXT_2]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[TMP0]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ;
 entry:
 %nface.addr = alloca i32, align 4
@@ -242,10 +197,10 @@ attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: re
 ; CHECK: [[ACC_GRP4]] = distinct !{}
 ; CHECK: [[TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
 ; CHECK: [[META6]] = !{!"double", [[META2]], i64 0}
-; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META8:![0-9]+]]}
-; CHECK: [[META8]] = !{!"llvm.loop.unroll.disable"}
-; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META10:![0-9]+]], [[META11:![0-9]+]], [[META12:![0-9]+]]}
-; CHECK: [[META10]] = !{!"llvm.loop.mustprogress"}
-; CHECK: [[META11]] = !{!"llvm.loop.parallel_accesses", [[ACC_GRP4]]}
-; CHECK: [[META12]] = !{!"llvm.loop.vectorize.enable", i1 true}
+; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META8:![0-9]+]], [[META9:![0-9]+]], [[META10:![0-9]+]], [[META11:![0-9]+]]}
+; CHECK: [[META8]] = !{!"llvm.loop.mustprogress"}
+; CHECK: [[META9]] = !{!"llvm.loop.parallel_accesses", [[ACC_GRP4]]}
+; CHECK: [[META10]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META11]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META8]], [[META9]], [[META11]], [[META10]]}
 ;.
