From 31340d0821c2a4a5b5e10b003885d067abe70567 Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Tue, 15 Mar 2022 10:08:04 +0000 Subject: [PATCH] [LoopIdiom] Set no_preserve_cheri_tags when possible This allows inlining of underaligned memcpy() calls that copy more than the capability size (which will be most of the ones created by this pass). Previously, we weren't setting any attribute on the memcpy at all, which meant that the backends had to be conservative when inlining the memcpy. --- .../Transforms/Scalar/LoopIdiomRecognize.cpp | 29 +++- .../LoopIdiom/cheri-preserve-tags-memcpy.ll | 155 ++++++++++++++++++ .../LoopIdiom/cheri-preserve-tags-store.ll | 101 ++++++++++++ 3 files changed, 276 insertions(+), 9 deletions(-) create mode 100644 llvm/test/Transforms/LoopIdiom/cheri-preserve-tags-memcpy.ll create mode 100644 llvm/test/Transforms/LoopIdiom/cheri-preserve-tags-store.ll diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 3d60e205b002..05720affd2cd 100644 --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -230,7 +230,8 @@ class LoopIdiomRecognize { Instruction *TheLoad, const SCEVAddRecExpr *StoreEv, const SCEVAddRecExpr *LoadEv, - const SCEV *BECount); + const SCEV *BECount, + PreserveCheriTags PreserveTags); bool avoidLIRForMultiBlockLoop(bool IsMemset = false, bool IsLoopMemset = false); @@ -889,7 +890,8 @@ bool LoopIdiomRecognize::processLoopMemCpy(MemCpyInst *MCI, return processLoopStoreOfLoopLoad(Dest, Source, (unsigned)SizeInBytes, MCI->getDestAlign(), MCI->getSourceAlign(), - MCI, MCI, StoreEv, LoadEv, BECount); + MCI, MCI, StoreEv, LoadEv, BECount, + MCI->shouldPreserveCheriTags()); } /// processLoopMemSet - See if this memset can be promoted to a large memset. 
@@ -1170,8 +1172,17 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, Value *StorePtr = SI->getPointerOperand(); const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr)); - unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType()); - + Type *StoreType = SI->getValueOperand()->getType(); + unsigned StoreSize = DL->getTypeStoreSize(StoreType); + auto PreserveTags = PreserveCheriTags::Unknown; + if (DL->isFatPointer(StoreType->getScalarType())) { + // Capabilities and vectors of capabilities need to preserve tags. + PreserveTags = PreserveCheriTags::Required; + } else if (StoreType->isSingleValueType()) { + // All other stores of simple types (i.e. non-struct, non-array) never copy + // CHERI tag bits, so we can mark the memcpy as non-tag-preserving. + PreserveTags = PreserveCheriTags::Unnecessary; + } // The store must be feeding a non-volatile load. LoadInst *LI = cast<LoadInst>(SI->getValueOperand()); assert(LI->isUnordered() && "Expected only non-volatile non-ordered loads."); @@ -1183,14 +1194,14 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, const SCEVAddRecExpr *LoadEv = cast<SCEVAddRecExpr>(SE->getSCEV(LoadPtr)); return processLoopStoreOfLoopLoad(StorePtr, LoadPtr, StoreSize, SI->getAlign(), LI->getAlign(), SI, LI, - StoreEv, LoadEv, BECount); + StoreEv, LoadEv, BECount, PreserveTags); } bool LoopIdiomRecognize::processLoopStoreOfLoopLoad( Value *DestPtr, Value *SourcePtr, unsigned StoreSize, MaybeAlign StoreAlign, MaybeAlign LoadAlign, Instruction *TheStore, Instruction *TheLoad, const SCEVAddRecExpr *StoreEv, const SCEVAddRecExpr *LoadEv, - const SCEV *BECount) { + const SCEV *BECount, PreserveCheriTags PreserveTags) { // FIXME: until llvm.memcpy.inline supports dynamic sizes, we need to // conservatively bail here, since otherwise we may have to transform @@ -1325,10 +1336,10 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad( if (!TheStore->isAtomic() && !TheLoad->isAtomic()) { if (UseMemMove) NewCall = 
Builder.CreateMemMove(StoreBasePtr, StoreAlign, LoadBasePtr, - LoadAlign, NumBytes); + LoadAlign, NumBytes, PreserveTags); else NewCall = Builder.CreateMemCpy(StoreBasePtr, StoreAlign, LoadBasePtr, - LoadAlign, NumBytes); + LoadAlign, NumBytes, PreserveTags); } else { // For now don't support unordered atomic memmove. if (UseMemMove) @@ -1352,7 +1363,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad( // have an alignment but non-atomic loads/stores may not. NewCall = Builder.CreateElementUnorderedAtomicMemCpy( StoreBasePtr, StoreAlign.getValue(), LoadBasePtr, LoadAlign.getValue(), - NumBytes, StoreSize); + NumBytes, StoreSize, PreserveTags); } NewCall->setDebugLoc(TheStore->getDebugLoc()); diff --git a/llvm/test/Transforms/LoopIdiom/cheri-preserve-tags-memcpy.ll b/llvm/test/Transforms/LoopIdiom/cheri-preserve-tags-memcpy.ll new file mode 100644 index 000000000000..d0a12f909761 --- /dev/null +++ b/llvm/test/Transforms/LoopIdiom/cheri-preserve-tags-memcpy.ll @@ -0,0 +1,155 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --force-update +; Check that we retain the {must,no}_preserve_cheri_tags attribute when merging memcpy loops. +; FIXME: this does not work with addrspace(200) pointers yet since we need SCEV. 
+; RUN: sed -e 's/addrspace(200)/addrspace(0)/g' -e 's/-A200-P200-G200//g' -e 's/.p200i8/.p0i8/g' %s | \ +; RUN: opt -passes='require,loop(loop-idiom,loop-deletion),simplifycfg' -aa-pipeline=basic-aa -S | \ +; RUN: FileCheck %s --check-prefixes=HYBRID,CHECK +; RUN: opt -passes='require,loop(loop-idiom,loop-deletion),simplifycfg' -aa-pipeline=basic-aa -S < %s | \ +; RUN: FileCheck %s --check-prefixes=PURECAP,CHECK +target datalayout = "e-m:e-pf200:128:128:128:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-A200-P200-G200" + +; Created from the following reduced C testcase: +; struct sockaddr_in { +; short sin_portsin_zero[8] +; }; +; struct { +; int nscount; +; struct sockaddr_in nsaddr_list[] +; } __res_nsend_statp; +; int __res_nsend_ns; +; void __res_nsend() { +; for (; __res_nsend_ns; __res_nsend_ns++) +; __res_nsend_statp.nsaddr_list[__res_nsend_ns] = +; __res_nsend_statp.nsaddr_list[__res_nsend_ns + 1]; +; } + +%struct.wombat = type { i16, %struct.foo, [8 x i8] } +%struct.foo = type { i32 } + +define void @no_preserve(%struct.wombat addrspace(200)* noalias writeonly %dst, %struct.wombat addrspace(200)* noalias readonly %src, i32 %count) local_unnamed_addr #0 { +; HYBRID-LABEL: @no_preserve( +; HYBRID-NEXT: [[DST1:%.*]] = bitcast %struct.wombat* [[DST:%.*]] to i8* +; HYBRID-NEXT: [[TMP161:%.*]] = icmp sgt i32 [[COUNT:%.*]], 0 +; HYBRID-NEXT: br i1 [[TMP161]], label [[BB17_PREHEADER:%.*]], label [[BB30:%.*]] +; HYBRID: bb17.preheader: +; HYBRID-NEXT: [[SCEVGEP:%.*]] = getelementptr [[STRUCT_WOMBAT:%.*]], %struct.wombat* [[SRC:%.*]], i64 1 +; HYBRID-NEXT: [[SCEVGEP2:%.*]] = bitcast %struct.wombat* [[SCEVGEP]] to i8* +; HYBRID-NEXT: [[TMP1:%.*]] = zext i32 [[COUNT]] to i64 +; HYBRID-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +; HYBRID-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[DST1]], i8* align 4 [[SCEVGEP2]], i64 [[TMP2]], i1 false) #[[ATTR2:[0-9]+]] +; HYBRID-NEXT: br label [[BB30]] +; HYBRID: bb30: +; HYBRID-NEXT: ret void +; +; 
PURECAP-LABEL: @no_preserve( +; PURECAP-NEXT: [[TMP161:%.*]] = icmp sgt i32 [[COUNT:%.*]], 0 +; PURECAP-NEXT: br i1 [[TMP161]], label [[BB17:%.*]], label [[BB30:%.*]] +; PURECAP: bb17: +; PURECAP-NEXT: [[TMP8_02:%.*]] = phi i32 [ [[TMP23:%.*]], [[BB17]] ], [ 0, [[TMP0:%.*]] ] +; PURECAP-NEXT: [[TMP19:%.*]] = zext i32 [[TMP8_02]] to i64 +; PURECAP-NEXT: [[TMP20:%.*]] = getelementptr [[STRUCT_WOMBAT:%.*]], [[STRUCT_WOMBAT]] addrspace(200)* [[DST:%.*]], i64 [[TMP19]] +; PURECAP-NEXT: [[TMP23]] = add nuw nsw i32 [[TMP8_02]], 1 +; PURECAP-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +; PURECAP-NEXT: [[TMP25:%.*]] = getelementptr [[STRUCT_WOMBAT]], [[STRUCT_WOMBAT]] addrspace(200)* [[SRC:%.*]], i64 [[TMP24]] +; PURECAP-NEXT: [[TMP26:%.*]] = bitcast [[STRUCT_WOMBAT]] addrspace(200)* [[TMP20]] to i8 addrspace(200)* +; PURECAP-NEXT: [[TMP27:%.*]] = bitcast [[STRUCT_WOMBAT]] addrspace(200)* [[TMP25]] to i8 addrspace(200)* +; PURECAP-NEXT: tail call void @llvm.memcpy.p200i8.p200i8.i64(i8 addrspace(200)* noundef nonnull align 16 dereferenceable(16) [[TMP26]], i8 addrspace(200)* noundef nonnull align 4 dereferenceable(16) [[TMP27]], i64 16, i1 false) #[[ATTR2:[0-9]+]] +; PURECAP-NEXT: [[TMP16:%.*]] = icmp slt i32 [[TMP23]], [[COUNT]] +; PURECAP-NEXT: br i1 [[TMP16]], label [[BB17]], label [[BB30]] +; PURECAP: bb30: +; PURECAP-NEXT: ret void +; + %tmp161 = icmp sgt i32 %count, 0 + br i1 %tmp161, label %bb17.preheader, label %bb30 + +bb17.preheader: ; preds = %0 + br label %bb17 + +bb17: ; preds = %bb17.preheader, %bb17 + %tmp8.02 = phi i32 [ %tmp23, %bb17 ], [ 0, %bb17.preheader ] + %tmp19 = zext i32 %tmp8.02 to i64 + %tmp20 = getelementptr %struct.wombat, %struct.wombat addrspace(200)* %dst, i64 %tmp19 + %tmp23 = add nuw nsw i32 %tmp8.02, 1 + %tmp24 = zext i32 %tmp23 to i64 + %tmp25 = getelementptr %struct.wombat, %struct.wombat addrspace(200)* %src, i64 %tmp24 + %tmp26 = bitcast %struct.wombat addrspace(200)* %tmp20 to i8 addrspace(200)* + %tmp27 = bitcast %struct.wombat 
addrspace(200)* %tmp25 to i8 addrspace(200)* + tail call void @llvm.memcpy.p200i8.p200i8.i64(i8 addrspace(200)* noundef nonnull align 16 dereferenceable(16) %tmp26, i8 addrspace(200)* noundef nonnull align 4 dereferenceable(16) %tmp27, i64 16, i1 false) #1 + %tmp16 = icmp slt i32 %tmp23, %count + br i1 %tmp16, label %bb17, label %bb30.loopexit + +bb30.loopexit: ; preds = %bb17 + br label %bb30 + +bb30: ; preds = %bb30.loopexit, %0 + ret void +} + +define void @must_preserve(%struct.wombat addrspace(200)* noalias writeonly %dst, %struct.wombat addrspace(200)* noalias readonly %src, i32 %count) local_unnamed_addr #0 { +; HYBRID-LABEL: @must_preserve( +; HYBRID-NEXT: [[DST1:%.*]] = bitcast %struct.wombat* [[DST:%.*]] to i8* +; HYBRID-NEXT: [[TMP161:%.*]] = icmp sgt i32 [[COUNT:%.*]], 0 +; HYBRID-NEXT: br i1 [[TMP161]], label [[BB17_PREHEADER:%.*]], label [[BB30:%.*]] +; HYBRID: bb17.preheader: +; HYBRID-NEXT: [[SCEVGEP:%.*]] = getelementptr [[STRUCT_WOMBAT:%.*]], %struct.wombat* [[SRC:%.*]], i64 1 +; HYBRID-NEXT: [[SCEVGEP2:%.*]] = bitcast %struct.wombat* [[SCEVGEP]] to i8* +; HYBRID-NEXT: [[TMP1:%.*]] = zext i32 [[COUNT]] to i64 +; HYBRID-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 4 +; HYBRID-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[DST1]], i8* align 4 [[SCEVGEP2]], i64 [[TMP2]], i1 false) #[[ATTR3:[0-9]+]] +; HYBRID-NEXT: br label [[BB30]] +; HYBRID: bb30: +; HYBRID-NEXT: ret void +; +; PURECAP-LABEL: @must_preserve( +; PURECAP-NEXT: [[TMP161:%.*]] = icmp sgt i32 [[COUNT:%.*]], 0 +; PURECAP-NEXT: br i1 [[TMP161]], label [[BB17:%.*]], label [[BB30:%.*]] +; PURECAP: bb17: +; PURECAP-NEXT: [[TMP8_02:%.*]] = phi i32 [ [[TMP23:%.*]], [[BB17]] ], [ 0, [[TMP0:%.*]] ] +; PURECAP-NEXT: [[TMP19:%.*]] = zext i32 [[TMP8_02]] to i64 +; PURECAP-NEXT: [[TMP20:%.*]] = getelementptr [[STRUCT_WOMBAT:%.*]], [[STRUCT_WOMBAT]] addrspace(200)* [[DST:%.*]], i64 [[TMP19]] +; PURECAP-NEXT: [[TMP23]] = add nuw nsw i32 [[TMP8_02]], 1 +; PURECAP-NEXT: [[TMP24:%.*]] = zext 
i32 [[TMP23]] to i64 +; PURECAP-NEXT: [[TMP25:%.*]] = getelementptr [[STRUCT_WOMBAT]], [[STRUCT_WOMBAT]] addrspace(200)* [[SRC:%.*]], i64 [[TMP24]] +; PURECAP-NEXT: [[TMP26:%.*]] = bitcast [[STRUCT_WOMBAT]] addrspace(200)* [[TMP20]] to i8 addrspace(200)* +; PURECAP-NEXT: [[TMP27:%.*]] = bitcast [[STRUCT_WOMBAT]] addrspace(200)* [[TMP25]] to i8 addrspace(200)* +; PURECAP-NEXT: tail call void @llvm.memcpy.p200i8.p200i8.i64(i8 addrspace(200)* noundef nonnull align 16 dereferenceable(16) [[TMP26]], i8 addrspace(200)* noundef nonnull align 4 dereferenceable(16) [[TMP27]], i64 16, i1 false) #[[ATTR3:[0-9]+]] +; PURECAP-NEXT: [[TMP16:%.*]] = icmp slt i32 [[TMP23]], [[COUNT]] +; PURECAP-NEXT: br i1 [[TMP16]], label [[BB17]], label [[BB30]] +; PURECAP: bb30: +; PURECAP-NEXT: ret void +; + %tmp161 = icmp sgt i32 %count, 0 + br i1 %tmp161, label %bb17.preheader, label %bb30 + +bb17.preheader: ; preds = %0 + br label %bb17 + +bb17: ; preds = %bb17.preheader, %bb17 + %tmp8.02 = phi i32 [ %tmp23, %bb17 ], [ 0, %bb17.preheader ] + %tmp19 = zext i32 %tmp8.02 to i64 + %tmp20 = getelementptr %struct.wombat, %struct.wombat addrspace(200)* %dst, i64 %tmp19 + %tmp23 = add nuw nsw i32 %tmp8.02, 1 + %tmp24 = zext i32 %tmp23 to i64 + %tmp25 = getelementptr %struct.wombat, %struct.wombat addrspace(200)* %src, i64 %tmp24 + %tmp26 = bitcast %struct.wombat addrspace(200)* %tmp20 to i8 addrspace(200)* + %tmp27 = bitcast %struct.wombat addrspace(200)* %tmp25 to i8 addrspace(200)* + tail call void @llvm.memcpy.p200i8.p200i8.i64(i8 addrspace(200)* noundef nonnull align 16 dereferenceable(16) %tmp26, i8 addrspace(200)* noundef nonnull align 4 dereferenceable(16) %tmp27, i64 16, i1 false) #2 + %tmp16 = icmp slt i32 %tmp23, %count + br i1 %tmp16, label %bb17, label %bb30.loopexit + +bb30.loopexit: ; preds = %bb17 + br label %bb30 + +bb30: ; preds = %bb30.loopexit, %0 + ret void +} + +declare void @llvm.memcpy.p200i8.p200i8.i64(i8 addrspace(200)* noalias nocapture writeonly, i8 addrspace(200)* 
noalias nocapture readonly, i64, i1 immarg) + +attributes #0 = { nofree nosync nounwind } +attributes #1 = { no_preserve_cheri_tags } +attributes #2 = { must_preserve_cheri_tags } + +; UTC_ARGS: --disable +; CHECK: attributes #[[ATTR2]] = { no_preserve_cheri_tags } +; CHECK: attributes #[[ATTR3]] = { must_preserve_cheri_tags } diff --git a/llvm/test/Transforms/LoopIdiom/cheri-preserve-tags-store.ll b/llvm/test/Transforms/LoopIdiom/cheri-preserve-tags-store.ll new file mode 100644 index 000000000000..267c2636cd52 --- /dev/null +++ b/llvm/test/Transforms/LoopIdiom/cheri-preserve-tags-store.ll @@ -0,0 +1,101 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --force-update +; We should be setting no_preserve_cheri_tags for loops that copy integers, and must_preserve_cheri_tags for capability copies. +; FIXME: this does not work with addrspace(200) pointers yet since we need SCEV. +; RUN: sed -e 's/addrspace(200)/addrspace(0)/g' -e 's/-A200-P200-G200//g' %s | \ +; RUN: opt --passes='require,loop(loop-idiom,loop-deletion),simplifycfg' -aa-pipeline=basic-aa -S | \ +; RUN: FileCheck %s --check-prefixes=HYBRID +; RUN: opt --passes='require,loop(loop-idiom,loop-deletion),simplifycfg' -aa-pipeline=basic-aa -S < %s | \ +; RUN: FileCheck %s --check-prefixes=PURECAP +target datalayout = "e-m:e-pf200:128:128:128:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-A200-P200-G200" + +%struct.state = type { [25 x i32], i32 } +%struct.capstate = type { [25 x i32 addrspace(200)*], i32 } + +@nocap = unnamed_addr addrspace(200) global [25 x i32] zeroinitializer, align 4 +@cap = unnamed_addr addrspace(200) global [25 x i32 addrspace(200)*] zeroinitializer, align 4 +@k = unnamed_addr addrspace(200) global i32 0, align 4 + +define void @get_state(%struct.state addrspace(200)* nocapture noalias %state) local_unnamed_addr addrspace(200) #0 { +; HYBRID-LABEL: @get_state( +; HYBRID-NEXT: entry: +; HYBRID-NEXT: [[STATE1:%.*]] = bitcast %struct.state* [[STATE:%.*]] 
to i8* +; HYBRID-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[STATE1]], i8* align 4 bitcast ([25 x i32]* @nocap to i8*), i64 100, i1 false) #[[ATTR1:[0-9]+]] +; HYBRID-NEXT: ret void +; +; PURECAP-LABEL: @get_state( +; PURECAP-NEXT: entry: +; PURECAP-NEXT: br label [[FOR_BODY:%.*]] +; PURECAP: for.body: +; PURECAP-NEXT: [[I_08:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; PURECAP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [25 x i32], [25 x i32] addrspace(200)* @nocap, i64 0, i64 [[I_08]] +; PURECAP-NEXT: [[TMP0:%.*]] = load i32, i32 addrspace(200)* [[ARRAYIDX]], align 4 +; PURECAP-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [[STRUCT_STATE:%.*]], [[STRUCT_STATE]] addrspace(200)* [[STATE:%.*]], i64 0, i32 0, i64 [[I_08]] +; PURECAP-NEXT: store i32 [[TMP0]], i32 addrspace(200)* [[ARRAYIDX2]], align 4 +; PURECAP-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1 +; PURECAP-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INC]], 25 +; PURECAP-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; PURECAP: for.end: +; PURECAP-NEXT: ret void +; +entry: + br label %for.body.preheader + +for.body.preheader: ; preds = %0 + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.08 = phi i64 [ 0, %for.body.preheader ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds [25 x i32], [25 x i32] addrspace(200)* @nocap, i64 0, i64 %i.08 + %0 = load i32, i32 addrspace(200)* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds %struct.state, %struct.state addrspace(200)* %state, i64 0, i32 0, i64 %i.08 + store i32 %0, i32 addrspace(200)* %arrayidx2, align 4 + %inc = add nuw nsw i64 %i.08, 1 + %exitcond = icmp ne i64 %inc, 25 + br i1 %exitcond, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + +define void @get_cap_state(%struct.capstate addrspace(200)* nocapture noalias %state) local_unnamed_addr addrspace(200) #0 { +; HYBRID-LABEL: @get_cap_state( +; HYBRID-NEXT: entry: +; HYBRID-NEXT: 
[[STATE1:%.*]] = bitcast %struct.capstate* [[STATE:%.*]] to i8* +; HYBRID-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[STATE1]], i8* align 16 bitcast ([25 x i32*]* @cap to i8*), i64 200, i1 false) #[[ATTR1]] +; HYBRID-NEXT: ret void +; +; PURECAP-LABEL: @get_cap_state( +; PURECAP-NEXT: entry: +; PURECAP-NEXT: br label [[FOR_BODY:%.*]] +; PURECAP: for.body: +; PURECAP-NEXT: [[I_08:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; PURECAP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [25 x i32 addrspace(200)*], [25 x i32 addrspace(200)*] addrspace(200)* @cap, i64 0, i64 [[I_08]] +; PURECAP-NEXT: [[TMP0:%.*]] = load i32 addrspace(200)*, i32 addrspace(200)* addrspace(200)* [[ARRAYIDX]], align 16 +; PURECAP-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [[STRUCT_CAPSTATE:%.*]], [[STRUCT_CAPSTATE]] addrspace(200)* [[STATE:%.*]], i64 0, i32 0, i64 [[I_08]] +; PURECAP-NEXT: store i32 addrspace(200)* [[TMP0]], i32 addrspace(200)* addrspace(200)* [[ARRAYIDX2]], align 16 +; PURECAP-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1 +; PURECAP-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INC]], 25 +; PURECAP-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; PURECAP: for.end: +; PURECAP-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.08 = phi i64 [ 0, %entry ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds [25 x i32 addrspace(200)*], [25 x i32 addrspace(200)*] addrspace(200)* @cap, i64 0, i64 %i.08 + %0 = load i32 addrspace(200)*, i32 addrspace(200)* addrspace(200)* %arrayidx, align 16 + %arrayidx2 = getelementptr inbounds %struct.capstate, %struct.capstate addrspace(200)* %state, i64 0, i32 0, i64 %i.08 + store i32 addrspace(200)* %0, i32 addrspace(200)* addrspace(200)* %arrayidx2, align 16 + %inc = add nuw nsw i64 %i.08, 1 + %exitcond = icmp ne i64 %inc, 25 + br i1 %exitcond, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + +; UTC_ARGS: 
--disable +; HYBRID: attributes #[[ATTR1]] = { no_preserve_cheri_tags } +; HYBRID: attributes #[[ATTR2]] = { must_preserve_cheri_tags }