Skip to content

Commit

Permalink
Atomic loads/stores, spill/reload, tests for __fp16 and half vectors.
Browse files Browse the repository at this point in the history
  • Loading branch information
JonPsson1 committed Nov 6, 2024
1 parent d58853c commit 26660a6
Show file tree
Hide file tree
Showing 14 changed files with 1,297 additions and 19 deletions.
4 changes: 2 additions & 2 deletions clang/lib/Basic/Targets/SystemZ.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,12 +94,12 @@ class LLVM_LIBRARY_VISIBILITY SystemZTargetInfo : public TargetInfo {

// True if the backend supports operations on the half LLVM IR type.
// By setting this to false, conversions will happen for _Float16 around
// a statement by default with operations done in float. However, if
// a statement by default, with operations done in float. However, if
// -ffloat16-excess-precision=none is given, no conversions will be made
// and instead the backend will promote each half operation to float
// individually.
HasLegalHalfType = false;
// Allow half arguments and return values.
// Allow half arguments and return values (__fp16).
HalfArgsAndReturns = true;
// Support _Float16.
HasFloat16 = true;
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/CodeGen/Targets/SystemZ.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ bool SystemZABIInfo::isFPArgumentType(QualType Ty) const {

if (const BuiltinType *BT = Ty->getAs<BuiltinType>())
switch (BT->getKind()) {
// case BuiltinType::Half: // __fp16 Support __fp16??
case BuiltinType::Half: // __fp16
case BuiltinType::Float16: // _Float16
case BuiltinType::Float:
case BuiltinType::Double:
Expand Down
File renamed without changes.
32 changes: 32 additions & 0 deletions clang/test/CodeGen/SystemZ/fp16.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// RUN: %clang_cc1 -triple s390x-linux-gnu -emit-llvm -o - %s \
// RUN: | FileCheck %s

// Both products and the sum are computed in float (each half operand is
// fpext'ed, per the CHECK lines below), and only the final result is
// truncated back to half — the default excess-precision behavior.
__fp16 f(__fp16 a, __fp16 b, __fp16 c, __fp16 d) {
return a * b + c * d;
}

// CHECK-LABEL: define dso_local half @f(half noundef %a, half noundef %b, half noundef %c, half noundef %d) #0 {
// CHECK-NEXT: entry:
// CHECK-NEXT: %a.addr = alloca half, align 2
// CHECK-NEXT: %b.addr = alloca half, align 2
// CHECK-NEXT: %c.addr = alloca half, align 2
// CHECK-NEXT: %d.addr = alloca half, align 2
// CHECK-NEXT: store half %a, ptr %a.addr, align 2
// CHECK-NEXT: store half %b, ptr %b.addr, align 2
// CHECK-NEXT: store half %c, ptr %c.addr, align 2
// CHECK-NEXT: store half %d, ptr %d.addr, align 2
// CHECK-NEXT: %0 = load half, ptr %a.addr, align 2
// CHECK-NEXT: %conv = fpext half %0 to float
// CHECK-NEXT: %1 = load half, ptr %b.addr, align 2
// CHECK-NEXT: %conv1 = fpext half %1 to float
// CHECK-NEXT: %mul = fmul float %conv, %conv1
// CHECK-NEXT: %2 = load half, ptr %c.addr, align 2
// CHECK-NEXT: %conv2 = fpext half %2 to float
// CHECK-NEXT: %3 = load half, ptr %d.addr, align 2
// CHECK-NEXT: %conv3 = fpext half %3 to float
// CHECK-NEXT: %mul4 = fmul float %conv2, %conv3
// CHECK-NEXT: %add = fadd float %mul, %mul4
// CHECK-NEXT: %4 = fptrunc float %add to half
// CHECK-NEXT: ret half %4
// CHECK-NEXT: }

43 changes: 43 additions & 0 deletions clang/test/CodeGen/SystemZ/systemz-abi.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ long long pass_longlong(long long arg) { return arg; }
__int128 pass_int128(__int128 arg) { return arg; }
// CHECK-LABEL: define{{.*}} void @pass_int128(ptr dead_on_unwind noalias writable sret(i128) align 8 %{{.*}}, ptr %0)

__fp16 pass___fp16(__fp16 arg) { return arg; }
// CHECK-LABEL: define{{.*}} half @pass___fp16(half %{{.*}})

_Float16 pass__Float16(_Float16 arg) { return arg; }
// CHECK-LABEL: define{{.*}} half @pass__Float16(half %{{.*}})

Expand Down Expand Up @@ -75,6 +78,8 @@ _Complex long pass_complex_long(_Complex long arg) { return arg; }
_Complex long long pass_complex_longlong(_Complex long long arg) { return arg; }
// CHECK-LABEL: define{{.*}} void @pass_complex_longlong(ptr dead_on_unwind noalias writable sret({ i64, i64 }) align 8 %{{.*}}, ptr %{{.*}}arg)

// _Complex __fp16 is (currently?) not allowed.

_Complex _Float16 pass_complex__Float16(_Complex _Float16 arg) { return arg; }
// CHECK-LABEL: define{{.*}} void @pass_complex__Float16(ptr dead_on_unwind noalias writable sret({ half, half }) align 2 %{{.*}}, ptr %{{.*}}arg)

Expand Down Expand Up @@ -129,6 +134,11 @@ struct agg_16byte pass_agg_16byte(struct agg_16byte arg) { return arg; }

// Float-like aggregate types

struct agg___fp16 { __fp16 a; };
struct agg___fp16 pass_agg___fp16(struct agg___fp16 arg) { return arg; }
// HARD-FLOAT-LABEL: define{{.*}} void @pass_agg___fp16(ptr dead_on_unwind noalias writable sret(%struct.agg___fp16) align 2 %{{.*}}, half %{{.*}})
// SOFT-FLOAT-LABEL: define{{.*}} void @pass_agg___fp16(ptr dead_on_unwind noalias writable sret(%struct.agg___fp16) align 2 %{{.*}}, i16 noext %{{.*}})

struct agg__Float16 { _Float16 a; };
struct agg__Float16 pass_agg__Float16(struct agg__Float16 arg) { return arg; }
// HARD-FLOAT-LABEL: define{{.*}} void @pass_agg__Float16(ptr dead_on_unwind noalias writable sret(%struct.agg__Float16) align 2 %{{.*}}, half %{{.*}})
Expand All @@ -148,6 +158,11 @@ struct agg_longdouble { long double a; };
struct agg_longdouble pass_agg_longdouble(struct agg_longdouble arg) { return arg; }
// CHECK-LABEL: define{{.*}} void @pass_agg_longdouble(ptr dead_on_unwind noalias writable sret(%struct.agg_longdouble) align 8 %{{.*}}, ptr %{{.*}})

struct agg___fp16_a8 { __fp16 a __attribute__((aligned (8))); };
struct agg___fp16_a8 pass_agg___fp16_a8(struct agg___fp16_a8 arg) { return arg; }
// HARD-FLOAT-LABEL: define{{.*}} void @pass_agg___fp16_a8(ptr dead_on_unwind noalias writable sret(%struct.agg___fp16_a8) align 8 %{{.*}}, double %{{.*}})
// SOFT-FLOAT-LABEL: define{{.*}} void @pass_agg___fp16_a8(ptr dead_on_unwind noalias writable sret(%struct.agg___fp16_a8) align 8 %{{.*}}, i64 %{{.*}})

struct agg__Float16_a8 { _Float16 a __attribute__((aligned (8))); };
struct agg__Float16_a8 pass_agg__Float16_a8(struct agg__Float16_a8 arg) { return arg; }
// HARD-FLOAT-LABEL: define{{.*}} void @pass_agg__Float16_a8(ptr dead_on_unwind noalias writable sret(%struct.agg__Float16_a8) align 8 %{{.*}}, double %{{.*}})
Expand Down Expand Up @@ -180,6 +195,10 @@ struct agg_nofloat3 pass_agg_nofloat3(struct agg_nofloat3 arg) { return arg; }

// Union types likewise are *not* float-like aggregate types

union union___fp16 { __fp16 a; };
union union___fp16 pass_union___fp16(union union___fp16 arg) { return arg; }
// CHECK-LABEL: define{{.*}} void @pass_union___fp16(ptr dead_on_unwind noalias writable sret(%union.union___fp16) align 2 %{{.*}}, i16 noext %{{.*}})

union union__Float16 { _Float16 a; };
union union__Float16 pass_union__Float16(union union__Float16 arg) { return arg; }
// CHECK-LABEL: define{{.*}} void @pass_union__Float16(ptr dead_on_unwind noalias writable sret(%union.union__Float16) align 2 %{{.*}}, i16 noext %{{.*}})
Expand Down Expand Up @@ -461,6 +480,30 @@ struct agg_8byte va_agg_8byte(__builtin_va_list l) { return __builtin_va_arg(l,
// CHECK: [[VA_ARG_ADDR:%[^ ]+]] = phi ptr [ [[RAW_REG_ADDR]], %{{.*}} ], [ [[RAW_MEM_ADDR]], %{{.*}} ]
// CHECK: ret void

struct agg___fp16 va_agg___fp16(__builtin_va_list l) { return __builtin_va_arg(l, struct agg___fp16); }
// CHECK-LABEL: define{{.*}} void @va_agg___fp16(ptr dead_on_unwind noalias writable sret(%struct.agg___fp16) align 2 %{{.*}}, ptr %{{.*}}
// HARD-FLOAT: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds nuw %struct.__va_list_tag, ptr %{{.*}}, i32 0, i32 1
// SOFT-FLOAT: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds nuw %struct.__va_list_tag, ptr %{{.*}}, i32 0, i32 0
// CHECK: [[REG_COUNT:%[^ ]+]] = load i64, ptr [[REG_COUNT_PTR]]
// HARD-FLOAT: [[FITS_IN_REGS:%[^ ]+]] = icmp ult i64 [[REG_COUNT]], 4
// SOFT-FLOAT: [[FITS_IN_REGS:%[^ ]+]] = icmp ult i64 [[REG_COUNT]], 5
// CHECK: br i1 [[FITS_IN_REGS]],
// CHECK: [[SCALED_REG_COUNT:%[^ ]+]] = mul i64 [[REG_COUNT]], 8
// HARD-FLOAT: [[REG_OFFSET:%[^ ]+]] = add i64 [[SCALED_REG_COUNT]], 128
// SOFT-FLOAT: [[REG_OFFSET:%[^ ]+]] = add i64 [[SCALED_REG_COUNT]], 22
// CHECK: [[REG_SAVE_AREA_PTR:%[^ ]+]] = getelementptr inbounds nuw %struct.__va_list_tag, ptr %{{.*}}, i32 0, i32 3
// CHECK: [[REG_SAVE_AREA:%[^ ]+]] = load ptr, ptr [[REG_SAVE_AREA_PTR:[^ ]+]]
// CHECK: [[RAW_REG_ADDR:%[^ ]+]] = getelementptr i8, ptr [[REG_SAVE_AREA]], i64 [[REG_OFFSET]]
// CHECK: [[REG_COUNT1:%[^ ]+]] = add i64 [[REG_COUNT]], 1
// CHECK: store i64 [[REG_COUNT1]], ptr [[REG_COUNT_PTR]]
// CHECK: [[OVERFLOW_ARG_AREA_PTR:%[^ ]+]] = getelementptr inbounds nuw %struct.__va_list_tag, ptr %{{.*}}, i32 0, i32 2
// CHECK: [[OVERFLOW_ARG_AREA:%[^ ]+]] = load ptr, ptr [[OVERFLOW_ARG_AREA_PTR]]
// CHECK: [[RAW_MEM_ADDR:%[^ ]+]] = getelementptr i8, ptr [[OVERFLOW_ARG_AREA]], i64 6
// CHECK: [[OVERFLOW_ARG_AREA2:%[^ ]+]] = getelementptr i8, ptr [[OVERFLOW_ARG_AREA]], i64 8
// CHECK: store ptr [[OVERFLOW_ARG_AREA2]], ptr [[OVERFLOW_ARG_AREA_PTR]]
// CHECK: [[VA_ARG_ADDR:%[^ ]+]] = phi ptr [ [[RAW_REG_ADDR]], %{{.*}} ], [ [[RAW_MEM_ADDR]], %{{.*}} ]
// CHECK: ret void

struct agg__Float16 va_agg__Float16(__builtin_va_list l) { return __builtin_va_arg(l, struct agg__Float16); }
// CHECK-LABEL: define{{.*}} void @va_agg__Float16(ptr dead_on_unwind noalias writable sret(%struct.agg__Float16) align 2 %{{.*}}, ptr %{{.*}}
// HARD-FLOAT: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds nuw %struct.__va_list_tag, ptr %{{.*}}, i32 0, i32 1
Expand Down
60 changes: 47 additions & 13 deletions llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -523,7 +523,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setTruncStoreAction(VT, MVT::f16, Expand);
}
setOperationAction(ISD::LOAD, MVT::f16, Custom);
setOperationAction(ISD::ATOMIC_LOAD, MVT::f16, Custom);
setOperationAction(ISD::STORE, MVT::f16, Custom);
setOperationAction(ISD::ATOMIC_STORE, MVT::f16, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
Expand Down Expand Up @@ -4596,6 +4598,22 @@ SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
}

// Custom lowering for ISD::ATOMIC_LOAD: dispatch 128-bit results to the
// i128 atomic load/store lowering; everything else that reaches this hook
// is handled by the f16 load lowering.
SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
                                                SelectionDAG &DAG) const {
  const bool IsI128 = Op.getSimpleValueType().getSizeInBits() == 128;
  return IsI128 ? lowerATOMIC_LDST_I128(Op, DAG) : lowerLoadF16(Op, DAG);
}

// Custom lowering for ISD::ATOMIC_STORE: the *memory* VT (not the operand
// VT) selects the path — 128-bit stores go to the i128 lowering, the rest
// to the f16 store lowering.
SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
                                                 SelectionDAG &DAG) const {
  auto *StoreNode = cast<AtomicSDNode>(Op.getNode());
  const bool IsI128 = StoreNode->getMemoryVT().getSizeInBits() == 128;
  return IsI128 ? lowerATOMIC_LDST_I128(Op, DAG) : lowerStoreF16(Op, DAG);
}

SDValue SystemZTargetLowering::lowerATOMIC_LDST_I128(SDValue Op,
SelectionDAG &DAG) const {
auto *Node = cast<AtomicSDNode>(Op.getNode());
Expand Down Expand Up @@ -6217,15 +6235,25 @@ SDValue SystemZTargetLowering::lowerLoadF16(SDValue Op,
MVT RegVT = Op.getSimpleValueType();
if (RegVT != MVT::f16)
return SDValue();
LoadSDNode *Ld = cast<LoadSDNode>(Op.getNode());
SDLoc DL(Ld);
assert(EVT(RegVT) == Ld->getMemoryVT() && "Expected non-extending f16 load");

SDLoc DL(Op);
SDValue NewLd;
if (auto *AtomicLd = dyn_cast<AtomicSDNode>(Op.getNode())) {
assert(EVT(RegVT) == AtomicLd->getMemoryVT() && "Unhandled f16 load");
NewLd = DAG.getAtomic(ISD::ATOMIC_LOAD, DL, MVT::i16, MVT::i32,
AtomicLd->getChain(), AtomicLd->getBasePtr(),
AtomicLd->getMemOperand());
cast<AtomicSDNode>(NewLd)->setExtensionType(ISD::EXTLOAD);
} else {
LoadSDNode *Ld = cast<LoadSDNode>(Op.getNode());
assert(EVT(RegVT) == Ld->getMemoryVT() && "Unhandled f16 load");
NewLd = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Ld->getChain(),
Ld->getBasePtr(), Ld->getPointerInfo(),
MVT::i16, Ld->getOriginalAlign(),
Ld->getMemOperand()->getFlags());
}
// Load as integer, shift and insert into upper 2 bytes of the FP register.
// TODO: Use VLEH if available.
SDValue NewLd = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Ld->getChain(),
Ld->getBasePtr(), Ld->getPointerInfo(),
MVT::i16, Ld->getOriginalAlign(),
Ld->getMemOperand()->getFlags());
SDValue Shft = DAG.getNode(ISD::SHL, DL, MVT::i32, NewLd,
DAG.getConstant(16, DL, MVT::i32));
SDValue BCast = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Shft);
Expand All @@ -6236,20 +6264,25 @@ SDValue SystemZTargetLowering::lowerLoadF16(SDValue Op,

SDValue SystemZTargetLowering::lowerStoreF16(SDValue Op,
SelectionDAG &DAG) const {
StoreSDNode *St = cast<StoreSDNode>(Op.getNode());
SDLoc DL(St);
SDValue StoredVal = St->getValue();
SDValue StoredVal = Op->getOperand(1);
MVT StoreVT = StoredVal.getSimpleValueType();
if (StoreVT != MVT::f16)
return SDValue();
// Move into a GPR, shift and store the 2 bytes.
// TODO: Use VSTEH if available.

// Move into a GPR, shift and store the 2 bytes. TODO: Use VSTEH if available.
SDLoc DL(Op);
SDNode *U32 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f32);
SDValue In32 = DAG.getTargetInsertSubreg(SystemZ::subreg_h16, DL,
MVT::f32, SDValue(U32, 0), StoredVal);
SDValue BCast = DAG.getNode(ISD::BITCAST, DL, MVT::i32, In32);
SDValue Shft = DAG.getNode(ISD::SRL, DL, MVT::i32, BCast,
DAG.getConstant(16, DL, MVT::i32));

if (auto *AtomicSt = dyn_cast<AtomicSDNode>(Op.getNode()))
return DAG.getAtomic(ISD::ATOMIC_STORE, DL, MVT::i16, AtomicSt->getChain(),
Shft, AtomicSt->getBasePtr(), AtomicSt->getMemOperand());

StoreSDNode *St = cast<StoreSDNode>(Op.getNode());
return DAG.getTruncStore(St->getChain(), DL, Shft, St->getBasePtr(),
MVT::i16, St->getMemOperand());
}
Expand Down Expand Up @@ -6373,8 +6406,9 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
case ISD::ATOMIC_SWAP:
return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
case ISD::ATOMIC_STORE:
return lowerATOMIC_STORE(Op, DAG);
case ISD::ATOMIC_LOAD:
return lowerATOMIC_LDST_I128(Op, DAG);
return lowerATOMIC_LOAD(Op, DAG);
case ISD::ATOMIC_LOAD_ADD:
return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
case ISD::ATOMIC_LOAD_SUB:
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/SystemZ/SystemZISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -698,6 +698,8 @@ class SystemZTargetLowering : public TargetLowering {
SDValue lowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVECREDUCE_ADD(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerATOMIC_LDST_I128(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerATOMIC_LOAD_OP(SDValue Op, SelectionDAG &DAG,
unsigned Opcode) const;
Expand Down
46 changes: 46 additions & 0 deletions llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -995,8 +995,31 @@ void SystemZInstrInfo::storeRegToStackSlot(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
bool isKill, int FrameIdx, const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI, Register VReg) const {
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();

// There are no fp16 load/store instructions, so need to save/restore via
// GPR (TODO: Use VSTEH in case of vector support).
if (RC == &SystemZ::FP16BitRegClass) {
assert(!MRI.isSSA() && MRI.getNumVirtRegs() &&
"Expected non-SSA form with virtual registers.");
Register GR64Reg = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
Register FP64Reg = MRI.createVirtualRegister(&SystemZ::FP64BitRegClass);
BuildMI(MBB, MBBI, DL, get(SystemZ::COPY))
.addReg(FP64Reg, RegState::DefineNoRead, SystemZ::subreg_h16)
.addReg(SrcReg, getKillRegState(isKill));
BuildMI(MBB, MBBI, DL, get(SystemZ::LGDR), GR64Reg)
.addReg(FP64Reg, RegState::Kill);
BuildMI(MBB, MBBI, DL, get(SystemZ::SRLG), GR64Reg)
.addReg(GR64Reg)
.addReg(0)
.addImm(48);
addFrameReference(BuildMI(MBB, MBBI, DL, get(SystemZ::STH))
.addReg(GR64Reg, RegState::Kill, SystemZ::subreg_l32),
FrameIdx);
return;
}

// Callers may expect a single instruction, so keep 128-bit moves
// together for now and lower them after register allocation.
unsigned LoadOpcode, StoreOpcode;
Expand All @@ -1012,8 +1035,31 @@ void SystemZInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI,
Register VReg) const {
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();

// There are no fp16 load/store instructions, so need to save/restore via
// GPR (TODO: Use VLEH in case of vector support).
if (RC == &SystemZ::FP16BitRegClass) {
assert(!MRI.isSSA() && MRI.getNumVirtRegs() &&
"Expected non-SSA form with virtual registers.");
Register GR64Reg = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
Register FP64Reg = MRI.createVirtualRegister(&SystemZ::FP64BitRegClass);
addFrameReference(BuildMI(MBB, MBBI, DL, get(SystemZ::LH))
.addReg(GR64Reg, RegState::DefineNoRead,
SystemZ::subreg_l32),
FrameIdx);
BuildMI(MBB, MBBI, DL, get(SystemZ::SLLG), GR64Reg)
.addReg(GR64Reg)
.addReg(0)
.addImm(48);
BuildMI(MBB, MBBI, DL, get(SystemZ::LDGR), FP64Reg)
.addReg(GR64Reg, RegState::Kill);
BuildMI(MBB, MBBI, DL, get(SystemZ::COPY), DestReg)
.addReg(FP64Reg, RegState::Kill, SystemZ::subreg_h16);
return;
}

// Callers may expect a single instruction, so keep 128-bit moves
// together for now and lower them after register allocation.
unsigned LoadOpcode, StoreOpcode;
Expand Down
16 changes: 16 additions & 0 deletions llvm/test/CodeGen/SystemZ/atomic-load-10.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; Test fp16 atomic loads.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s

; A seq_cst atomic f16 load is emitted as an integer halfword load (lh),
; shifted to the top 16 bits of a GPR (sllg 48) and moved into the high
; half of an FPR via ldgr, per the checks below.
define half @f1(ptr %src) {
; CHECK-LABEL: f1:
; CHECK: # %bb.0:
; CHECK-NEXT: lh %r0, 0(%r2)
; CHECK-NEXT: sllg %r0, %r0, 48
; CHECK-NEXT: ldgr %f0, %r0
; CHECK-NEXT: # kill: def $f0h killed $f0h killed $f0d
; CHECK-NEXT: br %r14
%val = load atomic half, ptr %src seq_cst, align 2
ret half %val
}
17 changes: 17 additions & 0 deletions llvm/test/CodeGen/SystemZ/atomic-store-10.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; Test half atomic stores.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s

; A seq_cst atomic f16 store moves the value FPR->GPR (lgdr), shifts the
; halfword down from the top (srlg 48), stores it with sth, and is followed
; by a bcr 15, %r0 serialization, per the checks below.
define void @f1(ptr %src, half %val) {
; CHECK-LABEL: f1:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $f0h killed $f0h def $f0d
; CHECK-NEXT: lgdr %r0, %f0
; CHECK-NEXT: srlg %r0, %r0, 48
; CHECK-NEXT: sth %r0, 0(%r2)
; CHECK-NEXT: bcr 15, %r0
; CHECK-NEXT: br %r14
store atomic half %val, ptr %src seq_cst, align 2
ret void
}
}
Loading

0 comments on commit 26660a6

Please sign in to comment.