diff --git a/llvm/docs/GlobalISel/GenericOpcode.rst b/llvm/docs/GlobalISel/GenericOpcode.rst index ee374ef66539..cadc80534266 100644 --- a/llvm/docs/GlobalISel/GenericOpcode.rst +++ b/llvm/docs/GlobalISel/GenericOpcode.rst @@ -655,10 +655,10 @@ G_VECREDUCE_FADD, G_VECREDUCE_FMUL These reductions are relaxed variants which may reduce the elements in any order. -G_VECREDUCE_FMAX, G_VECREDUCE_FMIN +G_VECREDUCE_FMAX, G_VECREDUCE_FMIN, G_VECREDUCE_FMAXIMUM, G_VECREDUCE_FMINIMUM -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -FMIN/FMAX nodes can have flags, for NaN/NoNaN variants. +FMIN/FMAX/FMINIMUM/FMAXIMUM nodes can have flags, for NaN/NoNaN variants. Integer/bitwise reductions diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h index ef2fca2e8ef4..ee515e469762 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h @@ -409,6 +409,8 @@ class GVecReduce : public GenericMachineInstr { case TargetOpcode::G_VECREDUCE_FMUL: case TargetOpcode::G_VECREDUCE_FMAX: case TargetOpcode::G_VECREDUCE_FMIN: + case TargetOpcode::G_VECREDUCE_FMAXIMUM: + case TargetOpcode::G_VECREDUCE_FMINIMUM: case TargetOpcode::G_VECREDUCE_ADD: case TargetOpcode::G_VECREDUCE_MUL: case TargetOpcode::G_VECREDUCE_AND: @@ -441,6 +443,12 @@ class GVecReduce : public GenericMachineInstr { case TargetOpcode::G_VECREDUCE_FMIN: ScalarOpc = TargetOpcode::G_FMINNUM; break; + case TargetOpcode::G_VECREDUCE_FMAXIMUM: + ScalarOpc = TargetOpcode::G_FMAXIMUM; + break; + case TargetOpcode::G_VECREDUCE_FMINIMUM: + ScalarOpc = TargetOpcode::G_FMINIMUM; + break; case TargetOpcode::G_VECREDUCE_ADD: ScalarOpc = TargetOpcode::G_ADD; break; diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index 46d6eb63bfa6..fbe920e6b325 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ 
b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -1973,6 +1973,19 @@ class MachineIRBuilder { MachineInstrBuilder buildVecReduceFMin(const DstOp &Dst, const SrcOp &Src) { return buildInstr(TargetOpcode::G_VECREDUCE_FMIN, {Dst}, {Src}); } + + /// Build and insert \p Res = G_VECREDUCE_FMAXIMUM \p Src + MachineInstrBuilder buildVecReduceFMaximum(const DstOp &Dst, + const SrcOp &Src) { + return buildInstr(TargetOpcode::G_VECREDUCE_FMAXIMUM, {Dst}, {Src}); + } + + /// Build and insert \p Res = G_VECREDUCE_FMINIMUM \p Src + MachineInstrBuilder buildVecReduceFMinimum(const DstOp &Dst, + const SrcOp &Src) { + return buildInstr(TargetOpcode::G_VECREDUCE_FMINIMUM, {Dst}, {Src}); + } + /// Build and insert \p Res = G_VECREDUCE_ADD \p Src MachineInstrBuilder buildVecReduceAdd(const DstOp &Dst, const SrcOp &Src) { return buildInstr(TargetOpcode::G_VECREDUCE_ADD, {Dst}, {Src}); diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h index d5c1fd8d0d51..5be67eb013b1 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h @@ -57,6 +57,8 @@ class APFloat; case TargetOpcode::G_VECREDUCE_FMUL: \ case TargetOpcode::G_VECREDUCE_FMAX: \ case TargetOpcode::G_VECREDUCE_FMIN: \ + case TargetOpcode::G_VECREDUCE_FMAXIMUM: \ + case TargetOpcode::G_VECREDUCE_FMINIMUM: \ case TargetOpcode::G_VECREDUCE_ADD: \ case TargetOpcode::G_VECREDUCE_MUL: \ case TargetOpcode::G_VECREDUCE_AND: \ @@ -72,6 +74,8 @@ class APFloat; case TargetOpcode::G_VECREDUCE_FMUL: \ case TargetOpcode::G_VECREDUCE_FMAX: \ case TargetOpcode::G_VECREDUCE_FMIN: \ + case TargetOpcode::G_VECREDUCE_FMAXIMUM: \ + case TargetOpcode::G_VECREDUCE_FMINIMUM: \ case TargetOpcode::G_VECREDUCE_ADD: \ case TargetOpcode::G_VECREDUCE_MUL: \ case TargetOpcode::G_VECREDUCE_AND: \ diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def index 623f3ce91e4e..db7888b043b4 100644 --- 
a/llvm/include/llvm/Support/TargetOpcodes.def +++ b/llvm/include/llvm/Support/TargetOpcodes.def @@ -811,6 +811,8 @@ HANDLE_TARGET_OPCODE(G_VECREDUCE_FADD) HANDLE_TARGET_OPCODE(G_VECREDUCE_FMUL) HANDLE_TARGET_OPCODE(G_VECREDUCE_FMAX) HANDLE_TARGET_OPCODE(G_VECREDUCE_FMIN) +HANDLE_TARGET_OPCODE(G_VECREDUCE_FMAXIMUM) +HANDLE_TARGET_OPCODE(G_VECREDUCE_FMINIMUM) HANDLE_TARGET_OPCODE(G_VECREDUCE_ADD) HANDLE_TARGET_OPCODE(G_VECREDUCE_MUL) HANDLE_TARGET_OPCODE(G_VECREDUCE_AND) diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td index 4782d9d7b798..db40f7595e55 100644 --- a/llvm/include/llvm/Target/GenericOpcodes.td +++ b/llvm/include/llvm/Target/GenericOpcodes.td @@ -1398,6 +1398,8 @@ def G_VECREDUCE_FMUL : VectorReduction; def G_VECREDUCE_FMAX : VectorReduction; def G_VECREDUCE_FMIN : VectorReduction; +def G_VECREDUCE_FMAXIMUM : VectorReduction; +def G_VECREDUCE_FMINIMUM : VectorReduction; def G_VECREDUCE_ADD : VectorReduction; def G_VECREDUCE_MUL : VectorReduction; diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td index 9e3a3e25d8ee..c306be094057 100644 --- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -165,6 +165,8 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 482057f167e2..939e0c6af461 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1797,6 +1797,10 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) { return TargetOpcode::G_VECREDUCE_FMIN; case Intrinsic::vector_reduce_fmax: return TargetOpcode::G_VECREDUCE_FMAX; + case Intrinsic::vector_reduce_fminimum: + return 
TargetOpcode::G_VECREDUCE_FMINIMUM; + case Intrinsic::vector_reduce_fmaximum: + return TargetOpcode::G_VECREDUCE_FMAXIMUM; case Intrinsic::vector_reduce_add: return TargetOpcode::G_VECREDUCE_ADD; case Intrinsic::vector_reduce_mul: diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 59daea97abd9..1ab571fd6663 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -2649,6 +2649,8 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { } case TargetOpcode::G_VECREDUCE_FMIN: case TargetOpcode::G_VECREDUCE_FMAX: + case TargetOpcode::G_VECREDUCE_FMINIMUM: + case TargetOpcode::G_VECREDUCE_FMAXIMUM: if (TypeIdx != 0) return UnableToLegalize; Observer.changingInstr(MI); diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index af235f54401f..7baac14f9d31 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -1720,6 +1720,8 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { case TargetOpcode::G_VECREDUCE_FMUL: case TargetOpcode::G_VECREDUCE_FMAX: case TargetOpcode::G_VECREDUCE_FMIN: + case TargetOpcode::G_VECREDUCE_FMAXIMUM: + case TargetOpcode::G_VECREDUCE_FMINIMUM: case TargetOpcode::G_VECREDUCE_ADD: case TargetOpcode::G_VECREDUCE_MUL: case TargetOpcode::G_VECREDUCE_AND: diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index e9865e951a15..3e6ee6655f43 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -850,7 +850,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .clampMaxNumElements(1, s32, 4) .lower(); - getActionDefinitionsBuilder({G_VECREDUCE_FMIN, G_VECREDUCE_FMAX}) + getActionDefinitionsBuilder({G_VECREDUCE_FMIN, 
G_VECREDUCE_FMAX, + G_VECREDUCE_FMINIMUM, G_VECREDUCE_FMAXIMUM}) .legalFor({{s32, v4s32}, {s32, v2s32}, {s64, v2s64}}) .legalIf([=](const LegalityQuery &Query) { const auto &Ty = Query.Types[1]; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp index 1cc1a4907818..0abb9f4d4c78 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp @@ -996,6 +996,8 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case TargetOpcode::G_VECREDUCE_FMUL: case TargetOpcode::G_VECREDUCE_FMAX: case TargetOpcode::G_VECREDUCE_FMIN: + case TargetOpcode::G_VECREDUCE_FMAXIMUM: + case TargetOpcode::G_VECREDUCE_FMINIMUM: case TargetOpcode::G_VECREDUCE_ADD: case TargetOpcode::G_VECREDUCE_MUL: case TargetOpcode::G_VECREDUCE_AND: diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-reductions.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-reductions.ll index 4c1ed5d61e7c..16762dc4fd3f 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-reductions.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-reductions.ll @@ -66,6 +66,8 @@ define double @fmul_fast(double %start, <4 x double> %vec) { declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>) declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>) +declare float @llvm.vector.reduce.fmaximum.v4f32(<4 x float>) +declare float @llvm.vector.reduce.fminimum.v4f32(<4 x float>) define float @fmax(<4 x float> %vec) { ; CHECK-LABEL: name: fmax @@ -106,6 +108,45 @@ define float @fmin_nnan(<4 x float> %vec) { ret float %res } +define float @fmaximum(<4 x float> %vec) { + ; CHECK-LABEL: name: fmaximum + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>) + ; CHECK: [[VECREDUCE_FMAX:%[0-9]+]]:_(s32) = 
G_VECREDUCE_FMAXIMUM [[BITCAST]](<4 x s32>) + ; CHECK: $s0 = COPY [[VECREDUCE_FMAX]](s32) + ; CHECK: RET_ReallyLR implicit $s0 + %res = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> %vec) + ret float %res +} + +define float @fminimum(<4 x float> %vec) { + ; CHECK-LABEL: name: fminimum + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>) + ; CHECK: [[VECREDUCE_FMIN:%[0-9]+]]:_(s32) = G_VECREDUCE_FMINIMUM [[BITCAST]](<4 x s32>) + ; CHECK: $s0 = COPY [[VECREDUCE_FMIN]](s32) + ; CHECK: RET_ReallyLR implicit $s0 + %res = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> %vec) + ret float %res +} + +define float @fminimum_nnan(<4 x float> %vec) { + ; CHECK-LABEL: name: fminimum_nnan + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>) + ; CHECK: [[VECREDUCE_FMIN:%[0-9]+]]:_(s32) = nnan G_VECREDUCE_FMINIMUM [[BITCAST]](<4 x s32>) + ; CHECK: $s0 = COPY [[VECREDUCE_FMIN]](s32) + ; CHECK: RET_ReallyLR implicit $s0 + %res = call nnan float @llvm.vector.reduce.fminimum.v4f32(<4 x float> %vec) + ret float %res +} + declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) define i32 @add(<4 x i32> %vec) { diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-reduce-fminmax.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-reduce-fminmax.mir new file mode 100644 index 000000000000..b1cb0d91d017 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-reduce-fminmax.mir @@ -0,0 +1,93 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -O0 -mtriple=aarch64 -run-pass=legalizer -global-isel-abort=1 %s -o - | FileCheck %s + +--- +name: fmin_v2s32 +tracksRegLiveness: true +body: | + bb.1: + liveins: $d0 + + ; CHECK-LABEL: name: fmin_v2s32 + ; CHECK: liveins: 
$d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; CHECK-NEXT: [[VECREDUCE_FMIN:%[0-9]+]]:_(s32) = G_VECREDUCE_FMIN [[COPY]](<2 x s32>) + ; CHECK-NEXT: $s0 = COPY [[VECREDUCE_FMIN]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $s0 + %0:_(<2 x s32>) = COPY $d0 + %1:_(s32) = G_VECREDUCE_FMIN %0(<2 x s32>) + $s0 = COPY %1(s32) + RET_ReallyLR implicit $s0 + +... +--- +name: fmax_v8s16 +tracksRegLiveness: true +body: | + bb.1: + liveins: $q0 + + ; CHECK-LABEL: name: fmax_v8s16 + ; CHECK: liveins: $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) + ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV]](<4 x s16>) + ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV1]](<4 x s16>) + ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:_(<4 x s32>) = G_FMAXNUM [[FPEXT]], [[FPEXT1]] + ; CHECK-NEXT: [[VECREDUCE_FMAX:%[0-9]+]]:_(s32) = G_VECREDUCE_FMAX [[FMAXNUM]](<4 x s32>) + ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[VECREDUCE_FMAX]](s32) + ; CHECK-NEXT: $h0 = COPY [[FPTRUNC]](s16) + ; CHECK-NEXT: RET_ReallyLR implicit $h0 + %0:_(<8 x s16>) = COPY $q0 + %1:_(s16) = G_VECREDUCE_FMAX %0(<8 x s16>) + $h0 = COPY %1(s16) + RET_ReallyLR implicit $h0 + +... +--- +name: fminimum_v2s32 +tracksRegLiveness: true +body: | + bb.1: + liveins: $d0 + + ; CHECK-LABEL: name: fminimum_v2s32 + ; CHECK: liveins: $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; CHECK-NEXT: [[VECREDUCE_FMINIMUM:%[0-9]+]]:_(s32) = G_VECREDUCE_FMINIMUM [[COPY]](<2 x s32>) + ; CHECK-NEXT: $s0 = COPY [[VECREDUCE_FMINIMUM]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $s0 + %0:_(<2 x s32>) = COPY $d0 + %1:_(s32) = G_VECREDUCE_FMINIMUM %0(<2 x s32>) + $s0 = COPY %1(s32) + RET_ReallyLR implicit $s0 + +... 
+--- +name: fmaximum_v8s16 +tracksRegLiveness: true +body: | + bb.1: + liveins: $q0 + + ; CHECK-LABEL: name: fmaximum_v8s16 + ; CHECK: liveins: $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) + ; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV]](<4 x s16>) + ; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV1]](<4 x s16>) + ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(<4 x s32>) = G_FMAXIMUM [[FPEXT]], [[FPEXT1]] + ; CHECK-NEXT: [[VECREDUCE_FMAXIMUM:%[0-9]+]]:_(s32) = G_VECREDUCE_FMAXIMUM [[FMAXIMUM]](<4 x s32>) + ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[VECREDUCE_FMAXIMUM]](s32) + ; CHECK-NEXT: $h0 = COPY [[FPTRUNC]](s16) + ; CHECK-NEXT: RET_ReallyLR implicit $h0 + %0:_(<8 x s16>) = COPY $q0 + %1:_(s16) = G_VECREDUCE_FMAXIMUM %0(<8 x s16>) + $h0 = COPY %1(s16) + RET_ReallyLR implicit $h0 + +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir index 410bfc12f83e..7ef8212c5ffb 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -726,6 +726,14 @@ # DEBUG-NEXT: G_VECREDUCE_FMIN (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: G_VECREDUCE_FMAXIMUM (opcode {{[0-9]+}}): 2 type indices, 0 imm indices +# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} +# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. 
imm index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: G_VECREDUCE_FMINIMUM (opcode {{[0-9]+}}): 2 type indices, 0 imm indices +# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} +# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_VECREDUCE_ADD (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbankselect-reductions.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbankselect-reductions.mir index 4c87dc024d80..1c043da54215 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/regbankselect-reductions.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbankselect-reductions.mir @@ -41,3 +41,24 @@ body: | RET_ReallyLR implicit $w0 ... +--- +name: fmaximum_v4s32 +legalized: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $q0 + + ; CHECK-LABEL: name: fmaximum_v4s32 + ; CHECK: liveins: $q0 + ; CHECK: [[COPY:%[0-9]+]]:fpr(<4 x s32>) = COPY $q0 + ; CHECK: [[VECREDUCE_FMAXIMUM:%[0-9]+]]:fpr(s32) = G_VECREDUCE_FMAXIMUM [[COPY]](<4 x s32>) + ; CHECK: $w0 = COPY [[VECREDUCE_FMAXIMUM]](s32) + ; CHECK: RET_ReallyLR implicit $w0 + %0:_(<4 x s32>) = COPY $q0 + %1:_(s32) = G_VECREDUCE_FMAXIMUM %0(<4 x s32>) + $w0 = COPY %1(s32) + RET_ReallyLR implicit $w0 + +... 
+ diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll index 1118a17d3694..49270c427407 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll @@ -1,6 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOFP -; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon,+fullfp16 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOFP --check-prefix=CHECK-NOFP-SD +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon,+fullfp16 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP --check-prefix=CHECK-FP-SD +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOFP --check-prefix=CHECK-NOFP-GI +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP --check-prefix=CHECK-FP-GI + +; CHECK-NOFP-GI: warning: Instruction selection used fallback path for test_v11f16 +; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32 +; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32_ninf +; +; CHECK-FP-GI: warning: Instruction selection used fallback path for test_v11f16 +; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32 +; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32_ninf declare half @llvm.vector.reduce.fmaximum.v1f16(<1 x half> %a) declare float @llvm.vector.reduce.fmaximum.v1f32(<1 x float> %a) @@ -30,11 +40,29 @@ define 
half @test_v1f16(<1 x half> %a) nounwind { } define float @test_v1f32(<1 x float> %a) nounwind { -; CHECK-LABEL: test_v1f32: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 -; CHECK-NEXT: ret +; CHECK-NOFP-SD-LABEL: test_v1f32: +; CHECK-NOFP-SD: // %bb.0: +; CHECK-NOFP-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NOFP-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NOFP-SD-NEXT: ret +; +; CHECK-FP-SD-LABEL: test_v1f32: +; CHECK-FP-SD: // %bb.0: +; CHECK-FP-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-FP-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-FP-SD-NEXT: ret +; +; CHECK-NOFP-GI-LABEL: test_v1f32: +; CHECK-NOFP-GI: // %bb.0: +; CHECK-NOFP-GI-NEXT: fmov x8, d0 +; CHECK-NOFP-GI-NEXT: fmov s0, w8 +; CHECK-NOFP-GI-NEXT: ret +; +; CHECK-FP-GI-LABEL: test_v1f32: +; CHECK-FP-GI: // %bb.0: +; CHECK-FP-GI-NEXT: fmov x8, d0 +; CHECK-FP-GI-NEXT: fmov s0, w8 +; CHECK-FP-GI-NEXT: ret %b = call float @llvm.vector.reduce.fmaximum.v1f32(<1 x float> %a) ret float %b } @@ -56,166 +84,195 @@ define fp128 @test_v1f128(<1 x fp128> %a) nounwind { } define half @test_v4f16(<4 x half> %a) nounwind { -; CHECK-NOFP-LABEL: test_v4f16: -; CHECK-NOFP: // %bb.0: -; CHECK-NOFP-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NOFP-NEXT: mov h1, v0.h[1] -; CHECK-NOFP-NEXT: fcvt s2, h0 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmax s1, s2, s1 -; CHECK-NOFP-NEXT: mov h2, v0.h[2] -; CHECK-NOFP-NEXT: mov h0, v0.h[3] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s0, h0 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmax s1, s1, s2 -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmax s0, s1, s0 -; CHECK-NOFP-NEXT: fcvt h0, s0 -; CHECK-NOFP-NEXT: ret +; CHECK-NOFP-SD-LABEL: test_v4f16: +; CHECK-NOFP-SD: // %bb.0: +; CHECK-NOFP-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; 
CHECK-NOFP-SD-NEXT: mov h1, v0.h[1] +; CHECK-NOFP-SD-NEXT: fcvt s2, h0 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmax s1, s2, s1 +; CHECK-NOFP-SD-NEXT: mov h2, v0.h[2] +; CHECK-NOFP-SD-NEXT: mov h0, v0.h[3] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s0, h0 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmax s1, s1, s2 +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmax s0, s1, s0 +; CHECK-NOFP-SD-NEXT: fcvt h0, s0 +; CHECK-NOFP-SD-NEXT: ret ; ; CHECK-FP-LABEL: test_v4f16: ; CHECK-FP: // %bb.0: ; CHECK-FP-NEXT: fmaxv h0, v0.4h ; CHECK-FP-NEXT: ret +; +; CHECK-NOFP-GI-LABEL: test_v4f16: +; CHECK-NOFP-GI: // %bb.0: +; CHECK-NOFP-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NOFP-GI-NEXT: fmaxv s0, v0.4s +; CHECK-NOFP-GI-NEXT: fcvt h0, s0 +; CHECK-NOFP-GI-NEXT: ret %b = call half @llvm.vector.reduce.fmaximum.v4f16(<4 x half> %a) ret half %b } define half @test_v8f16(<8 x half> %a) nounwind { -; CHECK-NOFP-LABEL: test_v8f16: -; CHECK-NOFP: // %bb.0: -; CHECK-NOFP-NEXT: mov h1, v0.h[1] -; CHECK-NOFP-NEXT: fcvt s2, h0 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmax s1, s2, s1 -; CHECK-NOFP-NEXT: mov h2, v0.h[2] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmax s1, s1, s2 -; CHECK-NOFP-NEXT: mov h2, v0.h[3] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmax s1, s1, s2 -; CHECK-NOFP-NEXT: mov h2, v0.h[4] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmax s1, s1, s2 -; CHECK-NOFP-NEXT: mov h2, v0.h[5] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmax s1, s1, s2 -; CHECK-NOFP-NEXT: mov h2, v0.h[6] -; CHECK-NOFP-NEXT: mov h0, v0.h[7] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; 
CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s0, h0 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmax s1, s1, s2 -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmax s0, s1, s0 -; CHECK-NOFP-NEXT: fcvt h0, s0 -; CHECK-NOFP-NEXT: ret +; CHECK-NOFP-SD-LABEL: test_v8f16: +; CHECK-NOFP-SD: // %bb.0: +; CHECK-NOFP-SD-NEXT: mov h1, v0.h[1] +; CHECK-NOFP-SD-NEXT: fcvt s2, h0 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmax s1, s2, s1 +; CHECK-NOFP-SD-NEXT: mov h2, v0.h[2] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmax s1, s1, s2 +; CHECK-NOFP-SD-NEXT: mov h2, v0.h[3] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmax s1, s1, s2 +; CHECK-NOFP-SD-NEXT: mov h2, v0.h[4] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmax s1, s1, s2 +; CHECK-NOFP-SD-NEXT: mov h2, v0.h[5] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmax s1, s1, s2 +; CHECK-NOFP-SD-NEXT: mov h2, v0.h[6] +; CHECK-NOFP-SD-NEXT: mov h0, v0.h[7] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s0, h0 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmax s1, s1, s2 +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmax s0, s1, s0 +; CHECK-NOFP-SD-NEXT: fcvt h0, s0 +; CHECK-NOFP-SD-NEXT: ret ; ; CHECK-FP-LABEL: test_v8f16: ; CHECK-FP: // %bb.0: ; CHECK-FP-NEXT: fmaxv h0, v0.8h ; CHECK-FP-NEXT: ret +; +; CHECK-NOFP-GI-LABEL: test_v8f16: +; CHECK-NOFP-GI: // %bb.0: +; CHECK-NOFP-GI-NEXT: fcvtl v1.4s, v0.4h +; CHECK-NOFP-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NOFP-GI-NEXT: fmax v0.4s, v1.4s, v0.4s +; CHECK-NOFP-GI-NEXT: fmaxv s0, 
v0.4s +; CHECK-NOFP-GI-NEXT: fcvt h0, s0 +; CHECK-NOFP-GI-NEXT: ret %b = call nnan half @llvm.vector.reduce.fmaximum.v8f16(<8 x half> %a) ret half %b } define half @test_v16f16(<16 x half> %a) nounwind { -; CHECK-NOFP-LABEL: test_v16f16: -; CHECK-NOFP: // %bb.0: -; CHECK-NOFP-NEXT: mov h2, v1.h[1] -; CHECK-NOFP-NEXT: mov h3, v0.h[1] -; CHECK-NOFP-NEXT: fcvt s4, h1 -; CHECK-NOFP-NEXT: fcvt s5, h0 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fmax s4, s5, s4 -; CHECK-NOFP-NEXT: mov h5, v0.h[2] -; CHECK-NOFP-NEXT: fmax s2, s3, s2 -; CHECK-NOFP-NEXT: mov h3, v1.h[2] -; CHECK-NOFP-NEXT: fcvt h4, s4 -; CHECK-NOFP-NEXT: fcvt s5, h5 -; CHECK-NOFP-NEXT: fcvt h2, s2 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fcvt s4, h4 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fmax s3, s5, s3 -; CHECK-NOFP-NEXT: mov h5, v0.h[3] -; CHECK-NOFP-NEXT: fmax s2, s4, s2 -; CHECK-NOFP-NEXT: mov h4, v1.h[3] -; CHECK-NOFP-NEXT: fcvt h3, s3 -; CHECK-NOFP-NEXT: fcvt s5, h5 -; CHECK-NOFP-NEXT: fcvt h2, s2 -; CHECK-NOFP-NEXT: fcvt s4, h4 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fmax s4, s5, s4 -; CHECK-NOFP-NEXT: mov h5, v0.h[4] -; CHECK-NOFP-NEXT: fmax s2, s2, s3 -; CHECK-NOFP-NEXT: mov h3, v1.h[4] -; CHECK-NOFP-NEXT: fcvt h4, s4 -; CHECK-NOFP-NEXT: fcvt s5, h5 -; CHECK-NOFP-NEXT: fcvt h2, s2 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fcvt s4, h4 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fmax s3, s5, s3 -; CHECK-NOFP-NEXT: mov h5, v0.h[5] -; CHECK-NOFP-NEXT: fmax s2, s2, s4 -; CHECK-NOFP-NEXT: mov h4, v1.h[5] -; CHECK-NOFP-NEXT: fcvt h3, s3 -; CHECK-NOFP-NEXT: fcvt s5, h5 -; CHECK-NOFP-NEXT: fcvt h2, s2 -; CHECK-NOFP-NEXT: fcvt s4, h4 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fmax s4, s5, s4 -; CHECK-NOFP-NEXT: mov h5, v0.h[6] -; CHECK-NOFP-NEXT: mov h0, v0.h[7] -; CHECK-NOFP-NEXT: fmax s2, s2, s3 -; CHECK-NOFP-NEXT: mov h3, v1.h[6] -; 
CHECK-NOFP-NEXT: fcvt h4, s4 -; CHECK-NOFP-NEXT: fcvt s5, h5 -; CHECK-NOFP-NEXT: mov h1, v1.h[7] -; CHECK-NOFP-NEXT: fcvt s0, h0 -; CHECK-NOFP-NEXT: fcvt h2, s2 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fcvt s4, h4 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fmax s3, s5, s3 -; CHECK-NOFP-NEXT: fmax s0, s0, s1 -; CHECK-NOFP-NEXT: fmax s2, s2, s4 -; CHECK-NOFP-NEXT: fcvt h3, s3 -; CHECK-NOFP-NEXT: fcvt h0, s0 -; CHECK-NOFP-NEXT: fcvt h2, s2 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fcvt s0, h0 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fmax s2, s2, s3 -; CHECK-NOFP-NEXT: fcvt h1, s2 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmax s0, s1, s0 -; CHECK-NOFP-NEXT: fcvt h0, s0 -; CHECK-NOFP-NEXT: ret +; CHECK-NOFP-SD-LABEL: test_v16f16: +; CHECK-NOFP-SD: // %bb.0: +; CHECK-NOFP-SD-NEXT: mov h2, v1.h[1] +; CHECK-NOFP-SD-NEXT: mov h3, v0.h[1] +; CHECK-NOFP-SD-NEXT: fcvt s4, h1 +; CHECK-NOFP-SD-NEXT: fcvt s5, h0 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fmax s4, s5, s4 +; CHECK-NOFP-SD-NEXT: mov h5, v0.h[2] +; CHECK-NOFP-SD-NEXT: fmax s2, s3, s2 +; CHECK-NOFP-SD-NEXT: mov h3, v1.h[2] +; CHECK-NOFP-SD-NEXT: fcvt h4, s4 +; CHECK-NOFP-SD-NEXT: fcvt s5, h5 +; CHECK-NOFP-SD-NEXT: fcvt h2, s2 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fcvt s4, h4 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fmax s3, s5, s3 +; CHECK-NOFP-SD-NEXT: mov h5, v0.h[3] +; CHECK-NOFP-SD-NEXT: fmax s2, s4, s2 +; CHECK-NOFP-SD-NEXT: mov h4, v1.h[3] +; CHECK-NOFP-SD-NEXT: fcvt h3, s3 +; CHECK-NOFP-SD-NEXT: fcvt s5, h5 +; CHECK-NOFP-SD-NEXT: fcvt h2, s2 +; CHECK-NOFP-SD-NEXT: fcvt s4, h4 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fmax s4, s5, s4 +; CHECK-NOFP-SD-NEXT: mov h5, v0.h[4] +; CHECK-NOFP-SD-NEXT: fmax s2, s2, s3 +; CHECK-NOFP-SD-NEXT: mov h3, v1.h[4] +; CHECK-NOFP-SD-NEXT: fcvt h4, s4 +; 
CHECK-NOFP-SD-NEXT: fcvt s5, h5 +; CHECK-NOFP-SD-NEXT: fcvt h2, s2 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fcvt s4, h4 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fmax s3, s5, s3 +; CHECK-NOFP-SD-NEXT: mov h5, v0.h[5] +; CHECK-NOFP-SD-NEXT: fmax s2, s2, s4 +; CHECK-NOFP-SD-NEXT: mov h4, v1.h[5] +; CHECK-NOFP-SD-NEXT: fcvt h3, s3 +; CHECK-NOFP-SD-NEXT: fcvt s5, h5 +; CHECK-NOFP-SD-NEXT: fcvt h2, s2 +; CHECK-NOFP-SD-NEXT: fcvt s4, h4 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fmax s4, s5, s4 +; CHECK-NOFP-SD-NEXT: mov h5, v0.h[6] +; CHECK-NOFP-SD-NEXT: mov h0, v0.h[7] +; CHECK-NOFP-SD-NEXT: fmax s2, s2, s3 +; CHECK-NOFP-SD-NEXT: mov h3, v1.h[6] +; CHECK-NOFP-SD-NEXT: fcvt h4, s4 +; CHECK-NOFP-SD-NEXT: fcvt s5, h5 +; CHECK-NOFP-SD-NEXT: mov h1, v1.h[7] +; CHECK-NOFP-SD-NEXT: fcvt s0, h0 +; CHECK-NOFP-SD-NEXT: fcvt h2, s2 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fcvt s4, h4 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fmax s3, s5, s3 +; CHECK-NOFP-SD-NEXT: fmax s0, s0, s1 +; CHECK-NOFP-SD-NEXT: fmax s2, s2, s4 +; CHECK-NOFP-SD-NEXT: fcvt h3, s3 +; CHECK-NOFP-SD-NEXT: fcvt h0, s0 +; CHECK-NOFP-SD-NEXT: fcvt h2, s2 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fcvt s0, h0 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fmax s2, s2, s3 +; CHECK-NOFP-SD-NEXT: fcvt h1, s2 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmax s0, s1, s0 +; CHECK-NOFP-SD-NEXT: fcvt h0, s0 +; CHECK-NOFP-SD-NEXT: ret ; ; CHECK-FP-LABEL: test_v16f16: ; CHECK-FP: // %bb.0: ; CHECK-FP-NEXT: fmax v0.8h, v0.8h, v1.8h ; CHECK-FP-NEXT: fmaxv h0, v0.8h ; CHECK-FP-NEXT: ret +; +; CHECK-NOFP-GI-LABEL: test_v16f16: +; CHECK-NOFP-GI: // %bb.0: +; CHECK-NOFP-GI-NEXT: fcvtl v2.4s, v0.4h +; CHECK-NOFP-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NOFP-GI-NEXT: fcvtl v3.4s, v1.4h +; CHECK-NOFP-GI-NEXT: fcvtl2 v1.4s, v1.8h +; 
CHECK-NOFP-GI-NEXT: fmax v0.4s, v2.4s, v0.4s +; CHECK-NOFP-GI-NEXT: fmax v1.4s, v3.4s, v1.4s +; CHECK-NOFP-GI-NEXT: fmax v0.4s, v0.4s, v1.4s +; CHECK-NOFP-GI-NEXT: fmaxv s0, v0.4s +; CHECK-NOFP-GI-NEXT: fcvt h0, s0 +; CHECK-NOFP-GI-NEXT: ret %b = call nnan half @llvm.vector.reduce.fmaximum.v16f16(<16 x half> %a) ret half %b } @@ -249,13 +306,37 @@ define float @test_v8f32(<8 x float> %a) nounwind { } define float @test_v16f32(<16 x float> %a) nounwind { -; CHECK-LABEL: test_v16f32: -; CHECK: // %bb.0: -; CHECK-NEXT: fmax v1.4s, v1.4s, v3.4s -; CHECK-NEXT: fmax v0.4s, v0.4s, v2.4s -; CHECK-NEXT: fmax v0.4s, v0.4s, v1.4s -; CHECK-NEXT: fmaxv s0, v0.4s -; CHECK-NEXT: ret +; CHECK-NOFP-SD-LABEL: test_v16f32: +; CHECK-NOFP-SD: // %bb.0: +; CHECK-NOFP-SD-NEXT: fmax v1.4s, v1.4s, v3.4s +; CHECK-NOFP-SD-NEXT: fmax v0.4s, v0.4s, v2.4s +; CHECK-NOFP-SD-NEXT: fmax v0.4s, v0.4s, v1.4s +; CHECK-NOFP-SD-NEXT: fmaxv s0, v0.4s +; CHECK-NOFP-SD-NEXT: ret +; +; CHECK-FP-SD-LABEL: test_v16f32: +; CHECK-FP-SD: // %bb.0: +; CHECK-FP-SD-NEXT: fmax v1.4s, v1.4s, v3.4s +; CHECK-FP-SD-NEXT: fmax v0.4s, v0.4s, v2.4s +; CHECK-FP-SD-NEXT: fmax v0.4s, v0.4s, v1.4s +; CHECK-FP-SD-NEXT: fmaxv s0, v0.4s +; CHECK-FP-SD-NEXT: ret +; +; CHECK-NOFP-GI-LABEL: test_v16f32: +; CHECK-NOFP-GI: // %bb.0: +; CHECK-NOFP-GI-NEXT: fmax v0.4s, v0.4s, v1.4s +; CHECK-NOFP-GI-NEXT: fmax v1.4s, v2.4s, v3.4s +; CHECK-NOFP-GI-NEXT: fmax v0.4s, v0.4s, v1.4s +; CHECK-NOFP-GI-NEXT: fmaxv s0, v0.4s +; CHECK-NOFP-GI-NEXT: ret +; +; CHECK-FP-GI-LABEL: test_v16f32: +; CHECK-FP-GI: // %bb.0: +; CHECK-FP-GI-NEXT: fmax v0.4s, v0.4s, v1.4s +; CHECK-FP-GI-NEXT: fmax v1.4s, v2.4s, v3.4s +; CHECK-FP-GI-NEXT: fmax v0.4s, v0.4s, v1.4s +; CHECK-FP-GI-NEXT: fmaxv s0, v0.4s +; CHECK-FP-GI-NEXT: ret %b = call nnan float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> %a) ret float %b } diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fminimum.ll b/llvm/test/CodeGen/AArch64/vecreduce-fminimum.ll index 1fb6c32adc88..378e50795c1d 100644 
--- a/llvm/test/CodeGen/AArch64/vecreduce-fminimum.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fminimum.ll @@ -1,6 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOFP -; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon,+fullfp16 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOFP --check-prefix=CHECK-NOFP-SD +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon,+fullfp16 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP --check-prefix=CHECK-FP-SD +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOFP --check-prefix=CHECK-NOFP-GI +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP --check-prefix=CHECK-FP-GI + +; CHECK-NOFP-GI: warning: Instruction selection used fallback path for test_v11f16 +; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32 +; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32_ninf +; +; CHECK-FP-GI: warning: Instruction selection used fallback path for test_v11f16 +; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32 +; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32_ninf declare half @llvm.vector.reduce.fminimum.v1f16(<1 x half> %a) declare float @llvm.vector.reduce.fminimum.v1f32(<1 x float> %a) @@ -30,11 +40,29 @@ define half @test_v1f16(<1 x half> %a) nounwind { } define float @test_v1f32(<1 x float> %a) nounwind { -; CHECK-LABEL: test_v1f32: -; CHECK: // %bb.0: -; 
CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 -; CHECK-NEXT: ret +; CHECK-NOFP-SD-LABEL: test_v1f32: +; CHECK-NOFP-SD: // %bb.0: +; CHECK-NOFP-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NOFP-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NOFP-SD-NEXT: ret +; +; CHECK-FP-SD-LABEL: test_v1f32: +; CHECK-FP-SD: // %bb.0: +; CHECK-FP-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-FP-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-FP-SD-NEXT: ret +; +; CHECK-NOFP-GI-LABEL: test_v1f32: +; CHECK-NOFP-GI: // %bb.0: +; CHECK-NOFP-GI-NEXT: fmov x8, d0 +; CHECK-NOFP-GI-NEXT: fmov s0, w8 +; CHECK-NOFP-GI-NEXT: ret +; +; CHECK-FP-GI-LABEL: test_v1f32: +; CHECK-FP-GI: // %bb.0: +; CHECK-FP-GI-NEXT: fmov x8, d0 +; CHECK-FP-GI-NEXT: fmov s0, w8 +; CHECK-FP-GI-NEXT: ret %b = call float @llvm.vector.reduce.fminimum.v1f32(<1 x float> %a) ret float %b } @@ -56,166 +84,195 @@ define fp128 @test_v1f128(<1 x fp128> %a) nounwind { } define half @test_v4f16(<4 x half> %a) nounwind { -; CHECK-NOFP-LABEL: test_v4f16: -; CHECK-NOFP: // %bb.0: -; CHECK-NOFP-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NOFP-NEXT: mov h1, v0.h[1] -; CHECK-NOFP-NEXT: fcvt s2, h0 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmin s1, s2, s1 -; CHECK-NOFP-NEXT: mov h2, v0.h[2] -; CHECK-NOFP-NEXT: mov h0, v0.h[3] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s0, h0 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmin s1, s1, s2 -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmin s0, s1, s0 -; CHECK-NOFP-NEXT: fcvt h0, s0 -; CHECK-NOFP-NEXT: ret +; CHECK-NOFP-SD-LABEL: test_v4f16: +; CHECK-NOFP-SD: // %bb.0: +; CHECK-NOFP-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NOFP-SD-NEXT: mov h1, v0.h[1] +; CHECK-NOFP-SD-NEXT: fcvt s2, h0 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmin s1, s2, s1 +; 
CHECK-NOFP-SD-NEXT: mov h2, v0.h[2] +; CHECK-NOFP-SD-NEXT: mov h0, v0.h[3] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s0, h0 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmin s1, s1, s2 +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmin s0, s1, s0 +; CHECK-NOFP-SD-NEXT: fcvt h0, s0 +; CHECK-NOFP-SD-NEXT: ret ; ; CHECK-FP-LABEL: test_v4f16: ; CHECK-FP: // %bb.0: ; CHECK-FP-NEXT: fminv h0, v0.4h ; CHECK-FP-NEXT: ret +; +; CHECK-NOFP-GI-LABEL: test_v4f16: +; CHECK-NOFP-GI: // %bb.0: +; CHECK-NOFP-GI-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NOFP-GI-NEXT: fminv s0, v0.4s +; CHECK-NOFP-GI-NEXT: fcvt h0, s0 +; CHECK-NOFP-GI-NEXT: ret %b = call half @llvm.vector.reduce.fminimum.v4f16(<4 x half> %a) ret half %b } define half @test_v8f16(<8 x half> %a) nounwind { -; CHECK-NOFP-LABEL: test_v8f16: -; CHECK-NOFP: // %bb.0: -; CHECK-NOFP-NEXT: mov h1, v0.h[1] -; CHECK-NOFP-NEXT: fcvt s2, h0 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmin s1, s2, s1 -; CHECK-NOFP-NEXT: mov h2, v0.h[2] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmin s1, s1, s2 -; CHECK-NOFP-NEXT: mov h2, v0.h[3] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmin s1, s1, s2 -; CHECK-NOFP-NEXT: mov h2, v0.h[4] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmin s1, s1, s2 -; CHECK-NOFP-NEXT: mov h2, v0.h[5] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmin s1, s1, s2 -; CHECK-NOFP-NEXT: mov h2, v0.h[6] -; CHECK-NOFP-NEXT: mov h0, v0.h[7] -; CHECK-NOFP-NEXT: fcvt h1, s1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s0, h0 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmin s1, s1, s2 -; CHECK-NOFP-NEXT: fcvt 
h1, s1 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmin s0, s1, s0 -; CHECK-NOFP-NEXT: fcvt h0, s0 -; CHECK-NOFP-NEXT: ret +; CHECK-NOFP-SD-LABEL: test_v8f16: +; CHECK-NOFP-SD: // %bb.0: +; CHECK-NOFP-SD-NEXT: mov h1, v0.h[1] +; CHECK-NOFP-SD-NEXT: fcvt s2, h0 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmin s1, s2, s1 +; CHECK-NOFP-SD-NEXT: mov h2, v0.h[2] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmin s1, s1, s2 +; CHECK-NOFP-SD-NEXT: mov h2, v0.h[3] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmin s1, s1, s2 +; CHECK-NOFP-SD-NEXT: mov h2, v0.h[4] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmin s1, s1, s2 +; CHECK-NOFP-SD-NEXT: mov h2, v0.h[5] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmin s1, s1, s2 +; CHECK-NOFP-SD-NEXT: mov h2, v0.h[6] +; CHECK-NOFP-SD-NEXT: mov h0, v0.h[7] +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s0, h0 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmin s1, s1, s2 +; CHECK-NOFP-SD-NEXT: fcvt h1, s1 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmin s0, s1, s0 +; CHECK-NOFP-SD-NEXT: fcvt h0, s0 +; CHECK-NOFP-SD-NEXT: ret ; ; CHECK-FP-LABEL: test_v8f16: ; CHECK-FP: // %bb.0: ; CHECK-FP-NEXT: fminv h0, v0.8h ; CHECK-FP-NEXT: ret +; +; CHECK-NOFP-GI-LABEL: test_v8f16: +; CHECK-NOFP-GI: // %bb.0: +; CHECK-NOFP-GI-NEXT: fcvtl v1.4s, v0.4h +; CHECK-NOFP-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NOFP-GI-NEXT: fmin v0.4s, v1.4s, v0.4s +; CHECK-NOFP-GI-NEXT: fminv s0, v0.4s +; CHECK-NOFP-GI-NEXT: fcvt h0, s0 +; CHECK-NOFP-GI-NEXT: ret %b = call nnan half @llvm.vector.reduce.fminimum.v8f16(<8 x half> %a) ret half %b } 
define half @test_v16f16(<16 x half> %a) nounwind { -; CHECK-NOFP-LABEL: test_v16f16: -; CHECK-NOFP: // %bb.0: -; CHECK-NOFP-NEXT: mov h2, v1.h[1] -; CHECK-NOFP-NEXT: mov h3, v0.h[1] -; CHECK-NOFP-NEXT: fcvt s4, h1 -; CHECK-NOFP-NEXT: fcvt s5, h0 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fmin s4, s5, s4 -; CHECK-NOFP-NEXT: mov h5, v0.h[2] -; CHECK-NOFP-NEXT: fmin s2, s3, s2 -; CHECK-NOFP-NEXT: mov h3, v1.h[2] -; CHECK-NOFP-NEXT: fcvt h4, s4 -; CHECK-NOFP-NEXT: fcvt s5, h5 -; CHECK-NOFP-NEXT: fcvt h2, s2 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fcvt s4, h4 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fmin s3, s5, s3 -; CHECK-NOFP-NEXT: mov h5, v0.h[3] -; CHECK-NOFP-NEXT: fmin s2, s4, s2 -; CHECK-NOFP-NEXT: mov h4, v1.h[3] -; CHECK-NOFP-NEXT: fcvt h3, s3 -; CHECK-NOFP-NEXT: fcvt s5, h5 -; CHECK-NOFP-NEXT: fcvt h2, s2 -; CHECK-NOFP-NEXT: fcvt s4, h4 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fmin s4, s5, s4 -; CHECK-NOFP-NEXT: mov h5, v0.h[4] -; CHECK-NOFP-NEXT: fmin s2, s2, s3 -; CHECK-NOFP-NEXT: mov h3, v1.h[4] -; CHECK-NOFP-NEXT: fcvt h4, s4 -; CHECK-NOFP-NEXT: fcvt s5, h5 -; CHECK-NOFP-NEXT: fcvt h2, s2 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fcvt s4, h4 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fmin s3, s5, s3 -; CHECK-NOFP-NEXT: mov h5, v0.h[5] -; CHECK-NOFP-NEXT: fmin s2, s2, s4 -; CHECK-NOFP-NEXT: mov h4, v1.h[5] -; CHECK-NOFP-NEXT: fcvt h3, s3 -; CHECK-NOFP-NEXT: fcvt s5, h5 -; CHECK-NOFP-NEXT: fcvt h2, s2 -; CHECK-NOFP-NEXT: fcvt s4, h4 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fmin s4, s5, s4 -; CHECK-NOFP-NEXT: mov h5, v0.h[6] -; CHECK-NOFP-NEXT: mov h0, v0.h[7] -; CHECK-NOFP-NEXT: fmin s2, s2, s3 -; CHECK-NOFP-NEXT: mov h3, v1.h[6] -; CHECK-NOFP-NEXT: fcvt h4, s4 -; CHECK-NOFP-NEXT: fcvt s5, h5 -; CHECK-NOFP-NEXT: mov h1, v1.h[7] -; CHECK-NOFP-NEXT: fcvt s0, h0 -; CHECK-NOFP-NEXT: fcvt h2, s2 
-; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fcvt s4, h4 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fmin s3, s5, s3 -; CHECK-NOFP-NEXT: fmin s0, s0, s1 -; CHECK-NOFP-NEXT: fmin s2, s2, s4 -; CHECK-NOFP-NEXT: fcvt h3, s3 -; CHECK-NOFP-NEXT: fcvt h0, s0 -; CHECK-NOFP-NEXT: fcvt h2, s2 -; CHECK-NOFP-NEXT: fcvt s3, h3 -; CHECK-NOFP-NEXT: fcvt s0, h0 -; CHECK-NOFP-NEXT: fcvt s2, h2 -; CHECK-NOFP-NEXT: fmin s2, s2, s3 -; CHECK-NOFP-NEXT: fcvt h1, s2 -; CHECK-NOFP-NEXT: fcvt s1, h1 -; CHECK-NOFP-NEXT: fmin s0, s1, s0 -; CHECK-NOFP-NEXT: fcvt h0, s0 -; CHECK-NOFP-NEXT: ret +; CHECK-NOFP-SD-LABEL: test_v16f16: +; CHECK-NOFP-SD: // %bb.0: +; CHECK-NOFP-SD-NEXT: mov h2, v1.h[1] +; CHECK-NOFP-SD-NEXT: mov h3, v0.h[1] +; CHECK-NOFP-SD-NEXT: fcvt s4, h1 +; CHECK-NOFP-SD-NEXT: fcvt s5, h0 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fmin s4, s5, s4 +; CHECK-NOFP-SD-NEXT: mov h5, v0.h[2] +; CHECK-NOFP-SD-NEXT: fmin s2, s3, s2 +; CHECK-NOFP-SD-NEXT: mov h3, v1.h[2] +; CHECK-NOFP-SD-NEXT: fcvt h4, s4 +; CHECK-NOFP-SD-NEXT: fcvt s5, h5 +; CHECK-NOFP-SD-NEXT: fcvt h2, s2 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fcvt s4, h4 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fmin s3, s5, s3 +; CHECK-NOFP-SD-NEXT: mov h5, v0.h[3] +; CHECK-NOFP-SD-NEXT: fmin s2, s4, s2 +; CHECK-NOFP-SD-NEXT: mov h4, v1.h[3] +; CHECK-NOFP-SD-NEXT: fcvt h3, s3 +; CHECK-NOFP-SD-NEXT: fcvt s5, h5 +; CHECK-NOFP-SD-NEXT: fcvt h2, s2 +; CHECK-NOFP-SD-NEXT: fcvt s4, h4 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fmin s4, s5, s4 +; CHECK-NOFP-SD-NEXT: mov h5, v0.h[4] +; CHECK-NOFP-SD-NEXT: fmin s2, s2, s3 +; CHECK-NOFP-SD-NEXT: mov h3, v1.h[4] +; CHECK-NOFP-SD-NEXT: fcvt h4, s4 +; CHECK-NOFP-SD-NEXT: fcvt s5, h5 +; CHECK-NOFP-SD-NEXT: fcvt h2, s2 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fcvt s4, h4 +; CHECK-NOFP-SD-NEXT: fcvt s2, 
h2 +; CHECK-NOFP-SD-NEXT: fmin s3, s5, s3 +; CHECK-NOFP-SD-NEXT: mov h5, v0.h[5] +; CHECK-NOFP-SD-NEXT: fmin s2, s2, s4 +; CHECK-NOFP-SD-NEXT: mov h4, v1.h[5] +; CHECK-NOFP-SD-NEXT: fcvt h3, s3 +; CHECK-NOFP-SD-NEXT: fcvt s5, h5 +; CHECK-NOFP-SD-NEXT: fcvt h2, s2 +; CHECK-NOFP-SD-NEXT: fcvt s4, h4 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fmin s4, s5, s4 +; CHECK-NOFP-SD-NEXT: mov h5, v0.h[6] +; CHECK-NOFP-SD-NEXT: mov h0, v0.h[7] +; CHECK-NOFP-SD-NEXT: fmin s2, s2, s3 +; CHECK-NOFP-SD-NEXT: mov h3, v1.h[6] +; CHECK-NOFP-SD-NEXT: fcvt h4, s4 +; CHECK-NOFP-SD-NEXT: fcvt s5, h5 +; CHECK-NOFP-SD-NEXT: mov h1, v1.h[7] +; CHECK-NOFP-SD-NEXT: fcvt s0, h0 +; CHECK-NOFP-SD-NEXT: fcvt h2, s2 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fcvt s4, h4 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fmin s3, s5, s3 +; CHECK-NOFP-SD-NEXT: fmin s0, s0, s1 +; CHECK-NOFP-SD-NEXT: fmin s2, s2, s4 +; CHECK-NOFP-SD-NEXT: fcvt h3, s3 +; CHECK-NOFP-SD-NEXT: fcvt h0, s0 +; CHECK-NOFP-SD-NEXT: fcvt h2, s2 +; CHECK-NOFP-SD-NEXT: fcvt s3, h3 +; CHECK-NOFP-SD-NEXT: fcvt s0, h0 +; CHECK-NOFP-SD-NEXT: fcvt s2, h2 +; CHECK-NOFP-SD-NEXT: fmin s2, s2, s3 +; CHECK-NOFP-SD-NEXT: fcvt h1, s2 +; CHECK-NOFP-SD-NEXT: fcvt s1, h1 +; CHECK-NOFP-SD-NEXT: fmin s0, s1, s0 +; CHECK-NOFP-SD-NEXT: fcvt h0, s0 +; CHECK-NOFP-SD-NEXT: ret ; ; CHECK-FP-LABEL: test_v16f16: ; CHECK-FP: // %bb.0: ; CHECK-FP-NEXT: fmin v0.8h, v0.8h, v1.8h ; CHECK-FP-NEXT: fminv h0, v0.8h ; CHECK-FP-NEXT: ret +; +; CHECK-NOFP-GI-LABEL: test_v16f16: +; CHECK-NOFP-GI: // %bb.0: +; CHECK-NOFP-GI-NEXT: fcvtl v2.4s, v0.4h +; CHECK-NOFP-GI-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NOFP-GI-NEXT: fcvtl v3.4s, v1.4h +; CHECK-NOFP-GI-NEXT: fcvtl2 v1.4s, v1.8h +; CHECK-NOFP-GI-NEXT: fmin v0.4s, v2.4s, v0.4s +; CHECK-NOFP-GI-NEXT: fmin v1.4s, v3.4s, v1.4s +; CHECK-NOFP-GI-NEXT: fmin v0.4s, v0.4s, v1.4s +; CHECK-NOFP-GI-NEXT: fminv s0, v0.4s +; 
CHECK-NOFP-GI-NEXT: fcvt h0, s0 +; CHECK-NOFP-GI-NEXT: ret %b = call nnan half @llvm.vector.reduce.fminimum.v16f16(<16 x half> %a) ret half %b } @@ -249,13 +306,37 @@ define float @test_v8f32(<8 x float> %a) nounwind { } define float @test_v16f32(<16 x float> %a) nounwind { -; CHECK-LABEL: test_v16f32: -; CHECK: // %bb.0: -; CHECK-NEXT: fmin v1.4s, v1.4s, v3.4s -; CHECK-NEXT: fmin v0.4s, v0.4s, v2.4s -; CHECK-NEXT: fmin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: fminv s0, v0.4s -; CHECK-NEXT: ret +; CHECK-NOFP-SD-LABEL: test_v16f32: +; CHECK-NOFP-SD: // %bb.0: +; CHECK-NOFP-SD-NEXT: fmin v1.4s, v1.4s, v3.4s +; CHECK-NOFP-SD-NEXT: fmin v0.4s, v0.4s, v2.4s +; CHECK-NOFP-SD-NEXT: fmin v0.4s, v0.4s, v1.4s +; CHECK-NOFP-SD-NEXT: fminv s0, v0.4s +; CHECK-NOFP-SD-NEXT: ret +; +; CHECK-FP-SD-LABEL: test_v16f32: +; CHECK-FP-SD: // %bb.0: +; CHECK-FP-SD-NEXT: fmin v1.4s, v1.4s, v3.4s +; CHECK-FP-SD-NEXT: fmin v0.4s, v0.4s, v2.4s +; CHECK-FP-SD-NEXT: fmin v0.4s, v0.4s, v1.4s +; CHECK-FP-SD-NEXT: fminv s0, v0.4s +; CHECK-FP-SD-NEXT: ret +; +; CHECK-NOFP-GI-LABEL: test_v16f32: +; CHECK-NOFP-GI: // %bb.0: +; CHECK-NOFP-GI-NEXT: fmin v0.4s, v0.4s, v1.4s +; CHECK-NOFP-GI-NEXT: fmin v1.4s, v2.4s, v3.4s +; CHECK-NOFP-GI-NEXT: fmin v0.4s, v0.4s, v1.4s +; CHECK-NOFP-GI-NEXT: fminv s0, v0.4s +; CHECK-NOFP-GI-NEXT: ret +; +; CHECK-FP-GI-LABEL: test_v16f32: +; CHECK-FP-GI: // %bb.0: +; CHECK-FP-GI-NEXT: fmin v0.4s, v0.4s, v1.4s +; CHECK-FP-GI-NEXT: fmin v1.4s, v2.4s, v3.4s +; CHECK-FP-GI-NEXT: fmin v0.4s, v0.4s, v1.4s +; CHECK-FP-GI-NEXT: fminv s0, v0.4s +; CHECK-FP-GI-NEXT: ret %b = call nnan float @llvm.vector.reduce.fminimum.v16f32(<16 x float> %a) ret float %b }