diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h index 9723789e919b15..6177884bd19501 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h @@ -337,6 +337,8 @@ class LoongArchTargetLowering : public TargetLowering { bool isEligibleForTailCallOptimization( CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, const SmallVectorImpl &ArgLocs) const; + + bool softPromoteHalfType() const override { return true; } }; } // end namespace llvm diff --git a/llvm/test/CodeGen/LoongArch/fp16-promote.ll b/llvm/test/CodeGen/LoongArch/fp16-promote.ll index 75f920b43a06ce..03965ac81f3763 100644 --- a/llvm/test/CodeGen/LoongArch/fp16-promote.ll +++ b/llvm/test/CodeGen/LoongArch/fp16-promote.ll @@ -126,42 +126,40 @@ define void @test_fptrunc_double(double %d, ptr %p) nounwind { define half @test_fadd_reg(half %a, half %b) nounwind { ; LA32-LABEL: test_fadd_reg: ; LA32: # %bb.0: -; LA32-NEXT: addi.w $sp, $sp, -32 -; LA32-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill -; LA32-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill -; LA32-NEXT: fst.d $fs1, $sp, 8 # 8-byte Folded Spill +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill +; LA32-NEXT: fst.d $fs0, $sp, 0 # 8-byte Folded Spill +; LA32-NEXT: move $fp, $a0 +; LA32-NEXT: move $a0, $a1 +; LA32-NEXT: bl %plt(__gnu_h2f_ieee) ; LA32-NEXT: fmov.s $fs0, $fa0 -; LA32-NEXT: fmov.s $fa0, $fa1 -; LA32-NEXT: bl %plt(__gnu_f2h_ieee) +; LA32-NEXT: move $a0, $fp ; LA32-NEXT: bl %plt(__gnu_h2f_ieee) -; LA32-NEXT: fmov.s $fs1, $fa0 -; LA32-NEXT: fmov.s $fa0, $fs0 +; LA32-NEXT: fadd.s $fa0, $fa0, $fs0 ; LA32-NEXT: bl %plt(__gnu_f2h_ieee) -; LA32-NEXT: bl %plt(__gnu_h2f_ieee) -; LA32-NEXT: fadd.s $fa0, $fa0, $fs1 -; LA32-NEXT: fld.d $fs1, $sp, 8 # 8-byte Folded Reload -; LA32-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload -; LA32-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 32 +; LA32-NEXT: fld.d $fs0, $sp, 0 # 8-byte Folded Reload +; LA32-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 ; LA32-NEXT: ret ; ; LA64-LABEL: test_fadd_reg: ; LA64: # %bb.0: ; LA64-NEXT: addi.d $sp, $sp, -32 ; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill -; LA64-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill -; LA64-NEXT: fst.d $fs1, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill +; LA64-NEXT: fst.d $fs0, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: move $fp, $a0 +; LA64-NEXT: move $a0, $a1 +; LA64-NEXT: bl %plt(__gnu_h2f_ieee) ; LA64-NEXT: fmov.s $fs0, $fa0 -; LA64-NEXT: fmov.s $fa0, $fa1 -; LA64-NEXT: bl %plt(__gnu_f2h_ieee) +; LA64-NEXT: move $a0, $fp ; LA64-NEXT: bl %plt(__gnu_h2f_ieee) -; LA64-NEXT: fmov.s $fs1, $fa0 -; LA64-NEXT: fmov.s $fa0, $fs0 +; LA64-NEXT: fadd.s $fa0, $fa0, $fs0 ; LA64-NEXT: bl %plt(__gnu_f2h_ieee) -; LA64-NEXT: bl %plt(__gnu_h2f_ieee) -; LA64-NEXT: fadd.s $fa0, $fa0, $fs1 -; LA64-NEXT: fld.d $fs1, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload +; LA64-NEXT: fld.d $fs0, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload ; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload ; LA64-NEXT: addi.d $sp, $sp, 32 ; LA64-NEXT: ret @@ -177,16 +175,16 @@ define void @test_fadd_mem(ptr %p, ptr %q) nounwind { ; LA32-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill ; LA32-NEXT: st.w $s0, $sp, 20 # 4-byte Folded Spill ; LA32-NEXT: fst.d $fs0, $sp, 8 # 8-byte Folded Spill -; LA32-NEXT: move $fp, $a1 -; LA32-NEXT: move $s0, $a0 -; LA32-NEXT: ld.hu $a0, $a0, 0 +; LA32-NEXT: move $fp, $a0 +; LA32-NEXT: ld.hu $s0, $a0, 0 +; LA32-NEXT: ld.hu $a0, $a1, 0 ; LA32-NEXT: bl %plt(__gnu_h2f_ieee) ; LA32-NEXT: fmov.s $fs0, $fa0 -; LA32-NEXT: ld.hu $a0, $fp, 0 +; LA32-NEXT: move $a0, $s0 ; LA32-NEXT: bl %plt(__gnu_h2f_ieee) -; LA32-NEXT: fadd.s $fa0, $fs0, $fa0 +; LA32-NEXT: fadd.s $fa0, $fa0, $fs0 ; LA32-NEXT: bl %plt(__gnu_f2h_ieee) -; LA32-NEXT: st.h $a0, $s0, 0 +; LA32-NEXT: st.h $a0, $fp, 0 ; LA32-NEXT: fld.d $fs0, $sp, 8 # 8-byte Folded Reload ; LA32-NEXT: ld.w $s0, $sp, 20 # 4-byte Folded Reload ; LA32-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload @@ -201,16 +199,16 @@ define void @test_fadd_mem(ptr %p, ptr %q) nounwind { ; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill ; LA64-NEXT: st.d $s0, $sp, 8 # 8-byte Folded Spill ; LA64-NEXT: fst.d $fs0, $sp, 0 # 8-byte Folded Spill -; LA64-NEXT: move $fp, $a1 -; LA64-NEXT: move $s0, $a0 -; LA64-NEXT: ld.hu $a0, $a0, 0 +; LA64-NEXT: move $fp, $a0 +; LA64-NEXT: ld.hu $s0, $a0, 0 +; LA64-NEXT: ld.hu $a0, $a1, 0 ; LA64-NEXT: bl %plt(__gnu_h2f_ieee) ; LA64-NEXT: fmov.s $fs0, $fa0 -; LA64-NEXT: ld.hu $a0, $fp, 0 +; LA64-NEXT: move $a0, $s0 ; LA64-NEXT: bl %plt(__gnu_h2f_ieee) -; LA64-NEXT: fadd.s $fa0, $fs0, $fa0 +; LA64-NEXT: fadd.s $fa0, $fa0, $fs0 ; LA64-NEXT: bl %plt(__gnu_f2h_ieee) -; LA64-NEXT: st.h $a0, $s0, 0 +; LA64-NEXT: st.h $a0, $fp, 0 ; LA64-NEXT: fld.d $fs0, $sp, 0 # 8-byte Folded Reload ; LA64-NEXT: ld.d $s0, $sp, 8 # 8-byte Folded Reload ; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload @@ -227,42 +225,40 @@ define void @test_fadd_mem(ptr %p, ptr %q) nounwind { define half @test_fmul_reg(half %a, half %b) nounwind { ; LA32-LABEL: test_fmul_reg: ; LA32: # %bb.0: -; LA32-NEXT: addi.w $sp, $sp, -32 -; LA32-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill -; LA32-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill -; LA32-NEXT: fst.d $fs1, $sp, 8 # 8-byte Folded Spill +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill +; LA32-NEXT: fst.d $fs0, $sp, 0 # 8-byte Folded Spill +; LA32-NEXT: move $fp, $a0 +; LA32-NEXT: move $a0, $a1 +; LA32-NEXT: bl %plt(__gnu_h2f_ieee) ; LA32-NEXT: fmov.s $fs0, $fa0 -; LA32-NEXT: fmov.s $fa0, $fa1 -; LA32-NEXT: bl %plt(__gnu_f2h_ieee) +; LA32-NEXT: move $a0, $fp ; LA32-NEXT: bl %plt(__gnu_h2f_ieee) -; LA32-NEXT: fmov.s $fs1, $fa0 -; LA32-NEXT: fmov.s $fa0, $fs0 +; LA32-NEXT: fmul.s $fa0, $fa0, $fs0 ; LA32-NEXT: bl %plt(__gnu_f2h_ieee) -; LA32-NEXT: bl %plt(__gnu_h2f_ieee) -; LA32-NEXT: fmul.s $fa0, $fa0, $fs1 -; LA32-NEXT: fld.d $fs1, $sp, 8 # 8-byte Folded Reload -; LA32-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload -; LA32-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 32 +; LA32-NEXT: fld.d $fs0, $sp, 0 # 8-byte Folded Reload +; LA32-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 ; LA32-NEXT: ret ; ; LA64-LABEL: test_fmul_reg: ; LA64: # %bb.0: ; LA64-NEXT: addi.d $sp, $sp, -32 ; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill -; LA64-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill -; LA64-NEXT: fst.d $fs1, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill +; LA64-NEXT: fst.d $fs0, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: move $fp, $a0 +; LA64-NEXT: move $a0, $a1 +; LA64-NEXT: bl %plt(__gnu_h2f_ieee) ; LA64-NEXT: fmov.s $fs0, $fa0 -; LA64-NEXT: fmov.s $fa0, $fa1 -; LA64-NEXT: bl %plt(__gnu_f2h_ieee) +; LA64-NEXT: move $a0, $fp ; LA64-NEXT: bl %plt(__gnu_h2f_ieee) -; LA64-NEXT: fmov.s $fs1, $fa0 -; LA64-NEXT: fmov.s $fa0, $fs0 +; LA64-NEXT: fmul.s $fa0, $fa0, $fs0 ; LA64-NEXT: bl %plt(__gnu_f2h_ieee) -; LA64-NEXT: bl %plt(__gnu_h2f_ieee) -; LA64-NEXT: fmul.s $fa0, $fa0, $fs1 -; LA64-NEXT: fld.d $fs1, $sp, 8 # 8-byte Folded Reload -; LA64-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload +; LA64-NEXT: fld.d $fs0, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload ; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload ; LA64-NEXT: addi.d $sp, $sp, 32 ; LA64-NEXT: ret @@ -278,16 +274,16 @@ define void @test_fmul_mem(ptr %p, ptr %q) nounwind { ; LA32-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill ; LA32-NEXT: st.w $s0, $sp, 20 # 4-byte Folded Spill ; LA32-NEXT: fst.d $fs0, $sp, 8 # 8-byte Folded Spill -; LA32-NEXT: move $fp, $a1 -; LA32-NEXT: move $s0, $a0 -; LA32-NEXT: ld.hu $a0, $a0, 0 +; LA32-NEXT: move $fp, $a0 +; LA32-NEXT: ld.hu $s0, $a0, 0 +; LA32-NEXT: ld.hu $a0, $a1, 0 ; LA32-NEXT: bl %plt(__gnu_h2f_ieee) ; LA32-NEXT: fmov.s $fs0, $fa0 -; LA32-NEXT: ld.hu $a0, $fp, 0 +; LA32-NEXT: move $a0, $s0 ; LA32-NEXT: bl %plt(__gnu_h2f_ieee) -; LA32-NEXT: fmul.s $fa0, $fs0, $fa0 +; LA32-NEXT: fmul.s $fa0, $fa0, $fs0 ; LA32-NEXT: bl %plt(__gnu_f2h_ieee) -; LA32-NEXT: st.h $a0, $s0, 0 +; LA32-NEXT: st.h $a0, $fp, 0 ; LA32-NEXT: fld.d $fs0, $sp, 8 # 8-byte Folded Reload ; LA32-NEXT: ld.w $s0, $sp, 20 # 4-byte Folded Reload ; LA32-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload @@ -302,16 +298,16 @@ define void @test_fmul_mem(ptr %p, ptr %q) nounwind { ; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill ; LA64-NEXT: st.d $s0, $sp, 8 # 8-byte Folded Spill ; LA64-NEXT: fst.d $fs0, $sp, 0 # 8-byte Folded Spill -; LA64-NEXT: move $fp, $a1 -; LA64-NEXT: move $s0, $a0 -; LA64-NEXT: ld.hu $a0, $a0, 0 +; LA64-NEXT: move $fp, $a0 +; LA64-NEXT: ld.hu $s0, $a0, 0 +; LA64-NEXT: ld.hu $a0, $a1, 0 ; LA64-NEXT: bl %plt(__gnu_h2f_ieee) ; LA64-NEXT: fmov.s $fs0, $fa0 -; LA64-NEXT: ld.hu $a0, $fp, 0 +; LA64-NEXT: move $a0, $s0 ; LA64-NEXT: bl %plt(__gnu_h2f_ieee) -; LA64-NEXT: fmul.s $fa0, $fs0, $fa0 +; LA64-NEXT: fmul.s $fa0, $fa0, $fs0 ; LA64-NEXT: bl %plt(__gnu_f2h_ieee) -; LA64-NEXT: st.h $a0, $s0, 0 +; LA64-NEXT: st.h $a0, $fp, 0 ; LA64-NEXT: fld.d $fs0, $sp, 0 # 8-byte Folded Reload ; LA64-NEXT: ld.d $s0, $sp, 8 # 8-byte Folded Reload ; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload @@ -324,3 +320,61 @@ define void @test_fmul_mem(ptr %p, ptr %q) nounwind { store half %r, ptr %p ret void } + +define half @freeze_half_undef() nounwind { +; LA32-LABEL: freeze_half_undef: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: movgr2fr.w $fa0, $zero +; LA32-NEXT: bl %plt(__gnu_f2h_ieee) +; LA32-NEXT: bl %plt(__gnu_h2f_ieee) +; LA32-NEXT: fadd.s $fa0, $fa0, $fa0 +; LA32-NEXT: bl %plt(__gnu_f2h_ieee) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: freeze_half_undef: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: movgr2fr.w $fa0, $zero +; LA64-NEXT: bl %plt(__gnu_f2h_ieee) +; LA64-NEXT: bl %plt(__gnu_h2f_ieee) +; LA64-NEXT: fadd.s $fa0, $fa0, $fa0 +; LA64-NEXT: bl %plt(__gnu_f2h_ieee) +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + %y1 = freeze half undef + %t1 = fadd half %y1, %y1 + ret half %t1 +} + +define half @freeze_half_poison(half %maybe.poison) nounwind { +; LA32-LABEL: freeze_half_poison: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: bl %plt(__gnu_h2f_ieee) +; LA32-NEXT: fadd.s $fa0, $fa0, $fa0 +; LA32-NEXT: bl %plt(__gnu_f2h_ieee) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: freeze_half_poison: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: bl %plt(__gnu_h2f_ieee) +; LA64-NEXT: fadd.s $fa0, $fa0, $fa0 +; LA64-NEXT: bl %plt(__gnu_f2h_ieee) +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + %y1 = freeze half %maybe.poison + %t1 = fadd half %y1, %y1 + ret half %t1 +}