release/19.x: [loongarch][DAG][FREEZE] Fix crash when FREEZE a half(f16) type on loongarch (#107791) #109093

llvmbot · 2024-09-18T07:25:13Z

Backport 13280d9

Requested by: @nikic

llvmbot · 2024-09-18T07:25:19Z

@arsenm What do you think about merging this PR to the release branch?

llvmbot · 2024-09-18T07:25:45Z

@llvm/pr-subscribers-backend-loongarch

Author: None (llvmbot)

Changes

Backport 13280d9

Requested by: @nikic

Full diff: https://github.com/llvm/llvm-project/pull/109093.diff

2 Files Affected:

(modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.h (+2)
(modified) llvm/test/CodeGen/LoongArch/fp16-promote.ll (+126-72)

diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index fc5b36c2124e01..267837add575dc 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -332,6 +332,8 @@ class LoongArchTargetLowering : public TargetLowering {
   bool isEligibleForTailCallOptimization(
       CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
       const SmallVectorImpl<CCValAssign> &ArgLocs) const;
+
+  bool softPromoteHalfType() const override { return true; }
 };
 
 } // end namespace llvm
diff --git a/llvm/test/CodeGen/LoongArch/fp16-promote.ll b/llvm/test/CodeGen/LoongArch/fp16-promote.ll
index 75f920b43a06ce..03965ac81f3763 100644
--- a/llvm/test/CodeGen/LoongArch/fp16-promote.ll
+++ b/llvm/test/CodeGen/LoongArch/fp16-promote.ll
@@ -126,42 +126,40 @@ define void @test_fptrunc_double(double %d, ptr %p) nounwind {
 define half @test_fadd_reg(half %a, half %b) nounwind {
 ; LA32-LABEL: test_fadd_reg:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    addi.w $sp, $sp, -32
-; LA32-NEXT:    st.w $ra, $sp, 28 # 4-byte Folded Spill
-; LA32-NEXT:    fst.d $fs0, $sp, 16 # 8-byte Folded Spill
-; LA32-NEXT:    fst.d $fs1, $sp, 8 # 8-byte Folded Spill
+; LA32-NEXT:    addi.w $sp, $sp, -16
+; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT:    st.w $fp, $sp, 8 # 4-byte Folded Spill
+; LA32-NEXT:    fst.d $fs0, $sp, 0 # 8-byte Folded Spill
+; LA32-NEXT:    move $fp, $a0
+; LA32-NEXT:    move $a0, $a1
+; LA32-NEXT:    bl %plt(__gnu_h2f_ieee)
 ; LA32-NEXT:    fmov.s $fs0, $fa0
-; LA32-NEXT:    fmov.s $fa0, $fa1
-; LA32-NEXT:    bl %plt(__gnu_f2h_ieee)
+; LA32-NEXT:    move $a0, $fp
 ; LA32-NEXT:    bl %plt(__gnu_h2f_ieee)
-; LA32-NEXT:    fmov.s $fs1, $fa0
-; LA32-NEXT:    fmov.s $fa0, $fs0
+; LA32-NEXT:    fadd.s $fa0, $fa0, $fs0
 ; LA32-NEXT:    bl %plt(__gnu_f2h_ieee)
-; LA32-NEXT:    bl %plt(__gnu_h2f_ieee)
-; LA32-NEXT:    fadd.s $fa0, $fa0, $fs1
-; LA32-NEXT:    fld.d $fs1, $sp, 8 # 8-byte Folded Reload
-; LA32-NEXT:    fld.d $fs0, $sp, 16 # 8-byte Folded Reload
-; LA32-NEXT:    ld.w $ra, $sp, 28 # 4-byte Folded Reload
-; LA32-NEXT:    addi.w $sp, $sp, 32
+; LA32-NEXT:    fld.d $fs0, $sp, 0 # 8-byte Folded Reload
+; LA32-NEXT:    ld.w $fp, $sp, 8 # 4-byte Folded Reload
+; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT:    addi.w $sp, $sp, 16
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: test_fadd_reg:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    addi.d $sp, $sp, -32
 ; LA64-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
-; LA64-NEXT:    fst.d $fs0, $sp, 16 # 8-byte Folded Spill
-; LA64-NEXT:    fst.d $fs1, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
+; LA64-NEXT:    fst.d $fs0, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT:    move $fp, $a0
+; LA64-NEXT:    move $a0, $a1
+; LA64-NEXT:    bl %plt(__gnu_h2f_ieee)
 ; LA64-NEXT:    fmov.s $fs0, $fa0
-; LA64-NEXT:    fmov.s $fa0, $fa1
-; LA64-NEXT:    bl %plt(__gnu_f2h_ieee)
+; LA64-NEXT:    move $a0, $fp
 ; LA64-NEXT:    bl %plt(__gnu_h2f_ieee)
-; LA64-NEXT:    fmov.s $fs1, $fa0
-; LA64-NEXT:    fmov.s $fa0, $fs0
+; LA64-NEXT:    fadd.s $fa0, $fa0, $fs0
 ; LA64-NEXT:    bl %plt(__gnu_f2h_ieee)
-; LA64-NEXT:    bl %plt(__gnu_h2f_ieee)
-; LA64-NEXT:    fadd.s $fa0, $fa0, $fs1
-; LA64-NEXT:    fld.d $fs1, $sp, 8 # 8-byte Folded Reload
-; LA64-NEXT:    fld.d $fs0, $sp, 16 # 8-byte Folded Reload
+; LA64-NEXT:    fld.d $fs0, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
 ; LA64-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
 ; LA64-NEXT:    addi.d $sp, $sp, 32
 ; LA64-NEXT:    ret
@@ -177,16 +175,16 @@ define void @test_fadd_mem(ptr %p, ptr %q) nounwind {
 ; LA32-NEXT:    st.w $fp, $sp, 24 # 4-byte Folded Spill
 ; LA32-NEXT:    st.w $s0, $sp, 20 # 4-byte Folded Spill
 ; LA32-NEXT:    fst.d $fs0, $sp, 8 # 8-byte Folded Spill
-; LA32-NEXT:    move $fp, $a1
-; LA32-NEXT:    move $s0, $a0
-; LA32-NEXT:    ld.hu $a0, $a0, 0
+; LA32-NEXT:    move $fp, $a0
+; LA32-NEXT:    ld.hu $s0, $a0, 0
+; LA32-NEXT:    ld.hu $a0, $a1, 0
 ; LA32-NEXT:    bl %plt(__gnu_h2f_ieee)
 ; LA32-NEXT:    fmov.s $fs0, $fa0
-; LA32-NEXT:    ld.hu $a0, $fp, 0
+; LA32-NEXT:    move $a0, $s0
 ; LA32-NEXT:    bl %plt(__gnu_h2f_ieee)
-; LA32-NEXT:    fadd.s $fa0, $fs0, $fa0
+; LA32-NEXT:    fadd.s $fa0, $fa0, $fs0
 ; LA32-NEXT:    bl %plt(__gnu_f2h_ieee)
-; LA32-NEXT:    st.h $a0, $s0, 0
+; LA32-NEXT:    st.h $a0, $fp, 0
 ; LA32-NEXT:    fld.d $fs0, $sp, 8 # 8-byte Folded Reload
 ; LA32-NEXT:    ld.w $s0, $sp, 20 # 4-byte Folded Reload
 ; LA32-NEXT:    ld.w $fp, $sp, 24 # 4-byte Folded Reload
@@ -201,16 +199,16 @@ define void @test_fadd_mem(ptr %p, ptr %q) nounwind {
 ; LA64-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
 ; LA64-NEXT:    st.d $s0, $sp, 8 # 8-byte Folded Spill
 ; LA64-NEXT:    fst.d $fs0, $sp, 0 # 8-byte Folded Spill
-; LA64-NEXT:    move $fp, $a1
-; LA64-NEXT:    move $s0, $a0
-; LA64-NEXT:    ld.hu $a0, $a0, 0
+; LA64-NEXT:    move $fp, $a0
+; LA64-NEXT:    ld.hu $s0, $a0, 0
+; LA64-NEXT:    ld.hu $a0, $a1, 0
 ; LA64-NEXT:    bl %plt(__gnu_h2f_ieee)
 ; LA64-NEXT:    fmov.s $fs0, $fa0
-; LA64-NEXT:    ld.hu $a0, $fp, 0
+; LA64-NEXT:    move $a0, $s0
 ; LA64-NEXT:    bl %plt(__gnu_h2f_ieee)
-; LA64-NEXT:    fadd.s $fa0, $fs0, $fa0
+; LA64-NEXT:    fadd.s $fa0, $fa0, $fs0
 ; LA64-NEXT:    bl %plt(__gnu_f2h_ieee)
-; LA64-NEXT:    st.h $a0, $s0, 0
+; LA64-NEXT:    st.h $a0, $fp, 0
 ; LA64-NEXT:    fld.d $fs0, $sp, 0 # 8-byte Folded Reload
 ; LA64-NEXT:    ld.d $s0, $sp, 8 # 8-byte Folded Reload
 ; LA64-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
@@ -227,42 +225,40 @@ define void @test_fadd_mem(ptr %p, ptr %q) nounwind {
 define half @test_fmul_reg(half %a, half %b) nounwind {
 ; LA32-LABEL: test_fmul_reg:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    addi.w $sp, $sp, -32
-; LA32-NEXT:    st.w $ra, $sp, 28 # 4-byte Folded Spill
-; LA32-NEXT:    fst.d $fs0, $sp, 16 # 8-byte Folded Spill
-; LA32-NEXT:    fst.d $fs1, $sp, 8 # 8-byte Folded Spill
+; LA32-NEXT:    addi.w $sp, $sp, -16
+; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT:    st.w $fp, $sp, 8 # 4-byte Folded Spill
+; LA32-NEXT:    fst.d $fs0, $sp, 0 # 8-byte Folded Spill
+; LA32-NEXT:    move $fp, $a0
+; LA32-NEXT:    move $a0, $a1
+; LA32-NEXT:    bl %plt(__gnu_h2f_ieee)
 ; LA32-NEXT:    fmov.s $fs0, $fa0
-; LA32-NEXT:    fmov.s $fa0, $fa1
-; LA32-NEXT:    bl %plt(__gnu_f2h_ieee)
+; LA32-NEXT:    move $a0, $fp
 ; LA32-NEXT:    bl %plt(__gnu_h2f_ieee)
-; LA32-NEXT:    fmov.s $fs1, $fa0
-; LA32-NEXT:    fmov.s $fa0, $fs0
+; LA32-NEXT:    fmul.s $fa0, $fa0, $fs0
 ; LA32-NEXT:    bl %plt(__gnu_f2h_ieee)
-; LA32-NEXT:    bl %plt(__gnu_h2f_ieee)
-; LA32-NEXT:    fmul.s $fa0, $fa0, $fs1
-; LA32-NEXT:    fld.d $fs1, $sp, 8 # 8-byte Folded Reload
-; LA32-NEXT:    fld.d $fs0, $sp, 16 # 8-byte Folded Reload
-; LA32-NEXT:    ld.w $ra, $sp, 28 # 4-byte Folded Reload
-; LA32-NEXT:    addi.w $sp, $sp, 32
+; LA32-NEXT:    fld.d $fs0, $sp, 0 # 8-byte Folded Reload
+; LA32-NEXT:    ld.w $fp, $sp, 8 # 4-byte Folded Reload
+; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT:    addi.w $sp, $sp, 16
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: test_fmul_reg:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    addi.d $sp, $sp, -32
 ; LA64-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
-; LA64-NEXT:    fst.d $fs0, $sp, 16 # 8-byte Folded Spill
-; LA64-NEXT:    fst.d $fs1, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
+; LA64-NEXT:    fst.d $fs0, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT:    move $fp, $a0
+; LA64-NEXT:    move $a0, $a1
+; LA64-NEXT:    bl %plt(__gnu_h2f_ieee)
 ; LA64-NEXT:    fmov.s $fs0, $fa0
-; LA64-NEXT:    fmov.s $fa0, $fa1
-; LA64-NEXT:    bl %plt(__gnu_f2h_ieee)
+; LA64-NEXT:    move $a0, $fp
 ; LA64-NEXT:    bl %plt(__gnu_h2f_ieee)
-; LA64-NEXT:    fmov.s $fs1, $fa0
-; LA64-NEXT:    fmov.s $fa0, $fs0
+; LA64-NEXT:    fmul.s $fa0, $fa0, $fs0
 ; LA64-NEXT:    bl %plt(__gnu_f2h_ieee)
-; LA64-NEXT:    bl %plt(__gnu_h2f_ieee)
-; LA64-NEXT:    fmul.s $fa0, $fa0, $fs1
-; LA64-NEXT:    fld.d $fs1, $sp, 8 # 8-byte Folded Reload
-; LA64-NEXT:    fld.d $fs0, $sp, 16 # 8-byte Folded Reload
+; LA64-NEXT:    fld.d $fs0, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
 ; LA64-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
 ; LA64-NEXT:    addi.d $sp, $sp, 32
 ; LA64-NEXT:    ret
@@ -278,16 +274,16 @@ define void @test_fmul_mem(ptr %p, ptr %q) nounwind {
 ; LA32-NEXT:    st.w $fp, $sp, 24 # 4-byte Folded Spill
 ; LA32-NEXT:    st.w $s0, $sp, 20 # 4-byte Folded Spill
 ; LA32-NEXT:    fst.d $fs0, $sp, 8 # 8-byte Folded Spill
-; LA32-NEXT:    move $fp, $a1
-; LA32-NEXT:    move $s0, $a0
-; LA32-NEXT:    ld.hu $a0, $a0, 0
+; LA32-NEXT:    move $fp, $a0
+; LA32-NEXT:    ld.hu $s0, $a0, 0
+; LA32-NEXT:    ld.hu $a0, $a1, 0
 ; LA32-NEXT:    bl %plt(__gnu_h2f_ieee)
 ; LA32-NEXT:    fmov.s $fs0, $fa0
-; LA32-NEXT:    ld.hu $a0, $fp, 0
+; LA32-NEXT:    move $a0, $s0
 ; LA32-NEXT:    bl %plt(__gnu_h2f_ieee)
-; LA32-NEXT:    fmul.s $fa0, $fs0, $fa0
+; LA32-NEXT:    fmul.s $fa0, $fa0, $fs0
 ; LA32-NEXT:    bl %plt(__gnu_f2h_ieee)
-; LA32-NEXT:    st.h $a0, $s0, 0
+; LA32-NEXT:    st.h $a0, $fp, 0
 ; LA32-NEXT:    fld.d $fs0, $sp, 8 # 8-byte Folded Reload
 ; LA32-NEXT:    ld.w $s0, $sp, 20 # 4-byte Folded Reload
 ; LA32-NEXT:    ld.w $fp, $sp, 24 # 4-byte Folded Reload
@@ -302,16 +298,16 @@ define void @test_fmul_mem(ptr %p, ptr %q) nounwind {
 ; LA64-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
 ; LA64-NEXT:    st.d $s0, $sp, 8 # 8-byte Folded Spill
 ; LA64-NEXT:    fst.d $fs0, $sp, 0 # 8-byte Folded Spill
-; LA64-NEXT:    move $fp, $a1
-; LA64-NEXT:    move $s0, $a0
-; LA64-NEXT:    ld.hu $a0, $a0, 0
+; LA64-NEXT:    move $fp, $a0
+; LA64-NEXT:    ld.hu $s0, $a0, 0
+; LA64-NEXT:    ld.hu $a0, $a1, 0
 ; LA64-NEXT:    bl %plt(__gnu_h2f_ieee)
 ; LA64-NEXT:    fmov.s $fs0, $fa0
-; LA64-NEXT:    ld.hu $a0, $fp, 0
+; LA64-NEXT:    move $a0, $s0
 ; LA64-NEXT:    bl %plt(__gnu_h2f_ieee)
-; LA64-NEXT:    fmul.s $fa0, $fs0, $fa0
+; LA64-NEXT:    fmul.s $fa0, $fa0, $fs0
 ; LA64-NEXT:    bl %plt(__gnu_f2h_ieee)
-; LA64-NEXT:    st.h $a0, $s0, 0
+; LA64-NEXT:    st.h $a0, $fp, 0
 ; LA64-NEXT:    fld.d $fs0, $sp, 0 # 8-byte Folded Reload
 ; LA64-NEXT:    ld.d $s0, $sp, 8 # 8-byte Folded Reload
 ; LA64-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
@@ -324,3 +320,61 @@ define void @test_fmul_mem(ptr %p, ptr %q) nounwind {
   store half %r, ptr %p
   ret void
 }
+
+define half @freeze_half_undef() nounwind {
+; LA32-LABEL: freeze_half_undef:
+; LA32:       # %bb.0:
+; LA32-NEXT:    addi.w $sp, $sp, -16
+; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT:    movgr2fr.w $fa0, $zero
+; LA32-NEXT:    bl %plt(__gnu_f2h_ieee)
+; LA32-NEXT:    bl %plt(__gnu_h2f_ieee)
+; LA32-NEXT:    fadd.s $fa0, $fa0, $fa0
+; LA32-NEXT:    bl %plt(__gnu_f2h_ieee)
+; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT:    addi.w $sp, $sp, 16
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: freeze_half_undef:
+; LA64:       # %bb.0:
+; LA64-NEXT:    addi.d $sp, $sp, -16
+; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT:    movgr2fr.w $fa0, $zero
+; LA64-NEXT:    bl %plt(__gnu_f2h_ieee)
+; LA64-NEXT:    bl %plt(__gnu_h2f_ieee)
+; LA64-NEXT:    fadd.s $fa0, $fa0, $fa0
+; LA64-NEXT:    bl %plt(__gnu_f2h_ieee)
+; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT:    addi.d $sp, $sp, 16
+; LA64-NEXT:    ret
+  %y1 = freeze half undef
+  %t1 = fadd half %y1, %y1
+  ret half %t1
+}
+
+define half @freeze_half_poison(half %maybe.poison) nounwind {
+; LA32-LABEL: freeze_half_poison:
+; LA32:       # %bb.0:
+; LA32-NEXT:    addi.w $sp, $sp, -16
+; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT:    bl %plt(__gnu_h2f_ieee)
+; LA32-NEXT:    fadd.s $fa0, $fa0, $fa0
+; LA32-NEXT:    bl %plt(__gnu_f2h_ieee)
+; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT:    addi.w $sp, $sp, 16
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: freeze_half_poison:
+; LA64:       # %bb.0:
+; LA64-NEXT:    addi.d $sp, $sp, -16
+; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT:    bl %plt(__gnu_h2f_ieee)
+; LA64-NEXT:    fadd.s $fa0, $fa0, $fa0
+; LA64-NEXT:    bl %plt(__gnu_f2h_ieee)
+; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT:    addi.d $sp, $sp, 16
+; LA64-NEXT:    ret
+  %y1 = freeze half %maybe.poison
+  %t1 = fadd half %y1, %y1
+  ret half %t1
+}

nikic · 2024-09-18T07:40:58Z

Do I understand correctly that a side effect of this change is to change the half float ABI on loongarch from passing via FP regs to passing via GPR regs?

yxd-ym · 2024-09-18T08:47:41Z

> Do I understand correctly that a side effect of this change is to change the half float ABI on loongarch from passing via FP regs to passing via GPR regs?

llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp

Lines 1356 to 1374 in 43c9203

    
           if (!isTypeLegal(MVT::f16)) { 
        
             // Allow targets to control how we legalize half. 
        
             bool SoftPromoteHalfType = softPromoteHalfType(); 
        
             bool UseFPRegsForHalfType = !SoftPromoteHalfType || useFPRegsForHalfType(); 
        
             if (!UseFPRegsForHalfType) { 
        
               NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::i16]; 
        
               RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::i16]; 
        
             } else { 
        
               NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::f32]; 
        
               RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::f32]; 
        
             } 
        
             TransformToType[MVT::f16] = MVT::f32; 
        
             if (SoftPromoteHalfType) { 
        
               ValueTypeActions.setTypeAction(MVT::f16, TypeSoftPromoteHalf); 
        
             } else { 
        
               ValueTypeActions.setTypeAction(MVT::f16, TypePromoteFloat); 
        
             } 
        
           }

From the logic above, the answer seems to be yes. Because with this patch, the following functions' return values for loongarch are

softPromoteHalfType() => true
useFPRegsForHalfType() => false

llvm-project/llvm/include/llvm/CodeGen/TargetLowering.h

Lines 519 to 530 in 43c9203

    
           // Return true if the half type should be promoted using soft promotion rules 
        
           // where each operation is promoted to f32 individually, then converted to 
        
           // fp16. The default behavior is to promote chains of operations, keeping 
        
           // intermediate results in f32 precision and range. 
        
           virtual bool softPromoteHalfType() const { return false; } 
        
           // Return true if, for soft-promoted half, the half type should be passed 
        
           // passed to and returned from functions as f32. The default behavior is to 
        
           // pass as i16. If soft-promoted half is not used, this function is ignored 
        
           // and values are always passed and returned as f32. 
        
           virtual bool useFPRegsForHalfType() const { return false; }

tru · 2024-09-24T06:31:17Z

Are there still question marks about this one @nikic or can it be approved and merged?

nikic · 2024-09-24T07:32:30Z

I'm concerned about changing the call ABI in a stable release, even if the ABI itself is not specified. It means that if you compile an object with LLVM 19.1.0, you may not be able to link it with an object compiled with LLVM 19.1.1.

It's also worth noting that the ABI used here may not be final either, #109368 proposed to change it again.

heiher · 2024-09-24T07:44:45Z

Currently, Rust's compiler-builtins has marked fp16 as available for loongarch64, but in fact, the functionality is broken. Even with this patch, it is not optimal. Subjectively, I hope these patches can be backported to LLVM 19 to avoid ABI incompatibility issues across multiple versions. If we don’t break anything unrelated to fp16, it won’t be a bad thing to work on fixing fp16, as it was already broken to begin with. Thanks.

arsenm · 2024-09-24T07:46:53Z

This patch is fixing the stated issue in a roundabout way. You do not need to change the ABI or half promotion strategy just to support freeze on half values

nikic · 2024-09-24T09:30:03Z

fp16 support is generally quite broken outside of a few targets -- from a Rust perspective, we'd prefer full support in LLVM 20 rather than backporting things piecemeal to LLVM 19. Especially if it's going to change the ABI, which I think means we'd need LLVM-patch-version-specific handling in compiler-builtins.

tgross35 · 2024-09-27T00:06:26Z

> Currently, Rust's compiler-builtins has marked fp16 as available for loongarch64, but in fact, the functionality is broken. Even with this patch, it is not optimal. Subjectively, I hope these patches can be backported to LLVM 19 to avoid ABI incompatibility issues across multiple versions. If we don’t break anything unrelated to fp16, it won’t be a bad thing to work on fixing fp16, as it was already broken to begin with. Thanks.

If you are referring to https://github.com/rust-lang/compiler-builtins/blob/bb18ce58c2b5fec081a5dd3553aa960a36b8de5c/configure.rs#L51-L91, we only disable f16/f128 on platforms where LLVM 19 actually has a crash or produces code that can't be linked. Quite a few of the targets that aren't disabled there have some form of ABI issue - so we just build the symbols but don't actually test with them.

(the config for what gets tested is at https://github.com/rust-lang/rust/blob/2bd1e894efde3b6be857ad345914a3b1cea51def/library/std/build.rs#L81-L186)

tru · 2024-10-01T06:50:45Z

Sounds to me like we don't have to accept this patch into the 19.1 release. Please re-open and argue if you don't agree with that assessment.

heiher · 2024-10-19T14:02:32Z

Update: #109368 (comment)

I suggest continuing this PR to ensure that fp16 support is functional on the release/19.x.

…ongarch (llvm#107791) For zig with LLVM 19.1.0rc4, we are seeing the following error when bootstrapping a `loongarch64-linux-musl` target. ziglang/zig-bootstrap#164 (comment) It seems that this issue is caused by `PromoteFloatResult` not handling the FREEZE OP on loongarch. Here is the reproduction of the error: https://godbolt.org/z/PPfvWjjG5 ~~This patch adds the FREEZE OP handling with `PromoteFloatRes_UnaryOp` and adds a test case.~~ This patch changes loongarch's way of floating point promotion to soft promotion to avoid this problem. See: loongarch's handling of `half`: - llvm#93894 - llvm#94456 Also see: other float promotion FREEZE handling - llvm@0019c2f (cherry picked from commit 13280d9)

tru · 2024-10-28T07:28:21Z

@heiher this would still break the ABI right? so it would still create problem for downstream users like rust?

cc @nikic @arsenm @tgross35

heiher · 2024-10-28T07:46:35Z

> @heiher this would still break the ABI right? so it would still create problem for downstream users like rust?
>
> cc @nikic @arsenm @tgross35

After deciding that FPU does not support fp16, there’s no longer any ABI-breaking impact. Without this patch, LoongArch’s software emulation for fp16 operations still has correctness issues. This is why downstream software depends on it, which is the reason for my request. Thanks.

tgross35 · 2024-10-28T08:47:16Z

> @heiher this would still break the ABI right? so it would still create problem for downstream users like rust?

Speaking only from a Rust perspective, don't worry too much about making breaking changes that fix f16 or f128 behavior. The types are nightly-only for now (probably will be that way for at least a year or so) so we don't have any public guarantees. (I would just get to un-skip f16 tests on another arch sooner).

I don't know what other frontends expose this type, but the commit mentions Zig.

If it is preferable to keep using float registers, it sounds like there is another fix for the precision issue rather than changing the ABI #97975 (comment). I don't think there is a test for the precision yet.

arsenm · 2024-10-28T18:38:55Z

I would like to reiterate that this is a roundabout fix for the assertion in question. It sends it down a different path that happens to avoid it. It is not necessary to make this ABI change to fix the assertion.

Disable `f16` for LoongArch64 due to incorrect code generation on LLVM 19, which causes failures in `testcrate/tests/conv.rs`. This workaround will remain in place until llvm/llvm-project#109093 is merged or we upgrade to LLVM 20.

heiher · 2024-11-11T02:23:47Z

Although the original intent of this patch was to resolve an assertion issue (ziglang/zig-bootstrap#164 (comment)), it actually addresses two miscompilation issues (#97975 #97981) as well. When the backport was initially proposed, I believed that an ABI change would be necessary to ensure compatibility with future hardware supporting fp16. However, it's now clear that no such change will be required (#109368 (comment)). Given that this is primarily a bug fix, could we proceed with merging this backport into 19?

tru · 2024-11-12T07:35:44Z

@nikic @arsenm can someone re-review the new version without the abi break?

nikic · 2024-11-12T09:24:14Z

I'd still prefer not to backport this. We're changing many targets to use softPromoteHalfType in LLVM 20 (hopefully all if someone gets around to it...), with the ABI changes that implies. I don't think it makes sense to backport this just for loongarch, and I also don't want to backport this for all targets where this change is made (esp as the changes are trickling in slowly over time).

heiher · 2024-11-12T09:52:12Z

> I'd still prefer not to backport this. We're changing many targets to use softPromoteHalfType in LLVM 20 (hopefully all if someone gets around to it...), with the ABI changes that implies. I don't think it makes sense to backport this just for loongarch, and I also don't want to backport this for all targets where this change is made (esp as the changes are trickling in slowly over time).

I agree with you. It's reasonable. Thanks for your feedback.

llvmbot added this to the LLVM 19.X Release milestone Sep 18, 2024

llvmbot requested a review from arsenm September 18, 2024 07:25

llvmbot mentioned this pull request Sep 18, 2024

[loongarch][DAG][FREEZE] Fix crash when FREEZE a half(f16) type on loongarch #107791

Merged

llvmbot added the backend:loongarch label Sep 18, 2024

arsenm approved these changes Sep 18, 2024

View reviewed changes

tru closed this Oct 1, 2024

heiher mentioned this pull request Oct 19, 2024

[LoongArch] Pass 'half' in the lower 16 bits of an f32 value with F/D ABI #109368

Closed

heiher reopened this Oct 19, 2024

heiher force-pushed the issue107791 branch from 3539b82 to bd494f3 Compare October 19, 2024 14:03

heiher mentioned this pull request Nov 1, 2024

Disable f16 for LoongArch64 rust-lang/compiler-builtins#722

Merged

heiher closed this Nov 12, 2024

tru deleted the issue107791 branch November 15, 2024 08:30

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

release/19.x: [loongarch][DAG][FREEZE] Fix crash when FREEZE a half(f16) type on loongarch (#107791) #109093

release/19.x: [loongarch][DAG][FREEZE] Fix crash when FREEZE a half(f16) type on loongarch (#107791) #109093

llvmbot commented Sep 18, 2024

llvmbot commented Sep 18, 2024

llvmbot commented Sep 18, 2024

nikic commented Sep 18, 2024

yxd-ym commented Sep 18, 2024

tru commented Sep 24, 2024

nikic commented Sep 24, 2024

heiher commented Sep 24, 2024 •

edited

Loading

arsenm commented Sep 24, 2024

nikic commented Sep 24, 2024

tgross35 commented Sep 27, 2024

tru commented Oct 1, 2024

heiher commented Oct 19, 2024

tru commented Oct 28, 2024

heiher commented Oct 28, 2024

tgross35 commented Oct 28, 2024

arsenm commented Oct 28, 2024

heiher commented Nov 11, 2024

tru commented Nov 12, 2024

nikic commented Nov 12, 2024 •

edited

Loading

heiher commented Nov 12, 2024

release/19.x: [loongarch][DAG][FREEZE] Fix crash when FREEZE a half(f16) type on loongarch (#107791) #109093

release/19.x: [loongarch][DAG][FREEZE] Fix crash when FREEZE a half(f16) type on loongarch (#107791) #109093

Conversation

llvmbot commented Sep 18, 2024

llvmbot commented Sep 18, 2024

llvmbot commented Sep 18, 2024

nikic commented Sep 18, 2024

yxd-ym commented Sep 18, 2024

tru commented Sep 24, 2024

nikic commented Sep 24, 2024

heiher commented Sep 24, 2024 • edited Loading

arsenm commented Sep 24, 2024

nikic commented Sep 24, 2024

tgross35 commented Sep 27, 2024

tru commented Oct 1, 2024

heiher commented Oct 19, 2024

tru commented Oct 28, 2024

heiher commented Oct 28, 2024

tgross35 commented Oct 28, 2024

arsenm commented Oct 28, 2024

heiher commented Nov 11, 2024

tru commented Nov 12, 2024

nikic commented Nov 12, 2024 • edited Loading

heiher commented Nov 12, 2024

heiher commented Sep 24, 2024 •

edited

Loading

nikic commented Nov 12, 2024 •

edited

Loading