Skip to content

Commit

Permalink
Adjust overload types between clang and mlir
Browse files Browse the repository at this point in the history
  • Loading branch information
mbrkusanin committed Jan 19, 2024
1 parent 684d08f commit 86dfbca
Show file tree
Hide file tree
Showing 8 changed files with 71 additions and 71 deletions.
26 changes: 13 additions & 13 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18341,14 +18341,14 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:
ArgsForMatchingMatrixTypes = {0, 2};
ArgsForMatchingMatrixTypes = {2, 0};
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;
break;
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:
ArgsForMatchingMatrixTypes = {0, 2};
ArgsForMatchingMatrixTypes = {2, 0};
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;
break;
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:
Expand All @@ -18357,7 +18357,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
LLVM_FALLTHROUGH;
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:
ArgsForMatchingMatrixTypes = {0, 2};
ArgsForMatchingMatrixTypes = {2, 0};
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;
break;
case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:
Expand All @@ -18366,56 +18366,56 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
LLVM_FALLTHROUGH;
case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:
case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:
ArgsForMatchingMatrixTypes = {0, 2};
ArgsForMatchingMatrixTypes = {2, 0};
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;
break;
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:
case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:
ArgsForMatchingMatrixTypes = {0, 2};
ArgsForMatchingMatrixTypes = {2, 0};
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied;
break;
case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:
case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:
ArgsForMatchingMatrixTypes = {0, 2};
ArgsForMatchingMatrixTypes = {2, 0};
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied;
break;
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:
ArgsForMatchingMatrixTypes = {1, 4};
ArgsForMatchingMatrixTypes = {4, 1};
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;
break;
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:
ArgsForMatchingMatrixTypes = {1, 4};
ArgsForMatchingMatrixTypes = {4, 1};
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;
break;
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:
ArgsForMatchingMatrixTypes = {0, 2};
ArgsForMatchingMatrixTypes = {2, 0};
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8;
break;
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:
ArgsForMatchingMatrixTypes = {0, 2};
ArgsForMatchingMatrixTypes = {2, 0};
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8;
break;
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:
ArgsForMatchingMatrixTypes = {0, 2};
ArgsForMatchingMatrixTypes = {2, 0};
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8;
break;
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:
ArgsForMatchingMatrixTypes = {0, 2};
ArgsForMatchingMatrixTypes = {2, 0};
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8;
break;
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:
ArgsForMatchingMatrixTypes = {1, 4};
ArgsForMatchingMatrixTypes = {4, 1};
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x32_iu4;
break;
case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:
Expand Down
18 changes: 9 additions & 9 deletions clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-wmma-w32.cl
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ typedef int v8i __attribute__((ext_vector_type(8)));

// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_f16_w32(
// CHECK-GFX1200-NEXT: entry:
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f16.v8f32(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x float> [[C:%.*]])
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f32.v8f16(<8 x half> [[A:%.*]], <8 x half> [[B:%.*]], <8 x float> [[C:%.*]])
// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4:![0-9]+]]
// CHECK-GFX1200-NEXT: ret void
//
Expand All @@ -31,7 +31,7 @@ void test_amdgcn_wmma_f32_16x16x16_f16_w32(global v8f* out, v8h a, v8h b, v8f c)

// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_bf16_w32(
// CHECK-GFX1200-NEXT: entry:
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v8bf16.v8f32(<8 x bfloat> [[A:%.*]], <8 x bfloat> [[B:%.*]], <8 x float> [[C:%.*]])
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v8f32.v8bf16(<8 x bfloat> [[A:%.*]], <8 x bfloat> [[B:%.*]], <8 x float> [[C:%.*]])
// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
Expand Down Expand Up @@ -76,7 +76,7 @@ void test_amdgcn_wmma_bf16_16x16x16_bf16_w32(global v8bf* out, v8bf a, v8bf b, v

// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_i32_16x16x16_iu8_w32(
// CHECK-GFX1200-NEXT: entry:
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v2i32.v8i32(i1 true, <2 x i32> [[A:%.*]], i1 true, <2 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false)
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v8i32.v2i32(i1 true, <2 x i32> [[A:%.*]], i1 true, <2 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false)
// CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
Expand All @@ -91,7 +91,7 @@ void test_amdgcn_wmma_i32_16x16x16_iu8_w32(global v8i* out, v2i a, v2i b, v8i c)

// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_i32_16x16x16_iu4_w32(
// CHECK-GFX1200-NEXT: entry:
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.i32.v8i32(i1 true, i32 [[A:%.*]], i1 true, i32 [[B:%.*]], <8 x i32> [[C:%.*]], i1 false)
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v8i32.i32(i1 true, i32 [[A:%.*]], i1 true, i32 [[B:%.*]], <8 x i32> [[C:%.*]], i1 false)
// CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
Expand All @@ -102,7 +102,7 @@ void test_amdgcn_wmma_i32_16x16x16_iu4_w32(global v8i* out, int a, int b, v8i c)

// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32(
// CHECK-GFX1200-NEXT: entry:
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v2i32.v8f32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <8 x float> [[C:%.*]])
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v8f32.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <8 x float> [[C:%.*]])
// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
Expand All @@ -113,7 +113,7 @@ void test_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32(global v8f* out, v2i a, v2i b, v8

// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32(
// CHECK-GFX1200-NEXT: entry:
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v2i32.v8f32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <8 x float> [[C:%.*]])
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v8f32.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <8 x float> [[C:%.*]])
// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
Expand All @@ -124,7 +124,7 @@ void test_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32(global v8f* out, v2i a, v2i b, v8

// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32(
// CHECK-GFX1200-NEXT: entry:
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v2i32.v8f32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <8 x float> [[C:%.*]])
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v8f32.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <8 x float> [[C:%.*]])
// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
Expand All @@ -135,7 +135,7 @@ void test_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32(global v8f* out, v2i a, v2i b, v8

// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32(
// CHECK-GFX1200-NEXT: entry:
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v2i32.v8f32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <8 x float> [[C:%.*]])
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v8f32.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <8 x float> [[C:%.*]])
// CHECK-GFX1200-NEXT: store <8 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
Expand All @@ -146,7 +146,7 @@ void test_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32(global v8f* out, v2i a, v2i b, v8

// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_i32_16x16x32_iu4_w32(
// CHECK-GFX1200-NEXT: entry:
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v2i32.v8i32(i1 true, <2 x i32> [[A:%.*]], i1 true, <2 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false)
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v8i32.v2i32(i1 true, <2 x i32> [[A:%.*]], i1 true, <2 x i32> [[B:%.*]], <8 x i32> [[C:%.*]], i1 false)
// CHECK-GFX1200-NEXT: store <8 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
Expand Down
18 changes: 9 additions & 9 deletions clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-wmma-w64.cl
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ typedef int v4i __attribute__((ext_vector_type(4)));

// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_f16_w64(
// CHECK-GFX1200-NEXT: entry:
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f16.v4f32(<4 x half> [[A:%.*]], <4 x half> [[B:%.*]], <4 x float> [[C:%.*]])
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f32.v4f16(<4 x half> [[A:%.*]], <4 x half> [[B:%.*]], <4 x float> [[C:%.*]])
// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4:![0-9]+]]
// CHECK-GFX1200-NEXT: ret void
//
Expand All @@ -30,7 +30,7 @@ void test_amdgcn_wmma_f32_16x16x16_f16_w64(global v4f* out, v4h a, v4h b, v4f c)

// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_bf16_w64(
// CHECK-GFX1200-NEXT: entry:
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v4bf16.v4f32(<4 x bfloat> [[A:%.*]], <4 x bfloat> [[B:%.*]], <4 x float> [[C:%.*]])
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v4f32.v4bf16(<4 x bfloat> [[A:%.*]], <4 x bfloat> [[B:%.*]], <4 x float> [[C:%.*]])
// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
Expand Down Expand Up @@ -75,7 +75,7 @@ void test_amdgcn_wmma_bf16_16x16x16_bf16_w64(global v4bf* out, v4bf a, v4bf b, v

// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_i32_16x16x16_iu8_w64(
// CHECK-GFX1200-NEXT: entry:
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.i32.v4i32(i1 true, i32 [[A:%.*]], i1 true, i32 [[B:%.*]], <4 x i32> [[C:%.*]], i1 false)
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v4i32.i32(i1 true, i32 [[A:%.*]], i1 true, i32 [[B:%.*]], <4 x i32> [[C:%.*]], i1 false)
// CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
Expand All @@ -90,7 +90,7 @@ void test_amdgcn_wmma_i32_16x16x16_iu8_w64(global v4i* out, int a, int b, v4i c)

// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_i32_16x16x16_iu4_w64(
// CHECK-GFX1200-NEXT: entry:
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.i32.v4i32(i1 true, i32 [[A:%.*]], i1 true, i32 [[B:%.*]], <4 x i32> [[C:%.*]], i1 false)
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v4i32.i32(i1 true, i32 [[A:%.*]], i1 true, i32 [[B:%.*]], <4 x i32> [[C:%.*]], i1 false)
// CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
Expand All @@ -101,7 +101,7 @@ void test_amdgcn_wmma_i32_16x16x16_iu4_w64(global v4i* out, int a, int b, v4i c)

// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32(
// CHECK-GFX1200-NEXT: entry:
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.i32.v4f32(i32 [[A:%.*]], i32 [[B:%.*]], <4 x float> [[C:%.*]])
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v4f32.i32(i32 [[A:%.*]], i32 [[B:%.*]], <4 x float> [[C:%.*]])
// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
Expand All @@ -112,7 +112,7 @@ void test_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32(global v4f* out, int a, int b, v4

// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32(
// CHECK-GFX1200-NEXT: entry:
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.i32.v4f32(i32 [[A:%.*]], i32 [[B:%.*]], <4 x float> [[C:%.*]])
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v4f32.i32(i32 [[A:%.*]], i32 [[B:%.*]], <4 x float> [[C:%.*]])
// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
Expand All @@ -123,7 +123,7 @@ void test_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32(global v4f* out, int a, int b, v4

// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32(
// CHECK-GFX1200-NEXT: entry:
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.i32.v4f32(i32 [[A:%.*]], i32 [[B:%.*]], <4 x float> [[C:%.*]])
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v4f32.i32(i32 [[A:%.*]], i32 [[B:%.*]], <4 x float> [[C:%.*]])
// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
Expand All @@ -134,7 +134,7 @@ void test_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32(global v4f* out, int a, int b, v4

// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32(
// CHECK-GFX1200-NEXT: entry:
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.i32.v4f32(i32 [[A:%.*]], i32 [[B:%.*]], <4 x float> [[C:%.*]])
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v4f32.i32(i32 [[A:%.*]], i32 [[B:%.*]], <4 x float> [[C:%.*]])
// CHECK-GFX1200-NEXT: store <4 x float> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
Expand All @@ -145,7 +145,7 @@ void test_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32(global v4f* out, int a, int b, v4

// CHECK-GFX1200-LABEL: @test_amdgcn_wmma_i32_16x16x32_iu4_w32(
// CHECK-GFX1200-NEXT: entry:
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.i32.v4i32(i1 true, i32 [[A:%.*]], i1 true, i32 [[B:%.*]], <4 x i32> [[C:%.*]], i1 false)
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v4i32.i32(i1 true, i32 [[A:%.*]], i1 true, i32 [[B:%.*]], <4 x i32> [[C:%.*]], i1 false)
// CHECK-GFX1200-NEXT: store <4 x i32> [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 16, !tbaa [[TBAA4]]
// CHECK-GFX1200-NEXT: ret void
//
Expand Down
Loading

0 comments on commit 86dfbca

Please sign in to comment.