From 90df1a6a639401cac42f7bac69792d5b47ddab93 Mon Sep 17 00:00:00 2001
From: Tim Besard
Date: Wed, 28 Dec 2022 13:59:26 +0100
Subject: [PATCH] Use plain llvmcall calling convention for WMMA intrinsics. (#1709)

---
 src/device/intrinsics/wmma.jl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/device/intrinsics/wmma.jl b/src/device/intrinsics/wmma.jl
index c12fe526ca..3b80e51c41 100644
--- a/src/device/intrinsics/wmma.jl
+++ b/src/device/intrinsics/wmma.jl
@@ -191,7 +191,7 @@ for ops in all_ldst_ops,
     # Determine types + size for this (matrix, elem_type) combination
     arr_ty, frag_ty, sz = get_frag_info(mat, elem_type, shape)
 
-    ccall_name = "extern $llvm_intr"
+    ccall_name = "$llvm_intr"
 
     ptr_ty = LLVMPtr{arr_ty, addr_space_int}
     struct_ty = Symbol("LLVMStruct$sz")
@@ -253,7 +253,7 @@ export llvm_wmma_store
     # Determine types + size for this (matrix, elem_type) combination
     arr_ty, frag_ty, sz = get_frag_info(mat, elem_type, shape)
 
-    ccall_name = "extern $llvm_intr"
+    ccall_name = "$llvm_intr"
 
     frag_types = ntuple(i -> frag_ty, sz)
     frag_vars = ntuple(i -> :(data[$i]), sz)
@@ -325,7 +325,7 @@ for ops in all_wmma_ops,
     c_arr_ty, c_frag_ty, c_sz = get_frag_info("c", c_elem_type, shape)
     d_arr_ty, d_frag_ty, d_sz = get_frag_info("d", d_elem_type, shape)
 
-    ccall_name = "extern $llvm_intr"
+    ccall_name = "$llvm_intr"
 
     a_types = ntuple(i -> a_frag_ty, a_sz)
     b_types = ntuple(i -> b_frag_ty, b_sz)