Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[HLSL] implement elementwise firstbithigh hlsl builtin #111082

Merged
merged 12 commits into from
Nov 6, 2024
6 changes: 6 additions & 0 deletions clang/include/clang/Basic/Builtins.td
Original file line number Diff line number Diff line change
Expand Up @@ -4798,6 +4798,12 @@ def HLSLDot4AddI8Packed : LangBuiltin<"HLSL_LANG"> {
let Prototype = "int(unsigned int, unsigned int, int)";
}

// HLSL-only builtin behind the firstbithigh() overloads in hlsl_intrinsics.h.
// The "void(...)" prototype is a placeholder: SemaHLSL checks the single
// argument and assigns the call's real result type (unsigned int, or a vector
// of unsigned int with the argument's element count).
def HLSLFirstBitHigh : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_elementwise_firstbithigh"];
let Attributes = [NoThrow, Const];
let Prototype = "void(...)";
}

def HLSLFrac : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_elementwise_frac"];
let Attributes = [NoThrow, Const];
Expand Down
18 changes: 18 additions & 0 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18767,6 +18767,15 @@ static Intrinsic::ID getDotProductIntrinsic(CGHLSLRuntime &RT, QualType QT) {
return RT.getUDotIntrinsic();
}

/// Picks the "first bit high" intrinsic that matches the signedness of the
/// builtin's operand.
///
/// \param RT the HLSL runtime that supplies the intrinsic IDs.
/// \param QT the type of the operand passed to the builtin.
/// \returns the signed variant when \p QT has a signed integer
/// representation, otherwise the unsigned variant.
static Intrinsic::ID getFirstBitHighIntrinsic(CGHLSLRuntime &RT, QualType QT) {
  if (QT->hasSignedIntegerRepresentation())
    return RT.getFirstBitSHighIntrinsic();

  // Sema rejects non-integer element types for this builtin, so an operand
  // that is not signed has to be unsigned by the time codegen runs.
  assert(QT->hasUnsignedIntegerRepresentation() &&
         "firstbithigh operand must have an integer representation");
  return RT.getFirstBitUHighIntrinsic();
}

Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
const CallExpr *E,
ReturnValueSlot ReturnValue) {
Expand Down Expand Up @@ -18866,6 +18875,15 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
/*ReturnType=*/C->getType(), ID, ArrayRef<Value *>{A, B, C}, nullptr,
"hlsl.dot4add.i8packed");
}
case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: {
  // The operand's signedness selects the intrinsic; the call expression's own
  // type supplies the IR result type.
  const Expr *Operand = E->getArg(0);
  Value *OperandVal = EmitScalarExpr(Operand);

  llvm::Type *ResultTy = ConvertType(E->getType());
  Intrinsic::ID FirstBitHighID =
      getFirstBitHighIntrinsic(CGM.getHLSLRuntime(), Operand->getType());

  return Builder.CreateIntrinsic(
      /*ReturnType=*/ResultTy, FirstBitHighID, ArrayRef<Value *>{OperandVal},
      nullptr, "hlsl.firstbithigh");
}
case Builtin::BI__builtin_hlsl_lerp: {
Value *X = EmitScalarExpr(E->getArg(0));
Value *Y = EmitScalarExpr(E->getArg(1));
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/CodeGen/CGHLSLRuntime.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@ class CGHLSLRuntime {
GENERATE_HLSL_INTRINSIC_FUNCTION(Dot4AddI8Packed, dot4add_i8packed)
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveIsFirstLane, wave_is_first_lane)
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveReadLaneAt, wave_readlane)
GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitUHigh, firstbituhigh)
GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitSHigh, firstbitshigh)

GENERATE_HLSL_INTRINSIC_FUNCTION(CreateHandleFromBinding, handle_fromBinding)

Expand Down
72 changes: 72 additions & 0 deletions clang/lib/Headers/hlsl/hlsl_intrinsics.h
Original file line number Diff line number Diff line change
Expand Up @@ -1008,6 +1008,78 @@ float3 exp2(float3);
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2)
float4 exp2(float4);

//===----------------------------------------------------------------------===//
// firstbithigh builtins
//===----------------------------------------------------------------------===//

/// \fn uint firstbithigh(T Val)
/// \brief Returns the location of the first set bit starting from the highest
/// order bit and working downward, per component. The result is always a uint,
/// or a uint vector with the same number of components as \a Val.
/// \param Val the input value.

#ifdef __HLSL_ENABLE_16_BIT
_HLSL_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
uint firstbithigh(int16_t);
_HLSL_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
uint2 firstbithigh(int16_t2);
_HLSL_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
uint3 firstbithigh(int16_t3);
_HLSL_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
uint4 firstbithigh(int16_t4);
_HLSL_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
uint firstbithigh(uint16_t);
_HLSL_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
uint2 firstbithigh(uint16_t2);
_HLSL_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
uint3 firstbithigh(uint16_t3);
_HLSL_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
uint4 firstbithigh(uint16_t4);
#endif

_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
uint firstbithigh(int);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
uint2 firstbithigh(int2);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
uint3 firstbithigh(int3);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
uint4 firstbithigh(int4);

_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
uint firstbithigh(uint);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
uint2 firstbithigh(uint2);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
uint3 firstbithigh(uint3);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
uint4 firstbithigh(uint4);

_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
uint firstbithigh(int64_t);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
uint2 firstbithigh(int64_t2);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
uint3 firstbithigh(int64_t3);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
uint4 firstbithigh(int64_t4);

_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
uint firstbithigh(uint64_t);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
uint2 firstbithigh(uint64_t2);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
uint3 firstbithigh(uint64_t3);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
uint4 firstbithigh(uint64_t4);

//===----------------------------------------------------------------------===//
// floor builtins
//===----------------------------------------------------------------------===//
Expand Down
25 changes: 25 additions & 0 deletions clang/lib/Sema/SemaHLSL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1947,6 +1947,31 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
return true;
break;
}
case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: {
  // Run the shared one-argument elementwise preparation first; the argument
  // is only inspected after it has succeeded.
  if (SemaRef.PrepareBuiltinElementwiseMathOneArgCall(TheCall))
    return true;

  auto &Ctx = SemaRef.Context;
  const Expr *Operand = TheCall->getArg(0);
  QualType OperandTy = Operand->getType();

  // The result is unsigned int for a scalar operand, or a vector of unsigned
  // int with the operand's element count and vector kind.
  QualType ElemTy = OperandTy;
  QualType ResultTy = Ctx.UnsignedIntTy;
  if (const auto *OperandVecTy = OperandTy->getAs<VectorType>()) {
    ElemTy = OperandVecTy->getElementType();
    ResultTy = Ctx.getVectorType(ResultTy, OperandVecTy->getNumElements(),
                                 OperandVecTy->getVectorKind());
  }

  // Only integer element types are accepted.
  if (!ElemTy->isIntegerType()) {
    Diag(Operand->getBeginLoc(), diag::err_builtin_invalid_arg_type)
        << 1 << /* integer ty */ 6 << OperandTy;
    return true;
  }

  TheCall->setType(ResultTy);
  break;
}
case Builtin::BI__builtin_hlsl_select: {
if (SemaRef.checkArgCount(TheCall, 3))
return true;
Expand Down
153 changes: 153 additions & 0 deletions clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s -DTARGET=dx
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \
// RUN: -emit-llvm -disable-llvm-passes \
// RUN: -o - | FileCheck %s -DTARGET=spv

#ifdef __HLSL_ENABLE_16_BIT
// CHECK-LABEL: test_firstbithigh_ushort
// CHECK: call i32 @llvm.[[TARGET]].firstbituhigh.i16
uint test_firstbithigh_ushort(uint16_t p0) {
return firstbithigh(p0);
}

// CHECK-LABEL: test_firstbithigh_ushort2
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i16
uint2 test_firstbithigh_ushort2(uint16_t2 p0) {
return firstbithigh(p0);
}

// CHECK-LABEL: test_firstbithigh_ushort3
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i16
uint3 test_firstbithigh_ushort3(uint16_t3 p0) {
return firstbithigh(p0);
}

// CHECK-LABEL: test_firstbithigh_ushort4
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i16
uint4 test_firstbithigh_ushort4(uint16_t4 p0) {
return firstbithigh(p0);
}

// CHECK-LABEL: test_firstbithigh_short
// CHECK: call i32 @llvm.[[TARGET]].firstbitshigh.i16
uint test_firstbithigh_short(int16_t p0) {
return firstbithigh(p0);
}

// CHECK-LABEL: test_firstbithigh_short2
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i16
uint2 test_firstbithigh_short2(int16_t2 p0) {
return firstbithigh(p0);
}

// CHECK-LABEL: test_firstbithigh_short3
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i16
uint3 test_firstbithigh_short3(int16_t3 p0) {
return firstbithigh(p0);
}

// CHECK-LABEL: test_firstbithigh_short4
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i16
uint4 test_firstbithigh_short4(int16_t4 p0) {
return firstbithigh(p0);
}
#endif // __HLSL_ENABLE_16_BIT

// CHECK-LABEL: test_firstbithigh_uint
// CHECK: call i32 @llvm.[[TARGET]].firstbituhigh.i32
uint test_firstbithigh_uint(uint p0) {
return firstbithigh(p0);
}

// CHECK-LABEL: test_firstbithigh_uint2
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i32
uint2 test_firstbithigh_uint2(uint2 p0) {
return firstbithigh(p0);
}

// CHECK-LABEL: test_firstbithigh_uint3
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i32
uint3 test_firstbithigh_uint3(uint3 p0) {
return firstbithigh(p0);
}

// CHECK-LABEL: test_firstbithigh_uint4
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i32
uint4 test_firstbithigh_uint4(uint4 p0) {
return firstbithigh(p0);
}

// CHECK-LABEL: test_firstbithigh_ulong
// CHECK: call i32 @llvm.[[TARGET]].firstbituhigh.i64
uint test_firstbithigh_ulong(uint64_t p0) {
return firstbithigh(p0);
}

// CHECK-LABEL: test_firstbithigh_ulong2
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i64
uint2 test_firstbithigh_ulong2(uint64_t2 p0) {
return firstbithigh(p0);
}

// CHECK-LABEL: test_firstbithigh_ulong3
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i64
uint3 test_firstbithigh_ulong3(uint64_t3 p0) {
return firstbithigh(p0);
}

// CHECK-LABEL: test_firstbithigh_ulong4
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i64
uint4 test_firstbithigh_ulong4(uint64_t4 p0) {
return firstbithigh(p0);
}

// CHECK-LABEL: test_firstbithigh_int
// CHECK: call i32 @llvm.[[TARGET]].firstbitshigh.i32
uint test_firstbithigh_int(int p0) {
return firstbithigh(p0);
}

// CHECK-LABEL: test_firstbithigh_int2
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i32
uint2 test_firstbithigh_int2(int2 p0) {
return firstbithigh(p0);
}

// CHECK-LABEL: test_firstbithigh_int3
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i32
uint3 test_firstbithigh_int3(int3 p0) {
return firstbithigh(p0);
}

// CHECK-LABEL: test_firstbithigh_int4
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i32
uint4 test_firstbithigh_int4(int4 p0) {
return firstbithigh(p0);
}

// CHECK-LABEL: test_firstbithigh_long
// CHECK: call i32 @llvm.[[TARGET]].firstbitshigh.i64
uint test_firstbithigh_long(int64_t p0) {
return firstbithigh(p0);
}

// CHECK-LABEL: test_firstbithigh_long2
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i64
uint2 test_firstbithigh_long2(int64_t2 p0) {
return firstbithigh(p0);
}

// CHECK-LABEL: test_firstbithigh_long3
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i64
uint3 test_firstbithigh_long3(int64_t3 p0) {
return firstbithigh(p0);
}

// CHECK-LABEL: test_firstbithigh_long4
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i64
uint4 test_firstbithigh_long4(int64_t4 p0) {
return firstbithigh(p0);
}
28 changes: 28 additions & 0 deletions clang/test/SemaHLSL/BuiltIns/firstbithigh-errors.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -verify-ignore-unexpected

int test_too_few_arg() {
return firstbithigh();
// expected-error@-1 {{no matching function for call to 'firstbithigh'}}
}

int test_too_many_arg(int p0) {
return firstbithigh(p0, p0);
// expected-error@-1 {{no matching function for call to 'firstbithigh'}}
}

double test_int_builtin(double p0) {
return firstbithigh(p0);
// expected-error@-1 {{call to 'firstbithigh' is ambiguous}}
}

double2 test_int_builtin_2(double2 p0) {
return __builtin_hlsl_elementwise_firstbithigh(p0);
// expected-error@-1 {{1st argument must be a vector of integers
// (was 'double2' (aka 'vector<double, 2>'))}}
}

float test_int_builtin_3(float p0) {
return __builtin_hlsl_elementwise_firstbithigh(p0);
// expected-error@-1 {{1st argument must be a vector of integers
// (was 'float')}}
}
2 changes: 2 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsDirectX.td
Original file line number Diff line number Diff line change
Expand Up @@ -93,4 +93,6 @@ def int_dx_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, L
def int_dx_splitdouble : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMMatchType<0>],
[LLVMScalarOrSameVectorWidth<0, llvm_double_ty>], [IntrNoMem]>;
def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
def int_dx_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
def int_dx_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
}
2 changes: 2 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsSPIRV.td
Original file line number Diff line number Diff line change
Expand Up @@ -99,4 +99,6 @@ let TargetPrefix = "spv" in {
[llvm_any_ty],
[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty],
[IntrNoMem]>;
def int_spv_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
def int_spv_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
}
24 changes: 24 additions & 0 deletions llvm/lib/Target/DirectX/DXIL.td
Original file line number Diff line number Diff line change
Expand Up @@ -564,6 +564,30 @@ def CountBits : DXILOp<31, unaryBits> {
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
}

// DXIL op record for the unsigned flavor of firstbithigh. It is tied to the
// int_dx_firstbituhigh intrinsic, takes one overloaded integer operand
// (i16, i32 or i64) and always yields an i32.
def FirstbitHi : DXILOp<33, unaryBits> {
let Doc = "Returns the location of the first set bit starting from "
"the highest order bit and working downward.";
let LLVMIntrinsic = int_dx_firstbituhigh;
let arguments = [OverloadTy];
let result = Int32Ty;
let overloads =
[Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>];
let stages = [Stages<DXIL1_0, [all_stages]>];
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
}

// DXIL op record for the signed flavor of firstbithigh. It is tied to the
// int_dx_firstbitshigh intrinsic, takes one overloaded integer operand
// (i16, i32 or i64) and always yields an i32.
def FirstbitSHi : DXILOp<34, unaryBits> {
let Doc = "Returns the location of the first set bit from "
"the highest order bit based on the sign.";
let LLVMIntrinsic = int_dx_firstbitshigh;
let arguments = [OverloadTy];
let result = Int32Ty;
let overloads =
[Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>];
let stages = [Stages<DXIL1_0, [all_stages]>];
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
}

def FMax : DXILOp<35, binary> {
let Doc = "Float maximum. FMax(a,b) = a > b ? a : b";
let LLVMIntrinsic = int_maxnum;
Expand Down
Loading