Skip to content

Commit

Permalink
[HLSL] Implement elementwise firstbitlow builtin (llvm#116858)
Browse files Browse the repository at this point in the history
Closes llvm#99116

Implements `firstbitlow` by extracting common functionality from
`firstbithigh` into a shared function while also fixing a bug for an edge
case where `u64x3` and larger vectors will attempt to create vectors
larger than the SPRIV max of 4.
---------

Co-authored-by: Steven Perron <[email protected]>
  • Loading branch information
V-FEXrt and s-perron authored Jan 15, 2025
1 parent 4a4a8a1 commit 4f48abf
Show file tree
Hide file tree
Showing 17 changed files with 1,021 additions and 134 deletions.
6 changes: 6 additions & 0 deletions clang/include/clang/Basic/Builtins.td
Original file line number Diff line number Diff line change
Expand Up @@ -4855,6 +4855,12 @@ def HLSLFirstBitHigh : LangBuiltin<"HLSL_LANG"> {
let Prototype = "void(...)";
}

def HLSLFirstBitLow : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_elementwise_firstbitlow"];
let Attributes = [NoThrow, Const];
let Prototype = "void(...)";
}

def HLSLFrac : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_elementwise_frac"];
let Attributes = [NoThrow, Const];
Expand Down
9 changes: 8 additions & 1 deletion clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19316,14 +19316,21 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
"hlsl.dot4add.u8packed");
}
case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: {

Value *X = EmitScalarExpr(E->getArg(0));

return Builder.CreateIntrinsic(
/*ReturnType=*/ConvertType(E->getType()),
getFirstBitHighIntrinsic(CGM.getHLSLRuntime(), E->getArg(0)->getType()),
ArrayRef<Value *>{X}, nullptr, "hlsl.firstbithigh");
}
case Builtin::BI__builtin_hlsl_elementwise_firstbitlow: {
Value *X = EmitScalarExpr(E->getArg(0));

return Builder.CreateIntrinsic(
/*ReturnType=*/ConvertType(E->getType()),
CGM.getHLSLRuntime().getFirstBitLowIntrinsic(), ArrayRef<Value *>{X},
nullptr, "hlsl.firstbitlow");
}
case Builtin::BI__builtin_hlsl_lerp: {
Value *X = EmitScalarExpr(E->getArg(0));
Value *Y = EmitScalarExpr(E->getArg(1));
Expand Down
1 change: 1 addition & 0 deletions clang/lib/CodeGen/CGHLSLRuntime.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ class CGHLSLRuntime {
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveReadLaneAt, wave_readlane)
GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitUHigh, firstbituhigh)
GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitSHigh, firstbitshigh)
GENERATE_HLSL_INTRINSIC_FUNCTION(FirstBitLow, firstbitlow)
GENERATE_HLSL_INTRINSIC_FUNCTION(NClamp, nclamp)
GENERATE_HLSL_INTRINSIC_FUNCTION(SClamp, sclamp)
GENERATE_HLSL_INTRINSIC_FUNCTION(UClamp, uclamp)
Expand Down
72 changes: 72 additions & 0 deletions clang/lib/Headers/hlsl/hlsl_intrinsics.h
Original file line number Diff line number Diff line change
Expand Up @@ -1150,6 +1150,78 @@ uint3 firstbithigh(uint64_t3);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
uint4 firstbithigh(uint64_t4);

//===----------------------------------------------------------------------===//
// firstbitlow builtins
//===----------------------------------------------------------------------===//

/// \fn T firstbitlow(T Val)
/// \brief Returns the location of the first set bit starting from the lowest
/// order bit and working upward, per component.
/// \param Val the input value.

#ifdef __HLSL_ENABLE_16_BIT
_HLSL_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint firstbitlow(int16_t);
_HLSL_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint2 firstbitlow(int16_t2);
_HLSL_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint3 firstbitlow(int16_t3);
_HLSL_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint4 firstbitlow(int16_t4);
_HLSL_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint firstbitlow(uint16_t);
_HLSL_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint2 firstbitlow(uint16_t2);
_HLSL_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint3 firstbitlow(uint16_t3);
_HLSL_AVAILABILITY(shadermodel, 6.2)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint4 firstbitlow(uint16_t4);
#endif

_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint firstbitlow(int);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint2 firstbitlow(int2);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint3 firstbitlow(int3);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint4 firstbitlow(int4);

_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint firstbitlow(uint);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint2 firstbitlow(uint2);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint3 firstbitlow(uint3);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint4 firstbitlow(uint4);

_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint firstbitlow(int64_t);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint2 firstbitlow(int64_t2);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint3 firstbitlow(int64_t3);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint4 firstbitlow(int64_t4);

_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint firstbitlow(uint64_t);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint2 firstbitlow(uint64_t2);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint3 firstbitlow(uint64_t3);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbitlow)
uint4 firstbitlow(uint64_t4);

//===----------------------------------------------------------------------===//
// floor builtins
//===----------------------------------------------------------------------===//
Expand Down
3 changes: 2 additions & 1 deletion clang/lib/Sema/SemaHLSL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2036,7 +2036,8 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
return true;
break;
}
case Builtin::BI__builtin_hlsl_elementwise_firstbithigh: {
case Builtin::BI__builtin_hlsl_elementwise_firstbithigh:
case Builtin::BI__builtin_hlsl_elementwise_firstbitlow: {
if (SemaRef.PrepareBuiltinElementwiseMathOneArgCall(TheCall))
return true;

Expand Down
153 changes: 153 additions & 0 deletions clang/test/CodeGenHLSL/builtins/firstbitlow.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s -DTARGET=dx
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \
// RUN: -emit-llvm -disable-llvm-passes \
// RUN: -o - | FileCheck %s -DTARGET=spv

#ifdef __HLSL_ENABLE_16_BIT
// CHECK-LABEL: test_firstbitlow_ushort
// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i16
uint test_firstbitlow_ushort(uint16_t p0) {
return firstbitlow(p0);
}

// CHECK-LABEL: test_firstbitlow_ushort2
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i16
uint2 test_firstbitlow_ushort2(uint16_t2 p0) {
return firstbitlow(p0);
}

// CHECK-LABEL: test_firstbitlow_ushort3
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i16
uint3 test_firstbitlow_ushort3(uint16_t3 p0) {
return firstbitlow(p0);
}

// CHECK-LABEL: test_firstbitlow_ushort4
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i16
uint4 test_firstbitlow_ushort4(uint16_t4 p0) {
return firstbitlow(p0);
}

// CHECK-LABEL: test_firstbitlow_short
// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i16
uint test_firstbitlow_short(int16_t p0) {
return firstbitlow(p0);
}

// CHECK-LABEL: test_firstbitlow_short2
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i16
uint2 test_firstbitlow_short2(int16_t2 p0) {
return firstbitlow(p0);
}

// CHECK-LABEL: test_firstbitlow_short3
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i16
uint3 test_firstbitlow_short3(int16_t3 p0) {
return firstbitlow(p0);
}

// CHECK-LABEL: test_firstbitlow_short4
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i16
uint4 test_firstbitlow_short4(int16_t4 p0) {
return firstbitlow(p0);
}
#endif // __HLSL_ENABLE_16_BIT

// CHECK-LABEL: test_firstbitlow_uint
// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i32
uint test_firstbitlow_uint(uint p0) {
return firstbitlow(p0);
}

// CHECK-LABEL: test_firstbitlow_uint2
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i32
uint2 test_firstbitlow_uint2(uint2 p0) {
return firstbitlow(p0);
}

// CHECK-LABEL: test_firstbitlow_uint3
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i32
uint3 test_firstbitlow_uint3(uint3 p0) {
return firstbitlow(p0);
}

// CHECK-LABEL: test_firstbitlow_uint4
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i32
uint4 test_firstbitlow_uint4(uint4 p0) {
return firstbitlow(p0);
}

// CHECK-LABEL: test_firstbitlow_ulong
// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i64
uint test_firstbitlow_ulong(uint64_t p0) {
return firstbitlow(p0);
}

// CHECK-LABEL: test_firstbitlow_ulong2
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i64
uint2 test_firstbitlow_ulong2(uint64_t2 p0) {
return firstbitlow(p0);
}

// CHECK-LABEL: test_firstbitlow_ulong3
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i64
uint3 test_firstbitlow_ulong3(uint64_t3 p0) {
return firstbitlow(p0);
}

// CHECK-LABEL: test_firstbitlow_ulong4
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i64
uint4 test_firstbitlow_ulong4(uint64_t4 p0) {
return firstbitlow(p0);
}

// CHECK-LABEL: test_firstbitlow_int
// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i32
uint test_firstbitlow_int(int p0) {
return firstbitlow(p0);
}

// CHECK-LABEL: test_firstbitlow_int2
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i32
uint2 test_firstbitlow_int2(int2 p0) {
return firstbitlow(p0);
}

// CHECK-LABEL: test_firstbitlow_int3
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i32
uint3 test_firstbitlow_int3(int3 p0) {
return firstbitlow(p0);
}

// CHECK-LABEL: test_firstbitlow_int4
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i32
uint4 test_firstbitlow_int4(int4 p0) {
return firstbitlow(p0);
}

// CHECK-LABEL: test_firstbitlow_long
// CHECK: call i32 @llvm.[[TARGET]].firstbitlow.i64
uint test_firstbitlow_long(int64_t p0) {
return firstbitlow(p0);
}

// CHECK-LABEL: test_firstbitlow_long2
// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitlow.v2i64
uint2 test_firstbitlow_long2(int64_t2 p0) {
return firstbitlow(p0);
}

// CHECK-LABEL: test_firstbitlow_long3
// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitlow.v3i64
uint3 test_firstbitlow_long3(int64_t3 p0) {
return firstbitlow(p0);
}

// CHECK-LABEL: test_firstbitlow_long4
// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitlow.v4i64
uint4 test_firstbitlow_long4(int64_t4 p0) {
return firstbitlow(p0);
}
6 changes: 2 additions & 4 deletions clang/test/SemaHLSL/BuiltIns/firstbithigh-errors.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,10 @@ double test_int_builtin(double p0) {

double2 test_int_builtin_2(double2 p0) {
return __builtin_hlsl_elementwise_firstbithigh(p0);
// expected-error@-1 {{1st argument must be a vector of integers
// (was 'double2' (aka 'vector<double, 2>'))}}
// expected-error@-1 {{1st argument must be a vector of integers (was 'double2' (aka 'vector<double, 2>'))}}
}

float test_int_builtin_3(float p0) {
return __builtin_hlsl_elementwise_firstbithigh(p0);
// expected-error@-1 {{1st argument must be a vector of integers
// (was 'float')}}
// expected-error@-1 {{1st argument must be a vector of integers (was 'double')}}
}
26 changes: 26 additions & 0 deletions clang/test/SemaHLSL/BuiltIns/firstbitlow-errors.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -verify-ignore-unexpected

int test_too_few_arg() {
return firstbitlow();
// expected-error@-1 {{no matching function for call to 'firstbitlow'}}
}

int test_too_many_arg(int p0) {
return firstbitlow(p0, p0);
// expected-error@-1 {{no matching function for call to 'firstbitlow'}}
}

double test_int_builtin(double p0) {
return firstbitlow(p0);
// expected-error@-1 {{call to 'firstbitlow' is ambiguous}}
}

double2 test_int_builtin_2(double2 p0) {
return __builtin_hlsl_elementwise_firstbitlow(p0);
// expected-error@-1 {{1st argument must be a vector of integers (was 'double2' (aka 'vector<double, 2>'))}}
}

float test_int_builtin_3(float p0) {
return __builtin_hlsl_elementwise_firstbitlow(p0);
// expected-error@-1 {{1st argument must be a vector of integers (was 'double')}}
}
1 change: 1 addition & 0 deletions llvm/include/llvm/IR/IntrinsicsDirectX.td
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>
def int_dx_discard : DefaultAttrsIntrinsic<[], [llvm_i1_ty], []>;
def int_dx_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
def int_dx_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
def int_dx_firstbitlow : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;

def int_dx_group_memory_barrier_with_group_sync : DefaultAttrsIntrinsic<[], [], []>;
}
1 change: 1 addition & 0 deletions llvm/include/llvm/IR/IntrinsicsSPIRV.td
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ let TargetPrefix = "spv" in {

def int_spv_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
def int_spv_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
def int_spv_firstbitlow : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;

def int_spv_resource_updatecounter
: DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_any_ty, llvm_i8_ty],
Expand Down
12 changes: 12 additions & 0 deletions llvm/lib/Target/DirectX/DXIL.td
Original file line number Diff line number Diff line change
Expand Up @@ -620,6 +620,18 @@ def CountBits : DXILOp<31, unaryBits> {
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
}

def FirstbitLo : DXILOp<32, unaryBits> {
let Doc = "Returns the location of the first set bit starting from "
"the lowest order bit and working upward.";
let intrinsics = [ IntrinSelect<int_dx_firstbitlow> ];
let arguments = [OverloadTy];
let result = Int32Ty;
let overloads =
[Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>];
let stages = [Stages<DXIL1_0, [all_stages]>];
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
}

def FirstbitHi : DXILOp<33, unaryBits> {
let Doc = "Returns the location of the first set bit starting from "
"the highest order bit and working downward.";
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable(
case Intrinsic::dx_splitdouble:
case Intrinsic::dx_firstbituhigh:
case Intrinsic::dx_firstbitshigh:
case Intrinsic::dx_firstbitlow:
return true;
default:
return false;
Expand Down
Loading

0 comments on commit 4f48abf

Please sign in to comment.