Skip to content

Commit

Permalink
Bitfield Fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
cwfitzgerald committed Feb 26, 2024
1 parent 5b9ade8 commit 233db07
Show file tree
Hide file tree
Showing 13 changed files with 801 additions and 271 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ Bottom level categories:
- Fix timeout when presenting a surface where no work has been done. By @waywardmonkeys in [#5200](https://github.com/gfx-rs/wgpu/pull/5200)
- Simplify and speed up the allocation of internal IDs. By @nical in [#5229](https://github.com/gfx-rs/wgpu/pull/5229)
- Fix an issue where command encoders weren't properly freed if an error occurred during command encoding. By @ErichDonGubler in [#5251](https://github.com/gfx-rs/wgpu/pull/5251).
- Fix behavior of `extractBits` and `insertBits` when `offset + count` overflows the bit width. By @cwfitzgerald in [#5305](https://github.com/gfx-rs/wgpu/pull/5305)

#### WGL

Expand Down
72 changes: 69 additions & 3 deletions naga/src/back/glsl/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1290,7 +1290,14 @@ impl<'a, W: Write> Writer<'a, W> {

let inner = expr_info.ty.inner_with(&self.module.types);

if let Expression::Math { fun, arg, arg1, .. } = *expr {
if let Expression::Math {
fun,
arg,
arg1,
arg2,
..
} = *expr
{
match fun {
crate::MathFunction::Dot => {
// if the expression is a Dot product with integer arguments,
Expand All @@ -1305,6 +1312,14 @@ impl<'a, W: Write> Writer<'a, W> {
}
}
}
crate::MathFunction::ExtractBits => {
// Only argument 1 is re-used.
self.need_bake_expressions.insert(arg1.unwrap());
}
crate::MathFunction::InsertBits => {
// Only argument 2 is re-used.
self.need_bake_expressions.insert(arg2.unwrap());
}
crate::MathFunction::CountLeadingZeros => {
if let Some(crate::ScalarKind::Sint) = inner.scalar_kind() {
self.need_bake_expressions.insert(arg);
Expand Down Expand Up @@ -3353,8 +3368,59 @@ impl<'a, W: Write> Writer<'a, W> {
}
Mf::CountOneBits => "bitCount",
Mf::ReverseBits => "bitfieldReverse",
Mf::ExtractBits => "bitfieldExtract",
Mf::InsertBits => "bitfieldInsert",
Mf::ExtractBits => {
// The behavior of ExtractBits is undefined when offset + count > bit_width. We need
// to first sanitize the offset and count first. If we don't do this, AMD and Intel chips
// will return out-of-spec values if the extracted range is not within the bit width.
//
// This encodes the exact formula specified by the wgsl spec, without temporary values:
// https://gpuweb.github.io/gpuweb/wgsl/#extractBits-unsigned-builtin
//
// w = sizeof(x) * 8
// o = min(offset, w)
// c = min(count, w - o)
//
// bitfieldExtract(x, o, c)
//
// extract_bits(e, min(offset, w), min(count, w - min(offset, w))))
let scalar_bits = ctx
.resolve_type(arg, &self.module.types)
.scalar_width()
.unwrap();

write!(self.out, "bitfieldExtract(")?;
self.write_expr(arg, ctx)?;
write!(self.out, ", int(min(")?;
self.write_expr(arg1.unwrap(), ctx)?;
write!(self.out, ", {scalar_bits}u)), int(min(",)?;
self.write_expr(arg2.unwrap(), ctx)?;
write!(self.out, ", {scalar_bits}u - min(")?;
self.write_expr(arg1.unwrap(), ctx)?;
write!(self.out, ", {scalar_bits}u))))")?;

return Ok(());
}
Mf::InsertBits => {
// InsertBits has the same considerations as ExtractBits above
let scalar_bits = ctx
.resolve_type(arg, &self.module.types)
.scalar_width()
.unwrap();

write!(self.out, "bitfieldInsert(")?;
self.write_expr(arg, ctx)?;
write!(self.out, ", ")?;
self.write_expr(arg1.unwrap(), ctx)?;
write!(self.out, ", int(min(")?;
self.write_expr(arg2.unwrap(), ctx)?;
write!(self.out, ", {scalar_bits}u)), int(min(",)?;
self.write_expr(arg3.unwrap(), ctx)?;
write!(self.out, ", {scalar_bits}u - min(")?;
self.write_expr(arg2.unwrap(), ctx)?;
write!(self.out, ", {scalar_bits}u))))")?;

return Ok(());
}
Mf::FindLsb => "findLSB",
Mf::FindMsb => "findMSB",
// data packing
Expand Down
150 changes: 149 additions & 1 deletion naga/src/back/hlsl/help.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,11 @@ int dim_1d = NagaDimensions1D(image_1d);
```
*/

use super::{super::FunctionCtx, BackendResult};
use super::{
super::FunctionCtx,
writer::{EXTRACT_BITS_FUNCTION, INSERT_BITS_FUNCTION},
BackendResult,
};
use crate::{arena::Handle, proc::NameKey};
use std::fmt::Write;

Expand Down Expand Up @@ -59,6 +63,13 @@ pub(super) struct WrappedMatCx2 {
pub(super) columns: crate::VectorSize,
}

#[derive(Clone, Copy, Debug, Hash, Eq, Ord, PartialEq, PartialOrd)]
pub(super) struct WrappedMath {
pub(super) fun: crate::MathFunction,
pub(super) scalar: crate::Scalar,
pub(super) components: Option<u32>,
}

/// HLSL backend requires its own `ImageQuery` enum.
///
/// It is used inside `WrappedImageQuery` and should be unique per ImageQuery function.
Expand Down Expand Up @@ -851,12 +862,149 @@ impl<'a, W: Write> super::Writer<'a, W> {
Ok(())
}

pub(super) fn write_wrapped_math_functions(
&mut self,
module: &crate::Module,
func_ctx: &FunctionCtx,
) -> BackendResult {
for (_, expression) in func_ctx.expressions.iter() {
if let crate::Expression::Math {
fun,
arg,
arg1: _arg1,
arg2: _arg2,
arg3: _arg3,
} = *expression
{
match fun {
crate::MathFunction::ExtractBits => {
// The behavior of our extractBits polyfill is undefined if offset + count > bit_width. We need
// to first sanitize the offset and count first. If we don't do this, we will get out-of-spec
// values if the extracted range is not within the bit width.
//
// This encodes the exact formula specified by the wgsl spec:
// https://gpuweb.github.io/gpuweb/wgsl/#extractBits-unsigned-builtin
//
// w = sizeof(x) * 8
// o = min(offset, w)
// c = min(count, w - o)
//
// bitfieldExtract(x, o, c)
let arg_ty = func_ctx.resolve_type(arg, &module.types);
let scalar = arg_ty.scalar().unwrap();
let components = arg_ty.components();

let wrapped = WrappedMath {
fun,
scalar,
components,
};

if !self.wrapped.math.insert(wrapped) {
continue;
}

// Write return type
self.write_value_type(module, arg_ty)?;

let scalar_width: u8 = scalar.width * 8;

// Write function name and parameters
writeln!(self.out, " {EXTRACT_BITS_FUNCTION}(")?;
write!(self.out, " ")?;
self.write_value_type(module, arg_ty)?;
writeln!(self.out, " e,")?;
writeln!(self.out, " uint offset,")?;
writeln!(self.out, " uint count")?;
writeln!(self.out, ") {{")?;

// Write function body
writeln!(self.out, " uint w = {scalar_width};")?;
writeln!(self.out, " uint o = min(offset, w);")?;
writeln!(self.out, " uint c = min(count, w - o);")?;
writeln!(
self.out,
" return (c == 0 ? 0 : (e << (w - c - o)) >> (w - c));"
)?;

// End of function body
writeln!(self.out, "}}")?;
}
crate::MathFunction::InsertBits => {
// The behavior of our insertBits polyfill has the same constraints as the extractBits polyfill.

let arg_ty = func_ctx.resolve_type(arg, &module.types);
let scalar = arg_ty.scalar().unwrap();
let components = arg_ty.components();

let wrapped = WrappedMath {
fun,
scalar,
components,
};

if !self.wrapped.math.insert(wrapped) {
continue;
}

// Write return type
self.write_value_type(module, arg_ty)?;

let scalar_width: u8 = scalar.width * 8;
let scalar_max: u64 = match scalar.width {
1 => 0xFF,
2 => 0xFFFF,
4 => 0xFFFFFFFF,
8 => 0xFFFFFFFFFFFFFFFF,
_ => unreachable!(),
};

// Write function name and parameters
writeln!(self.out, " {INSERT_BITS_FUNCTION}(")?;
write!(self.out, " ")?;
self.write_value_type(module, arg_ty)?;
writeln!(self.out, " e,")?;
write!(self.out, " ")?;
self.write_value_type(module, arg_ty)?;
writeln!(self.out, " newbits,")?;
writeln!(self.out, " uint offset,")?;
writeln!(self.out, " uint count")?;
writeln!(self.out, ") {{")?;

// Write function body
writeln!(self.out, " uint w = {scalar_width}u;")?;
writeln!(self.out, " uint o = min(offset, w);")?;
writeln!(self.out, " uint c = min(count, w - o);")?;

// The `u` suffix on the literals is _extremely_ important. Otherwise it will use
// i32 shifting instead of the intended u32 shifting.
writeln!(
self.out,
" uint mask = (({scalar_max}u >> ({scalar_width}u - c)) << o);"
)?;
writeln!(
self.out,
" return (c == 0 ? e : ((e & ~mask) | ((newbits << o) & mask)));"
)?;

// End of function body
writeln!(self.out, "}}")?;
}
_ => {}
}
}
}

Ok(())
}

/// Helper function that writes various wrapped functions
pub(super) fn write_wrapped_functions(
&mut self,
module: &crate::Module,
func_ctx: &FunctionCtx,
) -> BackendResult {
self.write_wrapped_math_functions(module, func_ctx)?;
self.write_wrapped_compose_functions(module, func_ctx.expressions)?;

for (handle, _) in func_ctx.expressions.iter() {
Expand Down
2 changes: 2 additions & 0 deletions naga/src/back/hlsl/keywords.rs
Original file line number Diff line number Diff line change
Expand Up @@ -817,6 +817,8 @@ pub const RESERVED: &[&str] = &[
// Naga utilities
super::writer::MODF_FUNCTION,
super::writer::FREXP_FUNCTION,
super::writer::EXTRACT_BITS_FUNCTION,
super::writer::INSERT_BITS_FUNCTION,
];

// DXC scalar types, from https://github.com/microsoft/DirectXShaderCompiler/blob/18c9e114f9c314f93e68fbc72ce207d4ed2e65ae/tools/clang/lib/AST/ASTContextHLSL.cpp#L48-L254
Expand Down
2 changes: 2 additions & 0 deletions naga/src/back/hlsl/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,7 @@ struct Wrapped {
constructors: crate::FastHashSet<help::WrappedConstructor>,
struct_matrix_access: crate::FastHashSet<help::WrappedStructMatrixAccess>,
mat_cx2s: crate::FastHashSet<help::WrappedMatCx2>,
math: crate::FastHashSet<help::WrappedMath>,
}

impl Wrapped {
Expand All @@ -265,6 +266,7 @@ impl Wrapped {
self.constructors.clear();
self.struct_matrix_access.clear();
self.mat_cx2s.clear();
self.math.clear();
}
}

Expand Down
Loading

0 comments on commit 233db07

Please sign in to comment.