From b6f12ea105205e6f624a16e12c9f5f73e2f1d701 Mon Sep 17 00:00:00 2001 From: "Matias N. Goldberg" Date: Mon, 30 Dec 2024 22:33:55 -0300 Subject: [PATCH] Implement non uniform scaled normals for all shader paths World normal is now always normalized in the vertex shader. Without it, offset bias shadow mapping would malfunction if the object had scaling. Switch from manually multiplying matrices using float4 to explicit matrices. Code is easier to read and maintain. The CrossPlatform utils make it easy to work with matrix data types, which didn't exist when the original code was written. Affects #373 --- .../GLSL/CrossPlatformSettings_piece_all.glsl | 2 + .../HLSL/CrossPlatformSettings_piece_all.hlsl | 2 + .../CrossPlatformSettings_piece_all.metal | 2 + .../Any/Main/800.VertexShader_piece_vs.any | 147 ++++++++++-------- 4 files changed, 84 insertions(+), 69 deletions(-) diff --git a/Samples/Media/Hlms/Common/GLSL/CrossPlatformSettings_piece_all.glsl b/Samples/Media/Hlms/Common/GLSL/CrossPlatformSettings_piece_all.glsl index 94bb29a1a6e..233badf3bf1 100644 --- a/Samples/Media/Hlms/Common/GLSL/CrossPlatformSettings_piece_all.glsl +++ b/Samples/Media/Hlms/Common/GLSL/CrossPlatformSettings_piece_all.glsl @@ -77,6 +77,8 @@ #define buildFloat4x4( row0, row1, row2, row3 ) mat4( row0, row1, row2, row3 ) +#define getMatrixRow( mat, idx ) mat[idx] + // Let's explain this madness: // // We use the keyword "midf" because "half" is already taken on Metal. 
diff --git a/Samples/Media/Hlms/Common/HLSL/CrossPlatformSettings_piece_all.hlsl b/Samples/Media/Hlms/Common/HLSL/CrossPlatformSettings_piece_all.hlsl index 29396a798e1..0682793bcb9 100644 --- a/Samples/Media/Hlms/Common/HLSL/CrossPlatformSettings_piece_all.hlsl +++ b/Samples/Media/Hlms/Common/HLSL/CrossPlatformSettings_piece_all.hlsl @@ -17,6 +17,8 @@ #define buildFloat4x4( row0, row1, row2, row3 ) transpose( float4x4( row0, row1, row2, row3 ) ) +#define getMatrixRow( mat, idx ) transpose( mat )[idx] + // See CrossPlatformSettings_piece_all.glsl for an explanation @property( precision_mode == full32 ) #define _h(x) (x) diff --git a/Samples/Media/Hlms/Common/Metal/CrossPlatformSettings_piece_all.metal b/Samples/Media/Hlms/Common/Metal/CrossPlatformSettings_piece_all.metal index 43dec9370f4..fecd33e77eb 100644 --- a/Samples/Media/Hlms/Common/Metal/CrossPlatformSettings_piece_all.metal +++ b/Samples/Media/Hlms/Common/Metal/CrossPlatformSettings_piece_all.metal @@ -50,6 +50,8 @@ inline half3x3 toMatHalf3x3( float3x4 m ) #define buildFloat4x4( row0, row1, row2, row3 ) float4x4( float4( row0 ), float4( row1 ), float4( row2 ), float4( row3 ) ) +#define getMatrixRow( mat, idx ) mat[idx] + // See CrossPlatformSettings_piece_all.glsl for an explanation @property( precision_mode == full32 ) // In Metal 'half' is an actual datatype. 
It should be OK to override it diff --git a/Samples/Media/Hlms/Pbs/Any/Main/800.VertexShader_piece_vs.any b/Samples/Media/Hlms/Pbs/Any/Main/800.VertexShader_piece_vs.any index 341b3b0c973..da8f7e737b1 100644 --- a/Samples/Media/Hlms/Pbs/Any/Main/800.VertexShader_piece_vs.any +++ b/Samples/Media/Hlms/Pbs/Any/Main/800.VertexShader_piece_vs.any @@ -10,6 +10,9 @@ @insertpiece( Common_Matrix_DeclUnpackMatrix4x4 ) @insertpiece( Common_Matrix_DeclUnpackMatrix4x3 ) + @property( hlms_skeleton || hlms_pose ) + @insertpiece( Common_Matrix_DeclLoadOgreFloat4x3 ) + @end @property( hlms_particle_system ) @insertpiece( DeclQuaternion ) @end @@ -33,20 +36,42 @@ @insertpiece( DeclAtmosphereNprSkyFuncs ) @property( accurate_non_uniform_scaled_normals ) - midf3x3 adjugate( midf3x3 m ) + // Computes transpose( adjugate( m ) ) + // See: + // https://x.com/iquilezles/status/1866219178409316362 + // https://www.shadertoy.com/view/3s33zj + // https://github.com/graphitemaster/normals_revisited + midf3x3 adjugateForNormals( midf3x3 m ) + { + const midf3 r0 = getMatrixRow( m, 0 ).xyz; + const midf3 r1 = getMatrixRow( m, 1 ).xyz; + const midf3 r2 = getMatrixRow( m, 2 ).xyz; + midf3x3 n = buildFloat3x3( cross( r1.xyz, r2.xyz ), + cross( r2.xyz, r0.xyz ), + cross( r0.xyz, r1.xyz ) ); + return n; + } + + INLINE midf3x3 adjugateForNormalsFrom4x3( ogre_float4x3 m ) { - midf3x3 n; - n[0][0] = m[1][1] * m[2][2] - m[1][2] * m[2][1]; - n[0][1] = m[0][2] * m[2][1] - m[0][1] * m[2][2]; - n[0][2] = m[0][1] * m[1][2] - m[0][2] * m[1][1]; - n[1][0] = m[1][2] * m[2][0] - m[1][0] * m[2][2]; - n[1][1] = m[0][0] * m[2][2] - m[0][2] * m[2][0]; - n[1][2] = m[0][2] * m[1][0] - m[0][0] * m[1][2]; - n[2][0] = m[1][0] * m[2][1] - m[2][0] * m[1][1]; - n[2][1] = m[0][1] * m[2][0] - m[0][0] * m[2][1]; - n[2][2] = m[0][0] * m[1][1] - m[0][1] * m[1][0]; + const midf3 r0 = midf3_c( getMatrixRow( m, 0 ).xyz ); + const midf3 r1 = midf3_c( getMatrixRow( m, 1 ).xyz ); + const midf3 r2 = midf3_c( getMatrixRow( m, 2 ).xyz ); + 
midf3x3 n = buildFloat3x3( cross( r1.xyz, r2.xyz ), + cross( r2.xyz, r0.xyz ), + cross( r0.xyz, r1.xyz ) ); return n; } + @else + midf3x3 adjugateForNormals( midf3x3 m ) + { + return m; + } + + INLINE midf3x3 adjugateForNormalsFrom4x3( ogre_float4x3 m ) + { + return toMidf3x3( m ); + } @end @end @@ -63,60 +88,43 @@ @property( hlms_skeleton ) @piece( SkeletonTransform ) uint _idx = (inVs_blendIndices[0] << 1u) + inVs_blendIndices[0]; //inVs_blendIndices[0] * 3u; a 32-bit int multiply is 4 cycles on GCN! (and mul24 is not exposed to GLSL...) - uint matStart = worldMaterialIdx[inVs_drawId].x >> 9u; - float4 worldMat[3]; - worldMat[0] = readOnlyFetch( worldMatBuf, int(matStart + _idx + 0u) ); - worldMat[1] = readOnlyFetch( worldMatBuf, int(matStart + _idx + 1u) ); - worldMat[2] = readOnlyFetch( worldMatBuf, int(matStart + _idx + 2u) ); + const uint matStart = worldMaterialIdx[inVs_drawId].x >> 9u; + ogre_float4x3 worldMat; + worldMat = makeOgreFloat4x3( readOnlyFetch( worldMatBuf, int( matStart + _idx + 0u ) ), + readOnlyFetch( worldMatBuf, int( matStart + _idx + 1u ) ), + readOnlyFetch( worldMatBuf, int( matStart + _idx + 2u ) ) ); float4 worldPos; - worldPos.x = dot( worldMat[0], inputPos ); - worldPos.y = dot( worldMat[1], inputPos ); - worldPos.z = dot( worldMat[2], inputPos ); - worldPos.xyz *= inVs_blendWeights[0]; - @property( hlms_normal || hlms_qtangent ) + worldPos.xyz = mul( inputPos, worldMat ) * inVs_blendWeights[0]; + @property( hlms_normal || hlms_qtangent ) + midf3x3 normalAdjMat = adjugateForNormalsFrom4x3( worldMat ); + midf3 worldNorm; - worldNorm.x = dot( midf3_c( worldMat[0].xyz ), inputNormal ); - worldNorm.y = dot( midf3_c( worldMat[1].xyz ), inputNormal ); - worldNorm.z = dot( midf3_c( worldMat[2].xyz ), inputNormal ); - worldNorm *= midf_c( inVs_blendWeights[0] ); + worldNorm = mul( inputNormal, normalAdjMat ) * midf_c( inVs_blendWeights[0] ); @end @property( normal_map ) midf3 worldTang; - worldTang.x = dot( midf3_c( worldMat[0].xyz ), 
inputTangent ); - worldTang.y = dot( midf3_c( worldMat[1].xyz ), inputTangent ); - worldTang.z = dot( midf3_c( worldMat[2].xyz ), inputTangent ); - worldTang *= midf_c( inVs_blendWeights[0] ); + worldTang = mul( inputTangent, normalAdjMat ) * midf_c( inVs_blendWeights[0] ); @end - @psub( NeedsMoreThan1BonePerVertex, hlms_bones_per_vertex, 1 ) - @property( NeedsMoreThan1BonePerVertex ) - float4 tmp4; - tmp4.w = 1.0; - midf3 tmp3; - @end //!NeedsMoreThan1BonePerVertex @foreach( hlms_bones_per_vertex, n, 1 ) - _idx = (inVs_blendIndices[@n] << 1u) + inVs_blendIndices[@n]; //inVs_blendIndices[@n] * 3; a 32-bit int multiply is 4 cycles on GCN! (and mul24 is not exposed to GLSL...) - worldMat[0] = readOnlyFetch( worldMatBuf, int(matStart + _idx + 0u) ); - worldMat[1] = readOnlyFetch( worldMatBuf, int(matStart + _idx + 1u) ); - worldMat[2] = readOnlyFetch( worldMatBuf, int(matStart + _idx + 2u) ); - tmp4.x = dot( worldMat[0], inputPos ); - tmp4.y = dot( worldMat[1], inputPos ); - tmp4.z = dot( worldMat[2], inputPos ); - worldPos.xyz += (tmp4 * inVs_blendWeights[@n]).xyz; + _idx = (inVs_blendIndices[@n] << 1u) + inVs_blendIndices[@n]; //inVs_blendIndices[@n] * 3; a 32-bit int multiply is 4 cycles on GCN! (and mul24 is not exposed to GLSL...). 
+ worldMat = makeOgreFloat4x3( readOnlyFetch( worldMatBuf, int( matStart + _idx + 0u ) ), + readOnlyFetch( worldMatBuf, int( matStart + _idx + 1u ) ), + readOnlyFetch( worldMatBuf, int( matStart + _idx + 2u ) ) ); + worldPos.xyz += mul( inputPos, worldMat ) * inVs_blendWeights[@n]; @property( hlms_normal || hlms_qtangent ) - tmp3.x = dot( midf3_c( worldMat[0].xyz ), inputNormal ); - tmp3.y = dot( midf3_c( worldMat[1].xyz ), inputNormal ); - tmp3.z = dot( midf3_c( worldMat[2].xyz ), inputNormal ); - worldNorm += tmp3.xyz * midf_c( inVs_blendWeights[@n] ); + normalAdjMat = adjugateForNormalsFrom4x3( worldMat ); + worldNorm += mul( inputNormal, normalAdjMat ) * midf_c( inVs_blendWeights[@n] ); @end @property( normal_map ) - tmp3.x = dot( midf3_c( worldMat[0].xyz ), inputTangent ); - tmp3.y = dot( midf3_c( worldMat[1].xyz ), inputTangent ); - tmp3.z = dot( midf3_c( worldMat[2].xyz ), inputTangent ); - worldTang += tmp3.xyz * midf_c( inVs_blendWeights[@n] ); + worldTang += mul( inputTangent, normalAdjMat ) * midf_c( inVs_blendWeights[@n] ); @end @end + @property( hlms_normal || hlms_qtangent ) + worldNorm = normalize( worldNorm ); + @end + worldPos.w = 1.0; @end // SkeletonTransform @end // !hlms_skeleton @@ -185,14 +193,13 @@ // If hlms_skeleton is defined the transforms will be provided by bones. // If hlms_pose is not combined with hlms_skeleton the object's worldMat and worldView have to be set. 
@property( !hlms_skeleton ) - float4 worldMat[3]; - worldMat[0] = readOnlyFetch( worldMatBuf, int( poseDataStart + @value(NumPoseWeightVectors)u + 1u ) ); - worldMat[1] = readOnlyFetch( worldMatBuf, int( poseDataStart + @value(NumPoseWeightVectors)u + 2u ) ); - worldMat[2] = readOnlyFetch( worldMatBuf, int( poseDataStart + @value(NumPoseWeightVectors)u + 3u ) ); + ogre_float4x3 worldMat; + worldMat = makeOgreFloat4x3( + readOnlyFetch( worldMatBuf, int( poseDataStart + @value( NumPoseWeightVectors )u + 1u ) ), + readOnlyFetch( worldMatBuf, int( poseDataStart + @value( NumPoseWeightVectors )u + 2u ) ), + readOnlyFetch( worldMatBuf, int( poseDataStart + @value( NumPoseWeightVectors )u + 3u ) ) ); float4 worldPos; - worldPos.x = dot( worldMat[0], inputPos ); - worldPos.y = dot( worldMat[1], inputPos ); - worldPos.z = dot( worldMat[2], inputPos ); + worldPos.xyz = mul( inputPos, worldMat ); worldPos.w = 1.0; @property( hlms_normal || hlms_qtangent ) @@ -213,11 +220,12 @@ @property( hlms_normal || hlms_qtangent ) outVs.pos = @insertpiece( CalculatePsPos );@end @property( hlms_normal || hlms_qtangent ) midf3x3 worldMat3x3 = toMidf3x3( worldViewMat ); - @property( accurate_non_uniform_scaled_normals ) - midf3x3 normalMat = transpose( adjugate( worldMat3x3 ) ); - outVs.normal = normalize( mul( @insertpiece(local_normal), normalMat ) ); - @else + @property( hlms_skeleton ) + // worldViewMat is actually passBuf.view so we don't need the adjugate. We've already done that. 
outVs.normal = mul( @insertpiece(local_normal), worldMat3x3 ); + @else + midf3x3 normalMat = adjugateForNormals( worldMat3x3 ); + outVs.normal = mul( @insertpiece(local_normal), normalMat ); @end @end @property( normal_map ) outVs.tangent = mul( @insertpiece(local_tangent), toMidf3x3( worldViewMat ) );@end @@ -290,19 +298,20 @@ float4 worldPos = float4( mul(inVs_vertex, worldMat).xyz, 1.0f ); @property( ( hlms_normal || hlms_qtangent) && hlms_num_shadow_map_lights ) - // We need worldNorm for normal offset bias - midf3 worldNorm = mul( inputNormal, toMidf3x3( worldMat ) ).xyz; + const midf3x3 normalAdjMat = adjugateForNormalsFrom4x3( worldMat ); + + // We need worldNorm for normal offset bias. + midf3 worldNorm = normalize( mul( inputNormal, normalAdjMat ).xyz ); @end @end @insertpiece( PoseTransform ) @property( !hlms_skeleton && hlms_pose && ( hlms_normal || hlms_qtangent) && hlms_num_shadow_map_lights ) - // We need worldNorm for normal offset bias, special path when using poses - midf3 worldNorm; - worldNorm.x = dot( midf3_c( worldMat[0].xyz ), inputNormal ); - worldNorm.y = dot( midf3_c( worldMat[1].xyz ), inputNormal ); - worldNorm.z = dot( midf3_c( worldMat[2].xyz ), inputNormal ); + const midf3x3 normalAdjMat = adjugateForNormalsFrom4x3( worldMat ); + + // We need worldNorm for normal offset bias, special path when using poses. + midf3 worldNorm = normalize( mul( inputNormal, normalAdjMat ).xyz ); @end @insertpiece( SkeletonTransform )