# Patch summary (free text before the first "diff --git" header).
#
# * Adds a getMatrixRow( mat, idx ) macro to the GLSL, HLSL and Metal
#   CrossPlatformSettings pieces.
# * Replaces adjugate() with adjugateForNormals() / adjugateForNormalsFrom4x3(),
#   built from cross products of the matrix rows, and provides pass-through
#   versions when accurate_non_uniform_scaled_normals is not set.
# * Rewrites SkeletonTransform / PoseTransform to use ogre_float4x3 + mul() and
#   routes skinned / posed normals through adjugateForNormalsFrom4x3().
#
# Review fix applied to this patch: inside the
# @foreach( hlms_bones_per_vertex, n, 1 ) loop the normal_map branch accumulated
# the tangent into worldNorm; it now accumulates into worldTang, matching the
# code it replaces.
#
# NOTE(review): skinned tangents are multiplied by normalAdjMat, while the
# non-skeleton path multiplies the tangent by toMidf3x3( worldViewMat ).
# Confirm that using the normal matrix for tangents is intended.
#
# NOTE(review): indentation below was reconstructed from a whitespace-collapsed
# copy of the patch; apply with whitespace-insensitive matching.

diff --git a/Samples/Media/Hlms/Common/GLSL/CrossPlatformSettings_piece_all.glsl b/Samples/Media/Hlms/Common/GLSL/CrossPlatformSettings_piece_all.glsl
index 94bb29a1a6e..233badf3bf1 100644
--- a/Samples/Media/Hlms/Common/GLSL/CrossPlatformSettings_piece_all.glsl
+++ b/Samples/Media/Hlms/Common/GLSL/CrossPlatformSettings_piece_all.glsl
@@ -77,6 +77,8 @@
 
 #define buildFloat4x4( row0, row1, row2, row3 ) mat4( row0, row1, row2, row3 )
 
+#define getMatrixRow( mat, idx ) mat[idx]
+
 // Let's explain this madness:
 //
 // We use the keyword "midf" because "half" is already taken on Metal.
diff --git a/Samples/Media/Hlms/Common/HLSL/CrossPlatformSettings_piece_all.hlsl b/Samples/Media/Hlms/Common/HLSL/CrossPlatformSettings_piece_all.hlsl
index 29396a798e1..0682793bcb9 100644
--- a/Samples/Media/Hlms/Common/HLSL/CrossPlatformSettings_piece_all.hlsl
+++ b/Samples/Media/Hlms/Common/HLSL/CrossPlatformSettings_piece_all.hlsl
@@ -17,6 +17,8 @@
 
 #define buildFloat4x4( row0, row1, row2, row3 ) transpose( float4x4( row0, row1, row2, row3 ) )
 
+#define getMatrixRow( mat, idx ) transpose( mat )[idx]
+
 // See CrossPlatformSettings_piece_all.glsl for an explanation
 @property( precision_mode == full32 )
 	#define _h(x) (x)
diff --git a/Samples/Media/Hlms/Common/Metal/CrossPlatformSettings_piece_all.metal b/Samples/Media/Hlms/Common/Metal/CrossPlatformSettings_piece_all.metal
index 43dec9370f4..fecd33e77eb 100644
--- a/Samples/Media/Hlms/Common/Metal/CrossPlatformSettings_piece_all.metal
+++ b/Samples/Media/Hlms/Common/Metal/CrossPlatformSettings_piece_all.metal
@@ -50,6 +50,8 @@ inline half3x3 toMatHalf3x3( float3x4 m )
 
 #define buildFloat4x4( row0, row1, row2, row3 ) float4x4( float4( row0 ), float4( row1 ), float4( row2 ), float4( row3 ) )
 
+#define getMatrixRow( mat, idx ) mat[idx]
+
 // See CrossPlatformSettings_piece_all.glsl for an explanation
 @property( precision_mode == full32 )
 	// In Metal 'half' is an actual datatype. It should be OK to override it
diff --git a/Samples/Media/Hlms/Pbs/Any/Main/800.VertexShader_piece_vs.any b/Samples/Media/Hlms/Pbs/Any/Main/800.VertexShader_piece_vs.any
index 341b3b0c973..da8f7e737b1 100644
--- a/Samples/Media/Hlms/Pbs/Any/Main/800.VertexShader_piece_vs.any
+++ b/Samples/Media/Hlms/Pbs/Any/Main/800.VertexShader_piece_vs.any
@@ -10,6 +10,9 @@
 
 	@insertpiece( Common_Matrix_DeclUnpackMatrix4x4 )
 	@insertpiece( Common_Matrix_DeclUnpackMatrix4x3 )
+	@property( hlms_skeleton || hlms_pose )
+		@insertpiece( Common_Matrix_DeclLoadOgreFloat4x3 )
+	@end
 	@property( hlms_particle_system )
 		@insertpiece( DeclQuaternion )
 	@end
@@ -33,20 +36,42 @@
 	@insertpiece( DeclAtmosphereNprSkyFuncs )
 
 	@property( accurate_non_uniform_scaled_normals )
-		midf3x3 adjugate( midf3x3 m )
+		// Computes transpose( adjugate( m ) )
+		// See:
+		//	https://x.com/iquilezles/status/1866219178409316362
+		//	https://www.shadertoy.com/view/3s33zj
+		//	https://github.com/graphitemaster/normals_revisited
+		midf3x3 adjugateForNormals( midf3x3 m )
+		{
+			const midf3 r0 = getMatrixRow( m, 0 ).xyz;
+			const midf3 r1 = getMatrixRow( m, 1 ).xyz;
+			const midf3 r2 = getMatrixRow( m, 2 ).xyz;
+			midf3x3 n = buildFloat3x3( cross( r1.xyz, r2.xyz ),
+									   cross( r2.xyz, r0.xyz ),
+									   cross( r0.xyz, r1.xyz ) );
+			return n;
+		}
+
+		INLINE midf3x3 adjugateForNormalsFrom4x3( ogre_float4x3 m )
 		{
-			midf3x3 n;
-			n[0][0] = m[1][1] * m[2][2] - m[1][2] * m[2][1];
-			n[0][1] = m[0][2] * m[2][1] - m[0][1] * m[2][2];
-			n[0][2] = m[0][1] * m[1][2] - m[0][2] * m[1][1];
-			n[1][0] = m[1][2] * m[2][0] - m[1][0] * m[2][2];
-			n[1][1] = m[0][0] * m[2][2] - m[0][2] * m[2][0];
-			n[1][2] = m[0][2] * m[1][0] - m[0][0] * m[1][2];
-			n[2][0] = m[1][0] * m[2][1] - m[2][0] * m[1][1];
-			n[2][1] = m[0][1] * m[2][0] - m[0][0] * m[2][1];
-			n[2][2] = m[0][0] * m[1][1] - m[0][1] * m[1][0];
+			const midf3 r0 = midf3_c( getMatrixRow( m, 0 ).xyz );
+			const midf3 r1 = midf3_c( getMatrixRow( m, 1 ).xyz );
+			const midf3 r2 = midf3_c( getMatrixRow( m, 2 ).xyz );
+			midf3x3 n = buildFloat3x3( cross( r1.xyz, r2.xyz ),
+									   cross( r2.xyz, r0.xyz ),
+									   cross( r0.xyz, r1.xyz ) );
 			return n;
 		}
+	@else
+		midf3x3 adjugateForNormals( midf3x3 m )
+		{
+			return m;
+		}
+
+		INLINE midf3x3 adjugateForNormalsFrom4x3( ogre_float4x3 m )
+		{
+			return toMidf3x3( m );
+		}
 	@end
 @end
 
@@ -63,60 +88,43 @@
 @property( hlms_skeleton )
 @piece( SkeletonTransform )
 	uint _idx = (inVs_blendIndices[0] << 1u) + inVs_blendIndices[0]; //inVs_blendIndices[0] * 3u; a 32-bit int multiply is 4 cycles on GCN! (and mul24 is not exposed to GLSL...)
-	uint matStart = worldMaterialIdx[inVs_drawId].x >> 9u;
-	float4 worldMat[3];
-	worldMat[0] = readOnlyFetch( worldMatBuf, int(matStart + _idx + 0u) );
-	worldMat[1] = readOnlyFetch( worldMatBuf, int(matStart + _idx + 1u) );
-	worldMat[2] = readOnlyFetch( worldMatBuf, int(matStart + _idx + 2u) );
+	const uint matStart = worldMaterialIdx[inVs_drawId].x >> 9u;
+	ogre_float4x3 worldMat;
+	worldMat = makeOgreFloat4x3( readOnlyFetch( worldMatBuf, int( matStart + _idx + 0u ) ),
+								 readOnlyFetch( worldMatBuf, int( matStart + _idx + 1u ) ),
+								 readOnlyFetch( worldMatBuf, int( matStart + _idx + 2u ) ) );
 	float4 worldPos;
-	worldPos.x = dot( worldMat[0], inputPos );
-	worldPos.y = dot( worldMat[1], inputPos );
-	worldPos.z = dot( worldMat[2], inputPos );
-	worldPos.xyz *= inVs_blendWeights[0];
-	@property( hlms_normal || hlms_qtangent )
+	worldPos.xyz = mul( inputPos, worldMat ) * inVs_blendWeights[0];
+	@property( hlms_normal || hlms_qtangent )
+		midf3x3 normalAdjMat = adjugateForNormalsFrom4x3( worldMat );
+
 		midf3 worldNorm;
-		worldNorm.x = dot( midf3_c( worldMat[0].xyz ), inputNormal );
-		worldNorm.y = dot( midf3_c( worldMat[1].xyz ), inputNormal );
-		worldNorm.z = dot( midf3_c( worldMat[2].xyz ), inputNormal );
-		worldNorm *= midf_c( inVs_blendWeights[0] );
+		worldNorm = mul( inputNormal, normalAdjMat ) * midf_c( inVs_blendWeights[0] );
 	@end
 	@property( normal_map )
 		midf3 worldTang;
-		worldTang.x = dot( midf3_c( worldMat[0].xyz ), inputTangent );
-		worldTang.y = dot( midf3_c( worldMat[1].xyz ), inputTangent );
-		worldTang.z = dot( midf3_c( worldMat[2].xyz ), inputTangent );
-		worldTang *= midf_c( inVs_blendWeights[0] );
+		worldTang = mul( inputTangent, normalAdjMat ) * midf_c( inVs_blendWeights[0] );
 	@end
 
-	@psub( NeedsMoreThan1BonePerVertex, hlms_bones_per_vertex, 1 )
-	@property( NeedsMoreThan1BonePerVertex )
-		float4 tmp4;
-		tmp4.w = 1.0;
-		midf3 tmp3;
-	@end //!NeedsMoreThan1BonePerVertex
 	@foreach( hlms_bones_per_vertex, n, 1 )
-		_idx = (inVs_blendIndices[@n] << 1u) + inVs_blendIndices[@n]; //inVs_blendIndices[@n] * 3; a 32-bit int multiply is 4 cycles on GCN! (and mul24 is not exposed to GLSL...)
-		worldMat[0] = readOnlyFetch( worldMatBuf, int(matStart + _idx + 0u) );
-		worldMat[1] = readOnlyFetch( worldMatBuf, int(matStart + _idx + 1u) );
-		worldMat[2] = readOnlyFetch( worldMatBuf, int(matStart + _idx + 2u) );
-		tmp4.x = dot( worldMat[0], inputPos );
-		tmp4.y = dot( worldMat[1], inputPos );
-		tmp4.z = dot( worldMat[2], inputPos );
-		worldPos.xyz += (tmp4 * inVs_blendWeights[@n]).xyz;
+		_idx = (inVs_blendIndices[@n] << 1u) + inVs_blendIndices[@n]; //inVs_blendIndices[@n] * 3; a 32-bit int multiply is 4 cycles on GCN! (and mul24 is not exposed to GLSL...).
+		worldMat = makeOgreFloat4x3( readOnlyFetch( worldMatBuf, int( matStart + _idx + 0u ) ),
+									 readOnlyFetch( worldMatBuf, int( matStart + _idx + 1u ) ),
+									 readOnlyFetch( worldMatBuf, int( matStart + _idx + 2u ) ) );
+		worldPos.xyz += mul( inputPos, worldMat ) * inVs_blendWeights[@n];
 		@property( hlms_normal || hlms_qtangent )
-			tmp3.x = dot( midf3_c( worldMat[0].xyz ), inputNormal );
-			tmp3.y = dot( midf3_c( worldMat[1].xyz ), inputNormal );
-			tmp3.z = dot( midf3_c( worldMat[2].xyz ), inputNormal );
-			worldNorm += tmp3.xyz * midf_c( inVs_blendWeights[@n] );
+			normalAdjMat = adjugateForNormalsFrom4x3( worldMat );
+			worldNorm += mul( inputNormal, normalAdjMat ) * midf_c( inVs_blendWeights[@n] );
 		@end
 		@property( normal_map )
-			tmp3.x = dot( midf3_c( worldMat[0].xyz ), inputTangent );
-			tmp3.y = dot( midf3_c( worldMat[1].xyz ), inputTangent );
-			tmp3.z = dot( midf3_c( worldMat[2].xyz ), inputTangent );
-			worldTang += tmp3.xyz * midf_c( inVs_blendWeights[@n] );
+			worldTang += mul( inputTangent, normalAdjMat ) * midf_c( inVs_blendWeights[@n] );
 		@end
 	@end
 
+	@property( hlms_normal || hlms_qtangent )
+		worldNorm = normalize( worldNorm );
+	@end
+
 	worldPos.w = 1.0;
 @end // SkeletonTransform
 @end // !hlms_skeleton
@@ -185,14 +193,13 @@
 	// If hlms_skeleton is defined the transforms will be provided by bones.
 	// If hlms_pose is not combined with hlms_skeleton the object's worldMat and worldView have to be set.
 	@property( !hlms_skeleton )
-		float4 worldMat[3];
-		worldMat[0] = readOnlyFetch( worldMatBuf, int( poseDataStart + @value(NumPoseWeightVectors)u + 1u ) );
-		worldMat[1] = readOnlyFetch( worldMatBuf, int( poseDataStart + @value(NumPoseWeightVectors)u + 2u ) );
-		worldMat[2] = readOnlyFetch( worldMatBuf, int( poseDataStart + @value(NumPoseWeightVectors)u + 3u ) );
+		ogre_float4x3 worldMat;
+		worldMat = makeOgreFloat4x3(
+			readOnlyFetch( worldMatBuf, int( poseDataStart + @value( NumPoseWeightVectors )u + 1u ) ),
+			readOnlyFetch( worldMatBuf, int( poseDataStart + @value( NumPoseWeightVectors )u + 2u ) ),
+			readOnlyFetch( worldMatBuf, int( poseDataStart + @value( NumPoseWeightVectors )u + 3u ) ) );
 		float4 worldPos;
-		worldPos.x = dot( worldMat[0], inputPos );
-		worldPos.y = dot( worldMat[1], inputPos );
-		worldPos.z = dot( worldMat[2], inputPos );
+		worldPos.xyz = mul( inputPos, worldMat );
 		worldPos.w = 1.0;
 
 		@property( hlms_normal || hlms_qtangent )
@@ -213,11 +220,12 @@
 	@property( hlms_normal || hlms_qtangent )	outVs.pos = @insertpiece( CalculatePsPos );@end
 	@property( hlms_normal || hlms_qtangent )
 		midf3x3 worldMat3x3 = toMidf3x3( worldViewMat );
-		@property( accurate_non_uniform_scaled_normals )
-			midf3x3 normalMat = transpose( adjugate( worldMat3x3 ) );
-			outVs.normal = normalize( mul( @insertpiece(local_normal), normalMat ) );
-		@else
+		@property( hlms_skeleton )
+			// worldViewMat is actually passBuf.view so we don't need the adjugate. We've already done that.
 			outVs.normal = mul( @insertpiece(local_normal), worldMat3x3 );
+		@else
+			midf3x3 normalMat = adjugateForNormals( worldMat3x3 );
+			outVs.normal = mul( @insertpiece(local_normal), normalMat );
 		@end
 	@end
 	@property( normal_map )	outVs.tangent = mul( @insertpiece(local_tangent), toMidf3x3( worldViewMat ) );@end
@@ -290,19 +298,20 @@
 
 		float4 worldPos = float4( mul(inVs_vertex, worldMat).xyz, 1.0f );
 		@property( ( hlms_normal || hlms_qtangent) && hlms_num_shadow_map_lights )
-			// We need worldNorm for normal offset bias
-			midf3 worldNorm = mul( inputNormal, toMidf3x3( worldMat ) ).xyz;
+			const midf3x3 normalAdjMat = adjugateForNormalsFrom4x3( worldMat );
+
+			// We need worldNorm for normal offset bias.
+			midf3 worldNorm = normalize( mul( inputNormal, normalAdjMat ).xyz );
 		@end
 	@end
 
 	@insertpiece( PoseTransform )
 
 	@property( !hlms_skeleton && hlms_pose && ( hlms_normal || hlms_qtangent) && hlms_num_shadow_map_lights )
-		// We need worldNorm for normal offset bias, special path when using poses
-		midf3 worldNorm;
-		worldNorm.x = dot( midf3_c( worldMat[0].xyz ), inputNormal );
-		worldNorm.y = dot( midf3_c( worldMat[1].xyz ), inputNormal );
-		worldNorm.z = dot( midf3_c( worldMat[2].xyz ), inputNormal );
+		const midf3x3 normalAdjMat = adjugateForNormalsFrom4x3( worldMat );
+
+		// We need worldNorm for normal offset bias, special path when using poses.
+		midf3 worldNorm = normalize( mul( inputNormal, normalAdjMat ).xyz );
 	@end
 
 	@insertpiece( SkeletonTransform )