From 21f99f78b6e8359bc29d040ab39f1ba87d682802 Mon Sep 17 00:00:00 2001 From: "C. S." <76898260+Pentalimbed@users.noreply.github.com> Date: Mon, 9 Dec 2024 21:32:17 +0000 Subject: [PATCH] feat: refactor ssgi to use spherical harmonics (#837) * chore: remove ao blur * chore: random changes * feat: add SH IL * feat: add back blur * feat: add back half res * fix: ao temporal denoise --- .../Shaders/ScreenSpaceGI/blur.cs.hlsl | 70 +++--- .../Shaders/ScreenSpaceGI/gi.cs.hlsl | 163 ++++--------- .../ScreenSpaceGI/radianceDisocc.cs.hlsl | 59 ++--- .../Shaders/ScreenSpaceGI/upsample.cs.hlsl | 32 ++- .../Shaders/Skylighting/Skylighting.hlsli | 21 -- package/Shaders/AmbientCompositeCS.hlsl | 38 +-- package/Shaders/Common/Color.hlsli | 19 ++ .../SphericalHarmonics.hlsli | 43 ++++ package/Shaders/DeferredCompositeCS.hlsl | 28 ++- package/Shaders/Water.hlsl | 2 +- src/Deferred.cpp | 18 +- src/Features/ScreenSpaceGI.cpp | 219 +++++++----------- src/Features/ScreenSpaceGI.h | 27 ++- 13 files changed, 350 insertions(+), 389 deletions(-) diff --git a/features/Screen Space GI/Shaders/ScreenSpaceGI/blur.cs.hlsl b/features/Screen Space GI/Shaders/ScreenSpaceGI/blur.cs.hlsl index 08cc5dea5..47eb61f36 100644 --- a/features/Screen Space GI/Shaders/ScreenSpaceGI/blur.cs.hlsl +++ b/features/Screen Space GI/Shaders/ScreenSpaceGI/blur.cs.hlsl @@ -8,13 +8,15 @@ #include "Common/VR.hlsli" #include "ScreenSpaceGI/common.hlsli" -Texture2D srcGI : register(t0); // maybe half-res -Texture2D srcAccumFrames : register(t1); // maybe half-res -Texture2D srcDepth : register(t2); -Texture2D srcNormalRoughness : register(t3); +Texture2D srcDepth : register(t0); +Texture2D srcNormalRoughness : register(t1); +Texture2D srcAccumFrames : register(t2); // maybe half-res +Texture2D srcIlY : register(t3); // maybe half-res +Texture2D srcIlCoCg : register(t4); // maybe half-res -RWTexture2D outGI : register(u0); -RWTexture2D outAccumFrames : register(u1); +RWTexture2D outAccumFrames : register(u0); +RWTexture2D 
outIlY : register(u1); +RWTexture2D outIlCoCg : register(u2); // samples = 8, min distance = 0.5, average samples on radius = 2 static const float3 g_Poisson8[8] = { @@ -73,6 +75,8 @@ float2x3 getKernelBasis(float3 D, float3 N, float roughness = 1.0, float anisoFa return float2x3(T, B); } +// TODO: spinning blur + [numthreads(8, 8, 1)] void main(const uint2 dtid : SV_DispatchThreadID) { const float2 frameScale = FrameDim * RcpTexDim; @@ -90,34 +94,28 @@ float2x3 getKernelBasis(float3 D, float3 N, float roughness = 1.0, float anisoFa float depth = READ_DEPTH(srcDepth, dtid); float3 pos = ScreenToViewPosition(screenPos, depth, eyeIndex); - float4 normalRoughness = FULLRES_LOAD(srcNormalRoughness, dtid, uv, samplerLinearClamp); - float3 normal = GBuffer::DecodeNormal(normalRoughness.xy); -#ifdef SPECULAR_BLUR - float roughness = 1 - normalRoughness.z; -#endif + float3 normal = GBuffer::DecodeNormal(FULLRES_LOAD(srcNormalRoughness, dtid, uv, samplerLinearClamp).xy); const float2 pixelDirRBViewspaceSizeAtCenterZ = depth.xx * (eyeIndex == 0 ? 
NDCToViewMul.xy : NDCToViewMul.zw) * RCP_OUT_FRAME_DIM; const float worldRadius = radius * pixelDirRBViewspaceSizeAtCenterZ.x; -#ifdef SPECULAR_BLUR - float2x3 TvBv = getKernelBasis(getSpecularDominantDirection(normal, -normalize(pos), roughness), normal, roughness); - float halfAngle = specularLobeHalfAngle(roughness); -#else float2x3 TvBv = getKernelBasis(normal, normal); // D = N float halfAngle = Math::HALF_PI * .5f; -#endif + TvBv[0] *= worldRadius; TvBv[1] *= worldRadius; #ifdef TEMPORAL_DENOISER halfAngle *= 1 - lerp(0, 0.8, sqrt(accumFrames / (float)MaxAccumFrames)); #endif - float4 gi = srcGI[dtid]; + const float4 ilY = srcIlY[dtid]; + const float2 ilCoCg = srcIlCoCg[dtid]; - float4 sum = gi; -#if defined(TEMPORAL_DENOISER) && !defined(SPECULAR_BLUR) - float fsum = accumFrames; + float4 ySum = ilY; + float2 coCgSum = ilCoCg; +#if defined(TEMPORAL_DENOISER) + float fSum = accumFrames; #endif - float wsum = 1; + float wSum = 1; for (uint i = 0; i < numSamples; i++) { float w = GaussianWeight(g_Poisson8[i].z); @@ -146,30 +144,28 @@ float2x3 getKernelBasis(float3 D, float3 N, float roughness = 1.0, float anisoFa float4 normalRoughnessSample = srcNormalRoughness.SampleLevel(samplerLinearClamp, uvSample * frameScale, 0); float3 normalSample = GBuffer::DecodeNormal(normalRoughnessSample.xy); -#ifdef SPECULAR_BLUR - float roughnessSample = 1 - normalRoughnessSample.z; -#endif - - float4 giSample = srcGI.SampleLevel(samplerLinearClamp, uvSample * OUT_FRAME_SCALE, 0); // geometry weight w *= saturate(1 - abs(dot(normal, posSample - pos)) * DistanceNormalisation); // normal weight w *= 1 - saturate(FastMath::acosFast4(saturate(dot(normalSample, normal))) / halfAngle); -#ifdef SPECULAR_BLUR - // roughness weight - w *= abs(roughness - roughnessSample) / (roughness * roughness * 0.99 + 0.01); -#endif - sum += giSample * w; -#if defined(TEMPORAL_DENOISER) && !defined(SPECULAR_BLUR) - fsum += srcAccumFrames.SampleLevel(samplerLinearClamp, uvSample * OUT_FRAME_SCALE, 0) * 
w; + if (w > 1e-8) { + float4 ySample = srcIlY.SampleLevel(samplerLinearClamp, uvSample * OUT_FRAME_SCALE, 0); + float2 coCgSample = srcIlCoCg.SampleLevel(samplerLinearClamp, uvSample * OUT_FRAME_SCALE, 0); + + ySum += ySample * w; + coCgSum += coCgSample * w; +#if defined(TEMPORAL_DENOISER) + fSum += srcAccumFrames.SampleLevel(samplerLinearClamp, uvSample * OUT_FRAME_SCALE, 0) * w; #endif - wsum += w; + wSum += w; + } } - outGI[dtid] = sum / wsum; -#if defined(TEMPORAL_DENOISER) && !defined(SPECULAR_BLUR) - outAccumFrames[dtid] = fsum / wsum; + outIlY[dtid] = ySum / wSum; + outIlCoCg[dtid] = coCgSum / wSum; +#if defined(TEMPORAL_DENOISER) + outAccumFrames[dtid] = fSum / wSum; #endif } \ No newline at end of file diff --git a/features/Screen Space GI/Shaders/ScreenSpaceGI/gi.cs.hlsl b/features/Screen Space GI/Shaders/ScreenSpaceGI/gi.cs.hlsl index c31489de3..1ee4eee4f 100644 --- a/features/Screen Space GI/Shaders/ScreenSpaceGI/gi.cs.hlsl +++ b/features/Screen Space GI/Shaders/ScreenSpaceGI/gi.cs.hlsl @@ -20,12 +20,17 @@ // Screen Space Indirect Lighting with Visibility Bitmask // https://arxiv.org/abs/2301.11376 // +// Exploring Raytraced Future in Metro Exodus +// https://developer.download.nvidia.com/video/gputechconf/gtc/2019/presentation/s9985-exploring-ray-traced-future-in-metro-exodus.pdf +// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#include "Common/Color.hlsli" #include "Common/FastMath.hlsli" #include "Common/FrameBuffer.hlsli" #include "Common/GBuffer.hlsli" #include "Common/Math.hlsli" +#include "Common/Spherical Harmonics/SphericalHarmonics.hlsli" #include "Common/VR.hlsli" #include "ScreenSpaceGI/common.hlsli" @@ -36,12 +41,13 @@ Texture2D srcNormalRoughness : register(t1); Texture2D srcRadiance : register(t2); // maybe half-res Texture2D srcNoise : register(t3); Texture2D srcAccumFrames : register(t4); // maybe half-res -Texture2D srcPrevGI : register(t5); // maybe half-res 
-Texture2D srcPrevGISpecular : register(t6); // maybe half-res +Texture2D srcPrevAo : register(t5); // maybe half-res +Texture2D srcPrevY : register(t6); // maybe half-res +Texture2D srcPrevCoCg : register(t7); // maybe half-res -RWTexture2D outGI : register(u0); -RWTexture2D outGISpecular : register(u1); -RWTexture2D outBentNormal : register(u2); +RWTexture2D outAo : register(u0); +RWTexture2D outY : register(u1); +RWTexture2D outCoCg : register(u2); RWTexture2D outPrevGeo : register(u3); float GetDepthFade(float depth) @@ -79,7 +85,7 @@ float GetVisibilityFunctionSmithJointApprox(float roughness, float NdotV, float void CalculateGI( uint2 dtid, float2 uv, float viewspaceZ, float3 viewspaceNormal, - out float4 o_currGIAO, out float4 o_currGIAOSpecular, out float3 o_bentNormal) + out float o_ao, out sh2 o_currY, out float2 o_currCoCg) { const float2 frameScale = FrameDim * RcpTexDim; @@ -112,14 +118,8 @@ void CalculateGI( const float NoV = clamp(dot(viewVec, viewspaceNormal), 1e-5, 1); float visibility = 0; - float visibilitySpecular = 0; - float3 radiance = 0; - float3 radianceSpecular = 0; - float3 bentNormal = viewspaceNormal; - -#ifdef GI_SPECULAR - const float roughness = max(0.2, saturate(1 - FULLRES_LOAD(srcNormalRoughness, dtid, uv * frameScale, samplerLinearClamp).z)); // can't handle low roughness -#endif + float4 radianceY = 0; + float2 radianceCoCg = 0; for (uint slice = 0; slice < NumSlices; slice++) { float phi = (Math::PI * rcpNumSlices) * (slice + noiseSlice); @@ -142,12 +142,6 @@ void CalculateGI( uint bitmask = 0; #ifdef GI uint bitmaskGI = 0; -# ifdef GI_SPECULAR - uint bitmaskGISpecular = 0; - float3 domVec = getSpecularDominantDirection(viewspaceNormal, viewVec, roughness); - float3 projectedDomVec = normalize(domVec - axisVec * dot(domVec, axisVec)); - float nDom = sign(dot(orthoDirectionVec, projectedDomVec)) * FastMath::ACos(saturate(dot(projectedDomVec, viewVec))); -# endif #endif // R1 sequence 
(http://extremelearning.com.au/unreasonable-effectiveness-of-quasirandom-sequences/) @@ -197,63 +191,32 @@ void CalculateGI( float angleBackGI = FastMath::ACos(dot(sampleBackHorizonVecGI, viewVec)); float2 angleRangeGI = -sideSign * (sideSign == -1 ? float2(angleFront, angleBackGI) : float2(angleBackGI, angleFront)); -# ifdef GI_SPECULAR - // thank u Olivier! - float coneHalfAngles = max(5e-2, specularLobeHalfAngle(roughness)); // not too small - float2 angleRangeSpecular = clamp((angleRangeGI + nDom) * 0.5 / coneHalfAngles, -1, 1) * 0.5 + 0.5; - - // Experimental method using importance sampling - // https://agraphicsguynotes.com/posts/sample_microfacet_brdf/ - // float2 angleRangeSpecular = angleBackGI; - // float2 specularSigns = sign(angleRangeSpecular); - // angleRangeSpecular = saturate(cos(angleRangeSpecular)) * (roughness2 - 1); - // angleRangeSpecular = roughness2 / (angleRangeSpecular * angleRangeSpecular + roughness2 - 1) - 1 / (roughness2 - 1); - // angleRangeSpecular = saturate((angleRangeSpecular * specularSigns) * 0.5 + 0.5); - - uint2 bitsRangeGISpecular = uint2(round(angleRangeSpecular.x * 32u), round((angleRangeSpecular.y - angleRangeSpecular.x) * 32u)); - uint maskedBitsGISpecular = s < GIRadius ? ((1 << bitsRangeGISpecular.y) - 1) << bitsRangeGISpecular.x : 0; -# endif - angleRangeGI = smoothstep(0, 1, (angleRangeGI + n) * RCP_PI + .5); // https://discord.com/channels/586242553746030596/586245736413528082/1102228968247144570 uint2 bitsRangeGI = uint2(round(angleRangeGI.x * 32u), round((angleRangeGI.y - angleRangeGI.x) * 32u)); uint maskedBitsGI = s < GIRadius ? 
((1 << bitsRangeGI.y) - 1) << bitsRangeGI.x : 0; - uint overlappedBits = maskedBitsGI & ~bitmaskGI; - bool checkGI = overlappedBits; -# ifdef GI_SPECULAR - uint overlappedBitsSpecular = maskedBitsGISpecular & ~bitmaskGISpecular; - checkGI = checkGI || overlappedBitsSpecular; -# endif + uint validBits = maskedBitsGI & ~bitmaskGI; + bool checkGI = validBits; if (checkGI) { - float giBoost = 1 + GIDistanceCompensation * smoothstep(0, GICompensationMaxDist, s * EffectRadius); + float giBoost = 4.0 * Math::PI * (1 + GIDistanceCompensation * smoothstep(0, GICompensationMaxDist, s * EffectRadius)); // IL float3 normalSample = GBuffer::DecodeNormal(srcNormalRoughness.SampleLevel(samplerPointClamp, sampleUV * frameScale, 0).xy); float frontBackMult = saturate(-dot(normalSample, sampleHorizonVec)); frontBackMult = frontBackMult < 0 ? abs(frontBackMult) * BackfaceStrength : frontBackMult; // backface - float NoL = clamp(dot(viewspaceNormal, sampleHorizonVec), 1e-5, 1); + if (frontBackMult > 0.f) { + float3 sampleHorizonVecWS = normalize(mul(FrameBuffer::CameraViewInverse[eyeIndex], half4(sampleHorizonVec, 0)).xyz); - if (frontBackMult > 0.f && NoL > 0.001f) { float3 sampleRadiance = srcRadiance.SampleLevel(samplerPointClamp, sampleUV * OUT_FRAME_SCALE, mipLevel).rgb * frontBackMult * giBoost; + sampleRadiance = max(sampleRadiance, 0); + float3 sampleRadianceYCoCg = Color::RGBToYCoCg(sampleRadiance); - float3 diffuseRadiance = sampleRadiance * countbits(overlappedBits) * 0.03125; // 1/32 - diffuseRadiance *= NoL; - diffuseRadiance = max(0, diffuseRadiance); - - radiance += diffuseRadiance; - -# ifdef GI_SPECULAR - float NoH = clamp(dot(viewspaceNormal, normalize(viewVec + sampleHorizonVec)), 1e-5, 1); - - float3 specularRadiance = sampleRadiance * countbits(overlappedBitsSpecular) * 0.03125; // 1/32 - specularRadiance *= GetNormalDistributionFunctionGGX(roughness, NoH) * GetVisibilityFunctionSmithJointApprox(roughness, NoV, NoL); - specularRadiance = max(0, specularRadiance); 
- - radianceSpecular += specularRadiance; -# endif + float bitmaskWeight = countbits(validBits) * 0.03125; + radianceY += sampleRadianceYCoCg.r * SphericalHarmonics::Evaluate(sampleHorizonVecWS) * bitmaskWeight; + radianceCoCg += sampleRadianceYCoCg.gb * bitmaskWeight; } } #endif // GI @@ -261,18 +224,11 @@ void CalculateGI( bitmask |= maskedBits; #ifdef GI bitmaskGI |= maskedBitsGI; -# ifdef GI_SPECULAR - bitmaskGISpecular |= maskedBitsGISpecular; -# endif #endif } } visibility += countbits(bitmask) * 0.03125; - -#if defined(GI) && defined(GI_SPECULAR) - visibilitySpecular += countbits(bitmaskGISpecular) * 0.03125; -#endif } float depthFade = GetDepthFade(viewspaceZ); @@ -282,28 +238,15 @@ void CalculateGI( visibility = 1 - pow(abs(1 - visibility), AOPower); #ifdef GI - radiance *= rcpNumSlices; - radiance = lerp(radiance, 0, depthFade); -# ifdef GI_SPECULAR - radianceSpecular *= rcpNumSlices; - radianceSpecular = lerp(radianceSpecular, 0, depthFade); - - visibilitySpecular *= rcpNumSlices; - visibilitySpecular = lerp(saturate(visibility), 0, depthFade); -# endif -#endif - -#if !defined(GI) || !defined(GI_SPECULAR) - visibilitySpecular = 0.0; -#endif + radianceY *= rcpNumSlices; + radianceY = lerp(radianceY, 0, depthFade); -#ifdef BENT_NORMAL - bentNormal = normalize(bentNormal); + radianceCoCg *= rcpNumSlices; #endif - o_currGIAO = float4(radiance, visibility); - o_currGIAOSpecular = float4(radianceSpecular, visibilitySpecular); - o_bentNormal = bentNormal; + o_ao = visibility; + o_currY = radianceY; + o_currCoCg = radianceCoCg; } [numthreads(8, 8, 1)] void main(const uint2 dtid @@ -311,13 +254,6 @@ void CalculateGI( const float2 frameScale = FrameDim * RcpTexDim; uint2 pxCoord = dtid; -#if defined(HALF_RATE) - const uint halfWidth = uint(OUT_FRAME_DIM.x) >> 1; - const bool useHistory = dtid.x >= halfWidth; - pxCoord.x = (pxCoord.x % halfWidth) * 2 + (dtid.y + FrameIndex + useHistory) % 2; -#else - const static bool useHistory = false; -#endif float2 uv = 
(pxCoord + .5) * RCP_OUT_FRAME_DIM; uint eyeIndex = Stereo::GetEyeIndexFromTexCoord(uv); @@ -333,38 +269,27 @@ void CalculateGI( // Move center pixel slightly towards camera to avoid imprecision artifacts due to depth buffer imprecision; offset depends on depth texture format used viewspaceZ *= 0.99920h; // this is good for FP16 depth buffer - float4 currGIAO = float4(0, 0, 0, 0); - float4 currGIAOSpecular = float4(0, 0, 0, 0); - float3 bentNormal = viewspaceNormal; + float currAo = 0; + float4 currY = 0; + float2 currCoCg = 0; bool needGI = viewspaceZ > FP_Z && viewspaceZ < DepthFadeRange.y; if (needGI) { - if (!useHistory) - CalculateGI( - pxCoord, uv, viewspaceZ, viewspaceNormal, - currGIAO, currGIAOSpecular, bentNormal); + CalculateGI( + pxCoord, uv, viewspaceZ, viewspaceNormal, + currAo, currY, currCoCg); #ifdef TEMPORAL_DENOISER float lerpFactor = rcp(srcAccumFrames[pxCoord] * 255); -# if defined(HALF_RATE) - if (useHistory && lerpFactor != 1) - lerpFactor = 0; -# endif - - currGIAO = lerp(srcPrevGI[pxCoord], currGIAO, lerpFactor); -# ifdef GI_SPECULAR - currGIAOSpecular = lerp(srcPrevGISpecular[pxCoord], currGIAOSpecular, lerpFactor); -# endif + + currY = lerp(srcPrevY[pxCoord], currY, lerpFactor); + currCoCg = lerp(srcPrevCoCg[pxCoord], currCoCg, lerpFactor); #endif } - currGIAO = any(ISNAN(currGIAO)) ? float4(0, 0, 0, 0) : currGIAO; - currGIAOSpecular = any(ISNAN(currGIAOSpecular)) ? float4(0, 0, 0, 0) : currGIAOSpecular; + currY = any(ISNAN(currY)) ? 0 : currY; + currCoCg = any(ISNAN(currCoCg)) ? 
0 : currCoCg; - outGI[pxCoord] = currGIAO; -#ifdef GI_SPECULAR - outGISpecular[pxCoord] = currGIAOSpecular; -#endif -#ifdef BENT_NORMAL - outBentNormal[pxCoord] = GBuffer::EncodeNormal(bentNormal); -#endif + outAo[pxCoord] = currAo; + outY[pxCoord] = currY; + outCoCg[pxCoord] = currCoCg; } \ No newline at end of file diff --git a/features/Screen Space GI/Shaders/ScreenSpaceGI/radianceDisocc.cs.hlsl b/features/Screen Space GI/Shaders/ScreenSpaceGI/radianceDisocc.cs.hlsl index 6e368f0dc..1d1ad5afd 100644 --- a/features/Screen Space GI/Shaders/ScreenSpaceGI/radianceDisocc.cs.hlsl +++ b/features/Screen Space GI/Shaders/ScreenSpaceGI/radianceDisocc.cs.hlsl @@ -5,19 +5,21 @@ #include "ScreenSpaceGI/common.hlsli" Texture2D srcDiffuse : register(t0); -Texture2D srcPrevGI : register(t1); // maybe half-res -Texture2D srcPrevGISpecular : register(t2); // maybe half-res -Texture2D srcCurrDepth : register(t3); -Texture2D srcCurrNormal : register(t4); -Texture2D srcPrevGeo : register(t5); // maybe half-res -Texture2D srcMotionVec : register(t6); -Texture2D srcPrevAmbient : register(t7); -Texture2D srcAccumFrames : register(t8); // maybe half-res +Texture2D srcCurrDepth : register(t1); +Texture2D srcCurrNormal : register(t2); +Texture2D srcPrevGeo : register(t3); // maybe half-res +Texture2D srcMotionVec : register(t4); +Texture2D srcPrevAmbient : register(t5); +Texture2D srcAccumFrames : register(t6); // maybe half-res +Texture2D srcPrevAo : register(t7); // maybe half-res +Texture2D srcPrevIlY : register(t8); // maybe half-res +Texture2D srcPrevIlCoCg : register(t9); // maybe half-res RWTexture2D outRadianceDisocc : register(u0); RWTexture2D outAccumFrames : register(u1); -RWTexture2D outRemappedPrevGI : register(u2); -RWTexture2D outRemappedPrevGISpecular : register(u3); +RWTexture2D outRemappedAo : register(u2); +RWTexture2D outRemappedIlY : register(u3); +RWTexture2D outRemappedIlCoCg : register(u4); #if (defined(GI) && defined(GI_BOUNCE)) || defined(TEMPORAL_DENOISER) || 
defined(HALF_RATE) # define REPROJECTION @@ -25,7 +27,7 @@ RWTexture2D outRemappedPrevGISpecular : register(u3); void readHistory( uint eyeIndex, float curr_depth, float3 curr_pos, int2 pixCoord, float bilinear_weight, - inout half4 prev_gi, inout half4 prev_gi_specular, inout half3 prev_ambient, inout float accum_frames, inout float wsum) + inout half prev_ao, inout half4 prev_y, inout half2 prev_co_cg, inout half3 prev_ambient, inout float accum_frames, inout float wsum) { const float2 uv = (pixCoord + .5) * RCP_OUT_FRAME_DIM; const float2 screen_pos = Stereo::ConvertFromStereoUV(uv, eyeIndex); @@ -50,10 +52,9 @@ void readHistory( prev_ambient += srcPrevAmbient[pixCoord] * bilinear_weight; #endif #ifdef TEMPORAL_DENOISER - prev_gi += srcPrevGI[pixCoord] * bilinear_weight; -# ifdef GI_SPECULAR - prev_gi_specular += srcPrevGISpecular[pixCoord] * bilinear_weight; -# endif + prev_ao += srcPrevAo[pixCoord] * bilinear_weight; + prev_y += srcPrevIlY[pixCoord] * bilinear_weight; + prev_co_cg += srcPrevIlCoCg[pixCoord] * bilinear_weight; accum_frames += srcAccumFrames[pixCoord] * bilinear_weight; #endif wsum += bilinear_weight; @@ -75,8 +76,9 @@ void readHistory( float2 prev_uv = Stereo::ConvertToStereoUV(prev_screen_pos, eyeIndex); half3 prev_ambient = 0; - half4 prev_gi = 0; - half4 prev_gi_specular = 0; + half prev_ao = 0; + half4 prev_y = 0; + half2 prev_co_cg = 0; float accum_frames = 0; float wsum = 0; @@ -85,7 +87,8 @@ void readHistory( if (curr_depth < FP_Z) { outRadianceDisocc[pixCoord] = half3(0, 0, 0); outAccumFrames[pixCoord] = 1.0 / 255.0; - outRemappedPrevGI[pixCoord] = half4(0, 0, 0, 0); + outRemappedIlY[pixCoord] = half4(0, 0, 0, 0); + outRemappedIlCoCg[pixCoord] = half2(0, 0); return; } @@ -102,24 +105,23 @@ void readHistory( readHistory(eyeIndex, curr_depth, curr_pos, prev_px_lu, (1 - bilinear_weights.x) * (1 - bilinear_weights.y), - prev_gi, prev_gi_specular, prev_ambient, accum_frames, wsum); + prev_ao, prev_y, prev_co_cg, prev_ambient, accum_frames, 
wsum); readHistory(eyeIndex, curr_depth, curr_pos, prev_px_lu + int2(1, 0), bilinear_weights.x * (1 - bilinear_weights.y), - prev_gi, prev_gi_specular, prev_ambient, accum_frames, wsum); + prev_ao, prev_y, prev_co_cg, prev_ambient, accum_frames, wsum); readHistory(eyeIndex, curr_depth, curr_pos, prev_px_lu + int2(0, 1), (1 - bilinear_weights.x) * bilinear_weights.y, - prev_gi, prev_gi_specular, prev_ambient, accum_frames, wsum); + prev_ao, prev_y, prev_co_cg, prev_ambient, accum_frames, wsum); readHistory(eyeIndex, curr_depth, curr_pos, prev_px_lu + int2(1, 1), bilinear_weights.x * bilinear_weights.y, - prev_gi, prev_gi_specular, prev_ambient, accum_frames, wsum); + prev_ao, prev_y, prev_co_cg, prev_ambient, accum_frames, wsum); if (wsum > 1e-2) { float rcpWsum = rcp(wsum + 1e-10); # ifdef TEMPORAL_DENOISER - prev_gi *= rcpWsum; -# ifdef GI_SPECULAR - prev_gi_specular *= rcpWsum; -# endif + prev_ao *= rcpWsum; + prev_y *= rcpWsum; + prev_co_cg *= rcpWsum; accum_frames *= rcpWsum; # endif # if defined(GI) && defined(GI_BOUNCE) @@ -147,7 +149,8 @@ void readHistory( accum_frames = max(1, min(accum_frames * 255 + useHistory, MaxAccumFrames)); outAccumFrames[pixCoord] = accum_frames / 255.0; - outRemappedPrevGI[pixCoord] = prev_gi; - outRemappedPrevGISpecular[pixCoord] = prev_gi_specular; + outRemappedAo[pixCoord] = prev_ao; + outRemappedIlY[pixCoord] = prev_y; + outRemappedIlCoCg[pixCoord] = prev_co_cg; #endif } \ No newline at end of file diff --git a/features/Screen Space GI/Shaders/ScreenSpaceGI/upsample.cs.hlsl b/features/Screen Space GI/Shaders/ScreenSpaceGI/upsample.cs.hlsl index 3bd7e92de..c1bb9246a 100644 --- a/features/Screen Space GI/Shaders/ScreenSpaceGI/upsample.cs.hlsl +++ b/features/Screen Space GI/Shaders/ScreenSpaceGI/upsample.cs.hlsl @@ -4,13 +4,19 @@ #include "ScreenSpaceGI/common.hlsli" Texture2D srcDepth : register(t0); -Texture2D srcGI : register(t1); // half-res +Texture2D srcAo : register(t1); // half-res +Texture2D srcIlY : register(t2); // 
half-res +Texture2D srcIlCoCg : register(t3); // half-res -RWTexture2D outGI : register(u0); +RWTexture2D outAo : register(u0); +RWTexture2D outIlY : register(u1); +RWTexture2D outIlCoCg : register(u2); #define min4(v) min(min(v.x, v.y), min(v.z, v.w)) #define max4(v) max(max(v.x, v.y), max(v.z, v.w)) +#define BLEND_WEIGHT(a, b, c, d, w, sumw) ((a * w.x + b * w.y + c * w.z + d * w.w) / max(sumw, 1e-5)) + [numthreads(8, 8, 1)] void main(const uint2 dtid : SV_DispatchThreadID) { int2 px00 = (dtid >> 1) + (dtid & 1) - 1; @@ -31,15 +37,12 @@ RWTexture2D outGI : register(u0); float avg = dot(d, 0.25.xxxx); bool d_edge = (diffd / avg) < 0.1; - float4 atten; + float ao; + float4 y; + float2 coCg; [branch] if (d_edge) { - float4 gisample0 = srcGI[px00]; - float4 gisample1 = srcGI[px01]; - float4 gisample2 = srcGI[px10]; - float4 gisample3 = srcGI[px11]; - float bgdepth = srcDepth[dtid]; //note: depth weighing from https://www.ppsloan.org/publications/ProxyPG.pdf#page=5 @@ -47,12 +50,19 @@ RWTexture2D outGI : register(u0); float4 w = 1.0 / (dd + 0.00001); float sumw = w.x + w.y + w.z + w.w; - atten = (gisample0 * w.x + gisample1 * w.y + gisample2 * w.z + gisample3 * w.w) / (sumw + 0.00001); + ao = BLEND_WEIGHT(srcAo[px00], srcAo[px01], srcAo[px10], srcAo[px11], w, sumw); + y = BLEND_WEIGHT(srcIlY[px00], srcIlY[px01], srcIlY[px10], srcIlY[px11], w, sumw); + coCg = BLEND_WEIGHT(srcIlCoCg[px00], srcIlCoCg[px01], srcIlCoCg[px10], srcIlCoCg[px11], w, sumw); } else { - atten = srcGI.SampleLevel(samplerLinearClamp, (dtid + .5) * RcpFrameDim * OUT_FRAME_DIM * RcpTexDim, 0); + float2 uv = (dtid + .5) * RcpFrameDim * OUT_FRAME_DIM * RcpTexDim; + ao = srcAo.SampleLevel(samplerLinearClamp, uv, 0); + y = srcIlY.SampleLevel(samplerLinearClamp, uv, 0); + coCg = srcIlCoCg.SampleLevel(samplerLinearClamp, uv, 0); } - outGI[dtid] = atten; + outAo[dtid] = ao; + outIlY[dtid] = y; + outIlCoCg[dtid] = coCg; } \ No newline at end of file diff --git 
a/features/Skylighting/Shaders/Skylighting/Skylighting.hlsli b/features/Skylighting/Shaders/Skylighting/Skylighting.hlsli index 0f675adb3..9c9713d60 100644 --- a/features/Skylighting/Shaders/Skylighting/Skylighting.hlsli +++ b/features/Skylighting/Shaders/Skylighting/Skylighting.hlsli @@ -108,25 +108,4 @@ namespace Skylighting } return vl * step; } - - sh2 fauxSpecularLobeSH(float3 N, float3 V, float roughness) - { - // https://www.gdcvault.com/play/1026701/Fast-Denoising-With-Self-Stabilizing - // get dominant ggx reflection direction - float f = (1 - roughness) * (sqrt(1 - roughness) + roughness); - float3 R = reflect(-V, N); - float3 D = lerp(N, R, f); - float3 dominantDir = normalize(D); - - // lobe half angle - // credit: Olivier Therrien - float roughness2 = roughness * roughness; - float halfAngle = clamp(4.1679 * roughness2 * roughness2 - 9.0127 * roughness2 * roughness + 4.6161 * roughness2 + 1.7048 * roughness + 0.1, 0, Math::HALF_PI); - float lerpFactor = halfAngle / Math::HALF_PI; - sh2 directional = SphericalHarmonics::Evaluate(dominantDir); - sh2 cosineLobe = SphericalHarmonics::EvaluateCosineLobe(dominantDir) / Math::PI; - sh2 result = SphericalHarmonics::Add(SphericalHarmonics::Scale(directional, lerpFactor), SphericalHarmonics::Scale(cosineLobe, 1 - lerpFactor)); - - return result; - } } diff --git a/package/Shaders/AmbientCompositeCS.hlsl b/package/Shaders/AmbientCompositeCS.hlsl index d9e47873e..d35d40c09 100644 --- a/package/Shaders/AmbientCompositeCS.hlsl +++ b/package/Shaders/AmbientCompositeCS.hlsl @@ -3,6 +3,7 @@ #include "Common/GBuffer.hlsli" #include "Common/Math.hlsli" #include "Common/SharedData.hlsli" +#include "Common/Spherical Harmonics/SphericalHarmonics.hlsli" #include "Common/VR.hlsli" Texture2D AlbedoTexture : register(t0); @@ -19,12 +20,14 @@ Texture3D SkylightingProbeArray : register(t3); Texture2D DepthTexture : register(t2); #endif +Texture2D Masks2Texture : register(t4); + #if defined(SSGI) -Texture2D SSGITexture : 
register(t4); +Texture2D SsgiAoTexture : register(t5); +Texture2D SsgiYTexture : register(t6); +Texture2D SsgiCoCgTexture : register(t7); #endif -Texture2D Masks2Texture : register(t5); - RWTexture2D MainRW : register(u0); #if defined(SSGI) RWTexture2D DiffuseAmbientRW : register(u1); @@ -83,22 +86,27 @@ RWTexture2D DiffuseAmbientRW : register(u1); uint2 pixCoord2 = (uint2)(uv2.xy / SharedData::BufferDim.zw - 0.5); # endif - half4 ssgiDiffuse = SSGITexture[dispatchID.xy]; -# if defined(VR) - half4 ssgiDiffuse2 = SSGITexture[pixCoord2]; - ssgiDiffuse = Stereo::BlendEyeColors(uv1Mono, (float4)ssgiDiffuse, uv2Mono, (float4)ssgiDiffuse2); -# endif - ssgiDiffuse.rgb *= linAlbedo; - ssgiDiffuse.a = 1 - ssgiDiffuse.a; - - visibility *= ssgiDiffuse.a; + half ssgiAo = 1 - SsgiAoTexture[dispatchID.xy]; + half4 ssgiIlYSh = SsgiYTexture[dispatchID.xy]; + // half ssgiIlY = SphericalHarmonics::FuncProductIntegral(ssgiIlYSh, SphericalHarmonics::EvaluateCosineLobe(normalWS)); + half ssgiIlY = SphericalHarmonics::SHHallucinateZH3Irradiance(ssgiIlYSh, normalWS); + half2 ssgiIlCoCg = SsgiCoCgTexture[dispatchID.xy]; + half3 ssgiIl = max(0, Color::YCoCgToRGB(float3(ssgiIlY, ssgiIlCoCg))); - DiffuseAmbientRW[dispatchID.xy] = linAlbedo * linDirectionalAmbientColor + ssgiDiffuse.rgb; + // TODO: VR Blending + // # if defined(VR) + // half4 ssgiDiffuse2 = SSGITexture[pixCoord2]; + // ssgiDiffuse = Stereo::BlendEyeColors(uv1Mono, (float4)ssgiDiffuse, uv2Mono, (float4)ssgiDiffuse2); + // # endif + visibility *= ssgiAo; # if defined(INTERIOR) - linDiffuseColor *= ssgiDiffuse.a; + linDiffuseColor *= ssgiAo; # endif - linDiffuseColor += ssgiDiffuse.rgb; + + ssgiIl *= linAlbedo; + DiffuseAmbientRW[dispatchID.xy] = linAlbedo * linDirectionalAmbientColor + ssgiIl; + linDiffuseColor += ssgiIl; #endif linAmbient *= visibility; diff --git a/package/Shaders/Common/Color.hlsli b/package/Shaders/Common/Color.hlsli index 80501e677..3a4d7b31d 100644 --- a/package/Shaders/Common/Color.hlsli +++ 
b/package/Shaders/Common/Color.hlsli @@ -22,6 +22,25 @@ namespace Color return dot(color, float3(0.299, 0.587, 0.114)); } + float3 RGBToYCoCg(float3 color) + { + float tmp = 0.25 * (color.r + color.b); + return float3( + tmp + 0.5 * color.g, // Y + 0.5 * (color.r - color.b), // Co + -tmp + 0.5 * color.g // Cg + ); + } + + float3 YCoCgToRGB(float3 color) + { + float tmp = color.x - color.z; + return float3( + tmp + color.y, + color.x + color.z, + tmp - color.y); + } + const static float AlbedoPreMult = 1 / 1.7; // greater value -> brighter pbr const static float LightPreMult = 1 / (Math::PI * AlbedoPreMult); // ensure 1/PI as product diff --git a/package/Shaders/Common/Spherical Harmonics/SphericalHarmonics.hlsli b/package/Shaders/Common/Spherical Harmonics/SphericalHarmonics.hlsli index c9d1b4f45..181e75246 100644 --- a/package/Shaders/Common/Spherical Harmonics/SphericalHarmonics.hlsli +++ b/package/Shaders/Common/Spherical Harmonics/SphericalHarmonics.hlsli @@ -199,6 +199,49 @@ namespace SphericalHarmonics result.yzw *= 2.0943951023931954923f; return result; } + + // Author: ProfJack + // Constructs the SH of an approximate specular lobe + sh2 FauxSpecularLobe(float3 N, float3 V, float roughness) + { + // https://www.gdcvault.com/play/1026701/Fast-Denoising-With-Self-Stabilizing + // get dominant ggx reflection direction + float f = (1 - roughness) * (sqrt(1 - roughness) + roughness); + float3 R = reflect(-V, N); + float3 D = lerp(N, R, f); + float3 dominantDir = normalize(D); + + // lobe half angle + // credit: Olivier Therrien + float roughness2 = roughness * roughness; + float halfAngle = clamp(4.1679 * roughness2 * roughness2 - 9.0127 * roughness2 * roughness + 4.6161 * roughness2 + 1.7048 * roughness + 0.1, 0, Math::HALF_PI); + float lerpFactor = halfAngle / Math::HALF_PI; + sh2 directional = SphericalHarmonics::Evaluate(dominantDir); + sh2 cosineLobe = SphericalHarmonics::EvaluateCosineLobe(dominantDir) / Math::PI; + sh2 result = 
SphericalHarmonics::Add(SphericalHarmonics::Scale(directional, lerpFactor), SphericalHarmonics::Scale(cosineLobe, 1 - lerpFactor)); + + return result; + } + + // Hallucinate zonal harmonics for diffuse lighting with more contrast + // http://torust.me/ZH3.pdf + float SHHallucinateZH3Irradiance(sh2 inSH, float3 direction) + { + float3 zonalAxis = normalize(float3(inSH.w, inSH.y, inSH.z)); + float ratio = 0.0; + ratio = abs(dot(float3(-inSH.w, -inSH.y, inSH.z), zonalAxis)); + ratio /= inSH.x; + float zonalL2Coeff = inSH.x * (0.08f * ratio + 0.6f * ratio * ratio); // Curve-fit; Section 3.4.3 + float fZ = dot(zonalAxis, direction); + float zhDir = sqrt(5.0f / (16.0f * Math::PI)) * (3.0f * fZ * fZ - 1.0f); + // Convolve inSH with the normalized cosine kernel (multiply the L1 band by the zonal scale 2/3), then dot with + // inSH(direction) for linear inSH (Equation 5). + float result = SphericalHarmonics::FuncProductIntegral(inSH, SphericalHarmonics::EvaluateCosineLobe(direction)); + // Add irradiance from the ZH3 term. zonalL2Coeff is the ZH3 coefficient for a radiance signal, so we need to + // multiply by 1/4 (the L2 zonal scale for a normalized clamped cosine kernel) to evaluate irradiance. 
+ result += 0.25f * zonalL2Coeff * zhDir; + return max(0, result); + } } #endif // __SPHERICAL_HARMONICS_DEPENDENCY_HLSL__ diff --git a/package/Shaders/DeferredCompositeCS.hlsl b/package/Shaders/DeferredCompositeCS.hlsl index f45c02e14..300801499 100644 --- a/package/Shaders/DeferredCompositeCS.hlsl +++ b/package/Shaders/DeferredCompositeCS.hlsl @@ -4,6 +4,7 @@ #include "Common/GBuffer.hlsli" #include "Common/MotionBlur.hlsli" #include "Common/SharedData.hlsli" +#include "Common/Spherical Harmonics/SphericalHarmonics.hlsli" #include "Common/VR.hlsli" Texture2D SpecularTexture : register(t0); @@ -36,7 +37,8 @@ Texture3D SkylightingProbeArray : register(t9); #endif #if defined(SSGI) -Texture2D SpecularSSGITexture : register(t10); +Texture2D SsgiYTexture : register(t10); +Texture2D SsgiCoCgTexture : register(t11); #endif [numthreads(8, 8, 1)] void main(uint3 dispatchID @@ -88,6 +90,8 @@ Texture2D SpecularSSGITexture : register(t10); half roughness = 1.0 - glossiness; half level = roughness * 7.0; + sh2 specularLobe = SphericalHarmonics::FauxSpecularLobe(normalWS, -V, roughness); + half3 finalIrradiance = 0; # if defined(INTERIOR) @@ -103,7 +107,6 @@ Texture2D SpecularSSGITexture : register(t10); # endif sh2 skylighting = Skylighting::sample(SharedData::skylightingSettings, SkylightingProbeArray, positionMS.xyz, normalWS); - sh2 specularLobe = Skylighting::fauxSpecularLobeSH(normalWS, -V, roughness); half skylightingSpecular = SphericalHarmonics::FuncProductIntegral(skylighting, specularLobe); skylightingSpecular = Skylighting::mixSpecular(SharedData::skylightingSettings, skylightingSpecular); @@ -138,12 +141,21 @@ Texture2D SpecularSSGITexture : register(t10); uint2 pixCoord2 = (uint2)(uv2.xy / SharedData::BufferDim.zw - 0.5); # endif - half4 ssgiSpecular = SpecularSSGITexture[dispatchID.xy]; -# if defined(VR) - half4 ssgiSpecular2 = SpecularSSGITexture[pixCoord2]; - ssgiSpecular = Stereo::BlendEyeColors(uv1Mono, (float4)ssgiSpecular, uv2Mono, (float4)ssgiSpecular2); 
-# endif - finalIrradiance = finalIrradiance * (1 - ssgiSpecular.a) + ssgiSpecular.rgb; + half4 ssgiIlYSh = SsgiYTexture[dispatchID.xy]; + half ssgiIlY = SphericalHarmonics::FuncProductIntegral(ssgiIlYSh, specularLobe); + half2 ssgiIlCoCg = SsgiCoCgTexture[dispatchID.xy]; + half3 ssgiIlSpecular = max(0, Color::YCoCgToRGB(float3(ssgiIlY, ssgiIlCoCg / Math::PI))); + // pi to compensate for the /pi in specularLobe + // i don't think there really should be a 1/PI but without it the specular is too strong + // reflectance being ambient reflectance doesn't help either + + // TODO: VR Blending (this doesn't make sense tho, because specular is very sensitive to view shifts) + // # if defined(VR) + // half4 ssgiSpecular2 = SpecularSSGITexture[pixCoord2]; + // ssgiSpecular = Stereo::BlendEyeColors(uv1Mono, (float4)ssgiSpecular, uv2Mono, (float4)ssgiSpecular2); + // # endif + + finalIrradiance += ssgiIlSpecular; # endif color += reflectance * finalIrradiance; diff --git a/package/Shaders/Water.hlsl b/package/Shaders/Water.hlsl index cf1531a9a..bc0d0624b 100644 --- a/package/Shaders/Water.hlsl +++ b/package/Shaders/Water.hlsl @@ -547,7 +547,7 @@ float3 GetWaterSpecularColor(PS_INPUT input, float3 normal, float3 viewDirection # endif sh2 skylighting = Skylighting::sample(SharedData::skylightingSettings, Skylighting::SkylightingProbeArray, positionMSSkylight, normal); - sh2 specularLobe = Skylighting::fauxSpecularLobeSH(normal, -viewDirection, 0.0); + sh2 specularLobe = SphericalHarmonics::FauxSpecularLobe(normal, -viewDirection, 0.0); float skylightingSpecular = SphericalHarmonics::FuncProductIntegral(skylighting, specularLobe); skylightingSpecular = lerp(1.0, skylightingSpecular, Skylighting::getFadeOutFactor(input.WPosition.xyz)); diff --git a/src/Deferred.cpp b/src/Deferred.cpp index cb314a6db..da3a02629 100644 --- a/src/Deferred.cpp +++ b/src/Deferred.cpp @@ -349,23 +349,26 @@ void Deferred::DeferredPasses() auto skylighting = Skylighting::GetSingleton(); auto ssgi = 
ScreenSpaceGI::GetSingleton(); + if (ssgi->loaded) + ssgi->DrawSSGI(prevDiffuseAmbientTexture); + auto [ssgi_ao, ssgi_y, ssgi_cocg] = ssgi->GetOutputTextures(); auto dispatchCount = Util::GetScreenDispatchCount(); if (ssgi->loaded) { - ssgi->DrawSSGI(prevDiffuseAmbientTexture); - // Ambient Composite { TracyD3D11Zone(State::GetSingleton()->tracyCtx, "Ambient Composite"); - ID3D11ShaderResourceView* srvs[6]{ + ID3D11ShaderResourceView* srvs[8]{ albedo.SRV, normalRoughness.SRV, skylighting->loaded || REL::Module::IsVR() ? depth.depthSRV : nullptr, skylighting->loaded ? skylighting->texProbeArray->srv.get() : nullptr, - ssgi->settings.Enabled ? ssgi->texGI[ssgi->outputGIIdx]->srv.get() : nullptr, masks2.SRV, + ssgi_ao, + ssgi_y, + ssgi_cocg, }; context->CSSetShaderResources(0, ARRAYSIZE(srvs), srvs); @@ -405,9 +408,7 @@ void Deferred::DeferredPasses() { TracyD3D11Zone(State::GetSingleton()->tracyCtx, "Deferred Composite"); - bool doSSGISpecular = ssgi->loaded && ssgi->settings.Enabled && ssgi->settings.EnableGI && ssgi->settings.EnableSpecularGI; - - ID3D11ShaderResourceView* srvs[11]{ + ID3D11ShaderResourceView* srvs[12]{ specular.SRV, albedo.SRV, normalRoughness.SRV, @@ -418,7 +419,8 @@ void Deferred::DeferredPasses() dynamicCubemaps->loaded ? dynamicCubemaps->envTexture->srv.get() : nullptr, dynamicCubemaps->loaded ? dynamicCubemaps->envReflectionsTexture->srv.get() : nullptr, dynamicCubemaps->loaded && skylighting->loaded ? skylighting->texProbeArray->srv.get() : nullptr, - doSSGISpecular ? 
ssgi->texGISpecular[ssgi->outputGIIdx]->srv.get() : nullptr, + ssgi_y, + ssgi_cocg, }; if (dynamicCubemaps->loaded) diff --git a/src/Features/ScreenSpaceGI.cpp b/src/Features/ScreenSpaceGI.cpp index bbb5268fc..3d68df8c0 100644 --- a/src/Features/ScreenSpaceGI.cpp +++ b/src/Features/ScreenSpaceGI.cpp @@ -11,8 +11,6 @@ NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT( ScreenSpaceGI::Settings, Enabled, EnableGI, - EnableSpecularGI, - HalfRate, HalfRes, EnableTemporalDenoiser, NumSlices, @@ -32,7 +30,6 @@ NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT( NormalDisocclusion, MaxAccumFrames, BlurRadius, - BlurPasses, DistanceNormalisation) //////////////////////////////////////////////////////////////////////////////////// @@ -54,23 +51,11 @@ void ScreenSpaceGI::DrawSettings() ImGui::Checkbox("Show Advanced Options", &showAdvanced); - if (ImGui::BeginTable("Toggles", 3)) { + if (ImGui::BeginTable("Toggles", 2)) { ImGui::TableNextColumn(); ImGui::Checkbox("Enabled", &settings.Enabled); ImGui::TableNextColumn(); - recompileFlag |= ImGui::Checkbox("Diffuse IL", &settings.EnableGI); - if (auto _tt = Util::HoverTooltipWrapper()) - ImGui::Text("Simulates indirect diffuse lighting."); - ImGui::TableNextColumn(); - { - auto _ = Util::DisableGuard(!settings.EnableGI); - recompileFlag |= ImGui::Checkbox("Specular IL", &settings.EnableSpecularGI); - if (auto _tt = Util::HoverTooltipWrapper()) - ImGui::Text( - "Reuses diffuse samples to simulate indirect specular lighting.\n" - "Doubles the cost of denoisers.\n" - "Only for Complex Material or TruePBR materials."); - } + recompileFlag |= ImGui::Checkbox("Indirect Lighting (IL)", &settings.EnableGI); ImGui::EndTable(); } @@ -88,15 +73,12 @@ void ScreenSpaceGI::DrawSettings() recompileFlag = true; } if (auto _tt = Util::HoverTooltipWrapper()) - ImGui::Text( - "1 Slice, 6 Steps, no blur, no GI\n" - "Try smaller effect radius :)"); + ImGui::Text("1 Slice, 6 Steps, no blur, no GI\n"); ImGui::TableNextColumn(); if (ImGui::Button("Low", { 
-1, 0 })) { settings.NumSlices = 2; settings.NumSteps = 4; - settings.EnableBlur = true; settings.EnableGI = true; recompileFlag = true; } @@ -107,7 +89,6 @@ void ScreenSpaceGI::DrawSettings() if (ImGui::Button("Medium", { -1, 0 })) { settings.NumSlices = 3; settings.NumSteps = 6; - settings.EnableBlur = true; settings.EnableGI = true; recompileFlag = true; } @@ -118,7 +99,6 @@ void ScreenSpaceGI::DrawSettings() if (ImGui::Button("High", { -1, 0 })) { settings.NumSlices = 4; settings.NumSteps = 8; - settings.EnableBlur = true; settings.EnableGI = true; recompileFlag = true; } @@ -129,7 +109,6 @@ void ScreenSpaceGI::DrawSettings() if (ImGui::Button("Ultra", { -1, 0 })) { settings.NumSlices = 6; settings.NumSteps = 10; - settings.EnableBlur = true; settings.EnableGI = true; recompileFlag = true; } @@ -152,11 +131,6 @@ void ScreenSpaceGI::DrawSettings() "Controls accuracy of lighting, and noise when effect radius is large."); if (ImGui::BeginTable("Less Work", 2)) { - ImGui::TableNextColumn(); - recompileFlag |= ImGui::Checkbox("Half Rate", &settings.HalfRate); - if (auto _tt = Util::HoverTooltipWrapper()) - ImGui::Text("Shading only half the pixels per frame. 
Cheaper for higher settings but has more ghosting, and takes twice as long to converge."); - ImGui::TableNextColumn(); recompileFlag |= ImGui::Checkbox("Half Resolution", &settings.HalfRes); if (auto _tt = Util::HoverTooltipWrapper()) @@ -172,7 +146,7 @@ void ScreenSpaceGI::DrawSettings() { auto _ = Util::DisableGuard(!settings.EnableGI); - ImGui::SliderFloat("IL Source Brightness", &settings.GIStrength, 0.f, 10.f, "%.2f"); + ImGui::SliderFloat("IL Source Brightness", &settings.GIStrength, 0.f, 6.f, "%.2f"); } ImGui::Separator(); @@ -284,10 +258,6 @@ void ScreenSpaceGI::DrawSettings() auto _ = Util::DisableGuard(!settings.EnableBlur); ImGui::SliderFloat("Blur Radius", &settings.BlurRadius, 0.f, 30.f, "%.1f px"); - ImGui::SliderInt("Blur Passes", (int*)&settings.BlurPasses, 1, 3, "%d", ImGuiSliderFlags_AlwaysClamp); - if (auto _tt = Util::HoverTooltipWrapper()) - ImGui::Text("Blurring repeatedly for x times."); - if (showAdvanced) { ImGui::SliderFloat("Geometry Weight", &settings.DistanceNormalisation, 0.f, 5.f, "%.2f"); if (auto _tt = Util::HoverTooltipWrapper()) @@ -310,10 +280,12 @@ void ScreenSpaceGI::DrawSettings() BUFFER_VIEWER_NODE(texWorkingDepth, debugRescale) BUFFER_VIEWER_NODE(texPrevGeo, debugRescale) BUFFER_VIEWER_NODE(texRadiance, debugRescale) - BUFFER_VIEWER_NODE(texGI[0], debugRescale) - BUFFER_VIEWER_NODE(texGI[1], debugRescale) - BUFFER_VIEWER_NODE(texGISpecular[0], debugRescale) - BUFFER_VIEWER_NODE(texGISpecular[1], debugRescale) + BUFFER_VIEWER_NODE(texAo[0], debugRescale) + BUFFER_VIEWER_NODE(texAo[1], debugRescale) + BUFFER_VIEWER_NODE(texIlY[0], debugRescale) + BUFFER_VIEWER_NODE(texIlY[1], debugRescale) + BUFFER_VIEWER_NODE(texIlCoCg[0], debugRescale) + BUFFER_VIEWER_NODE(texIlCoCg[1], debugRescale) BUFFER_VIEWER_NODE(deferred->prevDiffuseAmbientTexture, debugRescale) @@ -398,25 +370,35 @@ void ScreenSpaceGI::SetupResources() texDesc.MipLevels = srvDesc.Texture2D.MipLevels = 1; srvDesc.Format = uavDesc.Format = texDesc.Format = 
DXGI_FORMAT_R16G16B16A16_FLOAT; { - texGI[0] = eastl::make_unique(texDesc); - texGI[0]->CreateSRV(srvDesc); - texGI[0]->CreateUAV(uavDesc); + texIlY[0] = eastl::make_unique(texDesc); + texIlY[0]->CreateSRV(srvDesc); + texIlY[0]->CreateUAV(uavDesc); - texGI[1] = eastl::make_unique(texDesc); - texGI[1]->CreateSRV(srvDesc); - texGI[1]->CreateUAV(uavDesc); - - texGISpecular[0] = eastl::make_unique(texDesc); - texGISpecular[0]->CreateSRV(srvDesc); - texGISpecular[0]->CreateUAV(uavDesc); + texIlY[1] = eastl::make_unique(texDesc); + texIlY[1]->CreateSRV(srvDesc); + texIlY[1]->CreateUAV(uavDesc); + } + srvDesc.Format = uavDesc.Format = texDesc.Format = DXGI_FORMAT_R16G16_FLOAT; + { + texIlCoCg[0] = eastl::make_unique(texDesc); + texIlCoCg[0]->CreateSRV(srvDesc); + texIlCoCg[0]->CreateUAV(uavDesc); - texGISpecular[1] = eastl::make_unique(texDesc); - texGISpecular[1]->CreateSRV(srvDesc); - texGISpecular[1]->CreateUAV(uavDesc); + texIlCoCg[1] = eastl::make_unique(texDesc); + texIlCoCg[1]->CreateSRV(srvDesc); + texIlCoCg[1]->CreateUAV(uavDesc); } srvDesc.Format = uavDesc.Format = texDesc.Format = DXGI_FORMAT_R8_UNORM; { + texAo[0] = eastl::make_unique(texDesc); + texAo[0]->CreateSRV(srvDesc); + texAo[0]->CreateUAV(uavDesc); + + texAo[1] = eastl::make_unique(texDesc); + texAo[1]->CreateSRV(srvDesc); + texAo[1]->CreateUAV(uavDesc); + texAccumFrames[0] = eastl::make_unique(texDesc); texAccumFrames[0]->CreateSRV(srvDesc); texAccumFrames[0]->CreateUAV(uavDesc); @@ -491,7 +473,7 @@ void ScreenSpaceGI::SetupResources() void ScreenSpaceGI::ClearShaderCache() { static const std::vector*> shaderPtrs = { - &prefilterDepthsCompute, &radianceDisoccCompute, &giCompute, &blurCompute, &blurSpecularCompute, &upsampleCompute + &prefilterDepthsCompute, &radianceDisoccCompute, &giCompute, &blurCompute, &upsampleCompute }; for (auto shader : shaderPtrs) @@ -515,7 +497,6 @@ void ScreenSpaceGI::CompileComputeShaders() { &radianceDisoccCompute, "radianceDisocc.cs.hlsl", {} }, { &giCompute, 
"gi.cs.hlsl", {} }, { &blurCompute, "blur.cs.hlsl", {} }, - { &blurSpecularCompute, "blur.cs.hlsl", { { "SPECULAR_BLUR", "" } } }, { &upsampleCompute, "upsample.cs.hlsl", {} }, }; for (auto& info : shaderInfos) { @@ -523,14 +504,10 @@ void ScreenSpaceGI::CompileComputeShaders() info.defines.push_back({ "VR", "" }); if (settings.HalfRes) info.defines.push_back({ "HALF_RES", "" }); - if (settings.HalfRate) - info.defines.push_back({ "HALF_RATE", "" }); if (settings.EnableTemporalDenoiser) info.defines.push_back({ "TEMPORAL_DENOISER", "" }); if (settings.EnableGI) info.defines.push_back({ "GI", "" }); - if (settings.EnableSpecularGI) - info.defines.push_back({ "GI_SPECULAR", "" }); if (settings.EnableGIBounce) info.defines.push_back({ "GI_BOUNCE", "" }); } @@ -546,7 +523,7 @@ void ScreenSpaceGI::CompileComputeShaders() bool ScreenSpaceGI::ShadersOK() { - return texNoise && prefilterDepthsCompute && radianceDisoccCompute && giCompute && blurCompute && blurSpecularCompute && upsampleCompute; + return texNoise && prefilterDepthsCompute && radianceDisoccCompute && giCompute && blurCompute && upsampleCompute; } void ScreenSpaceGI::UpdateSB() @@ -613,18 +590,20 @@ void ScreenSpaceGI::DrawSSGI(Texture2D* srcPrevAmbient) auto& context = State::GetSingleton()->context; if (!(settings.Enabled && ShadersOK())) { - FLOAT clr[4] = { 0., 0., 0., 0. 
}; - context->ClearUnorderedAccessViewFloat(texGI[outputGIIdx]->uav.get(), clr); - context->ClearUnorderedAccessViewFloat(texGISpecular[outputGIIdx]->uav.get(), clr); - + FLOAT clr[4] = { 0.f, 0.f, 0.f, 0.f }; + context->ClearUnorderedAccessViewFloat(texAo[outputAoIdx]->uav.get(), clr); // clear the AO buffer that GetOutputTextures() exposes, not a fixed slot + context->ClearUnorderedAccessViewFloat(texIlY[outputIlIdx]->uav.get(), clr); + context->ClearUnorderedAccessViewFloat(texIlCoCg[outputIlIdx]->uav.get(), clr); return; } ZoneScoped; TracyD3D11Zone(State::GetSingleton()->tracyCtx, "SSGI"); + static uint lastFrameAoTexIdx = 0; static uint lastFrameGITexIdx = 0; static uint lastFrameAccumTexIdx = 0; + uint inputAoTexIdx = lastFrameAoTexIdx; uint inputGITexIdx = lastFrameGITexIdx; ////////////////////////////////////////////////////// @@ -634,8 +613,6 @@ void ScreenSpaceGI::DrawSSGI(Texture2D* srcPrevAmbient) UpdateSB(); - bool doSpecular = settings.EnableGI && settings.EnableSpecularGI; - ////////////////////////////////////////////////////// auto renderer = RE::BSGraphics::Renderer::GetSingleton(); @@ -647,7 +624,7 @@ void ScreenSpaceGI::DrawSSGI(Texture2D* srcPrevAmbient) uint halfRes[2] = { resolution[0] >> 1, resolution[1] >> 1 }; auto internalRes = settings.HalfRes ? halfRes : resolution; - std::array srvs = { nullptr }; + std::array srvs = { nullptr }; std::array uavs = { nullptr }; std::array samplers = { pointClampSampler.get(), linearClampSampler.get() }; auto cb = ssgiCB->CB(); @@ -685,19 +662,21 @@ void ScreenSpaceGI::DrawSSGI(Texture2D* srcPrevAmbient) resetViews(); srvs.at(0) = rts[deferred->forwardRenderTargets[0]].SRV; - srvs.at(1) = texGI[inputGITexIdx]->srv.get(); - srvs.at(2) = doSpecular ? 
texGISpecular[inputGITexIdx]->srv.get() : nullptr; - srvs.at(3) = texWorkingDepth->srv.get(); - srvs.at(4) = rts[NORMALROUGHNESS].SRV; - srvs.at(5) = texPrevGeo->srv.get(); - srvs.at(6) = rts[RE::RENDER_TARGET::kMOTION_VECTOR].SRV; - srvs.at(7) = srcPrevAmbient->srv.get(); - srvs.at(8) = texAccumFrames[lastFrameAccumTexIdx]->srv.get(); + srvs.at(1) = texWorkingDepth->srv.get(); + srvs.at(2) = rts[NORMALROUGHNESS].SRV; + srvs.at(3) = texPrevGeo->srv.get(); + srvs.at(4) = rts[RE::RENDER_TARGET::kMOTION_VECTOR].SRV; + srvs.at(5) = srcPrevAmbient->srv.get(); + srvs.at(6) = texAccumFrames[lastFrameAccumTexIdx]->srv.get(); + srvs.at(7) = texAo[inputAoTexIdx]->srv.get(); + srvs.at(8) = texIlY[inputGITexIdx]->srv.get(); + srvs.at(9) = texIlCoCg[inputGITexIdx]->srv.get(); uavs.at(0) = texRadiance->uav.get(); uavs.at(1) = texAccumFrames[!lastFrameAccumTexIdx]->uav.get(); - uavs.at(2) = texGI[!inputGITexIdx]->uav.get(); - uavs.at(3) = doSpecular ? texGISpecular[!inputGITexIdx]->uav.get() : nullptr; + uavs.at(2) = texAo[!inputAoTexIdx]->uav.get(); + uavs.at(3) = texIlY[!inputGITexIdx]->uav.get(); + uavs.at(4) = texIlCoCg[!inputGITexIdx]->uav.get(); context->CSSetShaderResources(0, (uint)srvs.size(), srvs.data()); context->CSSetUnorderedAccessViews(0, (uint)uavs.size(), uavs.data(), nullptr); @@ -706,6 +685,7 @@ void ScreenSpaceGI::DrawSSGI(Texture2D* srcPrevAmbient) context->GenerateMips(texRadiance->srv.get()); + inputAoTexIdx = !inputAoTexIdx; inputGITexIdx = !inputGITexIdx; lastFrameAccumTexIdx = !lastFrameAccumTexIdx; } @@ -720,12 +700,13 @@ void ScreenSpaceGI::DrawSSGI(Texture2D* srcPrevAmbient) srvs.at(2) = texRadiance->srv.get(); srvs.at(3) = texNoise->srv.get(); srvs.at(4) = texAccumFrames[lastFrameAccumTexIdx]->srv.get(); - srvs.at(5) = texGI[inputGITexIdx]->srv.get(); - srvs.at(6) = texGISpecular[inputGITexIdx]->srv.get(); + srvs.at(5) = texAo[inputAoTexIdx]->srv.get(); + srvs.at(6) = texIlY[inputGITexIdx]->srv.get(); + srvs.at(7) = 
texIlCoCg[inputGITexIdx]->srv.get(); - uavs.at(0) = texGI[!inputGITexIdx]->uav.get(); - uavs.at(1) = texGISpecular[!inputGITexIdx]->uav.get(); - uavs.at(2) = nullptr; + uavs.at(0) = texAo[!inputAoTexIdx]->uav.get(); + uavs.at(1) = texIlY[!inputGITexIdx]->uav.get(); + uavs.at(2) = texIlCoCg[!inputGITexIdx]->uav.get(); uavs.at(3) = texPrevGeo->uav.get(); context->CSSetShaderResources(0, (uint)srvs.size(), srvs.data()); @@ -733,82 +714,60 @@ void ScreenSpaceGI::DrawSSGI(Texture2D* srcPrevAmbient) context->CSSetShader(giCompute.get(), nullptr, 0); context->Dispatch((internalRes[0] + 7u) >> 3, (internalRes[1] + 7u) >> 3, 1); + inputAoTexIdx = !inputAoTexIdx; inputGITexIdx = !inputGITexIdx; lastFrameGITexIdx = inputGITexIdx; + lastFrameAoTexIdx = inputAoTexIdx; } // blur if (settings.EnableBlur) { - for (uint i = 0; i < settings.BlurPasses; i++) { - if (doSpecular) { - TracyD3D11Zone(State::GetSingleton()->tracyCtx, "SSGI - Specular Blur"); - resetViews(); - srvs.at(0) = texGISpecular[inputGITexIdx]->srv.get(); - srvs.at(1) = texAccumFrames[lastFrameAccumTexIdx]->srv.get(); - srvs.at(2) = texWorkingDepth->srv.get(); - srvs.at(3) = rts[NORMALROUGHNESS].SRV; - - uavs.at(0) = texGISpecular[!inputGITexIdx]->uav.get(); - uavs.at(1) = nullptr; - - context->CSSetShaderResources(0, (uint)srvs.size(), srvs.data()); - context->CSSetUnorderedAccessViews(0, (uint)uavs.size(), uavs.data(), nullptr); - context->CSSetShader(blurSpecularCompute.get(), nullptr, 0); - context->Dispatch((internalRes[0] + 7u) >> 3, (internalRes[1] + 7u) >> 3, 1); - } - - TracyD3D11Zone(State::GetSingleton()->tracyCtx, "SSGI - Diffuse Blur"); + TracyD3D11Zone(State::GetSingleton()->tracyCtx, "SSGI - Diffuse Blur"); - resetViews(); - srvs.at(0) = texGI[inputGITexIdx]->srv.get(); - srvs.at(1) = texAccumFrames[lastFrameAccumTexIdx]->srv.get(); - srvs.at(2) = texWorkingDepth->srv.get(); - srvs.at(3) = rts[NORMALROUGHNESS].SRV; + resetViews(); + srvs.at(0) = texWorkingDepth->srv.get(); + srvs.at(1) = 
rts[NORMALROUGHNESS].SRV; + srvs.at(2) = texAccumFrames[lastFrameAccumTexIdx]->srv.get(); + srvs.at(3) = texIlY[inputGITexIdx]->srv.get(); + srvs.at(4) = texIlCoCg[inputGITexIdx]->srv.get(); - uavs.at(0) = texGI[!inputGITexIdx]->uav.get(); - uavs.at(1) = texAccumFrames[!lastFrameAccumTexIdx]->uav.get(); + uavs.at(0) = texAccumFrames[!lastFrameAccumTexIdx]->uav.get(); + uavs.at(1) = texIlY[!inputGITexIdx]->uav.get(); + uavs.at(2) = texIlCoCg[!inputGITexIdx]->uav.get(); - context->CSSetShaderResources(0, (uint)srvs.size(), srvs.data()); - context->CSSetUnorderedAccessViews(0, (uint)uavs.size(), uavs.data(), nullptr); - context->CSSetShader(blurCompute.get(), nullptr, 0); - context->Dispatch((internalRes[0] + 7u) >> 3, (internalRes[1] + 7u) >> 3, 1); + context->CSSetShaderResources(0, (uint)srvs.size(), srvs.data()); + context->CSSetUnorderedAccessViews(0, (uint)uavs.size(), uavs.data(), nullptr); + context->CSSetShader(blurCompute.get(), nullptr, 0); + context->Dispatch((internalRes[0] + 7u) >> 3, (internalRes[1] + 7u) >> 3, 1); - inputGITexIdx = !inputGITexIdx; - lastFrameGITexIdx = inputGITexIdx; - lastFrameAccumTexIdx = !lastFrameAccumTexIdx; - } + inputGITexIdx = !inputGITexIdx; + lastFrameGITexIdx = inputGITexIdx; + lastFrameAccumTexIdx = !lastFrameAccumTexIdx; } // upsasmple if (settings.HalfRes) { resetViews(); srvs.at(0) = texWorkingDepth->srv.get(); - srvs.at(1) = texGI[inputGITexIdx]->srv.get(); + srvs.at(1) = texAo[inputAoTexIdx]->srv.get(); + srvs.at(2) = texIlY[inputGITexIdx]->srv.get(); + srvs.at(3) = texIlCoCg[inputGITexIdx]->srv.get(); - uavs.at(0) = texGI[!inputGITexIdx]->uav.get(); + uavs.at(0) = texAo[!inputAoTexIdx]->uav.get(); + uavs.at(1) = texIlY[!inputGITexIdx]->uav.get(); + uavs.at(2) = texIlCoCg[!inputGITexIdx]->uav.get(); context->CSSetShaderResources(0, (uint)srvs.size(), srvs.data()); context->CSSetUnorderedAccessViews(0, (uint)uavs.size(), uavs.data(), nullptr); context->CSSetShader(upsampleCompute.get(), nullptr, 0); 
context->Dispatch((resolution[0] + 7u) >> 3, (resolution[1] + 7u) >> 3, 1); - if (doSpecular) { - resetViews(); - srvs.at(0) = texWorkingDepth->srv.get(); - srvs.at(1) = texGISpecular[inputGITexIdx]->srv.get(); - - uavs.at(0) = texGISpecular[!inputGITexIdx]->uav.get(); - - context->CSSetShaderResources(0, (uint)srvs.size(), srvs.data()); - context->CSSetUnorderedAccessViews(0, (uint)uavs.size(), uavs.data(), nullptr); - context->CSSetShader(upsampleCompute.get(), nullptr, 0); - context->Dispatch((resolution[0] + 7u) >> 3, (resolution[1] + 7u) >> 3, 1); - } - + inputAoTexIdx = !inputAoTexIdx; inputGITexIdx = !inputGITexIdx; } - outputGIIdx = inputGITexIdx; + outputAoIdx = inputAoTexIdx; + outputIlIdx = inputGITexIdx; // cleanup resetViews(); diff --git a/src/Features/ScreenSpaceGI.h b/src/Features/ScreenSpaceGI.h index 8edbfa8f2..d963ebe53 100644 --- a/src/Features/ScreenSpaceGI.h +++ b/src/Features/ScreenSpaceGI.h @@ -40,21 +40,20 @@ struct ScreenSpaceGI : Feature ////////////////////////////////////////////////////////////////////////////////// bool recompileFlag = false; - uint outputGIIdx = 0; + uint outputAoIdx = 0; + uint outputIlIdx = 0; struct Settings { bool Enabled = true; bool EnableGI = true; - bool EnableSpecularGI = false; // performance/quality uint NumSlices = 2; uint NumSteps = 4; bool HalfRes = true; - bool HalfRate = true; // visual float MinScreenRadius = 0.01f; - float AORadius = 50.f; + float AORadius = 100.f; float GIRadius = 500.f; float Thickness = 75.f; float2 DepthFadeRange = { 4e4, 5e4 }; @@ -65,15 +64,14 @@ struct ScreenSpaceGI : Feature float GIDistanceCompensation = 0.f; // mix float AOPower = 2.f; - float GIStrength = 3.f; + float GIStrength = 1.f; // denoise bool EnableTemporalDenoiser = true; bool EnableBlur = true; float DepthDisocclusion = .03f; float NormalDisocclusion = .1f; - uint MaxAccumFrames = 16; - float BlurRadius = 15.f; - uint BlurPasses = 1; + uint MaxAccumFrames = 10; + float BlurRadius = 5.f; float 
DistanceNormalisation = 2.f; } settings; @@ -125,8 +123,16 @@ struct ScreenSpaceGI : Feature eastl::unique_ptr texPrevGeo = nullptr; eastl::unique_ptr texRadiance = nullptr; eastl::unique_ptr texAccumFrames[2] = { nullptr }; - eastl::unique_ptr texGI[2] = { nullptr }; - eastl::unique_ptr texGISpecular[2] = { nullptr }; + eastl::unique_ptr texAo[2] = { nullptr }; + eastl::unique_ptr texIlY[2] = { nullptr }; + eastl::unique_ptr texIlCoCg[2] = { nullptr }; + + inline auto GetOutputTextures() + { + return (loaded && settings.Enabled) ? + std::make_tuple(texAo[outputAoIdx]->srv.get(), texIlY[outputIlIdx]->srv.get(), texIlCoCg[outputIlIdx]->srv.get()) : + std::make_tuple(nullptr, nullptr, nullptr); + } winrt::com_ptr linearClampSampler = nullptr; winrt::com_ptr pointClampSampler = nullptr; @@ -135,6 +141,5 @@ struct ScreenSpaceGI : Feature winrt::com_ptr radianceDisoccCompute = nullptr; winrt::com_ptr giCompute = nullptr; winrt::com_ptr blurCompute = nullptr; - winrt::com_ptr blurSpecularCompute = nullptr; winrt::com_ptr upsampleCompute = nullptr; }; \ No newline at end of file