feat: refactor ssgi to use spherical harmonics (#837)

* chore: remove ao blur * chore: random changes * feat: add SH IL * feat: add back blur * feat: add back half res * fix: ao temporal denoise
doodlum · Dec 9, 2024 · 21f99f7 · 21f99f7
1 parent 27aa73a
commit 21f99f7
Show file tree

Hide file tree

Showing 13 changed files with 350 additions and 389 deletions.
diff --git a/features/Screen Space GI/Shaders/ScreenSpaceGI/blur.cs.hlsl b/features/Screen Space GI/Shaders/ScreenSpaceGI/blur.cs.hlsl
@@ -8,13 +8,15 @@
 #include "Common/VR.hlsli"
 #include "ScreenSpaceGI/common.hlsli"
 
-Texture2D<float4> srcGI : register(t0);                // maybe half-res
-Texture2D<unorm float> srcAccumFrames : register(t1);  // maybe half-res
-Texture2D<half> srcDepth : register(t2);
-Texture2D<half4> srcNormalRoughness : register(t3);
+Texture2D<half> srcDepth : register(t0);
+Texture2D<half4> srcNormalRoughness : register(t1);
+Texture2D<unorm float> srcAccumFrames : register(t2);  // maybe half-res
+Texture2D<float4> srcIlY : register(t3);               // maybe half-res
+Texture2D<float2> srcIlCoCg : register(t4);            // maybe half-res
 
-RWTexture2D<float4> outGI : register(u0);
-RWTexture2D<unorm float> outAccumFrames : register(u1);
+RWTexture2D<unorm float> outAccumFrames : register(u0);
+RWTexture2D<float4> outIlY : register(u1);
+RWTexture2D<float2> outIlCoCg : register(u2);
 
 // samples = 8, min distance = 0.5, average samples on radius = 2
 static const float3 g_Poisson8[8] = {
@@ -73,6 +75,8 @@ float2x3 getKernelBasis(float3 D, float3 N, float roughness = 1.0, float anisoFa
 	return float2x3(T, B);
 }
 
+// TODO: spinning blur
+
 [numthreads(8, 8, 1)] void main(const uint2 dtid
 								: SV_DispatchThreadID) {
 	const float2 frameScale = FrameDim * RcpTexDim;
@@ -90,34 +94,28 @@ float2x3 getKernelBasis(float3 D, float3 N, float roughness = 1.0, float anisoFa
 
 	float depth = READ_DEPTH(srcDepth, dtid);
 	float3 pos = ScreenToViewPosition(screenPos, depth, eyeIndex);
-	float4 normalRoughness = FULLRES_LOAD(srcNormalRoughness, dtid, uv, samplerLinearClamp);
-	float3 normal = GBuffer::DecodeNormal(normalRoughness.xy);
-#ifdef SPECULAR_BLUR
-	float roughness = 1 - normalRoughness.z;
-#endif
+	float3 normal = GBuffer::DecodeNormal(FULLRES_LOAD(srcNormalRoughness, dtid, uv, samplerLinearClamp).xy);
 
 	const float2 pixelDirRBViewspaceSizeAtCenterZ = depth.xx * (eyeIndex == 0 ? NDCToViewMul.xy : NDCToViewMul.zw) * RCP_OUT_FRAME_DIM;
 	const float worldRadius = radius * pixelDirRBViewspaceSizeAtCenterZ.x;
-#ifdef SPECULAR_BLUR
-	float2x3 TvBv = getKernelBasis(getSpecularDominantDirection(normal, -normalize(pos), roughness), normal, roughness);
-	float halfAngle = specularLobeHalfAngle(roughness);
-#else
 	float2x3 TvBv = getKernelBasis(normal, normal);  // D = N
 	float halfAngle = Math::HALF_PI * .5f;
-#endif
+
 	TvBv[0] *= worldRadius;
 	TvBv[1] *= worldRadius;
 #ifdef TEMPORAL_DENOISER
 	halfAngle *= 1 - lerp(0, 0.8, sqrt(accumFrames / (float)MaxAccumFrames));
 #endif
 
-	float4 gi = srcGI[dtid];
+	const float4 ilY = srcIlY[dtid];
+	const float2 ilCoCg = srcIlCoCg[dtid];
 
-	float4 sum = gi;
-#if defined(TEMPORAL_DENOISER) && !defined(SPECULAR_BLUR)
-	float fsum = accumFrames;
+	float4 ySum = ilY;
+	float2 coCgSum = ilCoCg;
+#if defined(TEMPORAL_DENOISER)
+	float fSum = accumFrames;
 #endif
-	float wsum = 1;
+	float wSum = 1;
 	for (uint i = 0; i < numSamples; i++) {
 		float w = GaussianWeight(g_Poisson8[i].z);
 
@@ -146,30 +144,28 @@ float2x3 getKernelBasis(float3 D, float3 N, float roughness = 1.0, float anisoFa
 
 		float4 normalRoughnessSample = srcNormalRoughness.SampleLevel(samplerLinearClamp, uvSample * frameScale, 0);
 		float3 normalSample = GBuffer::DecodeNormal(normalRoughnessSample.xy);
-#ifdef SPECULAR_BLUR
-		float roughnessSample = 1 - normalRoughnessSample.z;
-#endif
-
-		float4 giSample = srcGI.SampleLevel(samplerLinearClamp, uvSample * OUT_FRAME_SCALE, 0);
 
 		// geometry weight
 		w *= saturate(1 - abs(dot(normal, posSample - pos)) * DistanceNormalisation);
 		// normal weight
 		w *= 1 - saturate(FastMath::acosFast4(saturate(dot(normalSample, normal))) / halfAngle);
-#ifdef SPECULAR_BLUR
-		// roughness weight
-		w *= abs(roughness - roughnessSample) / (roughness * roughness * 0.99 + 0.01);
-#endif
 
-		sum += giSample * w;
-#if defined(TEMPORAL_DENOISER) && !defined(SPECULAR_BLUR)
-		fsum += srcAccumFrames.SampleLevel(samplerLinearClamp, uvSample * OUT_FRAME_SCALE, 0) * w;
+		if (w > 1e-8) {
+			float4 ySample = srcIlY.SampleLevel(samplerLinearClamp, uvSample * OUT_FRAME_SCALE, 0);
+			float2 coCgSample = srcIlCoCg.SampleLevel(samplerLinearClamp, uvSample * OUT_FRAME_SCALE, 0);
+
+			ySum += ySample * w;
+			coCgSum += coCgSample * w;
+#if defined(TEMPORAL_DENOISER)
+			fSum += srcAccumFrames.SampleLevel(samplerLinearClamp, uvSample * OUT_FRAME_SCALE, 0) * w;
 #endif
-		wsum += w;
+			wSum += w;
+		}
 	}
 
-	outGI[dtid] = sum / wsum;
-#if defined(TEMPORAL_DENOISER) && !defined(SPECULAR_BLUR)
-	outAccumFrames[dtid] = fsum / wsum;
+	outIlY[dtid] = ySum / wSum;
+	outIlCoCg[dtid] = coCgSum / wSum;
+#if defined(TEMPORAL_DENOISER)
+	outAccumFrames[dtid] = fSum / wSum;
 #endif
 }
diff --git a/features/Screen Space GI/Shaders/ScreenSpaceGI/gi.cs.hlsl b/features/Screen Space GI/Shaders/ScreenSpaceGI/gi.cs.hlsl
@@ -20,12 +20,17 @@
 // Screen Space Indirect Lighting with Visibility Bitmask
 //  https://arxiv.org/abs/2301.11376
 //
+// Exploring Raytraced Future in Metro Exodus
+//  https://developer.download.nvidia.com/video/gputechconf/gtc/2019/presentation/s9985-exploring-ray-traced-future-in-metro-exodus.pdf
+//
 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
+#include "Common/Color.hlsli"
 #include "Common/FastMath.hlsli"
 #include "Common/FrameBuffer.hlsli"
 #include "Common/GBuffer.hlsli"
 #include "Common/Math.hlsli"
+#include "Common/Spherical Harmonics/SphericalHarmonics.hlsli"
 #include "Common/VR.hlsli"
 #include "ScreenSpaceGI/common.hlsli"
 
@@ -36,12 +41,13 @@ Texture2D<float4> srcNormalRoughness : register(t1);
 Texture2D<float3> srcRadiance : register(t2);  // maybe half-res
 Texture2D<unorm float2> srcNoise : register(t3);
 Texture2D<unorm float> srcAccumFrames : register(t4);  // maybe half-res
-Texture2D<float4> srcPrevGI : register(t5);            // maybe half-res
-Texture2D<float4> srcPrevGISpecular : register(t6);    // maybe half-res
+Texture2D<float> srcPrevAo : register(t5);             // maybe half-res
+Texture2D<float4> srcPrevY : register(t6);             // maybe half-res
+Texture2D<float2> srcPrevCoCg : register(t7);          // maybe half-res
 
-RWTexture2D<float4> outGI : register(u0);
-RWTexture2D<float4> outGISpecular : register(u1);
-RWTexture2D<unorm float2> outBentNormal : register(u2);
+RWTexture2D<unorm float> outAo : register(u0);
+RWTexture2D<float4> outY : register(u1);
+RWTexture2D<float2> outCoCg : register(u2);
 RWTexture2D<half3> outPrevGeo : register(u3);
 
 float GetDepthFade(float depth)
@@ -79,7 +85,7 @@ float GetVisibilityFunctionSmithJointApprox(float roughness, float NdotV, float
 
 void CalculateGI(
 	uint2 dtid, float2 uv, float viewspaceZ, float3 viewspaceNormal,
-	out float4 o_currGIAO, out float4 o_currGIAOSpecular, out float3 o_bentNormal)
+	out float o_ao, out sh2 o_currY, out float2 o_currCoCg)
 {
 	const float2 frameScale = FrameDim * RcpTexDim;
 
@@ -112,14 +118,8 @@ void CalculateGI(
 	const float NoV = clamp(dot(viewVec, viewspaceNormal), 1e-5, 1);
 
 	float visibility = 0;
-	float visibilitySpecular = 0;
-	float3 radiance = 0;
-	float3 radianceSpecular = 0;
-	float3 bentNormal = viewspaceNormal;
-
-#ifdef GI_SPECULAR
-	const float roughness = max(0.2, saturate(1 - FULLRES_LOAD(srcNormalRoughness, dtid, uv * frameScale, samplerLinearClamp).z));  // can't handle low roughness
-#endif
+	float4 radianceY = 0;
+	float2 radianceCoCg = 0;
 
 	for (uint slice = 0; slice < NumSlices; slice++) {
 		float phi = (Math::PI * rcpNumSlices) * (slice + noiseSlice);
@@ -142,12 +142,6 @@ void CalculateGI(
 		uint bitmask = 0;
 #ifdef GI
 		uint bitmaskGI = 0;
-#	ifdef GI_SPECULAR
-		uint bitmaskGISpecular = 0;
-		float3 domVec = getSpecularDominantDirection(viewspaceNormal, viewVec, roughness);
-		float3 projectedDomVec = normalize(domVec - axisVec * dot(domVec, axisVec));
-		float nDom = sign(dot(orthoDirectionVec, projectedDomVec)) * FastMath::ACos(saturate(dot(projectedDomVec, viewVec)));
-#	endif
 #endif
 
 		// R1 sequence (http://extremelearning.com.au/unreasonable-effectiveness-of-quasirandom-sequences/)
@@ -197,82 +191,44 @@ void CalculateGI(
 				float angleBackGI = FastMath::ACos(dot(sampleBackHorizonVecGI, viewVec));
 				float2 angleRangeGI = -sideSign * (sideSign == -1 ? float2(angleFront, angleBackGI) : float2(angleBackGI, angleFront));
 
-#	ifdef GI_SPECULAR
-				// thank u Olivier!
-				float coneHalfAngles = max(5e-2, specularLobeHalfAngle(roughness));  // not too small
-				float2 angleRangeSpecular = clamp((angleRangeGI + nDom) * 0.5 / coneHalfAngles, -1, 1) * 0.5 + 0.5;
-
-				// Experimental method using importance sampling
-				// https://agraphicsguynotes.com/posts/sample_microfacet_brdf/
-				// float2 angleRangeSpecular = angleBackGI;
-				// float2 specularSigns = sign(angleRangeSpecular);
-				// angleRangeSpecular = saturate(cos(angleRangeSpecular)) * (roughness2 - 1);
-				// angleRangeSpecular = roughness2 / (angleRangeSpecular * angleRangeSpecular + roughness2 - 1) - 1 / (roughness2 - 1);
-				// angleRangeSpecular = saturate((angleRangeSpecular * specularSigns) * 0.5 + 0.5);
-
-				uint2 bitsRangeGISpecular = uint2(round(angleRangeSpecular.x * 32u), round((angleRangeSpecular.y - angleRangeSpecular.x) * 32u));
-				uint maskedBitsGISpecular = s < GIRadius ? ((1 << bitsRangeGISpecular.y) - 1) << bitsRangeGISpecular.x : 0;
-#	endif
-
 				angleRangeGI = smoothstep(0, 1, (angleRangeGI + n) * RCP_PI + .5);  // https://discord.com/channels/586242553746030596/586245736413528082/1102228968247144570
 
 				uint2 bitsRangeGI = uint2(round(angleRangeGI.x * 32u), round((angleRangeGI.y - angleRangeGI.x) * 32u));
 				uint maskedBitsGI = s < GIRadius ? ((1 << bitsRangeGI.y) - 1) << bitsRangeGI.x : 0;
 
-				uint overlappedBits = maskedBitsGI & ~bitmaskGI;
-				bool checkGI = overlappedBits;
-#	ifdef GI_SPECULAR
-				uint overlappedBitsSpecular = maskedBitsGISpecular & ~bitmaskGISpecular;
-				checkGI = checkGI || overlappedBitsSpecular;
-#	endif
+				uint validBits = maskedBitsGI & ~bitmaskGI;
+				bool checkGI = validBits;
 
 				if (checkGI) {
-					float giBoost = 1 + GIDistanceCompensation * smoothstep(0, GICompensationMaxDist, s * EffectRadius);
+					float giBoost = 4.0 * Math::PI * (1 + GIDistanceCompensation * smoothstep(0, GICompensationMaxDist, s * EffectRadius));
 
 					// IL
 					float3 normalSample = GBuffer::DecodeNormal(srcNormalRoughness.SampleLevel(samplerPointClamp, sampleUV * frameScale, 0).xy);
 					float frontBackMult = saturate(-dot(normalSample, sampleHorizonVec));
 					frontBackMult = frontBackMult < 0 ? abs(frontBackMult) * BackfaceStrength : frontBackMult;  // backface
 
-					float NoL = clamp(dot(viewspaceNormal, sampleHorizonVec), 1e-5, 1);
+					if (frontBackMult > 0.f) {
+						float3 sampleHorizonVecWS = normalize(mul(FrameBuffer::CameraViewInverse[eyeIndex], half4(sampleHorizonVec, 0)).xyz);
 
-					if (frontBackMult > 0.f && NoL > 0.001f) {
 						float3 sampleRadiance = srcRadiance.SampleLevel(samplerPointClamp, sampleUV * OUT_FRAME_SCALE, mipLevel).rgb * frontBackMult * giBoost;
+						sampleRadiance = max(sampleRadiance, 0);
+						float3 sampleRadianceYCoCg = Color::RGBToYCoCg(sampleRadiance);
 
-						float3 diffuseRadiance = sampleRadiance * countbits(overlappedBits) * 0.03125;  // 1/32
-						diffuseRadiance *= NoL;
-						diffuseRadiance = max(0, diffuseRadiance);
-
-						radiance += diffuseRadiance;
-
-#	ifdef GI_SPECULAR
-						float NoH = clamp(dot(viewspaceNormal, normalize(viewVec + sampleHorizonVec)), 1e-5, 1);
-
-						float3 specularRadiance = sampleRadiance * countbits(overlappedBitsSpecular) * 0.03125;  // 1/32
-						specularRadiance *= GetNormalDistributionFunctionGGX(roughness, NoH) * GetVisibilityFunctionSmithJointApprox(roughness, NoV, NoL);
-						specularRadiance = max(0, specularRadiance);
-
-						radianceSpecular += specularRadiance;
-#	endif
+						float bitmaskWeight = countbits(validBits) * 0.03125;
+						radianceY += sampleRadianceYCoCg.r * SphericalHarmonics::Evaluate(sampleHorizonVecWS) * bitmaskWeight;
+						radianceCoCg += sampleRadianceYCoCg.gb * bitmaskWeight;
 					}
 				}
 #endif  // GI
 
 				bitmask |= maskedBits;
 #ifdef GI
 				bitmaskGI |= maskedBitsGI;
-#	ifdef GI_SPECULAR
-				bitmaskGISpecular |= maskedBitsGISpecular;
-#	endif
 #endif
 			}
 		}
 
 		visibility += countbits(bitmask) * 0.03125;
-
-#if defined(GI) && defined(GI_SPECULAR)
-		visibilitySpecular += countbits(bitmaskGISpecular) * 0.03125;
-#endif
 	}
 
 	float depthFade = GetDepthFade(viewspaceZ);
@@ -282,42 +238,22 @@ void CalculateGI(
 	visibility = 1 - pow(abs(1 - visibility), AOPower);
 
 #ifdef GI
-	radiance *= rcpNumSlices;
-	radiance = lerp(radiance, 0, depthFade);
-#	ifdef GI_SPECULAR
-	radianceSpecular *= rcpNumSlices;
-	radianceSpecular = lerp(radianceSpecular, 0, depthFade);
-
-	visibilitySpecular *= rcpNumSlices;
-	visibilitySpecular = lerp(saturate(visibility), 0, depthFade);
-#	endif
-#endif
-
-#if !defined(GI) || !defined(GI_SPECULAR)
-	visibilitySpecular = 0.0;
-#endif
+	radianceY *= rcpNumSlices;
+	radianceY = lerp(radianceY, 0, depthFade);
 
-#ifdef BENT_NORMAL
-	bentNormal = normalize(bentNormal);
+	radianceCoCg *= rcpNumSlices;
 #endif
 
-	o_currGIAO = float4(radiance, visibility);
-	o_currGIAOSpecular = float4(radianceSpecular, visibilitySpecular);
-	o_bentNormal = bentNormal;
+	o_ao = visibility;
+	o_currY = radianceY;
+	o_currCoCg = radianceCoCg;
 }
 
 [numthreads(8, 8, 1)] void main(const uint2 dtid
 								: SV_DispatchThreadID) {
 	const float2 frameScale = FrameDim * RcpTexDim;
 
 	uint2 pxCoord = dtid;
-#if defined(HALF_RATE)
-	const uint halfWidth = uint(OUT_FRAME_DIM.x) >> 1;
-	const bool useHistory = dtid.x >= halfWidth;
-	pxCoord.x = (pxCoord.x % halfWidth) * 2 + (dtid.y + FrameIndex + useHistory) % 2;
-#else
-	const static bool useHistory = false;
-#endif
 
 	float2 uv = (pxCoord + .5) * RCP_OUT_FRAME_DIM;
 	uint eyeIndex = Stereo::GetEyeIndexFromTexCoord(uv);
@@ -333,38 +269,27 @@ void CalculateGI(
 	// Move center pixel slightly towards camera to avoid imprecision artifacts due to depth buffer imprecision; offset depends on depth texture format used
 	viewspaceZ *= 0.99920h;  // this is good for FP16 depth buffer
 
-	float4 currGIAO = float4(0, 0, 0, 0);
-	float4 currGIAOSpecular = float4(0, 0, 0, 0);
-	float3 bentNormal = viewspaceNormal;
+	float currAo = 0;
+	float4 currY = 0;
+	float2 currCoCg = 0;
 
 	bool needGI = viewspaceZ > FP_Z && viewspaceZ < DepthFadeRange.y;
 	if (needGI) {
-		if (!useHistory)
-			CalculateGI(
-				pxCoord, uv, viewspaceZ, viewspaceNormal,
-				currGIAO, currGIAOSpecular, bentNormal);
+		CalculateGI(
+			pxCoord, uv, viewspaceZ, viewspaceNormal,
+			currAo, currY, currCoCg);
 
 #ifdef TEMPORAL_DENOISER
 		float lerpFactor = rcp(srcAccumFrames[pxCoord] * 255);
-#	if defined(HALF_RATE)
-		if (useHistory && lerpFactor != 1)
-			lerpFactor = 0;
-#	endif
-
-		currGIAO = lerp(srcPrevGI[pxCoord], currGIAO, lerpFactor);
-#	ifdef GI_SPECULAR
-		currGIAOSpecular = lerp(srcPrevGISpecular[pxCoord], currGIAOSpecular, lerpFactor);
-#	endif
+
+		currY = lerp(srcPrevY[pxCoord], currY, lerpFactor);
+		currCoCg = lerp(srcPrevCoCg[pxCoord], currCoCg, lerpFactor);
 #endif
 	}
-	currGIAO = any(ISNAN(currGIAO)) ? float4(0, 0, 0, 0) : currGIAO;
-	currGIAOSpecular = any(ISNAN(currGIAOSpecular)) ? float4(0, 0, 0, 0) : currGIAOSpecular;
+	currY = any(ISNAN(currY)) ? 0 : currY;
+	currCoCg = any(ISNAN(currCoCg)) ? 0 : currCoCg;
 
-	outGI[pxCoord] = currGIAO;
-#ifdef GI_SPECULAR
-	outGISpecular[pxCoord] = currGIAOSpecular;
-#endif
-#ifdef BENT_NORMAL
-	outBentNormal[pxCoord] = GBuffer::EncodeNormal(bentNormal);
-#endif
+	outAo[pxCoord] = currAo;
+	outY[pxCoord] = currY;
+	outCoCg[pxCoord] = currCoCg;
 }