Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: refactor ssgi to use spherical harmonics #837

Merged
merged 6 commits into from
Dec 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 33 additions & 37 deletions features/Screen Space GI/Shaders/ScreenSpaceGI/blur.cs.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,15 @@
#include "Common/VR.hlsli"
#include "ScreenSpaceGI/common.hlsli"

Texture2D<float4> srcGI : register(t0); // maybe half-res
Texture2D<unorm float> srcAccumFrames : register(t1); // maybe half-res
Texture2D<half> srcDepth : register(t2);
Texture2D<half4> srcNormalRoughness : register(t3);
Texture2D<half> srcDepth : register(t0);
Texture2D<half4> srcNormalRoughness : register(t1);
Texture2D<unorm float> srcAccumFrames : register(t2); // maybe half-res
Texture2D<float4> srcIlY : register(t3); // maybe half-res
Texture2D<float2> srcIlCoCg : register(t4); // maybe half-res

RWTexture2D<float4> outGI : register(u0);
RWTexture2D<unorm float> outAccumFrames : register(u1);
RWTexture2D<unorm float> outAccumFrames : register(u0);
RWTexture2D<float4> outIlY : register(u1);
RWTexture2D<float2> outIlCoCg : register(u2);

// samples = 8, min distance = 0.5, average samples on radius = 2
static const float3 g_Poisson8[8] = {
Expand Down Expand Up @@ -73,6 +75,8 @@ float2x3 getKernelBasis(float3 D, float3 N, float roughness = 1.0, float anisoFa
return float2x3(T, B);
}

// TODO: spinning blur

[numthreads(8, 8, 1)] void main(const uint2 dtid
: SV_DispatchThreadID) {
const float2 frameScale = FrameDim * RcpTexDim;
Expand All @@ -90,34 +94,28 @@ float2x3 getKernelBasis(float3 D, float3 N, float roughness = 1.0, float anisoFa

float depth = READ_DEPTH(srcDepth, dtid);
float3 pos = ScreenToViewPosition(screenPos, depth, eyeIndex);
float4 normalRoughness = FULLRES_LOAD(srcNormalRoughness, dtid, uv, samplerLinearClamp);
float3 normal = GBuffer::DecodeNormal(normalRoughness.xy);
#ifdef SPECULAR_BLUR
float roughness = 1 - normalRoughness.z;
#endif
float3 normal = GBuffer::DecodeNormal(FULLRES_LOAD(srcNormalRoughness, dtid, uv, samplerLinearClamp).xy);

const float2 pixelDirRBViewspaceSizeAtCenterZ = depth.xx * (eyeIndex == 0 ? NDCToViewMul.xy : NDCToViewMul.zw) * RCP_OUT_FRAME_DIM;
const float worldRadius = radius * pixelDirRBViewspaceSizeAtCenterZ.x;
#ifdef SPECULAR_BLUR
float2x3 TvBv = getKernelBasis(getSpecularDominantDirection(normal, -normalize(pos), roughness), normal, roughness);
float halfAngle = specularLobeHalfAngle(roughness);
#else
float2x3 TvBv = getKernelBasis(normal, normal); // D = N
float halfAngle = Math::HALF_PI * .5f;
#endif

TvBv[0] *= worldRadius;
TvBv[1] *= worldRadius;
#ifdef TEMPORAL_DENOISER
halfAngle *= 1 - lerp(0, 0.8, sqrt(accumFrames / (float)MaxAccumFrames));
#endif

float4 gi = srcGI[dtid];
const float4 ilY = srcIlY[dtid];
const float2 ilCoCg = srcIlCoCg[dtid];

float4 sum = gi;
#if defined(TEMPORAL_DENOISER) && !defined(SPECULAR_BLUR)
float fsum = accumFrames;
float4 ySum = ilY;
float2 coCgSum = ilCoCg;
#if defined(TEMPORAL_DENOISER)
float fSum = accumFrames;
#endif
float wsum = 1;
float wSum = 1;
for (uint i = 0; i < numSamples; i++) {
float w = GaussianWeight(g_Poisson8[i].z);

Expand Down Expand Up @@ -146,30 +144,28 @@ float2x3 getKernelBasis(float3 D, float3 N, float roughness = 1.0, float anisoFa

float4 normalRoughnessSample = srcNormalRoughness.SampleLevel(samplerLinearClamp, uvSample * frameScale, 0);
float3 normalSample = GBuffer::DecodeNormal(normalRoughnessSample.xy);
#ifdef SPECULAR_BLUR
float roughnessSample = 1 - normalRoughnessSample.z;
#endif

float4 giSample = srcGI.SampleLevel(samplerLinearClamp, uvSample * OUT_FRAME_SCALE, 0);

// geometry weight
w *= saturate(1 - abs(dot(normal, posSample - pos)) * DistanceNormalisation);
// normal weight
w *= 1 - saturate(FastMath::acosFast4(saturate(dot(normalSample, normal))) / halfAngle);
#ifdef SPECULAR_BLUR
// roughness weight
w *= abs(roughness - roughnessSample) / (roughness * roughness * 0.99 + 0.01);
#endif

sum += giSample * w;
#if defined(TEMPORAL_DENOISER) && !defined(SPECULAR_BLUR)
fsum += srcAccumFrames.SampleLevel(samplerLinearClamp, uvSample * OUT_FRAME_SCALE, 0) * w;
if (w > 1e-8) {
float4 ySample = srcIlY.SampleLevel(samplerLinearClamp, uvSample * OUT_FRAME_SCALE, 0);
float2 coCgSample = srcIlCoCg.SampleLevel(samplerLinearClamp, uvSample * OUT_FRAME_SCALE, 0);

ySum += ySample * w;
coCgSum += coCgSample * w;
#if defined(TEMPORAL_DENOISER)
fSum += srcAccumFrames.SampleLevel(samplerLinearClamp, uvSample * OUT_FRAME_SCALE, 0) * w;
#endif
wsum += w;
wSum += w;
}
}

outGI[dtid] = sum / wsum;
#if defined(TEMPORAL_DENOISER) && !defined(SPECULAR_BLUR)
outAccumFrames[dtid] = fsum / wsum;
outIlY[dtid] = ySum / wSum;
outIlCoCg[dtid] = coCgSum / wSum;
#if defined(TEMPORAL_DENOISER)
outAccumFrames[dtid] = fSum / wSum;
#endif
}
163 changes: 44 additions & 119 deletions features/Screen Space GI/Shaders/ScreenSpaceGI/gi.cs.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,17 @@
// Screen Space Indirect Lighting with Visibility Bitmask
// https://arxiv.org/abs/2301.11376
//
// Exploring Raytraced Future in Metro Exodus
// https://developer.download.nvidia.com/video/gputechconf/gtc/2019/presentation/s9985-exploring-ray-traced-future-in-metro-exodus.pdf
//
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

#include "Common/Color.hlsli"
#include "Common/FastMath.hlsli"
#include "Common/FrameBuffer.hlsli"
#include "Common/GBuffer.hlsli"
#include "Common/Math.hlsli"
#include "Common/Spherical Harmonics/SphericalHarmonics.hlsli"
#include "Common/VR.hlsli"
#include "ScreenSpaceGI/common.hlsli"

Expand All @@ -36,12 +41,13 @@ Texture2D<float4> srcNormalRoughness : register(t1);
Texture2D<float3> srcRadiance : register(t2); // maybe half-res
Texture2D<unorm float2> srcNoise : register(t3);
Texture2D<unorm float> srcAccumFrames : register(t4); // maybe half-res
Texture2D<float4> srcPrevGI : register(t5); // maybe half-res
Texture2D<float4> srcPrevGISpecular : register(t6); // maybe half-res
Texture2D<float> srcPrevAo : register(t5); // maybe half-res
Texture2D<float4> srcPrevY : register(t6); // maybe half-res
Texture2D<float2> srcPrevCoCg : register(t7); // maybe half-res

RWTexture2D<float4> outGI : register(u0);
RWTexture2D<float4> outGISpecular : register(u1);
RWTexture2D<unorm float2> outBentNormal : register(u2);
RWTexture2D<unorm float> outAo : register(u0);
RWTexture2D<float4> outY : register(u1);
RWTexture2D<float2> outCoCg : register(u2);
RWTexture2D<half3> outPrevGeo : register(u3);

float GetDepthFade(float depth)
Expand Down Expand Up @@ -79,7 +85,7 @@ float GetVisibilityFunctionSmithJointApprox(float roughness, float NdotV, float

void CalculateGI(
uint2 dtid, float2 uv, float viewspaceZ, float3 viewspaceNormal,
out float4 o_currGIAO, out float4 o_currGIAOSpecular, out float3 o_bentNormal)
out float o_ao, out sh2 o_currY, out float2 o_currCoCg)
{
const float2 frameScale = FrameDim * RcpTexDim;

Expand Down Expand Up @@ -112,14 +118,8 @@ void CalculateGI(
const float NoV = clamp(dot(viewVec, viewspaceNormal), 1e-5, 1);

float visibility = 0;
float visibilitySpecular = 0;
float3 radiance = 0;
float3 radianceSpecular = 0;
float3 bentNormal = viewspaceNormal;

#ifdef GI_SPECULAR
const float roughness = max(0.2, saturate(1 - FULLRES_LOAD(srcNormalRoughness, dtid, uv * frameScale, samplerLinearClamp).z)); // can't handle low roughness
#endif
float4 radianceY = 0;
float2 radianceCoCg = 0;

for (uint slice = 0; slice < NumSlices; slice++) {
float phi = (Math::PI * rcpNumSlices) * (slice + noiseSlice);
Expand All @@ -142,12 +142,6 @@ void CalculateGI(
uint bitmask = 0;
#ifdef GI
uint bitmaskGI = 0;
# ifdef GI_SPECULAR
uint bitmaskGISpecular = 0;
float3 domVec = getSpecularDominantDirection(viewspaceNormal, viewVec, roughness);
float3 projectedDomVec = normalize(domVec - axisVec * dot(domVec, axisVec));
float nDom = sign(dot(orthoDirectionVec, projectedDomVec)) * FastMath::ACos(saturate(dot(projectedDomVec, viewVec)));
# endif
#endif

// R1 sequence (http://extremelearning.com.au/unreasonable-effectiveness-of-quasirandom-sequences/)
Expand Down Expand Up @@ -197,82 +191,44 @@ void CalculateGI(
float angleBackGI = FastMath::ACos(dot(sampleBackHorizonVecGI, viewVec));
float2 angleRangeGI = -sideSign * (sideSign == -1 ? float2(angleFront, angleBackGI) : float2(angleBackGI, angleFront));

# ifdef GI_SPECULAR
// thank u Olivier!
float coneHalfAngles = max(5e-2, specularLobeHalfAngle(roughness)); // not too small
float2 angleRangeSpecular = clamp((angleRangeGI + nDom) * 0.5 / coneHalfAngles, -1, 1) * 0.5 + 0.5;

// Experimental method using importance sampling
// https://agraphicsguynotes.com/posts/sample_microfacet_brdf/
// float2 angleRangeSpecular = angleBackGI;
// float2 specularSigns = sign(angleRangeSpecular);
// angleRangeSpecular = saturate(cos(angleRangeSpecular)) * (roughness2 - 1);
// angleRangeSpecular = roughness2 / (angleRangeSpecular * angleRangeSpecular + roughness2 - 1) - 1 / (roughness2 - 1);
// angleRangeSpecular = saturate((angleRangeSpecular * specularSigns) * 0.5 + 0.5);

uint2 bitsRangeGISpecular = uint2(round(angleRangeSpecular.x * 32u), round((angleRangeSpecular.y - angleRangeSpecular.x) * 32u));
uint maskedBitsGISpecular = s < GIRadius ? ((1 << bitsRangeGISpecular.y) - 1) << bitsRangeGISpecular.x : 0;
# endif

angleRangeGI = smoothstep(0, 1, (angleRangeGI + n) * RCP_PI + .5); // https://discord.com/channels/586242553746030596/586245736413528082/1102228968247144570

uint2 bitsRangeGI = uint2(round(angleRangeGI.x * 32u), round((angleRangeGI.y - angleRangeGI.x) * 32u));
uint maskedBitsGI = s < GIRadius ? ((1 << bitsRangeGI.y) - 1) << bitsRangeGI.x : 0;

uint overlappedBits = maskedBitsGI & ~bitmaskGI;
bool checkGI = overlappedBits;
# ifdef GI_SPECULAR
uint overlappedBitsSpecular = maskedBitsGISpecular & ~bitmaskGISpecular;
checkGI = checkGI || overlappedBitsSpecular;
# endif
uint validBits = maskedBitsGI & ~bitmaskGI;
bool checkGI = validBits;

if (checkGI) {
float giBoost = 1 + GIDistanceCompensation * smoothstep(0, GICompensationMaxDist, s * EffectRadius);
float giBoost = 4.0 * Math::PI * (1 + GIDistanceCompensation * smoothstep(0, GICompensationMaxDist, s * EffectRadius));

// IL
float3 normalSample = GBuffer::DecodeNormal(srcNormalRoughness.SampleLevel(samplerPointClamp, sampleUV * frameScale, 0).xy);
float frontBackMult = saturate(-dot(normalSample, sampleHorizonVec));
frontBackMult = frontBackMult < 0 ? abs(frontBackMult) * BackfaceStrength : frontBackMult; // backface

float NoL = clamp(dot(viewspaceNormal, sampleHorizonVec), 1e-5, 1);
if (frontBackMult > 0.f) {
float3 sampleHorizonVecWS = normalize(mul(FrameBuffer::CameraViewInverse[eyeIndex], half4(sampleHorizonVec, 0)).xyz);

if (frontBackMult > 0.f && NoL > 0.001f) {
float3 sampleRadiance = srcRadiance.SampleLevel(samplerPointClamp, sampleUV * OUT_FRAME_SCALE, mipLevel).rgb * frontBackMult * giBoost;
sampleRadiance = max(sampleRadiance, 0);
float3 sampleRadianceYCoCg = Color::RGBToYCoCg(sampleRadiance);

float3 diffuseRadiance = sampleRadiance * countbits(overlappedBits) * 0.03125; // 1/32
diffuseRadiance *= NoL;
diffuseRadiance = max(0, diffuseRadiance);

radiance += diffuseRadiance;

# ifdef GI_SPECULAR
float NoH = clamp(dot(viewspaceNormal, normalize(viewVec + sampleHorizonVec)), 1e-5, 1);

float3 specularRadiance = sampleRadiance * countbits(overlappedBitsSpecular) * 0.03125; // 1/32
specularRadiance *= GetNormalDistributionFunctionGGX(roughness, NoH) * GetVisibilityFunctionSmithJointApprox(roughness, NoV, NoL);
specularRadiance = max(0, specularRadiance);

radianceSpecular += specularRadiance;
# endif
float bitmaskWeight = countbits(validBits) * 0.03125;
radianceY += sampleRadianceYCoCg.r * SphericalHarmonics::Evaluate(sampleHorizonVecWS) * bitmaskWeight;
radianceCoCg += sampleRadianceYCoCg.gb * bitmaskWeight;
}
}
#endif // GI

bitmask |= maskedBits;
#ifdef GI
bitmaskGI |= maskedBitsGI;
# ifdef GI_SPECULAR
bitmaskGISpecular |= maskedBitsGISpecular;
# endif
#endif
}
}

visibility += countbits(bitmask) * 0.03125;

#if defined(GI) && defined(GI_SPECULAR)
visibilitySpecular += countbits(bitmaskGISpecular) * 0.03125;
#endif
}

float depthFade = GetDepthFade(viewspaceZ);
Expand All @@ -282,42 +238,22 @@ void CalculateGI(
visibility = 1 - pow(abs(1 - visibility), AOPower);

#ifdef GI
radiance *= rcpNumSlices;
radiance = lerp(radiance, 0, depthFade);
# ifdef GI_SPECULAR
radianceSpecular *= rcpNumSlices;
radianceSpecular = lerp(radianceSpecular, 0, depthFade);

visibilitySpecular *= rcpNumSlices;
visibilitySpecular = lerp(saturate(visibility), 0, depthFade);
# endif
#endif

#if !defined(GI) || !defined(GI_SPECULAR)
visibilitySpecular = 0.0;
#endif
radianceY *= rcpNumSlices;
radianceY = lerp(radianceY, 0, depthFade);

#ifdef BENT_NORMAL
bentNormal = normalize(bentNormal);
radianceCoCg *= rcpNumSlices;
#endif

o_currGIAO = float4(radiance, visibility);
o_currGIAOSpecular = float4(radianceSpecular, visibilitySpecular);
o_bentNormal = bentNormal;
o_ao = visibility;
o_currY = radianceY;
o_currCoCg = radianceCoCg;
}

[numthreads(8, 8, 1)] void main(const uint2 dtid
: SV_DispatchThreadID) {
const float2 frameScale = FrameDim * RcpTexDim;

uint2 pxCoord = dtid;
#if defined(HALF_RATE)
const uint halfWidth = uint(OUT_FRAME_DIM.x) >> 1;
const bool useHistory = dtid.x >= halfWidth;
pxCoord.x = (pxCoord.x % halfWidth) * 2 + (dtid.y + FrameIndex + useHistory) % 2;
#else
const static bool useHistory = false;
#endif

float2 uv = (pxCoord + .5) * RCP_OUT_FRAME_DIM;
uint eyeIndex = Stereo::GetEyeIndexFromTexCoord(uv);
Expand All @@ -333,38 +269,27 @@ void CalculateGI(
// Move center pixel slightly towards camera to avoid artifacts caused by depth buffer imprecision; offset depends on depth texture format used
viewspaceZ *= 0.99920h; // this is good for FP16 depth buffer

float4 currGIAO = float4(0, 0, 0, 0);
float4 currGIAOSpecular = float4(0, 0, 0, 0);
float3 bentNormal = viewspaceNormal;
float currAo = 0;
float4 currY = 0;
float2 currCoCg = 0;

bool needGI = viewspaceZ > FP_Z && viewspaceZ < DepthFadeRange.y;
if (needGI) {
if (!useHistory)
CalculateGI(
pxCoord, uv, viewspaceZ, viewspaceNormal,
currGIAO, currGIAOSpecular, bentNormal);
CalculateGI(
pxCoord, uv, viewspaceZ, viewspaceNormal,
currAo, currY, currCoCg);

#ifdef TEMPORAL_DENOISER
float lerpFactor = rcp(srcAccumFrames[pxCoord] * 255);
# if defined(HALF_RATE)
if (useHistory && lerpFactor != 1)
lerpFactor = 0;
# endif

currGIAO = lerp(srcPrevGI[pxCoord], currGIAO, lerpFactor);
# ifdef GI_SPECULAR
currGIAOSpecular = lerp(srcPrevGISpecular[pxCoord], currGIAOSpecular, lerpFactor);
# endif

currY = lerp(srcPrevY[pxCoord], currY, lerpFactor);
currCoCg = lerp(srcPrevCoCg[pxCoord], currCoCg, lerpFactor);
#endif
}
currGIAO = any(ISNAN(currGIAO)) ? float4(0, 0, 0, 0) : currGIAO;
currGIAOSpecular = any(ISNAN(currGIAOSpecular)) ? float4(0, 0, 0, 0) : currGIAOSpecular;
currY = any(ISNAN(currY)) ? 0 : currY;
currCoCg = any(ISNAN(currCoCg)) ? 0 : currCoCg;

outGI[pxCoord] = currGIAO;
#ifdef GI_SPECULAR
outGISpecular[pxCoord] = currGIAOSpecular;
#endif
#ifdef BENT_NORMAL
outBentNormal[pxCoord] = GBuffer::EncodeNormal(bentNormal);
#endif
outAo[pxCoord] = currAo;
outY[pxCoord] = currY;
outCoCg[pxCoord] = currCoCg;
}
Loading
Loading