diff --git a/.gitmodules b/.gitmodules index 6de574f60..0b21e2f6a 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "extern/CommonLibSSE-NG"] path = extern/CommonLibSSE-NG url = https://github.com/alandtse/CommonLibVR.git +[submodule "extern/NVAPI"] + path = extern/NVAPI + url = https://github.com/NVIDIA/nvapi.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 36afac2ae..fe81cf487 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -40,11 +40,16 @@ find_package(pystring CONFIG REQUIRED) find_package(cppwinrt CONFIG REQUIRED) find_package(unordered_dense CONFIG REQUIRED) find_package(efsw CONFIG REQUIRED) + +set(NVAPI_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/extern/nvapi/" CACHE STRING "Path to NVAPI include headers/shaders" ) +set(NVAPI_LIBRARY "${CMAKE_SOURCE_DIR}/extern/nvapi/amd64/nvapi64.lib" CACHE STRING "Path to NVAPI .lib file") + target_include_directories( ${PROJECT_NAME} PRIVATE ${BSHOSHANY_THREAD_POOL_INCLUDE_DIRS} ${CLIB_UTIL_INCLUDE_DIRS} + ${NVAPI_INCLUDE_DIR} ) target_link_libraries( @@ -63,6 +68,7 @@ target_link_libraries( pystring::pystring unordered_dense::unordered_dense efsw::efsw + ${NVAPI_LIBRARY} ) # https://gitlab.kitware.com/cmake/cmake/-/issues/24922#note_1371990 diff --git a/extern/CommonLibSSE-NG b/extern/CommonLibSSE-NG index c5d4f170b..e83b23bdd 160000 --- a/extern/CommonLibSSE-NG +++ b/extern/CommonLibSSE-NG @@ -1 +1 @@ -Subproject commit c5d4f170b7d37658d6988f201aaabaa973695e4a +Subproject commit e83b23bddb893d44b8f3cf3e6379b9087ad25cc9 diff --git a/extern/NVAPI b/extern/NVAPI new file mode 160000 index 000000000..3a83ef27e --- /dev/null +++ b/extern/NVAPI @@ -0,0 +1 @@ +Subproject commit 3a83ef27ed8bd1a273cb667d3425b68c3c5f2933 diff --git a/features/Cloud Shadows/Shaders/CloudShadows/CloudShadows.hlsli b/features/Cloud Shadows/Shaders/CloudShadows/output.cs.hlsl similarity index 52% rename from features/Cloud Shadows/Shaders/CloudShadows/CloudShadows.hlsli rename to features/Cloud 
Shadows/Shaders/CloudShadows/output.cs.hlsl index c5229ae66..774f097ba 100644 --- a/features/Cloud Shadows/Shaders/CloudShadows/CloudShadows.hlsli +++ b/features/Cloud Shadows/Shaders/CloudShadows/output.cs.hlsl @@ -1,20 +1,23 @@ +#include "../Common/DeferredShared.hlsli" +#include "../Common/VR.hlsli" + struct PerPassCloudShadow { uint EnableCloudShadows; - float CloudHeight; float PlanetRadius; - float EffectMix; - float TransparencyPower; - float AbsorptionAmbient; - float RcpHPlusR; }; -StructuredBuffer perPassCloudShadow : register(t23); -TextureCube cloudShadows : register(t40); +StructuredBuffer perPassCloudShadow : register(t0); +TextureCube cloudShadows : register(t1); +Texture2D TexDepth : register(t2); + +RWTexture2D RWTexShadowMask : register(u0); + +SamplerState defaultSampler; float3 getCloudShadowSampleDir(float3 rel_pos, float3 eye_to_sun) { @@ -38,13 +41,40 @@ float3 getCloudShadowSampleDirFlatEarth(float3 rel_pos, float3 eye_to_sun) return v; } -float3 getCloudShadowMult(float3 rel_pos, float3 eye_to_sun, SamplerState samp) +float3 getCloudShadowMult(float3 rel_pos, float3 eye_to_sun) { // float3 cloudSampleDir = getCloudShadowSampleDirFlatEarth(rel_pos, eye_to_sun).xyz; float3 cloudSampleDir = getCloudShadowSampleDir(rel_pos, eye_to_sun).xyz; - float4 cloudCubeSample = cloudShadows.Sample(samp, cloudSampleDir); + float4 cloudCubeSample = cloudShadows.SampleLevel(defaultSampler, cloudSampleDir, 0); // TODO Sample in pixel shader float alpha = pow(saturate(cloudCubeSample.w), perPassCloudShadow[0].TransparencyPower); return lerp(1.0, 1.0 - alpha, perPassCloudShadow[0].EffectMix); +} + +[numthreads(32, 32, 1)] void main(uint2 dtid + : SV_DispatchThreadID) { + float2 uv = (dtid + .5) * RcpBufferDim; +#ifdef VR + const uint eyeIndex = uv > .5; +#else + const uint eyeIndex = 0; +#endif + + float3 ndc = float3(ConvertToStereoUV(uv, eyeIndex), 1); + ndc = ndc * 2 - 1; + ndc.y = -ndc.y; + ndc.z = TexDepth[dtid]; + + if (ndc.z > 0.9999) + return; + + 
float4 worldPos = mul(InvViewMatrix[eyeIndex], mul(InvProjMatrix[eyeIndex], float4(ndc, 1))); + worldPos.xyz /= worldPos.w; + + float3 dirLightDirWS = mul((float3x3)InvViewMatrix[eyeIndex], DirLightDirectionVS[eyeIndex].xyz); + float cloudShadow = getCloudShadowMult(worldPos.xyz, dirLightDirWS); + + half shadow = RWTexShadowMask[dtid]; + RWTexShadowMask[dtid] = shadow * cloudShadow; } \ No newline at end of file diff --git a/features/Dynamic Cubemaps/Shaders/DynamicCubemaps/DynamicCubemaps.hlsli b/features/Dynamic Cubemaps/Shaders/DynamicCubemaps/DynamicCubemaps.hlsli index 628c60e2b..f2fa58d20 100644 --- a/features/Dynamic Cubemaps/Shaders/DynamicCubemaps/DynamicCubemaps.hlsli +++ b/features/Dynamic Cubemaps/Shaders/DynamicCubemaps/DynamicCubemaps.hlsli @@ -31,7 +31,7 @@ float3 GetDynamicCubemap(float2 uv, float3 N, float3 VN, float3 V, float roughne float level = roughness * 9.0; - float3 specularIrradiance = specularTexture.SampleLevel(SampColorSampler, R, level); + float3 specularIrradiance = specularTexture.SampleLevel(SampColorSampler, R, level).xyz; specularIrradiance = sRGB2Lin(specularIrradiance); diffuseColor = sRGB2Lin(diffuseColor); @@ -56,7 +56,7 @@ float3 GetDynamicCubemapFresnel(float2 uv, float3 N, float3 VN, float3 V, float float2 specularBRDF = EnvBRDFApprox(roughness, NoV); if (specularBRDF.y > 0.001) { float3 R = reflect(-V, N); - float3 specularIrradiance = specularTexture.SampleLevel(SampColorSampler, R, level); + float3 specularIrradiance = specularTexture.SampleLevel(SampColorSampler, R, level).xyz; // Horizon specular occlusion // https://marmosetco.tumblr.com/post/81245981087 diff --git a/features/Grass Lighting/Shaders/RunGrass.hlsl b/features/Grass Lighting/Shaders/RunGrass.hlsl index dc76f0621..d51b9c9c9 100644 --- a/features/Grass Lighting/Shaders/RunGrass.hlsl +++ b/features/Grass Lighting/Shaders/RunGrass.hlsl @@ -1,5 +1,6 @@ #include "Common/Color.hlsl" #include "Common/FrameBuffer.hlsl" +#include "Common/GBuffer.hlsli" #include 
"Common/LightingData.hlsl" #include "Common/MotionBlur.hlsl" @@ -101,15 +102,12 @@ cbuffer PerFrame : register( #endif ) { - row_major float3x4 DirectionalAmbient; float SunlightScale; float Glossiness; float SpecularStrength; float SubsurfaceScatteringAmount; - bool EnableDirLightFix; bool OverrideComplexGrassSettings; float BasicGrassBrightness; - float pad[1]; } #ifdef VSHADER @@ -214,7 +212,7 @@ VS_OUTPUT main(VS_INPUT input) // Vertex normal needs to be transformed to world-space for lighting calculations. vsout.VertexNormal.xyz = mul(world3x3, input.Normal.xyz * 2.0 - 1.0); vsout.SphereNormal.xyz = mul(world3x3, normalize(input.Position.xyz)); - vsout.SphereNormal.w = input.Color.w; + vsout.SphereNormal.w = saturate(input.Color.w); return vsout; } @@ -227,9 +225,12 @@ struct PS_OUTPUT #if defined(RENDER_DEPTH) float4 PS : SV_Target0; #else - float4 Albedo : SV_Target0; + float4 Diffuse : SV_Target0; float2 MotionVectors : SV_Target1; - float4 Normal : SV_Target2; + float4 NormalGlossiness : SV_Target2; + float4 Albedo : SV_Target3; + float4 Specular : SV_Target4; + float4 Masks : SV_Target6; #endif // RENDER_DEPTH }; @@ -254,16 +255,6 @@ cbuffer AlphaTestRefCB : register(b11) } # endif // VR -float GetSoftLightMultiplier(float angle, float strength) -{ - float softLightParam = saturate((strength + angle) / (1 + strength)); - float arg1 = (softLightParam * softLightParam) * (3 - 2 * softLightParam); - float clampedAngle = saturate(angle); - float arg2 = (clampedAngle * clampedAngle) * (3 - 2 * clampedAngle); - float softLigtMul = saturate(arg1 - arg2); - return softLigtMul; -} - float3 GetLightSpecularInput(float3 L, float3 V, float3 N, float3 lightColor, float shininess) { float3 H = normalize(V + L); @@ -298,10 +289,6 @@ float3x3 CalculateTBN(float3 N, float3 p, float2 uv) return float3x3(T * invmax, B * invmax, N); } -# if defined(SCREEN_SPACE_SHADOWS) -# include "ScreenSpaceShadows/ShadowsPS.hlsli" -# endif - # if defined(LIGHT_LIMIT_FIX) # include 
"LightLimitFix/LightLimitFix.hlsli" # endif @@ -313,10 +300,6 @@ float3x3 CalculateTBN(float3 N, float3 p, float2 uv) # include "DynamicCubemaps/DynamicCubemaps.hlsli" # endif -# if defined(CLOUD_SHADOWS) -# include "CloudShadows/CloudShadows.hlsli" -# endif - PS_OUTPUT main(PS_INPUT input, bool frontFace : SV_IsFrontFace) { @@ -348,85 +331,51 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace psout.PS.w = diffuseAlpha; # else - float4 specColor = complex ? TexBaseSampler.Sample(SampBaseSampler, float2(input.TexCoord.x, 0.5 + input.TexCoord.y * 0.5)) : 1; + float4 specColor = complex ? TexBaseSampler.Sample(SampBaseSampler, float2(input.TexCoord.x, 0.5 + input.TexCoord.y * 0.5)) : 0; float4 shadowColor = TexShadowMaskSampler.Load(int3(input.HPosition.xy, 0)); uint eyeIndex = GetEyeIndexPS(input.HPosition, VPOSOffset); psout.MotionVectors = GetSSMotionVector(input.WorldPosition, input.PreviousWorldPosition, eyeIndex); - float3 ddx = ddx_coarse(input.ViewSpacePosition); - float3 ddy = ddy_coarse(input.ViewSpacePosition); - float3 normal = normalize(cross(ddx, ddy)); - float normalScale = max(1.0 / 1000.0, sqrt(normal.z * -8 + 8)); - psout.Normal.xy = float2(0.5, 0.5) + normal.xy / normalScale; - psout.Normal.zw = float2(0, 0); - -# if !defined(VR) - float3 viewDirection = -normalize(input.WorldPosition.xyz); -# else float3 viewDirection = -normalize(input.WorldPosition.xyz); -# endif // !VR - float3 worldNormal = normalize(input.VertexNormal.xyz); + float3 normal = normalize(input.VertexNormal.xyz); // Swaps direction of the backfaces otherwise they seem to get lit from the wrong direction. 
if (!frontFace) - worldNormal = -worldNormal; + normal = -normal; - worldNormal = normalize(lerp(worldNormal, normalize(input.SphereNormal.xyz), saturate(input.SphereNormal.w * 2))); + normal = normalize(lerp(normal, normalize(input.SphereNormal.xyz), input.SphereNormal.w)); if (complex) { - float3 normalColor = float4(TransformNormal(specColor.xyz), 1); + float3 normalColor = TransformNormal(specColor.xyz); // world-space -> tangent-space -> world-space. // This is because we don't have pre-computed tangents. - worldNormal = normalize(mul(normalColor, CalculateTBN(worldNormal, -input.WorldPosition, input.TexCoord.xy))); + normal = normalize(mul(normalColor, CalculateTBN(normal, -input.WorldPosition.xyz, input.TexCoord.xy))); } if (!complex || OverrideComplexGrassSettings) baseColor.xyz *= BasicGrassBrightness; - float3 dirLightColor = DirLightColor.xyz; - if (EnableDirLightFix) { - dirLightColor *= SunlightScale; - } - -# if defined(CLOUD_SHADOWS) - float3 cloudShadowMult = 1.0; - if (perPassCloudShadow[0].EnableCloudShadows && !lightingData[0].Reflections) { - cloudShadowMult = getCloudShadowMult(input.WorldPosition.xyz, DirLightDirection.xyz, SampColorSampler); - dirLightColor *= cloudShadowMult; - } -# endif - + float3 dirLightColor = DirLightColor.xyz * SunlightScale; dirLightColor *= shadowColor.x; -# if defined(SCREEN_SPACE_SHADOWS) - float dirLightSShadow = PrepassScreenSpaceShadows(input.WorldPosition, eyeIndex); - dirLightColor *= dirLightSShadow; -# endif // !SCREEN_SPACE_SHADOWS - float3 diffuseColor = 0; float3 specularColor = 0; float3 lightsDiffuseColor = 0; float3 lightsSpecularColor = 0; - float dirLightAngle = dot(worldNormal, DirLightDirection.xyz); - float3 dirDiffuseColor = dirLightColor * saturate(dirLightAngle); - - lightsDiffuseColor += dirDiffuseColor; + float dirLightAngle = dot(normal, DirLightDirection.xyz); // Generated texture to simulate light transport. 
// Numerous attempts were made to use a more interesting algorithm however they were mostly fruitless. - float3 subsurfaceColor = lerp(RGBToLuminance(baseColor.xyz), baseColor.xyz, 2.0); - - // Applies lighting across the whole surface apart from what is already lit. - lightsDiffuseColor += subsurfaceColor * dirLightColor * GetSoftLightMultiplier(dirLightAngle, SubsurfaceScatteringAmount); + float3 subsurfaceColor = baseColor.xyz * sqrt(input.SphereNormal.w); // Applies lighting from the opposite direction. Does not account for normals perpendicular to the light source. lightsDiffuseColor += subsurfaceColor * dirLightColor * saturate(-dirLightAngle) * SubsurfaceScatteringAmount; if (complex) - lightsSpecularColor += GetLightSpecularInput(DirLightDirection, viewDirection, worldNormal, dirLightColor, Glossiness); + lightsSpecularColor += GetLightSpecularInput(DirLightDirection, viewDirection, normal, dirLightColor, Glossiness); # if defined(LIGHT_LIMIT_FIX) float3 viewPosition = mul(CameraView[eyeIndex], float4(input.WorldPosition.xyz, 1)).xyz; @@ -458,7 +407,7 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace float3 lightColor = light.color.xyz; float3 normalizedLightDirection = normalize(lightDirection); - float lightAngle = dot(worldNormal.xyz, normalizedLightDirection.xyz); + float lightAngle = dot(normal, normalizedLightDirection); float3 normalizedLightDirectionVS = WorldToView(normalizedLightDirection, true, eyeIndex); if (light.firstPersonShadow) @@ -468,53 +417,46 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace float3 lightDiffuseColor = lightColor * saturate(lightAngle.xxx); - lightDiffuseColor += subsurfaceColor * lightColor * GetSoftLightMultiplier(lightAngle, SubsurfaceScatteringAmount); lightDiffuseColor += subsurfaceColor * lightColor * saturate(-lightAngle) * SubsurfaceScatteringAmount; - - lightsSpecularColor += GetLightSpecularInput(normalizedLightDirection, viewDirection, worldNormal.xyz, lightColor, Glossiness) * intensityMultiplier; - 
lightsDiffuseColor += lightDiffuseColor * intensityMultiplier; + + if (complex) + lightsSpecularColor += GetLightSpecularInput(normalizedLightDirection, viewDirection, normal, lightColor, Glossiness) * intensityMultiplier; } } } # endif - float3 directionalAmbientColor = mul(DirectionalAmbient, float4(worldNormal.xyz, 1)); -# if defined(CLOUD_SHADOWS) - if (perPassCloudShadow[0].EnableCloudShadows && !lightingData[0].Reflections) - directionalAmbientColor *= lerp(1.0, cloudShadowMult, perPassCloudShadow[0].AbsorptionAmbient); -# endif - lightsDiffuseColor += directionalAmbientColor; - diffuseColor += lightsDiffuseColor; - float3 color = max(0, diffuseColor * baseColor.xyz * input.VertexColor.xyz); + float3 albedo = max(0, baseColor.xyz * input.VertexColor.xyz); + diffuseColor *= albedo; - if (complex) { - specularColor += lightsSpecularColor; - specularColor *= specColor.w * SpecularStrength; - color.xyz += specularColor; - } + specularColor += lightsSpecularColor; + specularColor *= specColor.w * SpecularStrength; # if defined(LIGHT_LIMIT_FIX) if (perPassLLF[0].EnableLightsVisualisation) { if (perPassLLF[0].LightsVisualisationMode == 0) { - psout.Albedo.xyz = TurboColormap(0); + diffuseColor.xyz = TurboColormap(0); } else if (perPassLLF[0].LightsVisualisationMode == 1) { - psout.Albedo.xyz = TurboColormap(0); + diffuseColor.xyz = TurboColormap(0); } else { - psout.Albedo.xyz = TurboColormap((float)lightCount / 128.0); + diffuseColor.xyz = TurboColormap((float)lightCount / 128.0); } } else { - psout.Albedo.xyz = color; + psout.Diffuse = float4(diffuseColor, 1); } # else - psout.Albedo.xyz = color; + psout.Diffuse.xyz = float4(diffuseColor, 1); # endif - psout.Normal.w = specColor.w * SpecularStrength; - psout.Albedo.w = 1; + psout.Specular = float4(specularColor, 1); + psout.Albedo = float4(albedo, 1); + psout.Masks = float4(0, 0, 0, 0); + float3 normalVS = normalize(WorldToView(normal, false, eyeIndex)); + psout.NormalGlossiness = float4(EncodeNormal(normalVS), 
specColor.w, 1); # endif // RENDER_DEPTH return psout; } diff --git a/features/Screen Space GI/Shaders/Features/ScreenSpaceGI.ini b/features/Screen Space GI/Shaders/Features/ScreenSpaceGI.ini new file mode 100644 index 000000000..eb1a462ce --- /dev/null +++ b/features/Screen Space GI/Shaders/Features/ScreenSpaceGI.ini @@ -0,0 +1,2 @@ +[Info] +Version = 2-9-0 \ No newline at end of file diff --git a/features/Screen Space GI/Shaders/ScreenSpaceGI/common.hlsli b/features/Screen Space GI/Shaders/ScreenSpaceGI/common.hlsli new file mode 100644 index 000000000..95c5dea83 --- /dev/null +++ b/features/Screen Space GI/Shaders/ScreenSpaceGI/common.hlsli @@ -0,0 +1,198 @@ +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2016-2021, Intel Corporation +// +// SPDX-License-Identifier: MIT +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// XeGTAO is based on GTAO/GTSO "Jimenez et al. 
/ Practical Real-Time Strategies for Accurate Indirect Occlusion", +// https://www.activision.com/cdn/research/Practical_Real_Time_Strategies_for_Accurate_Indirect_Occlusion_NEW%20VERSION_COLOR.pdf +// +// Implementation: Filip Strugar (filip.strugar@intel.com), Steve Mccalla (\_/) +// Version: (see XeGTAO.h) (='.'=) +// Details: https://github.com/GameTechDev/XeGTAO (")_(") +// +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +// with additional edits by FiveLimbedCat/ProfJack + +#ifndef SSGI_COMMON +#define SSGI_COMMON + +#ifndef USE_HALF_FLOAT_PRECISION +# define USE_HALF_FLOAT_PRECISION 1 +#endif + +#if (USE_HALF_FLOAT_PRECISION != 0) +# if 1 // old fp16 approach ( 0.5) +#else +# define GET_EYE_IDX(uv) (0) +#endif + +/////////////////////////////////////////////////////////////////////////////// + +#define ISNAN(x) (!(x < 0.f || x > 0.f || x == 0.f)) + +// http://h14s.p5r.org/2012/09/0x5f3759df.html, [Drobot2014a] Low Level Optimizations for GCN, https://blog.selfshadow.com/publications/s2016-shading-course/activision/s2016_pbs_activision_occlusion.pdf slide 63 +lpfloat FastSqrt(float x) +{ + return (lpfloat)(asfloat(0x1fbd1df5 + (asint(x) >> 1))); +} + +// input [-1, 1] and output [0, PI], from https://seblagarde.wordpress.com/2014/12/01/inverse-trigonometric-functions-gpu-optimization-for-amd-gcn-architecture/ +lpfloat FastACos(lpfloat inX) +{ + const lpfloat PI = 3.141593; + const lpfloat HALF_PI = 1.570796; + lpfloat x = abs(inX); + lpfloat res = -0.156583 * x + HALF_PI; + res *= FastSqrt(1.0 - x); + return (inX >= 0) ? res : PI - res; +} + +/////////////////////////////////////////////////////////////////////////////// + +// Inputs are screen XY and viewspace depth, output is viewspace position +float3 ScreenToViewPosition(const float2 screenPos, const float viewspaceDepth, const uint eyeIndex) +{ + const float2 _mul = eyeIndex == 0 ? 
NDCToViewMul.xy : NDCToViewMul.zw; + const float2 _add = eyeIndex == 0 ? NDCToViewAdd.xy : NDCToViewAdd.zw; + + float3 ret; + ret.xy = (_mul * screenPos.xy + _add) * viewspaceDepth; + ret.z = viewspaceDepth; + return ret; +} + +float ScreenToViewDepth(const float screenDepth) +{ + return (CameraData.w / (-screenDepth * CameraData.z + CameraData.x)); +} + +float3 ViewToWorldPosition(const float3 pos, const float4x4 invView) +{ + float4 worldpos = mul(invView, float4(pos, 1)); + return worldpos.xyz / worldpos.w; +} + +float3 ViewToWorldVector(const float3 vec, const float4x4 invView) +{ + return mul((float3x3)invView, vec); +} + +/////////////////////////////////////////////////////////////////////////////// + +// "Efficiently building a matrix to rotate one vector to another" +// http://cs.brown.edu/research/pubs/pdfs/1999/Moller-1999-EBA.pdf / https://dl.acm.org/doi/10.1080/10867651.1999.10487509 +// (using https://github.com/assimp/assimp/blob/master/include/assimp/matrix3x3.inl#L275 as a code reference as it seems to be best) +lpfloat3x3 RotFromToMatrix(lpfloat3 from, lpfloat3 to) +{ + const lpfloat e = dot(from, to); + const lpfloat f = abs(e); //(e < 0)? -e:e; + + // WARNING: This has not been tested/worked through, especially not for 16bit floats; seems to work in our special use case (from is always {0, 0, -1}) but wouldn't use it in general + if (f > lpfloat(1.0 - 0.0003)) + return lpfloat3x3(1, 0, 0, 0, 1, 0, 0, 0, 1); + + const lpfloat3 v = cross(from, to); + /* ... 
use this hand optimized version (9 mults less) */ + const lpfloat h = (1.0) / (1.0 + e); /* optimization by Gottfried Chen */ + const lpfloat hvx = h * v.x; + const lpfloat hvz = h * v.z; + const lpfloat hvxy = hvx * v.y; + const lpfloat hvxz = hvx * v.z; + const lpfloat hvyz = hvz * v.y; + + lpfloat3x3 mtx; + mtx[0][0] = e + hvx * v.x; + mtx[0][1] = hvxy - v.z; + mtx[0][2] = hvxz + v.y; + + mtx[1][0] = hvxy + v.z; + mtx[1][1] = e + h * v.y * v.y; + mtx[1][2] = hvyz - v.x; + + mtx[2][0] = hvxz - v.y; + mtx[2][1] = hvyz + v.x; + mtx[2][2] = e + hvz * v.z; + + return mtx; +} + +#endif \ No newline at end of file diff --git a/features/Screen Space GI/Shaders/ScreenSpaceGI/gi.cs.hlsl b/features/Screen Space GI/Shaders/ScreenSpaceGI/gi.cs.hlsl new file mode 100644 index 000000000..e76b0ba2c --- /dev/null +++ b/features/Screen Space GI/Shaders/ScreenSpaceGI/gi.cs.hlsl @@ -0,0 +1,383 @@ +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2016-2021, Intel Corporation +// +// SPDX-License-Identifier: MIT +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// XeGTAO is based on GTAO/GTSO "Jimenez et al. 
/ Practical Real-Time Strategies for Accurate Indirect Occlusion", +// https://www.activision.com/cdn/research/Practical_Real_Time_Strategies_for_Accurate_Indirect_Occlusion_NEW%20VERSION_COLOR.pdf +// +// Implementation: Filip Strugar (filip.strugar@intel.com), Steve Mccalla (\_/) +// Version: (see XeGTAO.h) (='.'=) +// Details: https://github.com/GameTechDev/XeGTAO (")_(") +// +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// with additional edits by FiveLimbedCat/ProfJack +// +// More references: +// +// Screen Space Indirect Lighting with Visibility Bitmask +// https://arxiv.org/abs/2301.11376 +// +// HBIL +// https://github.com/Patapom/GodComplex/blob/master/Tests/TestHBIL/2018%20Mayaux%20-%20Horizon-Based%20Indirect%20Lighting%20(HBIL).pdf +// +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#include "../Common/FastMath.hlsli" +#include "../Common/GBuffer.hlsli" +#include "../Common/VR.hlsli" +#include "common.hlsli" + +#if USE_HALF_FLOAT_PRECISION == 0 +# define PI (3.1415926535897932384626433832795) +# define HALF_PI (1.5707963267948966192313216916398) +# define RCP_PI (0.31830988618) +#else +# define PI ((lpfloat)3.1415926535897932384626433832795) +# define HALF_PI ((lpfloat)1.5707963267948966192313216916398) +# define RCP_PI ((lpfloat)0.31830988618) +#endif + +Texture2D srcWorkingDepth : register(t0); +Texture2D srcNormal : register(t1); +Texture2D srcRadiance : register(t2); // maybe half-res +Texture2D srcHilbertLUT : register(t3); +Texture2D srcAccumFrames : register(t4); // maybe half-res +Texture2D srcPrevGI : register(t5); // maybe half-res + +RWTexture2D outGI : register(u0); +RWTexture2D outBentNormal : register(u1); +RWTexture2D outPrevDepth : register(u2); + +lpfloat GetDepthFade(lpfloat depth) +{ + return (lpfloat)saturate((depth - DepthFadeRange.x) * DepthFadeScaleConst); +} + +lpfloat2 
SpatioTemporalNoise(uint2 pixCoord, uint temporalIndex) // without TAA, temporalIndex is always 0 +{ + float2 noise; + uint index = srcHilbertLUT.Load(uint3(pixCoord % 64, 0)).x; + index += 288 * (temporalIndex % 64); // why 288? tried out a few and that's the best so far (with XE_HILBERT_LEVEL 6U) - but there's probably better :) + // R2 sequence - see http://extremelearning.com.au/unreasonable-effectiveness-of-quasirandom-sequences/ + return lpfloat2(frac(0.5 + index * float2(0.75487766624669276005, 0.5698402909980532659114))); +} + +// HBIL pp.29 +lpfloat IlIntegral(lpfloat2 integral_factor, lpfloat angle_prev, lpfloat angle_new) +{ + lpfloat sin_prev, cos_prev, sin_new, cos_new; + sincos(angle_prev, sin_prev, cos_prev); + sincos(angle_new, sin_new, cos_new); + + lpfloat delta_angle = angle_new - angle_prev; + return max(0, integral_factor.x * (delta_angle + sin_prev * cos_prev - sin_new * cos_new) + integral_factor.y * (cos_prev * cos_prev - cos_new * cos_new)); +} + +void CalculateGI( + uint2 dtid, float2 uv, float viewspaceZ, lpfloat3 viewspaceNormal, + out lpfloat4 o_currGIAO, out lpfloat3 o_bentNormal) +{ + uint eyeIndex = GET_EYE_IDX(uv); + float2 normalizedScreenPos = ConvertToStereoUV(uv, eyeIndex); + + const lpfloat rcpNumSlices = rcp(NumSlices); + const lpfloat rcpNumSteps = rcp(NumSteps); + + const lpfloat falloffRange = (lpfloat)EffectFalloffRange * (lpfloat)EffectRadius; + const lpfloat rcpFalloffRange = rcp(falloffRange); + const lpfloat falloffFrom = (lpfloat)EffectRadius * ((lpfloat)1 - (lpfloat)EffectFalloffRange); + const lpfloat falloffMul = -rcpFalloffRange; + const lpfloat falloffAdd = falloffFrom * rcpFalloffRange + (lpfloat)1.0; + + // quality settings / tweaks / hacks + // if the offset is under approx pixel size (pixelTooCloseThreshold), push it out to the minimum distance + const lpfloat pixelTooCloseThreshold = 1.3; + // approx viewspace pixel size at pixCoord; approximation of NDCToViewspace( uv.xy + ViewportSize.xy, pixCenterPos.z 
).xy - pixCenterPos.xy; + const float2 pixelDirRBViewspaceSizeAtCenterZ = viewspaceZ.xx * (eyeIndex == 0 ? NDCToViewMul_x_PixelSize.xy : NDCToViewMul_x_PixelSize.zw); + + lpfloat screenspaceRadius = (lpfloat)EffectRadius / (lpfloat)pixelDirRBViewspaceSizeAtCenterZ.x; + // this is the min distance to start sampling from to avoid sampling from the center pixel (no useful data obtained from sampling center pixel) + const lpfloat minS = (lpfloat)pixelTooCloseThreshold / screenspaceRadius; + + ////////////////////////////////////////////////////////////////// + + const lpfloat2 localNoise = SpatioTemporalNoise(dtid, FrameIndex); + const lpfloat noiseSlice = localNoise.x; + const lpfloat noiseStep = localNoise.y; + + ////////////////////////////////////////////////////////////////// + + const float3 pixCenterPos = ScreenToViewPosition(normalizedScreenPos, viewspaceZ, eyeIndex); + const lpfloat3 viewVec = (lpfloat3)normalize(-pixCenterPos); + + lpfloat visibility = 0; + lpfloat3 radiance = 0; + lpfloat3 bentNormal = viewspaceNormal; + + for (uint slice = 0; slice < NumSlices; slice++) { + lpfloat phi = (PI * rcpNumSlices) * (slice + noiseSlice); + lpfloat3 directionVec = 0; + sincos(phi, directionVec.y, directionVec.x); + + // convert to screen units for later use + lpfloat2 omega = lpfloat2(directionVec.x, -directionVec.y) * screenspaceRadius * RcpFrameDim; +#ifdef VR + omega.x *= 2; +#endif + + const lpfloat3 orthoDirectionVec = directionVec - (dot(directionVec, viewVec) * viewVec); + const lpfloat3 axisVec = normalize(cross(orthoDirectionVec, viewVec)); + + lpfloat3 projectedNormalVec = viewspaceNormal - axisVec * dot(viewspaceNormal, axisVec); + lpfloat projectedNormalVecLength = length(projectedNormalVec); + lpfloat signNorm = (lpfloat)sign(dot(orthoDirectionVec, projectedNormalVec)); + lpfloat cosNorm = saturate(dot(projectedNormalVec, viewVec) / projectedNormalVecLength); + + lpfloat n = signNorm * ACos(cosNorm); + +#ifdef BITMASK + uint bitmask = 0; +#else + // 
this is a lower weight target; not using -1 as in the original paper because it is under horizon, so a 'weight' has different meaning based on the normal + lpfloat2 sincos_n; + sincos(n, sincos_n.x, sincos_n.y); + lpfloat lowHorizonCos1 = sincos_n.x; + const lpfloat lowHorizonCos0 = -lowHorizonCos1; + + lpfloat horizonCos0 = lowHorizonCos0; //-1; + lpfloat horizonCos1 = lowHorizonCos1; //-1; + + lpfloat3 sampleRadiance = 0; +#endif // BITMASK + + // R1 sequence (http://extremelearning.com.au/unreasonable-effectiveness-of-quasirandom-sequences/) + lpfloat stepNoise = frac(noiseStep + slice * 0.6180339887498948482); + + [unroll] for (int sideSign = -1; sideSign <= 1; sideSign += 2) + { + [loop] for (uint step = 0; step < NumSteps; step++) + { + lpfloat s = (step + stepNoise) * rcpNumSteps; + s *= s; // default 2 is fine + s += minS; // avoid sampling center pixel + + lpfloat2 sampleOffset = s * omega; // no pixel alignment from original xegtao + + float2 sampleScreenPos = normalizedScreenPos + sampleOffset * sideSign; + [branch] if (any(sampleScreenPos > 1.0) || any(sampleScreenPos < 0.0)) break; + float2 sampleUV = ConvertFromStereoUV(sampleScreenPos, eyeIndex); + + lpfloat sampleOffsetLength = length(sampleOffset); + lpfloat mipLevel = (lpfloat)clamp(log2(sampleOffsetLength) - DepthMIPSamplingOffset, 0, 5); +#ifdef HALF_RES + mipLevel = max(mipLevel, 1); +#endif + + float SZ = srcWorkingDepth.SampleLevel(samplerPointClamp, sampleUV, mipLevel); + [branch] if (SZ > DepthFadeRange.y) continue; + + float3 samplePos = ScreenToViewPosition(sampleScreenPos, SZ, eyeIndex); + float3 sampleDelta = samplePos - float3(pixCenterPos); + lpfloat3 sampleHorizonVec = (lpfloat3)normalize(sampleDelta); + +#ifdef BITMASK + float3 sampleBackPos = samplePos - viewVec * Thickness; + lpfloat3 sampleBackHorizonVec = normalize(sampleBackPos - pixCenterPos); + + lpfloat angleFront = FastACos(dot(sampleHorizonVec, viewVec)); // either clamp or use lpfloat version for whatever reason + lpfloat 
angleBack = FastACos(dot(sampleBackHorizonVec, viewVec)); + lpfloat2 angleRange = -sideSign * (sideSign == -1 ? lpfloat2(angleFront, angleBack) : lpfloat2(angleBack, angleFront)); + angleRange = smoothstep(0, 1, (angleRange + n) * RCP_PI + .5); // https://discord.com/channels/586242553746030596/586245736413528082/1102228968247144570 + + uint2 bitsRange = uint2(floor(angleRange.x * 32u), round((angleRange.y - angleRange.x) * 32u)); // ceil gets too gray for flat ground + uint maskedBits = ((1 << bitsRange.y) - 1) << bitsRange.x; + +#else + + // this is our own thickness heuristic that relies on sooner discarding samples behind the center + lpfloat falloffBase = length(lpfloat3(sampleDelta) * lpfloat3(1, 1, 1 + ThinOccluderCompensation)); + lpfloat weight = saturate(falloffBase * falloffMul + falloffAdd); + + // sample horizon cos + lpfloat shc = (lpfloat)dot(sampleHorizonVec, viewVec); + + // discard unwanted samples + shc = lerp(sideSign == -1 ? lowHorizonCos1 : lowHorizonCos0, shc, weight); + lpfloat horizonCos = sideSign == -1 ? 
horizonCos1 : horizonCos0; +#endif + +#ifdef GI + float giBoost = 1 + GIDistanceCompensation * smoothstep(0, GICompensationMaxDist, s * EffectRadius); + +# ifdef BITMASK + bool checkGI = maskedBits; +# else + bool checkGI = shc > horizonCos; +# endif + + if (checkGI) { + // IL + lpfloat frontBackMult = 1.f; +# ifdef BACKFACE + if (dot(DecodeNormal(srcNormal.SampleLevel(samplerPointClamp, sampleUV, 0).xy), sampleHorizonVec) > 0) // backface + frontBackMult = BackfaceStrength; +# endif + + if (frontBackMult > 0.f) { +# ifdef BITMASK + lpfloat3 sampleRadiance = srcRadiance.SampleLevel(samplerPointClamp, sampleUV * res_scale, mipLevel).rgb * frontBackMult * giBoost; + + sampleRadiance *= countbits(maskedBits & ~bitmask) * (lpfloat)0.03125; // 1/32 + sampleRadiance *= dot(viewspaceNormal, sampleHorizonVec); + sampleRadiance = max(0, sampleRadiance); + + radiance += sampleRadiance; +# else + lpfloat3 newSampleRadiance = 0; + newSampleRadiance = srcRadiance.SampleLevel(samplerPointClamp, sampleUV * res_scale, mipLevel).rgb * frontBackMult * giBoost; + + lpfloat anglePrev = n + sideSign * HALF_PI - FastACos(horizonCos); // lpfloat version is closest acos + lpfloat angleCurr = n + sideSign * HALF_PI - FastACos(shc); + lpfloat2 integralFactor = 0.5 * lpfloat2(dot(directionVec.xy, viewspaceNormal.xy) * sideSign, viewspaceNormal.z); + newSampleRadiance *= IlIntegral(integralFactor, anglePrev, angleCurr); + + // depth filtering. HBIL pp.38 + lpfloat t = smoothstep(0, 1, dot(viewspaceNormal, sampleHorizonVec)); + sampleRadiance = lerp(sampleRadiance, newSampleRadiance, t); + + radiance += max(0, sampleRadiance); +# endif + } +# ifndef BITMASK + horizonCos = shc; +# endif + } +#else +# ifndef BITMASK + // // thickness heuristic - see "4.3 Implementation details, Height-field assumption considerations" + // #if 0 // (disabled, not used) this should match the paper + // lpfloat newhorizonCos = max( horizonCos, shc ); + + // horizonCos = (horizonCos > shc)? 
lerp( newhorizonCos, shc, ThinOccluderCompensation ) :newhorizonCos ; + // #elif 0 // (disabled, not used) this is slightly different from the paper but cheaper and provides very similar results + // horizonCos = lerp(max(horizonCos, shc), shc, ThinOccluderCompensation); + // #else // this is a version where thicknessHeuristic is completely disabled + horizonCos = max(horizonCos, shc); +// #endif +# endif +#endif // GI + +#ifdef BITMASK + bitmask |= maskedBits; +#else + if (sideSign == -1) + horizonCos1 = horizonCos; + else + horizonCos0 = horizonCos; +#endif + } + } + +#ifdef BITMASK + visibility += (lpfloat)1.0 - countbits(bitmask) * (lpfloat)0.03125; + + // TODO: bent normal for bitmask? +#else +# if 1 // I can't figure out the slight overdarkening on high slopes, so I'm adding this fudge - in the training set, 0.05 is close (PSNR 21.34) to disabled (PSNR 21.45) + projectedNormalVecLength = lerp(projectedNormalVecLength, 1, 0.05); +# endif + + // line ~27, unrolled + lpfloat h0 = -FastACos(horizonCos1); // same, breaks stuff + lpfloat h1 = FastACos(horizonCos0); +# if 0 // we can skip clamping for a tiny little bit more performance + h0 = n + clamp( h0-n, (lpfloat)-HALF_PI, (lpfloat)HALF_PI ); + h1 = n + clamp( h1-n, (lpfloat)-HALF_PI, (lpfloat)HALF_PI ); +# endif + lpfloat iarc0 = ((lpfloat)cosNorm + (lpfloat)2 * (lpfloat)h0 * (lpfloat)sincos_n.x - (lpfloat)cos((lpfloat)2 * (lpfloat)h0 - n)); + lpfloat iarc1 = ((lpfloat)cosNorm + (lpfloat)2 * (lpfloat)h1 * (lpfloat)sincos_n.x - (lpfloat)cos((lpfloat)2 * (lpfloat)h1 - n)); + lpfloat localVisibility = (lpfloat)projectedNormalVecLength * (lpfloat)(iarc0 + iarc1) * (lpfloat).25; + visibility += localVisibility; + +# ifdef BENT_NORMAL + // see "Algorithm 2 Extension that computes bent normals b." 
+ lpfloat2 sincos_3h0mn, sincos_3h1mn, sincos_h0pn, sincos_h1pn; + sincos(3 * h0 - n, sincos_3h0mn.x, sincos_3h0mn.y); + sincos(3 * h1 - n, sincos_3h1mn.x, sincos_3h1mn.y); + sincos(h0 + n, sincos_h0pn.x, sincos_h0pn.y); + sincos(h1 + n, sincos_h1pn.x, sincos_h1pn.y); + + lpfloat t0 = (6 * sin(h0 - n) - sincos_3h0mn.x + 6 * sin(h1 - n) - sincos_3h1mn.x + 16 * sincos_n.x - 3 * (sincos_h0pn.x + sincos_h1pn.x)) * 0.08333333333; // 1/12 + lpfloat t1 = (-sincos_3h0mn.y - sincos_3h1mn.y + 8 * sincos_n.y - 3 * (sincos_h0pn.y + sincos_h1pn.y)) * 0.08333333333; + lpfloat3 localBentNormal = lpfloat3(directionVec.x * t0, directionVec.y * t0, -t1); + localBentNormal = (lpfloat3)mul(RotFromToMatrix(lpfloat3(0, 0, -1), viewVec), localBentNormal) * projectedNormalVecLength; + bentNormal += localBentNormal; +# endif +#endif // BITMASK + } + + lpfloat depthFade = GetDepthFade(viewspaceZ); + + visibility *= rcpNumSlices; + visibility = lerp(saturate(visibility), 1, depthFade); + visibility = pow(visibility, AOPower); + +#ifdef GI + radiance *= rcpNumSlices; + radiance = lerp(radiance, 0, depthFade); + radiance *= GIStrength; +#endif + +#ifdef BENT_NORMAL + bentNormal = normalize(bentNormal); +#endif + + o_currGIAO = lpfloat4(radiance, visibility); + o_bentNormal = bentNormal; +} + +[numthreads(8, 8, 1)] void main(const uint2 dtid + : SV_DispatchThreadID) { + float2 uv = (dtid + .5f) * RcpFrameDim; + + float viewspaceZ = READ_DEPTH(srcWorkingDepth, dtid); + + outPrevDepth[dtid] = viewspaceZ; + + lpfloat2 normalSample = FULLRES_LOAD(srcNormal, dtid, uv, samplerLinearClamp).xy; + lpfloat3 viewspaceNormal = (lpfloat3)DecodeNormal(normalSample); + +// Move center pixel slightly towards camera to avoid imprecision artifacts due to depth buffer imprecision; offset depends on depth texture format used +#if USE_HALF_FLOAT_PRECISION == 1 + viewspaceZ *= 0.99920h; // this is good for FP16 depth buffer +#else + viewspaceZ *= 0.99999; // this is good for FP32 depth buffer +#endif + + lpfloat4 
currGIAO = lpfloat4(0, 0, 0, 1); + lpfloat3 bentNormal = viewspaceNormal; + [branch] if (viewspaceZ < DepthFadeRange.y) + CalculateGI( + dtid, uv, viewspaceZ, viewspaceNormal, + currGIAO, bentNormal); + +#ifdef BENT_NORMAL + outBentNormal[dtid] = EncodeNormal(bentNormal); +#endif + +#ifdef TEMPORAL_DENOISER + if (viewspaceZ < DepthFadeRange.y) { + lpfloat4 prevGIAO = srcPrevGI[dtid]; + uint accumFrames = srcAccumFrames[dtid]; + + currGIAO = lerp(prevGIAO, currGIAO, fastRcpNR0(accumFrames)); + } +#endif + + currGIAO = any(ISNAN(currGIAO)) ? lpfloat4(0, 0, 0, 1) : currGIAO; + + outGI[dtid] = currGIAO; +} \ No newline at end of file diff --git a/features/Screen Space GI/Shaders/ScreenSpaceGI/hilbert.cs.hlsl b/features/Screen Space GI/Shaders/ScreenSpaceGI/hilbert.cs.hlsl new file mode 100644 index 000000000..03ae75e8d --- /dev/null +++ b/features/Screen Space GI/Shaders/ScreenSpaceGI/hilbert.cs.hlsl @@ -0,0 +1,46 @@ +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2016-2021, Intel Corporation +// +// SPDX-License-Identifier: MIT +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// XeGTAO is based on GTAO/GTSO "Jimenez et al. 
/ Practical Real-Time Strategies for Accurate Indirect Occlusion", +// https://www.activision.com/cdn/research/Practical_Real_Time_Strategies_for_Accurate_Indirect_Occlusion_NEW%20VERSION_COLOR.pdf +// +// Implementation: Filip Strugar (filip.strugar@intel.com), Steve Mccalla (\_/) +// Version: (see XeGTAO.h) (='.'=) +// Details: https://github.com/GameTechDev/XeGTAO (")_(") +// +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +RWTexture2D outHilbertLUT : register(u0); + +// From https://www.shadertoy.com/view/3tB3z3 - except we're using R2 here +#define XE_HILBERT_LEVEL 6U +#define XE_HILBERT_WIDTH ((1U << XE_HILBERT_LEVEL)) +#define XE_HILBERT_AREA (XE_HILBERT_WIDTH * XE_HILBERT_WIDTH) +inline uint HilbertIndex(uint posX, uint posY) +{ + uint index = 0U; + for (uint curLevel = XE_HILBERT_WIDTH / 2U; curLevel > 0U; curLevel /= 2U) { + uint regionX = (posX & curLevel) > 0U; + uint regionY = (posY & curLevel) > 0U; + index += curLevel * curLevel * ((3U * regionX) ^ regionY); + if (regionY == 0U) { + if (regionX == 1U) { + posX = uint((XE_HILBERT_WIDTH - 1U)) - posX; + posY = uint((XE_HILBERT_WIDTH - 1U)) - posY; + } + + uint temp = posX; + posX = posY; + posY = temp; + } + } + return index; +} + +[numthreads(32, 32, 1)] void main(uint2 tid + : SV_DispatchThreadID) { + outHilbertLUT[tid] = HilbertIndex(tid.x, tid.y); +} \ No newline at end of file diff --git a/features/Screen Space GI/Shaders/ScreenSpaceGI/output.cs.hlsl b/features/Screen Space GI/Shaders/ScreenSpaceGI/output.cs.hlsl new file mode 100644 index 000000000..e91816534 --- /dev/null +++ b/features/Screen Space GI/Shaders/ScreenSpaceGI/output.cs.hlsl @@ -0,0 +1,18 @@ +Texture2D srcGI : register(t0); +Texture2D srcAlbedo : register(t1); + +RWTexture2D outGI : register(u0); +RWTexture2D outGIAlbedo : register(u1); + +[numthreads(8, 8, 1)] void main(uint2 dtid + : SV_DispatchThreadID) { + half4 o = outGI[dtid]; + half4 i = srcGI[dtid]; 
+ half3 gi = i.rgb * srcAlbedo[dtid].rgb; + o.rgb += gi; + o.w *= i.w; + outGI[dtid] = o; +#ifdef GI_BOUNCE + outGIAlbedo[dtid] = gi; +#endif +} \ No newline at end of file diff --git a/features/Screen Space GI/Shaders/ScreenSpaceGI/prefilterDepths.cs.hlsl b/features/Screen Space GI/Shaders/ScreenSpaceGI/prefilterDepths.cs.hlsl new file mode 100644 index 000000000..930d33884 --- /dev/null +++ b/features/Screen Space GI/Shaders/ScreenSpaceGI/prefilterDepths.cs.hlsl @@ -0,0 +1,128 @@ +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2016-2021, Intel Corporation +// +// SPDX-License-Identifier: MIT +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// XeGTAO is based on GTAO/GTSO "Jimenez et al. / Practical Real-Time Strategies for Accurate Indirect Occlusion", +// https://www.activision.com/cdn/research/Practical_Real_Time_Strategies_for_Accurate_Indirect_Occlusion_NEW%20VERSION_COLOR.pdf +// +// Implementation: Filip Strugar (filip.strugar@intel.com), Steve Mccalla (\_/) +// Version: (see XeGTAO.h) (='.'=) +// Details: https://github.com/GameTechDev/XeGTAO (")_(") +// +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#include "common.hlsli" + +Texture2D srcNDCDepth : register(t0); + +RWTexture2D outDepth0 : register(u0); +RWTexture2D outDepth1 : register(u1); +RWTexture2D outDepth2 : register(u2); +RWTexture2D outDepth3 : register(u3); +RWTexture2D outDepth4 : register(u4); + +// This is also a good place to do non-linear depth conversion for cases where one wants the 'radius' (effectively the threshold between near-field and far-field GI), +// is required to be non-linear (i.e. very large outdoors environments). 
+lpfloat ClampDepth(float depth) +{ +#ifdef USE_HALF_FLOAT_PRECISION + return (lpfloat)clamp(depth, 0.0h, 65504.0h); +#else + return clamp(depth, 0.0, 3.402823466e+38); +#endif +} + +// weighted average depth filter: samples nearer than the farthest of the four by more than the falloff distance are weighted out +lpfloat DepthMIPFilter(lpfloat depth0, lpfloat depth1, lpfloat depth2, lpfloat depth3) +{ + lpfloat maxDepth = max(max(depth0, depth1), max(depth2, depth3)); + + const lpfloat depthRangeScaleFactor = 0.75; // found empirically :) + const lpfloat effectRadius = depthRangeScaleFactor * (lpfloat)EffectRadius; + const lpfloat falloffRange = (lpfloat)EffectFalloffRange * effectRadius; + const lpfloat rcpFalloffRange = rcp(falloffRange); + const lpfloat falloffFrom = effectRadius * ((lpfloat)1 - (lpfloat)EffectFalloffRange); // scaled radius, so falloffFrom + falloffRange == effectRadius + const lpfloat falloffMul = -rcpFalloffRange; + const lpfloat falloffAdd = falloffFrom * rcpFalloffRange + (lpfloat)1.0; + + lpfloat weight0 = saturate((maxDepth - depth0) * falloffMul + falloffAdd); + lpfloat weight1 = saturate((maxDepth - depth1) * falloffMul + falloffAdd); + lpfloat weight2 = saturate((maxDepth - depth2) * falloffMul + falloffAdd); + lpfloat weight3 = saturate((maxDepth - depth3) * falloffMul + falloffAdd); + + lpfloat weightSum = weight0 + weight1 + weight2 + weight3; + return (weight0 * depth0 + weight1 * depth1 + weight2 * depth2 + weight3 * depth3) / weightSum; +} + +groupshared lpfloat g_scratchDepths[8][8]; +[numthreads(8, 8, 1)] void main(uint2 dispatchThreadID + : SV_DispatchThreadID, uint2 groupThreadID + : SV_GroupThreadID) { + // MIP 0 + const uint2 baseCoord = dispatchThreadID; + const uint2 pixCoord = baseCoord * 2; + const float2 uv = (pixCoord + .5) * RcpFrameDim * res_scale; + const uint eyeIndex = GET_EYE_IDX(uv); + + float4 depths4 = srcNDCDepth.GatherRed(samplerPointClamp, uv, int2(1, 1)); + lpfloat depth0 = ClampDepth(ScreenToViewDepth(depths4.w)); + lpfloat depth1 = ClampDepth(ScreenToViewDepth(depths4.z)); + lpfloat depth2 = ClampDepth(ScreenToViewDepth(depths4.x)); + lpfloat depth3 = 
ClampDepth(ScreenToViewDepth(depths4.y)); + outDepth0[pixCoord + uint2(0, 0)] = (lpfloat)depth0; + outDepth0[pixCoord + uint2(1, 0)] = (lpfloat)depth1; + outDepth0[pixCoord + uint2(0, 1)] = (lpfloat)depth2; + outDepth0[pixCoord + uint2(1, 1)] = (lpfloat)depth3; + + // MIP 1 + lpfloat dm1 = DepthMIPFilter(depth0, depth1, depth2, depth3); + outDepth1[baseCoord] = (lpfloat)dm1; + g_scratchDepths[groupThreadID.x][groupThreadID.y] = dm1; + + GroupMemoryBarrierWithGroupSync(); + + // MIP 2 + [branch] if (all((groupThreadID.xy % 2) == 0)) + { + lpfloat inTL = g_scratchDepths[groupThreadID.x + 0][groupThreadID.y + 0]; + lpfloat inTR = g_scratchDepths[groupThreadID.x + 1][groupThreadID.y + 0]; + lpfloat inBL = g_scratchDepths[groupThreadID.x + 0][groupThreadID.y + 1]; + lpfloat inBR = g_scratchDepths[groupThreadID.x + 1][groupThreadID.y + 1]; + + lpfloat dm2 = DepthMIPFilter(inTL, inTR, inBL, inBR); + outDepth2[baseCoord / 2] = (lpfloat)dm2; + g_scratchDepths[groupThreadID.x][groupThreadID.y] = dm2; + } + + GroupMemoryBarrierWithGroupSync(); + + // MIP 3 + [branch] if (all((groupThreadID.xy % 4) == 0)) + { + lpfloat inTL = g_scratchDepths[groupThreadID.x + 0][groupThreadID.y + 0]; + lpfloat inTR = g_scratchDepths[groupThreadID.x + 2][groupThreadID.y + 0]; + lpfloat inBL = g_scratchDepths[groupThreadID.x + 0][groupThreadID.y + 2]; + lpfloat inBR = g_scratchDepths[groupThreadID.x + 2][groupThreadID.y + 2]; + + lpfloat dm3 = DepthMIPFilter(inTL, inTR, inBL, inBR); + outDepth3[baseCoord / 4] = (lpfloat)dm3; + g_scratchDepths[groupThreadID.x][groupThreadID.y] = dm3; + } + + GroupMemoryBarrierWithGroupSync(); + + // MIP 4 + [branch] if (all((groupThreadID.xy % 8) == 0)) + { + lpfloat inTL = g_scratchDepths[groupThreadID.x + 0][groupThreadID.y + 0]; + lpfloat inTR = g_scratchDepths[groupThreadID.x + 4][groupThreadID.y + 0]; + lpfloat inBL = g_scratchDepths[groupThreadID.x + 0][groupThreadID.y + 4]; + lpfloat inBR = g_scratchDepths[groupThreadID.x + 4][groupThreadID.y + 4]; + + 
lpfloat dm4 = DepthMIPFilter(inTL, inTR, inBL, inBR); + outDepth4[baseCoord / 8] = (lpfloat)dm4; + //g_scratchDepths[ groupThreadID.x ][ groupThreadID.y ] = dm4; + } +} \ No newline at end of file diff --git a/features/Screen Space GI/Shaders/ScreenSpaceGI/radianceDisocc.cs.hlsl b/features/Screen Space GI/Shaders/ScreenSpaceGI/radianceDisocc.cs.hlsl new file mode 100644 index 000000000..a837d3ad9 --- /dev/null +++ b/features/Screen Space GI/Shaders/ScreenSpaceGI/radianceDisocc.cs.hlsl @@ -0,0 +1,85 @@ +#include "../Common/GBuffer.hlsli" +#include "../Common/VR.hlsli" +#include "common.hlsli" + +Texture2D srcDiffuse : register(t0); +Texture2D srcPrevGI : register(t1); // maybe half-res +Texture2D srcCurrDepth : register(t2); +Texture2D srcCurrNormal : register(t3); +Texture2D srcPrevDepth : register(t4); // maybe half-res +Texture2D srcMotionVec : register(t5); +Texture2D srcPrevGIAlbedo : register(t6); + +RWTexture2D outRadianceDisocc : register(u0); +RWTexture2D outAccumFrames : register(u1); +RWTexture2D outRemappedPrevGI : register(u2); + +#if (defined(GI) && defined(GI_BOUNCE)) || defined(TEMPORAL_DENOISER) +# define REPROJECTION +#endif + +[numthreads(8, 8, 1)] void main(const uint2 pixCoord + : SV_DispatchThreadID) { + const float2 uv = (pixCoord + .5) * RcpFrameDim; + uint eyeIndex = GET_EYE_IDX(uv); + const float2 screen_pos = ConvertToStereoUV(uv, eyeIndex); + + float2 prev_uv = uv; +#ifdef REPROJECTION + prev_uv += FULLRES_LOAD(srcMotionVec, pixCoord, uv, samplerLinearClamp); +#endif + float2 prev_screen_pos = ConvertToStereoUV(prev_uv, eyeIndex); + + const float curr_depth = READ_DEPTH(srcCurrDepth, pixCoord); + + bool valid_history = false; + +#ifdef REPROJECTION + if ((curr_depth <= DepthFadeRange.y) && !(any(prev_screen_pos < 0) || any(prev_screen_pos > 1))) { + float3 curr_pos = ScreenToViewPosition(screen_pos, curr_depth, eyeIndex); + curr_pos = ViewToWorldPosition(curr_pos, InvViewMatrix[eyeIndex]); + + const float prev_depth = 
srcPrevDepth.SampleLevel(samplerPointClamp, prev_uv * res_scale, 0); + float3 prev_pos = ScreenToViewPosition(prev_screen_pos, prev_depth, eyeIndex); + prev_pos = ViewToWorldPosition(prev_pos, PrevInvViewMat[eyeIndex]); + + float3 delta_pos = curr_pos - prev_pos; + bool depth_pass = dot(delta_pos, delta_pos) < DepthDisocclusion * DepthDisocclusion; + valid_history = depth_pass; + } +#endif + + half4 prev_gi_albedo = 0; + half4 prev_gi = 0; + +#ifdef REPROJECTION + [branch] if (valid_history) + { +# if defined(GI) && defined(GI_BOUNCE) + prev_gi_albedo = srcPrevGIAlbedo.SampleLevel(samplerLinearClamp, prev_uv, 0); +# endif +# ifdef TEMPORAL_DENOISER + prev_gi = srcPrevGI.SampleLevel(samplerLinearClamp, prev_uv * res_scale, 0); +# endif + } +#endif + + half3 radiance = 0; +#ifdef GI + radiance = FULLRES_LOAD(srcDiffuse, pixCoord, uv, samplerLinearClamp); +# ifdef GI_BOUNCE + radiance += prev_gi_albedo.rgb * GIBounceFade; +# endif + outRadianceDisocc[pixCoord] = radiance; +#endif + +#ifdef TEMPORAL_DENOISER + uint accum_frames = 0; + [branch] if (valid_history) + accum_frames = outAccumFrames[pixCoord]; + accum_frames = min(accum_frames + 1, MaxAccumFrames); + + outAccumFrames[pixCoord] = accum_frames; + outRemappedPrevGI[pixCoord] = prev_gi; +#endif +} \ No newline at end of file diff --git a/features/Screen Space GI/Shaders/ScreenSpaceGI/upsample.cs.hlsl b/features/Screen Space GI/Shaders/ScreenSpaceGI/upsample.cs.hlsl new file mode 100644 index 000000000..f483f6338 --- /dev/null +++ b/features/Screen Space GI/Shaders/ScreenSpaceGI/upsample.cs.hlsl @@ -0,0 +1,58 @@ +// depth-aware upsampling: https://gist.github.com/pixelmager/a4364ea18305ed5ca707d89ddc5f8743 + +#include "../Common/FastMath.hlsli" +#include "common.hlsli" + +Texture2D srcDepth : register(t0); +Texture2D srcGI : register(t1); // half-res + +RWTexture2D outGI : register(u0); + +#define min4(v) min(min(v.x, v.y), min(v.z, v.w)) +#define max4(v) max(max(v.x, v.y), max(v.z, v.w)) + +[numthreads(8, 8, 1)] 
void main(const uint2 dtid + : SV_DispatchThreadID) { + int2 px00 = (dtid >> 1) + (dtid & 1) - 1; + int2 px10 = px00 + int2(1, 0); + int2 px01 = px00 + int2(0, 1); + int2 px11 = px00 + int2(1, 1); + + float4 d = float4( + srcDepth.Load(int3(px00, 1)), + srcDepth.Load(int3(px01, 1)), + srcDepth.Load(int3(px10, 1)), + srcDepth.Load(int3(px11, 1))); + + // note: edge-detection + float mind = min4(d); + float maxd = max4(d); + float diffd = maxd - mind; + float avg = dot(d, 0.25.xxxx); + bool d_edge = (diffd / avg) < 0.1; + + float4 atten; + + [branch] if (d_edge) + { + float4 gisample0 = srcGI[px00]; + float4 gisample1 = srcGI[px01]; + float4 gisample2 = srcGI[px10]; + float4 gisample3 = srcGI[px11]; + + float bgdepth = srcDepth[dtid]; + + //note: depth weighing from https://www.ppsloan.org/publications/ProxyPG.pdf#page=5 + float4 dd = abs(d - bgdepth); + float4 w = 1.0 / (dd + 0.00001); + float sumw = w.x + w.y + w.z + w.w; + + atten = (gisample0 * w.x + gisample1 * w.y + gisample2 * w.z + gisample3 * w.w) / (sumw + 0.00001); + } + else + { + atten = srcGI.SampleLevel(samplerLinearClamp, (dtid + .5) * RcpFrameDim * .25, 0); + } + + outGI[dtid] = atten; +} \ No newline at end of file diff --git a/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/Common.hlsl b/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/Common.hlsl deleted file mode 100644 index 8c21b82ce..000000000 --- a/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/Common.hlsl +++ /dev/null @@ -1,51 +0,0 @@ -#include "../Common/Constants.hlsli" -#include "../Common/VR.hlsli" - -RWTexture2D OcclusionRW : register(u0); - -SamplerState LinearSampler : register(s0); - -Texture2D DepthTexture : register(t0); - -cbuffer PerFrame : register(b0) -{ - float2 BufferDim; - float2 RcpBufferDim; - float4x4 ProjMatrix[2]; - float4x4 InvProjMatrix[2]; - float4 CameraData; - float4 DynamicRes; - float4 InvDirLightDirectionVS; - float ShadowDistance; - uint MaxSamples; - float FarDistanceScale; - float 
FarThicknessScale; - float FarHardness; - float NearDistance; - float NearThickness; - float NearHardness; - float BlurRadius; - float BlurDropoff; - bool Enabled; -}; - -// Get a raw depth from the depth buffer. -float GetDepth(float2 uv) -{ - return DepthTexture.SampleLevel(LinearSampler, uv * DynamicRes.xy, 0).r; -} - -// Inverse project UV + raw depth into the view space. -float3 InverseProjectUVZ(float2 uv, float z, uint a_eyeIndex) -{ - uv.y = 1 - uv.y; - float4 cp = float4(uv * 2 - 1, z, 1); - float4 vp = mul(InvProjMatrix[a_eyeIndex], cp); - return vp.xyz / vp.w; -} - -float2 ViewToUV(float3 position, bool is_position, uint a_eyeIndex) -{ - float4 uv = mul(ProjMatrix[a_eyeIndex], float4(position, (float)is_position)); - return (uv.xy / uv.w) * float2(0.5f, -0.5f) + 0.5f; -} diff --git a/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/FilterCS.hlsl b/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/FilterCS.hlsl deleted file mode 100644 index e85a08dc3..000000000 --- a/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/FilterCS.hlsl +++ /dev/null @@ -1,115 +0,0 @@ - -// Copyright (C) 2019-2022 Alessio Tamburini (aletamburini78@gmail.com) AKA Alenet -// All rights reserved. - -// The current maintainer is Llde https://github.com/llde - -// Contributor(s): -// - Timeslip -// - scanti -// - ShadeMe -// - Ethatron -// - GBR -// - mcfurston -// - noonemusteverknow -// - And all others that helped fixing bugs, betatesting or improving shaders - -// The source code is under public license. 
- -// The redistribution and use of the binaries, with or without modification, are permitted and publishing binaries is allowed, but the following conditions MUST be met: -// - the current license MUST be included in the redistribution of the binaries and it cannot be modified -// - the distributed binaries MUST report the word "UNOFFICIAL" (or a branch name) in the product description -// - the distributer MUST state that the official channels cannot be used for unofficial versions. -// - the source MUST be provided (included or separated) - -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER (AUTHOR) 'AS IT IS', WITHOUT WARRANTY OF ANY KIND. -// THIS SOFTWARE IS DISTRIBUTED FOR FREE AND ANY EXPRESS OR IMPLIED MONEY REQUEST IS DISCLAIMED BY THE AUTHOR. YOU CANNOT EARN MONEY (DIRECTLY OR INDIRECTLY) WITH ITS DISTRIBUTION. - -// HAVE FUN! - -#include "Common.hlsl" - -Texture2D OcclusionTexture : register(t1); - -#define cKernelSize 12 - -static const float BlurWeights[cKernelSize] = { - 0.057424882f, - 0.058107773f, - 0.061460144f, - 0.071020611f, - 0.088092873f, - 0.106530916f, - 0.106530916f, - 0.088092873f, - 0.071020611f, - 0.061460144f, - 0.058107773f, - 0.057424882f -}; - -static const float2 BlurOffsets[cKernelSize] = { - float2(-6.0f, -6.0f), - float2(-5.0f, -5.0f), - float2(-4.0f, -4.0f), - float2(-3.0f, -3.0f), - float2(-2.0f, -2.0f), - float2(-1.0f, -1.0f), - float2(1.0f, 1.0f), - float2(2.0f, 2.0f), - float2(3.0f, 3.0f), - float2(4.0f, 4.0f), - float2(5.0f, 5.0f), - float2(6.0f, 6.0f) -}; - -float InverseProjectUV(float2 uv, uint a_eyeIndex) -{ - float depth = GetDepth(uv); - return InverseProjectUVZ(uv, depth, a_eyeIndex).z; -} - -[numthreads(32, 32, 1)] void main(uint3 DTid - : SV_DispatchThreadID) { - -#if defined(HORIZONTAL) - float2 OffsetMask = float2(1.0f, 0.0f); -#elif defined(VERTICAL) - float2 OffsetMask = float2(0.0f, 1.0f); -#else -# error "Must define an axis!" 
-#endif - - float2 texCoord = (DTid.xy + 0.5) * RcpBufferDim; - uint eyeIndex = GetEyeIndexFromTexCoord(texCoord); - - float startDepth = GetDepth(texCoord * 2 * DynamicRes.zw); - if (startDepth >= 1) - return; - - float WeightSum = 0.114725602f; - float color1 = OcclusionTexture.SampleLevel(LinearSampler, texCoord * 2, 0).r * WeightSum; - - float depth1 = InverseProjectUVZ(texCoord * 2, startDepth, eyeIndex).z; - - float depthDrop = depth1 * BlurDropoff; - - [unroll] for (int i = 0; i < cKernelSize; i++) - { -#if defined(HORIZONTAL) - float2 uv = texCoord + (BlurOffsets[i] * OffsetMask * RcpBufferDim) * BlurRadius; -#elif defined(VERTICAL) - float2 uv = texCoord + (BlurOffsets[i] * OffsetMask * RcpBufferDim / 2) * BlurRadius; -#endif - float4 color2 = OcclusionTexture.SampleLevel(LinearSampler, uv * 2, 0).r; - float depth2 = InverseProjectUV(uv * 2, eyeIndex); - - // Depth-awareness - float awareness = saturate(depthDrop - abs(depth1 - depth2)); - - color1 += BlurWeights[i] * color2 * awareness; - WeightSum += BlurWeights[i] * awareness; - } - color1 /= WeightSum; - OcclusionRW[DTid.xy] = color1; -} diff --git a/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/NormalMappingShadowsCS.hlsl b/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/NormalMappingShadowsCS.hlsl new file mode 100644 index 000000000..e88e4239c --- /dev/null +++ b/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/NormalMappingShadowsCS.hlsl @@ -0,0 +1,113 @@ +#include "../Common/DeferredShared.hlsli" +#include "../Common/GBuffer.hlsli" +#include "../Common/VR.hlsli" + +Texture2D NormalRoughnessTexture : register(t0); +Texture2D DepthTexture : register(t1); +Texture2D MasksTexture : register(t2); + +RWTexture2D ShadowMaskTextureRW : register(u0); + +half GetScreenDepth(half depth) +{ + return (CameraData.w / (-depth * CameraData.z + CameraData.x)); +} + +half InterleavedGradientNoise(half2 uv) +{ + // Temporal factor + half frameStep = half(FrameCount % 16) * 0.0625f; + uv.x 
+= frameStep * 4.7526; + uv.y += frameStep * 3.1914; + + half3 magic = half3(0.06711056f, 0.00583715f, 52.9829189f); + return frac(magic.z * frac(dot(uv, magic.xy))); +} + +// Inverse project UV + raw depth into the view space. +half3 DepthToView(half2 uv, half z, uint a_eyeIndex) +{ + uv.y = 1 - uv.y; + half4 cp = half4(uv * 2 - 1, z, 1); + half4 vp = mul(InvProjMatrix[a_eyeIndex], cp); + return vp.xyz / vp.w; +} + +half2 ViewToUV(half3 position, bool is_position, uint a_eyeIndex) +{ + half4 uv = mul(ProjMatrix[a_eyeIndex], half4(position, (half)is_position)); + return (uv.xy / uv.w) * half2(0.5f, -0.5f) + 0.5f; +} + +[numthreads(32, 32, 1)] void main(uint3 globalId + : SV_DispatchThreadID, uint3 localId + : SV_GroupThreadID, uint3 groupId + : SV_GroupID) { + half2 uv = half2(globalId.xy + 0.5) * RcpBufferDim; + + half3 normalVS = DecodeNormal(NormalRoughnessTexture[globalId.xy].xy); + + half skinMask = MasksTexture[globalId.xy].x; + if (skinMask == 1.0) + return; + + half shadowMask = ShadowMaskTextureRW[globalId.xy].x; + + half rawDepth = DepthTexture[globalId.xy]; + if (rawDepth == 1.0) + return; + + half depth = GetScreenDepth(rawDepth); + if (depth < 16.5) + return; + + uint eyeIndex = GetEyeIndexFromTexCoord(uv); + + half3 viewPosition = DepthToView(ConvertFromStereoUV(uv, eyeIndex), rawDepth, eyeIndex); // viewPosition is in VR nonstereospace + viewPosition.z = depth; + + half3 endPosVS = viewPosition + DirLightDirectionVS[eyeIndex].xyz * 5; + half2 endPosUV = ViewToUV(endPosVS, false, eyeIndex); + + half2 startPosPixel = clamp(uv * BufferDim, 0, BufferDim); // uv is in VR stereo space + half2 endPosPixel = clamp(ConvertToStereoUV(endPosUV, eyeIndex) * BufferDim, 0, BufferDim); // convert back to stereospace since derived from viewPosition + + half NdotL = dot(normalVS, DirLightDirectionVS[eyeIndex].xyz); + + half shadow = 0; + + half3 viewDirectionVS = normalize(viewPosition); + + // Fade based on perceivable difference + half fade = smoothstep(4, 5, 
length(startPosPixel - endPosPixel)); + + // Only march for: not shadowed, not self-shadowed, march distance greater than 1 pixel + bool validMarchPixel = NdotL > 0.0 && shadowMask != 0.0 && fade > 0.0; + if (validMarchPixel) { + half step = 1.0 / 5.0; + half pos = step + step * (InterleavedGradientNoise(globalId.xy) * 2.0 - 1.0); + half slope = -NdotL; + + for (int i = 0; i < 5; i++) { + uint2 tmpCoords = lerp(startPosPixel, endPosPixel, pos); // coords are in stereo space to access Textures + half3 tmpNormal = DecodeNormal(NormalRoughnessTexture[tmpCoords].xy); + half tmpDepth = GetScreenDepth(DepthTexture[tmpCoords]); + half tmpNdotL = dot(tmpNormal, DirLightDirectionVS[eyeIndex].xyz); + + half shadowed = -tmpNdotL; + shadowed += NdotL * pos; + shadowed += max(0, dot(tmpNormal, viewDirectionVS)); + shadowed *= 1 - min(1, abs(depth - tmpDepth) * 0.1); + + slope += shadowed; + + shadow += max(0, slope); + pos += step; + } + } + + shadow = saturate(1.0 - shadow); + shadow = lerp(1.0, shadow, saturate(fade)); + + ShadowMaskTextureRW[globalId.xy] = min(shadowMask, lerp(shadow, 1.0, skinMask)); +} diff --git a/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/RaymarchCS.hlsl b/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/RaymarchCS.hlsl index dd7288a3b..dd8d2d39f 100644 --- a/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/RaymarchCS.hlsl +++ b/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/RaymarchCS.hlsl @@ -1,142 +1,57 @@ -#include "Common.hlsl" -Texture2D ShadowTexture : register(t1); +#include "../Common/DeferredShared.hlsli" -// Needed to fix a bug in VR that caused the arm -// to have the "outline" of the VR headset canvas -// rendered into it and to not cast rays outside the eyes -#ifdef VR -Texture2D StencilTexture : register(t89); - -float GetStencil(float2 uv) -{ - return StencilTexture.Load(int3(uv * BufferDim * DynamicRes.xy, 0)).g; -} - -float GetStencil(float2 uv, uint a_eyeIndex) -{ - uv = ConvertToStereoUV(uv, 
a_eyeIndex); - return GetStencil(uv); -} -#endif // VR - -bool IsSaturated(float value) -{ - return value == saturate(value); -} -bool IsSaturated(float2 value) { return IsSaturated(value.x) && IsSaturated(value.y); } - -// https://www.shadertoy.com/view/Xt23zV -float smoothbumpstep(float edge0, float edge1, float x) -{ - x = 1.0 - abs(clamp((x - edge0) / (edge1 - edge0), 0.0, 1.0) - .5) * 2.0; - return x * x * (3.0 - x - x); -} - -// Derived from the interleaved gradient function from Jimenez 2014 http://goo.gl/eomGso -float InterleavedGradientNoise(float2 uv) -{ - float3 magic = float3(0.06711056f, 0.00583715f, 52.9829189f); - return frac(magic.z * frac(dot(uv, magic.xy))); -} - -float GetScreenDepth(float depth) +half GetScreenDepth(half depth) { return (CameraData.w / (-depth * CameraData.z + CameraData.x)); } -float GetScreenDepth(float2 uv, uint a_eyeIndex) -{ - uv = ConvertToStereoUV(uv, a_eyeIndex); - float depth = GetDepth(uv); - return GetScreenDepth(depth); -} - -float ScreenSpaceShadowsUV(float2 texcoord, float3 lightDirectionVS, uint eyeIndex) -{ -#ifdef VR - if (GetStencil(texcoord) != 0) - return 1; -#endif // VR - - // Ignore the sky - float startDepth = GetDepth(texcoord); - if (startDepth >= 1) - return 1; - - // Compute ray position in view-space - float3 rayPos = InverseProjectUVZ(ConvertFromStereoUV(texcoord, eyeIndex), startDepth, eyeIndex); - - // Blends effect variables between near, mid and far field - float blendFactorFar = smoothstep(ShadowDistance / 3, ShadowDistance / 2, rayPos.z); - float blendFactorMid = smoothbumpstep(0, ShadowDistance / 2, rayPos.z); - - // Max shadow length, longer shadows are less accurate - float maxDistance = lerp(NearDistance, rayPos.z * FarDistanceScale, blendFactorFar); - - // Max ray steps, affects quality and performance - uint maxSteps = max(1, (uint)((float)MaxSamples * (1 - blendFactorMid))); - - // How far to move each sample each step - float stepLength = maxDistance / (float)maxSteps; - - // Compute 
ray step - float3 rayStep = lightDirectionVS * stepLength; - - // Offset starting position with interleaved gradient noise - float offset = InterleavedGradientNoise(texcoord * BufferDim); - rayPos += rayStep * offset; - - float thickness = lerp(NearThickness, rayPos.z * FarThicknessScale, blendFactorFar); - - // Accumulate samples - float shadow = 0.0f; - uint samples = 0; - - float2 rayUV = 0.0f; - for (uint i = 0; i < maxSteps; i++) { - samples++; - - // Step the ray - rayPos += rayStep; - rayUV = ViewToUV(rayPos, true, eyeIndex); - - // Ensure the UV coordinates are inside the screen - if (!IsSaturated(rayUV)) - break; - -#ifdef VR - if (GetStencil(rayUV, eyeIndex) != 0) - break; -#endif // VR - - // Compute the difference between the ray's and the camera's depth - float rayDepth = GetScreenDepth(rayUV, eyeIndex); - - // Difference between the current ray distance and the marched light - float depthDelta = rayPos.z - rayDepth; - - // Distant shadows simulate real shadows whereas near shadows are only intended for small objects - float rayShadow = depthDelta / thickness; - - // Check if the depth difference is considered a shadow - if (rayShadow > 0.0f && rayShadow <= 1.0f) - shadow += rayShadow; - } - - // Average samples - shadow /= samples; - - // Intensity and sharpness of shadows - shadow *= lerp(NearHardness, FarHardness, blendFactorFar); - - // Convert to visibility - return 1 - saturate(shadow); -} - -[numthreads(32, 32, 1)] void main(uint3 DTid - : SV_DispatchThreadID) { - float2 texCoord = (DTid.xy + 0.5) * RcpBufferDim * DynamicRes.zw; - uint eyeIndex = GetEyeIndexFromTexCoord(texCoord); - OcclusionRW[DTid.xy] = ScreenSpaceShadowsUV(texCoord, InvDirLightDirectionVS.xyz, eyeIndex); +#include "bend_sss_gpu.hlsli" + +Texture2D DepthTexture : register(t0); // Depth Buffer Texture (rasterized non-linear depth) +RWTexture2D OutputTexture : register(u0); // Output screen-space shadow buffer (typically single-channel, 8bit) +SamplerState PointBorderSampler : 
register(s0); // A point sampler, with Wrap Mode set to Clamp-To-Border-Color (D3D12_TEXTURE_ADDRESS_MODE_BORDER), and Border Color set to "FarDepthValue" (typically zero), or some other far-depth value out of DepthBounds. + // If you have issues where invalid shadows are appearing from off-screen, it is likely that this sampler is not correctly setup + +cbuffer PerFrame : register(b1) +{ + // Runtime data returned from BuildDispatchList(): + float4 LightCoordinate; // Values stored in DispatchList::LightCoordinate_Shader by BuildDispatchList() + int2 WaveOffset; // Values stored in DispatchData::WaveOffset_Shader by BuildDispatchList() + + // Renderer Specific Values: + float FarDepthValue; // Set to the Depth Buffer Value for the far clip plane, as determined by renderer projection matrix setup (typically 0). + float NearDepthValue; // Set to the Depth Buffer Value for the near clip plane, as determined by renderer projection matrix setup (typically 1). + + // Sampling data: + float2 InvDepthTextureSize; // Inverse of the texture dimensions for 'DepthTexture' (used to convert from pixel coordinates to UVs) + // If 'PointBorderSampler' is an Unnormalized sampler, then this value can be hard-coded to 1. + // The 'USE_HALF_PIXEL_OFFSET' macro might need to be defined if sampling at exact pixel coordinates isn't precise (e.g., if odd patterns appear in the shadow). 
+ float SurfaceThickness; + float BilinearThreshold; + float ShadowContrast; +}; + +[numthreads(WAVE_SIZE, 1, 1)] void main( + int3 groupID + : SV_GroupID, + int groupThreadID + : SV_GroupThreadID) { + DispatchParameters parameters; + parameters.SetDefaults(); + + parameters.LightCoordinate = LightCoordinate; + parameters.WaveOffset = WaveOffset; + parameters.FarDepthValue = 1; /* hard-coded: far plane at depth 1; the PerFrame FarDepthValue field is not read in this entry point */ + parameters.NearDepthValue = 0; /* hard-coded: near plane at depth 0; the PerFrame NearDepthValue field is not read in this entry point */ + parameters.InvDepthTextureSize = InvDepthTextureSize; + parameters.DepthTexture = DepthTexture; + parameters.OutputTexture = OutputTexture; + parameters.PointBorderSampler = PointBorderSampler; + + parameters.SurfaceThickness = SurfaceThickness; + parameters.BilinearThreshold = BilinearThreshold; + parameters.ShadowContrast = ShadowContrast; + + WriteScreenSpaceShadow(parameters, groupID, groupThreadID); } \ No newline at end of file diff --git a/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/ShadowsPS.hlsli b/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/ShadowsPS.hlsli deleted file mode 100644 index ecf53d7df..000000000 --- a/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/ShadowsPS.hlsli +++ /dev/null @@ -1,31 +0,0 @@ -cbuffer SSSData : register(b5) -{ - bool EnableSSS; - uint FrameCount; -}; - -Texture2D TexOcclusionSampler : register(t21); - -#define LinearSampler SampShadowMaskSampler - -float2 SSGetDynamicResolutionAdjustedScreenPosition(float2 uv) -{ - return uv * DynamicResolutionParams1.xy; -} - -float PrepassScreenSpaceShadows(float3 positionWS, uint eyeIndex = 0) -{ -#if defined(EYE) - return 1; -#else - if (EnableSSS && !FrameParams.z) { - float2 texCoord = ViewToUV(WorldToView(positionWS, true, eyeIndex), true, eyeIndex); - texCoord = ConvertToStereoUV(texCoord, eyeIndex); - texCoord = SSGetDynamicResolutionAdjustedScreenPosition(texCoord); - texCoord /= 2; - float shadow = TexOcclusionSampler.SampleLevel(LinearSampler, texCoord, 0); - return shadow; - } - return 1; -#endif -} \ No newline at end of file diff
--git a/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/bend_sss_gpu.hlsli b/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/bend_sss_gpu.hlsli new file mode 100644 index 000000000..e8aa15452 --- /dev/null +++ b/features/Screen-Space Shadows/Shaders/ScreenSpaceShadows/bend_sss_gpu.hlsli @@ -0,0 +1,472 @@ + +// Copyright 2023 Sony Interactive Entertainment. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// If you have feedback, or found this code useful, we'd love to hear from you. +// https://www.bendstudio.com +// https://www.twitter.com/bendstudio +// +// We are *always* looking for talented graphics and technical programmers! +// https://www.bendstudio.com/careers + +// Common screen space shadow projection code (GPU): +//-------------------------------------------------------------- + +// The main shadow generation function is WriteScreenSpaceShadow(), it will read a depth texture, and write to a shadow texture +// This code is setup to target DX12 DXC shader compiler, but has also been tested on PS5 with appropriate API remapping. +// It can compile to DX11, but requires some modifications (e.g., early-out's use of wave intrinsics is not supported in DX11). +// Note; you can customize the 'EarlyOutPixel' function to perform custom early-out logic to optimize this shader. 
+ +// The following Macros must be defined in the compute shader file before including this header: +// +// +#define WAVE_SIZE 64 // Wavefront size of the compute shader running this code. \ + // // numthreads[WAVE_SIZE, 1, 1] \ + // // Only tested with 64. \ + // \ + // #define SAMPLE_COUNT 512 // Number of shadow samples per-pixel. \ + // // Determines overall cost, as this value controls the length of the shadow (in pixels). \ + // // The number of texture-reads performed per-thread will be (SAMPLE_COUNT / WAVE_SIZE + 2) * 2. \ + // // Recommended starting value is 60 (This would be 4 reads per thread if WAVE_SIZE is 64). A value of 64 would require 6 reads. \ + // \ + // // Not all shadow samples are treated the same: \ + // // The bulk of samples will average together into groups of 4, to produce a slightly smoothed result (so one sample cannot fully shadow the pixel) \ + // // However, the samples very close to the start pixel can optionally be forced to disable this averaging, so a single sample can fully shadow the pixel (HardShadowSamples) \ + // // Plus, a number of the last (most distant) samples can (for a small cost) apply a fade-out effect to soften a harsh shadow cutoff (FadeOutSamples) \ + // +#define HARD_SHADOW_SAMPLES 0 // Number of initial shadow samples that will produce a hard shadow, and not perform sample-averaging. \ + // // This trades aliasing for grounding pixels very close to the shadow caster. \ + // // Recommended starting value: 4 \ + // +#define FADE_OUT_SAMPLES 0 // Number of samples that will fade out at the end of the shadow (for a minor cost). \ + // // Recommended starting value: 8 + +//#if defined(__HLSL_VERSION) || defined(__hlsl_dx_compiler) + +#define USE_HALF_PIXEL_OFFSET 1 // Apply a 0.5 texel offset when sampling a texture. Toggle this macro if the output shadow has odd, regular grid-like artefacts.
+ +// HLSL enforces that a pixel offset in a Sample() call must be a compile time constant, which isn't always required - and in some cases can give a small perf boost if used. +#define USE_UV_PIXEL_BIAS 1 // Use Sample(uv + bias) instead of Sample(uv, bias) + +//#endif + +// This is the list of runtime properties to pass to the shader +// Wherever possible, it is highly recommended to have these values be compile-time constants +struct DispatchParameters +{ + // Visual configuration: + // These values will require manual tuning. + // All shadow computation is performed in non-linear depth space (not in world space), so tuned value choices will depend on scene depth distribution (as determined by the Projection Matrix setup). + + half SurfaceThickness; // This is the assumed thickness of each pixel for shadow-casting, measured as a percentage of the difference in non-linear depth between the sample and FarDepthValue. + // Recommended starting value: 0.005 (0.5%) + + half BilinearThreshold; // Percentage threshold for determining if the difference between two depth values represents an edge, and should not perform interpolation. + // To tune this value, set 'DebugOutputEdgeMask' to true to visualize where edges are being detected. + // Recommended starting value: 0.02 (2%) + + half ShadowContrast; // A contrast boost is applied to the transition in/out of shadow. + // Recommended starting value: 2 or 4. Values >= 1 are valid. + + bool IgnoreEdgePixels; // If an edge is detected, the edge pixel will not contribute to the shadow. + // If a very flat surface is being lit and rendered at a grazing angle, the edge detect may incorrectly detect multiple 'edge' pixels along that flat surface. + // In these cases, the grazing angle of the light may subsequently produce aliasing artefacts in the shadow where these incorrect edges were detected.
+ // Setting this value to true would mean that those pixels would not cast a shadow, however it can also thin out otherwise valid shadows, especially on foliage edges. + // Recommended starting value: false, unless typical scenes have numerous large flat surfaces, in which case true. + + bool UsePrecisionOffset; // A small offset is applied to account for an imprecise depth buffer (recommend off) + + bool BilinearSamplingOffsetMode; // There are two modes to compute bilinear samples for shadow depth: + // true = sampling points for pixels are offset to the wavefront shared ray, shadow depths and starting depths are the same. Can project more jagged/aliased shadow lines in some cases. + // false = sampling points for pixels are not offset and start from pixel centers. Shadow depths are biased based on depth gradient across the current pixel bilinear sample. Has more issues in back-face / grazing areas. + // Both modes have subtle visual differences, which may / may not exaggerate depth buffer aliasing that gets projected in to the shadow. + // Evaluating the visual difference between each mode is recommended, then hard-coding the mode used to optimize the shader. + // Recommended starting value: false + + // Debug views + bool DebugOutputEdgeMask; // Use this to visualize edges, for tuning the 'BilinearThreshold' value. + bool DebugOutputThreadIndex; // Debug output to visualize layout of compute threads + bool DebugOutputWaveIndex; // Debug output to visualize layout of compute wavefronts, useful to sanity check the Light Coordinate is being computed correctly. + + // Culling / Early out: + //half2 DepthBounds; // Depth Bounds (min, max) for the on-screen volume of the light. Typically (0,1) for directional lights. Only used when 'UseEarlyOut' is true. 
+ + //bool UseEarlyOut; // Set to true to early-out when depth values are not within [DepthBounds] - otherwise DepthBounds is unused + // [Optionally customize the 'EarlyOutPixel()' function to perform your own early-out logic, e.g. skipping pixels that a shadow map indicates are already fully occluded] + // This can dramatically reduce cost when only a small portion of the pixels need a shadow term (e.g., cull out sky pixels), however it does have some overhead (~15%) in worst-case where nothing early-outs + // Note; Early-out is most efficient when WAVE_SIZE matches the hardware wavefront size - otherwise cross wave communication is required. + + // Set sensible starting tuning values + void SetDefaults() + { + SurfaceThickness = 0.005; + BilinearThreshold = 0.02; + ShadowContrast = 4; + IgnoreEdgePixels = false; + UsePrecisionOffset = false; + BilinearSamplingOffsetMode = false; + DebugOutputEdgeMask = false; + DebugOutputThreadIndex = false; + DebugOutputWaveIndex = false; + } + + // Runtime data returned from BuildDispatchList(): + half4 LightCoordinate; // Values stored in DispatchList::LightCoordinate_Shader by BuildDispatchList() + int2 WaveOffset; // Values stored in DispatchData::WaveOffset_Shader by BuildDispatchList() + + // Renderer Specific Values: + half FarDepthValue; // Set to the Depth Buffer Value for the far clip plane, as determined by renderer projection matrix setup (typically 0). + half NearDepthValue; // Set to the Depth Buffer Value for the near clip plane, as determined by renderer projection matrix setup (typically 1). + + // Sampling data: + half2 InvDepthTextureSize; // Inverse of the texture dimensions for 'DepthTexture' (used to convert from pixel coordinates to UVs) + // If 'PointBorderSampler' is an Unnormalized sampler, then this value can be hard-coded to 1. + // The 'USE_HALF_PIXEL_OFFSET' macro might need to be defined if sampling at exact pixel coordinates isn't precise (e.g., if odd patterns appear in the shadow). 
+ + Texture2D DepthTexture; // Depth Buffer Texture (rasterized non-linear depth) + RWTexture2D OutputTexture; // Output screen-space shadow buffer (typically single-channel, 8bit) + + SamplerState PointBorderSampler; // A point sampler, with Wrap Mode set to Clamp-To-Border-Color (D3D12_TEXTURE_ADDRESS_MODE_BORDER), and Border Color set to "FarDepthValue" (typically zero), or some other far-depth value out of DepthBounds. + // If you have issues where invalid shadows are appearing from off-screen, it is likely that this sampler is not correctly setup +}; + +// Forward declare: +// Generate the shadow +// Call this function from a compute shader with thread dimensions: numthreads[WAVE_SIZE, 1, 1] +// +// (int3) inGroupID: Compute shader group id register (SV_GroupID) +// (int) inGroupThreadId: Compute shader group thread id register (SV_GroupThreadID) +void WriteScreenSpaceShadow(struct DispatchParameters inParameters, int3 inGroupID, int inGroupThreadID); + +#if !defined(WAVE_SIZE) || !defined(SAMPLE_COUNT) || !defined(HARD_SHADOW_SAMPLES) || !defined(FADE_OUT_SAMPLES) +# error Before including bend_sss_gpu.h, four macros must be defined to configure the shader compile: WAVE_SIZE, SAMPLE_COUNT, HARD_SHADOW_SAMPLES, and FADE_OUT_SAMPLES. See the top of this file for details. +#else + +// static bool EarlyOutPixel(struct DispatchParameters inParameters, int2 pixel_xy, half depth) +// { +// //OPTIONAL TODO; customize this function to return true if the pixel should early-out for custom reasons. E.g., A shadow map pass already found the pixel was in shadow / backfaced, etc. +// // Recommended to keep this code very simple! + +// // Example: +// // return inParameters.CustomShadowMapTerm[pixel_xy] == 0; + +// (void)pixel_xy; //unused by this implementation, avoid potential compiler warning. + +// // The compiled code will be more optimal if the 'depth' value is not referenced. 
+// return depth >= inParameters.DepthBounds.y || depth <= inParameters.DepthBounds.x; +// } + +// Gets the start pixel coordinates for the pixels in the wavefront +// Also returns the delta to get to the next pixel after WAVE_SIZE pixels along the ray +static void ComputeWavefrontExtents(DispatchParameters inParameters, int3 inGroupID, uint inGroupThreadID, out half2 outDeltaXY, out half2 outPixelXY, out half outPixelDistance, out bool outMajorAxisX) +{ + int2 xy = inGroupID.yz * WAVE_SIZE + inParameters.WaveOffset.xy; + + // Integer light position / fractional component + half2 light_xy = floor(inParameters.LightCoordinate.xy) + 0.5; + half2 light_xy_fraction = inParameters.LightCoordinate.xy - light_xy; + bool reverse_direction = inParameters.LightCoordinate.w > 0.0f; + + int2 sign_xy = sign(xy); + bool horizontal = abs(xy.x + sign_xy.y) < abs(xy.y - sign_xy.x); + + int2 axis; + axis.x = horizontal ? (+sign_xy.y) : (0); + axis.y = horizontal ? (0) : (-sign_xy.x); + + // Apply wave offset + xy = axis * (int)inGroupID.x + xy; + half2 xy_f = (half2)xy; + + // For interpolation to the light center, we only really care about the larger of the two axes + bool x_axis_major = abs(xy_f.x) > abs(xy_f.y); + half major_axis = x_axis_major ? xy_f.x : xy_f.y; + + half major_axis_start = abs(major_axis); + half major_axis_end = abs(major_axis) - (half)WAVE_SIZE; + + half ma_light_frac = x_axis_major ? light_xy_fraction.x : light_xy_fraction.y; + ma_light_frac = major_axis > 0 ?
-ma_light_frac : ma_light_frac; + + // back in to screen direction + half2 start_xy = xy_f + light_xy; + + // For the very inner most ring, we need to interpolate to a pixel centered UV, so the UV->pixel rounding doesn't skip output pixels + half2 end_xy = lerp(inParameters.LightCoordinate.xy, start_xy, (major_axis_end + ma_light_frac) / (major_axis_start + ma_light_frac)); + + // The major axis should be a round number + half2 xy_delta = (start_xy - end_xy); + + // Inverse the read order when reverse direction is true + half thread_step = (half)(inGroupThreadID ^ (reverse_direction ? 0 : (WAVE_SIZE - 1))); + + half2 pixel_xy = lerp(start_xy, end_xy, thread_step / (half)WAVE_SIZE); + half pixel_distance = major_axis_start - thread_step + ma_light_frac; + + outPixelXY = pixel_xy; + outPixelDistance = pixel_distance; + outDeltaXY = xy_delta; + outMajorAxisX = x_axis_major; +} + +// Number of bilinear sample reads performed per-thread +# define READ_COUNT (SAMPLE_COUNT / WAVE_SIZE + 2) + +// Common shared data +groupshared half DepthData[READ_COUNT * WAVE_SIZE]; + +// Generate the shadow +// Call this function from a compute shader with thread dimensions: numthreads[WAVE_SIZE, 1, 1] +// +// (int3) inGroupID: Compute shader group id register (SV_GroupID) +// (int) inGroupThreadId: Compute shader group thread id register (SV_GroupThreadID) +void WriteScreenSpaceShadow(DispatchParameters inParameters, int3 inGroupID, int inGroupThreadID) +{ + half2 xy_delta; + half2 pixel_xy; + half pixel_distance; + bool x_axis_major; // major axis is x axis? abs(xy_delta.x) > abs(xy_delta.y). 
+ + ComputeWavefrontExtents(inParameters, (int3)inGroupID, inGroupThreadID.x, xy_delta, pixel_xy, pixel_distance, x_axis_major); + + // Read in the depth values + half sampling_depth[READ_COUNT]; + half shadowing_depth[READ_COUNT]; + half depth_thickness_scale[READ_COUNT]; + half sample_distance[READ_COUNT]; + + const half direction = -inParameters.LightCoordinate.w; + const half z_sign = inParameters.NearDepthValue > inParameters.FarDepthValue ? -1 : +1; + + int i; + bool is_edge = false; + bool skip_pixel = false; + +# if defined(RIGHT) + pixel_xy.x += 1.0 / inParameters.InvDepthTextureSize.x; +# endif + + half2 write_xy = floor(pixel_xy); + +# if !defined(RIGHT) + half2 minUV = half2(0.0, 0.0); + half2 maxUV = half2(0.5, 1.0); +# else + half2 minUV = half2(0.5, 0.0); + half2 maxUV = half2(1.0, 1.0); +# endif + + half2 uv = pixel_xy * inParameters.InvDepthTextureSize * half2(0.5, 1.0); + + [unroll] for (i = 0; i < READ_COUNT; i++) + { + // We sample depth twice per pixel per sample, and interpolate with an edge detect filter + // Interpolation should only occur on the minor axis of the ray - major axis coordinates should be at pixel centers + half2 read_xy = floor(pixel_xy); + half minor_axis = x_axis_major ? pixel_xy.y : pixel_xy.x; + + // If a pixel has been detected as an edge, then optionally (inParameters.IgnoreEdgePixels) don't include it in the shadow + const half edge_skip = 1e20; // if edge skipping is enabled, apply an extreme value/blend on edge samples to push the value out of range + + half2 depths; + half bilinear = frac(minor_axis) - 0.5; + +# if USE_HALF_PIXEL_OFFSET + read_xy += 0.5; +# endif + + half bias = bilinear > 0 ? 1 : -1; + half2 offset_xy = half2(x_axis_major ? 0 : bias, x_axis_major ? 
bias : 0); + + // HLSL enforces that a pixel offset is a compile-time constant, which isn't strictly required (and can sometimes be a bit faster) + // So this fallback will use a manual uv offset instead +# if defined(VR) + depths.x = inParameters.DepthTexture.SampleLevel(inParameters.PointBorderSampler, read_xy * inParameters.InvDepthTextureSize * half2(0.5, 1.0), 0); + depths.y = inParameters.DepthTexture.SampleLevel(inParameters.PointBorderSampler, (read_xy + offset_xy) * inParameters.InvDepthTextureSize * half2(0.5, 1.0), 0); + depths.x = lerp(depths.x, 1.0, (float)(depths.x == 0)); // Stencil area + depths.y = lerp(depths.y, 1.0, (float)(depths.y == 0)); // Stencil area +# else + depths.x = inParameters.DepthTexture.SampleLevel(inParameters.PointBorderSampler, read_xy * inParameters.InvDepthTextureSize, 0); + depths.y = inParameters.DepthTexture.SampleLevel(inParameters.PointBorderSampler, (read_xy + offset_xy) * inParameters.InvDepthTextureSize, 0); +# endif + + // Depth thresholds (bilinear/shadow thickness) are based on a fractional ratio of the difference between sampled depth and the far clip depth + depth_thickness_scale[i] = abs(inParameters.FarDepthValue - depths.x); + + // If depth variance is more than a specific threshold, then just use point filtering + bool use_point_filter = abs(depths.x - depths.y) > depth_thickness_scale[i] * inParameters.BilinearThreshold; + + // Store for debug output when inParameters.DebugOutputEdgeMask is true + if (i == 0) + is_edge = use_point_filter; + + // The pixel starts sampling at this depth + sampling_depth[i] = depths.x; + + half edge_depth = inParameters.IgnoreEdgePixels ? 
edge_skip : depths.x; + // Any sample in this wavefront is possibly interpolated towards the bilinear sample + // So we should use a shadowing depth that is further away, based on the difference between the two samples + half shadow_depth = depths.x + abs(depths.x - depths.y) * z_sign; + + // Shadows cast from this depth + shadowing_depth[i] = use_point_filter ? edge_depth : shadow_depth; + + // Store for later + sample_distance[i] = pixel_distance + (WAVE_SIZE * i) * direction; + + // Iterate to the next pixel along the ray. This will be WAVE_SIZE pixels along the ray... + pixel_xy += xy_delta * direction; + } + + // Using early out, and no debug mode is enabled? + // if (inParameters.UseEarlyOut && (inParameters.DebugOutputWaveIndex == false && inParameters.DebugOutputThreadIndex == false && inParameters.DebugOutputEdgeMask == false)) + // { + // // read the depth of the pixel we are shadowing, and early-out + // // The compiler will typically rearrange this code to put it directly after the first depth read + // skip_pixel = EarlyOutPixel(inParameters, (int2)write_xy, sampling_depth[0]); + + // // are all threads in this wave out of bounds? + // bool early_out = WaveActiveAnyTrue(!skip_pixel) == false; + + // // WaveGetLaneCount returns the hardware wave size + // if (WaveGetLaneCount() == WAVE_SIZE) + // { + // // Optimal case: + // // If each wavefront is just a single wave, then we can trivially early-out. + // if (early_out == true) + // return; + // } + // else + // { + // // This wavefront is made up of multiple small waves, so we need to coordinate them for all to early-out together. + // // Doing this can make the worst case (all pixels drawn) a bit more expensive (~15%), but the best-case (all early-out) is typically 2-3x better.
+ // LdsEarlyOut = true; + + // GroupMemoryBarrierWithGroupSync(); + + // [branch] if (early_out == false) + // LdsEarlyOut = false; + + // GroupMemoryBarrierWithGroupSync(); + + // [branch] if (LdsEarlyOut) + // return; + // } + // } + + // Write the shadow depths to LDS + [unroll] for (i = 0; i < READ_COUNT; i++) + { + // Perspective correct the shadowing depth, in this space, all light rays are parallel + half stored_depth = (shadowing_depth[i] - inParameters.LightCoordinate.z) / sample_distance[i]; + + if (i != 0) { + // For pixels within sampling distance of the light, it is possible that sampling will + // overshoot the light coordinate for extended reads. We want to ignore these samples + stored_depth = sample_distance[i] > 0 ? stored_depth : 1e10; + } + + // Store the depth values in groupshared + int idx = (i * WAVE_SIZE) + inGroupThreadID.x; + DepthData[idx] = stored_depth; + } + + // Sync wavefronts now groupshared DepthData is written + GroupMemoryBarrierWithGroupSync(); + + // Skip first person +# if !defined(VR) + skip_pixel = skip_pixel || GetScreenDepth(sampling_depth[0]) < 16.5; +# endif + + // If the starting depth isn't in depth bounds, then we don't need a shadow + if (skip_pixel) + return; + + half start_depth = sampling_depth[0]; + + // lerp away from far depth by a tiny fraction? + // if (inParameters.UsePrecisionOffset) + // start_depth = lerp(start_depth, inParameters.FarDepthValue, -1.0 / 0xFFFF); + + // perspective correct the depth + start_depth = (start_depth - inParameters.LightCoordinate.z) / sample_distance[0]; + + // Start by reading the next value + int sample_index = inGroupThreadID.x + 1; + + half4 shadow_value = 1; + half hard_shadow = 1; + + // This is the inverse of how large the shadowing window is for the projected sample data. + // All values in the LDS sample list are scaled by 1.0 / sample_distance, such that all light directions become parallel. 
+ // The multiply by sample_distance[0] here is to compensate for the projection divide in the data. + // The 1.0 / inParameters.SurfaceThickness is to adjust user selected thickness. So a 0.5% thickness will scale depth values from [0,1] to [0,200]. The shadow window is always 1 wide. + // 1.0 / depth_thickness_scale[0] is because SurfaceThickness is percentage of remaining depth between the sample and the far clip - not a percentage of the full depth range. + // The min() function is to make sure the window is a minimum width when very close to the light. The +direction term will bias the result so the pixel at the very center of the light is either fully lit or shadowed + half depth_scale = min(sample_distance[0] + direction, 1.0 / inParameters.SurfaceThickness) * sample_distance[0] / depth_thickness_scale[0]; + + start_depth = start_depth * depth_scale - z_sign; + + // The first number of hard shadow samples, a single pixel can produce a full shadow + [unroll] for (i = 0; i < HARD_SHADOW_SAMPLES; i++) + { + half depth_delta = abs(start_depth - DepthData[sample_index + i] * depth_scale); + + // We want to find the distance of the sample that is closest to the reference depth + hard_shadow = min(hard_shadow, depth_delta); + } + + // Brute force go! + // The main shadow samples, averaged in to a set of 4 shadow values + [unroll] for (i = HARD_SHADOW_SAMPLES; i < SAMPLE_COUNT - FADE_OUT_SAMPLES; i++) + { + half depth_delta = abs(start_depth - DepthData[sample_index + i] * depth_scale); + + // Do the same as the hard_shadow code above, but this will accumulate to 4 separate values. + // By using 4 values, the average shadow can be taken, which can help soften single-pixel shadows. 
+ shadow_value[i & 3] = min(shadow_value[i & 3], depth_delta); + } + + // Final fade out samples + [unroll] for (i = SAMPLE_COUNT - FADE_OUT_SAMPLES; i < SAMPLE_COUNT; i++) + { + half depth_delta = abs(start_depth - DepthData[sample_index + i] * depth_scale); + + // Add the fade value to these samples + const half fade_out = (half)(i + 1 - (SAMPLE_COUNT - FADE_OUT_SAMPLES)) / (half)(FADE_OUT_SAMPLES + 1) * 0.75; + + shadow_value[i & 3] = min(shadow_value[i & 3], depth_delta + fade_out); + } + + // Apply the contrast value. + // A value of 0 indicates a sample was exactly matched to the reference depth (and the result is fully shadowed) + // We want some boost to this range, so samples don't have to exactly match to produce a full shadow. + shadow_value = saturate(shadow_value * (inParameters.ShadowContrast) + (1 - inParameters.ShadowContrast)); + hard_shadow = saturate(hard_shadow * (inParameters.ShadowContrast) + (1 - inParameters.ShadowContrast)); + + half result = 0; + + // Take the average of 4 samples, this is useful to reduce aliasing noise in the source depth, especially with long shadows. + result = dot(shadow_value, 0.25); + + // If the first samples are always producing a hard shadow, then compute this value separately. + result = min(hard_shadow, result); + + // Asking the GPU to write scattered single-byte pixels isn't great, + // But thankfully the latency is hidden by all the work we're doing...
+ inParameters.OutputTexture[(int2)write_xy] = min(inParameters.OutputTexture[(int2)write_xy], result); +} + +#endif // macro check \ No newline at end of file diff --git a/features/Subsurface Scattering/Shaders/SubsurfaceScattering/SeparableSSS.hlsli b/features/Subsurface Scattering/Shaders/SubsurfaceScattering/SeparableSSS.hlsli index 5d5f85617..9c29bddd5 100644 --- a/features/Subsurface Scattering/Shaders/SubsurfaceScattering/SeparableSSS.hlsli +++ b/features/Subsurface Scattering/Shaders/SubsurfaceScattering/SeparableSSS.hlsli @@ -93,10 +93,9 @@ float4 SSSSBlurCS( uint2 DTid, float2 texcoord, float2 dir, - float4 normals) + float sssAmount, + bool humanProfile) { - float sssAmount = normals.z; - // Fetch color of current pixel: float4 colorM = ColorTexture[DTid.xy]; @@ -111,8 +110,6 @@ float4 SSSSBlurCS( float depthM = DepthTexture[DTid.xy].r; depthM = GetScreenDepth(depthM); - bool humanProfile = sssAmount > 0.5; - float2 profile = humanProfile ? HumanProfile.xy : BaseProfile.xy; uint kernelOffset = humanProfile ? SSSS_N_SAMPLES : 0; @@ -126,7 +123,7 @@ float4 SSSSBlurCS( // Calculate the final step to fetch the surrounding pixels: float2 finalStep = scale * BufferDim * dir; - finalStep *= saturate((humanProfile ? (sssAmount.x - 0.5) : sssAmount) * 2.0); + finalStep *= sssAmount; finalStep *= profile.x; // Modulate it using the profile finalStep *= 1.0 / 3.0; // Divide by 3 as the kernels range from -3 to 3. 
diff --git a/features/Subsurface Scattering/Shaders/SubsurfaceScattering/SeparableSSSCS.hlsl b/features/Subsurface Scattering/Shaders/SubsurfaceScattering/SeparableSSSCS.hlsl index 1c1c27952..a35dbfef5 100644 --- a/features/Subsurface Scattering/Shaders/SubsurfaceScattering/SeparableSSSCS.hlsl +++ b/features/Subsurface Scattering/Shaders/SubsurfaceScattering/SeparableSSSCS.hlsl @@ -2,11 +2,10 @@ RWTexture2D SSSRW : register(u0); Texture2D ColorTexture : register(t0); Texture2D DepthTexture : register(t1); - -#if defined(HORIZONTAL) +#if defined(FIRSTPERSON) Texture2D NormalTexture : register(t2); #else -RWTexture2D NormalTexture : register(u1); +Texture2D MaskTexture : register(t2); #endif #define SSSS_N_SAMPLES 21 @@ -17,7 +16,7 @@ struct DiffusionProfile float Thickness; }; -cbuffer PerFrame : register(b0) +cbuffer PerFrame : register(b1) { float4 Kernels[SSSS_N_SAMPLES + SSSS_N_SAMPLES]; float4 BaseProfile; @@ -61,14 +60,31 @@ float InterleavedGradientNoise(float2 uv) : SV_DispatchThreadID) { float2 texCoord = (DTid.xy + 0.5) * RcpBufferDim; #if defined(HORIZONTAL) - float4 normals = NormalTexture[DTid.xy]; - float4 color = SSSSBlurCS(DTid.xy, texCoord, float2(1.0, 0.0), normals); + +# if defined(FIRSTPERSON) + float sssAmount = NormalTexture[DTid.xy].z; + bool humanProfile = sssAmount > 0.5; + sssAmount = saturate((humanProfile ? (sssAmount.x - 0.5) : sssAmount) * 2.0); +# else + float sssAmount = MaskTexture[DTid.xy].x; + bool humanProfile = MaskTexture[DTid.xy].y == sssAmount; +# endif + + float4 color = SSSSBlurCS(DTid.xy, texCoord, float2(1.0, 0.0), sssAmount, humanProfile); SSSRW[DTid.xy] = max(0, color); #else - float4 normals = NormalTexture[DTid.xy]; - float4 color = SSSSBlurCS(DTid.xy, texCoord, float2(0.0, 1.0), normals); + +# if defined(FIRSTPERSON) + float sssAmount = NormalTexture[DTid.xy].z; + bool humanProfile = sssAmount > 0.5; + sssAmount = saturate((humanProfile ? 
(sssAmount.x - 0.5) : sssAmount) * 2.0); +# else + float sssAmount = MaskTexture[DTid.xy].x; + bool humanProfile = MaskTexture[DTid.xy].y == sssAmount; +# endif + + float4 color = SSSSBlurCS(DTid.xy, texCoord, float2(0.0, 1.0), sssAmount, humanProfile); color.rgb = Lin2sRGB(color.rgb); - SSSRW[DTid.xy] = float4(max(0, color.rgb), 1); - NormalTexture[DTid.xy] = float4(normals.xy, 0.0, normals.w); + SSSRW[DTid.xy] = float4(color.rgb, 1.0); #endif } diff --git a/features/Terrain Occlusion/Shaders/Features/TerrainOcclusion.ini b/features/Terrain Occlusion/Shaders/Features/TerrainOcclusion.ini new file mode 100644 index 000000000..19f01444d --- /dev/null +++ b/features/Terrain Occlusion/Shaders/Features/TerrainOcclusion.ini @@ -0,0 +1,2 @@ +[Info] +Version = 1-0-0 \ No newline at end of file diff --git a/features/Terrain Occlusion/Shaders/TerrainOcclusion/AOGen.cs.hlsl b/features/Terrain Occlusion/Shaders/TerrainOcclusion/AOGen.cs.hlsl new file mode 100644 index 000000000..849eafeaa --- /dev/null +++ b/features/Terrain Occlusion/Shaders/TerrainOcclusion/AOGen.cs.hlsl @@ -0,0 +1,114 @@ +#define PI 3.1415926535 +#define HALF_PI 1.570796327 + +struct AOGenBuffer +{ + float aoDistance; + uint sliceCount; + uint sampleCount; + + float3 pos0; + float3 pos1; + float2 zRange; +}; + +RWTexture2D RWTexOcclusion : register(u0); +RWTexture2D RWTexNormalisedHeight : register(u1); + +StructuredBuffer aoGen : register(t0); +Texture2D TexHeightmap : register(t1); + +SamplerState HeightmapSampler +{ + Filter = MIN_MAG_MIP_LINEAR; + AddressU = Clamp; + AddressV = Clamp; + AddressW = Clamp; +}; + +float3 getPos(float2 uv) +{ + float3 pos = float3(uv, TexHeightmap.SampleLevel(HeightmapSampler, uv, 0).x); + pos = lerp(aoGen[0].pos0.xyz, aoGen[0].pos1.xyz, pos); + return pos; +} + +// https://gist.github.com/bgolus/a07ed65602c009d5e2f753826e8078a0 +float3 ReconstructNormal(float2 uv, float2 texelSize) +{ + // get current pixel's view space position + float3 viewSpacePos_c = getPos(uv + 
float2(0.0, 0.0) * texelSize); + + // get view space position at 1 pixel offsets in each major direction + float3 viewSpacePos_l = getPos(uv + float2(-1.0, 0.0) * texelSize); + float3 viewSpacePos_r = getPos(uv + float2(1.0, 0.0) * texelSize); + float3 viewSpacePos_d = getPos(uv + float2(0.0, -1.0) * texelSize); + float3 viewSpacePos_u = getPos(uv + float2(0.0, 1.0) * texelSize); + + // get the difference between the current and each offset position + float3 l = viewSpacePos_c - viewSpacePos_l; + float3 r = viewSpacePos_r - viewSpacePos_c; + float3 d = viewSpacePos_c - viewSpacePos_d; + float3 u = viewSpacePos_u - viewSpacePos_c; + + // pick horizontal and vertical diff with the smallest z difference + float3 hDeriv = abs(l.z) < abs(r.z) ? l : r; + float3 vDeriv = abs(d.z) < abs(u.z) ? d : u; + + // get view space normal from the cross product of the two smallest offsets + float3 viewNormal = normalize(cross(hDeriv, vDeriv)); + + return viewNormal; +} + +[numthreads(32, 32, 1)] void main(const uint2 tid + : SV_DispatchThreadID) { + uint2 dims; + TexHeightmap.GetDimensions(dims.x, dims.y); + float2 texelSize = rcp(dims); + + uint2 px_coord = tid.xy; + float2 uv = (px_coord + 0.5) * texelSize; + + float3 normal = -ReconstructNormal(uv, texelSize); + float3 pos = getPos(uv); + float3 view = float3(0, 0, 1); + + // helpful constants + float rcp_sample_count = rcp(aoGen[0].sampleCount); + float2 world_uv_scale = rcp(aoGen[0].pos1.xy - aoGen[0].pos0.xy); // delta world pos * world_uv_scale = delta uv; + + float cos_cone = 0; + float visibility = 0; + for (uint slice = 0; slice < aoGen[0].sliceCount; slice++) { + float theta = (PI / aoGen[0].sliceCount) * slice; + float3 slice_dir = 0; + sincos(theta, slice_dir.y, slice_dir.x); + + float3 axis_dir = cross(slice_dir, view); + float3 proj_normal = normal - axis_dir * dot(normal, axis_dir); + float proj_normal_len = length(proj_normal); + + float sgn_n = sign(dot(slice_dir, proj_normal)); + float cos_n = 
saturate(dot(proj_normal, view) / proj_normal_len); + float n = sgn_n * acos(cos_n); + + for (int side = 0; side <= 1; side++) { + float horizon_cos = -1; + for (uint samp = 0; samp < aoGen[0].sampleCount; samp++) { + float dist_ratio = (samp + 1) * rcp_sample_count; + float2 curr_uv = uv + (2 * side - 1) * dist_ratio * aoGen[0].aoDistance * slice_dir.xy * world_uv_scale; + float3 curr_pos = getPos(curr_uv); + float3 horizon_dir = normalize(curr_pos - pos); + horizon_cos = max(horizon_cos, dot(horizon_dir, view)); + } + float h = n + clamp((-1 + 2 * side) * acos(horizon_cos) - n, -HALF_PI, HALF_PI); + visibility += saturate(proj_normal_len * (cos_n + 2 * h * sin(n) - cos(2 * h - n)) * .25); + } + } + visibility /= aoGen[0].sliceCount; + + float norm_z = (pos.z - aoGen[0].zRange.x) / (aoGen[0].zRange.y - aoGen[0].zRange.x); + RWTexOcclusion[tid] = visibility; + RWTexNormalisedHeight[tid] = norm_z; +} \ No newline at end of file diff --git a/features/Terrain Occlusion/Shaders/TerrainOcclusion/Output.cs.hlsl b/features/Terrain Occlusion/Shaders/TerrainOcclusion/Output.cs.hlsl new file mode 100644 index 000000000..81c6026c8 --- /dev/null +++ b/features/Terrain Occlusion/Shaders/TerrainOcclusion/Output.cs.hlsl @@ -0,0 +1,44 @@ +#include "../Common/DeferredShared.hlsli" +#include "../Common/VR.hlsli" + +Texture2D TexDepth : register(t0); + +#define TERRA_OCC_OUTPUT +#include "TerraOcclusion.hlsli" + +RWTexture2D RWTexShadowMask : register(u0); +RWTexture2D RWTexGI : register(u1); + +SamplerState SamplerDefault; + +[numthreads(32, 32, 1)] void main(uint2 dtid + : SV_DispatchThreadID) { + float2 uv = (dtid + .5) * RcpBufferDim; +#ifdef VR + const uint eyeIndex = uv > .5; +#else + const uint eyeIndex = 0; +#endif + + float3 ndc = float3(ConvertToStereoUV(uv, eyeIndex), 1); + ndc = ndc * 2 - 1; + ndc.y = -ndc.y; + ndc.z = TexDepth[dtid]; + + float4 worldPos = mul(InvViewMatrix[eyeIndex], mul(InvProjMatrix[eyeIndex], float4(ndc, 1))); + worldPos.xyz /= worldPos.w; + float 
viewDistance = length(worldPos); + worldPos.xyz += CamPosAdjust[0].xyz; + + float terrainShadow = 1; + float terrainAo = 1; + + GetTerrainOcclusion(worldPos, viewDistance, SamplerDefault, terrainShadow, terrainAo); + + half shadow = RWTexShadowMask[dtid]; + RWTexShadowMask[dtid] = min(shadow, terrainShadow); + + float4 gi = RWTexGI[dtid]; + gi.w *= terrainAo; + RWTexGI[dtid] = gi; +} \ No newline at end of file diff --git a/features/Terrain Occlusion/Shaders/TerrainOcclusion/ShadowUpdate.cs.hlsl b/features/Terrain Occlusion/Shaders/TerrainOcclusion/ShadowUpdate.cs.hlsl new file mode 100644 index 000000000..21283bb1b --- /dev/null +++ b/features/Terrain Occlusion/Shaders/TerrainOcclusion/ShadowUpdate.cs.hlsl @@ -0,0 +1,110 @@ +Texture2D TexHeight : register(t0); +RWTexture2D RWTexShadowHeights : register(u0); + +cbuffer ShadowUpdateCB : register(b1) +{ + float2 LightPxDir : packoffset(c0.x); // direction on which light descends, from one pixel to next via dda + float2 LightDeltaZ : packoffset(c0.z); // per lightUVDir, normalised, [upper, lower] penumbra, should be negative + uint StartPxCoord : packoffset(c1.x); + float2 PxSize : packoffset(c1.y); + float pad : packoffset(c1.w); +} + +float GetInterpolatedHeight(float2 pxCoord, bool isVertical) +{ + uint2 dims; + TexHeight.GetDimensions(dims.x, dims.y); + + int2 lerpPxCoordA = int2(pxCoord - .5 * float2(isVertical, !isVertical)); + int2 lerpPxCoordB = int2(pxCoord + .5 * float2(isVertical, !isVertical)); + float heightA = TexHeight[lerpPxCoordA]; + float heightB = TexHeight[lerpPxCoordB]; + + bool inBoundA = all(lerpPxCoordA > 0); + bool inBoundB = all(lerpPxCoordB < dims); + if (inBoundA && inBoundB) + return lerp(heightA, heightB, frac(pxCoord - .5)); + else if (!inBoundA) + return heightB; + else + return heightA; +} + +float2 GetInterpolatedHeightRW(float2 pxCoord, bool isVertical) +{ + uint2 dims; + RWTexShadowHeights.GetDimensions(dims.x, dims.y); + + int2 lerpPxCoordA = int2(pxCoord - .5 * float2(isVertical, 
!isVertical)); + int2 lerpPxCoordB = int2(pxCoord + .5 * float2(isVertical, !isVertical)); + float2 heightA = RWTexShadowHeights[lerpPxCoordA]; + float2 heightB = RWTexShadowHeights[lerpPxCoordB]; + + bool inBoundA = all(lerpPxCoordA > 0); + bool inBoundB = all(lerpPxCoordB < dims); + if (inBoundA && inBoundB) + return lerp(heightA, heightB, frac(pxCoord - .5)); + else if (!inBoundA) + return heightB; + else + return heightA; +} + +groupshared float2 g_shadowHeight[1024]; + +[numthreads(1024, 1, 1)] void main(const uint gtid + : SV_GroupThreadID, const uint gid + : SV_GroupID) { + uint2 dims; + TexHeight.GetDimensions(dims.x, dims.y); + + bool isVertical = abs(LightPxDir.y) > abs(LightPxDir.x); + float2 lightUVDir = LightPxDir * PxSize; + + uint2 rayStartPxCoord = isVertical ? uint2(gid, StartPxCoord) : uint2(StartPxCoord, gid); + float2 rayStartUV = (rayStartPxCoord + .5) * PxSize; + float2 rawThreadUV = rayStartUV + gtid * lightUVDir; + + bool2 isUVinRange = (rawThreadUV > 0) && (rawThreadUV < 1); + bool isValid = isVertical ? isUVinRange.y : isUVinRange.x; + + float2 threadUV = rawThreadUV - floor(rawThreadUV); // wraparound + float2 threadPxCoord = threadUV * dims; + + float2 pastHeights; + if (isValid) { + pastHeights = RWTexShadowHeights[uint2(threadPxCoord)]; + + // bifilter + float2 heights = GetInterpolatedHeight(threadPxCoord, isVertical).xx; + + // fetch last dispatch + if (gtid == 0 && all(floor(rawThreadUV - lightUVDir) == floor(rawThreadUV))) { + float2 sampleHeights = GetInterpolatedHeightRW(threadPxCoord - LightPxDir, isVertical) + LightDeltaZ; + heights = heights.x > sampleHeights.x ? 
heights : sampleHeights; + } + + g_shadowHeight[gtid] = heights; + } + + GroupMemoryBarrierWithGroupSync(); + + // simple parallel scan + [unroll] for (uint offset = 1; offset < 1024; offset <<= 1) + { + if (isValid && gtid >= offset) { + if (all(floor(rawThreadUV - lightUVDir * offset) == floor(rawThreadUV))) // no wraparound happend + { + float2 currentHeights = g_shadowHeight[gtid]; + float2 sampleHeights = g_shadowHeight[gtid - offset] + LightDeltaZ * offset; + g_shadowHeight[gtid] = currentHeights.x > sampleHeights.x ? currentHeights : sampleHeights; + } + } + GroupMemoryBarrierWithGroupSync(); + } + + // save + if (isValid) { + RWTexShadowHeights[uint2(threadPxCoord)] = lerp(pastHeights, g_shadowHeight[gtid], .2f); + } +} \ No newline at end of file diff --git a/features/Terrain Occlusion/Shaders/TerrainOcclusion/TerraOcclusion.hlsli b/features/Terrain Occlusion/Shaders/TerrainOcclusion/TerraOcclusion.hlsli new file mode 100644 index 000000000..34ab358d4 --- /dev/null +++ b/features/Terrain Occlusion/Shaders/TerrainOcclusion/TerraOcclusion.hlsli @@ -0,0 +1,84 @@ +struct PerPassTerraOcc +{ + uint EnableTerrainShadow; + uint EnableTerrainAO; + + float HeightBias; + + float ShadowSofteningRadiusAngle; + float2 ShadowFadeDistance; + + float AOMix; + float AOPower; + float AOFadeOutHeightRcp; + + float3 scale; + float3 invScale; + float3 offset; + float2 zRange; +}; + +#ifdef TERRA_OCC_OUTPUT +StructuredBuffer perPassTerraOcc : register(t1); +Texture2D TexTerraOcc : register(t2); +Texture2D TexNormalisedHeight : register(t3); +Texture2D TexShadowHeight : register(t4); +#else +StructuredBuffer perPassTerraOcc : register(t1); +Texture2D TexTerraOcc : register(t2); +Texture2D TexNormalisedHeight : register(t3); +Texture2D TexShadowHeight : register(t4); +#endif + +float2 GetTerrainOcclusionUV(float2 xy) +{ + return xy * perPassTerraOcc[0].scale.xy + perPassTerraOcc[0].offset.xy; +} + +float2 GetTerrainOcclusionXY(float2 uv) +{ + return (uv - 
perPassTerraOcc[0].offset.xy) * perPassTerraOcc[0].invScale.xy; +} + +float GetTerrainZ(float norm_z) +{ + return lerp(perPassTerraOcc[0].zRange.x, perPassTerraOcc[0].zRange.y, norm_z) + perPassTerraOcc[0].HeightBias; +} + +float2 GetTerrainZ(float2 norm_z) +{ + return float2(GetTerrainZ(norm_z.x), GetTerrainZ(norm_z.y)); +} + +void GetTerrainOcclusion( + const float3 worldPos, const float viewDistance, SamplerState samp, + out float terrainShadow, out float terrainAo) +{ + terrainShadow = 1; + terrainAo = 1; + + float2 terraOccUV = GetTerrainOcclusionUV(worldPos.xy); + + /* outside the [0,1] occlusion-map footprint on either axis: keep shadow/AO at 1 */ if (any(terraOccUV < 0) || any(terraOccUV > 1)) + return; + + if (perPassTerraOcc[0].EnableTerrainShadow && (viewDistance > perPassTerraOcc[0].ShadowFadeDistance.x)) { + float fadeFactor = saturate((viewDistance - perPassTerraOcc[0].ShadowFadeDistance.x) / (perPassTerraOcc[0].ShadowFadeDistance.y - perPassTerraOcc[0].ShadowFadeDistance.x)); + float2 shadowHeight = GetTerrainZ(TexShadowHeight.SampleLevel(samp, terraOccUV, 0)); + float shadowFraction = saturate((worldPos.z - shadowHeight.y) / (shadowHeight.x - shadowHeight.y)); + terrainShadow = lerp(1, shadowFraction, fadeFactor); + } + if (perPassTerraOcc[0].EnableTerrainAO) { + float terrainHeight = GetTerrainZ(TexNormalisedHeight.SampleLevel(samp, terraOccUV, 0).x); + terrainAo = TexTerraOcc.SampleLevel(samp, terraOccUV, 0).x; + + // power + terrainAo = pow(terrainAo, perPassTerraOcc[0].AOPower); + + // height fadeout + float fadeOut = saturate((worldPos.z - terrainHeight) * perPassTerraOcc[0].AOFadeOutHeightRcp); + terrainAo = lerp(terrainAo, 1, fadeOut); + + terrainAo = lerp(1, terrainAo, perPassTerraOcc[0].AOMix); + } +} \ No newline at end of file diff --git a/features/Terrain Occlusion/textures/heightmaps/Tamriel.HeightMap.-57.-43.61.50.-32768.32768.-4629.4924.dds b/features/Terrain Occlusion/textures/heightmaps/Tamriel.HeightMap.-57.-43.61.50.-32768.32768.-4629.4924.dds new file mode 100644 index 000000000..8037dea8f Binary files /dev/null 
and b/features/Terrain Occlusion/textures/heightmaps/Tamriel.HeightMap.-57.-43.61.50.-32768.32768.-4629.4924.dds differ diff --git a/features/Terrain Occlusion/textures/heightmaps/readme.txt b/features/Terrain Occlusion/textures/heightmaps/readme.txt new file mode 100644 index 000000000..0a58a39e9 --- /dev/null +++ b/features/Terrain Occlusion/textures/heightmaps/readme.txt @@ -0,0 +1,11 @@ +[worldspace editorID].HeightMap.[West cell].[South cell].[East cell].[North cell].[z min].[z max].[Terrain z min].[Terrain z max].dds +The min/max cell coordinates are the actual cells that contain terrain height data. +All z values are actual z values divided by 8. +z min/max corresponds to pixel value zero/pure black and one/pure white. +Terrain z min/max corresponds to the lowest/highest point of the terrain, or bounding box. + +Tamriel.HeightMap.-57.-43.61.50.-32768.32768.-4629.4924.dds +native Skyrim.esm data + +Terrain heightmap for Tamriel +Each cell has 32x32 heightmap data points, so max resolution is 32x32 pixels per cell. 
\ No newline at end of file diff --git a/features/Tree LOD Lighting/Shaders/DistantTree.hlsl b/features/Tree LOD Lighting/Shaders/DistantTree.hlsl deleted file mode 100644 index f9d0fa1ef..000000000 --- a/features/Tree LOD Lighting/Shaders/DistantTree.hlsl +++ /dev/null @@ -1,321 +0,0 @@ -#include "Common/Color.hlsl" -#include "Common/FrameBuffer.hlsl" -#include "Common/LightingData.hlsl" -#include "Common/MotionBlur.hlsl" - -cbuffer TreePerFrame : register(b3) -{ - row_major float3x4 DirectionalAmbient; - float4 DirLightColor; - float4 DirLightDirection; - float DirLightScale; - bool ComplexAtlasTexture; - bool EnableComplexTreeLOD; - bool EnableDirLightFix; - float SubsurfaceScatteringAmount; - float pad[3]; -} - -struct VS_INPUT -{ - float3 Position : POSITION0; - float2 TexCoord0 : TEXCOORD0; - float4 InstanceData1 : TEXCOORD4; - float4 InstanceData2 : TEXCOORD5; - float4 InstanceData3 : TEXCOORD6; // Unused - float4 InstanceData4 : TEXCOORD7; // Unused -#if defined(VR) - uint InstanceID : SV_INSTANCEID; -#endif // VR -}; - -struct VS_OUTPUT -{ - float4 Position : SV_POSITION0; - float3 TexCoord : TEXCOORD0; -#if defined(RENDER_DEPTH) - float4 Depth : TEXCOORD3; -#else - float4 WorldPosition : POSITION1; - float4 PreviousWorldPosition : POSITION2; -#endif - float3 SphereNormal : TEXCOORD4; - -#if !defined(VR) - row_major float3x4 World[1] : POSITION3; -#else - row_major float3x4 World[2] : POSITION3; -#endif // VR -#if defined(VR) - float ClipDistance : SV_ClipDistance0; // o11 - float CullDistance : SV_CullDistance0; // p11 - uint EyeIndex : EYEIDX0; -#endif // VR -}; - -#ifdef VSHADER -cbuffer PerTechnique : register(b0) -{ - float4 FogParam : packoffset(c0); -}; - -cbuffer PerGeometry : register(b2) -{ -# if !defined(VR) - row_major float4x4 WorldViewProj[1] : packoffset(c0); - row_major float4x4 World[1] : packoffset(c4); - row_major float4x4 PreviousWorld[1] : packoffset(c8); -# else - row_major float4x4 WorldViewProj[2] : packoffset(c0); - row_major 
float4x4 World[2] : packoffset(c8); - row_major float4x4 PreviousWorld[2] : packoffset(c16); -# endif -}; - -VS_OUTPUT main(VS_INPUT input) -{ - VS_OUTPUT vsout; - uint eyeIndex = GetEyeIndexVS( -# if defined(VR) - input.InstanceID -# endif - ); - - float3 scaledModelPosition = input.InstanceData1.www * input.Position.xyz; - float3 adjustedModelPosition = 0.0.xxx; - adjustedModelPosition.x = dot(float2(1, -1) * input.InstanceData2.xy, scaledModelPosition.xy); - adjustedModelPosition.y = dot(input.InstanceData2.yx, scaledModelPosition.xy); - adjustedModelPosition.z = scaledModelPosition.z; - - float4 finalModelPosition = float4(input.InstanceData1.xyz + adjustedModelPosition.xyz, 1.0); - float4 viewPosition = mul(WorldViewProj[eyeIndex], finalModelPosition); - -# ifdef RENDER_DEPTH - vsout.Depth.xy = viewPosition.zw; - vsout.Depth.zw = input.InstanceData2.zw; -# else - vsout.WorldPosition = mul(World[eyeIndex], finalModelPosition); - vsout.PreviousWorldPosition = mul(PreviousWorld[eyeIndex], finalModelPosition); -# endif - - vsout.Position = viewPosition; - vsout.TexCoord = float3(input.TexCoord0.xy, FogParam.z); - - scaledModelPosition = input.Position.xyz; - adjustedModelPosition.x = dot(float2(1, -1) * input.InstanceData2.xy, scaledModelPosition.xy); - adjustedModelPosition.y = dot(input.InstanceData2.yx, scaledModelPosition.xy); - adjustedModelPosition.z = scaledModelPosition.z; - - vsout.SphereNormal.xyz = mul(World[eyeIndex], normalize(float4(adjustedModelPosition, 0))); - - vsout.World[0] = World[0]; -# ifdef VR - vsout.World[1] = World[1]; - vsout.EyeIndex = eyeIndex; - VR_OUTPUT VRout = GetVRVSOutput(vsout.Position, eyeIndex); - vsout.Position = VRout.VRPosition; - vsout.ClipDistance.x = VRout.ClipDistance; - vsout.CullDistance.x = VRout.CullDistance; -# endif // VR - return vsout; -} -#endif - -typedef VS_OUTPUT PS_INPUT; - -struct PS_OUTPUT -{ - float4 Albedo : SV_Target0; - -#if !defined(RENDER_DEPTH) - float2 MotionVector : SV_Target1; - float4 Normal : 
SV_Target2; -#endif -}; - -#ifdef PSHADER -SamplerState SampDiffuse : register(s0); -SamplerState SampShadowMaskSampler : register(s14); -Texture2D TexDiffuse : register(t0); - -# if !defined(VR) -cbuffer AlphaTestRefCB : register(b11) -{ - float AlphaTestRefRS : packoffset(c0); -} -# endif - -cbuffer PerTechnique : register(b0) -{ - float4 DiffuseColor : packoffset(c0); - float4 AmbientColor : packoffset(c1); -}; - -const static float DepthOffsets[16] = { 0.003921568, 0.533333361, 0.133333340, 0.666666687, 0.800000000, 0.266666681, 0.933333337, 0.400000000, 0.200000000, 0.733333349, 0.066666670, 0.600000000, 0.996078432, 0.466666669, 0.866666675, 0.333333343 }; - -float GetSoftLightMultiplier(float angle, float strength) -{ - float softLightParam = saturate((strength + angle) / (1 + strength)); - float arg1 = (softLightParam * softLightParam) * (3 - 2 * softLightParam); - float clampedAngle = saturate(angle); - float arg2 = (clampedAngle * clampedAngle) * (3 - 2 * clampedAngle); - float softLigtMul = saturate(arg1 - arg2); - return softLigtMul; -} - -float3 TransformNormal(float3 normal) -{ - return normal * 2 + -1.0.xxx; -} - -// http://www.thetenthplanet.de/archives/1180 -float3x3 CalculateTBN(float3 N, float3 p, float2 uv) -{ - // get edge vectors of the pixel triangle - float3 dp1 = ddx_coarse(p); - float3 dp2 = ddy_coarse(p); - float2 duv1 = ddx_coarse(uv); - float2 duv2 = ddy_coarse(uv); - - // solve the linear system - float3 dp2perp = cross(dp2, N); - float3 dp1perp = cross(N, dp1); - float3 T = dp2perp * duv1.x + dp1perp * duv2.x; - float3 B = dp2perp * duv1.y + dp1perp * duv2.y; - - // construct a scale-invariant frame - float invmax = rsqrt(max(dot(T, T), dot(B, B))); - return float3x3(T * invmax, B * invmax, N); -} - -# if defined(SCREEN_SPACE_SHADOWS) -# include "ScreenSpaceShadows/ShadowsPS.hlsli" -# endif - -# if defined(CLOUD_SHADOWS) -# include "CloudShadows/CloudShadows.hlsli" -# endif - -PS_OUTPUT main(PS_INPUT input, bool frontFace - : 
SV_IsFrontFace) -{ - PS_OUTPUT psout; - -# if !defined(VR) - uint eyeIndex = 0; -# else - uint eyeIndex = input.EyeIndex; -# endif - -# if defined(RENDER_DEPTH) - uint2 temp = uint2(input.Position.xy); - uint index = ((temp.x << 2) & 12) | (temp.y & 3); - - float depthOffset = 0.5 - DepthOffsets[index]; - float depthModifier = (input.Depth.w * depthOffset) + input.Depth.z - 0.5; - - if (depthModifier < 0) { - discard; - } - - float alpha = TexDiffuse.Sample(SampDiffuse, input.TexCoord.xy).w; - - if ((alpha - AlphaTestRefRS) < 0) { - discard; - } - - psout.Albedo.xyz = input.Depth.xxx / input.Depth.yyy; - psout.Albedo.w = 0; -# else - - float4 baseColor = TexDiffuse.Sample(SampDiffuse, input.TexCoord.xy); - -# if defined(DO_ALPHA_TEST) - if ((baseColor.w - AlphaTestRefRS) < 0) { - discard; - } -# endif - - float2 screenMotionVector = GetSSMotionVector(input.WorldPosition, input.PreviousWorldPosition, eyeIndex); - - psout.MotionVector = screenMotionVector; - - float3 ddx = ddx_coarse(input.WorldPosition); - float3 ddy = ddy_coarse(input.WorldPosition); - float3 normal = normalize(cross(ddx, ddy)); - - float3 viewDirection = -normalize(input.WorldPosition.xyz); - float3 worldNormal = normal; - - worldNormal = normalize(input.SphereNormal.xyz); - worldNormal.xy *= 2; - worldNormal = normalize(worldNormal); - worldNormal = normalize(lerp(-worldNormal, normal, 0.25)); - - if (ComplexAtlasTexture && EnableComplexTreeLOD) { - float3 normalColor = TexDiffuse.Sample(SampDiffuse, float2(input.TexCoord.x, 0.5 + input.TexCoord.y)); - normalColor = TransformNormal(normalColor); - // Increases the strength of the normal to simulate more advanced lighting. - normalColor.xy *= 2; - normalColor = normalize(normalColor); - // world-space -> tangent-space -> world-space. - // This is because we don't have pre-computed tangents. 
- worldNormal.xyz = normalize(mul(normalColor.xyz, CalculateTBN(worldNormal.xyz, -input.WorldPosition.xyz, input.TexCoord.xy))); - } - - float3 dirLightColor = lerp(RGBToLuminance(DirLightColor.xyz), DirLightColor.xyz, 0.5) * 0.5; - - if (EnableDirLightFix) { - dirLightColor *= DirLightScale; - } - -# if defined(CLOUD_SHADOWS) - float3 normalizedDirLightDirectionWS = -normalize(mul(input.World[eyeIndex], float4(DirLightDirection.xyz, 0))).xyz; - - float3 cloudShadowMult = 1.0; - if (perPassCloudShadow[0].EnableCloudShadows && !lightingData[0].Reflections) { - cloudShadowMult = getCloudShadowMult(input.WorldPosition.xyz, normalizedDirLightDirectionWS.xyz, SampDiffuse); - dirLightColor *= cloudShadowMult; - } -# endif - - float3 nsDirLightColor = dirLightColor; - -# if defined(SCREEN_SPACE_SHADOWS) - float dirLightSShadow = PrepassScreenSpaceShadows(input.WorldPosition.xyz, eyeIndex); -# endif - - float3 diffuseColor = 0; - - float3 lightsDiffuseColor = 0; - - float dirLightAngle = dot(worldNormal.xyz, DirLightDirection.xyz); - float3 dirDiffuseColor = dirLightColor * saturate(dirLightAngle); - - lightsDiffuseColor += dirDiffuseColor * dirLightColor; - - float3 subsurfaceColor = lerp(RGBToLuminance(baseColor.xyz), baseColor.xyz, 2.0); - - // Applies lighting across the whole surface apart from what is already lit. - lightsDiffuseColor += subsurfaceColor * nsDirLightColor * GetSoftLightMultiplier(dirLightAngle, SubsurfaceScatteringAmount); - - // Applies lighting from the opposite direction. Does not account for normals perpendicular to the light source. 
- lightsDiffuseColor += subsurfaceColor * dirLightColor * saturate(-dirLightAngle) * SubsurfaceScatteringAmount; - - float3 directionalAmbientColor = mul(DirectionalAmbient, float4(worldNormal.xyz, 1)); -# if defined(CLOUD_SHADOWS) - if (perPassCloudShadow[0].EnableCloudShadows && !lightingData[0].Reflections) - directionalAmbientColor *= lerp(1.0, cloudShadowMult, perPassCloudShadow[0].AbsorptionAmbient); -# endif - lightsDiffuseColor += directionalAmbientColor; - - diffuseColor += lightsDiffuseColor; - - float3 color = diffuseColor * baseColor.xyz; - psout.Albedo.xyz = color; - psout.Albedo.w = 1; -# endif - - return psout; -} -#endif diff --git a/features/Tree LOD Lighting/Shaders/Features/TreeLODLighting.ini b/features/Tree LOD Lighting/Shaders/Features/TreeLODLighting.ini deleted file mode 100644 index c9108f73d..000000000 --- a/features/Tree LOD Lighting/Shaders/Features/TreeLODLighting.ini +++ /dev/null @@ -1,2 +0,0 @@ -[Info] -Version = 1-2-1 \ No newline at end of file diff --git a/package/Shaders/Common/Constants.hlsli b/package/Shaders/Common/Constants.hlsli index 40ae21d79..b366f043b 100644 --- a/package/Shaders/Common/Constants.hlsli +++ b/package/Shaders/Common/Constants.hlsli @@ -1,3 +1,7 @@ +#ifndef __CONSTANTS_DEPENDENCY_HLSL__ +#define __CONSTANTS_DEPENDENCY_HLSL__ + +#define M_HALFPI 1.57079637 // PI / 2 #define M_PI 3.1415925 // PI #define M_2PI 6.283185 // PI * 2 @@ -7,3 +11,4 @@ const static float4x4 M_IdentityMatrix = { { 0, 0, 1, 0 }, { 0, 0, 0, 1 } }; +#endif //__CONSTANTS_DEPENDENCY_HLSL__ \ No newline at end of file diff --git a/package/Shaders/Common/DeferredShared.hlsli b/package/Shaders/Common/DeferredShared.hlsli new file mode 100644 index 000000000..72d581e99 --- /dev/null +++ b/package/Shaders/Common/DeferredShared.hlsli @@ -0,0 +1,18 @@ +cbuffer PerFrameDeferredShared : register(b0) +{ + float4 CamPosAdjust[2]; + float4 DirLightDirectionVS[2]; + float4 DirLightColor; + float4 CameraData; + float2 BufferDim; + float2 RcpBufferDim; + float4x4 
ViewMatrix[2]; + float4x4 ProjMatrix[2]; + float4x4 ViewProjMatrix[2]; + float4x4 InvViewMatrix[2]; + float4x4 InvProjMatrix[2]; + float4x4 InvViewProjMatrix[2]; + row_major float3x4 DirectionalAmbient; + uint FrameCount; + uint pad0[3]; +}; \ No newline at end of file diff --git a/package/Shaders/Common/FastMath.hlsli b/package/Shaders/Common/FastMath.hlsli new file mode 100644 index 000000000..480034dc5 --- /dev/null +++ b/package/Shaders/Common/FastMath.hlsli @@ -0,0 +1,341 @@ +/****************************************************************************** + Shader Fast Math Lib (v0.41) + A shader math library for optimized approximate transcendental functions. + Optimized and tested on AMD GCN architecture. + Release notes: + v0.41 minor bug fixes, missing references + + v0.4 new constants calculated for new ranges, minor optimization and precision improvements + Developed during production of : Far Cry 4, Ubisoft Montreal + v0.3 added Newton Raphson 1 and 2 iterations + Newton Raphson methods provided for reference purpose (some code / architectures might still benefit from single NR). + v0.2 fast IEEE float math sqrt() rsqrt() rcp() + v0.1 4th order polynomial approximations for atan() asin() acos() + Developed during production of : Killzone : Shadow Fall, Guerrilla Games, SCEE + Ubisoft and Guerrilla Games granted permission for open source distribution. 
+ Contact information: + Michal Drobot - @MichalDrobot + hello@drobot.org + Presented publicly part of a course: + Low Level Optimizations for AMD GCN + (available @ http://michaldrobot.com/publications/) +********************************************************************************/ + +/****************************************************************************** + The MIT License (MIT) + Copyright (c) <2014> + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
+********************************************************************************/ + +#ifndef SHADER_FAST_MATH_INC_FX +#define SHADER_FAST_MATH_INC_FX + +// Define switch for PC compilation +#ifdef _PC +# define asint(_x) *reinterpret_cast(&_x); +# define asfloat(_x) *reinterpret_cast(&_x); +# include +#endif + +// Derived from batch testing +// TODO : Should be improved +#define IEEE_INT_RCP_CONST_NR0 0x7EF311C2 +#define IEEE_INT_RCP_CONST_NR1 0x7EF311C3 +#define IEEE_INT_RCP_CONST_NR2 0x7EF312AC + +// Derived from batch testing +#define IEEE_INT_SQRT_CONST_NR0 0x1FBD1DF5 + +// Biases for global ranges +// 0-1 or 1-2 specific ranges might improve from different bias +// Derived from batch testing +// TODO : Should be improved +#define IEEE_INT_RCP_SQRT_CONST_NR0 0x5f3759df +#define IEEE_INT_RCP_SQRT_CONST_NR1 0x5F375A86 +#define IEEE_INT_RCP_SQRT_CONST_NR2 0x5F375A86 + +// +// Normalized range [0,1] Constants +// +#define IEEE_INT_RCP_CONST_NR0_SNORM 0x7EEF370B +#define IEEE_INT_SQRT_CONST_NR0_SNORM 0x1FBD1DF5 +#define IEEE_INT_RCP_SQRT_CONST_NR0_SNORM 0x5F341A43 + +// +// Distance [0,1000] based constants +// +//#define IEEE_INT_RCP_CONST_NR0_SNORM 0x7EF3210C +//#define IEEE_INT_SQRT_CONST_NR0_SNORM 0x1FBD22DF +//#define IEEE_INT_RCP_SQRT_CONST_NR0_SNORM 0x5F33E79F + +// +// RCP SQRT +// + +// Approximate guess using integer float arithmetics based on IEEE floating point standard +float rcpSqrtIEEEIntApproximation(float inX, const int inRcpSqrtConst) +{ + int x = asint(inX); + x = inRcpSqrtConst - (x >> 1); + return asfloat(x); +} + +float rcpSqrtNewtonRaphson(float inXHalf, float inRcpX) +{ + return inRcpX * (-inXHalf * (inRcpX * inRcpX) + 1.5f); +} + +// +// Using 0 Newton Raphson iterations +// Relative error : ~3.4% over full +// Precise format : ~small float +// 2 ALU +// +float fastRcpSqrtNR0(float inX) +{ + float xRcpSqrt = rcpSqrtIEEEIntApproximation(inX, IEEE_INT_RCP_SQRT_CONST_NR0); + return xRcpSqrt; +} + +// +// Using 1 Newton Raphson iterations +// 
Relative error : ~0.2% over full +// Precise format : ~half float +// 6 ALU +// +float fastRcpSqrtNR1(float inX) +{ + float xhalf = 0.5f * inX; + float xRcpSqrt = rcpSqrtIEEEIntApproximation(inX, IEEE_INT_RCP_SQRT_CONST_NR1); + xRcpSqrt = rcpSqrtNewtonRaphson(xhalf, xRcpSqrt); + return xRcpSqrt; +} + +// +// Using 2 Newton Raphson iterations +// Relative error : ~4.6e-004% over full +// Precise format : ~full float +// 9 ALU +// +float fastRcpSqrtNR2(float inX) +{ + float xhalf = 0.5f * inX; + float xRcpSqrt = rcpSqrtIEEEIntApproximation(inX, IEEE_INT_RCP_SQRT_CONST_NR2); + xRcpSqrt = rcpSqrtNewtonRaphson(xhalf, xRcpSqrt); + xRcpSqrt = rcpSqrtNewtonRaphson(xhalf, xRcpSqrt); + return xRcpSqrt; +} + +// +// SQRT +// +float sqrtIEEEIntApproximation(float inX, const int inSqrtConst) +{ + int x = asint(inX); + x = inSqrtConst + (x >> 1); + return asfloat(x); +} + +// +// Using 0 Newton Raphson iterations +// Relative error : < 0.7% over full +// Precise format : ~small float +// 1 ALU +// +float fastSqrtNR0(float inX) +{ + float xRcp = sqrtIEEEIntApproximation(inX, IEEE_INT_SQRT_CONST_NR0); + return xRcp; +} + +// +// Use inverse Rcp Sqrt +// Using 1 Newton Raphson iterations +// Relative error : ~0.2% over full +// Precise format : ~half float +// 6 ALU +// +float fastSqrtNR1(float inX) +{ + // Inverse Rcp Sqrt + return inX * fastRcpSqrtNR1(inX); +} + +// +// Use inverse Rcp Sqrt +// Using 2 Newton Raphson iterations +// Relative error : ~4.6e-004% over full +// Precise format : ~full float +// 9 ALU +// +float fastSqrtNR2(float inX) +{ + // Inverse Rcp Sqrt + return inX * fastRcpSqrtNR2(inX); +} + +// +// RCP +// + +float rcpIEEEIntApproximation(float inX, const int inRcpConst) +{ + int x = asint(inX); + x = inRcpConst - x; + return asfloat(x); +} + +float rcpNewtonRaphson(float inX, float inRcpX) +{ + return inRcpX * (-inRcpX * inX + 2.0f); +} + +// +// Using 0 Newton Raphson iterations +// Relative error : < 0.4% over full +// Precise format : ~small float +// 1 ALU 
+// +float fastRcpNR0(float inX) +{ + float xRcp = rcpIEEEIntApproximation(inX, IEEE_INT_RCP_CONST_NR0); + return xRcp; +} + +// +// Using 1 Newton Raphson iterations +// Relative error : < 0.02% over full +// Precise format : ~half float +// 3 ALU +// +float fastRcpNR1(float inX) +{ + float xRcp = rcpIEEEIntApproximation(inX, IEEE_INT_RCP_CONST_NR1); + xRcp = rcpNewtonRaphson(inX, xRcp); + return xRcp; +} + +// +// Using 2 Newton Raphson iterations +// Relative error : < 5.0e-005% over full +// Precise format : ~full float +// 5 ALU +// +float fastRcpNR2(float inX) +{ + float xRcp = rcpIEEEIntApproximation(inX, IEEE_INT_RCP_CONST_NR2); + xRcp = rcpNewtonRaphson(inX, xRcp); + xRcp = rcpNewtonRaphson(inX, xRcp); + return xRcp; +} + +// +// Trigonometric functions +// +static const float fsl_PI = 3.1415926535897932384626433f; +static const float fsl_HALF_PI = 0.5f * fsl_PI; + +// 4th order polynomial approximation +// 4 VGRP, 16 ALU Full Rate +// 7 * 10^-5 radians precision +// Reference : Handbook of Mathematical Functions (chapter : Elementary Transcendental Functions), M. Abramowitz and I.A. Stegun, Ed. +float acosFast4(float inX) +{ + float x1 = abs(inX); + float x2 = x1 * x1; + float x3 = x2 * x1; + float s; + + s = -0.2121144f * x1 + 1.5707288f; + s = 0.0742610f * x2 + s; + s = -0.0187293f * x3 + s; + s = sqrt(1.0f - x1) * s; + + // acos function mirroring + // check per platform if compiles to a selector - no branch neeeded + return inX >= 0.0f ? s : fsl_PI - s; +} + +// 4th order polynomial approximation +// 4 VGRP, 16 ALU Full Rate +// 7 * 10^-5 radians precision +float asinFast4(float inX) +{ + float x = inX; + + // asin is offset of acos + return fsl_HALF_PI - acosFast4(x); +} + +// 4th order hyperbolical approximation +// 4 VGRP, 12 ALU Full Rate +// 7 * 10^-5 radians precision +// Reference : Efficient approximations for the arctangent function, Rajan, S. Sichun Wang Inkol, R. 
Joyal, A., May 2006 +float atanFast4(float inX) +{ + float x = inX; + return x * (-0.1784f * abs(x) - 0.0663f * x * x + 1.0301f); +} + +//////////////////////////////////////////////////////////////////////////////////////////////// + +// https://seblagarde.wordpress.com/2014/12/01/inverse-trigonometric-functions-gpu-optimization-for-amd-gcn-architecture/ + +// max absolute error 9.0x10^-3 +// Eberly's polynomial degree 1 - respect bounds +// 4 VGPR, 12 FR (8 FR, 1 QR), 1 scalar +// input [-1, 1] and output [0, PI] +float ACos(float inX) +{ + float x = abs(inX); + float res = -0.156583f * x + fsl_HALF_PI; + res *= fastSqrtNR0(1.0f - x); + return (inX >= 0) ? res : fsl_PI - res; +} + +// Same cost as Acos + 1 FR +// Same error +// input [-1, 1] and output [-PI/2, PI/2] +float ASin(float x) +{ + return fsl_HALF_PI - ACos(x); +} + +// max absolute error 1.3x10^-3 +// Eberly's odd polynomial degree 5 - respect bounds +// 4 VGPR, 14 FR (10 FR, 1 QR), 2 scalar +// input [0, infinity] and output [0, PI/2] +float ATanPos(float x) +{ + float t0 = (x < 1.0f) ? x : 1.0f / x; + float t1 = t0 * t0; + float poly = 0.0872929f; + poly = -0.301895f + poly * t1; + poly = 1.0f + poly * t1; + poly = poly * t0; + return (x < 1.0f) ? poly : fsl_HALF_PI - poly; +} + +// 4 VGPR, 16 FR (12 FR, 1 QR), 2 scalar +// input [-infinity, infinity] and output [-PI/2, PI/2] +float ATan(float x) +{ + float t0 = ATanPos(abs(x)); + return (x < 0.0f) ? -t0 : t0; +} + +#endif //SHADER_FAST_MATH_INC_FX \ No newline at end of file diff --git a/package/Shaders/Common/GBuffer.hlsli b/package/Shaders/Common/GBuffer.hlsli new file mode 100644 index 000000000..418ec0cb3 --- /dev/null +++ b/package/Shaders/Common/GBuffer.hlsli @@ -0,0 +1,33 @@ + +// https://knarkowicz.wordpress.com/2014/04/16/octahedron-normal-vector-encoding/ + +half2 OctWrap(half2 v) +{ + return (1.0 - abs(v.yx)) * (v.xy >= 0.0 ? 
1.0 : -1.0); +} + +half2 EncodeNormal(half3 n) +{ + n = -n; + n /= (abs(n.x) + abs(n.y) + abs(n.z)); + n.xy = n.z >= 0.0 ? n.xy : OctWrap(n.xy); + n.xy = n.xy * 0.5 + 0.5; + return n.xy; +} + +half3 DecodeNormal(half2 f) +{ + f = f * 2.0 - 1.0; + // https://twitter.com/Stubbesaurus/status/937994790553227264 + half3 n = half3(f.x, f.y, 1.0 - abs(f.x) - abs(f.y)); + half t = saturate(-n.z); + n.xy += n.xy >= 0.0 ? -t : t; + return -normalize(n); +} + +half2 EncodeNormalVanilla(half3 n) +{ + n.z = max(1.0 / 1000.0, sqrt(8 + -8 * n.z)); + n.xy /= n.z; + return n.xy + 0.5; +} \ No newline at end of file diff --git a/package/Shaders/Common/LightingData.hlsl b/package/Shaders/Common/LightingData.hlsl index af5c35871..f47b9fb66 100644 --- a/package/Shaders/Common/LightingData.hlsl +++ b/package/Shaders/Common/LightingData.hlsl @@ -10,6 +10,7 @@ struct LightingData float4 CameraData; float2 BufferDim; float Timer; + bool Opaque; }; StructuredBuffer lightingData : register(t126); @@ -21,7 +22,7 @@ float GetDepth(float2 uv, uint a_eyeIndex = 0) { uv = ConvertToStereoUV(uv, a_eyeIndex); uv = GetDynamicResolutionAdjustedScreenPosition(uv); - return TexDepthSampler.Load(int3(uv * lightingData[0].BufferDim, 0)); + return TexDepthSampler.Load(int3(uv * lightingData[0].BufferDim, 0)).x; } float GetScreenDepth(float depth) diff --git a/package/Shaders/Common/VR.hlsli b/package/Shaders/Common/VR.hlsli index a4e724aee..cd784504d 100644 --- a/package/Shaders/Common/VR.hlsli +++ b/package/Shaders/Common/VR.hlsli @@ -1,3 +1,5 @@ +#ifndef __VR_DEPENDENCY_HLSL__ +#define __VR_DEPENDENCY_HLSL__ #ifdef VR cbuffer VRValues : register(b13) { @@ -122,7 +124,7 @@ uint GetEyeIndexVS(uint instanceID = 0) /** Gets VR Output -@param texCoord Texcoord on the screen [0,1] +@param clipPos clipPosition. 
Typically the VSHADER position at SV_POSITION0 @param a_eyeIndex The eyeIndex; 0 is left, 1 is right @returns VR_OUTPUT with VR values */ @@ -154,4 +156,5 @@ VR_OUTPUT GetVRVSOutput(float4 clipPos, uint a_eyeIndex = 0) # endif // VR return vsout; } -#endif \ No newline at end of file +#endif +#endif //__VR_DEPENDENCY_HLSL__ \ No newline at end of file diff --git a/package/Shaders/DeferredCompositeCS.hlsl b/package/Shaders/DeferredCompositeCS.hlsl new file mode 100644 index 000000000..7c7988341 --- /dev/null +++ b/package/Shaders/DeferredCompositeCS.hlsl @@ -0,0 +1,165 @@ + +#include "Common/DeferredShared.hlsli" +#include "Common/GBuffer.hlsli" +#include "Common/VR.hlsli" + +Texture2D SpecularTexture : register(t0); +Texture2D AlbedoTexture : register(t1); +Texture2D ReflectanceTexture : register(t2); +Texture2D NormalRoughnessTexture : register(t3); +Texture2D ShadowMaskTexture : register(t4); +Texture2D DepthTexture : register(t5); +Texture2D MasksTexture : register(t6); +Texture2D GITexture : register(t7); + +RWTexture2D MainRW : register(u0); +RWTexture2D NormalTAAMaskSpecularMaskRW : register(u1); + +SamplerState LinearSampler : register(s0); + +// # define DEBUG + +half GetScreenDepth(half depth) +{ + return (CameraData.w / (-depth * CameraData.z + CameraData.x)); +} + +[numthreads(32, 32, 1)] void DirectionalPass(uint3 globalId + : SV_DispatchThreadID, uint3 localId + : SV_GroupThreadID, uint3 groupId + : SV_GroupID) { + half2 uv = half2(globalId.xy + 0.5) * RcpBufferDim.xy; + uint eyeIndex = GetEyeIndexFromTexCoord(uv); + + half3 normalGlossiness = NormalRoughnessTexture[globalId.xy]; + half3 normalVS = DecodeNormal(normalGlossiness.xy); + + half rawDepth = DepthTexture[globalId.xy]; + half depth = GetScreenDepth(rawDepth); + + half weight = 1.0; + + half NdotL = dot(normalVS, DirLightDirectionVS[eyeIndex].xyz); + + half3 albedo = AlbedoTexture[globalId.xy]; + half3 masks = MasksTexture[globalId.xy]; + + half3 color = MainRW[globalId.xy].rgb; + color += 
albedo * lerp(max(0, NdotL), 1.0, masks.z) * DirLightColor.xyz; + + MainRW[globalId.xy] = half4(color.xyz, 1.0); +}; + +[numthreads(32, 32, 1)] void DirectionalShadowPass(uint3 globalId + : SV_DispatchThreadID, uint3 localId + : SV_GroupThreadID, uint3 groupId + : SV_GroupID) { + half2 uv = half2(globalId.xy + 0.5) * RcpBufferDim.xy; + uint eyeIndex = GetEyeIndexFromTexCoord(uv); + + half3 normalGlossiness = NormalRoughnessTexture[globalId.xy]; + half3 normalVS = DecodeNormal(normalGlossiness.xy); + + half rawDepth = DepthTexture[globalId.xy]; + half depth = GetScreenDepth(rawDepth); + + half weight = 1.0; + + half NdotL = dot(normalVS, DirLightDirectionVS[eyeIndex].xyz); + half shadow = 1.0; + + half3 albedo = AlbedoTexture[globalId.xy]; + half3 masks = MasksTexture[globalId.xy]; + + if (NdotL > 0.0 || masks.z > 0.0) { + shadow = ShadowMaskTexture[globalId.xy]; + + if (shadow != 0) { + // Approximation of PCF in screen-space: depth-weighted average over the symmetric 3x3 neighbourhood (i, k in [-1, 1]); the centre tap is already in `shadow` with weight 1, so it is skipped + for (int i = -1; i <= 1; i++) { + for (int k = -1; k <= 1; k++) { + if (i == 0 && k == 0) + continue; + float2 offset = float2(i, k) * RcpBufferDim.xy * 1.5; + float sampleDepth = GetScreenDepth(DepthTexture.SampleLevel(LinearSampler, uv + offset, 0)); + float attenuation = 1.0 - saturate(abs(sampleDepth - depth)); + shadow += ShadowMaskTexture.SampleLevel(LinearSampler, uv + offset, 0) * attenuation; + weight += attenuation; + } + } + shadow /= weight; + } + } + + half3 color = MainRW[globalId.xy].rgb; + + color += albedo * lerp(max(0, NdotL), 1.0, masks.z) * DirLightColor.xyz * shadow; + + MainRW[globalId.xy] = half4(color.xyz, 1.0); +}; + +[numthreads(32, 32, 1)] void AmbientCompositePass(uint3 globalId + : SV_DispatchThreadID, uint3 localId + : SV_GroupThreadID, uint3 groupId + : SV_GroupID) { + half2 uv = half2(globalId.xy + 0.5) * RcpBufferDim.xy; + uint eyeIndex = GetEyeIndexFromTexCoord(uv); + + half3 normalGlossiness = NormalRoughnessTexture[globalId.xy]; + half3 normalVS = DecodeNormal(normalGlossiness.xy); + + half4 diffuseColor = 
MainRW[globalId.xy]; + + half3 normalWS = normalize(mul(InvViewMatrix[eyeIndex], half4(normalVS, 0))); + + half3 albedo = AlbedoTexture[globalId.xy]; + + half4 giAo = GITexture[globalId.xy]; + half3 gi = giAo.rgb; + half ao = giAo.w; + + half3 masks = MasksTexture[globalId.xy]; + + half3 directionalAmbientColor = mul(DirectionalAmbient, half4(normalWS, 1.0)); + diffuseColor.rgb += albedo * directionalAmbientColor * ao + gi; + + MainRW[globalId.xy] = half4(diffuseColor.xyz, 1.0); +}; + +[numthreads(32, 32, 1)] void MainCompositePass(uint3 globalId + : SV_DispatchThreadID, uint3 localId + : SV_GroupThreadID, uint3 groupId + : SV_GroupID) { + half2 uv = half2(globalId.xy + 0.5) * RcpBufferDim.xy; + uint eyeIndex = GetEyeIndexFromTexCoord(uv); + + half3 normalGlossiness = NormalRoughnessTexture[globalId.xy]; + half3 normalVS = DecodeNormal(normalGlossiness.xy); + + half3 diffuseColor = MainRW[globalId.xy]; + half3 specularColor = SpecularTexture[globalId.xy]; + half3 albedo = AlbedoTexture[globalId.xy]; + half3 masks = MasksTexture[globalId.xy]; + + half3 normalWS = normalize(mul(InvViewMatrix[eyeIndex], half4(normalVS, 0))); + + half glossiness = normalGlossiness.z; + half3 color = diffuseColor + specularColor; + +#if defined(DEBUG) + half2 texCoord = half2(globalId.xy) / BufferDim.xy; + + if (texCoord.x < 0.5 && texCoord.y < 0.5) { + color = color; + } else if (texCoord.x < 0.5) { + color = albedo; + } else if (texCoord.y < 0.5) { + color = normalWS; + } else { + color = glossiness; + } +#endif + + MainRW[globalId.xy] = half4(color.xyz, 1.0); + NormalTAAMaskSpecularMaskRW[globalId.xy] = half4(EncodeNormalVanilla(normalVS), 0.0, glossiness); +} \ No newline at end of file diff --git a/package/Shaders/DistantTree.hlsl b/package/Shaders/DistantTree.hlsl index ec8351556..629496e47 100644 --- a/package/Shaders/DistantTree.hlsl +++ b/package/Shaders/DistantTree.hlsl @@ -1,3 +1,9 @@ +#include "Common/Constants.hlsli" +#include "Common/FrameBuffer.hlsl" +#include 
"Common/GBuffer.hlsli" +#include "Common/MotionBlur.hlsl" +#include "Common/VR.hlsli" + struct VS_INPUT { float3 Position : POSITION0; @@ -6,6 +12,9 @@ struct VS_INPUT float4 InstanceData2 : TEXCOORD5; float4 InstanceData3 : TEXCOORD6; float4 InstanceData4 : TEXCOORD7; +#if defined(VR) + uint InstanceID : SV_INSTANCEID; +#endif // VR }; struct VS_OUTPUT @@ -18,7 +27,14 @@ struct VS_OUTPUT #else float4 WorldPosition : POSITION1; float4 PreviousWorldPosition : POSITION2; -#endif +#endif // RENDER_DEPTH + float4 ViewPosition : POSITION3; + +#if defined(VR) + float ClipDistance : SV_ClipDistance0; // o11 + float CullDistance : SV_CullDistance0; // p11 + uint EyeIndex : EYEIDX0; +#endif // VR }; #ifdef VSHADER @@ -29,14 +45,25 @@ cbuffer PerTechnique : register(b0) cbuffer PerGeometry : register(b2) { - row_major float4x4 WorldViewProj : packoffset(c0); - row_major float4x4 World : packoffset(c4); - row_major float4x4 PreviousWorld : packoffset(c8); +# if !defined(VR) + row_major float4x4 WorldViewProj[1] : packoffset(c0); + row_major float4x4 World[1] : packoffset(c4); + row_major float4x4 PreviousWorld[1] : packoffset(c8); +# else + row_major float4x4 WorldViewProj[2] : packoffset(c0); + row_major float4x4 World[2] : packoffset(c8); + row_major float4x4 PreviousWorld[2] : packoffset(c16); +# endif // !VR }; VS_OUTPUT main(VS_INPUT input) { VS_OUTPUT vsout; + uint eyeIndex = GetEyeIndexVS( +# if defined(VR) + input.InstanceID +# endif // VR + ); float3 scaledModelPosition = input.InstanceData1.www * input.Position.xyz; float3 adjustedModelPosition = 0.0.xxx; @@ -44,33 +71,46 @@ VS_OUTPUT main(VS_INPUT input) adjustedModelPosition.y = dot(input.InstanceData2.yx, scaledModelPosition.xy); adjustedModelPosition.z = scaledModelPosition.z; float4 finalModelPosition = float4(input.InstanceData1.xyz + adjustedModelPosition.xyz, 1.0); - float4 viewPosition = mul(WorldViewProj, finalModelPosition); + float4 viewPosition = mul(WorldViewProj[eyeIndex], finalModelPosition); # ifdef 
RENDER_DEPTH vsout.Depth.xy = viewPosition.zw; vsout.Depth.zw = input.InstanceData2.zw; # else - vsout.WorldPosition = mul(World, finalModelPosition); - vsout.PreviousWorldPosition = mul(PreviousWorld, finalModelPosition); -# endif + vsout.WorldPosition = mul(World[eyeIndex], finalModelPosition); + vsout.PreviousWorldPosition = mul(PreviousWorld[eyeIndex], finalModelPosition); + vsout.ViewPosition = viewPosition; +# endif // RENDER_DEPTH vsout.Position = viewPosition; vsout.TexCoord = float3(input.TexCoord0.xy, FogParam.z); +# ifdef VR + vsout.EyeIndex = eyeIndex; + VR_OUTPUT VRout = GetVRVSOutput(vsout.Position, eyeIndex); + vsout.Position = VRout.VRPosition; + vsout.ClipDistance.x = VRout.ClipDistance; + vsout.CullDistance.x = VRout.CullDistance; +# endif // VR + return vsout; } -#endif +#endif // VSHADER typedef VS_OUTPUT PS_INPUT; struct PS_OUTPUT { - float4 Albedo : SV_Target0; + float4 Diffuse : SV_Target0; #if !defined(RENDER_DEPTH) +# if defined(DEFERRED) float2 MotionVector : SV_Target1; float4 Normal : SV_Target2; -#endif + float4 Albedo : SV_Target3; + float4 Masks : SV_Target6; +# endif // DEFERRED +#endif // !RENDER_DEPTH }; #ifdef PSHADER @@ -78,10 +118,12 @@ SamplerState SampDiffuse : register(s0); Texture2D TexDiffuse : register(t0); +# if !defined(VR) cbuffer AlphaTestRefCB : register(b11) { float AlphaTestRefRS : packoffset(c0); } +# endif // !VR cbuffer PerFrame : register(b12) { @@ -119,6 +161,12 @@ PS_OUTPUT main(PS_INPUT input) { PS_OUTPUT psout; +# if !defined(VR) + uint eyeIndex = 0; +# else + uint eyeIndex = input.EyeIndex; +# endif // !VR + # if defined(RENDER_DEPTH) uint2 temp = uint2(input.Position.xy); uint index = ((temp.x << 2) & 12) | (temp.y & 3); @@ -136,8 +184,8 @@ PS_OUTPUT main(PS_INPUT input) discard; } - psout.Albedo.xyz = input.Depth.xxx / input.Depth.yyy; - psout.Albedo.w = 0; + psout.Diffuse.xyz = input.Depth.xxx / input.Depth.yyy; + psout.Diffuse.w = 0; # else float4 baseColor = TexDiffuse.Sample(SampDiffuse, 
input.TexCoord.xy); @@ -145,21 +193,28 @@ PS_OUTPUT main(PS_INPUT input) if ((baseColor.w - AlphaTestRefRS) < 0) { discard; } -# endif +# endif // DO_ALPHA_TEST + +# if defined(DEFERRED) + psout.Diffuse.xyz = 0; + psout.Diffuse.w = 1; - psout.Albedo = float4((input.TexCoord.zzz * DiffuseColor.xyz + AmbientColor.xyz) * baseColor.xyz, 1.0); + psout.MotionVector = GetSSMotionVector(input.WorldPosition, input.PreviousWorldPosition, eyeIndex); - float4 screenPosition = mul(ScreenProj, input.WorldPosition); - screenPosition.xy = screenPosition.xy / screenPosition.ww; - float4 previousScreenPosition = mul(PreviousScreenProj, input.PreviousWorldPosition); - previousScreenPosition.xy = previousScreenPosition.xy / previousScreenPosition.ww; - float2 screenMotionVector = float2(-0.5, 0.5) * (screenPosition.xy - previousScreenPosition.xy); + float3 ddx = ddx_coarse(input.ViewPosition); + float3 ddy = ddy_coarse(input.ViewPosition); + float3 normal = normalize(cross(ddx, ddy)); - psout.MotionVector = screenMotionVector; + psout.Normal.xy = EncodeNormal(normal); + psout.Normal.zw = 0; - psout.Normal = float4(0.5, 0.5, 0, 0); -# endif + psout.Albedo = float4(baseColor.xyz * 0.5, 1); + psout.Masks = float4(0, 0, 1, 0); +# else + psout.Diffuse = float4((input.TexCoord.zzz * DiffuseColor.xyz + AmbientColor.xyz) * baseColor.xyz, 1.0); +# endif // DEFERRED +# endif // RENDER_DEPTH return psout; } -#endif +#endif // PSHADER diff --git a/package/Shaders/Effect.hlsl b/package/Shaders/Effect.hlsl index a347ed339..46eeb17b5 100644 --- a/package/Shaders/Effect.hlsl +++ b/package/Shaders/Effect.hlsl @@ -1,4 +1,11 @@ +#include "Common/Color.hlsl" +#include "Common/FrameBuffer.hlsl" +#include "Common/GBuffer.hlsli" +#include "Common/LightingData.hlsl" +#include "Common/MotionBlur.hlsl" +#include "Common/Permutation.hlsl" #include "Common/Skinned.hlsli" +#include "Common/VR.hlsli" #define EFFECT @@ -26,6 +33,9 @@ struct VS_INPUT float4 BoneWeights : BLENDWEIGHT0; float4 BoneIndices : 
BLENDINDICES0; #endif +#if defined(VR) + uint InstanceID : SV_INSTANCEID; +#endif // VR }; struct VS_OUTPUT @@ -71,19 +81,35 @@ struct VS_OUTPUT float3 ScreenSpaceNormal : TEXCOORD7; # endif #endif +#if defined(VR) + float ClipDistance : SV_ClipDistance0; // o11 + float CullDistance : SV_CullDistance0; // p11 + uint EyeIndex : EYEIDX0; +#endif // VR }; #ifdef VSHADER -cbuffer PerFrame : register(b12) +cbuffer VS_PerFrame : register(b12) { - row_major float4x3 ScreenProj : packoffset(c0); - row_major float4x4 ViewProj : packoffset(c8); -# if defined(SKINNED) - float3 BonesPivot : packoffset(c40); -# if defined(MOTIONVECTORS_NORMALS) - float3 PreviousBonesPivot : packoffset(c41); -# endif -# endif +# if !defined(VR) + row_major float4x3 ScreenProj[1] : packoffset(c0); + row_major float4x4 ViewProj[1] : packoffset(c8); +# if defined(SKINNED) + float3 BonesPivot[1] : packoffset(c40); +# if defined(MOTIONVECTORS_NORMALS) + float3 PreviousBonesPivot[1] : packoffset(c41); +# endif // MOTIONVECTORS_NORMALS +# endif // SKINNED +# else + row_major float4x3 ScreenProj[2] : packoffset(c0); + row_major float4x4 ViewProj[2] : packoffset(c16); +# if defined(SKINNED) + float3 BonesPivot[2] : packoffset(c80); +# if defined(MOTIONVECTORS_NORMALS) + float3 PreviousBonesPivot[2] : packoffset(c82); +# endif // MOTIONVECTORS_NORMALS +# endif // SKINNED +# endif // VR }; cbuffer PerTechnique : register(b0) @@ -102,12 +128,21 @@ cbuffer PerMaterial : register(b1) cbuffer PerGeometry : register(b2) { - row_major float3x4 World : packoffset(c0); - row_major float3x4 PreviousWorld : packoffset(c3); +# if !defined(VR) + row_major float3x4 World[1] : packoffset(c0); + row_major float3x4 PreviousWorld[1] : packoffset(c3); float4 MatProj[3] : packoffset(c6); - float4 EyePosition : packoffset(c12); - float4 PosAdjust : packoffset(c13); + float4 EyePosition[1] : packoffset(c12); + float4 PosAdjust[1] : packoffset(c13); float4 TexcoordOffsetMembrane : packoffset(c14); +# else + row_major float3x4 
World[2] : packoffset(c0); + row_major float3x4 PreviousWorld[2] : packoffset(c6); + float4 MatProj[3] : packoffset(c12); + float4 EyePosition[2] : packoffset(c21); + float4 PosAdjust[2] : packoffset(c23); + float4 TexcoordOffsetMembrane : packoffset(c25); +# endif // VR } cbuffer IndexedTexcoordBuffer : register(b11) @@ -115,9 +150,6 @@ cbuffer IndexedTexcoordBuffer : register(b11) float4 IndexedTexCoord[128] : packoffset(c0); } -# define M_HALFPI 1.57079637; -# define M_PI 3.141593 - # if defined(PROJECTED_UV) float GetProjectedU(float3 worldPosition, float4 texCoordOffset) { @@ -152,43 +184,47 @@ float GetProjectedU(float3 worldPosition, float4 texCoordOffset) return abs(0.318309158 * projUvTmp4) * texCoordOffset.w + texCoordOffset.y; } -float GetProjectedV(float3 worldPosition) +float GetProjectedV(float3 worldPosition, uint a_eyeIndex = 0) { - return (-PosAdjust.x + (PosAdjust.z + worldPosition.z)) / PosAdjust.y; + return (-PosAdjust[a_eyeIndex].x + (PosAdjust[a_eyeIndex].z + worldPosition.z)) / PosAdjust[a_eyeIndex].y; } # endif VS_OUTPUT main(VS_INPUT input) { VS_OUTPUT vsout; - + uint eyeIndex = GetEyeIndexVS( +# if defined(VR) + input.InstanceID +# endif // VR + ); precise float4 inputPosition = float4(input.Position.xyz, 1.0); - precise row_major float4x4 world4x4 = float4x4(World[0], World[1], World[2], float4(0, 0, 0, 1)); + precise row_major float4x4 world4x4 = float4x4(World[eyeIndex][0], World[eyeIndex][1], World[eyeIndex][2], float4(0, 0, 0, 1)); precise float3x3 world3x3 = - transpose(float3x3(transpose(World)[0], transpose(World)[1], transpose(World)[2])); + transpose(float3x3(transpose(World[eyeIndex])[0], transpose(World[eyeIndex])[1], transpose(World[eyeIndex])[2])); # if defined(SKY_OBJECT) - float4x4 viewProj = float4x4(ViewProj[0], ViewProj[1], ViewProj[3], ViewProj[3]); + float4x4 viewProj = float4x4(ViewProj[eyeIndex][0], ViewProj[eyeIndex][1], ViewProj[eyeIndex][3], ViewProj[eyeIndex][3]); # else - row_major float4x4 viewProj = ViewProj; 
+ row_major float4x4 viewProj = ViewProj[eyeIndex]; # endif # if defined(SKINNED) precise int4 actualIndices = 765.01.xxxx * input.BoneIndices.xyzw; # if defined(MOTIONVECTORS_NORMALS) float3x4 previousBoneTransformMatrix = - GetBoneTransformMatrix(PreviousBones, actualIndices, PreviousBonesPivot, input.BoneWeights); + GetBoneTransformMatrix(PreviousBones, actualIndices, PreviousBonesPivot[eyeIndex], input.BoneWeights); precise float4 previousWorldPosition = float4(mul(inputPosition, transpose(previousBoneTransformMatrix)), 1); # endif float3x4 boneTransformMatrix = - GetBoneTransformMatrix(Bones, actualIndices, BonesPivot, input.BoneWeights); + GetBoneTransformMatrix(Bones, actualIndices, BonesPivot[eyeIndex], input.BoneWeights); precise float4 worldPosition = float4(mul(inputPosition, transpose(boneTransformMatrix)), 1); float4 viewPos = mul(viewProj, worldPosition); # else - precise float4 worldPosition = float4(mul(World, inputPosition), 1); - precise float4 previousWorldPosition = float4(mul(PreviousWorld, inputPosition), 1); + precise float4 worldPosition = float4(mul(World[eyeIndex], inputPosition), 1); + precise float4 previousWorldPosition = float4(mul(PreviousWorld[eyeIndex], inputPosition), 1); precise row_major float4x4 modelView = mul(viewProj, world4x4); float4 viewPos = mul(modelView, inputPosition); # endif @@ -257,7 +293,7 @@ VS_OUTPUT main(VS_INPUT input) # if defined(NORMALS) && !defined(MEMBRANE) texCoord.y = dot(MatProj[1].xyz, inputPosition.xyz); # else - texCoord.y = GetProjectedV(worldPosition.xyz); + texCoord.y = GetProjectedV(worldPosition.xyz, eyeIndex); # endif # else # if defined(TEXTURE) @@ -290,7 +326,7 @@ VS_OUTPUT main(VS_INPUT input) float3 eyePosition = 0.0.xxx; # if defined(MEMBRANE) && defined(TEXTURE) && !defined(SKINNED) - eyePosition = EyePosition.xyz; + eyePosition = EyePosition[eyeIndex].xyz; # endif float3 viewPosition = inputPosition.xyz; @@ -333,7 +369,7 @@ VS_OUTPUT main(VS_INPUT input) # elif defined(FALLOFF) || 
(defined(SKINNED) && defined(MEMBRANE)) float3 screenSpaceNormal = worldNormal; # else - float4x4 modelScreen = mul(ScreenProj, world4x4); + float4x4 modelScreen = mul(ScreenProj[eyeIndex], world4x4); float3 screenSpaceNormal = normalize(mul(modelScreen, float4(normal, 0))).xyz; # endif @@ -355,6 +391,13 @@ VS_OUTPUT main(VS_INPUT input) vsout.PreviousWorldPosition = previousWorldPosition; # endif +# ifdef VR + vsout.EyeIndex = eyeIndex; + VR_OUTPUT VRout = GetVRVSOutput(vsout.Position, eyeIndex); + vsout.Position = VRout.VRPosition; + vsout.ClipDistance.x = VRout.ClipDistance; + vsout.CullDistance.x = VRout.CullDistance; +# endif // VR return vsout; } #endif @@ -372,30 +415,43 @@ Texture2D TexNoiseSampler : register(t2); Texture2D TexDepthSamplerEffect : register(t3); Texture2D TexGrayscaleSampler : register(t4); +#if defined(DEFERRED) struct PS_OUTPUT { - float4 Color : SV_Target0; -#if defined(MOTIONVECTORS_NORMALS) + float4 Diffuse : SV_Target0; +# if defined(MOTIONVECTORS_NORMALS) + float4 MotionVectors : SV_Target1; + float4 NormalGlossiness : SV_Target2; +# elif defined(NORMALS) + float4 NormalGlossiness : SV_Target2; +# endif + float4 Albedo : SV_Target3; + float4 Specular : SV_Target4; + float4 Reflectance : SV_Target5; + float4 Masks : SV_Target6; +}; +#else +struct PS_OUTPUT +{ + float4 Diffuse : SV_Target0; +# if defined(MOTIONVECTORS_NORMALS) float2 MotionVectors : SV_Target1; float4 ScreenSpaceNormals : SV_Target2; -#else +# else float4 Normal : SV_Target1; float4 Color2 : SV_Target2; -#endif +# endif }; +#endif #ifdef PSHADER -# include "Common/Color.hlsl" -# include "Common/FrameBuffer.hlsl" -# include "Common/MotionBlur.hlsl" -# include "Common/Permutation.hlsl" -# include "Common/LightingData.hlsl" - -cbuffer AlphaTestRefBuffer : register(b11) +# if !defined(VR) +cbuffer AlphaTestRefCB : register(b11) { - float AlphaTestRef1 : packoffset(c0); + float AlphaTestRefRS : packoffset(c0); } +# endif // !VR cbuffer PerTechnique : register(b0) { @@ -413,9 
+469,10 @@ cbuffer PerMaterial : register(b1) cbuffer PerGeometry : register(b2) { - float4 PLightPositionX : packoffset(c0); - float4 PLightPositionY : packoffset(c1); - float4 PLightPositionZ : packoffset(c2); +# if !defined(VR) + float4 PLightPositionX[1] : packoffset(c0); + float4 PLightPositionY[1] : packoffset(c1); + float4 PLightPositionZ[1] : packoffset(c2); float4 PLightingRadiusInverseSquared : packoffset(c3); float4 PLightColorR : packoffset(c4); float4 PLightColorG : packoffset(c5); @@ -425,6 +482,20 @@ cbuffer PerGeometry : register(b2) float4 AlphaTestRef : packoffset(c9); float4 MembraneRimColor : packoffset(c10); float4 MembraneVars : packoffset(c11); +# else + float4 PLightPositionX[2] : packoffset(c0); + float4 PLightPositionY[2] : packoffset(c2); + float4 PLightPositionZ[2] : packoffset(c4); + float4 PLightingRadiusInverseSquared : packoffset(c6); + float4 PLightColorR : packoffset(c7); + float4 PLightColorG : packoffset(c8); + float4 PLightColorB : packoffset(c9); + float4 DLightColor : packoffset(c10); + float4 PropertyColor : packoffset(c11); // VR should be 11; this could start earlier though + float4 AlphaTestRef : packoffset(c12); + float4 MembraneRimColor : packoffset(c13); + float4 MembraneVars : packoffset(c14); +# endif }; # if defined(MEMBRANE) || !defined(LIGHTING) @@ -436,9 +507,9 @@ cbuffer PerGeometry : register(b2) # endif # if defined(LIGHTING) -float3 GetLightingColor(float3 msPosition) +float3 GetLightingColor(float3 msPosition, uint a_eyeIndex = 0) { - float4 lightDistanceSquared = (PLightPositionX - msPosition.xxxx) * (PLightPositionX - msPosition.xxxx) + (PLightPositionY - msPosition.yyyy) * (PLightPositionY - msPosition.yyyy) + (PLightPositionZ - msPosition.zzzz) * (PLightPositionZ - msPosition.zzzz); + float4 lightDistanceSquared = (PLightPositionX[a_eyeIndex] - msPosition.xxxx) * (PLightPositionX[a_eyeIndex] - msPosition.xxxx) + (PLightPositionY[a_eyeIndex] - msPosition.yyyy) * (PLightPositionY[a_eyeIndex] - 
msPosition.yyyy) + (PLightPositionZ[a_eyeIndex] - msPosition.zzzz) * (PLightPositionZ[a_eyeIndex] - msPosition.zzzz); float4 lightFadeMul = 1.0.xxxx - saturate(PLightingRadiusInverseSquared * lightDistanceSquared); float3 color = DLightColor.xyz; @@ -454,6 +525,12 @@ PS_OUTPUT main(PS_INPUT input) { PS_OUTPUT psout; +# if !defined(VR) + uint eyeIndex = 0; +# else + uint eyeIndex = input.EyeIndex; +# endif // !VR + float4 fogMul = 1; # if !defined(MULTBLEND) fogMul.xyz = input.FogAlpha; @@ -499,11 +576,11 @@ PS_OUTPUT main(PS_INPUT input) float lightingInfluence = LightingInfluence.x; float3 propertyColor = PropertyColor.xyz; + # if defined(LIGHTING) - propertyColor = GetLightingColor(input.MSPosition); + propertyColor = GetLightingColor(input.MSPosition, eyeIndex); # if defined(LIGHT_LIMIT_FIX) - uint eyeIndex = 0; uint lightCount = 0; if (LightingInfluence.x > 0.0) { float3 viewPosition = mul(CameraView[eyeIndex], float4(input.WorldPosition.xyz, 1)).xyz; @@ -605,7 +682,7 @@ PS_OUTPUT main(PS_INPUT input) float3 lightColor = lerp(baseColor.xyz, propertyColor * baseColor.xyz, lightingInfluence.xxx); # if !defined(MOTIONVECTORS_NORMALS) - if (alpha * fogMul.w - AlphaTestRef1 < 0) { + if (alpha * fogMul.w - AlphaTestRefRS < 0) { discard; } # endif @@ -628,22 +705,38 @@ PS_OUTPUT main(PS_INPUT input) # else finalColor *= fogMul; # endif - psout.Color = finalColor; + psout.Diffuse = finalColor; # if defined(LIGHT_LIMIT_FIX) && defined(LLFDEBUG) if (perPassLLF[0].EnableLightsVisualisation) { if (perPassLLF[0].LightsVisualisationMode == 0) { - psout.Color.xyz = TurboColormap(0.0); + psout.Diffuse.xyz = TurboColormap(0.0); } else if (perPassLLF[0].LightsVisualisationMode == 1) { - psout.Color.xyz = TurboColormap(0.0); + psout.Diffuse.xyz = TurboColormap(0.0); } else { - psout.Color.xyz = TurboColormap((float)lightCount / 128.0); + psout.Diffuse.xyz = TurboColormap((float)lightCount / 128.0); } } # endif -# if defined(MOTIONVECTORS_NORMALS) - float2 screenMotionVector = 
GetSSMotionVector(input.WorldPosition, input.PreviousWorldPosition, 0); +# if defined(DEFERRED) + +# if defined(MOTIONVECTORS_NORMALS) +# if (defined(MEMBRANE) && defined(SKINNED) && defined(NORMALS)) + float3 screenSpaceNormal = normalize(input.TBN0); +# else + float3 screenSpaceNormal = normalize(input.ScreenSpaceNormal); +# endif + psout.NormalGlossiness = float4(EncodeNormal(screenSpaceNormal), 0.0, psout.Diffuse.w); + float2 screenMotionVector = GetSSMotionVector(input.WorldPosition, input.PreviousWorldPosition, eyeIndex); + psout.MotionVectors = float4(screenMotionVector, 0.0, psout.Diffuse.w); +# endif + + psout.Specular = float4(0.0.xxx, psout.Diffuse.w); + psout.Albedo = float4(baseColor.xyz * psout.Diffuse.w, psout.Diffuse.w); + psout.Reflectance = float4(0.0.xxx, psout.Diffuse.w); +# elif defined(MOTIONVECTORS_NORMALS) + float2 screenMotionVector = GetSSMotionVector(input.WorldPosition, input.PreviousWorldPosition, eyeIndex); psout.MotionVectors = screenMotionVector; # if (defined(MEMBRANE) && defined(SKINNED) && defined(NORMALS)) diff --git a/package/Shaders/Lighting.hlsl b/package/Shaders/Lighting.hlsl index d6addde81..464986b39 100644 --- a/package/Shaders/Lighting.hlsl +++ b/package/Shaders/Lighting.hlsl @@ -1,5 +1,6 @@ #include "Common/Color.hlsl" #include "Common/FrameBuffer.hlsl" +#include "Common/GBuffer.hlsli" #include "Common/LightingData.hlsl" #include "Common/LodLandscape.hlsli" #include "Common/MotionBlur.hlsl" @@ -394,15 +395,31 @@ VS_OUTPUT main(VS_INPUT input) typedef VS_OUTPUT PS_INPUT; +#if defined(DEFERRED) struct PS_OUTPUT { - float4 Albedo : SV_Target0; + float4 Diffuse : SV_Target0; + float4 MotionVectors : SV_Target1; + float4 NormalGlossiness : SV_Target2; + float4 Albedo : SV_Target3; + float4 Specular : SV_Target4; + float4 Reflectance : SV_Target5; + float4 Masks : SV_Target6; +# if defined(SNOW) + float4 SnowParameters : SV_Target7; +# endif +}; +#else +struct PS_OUTPUT +{ + float4 Diffuse : SV_Target0; float4 MotionVectors : 
SV_Target1; float4 ScreenSpaceNormals : SV_Target2; -#if defined(SNOW) +# if defined(SNOW) float4 SnowParameters : SV_Target3; -#endif +# endif }; +#endif #ifdef PSHADER @@ -901,10 +918,6 @@ float GetSnowParameterY(float texProjTmp, float alpha) # include "WetnessEffects/WetnessEffects.hlsli" # endif -# if defined(CLOUD_SHADOWS) -# include "CloudShadows/CloudShadows.hlsli" -# endif - # if !defined(LANDSCAPE) # undef TERRAIN_BLENDING # endif @@ -927,6 +940,13 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace uint eyeIndex = GetEyeIndexPS(input.Position, VPOSOffset); # if defined(SKINNED) || !defined(MODELSPACENORMALS) float3x3 tbn = float3x3(input.TBN0.xyz, input.TBN1.xyz, input.TBN2.xyz); + +# if !defined(TREE_ANIM) + // Fix incorrect vertex normals on double-sided meshes + if (!frontFace) + tbn = -tbn; +# endif + float3x3 tbnTr = transpose(tbn); # endif // defined (SKINNED) || !defined (MODELSPACENORMALS) @@ -1377,26 +1397,12 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace normalizedDirLightDirectionWS = normalize(mul(input.World[eyeIndex], float4(DirLightDirection.xyz, 0))); # endif -# if defined(CLOUD_SHADOWS) - float3 cloudShadowMult = 1.0; - if (perPassCloudShadow[0].EnableCloudShadows) { - cloudShadowMult = getCloudShadowMult(input.WorldPosition.xyz, normalizedDirLightDirectionWS, SampColorSampler); - dirLightColor *= cloudShadowMult; - } -# endif - float3 nsDirLightColor = dirLightColor; if ((shaderDescriptors[0].PixelShaderDescriptor & _DefShadow) && (shaderDescriptors[0].PixelShaderDescriptor & _ShadowDir)) dirLightColor *= shadowColor.xxx; -# if defined(SCREEN_SPACE_SHADOWS) - float dirLightSShadow = PrepassScreenSpaceShadows(input.WorldPosition.xyz, eyeIndex); - dirLightSShadow = lerp(dirLightSShadow, 1.0, !frontFace * 0.2); - dirLightColor *= dirLightSShadow; -# endif // SCREEN_SPACE_SHADOWS - -# if defined(CPM_AVAILABLE) && (defined(SKINNED) || !defined(MODELSPACENORMALS)) +# if !defined(DEFERRED) && defined(CPM_AVAILABLE) && (defined(SKINNED) || 
!defined(MODELSPACENORMALS)) float3 dirLightDirectionTS = mul(DirLightDirection, tbn).xyz; bool dirLightIsLit = true; @@ -1430,7 +1436,12 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace float3 lightsSpecularColor = 0.0.xxx; float dirLightAngle = dot(modelNormal.xyz, DirLightDirection.xyz); + +# if defined(DEFERRED) + float3 dirDiffuseColor = 0.0; +# else float3 dirDiffuseColor = dirLightColor * saturate(dirLightAngle.xxx); +# endif # if defined(SOFT_LIGHTING) lightsDiffuseColor += nsDirLightColor.xyz * GetSoftLightMultiplier(dirLightAngle) * rimSoftLightColor.xyz; @@ -1755,12 +1766,12 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace # endif float3 directionalAmbientColor = mul(DirectionalAmbient, modelNormal); -# if defined(CLOUD_SHADOWS) - if (perPassCloudShadow[0].EnableCloudShadows) - directionalAmbientColor *= lerp(1.0, cloudShadowMult, perPassCloudShadow[0].AbsorptionAmbient); + +# if !defined(DEFERRED) + diffuseColor += directionalAmbientColor; # endif - diffuseColor = directionalAmbientColor + emitColor.xyz + diffuseColor; + diffuseColor += emitColor.xyz; # if defined(ENVMAP) || defined(MULTI_LAYER_PARALLAX) || defined(EYE) float envMaskColor = TexEnvMaskSampler.Sample(SampEnvMaskSampler, uv).x; @@ -1833,10 +1844,13 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace color.xyz = diffuseColor * baseColor.xyz; # if defined(HAIR) - float3 vertexColor = (input.Color.yyy * (TintColor.xyz - 1.0.xxx) + 1.0.xxx) * color.xyz; + float3 vertexColor = (input.Color.yyy * (TintColor.xyz - 1.0.xxx) + 1.0.xxx); # else - float3 vertexColor = input.Color.xyz * color.xyz; + float3 vertexColor = input.Color.xyz; # endif // defined (HAIR) + float3 realVertexColor = vertexColor; + + vertexColor *= color.xyz; # if defined(MULTI_LAYER_PARALLAX) float layerValue = MultiLayerParallaxData.x * TexLayerSampler.Sample(SampLayerSampler, uv).w; @@ -1848,8 +1862,14 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace float3 layerColor = TexLayerSampler.Sample(SampLayerSampler, layerUv).xyz; - 
vertexColor = (saturate(viewNormalAngle) * (1 - baseColor.w)).xxx * ((directionalAmbientColor + lightsDiffuseColor) * (input.Color.xyz * layerColor) - vertexColor) + vertexColor; + float mlpBlendFactor = saturate(viewNormalAngle) * (1.0 - baseColor.w); +# if defined(DEFERRED) + vertexColor = lerp(vertexColor, (directionalAmbientColor + lightsDiffuseColor + (dirLightColor * saturate(dirLightAngle.xxx))) * (input.Color.xyz * layerColor), mlpBlendFactor); + baseColor.xyz *= 1.0 - mlpBlendFactor; +# else + vertexColor = lerp(vertexColor, (directionalAmbientColor + lightsDiffuseColor) * (input.Color.xyz * layerColor), mlpBlendFactor); +# endif # endif // MULTI_LAYER_PARALLAX # if defined(SPECULAR) @@ -1867,26 +1887,31 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace specularColor = 0; # endif +# if defined(DEFERRED) + diffuseColor += dirLightColor * saturate(dirLightAngle); + diffuseColor += directionalAmbientColor; +# endif + # if (defined(ENVMAP) || defined(MULTI_LAYER_PARALLAX) || defined(EYE)) # if defined(DYNAMIC_CUBEMAPS) if (dynamicCubemap) { diffuseColor = 1.0; - vertexColor = sRGB2Lin(vertexColor); + specularColor = sRGB2Lin(specularColor); } # endif # if defined(CPM_AVAILABLE) && defined(ENVMAP) # if defined(DYNAMIC_CUBEMAPS) - vertexColor += envColor * lerp(complexSpecular, 1.0, dynamicCubemap) * diffuseColor; + specularColor += envColor * lerp(complexSpecular, 1.0, dynamicCubemap) * diffuseColor; # else - vertexColor += envColor * complexSpecular * diffuseColor; + specularColor += envColor * complexSpecular * diffuseColor; # endif # else - vertexColor += envColor * diffuseColor; + specularColor += envColor * diffuseColor; # endif # if defined(DYNAMIC_CUBEMAPS) if (dynamicCubemap) - vertexColor = Lin2sRGB(vertexColor); + specularColor = Lin2sRGB(specularColor); # endif # endif @@ -1897,7 +1922,13 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace color.xyz = tmpColor.xyz + ColourOutputClamp.xxx; color.xyz = min(vertexColor.xyz, color.xyz); +# if 
defined(CPM_AVAILABLE) && defined(ENVMAP) + specularColor *= complexSpecular; +# endif + +# if !defined(DEFERRED) color.xyz += specularColor; +# endif // defined (CPM_AVAILABLE) && defined(ENVMAP) color.xyz = sRGB2Lin(color.xyz); @@ -1930,7 +1961,7 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace # endif # if defined(LANDSCAPE) && !defined(LOD_LAND_BLEND) - psout.Albedo.w = 0; + psout.Diffuse.w = 0; # else float alpha = baseColor.w; # if !defined(ADDITIONAL_ALPHA_MASK) @@ -1977,23 +2008,23 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace discard; } # endif // DO_ALPHA_TEST - psout.Albedo.w = alpha; + psout.Diffuse.w = alpha; # endif # if defined(LIGHT_LIMIT_FIX) && defined(LLFDEBUG) if (perPassLLF[0].EnableLightsVisualisation) { if (perPassLLF[0].LightsVisualisationMode == 0) { - psout.Albedo.xyz = TurboColormap(strictLightData[0].NumStrictLights >= 7.0); + psout.Diffuse.xyz = TurboColormap(strictLightData[0].NumStrictLights >= 7.0); } else if (perPassLLF[0].LightsVisualisationMode == 1) { - psout.Albedo.xyz = TurboColormap((float)strictLightData[0].NumStrictLights / 15.0); + psout.Diffuse.xyz = TurboColormap((float)strictLightData[0].NumStrictLights / 15.0); } else { - psout.Albedo.xyz = TurboColormap((float)numClusteredLights / 128.0); + psout.Diffuse.xyz = TurboColormap((float)numClusteredLights / 128.0); } } else { - psout.Albedo.xyz = color.xyz - tmpColor.xyz * FrameParams.zzz; + psout.Diffuse.xyz = color.xyz - tmpColor.xyz * FrameParams.zzz; } # else - psout.Albedo.xyz = color.xyz - tmpColor.xyz * FrameParams.zzz; + psout.Diffuse.xyz = color.xyz - tmpColor.xyz * FrameParams.zzz; # endif // defined(LIGHT_LIMIT_FIX) # if defined(SNOW) @@ -2003,6 +2034,7 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace psout.MotionVectors.xy = SSRParams.z > 1e-5 ? 
float2(1, 0) : screenMotionVector.xy; psout.MotionVectors.zw = float2(0, 1); +# if !defined(DEFERRED) float tmp = -1e-5 + SSRParams.x; float tmp3 = (SSRParams.y - tmp); float tmp2 = (glossiness - tmp); @@ -2011,7 +2043,7 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace tmp *= tmp * (3 + -2 * tmp); psout.ScreenSpaceNormals.w = tmp * SSRParams.w; -# if defined(WATER_BLENDING) +# if defined(WATER_BLENDING) if (perPassWaterBlending[0].EnableWaterBlendingSSR) { // Compute distance to water surface float distToWater = max(0, input.WorldPosition.z - waterHeight); @@ -2019,17 +2051,17 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace // Reduce SSR amount psout.ScreenSpaceNormals.w *= blendFactor; } -# endif // WATER_BLENDING +# endif // WATER_BLENDING -# if (defined(ENVMAP) || defined(MULTI_LAYER_PARALLAX) || defined(EYE)) -# if defined(DYNAMIC_CUBEMAPS) +# if (defined(ENVMAP) || defined(MULTI_LAYER_PARALLAX) || defined(EYE)) +# if defined(DYNAMIC_CUBEMAPS) psout.ScreenSpaceNormals.w = saturate(sqrt(envMask)); +# endif # endif -# endif -# if defined(WETNESS_EFFECTS) +# if defined(WETNESS_EFFECTS) psout.ScreenSpaceNormals.w = max(psout.ScreenSpaceNormals.w, flatnessAmount); -# endif +# endif // Green reflections fix if (FrameParams.z) @@ -2040,9 +2072,9 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace psout.ScreenSpaceNormals.xy = screenSpaceNormal.xy + 0.5.xx; psout.ScreenSpaceNormals.z = 0; -# if defined(TERRAIN_BLENDING) +# if defined(TERRAIN_BLENDING) // Pixel Depth Offset -# if defined(COMPLEX_PARALLAX_MATERIALS) +# if defined(COMPLEX_PARALLAX_MATERIALS) if (perPassParallax[0].EnableTerrainParallax) { float height = 0; if (input.LandBlendWeights1.x > 0) @@ -2070,24 +2102,37 @@ PS_OUTPUT main(PS_INPUT input, bool frontFace clip(blendFactorTerrain); blendFactorTerrain = saturate(blendFactorTerrain); } -# endif +# endif - psout.Albedo.w = blendFactorTerrain; + psout.Diffuse.w = blendFactorTerrain; -# if defined(SNOW) +# if defined(SNOW) psout.SnowParameters.w = 
blendFactorTerrain; +# endif # endif -# endif - -# if defined(OUTLINE) - psout.Albedo = float4(1, 0, 0, 1); -# endif // OUTLINE -# if defined(SSS) && defined(SKIN) +# if defined(SSS) && defined(SKIN) if (perPassSSS[0].ValidMaterial) { float sssAmount = saturate(baseColor.a) * 0.5; psout.ScreenSpaceNormals.z = perPassSSS[0].IsBeastRace ? sssAmount : sssAmount + 0.5; } +# endif +# else + + psout.MotionVectors.zw = float2(0.0, psout.Diffuse.w); + psout.Specular = float4(specularColor.xyz, psout.Diffuse.w); + psout.Albedo = float4(baseColor.xyz * realVertexColor, psout.Diffuse.w); + psout.Reflectance = float4(0.0.xxx, psout.Diffuse.w); + psout.Masks = float4(0, 0, 0, psout.Diffuse.w); + + float outGlossiness = saturate(glossiness * SSRParams.w); + + psout.NormalGlossiness = float4(EncodeNormal(screenSpaceNormal), outGlossiness, psout.Diffuse.w); + +# if defined(SSS) && defined(SKIN) + psout.Masks.x = saturate(baseColor.a); + psout.Masks.y = !perPassSSS[0].IsBeastRace; +# endif # endif return psout; diff --git a/package/Shaders/RunGrass.hlsl b/package/Shaders/RunGrass.hlsl index 81e1dd92d..613855fee 100644 --- a/package/Shaders/RunGrass.hlsl +++ b/package/Shaders/RunGrass.hlsl @@ -1,3 +1,5 @@ +#include "Common/GBuffer.hlsli" + struct VS_INPUT { float4 Position : POSITION0; @@ -108,12 +110,12 @@ VS_OUTPUT main(VS_INPUT input) float3 instanceNormal = float3(input.InstanceData2.z, input.InstanceData3.zw); float dirLightAngle = dot(DirLightDirection.xyz, instanceNormal); - float3 diffuseMultiplier = input.InstanceData1.www * input.Color.xyz * saturate(dirLightAngle.xxx); + float3 diffuseMultiplier = input.InstanceData1.www * input.Color.xyz; float perInstanceFade = dot(cb8[(asuint(cb7[0].x) >> 2)].xyzw, M_IdentityMatrix[(asint(cb7[0].x) & 3)].xyzw); float distanceFade = 1 - saturate((length(projSpacePosition.xyz) - AlphaParam1) / AlphaParam2); - vsout.DiffuseColor.xyz = DirLightColor.xyz * diffuseMultiplier; + vsout.DiffuseColor.xyz = diffuseMultiplier; vsout.DiffuseColor.w 
= distanceFade * perInstanceFade; vsout.TexCoord.xy = input.TexCoord.xy; @@ -140,13 +142,22 @@ struct PS_OUTPUT #if defined(RENDER_DEPTH) float4 PS : SV_Target0; #else - float4 Albedo : SV_Target0; + float4 Diffuse : SV_Target0; float2 MotionVectors : SV_Target1; float4 Normal : SV_Target2; + float4 Albedo : SV_Target3; + float4 Masks : SV_Target6; #endif }; #ifdef PSHADER + +# include "Common/Color.hlsl" +# include "Common/FrameBuffer.hlsl" +# include "Common/LightingData.hlsl" +# include "Common/MotionBlur.hlsl" +# include "Common/Permutation.hlsl" + SamplerState SampBaseSampler : register(s0); SamplerState SampShadowMaskSampler : register(s1); @@ -158,13 +169,6 @@ cbuffer AlphaTestRefCB : register(b11) float AlphaTestRefRS : packoffset(c0); } -cbuffer PerFrame : register(b12) -{ - float4 UnknownPerFrame1[12] : packoffset(c0); - row_major float4x4 ScreenProj : packoffset(c12); - row_major float4x4 PreviousScreenProj : packoffset(c16); -}; - PS_OUTPUT main(PS_INPUT input) { PS_OUTPUT psout; @@ -186,27 +190,20 @@ PS_OUTPUT main(PS_INPUT input) # else float sunShadowMask = TexShadowMaskSampler.Load(int3(input.HPosition.xy, 0)).x; - // Albedo - float diffuseFraction = lerp(sunShadowMask, 1, input.AmbientColor.w); - float3 diffuseColor = input.DiffuseColor.xyz * baseColor.xyz; - float3 ambientColor = input.AmbientColor.xyz * baseColor.xyz; - psout.Albedo.xyz = input.TexCoord.zzz * (diffuseColor * diffuseFraction + ambientColor); - psout.Albedo.w = 1; - - float4 screenPosition = mul(ScreenProj, input.WorldPosition); - screenPosition.xy = screenPosition.xy / screenPosition.ww; - float4 previousScreenPosition = mul(PreviousScreenProj, input.PreviousWorldPosition); - previousScreenPosition.xy = previousScreenPosition.xy / previousScreenPosition.ww; - float2 screenMotionVector = float2(-0.5, 0.5) * (screenPosition.xy - previousScreenPosition.xy); + psout.Diffuse.xyz = 0; + psout.Diffuse.w = 1; - psout.MotionVectors = screenMotionVector; + psout.MotionVectors = 
GetSSMotionVector(input.WorldPosition, input.PreviousWorldPosition, 0); float3 ddx = ddx_coarse(input.ViewSpacePosition); float3 ddy = ddy_coarse(input.ViewSpacePosition); float3 normal = normalize(cross(ddx, ddy)); - float normalScale = max(1.0 / 1000.0, sqrt(normal.z * -8 + 8)); - psout.Normal.xy = float2(0.5, 0.5) + normal.xy / normalScale; + + psout.Normal.xy = EncodeNormal(normal); psout.Normal.zw = float2(0, 0); + + psout.Albedo = float4(baseColor.xyz * input.DiffuseColor.xyz * 0.5, 1); + psout.Masks = float4(0, 0, 1, 0); # endif return psout; diff --git a/package/Shaders/VariableRateShading/ComputeNASData.hlsl b/package/Shaders/VariableRateShading/ComputeNASData.hlsl new file mode 100644 index 000000000..f6e75d4f4 --- /dev/null +++ b/package/Shaders/VariableRateShading/ComputeNASData.hlsl @@ -0,0 +1,102 @@ +#define GROUP_SIZE (8 * 8) + +Texture2D InputTexture : register(t0); +Texture2D InputTextureMotionVectors : register(t1); +RWTexture2D OutputTexture : register(u0); + +groupshared float4 sampleCache[GROUP_SIZE]; +groupshared float2 sampleCacheMotionVectors[GROUP_SIZE]; +groupshared float errXCache[GROUP_SIZE]; +groupshared float errYCache[GROUP_SIZE]; + +float RgbToLuminance(float3 color) +{ + return dot(color, float3(0.299, 0.587, 0.114)); +} + +[numthreads(8, 8, 1)] void main(uint3 GroupID + : SV_GroupID, uint3 GroupThreadID + : SV_GroupThreadID) { + const uint threadIndex = GroupThreadID.y * 8 + GroupThreadID.x; + const uint2 sampleIndex = (GroupID.xy * 8 + GroupThreadID.xy) * 2.0; + + // Fetch color (final post-AA) data + // l0.x l0.y + // l0.z l0.w l2.x + // l1.x l1.y + // l1.z l1.w l2.y + // l2.z + float4 l0; + l0.x = RgbToLuminance(InputTexture[sampleIndex + uint2(0, 0)]); + l0.y = RgbToLuminance(InputTexture[sampleIndex + uint2(1, 0)]); + l0.z = RgbToLuminance(InputTexture[sampleIndex + uint2(0, 1)]); + l0.w = RgbToLuminance(InputTexture[sampleIndex + uint2(1, 1)]); + + float4 l1; + l1.x = RgbToLuminance(InputTexture[sampleIndex + uint2(0, 2)]); + 
l1.y = RgbToLuminance(InputTexture[sampleIndex + uint2(1, 2)]); + l1.z = RgbToLuminance(InputTexture[sampleIndex + uint2(0, 3)]); + l1.w = RgbToLuminance(InputTexture[sampleIndex + uint2(1, 3)]); + + float3 l2; + l2.x = RgbToLuminance(InputTexture[sampleIndex + uint2(2, 1)]); + l2.y = RgbToLuminance(InputTexture[sampleIndex + uint2(2, 3)]); + l2.z = RgbToLuminance(InputTexture[sampleIndex + uint2(1, 4)]); + + sampleCache[threadIndex] = l0 + l1; + + float2 m = 0.0f; + m += InputTextureMotionVectors[sampleIndex + uint2(0, 0)]; + m += InputTextureMotionVectors[sampleIndex + uint2(1, 0)]; + m += InputTextureMotionVectors[sampleIndex + uint2(0, 1)]; + m += InputTextureMotionVectors[sampleIndex + uint2(1, 1)]; + + m += InputTextureMotionVectors[sampleIndex + uint2(0, 2)]; + m += InputTextureMotionVectors[sampleIndex + uint2(1, 2)]; + m += InputTextureMotionVectors[sampleIndex + uint2(0, 3)]; + m += InputTextureMotionVectors[sampleIndex + uint2(1, 3)]; + + sampleCacheMotionVectors[threadIndex] = m; + + // Derivatives X + float4 a = float4(l0.y, l2.x, l1.y, l2.y); + float4 b = float4(l0.x, l0.w, l1.x, l1.w); + float4 dx = abs(a - b); + + // Derivatives Y + a = float4(l0.z, l1.y, l1.z, l2.z); + b = float4(l0.x, l0.w, l1.x, l1.w); + float4 dy = abs(a - b); + + // Compute maximum partial derivative of all 16x16 pixels (256 total) + // this approach is more "sensitive" to individual outliers in a tile, since it takes the max instead of the average + float maxDx = max(max(dx.x, dx.y), max(dx.z, dx.w)); + float maxDy = max(max(dy.x, dy.y), max(dy.z, dy.w)); + + errXCache[threadIndex] = maxDx; + errYCache[threadIndex] = maxDy; + + GroupMemoryBarrierWithGroupSync(); + + // Parallel reduction + [unroll] for (uint s = (64 >> 1); s > 0; s >>= 1) + { + if (threadIndex < s) { + sampleCache[threadIndex] += sampleCache[threadIndex + s]; + sampleCacheMotionVectors[threadIndex] += sampleCacheMotionVectors[threadIndex + s]; + errXCache[threadIndex] = max(errXCache[threadIndex], 
errXCache[threadIndex + s]); + errYCache[threadIndex] = max(errYCache[threadIndex], errYCache[threadIndex + s]); + } + + GroupMemoryBarrierWithGroupSync(); + } + + // Average + if (threadIndex == 0) { + float avgLuma = dot(sampleCache[0], 1.0 / 8.0) / GROUP_SIZE + 0.1; + float2 avgMotionVectors = dot(sampleCacheMotionVectors[0], 1.0 / 8.0) / GROUP_SIZE; + float errX = errXCache[0]; + float errY = errYCache[0]; + OutputTexture[GroupID.xy] = float4(float2(errX, errY) / abs(avgLuma), avgMotionVectors); + } +} \ No newline at end of file diff --git a/package/Shaders/VariableRateShading/ComputeShadingRate.hlsl b/package/Shaders/VariableRateShading/ComputeShadingRate.hlsl new file mode 100644 index 000000000..7a3a24e3f --- /dev/null +++ b/package/Shaders/VariableRateShading/ComputeShadingRate.hlsl @@ -0,0 +1,77 @@ +RWTexture2D vrsSurface : register(u0); +Texture2D nasDataSurface : register(t0); + +[numthreads(32, 32, 1)] void main(uint3 DispatchThreadID + : SV_DispatchThreadID, uint3 GroupThreadID + : SV_GroupThreadID, uint3 GroupID + : SV_GroupID) { + float4 nasData = nasDataSurface[DispatchThreadID.xy]; + + float2 mVec = abs(nasData.zw); + + // Error scalers (equations from the I3D 2019 paper) + // bhv for half rate, bqv for quarter rate + float2 bhv = pow(1.0 / (1 + pow(1.05 * mVec, 3.1)), 0.35); + float2 bqv = 2.13 * pow(1.0 / (1 + pow(0.55 * mVec, 2.41)), 0.49); + + // Sample block error data from NAS data pass and apply the error scalars + float2 diff = nasData.xy; + float2 diff2 = diff * bhv; + float2 diff4 = diff * bqv; + + uint screenWidth, screenHeight; + nasDataSurface.GetDimensions(screenWidth, screenHeight); + + float2 uv = DispatchThreadID.xy * rcp(float2(screenWidth, screenHeight)); + float threshold = lerp(0.07, 0.14, distance(float2(0.5, 0.5), uv)); + + /*` + D3D12_SHADING_RATE_1X1 = 0, // 0b0000 + D3D12_SHADING_RATE_1X2 = 0x1, // 0b0001 + D3D12_SHADING_RATE_2X1 = 0x4, // 0b0100 + D3D12_SHADING_RATE_2X2 = 0x5, // 0b0101 + D3D12_SHADING_RATE_2X4 = 0x6, // 
0b0110 + D3D12_SHADING_RATE_4X2 = 0x9, // 0b1001 + D3D12_SHADING_RATE_4X4 = 0xa // 0b1010 + */ + + // Compute block shading rate based on if the error computation goes over the threshold + // shading rates in D3D are purposely designed to be combinable, e.g. 2x1 | 1x2 = 2x2 + uint ShadingRate = 0; + ShadingRate |= ((diff2.x >= threshold) ? 0 : ((diff4.x > threshold) ? 0x4 : 0x8)); + ShadingRate |= ((diff2.y >= threshold) ? 0 : ((diff4.y > threshold) ? 0x1 : 0x2)); + + // Disable 4x4 shading rate (low quality, limited perf gain) + if (ShadingRate == 0xa) { + ShadingRate = (diff2.x > diff2.y) ? 0x6 : 0x9; // use 2x4 or 4x2 based on directional gradient + } + // Disable 4x1 or 1x4 shading rate (unsupported) + else if (ShadingRate == 0x8) { + ShadingRate = 0x4; + } else if (ShadingRate == 0x2) { + ShadingRate = 0x1; + } + + // vsrd[i].shadingRateTable[0] = NV_PIXEL_X1_PER_RASTER_PIXEL; + // vsrd[i].shadingRateTable[1] = NV_PIXEL_X1_PER_2X1_RASTER_PIXELS; + // vsrd[i].shadingRateTable[2] = NV_PIXEL_X1_PER_1X2_RASTER_PIXELS; + // vsrd[i].shadingRateTable[3] = NV_PIXEL_X1_PER_2X2_RASTER_PIXELS; + // vsrd[i].shadingRateTable[4] = NV_PIXEL_X1_PER_4X2_RASTER_PIXELS; + // vsrd[i].shadingRateTable[5] = NV_PIXEL_X1_PER_2X4_RASTER_PIXELS; + // vsrd[i].shadingRateTable[6] = NV_PIXEL_X1_PER_4X4_RASTER_PIXELS; + + if (ShadingRate == 0x1) + ShadingRate = 2; + else if (ShadingRate == 0x4) + ShadingRate = 1; + else if (ShadingRate == 0x5) + ShadingRate = 3; + else if (ShadingRate == 0x6) + ShadingRate = 5; + else if (ShadingRate == 0x9) + ShadingRate = 4; + else if (ShadingRate == 0xa) + ShadingRate = 6; + + vrsSurface[DispatchThreadID.xy] = ShadingRate; +} \ No newline at end of file diff --git a/package/Shaders/VariableRateShading/LICENSE.txt b/package/Shaders/VariableRateShading/LICENSE.txt new file mode 100644 index 000000000..2014a4832 --- /dev/null +++ b/package/Shaders/VariableRateShading/LICENSE.txt @@ -0,0 +1,19 @@ +Copyright (c) 2014-2021, NVIDIA CORPORATION. 
All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
\ No newline at end of file diff --git a/src/Bindings.cpp b/src/Bindings.cpp deleted file mode 100644 index e0d2b30c4..000000000 --- a/src/Bindings.cpp +++ /dev/null @@ -1,268 +0,0 @@ -#include "Bindings.h" -#include "State.h" -#include "Util.h" - -void Bindings::DepthStencilStateSetDepthMode(RE::BSGraphics::DepthStencilDepthMode a_mode) -{ - auto& state = State::GetSingleton()->shadowState; - GET_INSTANCE_MEMBER(depthStencilDepthMode, state) - GET_INSTANCE_MEMBER(depthStencilDepthModePrevious, state) - GET_INSTANCE_MEMBER(stateUpdateFlags, state) - - if (depthStencilDepthMode != a_mode) { - depthStencilDepthMode = a_mode; - if (depthStencilDepthModePrevious != a_mode) - stateUpdateFlags.set(RE::BSGraphics::ShaderFlags::DIRTY_DEPTH_MODE); - else - stateUpdateFlags.reset(RE::BSGraphics::ShaderFlags::DIRTY_DEPTH_MODE); - } -} - -void Bindings::AlphaBlendStateSetMode(uint32_t a_mode) -{ - auto& state = State::GetSingleton()->shadowState; - GET_INSTANCE_MEMBER(alphaBlendMode, state) - GET_INSTANCE_MEMBER(stateUpdateFlags, state) - - if (alphaBlendMode != a_mode) { - alphaBlendMode = a_mode; - stateUpdateFlags.set(RE::BSGraphics::ShaderFlags::DIRTY_ALPHA_BLEND); - } -} - -void Bindings::AlphaBlendStateSetAlphaToCoverage(uint32_t a_value) -{ - auto& state = State::GetSingleton()->shadowState; - GET_INSTANCE_MEMBER(alphaBlendAlphaToCoverage, state) - GET_INSTANCE_MEMBER(stateUpdateFlags, state) - - if (alphaBlendAlphaToCoverage != a_value) { - alphaBlendAlphaToCoverage = a_value; - stateUpdateFlags.set(RE::BSGraphics::ShaderFlags::DIRTY_ALPHA_BLEND); - } -} - -void Bindings::AlphaBlendStateSetWriteMode(uint32_t a_value) -{ - auto& state = State::GetSingleton()->shadowState; - GET_INSTANCE_MEMBER(alphaBlendWriteMode, state) - GET_INSTANCE_MEMBER(stateUpdateFlags, state) - - if (alphaBlendWriteMode != a_value) { - alphaBlendWriteMode = a_value; - stateUpdateFlags.set(RE::BSGraphics::ShaderFlags::DIRTY_ALPHA_BLEND); - } -} - -void Bindings::SetOverwriteTerrainMode(bool 
a_enable) -{ - if (overrideTerrain != a_enable) { - overrideTerrain = a_enable; - auto& state = State::GetSingleton()->shadowState; - GET_INSTANCE_MEMBER(stateUpdateFlags, state) - stateUpdateFlags.set(RE::BSGraphics::ShaderFlags::DIRTY_DEPTH_MODE); - stateUpdateFlags.set(RE::BSGraphics::ShaderFlags::DIRTY_ALPHA_BLEND); - } -} - -void Bindings::SetOverwriteTerrainMaskingMode(TerrainMaskMode a_mode) -{ - if (terrainMask != a_mode) { - terrainMask = a_mode; - auto& state = State::GetSingleton()->shadowState; - GET_INSTANCE_MEMBER(stateUpdateFlags, state) - stateUpdateFlags.set(RE::BSGraphics::ShaderFlags::DIRTY_RENDERTARGET); - } -} - -struct DepthStates -{ - ID3D11DepthStencilState* a[6][40]; -}; - -struct BlendStates -{ - ID3D11BlendState* a[7][2][13][2]; -}; - -// Reimplementation of elements of the renderer's bindings system to support additional features - -void Bindings::SetDirtyStates(bool) -{ - auto& state = State::GetSingleton()->shadowState; - auto& context = State::GetSingleton()->context; - GET_INSTANCE_MEMBER(depthStencilStencilMode, state) - GET_INSTANCE_MEMBER(depthStencilDepthMode, state) - GET_INSTANCE_MEMBER(alphaBlendAlphaToCoverage, state) - GET_INSTANCE_MEMBER(alphaBlendMode, state) - GET_INSTANCE_MEMBER(alphaBlendModeExtra, state) - GET_INSTANCE_MEMBER(alphaBlendWriteMode, state) - GET_INSTANCE_MEMBER(cubeMapRenderTarget, state) - GET_INSTANCE_MEMBER(cubeMapRenderTargetView, state) - GET_INSTANCE_MEMBER(depthStencil, state) - GET_INSTANCE_MEMBER(depthStencilSlice, state) - GET_INSTANCE_MEMBER(renderTargets, state) - GET_INSTANCE_MEMBER(setCubeMapRenderTargetMode, state) - GET_INSTANCE_MEMBER(setDepthStencilMode, state) - GET_INSTANCE_MEMBER(setRenderTargetMode, state) - GET_INSTANCE_MEMBER(stateUpdateFlags, state) - GET_INSTANCE_MEMBER(stencilRef, state) - GET_INSTANCE_MEMBER(PSTexture, state) - - auto rendererData = RE::BSGraphics::Renderer::GetSingleton(); - - static DepthStates* depthStates = (DepthStates*)REL::RelocationID(524747, 
411362).address(); - static BlendStates* blendStates = (BlendStates*)REL::RelocationID(524749, 411364).address(); - - if (stateUpdateFlags.any(RE::BSGraphics::ShaderFlags::DIRTY_RENDERTARGET, RE::BSGraphics::ShaderFlags::DIRTY_VRPREVIEW)) { - // Build active render target view array - ID3D11RenderTargetView* renderTargetViews[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT]; - uint32_t viewCount = 0; - - if (cubeMapRenderTarget == RE::RENDER_TARGETS_CUBEMAP::kNONE) { - // This loops through all 8 RTs or until a RENDER_TARGET_NONE entry is hit - for (int i = 0; i < D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT; i++) { - if (renderTargets[i] == RE::RENDER_TARGETS::kNONE) { - if (terrainMask == TerrainMaskMode::kWrite) { - if (i == 3 || i == 4) { - renderTargetViews[i] = terrainBlendingMask->rtv.get(); - viewCount++; - } - } - break; - } else { - renderTargetViews[i] = rendererData->GetRuntimeData().renderTargets[renderTargets[i]].RTV; - } - - viewCount++; - - if (setRenderTargetMode[i] == RE::BSGraphics::SetRenderTargetMode::SRTM_CLEAR) { - context->ClearRenderTargetView(renderTargetViews[i], rendererData->GetRendererData().clearColor); - setRenderTargetMode[i] = RE::BSGraphics::SetRenderTargetMode::SRTM_NO_CLEAR; - } - } - } else { - // Use a single RT for the cubemap - renderTargetViews[0] = rendererData->GetRendererData().cubemapRenderTargets[cubeMapRenderTarget].cubeSideRTV[cubeMapRenderTargetView]; - viewCount = 1; - - if (setCubeMapRenderTargetMode == RE::BSGraphics::SetRenderTargetMode::SRTM_CLEAR) { - context->ClearRenderTargetView(renderTargetViews[0], rendererData->GetRendererData().clearColor); - setCubeMapRenderTargetMode = RE::BSGraphics::SetRenderTargetMode::SRTM_NO_CLEAR; - } - } - - switch (setDepthStencilMode) { - case RE::BSGraphics::SetRenderTargetMode::SRTM_CLEAR: - case RE::BSGraphics::SetRenderTargetMode::SRTM_CLEAR_DEPTH: - case RE::BSGraphics::SetRenderTargetMode::SRTM_CLEAR_STENCIL: - case RE::BSGraphics::SetRenderTargetMode::SRTM_INIT: - 
rendererData->GetRuntimeData().readOnlyDepth = false; - break; - } - - // VR Only - if (REL::Module::IsVR() && rendererData->GetRuntimeData().readOnlyDepth && depthStencil != -1) { - rendererData->GetRuntimeData().readOnlyDepth = false; - for (int i = 0; i < 16; i++) { // not sure what 16 is from - if (PSTexture[i] == rendererData->GetDepthStencilData().depthStencils[depthStencil].depthSRV || - PSTexture[i] == rendererData->GetDepthStencilData().depthStencils[depthStencil].stencilSRV) - rendererData->GetRuntimeData().readOnlyDepth = true; - } - } - - // - // Determine which depth stencil to render to. When there's no active depth stencil, - // simply send a nullptr to dx11. - // - ID3D11DepthStencilView* newDepthStencil = nullptr; - - if (depthStencil != -1) { - if (rendererData->GetRuntimeData().readOnlyDepth) - newDepthStencil = rendererData->GetDepthStencilData().depthStencils[depthStencil].readOnlyViews[depthStencilSlice]; - else - newDepthStencil = rendererData->GetDepthStencilData().depthStencils[depthStencil].views[depthStencilSlice]; - - // Only clear the stencil if specific flags are set - if (newDepthStencil) { - uint32_t clearFlags = 0; - - switch (setDepthStencilMode) { - case RE::BSGraphics::SetRenderTargetMode::SRTM_CLEAR: - case RE::BSGraphics::SetRenderTargetMode::SRTM_INIT: - clearFlags = D3D11_CLEAR_DEPTH | D3D11_CLEAR_STENCIL; - break; - - case RE::BSGraphics::SetRenderTargetMode::SRTM_CLEAR_DEPTH: - clearFlags = D3D11_CLEAR_DEPTH; - break; - - case RE::BSGraphics::SetRenderTargetMode::SRTM_CLEAR_STENCIL: - clearFlags = D3D11_CLEAR_STENCIL; - break; - } - - if (clearFlags) { - context->ClearDepthStencilView(newDepthStencil, clearFlags, 1.0f, 0); - setDepthStencilMode = RE::BSGraphics::SetRenderTargetMode::SRTM_NO_CLEAR; - } - } - } - - if (!REL::Module::IsVR()) - context->OMSetRenderTargets(viewCount, renderTargetViews, newDepthStencil); - else { - // VR calls a function instead of using OMSetRenderTargets - typedef void 
(*_VR_OMSetRenderTargets)(ID3D11RenderTargetView* a_renderTargetView[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT], ID3D11DepthStencilView* a_depthStencilView, uint32_t a_numRTV); - REL::Relocation<_VR_OMSetRenderTargets> VR_OMSetRenderTargets{ REL::Offset(0x0dc4240) }; - VR_OMSetRenderTargets(renderTargetViews, newDepthStencil, viewCount); - } - stateUpdateFlags.reset(RE::BSGraphics::ShaderFlags::DIRTY_RENDERTARGET, RE::BSGraphics::ShaderFlags::DIRTY_VRPREVIEW); - } - - if (stateUpdateFlags.any(RE::BSGraphics::ShaderFlags::DIRTY_DEPTH_STENCILREF_MODE, RE::BSGraphics::ShaderFlags::DIRTY_DEPTH_MODE)) { - stateUpdateFlags.reset(RE::BSGraphics::ShaderFlags::DIRTY_DEPTH_STENCILREF_MODE, RE::BSGraphics::ShaderFlags::DIRTY_DEPTH_MODE); - if (overrideTerrain) - context->OMSetDepthStencilState(depthStates->a[std::to_underlying(RE::BSGraphics::DepthStencilDepthMode::kTestGreaterEqual)][depthStencilStencilMode], stencilRef); - else - context->OMSetDepthStencilState(depthStates->a[std::to_underlying(depthStencilDepthMode)][depthStencilStencilMode], stencilRef); - } - - if (stateUpdateFlags.any(RE::BSGraphics::ShaderFlags::DIRTY_ALPHA_BLEND)) { - stateUpdateFlags.reset(RE::BSGraphics::ShaderFlags::DIRTY_ALPHA_BLEND); - if (overrideTerrain) - context->OMSetBlendState(blendStates->a[1][alphaBlendAlphaToCoverage][alphaBlendWriteMode][alphaBlendModeExtra], nullptr, 0xFFFFFFFF); - else - context->OMSetBlendState(blendStates->a[alphaBlendMode][alphaBlendAlphaToCoverage][alphaBlendWriteMode][alphaBlendModeExtra], nullptr, 0xFFFFFFFF); - } -} - -void Bindings::SetupResources() -{ - //auto renderer = RE::BSGraphics::Renderer::GetSingleton(); - //auto mainTexture = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMAIN]; - - //{ - // D3D11_TEXTURE2D_DESC texDesc{}; - // mainTexture.texture->GetDesc(&texDesc); - // terrainBlendingMask = new Texture2D(texDesc); - - // D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; - // mainTexture.SRV->GetDesc(&srvDesc); - // 
terrainBlendingMask->CreateSRV(srvDesc); - - // D3D11_RENDER_TARGET_VIEW_DESC rtvDesc = {}; - // mainTexture.RTV->GetDesc(&rtvDesc); - // terrainBlendingMask->CreateRTV(rtvDesc); - //} -} - -void Bindings::Reset() -{ - //SetOverwriteTerrainMode(false); - //SetOverwriteTerrainMaskingMode(TerrainMaskMode::kNone); - - //auto& context = State::GetSingleton()->context; - //FLOAT clear[4] = { 0.0f, 0.0f, 0.0f, 0.0f }; - //context->ClearRenderTargetView(terrainBlendingMask->rtv.get(), clear); -} diff --git a/src/Bindings.h b/src/Bindings.h deleted file mode 100644 index 509a08370..000000000 --- a/src/Bindings.h +++ /dev/null @@ -1,40 +0,0 @@ -#pragma once - -#include "Buffer.h" - -class Bindings -{ -public: - static Bindings* GetSingleton() - { - static Bindings singleton; - return &singleton; - } - - bool overrideTerrain = false; - - enum class TerrainMaskMode : uint32_t - { - kNone, - kWrite, - kRead - }; - - TerrainMaskMode terrainMask = TerrainMaskMode::kNone; - - Texture2D* terrainBlendingMask; - - void DepthStencilStateSetDepthMode(RE::BSGraphics::DepthStencilDepthMode a_mode); - - void AlphaBlendStateSetMode(uint32_t a_mode); - void AlphaBlendStateSetAlphaToCoverage(uint32_t a_value); - void AlphaBlendStateSetWriteMode(uint32_t a_value); - - void SetOverwriteTerrainMode(bool a_enable); - - void SetOverwriteTerrainMaskingMode(TerrainMaskMode a_mode); - - void SetDirtyStates(bool a_isComputeShader); - void SetupResources(); - void Reset(); -}; diff --git a/src/Deferred.cpp b/src/Deferred.cpp new file mode 100644 index 000000000..d270ad3c3 --- /dev/null +++ b/src/Deferred.cpp @@ -0,0 +1,701 @@ +#include "Deferred.h" +#include "State.h" +#include "Util.h" +#include +#include +#include +#include +#include +#include +#include + +void Deferred::DepthStencilStateSetDepthMode(RE::BSGraphics::DepthStencilDepthMode a_mode) +{ + auto& state = State::GetSingleton()->shadowState; + GET_INSTANCE_MEMBER(depthStencilDepthMode, state) + 
GET_INSTANCE_MEMBER(depthStencilDepthModePrevious, state) + GET_INSTANCE_MEMBER(stateUpdateFlags, state) + + if (depthStencilDepthMode != a_mode) { + depthStencilDepthMode = a_mode; + if (depthStencilDepthModePrevious != a_mode) + stateUpdateFlags.set(RE::BSGraphics::ShaderFlags::DIRTY_DEPTH_MODE); + else + stateUpdateFlags.reset(RE::BSGraphics::ShaderFlags::DIRTY_DEPTH_MODE); + } +} + +void Deferred::AlphaBlendStateSetMode(uint32_t a_mode) +{ + auto& state = State::GetSingleton()->shadowState; + GET_INSTANCE_MEMBER(alphaBlendMode, state) + GET_INSTANCE_MEMBER(stateUpdateFlags, state) + + if (alphaBlendMode != a_mode) { + alphaBlendMode = a_mode; + stateUpdateFlags.set(RE::BSGraphics::ShaderFlags::DIRTY_ALPHA_BLEND); + } +} + +void Deferred::AlphaBlendStateSetAlphaToCoverage(uint32_t a_value) +{ + auto& state = State::GetSingleton()->shadowState; + GET_INSTANCE_MEMBER(alphaBlendAlphaToCoverage, state) + GET_INSTANCE_MEMBER(stateUpdateFlags, state) + + if (alphaBlendAlphaToCoverage != a_value) { + alphaBlendAlphaToCoverage = a_value; + stateUpdateFlags.set(RE::BSGraphics::ShaderFlags::DIRTY_ALPHA_BLEND); + } +} + +void Deferred::AlphaBlendStateSetWriteMode(uint32_t a_value) +{ + auto& state = State::GetSingleton()->shadowState; + GET_INSTANCE_MEMBER(alphaBlendWriteMode, state) + GET_INSTANCE_MEMBER(stateUpdateFlags, state) + + if (alphaBlendWriteMode != a_value) { + alphaBlendWriteMode = a_value; + stateUpdateFlags.set(RE::BSGraphics::ShaderFlags::DIRTY_ALPHA_BLEND); + } +} + +struct DepthStates +{ + ID3D11DepthStencilState* a[6][40]; +}; + +struct BlendStates +{ + ID3D11BlendState* a[7][2][13][2]; +}; + +void SetupRenderTarget(RE::RENDER_TARGET target, D3D11_TEXTURE2D_DESC texDesc, D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc, D3D11_RENDER_TARGET_VIEW_DESC rtvDesc, D3D11_UNORDERED_ACCESS_VIEW_DESC uavDesc, DXGI_FORMAT format) +{ + auto renderer = RE::BSGraphics::Renderer::GetSingleton(); + auto& device = State::GetSingleton()->device; + + texDesc.Format = format; + 
srvDesc.Format = format; + rtvDesc.Format = format; + uavDesc.Format = format; + + auto& data = renderer->GetRuntimeData().renderTargets[target]; + DX::ThrowIfFailed(device->CreateTexture2D(&texDesc, nullptr, &data.texture)); + DX::ThrowIfFailed(device->CreateShaderResourceView(data.texture, &srvDesc, &data.SRV)); + DX::ThrowIfFailed(device->CreateRenderTargetView(data.texture, &rtvDesc, &data.RTV)); + DX::ThrowIfFailed(device->CreateUnorderedAccessView(data.texture, &uavDesc, &data.UAV)); +} + +void Deferred::SetupResources() +{ + auto renderer = RE::BSGraphics::Renderer::GetSingleton(); + + { + auto& main = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMAIN]; + + D3D11_TEXTURE2D_DESC texDesc{}; + D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + D3D11_RENDER_TARGET_VIEW_DESC rtvDesc = {}; + D3D11_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + + main.texture->GetDesc(&texDesc); + main.SRV->GetDesc(&srvDesc); + main.RTV->GetDesc(&rtvDesc); + main.UAV->GetDesc(&uavDesc); + + // Available targets: + // MAIN ONLY ALPHA + // WATER REFLECTIONS + // BLURFULL_BUFFER + // LENSFLAREVIS + // SAO DOWNSCALED + // SAO CAMERAZ+MIP_LEVEL_0_ESRAM + // SAO_RAWAO_DOWNSCALED + // SAO_RAWAO_PREVIOUS_DOWNSCALDE + // SAO_TEMP_BLUR_DOWNSCALED + // INDIRECT + // INDIRECT_DOWNSCALED + // RAWINDIRECT + // RAWINDIRECT_DOWNSCALED + // RAWINDIRECT_PREVIOUS + // RAWINDIRECT_PREVIOUS_DOWNSCALED + // RAWINDIRECT_SWAP + // VOLUMETRIC_LIGHTING_HALF_RES + // VOLUMETRIC_LIGHTING_BLUR_HALF_RES + // VOLUMETRIC_LIGHTING_QUARTER_RES + // VOLUMETRIC_LIGHTING_BLUR_QUARTER_RES + // TEMPORAL_AA_WATER_1 + // TEMPORAL_AA_WATER_2 + + // Albedo + SetupRenderTarget(ALBEDO, texDesc, srvDesc, rtvDesc, uavDesc, DXGI_FORMAT_R8G8B8A8_UNORM); + // Specular + SetupRenderTarget(SPECULAR, texDesc, srvDesc, rtvDesc, uavDesc, DXGI_FORMAT_R11G11B10_FLOAT); + // Reflectance + SetupRenderTarget(REFLECTANCE, texDesc, srvDesc, rtvDesc, uavDesc, DXGI_FORMAT_R8G8B8A8_UNORM); + // Normal + Roughness + 
SetupRenderTarget(NORMALROUGHNESS, texDesc, srvDesc, rtvDesc, uavDesc, DXGI_FORMAT_R8G8B8A8_UNORM); + // Masks + SetupRenderTarget(MASKS, texDesc, srvDesc, rtvDesc, uavDesc, DXGI_FORMAT_R8G8B8A8_UNORM); + } + + { + deferredCB = new ConstantBuffer(ConstantBufferDesc()); + } + + { + auto& device = State::GetSingleton()->device; + + D3D11_SAMPLER_DESC samplerDesc = {}; + samplerDesc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; + samplerDesc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP; + samplerDesc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP; + samplerDesc.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP; + samplerDesc.MaxAnisotropy = 1; + samplerDesc.MinLOD = 0; + samplerDesc.MaxLOD = D3D11_FLOAT32_MAX; + DX::ThrowIfFailed(device->CreateSamplerState(&samplerDesc, &linearSampler)); + } + + { + D3D11_TEXTURE2D_DESC texDesc; + auto mainTex = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMAIN]; + mainTex.texture->GetDesc(&texDesc); + + texDesc.Format = DXGI_FORMAT_R16G16B16A16_FLOAT; + texDesc.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS; + + D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = { + .Format = texDesc.Format, + .ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D, + .Texture2D = { + .MostDetailedMip = 0, + .MipLevels = 1 } + }; + D3D11_RENDER_TARGET_VIEW_DESC rtvDesc = { + .Format = texDesc.Format, + .ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2D, + .Texture2D = { .MipSlice = 0 } + }; + D3D11_UNORDERED_ACCESS_VIEW_DESC uavDesc = { + .Format = texDesc.Format, + .ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D, + .Texture2D = { .MipSlice = 0 } + }; + + { + giTexture = new Texture2D(texDesc); + giTexture->CreateSRV(srvDesc); + giTexture->CreateRTV(rtvDesc); + giTexture->CreateUAV(uavDesc); + } + } +} + +void Deferred::Reset() +{ + //for (auto& str : perms) + //{ + //// logger::info("{}", str); + //} +} + +void Deferred::UpdateConstantBuffer() +{ + auto state = State::GetSingleton(); + auto viewport = 
RE::BSGraphics::State::GetSingleton(); + + DeferredCB data{}; + + auto& shadowState = State::GetSingleton()->shadowState; + + if (REL::Module::IsVR()) { + auto posAdjust = shadowState->GetVRRuntimeData().posAdjust.getEye(0); + data.CamPosAdjust[0] = { posAdjust.x, posAdjust.y, posAdjust.z, 0 }; + posAdjust = shadowState->GetVRRuntimeData().posAdjust.getEye(1); + data.CamPosAdjust[1] = { posAdjust.x, posAdjust.y, posAdjust.z, 0 }; + + data.ViewMatrix[0] = shadowState->GetVRRuntimeData().cameraData.getEye(0).viewMat; + data.ViewMatrix[1] = shadowState->GetVRRuntimeData().cameraData.getEye(1).viewMat; + data.ProjMatrix[0] = shadowState->GetVRRuntimeData().cameraData.getEye(0).projMat; + data.ProjMatrix[1] = shadowState->GetVRRuntimeData().cameraData.getEye(1).projMat; + data.ViewProjMatrix[0] = shadowState->GetVRRuntimeData().cameraData.getEye(0).viewProjMat; + data.ViewProjMatrix[1] = shadowState->GetVRRuntimeData().cameraData.getEye(1).viewProjMat; + data.InvViewMatrix[0] = shadowState->GetVRRuntimeData().cameraData.getEye(0).viewMat.Invert(); + data.InvViewMatrix[1] = shadowState->GetVRRuntimeData().cameraData.getEye(1).viewMat.Invert(); + data.InvProjMatrix[0] = shadowState->GetVRRuntimeData().cameraData.getEye(0).projMat.Invert(); + data.InvProjMatrix[1] = shadowState->GetVRRuntimeData().cameraData.getEye(1).projMat.Invert(); + data.InvViewProjMatrix[0] = data.InvViewMatrix[0] * data.InvProjMatrix[0]; + data.InvViewProjMatrix[1] = data.InvViewMatrix[1] * data.InvProjMatrix[1]; + } else { + auto posAdjust = shadowState->GetRuntimeData().posAdjust.getEye(0); + data.CamPosAdjust[0] = { posAdjust.x, posAdjust.y, posAdjust.z, 0 }; + data.ViewMatrix[0] = shadowState->GetRuntimeData().cameraData.getEye(0).viewMat; + data.ProjMatrix[0] = shadowState->GetRuntimeData().cameraData.getEye(0).projMat; + data.ViewProjMatrix[0] = shadowState->GetRuntimeData().cameraData.getEye(0).viewProjMat; + data.InvViewMatrix[0] = 
shadowState->GetRuntimeData().cameraData.getEye(0).viewMat.Invert(); + data.InvProjMatrix[0] = shadowState->GetRuntimeData().cameraData.getEye(0).projMat.Invert(); + data.InvViewProjMatrix[0] = data.InvViewMatrix[0] * data.InvProjMatrix[0]; + } + + auto accumulator = RE::BSGraphics::BSShaderAccumulator::GetCurrentAccumulator(); + auto dirLight = skyrim_cast(accumulator->GetRuntimeData().activeShadowSceneNode->GetRuntimeData().sunLight->light.get()); + + data.DirLightColor = { dirLight->GetLightRuntimeData().diffuse.red, dirLight->GetLightRuntimeData().diffuse.green, dirLight->GetLightRuntimeData().diffuse.blue, 1.0f }; + + auto imageSpaceManager = RE::ImageSpaceManager::GetSingleton(); + data.DirLightColor *= !REL::Module::IsVR() ? imageSpaceManager->GetRuntimeData().data.baseData.hdr.sunlightScale : imageSpaceManager->GetVRRuntimeData().data.baseData.hdr.sunlightScale; + + auto& direction = dirLight->GetWorldDirection(); + float4 position{ -direction.x, -direction.y, -direction.z, 0.0f }; + + data.DirLightDirectionVS[0] = float4::Transform(position, data.ViewMatrix[0]); + data.DirLightDirectionVS[0].Normalize(); + + data.DirLightDirectionVS[1] = float4::Transform(position, data.ViewMatrix[1]); + data.DirLightDirectionVS[1].Normalize(); + + auto& shaderManager = RE::BSShaderManager::State::GetSingleton(); + RE::NiTransform& dalcTransform = shaderManager.directionalAmbientTransform; + Util::StoreTransform3x4NoScale(data.DirectionalAmbient, dalcTransform); + + data.BufferDim.x = state->screenWidth; + data.BufferDim.y = state->screenHeight; + + data.RcpBufferDim.x = 1.0f / State::GetSingleton()->screenWidth; + data.RcpBufferDim.y = 1.0f / State::GetSingleton()->screenHeight; + + auto useTAA = !REL::Module::IsVR() ? imageSpaceManager->GetRuntimeData().BSImagespaceShaderISTemporalAA->taaEnabled : imageSpaceManager->GetVRRuntimeData().BSImagespaceShaderISTemporalAA->taaEnabled; + data.FrameCount = useTAA ? 
viewport->uiFrameCount : 0; + + data.CameraData = Util::GetCameraData(); + + deferredCB->Update(data); +} + +void Deferred::StartDeferred() +{ + if (!inWorld) + return; + + auto& shaderCache = SIE::ShaderCache::Instance(); + + if (!shaderCache.IsEnabled()) + return; + + static bool setup = false; + if (!setup) { + auto& device = State::GetSingleton()->device; + + static BlendStates* blendStates = (BlendStates*)REL::RelocationID(524749, 411364).address(); + + { + forwardBlendStates[0] = blendStates->a[0][0][1][0]; + + D3D11_BLEND_DESC blendDesc; + forwardBlendStates[0]->GetDesc(&blendDesc); + + blendDesc.IndependentBlendEnable = false; + + DX::ThrowIfFailed(device->CreateBlendState(&blendDesc, &deferredBlendStates[0])); + } + + { + forwardBlendStates[1] = blendStates->a[0][0][10][0]; + + D3D11_BLEND_DESC blendDesc; + forwardBlendStates[1]->GetDesc(&blendDesc); + + blendDesc.IndependentBlendEnable = false; + + DX::ThrowIfFailed(device->CreateBlendState(&blendDesc, &deferredBlendStates[1])); + } + + { + forwardBlendStates[2] = blendStates->a[1][0][1][0]; + + D3D11_BLEND_DESC blendDesc; + forwardBlendStates[2]->GetDesc(&blendDesc); + + blendDesc.IndependentBlendEnable = false; + + DX::ThrowIfFailed(device->CreateBlendState(&blendDesc, &deferredBlendStates[2])); + } + + { + forwardBlendStates[3] = blendStates->a[1][0][11][0]; + + D3D11_BLEND_DESC blendDesc; + forwardBlendStates[3]->GetDesc(&blendDesc); + + blendDesc.IndependentBlendEnable = false; + + DX::ThrowIfFailed(device->CreateBlendState(&blendDesc, &deferredBlendStates[3])); + } + + { + forwardBlendStates[4] = blendStates->a[2][0][1][0]; + + D3D11_BLEND_DESC blendDesc; + forwardBlendStates[4]->GetDesc(&blendDesc); + + blendDesc.IndependentBlendEnable = false; + + DX::ThrowIfFailed(device->CreateBlendState(&blendDesc, &deferredBlendStates[4])); + } + + { + forwardBlendStates[5] = blendStates->a[2][0][11][0]; + + D3D11_BLEND_DESC blendDesc; + forwardBlendStates[5]->GetDesc(&blendDesc); + + 
blendDesc.IndependentBlendEnable = false; + + DX::ThrowIfFailed(device->CreateBlendState(&blendDesc, &deferredBlendStates[5])); + } + + { + forwardBlendStates[6] = blendStates->a[3][0][11][0]; + + D3D11_BLEND_DESC blendDesc; + forwardBlendStates[6]->GetDesc(&blendDesc); + + blendDesc.IndependentBlendEnable = false; + + DX::ThrowIfFailed(device->CreateBlendState(&blendDesc, &deferredBlendStates[6])); + } + setup = true; + } + + auto& state = State::GetSingleton()->shadowState; + GET_INSTANCE_MEMBER(renderTargets, state) + GET_INSTANCE_MEMBER(setRenderTargetMode, state) + GET_INSTANCE_MEMBER(stateUpdateFlags, state) + + // Backup original render targets + for (uint i = 0; i < 4; i++) { + forwardRenderTargets[i] = renderTargets[i]; + } + + RE::RENDER_TARGET targets[8]{ + RE::RENDER_TARGET::kMAIN, + RE::RENDER_TARGET::kMOTION_VECTOR, + NORMALROUGHNESS, + ALBEDO, + SPECULAR, + REFLECTANCE, + MASKS, + forwardRenderTargets[3] // Improved snow shader + }; + + for (uint i = 2; i < 8; i++) { + renderTargets[i] = targets[i]; // We must use unused targets to be indexable + setRenderTargetMode[i] = RE::BSGraphics::SetRenderTargetMode::SRTM_CLEAR; // Dirty from last frame, this calls ClearRenderTargetView once + } + + stateUpdateFlags.set(RE::BSGraphics::ShaderFlags::DIRTY_RENDERTARGET); // Run OMSetRenderTargets again + + static BlendStates* blendStates = (BlendStates*)REL::RelocationID(524749, 411364).address(); + + // Set modified blend states + blendStates->a[0][0][1][0] = deferredBlendStates[0]; + blendStates->a[0][0][10][0] = deferredBlendStates[1]; + blendStates->a[1][0][1][0] = deferredBlendStates[2]; + blendStates->a[1][0][11][0] = deferredBlendStates[3]; + blendStates->a[2][0][1][0] = deferredBlendStates[4]; + blendStates->a[2][0][11][0] = deferredBlendStates[5]; + blendStates->a[3][0][11][0] = deferredBlendStates[6]; + + stateUpdateFlags.set(RE::BSGraphics::ShaderFlags::DIRTY_ALPHA_BLEND); + + deferredPass = true; +} + +void Deferred::DeferredPasses() +{ + auto 
renderer = RE::BSGraphics::Renderer::GetSingleton(); + auto& context = State::GetSingleton()->context; + auto state = State::GetSingleton(); + auto viewport = RE::BSGraphics::State::GetSingleton(); + + UpdateConstantBuffer(); + + { + auto buffer = deferredCB->CB(); + context->CSSetConstantBuffers(0, 1, &buffer); + } + + { + FLOAT clr[4] = { 0., 0., 0., 1. }; + context->ClearUnorderedAccessViewFloat(giTexture->uav.get(), clr); + } + + auto specular = renderer->GetRuntimeData().renderTargets[SPECULAR]; + auto albedo = renderer->GetRuntimeData().renderTargets[ALBEDO]; + auto reflectance = renderer->GetRuntimeData().renderTargets[REFLECTANCE]; + auto normalRoughness = renderer->GetRuntimeData().renderTargets[NORMALROUGHNESS]; + auto depth = renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kPOST_ZPREPASS_COPY]; + auto shadowMask = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGET::kSHADOW_MASK]; + auto masks = renderer->GetRuntimeData().renderTargets[MASKS]; + + auto main = renderer->GetRuntimeData().renderTargets[forwardRenderTargets[0]]; + auto normals = renderer->GetRuntimeData().renderTargets[forwardRenderTargets[2]]; + + // Only render directional shadows if the game has a directional shadow caster + auto shadowSceneNode = RE::BSShaderManager::State::GetSingleton().shadowSceneNode[0]; + auto shadowDirLight = (RE::BSShadowLight*)shadowSceneNode->GetRuntimeData().shadowDirLight; + bool dirShadow = shadowDirLight && shadowDirLight->shadowLightIndex == 0; + + if (dirShadow) { + if (ScreenSpaceShadows::GetSingleton()->loaded) { + ScreenSpaceShadows::GetSingleton()->DrawShadows(); + } + + if (TerrainOcclusion::GetSingleton()->loaded) { + TerrainOcclusion::GetSingleton()->DrawTerrainOcclusion(); + } + + if (CloudShadows::GetSingleton()->loaded) { + CloudShadows::GetSingleton()->DrawShadows(); + } + } + + { + ID3D11ShaderResourceView* srvs[7]{ + specular.SRV, + albedo.SRV, + reflectance.SRV, + normalRoughness.SRV, + shadowMask.SRV, + 
depth.depthSRV, + masks.SRV + }; + + context->CSSetShaderResources(0, ARRAYSIZE(srvs), srvs); + + ID3D11UnorderedAccessView* uavs[2]{ main.UAV, normals.UAV }; + context->CSSetUnorderedAccessViews(0, ARRAYSIZE(uavs), uavs, nullptr); + + context->CSSetSamplers(0, 1, &linearSampler); + + auto shader = dirShadow ? GetComputeDirectionalShadow() : GetComputeDirectional(); + context->CSSetShader(shader, nullptr, 0); + + float resolutionX = state->screenWidth * viewport->GetRuntimeData().dynamicResolutionCurrentWidthScale; + float resolutionY = state->screenHeight * viewport->GetRuntimeData().dynamicResolutionCurrentHeightScale; + + uint32_t dispatchX = (uint32_t)std::ceil(resolutionX / 32.0f); + uint32_t dispatchY = (uint32_t)std::ceil(resolutionY / 32.0f); + + context->Dispatch(dispatchX, dispatchY, 1); + } + + // Features that require full diffuse lighting should be put here + + if (ScreenSpaceGI::GetSingleton()->loaded) { + ScreenSpaceGI::GetSingleton()->DrawSSGI(giTexture); + } + + { + { + ID3D11ShaderResourceView* srvs[8]{ + specular.SRV, + albedo.SRV, + reflectance.SRV, + normalRoughness.SRV, + shadowMask.SRV, + depth.depthSRV, + masks.SRV, + giTexture->srv.get(), + }; + + context->CSSetShaderResources(0, ARRAYSIZE(srvs), srvs); + + ID3D11UnorderedAccessView* uavs[2]{ main.UAV, normals.UAV }; + context->CSSetUnorderedAccessViews(0, ARRAYSIZE(uavs), uavs, nullptr); + + context->CSSetSamplers(0, 1, &linearSampler); + + auto shader = GetComputeAmbientComposite(); + context->CSSetShader(shader, nullptr, 0); + + float resolutionX = state->screenWidth * viewport->GetRuntimeData().dynamicResolutionCurrentWidthScale; + float resolutionY = state->screenHeight * viewport->GetRuntimeData().dynamicResolutionCurrentHeightScale; + + uint32_t dispatchX = (uint32_t)std::ceil(resolutionX / 32.0f); + uint32_t dispatchY = (uint32_t)std::ceil(resolutionY / 32.0f); + + context->Dispatch(dispatchX, dispatchY, 1); + } + } + + if (SubsurfaceScattering::GetSingleton()->loaded) { + 
SubsurfaceScattering::GetSingleton()->DrawSSSWrapper(false); + } + + { + { + ID3D11ShaderResourceView* srvs[8]{ + specular.SRV, + albedo.SRV, + reflectance.SRV, + normalRoughness.SRV, + shadowMask.SRV, + depth.depthSRV, + masks.SRV, + giTexture->srv.get(), + }; + + context->CSSetShaderResources(0, ARRAYSIZE(srvs), srvs); + + ID3D11UnorderedAccessView* uavs[2]{ main.UAV, normals.UAV }; + context->CSSetUnorderedAccessViews(0, ARRAYSIZE(uavs), uavs, nullptr); + + context->CSSetSamplers(0, 1, &linearSampler); + + auto shader = GetComputeMainComposite(); + context->CSSetShader(shader, nullptr, 0); + + float resolutionX = state->screenWidth * viewport->GetRuntimeData().dynamicResolutionCurrentWidthScale; + float resolutionY = state->screenHeight * viewport->GetRuntimeData().dynamicResolutionCurrentHeightScale; + + uint32_t dispatchX = (uint32_t)std::ceil(resolutionX / 32.0f); + uint32_t dispatchY = (uint32_t)std::ceil(resolutionY / 32.0f); + + context->Dispatch(dispatchX, dispatchY, 1); + } + } + + ID3D11ShaderResourceView* views[8]{ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr }; + context->CSSetShaderResources(0, ARRAYSIZE(views), views); + + ID3D11UnorderedAccessView* uavs[2]{ nullptr, nullptr }; + context->CSSetUnorderedAccessViews(0, ARRAYSIZE(uavs), uavs, nullptr); + + ID3D11Buffer* buffer = nullptr; + context->CSSetConstantBuffers(0, 1, &buffer); + + context->CSSetShader(nullptr, nullptr, 0); +} + +void Deferred::EndDeferred() +{ + if (!inWorld) + return; + + inWorld = false; + + auto& shaderCache = SIE::ShaderCache::Instance(); + + if (!shaderCache.IsEnabled()) + return; + + auto& state = State::GetSingleton()->shadowState; + GET_INSTANCE_MEMBER(renderTargets, state) + GET_INSTANCE_MEMBER(stateUpdateFlags, state) + + // Do not render to our targets past this point + for (uint i = 0; i < 4; i++) { + renderTargets[i] = forwardRenderTargets[i]; + } + + for (uint i = 4; i < 8; i++) { + state->GetRuntimeData().renderTargets[i] = 
RE::RENDER_TARGET::kNONE; + } + + auto context = RE::BSGraphics::Renderer::GetSingleton()->GetRuntimeData().context; + context->OMSetRenderTargets(0, nullptr, nullptr); // Unbind all bound render targets + + DeferredPasses(); // Perform deferred passes and composite forward buffers + + stateUpdateFlags.set(RE::BSGraphics::ShaderFlags::DIRTY_RENDERTARGET); // Run OMSetRenderTargets again + + static BlendStates* blendStates = (BlendStates*)REL::RelocationID(524749, 411364).address(); + + // Restore modified blend states + blendStates->a[0][0][1][0] = forwardBlendStates[0]; + blendStates->a[0][0][10][0] = forwardBlendStates[1]; + blendStates->a[1][0][1][0] = forwardBlendStates[2]; + blendStates->a[1][0][11][0] = forwardBlendStates[3]; + blendStates->a[2][0][1][0] = forwardBlendStates[4]; + blendStates->a[2][0][11][0] = forwardBlendStates[5]; + blendStates->a[3][0][11][0] = forwardBlendStates[6]; + + stateUpdateFlags.set(RE::BSGraphics::ShaderFlags::DIRTY_ALPHA_BLEND); + + deferredPass = false; +} + +void Deferred::UpdatePerms() +{ + if (deferredPass) { + auto& state = State::GetSingleton()->shadowState; + GET_INSTANCE_MEMBER(alphaBlendMode, state) + GET_INSTANCE_MEMBER(alphaBlendAlphaToCoverage, state) + GET_INSTANCE_MEMBER(alphaBlendWriteMode, state) + GET_INSTANCE_MEMBER(alphaBlendModeExtra, state) + + std::string comboStr = std::format("{} {} {} {}", alphaBlendMode, alphaBlendAlphaToCoverage, alphaBlendWriteMode, alphaBlendModeExtra); + + perms.insert(comboStr); + } +} + +void Deferred::ClearShaderCache() +{ + if (directionalShadowCS) { + directionalShadowCS->Release(); + directionalShadowCS = nullptr; + } + if (directionalCS) { + directionalCS->Release(); + directionalCS = nullptr; + } + if (ambientCompositeCS) { + ambientCompositeCS->Release(); + ambientCompositeCS = nullptr; + } + if (mainCompositeCS) { + mainCompositeCS->Release(); + mainCompositeCS = nullptr; + } +} + +ID3D11ComputeShader* Deferred::GetComputeDirectionalShadow() +{ + if (!directionalShadowCS) 
{ + logger::debug("Compiling DeferredCompositeCS DirectionalShadowPass"); + directionalShadowCS = (ID3D11ComputeShader*)Util::CompileShader(L"Data\\Shaders\\DeferredCompositeCS.hlsl", {}, "cs_5_0", "DirectionalShadowPass"); + } + return directionalShadowCS; +} + +ID3D11ComputeShader* Deferred::GetComputeDirectional() +{ + if (!directionalCS) { + logger::debug("Compiling DeferredCompositeCS DirectionalPass"); + directionalCS = (ID3D11ComputeShader*)Util::CompileShader(L"Data\\Shaders\\DeferredCompositeCS.hlsl", {}, "cs_5_0", "DirectionalPass"); + } + return directionalCS; +} + +ID3D11ComputeShader* Deferred::GetComputeAmbientComposite() +{ + if (!ambientCompositeCS) { + logger::debug("Compiling DeferredCompositeCS AmbientCompositePass"); + ambientCompositeCS = (ID3D11ComputeShader*)Util::CompileShader(L"Data\\Shaders\\DeferredCompositeCS.hlsl", {}, "cs_5_0", "AmbientCompositePass"); + } + return ambientCompositeCS; +} + +ID3D11ComputeShader* Deferred::GetComputeMainComposite() +{ + if (!mainCompositeCS) { + logger::debug("Compiling DeferredCompositeCS MainCompositePass"); + mainCompositeCS = (ID3D11ComputeShader*)Util::CompileShader(L"Data\\Shaders\\DeferredCompositeCS.hlsl", {}, "cs_5_0", "MainCompositePass"); + } + return mainCompositeCS; +} \ No newline at end of file diff --git a/src/Deferred.h b/src/Deferred.h new file mode 100644 index 000000000..6af20ad4c --- /dev/null +++ b/src/Deferred.h @@ -0,0 +1,127 @@ +#pragma once + +#include "Buffer.h" +#include "State.h" + +#define ALBEDO RE::RENDER_TARGETS::kINDIRECT +#define SPECULAR RE::RENDER_TARGETS::kINDIRECT_DOWNSCALED +#define REFLECTANCE RE::RENDER_TARGETS::kRAWINDIRECT +#define NORMALROUGHNESS RE::RENDER_TARGETS::kRAWINDIRECT_DOWNSCALED +#define MASKS RE::RENDER_TARGETS::kRAWINDIRECT_PREVIOUS + +class Deferred +{ +public: + static Deferred* GetSingleton() + { + static Deferred singleton; + return &singleton; + } + + void DepthStencilStateSetDepthMode(RE::BSGraphics::DepthStencilDepthMode a_mode); + + void 
AlphaBlendStateSetMode(uint32_t a_mode); + void AlphaBlendStateSetAlphaToCoverage(uint32_t a_value); + void AlphaBlendStateSetWriteMode(uint32_t a_value); + + void SetupResources(); + void Reset(); + + void StartDeferred(); + void DeferredPasses(); + void EndDeferred(); + + ID3D11BlendState* deferredBlendStates[7]; + ID3D11BlendState* forwardBlendStates[7]; + RE::RENDER_TARGET forwardRenderTargets[4]; + + ID3D11ComputeShader* directionalShadowCS = nullptr; + ID3D11ComputeShader* directionalCS = nullptr; + ID3D11ComputeShader* ambientCompositeCS = nullptr; + ID3D11ComputeShader* mainCompositeCS = nullptr; + + std::unordered_set perms; + void UpdatePerms(); + + void ClearShaderCache(); + ID3D11ComputeShader* GetComputeAmbientComposite(); + ID3D11ComputeShader* GetComputeMainComposite(); + ID3D11ComputeShader* GetComputeDirectionalShadow(); + ID3D11ComputeShader* GetComputeDirectional(); + + bool inWorld = false; + bool deferredPass = false; + + struct alignas(16) DeferredCB + { + float4 CamPosAdjust[2]; + float4 DirLightDirectionVS[2]; + float4 DirLightColor; + float4 CameraData; + float2 BufferDim; + float2 RcpBufferDim; + DirectX::XMFLOAT4X4 ViewMatrix[2]; + DirectX::XMFLOAT4X4 ProjMatrix[2]; + DirectX::XMFLOAT4X4 ViewProjMatrix[2]; + DirectX::XMFLOAT4X4 InvViewMatrix[2]; + DirectX::XMFLOAT4X4 InvProjMatrix[2]; + DirectX::XMFLOAT4X4 InvViewProjMatrix[2]; + DirectX::XMFLOAT3X4 DirectionalAmbient; + uint FrameCount; + uint pad0[3]; + }; + + ConstantBuffer* deferredCB = nullptr; + + ID3D11SamplerState* linearSampler = nullptr; + + Texture2D* giTexture = nullptr; // RGB - GI/IL, A - AO + + void UpdateConstantBuffer(); + + struct Hooks + { + struct Main_RenderWorld + { + static void thunk(bool a1) + { + GetSingleton()->inWorld = true; + func(a1); + } + + static inline REL::Relocation func; + }; + + struct Main_RenderWorld_Start + { + static void thunk(RE::BSBatchRenderer* This, uint32_t StartRange, uint32_t EndRanges, uint32_t RenderFlags, int GeometryGroup) + { + // 
Here is where the first opaque objects start rendering + GetSingleton()->StartDeferred(); + func(This, StartRange, EndRanges, RenderFlags, GeometryGroup); // RenderBatches + } + static inline REL::Relocation func; + }; + + struct Main_RenderWorld_End + { + static void thunk(RE::BSShaderAccumulator* This, uint32_t RenderFlags) + { + func(This, RenderFlags); + // After this point, water starts rendering + GetSingleton()->EndDeferred(); + } + static inline REL::Relocation func; + }; + + static void Install() + { + stl::write_thunk_call(REL::RelocationID(35560, 36559).address() + REL::Relocate(0x831, 0x841, 0x791)); + + stl::write_thunk_call(REL::RelocationID(99938, 106583).address() + REL::Relocate(0x8E, 0x84)); + stl::write_thunk_call(REL::RelocationID(99938, 106583).address() + REL::Relocate(0x319, 0x308, 0x321)); + + logger::info("[Deferred] Installed hooks"); + } + }; +}; \ No newline at end of file diff --git a/src/Feature.cpp b/src/Feature.cpp index 503152e52..8b1ac5f41 100644 --- a/src/Feature.cpp +++ b/src/Feature.cpp @@ -2,15 +2,16 @@ #include "FeatureVersions.h" #include "Features/CloudShadows.h" -#include "Features/DistantTreeLighting.h" #include "Features/DynamicCubemaps.h" #include "Features/ExtendedMaterials.h" #include "Features/GrassCollision.h" #include "Features/GrassLighting.h" #include "Features/LightLimitFix.h" +#include "Features/ScreenSpaceGI.h" #include "Features/ScreenSpaceShadows.h" #include "Features/SubsurfaceScattering.h" #include "Features/TerrainBlending.h" +#include "Features/TerrainOcclusion.h" #include "Features/WaterBlending.h" #include "Features/WaterCaustics.h" #include "Features/WaterParallax.h" @@ -104,7 +105,6 @@ const std::vector& Feature::GetFeatureList() // Cat: essentially load order i guess static std::vector features = { GrassLighting::GetSingleton(), - DistantTreeLighting::GetSingleton(), GrassCollision::GetSingleton(), ScreenSpaceShadows::GetSingleton(), ExtendedMaterials::GetSingleton(), @@ -116,7 +116,9 @@ const 
std::vector& Feature::GetFeatureList() TerrainBlending::GetSingleton(), WaterParallax::GetSingleton(), WaterCaustics::GetSingleton(), - SubsurfaceScattering::GetSingleton() + SubsurfaceScattering::GetSingleton(), + TerrainOcclusion::GetSingleton(), + ScreenSpaceGI::GetSingleton() }; static std::vector featuresVR(features); diff --git a/src/Features/CloudShadows.cpp b/src/Features/CloudShadows.cpp index 56c1d1e60..f61f1e35e 100644 --- a/src/Features/CloudShadows.cpp +++ b/src/Features/CloudShadows.cpp @@ -2,18 +2,16 @@ #include "State.h" +#include "Deferred.h" #include "Util.h" -#include "magic_enum_flags.hpp" - NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT( CloudShadows::Settings, EnableCloudShadows, CloudHeight, PlanetRadius, EffectMix, - TransparencyPower, - AbsorptionAmbient) + TransparencyPower) enum class SkyShaderTechniques { @@ -41,13 +39,6 @@ void CloudShadows::DrawSettings() "The amount of light absorbed by the cloud is determined by the alpha of the cloud. " "Negative value will result in more light absorbed, and more contrast between lit and occluded areas."); - ImGui::SliderFloat("Ambient Absorption", &settings.AbsorptionAmbient, 0.f, 1.f, "%.2f"); - if (auto _tt = Util::HoverTooltipWrapper()) - ImGui::Text( - "By default, ambient light is not affected by cloud, as it is an approximation of reflected light. " - "However, if you want darker ambient, you may turn it up a bit. 
" - "Not entirely physical, nonetheless helpful."); - ImGui::TreePop(); } @@ -80,13 +71,30 @@ void CloudShadows::CheckResourcesSide(int side) context->ClearRenderTargetView(cubemapCloudOccRTVs[side], black); } +void CloudShadows::CompileComputeShaders() +{ + logger::debug("Compiling shaders..."); + { + outputProgram = reinterpret_cast(Util::CompileShader(L"Data\\Shaders\\CloudShadows\\output.cs.hlsl", { {} }, "cs_5_0")); + } +} + +void CloudShadows::ClearShaderCache() +{ + if (outputProgram) + outputProgram->Release(); + CompileComputeShaders(); +} + void CloudShadows::ModifySky(const RE::BSShader*, const uint32_t descriptor) { if (!settings.EnableCloudShadows) return; auto& shadowState = State::GetSingleton()->shadowState; - auto cubeMapRenderTarget = !REL::Module::IsVR() ? shadowState->GetRuntimeData().cubeMapRenderTarget : shadowState->GetVRRuntimeData().cubeMapRenderTarget; + + GET_INSTANCE_MEMBER(cubeMapRenderTarget, shadowState); + if (cubeMapRenderTarget != RE::RENDER_TARGETS_CUBEMAP::kREFLECTIONS) return; @@ -163,59 +171,73 @@ void CloudShadows::ModifySky(const RE::BSShader*, const uint32_t descriptor) } } -void CloudShadows::ModifyLighting() +void CloudShadows::DrawShadows() { - auto& context = State::GetSingleton()->context; + if (!settings.EnableCloudShadows || + (RE::Sky::GetSingleton()->mode.get() != RE::Sky::Mode::kFull) || + !RE::Sky::GetSingleton()->currentClimate) + return; auto& shadowState = State::GetSingleton()->shadowState; - auto cubeMapRenderTarget = !REL::Module::IsVR() ? 
shadowState->GetRuntimeData().cubeMapRenderTarget : shadowState->GetVRRuntimeData().cubeMapRenderTarget; + + GET_INSTANCE_MEMBER(cubeMapRenderTarget, shadowState); + if (cubeMapRenderTarget != RE::RENDER_TARGETS_CUBEMAP::kREFLECTIONS) { static Util::FrameChecker frame_checker; + + auto renderer = RE::BSGraphics::Renderer::GetSingleton(); + auto& context = State::GetSingleton()->context; + auto deferred = Deferred::GetSingleton(); + if (frame_checker.isNewFrame()) context->GenerateMips(texCubemapCloudOcc->srv.get()); - auto srv = texCubemapCloudOcc->srv.get(); - context->PSSetShaderResources(40, 1, &srv); - } else { - ID3D11ShaderResourceView* srv = nullptr; - context->PSSetShaderResources(40, 1, &srv); - } + std::array srvs = { nullptr }; + std::array uavs = { nullptr }; + + srvs.at(0) = perPass->SRV(); + srvs.at(1) = texCubemapCloudOcc->srv.get(); + srvs.at(2) = renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kPOST_ZPREPASS_COPY].depthSRV; + + uavs.at(0) = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGET::kSHADOW_MASK].UAV; + + context->CSSetShaderResources(0, (uint)srvs.size(), srvs.data()); + context->CSSetUnorderedAccessViews(0, (uint)uavs.size(), uavs.data(), nullptr); + context->CSSetShader(outputProgram, nullptr, 0); + context->Dispatch((deferred->giTexture->desc.Width + 31u) >> 5, (deferred->giTexture->desc.Height + 31u) >> 5, 1); - ID3D11ShaderResourceView* views[1]{}; - views[0] = perPass->srv.get(); - context->PSSetShaderResources(23, ARRAYSIZE(views), views); + // clean up + srvs.fill(nullptr); + uavs.fill(nullptr); + + context->CSSetShaderResources(0, (uint)srvs.size(), srvs.data()); + context->CSSetUnorderedAccessViews(0, (uint)uavs.size(), uavs.data(), nullptr); + } } void CloudShadows::Draw(const RE::BSShader* shader, const uint32_t descriptor) { + if (!settings.EnableCloudShadows || + (RE::Sky::GetSingleton()->mode.get() != RE::Sky::Mode::kFull) || + !RE::Sky::GetSingleton()->currentClimate) + return; + 
static Util::FrameChecker frame_checker; if (frame_checker.isNewFrame()) { // update settings buffer - auto& context = State::GetSingleton()->context; - PerPass perPassData{}; perPassData.Settings = settings; perPassData.Settings.TransparencyPower = exp2(perPassData.Settings.TransparencyPower); perPassData.RcpHPlusR = 1.f / (settings.CloudHeight + settings.PlanetRadius); - D3D11_MAPPED_SUBRESOURCE mapped; - DX::ThrowIfFailed(context->Map(perPass->resource.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped)); - size_t bytes = sizeof(PerPass); - memcpy_s(mapped.pData, bytes, &perPassData, bytes); - context->Unmap(perPass->resource.get(), 0); + perPass->Update(&perPassData, sizeof(perPassData)); } switch (shader->shaderType.get()) { case RE::BSShader::Type::Sky: ModifySky(shader, descriptor); break; - case RE::BSShader::Type::Lighting: - case RE::BSShader::Type::DistantTree: - case RE::BSShader::Type::Grass: - // case RE::BSShader::Type::Water: - ModifyLighting(); - break; default: break; } @@ -264,22 +286,11 @@ void CloudShadows::SetupResources() } { - D3D11_BUFFER_DESC sbDesc{}; - sbDesc.Usage = D3D11_USAGE_DYNAMIC; - sbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; - sbDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE; - sbDesc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED; - sbDesc.StructureByteStride = sizeof(PerPass); - sbDesc.ByteWidth = sizeof(PerPass); - perPass = std::make_unique(sbDesc); - - D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc; - srvDesc.Format = DXGI_FORMAT_UNKNOWN; - srvDesc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER; - srvDesc.Buffer.FirstElement = 0; - srvDesc.Buffer.NumElements = 1; - perPass->CreateSRV(srvDesc); + perPass = std::make_unique(StructuredBufferDesc(), 1); + perPass->CreateSRV(); } + + CompileComputeShaders(); } void CloudShadows::RestoreDefaultSettings() diff --git a/src/Features/CloudShadows.h b/src/Features/CloudShadows.h index 03ac74bf1..6099207dd 100644 --- a/src/Features/CloudShadows.h +++ b/src/Features/CloudShadows.h @@ -19,25 +19,18 
@@ struct CloudShadows : Feature struct Settings { uint EnableCloudShadows = true; - float CloudHeight = 2e3f / 1.428e-2f; float PlanetRadius = 6371e3f / 1.428e-2f; - float EffectMix = 1.f; - float TransparencyPower = 0.1f; - float AbsorptionAmbient = 0.2f; } settings; - struct alignas(16) PerPass + struct PerPass { Settings Settings; - float RcpHPlusR; - - float padding; }; - std::unique_ptr perPass = nullptr; + std::unique_ptr perPass = nullptr; bool isCubemapPass = false; ID3D11BlendState* resetBlendState = nullptr; @@ -48,15 +41,20 @@ struct CloudShadows : Feature ID3D11RenderTargetView* cubemapCloudOccRTVs[6] = { nullptr }; ID3D11ShaderResourceView* cubemapCloudOccDebugSRV = nullptr; + ID3D11ComputeShader* outputProgram = nullptr; + virtual void SetupResources() override; + void CompileComputeShaders(); + virtual inline void Reset() override {} + virtual void ClearShaderCache() override; virtual void DrawSettings() override; void CheckResourcesSide(int side); void ModifySky(const RE::BSShader* shader, const uint32_t descriptor); - void ModifyLighting(); virtual void Draw(const RE::BSShader* shader, const uint32_t descriptor) override; + void DrawShadows(); virtual void Load(json& o_json) override; virtual void Save(json& o_json) override; diff --git a/src/Features/DistantTreeLighting.cpp b/src/Features/DistantTreeLighting.cpp deleted file mode 100644 index d179ff8a1..000000000 --- a/src/Features/DistantTreeLighting.cpp +++ /dev/null @@ -1,157 +0,0 @@ -#include "DistantTreeLighting.h" - -#include "State.h" -#include "Util.h" - -NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT( - DistantTreeLighting::Settings, - EnableComplexTreeLOD, - EnableDirLightFix, - SubsurfaceScatteringAmount) - -void DistantTreeLighting::DrawSettings() -{ - if (ImGui::TreeNodeEx("Complex Tree LOD", ImGuiTreeNodeFlags_DefaultOpen)) { - ImGui::Checkbox("Enable Complex Tree LOD", (bool*)&settings.EnableComplexTreeLOD); - if (auto _tt = Util::HoverTooltipWrapper()) { - ImGui::Text( - "Enables 
advanced lighting simulation on tree LOD. " - "Requires DynDOLOD. " - "See https://dyndolod.info/ for more information. "); - } - - ImGui::Spacing(); - ImGui::Spacing(); - ImGui::TreePop(); - } - - if (ImGui::TreeNodeEx("Lights", ImGuiTreeNodeFlags_DefaultOpen)) { - ImGui::Checkbox("Enable Directional Light Fix", (bool*)&settings.EnableDirLightFix); - if (auto _tt = Util::HoverTooltipWrapper()) { - ImGui::Text("Fix for trees not being affected by sunlight scale."); - } - - ImGui::Spacing(); - ImGui::Spacing(); - ImGui::TreePop(); - } - - if (ImGui::TreeNodeEx("Effects", ImGuiTreeNodeFlags_DefaultOpen)) { - ImGui::SliderFloat("SSS Amount", &settings.SubsurfaceScatteringAmount, 0.0f, 1.0f); - if (auto _tt = Util::HoverTooltipWrapper()) { - ImGui::Text( - "Subsurface Scattering (SSS) amount. " - "Soft lighting controls how evenly lit an object is. " - "Back lighting illuminates the back face of an object. " - "Combined to model the transport of light through the surface. "); - } - - ImGui::Spacing(); - ImGui::Spacing(); - ImGui::TreePop(); - } -} - -enum class DistantTreeShaderTechniques -{ - DistantTreeBlock = 0, - Depth = 1, -}; - -void DistantTreeLighting::ModifyDistantTree(const RE::BSShader*, const uint32_t descriptor) -{ - if (auto player = RE::PlayerCharacter::GetSingleton()) { - if (auto worldSpace = player->GetWorldspace()) { - if (lastWorldSpace != worldSpace) { - lastWorldSpace = worldSpace; - if (auto name = worldSpace->GetFormEditorID()) { - CSimpleIniA ini; - ini.SetUnicode(); - auto path = std::format("Data\\Textures\\Terrain\\{}\\Trees\\{}TreeLOD.ini", name, name); - ini.LoadFile(path.c_str()); - complexAtlasTexture = ini.GetBoolValue("Information", "ComplexAtlasTexture", false); - } else { - complexAtlasTexture = false; - } - } - } - } - - const auto technique = descriptor & 1; - if (technique != static_cast(DistantTreeShaderTechniques::Depth)) { - PerPass perPassData{}; - ZeroMemory(&perPassData, sizeof(perPassData)); - - auto& shaderState = 
RE::BSShaderManager::State::GetSingleton(); - RE::NiTransform& dalcTransform = shaderState.directionalAmbientTransform; - - Util::StoreTransform3x4NoScale(perPassData.DirectionalAmbient, dalcTransform); - - auto accumulator = RE::BSGraphics::BSShaderAccumulator::GetCurrentAccumulator(); - - auto sunLight = skyrim_cast(accumulator->GetRuntimeData().activeShadowSceneNode->GetRuntimeData().sunLight->light.get()); - if (sunLight) { - auto imageSpaceManager = RE::ImageSpaceManager::GetSingleton(); - auto sunlightScale = !REL::Module::IsVR() ? imageSpaceManager->GetRuntimeData().data.baseData.hdr.sunlightScale : - imageSpaceManager->GetVRRuntimeData().data.baseData.hdr.sunlightScale; - - perPassData.DirLightScale = sunlightScale * sunLight->GetLightRuntimeData().fade; - - perPassData.DirLightColor.x = sunLight->GetLightRuntimeData().diffuse.red; - perPassData.DirLightColor.y = sunLight->GetLightRuntimeData().diffuse.green; - perPassData.DirLightColor.z = sunLight->GetLightRuntimeData().diffuse.blue; - - auto& direction = sunLight->GetWorldDirection(); - perPassData.DirLightDirection.x = direction.x; - perPassData.DirLightDirection.y = direction.y; - perPassData.DirLightDirection.z = direction.z; - } - - perPassData.ComplexAtlasTexture = complexAtlasTexture; - - perPassData.Settings = settings; - - perPass->Update(perPassData); - - auto& context = State::GetSingleton()->context; - - ID3D11Buffer* buffers[2]; - context->VSGetConstantBuffers(2, 1, buffers); // buffers[0] - buffers[1] = perPass->CB(); - context->VSSetConstantBuffers(2, ARRAYSIZE(buffers), buffers); - context->PSGetConstantBuffers(2, 1, buffers); // buffers[0] - context->PSSetConstantBuffers(2, ARRAYSIZE(buffers), buffers); - } -} - -void DistantTreeLighting::Draw(const RE::BSShader* shader, const uint32_t descriptor) -{ - switch (shader->shaderType.get()) { - case RE::BSShader::Type::DistantTree: - ModifyDistantTree(shader, descriptor); - break; - } -} - -void DistantTreeLighting::Load(json& o_json) -{ - if 
(o_json[GetName()].is_object()) - settings = o_json[GetName()]; - - Feature::Load(o_json); -} - -void DistantTreeLighting::Save(json& o_json) -{ - o_json[GetName()] = settings; -} - -void DistantTreeLighting::RestoreDefaultSettings() -{ - settings = {}; -} - -void DistantTreeLighting::SetupResources() -{ - perPass = new ConstantBuffer(ConstantBufferDesc()); -} diff --git a/src/Features/DistantTreeLighting.h b/src/Features/DistantTreeLighting.h deleted file mode 100644 index 33cc023fb..000000000 --- a/src/Features/DistantTreeLighting.h +++ /dev/null @@ -1,53 +0,0 @@ -#pragma once - -#include "Buffer.h" -#include "Feature.h" - -struct DistantTreeLighting : Feature -{ - static DistantTreeLighting* GetSingleton() - { - static DistantTreeLighting singleton; - return &singleton; - } - - virtual inline std::string GetName() { return "Tree LOD Lighting"; } - virtual inline std::string GetShortName() { return "TreeLODLighting"; } - - struct Settings - { - std::uint32_t EnableComplexTreeLOD = 1; - std::uint32_t EnableDirLightFix = 1; - float SubsurfaceScatteringAmount = 0.5; - }; - - struct alignas(16) PerPass - { - DirectX::XMFLOAT3X4 DirectionalAmbient; - DirectX::XMFLOAT4 DirLightColor; - DirectX::XMFLOAT4 DirLightDirection; - float DirLightScale; - std::uint32_t ComplexAtlasTexture; - Settings Settings; - float pad[3]; - }; - - Settings settings; - ConstantBuffer* perPass = nullptr; - - RE::TESWorldSpace* lastWorldSpace = nullptr; - bool complexAtlasTexture = false; - - virtual void SetupResources(); - virtual inline void Reset() {} - - virtual void DrawSettings(); - void ModifyDistantTree(const RE::BSShader* shader, const uint32_t descriptor); - virtual void Draw(const RE::BSShader* shader, const uint32_t descriptor); - - virtual void Load(json& o_json); - virtual void Save(json& o_json); - - virtual void RestoreDefaultSettings(); - bool SupportsVR() override { return true; }; -}; diff --git a/src/Features/DynamicCubemaps.cpp b/src/Features/DynamicCubemaps.cpp index 
c86336454..946e11b1c 100644 --- a/src/Features/DynamicCubemaps.cpp +++ b/src/Features/DynamicCubemaps.cpp @@ -397,7 +397,9 @@ void DynamicCubemaps::Draw(const RE::BSShader* shader, const uint32_t) if (shader->shaderType.get() == RE::BSShader::Type::Lighting || shader->shaderType.get() == RE::BSShader::Type::Water) { // During world cubemap generation we cannot use the cubemap auto& shadowState = State::GetSingleton()->shadowState; - auto cubeMapRenderTarget = !REL::Module::IsVR() ? shadowState->GetRuntimeData().cubeMapRenderTarget : shadowState->GetVRRuntimeData().cubeMapRenderTarget; + + GET_INSTANCE_MEMBER(cubeMapRenderTarget, shadowState); + if (cubeMapRenderTarget != RE::RENDER_TARGETS_CUBEMAP::kREFLECTIONS && !renderedScreenCamera) { UpdateCubemap(); renderedScreenCamera = true; diff --git a/src/Features/GrassLighting.cpp b/src/Features/GrassLighting.cpp index 2063233e0..b7f008556 100644 --- a/src/Features/GrassLighting.cpp +++ b/src/Features/GrassLighting.cpp @@ -8,7 +8,6 @@ NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT( Glossiness, SpecularStrength, SubsurfaceScatteringAmount, - EnableDirLightFix, OverrideComplexGrassSettings, BasicGrassBrightness) @@ -52,11 +51,6 @@ void GrassLighting::DrawSettings() } if (ImGui::TreeNodeEx("Lighting", ImGuiTreeNodeFlags_DefaultOpen)) { - ImGui::Checkbox("Enable Directional Light Fix", (bool*)&settings.EnableDirLightFix); - if (auto _tt = Util::HoverTooltipWrapper()) { - ImGui::Text("Fix for grass not being affected by sunlight scale."); - } - ImGui::Checkbox("Override Complex Grass Lighting Settings", (bool*)&settings.OverrideComplexGrassSettings); if (auto _tt = Util::HoverTooltipWrapper()) { ImGui::Text( @@ -83,14 +77,9 @@ void GrassLighting::ModifyGrass(const RE::BSShader*, const uint32_t descriptor) const auto technique = descriptor & 0b1111; if (technique != static_cast(GrassShaderTechniques::RenderDepth)) { if (updatePerFrame) { - auto& state = RE::BSShaderManager::State::GetSingleton(); - RE::NiTransform& 
dalcTransform = state.directionalAmbientTransform; auto imageSpaceManager = RE::ImageSpaceManager::GetSingleton(); PerFrame perFrameData{}; - ZeroMemory(&perFrameData, sizeof(perFrameData)); - Util::StoreTransform3x4NoScale(perFrameData.DirectionalAmbient, dalcTransform); - perFrameData.SunlightScale = !REL::Module::IsVR() ? imageSpaceManager->GetRuntimeData().data.baseData.hdr.sunlightScale : imageSpaceManager->GetVRRuntimeData().data.baseData.hdr.sunlightScale; diff --git a/src/Features/GrassLighting.h b/src/Features/GrassLighting.h index 7d578110f..b3ec0729f 100644 --- a/src/Features/GrassLighting.h +++ b/src/Features/GrassLighting.h @@ -19,17 +19,15 @@ struct GrassLighting : Feature float Glossiness = 20.0f; float SpecularStrength = 0.5f; float SubsurfaceScatteringAmount = 1.0f; - uint EnableDirLightFix = true; uint OverrideComplexGrassSettings = false; - float BasicGrassBrightness = 0.666f; + float BasicGrassBrightness = 1.0f; }; struct alignas(16) PerFrame { - DirectX::XMFLOAT3X4 DirectionalAmbient; float SunlightScale; Settings Settings; - float pad[1]; + float pad[2]; }; Settings settings; diff --git a/src/Features/ScreenSpaceGI.cpp b/src/Features/ScreenSpaceGI.cpp new file mode 100644 index 000000000..559bfd98a --- /dev/null +++ b/src/Features/ScreenSpaceGI.cpp @@ -0,0 +1,685 @@ +#include "ScreenSpaceGI.h" + +#include "Deferred.h" +#include "State.h" +#include "Util.h" + +#include "DirectXTex.h" + +NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT( + ScreenSpaceGI::Settings, + Enabled, + UseBitmask, + EnableGI, + EnableTemporalDenoiser, + NumSlices, + NumSteps, + DepthMIPSamplingOffset, + EffectRadius, + EffectFalloffRange, + ThinOccluderCompensation, + Thickness, + DepthFadeRange, + CheckBackface, + BackfaceStrength, + EnableGIBounce, + GIBounceFade, + GIDistanceCompensation, + GICompensationMaxDist, + AOPower, + GIStrength, + DepthDisocclusion, + MaxAccumFrames) + +class DisableGuard +{ +private: + bool disable; + +public: + DisableGuard(bool disable) : + 
disable(disable) + { + if (disable) + ImGui::BeginDisabled(); + } + ~DisableGuard() + { + if (disable) + ImGui::EndDisabled(); + } +}; + +bool percentageSlider(const char* label, float* data, const char* format = "%.1f %%") +{ + float percentageData = (*data) * 1e2f; + bool retval = ImGui::SliderFloat(label, &percentageData, 0.f, 100.f, format); + (*data) = percentageData * 1e-2f; + return retval; +} + +//////////////////////////////////////////////////////////////////////////////////// + +void ScreenSpaceGI::RestoreDefaultSettings() +{ + settings = {}; +} + +void ScreenSpaceGI::DrawSettings() +{ + /////////////////////////////// + ImGui::SeparatorText("Toggles"); + + if (ImGui::BeginTable("Toggles", 3)) { + ImGui::TableNextColumn(); + ImGui::Checkbox("Enabled", &settings.Enabled); + ImGui::TableNextColumn(); + recompileFlag |= ImGui::Checkbox("GI", &settings.EnableGI); + ImGui::TableNextColumn(); + recompileFlag |= ImGui::Checkbox("Bitmask", &settings.UseBitmask); + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text("An alternative way to calculate AO/GI"); + + ImGui::EndTable(); + } + + /////////////////////////////// + ImGui::SeparatorText("Quality/Performance"); + + ImGui::SliderInt("Slices", (int*)&settings.NumSlices, 1, 10); + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text("How many directions do the samples take. A greater value reduces noise but is more expensive."); + + ImGui::SliderInt("Steps Per Slice", (int*)&settings.NumSteps, 1, 20); + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text("How many samples does it take in one direction. 
A greater value enhances the effects but is more expensive."); + + ImGui::SliderFloat("MIP Sampling Offset", &settings.DepthMIPSamplingOffset, 2.f, 6.f, "%.2f"); + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text("Mainly performance (texture memory bandwidth) setting but as a side-effect reduces overshadowing by thin objects and increases temporal instability."); + + if (ImGui::BeginTable("Quality Toggles", 2)) { + ImGui::TableNextColumn(); + recompileFlag |= ImGui::Checkbox("Half Resolution", &settings.HalfRes); + + ImGui::EndTable(); + } + + /////////////////////////////// + ImGui::SeparatorText("Visual"); + + ImGui::SliderFloat("AO Power", &settings.AOPower, 0.f, 3.f, "%.2f"); + + { + auto _ = DisableGuard(!settings.EnableGI); + ImGui::SliderFloat("GI Strength", &settings.GIStrength, 0.f, 20.f, "%.2f"); + // percentageSlider("GI Saturation", &settings.GISaturation); + } + + ImGui::Separator(); + + ImGui::SliderFloat("Effect radius", &settings.EffectRadius, 10.f, 300.0f, "%.1f game units"); + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text("World (viewspace) effect radius. 
Depends on the scene & requirements"); + + ImGui::SliderFloat2("Depth Fade Range", &settings.DepthFadeRange.x, 1e4, 5e4, "%.0f game units"); + + ImGui::Separator(); + + { + auto _ = DisableGuard(settings.UseBitmask); + + ImGui::SliderFloat("Falloff Range", &settings.EffectFalloffRange, 0.05f, 1.0f, "%.2f"); + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text("Gently reduce sample impact as it gets out of 'Effect radius' bounds"); + + ImGui::SliderFloat("Thin Occluder Compensation", &settings.ThinOccluderCompensation, 0.f, 0.7f, "%.2f"); + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text("Slightly reduce impact of samples further back to counter the bias from depth-based (incomplete) input scene geometry data"); + } + { + auto _ = DisableGuard(!settings.UseBitmask); + + ImGui::SliderFloat("Thickness", &settings.Thickness, 0.f, 500.0f, "%.1f game units"); + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text("How thick the occluders are. 20 to 30 percent of effect radius is recommended."); + } + + /////////////////////////////// + ImGui::SeparatorText("Visual - GI"); + + { + auto _ = DisableGuard(!settings.EnableGI); + + ImGui::SliderFloat("GI Distance Compensation", &settings.GIDistanceCompensation, 0.0f, 9.0f, "%.1f"); + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text( + "Brighten up further radiance samples that are otherwise too weak. Creates a wider GI look.\n" + "If using bitmask, this value should be roughly inverse to thickness."); + + ImGui::SliderFloat("GI Compensation Distance", &settings.GICompensationMaxDist, 10.0f, 500.0f, "%.1f game units"); + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text("The distance of maximal compensation/brightening."); + + ImGui::Separator(); + + recompileFlag |= ImGui::Checkbox("GI Bounce", &settings.EnableGIBounce); + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text("Simulates multiple light bounces. 
Better with denoiser on."); + + { + auto __ = DisableGuard(!settings.EnableGIBounce); + ImGui::Indent(); + percentageSlider("GI Bounce Strength", &settings.GIBounceFade); + ImGui::Unindent(); + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text("How much of this frame's GI gets carried to the next frame."); + } + + ImGui::Separator(); + + recompileFlag |= ImGui::Checkbox("Backface Checks", &settings.CheckBackface); + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text("Disable to get some frames, IF you don't care about light emitting from the back of objects."); + { + auto __ = DisableGuard(!settings.CheckBackface); + ImGui::Indent(); + percentageSlider("Backface Lighting Mix", &settings.BackfaceStrength); + ImGui::Unindent(); + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text("How bright at the back of objects is compared to the front. A small value to make up for foliage translucency."); + } + } + + /////////////////////////////// + ImGui::SeparatorText("Denoising"); + + ImGui::TextWrapped("At full resolution, you can try disabling denoisers and let TAA handle the noise."); + + recompileFlag |= ImGui::Checkbox("Temporal Denoiser", &settings.EnableTemporalDenoiser); + + { + auto _ = DisableGuard(!settings.EnableTemporalDenoiser); + ImGui::Indent(); + ImGui::SliderInt("Max Frame Accumulation", (int*)&settings.MaxAccumFrames, 1, 64, "%d", ImGuiSliderFlags_AlwaysClamp); + ImGui::Unindent(); + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text("How many past frames to accumulate results with. Higher values are less noisy but potentially cause ghosting."); + } + + // ImGui::SliderInt("Passes", (int*)&settings.DenoisePasses, 0, 10); + // if (auto _tt = Util::HoverTooltipWrapper()) + // ImGui::Text("How many denoising passes to go through. 
The more the blurrier."); + + { + auto _ = DisableGuard(!settings.EnableTemporalDenoiser && !(settings.EnableGI || settings.EnableGIBounce)); + + ImGui::SliderFloat("Movement Disocclusion", &settings.DepthDisocclusion, 0.f, 100.f, "%.1f game units"); + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text( + "If a pixel has moved this far from the last frame, its radiance will not be carried to this frame.\n" + "Lower values are stricter."); + } + + /////////////////////////////// + ImGui::SeparatorText("Debug"); + + if (ImGui::TreeNode("Buffer Viewer")) { + static float debugRescale = .3f; + ImGui::SliderFloat("View Resize", &debugRescale, 0.f, 1.f); + + // ImGui doesn't support U32 + // if (ImGui::TreeNode("texHilbertLUT")) { + // ImGui::Image(texHilbertLUT->srv.get(), { (float)texHilbertLUT->desc.Width, (float)texHilbertLUT->desc.Height }); + // ImGui::TreePop(); + // } + if (ImGui::TreeNode("texWorkingDepth")) { + ImGui::Image(texWorkingDepth->srv.get(), { texWorkingDepth->desc.Width * debugRescale, texWorkingDepth->desc.Height * debugRescale }); + ImGui::TreePop(); + } + if (ImGui::TreeNode("texPrevDepth")) { + ImGui::Image(texPrevDepth->srv.get(), { texPrevDepth->desc.Width * debugRescale, texPrevDepth->desc.Height * debugRescale }); + ImGui::TreePop(); + } + if (ImGui::TreeNode("texRadiance")) { + ImGui::Image(texRadiance->srv.get(), { texRadiance->desc.Width * debugRescale, texRadiance->desc.Height * debugRescale }); + ImGui::TreePop(); + } + if (ImGui::TreeNode("texGI0")) { + ImGui::Image(texGI0->srv.get(), { texGI0->desc.Width * debugRescale, texGI0->desc.Height * debugRescale }); + ImGui::TreePop(); + } + if (ImGui::TreeNode("texGI1")) { + ImGui::Image(texGI1->srv.get(), { texGI1->desc.Width * debugRescale, texGI1->desc.Height * debugRescale }); + ImGui::TreePop(); + } + if (ImGui::TreeNode("texPrevGIAlbedo")) { + ImGui::Image(texPrevGIAlbedo->srv.get(), { texPrevGIAlbedo->desc.Width * debugRescale, texPrevGIAlbedo->desc.Height * debugRescale }); + 
ImGui::TreePop(); + } + + ImGui::TreePop(); + } +} + +void ScreenSpaceGI::Load(json& o_json) +{ + if (o_json[GetName()].is_object()) + settings = o_json[GetName()]; + + Feature::Load(o_json); +} + +void ScreenSpaceGI::Save([[maybe_unused]] json& o_json) +{ + o_json[GetName()] = settings; +} + +void ScreenSpaceGI::SetupResources() +{ + auto renderer = RE::BSGraphics::Renderer::GetSingleton(); + auto& device = State::GetSingleton()->device; + + logger::debug("Creating buffers..."); + { + ssgiCB = eastl::make_unique(ConstantBufferDesc()); + } + + logger::debug("Creating textures..."); + { + D3D11_TEXTURE2D_DESC texDesc{ + .Width = 64, + .Height = 64, + .MipLevels = 1, + .ArraySize = 1, + .Format = DXGI_FORMAT_R32_UINT, + .SampleDesc = { 1, 0 }, + .Usage = D3D11_USAGE_DEFAULT, + .BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS, + .CPUAccessFlags = 0, + .MiscFlags = 0 + }; + D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = { + .Format = texDesc.Format, + .ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D, + .Texture2D = { + .MostDetailedMip = 0, + .MipLevels = texDesc.MipLevels } + }; + D3D11_UNORDERED_ACCESS_VIEW_DESC uavDesc = { + .Format = texDesc.Format, + .ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D, + .Texture2D = { .MipSlice = 0 } + }; + + { + texHilbertLUT = eastl::make_unique(texDesc); + texHilbertLUT->CreateSRV(srvDesc); + texHilbertLUT->CreateUAV(uavDesc); + } + + auto mainTex = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMAIN]; + mainTex.texture->GetDesc(&texDesc); + srvDesc.Format = uavDesc.Format = texDesc.Format = DXGI_FORMAT_R11G11B10_FLOAT; + texDesc.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS; + texDesc.MipLevels = srvDesc.Texture2D.MipLevels = 5; + texDesc.MiscFlags |= D3D11_RESOURCE_MISC_GENERATE_MIPS; + + { + texRadiance = eastl::make_unique(texDesc); + texRadiance->CreateSRV(srvDesc); + texRadiance->CreateUAV(uavDesc); + } + + texDesc.BindFlags &= 
~D3D11_BIND_RENDER_TARGET; + texDesc.MiscFlags &= ~D3D11_RESOURCE_MISC_GENERATE_MIPS; + texDesc.Format = srvDesc.Format = uavDesc.Format = DXGI_FORMAT_R16_FLOAT; + + { + texWorkingDepth = eastl::make_unique(texDesc); + texWorkingDepth->CreateSRV(srvDesc); + for (int i = 0; i < 5; ++i) { + uavDesc.Texture2D.MipSlice = i; + DX::ThrowIfFailed(device->CreateUnorderedAccessView(texWorkingDepth->resource.get(), &uavDesc, uavWorkingDepth[i].put())); + } + } + + uavDesc.Texture2D.MipSlice = 0; + texDesc.MipLevels = srvDesc.Texture2D.MipLevels = 1; + srvDesc.Format = uavDesc.Format = texDesc.Format = DXGI_FORMAT_R16G16B16A16_FLOAT; + { + texGI0 = eastl::make_unique(texDesc); + texGI0->CreateSRV(srvDesc); + texGI0->CreateUAV(uavDesc); + + texGI1 = eastl::make_unique(texDesc); + texGI1->CreateSRV(srvDesc); + texGI1->CreateUAV(uavDesc); + } + + srvDesc.Format = uavDesc.Format = texDesc.Format = DXGI_FORMAT_R11G11B10_FLOAT; + { + texPrevGIAlbedo = eastl::make_unique(texDesc); + texPrevGIAlbedo->CreateSRV(srvDesc); + texPrevGIAlbedo->CreateUAV(uavDesc); + } + + srvDesc.Format = uavDesc.Format = texDesc.Format = DXGI_FORMAT_R8_UINT; + { + texAccumFrames = eastl::make_unique(texDesc); + texAccumFrames->CreateSRV(srvDesc); + texAccumFrames->CreateUAV(uavDesc); + } + + srvDesc.Format = uavDesc.Format = texDesc.Format = DXGI_FORMAT_R16_FLOAT; + { + texPrevDepth = eastl::make_unique(texDesc); + texPrevDepth->CreateSRV(srvDesc); + texPrevDepth->CreateUAV(uavDesc); + } + } + + logger::debug("Creating samplers..."); + { + D3D11_SAMPLER_DESC samplerDesc = { + .Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR, + .AddressU = D3D11_TEXTURE_ADDRESS_CLAMP, + .AddressV = D3D11_TEXTURE_ADDRESS_CLAMP, + .AddressW = D3D11_TEXTURE_ADDRESS_CLAMP, + .MaxAnisotropy = 1, + .MinLOD = 0, + .MaxLOD = D3D11_FLOAT32_MAX + }; + DX::ThrowIfFailed(device->CreateSamplerState(&samplerDesc, linearClampSampler.put())); + + samplerDesc.Filter = D3D11_FILTER_MIN_MAG_MIP_POINT; + 
DX::ThrowIfFailed(device->CreateSamplerState(&samplerDesc, pointClampSampler.put())); + } + + CompileComputeShaders(); +} + +void ScreenSpaceGI::ClearShaderCache() +{ + static const std::vector*> shaderPtrs = { + &hilbertLutCompute, &prefilterDepthsCompute, &radianceDisoccCompute, &giCompute, &upsampleCompute, &outputCompute + }; + + for (auto shader : shaderPtrs) + if ((*shader)) { + (*shader)->Release(); + shader->detach(); + } + + CompileComputeShaders(); +} + +void ScreenSpaceGI::CompileComputeShaders() +{ + struct ShaderCompileInfo + { + winrt::com_ptr* programPtr; + std::string_view filename; + std::vector> defines; + }; + + std::vector + shaderInfos = { + { &hilbertLutCompute, "hilbert.cs.hlsl", {} }, + { &prefilterDepthsCompute, "prefilterDepths.cs.hlsl", {} }, + { &radianceDisoccCompute, "radianceDisocc.cs.hlsl", {} }, + { &giCompute, "gi.cs.hlsl", {} }, + { &upsampleCompute, "upsample.cs.hlsl", {} }, + { &outputCompute, "output.cs.hlsl", {} } + }; + for (auto& info : shaderInfos) { + if (REL::Module::IsVR()) + info.defines.push_back({ "VR", "" }); + if (settings.HalfRes) + info.defines.push_back({ "HALF_RES", "" }); + if (settings.EnableTemporalDenoiser) + info.defines.push_back({ "TEMPORAL_DENOISER", "" }); + if (settings.UseBitmask) + info.defines.push_back({ "BITMASK", "" }); + if (settings.EnableGI) + info.defines.push_back({ "GI", "" }); + if (settings.EnableGIBounce) + info.defines.push_back({ "GI_BOUNCE", "" }); + if (settings.CheckBackface) + info.defines.push_back({ "BACKFACE", "" }); + } + + for (auto& info : shaderInfos) { + auto path = std::filesystem::path("Data\\Shaders\\ScreenSpaceGI") / info.filename; + if (auto rawPtr = reinterpret_cast(Util::CompileShader(path.c_str(), info.defines, "cs_5_0"))) + info.programPtr->attach(rawPtr); + } + + hilbertLutGenFlag = true; + recompileFlag = false; +} + +bool ScreenSpaceGI::ShadersOK() +{ + return hilbertLutCompute && prefilterDepthsCompute && radianceDisoccCompute && giCompute && upsampleCompute && 
outputCompute; +} + +void ScreenSpaceGI::GenerateHilbertLUT() +{ + auto& context = State::GetSingleton()->context; + + ID3D11UnorderedAccessView* uav = texHilbertLUT->uav.get(); + context->CSSetUnorderedAccessViews(0, 1, &uav, nullptr); + context->CSSetShader(hilbertLutCompute.get(), nullptr, 0); + + context->Dispatch(2, 2, 1); + + uav = nullptr; + context->CSSetUnorderedAccessViews(0, 1, &uav, nullptr); + context->CSSetShader(nullptr, nullptr, 0); + + hilbertLutGenFlag = false; +} + +void ScreenSpaceGI::UpdateSB() +{ + auto viewport = RE::BSGraphics::State::GetSingleton(); + auto& state = State::GetSingleton()->shadowState; + + uint resolution[2] = { + (uint)(State::GetSingleton()->screenWidth * viewport->GetRuntimeData().dynamicResolutionCurrentWidthScale), + (uint)(State::GetSingleton()->screenHeight * viewport->GetRuntimeData().dynamicResolutionCurrentWidthScale) + }; + uint halfRes[2] = { (resolution[0] + 1) >> 1, (resolution[1] + 1) >> 1 }; + + float2 res = settings.HalfRes ? float2{ (float)halfRes[0], (float)halfRes[1] } : float2{ (float)resolution[0], (float)resolution[1] }; + + static float4x4 prevInvView[2] = {}; + + SSGICB data; + { + for (int eyeIndex = 0; eyeIndex < (1 + REL::Module::IsVR()); ++eyeIndex) { + auto eye = (!REL::Module::IsVR()) ? 
state->GetRuntimeData().cameraData.getEye(eyeIndex) : state->GetVRRuntimeData().cameraData.getEye(eyeIndex); + + data.PrevInvViewMat[eyeIndex] = prevInvView[eyeIndex]; + data.NDCToViewMul[eyeIndex] = { 2.0f / eye.projMat(0, 0), -2.0f / eye.projMat(1, 1) }; + data.NDCToViewAdd[eyeIndex] = { -1.0f / eye.projMat(0, 0), 1.0f / eye.projMat(1, 1) }; + data.NDCToViewMul_x_PixelSize[eyeIndex] = data.NDCToViewMul[eyeIndex] / res; + if (REL::Module::IsVR()) + data.NDCToViewMul[eyeIndex].x *= 2; + + prevInvView[eyeIndex] = eye.viewMat.Invert(); + } + + data.FrameDim = res; + data.RcpFrameDim = float2(1.0f) / res; + data.FrameIndex = viewport->uiFrameCount; + + data.NumSlices = settings.NumSlices; + data.NumSteps = settings.NumSteps; + data.DepthMIPSamplingOffset = settings.DepthMIPSamplingOffset; + + data.EffectRadius = settings.EffectRadius; + data.EffectFalloffRange = settings.EffectFalloffRange; + data.ThinOccluderCompensation = settings.ThinOccluderCompensation; + data.Thickness = settings.Thickness; + data.DepthFadeRange = settings.DepthFadeRange; + data.DepthFadeScaleConst = 1 / (settings.DepthFadeRange.y - settings.DepthFadeRange.x); + + data.BackfaceStrength = settings.BackfaceStrength; + data.GIBounceFade = settings.GIBounceFade; + data.GIDistanceCompensation = settings.GIDistanceCompensation; + data.GICompensationMaxDist = settings.GICompensationMaxDist; + + data.AOPower = settings.AOPower; + data.GIStrength = settings.GIStrength; + + data.DepthDisocclusion = settings.DepthDisocclusion; + data.MaxAccumFrames = settings.MaxAccumFrames; + } + + ssgiCB->Update(data); +} + +void ScreenSpaceGI::DrawSSGI(Texture2D* outGI) +{ + if (!(settings.Enabled && ShadersOK())) + return; + + ////////////////////////////////////////////////////// + + if (recompileFlag) + ClearShaderCache(); + + if (hilbertLutGenFlag) + GenerateHilbertLUT(); + + UpdateSB(); + + ////////////////////////////////////////////////////// + + auto& context = State::GetSingleton()->context; + auto viewport = 
RE::BSGraphics::State::GetSingleton(); + auto renderer = RE::BSGraphics::Renderer::GetSingleton(); + auto rts = renderer->GetRuntimeData().renderTargets; + auto deferred = Deferred::GetSingleton(); + + uint resolution[2] = { + (uint)(State::GetSingleton()->screenWidth * viewport->GetRuntimeData().dynamicResolutionCurrentWidthScale), + (uint)(State::GetSingleton()->screenHeight * viewport->GetRuntimeData().dynamicResolutionCurrentWidthScale) + }; + uint halfRes[2] = { resolution[0] >> 1, resolution[1] >> 1 }; + auto targetRes = settings.HalfRes ? halfRes : resolution; + + std::array srvs = { nullptr }; + std::array uavs = { nullptr }; + std::array samplers = { pointClampSampler.get(), linearClampSampler.get() }; + auto cb = ssgiCB->CB(); + + auto resetViews = [&]() { + srvs.fill(nullptr); + uavs.fill(nullptr); + + context->CSSetShaderResources(0, (uint)srvs.size(), srvs.data()); + context->CSSetUnorderedAccessViews(0, (uint)uavs.size(), uavs.data(), nullptr); + }; + + ////////////////////////////////////////////////////// + + context->CSSetConstantBuffers(1, 1, &cb); + context->CSSetSamplers(0, (uint)samplers.size(), samplers.data()); + + // prefilter depths + { + srvs[0] = renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kPOST_ZPREPASS_COPY].depthSRV; + for (int i = 0; i < 5; ++i) + uavs[i] = uavWorkingDepth[i].get(); + + context->CSSetShaderResources(0, (uint)srvs.size(), srvs.data()); + context->CSSetUnorderedAccessViews(0, (uint)uavs.size(), uavs.data(), nullptr); + context->CSSetShader(prefilterDepthsCompute.get(), nullptr, 0); + context->Dispatch((resolution[0] + 15) >> 4, (resolution[1] + 15) >> 4, 1); + } + + // fetch radiance and disocclusion + { + resetViews(); + srvs[0] = rts[deferred->forwardRenderTargets[0]].SRV; + srvs[1] = texGI0->srv.get(); + srvs[2] = texWorkingDepth->srv.get(); + srvs[3] = rts[NORMALROUGHNESS].SRV; + srvs[4] = texPrevDepth->srv.get(); + srvs[5] = rts[RE::RENDER_TARGET::kMOTION_VECTOR].SRV; + srvs[6] = 
texPrevGIAlbedo->srv.get(); + + uavs[0] = texRadiance->uav.get(); + uavs[1] = texAccumFrames->uav.get(); + uavs[2] = texGI1->uav.get(); + + context->CSSetShaderResources(0, (uint)srvs.size(), srvs.data()); + context->CSSetUnorderedAccessViews(0, (uint)uavs.size(), uavs.data(), nullptr); + context->CSSetShader(radianceDisoccCompute.get(), nullptr, 0); + context->Dispatch((targetRes[0] + 7u) >> 3, (targetRes[1] + 7u) >> 3, 1); + + context->GenerateMips(texRadiance->srv.get()); + } + + // GI + { + resetViews(); + srvs[0] = texWorkingDepth->srv.get(); + srvs[1] = rts[NORMALROUGHNESS].SRV; + srvs[2] = texRadiance->srv.get(); + srvs[3] = texHilbertLUT->srv.get(); + srvs[4] = texAccumFrames->srv.get(); + srvs[5] = texGI1->srv.get(); + + uavs[0] = texGI0->uav.get(); + uavs[1] = nullptr; + uavs[2] = texPrevDepth->uav.get(); + + context->CSSetShaderResources(0, (uint)srvs.size(), srvs.data()); + context->CSSetUnorderedAccessViews(0, (uint)uavs.size(), uavs.data(), nullptr); + context->CSSetShader(giCompute.get(), nullptr, 0); + context->Dispatch((targetRes[0] + 7u) >> 3, (targetRes[1] + 7u) >> 3, 1); + } + + // upsasmple + if (settings.HalfRes) { + resetViews(); + srvs[0] = texWorkingDepth->srv.get(); + srvs[1] = texGI0->srv.get(); + + uavs[0] = texGI1->uav.get(); + + context->CSSetShaderResources(0, (uint)srvs.size(), srvs.data()); + context->CSSetUnorderedAccessViews(0, (uint)uavs.size(), uavs.data(), nullptr); + context->CSSetShader(upsampleCompute.get(), nullptr, 0); + context->Dispatch((resolution[0] + 7u) >> 3, (resolution[1] + 7u) >> 3, 1); + } + + // output + { + resetViews(); + srvs[0] = settings.HalfRes ? 
texGI1->srv.get() : texGI0->srv.get(); + srvs[1] = rts[ALBEDO].SRV; + + uavs[0] = outGI->uav.get(); + uavs[1] = texPrevGIAlbedo->uav.get(); + + context->CSSetShaderResources(0, (uint)srvs.size(), srvs.data()); + context->CSSetUnorderedAccessViews(0, (uint)uavs.size(), uavs.data(), nullptr); + context->CSSetShader(outputCompute.get(), nullptr, 0); + context->Dispatch((resolution[0] + 7u) >> 3, (resolution[1] + 7u) >> 3, 1); + } + + // cleanup + resetViews(); + + samplers.fill(nullptr); + cb = nullptr; + + context->CSSetConstantBuffers(1, 1, &cb); + context->CSSetSamplers(0, (uint)samplers.size(), samplers.data()); + context->CSSetShader(nullptr, nullptr, 0); +} \ No newline at end of file diff --git a/src/Features/ScreenSpaceGI.h b/src/Features/ScreenSpaceGI.h new file mode 100644 index 000000000..7a0a3142e --- /dev/null +++ b/src/Features/ScreenSpaceGI.h @@ -0,0 +1,129 @@ +#pragma once + +#include "Buffer.h" +#include "Feature.h" + +struct ScreenSpaceGI : Feature +{ + static ScreenSpaceGI* GetSingleton() + { + static ScreenSpaceGI singleton; + return &singleton; + } + + virtual inline std::string GetName() override { return "Screen Space GI"; } + virtual inline std::string GetShortName() override { return "ScreenSpaceGI"; } + + virtual void RestoreDefaultSettings() override; + virtual void DrawSettings() override; + + virtual void Load(json& o_json) override; + virtual void Save(json& o_json) override; + + virtual inline void Reset() override{}; + virtual void SetupResources() override; + virtual void ClearShaderCache() override; + void CompileComputeShaders(); + bool ShadersOK(); + + virtual inline void Draw(const RE::BSShader*, const uint32_t) override{}; + + void DrawSSGI(Texture2D* outGI); + void GenerateHilbertLUT(); + void UpdateSB(); + + ////////////////////////////////////////////////////////////////////////////////// + + bool hilbertLutGenFlag = false; + bool recompileFlag = false; + + struct Settings + { + bool Enabled = true; + bool UseBitmask = true; + 
bool EnableGI = true; + // performance/quality + uint NumSlices = 2; + uint NumSteps = 5; + bool HalfRes = true; + // float SampleDistributionPower = 1.f; + float DepthMIPSamplingOffset = 3.3f; + // visual + float EffectRadius = 200.f; // world (viewspace) maximum size of the shadow + float EffectFalloffRange = .615f; + float ThinOccluderCompensation = 0.f; + float Thickness = 50.f; + float2 DepthFadeRange = { 2e4, 3e4 }; + // gi + bool CheckBackface = true; + float BackfaceStrength = 0.1f; + bool EnableGIBounce = true; + float GIBounceFade = 0.8f; + float GIDistanceCompensation = 1; + float GICompensationMaxDist = 200; + // mix + float AOPower = 1.f; + float GIStrength = 8.f; + // denoise + bool EnableTemporalDenoiser = true; + float DepthDisocclusion = 50.f; + uint MaxAccumFrames = 16; + } settings; + + struct alignas(16) SSGICB + { + float4x4 PrevInvViewMat[2]; + float2 NDCToViewMul[2]; + float2 NDCToViewAdd[2]; + float2 NDCToViewMul_x_PixelSize[2]; + + float2 FrameDim; + float2 RcpFrameDim; // + uint FrameIndex; + + uint NumSlices; + uint NumSteps; + float DepthMIPSamplingOffset; // + + float EffectRadius; + float EffectFalloffRange; + float ThinOccluderCompensation; + float Thickness; // + float2 DepthFadeRange; + float DepthFadeScaleConst; + + float BackfaceStrength; // + float GIBounceFade; + float GIDistanceCompensation; + float GICompensationMaxDist; + + float AOPower; // + float GIStrength; + + float DepthDisocclusion; + uint MaxAccumFrames; + + float pad[1]; + }; + eastl::unique_ptr ssgiCB; + + eastl::unique_ptr texHilbertLUT = nullptr; + eastl::unique_ptr texWorkingDepth = nullptr; + winrt::com_ptr uavWorkingDepth[5] = { nullptr }; + eastl::unique_ptr texPrevDepth = nullptr; + eastl::unique_ptr texRadiance = nullptr; + eastl::unique_ptr texAccumFrames = nullptr; + eastl::unique_ptr texGI0 = { nullptr }; + eastl::unique_ptr texGI1 = nullptr; + eastl::unique_ptr texPrevGIAlbedo = { nullptr }; + + winrt::com_ptr linearClampSampler = nullptr; + 
winrt::com_ptr pointClampSampler = nullptr; + + winrt::com_ptr hilbertLutCompute = nullptr; + winrt::com_ptr prefilterDepthsCompute = nullptr; + winrt::com_ptr radianceDisoccCompute = nullptr; + winrt::com_ptr giCompute = nullptr; + winrt::com_ptr upsampleCompute = nullptr; + winrt::com_ptr outputCompute = nullptr; +}; \ No newline at end of file diff --git a/src/Features/ScreenSpaceShadows.cpp b/src/Features/ScreenSpaceShadows.cpp index 9581e4bc3..8b332e371 100644 --- a/src/Features/ScreenSpaceShadows.cpp +++ b/src/Features/ScreenSpaceShadows.cpp @@ -1,502 +1,330 @@ #include "ScreenSpaceShadows.h" +#include "Deferred.h" #include "State.h" #include "Util.h" +#pragma warning(push) +#pragma warning(disable: 4838 4244) +#include "ScreenSpaceShadows/bend_sss_cpu.h" +#pragma warning(pop) + using RE::RENDER_TARGETS; NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT( - ScreenSpaceShadows::Settings, - MaxSamples, - FarDistanceScale, - FarThicknessScale, - FarHardness, - NearDistance, - NearThickness, - NearHardness, - BlurRadius, - BlurDropoff, - Enabled) + ScreenSpaceShadows::BendSettings, + Enable, + EnableNormalMappingShadows, + SampleCount, + SurfaceThickness, + BilinearThreshold, + ShadowContrast) void ScreenSpaceShadows::DrawSettings() { if (ImGui::TreeNodeEx("General", ImGuiTreeNodeFlags_DefaultOpen)) { - ImGui::Checkbox("Enable Screen-Space Shadows", &settings.Enabled); - if (auto _tt = Util::HoverTooltipWrapper()) { - ImGui::Text("Enables screen-space shadows."); - } + ImGui::Checkbox("Enable", (bool*)&bendSettings.Enable); + ImGui::Checkbox("Enable Normal Mapping Shadows", (bool*)&bendSettings.EnableNormalMappingShadows); + ImGui::SliderInt("Sample Count", (int*)&bendSettings.SampleCount, 1, 4); - ImGui::SliderInt("Max Samples", (int*)&settings.MaxSamples, 1, 512); - if (auto _tt = Util::HoverTooltipWrapper()) { - ImGui::Text("Controls the accuracy of traced shadows."); - } + ImGui::SliderFloat("SurfaceThickness", &bendSettings.SurfaceThickness, 0.005f, 0.05f); + 
ImGui::SliderFloat("BilinearThreshold", &bendSettings.BilinearThreshold, 0.02f, 1.0f); + ImGui::SliderFloat("ShadowContrast", &bendSettings.ShadowContrast, 0.0f, 4.0f); ImGui::Spacing(); ImGui::Spacing(); ImGui::TreePop(); } +} - if (ImGui::TreeNodeEx("Blur Filter", ImGuiTreeNodeFlags_DefaultOpen)) { - ImGui::SliderFloat("Blur Radius", &settings.BlurRadius, 0, 1); - if (auto _tt = Util::HoverTooltipWrapper()) { - ImGui::Text("Blur radius."); - } - - ImGui::SliderFloat("Blur Depth Dropoff", &settings.BlurDropoff, 0.001f, 0.1f); - if (auto _tt = Util::HoverTooltipWrapper()) { - ImGui::Text("Blur depth dropoff."); - } - - ImGui::Spacing(); - ImGui::Spacing(); - ImGui::TreePop(); +void ScreenSpaceShadows::ClearShaderCache() +{ + if (raymarchCS) { + raymarchCS->Release(); + raymarchCS = nullptr; } - - if (ImGui::TreeNodeEx("Near Shadows", ImGuiTreeNodeFlags_DefaultOpen)) { - ImGui::SliderFloat("Near Distance", &settings.NearDistance, 0.25f, 128); - if (auto _tt = Util::HoverTooltipWrapper()) { - ImGui::Text("Near Shadow Distance."); - } - - ImGui::SliderFloat("Near Thickness", &settings.NearThickness, 0, 128); - if (auto _tt = Util::HoverTooltipWrapper()) { - ImGui::Text("Near Shadow Thickness."); - } - ImGui::SliderFloat("Near Hardness", &settings.NearHardness, 0, 64); - if (auto _tt = Util::HoverTooltipWrapper()) { - ImGui::Text("Near Shadow Hardness."); - } - - ImGui::Spacing(); - ImGui::Spacing(); - ImGui::TreePop(); + if (raymarchRightCS) { + raymarchRightCS->Release(); + raymarchRightCS = nullptr; } - - if (ImGui::TreeNodeEx("Far Shadows", ImGuiTreeNodeFlags_DefaultOpen)) { - ImGui::SliderFloat("Far Distance Scale", &settings.FarDistanceScale, 0, 1); - if (auto _tt = Util::HoverTooltipWrapper()) { - ImGui::Text("Far Shadow Distance Scale."); - } - ImGui::SliderFloat("Far Thickness Scale", &settings.FarThicknessScale, 0, 1); - if (auto _tt = Util::HoverTooltipWrapper()) { - ImGui::Text("Far Shadow Thickness Scale."); - } - ImGui::SliderFloat("Far Hardness", 
&settings.FarHardness, 0, 64); - if (auto _tt = Util::HoverTooltipWrapper()) { - ImGui::Text("Far Shadow Hardness."); - } - - ImGui::TreePop(); + if (normalMappingShadowsCS) { + normalMappingShadowsCS->Release(); + normalMappingShadowsCS = nullptr; } } -enum class GrassShaderTechniques +ID3D11ComputeShader* ScreenSpaceShadows::GetComputeRaymarch() { - RenderDepth = 8, -}; + static uint sampleCount = bendSettings.SampleCount; -void ScreenSpaceShadows::ModifyGrass(const RE::BSShader*, const uint32_t descriptor) -{ - const auto technique = descriptor & 0b1111; - if (technique != static_cast(GrassShaderTechniques::RenderDepth)) { - ModifyLighting(nullptr, 0); + if (sampleCount != bendSettings.SampleCount) { + sampleCount = bendSettings.SampleCount; + if (raymarchCS) { + raymarchCS->Release(); + raymarchCS = nullptr; + } } -} - -enum class DistantTreeShaderTechniques -{ - DistantTreeBlock = 0, - Depth = 1, -}; -void ScreenSpaceShadows::ModifyDistantTree(const RE::BSShader*, const uint32_t descriptor) -{ - const auto technique = descriptor & 1; - if (technique != static_cast(DistantTreeShaderTechniques::Depth)) { - ModifyLighting(nullptr, 0); + if (!raymarchCS) { + logger::debug("Compiling RaymarchCS"); + raymarchCS = (ID3D11ComputeShader*)Util::CompileShader(L"Data\\Shaders\\ScreenSpaceShadows\\RaymarchCS.hlsl", { { "SAMPLE_COUNT", std::format("{}", sampleCount * 64).c_str() } }, "cs_5_0"); } + return raymarchCS; } -enum class LightingShaderTechniques -{ - None = 0, - Envmap = 1, - Glowmap = 2, - Parallax = 3, - Facegen = 4, - FacegenRGBTint = 5, - Hair = 6, - ParallaxOcc = 7, - MTLand = 8, - LODLand = 9, - Snow = 10, // unused - MultilayerParallax = 11, - TreeAnim = 12, - LODObjects = 13, - MultiIndexSparkle = 14, - LODObjectHD = 15, - Eye = 16, - Cloud = 17, // unused - LODLandNoise = 18, - MTLandLODBlend = 19, - Outline = 20, -}; - -uint32_t GetTechnique(uint32_t descriptor) -{ - return 0x3F & (descriptor >> 24); -} - -void ScreenSpaceShadows::ClearShaderCache() 
+ID3D11ComputeShader* ScreenSpaceShadows::GetComputeRaymarchRight() { - if (raymarchProgram) { - raymarchProgram->Release(); - raymarchProgram = nullptr; - } - if (horizontalBlurProgram) { - horizontalBlurProgram->Release(); - horizontalBlurProgram = nullptr; - } - if (verticalBlurProgram) { - verticalBlurProgram->Release(); - verticalBlurProgram = nullptr; - } -} + static uint sampleCount = bendSettings.SampleCount; -ID3D11ComputeShader* ScreenSpaceShadows::GetComputeShader() -{ - if (!raymarchProgram) { - logger::debug("Compiling raymarchProgram"); - raymarchProgram = (ID3D11ComputeShader*)Util::CompileShader(L"Data\\Shaders\\ScreenSpaceShadows\\RaymarchCS.hlsl", {}, "cs_5_0"); + if (sampleCount != bendSettings.SampleCount) { + sampleCount = bendSettings.SampleCount; + if (raymarchRightCS) { + raymarchRightCS->Release(); + raymarchRightCS = nullptr; + } } - return raymarchProgram; -} -ID3D11ComputeShader* ScreenSpaceShadows::GetComputeShaderHorizontalBlur() -{ - if (!horizontalBlurProgram) { - logger::debug("Compiling horizontalBlurProgram"); - horizontalBlurProgram = (ID3D11ComputeShader*)Util::CompileShader(L"Data\\Shaders\\ScreenSpaceShadows\\FilterCS.hlsl", { { "HORIZONTAL", "" } }, "cs_5_0"); + if (!raymarchRightCS) { + logger::debug("Compiling RaymarchCS RIGHT"); + raymarchRightCS = (ID3D11ComputeShader*)Util::CompileShader(L"Data\\Shaders\\ScreenSpaceShadows\\RaymarchCS.hlsl", { { "SAMPLE_COUNT", std::format("{}", sampleCount * 64).c_str() }, { "RIGHT", "" } }, "cs_5_0"); } - return horizontalBlurProgram; + return raymarchRightCS; } -ID3D11ComputeShader* ScreenSpaceShadows::GetComputeShaderVerticalBlur() +ID3D11ComputeShader* ScreenSpaceShadows::GetComputeNormalMappingShadows() { - if (!verticalBlurProgram) { - verticalBlurProgram = (ID3D11ComputeShader*)Util::CompileShader(L"Data\\Shaders\\ScreenSpaceShadows\\FilterCS.hlsl", { { "VERTICAL", "" } }, "cs_5_0"); - logger::debug("Compiling verticalBlurProgram"); + if (!normalMappingShadowsCS) { + 
logger::debug("Compiling NormalMappingShadowsCS"); + normalMappingShadowsCS = (ID3D11ComputeShader*)Util::CompileShader(L"Data\\Shaders\\ScreenSpaceShadows\\NormalMappingShadowsCS.hlsl", {}, "cs_5_0"); } - return verticalBlurProgram; + return normalMappingShadowsCS; } -void ScreenSpaceShadows::ModifyLighting(const RE::BSShader*, const uint32_t) +void ScreenSpaceShadows::DrawShadows() { - if (!loaded) + if (!bendSettings.Enable) return; + auto renderer = RE::BSGraphics::Renderer::GetSingleton(); auto& context = State::GetSingleton()->context; + auto shadowState = State::GetSingleton()->shadowState; + auto viewport = RE::BSGraphics::State::GetSingleton(); + auto accumulator = RE::BSGraphics::BSShaderAccumulator::GetCurrentAccumulator(); auto dirLight = skyrim_cast(accumulator->GetRuntimeData().activeShadowSceneNode->GetRuntimeData().sunLight->light.get()); - bool skyLight = true; - if (auto sky = RE::Sky::GetSingleton()) - skyLight = sky->mode.get() == RE::Sky::Mode::kFull; - - if (dirLight && skyLight) { - auto renderer = RE::BSGraphics::Renderer::GetSingleton(); - - if (!screenSpaceShadowsTexture) { - { - logger::debug("Creating screenSpaceShadowsTexture"); - - auto& device = State::GetSingleton()->device; - - D3D11_SAMPLER_DESC samplerDesc = {}; - samplerDesc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; - samplerDesc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP; - samplerDesc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP; - samplerDesc.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP; - samplerDesc.MaxAnisotropy = 1; - samplerDesc.MinLOD = 0; - samplerDesc.MaxLOD = D3D11_FLOAT32_MAX; - DX::ThrowIfFailed(device->CreateSamplerState(&samplerDesc, &computeSampler)); - } - - { - auto shadowMask = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kSHADOW_MASK]; - - D3D11_TEXTURE2D_DESC texDesc{}; - shadowMask.texture->GetDesc(&texDesc); - texDesc.Format = DXGI_FORMAT_R16_FLOAT; - texDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_RENDER_TARGET; - 
screenSpaceShadowsTexture = new Texture2D(texDesc); - - texDesc.Width /= 2; - texDesc.Height /= 2; - screenSpaceShadowsTextureTemp = new Texture2D(texDesc); - - D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; - shadowMask.SRV->GetDesc(&srvDesc); - srvDesc.Format = texDesc.Format; - screenSpaceShadowsTexture->CreateSRV(srvDesc); - screenSpaceShadowsTextureTemp->CreateSRV(srvDesc); - - D3D11_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; - uavDesc.Format = texDesc.Format; - uavDesc.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D; - uavDesc.Texture2D.MipSlice = 0; - screenSpaceShadowsTexture->CreateUAV(uavDesc); - screenSpaceShadowsTextureTemp->CreateUAV(uavDesc); - } - } + auto state = State::GetSingleton(); - auto& shadowState = State::GetSingleton()->shadowState; + auto& directionNi = dirLight->GetWorldDirection(); + float3 light = { directionNi.x, directionNi.y, directionNi.z }; + light.Normalize(); + float4 lightProjection = float4(-light.x, -light.y, -light.z, 0.0f); - bool enableSSS = true; + Matrix viewProjMat = !REL::Module::IsVR() ? 
+ shadowState->GetRuntimeData().cameraData.getEye().viewProjMat : + shadowState->GetVRRuntimeData().cameraData.getEye().viewProjMat; - GET_INSTANCE_MEMBER(cubeMapRenderTarget, shadowState) + lightProjection = DirectX::SimpleMath::Vector4::Transform(lightProjection, viewProjMat); + float lightProjectionF[4] = { lightProjection.x, lightProjection.y, lightProjection.z, lightProjection.w }; - if (cubeMapRenderTarget == RE::RENDER_TARGETS_CUBEMAP::kREFLECTIONS) { - enableSSS = false; + int viewportSize[2] = { (int)state->screenWidth, (int)state->screenHeight }; - } else if (!renderedScreenCamera && settings.Enabled) { - renderedScreenCamera = true; + if (REL::Module::IsVR()) + viewportSize[0] /= 2; - // Backup the game state - struct OldState - { - ID3D11ShaderResourceView* srvs[2]; - ID3D11SamplerState* sampler; - ID3D11ComputeShader* shader; - ID3D11Buffer* buffer; - ID3D11UnorderedAccessView* uav; - ID3D11ClassInstance* instance; - UINT numInstances; - }; + int minRenderBounds[2] = { 0, 0 }; + int maxRenderBounds[2] = { + (int)((float)viewportSize[0] * viewport->GetRuntimeData().dynamicResolutionCurrentWidthScale), + (int)((float)viewportSize[1] * viewport->GetRuntimeData().dynamicResolutionCurrentHeightScale) + }; - OldState old{}; - context->CSGetShaderResources(0, 2, old.srvs); - context->CSGetSamplers(0, 1, &old.sampler); - context->CSGetShader(&old.shader, &old.instance, &old.numInstances); - context->CSGetConstantBuffers(0, 1, &old.buffer); - context->CSGetUnorderedAccessViews(0, 1, &old.uav); + auto depth = renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kPOST_ZPREPASS_COPY]; + context->CSSetShaderResources(0, 1, &depth.depthSRV); - { - auto viewport = RE::BSGraphics::State::GetSingleton(); + auto shadowMask = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGET::kSHADOW_MASK]; + context->CSSetUnorderedAccessViews(0, 1, &shadowMask.UAV, nullptr); - float resolutionX = screenSpaceShadowsTexture->desc.Width * 
viewport->GetRuntimeData().dynamicResolutionCurrentWidthScale; - float resolutionY = screenSpaceShadowsTexture->desc.Height * viewport->GetRuntimeData().dynamicResolutionCurrentHeightScale; + context->CSSetSamplers(0, 1, &pointBorderSampler); - { - RaymarchCB data{}; + auto buffer = raymarchCB->CB(); + context->CSSetConstantBuffers(1, 1, &buffer); - data.BufferDim.x = (float)screenSpaceShadowsTexture->desc.Width; - data.BufferDim.y = (float)screenSpaceShadowsTexture->desc.Height; + context->CSSetShader(GetComputeRaymarch(), nullptr, 0); - data.RcpBufferDim.x = 1.0f / data.BufferDim.x; - data.RcpBufferDim.y = 1.0f / data.BufferDim.y; + auto dispatchList = Bend::BuildDispatchList(lightProjectionF, viewportSize, minRenderBounds, maxRenderBounds); - data.DynamicRes.x = viewport->GetRuntimeData().dynamicResolutionCurrentWidthScale; - data.DynamicRes.y = viewport->GetRuntimeData().dynamicResolutionCurrentHeightScale; + for (int i = 0; i < dispatchList.DispatchCount; i++) { + auto dispatchData = dispatchList.Dispatch[i]; - data.DynamicRes.z = 1.0f / data.DynamicRes.x; - data.DynamicRes.w = 1.0f / data.DynamicRes.y; + RaymarchCB data{}; + data.LightCoordinate[0] = dispatchList.LightCoordinate_Shader[0]; + data.LightCoordinate[1] = dispatchList.LightCoordinate_Shader[1]; + data.LightCoordinate[2] = dispatchList.LightCoordinate_Shader[2]; + data.LightCoordinate[3] = dispatchList.LightCoordinate_Shader[3]; - for (int eyeIndex = 0; eyeIndex < (!REL::Module::IsVR() ? 
1 : 2); eyeIndex++) { - if (REL::Module::IsVR()) - data.ProjMatrix[eyeIndex] = shadowState->GetVRRuntimeData().cameraData.getEye(eyeIndex).projMat; - else - data.ProjMatrix[eyeIndex] = shadowState->GetRuntimeData().cameraData.getEye().projMat; + data.WaveOffset[0] = dispatchData.WaveOffset_Shader[0]; + data.WaveOffset[1] = dispatchData.WaveOffset_Shader[1]; - data.InvProjMatrix[eyeIndex] = data.ProjMatrix[eyeIndex].Invert(); - } + data.FarDepthValue = 1.0f; + data.NearDepthValue = 0.0f; - data.CameraData = Util::GetCameraData(); + data.InvDepthTextureSize[0] = 1.0f / (float)viewportSize[0]; + data.InvDepthTextureSize[1] = 1.0f / (float)viewportSize[1]; - auto& direction = dirLight->GetWorldDirection(); - float4 position{ -direction.x, -direction.y, -direction.z, 0.0f }; + data.settings = bendSettings; - auto viewMatrix = !REL::Module::IsVR() ? shadowState->GetRuntimeData().cameraData.getEye().viewMat : shadowState->GetVRRuntimeData().cameraData.getEye().viewMat; + raymarchCB->Update(data); - data.InvDirLightDirectionVS = float4::Transform(position, viewMatrix); + context->Dispatch(dispatchData.WaveCount[0], dispatchData.WaveCount[1], dispatchData.WaveCount[2]); + } - data.ShadowDistance = 10000.0f; + if (REL::Module::IsVR()) { + lightProjection = float4(-light.x, -light.y, -light.z, 0.0f); - data.Settings = settings; + viewProjMat = shadowState->GetVRRuntimeData().cameraData.getEye(1).viewProjMat; - raymarchCB->Update(data); - } + lightProjection = DirectX::SimpleMath::Vector4::Transform(lightProjection, viewProjMat); - ID3D11Buffer* buffer[1] = { raymarchCB->CB() }; - context->CSSetConstantBuffers(0, 1, buffer); + float lightProjectionRightF[4] = { lightProjection.x, lightProjection.y, lightProjection.z, lightProjection.w }; - context->CSSetSamplers(0, 1, &computeSampler); + context->CSSetShader(GetComputeRaymarchRight(), nullptr, 0); - auto depth = renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kPOST_ZPREPASS_COPY]; + dispatchList 
= Bend::BuildDispatchList(lightProjectionRightF, viewportSize, minRenderBounds, maxRenderBounds); - ID3D11ShaderResourceView* view = depth.depthSRV; - context->CSSetShaderResources(0, 1, &view); + for (int i = 0; i < dispatchList.DispatchCount; i++) { + auto dispatchData = dispatchList.Dispatch[i]; - ID3D11ShaderResourceView* stencilView = nullptr; - if (REL::Module::IsVR()) { - stencilView = depth.stencilSRV; - context->CSSetShaderResources(89, 1, &stencilView); - } + RaymarchCB data{}; + data.LightCoordinate[0] = dispatchList.LightCoordinate_Shader[0]; + data.LightCoordinate[1] = dispatchList.LightCoordinate_Shader[1]; + data.LightCoordinate[2] = dispatchList.LightCoordinate_Shader[2]; + data.LightCoordinate[3] = dispatchList.LightCoordinate_Shader[3]; - ID3D11UnorderedAccessView* uav = screenSpaceShadowsTexture->uav.get(); - context->CSSetUnorderedAccessViews(0, 1, &uav, nullptr); + data.WaveOffset[0] = dispatchData.WaveOffset_Shader[0]; + data.WaveOffset[1] = dispatchData.WaveOffset_Shader[1]; - auto shader = GetComputeShader(); - context->CSSetShader(shader, nullptr, 0); + data.FarDepthValue = 1.0f; + data.NearDepthValue = 0.0f; - context->Dispatch((uint32_t)std::ceil(resolutionX / 32.0f), (uint32_t)std::ceil(resolutionY / 32.0f), 1); + data.InvDepthTextureSize[0] = 1.0f / (float)viewportSize[0]; + data.InvDepthTextureSize[1] = 1.0f / (float)viewportSize[1]; - if (REL::Module::IsVR()) { - stencilView = nullptr; - context->CSSetShaderResources(89, 1, &stencilView); - } + data.settings = bendSettings; - // Filter - { - uav = nullptr; - context->CSSetUnorderedAccessViews(0, 1, &uav, nullptr); - view = nullptr; - context->CSSetShaderResources(1, 1, &view); + raymarchCB->Update(data); - view = screenSpaceShadowsTexture->srv.get(); + context->Dispatch(dispatchData.WaveCount[0], dispatchData.WaveCount[1], dispatchData.WaveCount[2]); + } + } - context->CSSetShaderResources(1, 1, &view); + ID3D11ShaderResourceView* views[1]{ nullptr }; + 
context->CSSetShaderResources(0, 1, views); - uav = screenSpaceShadowsTextureTemp->uav.get(); - context->CSSetUnorderedAccessViews(0, 1, &uav, nullptr); + ID3D11UnorderedAccessView* uavs[1]{ nullptr }; + context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); - shader = GetComputeShaderHorizontalBlur(); - context->CSSetShader(shader, nullptr, 0); + context->CSSetShader(nullptr, nullptr, 0); - context->Dispatch((uint32_t)std::ceil(resolutionX / 64.0f), (uint32_t)std::ceil(resolutionY / 64.0f), 1); - } + ID3D11SamplerState* sampler = nullptr; + context->CSSetSamplers(0, 1, &sampler); - { - uav = nullptr; - context->CSSetUnorderedAccessViews(0, 1, &uav, nullptr); - view = nullptr; - context->CSSetShaderResources(1, 1, &view); + buffer = nullptr; + context->CSSetConstantBuffers(1, 1, &buffer); - view = screenSpaceShadowsTextureTemp->srv.get(); + if (bendSettings.EnableNormalMappingShadows) + DrawNormalMappingShadows(); +} - context->CSSetShaderResources(1, 1, &view); +void ScreenSpaceShadows::DrawNormalMappingShadows() +{ + auto renderer = RE::BSGraphics::Renderer::GetSingleton(); + auto& context = State::GetSingleton()->context; - uav = screenSpaceShadowsTexture->uav.get(); - context->CSSetUnorderedAccessViews(0, 1, &uav, nullptr); + { + auto normalRoughness = renderer->GetRuntimeData().renderTargets[NORMALROUGHNESS]; + auto depth = renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kPOST_ZPREPASS_COPY]; + auto masks = renderer->GetRuntimeData().renderTargets[MASKS]; - shader = GetComputeShaderVerticalBlur(); - context->CSSetShader(shader, nullptr, 0); + ID3D11ShaderResourceView* srvs[3]{ normalRoughness.SRV, depth.depthSRV, masks.SRV }; + context->CSSetShaderResources(0, 3, srvs); - context->Dispatch((uint32_t)std::ceil(resolutionX / 64.0f), (uint32_t)std::ceil(resolutionY / 64.0f), 1); - } - } + auto shadowMask = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGET::kSHADOW_MASK]; + ID3D11UnorderedAccessView* uavs[1]{ 
shadowMask.UAV }; + context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); - // Restore the game state - context->CSSetShaderResources(0, 2, old.srvs); - for (uint8_t i = 0; i < 2; i++) - if (old.srvs[i]) - old.srvs[i]->Release(); + auto shader = GetComputeNormalMappingShadows(); + context->CSSetShader(shader, nullptr, 0); - context->CSSetSamplers(0, 1, &old.sampler); - if (old.sampler) - old.sampler->Release(); + auto state = State::GetSingleton(); + auto viewport = RE::BSGraphics::State::GetSingleton(); - context->CSSetShader(old.shader, &old.instance, old.numInstances); - if (old.shader) - old.shader->Release(); + float resolutionX = state->screenWidth * viewport->GetRuntimeData().dynamicResolutionCurrentWidthScale; + float resolutionY = state->screenHeight * viewport->GetRuntimeData().dynamicResolutionCurrentHeightScale; - context->CSSetConstantBuffers(0, 1, &old.buffer); - if (old.buffer) - old.buffer->Release(); + uint32_t dispatchX = (uint32_t)std::ceil(resolutionX / 32.0f); + uint32_t dispatchY = (uint32_t)std::ceil(resolutionY / 32.0f); - context->CSSetUnorderedAccessViews(0, 1, &old.uav, nullptr); - if (old.uav) - old.uav->Release(); - } + context->Dispatch(dispatchX, dispatchY, 1); + } - PerPass data{}; - data.EnableSSS = enableSSS && settings.Enabled; - perPass->Update(data); + ID3D11ShaderResourceView* views[3]{ nullptr, nullptr, nullptr }; + context->CSSetShaderResources(0, 3, views); - if (renderedScreenCamera) { - auto shadowMask = renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGET_DEPTHSTENCIL::kPOST_ZPREPASS_COPY]; - ID3D11ShaderResourceView* views[2]{}; - views[0] = shadowMask.depthSRV; - views[1] = screenSpaceShadowsTexture->srv.get(); - context->PSSetShaderResources(20, ARRAYSIZE(views), views); - } - } else { - PerPass data{}; - data.EnableSSS = false; - perPass->Update(data); - } + ID3D11UnorderedAccessView* uavs[1]{ nullptr }; + context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); - ID3D11Buffer* buffers[1]{}; - buffers[0] 
= perPass->CB(); - context->PSSetConstantBuffers(5, ARRAYSIZE(buffers), buffers); + ID3D11SamplerState* sampler = nullptr; + context->CSSetSamplers(0, 1, &sampler); - context->PSSetSamplers(14, 1, &computeSampler); + context->CSSetShader(nullptr, nullptr, 0); } -void ScreenSpaceShadows::Draw(const RE::BSShader* shader, const uint32_t descriptor) +void ScreenSpaceShadows::Draw(const RE::BSShader*, const uint32_t) { - switch (shader->shaderType.get()) { - case RE::BSShader::Type::Grass: - ModifyGrass(shader, descriptor); - break; - case RE::BSShader::Type::DistantTree: - ModifyDistantTree(shader, descriptor); - break; - case RE::BSShader::Type::Lighting: - ModifyLighting(shader, descriptor); - break; - } } void ScreenSpaceShadows::Load(json& o_json) { if (o_json[GetName()].is_object()) - settings = o_json[GetName()]; + bendSettings = o_json[GetName()]; Feature::Load(o_json); } void ScreenSpaceShadows::Save(json& o_json) { - o_json[GetName()] = settings; + o_json[GetName()] = bendSettings; } void ScreenSpaceShadows::RestoreDefaultSettings() { - settings = {}; + bendSettings = {}; } void ScreenSpaceShadows::SetupResources() { - perPass = new ConstantBuffer(ConstantBufferDesc()); raymarchCB = new ConstantBuffer(ConstantBufferDesc()); + + { + auto& device = State::GetSingleton()->device; + + D3D11_SAMPLER_DESC samplerDesc = {}; + samplerDesc.Filter = D3D11_FILTER_MIN_MAG_MIP_POINT; + samplerDesc.AddressU = D3D11_TEXTURE_ADDRESS_BORDER; + samplerDesc.AddressV = D3D11_TEXTURE_ADDRESS_BORDER; + samplerDesc.AddressW = D3D11_TEXTURE_ADDRESS_BORDER; + samplerDesc.MaxAnisotropy = 1; + samplerDesc.MinLOD = 0; + samplerDesc.MaxLOD = D3D11_FLOAT32_MAX; + samplerDesc.BorderColor[0] = 1.0f; + samplerDesc.BorderColor[1] = 1.0f; + samplerDesc.BorderColor[2] = 1.0f; + samplerDesc.BorderColor[3] = 1.0f; + DX::ThrowIfFailed(device->CreateSamplerState(&samplerDesc, &pointBorderSampler)); + } } void ScreenSpaceShadows::Reset() { - renderedScreenCamera = false; } - -bool 
ScreenSpaceShadows::HasShaderDefine(RE::BSShader::Type shaderType) -{ - switch (shaderType) { - case RE::BSShader::Type::Lighting: - case RE::BSShader::Type::Grass: - case RE::BSShader::Type::DistantTree: - return true; - default: - return false; - } -} \ No newline at end of file diff --git a/src/Features/ScreenSpaceShadows.h b/src/Features/ScreenSpaceShadows.h index d04146d69..6d05fa490 100644 --- a/src/Features/ScreenSpaceShadows.h +++ b/src/Features/ScreenSpaceShadows.h @@ -13,80 +13,63 @@ struct ScreenSpaceShadows : Feature virtual inline std::string GetName() { return "Screen-Space Shadows"; } virtual inline std::string GetShortName() { return "ScreenSpaceShadows"; } - inline std::string_view GetShaderDefineName() override { return "SCREEN_SPACE_SHADOWS"; } - bool HasShaderDefine(RE::BSShader::Type shaderType) override; - - struct Settings + struct BendSettings { - uint32_t MaxSamples = !REL::Module::IsVR() ? 24u : 6u; - float FarDistanceScale = 0.025f; - float FarThicknessScale = 0.025f; - float FarHardness = 8.0f; - float NearDistance = 16.0f; - float NearThickness = 2.0f; - float NearHardness = 32.0f; - float BlurRadius = 0.5f; - float BlurDropoff = 0.005f; - bool Enabled = true; + float SurfaceThickness = 0.005f; + float BilinearThreshold = 0.02f; + float ShadowContrast = 1.0f; + uint Enable = 1; + uint EnableNormalMappingShadows = 1; + uint SampleCount = 1; }; - struct alignas(16) PerPass - { - uint32_t EnableSSS; - uint32_t FrameCount; - uint32_t pad[2]; - }; + BendSettings bendSettings; struct alignas(16) RaymarchCB { - float2 BufferDim; - float2 RcpBufferDim; - float4x4 ProjMatrix[2]; - float4x4 InvProjMatrix[2]; - float4 CameraData; - float4 DynamicRes; - float4 InvDirLightDirectionVS; - float ShadowDistance = 10000; - Settings Settings; - uint32_t pad[1]; - }; + // Runtime data returned from BuildDispatchList(): + float LightCoordinate[4]; // Values stored in DispatchList::LightCoordinate_Shader by BuildDispatchList() + int WaveOffset[2]; // Values 
stored in DispatchData::WaveOffset_Shader by BuildDispatchList() - Settings settings; + // Renderer Specific Values: + float FarDepthValue; // Set to the Depth Buffer Value for the far clip plane, as determined by renderer projection matrix setup (typically 0). + float NearDepthValue; // Set to the Depth Buffer Value for the near clip plane, as determined by renderer projection matrix setup (typically 1). - ConstantBuffer* perPass = nullptr; + // Sampling data: + float InvDepthTextureSize[2]; // Inverse of the texture dimensions for 'DepthTexture' (used to convert from pixel coordinates to UVs) + // If 'PointBorderSampler' is an Unnormalized sampler, then this value can be hard-coded to 1. + // The 'USE_HALF_PIXEL_OFFSET' macro might need to be defined if sampling at exact pixel coordinates isn't precise (e.g., if odd patterns appear in the shadow). - ID3D11SamplerState* computeSampler = nullptr; + BendSettings settings; + }; - Texture2D* screenSpaceShadowsTexture = nullptr; - Texture2D* screenSpaceShadowsTextureTemp = nullptr; + ID3D11SamplerState* pointBorderSampler = nullptr; ConstantBuffer* raymarchCB = nullptr; - ID3D11ComputeShader* raymarchProgram = nullptr; - - ID3D11ComputeShader* horizontalBlurProgram = nullptr; - ID3D11ComputeShader* verticalBlurProgram = nullptr; - - bool renderedScreenCamera = false; + ID3D11ComputeShader* raymarchCS = nullptr; + ID3D11ComputeShader* raymarchRightCS = nullptr; + ID3D11ComputeShader* normalMappingShadowsCS = nullptr; virtual void SetupResources(); virtual void Reset(); virtual void DrawSettings(); - void ModifyGrass(const RE::BSShader* shader, const uint32_t descriptor); - void ModifyDistantTree(const RE::BSShader*, const uint32_t descriptor); virtual void ClearShaderCache() override; - ID3D11ComputeShader* GetComputeShader(); - ID3D11ComputeShader* GetComputeShaderHorizontalBlur(); - ID3D11ComputeShader* GetComputeShaderVerticalBlur(); + ID3D11ComputeShader* GetComputeRaymarch(); + ID3D11ComputeShader* 
GetComputeRaymarchRight(); + + ID3D11ComputeShader* GetComputeNormalMappingShadows(); - void ModifyLighting(const RE::BSShader* shader, const uint32_t descriptor); virtual void Draw(const RE::BSShader* shader, const uint32_t descriptor); virtual void Load(json& o_json); virtual void Save(json& o_json); + void DrawShadows(); + void DrawNormalMappingShadows(); + virtual void RestoreDefaultSettings(); bool SupportsVR() override { return true; }; diff --git a/src/Features/ScreenSpaceShadows/bend_sss_cpu.h b/src/Features/ScreenSpaceShadows/bend_sss_cpu.h new file mode 100644 index 000000000..683bfb6b7 --- /dev/null +++ b/src/Features/ScreenSpaceShadows/bend_sss_cpu.h @@ -0,0 +1,245 @@ +#pragma once + +// Copyright 2023 Sony Interactive Entertainment. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// If you have feedback, or found this code useful, we'd love to hear from you. +// https://www.bendstudio.com +// https://www.twitter.com/bendstudio +// +// We are *always* looking for talented graphics and technical programmers! 
// https://www.bendstudio.com/careers

// Common screen space shadow projection code (CPU):
//--------------------------------------------------------------

namespace Bend
{
	// Generating a screen-space-shadow requires a number of Compute Shader dispatches
	// The compute shader reads from a depth buffer, and writes a single-channel texture of the same dimensions
	// Each dispatch is of the same compute shader, (see bend_sss_gpu.h).
	// The number of dispatches required varies based on the on-screen location of the light.
	// Typically there will be just one or two dispatches when the light is off-screen, and 4 to 6 when the light is on-screen.
	// Syncing the GPU between individual dispatches is not required

	// These structures and function are used to generate the number of dispatches, the wave count of each dispatch (X/Y/Z) and shader parameters for each dispatch

	struct DispatchData
	{
		int WaveCount[3];          // Compute Shader Dispatch(X,Y,Z) wave counts X/Y/Z
		int WaveOffset_Shader[2];  // This value is passed in to shader. It will be different for each dispatch
	};

	struct DispatchList
	{
		float LightCoordinate_Shader[4];  // This value is passed in to shader, this will be the same value for all dispatches for this light

		DispatchData Dispatch[8];  // List of dispatches (max count is 8)
		int DispatchCount;         // Number of compute dispatches written to the list
	};

	// Helper functions
	inline int bend_min(const int a, const int b) { return a > b ? b : a; }
	inline int bend_max(const int a, const int b) { return a > b ? a : b; }

	// Call this function on the CPU to get a list of Compute Shader dispatches required to generate a screen-space shadow for a given light
	// Syncing the GPU between individual dispatches is not required
	//
	// inLightProjection: Homogeneous coordinate of the light, result of {light} * {ViewProjectionMatrix}, (without W divide)
	//                    For infinite directional lights, use {light} = float4(normalized light direction, 0) and for point/spot lights use {light} = float4(light world position, 1)
	//
	// inViewportSize: width/height of the render target
	//
	// inMinRenderBounds / inMaxRenderBounds: 2D Screen Bounds of the light within the viewport, inclusive. [0,0], [width,height] for full-screen.
	//                    Note; the shader will still read/write outside of these bounds (by a maximum of 2 * WAVE_SIZE pixels), due to how the wavefront projection works.
	//
	// inExpandedZRange: Set to true if the rendering API expects z/w coordinate output from a vertex shader to be a [-1,+1] expanded range, and becomes [0,1] range in the depth buffer. Typically this is false.
	//
	// inWaveSize: Wavefront size of the compiled compute shader (currently only tested with 64).
	//             A value <= 0 yields an empty dispatch list (DispatchCount == 0) instead of dividing by zero.
	//
	// Returns: the light coordinate to pass to the shader plus up to 8 dispatches (at most two per quadrant around the light).
	//
	// Declared 'inline' (like the helpers above) because this function is defined in a header;
	// without it, including the header from more than one translation unit breaks the one-definition rule.
	inline DispatchList BuildDispatchList(float inLightProjection[4], int inViewportSize[2], int inMinRenderBounds[2], int inMaxRenderBounds[2], bool inExpandedZRange = false, int inWaveSize = 64)
	{
		DispatchList result = {};

		// Floating point division in the shader has a practical limit for precision when the light is *very* far off screen (~1m pixels+)
		// So when computing the light XY coordinate, use an adjusted w value to handle these extreme values
		float xy_light_w = inLightProjection[3];
		float FP_limit = 0.000002f * (float)inWaveSize;

		if (xy_light_w >= 0 && xy_light_w < FP_limit)
			xy_light_w = FP_limit;
		else if (xy_light_w < 0 && xy_light_w > -FP_limit)
			xy_light_w = -FP_limit;

		// Need precise XY pixel coordinates of the light
		result.LightCoordinate_Shader[0] = ((inLightProjection[0] / xy_light_w) * +0.5f + 0.5f) * (float)inViewportSize[0];
		result.LightCoordinate_Shader[1] = ((inLightProjection[1] / xy_light_w) * -0.5f + 0.5f) * (float)inViewportSize[1];
		result.LightCoordinate_Shader[2] = inLightProjection[3] == 0 ? 0 : (inLightProjection[2] / inLightProjection[3]);
		result.LightCoordinate_Shader[3] = inLightProjection[3] > 0 ? 1 : -1;

		if (inExpandedZRange) {
			result.LightCoordinate_Shader[2] = result.LightCoordinate_Shader[2] * 0.5f + 0.5f;
		}

		// Every quadrant computation below divides by the wave size; a non-positive
		// size cannot describe any dispatch, so report "nothing to dispatch".
		if (inWaveSize <= 0)
			return result;

		int light_xy[2] = { (int)(result.LightCoordinate_Shader[0] + 0.5f), (int)(result.LightCoordinate_Shader[1] + 0.5f) };

		// Make the bounds inclusive, relative to the light
		const int biased_bounds[4] = {
			inMinRenderBounds[0] - light_xy[0],
			-(inMaxRenderBounds[1] - light_xy[1]),
			inMaxRenderBounds[0] - light_xy[0],
			-(inMinRenderBounds[1] - light_xy[1]),
		};

		// Process 4 quadrants around the light center,
		// They each form a rectangle with one corner on the light XY coordinate
		// If the rectangle isn't square, it will need breaking in two on the larger axis
		// 0 = bottom left, 1 = bottom right, 2 = top left, 3 = top right
		for (int q = 0; q < 4; q++) {
			// Quads 0 and 3 needs to be +1 vertically, 1 and 2 need to be +1 horizontally
			bool vertical = q == 0 || q == 3;

			// Bounds relative to the quadrant
			const int bounds[4] = {
				bend_max(0, ((q & 1) ? biased_bounds[0] : -biased_bounds[2])) / inWaveSize,
				bend_max(0, ((q & 2) ? biased_bounds[1] : -biased_bounds[3])) / inWaveSize,
				bend_max(0, (((q & 1) ? biased_bounds[2] : -biased_bounds[0]) + inWaveSize * (vertical ? 1 : 2) - 1)) / inWaveSize,
				bend_max(0, (((q & 2) ? biased_bounds[3] : -biased_bounds[1]) + inWaveSize * (vertical ? 2 : 1) - 1)) / inWaveSize,
			};

			if ((bounds[2] - bounds[0]) > 0 && (bounds[3] - bounds[1]) > 0) {
				int bias_x = (q == 2 || q == 3) ? 1 : 0;
				int bias_y = (q == 1 || q == 3) ? 1 : 0;

				DispatchData& disp = result.Dispatch[result.DispatchCount++];

				disp.WaveCount[0] = inWaveSize;
				disp.WaveCount[1] = bounds[2] - bounds[0];
				disp.WaveCount[2] = bounds[3] - bounds[1];
				disp.WaveOffset_Shader[0] = ((q & 1) ? bounds[0] : -bounds[2]) + bias_x;
				disp.WaveOffset_Shader[1] = ((q & 2) ? -bounds[3] : bounds[1]) + bias_y;

				// We want the far corner of this quadrant relative to the light,
				// as we need to know where the diagonal light ray intersects with the edge of the bounds
				int axis_delta = +biased_bounds[0] - biased_bounds[1];
				if (q == 1)
					axis_delta = +biased_bounds[2] + biased_bounds[1];
				if (q == 2)
					axis_delta = -biased_bounds[0] - biased_bounds[3];
				if (q == 3)
					axis_delta = -biased_bounds[2] + biased_bounds[3];

				axis_delta = (axis_delta + inWaveSize - 1) / inWaveSize;

				if (axis_delta > 0) {
					DispatchData& disp2 = result.Dispatch[result.DispatchCount++];

					// Take copy of current volume
					disp2 = disp;

					if (q == 0) {
						// Split on Y, split becomes -1 larger on x
						disp2.WaveCount[2] = bend_min(disp.WaveCount[2], axis_delta);
						disp.WaveCount[2] -= disp2.WaveCount[2];
						disp2.WaveOffset_Shader[1] = disp.WaveOffset_Shader[1] + disp.WaveCount[2];
						disp2.WaveOffset_Shader[0]--;
						disp2.WaveCount[1]++;
					}
					if (q == 1) {
						// Split on X, split becomes +1 larger on y
						disp2.WaveCount[1] = bend_min(disp.WaveCount[1], axis_delta);
						disp.WaveCount[1] -= disp2.WaveCount[1];
						disp2.WaveOffset_Shader[0] = disp.WaveOffset_Shader[0] + disp.WaveCount[1];
						disp2.WaveCount[2]++;
					}
					if (q == 2) {
						// Split on X, split becomes -1 larger on y
						disp2.WaveCount[1] = bend_min(disp.WaveCount[1], axis_delta);
						disp.WaveCount[1] -= disp2.WaveCount[1];
						disp.WaveOffset_Shader[0] += disp2.WaveCount[1];
						disp2.WaveCount[2]++;
						disp2.WaveOffset_Shader[1]--;
					}
					if (q == 3) {
						// Split on Y, split becomes +1 larger on x
						disp2.WaveCount[2] = bend_min(disp.WaveCount[2], axis_delta);
						disp.WaveCount[2] -= disp2.WaveCount[2];
						disp.WaveOffset_Shader[1] += disp2.WaveCount[2];
						disp2.WaveCount[1]++;
					}

					// Remove if too small
					// (disp2 is always the last entry at this point, so popping it is a plain count decrement;
					//  popping disp overwrites it with whatever entry is currently last)
					if (disp2.WaveCount[1] <= 0 || disp2.WaveCount[2] <= 0) {
						disp2 = result.Dispatch[--result.DispatchCount];
					}
					if (disp.WaveCount[1] <= 0 || disp.WaveCount[2] <= 0) {
						disp = result.Dispatch[--result.DispatchCount];
					}
				}
			}
		}

		// Scale the shader values by the wave count, the shader expects this
		for (int i = 0; i < result.DispatchCount; i++) {
			result.Dispatch[i].WaveOffset_Shader[0] *= inWaveSize;
			result.Dispatch[i].WaveOffset_Shader[1] *= inWaveSize;
		}

		return result;
	}
}

/*
* Common Problems, and tips to solve them:
*
* The shader doesn't compile?
*	- The shader is only tested with HLSL (DXC compiler) and PS5 using a HLSL->PS5 wrapper
*	  It should be possible to compile for DX11 with FXC, with features removed (early-out's use of wave intrinsics is not supported in DX11)
*	  Other shader languages (e.g., glsl) are unsupported and will require manual conversion
*
* I have it compiled and running, but I'm seeing complete nonsense?
*	- Start by enabling 'DebugOutputWaveIndex' to visualize the wavefront layout.
*	  You should see wavefronts aligned and projected towards the light position / direction. If not, then 'inLightProjection' is probably wrong.
*
* Struggling to get 'inLightProjection' right?
*	- Think of this as similar to what a vertex-shader would output for position export; that is, a 4-component transformed position (e.g., SV_POSITION output from the VS)
*	  This usually means:
*		inLightProjection = float4(position,1) * ViewProjectionMatrix - positional light
*	  or:
*		inLightProjection = float4(direction,0) * ViewProjectionMatrix - directional light
*
* Almost everything is in shadow?
* Is the background around an object casting a shadow on to it?
* Does everything look like a weird paper cut-out?
* There is a big shadow halo around the light source?
*	- FarDepthValue / NearDepthValue may not be set correctly
*	  These are the values you'd see in the depth buffer for objects at the near and far clip plane (typically far = 0, near = 1)
*	  In very rare cases, the vertex shader may be expected to output a [-1,+1] z-range, which becomes [0,1] in the depth buffer.
*/
If this is the case, enable 'inExpandedZRange'. +* +* There are small glitchy lines all over the output? +* - Try with/without 'USE_HALF_PIXEL_OFFSET' defined in the shader. This is enabled by default for HLSL. +* +* Invalid shadows occasionally appear from offscreen / at the edge of the screen? +* - The 'PointBorderSampler' may not be setup correctly. The shader will intentionally read from offscreen, so the sampler must return an invalid value (e.g., FarDepthValue) in these cases by using clamp-to-border. +* +* Light is always coming from the same direction no matter how I rotate the camera? +* - You may be using just the ProjectionMatrix, not the ViewProjectionMatrix when computing 'inLightProjection'. +* +* Shadow is extremely thick, or very faded? +* - Try scaling 'SurfaceThickness' up and down, start with 0.005, and scale up/down in multiples of 2. Make sure to scale BilinearThreshold in a similar way. +* +* I see lots of striated patterns on flat surfaces? +* - 'BilinearThreshold' may not be set to an ideal value (enable 'DebugOutputEdgeMask' to debug it), or try enabling 'IgnoreEdgePixels' +* - These issues can be more common in otherwise occluded areas when BilinearSamplingOffsetMode is false, and may be prevalent if visualizing the shadow - but not noticeable in a lit scene. 
+*/ \ No newline at end of file diff --git a/src/Features/SubsurfaceScattering.cpp b/src/Features/SubsurfaceScattering.cpp index 9ff550a02..03e21d93e 100644 --- a/src/Features/SubsurfaceScattering.cpp +++ b/src/Features/SubsurfaceScattering.cpp @@ -2,6 +2,7 @@ #include #include "State.h" +#include #include NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT(SubsurfaceScattering::DiffusionProfile, @@ -155,7 +156,7 @@ void SubsurfaceScattering::CalculateKernel(DiffusionProfile& a_profile, Kernel& } } -void SubsurfaceScattering::DrawSSSWrapper(bool) +void SubsurfaceScattering::DrawSSSWrapper(bool a_firstPerson) { if (!SIE::ShaderCache::Instance().IsEnabled()) return; @@ -188,7 +189,7 @@ void SubsurfaceScattering::DrawSSSWrapper(bool) ID3D11DepthStencilView* nullDsv = nullptr; context->OMSetRenderTargets(8, nullViews, nullDsv); - DrawSSS(); + DrawSSS(a_firstPerson); context->PSSetShaderResources(0, 8, srvs); context->CSSetShaderResources(0, 8, srvsCS); @@ -214,7 +215,7 @@ void SubsurfaceScattering::DrawSSSWrapper(bool) State::GetSingleton()->EndPerfEvent(); } -void SubsurfaceScattering::DrawSSS() +void SubsurfaceScattering::DrawSSS(bool a_firstPerson) { auto viewport = RE::BSGraphics::State::GetSingleton(); @@ -251,16 +252,16 @@ void SubsurfaceScattering::DrawSSS() { ID3D11Buffer* buffer[1] = { blurCB->CB() }; - context->CSSetConstantBuffers(0, 1, buffer); + context->CSSetConstantBuffers(1, 1, buffer); + auto main = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMAIN]; auto depth = renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kPOST_ZPREPASS_COPY]; - auto snowSwap = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMAIN]; - auto normals = renderer->GetRuntimeData().renderTargets[normalsMode]; + auto mask = renderer->GetRuntimeData().renderTargets[a_firstPerson ? 
normalsMode : MASKS]; ID3D11ShaderResourceView* views[3]; - views[0] = snowSwap.SRV; + views[0] = main.SRV; views[1] = depth.depthSRV; - views[2] = normals.SRV; + views[2] = mask.SRV; context->CSSetShaderResources(0, 3, views); @@ -276,9 +277,6 @@ void SubsurfaceScattering::DrawSSS() context->Dispatch((uint32_t)std::ceil(resolutionX / 32.0f), (uint32_t)std::ceil(resolutionY / 32.0f), 1); } - ID3D11ShaderResourceView* view = nullptr; - context->CSSetShaderResources(2, 1, &view); - uav = nullptr; context->CSSetUnorderedAccessViews(0, 1, &uav, nullptr); @@ -287,8 +285,8 @@ void SubsurfaceScattering::DrawSSS() views[0] = blurHorizontalTemp->srv.get(); context->CSSetShaderResources(0, 1, views); - ID3D11UnorderedAccessView* uavs[2] = { snowSwap.UAV, normals.UAV }; - context->CSSetUnorderedAccessViews(0, 2, uavs, nullptr); + ID3D11UnorderedAccessView* uavs[1] = { main.UAV }; + context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); auto shader = GetComputeShaderVerticalBlur(); context->CSSetShader(shader, nullptr, 0); @@ -298,13 +296,13 @@ void SubsurfaceScattering::DrawSSS() } ID3D11Buffer* buffer = nullptr; - context->CSSetConstantBuffers(0, 1, &buffer); + context->CSSetConstantBuffers(1, 1, &buffer); ID3D11ShaderResourceView* views[3]{ nullptr, nullptr, nullptr }; context->CSSetShaderResources(0, 3, views); - ID3D11UnorderedAccessView* uavs[2]{ nullptr, nullptr }; - context->CSSetUnorderedAccessViews(0, 2, uavs, nullptr); + ID3D11UnorderedAccessView* uavs[1]{ nullptr }; + context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); ID3D11ComputeShader* shader = nullptr; context->CSSetShader(shader, nullptr, 0); @@ -404,6 +402,14 @@ void SubsurfaceScattering::ClearShaderCache() verticalSSBlur->Release(); verticalSSBlur = nullptr; } + if (horizontalSSBlurFP) { + horizontalSSBlurFP->Release(); + horizontalSSBlurFP = nullptr; + } + if (verticalSSBlurFP) { + verticalSSBlurFP->Release(); + verticalSSBlurFP = nullptr; + } if (clearBuffer) { clearBuffer->Release(); clearBuffer = 
nullptr; @@ -428,6 +434,24 @@ ID3D11ComputeShader* SubsurfaceScattering::GetComputeShaderVerticalBlur() return verticalSSBlur; } +ID3D11ComputeShader* SubsurfaceScattering::GetComputeShaderHorizontalBlurFP() +{ + if (!horizontalSSBlurFP) { + logger::debug("Compiling horizontalSSBlur FIRSTPERSON"); + horizontalSSBlurFP = (ID3D11ComputeShader*)Util::CompileShader(L"Data\\Shaders\\SubsurfaceScattering\\SeparableSSSCS.hlsl", { { "HORIZONTAL", "" }, { "FIRSTPERSON", "" } }, "cs_5_0"); + } + return horizontalSSBlurFP; +} + +ID3D11ComputeShader* SubsurfaceScattering::GetComputeShaderVerticalBlurFP() +{ + if (!verticalSSBlurFP) { + logger::debug("Compiling verticalSSBlur FIRSTPERSON"); + verticalSSBlurFP = (ID3D11ComputeShader*)Util::CompileShader(L"Data\\Shaders\\SubsurfaceScattering\\SeparableSSSCS.hlsl", { { "FIRSTPERSON", "" } }, "cs_5_0"); + } + return verticalSSBlurFP; +} + ID3D11ComputeShader* SubsurfaceScattering::GetComputeShaderClearBuffer() { if (!clearBuffer) { diff --git a/src/Features/SubsurfaceScattering.h b/src/Features/SubsurfaceScattering.h index dac25629d..62d99cc30 100644 --- a/src/Features/SubsurfaceScattering.h +++ b/src/Features/SubsurfaceScattering.h @@ -73,6 +73,8 @@ struct SubsurfaceScattering : Feature ID3D11ComputeShader* horizontalSSBlur = nullptr; ID3D11ComputeShader* verticalSSBlur = nullptr; + ID3D11ComputeShader* horizontalSSBlurFP = nullptr; + ID3D11ComputeShader* verticalSSBlurFP = nullptr; ID3D11ComputeShader* clearBuffer = nullptr; RE::RENDER_TARGET normalsMode = RE::RENDER_TARGET::kNONE; @@ -93,9 +95,9 @@ struct SubsurfaceScattering : Feature float3 Profile(DiffusionProfile& a_profile, float r); void CalculateKernel(DiffusionProfile& a_profile, Kernel& kernel); - void DrawSSSWrapper(bool a_firstPerson = false); + void DrawSSSWrapper(bool a_firstPerson); - void DrawSSS(); + void DrawSSS(bool a_firstPerson); virtual void Draw(const RE::BSShader* shader, const uint32_t descriptor); @@ -105,6 +107,8 @@ struct SubsurfaceScattering : Feature 
virtual void ClearShaderCache(); ID3D11ComputeShader* GetComputeShaderHorizontalBlur(); ID3D11ComputeShader* GetComputeShaderVerticalBlur(); + ID3D11ComputeShader* GetComputeShaderHorizontalBlurFP(); + ID3D11ComputeShader* GetComputeShaderVerticalBlurFP(); ID3D11ComputeShader* GetComputeShaderClearBuffer(); virtual void PostPostLoad() override; @@ -127,16 +131,6 @@ struct SubsurfaceScattering : Feature static inline REL::Relocation func; }; - struct Main_RenderWorld_End - { - static void thunk(RE::BSBatchRenderer* This, uint32_t StartRange, uint32_t EndRanges, uint32_t RenderFlags, int GeometryGroup) - { - func(This, StartRange, EndRanges, RenderFlags, GeometryGroup); // RenderSky - GetSingleton()->DrawSSSWrapper(); - } - static inline REL::Relocation func; - }; - struct Main_RenderFirstPersonView_Start { static void thunk(RE::BSBatchRenderer* This, uint32_t StartRange, uint32_t EndRanges, uint32_t RenderFlags, int GeometryGroup) @@ -170,11 +164,10 @@ struct SubsurfaceScattering : Feature static void Install() { stl::write_thunk_call(REL::RelocationID(99938, 106583).address() + REL::Relocate(0x8E, 0x84)); - stl::write_thunk_call(REL::RelocationID(99938, 106583).address() + REL::Relocate(0x247, 0x237, 0x24F)); - if (!REL::Module::IsVR()) { - stl::write_thunk_call(REL::RelocationID(99943, 106588).address() + REL::Relocate(0x70, 0x66)); - stl::write_thunk_call(REL::RelocationID(99943, 106588).address() + REL::Relocate(0x49C, 0x47E, 0x4fc)); - } + //if (!REL::Module::IsVR()) { + // stl::write_thunk_call(REL::RelocationID(99943, 106588).address() + REL::Relocate(0x70, 0x66)); + // stl::write_thunk_call(REL::RelocationID(99943, 106588).address() + REL::Relocate(0x49C, 0x47E, 0x4fc)); + //} stl::write_vfunc<0x6, BSLightingShader_SetupGeometry>(RE::VTABLE_BSLightingShader[0]); logger::info("[SSS] Installed hooks"); } diff --git a/src/Features/TerrainBlending.cpp b/src/Features/TerrainBlending.cpp index 64dc5d5d2..4eae2ecec 100644 --- a/src/Features/TerrainBlending.cpp +++ 
b/src/Features/TerrainBlending.cpp @@ -1,6 +1,6 @@ #include "TerrainBlending.h" -#include "Bindings.h" +#include "Deferred.h" #include "State.h" void TerrainBlending::DrawSettings() @@ -57,45 +57,45 @@ void TerrainBlending::PostPostLoad() Hooks::Install(); } -void TerrainBlending::SetupGeometry(RE::BSRenderPass* a_pass) +void TerrainBlending::SetupGeometry(RE::BSRenderPass*) { - if (!enableBlending) { - Bindings::GetSingleton()->SetOverwriteTerrainMode(false); - Bindings::GetSingleton()->SetOverwriteTerrainMaskingMode(Bindings::TerrainMaskMode::kNone); - return; - } - - bool validPass = TerrainBlending::ValidBlendingPass(a_pass); - - if (a_pass->shaderProperty->flags.all(RE::BSShaderProperty::EShaderPropertyFlag::kMultiTextureLandscape)) { - if (validPass) { - Bindings::GetSingleton()->SetOverwriteTerrainMode(true); - Bindings::GetSingleton()->SetOverwriteTerrainMaskingMode(Bindings::TerrainMaskMode::kRead); - - auto& context = State::GetSingleton()->context; - auto view = Bindings::GetSingleton()->terrainBlendingMask ? Bindings::GetSingleton()->terrainBlendingMask->srv.get() : nullptr; - if (view) - context->PSSetShaderResources(35, 1, &view); - } else { - Bindings::GetSingleton()->SetOverwriteTerrainMode(false); - Bindings::GetSingleton()->SetOverwriteTerrainMaskingMode(Bindings::TerrainMaskMode::kNone); - } - } else { - Bindings::GetSingleton()->SetOverwriteTerrainMode(false); - bool staticReference = false; - if (validPass) { - if (auto ref = a_pass->geometry->GetUserData()) { - if (auto base = ref->GetBaseObject()) { - if (base->As()) { - staticReference = true; - } - } - } - } - Bindings::GetSingleton()->SetOverwriteTerrainMaskingMode(validPass && !staticReference ? - (!REL::Module::IsVR() ? 
- Bindings::TerrainMaskMode::kWrite : - Bindings::TerrainMaskMode::kRead) : // Fix VR artifact where static objects would appear shifted in each eye - Bindings::TerrainMaskMode::kNone); - } + //if (!enableBlending) { + // Deferred::GetSingleton()->SetOverwriteTerrainMode(false); + // Deferred::GetSingleton()->SetOverwriteTerrainMaskingMode(Deferred::TerrainMaskMode::kNone); + // return; + //} + + //bool validPass = TerrainBlending::ValidBlendingPass(a_pass); + + //if (a_pass->shaderProperty->flags.all(RE::BSShaderProperty::EShaderPropertyFlag::kMultiTextureLandscape)) { + // if (validPass) { + // Deferred::GetSingleton()->SetOverwriteTerrainMode(true); + // Deferred::GetSingleton()->SetOverwriteTerrainMaskingMode(Deferred::TerrainMaskMode::kRead); + + // auto context = RE::BSGraphics::Renderer::GetSingleton()->GetRuntimeData().context; + // auto view = Deferred::GetSingleton()->terrainBlendingMask ? Deferred::GetSingleton()->terrainBlendingMask->srv.get() : nullptr; + // if (view) + // context->PSSetShaderResources(35, 1, &view); + // } else { + // Deferred::GetSingleton()->SetOverwriteTerrainMode(false); + // Deferred::GetSingleton()->SetOverwriteTerrainMaskingMode(Deferred::TerrainMaskMode::kNone); + // } + //} else { + // Deferred::GetSingleton()->SetOverwriteTerrainMode(false); + // bool staticReference = false; + // if (validPass) { + // if (auto ref = a_pass->geometry->GetUserData()) { + // if (auto base = ref->GetBaseObject()) { + // if (base->As()) { + // staticReference = true; + // } + // } + // } + // } + // Deferred::GetSingleton()->SetOverwriteTerrainMaskingMode(validPass && !staticReference ? + // (!REL::Module::IsVR() ? 
+ // Deferred::TerrainMaskMode::kWrite : + // Deferred::TerrainMaskMode::kRead) : // Fix VR artifact where static objects would appear shifted in each eye + // Deferred::TerrainMaskMode::kNone); + //} } \ No newline at end of file diff --git a/src/Features/TerrainBlending.h b/src/Features/TerrainBlending.h index 072533e35..104721122 100644 --- a/src/Features/TerrainBlending.h +++ b/src/Features/TerrainBlending.h @@ -1,7 +1,7 @@ #pragma once -#include "Bindings.h" #include "Buffer.h" +#include "Deferred.h" #include "Feature.h" struct TerrainBlending : Feature @@ -60,8 +60,8 @@ struct TerrainBlending : Feature static void thunk(RE::BSShader* This, RE::BSRenderPass* a_pass, uint32_t a_renderFlags) { func(This, a_pass, a_renderFlags); - Bindings::GetSingleton()->SetOverwriteTerrainMode(false); - Bindings::GetSingleton()->SetOverwriteTerrainMaskingMode(Bindings::TerrainMaskMode::kNone); + // Deferred::GetSingleton()->SetOverwriteTerrainMode(false); + // Deferred::GetSingleton()->SetOverwriteTerrainMaskingMode(Deferred::TerrainMaskMode::kNone); } static inline REL::Relocation func; }; diff --git a/src/Features/TerrainOcclusion.cpp b/src/Features/TerrainOcclusion.cpp new file mode 100644 index 000000000..f0436e4c2 --- /dev/null +++ b/src/Features/TerrainOcclusion.cpp @@ -0,0 +1,593 @@ +#include "TerrainOcclusion.h" + +#include "Deferred.h" +#include "State.h" +#include "Util.h" + +#include + +#include +#include + +NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT( + TerrainOcclusion::Settings::AOGenSettings, + AoDistance, + SliceCount, + SampleCount) + +NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT( + TerrainOcclusion::Settings::EffectSettings, + EnableTerrainShadow, + EnableTerrainAO, + HeightBias, + ShadowSofteningRadiusAngle, + AOPower, + AOFadeOutHeight) + +NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT( + TerrainOcclusion::Settings, + AoGen, + Effect) + +void TerrainOcclusion::Load(json& o_json) +{ + if (o_json[GetName()].is_object()) + settings = o_json[GetName()]; 
+ + Feature::Load(o_json); +} + +void TerrainOcclusion::Save(json& o_json) +{ + o_json[GetName()] = settings; +} + +void TerrainOcclusion::DrawSettings() +{ + ImGui::Checkbox("Enable Terrain Shadow", (bool*)&settings.Effect.EnableTerrainShadow); + ImGui::Checkbox("Enable Terrain AO", (bool*)&settings.Effect.EnableTerrainAO); + + ImGui::SliderFloat("Height Map Bias", &settings.Effect.HeightBias, -2000.f, 0.f, "%.0f units"); + + ImGui::SeparatorText("Shadow"); + { + // ImGui::SliderAngle("Softening", &settings.Effect.ShadowSofteningRadiusAngle, .1f, 10.f, "%.2f deg", ImGuiSliderFlags_AlwaysClamp); + // if (auto _tt = Util::HoverTooltipWrapper()) + // ImGui::Text("Controls the solid angle of sunlight, making terrain shadows softer."); + + ImGui::SliderFloat2("Fade Distance", &settings.Effect.ShadowFadeDistance.x, 0, 10000.f, "%.0f units"); + if (auto _tt = Util::HoverTooltipWrapper()) { + ImGui::Text("Shadows around you are and should be handled by vanilla shadow maps."); + if (auto settingCollection = RE::INIPrefSettingCollection::GetSingleton()) { + auto gameShadowDist = settingCollection->GetSetting("fShadowDistance:Display")->GetFloat(); + ImGui::Text("Your fShadowDistance setting: %f", gameShadowDist); + } + } + } + + ImGui::SeparatorText("AO"); + { + ImGui::SliderFloat("Mix", &settings.Effect.AOMix, 0, 1, "%.2f", ImGuiSliderFlags_AlwaysClamp); + ImGui::SliderFloat("Power", &settings.Effect.AOPower, 0.2f, 5, "%.2f"); + ImGui::SliderFloat("Fadeout Height", &settings.Effect.AOFadeOutHeight, 500, 5000, "%.0f units"); + if (auto _tt = Util::HoverTooltipWrapper()) + ImGui::Text("On the ground AO is the most prominent. 
Up to a certain height it will gradually fade out."); + + if (ImGui::TreeNodeEx("Precomputation", ImGuiTreeNodeFlags_DefaultOpen)) { + ImGui::SliderFloat("Distance", &settings.AoGen.AoDistance, 1.f / 32, 32, "%.2f cells"); + ImGui::InputScalar("Slices", ImGuiDataType_U32, &settings.AoGen.SliceCount); + ImGui::InputScalar("Samples", ImGuiDataType_U32, &settings.AoGen.SampleCount); + if (ImGui::Button("Force Regenerate AO", { -1, 0 })) + needPrecompute = true; + + ImGui::TreePop(); + } + } + + if (ImGui::CollapsingHeader("Debug")) { + std::string curr_worldspace = "N/A"; + std::string curr_worldspace_name = "N/A"; + auto tes = RE::TES::GetSingleton(); + if (tes) { + auto worldspace = tes->GetRuntimeData2().worldSpace; + if (worldspace) { + curr_worldspace = worldspace->GetFormEditorID(); + curr_worldspace_name = worldspace->GetName(); + } + } + ImGui::Text(fmt::format("Current worldspace: {} ({})", curr_worldspace, curr_worldspace_name).c_str()); + ImGui::Text(fmt::format("Has height map: {}", heightmaps.contains(curr_worldspace)).c_str()); + + ImGui::Separator(); + + if (texOcclusion) { + ImGui::BulletText("shadowUpdateCBData"); + ImGui::Indent(); + { + ImGui::Text(fmt::format("LightPxDir: ({}, {})", shadowUpdateCBData.LightPxDir.x, shadowUpdateCBData.LightPxDir.y).c_str()); + ImGui::Text(fmt::format("LightDeltaZ: ({}, {})", shadowUpdateCBData.LightDeltaZ.x, shadowUpdateCBData.LightDeltaZ.y).c_str()); + ImGui::Text(fmt::format("StartPxCoord: {}", shadowUpdateCBData.StartPxCoord).c_str()); + ImGui::Text(fmt::format("PxSize: ({}, {})", shadowUpdateCBData.PxSize.x, shadowUpdateCBData.PxSize.y).c_str()); + } + ImGui::Unindent(); + + ImGui::BulletText("texOcclusion"); + ImGui::Image(texOcclusion->srv.get(), { texOcclusion->desc.Width * .1f, texOcclusion->desc.Height * .1f }); + ImGui::BulletText("texNormalisedHeight"); + ImGui::Image(texNormalisedHeight->srv.get(), { texNormalisedHeight->desc.Width * .1f, texNormalisedHeight->desc.Height * .1f }); + 
ImGui::BulletText("texShadowHeight"); + ImGui::Image(texShadowHeight->srv.get(), { texShadowHeight->desc.Width * .1f, texShadowHeight->desc.Height * .1f }); + } + } +} + +void TerrainOcclusion::ClearShaderCache() +{ + if (occlusionProgram) { + occlusionProgram->Release(); + occlusionProgram = nullptr; + } + if (shadowUpdateProgram) { + shadowUpdateProgram->Release(); + shadowUpdateProgram = nullptr; + } + if (outputProgram) { + outputProgram->Release(); + outputProgram = nullptr; + } + + CompileComputeShaders(); +} + +void TerrainOcclusion::SetupResources() +{ + logger::debug("Listing height maps..."); + { + std::filesystem::path texture_dir{ L"Data\\textures\\heightmaps\\" }; + for (auto const& dir_entry : std::filesystem::directory_iterator{ texture_dir }) { + auto filename = dir_entry.path().filename(); + if (filename.extension() != ".dds") + continue; + + logger::debug("Found dds: {}", filename.string()); + + auto splitstr = pystring::split(filename.stem().string(), "."); + + if (splitstr.size() != 10) + logger::warn("{} has incorrect number ({} instead of 10) of fields", filename.string(), splitstr.size()); + + if (splitstr[1] == "HeightMap") { + HeightMapMetadata metadata; + try { + metadata.worldspace = splitstr[0]; + metadata.pos0.x = std::stoi(splitstr[2]) * 4096.f; + metadata.pos1.y = std::stoi(splitstr[3]) * 4096.f; + metadata.pos1.x = (std::stoi(splitstr[4]) + 1) * 4096.f; + metadata.pos0.y = (std::stoi(splitstr[5]) + 1) * 4096.f; + metadata.pos0.z = std::stoi(splitstr[6]) * 8.f; + metadata.pos1.z = std::stoi(splitstr[7]) * 8.f; + metadata.zRange.x = std::stoi(splitstr[8]) * 8.f; + metadata.zRange.y = std::stoi(splitstr[9]) * 8.f; + } catch (std::exception& e) { + logger::warn("Failed to parse {}. 
Error: {}", filename.string(), e.what()); + continue; + } + + metadata.dir = dir_entry.path().parent_path().wstring(); + metadata.filename = filename.string(); + + if (heightmaps.contains(metadata.worldspace)) { + logger::warn("{} has more than one height maps!", metadata.worldspace); + } else { + heightmaps[metadata.worldspace] = metadata; + } + } else if (splitstr[1] != "AO" && splitstr[1] != "Cone") + logger::warn("{} has unknown type ({})", filename.string(), splitstr[1]); + } + } + + logger::debug("Creating structured buffers..."); + { + D3D11_BUFFER_DESC sbDesc{}; + sbDesc.Usage = D3D11_USAGE_DYNAMIC; + sbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + sbDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + sbDesc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED; + sbDesc.StructureByteStride = sizeof(AOGenBuffer); + sbDesc.ByteWidth = sizeof(AOGenBuffer); + + D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc; + srvDesc.Format = DXGI_FORMAT_UNKNOWN; + srvDesc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER; + srvDesc.Buffer.FirstElement = 0; + srvDesc.Buffer.NumElements = 1; + + aoGenBuffer = std::make_unique(sbDesc); + aoGenBuffer->CreateSRV(srvDesc); + + sbDesc.StructureByteStride = sizeof(PerPass); + sbDesc.ByteWidth = sizeof(PerPass); + + perPass = std::make_unique(sbDesc); + perPass->CreateSRV(srvDesc); + } + + logger::debug("Creating constant buffers..."); + { + shadowUpdateCB = std::make_unique(ConstantBufferDesc()); + } + + CompileComputeShaders(); +} + +void TerrainOcclusion::CompileComputeShaders() +{ + logger::debug("Compiling shaders..."); + { + auto program_ptr = reinterpret_cast(Util::CompileShader(L"Data\\Shaders\\TerrainOcclusion\\AOGen.cs.hlsl", { {} }, "cs_5_0")); + if (program_ptr) + occlusionProgram.attach(program_ptr); + + program_ptr = reinterpret_cast(Util::CompileShader(L"Data\\Shaders\\TerrainOcclusion\\ShadowUpdate.cs.hlsl", { {} }, "cs_5_0")); + if (program_ptr) + shadowUpdateProgram.attach(program_ptr); + + program_ptr = 
reinterpret_cast(Util::CompileShader(L"Data\\Shaders\\TerrainOcclusion\\Output.cs.hlsl", { {} }, "cs_5_0")); + if (program_ptr) + outputProgram.attach(program_ptr); + } +} + +bool TerrainOcclusion::IsHeightMapReady() +{ + if (auto tes = RE::TES::GetSingleton()) + if (auto worldspace = tes->GetRuntimeData2().worldSpace) + return cachedHeightmap && cachedHeightmap->worldspace == worldspace->GetFormEditorID(); + return false; +} + +void TerrainOcclusion::Draw(const RE::BSShader*, const uint32_t) +{ +} + +void TerrainOcclusion::UpdateBuffer() +{ + auto& context = State::GetSingleton()->context; + + bool isHeightmapReady = IsHeightMapReady(); + + PerPass data = { + .effect = settings.Effect, + }; + data.effect.EnableTerrainAO = data.effect.EnableTerrainAO && isHeightmapReady; + data.effect.EnableTerrainShadow = data.effect.EnableTerrainShadow && isHeightmapReady; + + if (isHeightmapReady) { + data.effect.AOFadeOutHeight = 1.f / data.effect.AOFadeOutHeight; + + data.invScale = cachedHeightmap->pos1 - cachedHeightmap->pos0; + data.scale = float3(1.f, 1.f, 1.f) / data.invScale; + data.offset = -cachedHeightmap->pos0 * data.scale; + data.zRange = cachedHeightmap->zRange; + } + + D3D11_MAPPED_SUBRESOURCE mapped; + DX::ThrowIfFailed(context->Map(perPass->resource.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped)); + size_t bytes = sizeof(PerPass); + memcpy_s(mapped.pData, bytes, &data, bytes); + context->Unmap(perPass->resource.get(), 0); +} + +void TerrainOcclusion::LoadHeightmap() +{ + auto tes = RE::TES::GetSingleton(); + if (!tes) + return; + auto worldspace = tes->GetRuntimeData2().worldSpace; + if (!worldspace) + return; + std::string worldspace_name = worldspace->GetFormEditorID(); + if (!heightmaps.contains(worldspace_name)) // no height map for that, but we don't remove cache + return; + if (cachedHeightmap && cachedHeightmap->worldspace == worldspace_name) // already cached + return; + + auto& device = State::GetSingleton()->device; + + logger::debug("Loading height 
map..."); + { + auto& target_heightmap = heightmaps[worldspace_name]; + + DirectX::ScratchImage image; + try { + std::filesystem::path path{ target_heightmap.dir }; + path /= target_heightmap.filename; + + DX::ThrowIfFailed(LoadFromDDSFile(path.c_str(), DirectX::DDS_FLAGS_NONE, nullptr, image)); + } catch (const DX::com_exception& e) { + logger::error("{}", e.what()); + return; + } + + ID3D11Resource* pResource = nullptr; + try { + DX::ThrowIfFailed(CreateTexture(device, + image.GetImages(), image.GetImageCount(), + image.GetMetadata(), &pResource)); + } catch (const DX::com_exception& e) { + logger::error("{}", e.what()); + return; + } + + texHeightMap.release(); + texHeightMap = std::make_unique(reinterpret_cast(pResource)); + + D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = { + .Format = texHeightMap->desc.Format, + .ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D, + .Texture2D = { + .MostDetailedMip = 0, + .MipLevels = 1 } + }; + texHeightMap->CreateSRV(srvDesc); + + cachedHeightmap = &heightmaps[worldspace_name]; + } + + shadowUpdateIdx = 0; + needPrecompute = true; +} + +void TerrainOcclusion::Precompute() +{ + if (!cachedHeightmap) + return; + + auto& context = State::GetSingleton()->context; + + logger::info("Creating occlusion texture..."); + { + texOcclusion.release(); + texNormalisedHeight.release(); + texShadowHeight.release(); + + D3D11_TEXTURE2D_DESC texDesc = { + .Width = texHeightMap->desc.Width, + .Height = texHeightMap->desc.Height, + .MipLevels = 1, + .ArraySize = 1, + .Format = DXGI_FORMAT_R8_UNORM, + .SampleDesc = { .Count = 1 }, + .Usage = D3D11_USAGE_DEFAULT, + .BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS + }; + D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = { + .Format = texDesc.Format, + .ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D, + .Texture2D = { + .MostDetailedMip = 0, + .MipLevels = 1 } + }; + D3D11_UNORDERED_ACCESS_VIEW_DESC uavDesc = { + .Format = texDesc.Format, + .ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D, + 
.Texture2D = { .MipSlice = 0 } + }; + + texOcclusion = std::make_unique(texDesc); + texOcclusion->CreateSRV(srvDesc); + texOcclusion->CreateUAV(uavDesc); + + texDesc.Format = srvDesc.Format = uavDesc.Format = DXGI_FORMAT_R16_FLOAT; + texNormalisedHeight = std::make_unique(texDesc); + texNormalisedHeight->CreateSRV(srvDesc); + texNormalisedHeight->CreateUAV(uavDesc); + + texDesc.Format = srvDesc.Format = uavDesc.Format = DXGI_FORMAT_R16G16_FLOAT; + texShadowHeight = std::make_unique(texDesc); + texShadowHeight->CreateSRV(srvDesc); + texShadowHeight->CreateUAV(uavDesc); + } + + { + AOGenBuffer data = { + .settings = settings.AoGen, + .pos0 = cachedHeightmap->pos0, + .pos1 = cachedHeightmap->pos1, + .zRange = cachedHeightmap->zRange + }; + + data.settings.AoDistance *= 4096.f; + + D3D11_MAPPED_SUBRESOURCE mapped; + DX::ThrowIfFailed(context->Map(aoGenBuffer->resource.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped)); + size_t bytes = sizeof(AOGenBuffer); + memcpy_s(mapped.pData, bytes, &data, bytes); + context->Unmap(aoGenBuffer->resource.get(), 0); + } + + /* ---- BACKUP ---- */ + struct ShaderState + { + ID3D11ShaderResourceView* srvs[2] = { nullptr }; + ID3D11ComputeShader* shader = nullptr; + ID3D11UnorderedAccessView* uavs[2] = { nullptr }; + ID3D11ClassInstance* instance = nullptr; + ID3D11SamplerState* samplers[1] = { nullptr }; + UINT numInstances; + } old, newer; + context->CSGetShaderResources(0, ARRAYSIZE(old.srvs), old.srvs); + context->CSGetShader(&old.shader, &old.instance, &old.numInstances); + context->CSGetUnorderedAccessViews(0, ARRAYSIZE(old.uavs), old.uavs); + context->CSGetSamplers(0, ARRAYSIZE(old.samplers), old.samplers); + + /* ---- DISPATCH ---- */ + logger::info("Precomputation..."); + newer.srvs[0] = aoGenBuffer->srv.get(); + newer.srvs[1] = texHeightMap->srv.get(); + newer.uavs[0] = texOcclusion->uav.get(); + newer.uavs[1] = texNormalisedHeight->uav.get(); + + context->CSSetSamplers(0, ARRAYSIZE(newer.samplers), newer.samplers); + 
context->CSSetShaderResources(0, ARRAYSIZE(newer.srvs), newer.srvs); + context->CSSetUnorderedAccessViews(0, ARRAYSIZE(newer.uavs), newer.uavs, nullptr); + context->CSSetShader(occlusionProgram.get(), nullptr, 0); + context->Dispatch(((texOcclusion->desc.Width - 1) >> 5) + 1, ((texOcclusion->desc.Height - 1) >> 5) + 1, 1); + + /* ---- RESTORE ---- */ + context->CSSetShaderResources(0, ARRAYSIZE(old.srvs), old.srvs); + context->CSSetShader(old.shader, &old.instance, old.numInstances); + context->CSSetUnorderedAccessViews(0, ARRAYSIZE(old.uavs), old.uavs, nullptr); + context->CSSetSamplers(0, ARRAYSIZE(old.samplers), old.samplers); + + needPrecompute = false; +} + +void TerrainOcclusion::UpdateShadow() +{ + if (!IsHeightMapReady()) + return; + + auto& context = State::GetSingleton()->context; + auto accumulator = RE::BSGraphics::BSShaderAccumulator::GetCurrentAccumulator(); + auto sunLight = skyrim_cast(accumulator->GetRuntimeData().activeShadowSceneNode->GetRuntimeData().sunLight->light.get()); + if (!sunLight) + return; + + /* ---- UPDATE CB ---- */ + uint width = texNormalisedHeight->desc.Width; + uint height = texNormalisedHeight->desc.Height; + + // only update direction at the start of each cycle + static float2 cachedDirLightPxDir; + static float2 cachedDirLightDZRange; + static uint edgePxCoord; + static int signDir; + static uint maxUpdates; + if (shadowUpdateIdx == 0) { + auto direction = sunLight->GetWorldDirection(); + float3 dirLightDir = { direction.x, direction.y, direction.z }; + if (dirLightDir.z > 0) + dirLightDir = -dirLightDir; + + // in UV + float3 invScale = cachedHeightmap->pos1 - cachedHeightmap->pos0; + invScale.z = cachedHeightmap->zRange.y - cachedHeightmap->zRange.x; + float3 dirLightPxDir = dirLightDir / invScale; + dirLightPxDir.x *= width; + dirLightPxDir.y *= height; + + float stepMult; + if (abs(dirLightPxDir.x) >= abs(dirLightPxDir.y)) { + stepMult = 1.f / abs(dirLightPxDir.x); + edgePxCoord = dirLightPxDir.x > 0 ? 
0 : (width - 1); + signDir = dirLightPxDir.x > 0 ? 1 : -1; + maxUpdates = ((width - 1) >> 10) + 1; + } else { + stepMult = 1.f / abs(dirLightPxDir.y); + edgePxCoord = dirLightPxDir.y > 0 ? 0 : height - 1; + signDir = dirLightPxDir.y > 0 ? 1 : -1; + maxUpdates = ((height - 1) >> 10) + 1; + } + dirLightPxDir *= stepMult; + + cachedDirLightPxDir = { dirLightPxDir.x, dirLightPxDir.y }; + + // soft shadow angles + float lenUV = float2{ dirLightDir.x, dirLightDir.y }.Length(); + float dirLightAngle = atan2(-dirLightDir.z, lenUV); + float upperAngle = std::max(0.f, dirLightAngle - settings.Effect.ShadowSofteningRadiusAngle); + float lowerAngle = std::min(RE::NI_HALF_PI - 1e-2f, dirLightAngle + settings.Effect.ShadowSofteningRadiusAngle); + + cachedDirLightDZRange = -(lenUV / invScale.z * stepMult) * float2{ std::tan(upperAngle), std::tan(lowerAngle) }; + } + + shadowUpdateCBData = { + .LightPxDir = cachedDirLightPxDir, + .LightDeltaZ = cachedDirLightDZRange, + .StartPxCoord = edgePxCoord + signDir * shadowUpdateIdx * 1024u, + .PxSize = { 1.f / texNormalisedHeight->desc.Width, 1.f / texNormalisedHeight->desc.Height } + }; + shadowUpdateCB->Update(shadowUpdateCBData); + + shadowUpdateIdx = (shadowUpdateIdx + 1) % maxUpdates; + + /* ---- BACKUP ---- */ + struct ShaderState + { + ID3D11ShaderResourceView* srvs[1] = { nullptr }; + ID3D11ComputeShader* shader = nullptr; + ID3D11UnorderedAccessView* uavs[1] = { nullptr }; + ID3D11Buffer* buffer = nullptr; + } old, newer; + + /* ---- DISPATCH ---- */ + newer.srvs[0] = texNormalisedHeight->srv.get(); + newer.uavs[0] = texShadowHeight->uav.get(); + newer.buffer = shadowUpdateCB->CB(); + + context->CSSetShaderResources(0, ARRAYSIZE(newer.srvs), newer.srvs); + context->CSSetUnorderedAccessViews(0, ARRAYSIZE(newer.uavs), newer.uavs, nullptr); + context->CSSetConstantBuffers(1, 1, &newer.buffer); + context->CSSetShader(shadowUpdateProgram.get(), nullptr, 0); + context->Dispatch(abs(cachedDirLightPxDir.x) >= abs(cachedDirLightPxDir.y) ? 
height : width, 1, 1); + + /* ---- RESTORE ---- */ + context->CSSetShaderResources(0, ARRAYSIZE(old.srvs), old.srvs); + context->CSSetShader(old.shader, nullptr, 0); + context->CSSetUnorderedAccessViews(0, ARRAYSIZE(old.uavs), old.uavs, nullptr); + context->CSSetConstantBuffers(1, 1, &old.buffer); +} + +void TerrainOcclusion::DrawTerrainOcclusion() +{ + LoadHeightmap(); + UpdateBuffer(); + + if (!settings.Effect.EnableTerrainShadow && !settings.Effect.EnableTerrainAO) + return; + + if (needPrecompute) + Precompute(); + if (settings.Effect.EnableTerrainShadow) + UpdateShadow(); + + //////////////////////////////////////////////////////////////////////////////// + + auto renderer = RE::BSGraphics::Renderer::GetSingleton(); + auto& context = State::GetSingleton()->context; + auto deferred = Deferred::GetSingleton(); + + std::array srvs = { nullptr }; + std::array uavs = { nullptr }; + std::array samplers = { nullptr }; + + { + srvs.at(0) = renderer->GetDepthStencilData().depthStencils[RE::RENDER_TARGETS_DEPTHSTENCIL::kPOST_ZPREPASS_COPY].depthSRV; + srvs.at(1) = perPass->srv.get(); + if (texOcclusion) + srvs.at(2) = texOcclusion->srv.get(); + if (texNormalisedHeight) + srvs.at(3) = texNormalisedHeight->srv.get(); + if (texShadowHeight) + srvs.at(4) = texShadowHeight->srv.get(); + + uavs.at(0) = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGET::kSHADOW_MASK].UAV; + uavs.at(1) = deferred->giTexture->uav.get(); + + context->CSSetShaderResources(0, (uint)srvs.size(), srvs.data()); + context->CSSetUnorderedAccessViews(0, (uint)uavs.size(), uavs.data(), nullptr); + context->CSSetShader(outputProgram.get(), nullptr, 0); + context->Dispatch((deferred->giTexture->desc.Width + 31u) >> 5, (deferred->giTexture->desc.Height + 31u) >> 5, 1); + } + + // clean up + + srvs.fill(nullptr); + uavs.fill(nullptr); + samplers.fill(nullptr); + + context->CSSetShaderResources(0, (uint)srvs.size(), srvs.data()); + context->CSSetUnorderedAccessViews(0, (uint)uavs.size(), uavs.data(), 
nullptr); + context->CSSetSamplers(0, (uint)samplers.size(), samplers.data()); +} diff --git a/src/Features/TerrainOcclusion.h b/src/Features/TerrainOcclusion.h new file mode 100644 index 000000000..3fa2a5774 --- /dev/null +++ b/src/Features/TerrainOcclusion.h @@ -0,0 +1,122 @@ +#pragma once + +#include "Buffer.h" +#include "Feature.h" + +struct TerrainOcclusion : public Feature +{ + static TerrainOcclusion* GetSingleton() + { + static TerrainOcclusion singleton; + return std::addressof(singleton); + } + + virtual inline std::string GetName() { return "Terrain Occlusion"; } + virtual inline std::string GetShortName() { return "TerrainOcclusion"; } + inline std::string_view GetShaderDefineName() override { return "TERRA_OCC"; } + inline bool HasShaderDefine(RE::BSShader::Type type) override { return type == RE::BSShader::Type::Lighting; }; + + uint shadowUpdateIdx = 0; + + struct Settings + { + struct AOGenSettings + { + float AoDistance = 12; + uint SliceCount = 60; + uint SampleCount = 60; + } AoGen; + + struct EffectSettings + { + uint EnableTerrainShadow = true; + uint EnableTerrainAO = true; + + float HeightBias = -1000.f; // in game unit + + float ShadowSofteningRadiusAngle = 1.f * RE::NI_PI / 180.f; + float2 ShadowFadeDistance = { 1000.f, 2000.f }; + + float AOMix = 1.f; + float AOPower = 1.f; + float AOFadeOutHeight = 2000; + } Effect; + } settings; + + bool needPrecompute = false; + + struct HeightMapMetadata + { + std::wstring dir; + std::string filename; + std::string worldspace; + float3 pos0, pos1; // left-top-z=0 vs right-bottom-z=1 + float2 zRange; + }; + std::unordered_map heightmaps; + HeightMapMetadata* cachedHeightmap; + + struct AOGenBuffer + { + Settings::AOGenSettings settings; + + float3 pos0; + float3 pos1; + float2 zRange; + }; + std::unique_ptr aoGenBuffer = nullptr; + + struct ShadowUpdateCB + { + float2 LightPxDir; // direction on which light descends, from one pixel to next via dda + float2 LightDeltaZ; // per LightUVDir, upper penumbra 
and lower, should be negative + uint StartPxCoord; + float2 PxSize; + + float pad; + } shadowUpdateCBData; + static_assert(sizeof(ShadowUpdateCB) % 16 == 0); + std::unique_ptr shadowUpdateCB = nullptr; + + struct PerPass + { + Settings::EffectSettings effect; + + float3 scale; + float3 invScale; + float3 offset; + float2 zRange; + }; + std::unique_ptr perPass = nullptr; + + winrt::com_ptr occlusionProgram = nullptr; + winrt::com_ptr shadowUpdateProgram = nullptr; + winrt::com_ptr outputProgram = nullptr; + + std::unique_ptr texHeightMap = nullptr; + std::unique_ptr texOcclusion = nullptr; + std::unique_ptr texNormalisedHeight = nullptr; + std::unique_ptr texShadowHeight = nullptr; + + bool IsHeightMapReady(); + + virtual void SetupResources() override; + void CompileComputeShaders(); + + virtual void DrawSettings() override; + + virtual inline void Reset() override{}; + + virtual void Draw(const RE::BSShader*, const uint32_t) override; + void UpdateBuffer(); + void DrawTerrainOcclusion(); + void LoadHeightmap(); + void Precompute(); + void UpdateShadow(); + + virtual void Load(json& o_json) override; + virtual void Save(json&) override; + + virtual inline void RestoreDefaultSettings() override { settings = {}; } + virtual void ClearShaderCache() override; +}; \ No newline at end of file diff --git a/src/Hooks.cpp b/src/Hooks.cpp index 834203fe6..985e2781c 100644 --- a/src/Hooks.cpp +++ b/src/Hooks.cpp @@ -2,10 +2,11 @@ #include -#include "Bindings.h" +#include "Deferred.h" #include "Menu.h" #include "ShaderCache.h" #include "State.h" +#include "VariableRateShading.h" #include "ShaderTools/BSShaderHooks.h" @@ -142,13 +143,7 @@ decltype(&hk_BSGraphics_SetDirtyStates) ptr_BSGraphics_SetDirtyStates; void hk_BSGraphics_SetDirtyStates(bool isCompute) { - //auto& shaderCache = SIE::ShaderCache::Instance(); - - //if (shaderCache.IsEnabled()) - // Bindings::GetSingleton()->SetDirtyStates(isCompute); - (ptr_BSGraphics_SetDirtyStates)(isCompute); - 
State::GetSingleton()->Draw(); } @@ -400,6 +395,16 @@ namespace Hooks static inline REL::Relocation func; }; + struct CreateRenderTarget_ShadowMask + { + static void thunk(RE::BSGraphics::Renderer* This, RE::RENDER_TARGETS::RENDER_TARGET a_target, RE::BSGraphics::RenderTargetProperties* a_properties) + { + State::GetSingleton()->ModifyRenderTarget(a_target, a_properties); + func(This, a_target, a_properties); + } + static inline REL::Relocation func; + }; + void Install() { SKSE::AllocTrampoline(14); @@ -440,5 +445,6 @@ namespace Hooks stl::write_thunk_call(REL::RelocationID(100458, 107175).address() + REL::Relocate(0x3F0, 0x3F3, 0x548)); stl::write_thunk_call(REL::RelocationID(100458, 107175).address() + REL::Relocate(0x458, 0x45B, 0x5B0)); stl::write_thunk_call(REL::RelocationID(100458, 107175).address() + REL::Relocate(0x46B, 0x46E, 0x5C3)); + stl::write_thunk_call(REL::RelocationID(100458, 107175).address() + REL::Relocate(0x555, 0x554, 0x6b9)); } } \ No newline at end of file diff --git a/src/Menu.cpp b/src/Menu.cpp index aed9910b3..ec32f9ed4 100644 --- a/src/Menu.cpp +++ b/src/Menu.cpp @@ -11,6 +11,10 @@ #include "Feature.h" #include "Features/LightLimitFix/ParticleLights.h" +#include "Deferred.h" + +#include "VariableRateShading.h" + #define SETTING_MENU_TOGGLEKEY "Toggle Key" #define SETTING_MENU_SKIPKEY "Skip Compilation Key" #define SETTING_MENU_FONTSCALE "Font Scale" @@ -158,6 +162,8 @@ void Menu::DrawSettings() ImGui::TableNextColumn(); if (ImGui::Button("Clear Shader Cache", { -1, 0 })) { shaderCache.Clear(); + Deferred::GetSingleton()->ClearShaderCache(); + VariableRateShading::GetSingleton()->ClearShaderCache(); for (auto* feature : Feature::GetFeatureList()) { if (feature->loaded) { feature->ClearShaderCache(); @@ -425,6 +431,8 @@ void Menu::DrawSettings() ImGui::Separator(); + VariableRateShading::GetSingleton()->DrawSettings(); + if (ImGui::BeginTable("Feature Table", 2, ImGuiTableFlags_SizingStretchProp | ImGuiTableFlags_Resizable)) { 
ImGui::TableSetupColumn("##ListOfFeatures", 0, 3); ImGui::TableSetupColumn("##FeatureConfig", 0, 7); diff --git a/src/ShaderCache.cpp b/src/ShaderCache.cpp index 8a1614bb7..9e6bfb91d 100644 --- a/src/ShaderCache.cpp +++ b/src/ShaderCache.cpp @@ -7,6 +7,7 @@ #include #include +#include "Deferred.h" #include "Feature.h" #include "State.h" @@ -60,8 +61,11 @@ namespace SIE int lastIndex = 0; - if (technique == ShaderCache::LightingShaderTechniques::Outline) { + if (technique == ShaderCache::LightingShaderTechniques::Outline) defines[lastIndex++] = { "OUTLINE", nullptr }; + + if (descriptor & static_cast(ShaderCache::LightingShaderFlags::Deferred)) { + defines[lastIndex++] = { "DEFERRED", nullptr }; } for (auto* feature : Feature::GetFeatureList()) { @@ -102,6 +106,10 @@ namespace SIE defines[lastIndex++] = { "DO_ALPHA_TEST", nullptr }; } + if (descriptor & static_cast(ShaderCache::DistantTreeShaderFlags::Deferred)) { + defines[lastIndex++] = { "DEFERRED", nullptr }; + } + for (auto* feature : Feature::GetFeatureList()) { if (feature->loaded && feature->HasShaderDefine(RE::BSShader::Type::DistantTree)) { defines[lastIndex++] = { feature->GetShaderDefineName().data(), nullptr }; @@ -354,6 +362,11 @@ namespace SIE ++defines; } + if (descriptor & static_cast(ShaderCache::EffectShaderFlags::Deferred)) { + defines[0] = { "DEFERRED", nullptr }; + ++defines; + } + for (auto* feature : Feature::GetFeatureList()) { if (feature->loaded && feature->HasShaderDefine(RE::BSShader::Type::Effect)) { defines[0] = { feature->GetShaderDefineName().data(), nullptr }; @@ -1396,13 +1409,6 @@ namespace SIE RE::BSGraphics::VertexShader* ShaderCache::GetVertexShader(const RE::BSShader& shader, uint32_t descriptor) { - if (shader.shaderType.get() == RE::BSShader::Type::Effect) { - if (descriptor & static_cast(ShaderCache::EffectShaderFlags::Lighting)) { - } else { - return nullptr; - } - } - auto state = State::GetSingleton(); if (!((ShaderCache::IsSupportedShader(shader) || 
state->IsDeveloperMode() && state->IsShaderEnabled(shader) && ShaderCache::IsShaderSourceAvailable(shader)))) { return nullptr; @@ -1437,13 +1443,6 @@ namespace SIE RE::BSGraphics::PixelShader* ShaderCache::GetPixelShader(const RE::BSShader& shader, uint32_t descriptor) { - if (shader.shaderType.get() == RE::BSShader::Type::Effect) { - if (descriptor & static_cast(ShaderCache::EffectShaderFlags::Lighting)) { - } else { - return nullptr; - } - } - auto state = State::GetSingleton(); if (!((ShaderCache::IsSupportedShader(shader) || state->IsDeveloperMode() && state->IsShaderEnabled(shader) && ShaderCache::IsShaderSourceAvailable(shader)))) { return nullptr; diff --git a/src/ShaderCache.h b/src/ShaderCache.h index a326d4ce3..bf3b0ab02 100644 --- a/src/ShaderCache.h +++ b/src/ShaderCache.h @@ -118,7 +118,8 @@ namespace SIE return type == RE::BSShader::Type::Lighting || type == RE::BSShader::Type::DistantTree || type == RE::BSShader::Type::Water || - type == RE::BSShader::Type::Grass; + type == RE::BSShader::Type::Grass || + type == RE::BSShader::Type::Effect; } inline static bool IsSupportedShader(const RE::BSShader& shader) @@ -229,7 +230,10 @@ namespace SIE VC = 1 << 0, Skinned = 1 << 1, ModelSpaceNormals = 1 << 2, - // flags 3 to 8 are unused + // flags 3 to 8 are unused by vanilla + // Community Shaders start + Deferred = 1 << 4, + // Community Shaders end Specular = 1 << 9, SoftLighting = 1 << 10, RimLighting = 1 << 11, @@ -244,7 +248,7 @@ namespace SIE DoAlphaTest = 1 << 20, Snow = 1 << 21, CharacterLight = 1 << 22, - AdditionalAlphaMask = 1 << 23, + AdditionalAlphaMask = 1 << 23 }; enum class BloodSplatterShaderTechniques @@ -261,7 +265,8 @@ namespace SIE enum class DistantTreeShaderFlags { - AlphaTest = 0x10000, + Deferred = 1 << 8, + AlphaTest = 1 << 16, }; enum class SkyShaderTechniques @@ -348,6 +353,7 @@ namespace SIE SkyObject = 1 << 24, MsnSpuSkinned = 1 << 25, MotionVectorsNormals = 1 << 26, + Deferred = 1 << 27 }; enum class UtilityShaderFlags : 
uint64_t diff --git a/src/State.cpp b/src/State.cpp index 8bef4761b..8b831bd91 100644 --- a/src/State.cpp +++ b/src/State.cpp @@ -9,43 +9,50 @@ #include "Feature.h" #include "Util.h" +#include "Deferred.h" #include "Features/TerrainBlending.h" +#include "VariableRateShading.h" + void State::Draw() { - auto& shaderCache = SIE::ShaderCache::Instance(); - if (shaderCache.IsEnabled() && currentShader && updateShader) { + Deferred::GetSingleton()->UpdatePerms(); + if (currentShader && updateShader) { auto type = currentShader->shaderType.get(); - if (type > 0 && type < RE::BSShader::Type::Total) { - if (enabledClasses[type - 1]) { - ModifyShaderLookup(*currentShader, currentVertexDescriptor, currentPixelDescriptor); - UpdateSharedData(currentShader, currentPixelDescriptor); - - static RE::BSGraphics::VertexShader* vertexShader = nullptr; - static RE::BSGraphics::PixelShader* pixelShader = nullptr; - - vertexShader = shaderCache.GetVertexShader(*currentShader, currentVertexDescriptor); - pixelShader = shaderCache.GetPixelShader(*currentShader, currentPixelDescriptor); - - if (vertexShader && pixelShader) { - context->VSSetShader(reinterpret_cast(vertexShader->shader), NULL, NULL); - context->PSSetShader(reinterpret_cast(pixelShader->shader), NULL, NULL); - } + VariableRateShading::GetSingleton()->UpdateViews(type != RE::BSShader::Type::ImageSpace && type != RE::BSShader::Type::Sky && type != RE::BSShader::Type::Water); + auto& shaderCache = SIE::ShaderCache::Instance(); + if (shaderCache.IsEnabled()) { + if (type > 0 && type < RE::BSShader::Type::Total) { + if (enabledClasses[type - 1]) { + ModifyShaderLookup(*currentShader, currentVertexDescriptor, currentPixelDescriptor); + UpdateSharedData(currentShader, currentPixelDescriptor); + + static RE::BSGraphics::VertexShader* vertexShader = nullptr; + static RE::BSGraphics::PixelShader* pixelShader = nullptr; + + vertexShader = shaderCache.GetVertexShader(*currentShader, currentVertexDescriptor); + pixelShader = 
shaderCache.GetPixelShader(*currentShader, currentPixelDescriptor); + + if (vertexShader && pixelShader) { + context->VSSetShader(reinterpret_cast(vertexShader->shader), NULL, NULL); + context->PSSetShader(reinterpret_cast(pixelShader->shader), NULL, NULL); + } - BeginPerfEvent(std::format("Draw: CommunityShaders {}::{}", magic_enum::enum_name(currentShader->shaderType.get()), currentPixelDescriptor)); - if (IsDeveloperMode()) { - SetPerfMarker(std::format("Defines: {}", SIE::ShaderCache::GetDefinesString(currentShader->shaderType.get(), currentPixelDescriptor))); - } + BeginPerfEvent(std::format("Draw: CommunityShaders {}::{}", magic_enum::enum_name(currentShader->shaderType.get()), currentPixelDescriptor)); + if (IsDeveloperMode()) { + SetPerfMarker(std::format("Defines: {}", SIE::ShaderCache::GetDefinesString(currentShader->shaderType.get(), currentPixelDescriptor))); + } - if (vertexShader && pixelShader) { - for (auto* feature : Feature::GetFeatureList()) { - if (feature->loaded) { - auto hasShaderDefine = feature->HasShaderDefine(currentShader->shaderType.get()); - if (hasShaderDefine) - BeginPerfEvent(feature->GetShortName()); - feature->Draw(currentShader, currentPixelDescriptor); - if (hasShaderDefine) - EndPerfEvent(); + if (vertexShader && pixelShader) { + for (auto* feature : Feature::GetFeatureList()) { + if (feature->loaded) { + auto hasShaderDefine = feature->HasShaderDefine(currentShader->shaderType.get()); + if (hasShaderDefine) + BeginPerfEvent(feature->GetShortName()); + feature->Draw(currentShader, currentPixelDescriptor); + if (hasShaderDefine) + EndPerfEvent(); + } } } } @@ -168,9 +175,10 @@ void State::Reset() for (auto* feature : Feature::GetFeatureList()) if (feature->loaded) feature->Reset(); - Bindings::GetSingleton()->Reset(); + Deferred::GetSingleton()->Reset(); if (!RE::UI::GetSingleton()->GameIsPaused()) timer += RE::GetSecondsSinceLastFrame(); + VariableRateShading::GetSingleton()->UpdateVRS(); } void State::Setup() @@ -179,7 +187,8 
@@ void State::Setup() for (auto* feature : Feature::GetFeatureList()) if (feature->loaded) feature->SetupResources(); - //Bindings::GetSingleton()->SetupResources(); + Deferred::GetSingleton()->SetupResources(); + VariableRateShading::GetSingleton()->Setup(); } static const std::string& GetConfigPath(State::ConfigMode a_configMode) @@ -334,6 +343,7 @@ void State::PostPostLoad() logger::info("Skyrim Upscaler detected"); else logger::info("Skyrim Upscaler not detected"); + Deferred::Hooks::Install(); } bool State::ValidateCache(CSimpleIniA& a_ini) @@ -461,7 +471,7 @@ void State::SetupResources() void State::ModifyShaderLookup(const RE::BSShader& a_shader, uint& a_vertexDescriptor, uint& a_pixelDescriptor) { - if (a_shader.shaderType.get() == RE::BSShader::Type::Lighting || a_shader.shaderType.get() == RE::BSShader::Type::Water || a_shader.shaderType.get() == RE::BSShader::Type::Effect) { + if (a_shader.shaderType.get() == RE::BSShader::Type::Lighting || a_shader.shaderType.get() == RE::BSShader::Type::Water || a_shader.shaderType.get() == RE::BSShader::Type::Effect || a_shader.shaderType.get() == RE::BSShader::Type::DistantTree) { if (a_vertexDescriptor != lastVertexDescriptor || a_pixelDescriptor != lastPixelDescriptor) { PerShader data{}; data.VertexShaderDescriptor = a_vertexDescriptor; @@ -505,6 +515,9 @@ void State::ModifyShaderLookup(const RE::BSShader& a_shader, uint& a_vertexDescr if (vr || !enableImprovedSnow->GetBool()) a_pixelDescriptor &= ~((uint32_t)SIE::ShaderCache::LightingShaderFlags::Snow); + if (Deferred::GetSingleton()->deferredPass) + a_pixelDescriptor |= (uint32_t)SIE::ShaderCache::LightingShaderFlags::Deferred; + { uint32_t technique = 0x3F & (a_vertexDescriptor >> 24); if (technique == (uint32_t)SIE::ShaderCache::LightingShaderTechniques::Glowmap || @@ -546,6 +559,15 @@ void State::ModifyShaderLookup(const RE::BSShader& a_shader, uint& a_vertexDescr a_pixelDescriptor &= ~((uint32_t)SIE::ShaderCache::EffectShaderFlags::GrayscaleToColor | 
(uint32_t)SIE::ShaderCache::EffectShaderFlags::GrayscaleToAlpha | (uint32_t)SIE::ShaderCache::EffectShaderFlags::IgnoreTexAlpha); + + if (Deferred::GetSingleton()->deferredPass) + a_pixelDescriptor |= (uint32_t)SIE::ShaderCache::EffectShaderFlags::Deferred; + } + break; + case RE::BSShader::Type::DistantTree: + { + if (Deferred::GetSingleton()->deferredPass) + a_pixelDescriptor |= (uint32_t)SIE::ShaderCache::DistantTreeShaderFlags::Deferred; } break; } @@ -575,9 +597,9 @@ void State::UpdateSharedData(const RE::BSShader* a_shader, const uint32_t) if (a_shader->shaderType.get() == RE::BSShader::Type::Lighting) { bool updateBuffer = false; - bool currentReflections = (!REL::Module::IsVR() ? - shadowState->GetRuntimeData().cubeMapRenderTarget : - shadowState->GetVRRuntimeData().cubeMapRenderTarget) == RE::RENDER_TARGETS_CUBEMAP::kREFLECTIONS; + GET_INSTANCE_MEMBER(cubeMapRenderTarget, shadowState); + + bool currentReflections = cubeMapRenderTarget == RE::RENDER_TARGETS_CUBEMAP::kREFLECTIONS; if (lightingData.Reflections != (uint)currentReflections) { updateBuffer = true; diff --git a/src/Util.cpp b/src/Util.cpp index df18a3843..93e9f4cce 100644 --- a/src/Util.cpp +++ b/src/Util.cpp @@ -163,6 +163,10 @@ namespace Util std::transform(path.begin(), path.end(), std::back_inserter(str), [](wchar_t c) { return (char)c; }); + if (!std::filesystem::exists(FilePath)) { + logger::error("Failed to compile shader; {} does not exist", str); + return nullptr; + } logger::debug("Compiling {} with {}", str, DefinesToString(macros)); if (FAILED(D3DCompileFromFile(FilePath, macros.data(), D3D_COMPILE_STANDARD_FILE_INCLUDE, Program, ProgramType, flags, 0, &shaderBlob, &shaderErrors))) { logger::warn("Shader compilation failed:\n\n{}", shaderErrors ? 
(const char*)shaderErrors->GetBufferPointer() : "Unknown error"); diff --git a/src/VariableRateShading.cpp b/src/VariableRateShading.cpp new file mode 100644 index 000000000..ab7af80c3 --- /dev/null +++ b/src/VariableRateShading.cpp @@ -0,0 +1,347 @@ +#include "VariableRateShading.h" +#include "State.h" +#include + +HMODULE hNVAPI_DLL; + +void VariableRateShading::ClearShaderCache() +{ + if (computeNASDataCS) { + computeNASDataCS->Release(); + computeNASDataCS = nullptr; + } + if (computeShadingRateCS) { + computeShadingRateCS->Release(); + computeShadingRateCS = nullptr; + } +} + +ID3D11ComputeShader* VariableRateShading::GetComputeNASData() +{ + if (!computeNASDataCS) { + logger::debug("Compiling ComputeNASData"); + computeNASDataCS = (ID3D11ComputeShader*)Util::CompileShader(L"Data\\Shaders\\VariableRateShading\\ComputeNASData.hlsl", {}, "cs_5_0"); + } + return computeNASDataCS; +} + +ID3D11ComputeShader* VariableRateShading::GetComputeShadingRate() +{ + if (!computeShadingRateCS) { + logger::debug("Compiling ComputeShadingRate"); + computeShadingRateCS = (ID3D11ComputeShader*)Util::CompileShader(L"Data\\Shaders\\VariableRateShading\\ComputeShadingRate.hlsl", {}, "cs_5_0"); + } + return computeShadingRateCS; +} + +void VariableRateShading::DrawSettings() +{ + ImGui::Checkbox("Enable Variable Rate Shading", &enableVRS); + + ImGui::Spacing(); +} + +void VariableRateShading::UpdateVRS() +{ + if (!vrsActive) + return; + + auto renderer = RE::BSGraphics::Renderer::GetSingleton(); + auto& context = State::GetSingleton()->context; + + ID3D11RenderTargetView* views[8]; + ID3D11DepthStencilView* dsv; + context->OMGetRenderTargets(8, views, &dsv); + + ID3D11RenderTargetView* nullViews[8] = { nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr }; + ID3D11DepthStencilView* nullDsv = nullptr; + context->OMSetRenderTargets(8, nullViews, nullDsv); + + auto& main = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGET::kMAIN]; + auto& motionVectors = 
renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGET::kMOTION_VECTOR]; + + ID3D11ShaderResourceView* srvs[2]{ + main.SRV, + motionVectors.SRV + }; + + context->CSSetShaderResources(0, 2, srvs); + + ID3D11UnorderedAccessView* uavs[1]{ reductionData->uav.get() }; + context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); + + auto shader = GetComputeNASData(); + context->CSSetShader(shader, nullptr, 0); + + context->Dispatch(reductionData->desc.Width, reductionData->desc.Height, 1); + + srvs[0] = nullptr; + srvs[1] = nullptr; + context->CSSetShaderResources(0, 1, srvs); + + uavs[0] = nullptr; + context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); + + context->CSSetShader(nullptr, nullptr, 0); + + ComputeShadingRate(); + + context->OMSetRenderTargets(8, views, dsv); + + for (int i = 0; i < 8; i++) { + if (views[i]) + views[i]->Release(); + } + + if (dsv) + dsv->Release(); +} + +void VariableRateShading::ComputeShadingRate() +{ + auto& context = State::GetSingleton()->context; + + ID3D11ShaderResourceView* srvs[1]{ + reductionData->srv.get() + }; + + context->CSSetShaderResources(0, 1, srvs); + + ID3D11UnorderedAccessView* uavs[1]{ singleEyeVRSUAV[0] }; + context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); + + auto shader = GetComputeShadingRate(); + context->CSSetShader(shader, nullptr, 0); + + float resolutionX = (float)reductionData->desc.Width; + float resolutionY = (float)reductionData->desc.Height; + + uint32_t dispatchX = (uint32_t)std::ceil(resolutionX / 32.0f); + uint32_t dispatchY = (uint32_t)std::ceil(resolutionY / 32.0f); + + context->Dispatch(dispatchX, dispatchY, 1); + + srvs[0] = nullptr; + context->CSSetShaderResources(0, 1, srvs); + + uavs[0] = nullptr; + context->CSSetUnorderedAccessViews(0, 1, uavs, nullptr); + + context->CSSetShader(nullptr, nullptr, 0); +} + +std::vector CreateSingleEyeFixedFoveatedVRSPattern(int width, int height) +{ + std::vector data(width * height); + + enum class ShadingRate + { + k1x1, + k2x2dir, + k2x2, + 
k4x4dir, + k4x4, + }; + + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + data[y * width + x] = 0; + } + } + + return data; +} + +void VariableRateShading::Setup() +{ + auto renderer = RE::BSGraphics::Renderer::GetSingleton(); + auto& device = State::GetSingleton()->device; + + logger::info("Trying to load NVAPI..."); + + hNVAPI_DLL = LoadLibraryA("nvapi64.dll"); + + if (!hNVAPI_DLL) + return; + + NvAPI_Status result = NvAPI_Initialize(); + if (result != NVAPI_OK) { + return; + } + + NV_D3D1x_GRAPHICS_CAPS caps; + memset(&caps, 0, sizeof(NV_D3D1x_GRAPHICS_CAPS)); + NvAPI_Status status = NvAPI_D3D1x_GetGraphicsCapabilities(device, NV_D3D1x_GRAPHICS_CAPS_VER, &caps); + if (status != NVAPI_OK || !caps.bVariablePixelRateShadingSupported) { + logger::info("Variable rate shading is not available."); + return; + } + + vrsActive = true; + logger::info("Successfully initialized NVAPI; Variable Rate Shading is available."); + + auto width = State::GetSingleton()->screenWidth; + auto height = State::GetSingleton()->screenHeight; + SetupSingleEyeVRS(0, (int)width, (int)height); + + { + auto& main = renderer->GetRuntimeData().renderTargets[RE::RENDER_TARGETS::kMAIN]; + + D3D11_TEXTURE2D_DESC texDesc{}; + D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + D3D11_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + + main.texture->GetDesc(&texDesc); + main.SRV->GetDesc(&srvDesc); + main.UAV->GetDesc(&uavDesc); + + texDesc.Format = DXGI_FORMAT_R16G16B16A16_FLOAT; + srvDesc.Format = texDesc.Format; + uavDesc.Format = texDesc.Format; + + texDesc.Width /= 16; + texDesc.Height /= 16; + + reductionData = new Texture2D(texDesc); + reductionData->CreateSRV(srvDesc); + reductionData->CreateUAV(uavDesc); + } + + { + for (uint i = 0; i < 114; i++) { + auto& target = renderer->GetRuntimeData().renderTargets[i]; + if (target.texture) { + D3D11_TEXTURE2D_DESC texDesc{}; + target.texture->GetDesc(&texDesc); + + if (texDesc.Width == width && texDesc.Height == height) { + 
screenTargets.insert(i); + } + } + } + } +} + +void VariableRateShading::SetupSingleEyeVRS(int eye, int width, int height) +{ + auto& device = State::GetSingleton()->device; + + int vrsWidth = width / NV_VARIABLE_PIXEL_SHADING_TILE_WIDTH; + int vrsHeight = height / NV_VARIABLE_PIXEL_SHADING_TILE_HEIGHT; + + logger::info("Creating VRS pattern texture for eye"); + + D3D11_TEXTURE2D_DESC texDesc = {}; + texDesc.Width = vrsWidth; + texDesc.Height = vrsHeight; + texDesc.ArraySize = 1; + texDesc.Format = DXGI_FORMAT_R8_UINT; + texDesc.SampleDesc.Count = 1; + texDesc.SampleDesc.Quality = 0; + texDesc.Usage = D3D11_USAGE_DEFAULT; + texDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS; + texDesc.CPUAccessFlags = 0; + texDesc.MiscFlags = 0; + texDesc.MipLevels = 1; + auto data = CreateSingleEyeFixedFoveatedVRSPattern(vrsWidth, vrsHeight); + D3D11_SUBRESOURCE_DATA subresourceData; + subresourceData.pSysMem = data.data(); + subresourceData.SysMemPitch = vrsWidth; + subresourceData.SysMemSlicePitch = 0; + DX::ThrowIfFailed(device->CreateTexture2D(&texDesc, &subresourceData, &singleEyeVRSTex[eye])); + + D3D11_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + uavDesc.Format = texDesc.Format; + uavDesc.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D; + uavDesc.Texture2D.MipSlice = 0; + DX::ThrowIfFailed(device->CreateUnorderedAccessView(singleEyeVRSTex[eye], &uavDesc, &singleEyeVRSUAV[eye])); + + logger::info("Creating shading rate resource view for eye"); + NV_D3D11_SHADING_RATE_RESOURCE_VIEW_DESC vd = {}; + vd.version = NV_D3D11_SHADING_RATE_RESOURCE_VIEW_DESC_VER; + vd.Format = texDesc.Format; + vd.ViewDimension = NV_SRRV_DIMENSION_TEXTURE2D; + vd.Texture2D.MipSlice = 0; + NvAPI_Status status = NvAPI_D3D11_CreateShadingRateResourceView(device, singleEyeVRSTex[eye], &vd, &singleEyeVRSView[eye]); + if (status != NVAPI_OK) { + logger::info("Failed to create VRS pattern view for eye"); + return; + } +} + +void VariableRateShading::UpdateViews(bool a_enable) +{ + if 
(!vrsActive) + return; + + bool interior = false; + if (auto player = RE::PlayerCharacter::GetSingleton()) { + if (auto cell = player->GetParentCell()) { + if (cell->IsInteriorCell()) { + interior = true; + } + } + } + auto& context = State::GetSingleton()->context; + + auto state = RE::BSGraphics::RendererShadowState::GetSingleton(); + + GET_INSTANCE_MEMBER(renderTargets, state); + GET_INSTANCE_MEMBER(depthStencil, state); + + vrsPass = screenTargets.contains(renderTargets[0]); + + if (depthStencil == RE::RENDER_TARGETS_DEPTHSTENCIL::kMAIN || depthStencil == RE::RENDER_TARGETS_DEPTHSTENCIL::kPOST_ZPREPASS_COPY) { + vrsPass = true; + } + + vrsPass = enableVRS && vrsPass && a_enable && !RE::UI::GetSingleton()->GameIsPaused() && interior; + + static bool currentVRS = false; + + if (currentVRS == vrsPass) + return; + + currentVRS = vrsPass; + + if (vrsPass) { + ID3D11NvShadingRateResourceView* shadingRateView = singleEyeVRSView[0]; + + NvAPI_Status statusSRRV = NvAPI_D3D11_RSSetShadingRateResourceView(context, shadingRateView); + if (statusSRRV != NVAPI_OK) + logger::info("Setting the shading rate resource view failed"); + } + + NV_D3D11_VIEWPORT_SHADING_RATE_DESC vsrd[8]; + for (uint i = 0; i < 8; i++) { + vsrd[i].enableVariablePixelShadingRate = false; + memset(vsrd[i].shadingRateTable, NV_PIXEL_X0_CULL_RASTER_PIXELS, sizeof(vsrd[0].shadingRateTable)); + } + + uint viewportCount = 8; + + if (vrsPass) { + for (uint i = 0; i < viewportCount; i++) { + vsrd[i].enableVariablePixelShadingRate = true; + memset(vsrd[i].shadingRateTable, NV_PIXEL_X1_PER_RASTER_PIXEL, sizeof(vsrd[i].shadingRateTable)); + vsrd[i].shadingRateTable[0] = NV_PIXEL_X1_PER_RASTER_PIXEL; + vsrd[i].shadingRateTable[1] = NV_PIXEL_X1_PER_2X1_RASTER_PIXELS; + vsrd[i].shadingRateTable[2] = NV_PIXEL_X1_PER_1X2_RASTER_PIXELS; + vsrd[i].shadingRateTable[3] = NV_PIXEL_X1_PER_2X2_RASTER_PIXELS; + vsrd[i].shadingRateTable[4] = NV_PIXEL_X1_PER_4X2_RASTER_PIXELS; + vsrd[i].shadingRateTable[5] = 
NV_PIXEL_X1_PER_2X4_RASTER_PIXELS; + vsrd[i].shadingRateTable[6] = NV_PIXEL_X1_PER_4X4_RASTER_PIXELS; + } + } + + NV_D3D11_VIEWPORTS_SHADING_RATE_DESC srd; + srd.version = NV_D3D11_VIEWPORTS_SHADING_RATE_DESC_VER; + srd.numViewports = viewportCount; + srd.pViewports = vsrd; + + { + NvAPI_Status statusVPSR = NvAPI_D3D11_RSSetViewportsPixelShadingRates(context, &srd); + if (statusVPSR != NVAPI_OK) + logger::info("Setting the viewport pixel shading rate failed"); + } +} \ No newline at end of file diff --git a/src/VariableRateShading.h b/src/VariableRateShading.h new file mode 100644 index 000000000..249568cc4 --- /dev/null +++ b/src/VariableRateShading.h @@ -0,0 +1,42 @@ +#pragma once + +#include "Buffer.h" +#include + +class VariableRateShading +{ +public: + static VariableRateShading* GetSingleton() + { + static VariableRateShading singleton; + return &singleton; + } + + bool enableVRS = true; + + bool nvapiLoaded = false; + bool vrsActive = false; + bool vrsPass = false; + ID3D11Texture2D* singleEyeVRSTex[2]; + ID3D11UnorderedAccessView* singleEyeVRSUAV[2]; + ID3D11NvShadingRateResourceView* singleEyeVRSView[2]; + + std::unordered_set screenTargets; + + Texture2D* reductionData = nullptr; + ID3D11ComputeShader* computeNASDataCS = nullptr; + ID3D11ComputeShader* computeShadingRateCS = nullptr; + + void ClearShaderCache(); + ID3D11ComputeShader* GetComputeNASData(); + ID3D11ComputeShader* GetComputeShadingRate(); + + void DrawSettings(); + + void UpdateVRS(); + void ComputeShadingRate(); + + void Setup(); + void SetupSingleEyeVRS(int eye, int width, int height); + void UpdateViews(bool a_enable); +}; \ No newline at end of file