doitsujin · misyltoad · Sep 26, 2022 · Sep 26, 2022 · Sep 26, 2022 · Sep 26, 2022
diff --git a/dxvk.conf b/dxvk.conf
@@ -451,6 +451,15 @@
 
 # d3d9.supportDFFormats = True
 
+# Use D32f in place of D24 depth formats
+#
+# Useful for reproducing AMD issues on other hardware.
+#
+# Supported values:
+# - True/False
+
+# d3d9.useD32forD24 = False
+
 # Support X4R4G4B4
 #
 # Support the X4R4G4B4 format.

diff --git a/src/d3d9/d3d9_common_texture.cpp b/src/d3d9/d3d9_common_texture.cpp
@@ -40,6 +40,8 @@ namespace dxvk {
 
     m_mapMode        = DetermineMapMode();
     m_shadow         = DetermineShadowState();
+    m_upgradedToD32f = ConvertFormatUnfixed(m_desc.Format).FormatColor != VK_FORMAT_D32_SFLOAT_S8_UINT &&
+                       m_mapping.FormatColor == VK_FORMAT_D32_SFLOAT_S8_UINT;
     m_supportsFetch4 = DetermineFetch4Compatibility();
 
     const bool createImage = m_desc.Pool != D3DPOOL_SYSTEMMEM && m_desc.Pool != D3DPOOL_SCRATCH && m_desc.Format != D3D9Format::NULL_FORMAT;

diff --git a/src/d3d9/d3d9_common_texture.h b/src/d3d9/d3d9_common_texture.h
@@ -203,6 +203,14 @@ namespace dxvk {
       return m_shadow;
     }
 
+    /**
+     * \brief D32f upgrade state (the device uses this to enable the Dref clamp)
+     * \returns Whether the texture emulates an UNORM format with D32f
+     */
+    bool IsUpgradedToD32f() const {
+      return m_upgradedToD32f;
+    }
+
     /**
      * \brief FETCH4 compatibility
      * \returns Whether the format of the texture supports the FETCH4 hack
@@ -499,6 +507,7 @@ namespace dxvk {
     D3D9_VK_FORMAT_MAPPING        m_mapping;
 
     bool                          m_shadow; //< Depth Compare-ness
+    bool                          m_upgradedToD32f; //< Format was upgraded to D32f, Dref gets clamped
     bool                          m_supportsFetch4;
 
     int64_t                       m_size = 0;

diff --git a/src/d3d9/d3d9_device.cpp b/src/d3d9/d3d9_device.cpp
@@ -3791,6 +3791,9 @@ namespace dxvk {
         m_dirtySamplerStates |= 1u << StateSampler;
       }
 
+      m_drefClamp &= ~(1u << StateSampler);
+      m_drefClamp |= uint32_t(newTexture->IsUpgradedToD32f()) << StateSampler;
+
       const bool oldCube = m_cubeTextures & (1u << StateSampler);
       const bool newCube = newTexture->GetType() == D3DRTYPE_CUBETEXTURE;
       if (oldCube != newCube) {
@@ -6249,7 +6252,8 @@ namespace dxvk {
 
     const uint32_t nullTextureMask = usedSamplerMask & ~usedTextureMask;
     const uint32_t depthTextureMask = m_depthTextures & usedTextureMask;
-    UpdateCommonSamplerSpec(nullTextureMask, depthTextureMask);
+    const uint32_t drefClampMask = m_drefClamp & depthTextureMask;
+    UpdateCommonSamplerSpec(nullTextureMask, depthTextureMask, drefClampMask);
 
     if (m_flags.test(D3D9DeviceFlag::DirtySharedPixelShaderData)) {
       m_flags.clr(D3D9DeviceFlag::DirtySharedPixelShaderData);
@@ -7249,7 +7253,7 @@ namespace dxvk {
     UpdatePixelShaderSamplerSpec(0u, 0u, 0u);
     UpdateVertexBoolSpec(0u);
     UpdatePixelBoolSpec(0u);
-    UpdateCommonSamplerSpec(0u, 0u);
+    UpdateCommonSamplerSpec(0u, 0u, 0u);
 
     return D3D_OK;
   }
@@ -7443,9 +7447,10 @@ namespace dxvk {
   }
 
 
-  void D3D9DeviceEx::UpdateCommonSamplerSpec(uint32_t nullMask, uint32_t depthMask) {
+  void D3D9DeviceEx::UpdateCommonSamplerSpec(uint32_t nullMask, uint32_t depthMask, uint32_t drefMask) {
     bool dirty  = m_specInfo.set<SpecSamplerDepthMode>(depthMask);
          dirty |= m_specInfo.set<SpecSamplerNull>(nullMask);
+         dirty |= m_specInfo.set<SpecDrefClamp>(drefMask);
 
     if (dirty)
       m_flags.set(D3D9DeviceFlag::DirtySpecializationEntries);

diff --git a/src/d3d9/d3d9_device.h b/src/d3d9/d3d9_device.h
@@ -1145,7 +1145,7 @@ namespace dxvk {
     void UpdateVertexBoolSpec(uint32_t value);
     void UpdatePixelBoolSpec(uint32_t value);
     void UpdatePixelShaderSamplerSpec(uint32_t types, uint32_t projections, uint32_t fetch4);
-    void UpdateCommonSamplerSpec(uint32_t boundMask, uint32_t depthMask);
+    void UpdateCommonSamplerSpec(uint32_t boundMask, uint32_t depthMask, uint32_t drefMask);
     void UpdatePointModeSpec(uint32_t mode);
     void UpdateFogModeSpec(bool fogEnabled, D3DFOGMODE vertexFogMode, D3DFOGMODE pixelFogMode);
 
@@ -1237,6 +1237,7 @@ namespace dxvk {
     uint32_t                        m_instancedData = 0;
 
     uint32_t                        m_depthTextures = 0;
+    uint32_t                        m_drefClamp = 0;
     uint32_t                        m_cubeTextures = 0;
     uint32_t                        m_textureTypes = 0;
     uint32_t                        m_projectionBitfield  = 0;

diff --git a/src/d3d9/d3d9_format.cpp b/src/d3d9/d3d9_format.cpp
@@ -434,7 +434,8 @@ namespace dxvk {
 
     // AMD do not support 24-bit depth buffers on Vulkan,
     // so we have to fall back to a 32-bit depth format.
-    m_d24s8Support = CheckImageFormatSupport(adapter, VK_FORMAT_D24_UNORM_S8_UINT,
+    m_d24s8Support = !options.useD32forD24 &&
+                     CheckImageFormatSupport(adapter, VK_FORMAT_D24_UNORM_S8_UINT,
       VK_FORMAT_FEATURE_2_DEPTH_STENCIL_ATTACHMENT_BIT |
       VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT);
 
@@ -444,7 +445,6 @@ namespace dxvk {
       VK_FORMAT_FEATURE_2_DEPTH_STENCIL_ATTACHMENT_BIT |
       VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT);
 
-    // VK_EXT_4444_formats
     if (!m_d24s8Support)
       Logger::info("D3D9: VK_FORMAT_D24_UNORM_S8_UINT -> VK_FORMAT_D32_SFLOAT_S8_UINT");
 

diff --git a/src/d3d9/d3d9_options.cpp b/src/d3d9/d3d9_options.cpp
@@ -55,6 +55,7 @@ namespace dxvk {
     this->supportDFFormats              = config.getOption<bool>        ("d3d9.supportDFFormats",              true);
     this->supportX4R4G4B4               = config.getOption<bool>        ("d3d9.supportX4R4G4B4",               true);
     this->supportD32                    = config.getOption<bool>        ("d3d9.supportD32",                    true);
+    this->useD32forD24                  = config.getOption<bool>        ("d3d9.useD32forD24",                  false);
     this->disableA8RT                   = config.getOption<bool>        ("d3d9.disableA8RT",                   false);
     this->invariantPosition             = config.getOption<bool>        ("d3d9.invariantPosition",             true);
     this->memoryTrackTest               = config.getOption<bool>        ("d3d9.memoryTrackTest",               false);

diff --git a/src/d3d9/d3d9_options.h b/src/d3d9/d3d9_options.h
@@ -91,6 +91,9 @@ namespace dxvk {
     /// Support D32
     bool supportD32;
 
+    /// Use D32f for D24
+    bool useD32forD24;
+
     /// Disable D3DFMT_A8 for render targets.
     /// Specifically to work around a game
     /// bug in The Sims 2 that happens on native too!

diff --git a/src/d3d9/d3d9_spec_constants.h b/src/d3d9/d3d9_spec_constants.h
@@ -27,6 +27,7 @@ namespace dxvk {
     SpecVertexShaderBools,  // 16 bools                       | Bits: 16
     SpecPixelShaderBools,   // 16 bools                       | Bits: 16
 
+    SpecDrefClamp,          // 1 bit for 16 PS samplers       | Bits: 16
     SpecFetch4,             // 1 bit for 16 PS samplers       | Bits: 16
 
     SpecConstantCount,
@@ -62,7 +63,8 @@ namespace dxvk {
       { 3, 0,  16 }, // VertexShaderBools
       { 3, 16, 16 }, // PixelShaderBools
 
-      { 4, 0,  16 }, // Fetch4
+      { 4, 0,  16 }, // DrefClamp
+      { 4, 16, 16 }, // Fetch4
     }};
 
     template <D3D9SpecConstantId Id, typename T>

diff --git a/src/dxso/dxso_compiler.cpp b/src/dxso/dxso_compiler.cpp
@@ -2648,10 +2648,30 @@ void DxsoCompiler::emitControlFlowGenericLoop(
     DxsoRegMask vec3Mask(true, true, true,  false);
     DxsoRegMask srcMask (true, true, true,  true);
 
-    auto GetProjectionValue = [&]() {
+    auto DoProjection = [&](DxsoRegisterValue coord, bool switchProjRes) { // Returns the id of coord scaled by 1 / coord.w
+      uint32_t bool_t = m_module.defBoolType();
+      uint32_t texcoord_t = getVectorTypeId(coord.type);
+
       uint32_t w = 3;
-      return m_module.opCompositeExtract(
-        m_module.defFloatType(32), texcoordVar.id, 1, &w);
+
+      uint32_t projScalar = m_module.opCompositeExtract(
+        m_module.defFloatType(32), coord.id, 1, &w); // component index 3, i.e. coord.w
+
+      projScalar = m_module.opFDiv(m_module.defFloatType(32), m_module.constf32(1.0), projScalar); // projScalar = 1.0 / coord.w
+      uint32_t projResult = m_module.opVectorTimesScalar(texcoord_t, coord.id, projScalar);
+
+      if (switchProjRes) { // Choose projected or original coord from this sampler's SpecProjectionType bit
+        uint32_t shouldProj = m_spec.get(m_module, m_specUbo, SpecProjectionType, samplerIdx, 1);
+        shouldProj = m_module.opINotEqual(bool_t, shouldProj, m_module.constu32(0));
+
+        uint32_t bvec4_t = m_module.defVectorType(bool_t, 4);
+        std::array<uint32_t, 4> indices = { shouldProj, shouldProj, shouldProj, shouldProj };
+        shouldProj = m_module.opCompositeConstruct(bvec4_t, indices.size(), indices.data()); // Replicate the bool to 4 components for the vector opSelect
+
+        return m_module.opSelect(texcoord_t, shouldProj, projResult, coord.id);
+      } else {
+        return projResult; // Caller asked for an unconditional projection
+      }
     };
 
     if (opcode == DxsoOpcode::TexM3x2Tex || opcode == DxsoOpcode::TexM3x3Tex || opcode == DxsoOpcode::TexM3x3Spec || opcode == DxsoOpcode::TexM3x3VSpec) {
@@ -2711,23 +2731,9 @@ void DxsoCompiler::emitControlFlowGenericLoop(
       texcoordVar = m;
       samplerIdx = ctx.dst.id.num;
 
-      uint32_t texcoord_t = getVectorTypeId(texcoordVar.type);
-
       // The projection (/.w) happens before this...
       // Of course it does...
-      uint32_t bool_t = m_module.defBoolType();
-
-      uint32_t shouldProj = m_spec.get(m_module, m_specUbo, SpecProjectionType, samplerIdx, 1);
-      shouldProj = m_module.opINotEqual(bool_t, shouldProj, m_module.constu32(0));
-
-      uint32_t bvec4_t = m_module.defVectorType(bool_t, 4);
-      std::array<uint32_t, 4> indices = { shouldProj, shouldProj, shouldProj, shouldProj };
-      shouldProj = m_module.opCompositeConstruct(bvec4_t, indices.size(), indices.data());
-
-      uint32_t projScalar = m_module.opFDiv(m_module.defFloatType(32), m_module.constf32(1.0), GetProjectionValue());
-      uint32_t projResult = m_module.opVectorTimesScalar(texcoord_t, texcoordVar.id, projScalar);
-
-      texcoordVar.id = m_module.opSelect(texcoord_t, shouldProj, projResult, texcoordVar.id);
+      texcoordVar.id = DoProjection(texcoordVar, true);
 
       // u' = tc(m).x + [bm00(m) * t(n).x + bm10(m) * t(n).y]
       // v' = tc(m).y + [bm01(m) * t(n).x + bm11(m) * t(n).y]
@@ -2811,7 +2817,7 @@ void DxsoCompiler::emitControlFlowGenericLoop(
 
     DxsoSampler sampler = m_samplers.at(samplerIdx);
 
-    auto SampleImage = [this, opcode, dst, ctx, samplerIdx, GetProjectionValue](DxsoRegisterValue texcoordVar, DxsoSamplerInfo& sampler, bool depth, DxsoSamplerType samplerType, uint32_t isNull) {
+    auto SampleImage = [this, opcode, dst, ctx, samplerIdx, DoProjection](DxsoRegisterValue texcoordVar, DxsoSamplerInfo& sampler, bool depth, DxsoSamplerType samplerType, uint32_t isNull) {
       DxsoRegisterValue result;
       result.type.ctype  = dst.type.ctype;
       result.type.ccount = depth ? 1 : 4;
@@ -2838,12 +2844,10 @@ void DxsoCompiler::emitControlFlowGenericLoop(
         imageOperands.sGradY = emitRegisterLoad(ctx.src[3], gradMask).id;
       }
 
-      uint32_t projDivider = 0;
-
       if (opcode == DxsoOpcode::Tex
         && m_programInfo.majorVersion() >= 2) {
         if (ctx.instruction.specificData.texld == DxsoTexLdMode::Project) {
-          projDivider = GetProjectionValue();
+          texcoordVar.id = DoProjection(texcoordVar, false);
         }
         else if (ctx.instruction.specificData.texld == DxsoTexLdMode::Bias) {
           uint32_t w = 3;
@@ -2853,37 +2857,29 @@ void DxsoCompiler::emitControlFlowGenericLoop(
         }
       }
 
-      bool switchProjResult = m_programInfo.majorVersion() < 2 && samplerType != SamplerTypeTextureCube;
-
-      if (switchProjResult)
-        projDivider = GetProjectionValue();
-
-      // We already handled this...
-      if (opcode == DxsoOpcode::TexBem) {
-        switchProjResult = false;
-        projDivider = 0;
+      // We already handled this for TexBem(L)
+      if (m_programInfo.majorVersion() < 2 && samplerType != SamplerTypeTextureCube && opcode != DxsoOpcode::TexBem && opcode != DxsoOpcode::TexBemL) {
+        texcoordVar.id = DoProjection(texcoordVar, true);
       }
 
-      uint32_t reference = 0;
+      uint32_t bool_t = m_module.defBoolType();
 
+      uint32_t reference = 0;
       if (depth) {
+        uint32_t fType = m_module.defFloatType(32);
         uint32_t component = sampler.dimensions;
         reference = m_module.opCompositeExtract(
-          m_module.defFloatType(32), texcoordVar.id, 1, &component);
-      }
-
-      if (projDivider != 0) {
-        for (uint32_t i = sampler.dimensions; i < 4; i++) {
-          texcoordVar.id = m_module.opCompositeInsert(getVectorTypeId(texcoordVar.type),
-            projDivider, texcoordVar.id, 1, &i);
-        }
+          fType, texcoordVar.id, 1, &component);
+        uint32_t clampDref = m_spec.get(m_module, m_specUbo, SpecDrefClamp, samplerIdx, 1);
+        clampDref = m_module.opINotEqual(bool_t, clampDref, m_module.constu32(0));
+        uint32_t clampedDref = m_module.opFClamp(fType, reference, m_module.constf32(0.0f), m_module.constf32(1.0f));
+        reference = m_module.opSelect(fType, clampDref, clampedDref, reference);
       }
 
       uint32_t fetch4 = 0;
       if (m_programInfo.type() == DxsoProgramType::PixelShader && samplerType != SamplerTypeTexture3D) {
         fetch4 = m_spec.get(m_module, m_specUbo, SpecFetch4, samplerIdx, 1);
 
-        uint32_t bool_t = m_module.defBoolType();
         fetch4 = m_module.opINotEqual(bool_t, fetch4, m_module.constu32(0));
 
         uint32_t bvec4_t = m_module.defVectorType(bool_t, 4);
@@ -2892,41 +2888,13 @@ void DxsoCompiler::emitControlFlowGenericLoop(
       }
 
       result.id = this->emitSample(
-        projDivider != 0,
         typeId,
         sampler,
         texcoordVar,
         reference,
         fetch4,
         imageOperands);
 
-      if (switchProjResult) {
-        uint32_t bool_t = m_module.defBoolType();
-
-        uint32_t nonProjResult = this->emitSample(
-          0,
-          typeId,
-          sampler,
-          texcoordVar,
-          reference,
-          fetch4,
-          imageOperands);
-
-        uint32_t shouldProj = m_spec.get(m_module, m_specUbo, SpecProjectionType, samplerIdx, 1);
-        shouldProj = m_module.opINotEqual(m_module.defBoolType(), shouldProj, m_module.constu32(0));
-
-        // Depth  -> .x
-        // Colour -> .xyzw
-        // Need to replicate the bool for the opSelect.
-        if (!depth) {
-          uint32_t bvec4_t = m_module.defVectorType(bool_t, 4);
-          std::array<uint32_t, 4> indices = { shouldProj, shouldProj, shouldProj, shouldProj };
-          shouldProj = m_module.opCompositeConstruct(bvec4_t, indices.size(), indices.data());
-        }
-
-        result.id = m_module.opSelect(typeId, shouldProj, result.id, nonProjResult);
-      }
-
       // If we are sampling depth we've already specc'ed this!
       // This path is always size 4 because it only hits on color.
       if (isNull != 0) {
@@ -3118,7 +3086,6 @@ void DxsoCompiler::emitControlFlowGenericLoop(
 
 
   uint32_t DxsoCompiler::emitSample(
-          bool                    projected,
           uint32_t                resultType,
           DxsoSamplerInfo&        samplerInfo,
           DxsoRegisterValue       coordinates,
@@ -3134,37 +3101,22 @@ void DxsoCompiler::emitControlFlowGenericLoop(
 
     uint32_t val;
 
-    // No Fetch 4
-    if (projected) {
-      if (depthCompare) {
-        if (explicitLod)
-          val = m_module.opImageSampleProjDrefExplicitLod(resultType, sampledImage, coordinates.id, reference, operands);
-        else
-          val = m_module.opImageSampleProjDrefImplicitLod(resultType, sampledImage, coordinates.id, reference, operands);
-      }
-      else {
-        if (explicitLod)
-          val = m_module.opImageSampleProjExplicitLod(resultType, sampledImage, coordinates.id, operands);
-        else
-          val = m_module.opImageSampleProjImplicitLod(resultType, sampledImage, coordinates.id, operands);
-      }
+
+    if (depthCompare) {
+      if (explicitLod)
+        val = m_module.opImageSampleDrefExplicitLod(resultType, sampledImage, coordinates.id, reference, operands);
+      else
+        val = m_module.opImageSampleDrefImplicitLod(resultType, sampledImage, coordinates.id, reference, operands);
     }
     else {
-      if (depthCompare) {
-        if (explicitLod)
-          val = m_module.opImageSampleDrefExplicitLod(resultType, sampledImage, coordinates.id, reference, operands);
-        else
-          val = m_module.opImageSampleDrefImplicitLod(resultType, sampledImage, coordinates.id, reference, operands);
-      }
-      else {
-        if (explicitLod)
-          val = m_module.opImageSampleExplicitLod(resultType, sampledImage, coordinates.id, operands);
-        else
-          val = m_module.opImageSampleImplicitLod(resultType, sampledImage, coordinates.id, operands);
-      }
+      if (explicitLod)
+        val = m_module.opImageSampleExplicitLod(resultType, sampledImage, coordinates.id, operands);
+      else
+        val = m_module.opImageSampleImplicitLod(resultType, sampledImage, coordinates.id, operands);
     }
 
 
+
     if (fetch4 && !depthCompare) {
       SpirvImageOperands fetch4Operands = operands;
       fetch4Operands.flags &= ~spv::ImageOperandsLodMask;

diff --git a/src/dxso/dxso_compiler.h b/src/dxso/dxso_compiler.h
@@ -664,7 +664,6 @@ namespace dxvk {
     void emitTextureDepth(const DxsoInstructionContext& ctx);
 
     uint32_t emitSample(
-            bool                    projected,
             uint32_t                resultType,
             DxsoSamplerInfo&        samplerInfo,
             DxsoRegisterValue       coordinates,