Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[d3d9, dxso] Clamp Dref to [0.0, 1.0] if the texture is emulated UNORM #2957

Merged
merged 5 commits into from
Sep 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions dxvk.conf
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,15 @@

# d3d9.supportDFFormats = True

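# Use D32f for D24
#
# Treats 24-bit depth formats as unsupported, so that they are
# emulated with a 32-bit float depth format even on hardware that
# supports D24. Useful for reproducing AMD issues on other hardware.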
#
# Supported values:
# - True/False

# d3d9.useD32forD24 = False

# Support X4R4G4B4
#
# Support the X4R4G4B4 format.
Expand Down
2 changes: 2 additions & 0 deletions src/d3d9/d3d9_common_texture.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ namespace dxvk {

m_mapMode = DetermineMapMode();
m_shadow = DetermineShadowState();
m_upgradedToD32f = ConvertFormatUnfixed(m_desc.Format).FormatColor != VK_FORMAT_D32_SFLOAT_S8_UINT &&
m_mapping.FormatColor == VK_FORMAT_D32_SFLOAT_S8_UINT;
m_supportsFetch4 = DetermineFetch4Compatibility();

const bool createImage = m_desc.Pool != D3DPOOL_SYSTEMMEM && m_desc.Pool != D3DPOOL_SCRATCH && m_desc.Format != D3D9Format::NULL_FORMAT;
Expand Down
9 changes: 9 additions & 0 deletions src/d3d9/d3d9_common_texture.h
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,14 @@ namespace dxvk {
return m_shadow;
}

/**
* \brief Dref Clamp
*
* The flag is true when the color format that ConvertFormatUnfixed
* yields for this texture's D3D9 format is not
* VK_FORMAT_D32_SFLOAT_S8_UINT, but the color format of the mapping
* actually in use is VK_FORMAT_D32_SFLOAT_S8_UINT.
*
* The device records this per sampler so that shaders can clamp the
* depth comparison reference (Dref) to [0.0, 1.0] before sampling.
*
* \returns Whether the texture emulates an UNORM format with D32f
*/
bool IsUpgradedToD32f() const {
return m_upgradedToD32f;
}

/**
* \brief FETCH4 compatibility
* \returns Whether the format of the texture supports the FETCH4 hack
Expand Down Expand Up @@ -499,6 +507,7 @@ namespace dxvk {
D3D9_VK_FORMAT_MAPPING m_mapping;

bool m_shadow; //< Depth Compare-ness
bool m_upgradedToD32f; // Dref Clamp
bool m_supportsFetch4;

int64_t m_size = 0;
Expand Down
11 changes: 8 additions & 3 deletions src/d3d9/d3d9_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3791,6 +3791,9 @@ namespace dxvk {
m_dirtySamplerStates |= 1u << StateSampler;
}

m_drefClamp &= ~(1u << StateSampler);
m_drefClamp |= uint32_t(newTexture->IsUpgradedToD32f()) << StateSampler;

const bool oldCube = m_cubeTextures & (1u << StateSampler);
const bool newCube = newTexture->GetType() == D3DRTYPE_CUBETEXTURE;
if (oldCube != newCube) {
Expand Down Expand Up @@ -6249,7 +6252,8 @@ namespace dxvk {

const uint32_t nullTextureMask = usedSamplerMask & ~usedTextureMask;
const uint32_t depthTextureMask = m_depthTextures & usedTextureMask;
UpdateCommonSamplerSpec(nullTextureMask, depthTextureMask);
const uint32_t drefClampMask = m_drefClamp & depthTextureMask;
UpdateCommonSamplerSpec(nullTextureMask, depthTextureMask, drefClampMask);

if (m_flags.test(D3D9DeviceFlag::DirtySharedPixelShaderData)) {
m_flags.clr(D3D9DeviceFlag::DirtySharedPixelShaderData);
Expand Down Expand Up @@ -7249,7 +7253,7 @@ namespace dxvk {
UpdatePixelShaderSamplerSpec(0u, 0u, 0u);
UpdateVertexBoolSpec(0u);
UpdatePixelBoolSpec(0u);
UpdateCommonSamplerSpec(0u, 0u);
UpdateCommonSamplerSpec(0u, 0u, 0u);

return D3D_OK;
}
Expand Down Expand Up @@ -7443,9 +7447,10 @@ namespace dxvk {
}


void D3D9DeviceEx::UpdateCommonSamplerSpec(uint32_t nullMask, uint32_t depthMask) {
void D3D9DeviceEx::UpdateCommonSamplerSpec(uint32_t nullMask, uint32_t depthMask, uint32_t drefMask) {
bool dirty = m_specInfo.set<SpecSamplerDepthMode>(depthMask);
dirty |= m_specInfo.set<SpecSamplerNull>(nullMask);
dirty |= m_specInfo.set<SpecDrefClamp>(drefMask);

if (dirty)
m_flags.set(D3D9DeviceFlag::DirtySpecializationEntries);
Expand Down
3 changes: 2 additions & 1 deletion src/d3d9/d3d9_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -1145,7 +1145,7 @@ namespace dxvk {
void UpdateVertexBoolSpec(uint32_t value);
void UpdatePixelBoolSpec(uint32_t value);
void UpdatePixelShaderSamplerSpec(uint32_t types, uint32_t projections, uint32_t fetch4);
void UpdateCommonSamplerSpec(uint32_t boundMask, uint32_t depthMask);
void UpdateCommonSamplerSpec(uint32_t boundMask, uint32_t depthMask, uint32_t drefMask);
void UpdatePointModeSpec(uint32_t mode);
void UpdateFogModeSpec(bool fogEnabled, D3DFOGMODE vertexFogMode, D3DFOGMODE pixelFogMode);

Expand Down Expand Up @@ -1237,6 +1237,7 @@ namespace dxvk {
uint32_t m_instancedData = 0;

uint32_t m_depthTextures = 0;
uint32_t m_drefClamp = 0;
uint32_t m_cubeTextures = 0;
uint32_t m_textureTypes = 0;
uint32_t m_projectionBitfield = 0;
Expand Down
4 changes: 2 additions & 2 deletions src/d3d9/d3d9_format.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -434,7 +434,8 @@ namespace dxvk {

// AMD do not support 24-bit depth buffers on Vulkan,
// so we have to fall back to a 32-bit depth format.
m_d24s8Support = CheckImageFormatSupport(adapter, VK_FORMAT_D24_UNORM_S8_UINT,
m_d24s8Support = !options.useD32forD24 &&
CheckImageFormatSupport(adapter, VK_FORMAT_D24_UNORM_S8_UINT,
VK_FORMAT_FEATURE_2_DEPTH_STENCIL_ATTACHMENT_BIT |
VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT);

Expand All @@ -444,7 +445,6 @@ namespace dxvk {
VK_FORMAT_FEATURE_2_DEPTH_STENCIL_ATTACHMENT_BIT |
VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT);

// VK_EXT_4444_formats
if (!m_d24s8Support)
Logger::info("D3D9: VK_FORMAT_D24_UNORM_S8_UINT -> VK_FORMAT_D32_SFLOAT_S8_UINT");

Expand Down
1 change: 1 addition & 0 deletions src/d3d9/d3d9_options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ namespace dxvk {
this->supportDFFormats = config.getOption<bool> ("d3d9.supportDFFormats", true);
this->supportX4R4G4B4 = config.getOption<bool> ("d3d9.supportX4R4G4B4", true);
this->supportD32 = config.getOption<bool> ("d3d9.supportD32", true);
this->useD32forD24 = config.getOption<bool> ("d3d9.useD32forD24", false);
this->disableA8RT = config.getOption<bool> ("d3d9.disableA8RT", false);
this->invariantPosition = config.getOption<bool> ("d3d9.invariantPosition", true);
this->memoryTrackTest = config.getOption<bool> ("d3d9.memoryTrackTest", false);
Expand Down
3 changes: 3 additions & 0 deletions src/d3d9/d3d9_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,9 @@ namespace dxvk {
/// Support D32
bool supportD32;

/// Use D32f for D24
bool useD32forD24;

/// Disable D3DFMT_A8 for render targets.
/// Specifically to work around a game
/// bug in The Sims 2 that happens on native too!
Expand Down
4 changes: 3 additions & 1 deletion src/d3d9/d3d9_spec_constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ namespace dxvk {
SpecVertexShaderBools, // 16 bools | Bits: 16
SpecPixelShaderBools, // 16 bools | Bits: 16

SpecDrefClamp, // 1 bit for 16 PS samplers | Bits: 16
SpecFetch4, // 1 bit for 16 PS samplers | Bits: 16

SpecConstantCount,
Expand Down Expand Up @@ -62,7 +63,8 @@ namespace dxvk {
{ 3, 0, 16 }, // VertexShaderBools
{ 3, 16, 16 }, // PixelShaderBools

{ 4, 0, 16 }, // Fetch4
{ 4, 0, 16 }, // DrefClamp
{ 4, 16, 16 }, // Fetch4
}};

template <D3D9SpecConstantId Id, typename T>
Expand Down
144 changes: 48 additions & 96 deletions src/dxso/dxso_compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2648,10 +2648,30 @@ void DxsoCompiler::emitControlFlowGenericLoop(
DxsoRegMask vec3Mask(true, true, true, false);
DxsoRegMask srcMask (true, true, true, true);

auto GetProjectionValue = [&]() {
auto DoProjection = [&](DxsoRegisterValue coord, bool switchProjRes) {
uint32_t bool_t = m_module.defBoolType();
uint32_t texcoord_t = getVectorTypeId(coord.type);

uint32_t w = 3;
return m_module.opCompositeExtract(
m_module.defFloatType(32), texcoordVar.id, 1, &w);

uint32_t projScalar = m_module.opCompositeExtract(
m_module.defFloatType(32), coord.id, 1, &w);

projScalar = m_module.opFDiv(m_module.defFloatType(32), m_module.constf32(1.0), projScalar);
uint32_t projResult = m_module.opVectorTimesScalar(texcoord_t, coord.id, projScalar);

if (switchProjRes) {
uint32_t shouldProj = m_spec.get(m_module, m_specUbo, SpecProjectionType, samplerIdx, 1);
shouldProj = m_module.opINotEqual(bool_t, shouldProj, m_module.constu32(0));

uint32_t bvec4_t = m_module.defVectorType(bool_t, 4);
std::array<uint32_t, 4> indices = { shouldProj, shouldProj, shouldProj, shouldProj };
shouldProj = m_module.opCompositeConstruct(bvec4_t, indices.size(), indices.data());

return m_module.opSelect(texcoord_t, shouldProj, projResult, coord.id);
} else {
return projResult;
}
};

if (opcode == DxsoOpcode::TexM3x2Tex || opcode == DxsoOpcode::TexM3x3Tex || opcode == DxsoOpcode::TexM3x3Spec || opcode == DxsoOpcode::TexM3x3VSpec) {
Expand Down Expand Up @@ -2711,23 +2731,9 @@ void DxsoCompiler::emitControlFlowGenericLoop(
texcoordVar = m;
samplerIdx = ctx.dst.id.num;

uint32_t texcoord_t = getVectorTypeId(texcoordVar.type);

// The projection (/.w) happens before this...
// Of course it does...
uint32_t bool_t = m_module.defBoolType();

uint32_t shouldProj = m_spec.get(m_module, m_specUbo, SpecProjectionType, samplerIdx, 1);
shouldProj = m_module.opINotEqual(bool_t, shouldProj, m_module.constu32(0));

uint32_t bvec4_t = m_module.defVectorType(bool_t, 4);
std::array<uint32_t, 4> indices = { shouldProj, shouldProj, shouldProj, shouldProj };
shouldProj = m_module.opCompositeConstruct(bvec4_t, indices.size(), indices.data());

uint32_t projScalar = m_module.opFDiv(m_module.defFloatType(32), m_module.constf32(1.0), GetProjectionValue());
uint32_t projResult = m_module.opVectorTimesScalar(texcoord_t, texcoordVar.id, projScalar);

texcoordVar.id = m_module.opSelect(texcoord_t, shouldProj, projResult, texcoordVar.id);
texcoordVar.id = DoProjection(texcoordVar, true);

// u' = tc(m).x + [bm00(m) * t(n).x + bm10(m) * t(n).y]
// v' = tc(m).y + [bm01(m) * t(n).x + bm11(m) * t(n).y]
Expand Down Expand Up @@ -2811,7 +2817,7 @@ void DxsoCompiler::emitControlFlowGenericLoop(

DxsoSampler sampler = m_samplers.at(samplerIdx);

auto SampleImage = [this, opcode, dst, ctx, samplerIdx, GetProjectionValue](DxsoRegisterValue texcoordVar, DxsoSamplerInfo& sampler, bool depth, DxsoSamplerType samplerType, uint32_t isNull) {
auto SampleImage = [this, opcode, dst, ctx, samplerIdx, DoProjection](DxsoRegisterValue texcoordVar, DxsoSamplerInfo& sampler, bool depth, DxsoSamplerType samplerType, uint32_t isNull) {
DxsoRegisterValue result;
result.type.ctype = dst.type.ctype;
result.type.ccount = depth ? 1 : 4;
Expand All @@ -2838,12 +2844,10 @@ void DxsoCompiler::emitControlFlowGenericLoop(
imageOperands.sGradY = emitRegisterLoad(ctx.src[3], gradMask).id;
}

uint32_t projDivider = 0;

if (opcode == DxsoOpcode::Tex
&& m_programInfo.majorVersion() >= 2) {
if (ctx.instruction.specificData.texld == DxsoTexLdMode::Project) {
projDivider = GetProjectionValue();
texcoordVar.id = DoProjection(texcoordVar, false);
}
else if (ctx.instruction.specificData.texld == DxsoTexLdMode::Bias) {
uint32_t w = 3;
Expand All @@ -2853,37 +2857,29 @@ void DxsoCompiler::emitControlFlowGenericLoop(
}
}

bool switchProjResult = m_programInfo.majorVersion() < 2 && samplerType != SamplerTypeTextureCube;

if (switchProjResult)
projDivider = GetProjectionValue();

// We already handled this...
if (opcode == DxsoOpcode::TexBem) {
switchProjResult = false;
projDivider = 0;
// We already handled this for TexBem(L)
if (m_programInfo.majorVersion() < 2 && samplerType != SamplerTypeTextureCube && opcode != DxsoOpcode::TexBem && opcode != DxsoOpcode::TexBemL) {
texcoordVar.id = DoProjection(texcoordVar, true);
}

uint32_t reference = 0;
uint32_t bool_t = m_module.defBoolType();

uint32_t reference = 0;
if (depth) {
uint32_t fType = m_module.defFloatType(32);
uint32_t component = sampler.dimensions;
reference = m_module.opCompositeExtract(
m_module.defFloatType(32), texcoordVar.id, 1, &component);
}

if (projDivider != 0) {
for (uint32_t i = sampler.dimensions; i < 4; i++) {
texcoordVar.id = m_module.opCompositeInsert(getVectorTypeId(texcoordVar.type),
projDivider, texcoordVar.id, 1, &i);
}
fType, texcoordVar.id, 1, &component);
uint32_t clampDref = m_spec.get(m_module, m_specUbo, SpecDrefClamp, samplerIdx, 1);
clampDref = m_module.opINotEqual(bool_t, clampDref, m_module.constu32(0));
uint32_t clampedDref = m_module.opFClamp(fType, reference, m_module.constf32(0.0f), m_module.constf32(1.0f));
reference = m_module.opSelect(fType, clampDref, clampedDref, reference);
}

uint32_t fetch4 = 0;
if (m_programInfo.type() == DxsoProgramType::PixelShader && samplerType != SamplerTypeTexture3D) {
fetch4 = m_spec.get(m_module, m_specUbo, SpecFetch4, samplerIdx, 1);

uint32_t bool_t = m_module.defBoolType();
fetch4 = m_module.opINotEqual(bool_t, fetch4, m_module.constu32(0));

uint32_t bvec4_t = m_module.defVectorType(bool_t, 4);
Expand All @@ -2892,41 +2888,13 @@ void DxsoCompiler::emitControlFlowGenericLoop(
}

result.id = this->emitSample(
projDivider != 0,
typeId,
sampler,
texcoordVar,
reference,
fetch4,
imageOperands);

if (switchProjResult) {
uint32_t bool_t = m_module.defBoolType();

uint32_t nonProjResult = this->emitSample(
0,
typeId,
sampler,
texcoordVar,
reference,
fetch4,
imageOperands);

uint32_t shouldProj = m_spec.get(m_module, m_specUbo, SpecProjectionType, samplerIdx, 1);
shouldProj = m_module.opINotEqual(m_module.defBoolType(), shouldProj, m_module.constu32(0));

// Depth -> .x
// Colour -> .xyzw
// Need to replicate the bool for the opSelect.
if (!depth) {
uint32_t bvec4_t = m_module.defVectorType(bool_t, 4);
std::array<uint32_t, 4> indices = { shouldProj, shouldProj, shouldProj, shouldProj };
shouldProj = m_module.opCompositeConstruct(bvec4_t, indices.size(), indices.data());
}

result.id = m_module.opSelect(typeId, shouldProj, result.id, nonProjResult);
}

// If we are sampling depth we've already specc'ed this!
// This path is always size 4 because it only hits on color.
if (isNull != 0) {
Expand Down Expand Up @@ -3118,7 +3086,6 @@ void DxsoCompiler::emitControlFlowGenericLoop(


uint32_t DxsoCompiler::emitSample(
bool projected,
uint32_t resultType,
DxsoSamplerInfo& samplerInfo,
DxsoRegisterValue coordinates,
Expand All @@ -3134,37 +3101,22 @@ void DxsoCompiler::emitControlFlowGenericLoop(

uint32_t val;

// No Fetch 4
if (projected) {
if (depthCompare) {
if (explicitLod)
val = m_module.opImageSampleProjDrefExplicitLod(resultType, sampledImage, coordinates.id, reference, operands);
else
val = m_module.opImageSampleProjDrefImplicitLod(resultType, sampledImage, coordinates.id, reference, operands);
}
else {
if (explicitLod)
val = m_module.opImageSampleProjExplicitLod(resultType, sampledImage, coordinates.id, operands);
else
val = m_module.opImageSampleProjImplicitLod(resultType, sampledImage, coordinates.id, operands);
}

if (depthCompare) {
if (explicitLod)
val = m_module.opImageSampleDrefExplicitLod(resultType, sampledImage, coordinates.id, reference, operands);
else
val = m_module.opImageSampleDrefImplicitLod(resultType, sampledImage, coordinates.id, reference, operands);
}
else {
if (depthCompare) {
if (explicitLod)
val = m_module.opImageSampleDrefExplicitLod(resultType, sampledImage, coordinates.id, reference, operands);
else
val = m_module.opImageSampleDrefImplicitLod(resultType, sampledImage, coordinates.id, reference, operands);
}
else {
if (explicitLod)
val = m_module.opImageSampleExplicitLod(resultType, sampledImage, coordinates.id, operands);
else
val = m_module.opImageSampleImplicitLod(resultType, sampledImage, coordinates.id, operands);
}
if (explicitLod)
val = m_module.opImageSampleExplicitLod(resultType, sampledImage, coordinates.id, operands);
else
val = m_module.opImageSampleImplicitLod(resultType, sampledImage, coordinates.id, operands);
}



if (fetch4 && !depthCompare) {
SpirvImageOperands fetch4Operands = operands;
fetch4Operands.flags &= ~spv::ImageOperandsLodMask;
Expand Down
1 change: 0 additions & 1 deletion src/dxso/dxso_compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -664,7 +664,6 @@ namespace dxvk {
void emitTextureDepth(const DxsoInstructionContext& ctx);

uint32_t emitSample(
bool projected,
uint32_t resultType,
DxsoSamplerInfo& samplerInfo,
DxsoRegisterValue coordinates,
Expand Down