Skip to content

Commit

Permalink
GPUDevice: Add compute shader support
Browse files Browse the repository at this point in the history
stenzek committed Nov 24, 2024
1 parent affbdfc commit e647192
Showing 20 changed files with 791 additions and 197 deletions.
84 changes: 59 additions & 25 deletions src/util/d3d11_device.cpp
Original file line number Diff line number Diff line change
@@ -185,6 +185,8 @@ void D3D11Device::SetFeatures(FeatureMask disabled_features)
m_features.texture_buffers_emulated_with_ssbo = false;
m_features.feedback_loops = false;
m_features.geometry_shaders = !(disabled_features & FEATURE_MASK_GEOMETRY_SHADERS);
m_features.compute_shaders =
(!(disabled_features & FEATURE_MASK_COMPUTE_SHADERS) && feature_level >= D3D_FEATURE_LEVEL_11_0);
m_features.partial_msaa_resolve = false;
m_features.memory_import = false;
m_features.explicit_present = false;
@@ -896,19 +898,7 @@ void D3D11Device::PushUniformBuffer(const void* data, u32 data_size)
m_uniform_buffer.Unmap(m_context.Get(), req_size);
s_stats.buffer_streamed += data_size;

if (m_uniform_buffer.IsUsingMapNoOverwrite())
{
const UINT first_constant = (res.index_aligned * UNIFORM_BUFFER_ALIGNMENT) / 16u;
const UINT num_constants = req_size / 16u;
m_context->VSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
m_context->PSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
}
else
{
DebugAssert(res.index_aligned == 0);
m_context->VSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
m_context->PSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
}
BindUniformBuffer(res.index_aligned * UNIFORM_BUFFER_ALIGNMENT, req_size);
}

void* D3D11Device::MapUniformBuffer(u32 size)
@@ -930,18 +920,37 @@ void D3D11Device::UnmapUniformBuffer(u32 size)
m_uniform_buffer.Unmap(m_context.Get(), req_size);
s_stats.buffer_streamed += size;

BindUniformBuffer(pos, req_size);
}

void D3D11Device::BindUniformBuffer(u32 offset, u32 size)
{
if (m_uniform_buffer.IsUsingMapNoOverwrite())
{
const UINT first_constant = pos / 16u;
const UINT num_constants = req_size / 16u;
m_context->VSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
m_context->PSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
const UINT first_constant = offset / 16u;
const UINT num_constants = size / 16u;
if (m_current_compute_shader)
{
m_context->CSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
}
else
{
m_context->VSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
m_context->PSSetConstantBuffers1(0, 1, m_uniform_buffer.GetD3DBufferArray(), &first_constant, &num_constants);
}
}
else
{
DebugAssert(pos == 0);
m_context->VSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
m_context->PSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
DebugAssert(offset == 0);
if (m_current_compute_shader)
{
m_context->CSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
}
else
{
m_context->VSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
m_context->PSSetConstantBuffers(0, 1, m_uniform_buffer.GetD3DBufferArray());
}
}
}

@@ -1004,9 +1013,16 @@ void D3D11Device::SetRenderTargets(GPUTexture* const* rts, u32 num_rts, GPUTextu
for (u32 i = 0; i < m_num_current_render_targets; i++)
uavs[i] = m_current_render_targets[i]->GetD3DUAV();

m_context->OMSetRenderTargetsAndUnorderedAccessViews(
0, nullptr, m_current_depth_target ? m_current_depth_target->GetD3DDSV() : nullptr, 0,
m_num_current_render_targets, uavs.data(), nullptr);
if (!m_current_compute_shader)
{
m_context->OMSetRenderTargetsAndUnorderedAccessViews(
0, nullptr, m_current_depth_target ? m_current_depth_target->GetD3DDSV() : nullptr, 0,
m_num_current_render_targets, uavs.data(), nullptr);
}
else
{
m_context->CSSetUnorderedAccessViews(0, m_num_current_render_targets, uavs.data(), nullptr);
}
}
else
{
@@ -1046,11 +1062,15 @@ void D3D11Device::SetTextureSampler(u32 slot, GPUTexture* texture, GPUSampler* s
{
m_current_textures[slot] = T;
m_context->PSSetShaderResources(slot, 1, &T);
if (m_current_compute_shader)
m_context->CSSetShaderResources(slot, 1, &T);
}
if (m_current_samplers[slot] != S)
{
m_current_samplers[slot] = S;
m_context->PSSetSamplers(slot, 1, &S);
if (m_current_compute_shader)
m_context->CSSetSamplers(slot, 1, &S);
}
}

@@ -1060,6 +1080,8 @@ void D3D11Device::SetTextureBuffer(u32 slot, GPUTextureBuffer* buffer)
if (m_current_textures[slot] != B)
{
m_current_textures[slot] = B;

// Compute doesn't support texture buffers, yet...
m_context->PSSetShaderResources(slot, 1, &B);
}
}
@@ -1113,14 +1135,14 @@ void D3D11Device::SetScissor(const GSVector4i rc)

void D3D11Device::Draw(u32 vertex_count, u32 base_vertex)
{
DebugAssert(!m_vertex_buffer.IsMapped() && !m_index_buffer.IsMapped());
DebugAssert(!m_vertex_buffer.IsMapped() && !m_index_buffer.IsMapped() && !m_current_compute_shader);
s_stats.num_draws++;
m_context->Draw(vertex_count, base_vertex);
}

void D3D11Device::DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex)
{
DebugAssert(!m_vertex_buffer.IsMapped() && !m_index_buffer.IsMapped());
DebugAssert(!m_vertex_buffer.IsMapped() && !m_index_buffer.IsMapped() && !m_current_compute_shader);
s_stats.num_draws++;
m_context->DrawIndexed(index_count, base_index, base_vertex);
}
@@ -1129,3 +1151,15 @@ void D3D11Device::DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 ba
{
Panic("Barriers are not supported");
}

void D3D11Device::Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y,
u32 group_size_z)
{
DebugAssert(m_current_compute_shader);
s_stats.num_draws++;

const u32 groups_x = threads_x / group_size_x;
const u32 groups_y = threads_y / group_size_y;
const u32 groups_z = threads_z / group_size_z;
m_context->Dispatch(groups_x, groups_y, groups_z);
}
6 changes: 6 additions & 0 deletions src/util/d3d11_device.h
Original file line number Diff line number Diff line change
@@ -75,6 +75,7 @@ class D3D11Device final : public GPUDevice
std::string_view source, const char* entry_point,
DynamicHeapArray<u8>* out_binary, Error* error) override;
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::GraphicsConfig& config, Error* error) override;
std::unique_ptr<GPUPipeline> CreatePipeline(const GPUPipeline::ComputeConfig& config, Error* error) override;

void PushDebugGroup(const char* name) override;
void PopDebugGroup() override;
@@ -98,6 +99,8 @@ class D3D11Device final : public GPUDevice
void Draw(u32 vertex_count, u32 base_vertex) override;
void DrawIndexed(u32 index_count, u32 base_index, u32 base_vertex) override;
void DrawIndexedWithBarrier(u32 index_count, u32 base_index, u32 base_vertex, DrawBarrier type) override;
void Dispatch(u32 threads_x, u32 threads_y, u32 threads_z, u32 group_size_x, u32 group_size_y,
u32 group_size_z) override;

bool SetGPUTimingEnabled(bool enabled) override;
float GetAndResetAccumulatedGPUTime() override;
@@ -140,6 +143,8 @@ class D3D11Device final : public GPUDevice

bool CreateBuffers();
void DestroyBuffers();
void BindUniformBuffer(u32 offset, u32 size);
void UnbindComputePipeline();

bool IsRenderTargetBound(const D3D11Texture* tex) const;

@@ -180,6 +185,7 @@ class D3D11Device final : public GPUDevice
ID3D11VertexShader* m_current_vertex_shader = nullptr;
ID3D11GeometryShader* m_current_geometry_shader = nullptr;
ID3D11PixelShader* m_current_pixel_shader = nullptr;
ID3D11ComputeShader* m_current_compute_shader = nullptr;
ID3D11RasterizerState* m_current_rasterizer_state = nullptr;
ID3D11DepthStencilState* m_current_depth_state = nullptr;
ID3D11BlendState* m_current_blend_state = nullptr;
Loading

0 comments on commit e647192

Please sign in to comment.