Skip to content

Commit

Permalink
GPU: Remove global indirection
Browse files Browse the repository at this point in the history
  • Loading branch information
stenzek committed Dec 21, 2024
1 parent c4b0430 commit 26db661
Show file tree
Hide file tree
Showing 13 changed files with 107 additions and 98 deletions.
4 changes: 2 additions & 2 deletions src/core/bus.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1875,7 +1875,7 @@ template<MemoryAccessSize size>
u32 Bus::HWHandlers::GPURead(PhysicalMemoryAddress address)
{
const u32 offset = address & GPU_MASK;
u32 value = g_gpu->ReadRegister(FIXUP_WORD_OFFSET(size, offset));
u32 value = g_gpu.ReadRegister(FIXUP_WORD_OFFSET(size, offset));
value = FIXUP_WORD_READ_VALUE(size, offset, value);
BUS_CYCLES(2);
return value;
Expand All @@ -1885,7 +1885,7 @@ template<MemoryAccessSize size>
void Bus::HWHandlers::GPUWrite(PhysicalMemoryAddress address, u32 value)
{
const u32 offset = address & GPU_MASK;
g_gpu->WriteRegister(FIXUP_WORD_OFFSET(size, offset), FIXUP_WORD_WRITE_VALUE(size, offset, value));
g_gpu.WriteRegister(FIXUP_WORD_OFFSET(size, offset), FIXUP_WORD_WRITE_VALUE(size, offset, value));
}

template<MemoryAccessSize size>
Expand Down
10 changes: 5 additions & 5 deletions src/core/dma.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -802,9 +802,9 @@ TickCount DMA::TransferMemoryToDevice(u32 address, u32 increment, u32 word_count
{
case Channel::GPU:
{
if (g_gpu->BeginDMAWrite()) [[likely]]
if (g_gpu.BeginDMAWrite()) [[likely]]
{
if (GPUDump::Recorder* dump = g_gpu->GetGPUDump()) [[unlikely]]
if (GPUDump::Recorder* dump = g_gpu.GetGPUDump()) [[unlikely]]
{
// No wraparound?
dump->BeginGP0Packet(word_count);
Expand All @@ -831,10 +831,10 @@ TickCount DMA::TransferMemoryToDevice(u32 address, u32 increment, u32 word_count
{
u32 value;
std::memcpy(&value, &ram_pointer[address], sizeof(u32));
g_gpu->DMAWrite(address, value);
g_gpu.DMAWrite(address, value);
address = (address + increment) & mask;
}
g_gpu->EndDMAWrite();
g_gpu.EndDMAWrite();
}
}
break;
Expand Down Expand Up @@ -900,7 +900,7 @@ TickCount DMA::TransferDeviceToMemory(u32 address, u32 increment, u32 word_count
switch (channel)
{
case Channel::GPU:
g_gpu->DMARead(dest_pointer, word_count);
g_gpu.DMARead(dest_pointer, word_count);
break;

case Channel::CDROM:
Expand Down
26 changes: 14 additions & 12 deletions src/core/gpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@

LOG_CHANNEL(GPU);

std::unique_ptr<GPU> g_gpu;
ALIGN_TO_CACHE_LINE GPU g_gpu;

// Aligning VRAM to 4K would be sufficient, since the ARM64 instructions compute 4K page-aligned addresses.
// However, we also want to import the memory for readbacks on Metal.
Expand All @@ -60,13 +60,13 @@ u16 g_gpu_clut[GPU_CLUT_SIZE];
const GPU::GP0CommandHandlerTable GPU::s_GP0_command_handler_table = GPU::GenerateGP0CommandHandlerTable();

static TimingEvent s_crtc_tick_event(
"GPU CRTC Tick", 1, 1, [](void* param, TickCount ticks, TickCount ticks_late) { g_gpu->CRTCTickEvent(ticks); },
"GPU CRTC Tick", 1, 1, [](void* param, TickCount ticks, TickCount ticks_late) { g_gpu.CRTCTickEvent(ticks); },
nullptr);
static TimingEvent s_command_tick_event(
"GPU Command Tick", 1, 1, [](void* param, TickCount ticks, TickCount ticks_late) { g_gpu->CommandTickEvent(ticks); },
"GPU Command Tick", 1, 1, [](void* param, TickCount ticks, TickCount ticks_late) { g_gpu.CommandTickEvent(ticks); },
nullptr);
static TimingEvent s_frame_done_event(
"Frame Done", 1, 1, [](void* param, TickCount ticks, TickCount ticks_late) { g_gpu->FrameDoneEvent(ticks); },
"Frame Done", 1, 1, [](void* param, TickCount ticks, TickCount ticks_late) { g_gpu.FrameDoneEvent(ticks); },
nullptr);

// #define PSX_GPU_STATS
Expand All @@ -77,14 +77,7 @@ static u32 s_active_gpu_cycles_frames = 0;

GPU::GPU() = default;

GPU::~GPU()
{
s_command_tick_event.Deactivate();
s_crtc_tick_event.Deactivate();
s_frame_done_event.Deactivate();

StopRecordingGPUDump();
}
GPU::~GPU() = default;

void GPU::Initialize()
{
Expand All @@ -104,6 +97,15 @@ void GPU::Initialize()
#endif
}

// Explicit teardown counterpart to Initialize(): deactivates the three file-scope
// timing events (command tick, CRTC tick, frame done) and then calls
// StopRecordingGPUDump().
// NOTE(review): presumably this must be called explicitly by whoever owns the GPU
// lifecycle, since the destructor is defaulted and does not do this work — confirm
// against the call sites.
void GPU::Shutdown()
{
// Deactivate the timing events so their callbacks no longer fire into the GPU.
s_command_tick_event.Deactivate();
s_crtc_tick_event.Deactivate();
s_frame_done_event.Deactivate();

// Finish any in-progress GPU dump recording.
StopRecordingGPUDump();
}

void GPU::UpdateSettings(const Settings& old_settings)
{
m_force_progressive_scan = (g_settings.display_deinterlacing_mode == DisplayDeinterlacingMode::Progressive);
Expand Down
3 changes: 2 additions & 1 deletion src/core/gpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ class GPU final
~GPU();

void Initialize();
void Shutdown();
void Reset(bool clear_vram);
bool DoState(StateWrapper& sw, bool update_display);
void DoMemoryState(StateWrapper& sw, System::MemorySaveState& mss, bool update_display);
Expand Down Expand Up @@ -561,6 +562,6 @@ class GPU final
static const GP0CommandHandlerTable s_GP0_command_handler_table;
};

extern std::unique_ptr<GPU> g_gpu;
extern GPU g_gpu;
extern u16 g_vram[VRAM_SIZE / sizeof(u16)];
extern u16 g_gpu_clut[GPU_CLUT_SIZE];
6 changes: 3 additions & 3 deletions src/core/gpu_dump.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ std::unique_ptr<GPUDump::Recorder> GPUDump::Recorder::Create(std::string path, s

ret = std::unique_ptr<Recorder>(new Recorder(std::move(fp), num_frames, std::move(path)));
ret->WriteHeaders(serial);
g_gpu->WriteCurrentVideoModeToDump(ret.get());
g_gpu.WriteCurrentVideoModeToDump(ret.get());
ret->WriteCurrentVRAM();

// Write start of stream.
Expand Down Expand Up @@ -285,7 +285,7 @@ void GPUDump::Recorder::WriteHeaders(std::string_view serial)

// Write textual video mode.
BeginPacket(PacketType::TextualVideoFormat);
WriteString(g_gpu->IsInPALMode() ? "PAL" : "NTSC");
WriteString(g_gpu.IsInPALMode() ? "PAL" : "NTSC");
EndPacket();

// Write DuckStation version.
Expand Down Expand Up @@ -520,7 +520,7 @@ void GPUDump::Player::ProcessPacket(const PacketRef& pkt)
if (pkt.type <= PacketType::VSyncEvent)
{
// gp0/gp1/vsync => direct to gpu
g_gpu->ProcessGPUDumpPacket(pkt.type, pkt.data);
g_gpu.ProcessGPUDumpPacket(pkt.type, pkt.data);
return;
}
}
Expand Down
1 change: 0 additions & 1 deletion src/core/gpu_hw_texture_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3483,7 +3483,6 @@ void GPUTextureCache::ReloadTextureReplacements(bool show_info)

PurgeUnreferencedTexturesFromCache();

DebugAssert(g_gpu);
UpdateVRAMTrackingState();
InvalidateSources();

Expand Down
80 changes: 44 additions & 36 deletions src/core/gpu_sw_rasterizer.inl
Original file line number Diff line number Diff line change
Expand Up @@ -506,10 +506,11 @@ struct PixelVectors
} // namespace

template<bool texture_enable, bool raw_texture_enable, bool transparency_enable>
ALWAYS_INLINE_RELEASE static void
ShadePixel(const PixelVectors<texture_enable>& pv, GPUTextureMode texture_mode, GPUTransparencyMode transparency_mode,
u32 start_x, u32 y, GSVectorNi vertex_color_rg, GSVectorNi vertex_color_ba, GSVectorNi texcoord_x,
GSVectorNi texcoord_y, GSVectorNi preserve_mask, GSVectorNi dither)
ALWAYS_INLINE_RELEASE static void ShadePixel(const PixelVectors<texture_enable>& RESTRICT pv,
GPUTextureMode texture_mode, GPUTransparencyMode transparency_mode,
u32 start_x, u32 y, GSVectorNi vertex_color_rg, GSVectorNi vertex_color_ba,
GSVectorNi texcoord_x, GSVectorNi texcoord_y, GSVectorNi preserve_mask,
GSVectorNi dither)
{
static constexpr GSVectorNi coord_mask_x = GSVectorNi::cxpr(VRAM_WIDTH_MASK);
static constexpr GSVectorNi coord_mask_y = GSVectorNi::cxpr(VRAM_HEIGHT_MASK);
Expand Down Expand Up @@ -693,7 +694,7 @@ ShadePixel(const PixelVectors<texture_enable>& pv, GPUTextureMode texture_mode,
}

template<bool texture_enable, bool raw_texture_enable, bool transparency_enable>
static void DrawRectangle(const GPUBackendDrawRectangleCommand* cmd)
static void DrawRectangle(const GPUBackendDrawRectangleCommand* RESTRICT cmd)
{
const s32 origin_x = cmd->x;
const s32 origin_y = cmd->y;
Expand Down Expand Up @@ -765,8 +766,9 @@ static void DrawRectangle(const GPUBackendDrawRectangleCommand* cmd)

// TODO: Vectorize line draw.
template<bool shading_enable, bool transparency_enable>
static void DrawLine(const GPUBackendDrawLineCommand* cmd, const GPUBackendDrawLineCommand::Vertex* p0,
const GPUBackendDrawLineCommand::Vertex* p1)
static void DrawLine(const GPUBackendDrawLineCommand* RESTRICT cmd,
const GPUBackendDrawLineCommand::Vertex* RESTRICT p0,
const GPUBackendDrawLineCommand::Vertex* RESTRICT p1)
{
static constexpr u32 XY_SHIFT = 32;
static constexpr u32 RGB_SHIFT = 12;
Expand Down Expand Up @@ -971,8 +973,8 @@ struct TrianglePart
#ifndef USE_VECTOR

template<bool shading_enable, bool texture_enable, bool raw_texture_enable, bool transparency_enable>
static void DrawSpan(const GPUBackendDrawCommand* cmd, s32 y, s32 x_start, s32 x_bound, UVStepper uv,
const UVSteps& uvstep, RGBStepper rgb, const RGBSteps& rgbstep)
static void DrawSpan(const GPUBackendDrawCommand* RESTRICT cmd, s32 y, s32 x_start, s32 x_bound, UVStepper uv,
const UVSteps& RESTRICT uvstep, RGBStepper rgb, const RGBSteps& RESTRICT rgbstep)
{
s32 width = x_bound - x_start;
s32 current_x = TruncateGPUVertexPosition(x_start);
Expand Down Expand Up @@ -1011,9 +1013,10 @@ static void DrawSpan(const GPUBackendDrawCommand* cmd, s32 y, s32 x_start, s32 x
}

template<bool shading_enable, bool texture_enable, bool raw_texture_enable, bool transparency_enable>
ALWAYS_INLINE_RELEASE static void DrawTrianglePart(const GPUBackendDrawCommand* cmd, const TrianglePart& tp,
const UVStepper& uv, const UVSteps& uvstep, const RGBStepper& rgb,
const RGBSteps& rgbstep)
ALWAYS_INLINE_RELEASE static void DrawTrianglePart(const GPUBackendDrawCommand* RESTRICT cmd,
const TrianglePart& RESTRICT tp, const UVStepper& RESTRICT uv,
const UVSteps& RESTRICT uvstep, const RGBStepper& RESTRICT rgb,
const RGBSteps& RESTRICT rgbstep)
{
static constexpr auto unfp_xy = [](s64 xfp) -> s32 { return static_cast<s32>(static_cast<u64>(xfp) >> 32); };

Expand Down Expand Up @@ -1150,9 +1153,10 @@ struct TriangleVectors : PixelVectors<texture_enable>
} // namespace

template<bool shading_enable, bool texture_enable, bool raw_texture_enable, bool transparency_enable>
ALWAYS_INLINE_RELEASE static void DrawSpan(const GPUBackendDrawCommand* cmd, s32 y, s32 x_start, s32 x_bound,
UVStepper uv, const UVSteps& uvstep, RGBStepper rgb, const RGBSteps& rgbstep,
const TriangleVectors<shading_enable, texture_enable>& tv)
ALWAYS_INLINE_RELEASE static void DrawSpan(const GPUBackendDrawCommand* RESTRICT cmd, s32 y, s32 x_start, s32 x_bound,
UVStepper uv, const UVSteps& RESTRICT uvstep, RGBStepper rgb,
const RGBSteps& RESTRICT rgbstep,
const TriangleVectors<shading_enable, texture_enable>& RESTRICT tv)
{
s32 width = x_bound - x_start;
s32 current_x = TruncateGPUVertexPosition(x_start);
Expand Down Expand Up @@ -1255,9 +1259,10 @@ ALWAYS_INLINE_RELEASE static void DrawSpan(const GPUBackendDrawCommand* cmd, s32
}

template<bool shading_enable, bool texture_enable, bool raw_texture_enable, bool transparency_enable>
ALWAYS_INLINE_RELEASE static void DrawTrianglePart(const GPUBackendDrawCommand* cmd, const TrianglePart& tp,
const UVStepper& uv, const UVSteps& uvstep, const RGBStepper& rgb,
const RGBSteps& rgbstep)
ALWAYS_INLINE_RELEASE static void DrawTrianglePart(const GPUBackendDrawCommand* RESTRICT cmd,
const TrianglePart& RESTRICT tp, const UVStepper& RESTRICT uv,
const UVSteps& RESTRICT uvstep, const RGBStepper& RESTRICT rgb,
const RGBSteps& RESTRICT rgbstep)
{
static constexpr auto unfp_xy = [](s64 xfp) -> s32 { return static_cast<s32>(static_cast<u64>(xfp) >> 32); };

Expand Down Expand Up @@ -1356,13 +1361,15 @@ ALWAYS_INLINE_RELEASE static void DrawTrianglePart(const GPUBackendDrawCommand*
#endif // USE_VECTOR

template<bool shading_enable, bool texture_enable, bool raw_texture_enable, bool transparency_enable>
static void DrawTriangle(const GPUBackendDrawCommand* cmd, const GPUBackendDrawPolygonCommand::Vertex* v0,
const GPUBackendDrawPolygonCommand::Vertex* v1, const GPUBackendDrawPolygonCommand::Vertex* v2)
static void DrawTriangle(const GPUBackendDrawCommand* RESTRICT cmd,
const GPUBackendDrawPolygonCommand::Vertex* RESTRICT v0,
const GPUBackendDrawPolygonCommand::Vertex* RESTRICT v1,
const GPUBackendDrawPolygonCommand::Vertex* RESTRICT v2)
{
#ifdef CHECK_VECTOR
const GPUBackendDrawPolygonCommand::Vertex* orig_v0 = v0;
const GPUBackendDrawPolygonCommand::Vertex* orig_v1 = v1;
const GPUBackendDrawPolygonCommand::Vertex* orig_v2 = v2;
const GPUBackendDrawPolygonCommand::Vertex* RESTRICT orig_v0 = v0;
const GPUBackendDrawPolygonCommand::Vertex* RESTRICT orig_v1 = v1;
const GPUBackendDrawPolygonCommand::Vertex* RESTRICT orig_v2 = v2;
#endif

// Sort vertices so that v0 is the top vertex, v1 is the bottom vertex, and v2 is the side vertex.
Expand Down Expand Up @@ -1417,8 +1424,8 @@ static void DrawTriangle(const GPUBackendDrawCommand* cmd, const GPUBackendDrawP
const u32 ofi = BoolToUInt32(!right_facing);

TrianglePart triparts[2];
TrianglePart& tpo = triparts[vo];
TrianglePart& tpp = triparts[vo ^ 1];
TrianglePart& RESTRICT tpo = triparts[vo];
TrianglePart& RESTRICT tpp = triparts[vo ^ 1];
tpo.start_y = vertices[0 ^ vo]->y;
tpo.end_y = vertices[1 ^ vo]->y;
tpp.start_y = vertices[1 ^ vp]->y;
Expand Down Expand Up @@ -1469,7 +1476,7 @@ static void DrawTriangle(const GPUBackendDrawCommand* cmd, const GPUBackendDrawP
// Undo the start of the vertex, so that when we add the offset for each line, it starts at the beginning value.
UVStepper uv;
RGBStepper rgb;
const GPUBackendDrawPolygonCommand::Vertex* top_left_vertex = vertices[tl];
const GPUBackendDrawPolygonCommand::Vertex* RESTRICT top_left_vertex = vertices[tl];
if constexpr (texture_enable)
{
uv.Init(top_left_vertex->u, top_left_vertex->v);
Expand Down Expand Up @@ -1542,7 +1549,7 @@ static void FillVRAMImpl(u32 x, u32 y, u32 width, u32 height, u32 color, bool in
{
const u32 row = (y + yoffs) % VRAM_HEIGHT;

u16* row_ptr = &g_vram[row * VRAM_WIDTH + x];
u16* RESTRICT row_ptr = &g_vram[row * VRAM_WIDTH + x];
u32 xoffs = 0;
for (; xoffs < aligned_width; xoffs += vector_width, row_ptr += vector_width)
GSVector4i::store<false>(row_ptr, fill);
Expand All @@ -1563,7 +1570,7 @@ static void FillVRAMImpl(u32 x, u32 y, u32 width, u32 height, u32 color, bool in
if ((row & u32(1)) == active_field)
continue;

u16* row_ptr = &g_vram[row * VRAM_WIDTH + x];
u16* RESTRICT row_ptr = &g_vram[row * VRAM_WIDTH + x];
u32 xoffs = 0;
for (; xoffs < aligned_width; xoffs += vector_width, row_ptr += vector_width)
GSVector4i::store<false>(row_ptr, fill);
Expand All @@ -1579,7 +1586,7 @@ static void FillVRAMImpl(u32 x, u32 y, u32 width, u32 height, u32 color, bool in
if ((row & u32(1)) == active_field)
continue;

u16* row_ptr = &g_vram[row * VRAM_WIDTH];
u16* RESTRICT row_ptr = &g_vram[row * VRAM_WIDTH];
for (u32 xoffs = 0; xoffs < width; xoffs++)
{
const u32 col = (x + xoffs) % VRAM_WIDTH;
Expand All @@ -1593,7 +1600,7 @@ static void FillVRAMImpl(u32 x, u32 y, u32 width, u32 height, u32 color, bool in
for (u32 yoffs = 0; yoffs < height; yoffs++)
{
const u32 row = (y + yoffs) % VRAM_HEIGHT;
u16* row_ptr = &g_vram[row * VRAM_WIDTH];
u16* RESTRICT row_ptr = &g_vram[row * VRAM_WIDTH];
for (u32 xoffs = 0; xoffs < width; xoffs++)
{
const u32 col = (x + xoffs) % VRAM_WIDTH;
Expand Down Expand Up @@ -1622,7 +1629,7 @@ static void FillVRAMImpl(u32 x, u32 y, u32 width, u32 height, u32 color, bool in
if ((row & u32(1)) == active_field)
continue;

u16* row_ptr = &g_vram[row * VRAM_WIDTH];
u16* RESTRICT row_ptr = &g_vram[row * VRAM_WIDTH];
for (u32 xoffs = 0; xoffs < width; xoffs++)
{
const u32 col = (x + xoffs) % VRAM_WIDTH;
Expand All @@ -1635,7 +1642,7 @@ static void FillVRAMImpl(u32 x, u32 y, u32 width, u32 height, u32 color, bool in
for (u32 yoffs = 0; yoffs < height; yoffs++)
{
const u32 row = (y + yoffs) % VRAM_HEIGHT;
u16* row_ptr = &g_vram[row * VRAM_WIDTH];
u16* RESTRICT row_ptr = &g_vram[row * VRAM_WIDTH];
for (u32 xoffs = 0; xoffs < width; xoffs++)
{
const u32 col = (x + xoffs) % VRAM_WIDTH;
Expand All @@ -1646,12 +1653,13 @@ static void FillVRAMImpl(u32 x, u32 y, u32 width, u32 height, u32 color, bool in
#endif
}

static void WriteVRAMImpl(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask)
static void WriteVRAMImpl(u32 x, u32 y, u32 width, u32 height, const void* RESTRICT data, bool set_mask,
bool check_mask)
{
// Fast path when the copy is not oversized.
if ((x + width) <= VRAM_WIDTH && (y + height) <= VRAM_HEIGHT && !set_mask && !check_mask)
{
const u16* src_ptr = static_cast<const u16*>(data);
const u16* RESTRICT src_ptr = static_cast<const u16*>(data);
u16* dst_ptr = &g_vram[y * VRAM_WIDTH + x];
for (u32 yoffs = 0; yoffs < height; yoffs++)
{
Expand All @@ -1664,7 +1672,7 @@ static void WriteVRAMImpl(u32 x, u32 y, u32 width, u32 height, const void* data,
{
// Slow path when we need to handle wrap-around.
// During transfer/render operations, if ((dst_pixel & mask_and) == 0) { pixel = src_pixel | mask_or }
const u16* src_ptr = static_cast<const u16*>(data);
const u16* RESTRICT src_ptr = static_cast<const u16*>(data);
const u16 mask_and = check_mask ? 0x8000u : 0x0000u;
const u16 mask_or = set_mask ? 0x8000u : 0x0000u;

Expand Down Expand Up @@ -1713,7 +1721,7 @@ static void WriteVRAMImpl(u32 x, u32 y, u32 width, u32 height, const void* data,
for (; col < width;)
{
// TODO: Handle unaligned reads...
u16* pixel_ptr = &dst_row_ptr[(x + col++) % VRAM_WIDTH];
u16* RESTRICT pixel_ptr = &dst_row_ptr[(x + col++) % VRAM_WIDTH];
if (((*pixel_ptr) & mask_and) == 0)
*pixel_ptr = *(src_ptr++) | mask_or;
}
Expand Down
Loading

0 comments on commit 26db661

Please sign in to comment.