-
Notifications
You must be signed in to change notification settings - Fork 2.2k
/
GPUCommon.h
308 lines (256 loc) · 8.95 KB
/
GPUCommon.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
#pragma once
#include "Common/Common.h"
#include "Common/MemoryUtil.h"
#include "GPU/GPUInterface.h"
#include "GPU/GPUState.h"
#include "GPU/Common/GPUDebugInterface.h"
#if defined(__ANDROID__)
#include <atomic>
#endif
#if defined(_M_SSE)
#include <emmintrin.h>
#endif
class FramebufferManagerCommon;
class TextureCacheCommon;
class DrawEngineCommon;
class GraphicsContext;
namespace Draw {
class DrawContext;
}
enum DrawType {
DRAW_UNKNOWN,
DRAW_PRIM,
DRAW_SPLINE,
DRAW_BEZIER,
};
enum {
FLAG_FLUSHBEFORE = 1,
FLAG_FLUSHBEFOREONCHANGE = 2,
FLAG_EXECUTE = 4, // needs to actually be executed. unused for now.
FLAG_EXECUTEONCHANGE = 8,
FLAG_READS_PC = 16,
FLAG_WRITES_PC = 32,
FLAG_DIRTYONCHANGE = 64, // NOTE: Either this or FLAG_EXECUTE*, not both!
};
class GPUCommon : public GPUInterface, public GPUDebugInterface {
public:
GPUCommon(GraphicsContext *gfxCtx, Draw::DrawContext *draw);
virtual ~GPUCommon();
Draw::DrawContext *GetDrawContext() override {
return draw_;
}
void Reinitialize() override;
void BeginHostFrame() override;
void EndHostFrame() override;
void InterruptStart(int listid) override;
void InterruptEnd(int listid) override;
void SyncEnd(GPUSyncType waitType, int listid, bool wokeThreads) override;
void EnableInterrupts(bool enable) override {
interruptsEnabled_ = enable;
}
void Resized() override;
void DumpNextFrame() override;
void ExecuteOp(u32 op, u32 diff) override;
void PreExecuteOp(u32 op, u32 diff) override;
bool InterpretList(DisplayList &list) override;
void ProcessDLQueue();
u32 UpdateStall(int listid, u32 newstall) override;
u32 EnqueueList(u32 listpc, u32 stall, int subIntrBase, PSPPointer<PspGeListArgs> args, bool head) override;
u32 DequeueList(int listid) override;
int ListSync(int listid, int mode) override;
u32 DrawSync(int mode) override;
int GetStack(int index, u32 stackPtr) override;
void DoState(PointerWrap &p) override;
bool BusyDrawing() override;
u32 Continue() override;
u32 Break(int mode) override;
void ReapplyGfxState() override;
void CopyDisplayToOutput() override = 0;
void InitClear() override = 0;
bool PerformMemoryCopy(u32 dest, u32 src, int size) override;
bool PerformMemorySet(u32 dest, u8 v, int size) override;
bool PerformMemoryDownload(u32 dest, int size) override;
bool PerformMemoryUpload(u32 dest, int size) override;
void InvalidateCache(u32 addr, int size, GPUInvalidationType type) override;
void NotifyVideoUpload(u32 addr, int size, int width, int format) override;
bool PerformStencilUpload(u32 dest, int size) override;
void Execute_OffsetAddr(u32 op, u32 diff);
void Execute_Vaddr(u32 op, u32 diff);
void Execute_Iaddr(u32 op, u32 diff);
void Execute_Origin(u32 op, u32 diff);
void Execute_Jump(u32 op, u32 diff);
void Execute_BJump(u32 op, u32 diff);
void Execute_Call(u32 op, u32 diff);
void Execute_Ret(u32 op, u32 diff);
void Execute_End(u32 op, u32 diff);
void Execute_VertexType(u32 op, u32 diff);
void Execute_VertexTypeSkinning(u32 op, u32 diff);
void Execute_Bezier(u32 op, u32 diff);
void Execute_Spline(u32 op, u32 diff);
void Execute_BoundingBox(u32 op, u32 diff);
void Execute_BlockTransferStart(u32 op, u32 diff);
void Execute_TexSize0(u32 op, u32 diff);
void Execute_TexLevel(u32 op, u32 diff);
void Execute_WorldMtxNum(u32 op, u32 diff);
void Execute_WorldMtxData(u32 op, u32 diff);
void Execute_ViewMtxNum(u32 op, u32 diff);
void Execute_ViewMtxData(u32 op, u32 diff);
void Execute_ProjMtxNum(u32 op, u32 diff);
void Execute_ProjMtxData(u32 op, u32 diff);
void Execute_TgenMtxNum(u32 op, u32 diff);
void Execute_TgenMtxData(u32 op, u32 diff);
void Execute_BoneMtxNum(u32 op, u32 diff);
void Execute_BoneMtxData(u32 op, u32 diff);
void Execute_MorphWeight(u32 op, u32 diff);
void Execute_Unknown(u32 op, u32 diff);
int EstimatePerVertexCost();
// Note: Not virtual!
void Flush();
#ifdef USE_CRT_DBG
#undef new
#endif
void *operator new(size_t s) {
return AllocateAlignedMemory(s, 16);
}
void operator delete(void *p) {
FreeAlignedMemory(p);
}
#ifdef USE_CRT_DBG
#define new DBG_NEW
#endif
// From GPUDebugInterface.
bool GetCurrentDisplayList(DisplayList &list) override;
bool GetCurrentFramebuffer(GPUDebugBuffer &buffer, GPUDebugFramebufferType type, int maxRes) override;
bool GetCurrentDepthbuffer(GPUDebugBuffer &buffer) override;
bool GetCurrentStencilbuffer(GPUDebugBuffer &buffer) override;
bool GetCurrentTexture(GPUDebugBuffer &buffer, int level) override;
bool GetCurrentClut(GPUDebugBuffer &buffer) override;
bool GetCurrentSimpleVertices(int count, std::vector<GPUDebugVertex> &vertices, std::vector<u16> &indices) override;
bool GetOutputFramebuffer(GPUDebugBuffer &buffer) override;
std::vector<std::string> DebugGetShaderIDs(DebugShaderType shader) override { return std::vector<std::string>(); };
std::string DebugGetShaderString(std::string id, DebugShaderType shader, DebugShaderStringType stringType) override {
return "N/A";
}
bool DescribeCodePtr(const u8 *ptr, std::string &name) override;
std::vector<DisplayList> ActiveDisplayLists() override;
void ResetListPC(int listID, u32 pc) override;
void ResetListStall(int listID, u32 stall) override;
void ResetListState(int listID, DisplayListState state) override;
GPUDebugOp DissassembleOp(u32 pc, u32 op) override;
std::vector<GPUDebugOp> DissassembleOpRange(u32 startpc, u32 endpc) override;
void NotifySteppingEnter() override;
void NotifySteppingExit() override;
u32 GetRelativeAddress(u32 data) override;
u32 GetVertexAddress() override;
u32 GetIndexAddress() override;
GPUgstate GetGState() override;
void SetCmdValue(u32 op) override;
void UpdateUVScaleOffset() {
#ifdef _M_SSE
__m128i values = _mm_slli_epi32(_mm_load_si128((const __m128i *)&gstate.texscaleu), 8);
_mm_storeu_si128((__m128i *)&gstate_c.uv, values);
#elif PPSSPP_PLATFORM(ARM_NEON)
const uint32x4_t values = vshlq_n_u32(vld1q_u32(&gstate.texscaleu), 8);
vst1q_u32(&gstate_c.uv, values);
#else
gstate_c.uv.uScale = getFloat24(gstate.texscaleu);
gstate_c.uv.vScale = getFloat24(gstate.texscalev);
gstate_c.uv.uOff = getFloat24(gstate.texoffsetu);
gstate_c.uv.vOff = getFloat24(gstate.texoffsetv);
#endif
}
DisplayList* getList(int listid) override {
return &dls[listid];
}
const std::list<int>& GetDisplayLists() override {
return dlQueue;
}
bool DecodeTexture(u8* dest, const GPUgstate &state) override {
return false;
}
std::vector<FramebufferInfo> GetFramebufferList() override;
void ClearShaderCache() override {}
void CleanupBeforeUI() override {}
s64 GetListTicks(int listid) override {
if (listid >= 0 && listid < DisplayListMaxCount) {
return dls[listid].waitTicks;
}
return -1;
}
typedef void (GPUCommon::*CmdFunc)(u32 op, u32 diff);
protected:
void SetDrawType(DrawType type, GEPrimitiveType prim) {
if (type != lastDraw_) {
// We always flush when drawing splines/beziers so no need to do so here
gstate_c.Dirty(DIRTY_UVSCALEOFFSET | DIRTY_VERTEXSHADER_STATE);
lastDraw_ = type;
}
// Prim == RECTANGLES can cause CanUseHardwareTransform to flip, so we need to dirty.
// Also, culling may be affected so dirty the raster state.
if ((prim == GE_PRIM_RECTANGLES) != (lastPrim_ == GE_PRIM_RECTANGLES)) {
Flush();
gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VERTEXSHADER_STATE);
lastPrim_ = prim;
}
}
void BeginFrame() override;
// To avoid virtual calls to PreExecuteOp().
virtual void FastRunLoop(DisplayList &list) = 0;
void SlowRunLoop(DisplayList &list);
void UpdatePC(u32 currentPC, u32 newPC);
void UpdateState(GPURunState state);
void PopDLQueue();
void CheckDrawSync();
int GetNextListIndex();
virtual void FastLoadBoneMatrix(u32 target);
// TODO: Unify this.
virtual void FinishDeferred() {}
void DoBlockTransfer(u32 skipDrawReason);
void AdvanceVerts(u32 vertType, int count, int bytesRead) {
if ((vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
int indexShift = ((vertType & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT) - 1;
gstate_c.indexAddr += count << indexShift;
} else {
gstate_c.vertexAddr += bytesRead;
}
}
FramebufferManagerCommon *framebufferManager_;
TextureCacheCommon *textureCache_;
DrawEngineCommon *drawEngineCommon_;
ShaderManagerCommon *shaderManager_;
GraphicsContext *gfxCtx_;
Draw::DrawContext *draw_;
typedef std::list<int> DisplayListQueue;
int nextListID;
DisplayList dls[DisplayListMaxCount];
DisplayList *currentList;
DisplayListQueue dlQueue;
bool interruptRunning;
GPURunState gpuState;
bool isbreak;
u64 drawCompleteTicks;
u64 busyTicks;
int downcount;
u64 startingTicks;
u32 cycleLastPC;
int cyclesExecuted;
bool dumpNextFrame_;
bool dumpThisFrame_;
bool debugRecording_;
bool interruptsEnabled_;
bool resized_;
DrawType lastDraw_;
GEPrimitiveType lastPrim_;
private:
// Debug stats.
double timeSteppingStarted_;
double timeSpentStepping_;
};
struct CommonCommandTableEntry {
uint8_t cmd;
uint8_t flags;
uint64_t dirty;
GPUCommon::CmdFunc func;
};
extern const CommonCommandTableEntry commonCommandTable[];
extern size_t commonCommandTableSize;