Skip to content

Commit

Permalink
Merge pull request #9226 from hrydgard/cleaned-up-hw-tess
Browse files Browse the repository at this point in the history
Cleaned up branch of Xebra's hw tess
  • Loading branch information
hrydgard authored Jan 23, 2017
2 parents b0697b6 + f111eed commit ce2feb2
Show file tree
Hide file tree
Showing 25 changed files with 981 additions and 120 deletions.
1 change: 1 addition & 0 deletions Core/Config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -493,6 +493,7 @@ static ConfigSetting graphicsSettings[] = {
// Not really a graphics setting...
ReportedConfigSetting("TimerHack", &g_Config.bTimerHack, &DefaultTimerHack, true, true),
ReportedConfigSetting("SplineBezierQuality", &g_Config.iSplineBezierQuality, 2, true, true),
ReportedConfigSetting("HardwareTessellation", &g_Config.bHardwareTessellation, false, true, true),
ReportedConfigSetting("PostShader", &g_Config.sPostShaderName, "Off", true, true),

ReportedConfigSetting("MemBlockTransferGPU", &g_Config.bBlockTransferGPU, true, true, true),
Expand Down
1 change: 1 addition & 0 deletions Core/Config.h
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ struct Config {
bool bDisableSlowFramebufEffects;
bool bFragmentTestCache;
int iSplineBezierQuality; // 0 = low , 1 = Intermediate , 2 = High
bool bHardwareTessellation;
std::string sPostShaderName; // Off for off.
bool bGfxDebugOutput;

Expand Down
13 changes: 13 additions & 0 deletions GPU/Common/DrawEngineCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,4 +87,17 @@ class DrawEngineCommon {

// Fixed index buffer for easy quad generation from spline/bezier
u16 *quadIndices_;

// Hardware tessellation
int numPatches;
// Abstract interface used to hand spline/bezier control point data to a rendering backend.
// Implementations are held and used through a base-class pointer (tessDataTransfer), so the
// destructor is virtual to make deleting through that pointer well-defined.
class TessellationDataTransfer {
protected:
	// Bookkeeping available to implementations; not read or written by this base class beyond
	// initialization. NOTE(review): presumably the sizes of the previously uploaded position,
	// texcoord and color data - confirm against the backend implementations.
	int prevSize;
	int prevSizeTex;
	int prevSizeCol;
public:
	// Start the bookkeeping from a known state instead of indeterminate values.
	TessellationDataTransfer() : prevSize(0), prevSizeTex(0), prevSizeCol(0) {}
	virtual ~TessellationDataTransfer() {}

	// Send spline/bezier control points to the vertex shader through a floating point texture.
	//   pos, tex, col - control point positions, texture coordinates and colors as float arrays.
	//   size          - number of control points.
	//   hasColor      - whether the source vertices carry per-vertex color.
	//   hasTexCoords  - whether the source vertices carry texture coordinates.
	virtual void SendDataToShader(const float *pos, const float *tex, const float *col, int size, bool hasColor, bool hasTexCoords) = 0;
};
TessellationDataTransfer *tessDataTransfer;
};
20 changes: 20 additions & 0 deletions GPU/Common/ShaderId.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,12 @@ std::string VertexShaderDesc(const ShaderID &id) {
if (id.Bits(VS_BIT_WEIGHT_FMTSCALE, 2)) desc << "WScale " << id.Bits(VS_BIT_WEIGHT_FMTSCALE, 2) << " ";
if (id.Bit(VS_BIT_FLATSHADE)) desc << "Flat ";

if (id.Bit(VS_BIT_BEZIER)) desc << "Bezier ";
if (id.Bit(VS_BIT_SPLINE)) desc << "Spline ";
if (id.Bit(VS_BIT_HAS_COLOR_TESS)) desc << "TessC ";
if (id.Bit(VS_BIT_HAS_TEXCOORD_TESS)) desc << "TessT ";
if (id.Bit(VS_BIT_NORM_REVERSE_TESS)) desc << "TessRevN ";

// TODO: More...

return desc.str();
Expand All @@ -59,6 +65,12 @@ void ComputeVertexShaderID(ShaderID *id_out, u32 vertType, bool useHWTransform)
bool hasColor = (vertType & GE_VTYPE_COL_MASK) != 0;
bool hasNormal = (vertType & GE_VTYPE_NRM_MASK) != 0;
bool hasTexcoord = (vertType & GE_VTYPE_TC_MASK) != 0;

bool doBezier = gstate_c.bezier;
bool doSpline = gstate_c.spline;
bool hasColorTess = (gstate.vertType & GE_VTYPE_COL_MASK) != 0 && (doBezier || doSpline);
bool hasTexcoordTess = (gstate.vertType & GE_VTYPE_TC_MASK) != 0 && (doBezier || doSpline);

bool enableFog = gstate.isFogEnabled() && !gstate.isModeThrough() && !gstate.isModeClear();
bool lmode = gstate.isUsingSecondaryColor() && gstate.isLightingEnabled();
// lmode: && !isModeThrough!?
Expand Down Expand Up @@ -119,6 +131,14 @@ void ComputeVertexShaderID(ShaderID *id_out, u32 vertType, bool useHWTransform)

id.SetBit(VS_BIT_NORM_REVERSE, gstate.areNormalsReversed());
id.SetBit(VS_BIT_HAS_TEXCOORD, hasTexcoord);

if (g_Config.bHardwareTessellation) {
id.SetBit(VS_BIT_BEZIER, doBezier);
id.SetBit(VS_BIT_SPLINE, doSpline);
id.SetBit(VS_BIT_HAS_COLOR_TESS, hasColorTess);
id.SetBit(VS_BIT_HAS_TEXCOORD_TESS, hasTexcoordTess);
id.SetBit(VS_BIT_NORM_REVERSE_TESS, gstate.isPatchNormalsReversed());
}
}

id.SetBit(VS_BIT_FLATSHADE, doFlatShading);
Expand Down
12 changes: 9 additions & 3 deletions GPU/Common/ShaderId.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@ enum {
VS_BIT_USE_HW_TRANSFORM = 8,
VS_BIT_HAS_NORMAL = 9, // conditioned on hw transform
VS_BIT_NORM_REVERSE = 10,
VS_BIT_HAS_TEXCOORD = 11, // 5 free after
VS_BIT_HAS_TEXCOORD = 11,
VS_BIT_HAS_COLOR_TESS = 12, // 1 bit
VS_BIT_HAS_TEXCOORD_TESS = 13, // 1 bit
VS_BIT_NORM_REVERSE_TESS = 14, // 1 bit, 1 free after
VS_BIT_UVGEN_MODE = 16,
VS_BIT_UVPROJ_MODE = 18, // 2, can overlap with LS0
VS_BIT_LS0 = 18, // 2
Expand All @@ -30,14 +33,17 @@ enum {
VS_BIT_LIGHT2_TYPE = 42, // 2 bits
VS_BIT_LIGHT3_COMP = 44, // 2 bits
VS_BIT_LIGHT3_TYPE = 46, // 2 bits
VS_BIT_MATERIAL_UPDATE = 48, // 3 bits, 1 free after
VS_BIT_MATERIAL_UPDATE = 48, // 3 bits
VS_BIT_SPLINE = 51, // 1 bit
VS_BIT_LIGHT0_ENABLE = 52,
VS_BIT_LIGHT1_ENABLE = 53,
VS_BIT_LIGHT2_ENABLE = 54,
VS_BIT_LIGHT3_ENABLE = 55,
VS_BIT_LIGHTING_ENABLE = 56,
VS_BIT_WEIGHT_FMTSCALE = 57, // only two bits, 1 free after
VS_BIT_FLATSHADE = 62, // 1 free after
VS_BIT_FLATSHADE = 62, // 1 bit
VS_BIT_BEZIER = 63, // 1 bit
// No more free
};


Expand Down
216 changes: 162 additions & 54 deletions GPU/Common/SplineCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,41 @@ static void spline_knot(int n, int type, float *knot) {
}
}

// Builds the input mesh of one spline patch for "Instanced Tessellation".
//
// Writes a (tess_u + 1) x (tess_v + 1) grid of SimpleVertex starting at dest, then emits the
// indices of tess_u * tess_v quads through CopyQuadIndex, adding 6 to count per quad.
// Only pos.x/pos.y (grid coordinate scaled by 1/tess) and nrm.x/nrm.y (the grid step) are
// written; every other vertex field is left untouched. dest is not advanced by this function.
static void TessellateSplinePatchHardware(u8 *&dest, u16 *indices, int &count, const SplinePatchLocal &spatch) {
	SimpleVertex *grid = (SimpleVertex *)dest;

	// Number of vertices in one row of the grid.
	const int row_len = spatch.tess_u + 1;
	const float step_u = 1.0f / (float)spatch.tess_u;
	const float step_v = 1.0f / (float)spatch.tess_v;

	// Fill in the simple input vertices consumed by the spline-computing vertex shader.
	for (int row = 0; row <= spatch.tess_v; ++row) {
		const float grid_y = (float)row * step_v;
		for (int col = 0; col <= spatch.tess_u; ++col) {
			SimpleVertex &v = grid[row * row_len + col];
			v.pos.x = (float)col * step_u;
			v.pos.y = grid_y;

			// TODO: Move to a shader uniform, and unify this method for spline and bezier if necessary.
			// The grid step is stored in the normal; used for computing normals.
			v.nrm.x = step_u;
			v.nrm.y = step_v;
		}
	}

	// Stitch neighbouring grid vertices into quads.
	for (int row = 0; row < spatch.tess_v; ++row) {
		for (int col = 0; col < spatch.tess_u; ++col) {
			// cornerUV: U/V offset (0 or 1) of the corner from the quad's first vertex.
			int corner00 = row * row_len + col;
			int corner10 = corner00 + 1;
			int corner01 = corner00 + row_len;
			int corner11 = corner01 + 1;

			CopyQuadIndex(indices, spatch.primType, corner00, corner10, corner01, corner11);
			count += 6;
		}
	}
}

static void _SplinePatchLowQuality(u8 *&dest, u16 *indices, int &count, const SplinePatchLocal &spatch, u32 origVertType) {
// Fast and easy way - just draw the control points, generate some very basic normal vector substitutes.
// Very inaccurate but okay for Loco Roco. Maybe should keep it as an option because it's fast.
Expand Down Expand Up @@ -767,6 +802,37 @@ static void _BezierPatchHighQuality(u8 *&dest, u16 *&indices, int &count, int te
dest += (tess_u + 1) * (tess_v + 1) * sizeof(SimpleVertex);
}

// Builds the input mesh of one bezier patch for "Instanced Tessellation".
//
// Writes a (tess_u + 1) x (tess_v + 1) grid of SimpleVertex starting at dest, then emits the
// indices of tess_u * tess_v quads through CopyQuadIndex, adding 6 to count per quad.
// Only pos.x/pos.y (grid coordinate scaled by 1/tess) are written; every other vertex field
// is left untouched. dest is not advanced by this function.
static void TesselateBezierPatchHardware(u8 *&dest, u16 *indices, int &count, int tess_u, int tess_v, GEPatchPrimType primType) {
	SimpleVertex *grid = (SimpleVertex *)dest;

	// Number of vertices in one row of the grid.
	const int row_len = tess_u + 1;
	const float step_u = 1.0f / (float)tess_u;
	const float step_v = 1.0f / (float)tess_v;

	// Fill in the simple input vertices consumed by the bezier-computing vertex shader.
	for (int row = 0; row <= tess_v; ++row) {
		const float grid_y = (float)row * step_v;
		for (int col = 0; col <= tess_u; ++col) {
			SimpleVertex &v = grid[row * row_len + col];
			v.pos.x = (float)col * step_u;
			v.pos.y = grid_y;
		}
	}

	// Stitch neighbouring grid vertices into quads.
	for (int row = 0; row < tess_v; ++row) {
		for (int col = 0; col < tess_u; ++col) {
			// cornerUV: U/V offset (0 or 1) of the corner from the quad's first vertex.
			int corner00 = row * row_len + col;
			int corner10 = corner00 + 1;
			int corner01 = corner00 + row_len;
			int corner11 = corner01 + 1;

			CopyQuadIndex(indices, primType, corner00, corner10, corner01, corner11);
			count += 6;
		}
	}
}

void TesselateBezierPatch(u8 *&dest, u16 *&indices, int &count, int tess_u, int tess_v, const BezierPatch &patch, u32 origVertType, int maxVertices) {
switch (g_Config.iSplineBezierQuality) {
case LOW_QUALITY:
Expand All @@ -781,6 +847,31 @@ void TesselateBezierPatch(u8 *&dest, u16 *&indices, int &count, int tess_u, int
}
}

// Looks up entries in an index buffer whose element width (8, 16 or 32 bits) is selected by
// the index-format bits of a vertex type.
class IndexConverter {
private:
	// Raw index buffer; reinterpreted as u8/u16/u32 elements according to format_.
	const void *data_;
	// The vertex type masked with GE_VTYPE_IDX_MASK.
	u32 format_;
public:
	IndexConverter(u32 vertType, const void *indices) : data_(indices), format_(vertType & GE_VTYPE_IDX_MASK) {}

	// Returns the index-buffer entry at position `index`. When the format is none of the
	// 8/16/32-bit index types, `index` is returned unchanged.
	// The buffer pointer is not checked for null here.
	inline u32 convert(u32 index) const {
		if (format_ == GE_VTYPE_IDX_8BIT)
			return static_cast<const u8 *>(data_)[index];
		if (format_ == GE_VTYPE_IDX_16BIT)
			return static_cast<const u16 *>(data_)[index];
		if (format_ == GE_VTYPE_IDX_32BIT)
			return static_cast<const u32 *>(data_)[index];
		return index;
	}
};

// This maps GEPatchPrimType to GEPrimitiveType.
const GEPrimitiveType primType[] = { GE_PRIM_TRIANGLES, GE_PRIM_LINES, GE_PRIM_POINTS, GE_PRIM_POINTS };

Expand All @@ -790,11 +881,7 @@ void DrawEngineCommon::SubmitSpline(const void *control_points, const void *indi

u16 index_lower_bound = 0;
u16 index_upper_bound = count_u * count_v - 1;
bool indices_16bit = (vertType & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT;
bool indices_32bit = (vertType & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_32BIT;
const u8 *indices8 = (const u8 *)indices;
const u16 *indices16 = (const u16 *)indices;
const u32 *indices32 = (const u32 *)indices;
IndexConverter idxConv(vertType, indices);
if (indices)
GetIndexBounds(indices, count_u * count_v, vertType, &index_lower_bound, &index_upper_bound);

Expand Down Expand Up @@ -825,19 +912,7 @@ void DrawEngineCommon::SubmitSpline(const void *control_points, const void *indi

// Make an array of pointers to the control points, to get rid of indices.
for (int idx = 0; idx < count_u * count_v; idx++) {
if (indices) {
u32 ind;
if (indices_32bit) {
ind = indices32[idx];
} else if (indices_16bit) {
ind = indices16[idx];
} else {
ind = indices8[idx];
}
points[idx] = simplified_control_points + ind;
} else {
points[idx] = simplified_control_points + idx;
}
points[idx] = simplified_control_points + (indices ? idxConv.convert(idx) : idx);
}

int count = 0;
Expand All @@ -856,9 +931,30 @@ void DrawEngineCommon::SubmitSpline(const void *control_points, const void *indi
patch.primType = prim_type;
patch.patchFacing = patchFacing;

int maxVertexCount = SPLINE_BUFFER_SIZE / vertexSize;
TesselateSplinePatch(dest, quadIndices_, count, patch, origVertType, maxVertexCount);
if (g_Config.bHardwareTessellation && g_Config.bHardwareTransform && !g_Config.bSoftwareRendering) {
float *pos = (float*)(decoded + 65536 * 18); // Size 3 float
float *tex = pos + count_u * count_v * 3; // Size 3 float
float *col = tex + count_u * count_v * 3; // Size 4 float
const bool hasColor = (origVertType & GE_VTYPE_COL_MASK) != 0;
const bool hasTexCoords = (origVertType & GE_VTYPE_TC_MASK) != 0;

for (int idx = 0; idx < count_u * count_v; idx++) {
memcpy(pos + idx * 3, points[idx]->pos.AsArray(), 3 * sizeof(float));
if (hasTexCoords)
memcpy(tex + idx * 3, points[idx]->uv, 2 * sizeof(float));
if (hasColor)
memcpy(col + idx * 4, Vec4f::FromRGBA(points[idx]->color_32).AsArray(), 4 * sizeof(float));
}
if (!hasColor)
memcpy(col, Vec4f::FromRGBA(points[0]->color_32).AsArray(), 4 * sizeof(float));

tessDataTransfer->SendDataToShader(pos, tex, col, count_u * count_v, hasColor, hasTexCoords);
TessellateSplinePatchHardware(dest, quadIndices_, count, patch);
numPatches = (count_u - 3) * (count_v - 3);
} else {
int maxVertexCount = SPLINE_BUFFER_SIZE / vertexSize;
TesselateSplinePatch(dest, quadIndices_, count, patch, origVertType, maxVertexCount);
}
delete[] points;

u32 vertTypeWithIndex16 = (vertType & ~GE_VTYPE_IDX_MASK) | GE_VTYPE_IDX_16BIT;
Expand Down Expand Up @@ -890,11 +986,7 @@ void DrawEngineCommon::SubmitBezier(const void *control_points, const void *indi

u16 index_lower_bound = 0;
u16 index_upper_bound = count_u * count_v - 1;
bool indices_16bit = (vertType & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT;
bool indices_32bit = (vertType & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_32BIT;
const u8 *indices8 = (const u8 *)indices;
const u16 *indices16 = (const u16 *)indices;
const u32 *indices32 = (const u32 *)indices;
IndexConverter idxConv(vertType, indices);
if (indices)
GetIndexBounds(indices, count_u*count_v, vertType, &index_lower_bound, &index_upper_bound);

Expand Down Expand Up @@ -922,35 +1014,45 @@ void DrawEngineCommon::SubmitBezier(const void *control_points, const void *indi
ERROR_LOG(G3D, "Something went really wrong, vertex size: %i vs %i", vertexSize, (int)sizeof(SimpleVertex));
}

float *pos = (float*)(decoded + 65536 * 18); // Size 3 float
float *tex = pos + count_u * count_v * 3; // Size 3 float
float *col = tex + count_u * count_v * 3; // Size 4 float
const bool hasColor = (origVertType & GE_VTYPE_COL_MASK) != 0;
const bool hasTexCoords = (origVertType & GE_VTYPE_TC_MASK) != 0;

// Bezier patches share fewer control points than spline patches. Otherwise they are pretty much the same (except that beziers don't support the open/close thing)
int num_patches_u = (count_u - 1) / 3;
int num_patches_v = (count_v - 1) / 3;
BezierPatch *patches = new BezierPatch[num_patches_u * num_patches_v];
for (int patch_u = 0; patch_u < num_patches_u; patch_u++) {
for (int patch_v = 0; patch_v < num_patches_v; patch_v++) {
BezierPatch& patch = patches[patch_u + patch_v * num_patches_u];
for (int point = 0; point < 16; ++point) {
int idx = (patch_u * 3 + point % 4) + (patch_v * 3 + point / 4) * count_u;
if (indices) {
u32 ind;
if (indices_32bit) {
ind = indices32[idx];
} else if (indices_16bit) {
ind = indices16[idx];
} else {
ind = indices8[idx];
}
patch.points[point] = simplified_control_points + ind;
} else {
patch.points[point] = simplified_control_points + idx;
BezierPatch *patches;
if (g_Config.bHardwareTessellation && g_Config.bHardwareTransform && !g_Config.bSoftwareRendering) {
for (int idx = 0; idx < count_u * count_v; idx++) {
SimpleVertex *point = simplified_control_points + (indices ? idxConv.convert(idx) : idx);
memcpy(pos + idx * 3, point->pos.AsArray(), 3 * sizeof(float));
if (hasTexCoords)
memcpy(tex + idx * 3, point->uv, 2 * sizeof(float));
if (hasColor)
memcpy(col + idx * 4, Vec4f::FromRGBA(point->color_32).AsArray(), 4 * sizeof(float));
}
if (!hasColor) {
SimpleVertex *point = simplified_control_points + (indices ? idxConv.convert(0) : 0);
memcpy(col, Vec4f::FromRGBA(point->color_32).AsArray(), 4 * sizeof(float));
}
} else {
patches = new BezierPatch[num_patches_u * num_patches_v];
for (int patch_u = 0; patch_u < num_patches_u; patch_u++) {
for (int patch_v = 0; patch_v < num_patches_v; patch_v++) {
BezierPatch& patch = patches[patch_u + patch_v * num_patches_u];
for (int point = 0; point < 16; ++point) {
int idx = (patch_u * 3 + point % 4) + (patch_v * 3 + point / 4) * count_u;
patch.points[point] = simplified_control_points + (indices ? idxConv.convert(idx) : idx);
}
patch.u_index = patch_u * 3;
patch.v_index = patch_v * 3;
patch.index = patch_v * num_patches_u + patch_u;
patch.primType = prim_type;
patch.computeNormals = computeNormals;
patch.patchFacing = patchFacing;
}
patch.u_index = patch_u * 3;
patch.v_index = patch_v * 3;
patch.index = patch_v * num_patches_u + patch_u;
patch.primType = prim_type;
patch.computeNormals = computeNormals;
patch.patchFacing = patchFacing;
}
}

Expand All @@ -969,12 +1071,18 @@ void DrawEngineCommon::SubmitBezier(const void *control_points, const void *indi
}

u16 *inds = quadIndices_;
int maxVertices = SPLINE_BUFFER_SIZE / vertexSize;
for (int patch_idx = 0; patch_idx < num_patches_u*num_patches_v; ++patch_idx) {
const BezierPatch &patch = patches[patch_idx];
TesselateBezierPatch(dest, inds, count, tess_u, tess_v, patch, origVertType, maxVertices);
if (g_Config.bHardwareTessellation && g_Config.bHardwareTransform && !g_Config.bSoftwareRendering) {
tessDataTransfer->SendDataToShader(pos, tex, col, count_u * count_v, hasColor, hasTexCoords);
TesselateBezierPatchHardware(dest, inds, count, tess_u, tess_v, prim_type);
numPatches = num_patches_u * num_patches_v;
} else {
int maxVertices = SPLINE_BUFFER_SIZE / vertexSize;
for (int patch_idx = 0; patch_idx < num_patches_u*num_patches_v; ++patch_idx) {
const BezierPatch &patch = patches[patch_idx];
TesselateBezierPatch(dest, inds, count, tess_u, tess_v, patch, origVertType, maxVertices);
}
delete[] patches;
}
delete[] patches;

u32 vertTypeWithIndex16 = (vertType & ~GE_VTYPE_IDX_MASK) | GE_VTYPE_IDX_16BIT;

Expand Down
Loading

0 comments on commit ce2feb2

Please sign in to comment.