Skip to content

Commit

Permalink
Merge branch 'Mesa3D:main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
alexvorxx authored Feb 7, 2024
2 parents 3ba8eb8 + 31c9e17 commit 6c7fec2
Show file tree
Hide file tree
Showing 81 changed files with 1,408 additions and 316 deletions.
12 changes: 10 additions & 2 deletions meson_options.txt
Original file line number Diff line number Diff line change
Expand Up @@ -177,13 +177,21 @@ option(
)

option(
'gallium-windows-dll-name',
'gallium-wgl-dll-name',
type : 'string',
value : 'libgallium_wgl',
description : 'name of gallium megadriver DLL built for Windows. ' +
description : 'name of gallium wgl target DLL built for Windows. ' +
'defaults to libgallium_wgl.dll to match DRI',
)

option(
'gallium-d3d10-dll-name',
type : 'string',
value : 'libgallium_d3d10',
description : 'name of gallium d3d10 target DLL built for Windows. ' +
'defaults to libgallium_d3d10.dll to match DRI',
)

option(
'opencl-spirv',
type : 'boolean',
Expand Down
29 changes: 29 additions & 0 deletions src/amd/compiler/aco_assembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -468,6 +468,35 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
out.push_back(encoding);
break;
}
case Format::VOPD: {
/* Encodes a dual-issue VOPD instruction as two 32-bit words appended to `out`.
 * The instruction carries two operations: the one named by instr->opcode and
 * a second one whose opcode is stored in vopd.opy. The operands of the first
 * operation come first in instr->operands, followed by those of the second.
 */
VOPD_instruction& vopd = instr->vopd();
/* First word: fixed 0b110010 pattern in bits 31:26. */
uint32_t encoding = (0b110010 << 26);
/* First operation, source 0 in the low bits. */
encoding |= reg(ctx, instr->operands[0]);
/* First operation, source 1 at bit 9; skipped for v_dual_mov_b32, which
 * only consumes operands[0] (see opy_start below).
 * NOTE(review): the third argument of reg() is presumably a field width --
 * confirm against its declaration, which is outside this hunk. */
if (instr->opcode != aco_opcode::v_dual_mov_b32)
encoding |= reg(ctx, instr->operands[1], 8) << 9;
/* Opcode of the second operation, translated through ctx.opcode, at bit 17. */
encoding |= (uint32_t)ctx.opcode[(int)vopd.opy] << 17;
/* `opcode` is declared outside this hunk; presumably the translated opcode
 * of instr->opcode -- confirm. It is placed at bit 22. */
encoding |= opcode << 22;
out.push_back(encoding);

/* Index of the first operand that belongs to the second operation:
 * 1 after a v_dual_mov_b32, 3 after the opcodes listed in the switch,
 * 2 after everything else. */
unsigned opy_start = instr->opcode == aco_opcode::v_dual_mov_b32 ? 1 : 2;
switch (instr->opcode) {
case aco_opcode::v_dual_fmac_f32:
case aco_opcode::v_dual_fmaak_f32:
case aco_opcode::v_dual_fmamk_f32:
case aco_opcode::v_dual_cndmask_b32:
case aco_opcode::v_dual_dot2acc_f32_f16:
case aco_opcode::v_dual_dot2acc_f32_bf16: opy_start = 3; break;
default: break;
}

/* Second word: the second operation's source 0 in the low bits. */
encoding = reg(ctx, instr->operands[opy_start]);
/* Second operation, source 1 at bit 9; skipped when that operation is
 * v_dual_mov_b32. */
if (vopd.opy != aco_opcode::v_dual_mov_b32)
encoding |= reg(ctx, instr->operands[opy_start + 1], 8) << 9;
/* definitions[1] is stored at bit 17 with its lowest bit dropped (>> 1). */
encoding |= (reg(ctx, instr->definitions[1], 8) >> 1) << 17;
/* definitions[0] is stored at bit 24. */
encoding |= reg(ctx, instr->definitions[0], 8) << 24;
out.push_back(encoding);
break;
}
case Format::DS: {
DS_instruction& ds = instr->ds();
uint32_t encoding = (0b110110 << 26);
Expand Down
1 change: 1 addition & 0 deletions src/amd/compiler/aco_builder_h.py
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,7 @@ class Builder {
("vopc_sdwa", [Format.VOPC, Format.SDWA], 'SDWA_instruction', itertools.product([1, 2], [2])),
("vop3", [Format.VOP3], 'VALU_instruction', [(1, 3), (1, 2), (1, 1), (2, 2)]),
("vop3p", [Format.VOP3P], 'VALU_instruction', [(1, 2), (1, 3)]),
("vopd", [Format.VOPD], 'VOPD_instruction', [(2, 2), (2, 3), (2, 4), (2, 5), (2, 6)]),
("vinterp_inreg", [Format.VINTERP_INREG], 'VINTERP_inreg_instruction', [(1, 3)]),
("vintrp", [Format.VINTRP], 'VINTRP_instruction', [(1, 2), (1, 3)]),
("vop1_dpp", [Format.VOP1, Format.DPP16], 'DPP16_instruction', [(1, 1)]),
Expand Down
4 changes: 4 additions & 0 deletions src/amd/compiler/aco_interface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ static const std::array<aco_compiler_statistic_info, aco_num_statistics> statist
ret[aco_statistic_salu] = aco_compiler_statistic_info{"SALU", "Number of SALU instructions"};
ret[aco_statistic_vmem] = aco_compiler_statistic_info{"VMEM", "Number of VMEM instructions"};
ret[aco_statistic_smem] = aco_compiler_statistic_info{"SMEM", "Number of SMEM instructions"};
ret[aco_statistic_vopd] = aco_compiler_statistic_info{"VOPD", "Number of VOPD instructions"};
return ret;
}();

Expand Down Expand Up @@ -199,6 +200,9 @@ aco_postprocess_shader(const struct aco_compiler_options* options,
aco::lower_to_hw_instr(program.get());
validate(program.get());

if (!options->optimisations_disabled && !(aco::debug_flags & aco::DEBUG_NO_SCHED_VOPD))
aco::schedule_vopd(program.get());

/* Schedule hardware instructions for ILP */
if (!options->optimisations_disabled && !(aco::debug_flags & aco::DEBUG_NO_SCHED_ILP))
aco::schedule_ilp(program.get());
Expand Down
3 changes: 2 additions & 1 deletion src/amd/compiler/aco_ir.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,9 @@ static const struct debug_control aco_debug_options[] = {
{"force-waitdeps", DEBUG_FORCE_WAITDEPS},
{"novn", DEBUG_NO_VN},
{"noopt", DEBUG_NO_OPT},
{"nosched", DEBUG_NO_SCHED | DEBUG_NO_SCHED_ILP},
{"nosched", DEBUG_NO_SCHED | DEBUG_NO_SCHED_ILP | DEBUG_NO_SCHED_VOPD},
{"nosched-ilp", DEBUG_NO_SCHED_ILP},
{"nosched-vopd", DEBUG_NO_SCHED_VOPD},
{"perfinfo", DEBUG_PERF_INFO},
{"liveinfo", DEBUG_LIVE_INFO},
{NULL, 0}};
Expand Down
23 changes: 22 additions & 1 deletion src/amd/compiler/aco_ir.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ enum {
DEBUG_FORCE_WAITDEPS = 0x200,
DEBUG_NO_VALIDATE_IR = 0x400,
DEBUG_NO_SCHED_ILP = 0x800,
DEBUG_NO_SCHED_VOPD = 0x1000,
};

enum storage_class : uint8_t {
Expand Down Expand Up @@ -957,6 +958,7 @@ struct Pseudo_reduction_instruction;
struct VALU_instruction;
struct VINTERP_inreg_instruction;
struct VINTRP_instruction;
struct VOPD_instruction;
struct DPP16_instruction;
struct DPP8_instruction;
struct SDWA_instruction;
Expand Down Expand Up @@ -1210,6 +1212,17 @@ struct Instruction {
return *(VINTERP_inreg_instruction*)this;
}
constexpr bool isVINTERP_INREG() const noexcept { return format == Format::VINTERP_INREG; }
/* Returns this instruction viewed as a VOPD_instruction.
 * Asserts that the instruction's format is Format::VOPD before casting. */
VOPD_instruction& vopd() noexcept
{
assert(isVOPD());
return *(VOPD_instruction*)this;
}
/* Const overload: returns a read-only VOPD_instruction view of this
 * instruction. Asserts that the format is Format::VOPD before casting. */
const VOPD_instruction& vopd() const noexcept
{
assert(isVOPD());
return *(VOPD_instruction*)this;
}
/* True when the format equals Format::VOPD (an exact comparison, not a bit-mask test). */
constexpr bool isVOPD() const noexcept { return format == Format::VOPD; }
constexpr bool isVOP1() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP1; }
constexpr bool isVOP2() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP2; }
constexpr bool isVOPC() const noexcept { return (uint16_t)format & (uint16_t)Format::VOPC; }
Expand Down Expand Up @@ -1278,7 +1291,8 @@ struct Instruction {
}
constexpr bool isVALU() const noexcept
{
return isVOP1() || isVOP2() || isVOPC() || isVOP3() || isVOP3P() || isVINTERP_INREG();
return isVOP1() || isVOP2() || isVOPC() || isVOP3() || isVOP3P() || isVINTERP_INREG() ||
isVOPD();
}

constexpr bool isSALU() const noexcept
Expand Down Expand Up @@ -1368,6 +1382,12 @@ struct VINTERP_inreg_instruction : public VALU_instruction {
static_assert(sizeof(VINTERP_inreg_instruction) == sizeof(VALU_instruction) + 4,
"Unexpected padding");

/**
 * Instruction data for Format::VOPD.
 * Extends VALU_instruction with the opcode of a second operation carried by
 * the same instruction.
 */
struct VOPD_instruction : public VALU_instruction {
aco_opcode opy; /* opcode of the second operation */
uint16_t padding; /* explicit filler; the static_assert after this struct expects exactly 4 added bytes */
};
static_assert(sizeof(VOPD_instruction) == sizeof(VALU_instruction) + 4, "Unexpected padding");

/**
* Data Parallel Primitives Format:
* This format can be used for VOP1, VOP2 or VOPC instructions.
Expand Down Expand Up @@ -2209,6 +2229,7 @@ void ssa_elimination(Program* program);
void lower_to_hw_instr(Program* program);
void schedule_program(Program* program, live& live_vars);
void schedule_ilp(Program* program);
void schedule_vopd(Program* program);
void spill(Program* program, live& live_vars);
void insert_wait_states(Program* program);
bool dealloc_vgprs(Program* program);
Expand Down
26 changes: 26 additions & 0 deletions src/amd/compiler/aco_opcodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ class Format(IntEnum):
VINTRP = auto()
# Vector ALU Formats
VINTERP_INREG = auto()
VOPD = auto()
VOP1 = 1 << 7
VOP2 = 1 << 8
VOPC = 1 << 9
Expand Down Expand Up @@ -186,6 +187,8 @@ def get_builder_fields(self):
elif self == Format.VOP3P:
return [('uint8_t', 'opsel_lo', None),
('uint8_t', 'opsel_hi', None)]
elif self == Format.VOPD:
return [('aco_opcode', 'opy', None)]
elif self == Format.VINTERP_INREG:
return [('unsigned', 'wait_exp', 7),
('uint8_t', 'opsel', 0)]
Expand Down Expand Up @@ -1272,6 +1275,29 @@ def default_class(opcodes, cls):
opcode(name, gfx7, gfx9, gfx10, gfx11, Format.VOP3, cls, in_mod, out_mod, definitions = defs, operands = ops)


# VOPD opcode table: each entry is (opcode number, name).
# The numbering is not contiguous: 0x0e and 0x0f have no entry.
VOPD = {
(0x00, "v_dual_fmac_f32"),
(0x01, "v_dual_fmaak_f32"),
(0x02, "v_dual_fmamk_f32"),
(0x03, "v_dual_mul_f32"),
(0x04, "v_dual_add_f32"),
(0x05, "v_dual_sub_f32"),
(0x06, "v_dual_subrev_f32"),
(0x07, "v_dual_mul_dx9_zero_f32"),
(0x08, "v_dual_mov_b32"),
(0x09, "v_dual_cndmask_b32"),
(0x0a, "v_dual_max_f32"),
(0x0b, "v_dual_min_f32"),
(0x0c, "v_dual_dot2acc_f32_f16"),
(0x0d, "v_dual_dot2acc_f32_bf16"),
(0x10, "v_dual_add_nc_u32"),
(0x11, "v_dual_lshlrev_b32"),
(0x12, "v_dual_and_b32"),
}
# Register every entry with Format.VOPD and InstrClass.Valu32. The table's number
# is passed in the gfx11 position; the three preceding positions receive -1
# (presumably "no encoding on that generation" -- confirm against opcode()).
for gfx11, name in VOPD:
opcode(name, -1, -1, -1, gfx11, format = Format.VOPD, cls = InstrClass.Valu32)


# DS instructions: 3 inputs (1 addr, 2 data), 1 output
DS = {
(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, "ds_add_u32"),
Expand Down
6 changes: 6 additions & 0 deletions src/amd/compiler/aco_print_ir.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,12 @@ print_instr_format_specific(enum amd_gfx_level gfx_level, const Instruction* ins
fprintf(output, " attr%d.%c", vintrp.attribute, "xyzw"[vintrp.component]);
break;
}
case Format::VOPD: {
const VOPD_instruction& vopd = instr->vopd();
// TODO: beautify
/* Appends a space followed by the name of the second opcode (opy), looked up
 * in instr_info.name. */
fprintf(output, " %s", instr_info.name[(int)vopd.opy]);
break;
}
case Format::DS: {
const DS_instruction& ds = instr->ds();
if (ds.offset0)
Expand Down
Loading

0 comments on commit 6c7fec2

Please sign in to comment.