Skip to content

Commit

Permalink
SIMD: implements comparison instructions
Browse files Browse the repository at this point in the history
Signed-off-by: Takeshi Yoneda <[email protected]>
  • Loading branch information
mathetake committed Jun 3, 2022
1 parent 6e458ac commit 5b86e19
Show file tree
Hide file tree
Showing 19 changed files with 1,862 additions and 17 deletions.
77 changes: 75 additions & 2 deletions internal/asm/amd64/consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,9 @@ const (
CVTSL2SD
// CVTSL2SS is the CVTSI2SS instruction in 32-bit mode. https://www.felixcloutier.com/x86/cvtsi2ss
CVTSL2SS
// CVTSL2SD is the CVTSI2SD instruction in 64-bit mode. https://www.felixcloutier.com/x86/cvtsi2sd
// CVTSQ2SD is the CVTSI2SD instruction in 64-bit mode. https://www.felixcloutier.com/x86/cvtsi2sd
CVTSQ2SD
// CVTSL2SS is the CVTSI2SS instruction in 64-bit mode. https://www.felixcloutier.com/x86/cvtsi2ss
// CVTSQ2SS is the CVTSI2SS instruction in 64-bit mode. https://www.felixcloutier.com/x86/cvtsi2ss
CVTSQ2SS
// CVTSS2SD is the CVTSS2SD instruction. https://www.felixcloutier.com/x86/cvtss2sd
CVTSS2SD
Expand Down Expand Up @@ -409,6 +409,45 @@ const (
PUNPCKLBW
// PUNPCKHBW is the PUNPCKHBW instruction https://www.felixcloutier.com/x86/punpckhbw:punpckhwd:punpckhdq:punpckhqdq
PUNPCKHBW
// CMPPS is the CMPPS instruction https://www.felixcloutier.com/x86/cmpps
CMPPS
// CMPPD is the CMPPD instruction https://www.felixcloutier.com/x86/cmppd
CMPPD
// PCMPGTQ is the PCMPGTQ instruction https://www.felixcloutier.com/x86/pcmpgtq
PCMPGTQ
// PCMPGTD is the PCMPGTD instruction https://www.felixcloutier.com/x86/pcmpgtb:pcmpgtw:pcmpgtd
PCMPGTD
// PCMPGTW is the PCMPGTW instruction https://www.felixcloutier.com/x86/pcmpgtb:pcmpgtw:pcmpgtd
PCMPGTW
// PCMPGTB is the PCMPGTB instruction https://www.felixcloutier.com/x86/pcmpgtb:pcmpgtw:pcmpgtd
PCMPGTB
// PMINSD is the PMINSD instruction https://www.felixcloutier.com/x86/pminsd:pminsq
PMINSD
// PMINSW is the PMINSW instruction https://www.felixcloutier.com/x86/pminsb:pminsw
PMINSW
// PMINSB is the PMINSB instruction https://www.felixcloutier.com/x86/pminsb:pminsw
PMINSB
// PMAXSD is the PMAXSD instruction https://www.felixcloutier.com/x86/pmaxsb:pmaxsw:pmaxsd:pmaxsq
PMAXSD
// PMAXSW is the PMAXSW instruction https://www.felixcloutier.com/x86/pmaxsb:pmaxsw:pmaxsd:pmaxsq
PMAXSW
// PMAXSB is the PMAXSB instruction https://www.felixcloutier.com/x86/pmaxsb:pmaxsw:pmaxsd:pmaxsq
PMAXSB
// PMINUD is the PMINUD instruction https://www.felixcloutier.com/x86/pminud:pminuq
PMINUD
// PMINUW is the PMINUW instruction https://www.felixcloutier.com/x86/pminub:pminuw
PMINUW
// PMINUB is the PMINUB instruction https://www.felixcloutier.com/x86/pminub:pminuw
PMINUB
// PMAXUD is the PMAXUD instruction https://www.felixcloutier.com/x86/pmaxud:pmaxuq
PMAXUD
// PMAXUW is the PMAXUW instruction https://www.felixcloutier.com/x86/pmaxub:pmaxuw
PMAXUW
// PMAXUB is the PMAXUB instruction https://www.felixcloutier.com/x86/pmaxub:pmaxuw
PMAXUB

// instructionEnd is always placed at the bottom of this iota definition to be used in the test.
instructionEnd
)

// InstructionName returns the name for an instruction
Expand Down Expand Up @@ -784,6 +823,40 @@ func InstructionName(instruction asm.Instruction) string {
return "NEGQ"
case NONE:
return "NONE"
case CMPPS:
return "CMPPS"
case CMPPD:
return "CMPPD"
case PCMPGTQ:
return "PCMPGTQ"
case PCMPGTD:
return "PCMPGTD"
case PMINSD:
return "PMINSD"
case PMAXSD:
return "PMAXSD"
case PMINSW:
return "PMINSW"
case PCMPGTB:
return "PCMPGTB"
case PMINSB:
return "PMINSB"
case PMINUD:
return "PMINUD"
case PMINUW:
return "PMINUW"
case PMINUB:
return "PMINUB"
case PMAXUD:
return "PMAXUD"
case PMAXUW:
return "PMAXUW"
case PMAXUB:
return "PMAXUB"
case PCMPGTW:
return "PCMPGTW"
case PMAXSW:
return "PMAXSW"
}
panic(instruction)
}
Expand Down
15 changes: 15 additions & 0 deletions internal/asm/amd64/consts_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package amd64

import (
"testing"

"github.com/tetratelabs/wazero/internal/asm"
"github.com/tetratelabs/wazero/internal/testing/require"
)

// TestInstructionName checks that InstructionName returns a non-empty name for
// every instruction value in the range [0, instructionEnd).
func TestInstructionName(t *testing.T) {
	inst := asm.Instruction(0)
	for inst < instructionEnd {
		name := InstructionName(inst)
		require.NotEqual(t, "", name)
		inst++
	}
}
37 changes: 37 additions & 0 deletions internal/asm/amd64/impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -1294,6 +1294,43 @@ var registerToRegisterOpcode = map[asm.Instruction]struct {
PUNPCKLBW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x60}, requireSrcFloat: true, requireDstFloat: true},
// https://www.felixcloutier.com/x86/punpckhbw:punpckhwd:punpckhdq:punpckhqdq
PUNPCKHBW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x68}, requireSrcFloat: true, requireDstFloat: true},
// https://www.felixcloutier.com/x86/cmpps
CMPPS: {opcode: []byte{0x0f, 0xc2}, requireSrcFloat: true, requireDstFloat: true, needArg: true},
// https://www.felixcloutier.com/x86/cmppd
CMPPD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xc2}, requireSrcFloat: true, requireDstFloat: true, needArg: true},
// https://www.felixcloutier.com/x86/pcmpgtq
PCMPGTQ: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x37}, requireSrcFloat: true, requireDstFloat: true},
// https://www.felixcloutier.com/x86/pcmpgtb:pcmpgtw:pcmpgtd
PCMPGTD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x66}, requireSrcFloat: true, requireDstFloat: true},
// https://www.felixcloutier.com/x86/pcmpgtb:pcmpgtw:pcmpgtd
PCMPGTW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x65}, requireSrcFloat: true, requireDstFloat: true},
// https://www.felixcloutier.com/x86/pcmpgtb:pcmpgtw:pcmpgtd
PCMPGTB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x64}, requireSrcFloat: true, requireDstFloat: true},
// https://www.felixcloutier.com/x86/pminsd:pminsq
PMINSD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x39}, requireSrcFloat: true, requireDstFloat: true},
// https://www.felixcloutier.com/x86/pmaxsb:pmaxsw:pmaxsd:pmaxsq
PMAXSD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x3d}, requireSrcFloat: true, requireDstFloat: true},
// https://www.felixcloutier.com/x86/pmaxsb:pmaxsw:pmaxsd:pmaxsq
PMAXSW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xee}, requireSrcFloat: true, requireDstFloat: true},
// https://www.felixcloutier.com/x86/pmaxsb:pmaxsw:pmaxsd:pmaxsq
PMAXSB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x3c}, requireSrcFloat: true, requireDstFloat: true},
// https://www.felixcloutier.com/x86/pminsb:pminsw
PMINSW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xea}, requireSrcFloat: true, requireDstFloat: true},
// https://www.felixcloutier.com/x86/pminsb:pminsw
PMINSB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x38}, requireSrcFloat: true, requireDstFloat: true},

// https://www.felixcloutier.com/x86/pminud:pminuq
PMINUD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x3b}, requireSrcFloat: true, requireDstFloat: true},
// https://www.felixcloutier.com/x86/pminub:pminuw
PMINUW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x3a}, requireSrcFloat: true, requireDstFloat: true},
// https://www.felixcloutier.com/x86/pminub:pminuw
PMINUB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xda}, requireSrcFloat: true, requireDstFloat: true},
// https://www.felixcloutier.com/x86/pmaxud:pmaxuq
PMAXUD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x3f}, requireSrcFloat: true, requireDstFloat: true},
// https://www.felixcloutier.com/x86/pmaxub:pmaxuw
PMAXUW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x3e}, requireSrcFloat: true, requireDstFloat: true},
// https://www.felixcloutier.com/x86/pmaxub:pmaxuw
PMAXUB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xde}, requireSrcFloat: true, requireDstFloat: true},
}

var RegisterToRegisterShiftOpcode = map[asm.Instruction]struct {
Expand Down
204 changes: 204 additions & 0 deletions internal/asm/amd64/impl_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -733,6 +733,210 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) {
},
exp: []byte{0x66, 0x45, 0xf, 0xf3, 0xdf},
},
{
name: "cmpeqps xmm11, xmm15",
n: &NodeImpl{
Instruction: CMPPS,
Types: OperandTypesRegisterToRegister,
SrcReg: RegX15,
DstReg: RegX11,
Arg: 0, // CMPPS with arg=0 == CMPEQPS.
},
exp: []byte{0x45, 0xf, 0xc2, 0xdf, 0x0},
},
{
name: "cmpordps xmm1, xmm5",
n: &NodeImpl{
Instruction: CMPPS,
Types: OperandTypesRegisterToRegister,
SrcReg: RegX5,
DstReg: RegX1,
Arg: 7, // CMPPS with arg=7 == CMPORDPS.
},
exp: []byte{0xf, 0xc2, 0xcd, 0x7},
},
{
name: "cmplepd xmm11, xmm15",
n: &NodeImpl{
Instruction: CMPPD,
Types: OperandTypesRegisterToRegister,
SrcReg: RegX15,
DstReg: RegX11,
Arg: 2, // CMPPD with arg=2 == CMPLEPD.
},
exp: []byte{0x66, 0x45, 0xf, 0xc2, 0xdf, 0x2},
},
{
name: "cmpneqpd xmm1, xmm5",
n: &NodeImpl{
Instruction: CMPPD,
Types: OperandTypesRegisterToRegister,
SrcReg: RegX5,
DstReg: RegX1,
Arg: 4, // CMPPD with arg=4 == CMPNEQPD.
},
exp: []byte{0x66, 0xf, 0xc2, 0xcd, 0x4},
},
{
name: "pcmpgtq xmm10, xmm3",
n: &NodeImpl{
Instruction: PCMPGTQ,
Types: OperandTypesRegisterToRegister,
SrcReg: RegX3,
DstReg: RegX10,
},
exp: []byte{0x66, 0x44, 0xf, 0x38, 0x37, 0xd3},
},
{
name: "pcmpgtd xmm10, xmm3",
n: &NodeImpl{
Instruction: PCMPGTD,
Types: OperandTypesRegisterToRegister,
SrcReg: RegX3,
DstReg: RegX10,
},
exp: []byte{0x66, 0x44, 0xf, 0x66, 0xd3},
},
{
name: "pminsd xmm10, xmm3",
n: &NodeImpl{
Instruction: PMINSD,
Types: OperandTypesRegisterToRegister,
SrcReg: RegX3,
DstReg: RegX10,
},
exp: []byte{0x66, 0x44, 0xf, 0x38, 0x39, 0xd3},
},
{
name: "pmaxsd xmm1, xmm12",
n: &NodeImpl{
Instruction: PMAXSD,
Types: OperandTypesRegisterToRegister,
SrcReg: RegX12,
DstReg: RegX1,
},
exp: []byte{0x66, 0x41, 0xf, 0x38, 0x3d, 0xcc},
},
{
name: "pmaxsw xmm1, xmm12",
n: &NodeImpl{
Instruction: PMAXSW,
Types: OperandTypesRegisterToRegister,
SrcReg: RegX12,
DstReg: RegX1,
},
// PMAXSW uses the two-byte opcode 66 0F EE /r; the 0F 38 3D form is PMAXSD.
// REX.B (0x41) selects xmm12 as the r/m operand, and ModRM 0xcc = 11 001 100.
exp: []byte{0x66, 0x41, 0xf, 0xee, 0xcc},
},
{
name: "pminsw xmm1, xmm12",
n: &NodeImpl{
Instruction: PMINSW,
Types: OperandTypesRegisterToRegister,
SrcReg: RegX12,
DstReg: RegX1,
},
exp: []byte{0x66, 0x41, 0xf, 0xea, 0xcc},
},
{
name: "pcmpgtb xmm1, xmm12",
n: &NodeImpl{
Instruction: PCMPGTB,
Types: OperandTypesRegisterToRegister,
SrcReg: RegX12,
DstReg: RegX1,
},
exp: []byte{0x66, 0x41, 0xf, 0x64, 0xcc},
},
{
name: "pminsb xmm1, xmm12",
n: &NodeImpl{
Instruction: PMINSB,
Types: OperandTypesRegisterToRegister,
SrcReg: RegX12,
DstReg: RegX1,
},
exp: []byte{0x66, 0x41, 0xf, 0x38, 0x38, 0xcc},
},
{
name: "pmaxsb xmm1, xmm2",
n: &NodeImpl{
Instruction: PMAXSB,
Types: OperandTypesRegisterToRegister,
SrcReg: RegX2,
DstReg: RegX1,
},
exp: []byte{0x66, 0xf, 0x38, 0x3c, 0xca},
},
{
name: "pminud xmm1, xmm2",
n: &NodeImpl{
Instruction: PMINUD,
Types: OperandTypesRegisterToRegister,
SrcReg: RegX2,
DstReg: RegX1,
},
exp: []byte{0x66, 0xf, 0x38, 0x3b, 0xca},
},
{
name: "pminuw xmm1, xmm2",
n: &NodeImpl{
Instruction: PMINUW,
Types: OperandTypesRegisterToRegister,
SrcReg: RegX2,
DstReg: RegX1,
},
exp: []byte{0x66, 0xf, 0x38, 0x3a, 0xca},
},
{
name: "pminub xmm1, xmm2",
n: &NodeImpl{
Instruction: PMINUB,
Types: OperandTypesRegisterToRegister,
SrcReg: RegX2,
DstReg: RegX1,
},
exp: []byte{0x66, 0xf, 0xda, 0xca},
},
{
name: "pmaxud xmm1, xmm2",
n: &NodeImpl{
Instruction: PMAXUD,
Types: OperandTypesRegisterToRegister,
SrcReg: RegX2,
DstReg: RegX1,
},
exp: []byte{0x66, 0xf, 0x38, 0x3f, 0xca},
},
{
name: "pmaxuw xmm1, xmm2",
n: &NodeImpl{
Instruction: PMAXUW,
Types: OperandTypesRegisterToRegister,
SrcReg: RegX2,
DstReg: RegX1,
},
exp: []byte{0x66, 0xf, 0x38, 0x3e, 0xca},
},
{
name: "pmaxub xmm1, xmm2",
n: &NodeImpl{
Instruction: PMAXUB,
Types: OperandTypesRegisterToRegister,
SrcReg: RegX2,
DstReg: RegX1,
},
exp: []byte{0x66, 0xf, 0xde, 0xca},
},
{
name: "pcmpgtw xmm1, xmm2",
n: &NodeImpl{
Instruction: PCMPGTW,
Types: OperandTypesRegisterToRegister,
SrcReg: RegX2,
DstReg: RegX1,
},
exp: []byte{0x66, 0xf, 0x65, 0xca},
},
}

for _, tt := range tests {
Expand Down
9 changes: 8 additions & 1 deletion internal/asm/arm64/consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -725,6 +725,9 @@ const (
TBL1
// TBL2 is the TBL instruction whose source is two vectors. https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/TBL--Table-vector-Lookup-
TBL2

// instructionEnd is always placed at the bottom of this iota definition to be used in the test.
instructionEnd
)

// VectorArrangement is the arrangement of data within a vector register.
Expand Down Expand Up @@ -1082,6 +1085,10 @@ func InstructionName(i asm.Instruction) string {
return "CMEQ"
case ADDP:
return "ADDP"
case TBL1:
return "TBL1"
case TBL2:
return "TBL2"
}
panic("unknown instruction")
panic(i)
}
Loading

0 comments on commit 5b86e19

Please sign in to comment.