Skip to content

Commit

Permalink
arm64: implement vector comparisons (#632)
Browse files Browse the repository at this point in the history
Signed-off-by: Takeshi Yoneda <[email protected]>
  • Loading branch information
mathetake authored Jun 17, 2022
1 parent 8b8b411 commit cd00799
Show file tree
Hide file tree
Showing 6 changed files with 334 additions and 31 deletions.
28 changes: 28 additions & 0 deletions internal/asm/arm64/consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -772,6 +772,20 @@ const (
SSHR
// EXT is the EXT instruction https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/EXT--Extract-vector-from-pair-of-vectors-?lang=en
EXT
// CMGT is the CMGT(register) instruction https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/CMGT--register---Compare-signed-Greater-than--vector--?lang=en
CMGT
// CMHI is the CMHI(register) instruction https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/CMHI--register---Compare-unsigned-Higher--vector--?lang=en
CMHI
// CMGE is the CMGE(register) instruction https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/CMGE--register---Compare-signed-Greater-than-or-Equal--vector--?lang=en
CMGE
// CMHS is the CMHS(register) instruction https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/CMHS--register---Compare-unsigned-Higher-or-Same--vector--?lang=en
CMHS
// FCMEQ is the FCMEQ(register) instruction https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FCMEQ--register---Floating-point-Compare-Equal--vector--?lang=en
FCMEQ
// FCMGT is the FCMGT(register) instruction https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FCMGT--register---Floating-point-Compare-Greater-than--vector--?lang=en
FCMGT
// FCMGE is the FCMGE(register) instruction https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FCMGE--register---Floating-point-Compare-Greater-than-or-Equal--vector--?lang=en
FCMGE

// instructionEnd is always placed at the bottom of this iota definition to be used in the test.
instructionEnd
Expand Down Expand Up @@ -1176,6 +1190,20 @@ func InstructionName(i asm.Instruction) string {
return "SSHR"
case EXT:
return "EXT"
case CMGT:
return "CMGT"
case CMHI:
return "CMHI"
case CMGE:
return "CMGE"
case CMHS:
return "CMHS"
case FCMEQ:
return "FCMEQ"
case FCMGT:
return "FCMGT"
case FCMGE:
return "FCMGE"
}
panic(fmt.Errorf("unknown instruction %d", i))
}
82 changes: 59 additions & 23 deletions internal/asm/arm64/impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -3049,41 +3049,77 @@ var advancedSIMDThreeSame = map[asm.Instruction]struct {
return
}},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/UMAXP--Unsigned-Maximum-Pairwise-?lang=en
UMAXP: {U: 0b1, Opcode: 0b10100, qAndSizeResolver: func(arrangement VectorArrangement) (Q, size byte, err error) {
size, Q = arrangementSizeQ(arrangement)
return
}},
UMAXP: {U: 0b1, Opcode: 0b10100, qAndSizeResolver: advancedSIMDThreeSameDefaultResolver},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/CMEQ--register---Compare-bitwise-Equal--vector--?lang=en
CMEQ: {U: 0b1, Opcode: 0b10001, qAndSizeResolver: func(arrangement VectorArrangement) (Q, size byte, err error) {
size, Q = arrangementSizeQ(arrangement)
return
}},
CMEQ: {U: 0b1, Opcode: 0b10001, qAndSizeResolver: advancedSIMDThreeSameDefaultResolver},
// https://developer.arm.com/documentation/dui0801/g/A64-SIMD-Vector-Instructions/ADDP--vector-
VADDP: {U: 0b0, Opcode: 0b10111, qAndSizeResolver: func(arrangement VectorArrangement) (Q, size byte, err error) {
size, Q = arrangementSizeQ(arrangement)
return
}},
VADDP: {U: 0b0, Opcode: 0b10111, qAndSizeResolver: advancedSIMDThreeSameDefaultResolver},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/ADD--vector---Add--vector--?lang=en
VADD: {U: 0, Opcode: 0b10000, qAndSizeResolver: func(arrangement VectorArrangement) (Q, size byte, err error) {
size, Q = arrangementSizeQ(arrangement)
return
}},
VADD: {U: 0, Opcode: 0b10000, qAndSizeResolver: advancedSIMDThreeSameDefaultResolver},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SUB--vector---Subtract--vector--?lang=en
VSUB: {U: 1, Opcode: 0b10000, qAndSizeResolver: func(arrangement VectorArrangement) (Q, size byte, err error) {
size, Q = arrangementSizeQ(arrangement)
VSUB: {U: 1, Opcode: 0b10000, qAndSizeResolver: advancedSIMDThreeSameDefaultResolver},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SSHL--Signed-Shift-Left--register--?lang=en
SSHL: {U: 0, Opcode: 0b01000, qAndSizeResolver: advancedSIMDThreeSameDefaultResolver},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/USHL--Unsigned-Shift-Left--register--?lang=en
USHL: {U: 0b1, Opcode: 0b01000, qAndSizeResolver: advancedSIMDThreeSameDefaultResolver},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/CMGT--register---Compare-signed-Greater-than--vector--?lang=en
CMGT: {U: 0b0, Opcode: 0b00110, qAndSizeResolver: advancedSIMDThreeSameDefaultResolver},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/CMHI--register---Compare-unsigned-Higher--vector--?lang=en
CMHI: {U: 0b1, Opcode: 0b00110, qAndSizeResolver: advancedSIMDThreeSameDefaultResolver},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/CMGE--register---Compare-signed-Greater-than-or-Equal--vector--?lang=en
CMGE: {U: 0b0, Opcode: 0b00111, qAndSizeResolver: advancedSIMDThreeSameDefaultResolver},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/CMHS--register---Compare-unsigned-Higher-or-Same--vector--?lang=en
CMHS: {U: 0b1, Opcode: 0b00111, qAndSizeResolver: advancedSIMDThreeSameDefaultResolver},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FCMEQ--register---Floating-point-Compare-Equal--vector--?lang=en
FCMEQ: {U: 0b0, Opcode: 0b11100, qAndSizeResolver: func(arrangement VectorArrangement) (Q, size byte, err error) {
switch arrangement {
case VectorArrangement4S:
size, Q = 0b00, 1
case VectorArrangement2S:
size, Q = 0b00, 0
case VectorArrangement2D:
size, Q = 0b01, 1
default:
err = fmt.Errorf("unsupported arrangement %s for %s", arrangement.String(), InstructionName(FCMEQ))
}
return
}},
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/SSHL--Signed-Shift-Left--register--?lang=en
SSHL: {U: 0, Opcode: 0b01000, qAndSizeResolver: func(arrangement VectorArrangement) (Q, size byte, err error) {
size, Q = arrangementSizeQ(arrangement)
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FCMGT--register---Floating-point-Compare-Greater-than--vector--?lang=en
FCMGT: {U: 0b1, Opcode: 0b11100, qAndSizeResolver: func(arrangement VectorArrangement) (Q, size byte, err error) {
switch arrangement {
case VectorArrangement4S:
size, Q = 0b10, 1
case VectorArrangement2S:
size, Q = 0b10, 0
case VectorArrangement2D:
size, Q = 0b11, 1
default:
err = fmt.Errorf("unsupported arrangement %s for %s", arrangement.String(), InstructionName(FCMGT))
}
return
}},
USHL: {U: 0b1, Opcode: 0b01000, qAndSizeResolver: func(arrangement VectorArrangement) (Q, size byte, err error) {
size, Q = arrangementSizeQ(arrangement)
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/FCMGE--register---Floating-point-Compare-Greater-than-or-Equal--vector--?lang=en
FCMGE: {U: 0b1, Opcode: 0b11100, qAndSizeResolver: func(arrangement VectorArrangement) (Q, size byte, err error) {
switch arrangement {
case VectorArrangement4S:
size, Q = 0b00, 1
case VectorArrangement2S:
size, Q = 0b00, 0
case VectorArrangement2D:
size, Q = 0b01, 1
default:
err = fmt.Errorf("unsupported arrangement %s for %s", arrangement.String(), InstructionName(FCMGE))
}
return
}},
}

// advancedSIMDThreeSameDefaultResolver is the qAndSizeResolver shared by the advancedSIMDThreeSame
// entries that need no per-instruction arrangement handling: it forwards the arrangement to
// arrangementSizeQ and returns the resulting Q and size together with a nil error.
func advancedSIMDThreeSameDefaultResolver(arrangement VectorArrangement) (Q, size byte, err error) {
	// TODO: simply use arrangementSizeQ as the resolver after refactoring other call-site of arrangementSizeQ.
	// arrangementSizeQ yields (size, Q); this resolver's result order is (Q, size, err).
	sz, q := arrangementSizeQ(arrangement)
	return q, sz, nil
}

// advancedSIMDAcrossLanes holds information to encode instructions as "Advanced SIMD across lanes" in
// https://developer.arm.com/documentation/ddi0596/2021-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
var advancedSIMDAcrossLanes = map[asm.Instruction]struct {
Expand Down
176 changes: 176 additions & 0 deletions internal/asm/arm64/impl_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1741,6 +1741,182 @@ func TestAssemblerImpl_encodeTwoVectorRegistersToVectorRegister(t *testing.T) {
},
exp: []byte{0xe0, 0x39, 0x1, 0x2e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
},
{
name: "cmeq v0.8b, v15.8b, v1.8b",
n: &NodeImpl{
Instruction: CMEQ,
DstReg: RegV0,
SrcReg: RegV1,
SrcReg2: RegV15,
VectorArrangement: VectorArrangement8B,
},
exp: []byte{0xe0, 0x8d, 0x21, 0x2e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
},
{
name: "cmgt v0.16b, v15.16b, v1.16b",
n: &NodeImpl{
Instruction: CMGT,
DstReg: RegV0,
SrcReg: RegV1,
SrcReg2: RegV15,
VectorArrangement: VectorArrangement16B,
},
exp: []byte{0xe0, 0x35, 0x21, 0x4e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
},
{
name: "cmhi v0.8h, v15.8h, v1.8h",
n: &NodeImpl{
Instruction: CMHI,
DstReg: RegV0,
SrcReg: RegV1,
SrcReg2: RegV15,
VectorArrangement: VectorArrangement8H,
},
exp: []byte{0xe0, 0x35, 0x61, 0x6e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
},
{
name: "cmhi v0.4h, v15.4h, v1.4h",
n: &NodeImpl{
Instruction: CMHI,
DstReg: RegV0,
SrcReg: RegV1,
SrcReg2: RegV15,
VectorArrangement: VectorArrangement4H,
},
exp: []byte{0xe0, 0x35, 0x61, 0x2e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
},
{
name: "cmge v0.4s, v15.4s, v1.4s",
n: &NodeImpl{
Instruction: CMGE,
DstReg: RegV0,
SrcReg: RegV1,
SrcReg2: RegV15,
VectorArrangement: VectorArrangement4S,
},
exp: []byte{0xe0, 0x3d, 0xa1, 0x4e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
},
{
name: "cmge v0.2s, v15.2s, v1.2s",
n: &NodeImpl{
Instruction: CMGE,
DstReg: RegV0,
SrcReg: RegV1,
SrcReg2: RegV15,
VectorArrangement: VectorArrangement2S,
},
exp: []byte{0xe0, 0x3d, 0xa1, 0xe, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
},
{
name: "cmhs v30.2d, v4.2d, v11.2d",
n: &NodeImpl{
Instruction: CMHS,
DstReg: RegV30,
SrcReg: RegV11,
SrcReg2: RegV4,
VectorArrangement: VectorArrangement2D,
},
exp: []byte{0x9e, 0x3c, 0xeb, 0x6e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
},
{
name: "fcmeq v30.2d, v4.2d, v11.2d",
n: &NodeImpl{
Instruction: FCMEQ,
DstReg: RegV30,
SrcReg: RegV11,
SrcReg2: RegV4,
VectorArrangement: VectorArrangement2D,
},
exp: []byte{0x9e, 0xe4, 0x6b, 0x4e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
},
{
name: "fcmeq v30.4s, v4.4s, v11.4s",
n: &NodeImpl{
Instruction: FCMEQ,
DstReg: RegV30,
SrcReg: RegV11,
SrcReg2: RegV4,
VectorArrangement: VectorArrangement4S,
},
exp: []byte{0x9e, 0xe4, 0x2b, 0x4e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
},
{
name: "fcmeq v30.2s, v4.2s, v11.2s",
n: &NodeImpl{
Instruction: FCMEQ,
DstReg: RegV30,
SrcReg: RegV11,
SrcReg2: RegV4,
VectorArrangement: VectorArrangement2S,
},
exp: []byte{0x9e, 0xe4, 0x2b, 0xe, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
},
{
name: "fcmgt v30.2d, v4.2d, v11.2d",
n: &NodeImpl{
Instruction: FCMGT,
DstReg: RegV30,
SrcReg: RegV11,
SrcReg2: RegV4,
VectorArrangement: VectorArrangement2D,
},
exp: []byte{0x9e, 0xe4, 0xeb, 0x6e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
},
{
name: "fcmgt v30.4s, v4.4s, v11.4s",
n: &NodeImpl{
Instruction: FCMGT,
DstReg: RegV30,
SrcReg: RegV11,
SrcReg2: RegV4,
VectorArrangement: VectorArrangement4S,
},
exp: []byte{0x9e, 0xe4, 0xab, 0x6e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
},
{
name: "fcmgt v30.2s, v4.2s, v11.2s",
n: &NodeImpl{
Instruction: FCMGT,
DstReg: RegV30,
SrcReg: RegV11,
SrcReg2: RegV4,
VectorArrangement: VectorArrangement2S,
},
exp: []byte{0x9e, 0xe4, 0xab, 0x2e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
},
{
name: "fcmge v30.2d, v4.2d, v11.2d",
n: &NodeImpl{
Instruction: FCMGE,
DstReg: RegV30,
SrcReg: RegV11,
SrcReg2: RegV4,
VectorArrangement: VectorArrangement2D,
},
exp: []byte{0x9e, 0xe4, 0x6b, 0x6e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
},
{
name: "fcmge v30.4s, v4.4s, v11.4s",
n: &NodeImpl{
Instruction: FCMGE,
DstReg: RegV30,
SrcReg: RegV11,
SrcReg2: RegV4,
VectorArrangement: VectorArrangement4S,
},
exp: []byte{0x9e, 0xe4, 0x2b, 0x6e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
},
{
name: "fcmge v30.2s, v4.2s, v11.2s",
n: &NodeImpl{
Instruction: FCMGE,
DstReg: RegV30,
SrcReg: RegV11,
SrcReg2: RegV4,
VectorArrangement: VectorArrangement2S,
},
exp: []byte{0x9e, 0xe4, 0x2b, 0x2e, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
},
}

for _, tt := range tests {
Expand Down
5 changes: 0 additions & 5 deletions internal/engine/compiler/compiler_vec_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2994,11 +2994,6 @@ func f64x2(f1, f2 float64) (ret [16]byte) {
}

func TestCompiler_compileV128Cmp(t *testing.T) {
if runtime.GOARCH != "amd64" {
// TODO: implement on arm64.
t.Skip()
}

tests := []struct {
name string
cmpType wazeroir.V128CmpType
Expand Down
Loading

0 comments on commit cd00799

Please sign in to comment.