diff --git a/internal/asm/amd64/consts.go b/internal/asm/amd64/consts.go
index d6a5fcd8290..a7890826d9c 100644
--- a/internal/asm/amd64/consts.go
+++ b/internal/asm/amd64/consts.go
@@ -377,6 +377,8 @@ const (
 	PCMPEQD
 	// PCMPEQQ is the PCMPEQQ instruction https://www.felixcloutier.com/x86/pcmpeqq
 	PCMPEQQ
+	// PADDUSB is the PADDUSB instruction https://www.felixcloutier.com/x86/paddusb:paddusw
+	PADDUSB
 )
 
 // InstructionName returns the name for an instruction
@@ -706,6 +708,18 @@ func InstructionName(instruction asm.Instruction) string {
 		return "INSERTPS"
 	case MOVLHPS:
 		return "MOVLHPS"
+	case PTEST:
+		return "PTEST"
+	case PCMPEQB:
+		return "PCMPEQB"
+	case PCMPEQW:
+		return "PCMPEQW"
+	case PCMPEQD:
+		return "PCMPEQD"
+	case PCMPEQQ:
+		return "PCMPEQQ"
+	case PADDUSB:
+		return "PADDUSB"
 	}
 	return "Unknown"
 }
diff --git a/internal/asm/amd64/impl.go b/internal/asm/amd64/impl.go
index 7a4f96aaaf4..cee279675c8 100644
--- a/internal/asm/amd64/impl.go
+++ b/internal/asm/amd64/impl.go
@@ -1235,6 +1235,7 @@ var registerToRegisterOpcode = map[asm.Instruction]struct {
 	PCMPEQW: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x75}, requireSrcFloat: true, requireDstFloat: true},
 	PCMPEQD: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x76}, requireSrcFloat: true, requireDstFloat: true},
 	PCMPEQQ: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0x38, 0x29}, requireSrcFloat: true, requireDstFloat: true},
+	PADDUSB: {mandatoryPrefix: 0x66, opcode: []byte{0x0f, 0xdc}, requireSrcFloat: true, requireDstFloat: true},
 }
 
 var RegisterToRegisterShiftOpcode = map[asm.Instruction]struct {
diff --git a/internal/engine/compiler/compiler_vec_test.go b/internal/engine/compiler/compiler_vec_test.go
index 9364c5fdaf5..0bab7aedbe4 100644
--- a/internal/engine/compiler/compiler_vec_test.go
+++ b/internal/engine/compiler/compiler_vec_test.go
@@ -1465,3 +1465,96 @@ func TestCompiler_compileV128AllTrue(t *testing.T) {
 		})
 	}
 }
+
+func TestCompiler_compileV128Swizzle(t *testing.T) { // each case pushes baseVec, then indexVec, swizzles, and compares the stack top with expVec
+	i8ToU8 := func(v int8) byte { // reinterprets a signed byte as unsigned, e.g. -1 -> 0xff
+		return byte(v)
+	}
+	tests := []struct {
+		name              string
+		indexVec, baseVec [16]byte
+		expVec            [16]byte
+	}{
+		{
+			name:     "1", // identity: indices 0..15 in order, so expVec equals baseVec
+			baseVec:  [16]byte{16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31},
+			indexVec: [16]byte{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
+			expVec:   [16]byte{16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31},
+		},
+		{
+			name: "2", // every index is outside 0..15 (0xf8..0xff and 16..23), so every lane is expected to be zero
+			baseVec: [16]byte{i8ToU8(-16), i8ToU8(-15), i8ToU8(-14), i8ToU8(-13), i8ToU8(-12),
+				i8ToU8(-11), i8ToU8(-10), i8ToU8(-9), i8ToU8(-8), i8ToU8(-7), i8ToU8(-6), i8ToU8(-5),
+				i8ToU8(-4), i8ToU8(-3), i8ToU8(-2), i8ToU8(-1)},
+			indexVec: [16]byte{i8ToU8(-8), i8ToU8(-7), i8ToU8(-6), i8ToU8(-5), i8ToU8(-4),
+				i8ToU8(-3), i8ToU8(-2), i8ToU8(-1), 16, 17, 18, 19, 20, 21, 22, 23},
+			expVec: [16]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+		},
+		{
+			name:     "3", // indices 15..0: expVec is baseVec reversed
+			baseVec:  [16]byte{100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115},
+			indexVec: [16]byte{15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0},
+			expVec:   [16]byte{115, 114, 113, 112, 111, 110, 109, 108, 107, 106, 105, 104, 103, 102, 101, 100},
+		},
+		{
+			name:    "4", // in-range indices (9..15) interleaved with out-of-range ones (16..23), which produce zero lanes
+			baseVec: [16]byte{100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115},
+			indexVec: [16]byte{
+				9, 16, 10, 17, 11, 18, 12, 19, 13, 20, 14, 21, 15, 22, 16, 23,
+			},
+			expVec: [16]byte{109, 0, 110, 0, 111, 0, 112, 0, 113, 0, 114, 0, 115, 0, 0, 0},
+		},
+		{
+			name:     "5", // same values as case "4", with baseVec/expVec written in hexadecimal
+			baseVec:  [16]byte{0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73},
+			indexVec: [16]byte{9, 16, 10, 17, 11, 18, 12, 19, 13, 20, 14, 21, 15, 22, 16, 23},
+			expVec:   [16]byte{0x6d, 0, 0x6e, 0, 0x6f, 0, 0x70, 0, 0x71, 0, 0x72, 0, 0x73, 0, 0, 0},
+		},
+	}
+
+	for _, tc := range tests {
+		tc := tc
+		t.Run(tc.name, func(t *testing.T) {
+			env := newCompilerEnvironment()
+			compiler := env.requireNewCompiler(t, newCompiler,
+				&wazeroir.CompilationResult{HasMemory: true, Signature: &wasm.FunctionType{}})
+
+			err := compiler.compilePreamble()
+			require.NoError(t, err)
+
+			err = compiler.compileV128Const(&wazeroir.OperationV128Const{ // first operand pushed: the base vector
+				Lo: binary.LittleEndian.Uint64(tc.baseVec[:8]),
+				Hi: binary.LittleEndian.Uint64(tc.baseVec[8:]),
+			})
+			require.NoError(t, err)
+
+			err = compiler.compileV128Const(&wazeroir.OperationV128Const{ // second operand pushed (stack top): the index vector
+				Lo: binary.LittleEndian.Uint64(tc.indexVec[:8]),
+				Hi: binary.LittleEndian.Uint64(tc.indexVec[8:]),
+			})
+			require.NoError(t, err)
+
+			err = compiler.compileV128Swizzle(&wazeroir.OperationV128Swizzle{})
+			require.NoError(t, err)
+
+			require.Equal(t, uint64(2), compiler.runtimeValueLocationStack().sp) // NOTE(review): presumably the single v128 result occupies two 64-bit stack slots - confirm
+			require.Equal(t, 1, len(compiler.runtimeValueLocationStack().usedRegisters)) // exactly one register is expected to remain in use after the swizzle
+
+			err = compiler.compileReturnFunction()
+			require.NoError(t, err)
+
+			// Generate and run the code under test.
+			code, _, _, err := compiler.compile()
+			require.NoError(t, err)
+			env.exec(code)
+
+			require.Equal(t, nativeCallStatusCodeReturned, env.callEngine().statusCode)
+
+			lo, hi := env.stackTopAsV128()
+			var actual [16]byte // reassemble the 16 result bytes from the two little-endian halves
+			binary.LittleEndian.PutUint64(actual[:8], lo)
+			binary.LittleEndian.PutUint64(actual[8:], hi)
+			require.Equal(t, tc.expVec, actual)
+		})
+	}
+}
diff --git a/internal/engine/compiler/impl_vec_amd64.go b/internal/engine/compiler/impl_vec_amd64.go
index 17c8059897b..8986f2334e2 100644
--- a/internal/engine/compiler/impl_vec_amd64.go
+++ b/internal/engine/compiler/impl_vec_amd64.go
@@ -449,8 +449,38 @@ func (c *amd64Compiler) compileV128Shuffle(o *wazeroir.OperationV128Shuffle) err
 	return nil
 }
 
+var swizzleConst = [16]byte{ // 0x70 in every lane; compileV128Swizzle adds it to the index vector with PADDUSB (unsigned saturating add)
+	0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70,
+	0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70,
+}
+
 // compileV128Swizzle implements compiler.compileV128Swizzle for amd64.
-func (c *amd64Compiler) compileV128Swizzle(o *wazeroir.OperationV128Swizzle) error {
+func (c *amd64Compiler) compileV128Swizzle(*wazeroir.OperationV128Swizzle) error {
+	indexVec := c.locationStack.popV128() // the index vector is popped first, i.e. it is the most recently pushed operand
+	if err := c.compileEnsureOnGeneralPurposeRegister(indexVec); err != nil { // NOTE(review): presumably loads the value into a register if it is not already in one - confirm
+		return err
+	}
+
+	baseVec := c.locationStack.popV128() // the vector whose lanes get selected; its register also receives the result
+	if err := c.compileEnsureOnGeneralPurposeRegister(baseVec); err != nil {
+		return err
+	}
+
+	tmp, err := c.allocateRegister(registerTypeVector) // scratch vector register for the 0x70 bias constant
+	if err != nil {
+		return err
+	}
+
+	err = c.assembler.CompileStaticConstToRegister(amd64.MOVDQU, swizzleConst[:], tmp) // tmp = swizzleConst
+	if err != nil {
+		return err
+	}
+
+	c.assembler.CompileRegisterToRegister(amd64.PADDUSB, tmp, indexVec.register) // indexVec += 0x70 per byte, saturating at 0xff: indices 0..15 become 0x70..0x7f (bit 7 clear, low nibble kept), indices >= 16 end up with bit 7 set
+	c.assembler.CompileRegisterToRegister(amd64.PSHUFB, indexVec.register, baseVec.register) // PSHUFB writes 0 to lanes whose selector has bit 7 set and otherwise picks baseVec[selector & 0x0f], so out-of-range indices yield 0
+
+	c.pushVectorRuntimeValueLocationOnRegister(baseVec.register) // the swizzled result is left in baseVec's register
+	c.locationStack.markRegisterUnused(indexVec.register) // the index register is no longer needed
 	return nil
 }
 
diff --git a/internal/integration_test/asm/amd64_debug/golang_asm.go b/internal/integration_test/asm/amd64_debug/golang_asm.go
index b8ff43c5f46..90a1a27b143 100644
--- a/internal/integration_test/asm/amd64_debug/golang_asm.go
+++ b/internal/integration_test/asm/amd64_debug/golang_asm.go
@@ -591,4 +591,5 @@ var castAsGolangAsmInstruction = [...]obj.As{
 	amd64.PCMPEQW: x86.APCMPEQW,
 	amd64.PCMPEQD: x86.APCMPEQL,
 	amd64.PCMPEQQ: x86.APCMPEQQ,
+	amd64.PADDUSB: x86.APADDUSB,
 }
diff --git a/internal/integration_test/asm/amd64_debug/impl_test.go b/internal/integration_test/asm/amd64_debug/impl_test.go
index 90df363b796..05b9aa27da8 100644
--- a/internal/integration_test/asm/amd64_debug/impl_test.go
+++ b/internal/integration_test/asm/amd64_debug/impl_test.go
@@ -934,6 +934,7 @@ func TestAssemblerImpl_EncodeRegisterToRegister(t *testing.T) {
 		{instruction: amd64.PCMPEQW, srcRegs: floatRegisters, DstRegs: floatRegisters},
 		{instruction: amd64.PCMPEQD, srcRegs: floatRegisters, DstRegs: floatRegisters},
 		{instruction: amd64.PCMPEQQ, srcRegs: floatRegisters, DstRegs: floatRegisters},
+		{instruction: amd64.PADDUSB, srcRegs: floatRegisters, DstRegs: floatRegisters},
 	}
 
 	for _, tt := range tests {
diff --git a/internal/integration_test/spectest/v2/testdata/simd_lane.wast b/internal/integration_test/spectest/v2/testdata/simd_lane.wast
index 9d4b5fd7296..067f616af69 100644
--- a/internal/integration_test/spectest/v2/testdata/simd_lane.wast
+++ b/internal/integration_test/spectest/v2/testdata/simd_lane.wast
@@ -309,6 +309,7 @@
                                        (v128.const i8x16 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115)
                                        (v128.const i8x16 9 16 10 17 11 18 12 19 13 20 14 21 15 22 16 23))
                                        (v128.const i8x16 109 0 110 0 111 0 112 0 113 0 114 0 115 0 0 0))
+
 (assert_return (invoke "v8x16_swizzle"
                                        (v128.const i8x16 0x64 0x65 0x66 0x67 0x68 0x69 0x6a 0x6b 0x6c 0x6d 0x6e 0x6f 0x70 0x71 0x72 0x73)
                                        (v128.const i8x16 9 16 10 17 11 18 12 19 13 20 14 21 15 22 16 23))