Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

arm64: dot and pseudo min/max instructions #643

Merged
merged 3 commits into from
Jun 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 0 additions & 10 deletions internal/engine/compiler/compiler_vec_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4833,11 +4833,6 @@ func TestCompiler_compileV128Round(t *testing.T) {
}

func TestCompiler_compileV128_Pmax_Pmin(t *testing.T) {
if runtime.GOARCH != "amd64" {
// TODO: implement on amd64.
t.Skip()
}

tests := []struct {
name string
shape wazeroir.Shape
Expand Down Expand Up @@ -7084,11 +7079,6 @@ func TestCompiler_compileV128FConvertFromI(t *testing.T) {
}

func TestCompiler_compileV128Dot(t *testing.T) {
if runtime.GOARCH != "amd64" {
// TODO: implement on amd64.
t.Skip()
}

tests := []struct {
name string
x1, x2, exp [16]byte
Expand Down
68 changes: 64 additions & 4 deletions internal/engine/compiler/impl_vec_arm64.go
Original file line number Diff line number Diff line change
Expand Up @@ -1206,12 +1206,45 @@ func (c *arm64Compiler) compileV128AvgrU(o *wazeroir.OperationV128AvgrU) error {

// compileV128Pmin implements compiler.compileV128Pmin for arm64.
//
// It delegates to compileV128PseudoMinOrMax with max=false, using the vector
// arrangement that defaultArrangementForShape derives from o.Shape.
// Any error from the shared helper is returned unchanged.
func (c *arm64Compiler) compileV128Pmin(o *wazeroir.OperationV128Pmin) error {
	return c.compileV128PseudoMinOrMax(defaultArrangementForShape(o.Shape), false)
}

// compileV128Pmax implements compiler.compileV128Pmax for arm64.
//
// It delegates to compileV128PseudoMinOrMax with max=true, using the vector
// arrangement that defaultArrangementForShape derives from o.Shape.
// Any error from the shared helper is returned unchanged.
func (c *arm64Compiler) compileV128Pmax(o *wazeroir.OperationV128Pmax) error {
	return c.compileV128PseudoMinOrMax(defaultArrangementForShape(o.Shape), true)
}

// compileV128PseudoMinOrMax is the shared code generator behind compileV128Pmax and compileV128Pmin.
//
// arr is the vector arrangement used for the lane-wise floating point comparison, and max selects
// between the pseudo-max (true) and pseudo-min (false) flavors. The two operands are popped from
// the location stack (x2 first, then x1), and the result is pushed back on a newly allocated
// vector register. An error is returned if an operand cannot be placed on a register or if no
// vector register can be allocated.
func (c *arm64Compiler) compileV128PseudoMinOrMax(arr arm64.VectorArrangement, max bool) error {
	// The top of the stack is the second operand.
	rhs := c.locationStack.popV128()
	if err := c.compileEnsureOnGeneralPurposeRegister(rhs); err != nil {
		return err
	}

	lhs := c.locationStack.popV128()
	if err := c.compileEnsureOnGeneralPurposeRegister(lhs); err != nil {
		return err
	}

	mask, err := c.allocateRegister(registerTypeVector)
	if err != nil {
		return err
	}

	lhsReg, rhsReg := lhs.register, rhs.register

	// Min and max differ only in the operand order handed to the greater-than comparison.
	cmpFirst, cmpSecond := rhsReg, lhsReg
	if max {
		cmpFirst, cmpSecond = lhsReg, rhsReg
	}

	// Per lane, the mask register gets all bits set when x1's lane satisfies the condition
	// (min or max), and all bits cleared otherwise.
	c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMGT, cmpFirst, cmpSecond, mask, arr)
	// Bitwise select between the operands driven by the mask bits computed above; the same
	// register then carries the final value.
	c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.BSL, lhsReg, rhsReg, mask, arm64.VectorArrangement16B)

	// Both inputs are consumed; only the freshly allocated register stays live.
	c.markRegisterUnused(lhsReg, rhsReg)
	c.pushVectorRuntimeValueLocationOnRegister(mask)
	return nil
}

// compileV128Ceil implements compiler.compileV128Ceil for arm64.
Expand Down Expand Up @@ -1356,8 +1389,35 @@ func (c *arm64Compiler) compileV128FConvertFromI(o *wazeroir.OperationV128FConve
}

// compileV128Dot implements compiler.compileV128Dot for arm64.
func (c *arm64Compiler) compileV128Dot(o *wazeroir.OperationV128Dot) error {
return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind())
func (c *arm64Compiler) compileV128Dot(*wazeroir.OperationV128Dot) error {
x2 := c.locationStack.popV128()
if err := c.compileEnsureOnGeneralPurposeRegister(x2); err != nil {
return err
}

x1 := c.locationStack.popV128()
if err := c.compileEnsureOnGeneralPurposeRegister(x1); err != nil {
return err
}

tmp, err := c.allocateRegister(registerTypeVector)
if err != nil {
return err
}

x1r, x2r := x1.register, x2.register

// Multiply lower integers and get the 32-bit results into tmp.
c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.SMULL, x1r, x2r, tmp, arm64.VectorArrangement4H)
// Multiply higher integers and get the 32-bit results into x1r.
c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.SMULL2, x1r, x2r, x1r, arm64.VectorArrangement8H)
// Adds these two results into x1r.
c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VADDP, x1r, tmp, x1r, arm64.VectorArrangement4S)

c.markRegisterUnused(x2r)
c.pushVectorRuntimeValueLocationOnRegister(x1r)

return nil
}

// compileV128Narrow implements compiler.compileV128Narrow for arm64.
Expand Down
4 changes: 1 addition & 3 deletions internal/integration_test/spectest/v2/spec_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,7 @@ func TestCompiler(t *testing.T) {

spectest.Run(t, testcases, compiler.NewEngine, enabledFeatures, func(jsonname string) bool {
switch path.Base(jsonname) {
case "simd_f64x2_pmin_pmax.json", "simd_f32x4_pmin_pmax.json",
"simd_i32x4_dot_i16x8.json", "simd_splat.json", "simd_load.json",
"simd_conversions.json":
case "simd_splat.json", "simd_load.json", "simd_conversions.json":
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

almost there!!!

// TODO: implement on arm64.
return runtime.GOARCH == "amd64"
default:
Expand Down