From 77b8cd0fa8cfb249208685efa4fa716ea4142201 Mon Sep 17 00:00:00 2001
From: Takeshi Yoneda
Date: Mon, 20 Jun 2022 17:36:31 +0900
Subject: [PATCH 1/3] arm64: implement dot

Signed-off-by: Takeshi Yoneda
---
 internal/engine/compiler/compiler_vec_test.go |  5 ---
 internal/engine/compiler/impl_vec_arm64.go    | 31 +++++++++++++++++--
 .../integration_test/spectest/v2/spec_test.go |  3 +-
 3 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/internal/engine/compiler/compiler_vec_test.go b/internal/engine/compiler/compiler_vec_test.go
index c4801c5f52..1542311177 100644
--- a/internal/engine/compiler/compiler_vec_test.go
+++ b/internal/engine/compiler/compiler_vec_test.go
@@ -7084,11 +7084,6 @@ func TestCompiler_compileV128FConvertFromI(t *testing.T) {
 }
 
 func TestCompiler_compileV128Dot(t *testing.T) {
-	if runtime.GOARCH != "amd64" {
-		// TODO: implement on amd64.
-		t.Skip()
-	}
-
 	tests := []struct {
 		name        string
 		x1, x2, exp [16]byte
diff --git a/internal/engine/compiler/impl_vec_arm64.go b/internal/engine/compiler/impl_vec_arm64.go
index e873c6fad0..da69086c1f 100644
--- a/internal/engine/compiler/impl_vec_arm64.go
+++ b/internal/engine/compiler/impl_vec_arm64.go
@@ -1356,8 +1356,35 @@ func (c *arm64Compiler) compileV128FConvertFromI(o *wazeroir.OperationV128FConve
 }
 
 // compileV128Dot implements compiler.compileV128Dot for arm64.
-func (c *arm64Compiler) compileV128Dot(o *wazeroir.OperationV128Dot) error {
-	return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind())
+func (c *arm64Compiler) compileV128Dot(*wazeroir.OperationV128Dot) error {
+	x2 := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(x2); err != nil {
+		return err
+	}
+
+	x1 := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(x1); err != nil {
+		return err
+	}
+
+	tmp, err := c.allocateRegister(registerTypeVector)
+	if err != nil {
+		return err
+	}
+
+	x1r, x2r := x1.register, x2.register
+
+	// Multiply lower integers and get the 32-bit results into tmp.
+	c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.SMULL, x1r, x2r, tmp, arm64.VectorArrangement4H)
+	// Multiply higher integers and get the 32-bit results into x1r.
+	c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.SMULL2, x1r, x2r, x1r, arm64.VectorArrangement8H)
+	// Adds these two results into x1r.
+	c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.VADDP, x1r, tmp, x1r, arm64.VectorArrangement4S)
+
+	c.markRegisterUnused(x2r)
+	c.pushVectorRuntimeValueLocationOnRegister(x1r)
+
+	return nil
 }
 
 // compileV128Narrow implements compiler.compileV128Narrow for arm64.
diff --git a/internal/integration_test/spectest/v2/spec_test.go b/internal/integration_test/spectest/v2/spec_test.go
index 8aa41a3af5..a949e40e6a 100644
--- a/internal/integration_test/spectest/v2/spec_test.go
+++ b/internal/integration_test/spectest/v2/spec_test.go
@@ -27,8 +27,7 @@ func TestCompiler(t *testing.T) {
 	spectest.Run(t, testcases, compiler.NewEngine, enabledFeatures, func(jsonname string) bool {
 		switch path.Base(jsonname) {
 		case "simd_f64x2_pmin_pmax.json", "simd_f32x4_pmin_pmax.json",
-			"simd_i32x4_dot_i16x8.json", "simd_splat.json", "simd_load.json",
-			"simd_conversions.json":
+			"simd_splat.json", "simd_load.json", "simd_conversions.json":
 			// TODO: implement on arm64.
 			return runtime.GOARCH == "amd64"
 		default:
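For context on the lowering above: `i32x4.dot_i16x8_s` is implemented with three instructions. SMULL widening-multiplies the low four 16-bit lane pairs into 32-bit products, SMULL2 does the same for the high four, and ADDP adds adjacent products pairwise, so output lane i holds x1[2i]*x2[2i] + x1[2i+1]*x2[2i+1]. A minimal scalar sketch of the same semantics, not part of the patch (the helper name is illustrative):

```go
package main

import "fmt"

// dotI16x8S mirrors the SMULL/SMULL2+ADDP lowering of i32x4.dot_i16x8_s.
func dotI16x8S(x1, x2 [8]int16) (out [4]int32) {
	var lo, hi [4]int32
	// SMULL: widening multiply of the lower four 16-bit lanes.
	for i := 0; i < 4; i++ {
		lo[i] = int32(x1[i]) * int32(x2[i])
	}
	// SMULL2: widening multiply of the upper four 16-bit lanes.
	for i := 0; i < 4; i++ {
		hi[i] = int32(x1[i+4]) * int32(x2[i+4])
	}
	// ADDP: pairwise addition; pairs within lo fill the low half of the
	// result vector, pairs within hi fill the high half.
	out[0], out[1] = lo[0]+lo[1], lo[2]+lo[3]
	out[2], out[3] = hi[0]+hi[1], hi[2]+hi[3]
	return
}

func main() {
	x1 := [8]int16{1, 2, 3, 4, 5, 6, 7, 8}
	x2 := [8]int16{1, 1, 1, 1, 1, 1, 1, 1}
	fmt.Println(dotI16x8S(x1, x2)) // [3 7 11 15]
}
```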
From b900be25fde8613e8fe83b080c6ff82ca862d050 Mon Sep 17 00:00:00 2001
From: Takeshi Yoneda
Date: Mon, 20 Jun 2022 17:45:32 +0900
Subject: [PATCH 2/3] arm64: implement pseudo max/min

Signed-off-by: Takeshi Yoneda
---
 internal/engine/compiler/compiler_vec_test.go |  5 ---
 internal/engine/compiler/impl_vec_arm64.go    | 35 +++++++++++++++++--
 .../integration_test/spectest/v2/spec_test.go |  3 +-
 3 files changed, 34 insertions(+), 9 deletions(-)

diff --git a/internal/engine/compiler/compiler_vec_test.go b/internal/engine/compiler/compiler_vec_test.go
index 1542311177..310f925112 100644
--- a/internal/engine/compiler/compiler_vec_test.go
+++ b/internal/engine/compiler/compiler_vec_test.go
@@ -4833,11 +4833,6 @@ func TestCompiler_compileV128Round(t *testing.T) {
 }
 
 func TestCompiler_compileV128_Pmax_Pmin(t *testing.T) {
-	if runtime.GOARCH != "amd64" {
-		// TODO: implement on amd64.
-		t.Skip()
-	}
-
 	tests := []struct {
 		name  string
 		shape wazeroir.Shape
diff --git a/internal/engine/compiler/impl_vec_arm64.go b/internal/engine/compiler/impl_vec_arm64.go
index da69086c1f..9829c18ae8 100644
--- a/internal/engine/compiler/impl_vec_arm64.go
+++ b/internal/engine/compiler/impl_vec_arm64.go
@@ -1206,12 +1206,43 @@ func (c *arm64Compiler) compileV128AvgrU(o *wazeroir.OperationV128AvgrU) error {
 
 // compileV128Pmin implements compiler.compileV128Pmin for arm64.
 func (c *arm64Compiler) compileV128Pmin(o *wazeroir.OperationV128Pmin) error {
-	return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind())
+	return c.compileV128PseudoMinOrMax(defaultArrangementForShape(o.Shape), false)
 }
 
 // compileV128Pmax implements compiler.compileV128Pmax for arm64.
 func (c *arm64Compiler) compileV128Pmax(o *wazeroir.OperationV128Pmax) error {
-	return fmt.Errorf("TODO: %s is not implemented yet on arm64 compiler", o.Kind())
+	return c.compileV128PseudoMinOrMax(defaultArrangementForShape(o.Shape), true)
+}
+
+// compileV128PseudoMinOrMax implements compileV128Pmax and compileV128Pmin.
+func (c *arm64Compiler) compileV128PseudoMinOrMax(arr arm64.VectorArrangement, max bool) error {
+	x2 := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(x2); err != nil {
+		return err
+	}
+
+	x1 := c.locationStack.popV128()
+	if err := c.compileEnsureOnGeneralPurposeRegister(x1); err != nil {
+		return err
+	}
+
+	result, err := c.allocateRegister(registerTypeVector)
+	if err != nil {
+		return err
+	}
+
+	x1r, x2r := x1.register, x2.register
+
+	if max {
+		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMGT, x1r, x2r, result, arr)
+	} else {
+		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMGT, x2r, x1r, result, arr)
+	}
+	c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.BSL, x1r, x2r, result, arm64.VectorArrangement16B)
+
+	c.markRegisterUnused(x1r, x2r)
+	c.pushVectorRuntimeValueLocationOnRegister(result)
+	return nil
 }
 
 // compileV128Ceil implements compiler.compileV128Ceil for arm64.
diff --git a/internal/integration_test/spectest/v2/spec_test.go b/internal/integration_test/spectest/v2/spec_test.go
index a949e40e6a..c46aa589f4 100644
--- a/internal/integration_test/spectest/v2/spec_test.go
+++ b/internal/integration_test/spectest/v2/spec_test.go
@@ -26,8 +26,7 @@ func TestCompiler(t *testing.T) {
 
 	spectest.Run(t, testcases, compiler.NewEngine, enabledFeatures, func(jsonname string) bool {
 		switch path.Base(jsonname) {
-		case "simd_f64x2_pmin_pmax.json", "simd_f32x4_pmin_pmax.json",
-			"simd_splat.json", "simd_load.json", "simd_conversions.json":
+		case "simd_splat.json", "simd_load.json", "simd_conversions.json":
 			// TODO: implement on arm64.
 			return runtime.GOARCH == "amd64"
 		default:
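The shared helper above compiles both opcodes to a single compare plus a bitwise select: FCMGT writes an all-ones or all-zeros mask per lane, swapping the FCMGT operands is what distinguishes pmin from pmax, and BSL then merges x1 and x2 under that mask. A scalar sketch of the per-lane semantics being implemented, following the Wasm spec definition of the pseudo ops (illustrative only, not wazero code):

```go
package main

import "fmt"

// pmax is Wasm's pseudo-maximum: x1 < x2 ? x2 : x1. Unlike IEEE max it does
// not canonicalize NaN or order ±0; a single comparison decides the select,
// which is why one FCMGT plus one BSL suffices per lane.
func pmax(x1, x2 float32) float32 {
	if x1 < x2 {
		return x2
	}
	return x1
}

// pmin is Wasm's pseudo-minimum: x2 < x1 ? x2 : x1.
func pmin(x1, x2 float32) float32 {
	if x2 < x1 {
		return x2
	}
	return x1
}

func main() {
	fmt.Println(pmax(1, 2), pmin(1, 2)) // 2 1
}
```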
From 0449eb465c9627644a23d3be16081692a1dfefe4 Mon Sep 17 00:00:00 2001
From: Takeshi Yoneda
Date: Mon, 20 Jun 2022 17:49:53 +0900
Subject: [PATCH 3/3] comment

Signed-off-by: Takeshi Yoneda
---
 internal/engine/compiler/impl_vec_arm64.go | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/internal/engine/compiler/impl_vec_arm64.go b/internal/engine/compiler/impl_vec_arm64.go
index 9829c18ae8..6c5093db0f 100644
--- a/internal/engine/compiler/impl_vec_arm64.go
+++ b/internal/engine/compiler/impl_vec_arm64.go
@@ -1233,11 +1233,13 @@ func (c *arm64Compiler) compileV128PseudoMinOrMax(arr arm64.VectorArrangement, m
 
 	x1r, x2r := x1.register, x2.register
 
+	// Set all bits in each lane if x1r's lane satisfies the condition (min or max); zero them otherwise.
 	if max {
 		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMGT, x1r, x2r, result, arr)
 	} else {
 		c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.FCMGT, x2r, x1r, result, arr)
 	}
+	// Select each bit of x1r or x2r based on the comparison mask computed above.
 	c.assembler.CompileTwoVectorRegistersToVectorRegister(arm64.BSL, x1r, x2r, result, arm64.VectorArrangement16B)
 
 	c.markRegisterUnused(x1r, x2r)
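The comments added above describe a mask-then-select pattern. Assuming the AArch64 definition of BSL (bitwise select: Vd = (Vd AND Vn) OR (NOT(Vd) AND Vm), with the destination pre-loaded with the FCMGT mask), the bit-level behavior looks like the following sketch; since FCMGT lanes are all-ones or all-zeros, selecting bits selects whole lanes (names are illustrative, not wazero code):

```go
package main

import "fmt"

// bsl takes bits from n where mask bits are set and from m elsewhere,
// matching BSL when the comparison mask is pre-loaded in the destination.
func bsl(mask, n, m uint64) uint64 {
	return (mask & n) | (^mask & m)
}

func main() {
	allOnes := ^uint64(0) // an FCMGT lane where the condition held
	fmt.Printf("%#x\n", bsl(allOnes, 0x1111, 0x2222)) // 0x1111: first source selected
	fmt.Printf("%#x\n", bsl(0, 0x1111, 0x2222))       // 0x2222: second source selected
}
```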