Skip to content

Commit

Permalink
ssa: memory usage optimization around lastDefinitions (#2233)
Browse files Browse the repository at this point in the history
This avoids the unnecessary duplicated zero initial value definitions 
for Wasm function locals. As a result, for certain binaries which
have huge number of locals like Zig standard libraries,  we can see
1~10% improvements in compilation performance without impacting 
other cases like the below.

### Zig Stdlib tests
```
                             │ old_zig.txt │            new_zig.txt            │
                             │   sec/op    │   sec/op    vs base               │
Zig/Compile/test-opt.wasm-10    4.466 ± 2%   4.399 ± 0%   -1.50% (p=0.007 n=7)
Zig/Run/test-opt.wasm-10        18.86 ± 0%   18.84 ± 0%        ~ (p=0.535 n=7)
Zig/Compile/test.wasm-10        5.684 ± 1%   5.084 ± 1%  -10.55% (p=0.001 n=7)
Zig/Run/test.wasm-10            19.28 ± 1%   19.25 ± 1%        ~ (p=0.535 n=7)
geomean                         9.802        9.490        -3.18%

                             │ old_zig.txt  │            new_zig.txt             │
                             │     B/op     │     B/op      vs base              │
Zig/Compile/test-opt.wasm-10   395.4Mi ± 0%   396.7Mi ± 0%  +0.32% (p=0.001 n=7)
Zig/Run/test-opt.wasm-10       741.7Mi ± 0%   741.7Mi ± 0%       ~ (p=0.941 n=7)
Zig/Compile/test.wasm-10       671.6Mi ± 0%   660.1Mi ± 0%  -1.72% (p=0.001 n=7)
Zig/Run/test.wasm-10           1.296Gi ± 0%   1.296Gi ± 0%       ~ (p=0.363 n=7)
geomean                        715.1Mi        712.6Mi       -0.35%

                             │ old_zig.txt │            new_zig.txt            │
                             │  allocs/op  │  allocs/op   vs base              │
Zig/Compile/test-opt.wasm-10   363.1k ± 0%   363.2k ± 0%       ~ (p=0.456 n=7)
Zig/Run/test-opt.wasm-10       51.58k ± 0%   51.58k ± 0%       ~ (p=0.812 n=7)
Zig/Compile/test.wasm-10       516.3k ± 0%   515.5k ± 0%  -0.16% (p=0.001 n=7)
Zig/Run/test.wasm-10           2.156M ± 0%   2.156M ± 0%       ~ (p=0.171 n=7)
geomean                        380.0k        379.8k       -0.03%
```

### wazero compiled as wasip1 binary 
```
goos: darwin
goarch: arm64
pkg: github.com/tetratelabs/wazero
               │  old.txt   │           new.txt           │
               │   sec/op   │   sec/op    vs base         │
Compilation-10   2.418 ± 0%   2.421 ± 0%  ~ (p=0.383 n=7)

               │   old.txt    │              new.txt               │
               │     B/op     │     B/op      vs base              │
Compilation-10   339.9Mi ± 0%   339.9Mi ± 0%  -0.01% (p=0.001 n=7)

               │   old.txt   │           new.txt            │
               │  allocs/op  │  allocs/op   vs base         │
Compilation-10   603.9k ± 0%   604.0k ± 0%  ~ (p=0.620 n=7)
```

### TinyGo 
```
goos: darwin
goarch: arm64
pkg: github.com/tetratelabs/wazero/internal/integration_test/stdlibs
                                      │ old_tinygo.txt │            new_tinygo.txt             │
                                      │     sec/op     │    sec/op      vs base                │
TinyGo/Compile/container_heap.test-10    414.1m ± 0%     413.1m ± 1%         ~ (p=0.165 n=7)
TinyGo/Run/container_heap.test-10        14.48m ± 1%     14.54m ± 1%         ~ (p=0.383 n=7)
TinyGo/Compile/container_list.test-10    413.1m ± 1%     412.1m ± 1%         ~ (p=0.620 n=7)
TinyGo/Run/container_list.test-10        14.29m ± 0%     14.33m ± 1%         ~ (p=0.259 n=7)
TinyGo/Compile/container_ring.test-10    406.8m ± 1%     407.1m ± 1%         ~ (p=0.902 n=7)
TinyGo/Run/container_ring.test-10        14.28m ± 1%     14.29m ± 1%         ~ (p=0.318 n=7)
TinyGo/Compile/crypto_des.test-10        421.3m ± 0%     422.4m ± 1%         ~ (p=0.209 n=7)
TinyGo/Run/crypto_des.test-10            18.32m ± 1%     18.38m ± 2%         ~ (p=0.209 n=7)
TinyGo/Compile/crypto_md5.test-10        418.7m ± 1%     419.8m ± 0%         ~ (p=0.318 n=7)
TinyGo/Run/crypto_md5.test-10            21.78m ± 5%     20.65m ± 2%         ~ (p=0.165 n=7)
TinyGo/Compile/crypto_rc4.test-10        421.8m ± 4%     404.3m ± 1%    -4.16% (p=0.001 n=7)
TinyGo/Run/crypto_rc4.test-10            162.6m ± 1%     162.3m ± 1%         ~ (p=0.318 n=7)
TinyGo/Compile/crypto_sha1.test-10       420.5m ± 1%     419.4m ± 1%         ~ (p=0.535 n=7)
TinyGo/Run/crypto_sha1.test-10           16.04m ± 1%     16.12m ± 1%    +0.50% (p=0.038 n=7)
TinyGo/Compile/crypto_sha256.test-10     426.8m ± 0%     427.4m ± 1%         ~ (p=1.000 n=7)
TinyGo/Run/crypto_sha256.test-10         16.24m ±  ∞ ¹   16.38m ±  ∞ ¹       ~ (p=1.000 n=1) ²
geomean                                  95.31m          94.88m         -0.45%
¹ need >= 6 samples for confidence interval at level 0.95
² need >= 4 samples to detect a difference at alpha level 0.05

                                      │ old_tinygo.txt │             new_tinygo.txt             │
                                      │      B/op      │      B/op       vs base                │
TinyGo/Compile/container_heap.test-10   48.58Mi ± 0%     48.55Mi ± 0%    -0.06% (p=0.001 n=7)
TinyGo/Run/container_heap.test-10       16.63Mi ± 0%     16.63Mi ± 0%         ~ (p=0.620 n=7)
TinyGo/Compile/container_list.test-10   48.56Mi ± 0%     48.54Mi ± 0%    -0.05% (p=0.001 n=7)
TinyGo/Run/container_list.test-10       16.40Mi ± 0%     16.40Mi ± 0%         ~ (p=0.535 n=7)
TinyGo/Compile/container_ring.test-10   47.81Mi ± 0%     47.78Mi ± 0%    -0.05% (p=0.001 n=7)
TinyGo/Run/container_ring.test-10       16.30Mi ± 0%     16.30Mi ± 0%         ~ (p=0.871 n=7)
TinyGo/Compile/crypto_des.test-10       48.70Mi ± 0%     48.67Mi ± 0%    -0.05% (p=0.001 n=7)
TinyGo/Run/crypto_des.test-10           16.76Mi ± 0%     16.76Mi ± 0%         ~ (p=0.119 n=7)
TinyGo/Compile/crypto_md5.test-10       48.75Mi ± 0%     48.73Mi ± 0%    -0.03% (p=0.001 n=7)
TinyGo/Run/crypto_md5.test-10           44.97Mi ± 0%     44.97Mi ± 0%         ~ (p=0.333 n=7)
TinyGo/Compile/crypto_rc4.test-10       47.79Mi ± 0%     47.76Mi ± 0%    -0.06% (p=0.001 n=7)
TinyGo/Run/crypto_rc4.test-10           29.28Mi ± 0%     29.28Mi ± 0%         ~ (p=0.274 n=7)
TinyGo/Compile/crypto_sha1.test-10      48.99Mi ± 0%     48.97Mi ± 0%    -0.03% (p=0.001 n=7)
TinyGo/Run/crypto_sha1.test-10          17.44Mi ± 0%     17.44Mi ± 0%         ~ (p=0.456 n=7)
TinyGo/Compile/crypto_sha256.test-10    48.82Mi ± 0%     48.81Mi ± 0%    -0.03% (p=0.004 n=7)
TinyGo/Run/crypto_sha256.test-10        17.53Mi ±  ∞ ¹   17.53Mi ±  ∞ ¹       ~ (p=1.000 n=1) ²
geomean                                 31.45Mi          31.45Mi         -0.02%
¹ need >= 6 samples for confidence interval at level 0.95
² need >= 4 samples to detect a difference at alpha level 0.05

                                      │ old_tinygo.txt │            new_tinygo.txt             │
                                      │   allocs/op    │   allocs/op    vs base                │
TinyGo/Compile/container_heap.test-10    83.64k ± 0%     83.63k ± 0%         ~ (p=0.365 n=7)
TinyGo/Run/container_heap.test-10        374.9k ± 0%     374.9k ± 0%         ~ (p=1.000 n=7)
TinyGo/Compile/container_list.test-10    83.38k ± 0%     83.39k ± 0%         ~ (p=0.805 n=7)
TinyGo/Run/container_list.test-10        370.0k ± 0%     370.0k ± 0%         ~ (p=0.633 n=7)
TinyGo/Compile/container_ring.test-10    83.30k ± 0%     83.35k ± 0%         ~ (p=1.000 n=7)
TinyGo/Run/container_ring.test-10        367.6k ± 0%     367.6k ± 0%         ~ (p=0.617 n=7)
TinyGo/Compile/crypto_des.test-10        83.67k ± 0%     83.70k ± 0%         ~ (p=0.805 n=7)
TinyGo/Run/crypto_des.test-10            378.1k ± 0%     378.1k ± 0%         ~ (p=0.078 n=7)
TinyGo/Compile/crypto_md5.test-10        83.81k ± 0%     83.84k ± 0%         ~ (p=0.805 n=7)
TinyGo/Run/crypto_md5.test-10            393.3k ± 0%     393.3k ± 0%         ~ (p=0.690 n=7)
TinyGo/Compile/crypto_rc4.test-10        83.39k ± 0%     83.33k ± 0%         ~ (p=0.097 n=7)
TinyGo/Run/crypto_rc4.test-10            367.1k ± 0%     367.1k ± 0%         ~ (p=0.232 n=7)
TinyGo/Compile/crypto_sha1.test-10       84.00k ± 0%     84.06k ± 0%         ~ (p=0.154 n=7)
TinyGo/Run/crypto_sha1.test-10           392.7k ± 0%     392.7k ± 0%         ~ (p=1.000 n=7)
TinyGo/Compile/crypto_sha256.test-10     83.85k ± 0%     83.86k ± 0%         ~ (p=0.620 n=7)
TinyGo/Run/crypto_sha256.test-10         394.5k ±  ∞ ¹   394.5k ±  ∞ ¹       ~ (p=1.000 n=1) ²
```

Signed-off-by: Takeshi Yoneda <[email protected]>
  • Loading branch information
mathetake authored Jun 7, 2024
1 parent 0c5d971 commit f47fd2e
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 29 deletions.
8 changes: 4 additions & 4 deletions internal/engine/wazevo/backend/backend_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -458,11 +458,11 @@ L2 (SSA Block: blk2):
name: "single_predecessor_local_refs", m: testcases.SinglePredecessorLocalRefs.Module,
afterLoweringARM64: `
L1 (SSA Block: blk0):
mov x132?, xzr
cbz w132?, (L2)
L3 (SSA Block: blk1):
mov x131?, xzr
mov x0, x131?
cbz w131?, (L2)
L3 (SSA Block: blk1):
mov x130?, xzr
mov x0, x130?
ret
L2 (SSA Block: blk2):
L4 (SSA Block: blk3):
Expand Down
21 changes: 1 addition & 20 deletions internal/engine/wazevo/frontend/frontend.go
Original file line number Diff line number Diff line change
Expand Up @@ -301,26 +301,7 @@ func (c *Compiler) declareWasmLocals(entry ssa.BasicBlock) {
st := WasmTypeToSSAType(typ)
variable := c.ssaBuilder.DeclareVariable(st)
c.setWasmLocalVariable(wasm.Index(i)+localCount, variable)

zeroInst := c.ssaBuilder.AllocateInstruction()
switch st {
case ssa.TypeI32:
zeroInst.AsIconst32(0)
case ssa.TypeI64:
zeroInst.AsIconst64(0)
case ssa.TypeF32:
zeroInst.AsF32const(0)
case ssa.TypeF64:
zeroInst.AsF64const(0)
case ssa.TypeV128:
zeroInst.AsVconst(0, 0)
default:
panic("TODO: " + wasm.ValueTypeName(typ))
}

c.ssaBuilder.InsertInstruction(zeroInst)
value := zeroInst.Return()
c.ssaBuilder.DefineVariable(variable, value, entry)
c.ssaBuilder.InsertZeroValue(st)
}
}

Expand Down
7 changes: 2 additions & 5 deletions internal/engine/wazevo/frontend/frontend_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -375,13 +375,11 @@ blk3: () <-- (blk1)
exp: `
blk0: (exec_ctx:i64, module_ctx:i64)
v2:i32 = Iconst_32 0x0
v3:i32 = Iconst_32 0x0
v4:i32 = Iconst_32 0x0
Brz v2, blk2
Jump blk1
blk1: () <-- (blk0)
Return v4
Return v2
blk2: () <-- (blk0)
Jump blk3
Expand All @@ -392,12 +390,11 @@ blk3: () <-- (blk2)
expAfterPasses: `
blk0: (exec_ctx:i64, module_ctx:i64)
v2:i32 = Iconst_32 0x0
v4:i32 = Iconst_32 0x0
Brz v2, blk2
Jump fallthrough
blk1: () <-- (blk0)
Return v4
Return v2
blk2: () <-- (blk0)
Jump fallthrough
Expand Down
34 changes: 34 additions & 0 deletions internal/engine/wazevo/ssa/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,11 @@ type Builder interface {
// Idom returns the immediate dominator of the given BasicBlock.
Idom(blk BasicBlock) BasicBlock

// VarLengthPool returns the VarLengthPool of Value.
VarLengthPool() *wazevoapi.VarLengthPool[Value]

// InsertZeroValue inserts a zero value constant instruction of the given type.
InsertZeroValue(t Type)
}

// NewBuilder returns a new Builder implementation.
Expand Down Expand Up @@ -203,6 +207,32 @@ type builder struct {
donePostBlockLayoutPasses bool

currentSourceOffset SourceOffset

// zeros are the zero value constants for each type.
zeros [typeEnd]Value
}

// InsertZeroValue implements Builder.InsertZeroValue.
func (b *builder) InsertZeroValue(t Type) {
if b.zeros[t].Valid() {
return
}
zeroInst := b.AllocateInstruction()
switch t {
case TypeI32:
zeroInst.AsIconst32(0)
case TypeI64:
zeroInst.AsIconst64(0)
case TypeF32:
zeroInst.AsF32const(0)
case TypeF64:
zeroInst.AsF64const(0)
case TypeV128:
zeroInst.AsVconst(0, 0)
default:
panic("TODO: " + t.String())
}
b.zeros[t] = zeroInst.Insert(b).Return()
}

func (b *builder) VarLengthPool() *wazevoapi.VarLengthPool[Value] {
Expand All @@ -218,6 +248,7 @@ func (b *builder) ReturnBlock() BasicBlock {
func (b *builder) Init(s *Signature) {
b.nextVariable = 0
b.currentSignature = s
b.zeros = [typeEnd]Value{ValueInvalid, ValueInvalid, ValueInvalid, ValueInvalid, ValueInvalid, ValueInvalid}
resetBasicBlock(b.returnBlk)
b.instructionsPool.Reset()
b.basicBlocksPool.Reset()
Expand Down Expand Up @@ -486,6 +517,9 @@ func (b *builder) findValue(typ Type, variable Variable, blk *basicBlock) Value
value: value,
})
return value
} else if blk.EntryBlock() {
// If this is the entry block, we reach the uninitialized variable which has zero value.
return b.zeros[b.definedVariableType(variable)]
}

if pred := blk.singlePred; pred != nil {
Expand Down
3 changes: 3 additions & 0 deletions internal/engine/wazevo/ssa/type.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ const (

// TypeV128 represents 128-bit SIMD vectors.
TypeV128

// -- Do not add new types after this line. ----
typeEnd
)

// String implements fmt.Stringer.
Expand Down

0 comments on commit f47fd2e

Please sign in to comment.