-
Notifications
You must be signed in to change notification settings - Fork 4.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Arm64: Optimize pairs of "str wzr" to "str xzr" #84350
Arm64: Optimize pairs of "str wzr" to "str xzr" #84350
Conversation
Optimise the following patterns:
```
stp     wzr, wzr, [x2, #0x08]
=>
str     xzr, [x2, #0x08]
```
and
```
stp     wzr, wzr, [x14, #0x20]
str     xzr, [x14, #0x18]
=>
stp     xzr, xzr, [x14, #0x18]
```
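Tagging subscribers to this area: @JulieLeeMSFT, @jakobbotsch, @kunalspathak
Issue Details
Optimize the following patterns:
```
stp     wzr, wzr, [x2, #0x08]
=>
str     xzr, [x2, #0x08]
```
and
```
stp     wzr, wzr, [x14, #0x20]
str     xzr, [x14, #0x18]
=>
stp     xzr, xzr, [x14, #0x18]
```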
The asm diffs show the patterns being matched, but the number of matches is not significant. Diffs are based on 1,484,292 contexts (402,635 MinOpts, 1,081,657 FullOpts). MISSED contexts: 154 (0.01%). Overall (-316 bytes)
MinOpts (+0 bytes)
FullOpts (-316 bytes)
Example diffs
benchmarks.run.linux.arm64.checked.mch
+0 (0.00%) : 18625.dasm - System.Xml.EncodingStreamWrapper:EnsureByteBuffer():this
@@ -44,7 +44,7 @@ G_M48029_IG04: ; bbWeight=0.50, gcVars=0000000000000000 {}, gcrefRegs=800
bl CORINFO_HELP_ASSIGN_REF
; gcrRegs -[x0 x15]
; byrRegs -[x14]
- stp wzr, wzr, [x19, #0x4C]
+ str xzr, [x19, #0x4C]
;; size=36 bbWeight=0.50 PerfScore 3.00
G_M48029_IG05: ; bbWeight=0.50, epilog, nogc, extend
ldr x19, [sp, #0x18]
+0 (0.00%) : 30209.dasm - ProtoBuf.ProtoReader:Init(ProtoBuf.Meta.TypeModel,System.Object):this@@ -62,7 +62,7 @@ G_M15330_IG05: ; bbWeight=1, gcrefRegs=80006 {x1 x2 x19}, byrefRegs=0000
; gcrRegs -[x2 x15]
; byrRegs -[x14]
str xzr, [x19, #0x30]
- stp wzr, wzr, [x19, #0x38]
+ str xzr, [x19, #0x38]
movn x0, #0xD1FFAB1E LSL #48
str x0, [x19, #0x28]
cbz x1, G_M15330_IG07
+0 (0.00%) : 24642.dasm - System.Net.MultiArrayBuffer:Dispose():this@@ -29,7 +29,7 @@ G_M25088_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
; byrRegs +[x19]
;; size=16 bbWeight=1 PerfScore 3.00
G_M25088_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=80000 {x19}, byref, isz
- stp wzr, wzr, [x19, #0x0C]
+ str xzr, [x19, #0x0C]
ldr x0, [x19]
; gcrRegs +[x0]
cbz x0, G_M25088_IG08
+0 (0.00%) : 256.dasm - System.IO.StreamReader:ReadBuffer():int:this@@ -49,7 +49,7 @@ G_M42876_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
; gcrRegs +[x19]
;; size=16 bbWeight=1 PerfScore 3.00
G_M42876_IG02: ; bbWeight=1, gcrefRegs=80000 {x19}, byrefRegs=0000 {}, byref, isz
- stp wzr, wzr, [x19, #0x38]
+ str xzr, [x19, #0x38]
ldrb w0, [x19, #0x4E]
cbnz w0, G_M42876_IG04
;; size=12 bbWeight=1 PerfScore 5.00
@@ -285,7 +285,7 @@ G_M42876_IG21: ; bbWeight=0.50, gcrefRegs=80000 {x19}, byrefRegs=0000 {},
blr x7
; gcrRegs -[x0-x1 x4]
str w0, [x19, #0x3C]
- stp wzr, wzr, [x19, #0x40]
+ str xzr, [x19, #0x40]
;; size=48 bbWeight=0.50 PerfScore 11.75
G_M42876_IG22: ; bbWeight=1, gcrefRegs=80000 {x19}, byrefRegs=0000 {}, byref
ldr w0, [x19, #0x3C]
+0 (0.00%) : 17152.dasm - System.Runtime.Serialization.Formatters.Binary.ObjectProgress:Init():this@@ -22,7 +22,7 @@ G_M29322_IG02: ; bbWeight=1, gcrefRegs=0001 {x0}, byrefRegs=0000 {}, byre
mov w1, #3
str w1, [x0, #0x54]
stp xzr, xzr, [x0, #0x08]
- stp wzr, wzr, [x0, #0x58]
+ str xzr, [x0, #0x58]
str wzr, [x0, #0x60]
str xzr, [x0, #0x18]
str wzr, [x0, #0x64]
+0 (0.00%) : 30208.dasm - ProtoBuf.ProtoReader+StreamProtoReader:Init(System.IO.Stream,ProtoBuf.Meta.TypeModel,System.Object,long):this@@ -163,7 +163,7 @@ G_M61747_IG10: ; bbWeight=0.50, gcrefRegs=88000 {x15 x19}, byrefRegs=0000
bl CORINFO_HELP_ASSIGN_REF
; gcrRegs -[x15]
; byrRegs -[x14]
- stp wzr, wzr, [x19, #0x60]
+ str xzr, [x19, #0x60]
cmp x21, #0
cset x0, ge
strb w0, [x19, #0x45]
libraries_tests.pmi.linux.arm64.checked.mch
-4 (-16.67%) : 124255.dasm - Microsoft.CodeAnalysis.Collections.SegmentedArray`1+Enumerator[System.__Canon]:Reset():this
@@ -18,15 +18,14 @@ G_M57957_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
;; size=8 bbWeight=1 PerfScore 1.50
G_M57957_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0001 {x0}, byref
; byrRegs +[x0]
- stp wzr, wzr, [x0, #0x10]
- str xzr, [x0, #0x08]
- ;; size=8 bbWeight=1 PerfScore 2.00
+ stp xzr, xzr, [x0, #0x08]
+ ;; size=4 bbWeight=1 PerfScore 1.00
G_M57957_IG03: ; bbWeight=1, epilog, nogc, extend
ldp fp, lr, [sp], #0x10
ret lr
;; size=8 bbWeight=1 PerfScore 2.00
-; Total bytes of code 24, prolog size 8, PerfScore 7.90, instruction count 6, allocated bytes for code 24 (MethodHash=989d1d9a) for method Microsoft.CodeAnalysis.Collections.SegmentedArray`1+Enumerator[System.__Canon]:Reset():this
+; Total bytes of code 20, prolog size 8, PerfScore 6.50, instruction count 5, allocated bytes for code 20 (MethodHash=989d1d9a) for method Microsoft.CodeAnalysis.Collections.SegmentedArray`1+Enumerator[System.__Canon]:Reset():this
; ============================================================
Unwind Info:
@@ -37,7 +36,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 6 (0x00006) Actual length = 24 (0x000018)
+ Function Length : 5 (0x00005) Actual length = 20 (0x000014)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e)
-4 (-16.67%) : 355187.dasm - Microsoft.CodeAnalysis.Collections.SegmentedArray`1+Enumerator[System.__Canon]:Reset():this@@ -18,15 +18,14 @@ G_M57957_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
;; size=8 bbWeight=1 PerfScore 1.50
G_M57957_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0001 {x0}, byref
; byrRegs +[x0]
- stp wzr, wzr, [x0, #0x10]
- str xzr, [x0, #0x08]
- ;; size=8 bbWeight=1 PerfScore 2.00
+ stp xzr, xzr, [x0, #0x08]
+ ;; size=4 bbWeight=1 PerfScore 1.00
G_M57957_IG03: ; bbWeight=1, epilog, nogc, extend
ldp fp, lr, [sp], #0x10
ret lr
;; size=8 bbWeight=1 PerfScore 2.00
-; Total bytes of code 24, prolog size 8, PerfScore 7.90, instruction count 6, allocated bytes for code 24 (MethodHash=989d1d9a) for method Microsoft.CodeAnalysis.Collections.SegmentedArray`1+Enumerator[System.__Canon]:Reset():this
+; Total bytes of code 20, prolog size 8, PerfScore 6.50, instruction count 5, allocated bytes for code 20 (MethodHash=989d1d9a) for method Microsoft.CodeAnalysis.Collections.SegmentedArray`1+Enumerator[System.__Canon]:Reset():this
; ============================================================
Unwind Info:
@@ -37,7 +36,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 6 (0x00006) Actual length = 24 (0x000018)
+ Function Length : 5 (0x00005) Actual length = 20 (0x000014)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e)
-4 (-11.11%) : 124256.dasm - Microsoft.CodeAnalysis.Collections.SegmentedArray`1+Enumerator[System.__Canon]:.ctor(Microsoft.CodeAnalysis.Collections.SegmentedArray`1[System.__Canon]):this@@ -29,15 +29,14 @@ G_M28862_IG02: ; bbWeight=1, gcrefRegs=0004 {x2}, byrefRegs=0001 {x0}, by
bl CORINFO_HELP_CHECKED_ASSIGN_REF
; gcrRegs -[x2 x15]
; byrRegs -[x14]
- stp wzr, wzr, [x0, #0x10]
- str xzr, [x0, #0x08]
- ;; size=20 bbWeight=1 PerfScore 4.00
+ stp xzr, xzr, [x0, #0x08]
+ ;; size=16 bbWeight=1 PerfScore 3.00
G_M28862_IG03: ; bbWeight=1, epilog, nogc, extend
ldp fp, lr, [sp], #0x10
ret lr
;; size=8 bbWeight=1 PerfScore 2.00
-; Total bytes of code 36, prolog size 8, PerfScore 11.10, instruction count 9, allocated bytes for code 36 (MethodHash=90ad8f41) for method Microsoft.CodeAnalysis.Collections.SegmentedArray`1+Enumerator[System.__Canon]:.ctor(Microsoft.CodeAnalysis.Collections.SegmentedArray`1[System.__Canon]):this
+; Total bytes of code 32, prolog size 8, PerfScore 9.70, instruction count 8, allocated bytes for code 32 (MethodHash=90ad8f41) for method Microsoft.CodeAnalysis.Collections.SegmentedArray`1+Enumerator[System.__Canon]:.ctor(Microsoft.CodeAnalysis.Collections.SegmentedArray`1[System.__Canon]):this
; ============================================================
Unwind Info:
@@ -48,7 +47,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 9 (0x00009) Actual length = 36 (0x000024)
+ Function Length : 8 (0x00008) Actual length = 32 (0x000020)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e)
+0 (0.00%) : 352896.dasm - Microsoft.Build.Framework.PropertyInitialValueSetEventArgs:.ctor(System.String,System.String,System.String,System.String,System.String,System.String,int):this@@ -95,8 +95,8 @@ G_M33953_IG03: ; bbWeight=1, gcrefRegs=780001 {x0 x19 x20 x21 x22}, byref
str w26, [x19, #0x70]
stp xzr, xzr, [x19, #0x50]
str xzr, [x19, #0x60]
- stp wzr, wzr, [x19, #0x74]
- stp wzr, wzr, [x19, #0x7C]
+ str xzr, [x19, #0x74]
+ str xzr, [x19, #0x7C]
add x14, x19, #136
; byrRegs +[x14]
mov x15, x20
+0 (0.00%) : 356864.dasm - System.Collections.Tests.StackTests:Push_Null()@@ -50,7 +50,7 @@ G_M027_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, i
bl CORINFO_HELP_ASSIGN_REF
; gcrRegs -[x0 x15]
; byrRegs -[x14]
- stp wzr, wzr, [x19, #0x10]
+ str xzr, [x19, #0x10]
movz x0, #0xD1FFAB1E
movk x0, #0xD1FFAB1E LSL #16
movk x0, #0xD1FFAB1E LSL #32
+0 (0.00%) : 379008.dasm - System.Xml.XmlDiff.XmlDiffDocument:LoadPI(System.Xml.XmlDiff.XmlDiffNode,System.Xml.XmlReader,System.Xml.XmlDiff.PositionInfo):this@@ -65,7 +65,7 @@ G_M9276_IG02: ; bbWeight=1, gcrefRegs=780000 {x19 x20 x21 x22}, byrefRegs
; gcrRegs +[x15]
stp xzr, xzr, [x25, #0x08]
stp xzr, xzr, [x25, #0x18]
- stp wzr, wzr, [x25, #0x30]
+ str xzr, [x25, #0x30]
add x14, x25, #64
; byrRegs +[x14]
bl CORINFO_HELP_ASSIGN_REF
libraries.crossgen2.linux.arm64.checked.mch
-4 (-16.67%) : 144851.dasm - Microsoft.CodeAnalysis.Collections.SegmentedArray`1+Enumerator[System.__Canon]:Reset():this
@@ -19,15 +19,14 @@ G_M57957_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
;; size=8 bbWeight=1 PerfScore 1.50
G_M57957_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0001 {x0}, byref
; byrRegs +[x0]
- stp wzr, wzr, [x0, #0x10]
- str xzr, [x0, #0x08]
- ;; size=8 bbWeight=1 PerfScore 2.00
+ stp xzr, xzr, [x0, #0x08]
+ ;; size=4 bbWeight=1 PerfScore 1.00
G_M57957_IG03: ; bbWeight=1, epilog, nogc, extend
ldp fp, lr, [sp], #0x10
ret lr
;; size=8 bbWeight=1 PerfScore 2.00
-; Total bytes of code 24, prolog size 8, PerfScore 7.90, instruction count 6, allocated bytes for code 24 (MethodHash=989d1d9a) for method Microsoft.CodeAnalysis.Collections.SegmentedArray`1+Enumerator[System.__Canon]:Reset():this
+; Total bytes of code 20, prolog size 8, PerfScore 6.50, instruction count 5, allocated bytes for code 20 (MethodHash=989d1d9a) for method Microsoft.CodeAnalysis.Collections.SegmentedArray`1+Enumerator[System.__Canon]:Reset():this
; ============================================================
Unwind Info:
@@ -38,7 +37,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 6 (0x00006) Actual length = 24 (0x000018)
+ Function Length : 5 (0x00005) Actual length = 20 (0x000014)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e)
-4 (-8.33%) : 144847.dasm - Microsoft.CodeAnalysis.Collections.SegmentedArray`1+Enumerator[System.__Canon]:.ctor(Microsoft.CodeAnalysis.Collections.SegmentedArray`1[System.__Canon]):this@@ -33,15 +33,14 @@ G_M28862_IG02: ; bbWeight=1, gcrefRegs=0004 {x2}, byrefRegs=0001 {x0}, by
blr x12
; gcrRegs -[x2 x15]
; byrRegs -[x14]
- stp wzr, wzr, [x0, #0x10]
- str xzr, [x0, #0x08]
- ;; size=32 bbWeight=1 PerfScore 8.00
+ stp xzr, xzr, [x0, #0x08]
+ ;; size=28 bbWeight=1 PerfScore 7.00
G_M28862_IG03: ; bbWeight=1, epilog, nogc, extend
ldp fp, lr, [sp], #0x10
ret lr
;; size=8 bbWeight=1 PerfScore 2.00
-; Total bytes of code 48, prolog size 8, PerfScore 16.30, instruction count 12, allocated bytes for code 48 (MethodHash=90ad8f41) for method Microsoft.CodeAnalysis.Collections.SegmentedArray`1+Enumerator[System.__Canon]:.ctor(Microsoft.CodeAnalysis.Collections.SegmentedArray`1[System.__Canon]):this
+; Total bytes of code 44, prolog size 8, PerfScore 14.90, instruction count 11, allocated bytes for code 44 (MethodHash=90ad8f41) for method Microsoft.CodeAnalysis.Collections.SegmentedArray`1+Enumerator[System.__Canon]:.ctor(Microsoft.CodeAnalysis.Collections.SegmentedArray`1[System.__Canon]):this
; ============================================================
Unwind Info:
@@ -52,7 +51,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 12 (0x0000c) Actual length = 48 (0x000030)
+ Function Length : 11 (0x0000b) Actual length = 44 (0x00002c)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e)
-8 (-3.85%) : 30705.dasm - System.Decimal:.cctor()@@ -78,15 +78,14 @@ G_M23462_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
add x1, x1, #8
; gcrRegs -[x1]
; byrRegs +[x1]
- stp wzr, wzr, [x1]
- str xzr, [x1, #0x08]
+ stp xzr, xzr, [x1]
ldr x1, [x0, #0x18]
; gcrRegs +[x1]
; byrRegs -[x1]
add x1, x1, #8
; gcrRegs -[x1]
; byrRegs +[x1]
- stp wzr, wzr, [x1]
+ str xzr, [x1]
mov x2, #1
str x2, [x1, #0x08]
ldr x1, [x0, #0x20]
@@ -128,15 +127,14 @@ G_M23462_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
add x1, x1, #8
; gcrRegs -[x1]
; byrRegs +[x1]
- stp wzr, wzr, [x1]
- str xzr, [x1, #0x08]
+ stp xzr, xzr, [x1]
ldr x1, [x0, #0x40]
; gcrRegs +[x1]
; byrRegs -[x1]
add x1, x1, #8
; gcrRegs -[x1]
; byrRegs +[x1]
- stp wzr, wzr, [x1]
+ str xzr, [x1]
mov x2, #1
str x2, [x1, #0x08]
ldr x0, [x0, #0x48]
@@ -149,13 +147,13 @@ G_M23462_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
; byrRegs -[x1]
stp w1, wzr, [x0]
str x2, [x0, #0x08]
- ;; size=192 bbWeight=1 PerfScore 56.00
+ ;; size=184 bbWeight=1 PerfScore 54.00
G_M23462_IG03: ; bbWeight=1, epilog, nogc, extend
ldp fp, lr, [sp], #0x10
ret lr
;; size=8 bbWeight=1 PerfScore 2.00
-; Total bytes of code 208, prolog size 8, PerfScore 80.30, instruction count 52, allocated bytes for code 208 (MethodHash=008ca459) for method System.Decimal:.cctor()
+; Total bytes of code 200, prolog size 8, PerfScore 77.50, instruction count 50, allocated bytes for code 200 (MethodHash=008ca459) for method System.Decimal:.cctor()
; ============================================================
Unwind Info:
@@ -166,7 +164,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 52 (0x00034) Actual length = 208 (0x0000d0)
+ Function Length : 50 (0x00032) Actual length = 200 (0x0000c8)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e)
+0 (0.00%) : 2816.dasm - System.Xml.XmlBaseReader+XmlElementNode:.ctor(System.Xml.XmlBufferReader):this@@ -67,7 +67,7 @@ G_M42336_IG02: ; bbWeight=1, gcrefRegs=180000 {x19 x20}, byrefRegs=0000 {
; byrRegs -[x14]
mov w11, #1
str w11, [x22, #0x10]
- stp wzr, wzr, [x22, #0x18]
+ str xzr, [x22, #0x18]
adrp x11, [HIGH RELOC #0xD1FFAB1E] // function address
add x11, x11, [LOW RELOC #0xD1FFAB1E]
ldr x0, [x11]
+0 (0.00%) : 41472.dasm - System.IO.StreamReader:Dispose(bool):this@@ -57,7 +57,7 @@ G_M54782_IG05: ; bbWeight=0.50, gcrefRegs=0001 {x0}, byrefRegs=0000 {}, b
; gcrRegs +[x0]
;; size=36 bbWeight=0.50 PerfScore 7.25
G_M54782_IG06: ; bbWeight=0.50, gcrefRegs=0001 {x0}, byrefRegs=0000 {}, byref
- stp wzr, wzr, [x0, #0x38]
+ str xzr, [x0, #0x38]
;; size=4 bbWeight=0.50 PerfScore 0.50
G_M54782_IG07: ; bbWeight=0.50, gcVars=0000000000000000 {}, gcrefRegs=0000 {}, byrefRegs=0000 {}, gcvars, byref, epilog, nogc
; gcrRegs -[x0]
@@ -74,7 +74,7 @@ G_M54782_IG09: ; bbWeight=0, gcVars=0000000000000001 {V00}, gcrefRegs=000
; GC ptr vars +{V00}
ldr x0, [fp, #0x10] // [V00 this]
; gcrRegs +[x0]
- stp wzr, wzr, [x0, #0x38]
+ str xzr, [x0, #0x38]
;; size=8 bbWeight=0 PerfScore 0.00
G_M54782_IG10: ; bbWeight=0, funclet epilog, nogc, extend
ldp fp, lr, [sp], #0x20
+0 (0.00%) : 124352.dasm - System.Data.SqlTypes.SqlDecimal:SetToZero():this@@ -21,8 +21,8 @@ G_M11197_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0001 {x0}, byre
; byrRegs +[x0]
mov w1, #1
strb w1, [x0, #0x01]
- stp wzr, wzr, [x0, #0x0C]
- stp wzr, wzr, [x0, #0x04]
+ str xzr, [x0, #0x0C]
+ str xzr, [x0, #0x04]
strb w1, [x0]
;; size=20 bbWeight=1 PerfScore 4.50
G_M11197_IG03: ; bbWeight=1, epilog, nogc, extend
libraries.pmi.linux.arm64.checked.mch
-4 (-16.67%) : 208972.dasm - Microsoft.Build.BackEnd.WorkUnitResult:.ctor():this
@@ -18,15 +18,14 @@ G_M57471_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
;; size=8 bbWeight=1 PerfScore 1.50
G_M57471_IG02: ; bbWeight=1, gcrefRegs=0001 {x0}, byrefRegs=0000 {}, byref
; gcrRegs +[x0]
- stp wzr, wzr, [x0, #0x10]
- str xzr, [x0, #0x08]
- ;; size=8 bbWeight=1 PerfScore 2.00
+ stp xzr, xzr, [x0, #0x08]
+ ;; size=4 bbWeight=1 PerfScore 1.00
G_M57471_IG03: ; bbWeight=1, epilog, nogc, extend
ldp fp, lr, [sp], #0x10
ret lr
;; size=8 bbWeight=1 PerfScore 2.00
-; Total bytes of code 24, prolog size 8, PerfScore 7.90, instruction count 6, allocated bytes for code 24 (MethodHash=5b531f80) for method Microsoft.Build.BackEnd.WorkUnitResult:.ctor():this
+; Total bytes of code 20, prolog size 8, PerfScore 6.50, instruction count 5, allocated bytes for code 20 (MethodHash=5b531f80) for method Microsoft.Build.BackEnd.WorkUnitResult:.ctor():this
; ============================================================
Unwind Info:
@@ -37,7 +36,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 6 (0x00006) Actual length = 24 (0x000018)
+ Function Length : 5 (0x00005) Actual length = 20 (0x000014)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e)
-4 (-16.67%) : 146791.dasm - Microsoft.CodeAnalysis.Collections.SegmentedArray`1+Enumerator[System.__Canon]:Reset():this@@ -18,15 +18,14 @@ G_M57957_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
;; size=8 bbWeight=1 PerfScore 1.50
G_M57957_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0001 {x0}, byref
; byrRegs +[x0]
- stp wzr, wzr, [x0, #0x10]
- str xzr, [x0, #0x08]
- ;; size=8 bbWeight=1 PerfScore 2.00
+ stp xzr, xzr, [x0, #0x08]
+ ;; size=4 bbWeight=1 PerfScore 1.00
G_M57957_IG03: ; bbWeight=1, epilog, nogc, extend
ldp fp, lr, [sp], #0x10
ret lr
;; size=8 bbWeight=1 PerfScore 2.00
-; Total bytes of code 24, prolog size 8, PerfScore 7.90, instruction count 6, allocated bytes for code 24 (MethodHash=989d1d9a) for method Microsoft.CodeAnalysis.Collections.SegmentedArray`1+Enumerator[System.__Canon]:Reset():this
+; Total bytes of code 20, prolog size 8, PerfScore 6.50, instruction count 5, allocated bytes for code 20 (MethodHash=989d1d9a) for method Microsoft.CodeAnalysis.Collections.SegmentedArray`1+Enumerator[System.__Canon]:Reset():this
; ============================================================
Unwind Info:
@@ -37,7 +36,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 6 (0x00006) Actual length = 24 (0x000018)
+ Function Length : 5 (0x00005) Actual length = 20 (0x000014)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e)
-4 (-11.11%) : 5832.dasm - System.Decimal:TryConvertFrom[ubyte](ubyte,byref):bool@@ -34,16 +34,15 @@ G_M24523_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0002 {x1}, byre
; byrRegs +[x1]
uxtb w0, w0
mov w0, w0
- stp wzr, wzr, [x1]
- str x0, [x1, #0x08]
+ stp xzr, x0, [x1]
mov w0, #1
- ;; size=20 bbWeight=1 PerfScore 3.50
+ ;; size=16 bbWeight=1 PerfScore 2.50
G_M24523_IG03: ; bbWeight=1, epilog, nogc, extend
ldp fp, lr, [sp], #0x10
ret lr
;; size=8 bbWeight=1 PerfScore 2.00
-; Total bytes of code 36, prolog size 8, PerfScore 10.60, instruction count 9, allocated bytes for code 36 (MethodHash=bb42a034) for method System.Decimal:TryConvertFrom[ubyte](ubyte,byref):bool
+; Total bytes of code 32, prolog size 8, PerfScore 9.20, instruction count 8, allocated bytes for code 32 (MethodHash=bb42a034) for method System.Decimal:TryConvertFrom[ubyte](ubyte,byref):bool
; ============================================================
Unwind Info:
@@ -54,7 +53,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 9 (0x00009) Actual length = 36 (0x000024)
+ Function Length : 8 (0x00008) Actual length = 32 (0x000020)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e)
+0 (0.00%) : 142528.dasm - Microsoft.CodeAnalysis.Collections.SegmentedArray`1[ubyte]:GetEnumerator():Microsoft.CodeAnalysis.Collections.SegmentedArray`1+Enumerator[ubyte]:this@@ -34,7 +34,7 @@ G_M31162_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0101 {x0 x8}, b
bl CORINFO_HELP_CHECKED_ASSIGN_REF
; gcrRegs -[x15]
; byrRegs -[x0 x14]
- stp wzr, wzr, [x8, #0x08]
+ str xzr, [x8, #0x08]
strb wzr, [x8, #0x10]
;; size=20 bbWeight=1 PerfScore 6.50
G_M31162_IG03: ; bbWeight=1, epilog, nogc, extend
+0 (0.00%) : 195264.dasm - Microsoft.CSharp.RuntimeBinder.Semantics.ExpressionBinder:GetEnumBinOpSigs(System.Collections.Generic.List`1[Microsoft.CSharp.RuntimeBinder.Semantics.ExpressionBinder+BinOpFullSig],Microsoft.CSharp.RuntimeBinder.Semantics.ExpressionBinder+BinOpArgInfo):bool:this@@ -345,7 +345,7 @@ G_M39090_IG20: ; bbWeight=0.50, gcrefRegs=780000 {x19 x20 x21 x22}, byref
ldr w1, [fp, #0x18] // [V05 loc2]
movn w14, #0
stp w14, w14, [x2, #0x10]
- stp wzr, wzr, [x2, #0x18]
+ str xzr, [x2, #0x18]
add x14, x2, #8
; byrRegs +[x14]
mov x15, x19
+0 (0.00%) : 209600.dasm - Microsoft.Build.BackEnd.Logging.SerialConsoleLogger:ResetConsoleLoggerState():this@@ -87,7 +87,7 @@ G_M1626_IG04: ; bbWeight=0.50, gcrefRegs=80000 {x19}, byrefRegs=0000 {},
stp xzr, xzr, [x19, #0x28]
;; size=4 bbWeight=0.50 PerfScore 0.50
G_M1626_IG05: ; bbWeight=1, gcrefRegs=80000 {x19}, byrefRegs=0000 {}, byref
- stp wzr, wzr, [x19, #0x64]
+ str xzr, [x19, #0x64]
stp xzr, xzr, [x19, #0x38]
str xzr, [x19, #0x48]
;; size=12 bbWeight=1 PerfScore 3.00
coreclr_tests.run.linux.arm64.checked.mch
-4 (-6.25%) : 570824.dasm - u4div+CL:.ctor():this
@@ -36,15 +36,14 @@ G_M20711_IG02: ; bbWeight=1, gcrefRegs=0001 {x0}, byrefRegs=0000 {}, byre
add x0, x0, #48
; gcrRegs -[x0]
; byrRegs +[x0]
- stp wzr, wzr, [x0]
- str x1, [x0, #0x08]
- ;; size=48 bbWeight=1 PerfScore 9.50
+ stp xzr, x1, [x0]
+ ;; size=44 bbWeight=1 PerfScore 8.50
G_M20711_IG03: ; bbWeight=1, epilog, nogc, extend
ldp fp, lr, [sp], #0x10
ret lr
;; size=8 bbWeight=1 PerfScore 2.00
-; Total bytes of code 64, prolog size 8, PerfScore 19.40, instruction count 16, allocated bytes for code 64 (MethodHash=d091af18) for method u4div+CL:.ctor():this
+; Total bytes of code 60, prolog size 8, PerfScore 18.00, instruction count 15, allocated bytes for code 60 (MethodHash=d091af18) for method u4div+CL:.ctor():this
; ============================================================
Unwind Info:
@@ -55,7 +54,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 16 (0x00010) Actual length = 64 (0x000040)
+ Function Length : 15 (0x0000f) Actual length = 60 (0x00003c)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e)
-4 (-6.25%) : 570828.dasm - u4rem+CL:.ctor():this@@ -36,15 +36,14 @@ G_M33766_IG02: ; bbWeight=1, gcrefRegs=0001 {x0}, byrefRegs=0000 {}, byre
add x0, x0, #48
; gcrRegs -[x0]
; byrRegs +[x0]
- stp wzr, wzr, [x0]
- str x1, [x0, #0x08]
- ;; size=48 bbWeight=1 PerfScore 9.50
+ stp xzr, x1, [x0]
+ ;; size=44 bbWeight=1 PerfScore 8.50
G_M33766_IG03: ; bbWeight=1, epilog, nogc, extend
ldp fp, lr, [sp], #0x10
ret lr
;; size=8 bbWeight=1 PerfScore 2.00
-; Total bytes of code 64, prolog size 8, PerfScore 19.40, instruction count 16, allocated bytes for code 64 (MethodHash=eeaa7c19) for method u4rem+CL:.ctor():this
+; Total bytes of code 60, prolog size 8, PerfScore 18.00, instruction count 15, allocated bytes for code 60 (MethodHash=eeaa7c19) for method u4rem+CL:.ctor():this
; ============================================================
Unwind Info:
@@ -55,7 +54,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 16 (0x00010) Actual length = 64 (0x000040)
+ Function Length : 15 (0x0000f) Actual length = 60 (0x00003c)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e)
-4 (-6.25%) : 570832.dasm - u8div+CL:.ctor():this@@ -36,15 +36,14 @@ G_M17259_IG02: ; bbWeight=1, gcrefRegs=0001 {x0}, byrefRegs=0000 {}, byre
add x0, x0, #56
; gcrRegs -[x0]
; byrRegs +[x0]
- stp wzr, wzr, [x0]
- str x1, [x0, #0x08]
- ;; size=48 bbWeight=1 PerfScore 9.50
+ stp xzr, x1, [x0]
+ ;; size=44 bbWeight=1 PerfScore 8.50
G_M17259_IG03: ; bbWeight=1, epilog, nogc, extend
ldp fp, lr, [sp], #0x10
ret lr
;; size=8 bbWeight=1 PerfScore 2.00
-; Total bytes of code 64, prolog size 8, PerfScore 19.40, instruction count 16, allocated bytes for code 64 (MethodHash=710abc94) for method u8div+CL:.ctor():this
+; Total bytes of code 60, prolog size 8, PerfScore 18.00, instruction count 15, allocated bytes for code 60 (MethodHash=710abc94) for method u8div+CL:.ctor():this
; ============================================================
Unwind Info:
@@ -55,7 +54,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 16 (0x00010) Actual length = 64 (0x000040)
+ Function Length : 15 (0x0000f) Actual length = 60 (0x00003c)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e)
+0 (0.00%) : 402112.dasm - System.IO.StreamReader:ReadBuffer():int:this@@ -47,7 +47,7 @@ G_M42876_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
; gcrRegs +[x19]
;; size=16 bbWeight=1 PerfScore 3.00
G_M42876_IG02: ; bbWeight=1, gcrefRegs=80000 {x19}, byrefRegs=0000 {}, byref, isz
- stp wzr, wzr, [x19, #0x38]
+ str xzr, [x19, #0x38]
ldrb w0, [x19, #0x4E]
cbnz w0, G_M42876_IG04
;; size=12 bbWeight=1 PerfScore 5.00
@@ -290,7 +290,7 @@ G_M42876_IG20: ; bbWeight=0, gcrefRegs=80000 {x19}, byrefRegs=0000 {}, by
blr x7
; gcrRegs -[x0-x1 x4]
str w0, [x19, #0x3C]
- stp wzr, wzr, [x19, #0x40]
+ str xzr, [x19, #0x40]
;; size=48 bbWeight=0 PerfScore 0.00
G_M42876_IG21: ; bbWeight=0, gcrefRegs=80000 {x19}, byrefRegs=0000 {}, byref
ldr w0, [x19, #0x3C]
+0 (0.00%) : 436992.dasm - lclfldrem:TestEntryPoint():int@@ -1901,7 +1901,7 @@ G_M45041_IG03: ; bbWeight=1, extend
; gcrRegs +[x0]
add x2, x0, #32
; byrRegs +[x2]
- stp wzr, wzr, [x2]
+ str xzr, [x2]
mov x0, #9
; gcrRegs -[x0]
str x0, [x2, #0x08]
@@ -2248,7 +2248,7 @@ G_M45041_IG04: ; bbWeight=1, extend
; gcrRegs +[x0]
add x2, x0, #16
; byrRegs +[x2]
- stp wzr, wzr, [x2]
+ str xzr, [x2]
mov x1, #5
str x1, [x2, #0x08]
ldr q16, [x24]
+0 (0.00%) : 569856.dasm - Test_10w5d.testout1:Func_0_1_6_6_4():System.Decimal@@ -624,7 +624,7 @@ G_M40390_IG02: ; bbWeight=1.00, gcrefRegs=0000 {}, byrefRegs=0000 {}, byr
movz x19, #0xD1FFAB1E // data for <unknown class>:<unknown field>
movk x19, #0xD1FFAB1E LSL #16
movk x19, #0xD1FFAB1E LSL #32
- stp wzr, wzr, [x19]
+ str xzr, [x19]
mov x0, #0xD1FFAB1E
str x0, [x19, #0x08]
movz x20, #0xD1FFAB1E // data for <unknown class>:<unknown field>
Details
Improvements/regressions per collection
Context information
jit-analyze output
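Is there any advantage of using `str xzr, mem` over `stp wzr, wzr, mem`? It looks like we were previously combining
`str wzr, mem` / `str wzr, mem+4` into `stp wzr, wzr, mem`, and with this PR we are correcting it to `str xzr, mem`.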
Not really: they both have a latency of 1 cycle and a throughput of 2 on N1 systems [1]. However, this change allows us to match patterns like the following:
```
stp     wzr, wzr, [x14, #0x20]
str     xzr, [x14, #0x18]
=>
stp     xzr, xzr, [x14, #0x18]
```
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Thanks!
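Optimize the following patterns:
```
stp     wzr, wzr, [x2, #0x08]
=>
str     xzr, [x2, #0x08]
```
and
```
stp     wzr, wzr, [x14, #0x20]
str     xzr, [x14, #0x18]
=>
stp     xzr, xzr, [x14, #0x18]
```
Fixes: #35136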