-
Notifications
You must be signed in to change notification settings - Fork 4.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
JIT: Loop hoisting inhibited by phase-ordering issue #6554
Comments
I've verified with opt repeat that phase ordering is the key blocker in some of these cases; given this method int sum(int start, int stop)
{
int s = 0;
for (int i = start; i < stop; ++i)
{
s += a[i] + a[i] + a[i] + a[i];
}
return s;
} we do hoist the length loads out of the loop with optRepeat. Interestingly, it requires repeat count of 3, because with repeat count of 2 there's still a copy in the way that doesn't get removed until after value numbering has run. |
The disasm for original issue in the PR description. Not sure why assertion prop gets rid of the extra checks. ; Assembly listing for method ArrayPerf:Test(long)
; Emitting BLENDED_CODE for X64 CPU with AVX - Windows
; optimized code
; rsp based frame
; fully interruptible
; No PGO data
; invoked as altjit
; Final local variable assignments
;
; V00 arg0 [V00,T19] ( 4, 7 ) long -> rsi single-def
; V01 loc0 [V01,T14] ( 7, 25 ) int -> rax
; V02 loc1 [V02,T16] ( 5, 17 ) int -> rdx
; V03 loc2 [V03,T18] ( 4, 13 ) long -> rcx
; V04 loc3 [V04,T00] ( 14,212 ) int -> rdx
; V05 OutArgs [V05 ] ( 1, 1 ) lclBlk (32) [rsp+00H] "OutgoingArgSpace"
; V06 tmp1 [V06,T20] ( 2, 4 ) long -> rdx "argument with side effect"
; V07 tmp2 [V07,T15] ( 3, 24 ) ref -> rcx "arr expr"
; V08 tmp3 [V08,T17] ( 2, 16 ) ref -> rcx "arr expr"
; V09 tmp4 [V09,T03] ( 2, 64 ) ref -> r8 "arr expr"
;* V10 tmp5 [V10,T05] ( 0, 0 ) ref -> zero-ref "arr expr"
;* V11 tmp6 [V11,T06] ( 0, 0 ) ref -> zero-ref "arr expr"
;* V12 tmp7 [V12,T07] ( 0, 0 ) ref -> zero-ref "arr expr"
;* V13 tmp8 [V13,T08] ( 0, 0 ) ref -> zero-ref "arr expr"
;* V14 tmp9 [V14,T09] ( 0, 0 ) ref -> zero-ref "arr expr"
;* V15 tmp10 [V15,T10] ( 0, 0 ) ref -> zero-ref "arr expr"
;* V16 tmp11 [V16,T11] ( 0, 0 ) ref -> zero-ref "arr expr"
;* V17 tmp12 [V17,T12] ( 0, 0 ) ref -> zero-ref "arr expr"
;* V18 tmp13 [V18,T13] ( 0, 0 ) ref -> zero-ref "arr expr"
; V19 cse0 [V19,T02] ( 6, 30 ) ref -> registers "CSE - aggressive"
; V20 cse1 [V20,T04] ( 8, 32 ) int -> rdi "CSE - aggressive"
;* V21 cse2 [V21,T21] ( 0, 0 ) long -> zero-ref "CSE - moderate"
; V22 cse3 [V22,T01] ( 15,192 ) int -> r8 "CSE - aggressive"
;
; Lcl frame size = 40
G_M18693_IG01:
push rdi
push rsi
sub rsp, 40
mov rsi, rcx
;; size=9 bbWeight=0 PerfScore 0.00
G_M18693_IG02:
mov rcx, 0xD1FFAB1E
mov edx, 5
call CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE
mov edi, dword ptr [(reloc)]
movsxd rdx, edi
mov rcx, 0xD1FFAB1E ; System.Int32[]
call CORINFO_HELP_NEWARR_1_VC
mov rcx, 0xD1FFAB1E
mov rdx, rax
call CORINFO_HELP_CHECKED_ASSIGN_REF
xor eax, eax
test edi, edi
jle SHORT G_M18693_IG04
mov rdx, 0xD1FFAB1E
mov rdx, gword ptr [rdx]
;; size=81 bbWeight=1 PerfScore 10.25
G_M18693_IG03:
mov rcx, rdx
mov r8d, dword ptr [rcx+8]
cmp eax, r8d
jae G_M18693_IG11
mov r8d, eax
mov dword ptr [rcx+4*r8+16], eax
inc eax
cmp eax, edi
jl SHORT G_M18693_IG03
;; size=30 bbWeight=4 PerfScore 25.00
G_M18693_IG04:
xor edx, edx
test edi, edi
jle SHORT G_M18693_IG06
mov rax, 0xD1FFAB1E
mov rax, gword ptr [rax]
;; size=19 bbWeight=1 PerfScore 3.75
G_M18693_IG05:
mov rcx, rax
mov r8d, dword ptr [rcx+8]
cmp edx, r8d
jae SHORT G_M18693_IG11
inc edx
cmp edx, edi
jl SHORT G_M18693_IG05
;; size=18 bbWeight=4 PerfScore 20.00
G_M18693_IG06:
xor ecx, ecx
test rsi, rsi
jle SHORT G_M18693_IG10
;; size=7 bbWeight=1 PerfScore 1.50
G_M18693_IG07:
xor edx, edx
test edi, edi
jle SHORT G_M18693_IG09
mov rax, 0xD1FFAB1E
mov rax, gword ptr [rax]
;; size=19 bbWeight=4 PerfScore 15.00
G_M18693_IG08:
mov r8, rax
mov r8d, dword ptr [r8+8]
cmp edx, r8d
jae SHORT G_M18693_IG11
cmp edx, r8d
jae SHORT G_M18693_IG11
cmp edx, r8d
jae SHORT G_M18693_IG11
cmp edx, r8d
jae SHORT G_M18693_IG11
cmp edx, r8d
jae SHORT G_M18693_IG11
cmp edx, r8d
jae SHORT G_M18693_IG11
cmp edx, r8d
jae SHORT G_M18693_IG11
cmp edx, r8d
jae SHORT G_M18693_IG11
cmp edx, r8d
jae SHORT G_M18693_IG11
cmp edx, r8d
jae SHORT G_M18693_IG11
inc edx
cmp edx, edi
jl SHORT G_M18693_IG08
;; size=63 bbWeight=16 PerfScore 260.00
G_M18693_IG09:
inc rcx
cmp rcx, rsi
jl SHORT G_M18693_IG07
;; size=8 bbWeight=4 PerfScore 6.00
G_M18693_IG10:
add rsp, 40
pop rsi
pop rdi
ret
;; size=7 bbWeight=1 PerfScore 2.25
G_M18693_IG11:
call CORINFO_HELP_RNGCHKFAIL
int3
;; size=6 bbWeight=0 PerfScore 0.00
; Total bytes of code 267, prolog size 9, PerfScore 370.45, instruction count 82, allocated bytes for code 267 (MethodHash=ef70b6fa) for method ArrayPerf:Test(long)
; ============================================================
|
Consider this code:
Even with fixes in place for #6552 and #6553, we're unable to hoist the invariant array length loads (that get inserted for the bounds-checks on all those
m_arr[i]
accesses) out of the loops, because them_arr[i]
expressions look like this whenoptHoistLoopCode
runs:Note that the static field
m_arr
is loaded and then stored to a local in the left side of a comma, then subsequently referenced in thearrLen
and load expressions. Even though thearrLen
expression is loop-invariant (and recognized as such with fixes for #6552 and #6553 in place), we can't hoist it becauseoptTreeIsValidAtLoopHead
fails for it -- the use oflclVar ref V12 tmp6
(the temp which holds the pointer to the array) can't be hoisted above its definition without a rewrite. The rewrite that we'd need actually does happen (as a result of the RHS of the assign into the def being hoisted out of the loop) during CSE, but since that runs afteroptLoopHoist
we're at something of an impasse.To my mind, this sort of issue is an argument for a rewrite-as-you-go, ssa-based implementation of CSE and LICM (which would necessitate recording heap dependencies in SSA, but also ought to enable dropping the liberal/conservative value number bifurcation).
category:cq
theme:loop-opt
skill-level:expert
cost:medium
The text was updated successfully, but these errors were encountered: