-
Notifications
You must be signed in to change notification settings - Fork 4.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Windows/Arm64: Use 8.1 atomic instructions if they are available #70921
Changes from all commits
565d19a
cd6be8b
518cf86
f6ba4e3
a24be98
3ae4cde
447f9fa
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -602,7 +602,11 @@ FORCEINLINE AwareLock::EnterHelperResult ObjHeader::EnterObjMonitorHelper(Thread | |
} | ||
|
||
LONG newValue = oldValue | tid; | ||
#if defined(TARGET_WINDOWS) && defined(TARGET_ARM64) | ||
if (FastInterlockedCompareExchangeAcquire((LONG*)&m_SyncBlockValue, newValue, oldValue) == oldValue) | ||
#else | ||
if (InterlockedCompareExchangeAcquire((LONG*)&m_SyncBlockValue, newValue, oldValue) == oldValue) | ||
#endif | ||
{ | ||
return AwareLock::EnterHelperResult_Entered; | ||
} | ||
|
@@ -650,7 +654,11 @@ FORCEINLINE AwareLock::EnterHelperResult ObjHeader::EnterObjMonitorHelper(Thread | |
return AwareLock::EnterHelperResult_UseSlowPath; | ||
} | ||
|
||
#if defined(TARGET_WINDOWS) && defined(TARGET_ARM64) | ||
if (FastInterlockedCompareExchangeAcquire((LONG*)&m_SyncBlockValue, newValue, oldValue) == oldValue) | ||
#else | ||
if (InterlockedCompareExchangeAcquire((LONG*)&m_SyncBlockValue, newValue, oldValue) == oldValue) | ||
#endif | ||
{ | ||
return AwareLock::EnterHelperResult_Entered; | ||
} | ||
|
@@ -723,7 +731,12 @@ FORCEINLINE AwareLock::LeaveHelperAction ObjHeader::LeaveObjMonitorHelper(Thread | |
{ | ||
// We are leaving the lock | ||
DWORD newValue = (syncBlockValue & (~SBLK_MASK_LOCK_THREADID)); | ||
|
||
#if defined(TARGET_WINDOWS) && defined(TARGET_ARM64) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think moving the static/dynamic switching logic into It should result in simpler looking code and if switching needs to change due to new compiler or platform, no need to revisit all the callsites. |
||
if (FastInterlockedCompareExchangeRelease((LONG*)&m_SyncBlockValue, newValue, syncBlockValue) != (LONG)syncBlockValue) | ||
#else | ||
if (InterlockedCompareExchangeRelease((LONG*)&m_SyncBlockValue, newValue, syncBlockValue) != (LONG)syncBlockValue) | ||
#endif | ||
{ | ||
return AwareLock::LeaveHelperAction_Yield; | ||
} | ||
|
@@ -732,7 +745,11 @@ FORCEINLINE AwareLock::LeaveHelperAction ObjHeader::LeaveObjMonitorHelper(Thread | |
{ | ||
// recursion and ThinLock | ||
DWORD newValue = syncBlockValue - SBLK_LOCK_RECLEVEL_INC; | ||
#if defined(TARGET_WINDOWS) && defined(TARGET_ARM64) | ||
if (FastInterlockedCompareExchangeRelease((LONG*)&m_SyncBlockValue, newValue, syncBlockValue) != (LONG)syncBlockValue) | ||
#else | ||
if (InterlockedCompareExchangeRelease((LONG*)&m_SyncBlockValue, newValue, syncBlockValue) != (LONG)syncBlockValue) | ||
#endif | ||
{ | ||
return AwareLock::LeaveHelperAction_Yield; | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,6 +25,14 @@ | |
#define MAX_CACHE_LINE_SIZE 64 | ||
#endif | ||
|
||
#ifndef DACCESS_COMPILE | ||
#if defined(TARGET_WINDOWS) && defined(TARGET_ARM64) | ||
// Flag indicating whether the atomics feature is available on | ||
// the machine. Only declared for non-DAC Windows/Arm64 builds. | ||
// The FastInterlockedCompareExchange* helpers in this header read it | ||
// to choose between the __cas* intrinsics and the Interlocked* fallbacks. | ||
extern bool g_arm64_atomics_present; | ||
#endif | ||
#endif | ||
|
||
#ifndef TARGET_UNIX | ||
// Copied from malloc.h: don't want to bring in the whole header file. | ||
void * __cdecl _alloca(size_t); | ||
|
@@ -71,6 +79,64 @@ BOOL inline FitsInU4(unsigned __int64 val) | |
return val == (unsigned __int64)(unsigned __int32)val; | ||
} | ||
|
||
// In DACCESS_COMPILE builds the Fast* names are plain aliases for the | ||
// corresponding Interlocked* primitives; no runtime feature check is done. | ||
#if defined(DACCESS_COMPILE) | ||
#define FastInterlockedCompareExchange InterlockedCompareExchange | ||
#define FastInterlockedCompareExchangeAcquire InterlockedCompareExchangeAcquire | ||
#define FastInterlockedCompareExchangeRelease InterlockedCompareExchangeRelease | ||
#else | ||
|
||
#if defined(TARGET_WINDOWS) && defined(TARGET_ARM64) | ||
|
||
FORCEINLINE LONG FastInterlockedCompareExchange( | ||
LONG volatile *Destination, | ||
LONG Exchange, | ||
LONG Comperand) | ||
{ | ||
if (g_arm64_atomics_present) | ||
{ | ||
return (LONG) __casal32((unsigned __int32*) Destination, (unsigned __int32)Comperand, (unsigned __int32)Exchange); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. technically it can be unconditionally enabled for osx-arm64 for us There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Ah, no, that will depend on compilers flag, e.g. if someone adds There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
What is the default flag used for osx? Since its baseline is 8.1, it should be
I don't understand how/why will someone want to use
Can you check what code we generate today with default flags on osx for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I think it was
We don't set march/mcpu/m* flags so we use defaults There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The barrier is a workaround for M1 is v8.1+, so There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does it use |
||
} | ||
else | ||
{ | ||
return InterlockedCompareExchange(Destination, Exchange, Comperand); | ||
} | ||
} | ||
|
||
// Compare-and-exchange on a 32-bit value, selecting the implementation at | ||
// run time from g_arm64_atomics_present. | ||
// | ||
// Destination - location to compare and conditionally update. | ||
// Exchange    - value to store when the comparison succeeds. | ||
// Comperand   - value Destination is compared against. | ||
// | ||
// Returns the result of whichever primitive ran, as a LONG. | ||
// NOTE(review): presumably this is the prior value of *Destination, as with | ||
// the Interlocked* API - confirm against the __casa32 documentation. | ||
FORCEINLINE LONG FastInterlockedCompareExchangeAcquire( | ||
IN OUT LONG volatile *Destination, | ||
IN LONG Exchange, | ||
IN LONG Comperand | ||
) | ||
{ | ||
if (g_arm64_atomics_present) | ||
{ | ||
// The intrinsic takes (target, comparand, new value), i.e. Comperand and | ||
// Exchange are passed in the opposite order from the Interlocked* call | ||
// below. The cast also drops the volatile qualifier from Destination. | ||
// NOTE(review): __casa32 is presumably the Armv8.1 CASA (acquire) | ||
// compare-and-swap - confirm against the MSVC ARM64 intrinsics reference. | ||
return (LONG) __casa32((unsigned __int32*) Destination, (unsigned __int32)Comperand, (unsigned __int32)Exchange); | ||
} | ||
else | ||
{ | ||
// Atomics flag not set: use the regular Interlocked primitive. | ||
return InterlockedCompareExchangeAcquire(Destination, Exchange, Comperand); | ||
} | ||
} | ||
|
||
// Compare-and-exchange on a 32-bit value, selecting the implementation at | ||
// run time from g_arm64_atomics_present. | ||
// | ||
// Destination - location to compare and conditionally update. | ||
// Exchange    - value to store when the comparison succeeds. | ||
// Comperand   - value Destination is compared against. | ||
// | ||
// Returns the result of whichever primitive ran, as a LONG. | ||
// NOTE(review): presumably this is the prior value of *Destination, as with | ||
// the Interlocked* API - confirm against the __casl32 documentation. | ||
FORCEINLINE LONG FastInterlockedCompareExchangeRelease( | ||
IN OUT LONG volatile *Destination, | ||
IN LONG Exchange, | ||
IN LONG Comperand | ||
) | ||
{ | ||
if (g_arm64_atomics_present) | ||
{ | ||
// The intrinsic takes (target, comparand, new value), i.e. Comperand and | ||
// Exchange are passed in the opposite order from the Interlocked* call | ||
// below. The cast also drops the volatile qualifier from Destination. | ||
// NOTE(review): __casl32 is presumably the Armv8.1 CASL (release) | ||
// compare-and-swap - confirm against the MSVC ARM64 intrinsics reference. | ||
return (LONG) __casl32((unsigned __int32*) Destination, (unsigned __int32)Comperand, (unsigned __int32)Exchange); | ||
} | ||
else | ||
{ | ||
// Atomics flag not set: use the regular Interlocked primitive. | ||
return InterlockedCompareExchangeRelease(Destination, Exchange, Comperand); | ||
} | ||
} | ||
|
||
#endif // defined(TARGET_WINDOWS) && defined(TARGET_ARM64) | ||
|
||
#endif //defined(DACCESS_COMPILE) | ||
|
||
|
||
//************************************************************************ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Didn't we just remove some or all of the "FastInterlockedXXXX" versions?
Having "Fast" variants always feels a bit confusing. Are they unfit for some uses? Where would I want to use the slow versions?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, we wanted to patch just the hot paths (which is where we should always use the fast versions) instead of all the paths, and hence I had to reintroduce them at just a few call sites.