From a6b98a7b6b0f810184e1acc2dfe817a16bb76c8c Mon Sep 17 00:00:00 2001 From: "Nicholas S. Park" Date: Wed, 5 Feb 2020 10:44:08 -0600 Subject: [PATCH] section/collectives: Revise pSync wording - Add common pSync requirements to collectives intro - Remove redundant pSync wording from individual function descriptions - Clarify "size at least" for pSync argument descriptions - Closes #181 --- content/collective_intro.tex | 10 +++++++--- content/shmem_alltoall.tex | 22 +++------------------- content/shmem_alltoalls.tex | 8 +++----- content/shmem_barrier.tex | 17 ++--------------- content/shmem_broadcast.tex | 25 +++---------------------- content/shmem_collect.tex | 21 +-------------------- content/shmem_reductions.tex | 20 +++----------------- content/shmem_sync.tex | 13 ++----------- 8 files changed, 24 insertions(+), 112 deletions(-) diff --git a/content/collective_intro.tex b/content/collective_intro.tex index c1452376a..5f7165206 100644 --- a/content/collective_intro.tex +++ b/content/collective_intro.tex @@ -83,10 +83,14 @@ \subsubsection*{Active-set-based collectives} Another argument important to active-set-based collective routines is \VAR{pSync}, which is a symmetric work array. All \acp{PE} participating in an active-set-based collective must pass the -same \VAR{pSync} array. On completion of such a collective call, the \VAR{pSync} is +same \VAR{pSync} array. +Every element of the \VAR{pSync} array must be initialized to +\LibConstRef{SHMEM\_SYNC\_VALUE} before it is used as an argument to +any active-set-based collective routine. +On completion of such a collective call, the \VAR{pSync} array is restored to its original contents. The user is permitted to reuse a \VAR{pSync} -array if all previous collective routines using the \VAR{pSync} array have been -completed by all participating \acp{PE}. One can use a synchronization +array if all previous collective routines using the \VAR{pSync} array have +completed on all participating \acp{PE}. 
One can use a synchronization collective routine such as \FUNC{shmem\_barrier} to ensure completion of previous active-set-based collective routines. The \FUNC{shmem\_barrier} and \FUNC{shmem\_sync} routines allow the same \VAR{pSync} array to be used on consecutive calls as long as the \acp{PE} diff --git a/content/shmem_alltoall.tex b/content/shmem_alltoall.tex index 1a51da719..b6ea85f22 100644 --- a/content/shmem_alltoall.tex +++ b/content/shmem_alltoall.tex @@ -49,10 +49,7 @@ consecutive \ac{PE} numbers in the active set.} \apiargument{IN}{PE\_size}{The number of \acp{PE} in the active set.} \apiargument{IN}{pSync}{ - Symmetric address of a work array of size \CONST{SHMEM\_ALLTOALL\_SYNC\_SIZE}. - Every element of \VAR{pSync} must be initialized with the value - \CONST{SHMEM\_SYNC\_VALUE} before any of the \acp{PE} in the active set - enter the routine.} + Symmetric address of a work array of size at least \CONST{SHMEM\_ALLTOALL\_SYNC\_SIZE}.} \end{DeprecateBlock} \end{apiarguments} @@ -135,21 +132,8 @@ } \apinotes{ - This routine restores \VAR{pSync} to its original contents. Multiple calls - to \openshmem\ routines that use the same \VAR{pSync} array do not require - that \VAR{pSync} be reinitialized after the first call. - The user must ensure that the \VAR{pSync} array is not being updated by any - \ac{PE} in the active set while any of the \acp{PE} participates in - processing of an \openshmem\ \FUNC{shmem\_alltoall} routine. Be careful to - avoid these situations: If the \VAR{pSync} array is initialized at run time, - some type of synchronization is needed to ensure that all \acp{PE} in the - active set have initialized \VAR{pSync} before any of them enter an - \openshmem\ routine called with the \VAR{pSync} synchronization array. 
A - \VAR{pSync} array may be reused on a subsequent \openshmem\ - \FUNC{shmem\_alltoall} routine only if none of the \acp{PE} in the - active set are still processing a prior \openshmem\ \FUNC{shmem\_alltoall} - routine call that used the same \VAR{pSync} array. In general, this can be - ensured only by doing some type of synchronization. + % TODO: Remove me! + None. } \begin{apiexamples} diff --git a/content/shmem_alltoalls.tex b/content/shmem_alltoalls.tex index 4348edd23..7abc9486e 100644 --- a/content/shmem_alltoalls.tex +++ b/content/shmem_alltoalls.tex @@ -55,10 +55,7 @@ consecutive \ac{PE} numbers in the active set.} \apiargument{IN}{PE\_size}{The number of \acp{PE} in the active set.} \apiargument{IN}{pSync}{ - Symmetric address of a work array of size \CONST{SHMEM\_ALLTOALLS\_SYNC\_SIZE}. - Every element of \VAR{pSync} must be initialized with the value - \CONST{SHMEM\_SYNC\_VALUE} before any of the \acp{PE} in the active set - enter the routine.} + Symmetric address of a work array of size at least \CONST{SHMEM\_ALLTOALLS\_SYNC\_SIZE}.} \end{DeprecateBlock} \end{apiarguments} @@ -105,7 +102,8 @@ } \apinotes{ - See notes for \FUNC{shmem\_alltoall} in Section~\ref{subsec:shmem_alltoall}. + % TODO: Remove me! + None. } \begin{apiexamples} diff --git a/content/shmem_barrier.tex b/content/shmem_barrier.tex index 620bb99ae..37dd5990a 100644 --- a/content/shmem_barrier.tex +++ b/content/shmem_barrier.tex @@ -17,10 +17,7 @@ \ac{PE} numbers in the active set.} \apiargument{IN}{PE\_size}{The number of \acp{PE} in the active set.} \apiargument{IN}{pSync}{ - Symmetric address of a work array of size \CONST{SHMEM\_BARRIER\_SYNC\_SIZE}. 
- Every element - of \VAR{pSync} must be initialized to \CONST{SHMEM\_SYNC\_VALUE} before any of - the \acp{PE} in the active set enter \FUNC{shmem\_barrier} the first time.} + Symmetric address of a work array of size at least \CONST{SHMEM\_BARRIER\_SYNC\_SIZE}.} \end{apiarguments} @@ -42,9 +39,8 @@ memory updates, including \acp{AMO} and \ac{RMA} operations, done by any of the \acp{PE} in the active set on the default context are complete before returning. - The same \VAR{pSync} array may be reused on consecutive calls to + The same \VAR{pSync} array may be reused on consecutive calls to \FUNC{shmem\_barrier} if the same active set is used. - } \apireturnvalues{ @@ -60,15 +56,6 @@ followed by a call to \FUNC{shmem\_team\_sync} on the desired team. - If the \VAR{pSync} array is initialized at the run time, all - \acp{PE} must be synchronized before the first call to \FUNC{shmem\_barrier} - (e.g., by \FUNC{shmem\_barrier\_all}) to ensure the array has been initialized - by all \acp{PE} before it is used. - - If the active set does not change, \FUNC{shmem\_barrier} can be called - repeatedly with the same \VAR{pSync} array. No additional synchronization - beyond that implied by \FUNC{shmem\_barrier} itself is necessary in this case. - The \FUNC{shmem\_barrier} routine can be used to portably ensure that memory access operations observe remote updates in the order enforced by initiator \acp{PE}. diff --git a/content/shmem_broadcast.tex b/content/shmem_broadcast.tex index 66a98f28c..036734a7d 100644 --- a/content/shmem_broadcast.tex +++ b/content/shmem_broadcast.tex @@ -50,10 +50,7 @@ consecutive \ac{PE} numbers in the active set.} \apiargument{IN}{PE\_size}{The number of \acp{PE} in the active set.} \apiargument{IN}{pSync}{ - Symmetric address of a work array of size \CONST{SHMEM\_BCAST\_SYNC\_SIZE}. 
- Every element of \VAR{pSync} must be initialized with the value - \CONST{SHMEM\_SYNC\_VALUE} before any of the \acp{PE} in the active set - enters \FUNC{shmem\_broadcast}.} + Symmetric address of a work array of size at least \CONST{SHMEM\_BCAST\_SYNC\_SIZE}.} \end{DeprecateBlock} \end{apiarguments} @@ -109,8 +106,8 @@ is ready to accept the broadcast data. \item For active-set-based broadcasts, the \VAR{pSync} array on all \acp{PE} in the - active set is not still in use from a prior call to a collective - \openshmem routine. + active set is not still in use from a prior call to an \openshmem + collective routine. \end{itemize} Otherwise, the behavior is undefined. @@ -138,22 +135,6 @@ } \apinotes{ - Active-set-based \openshmem broadcast routines restore \VAR{pSync} to its original contents. - Multiple calls to active-set-based routines that use the same \VAR{pSync} array do not - require that \VAR{pSync} be reinitialized after the first call. - - The user must ensure that the \VAR{pSync} array is not being updated by any - \ac{PE} in the active set while any of the \acp{PE} participates in processing - of an \openshmem broadcast routine. Be careful to avoid these situations: If the - \VAR{pSync} array is initialized at run time, before its first use, some type of synchronization is - needed to ensure that all \acp{PE} in the active set have initialized - \VAR{pSync} before any of them enter an \openshmem routine called with the - \VAR{pSync} synchronization array. A \VAR{pSync} array may be reused on a - subsequent \openshmem broadcast routine only if none of the \acp{PE} in the - active set are still processing a prior \openshmem broadcast routine call that - used the same \VAR{pSync} array. In general, this can be ensured only by doing - some type of synchronization. - Team handle error checking and integer return codes are currently undefined. 
Implementations may define these behaviors as needed, but programs should ensure portability by doing their own checks for invalid team handles and for diff --git a/content/shmem_collect.tex b/content/shmem_collect.tex index a9b79cc6d..53b41da90 100644 --- a/content/shmem_collect.tex +++ b/content/shmem_collect.tex @@ -52,10 +52,7 @@ consecutive \ac{PE} numbers in the active set.} \apiargument{IN}{PE\_size}{The number of \acp{PE} in the active set.} \apiargument{IN}{pSync}{ - Symmetric address of a work array of size \CONST{SHMEM\_COLLECT\_SYNC\_SIZE}. - Every element of \VAR{pSync} must be initialized with the value - \CONST{SHMEM\_SYNC\_VALUE} before any of the \acp{PE} in the active set - enter \FUNC{shmem\_collect} or \FUNC{shmem\_fcollect}.} + Symmetric address of a work array of size at least \CONST{SHMEM\_COLLECT\_SYNC\_SIZE}.} \end{DeprecateBlock} \end{apiarguments} @@ -111,22 +108,6 @@ } \apinotes{ - All \openshmem collective routines reset the values in \VAR{pSync} before they - return, so a particular \VAR{pSync} buffer need only be initialized the first - time it is used. - - The user must ensure that the \VAR{pSync} array is not being updated on any \ac{PE} - in the active set while any of the \acp{PE} participate in processing of an - \openshmem collective routine. Be careful to avoid these situations: If the - \VAR{pSync} array is initialized at run time, some type of synchronization is - needed to ensure that all \acp{PE} in the working set have initialized - \VAR{pSync} before any of them enter an \openshmem routine called with the - \VAR{pSync} synchronization array. A \VAR{pSync} array can be reused on a - subsequent \openshmem collective routine only if none of the \acp{PE} in the - active set are still processing a prior \openshmem collective routine call - that used the same \VAR{pSync} array. In general, this may be ensured only by - doing some type of synchronization. 
- The collective routines operate on active \ac{PE} sets that have a non-power-of-two \VAR{PE\_size} with some performance degradation. They operate with no performance degradation when \VAR{nelems} is a non-power-of-two value. diff --git a/content/shmem_reductions.tex b/content/shmem_reductions.tex index 7c738a0bb..833b95f6e 100644 --- a/content/shmem_reductions.tex +++ b/content/shmem_reductions.tex @@ -223,10 +223,7 @@ max(\VAR{nreduce}/2 + 1, \CONST{SHMEM\_REDUCE\_MIN\_WRKDATA\_SIZE}) elements.} \apiargument{IN}{pSync}{ - Symmetric address of a work array of size \CONST{SHMEM\_REDUCE\_SYNC\_SIZE}. - Every element of \VAR{pSync} must be initialized with the value - \CONST{SHMEM\_SYNC\_VALUE} before any of the \acp{PE} in the active set - enter the reduction routine.} + Symmetric address of a work array of size at least \CONST{SHMEM\_REDUCE\_SYNC\_SIZE}.} \end{DeprecateBlock} \end{apiarguments} @@ -297,19 +294,8 @@ } \apinotes{ - All \openshmem reduction routines reset the values in \VAR{pSync} before they - return, so a particular \VAR{pSync} buffer need only be initialized the first - time it is used. The user must ensure that the \VAR{pSync} array is not being updated on any \ac{PE} - in the active set while any of the \acp{PE} participate in processing of an - \openshmem reduction routine. Be careful to avoid the following situations: If - the \VAR{pSync} array is initialized at run time, some type of synchronization - is needed to ensure that all \acp{PE} in the working set have initialized - \VAR{pSync} before any of them enter an \openshmem routine called with the - \VAR{pSync} synchronization array. A \VAR{pSync} or \VAR{pWrk} array can be - reused in a subsequent reduction routine call only if none of the \acp{PE} in - the active set are still processing a prior reduction routine call that used - the same \VAR{pSync} or \VAR{pWrk} arrays. In general, this can be assured only - by doing some type of synchronization. + % TODO: Remove me! + None. 
} \begin{apiexamples} diff --git a/content/shmem_sync.tex b/content/shmem_sync.tex index 4cd7ce8ee..eb7f22441 100644 --- a/content/shmem_sync.tex +++ b/content/shmem_sync.tex @@ -30,9 +30,8 @@ \apiargument{IN}{logPE\_stride}{The log (base 2) of the stride between consecutive \ac{PE} numbers in the active set.} \apiargument{IN}{PE\_size}{The number of \acp{PE} in the active set.} -\apiargument{IN}{pSync}{Symmetric address of a work array. Every element of - \VAR{pSync} must be initialized to \CONST{SHMEM\_SYNC\_VALUE} before any of the - \acp{PE} in the active set enter \FUNC{shmem\_sync} the first time.} +\apiargument{IN}{pSync}{ + Symmetric address of a work array of size at least \CONST{SHMEM\_SYNC\_SIZE}.} \end{DeprecateBlock} \end{apiarguments} @@ -78,14 +77,6 @@ } \apinotes{ - If the \VAR{pSync} array is initialized at run time, another method of - synchronization (e.g., \FUNC{shmem\_sync\_all}) must be used before - the initial use of that \VAR{pSync} array by \FUNC{shmem\_sync}. - - If the active set does not change, \FUNC{shmem\_sync} can be called - repeatedly with the same \VAR{pSync} array. No additional synchronization - beyond that implied by \FUNC{shmem\_sync} itself is necessary in this case. - The \FUNC{shmem\_sync} routine can be used to portably ensure that memory access operations observe remote updates in the order enforced by the initiator \acp{PE}, provided that the initiator PE ensures completion of remote