From 2b39c09666e34ea4792f227b5bbc406e12a6722b Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Mon, 10 Sep 2018 13:46:29 -0500 Subject: [PATCH 01/11] Add support for NBI put-with-signal NBI put-with-signal is an extension to its blocking variant. --- content/shmem_put_signal_nbi.tex | 84 ++++++++++++++++++++++++++++++++ main_spec.tex | 3 ++ utils/defs.tex | 9 ++-- 3 files changed, 93 insertions(+), 3 deletions(-) create mode 100644 content/shmem_put_signal_nbi.tex diff --git a/content/shmem_put_signal_nbi.tex b/content/shmem_put_signal_nbi.tex new file mode 100644 index 000000000..4d416a1e9 --- /dev/null +++ b/content/shmem_put_signal_nbi.tex @@ -0,0 +1,84 @@ +\color{Green} +\apisummary{ + The nonblocking put-with-signal routines provide a method for copying data + from a contiguous local data object to a data object on a specified \ac{PE} + and subsequently setting a remote flag to signal completion. +} + +\begin{apidefinition} + +\begin{C11synopsis} +void @\FuncDecl{shmem\_put\_signal\_nbi}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_put\_signal\_nbi}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); +\end{C11synopsis} +where \TYPE{} is one of the standard \ac{RMA} types specified by Table \ref{stdrmatypes}. + +\begin{Csynopsis} +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_put\_signal\_nbi}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_put\_signal\_nbi}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); +\end{Csynopsis} +where \TYPE{} is one of the standard \ac{RMA} types and has a corresponding \TYPENAME{} specified by Table \ref{stdrmatypes}. 
+ +\begin{CsynopsisCol} +void @\FuncDecl{shmem\_put\FuncParam{SIZE}\_signal\_nbi}@(void *dest, const void *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_ctx\_put\FuncParam{SIZE}\_signal\_nbi}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); +\end{CsynopsisCol} +where \SIZE{} is one of \CONST{8, 16, 32, 64, 128}. + +\begin{CsynopsisCol} +void @\FuncDecl{shmem\_putmem\_signal\_nbi}@(void *dest, const void *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_ctx\_putmem\_signal\_nbi}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); +\end{CsynopsisCol} + +\begin{apiarguments} + \apiargument{IN}{ctx}{The context on which to perform the operation. + When this argument is not provided, the operation is performed on + \CONST{SHMEM\_CTX\_DEFAULT}.} + \apiargument{OUT}{dest}{Data object to be updated on the remote \ac{PE}. This + data object must be remotely accessible.} + \apiargument{IN}{source}{Data object containing the data to be copied.} + \apiargument{IN}{nelems}{Number of elements in the \VAR{dest} and \VAR{source} + arrays. \VAR{nelems} must be of type \VAR{size\_t} for \Cstd.} + \apiargument{OUT}{sig\_addr}{Data object to be updated on the remote + \ac{PE} as the signal. This signal data object must be + remotely accessible.} + \apiargument{IN}{signal}{Unsigned 64-bit value that is assigned to the + remote \VAR{sig\_addr} signal data object.} + \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE}.} +\end{apiarguments} + +\apidescription{ + The routines return after posting the operation. The operation is considered + complete after the subsequent call to \FUNC{shmem\_quiet}. 
At the completion + of \FUNC{shmem\_quiet}, the data has been copied out of the \VAR{source} + array on the local \ac{PE} and delivered into the \VAR{dest} array on the + destination \ac{PE}. The delivery of \VAR{signal} flag on the remote + \ac{PE} indicates the delivery of its corresponding \VAR{dest} data words + into the data object on the remote \ac{PE}. +} + +\apireturnvalues{ + None. +} + +\apinotes{ + The \VAR{dest} and \VAR{sig\_addr} data objects must both be remotely + accessible. The \VAR{sig\_addr} and \VAR{dest} could be of different kinds, + for example, one could be a global/static \Cstd variable and the other could + be allocated on the symmetric heap. + + The restrict qualifier in \VAR{sig\_addr} expects the data object to be + distinct from \VAR{dest} and \VAR{source} data objects. + + The delivery of \VAR{signal} flag on the remote \ac{PE} indicates only the + delivery of its corresponding \VAR{dest} data words into the data object on + the remote \ac{PE}. Without a memory-ordering operation, there is no implied + ordering between the delivery of the signal word of a nonblocking + put-with-signal routine and another data transfer. For example, the delivery + of the signal word in a sequence consisting of a put routine followed by a + nonblocking put-with-signal routine does not imply delivery of the put + routine's data. 
+} + +\end{apidefinition} +\color{Black} diff --git a/main_spec.tex b/main_spec.tex index d27dae188..8d236dcf7 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -185,6 +185,9 @@ \subsection{Non-blocking Remote Memory Access Routines}\label{sec:rma_nbi} \subsubsection{\textbf{SHMEM\_PUT\_NBI}}\label{subsec:shmem_put_nbi} \input{content/shmem_put_nbi.tex} +\subsubsection{\textbf{SHMEM\_PUT\_SIGNAL\_NBI}}\label{subsec:shmem_put_signal_nbi} +\input{content/shmem_put_signal_nbi.tex} + \subsubsection{\textbf{SHMEM\_GET\_NBI}}\label{subsec:shmem_get_nbi} \input{content/shmem_get_nbi.tex} diff --git a/utils/defs.tex b/utils/defs.tex index d7161ecdb..b9619431f 100644 --- a/utils/defs.tex +++ b/utils/defs.tex @@ -408,7 +408,8 @@ \textbf{C11:} \lstset{language={C}, backgroundcolor=\color{gray}, lineskip=2pt, escapechar=@, - morekeywords={size_t, ptrdiff_t, TYPE, _Noreturn, shmem_ctx_t}, + morekeywords={size_t, ptrdiff_t, TYPE, _Noreturn, uint64_t, + restrict, shmem_ctx_t}, aboveskip=0pt, belowskip=0pt}}{} \lstnewenvironment{CsynopsisCol} @@ -424,7 +425,8 @@ \textbf{C/C++:} \lstset{language={C}, backgroundcolor=\color{gray}, lineskip=2pt, escapechar=@, - morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, shmem_ctx_t}, + morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, uint64_t, + restrict, shmem_ctx_t}, aboveskip=0pt, belowskip=0pt}}{} \lstnewenvironment{CsynopsisST} @@ -433,7 +435,8 @@ \color{red} {\lstset{language={C}, backgroundcolor=\color{gray}, lineskip=2pt, escapechar=@, - morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, shmem_ctx_t}, + morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, uint64_t, + restrict, shmem_ctx_t}, aboveskip=0pt, belowskip=0pt}}}{} \lstnewenvironment{Fsynopsis} From 55c63758cab8a190626341d4cf2ecdb9fd0cefb1 Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Mon, 8 Oct 2018 17:24:58 -0500 Subject: [PATCH 02/11] Implement review comments similar to blocking put-with-signal We have incorporated common 
review comments from put-with-signal blocking routines: 1. duplicated explanation from summary to description 2. removed restrict qualifier and also overlapping explanation 3. modified ctx arg explanation --- content/shmem_put_signal_nbi.tex | 34 ++++++++++++++++---------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/content/shmem_put_signal_nbi.tex b/content/shmem_put_signal_nbi.tex index 4d416a1e9..0cbbbbd9b 100644 --- a/content/shmem_put_signal_nbi.tex +++ b/content/shmem_put_signal_nbi.tex @@ -8,32 +8,32 @@ \begin{apidefinition} \begin{C11synopsis} -void @\FuncDecl{shmem\_put\_signal\_nbi}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); -void @\FuncDecl{shmem\_put\_signal\_nbi}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_put\_signal\_nbi}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_put\_signal\_nbi}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); \end{C11synopsis} where \TYPE{} is one of the standard \ac{RMA} types specified by Table \ref{stdrmatypes}. 
\begin{Csynopsis} -void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_put\_signal\_nbi}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); -void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_put\_signal\_nbi}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_put\_signal\_nbi}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_put\_signal\_nbi}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); \end{Csynopsis} where \TYPE{} is one of the standard \ac{RMA} types and has a corresponding \TYPENAME{} specified by Table \ref{stdrmatypes}. \begin{CsynopsisCol} -void @\FuncDecl{shmem\_put\FuncParam{SIZE}\_signal\_nbi}@(void *dest, const void *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); -void @\FuncDecl{shmem\_ctx\_put\FuncParam{SIZE}\_signal\_nbi}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_put\FuncParam{SIZE}\_signal\_nbi}@(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_ctx\_put\FuncParam{SIZE}\_signal\_nbi}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); \end{CsynopsisCol} where \SIZE{} is one of \CONST{8, 16, 32, 64, 128}. 
\begin{CsynopsisCol} -void @\FuncDecl{shmem\_putmem\_signal\_nbi}@(void *dest, const void *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); -void @\FuncDecl{shmem\_ctx\_putmem\_signal\_nbi}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_putmem\_signal\_nbi}@(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_ctx\_putmem\_signal\_nbi}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); \end{CsynopsisCol} \begin{apiarguments} - \apiargument{IN}{ctx}{The context on which to perform the operation. - When this argument is not provided, the operation is performed on - \CONST{SHMEM\_CTX\_DEFAULT}.} + \apiargument{IN}{ctx}{A context handle specifying the context on which to + perform the operation. When this argument is not provided, the operation is + performed on the default context.} \apiargument{OUT}{dest}{Data object to be updated on the remote \ac{PE}. This data object must be remotely accessible.} \apiargument{IN}{source}{Data object containing the data to be copied.} @@ -48,9 +48,12 @@ \end{apiarguments} \apidescription{ - The routines return after posting the operation. The operation is considered - complete after the subsequent call to \FUNC{shmem\_quiet}. At the completion - of \FUNC{shmem\_quiet}, the data has been copied out of the \VAR{source} + The nonblocking put-with-signal routines provide a method for copying data + from a contiguous local data object to a data object on a specified \ac{PE} + and subsequently setting a remote flag to signal completion. The routines + return after posting the operation. The operation is considered complete + after the subsequent call to \FUNC{shmem\_quiet}. 
At the completion of + \FUNC{shmem\_quiet}, the data has been copied out of the \VAR{source} array on the local \ac{PE} and delivered into the \VAR{dest} array on the destination \ac{PE}. The delivery of \VAR{signal} flag on the remote \ac{PE} indicates the delivery of its corresponding \VAR{dest} data words @@ -67,9 +70,6 @@ for example, one could be a global/static \Cstd variable and the other could be allocated on the symmetric heap. - The restrict qualifier in \VAR{sig\_addr} expects the data object to be - distinct from \VAR{dest} and \VAR{source} data objects. - The delivery of \VAR{signal} flag on the remote \ac{PE} indicates only the delivery of its corresponding \VAR{dest} data words into the data object on the remote \ac{PE}. Without a memory-ordering operation, there is no implied From 27eb675f96fe6b2d355aad223f761a4b9920e7a9 Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Thu, 18 Oct 2018 12:33:52 -0500 Subject: [PATCH 03/11] Adding overlapping semantics in put-with-signal-nbi --- content/shmem_put_signal_nbi.tex | 2 ++ 1 file changed, 2 insertions(+) diff --git a/content/shmem_put_signal_nbi.tex b/content/shmem_put_signal_nbi.tex index 0cbbbbd9b..3d99679e5 100644 --- a/content/shmem_put_signal_nbi.tex +++ b/content/shmem_put_signal_nbi.tex @@ -70,6 +70,8 @@ for example, one could be a global/static \Cstd variable and the other could be allocated on the symmetric heap. + The \VAR{sig\_addr} and \VAR{dest} may not be overlapping in memory. + The delivery of \VAR{signal} flag on the remote \ac{PE} indicates only the delivery of its corresponding \VAR{dest} data words into the data object on the remote \ac{PE}. 
Without a memory-ordering operation, there is no implied From 3361c3e0af3d0eda7299a5a27a9aa1cfcd86241d Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Tue, 8 Jan 2019 13:40:08 -0600 Subject: [PATCH 04/11] Explicitly state the NBI signal update is AMO Based on recent review comments, it looks like it would be more clear if we state that the signal update is an atomic operation We have added this as part of the Notes to Implementers section. --- content/shmem_put_signal_nbi.tex | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/content/shmem_put_signal_nbi.tex b/content/shmem_put_signal_nbi.tex index 3d99679e5..8f7279ffa 100644 --- a/content/shmem_put_signal_nbi.tex +++ b/content/shmem_put_signal_nbi.tex @@ -82,5 +82,12 @@ routine's data. } +\apiimpnotes{ + Implementations must ensure that put-with-signal routines are compatible + with all point-to-point synchronization interfaces. The delivery of + \signal{} flag on the remote \ac{PE} must not cause partial updates. This + requires the update on \signal{} flag to be an atomic memory operation. +} + \end{apidefinition} \color{Black} From 086a01ce955038813ff62a3f0182ed4b57fa7ba4 Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Tue, 8 Jan 2019 13:44:18 -0600 Subject: [PATCH 05/11] Fix variable usage in NBI notes section --- content/shmem_put_signal_nbi.tex | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/content/shmem_put_signal_nbi.tex b/content/shmem_put_signal_nbi.tex index 8f7279ffa..38f19b75c 100644 --- a/content/shmem_put_signal_nbi.tex +++ b/content/shmem_put_signal_nbi.tex @@ -1,4 +1,3 @@ -\color{Green} \apisummary{ The nonblocking put-with-signal routines provide a method for copying data from a contiguous local data object to a data object on a specified \ac{PE} @@ -85,9 +84,8 @@ \apiimpnotes{ Implementations must ensure that put-with-signal routines are compatible with all point-to-point synchronization interfaces. 
The delivery of - \signal{} flag on the remote \ac{PE} must not cause partial updates. This - requires the update on \signal{} flag to be an atomic memory operation. + \VAR{signal} flag on the remote \ac{PE} must not cause partial updates. This + requires the update on \VAR{signal} flag to be an atomic memory operation. } \end{apidefinition} -\color{Black} From 3b48d2bdea19d1c837d1da171706c3aa6a4a2e51 Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Wed, 9 Jan 2019 14:37:06 -0600 Subject: [PATCH 06/11] Move NBI put-with-signal apiimpnotes to apinotes Previously, we had the information about the signal update atomicity guarantees in the notes to implementers section for NBI put-with-signal. We are now moving this into the main notes section. We have also clarified the atomicity guarantees by referring to the atomicity section. --- content/shmem_put_signal_nbi.tex | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/content/shmem_put_signal_nbi.tex b/content/shmem_put_signal_nbi.tex index 38f19b75c..ff5439f89 100644 --- a/content/shmem_put_signal_nbi.tex +++ b/content/shmem_put_signal_nbi.tex @@ -79,13 +79,12 @@ of the signal word in a sequence consisting of a put routine followed by a nonblocking put-with-signal routine does not imply delivery of the put routine's data. -} -\apiimpnotes{ - Implementations must ensure that put-with-signal routines are compatible - with all point-to-point synchronization interfaces. The delivery of - \VAR{signal} flag on the remote \ac{PE} must not cause partial updates. This - requires the update on \VAR{signal} flag to be an atomic memory operation. + The nonblocking put-with-signal routines are compatible with all + point-to-point synchronization interfaces. The delivery of \VAR{signal} flag + on the remote \ac{PE} must not cause partial updates.
This requires the + update on \VAR{signal} flag to be an atomic operation, with atomicity + guarantees described in Section~\ref{subsec:amo_guarantees}. } \end{apidefinition} From 68cd22c8adcc0b35512f16b354755ad3f70753e5 Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Sat, 12 Jan 2019 12:45:22 -0600 Subject: [PATCH 07/11] Add backmatter for NBI put-with-signal --- content/backmatter.tex | 3 +++ 1 file changed, 3 insertions(+) diff --git a/content/backmatter.tex b/content/backmatter.tex index 647278eac..7e2e43e3a 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -510,6 +510,9 @@ \section{Version 1.5} The following list describes the specific changes in \openshmem[1.5]: \begin{itemize} % +\item Added support for nonblocking put-with-signal functions. +\\ See Section \ref{subsec:shmem_put_signal_nbi}. +% \item Specified the validity of communication contexts, added the constant \CONST{SHMEM\_CTX\_INVALID}, and clarified the behavior of \FUNC{shmem\_ctx\_*} routines on invalid contexts. From 2d4a35f3266710f2a67f7ebab5b35a91ae2a7bf7 Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Sat, 12 Jan 2019 12:51:54 -0600 Subject: [PATCH 08/11] RM restrict qualifier from defs.tex Previously, we used the restrict qualifier and defined it in defs.tex for syntax highlighting in the function definitions. As the usage of the restrict qualifier is removed, this change is no longer needed.
--- utils/defs.tex | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/utils/defs.tex b/utils/defs.tex index b9619431f..2a86dd709 100644 --- a/utils/defs.tex +++ b/utils/defs.tex @@ -408,8 +408,8 @@ \textbf{C11:} \lstset{language={C}, backgroundcolor=\color{gray}, lineskip=2pt, escapechar=@, - morekeywords={size_t, ptrdiff_t, TYPE, _Noreturn, uint64_t, - restrict, shmem_ctx_t}, + morekeywords={size_t, ptrdiff_t, TYPE, _Noreturn, shmem_ctx_t, + uint64_t}, aboveskip=0pt, belowskip=0pt}}{} \lstnewenvironment{CsynopsisCol} @@ -425,8 +425,8 @@ \textbf{C/C++:} \lstset{language={C}, backgroundcolor=\color{gray}, lineskip=2pt, escapechar=@, - morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, uint64_t, - restrict, shmem_ctx_t}, + morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, shmem_ctx_t, + uint64_t}, aboveskip=0pt, belowskip=0pt}}{} \lstnewenvironment{CsynopsisST} @@ -435,8 +435,8 @@ \color{red} {\lstset{language={C}, backgroundcolor=\color{gray}, lineskip=2pt, escapechar=@, - morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, uint64_t, - restrict, shmem_ctx_t}, + morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, shmem_ctx_t, + uint64_t}, aboveskip=0pt, belowskip=0pt}}}{} \lstnewenvironment{Fsynopsis} From 48f2ec1693224abe69edbf30e5c7e097087b3ea8 Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Sat, 12 Jan 2019 13:03:10 -0600 Subject: [PATCH 09/11] Fix \VAR and macro usage We were incorrectly using \VAR instead of the macros for \dest and \source. Fixing it in put-with-signal-nbi. --- content/shmem_put_signal_nbi.tex | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/content/shmem_put_signal_nbi.tex b/content/shmem_put_signal_nbi.tex index ff5439f89..868887b52 100644 --- a/content/shmem_put_signal_nbi.tex +++ b/content/shmem_put_signal_nbi.tex @@ -36,7 +36,7 @@ \apiargument{OUT}{dest}{Data object to be updated on the remote \ac{PE}.
This data object must be remotely accessible.} \apiargument{IN}{source}{Data object containing the data to be copied.} - \apiargument{IN}{nelems}{Number of elements in the \VAR{dest} and \VAR{source} + \apiargument{IN}{nelems}{Number of elements in the \dest{} and \source{} arrays. \VAR{nelems} must be of type \VAR{size\_t} for \Cstd.} \apiargument{OUT}{sig\_addr}{Data object to be updated on the remote \ac{PE} as the signal. This signal data object must be @@ -52,10 +52,10 @@ and subsequently setting a remote flag to signal completion. The routines return after posting the operation. The operation is considered complete after the subsequent call to \FUNC{shmem\_quiet}. At the completion of - \FUNC{shmem\_quiet}, the data has been copied out of the \VAR{source} - array on the local \ac{PE} and delivered into the \VAR{dest} array on the + \FUNC{shmem\_quiet}, the data has been copied out of the \source{} + array on the local \ac{PE} and delivered into the \dest{} array on the destination \ac{PE}. The delivery of \VAR{signal} flag on the remote - \ac{PE} indicates the delivery of its corresponding \VAR{dest} data words + \ac{PE} indicates the delivery of its corresponding \dest{} data words into the data object on the remote \ac{PE}. } @@ -64,15 +64,15 @@ } \apinotes{ - The \VAR{dest} and \VAR{sig\_addr} data objects must both be remotely - accessible. The \VAR{sig\_addr} and \VAR{dest} could be of different kinds, + The \dest{} and \VAR{sig\_addr} data objects must both be remotely + accessible. The \VAR{sig\_addr} and \dest{} could be of different kinds, for example, one could be a global/static \Cstd variable and the other could be allocated on the symmetric heap. - The \VAR{sig\_addr} and \VAR{dest} may not be overlapping in memory. + The \VAR{sig\_addr} and \dest{} may not be overlapping in memory. 
The delivery of \VAR{signal} flag on the remote \ac{PE} indicates only the - delivery of its corresponding \VAR{dest} data words into the data object on + delivery of its corresponding \dest{} data words into the data object on the remote \ac{PE}. Without a memory-ordering operation, there is no implied ordering between the delivery of the signal word of a nonblocking put-with-signal routine and another data transfer. For example, the delivery From d361527358c4f30db7f8f0b236b99087c444150e Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Mon, 14 Jan 2019 22:25:32 -0600 Subject: [PATCH 10/11] Reframe NBI signal-put compatibility with p2p syncs --- content/shmem_put_signal_nbi.tex | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/content/shmem_put_signal_nbi.tex b/content/shmem_put_signal_nbi.tex index 868887b52..996d8a4f2 100644 --- a/content/shmem_put_signal_nbi.tex +++ b/content/shmem_put_signal_nbi.tex @@ -80,11 +80,11 @@ nonblocking put-with-signal routine does not imply delivery of the put routine's data. - The nonblocking put-with-signal routines are compatible with all - point-to-point synchronization interfaces. The delivery of \VAR{signal} flag - on the remote \ac{PE} must not cause partial updates. This requires the - update on \VAR{signal} flag to be an atomic operation, with atomicity - guarantees described in Section~\ref{subsec:amo_guarantees}. + The signal set by the nonblocking put-with-signal routines is compatible + with all point-to-point synchronization interfaces. The delivery of + \VAR{signal} flag on the remote \ac{PE} must not cause partial updates. This + requires the update on \VAR{signal} flag to be an atomic operation, with + atomicity guarantees described in Section~\ref{subsec:amo_guarantees}. 
} \end{apidefinition} From 6315ecc5781eb69f85b0ab7aa9bba66a950a6904 Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Fri, 3 May 2019 17:30:42 -0500 Subject: [PATCH 11/11] Update NBI put-with-signal atomicity description --- content/shmem_put_signal_nbi.tex | 75 ++++++++++++++++---------------- 1 file changed, 37 insertions(+), 38 deletions(-) diff --git a/content/shmem_put_signal_nbi.tex b/content/shmem_put_signal_nbi.tex index 996d8a4f2..9673c7d21 100644 --- a/content/shmem_put_signal_nbi.tex +++ b/content/shmem_put_signal_nbi.tex @@ -1,62 +1,75 @@ +\color{ForestGreen} \apisummary{ The nonblocking put-with-signal routines provide a method for copying data from a contiguous local data object to a data object on a specified \ac{PE} - and subsequently setting a remote flag to signal completion. + and subsequently updating a remote flag to signal completion. } \begin{apidefinition} \begin{C11synopsis} -void @\FuncDecl{shmem\_put\_signal\_nbi}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); -void @\FuncDecl{shmem\_put\_signal\_nbi}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_put\_signal\_nbi}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +void @\FuncDecl{shmem\_put\_signal\_nbi}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); \end{C11synopsis} where \TYPE{} is one of the standard \ac{RMA} types specified by Table \ref{stdrmatypes}. 
\begin{Csynopsis} -void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_put\_signal\_nbi}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); -void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_put\_signal\_nbi}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_put\_signal\_nbi}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_put\_signal\_nbi}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); \end{Csynopsis} where \TYPE{} is one of the standard \ac{RMA} types and has a corresponding \TYPENAME{} specified by Table \ref{stdrmatypes}. \begin{CsynopsisCol} -void @\FuncDecl{shmem\_put\FuncParam{SIZE}\_signal\_nbi}@(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); -void @\FuncDecl{shmem\_ctx\_put\FuncParam{SIZE}\_signal\_nbi}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_put\FuncParam{SIZE}\_signal\_nbi}@(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +void @\FuncDecl{shmem\_ctx\_put\FuncParam{SIZE}\_signal\_nbi}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); \end{CsynopsisCol} where \SIZE{} is one of \CONST{8, 16, 32, 64, 128}. 
\begin{CsynopsisCol} -void @\FuncDecl{shmem\_putmem\_signal\_nbi}@(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); -void @\FuncDecl{shmem\_ctx\_putmem\_signal\_nbi}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_putmem\_signal\_nbi}@(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +void @\FuncDecl{shmem\_ctx\_putmem\_signal\_nbi}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); \end{CsynopsisCol} \begin{apiarguments} \apiargument{IN}{ctx}{A context handle specifying the context on which to perform the operation. When this argument is not provided, the operation is performed on the default context.} - \apiargument{OUT}{dest}{Data object to be updated on the remote \ac{PE}. This - data object must be remotely accessible.} + \apiargument{OUT}{dest}{Data object to be updated on the remote \ac{PE}. + This data object must be remotely accessible.} \apiargument{IN}{source}{Data object containing the data to be copied.} \apiargument{IN}{nelems}{Number of elements in the \dest{} and \source{} arrays. \VAR{nelems} must be of type \VAR{size\_t} for \Cstd.} \apiargument{OUT}{sig\_addr}{Data object to be updated on the remote - \ac{PE} as the signal. This signal data object must be - remotely accessible.} - \apiargument{IN}{signal}{Unsigned 64-bit value that is assigned to the + \ac{PE} as the signal. 
This signal data object must be remotely accessible.} + \apiargument{IN}{signal}{Unsigned 64-bit value that is used for updating the remote \VAR{sig\_addr} signal data object.} + \apiargument{IN}{sig\_op}{Signal operator that represents the type of update + to be performed on the remote \VAR{sig\_addr} signal data object.} \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE}.} \end{apiarguments} \apidescription{ The nonblocking put-with-signal routines provide a method for copying data from a contiguous local data object to a data object on a specified \ac{PE} - and subsequently setting a remote flag to signal completion. The routines - return after posting the operation. The operation is considered complete - after the subsequent call to \FUNC{shmem\_quiet}. At the completion of - \FUNC{shmem\_quiet}, the data has been copied out of the \source{} - array on the local \ac{PE} and delivered into the \dest{} array on the - destination \ac{PE}. The delivery of \VAR{signal} flag on the remote - \ac{PE} indicates the delivery of its corresponding \dest{} data words - into the data object on the remote \ac{PE}. + and subsequently updating a remote flag to signal completion. + + The routines return after posting the operation. The operation is considered + complete after a subsequent call to \FUNC{shmem\_quiet}. At the completion + of \FUNC{shmem\_quiet}, the data has been copied out of the \source{} array + on the local \ac{PE} and delivered into the \dest{} array on the destination + \ac{PE}. The delivery of \VAR{signal} flag on the remote \ac{PE} indicates + the delivery of its corresponding \dest{} data words into the data object on + the remote \ac{PE}. + + The \VAR{sig\_op} signal operator determines the type of update to be + performed on the remote \VAR{sig\_addr} signal data object. 
+ + An update to the \VAR{sig\_addr} signal data object through a nonblocking + put-with-signal routine completes as if performed atomically with respect to + any other nonblocking put-with-signal routine that updates the + \VAR{sig\_addr} signal data object using the same \VAR{sig\_op} signal + update operator and any point-to-point synchronization routine that accesses + the \VAR{sig\_addr} signal data object. } \apireturnvalues{ @@ -69,22 +82,8 @@ for example, one could be a global/static \Cstd variable and the other could be allocated on the symmetric heap. - The \VAR{sig\_addr} and \dest{} may not be overlapping in memory. - - The delivery of \VAR{signal} flag on the remote \ac{PE} indicates only the - delivery of its corresponding \dest{} data words into the data object on - the remote \ac{PE}. Without a memory-ordering operation, there is no implied - ordering between the delivery of the signal word of a nonblocking - put-with-signal routine and another data transfer. For example, the delivery - of the signal word in a sequence consisting of a put routine followed by a - nonblocking put-with-signal routine does not imply delivery of the put - routine's data. - - The signal set by the nonblocking put-with-signal routines is compatible - with all point-to-point synchronization interfaces. The delivery of - \VAR{signal} flag on the remote \ac{PE} must not cause partial updates. This - requires the update on \VAR{signal} flag to be an atomic operation, with - atomicity guarantees described in Section~\ref{subsec:amo_guarantees}. + \VAR{sig\_addr} and \dest{} may not be overlapping in memory. } \end{apidefinition} +\color{black}