From fd54dd320325dc67f1cf0f70e2857a931eb5f91e Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Thu, 24 May 2018 09:56:36 -0500 Subject: [PATCH 01/30] Add initial support for put with signal --- content/shmem_put_signal.tex | 98 +++++++++++++++++++++++++ example_code/shmem_put_signal_example.c | 60 +++++++++++++++ main_spec.tex | 3 + utils/defs.tex | 1 + 4 files changed, 162 insertions(+) create mode 100644 content/shmem_put_signal.tex create mode 100644 example_code/shmem_put_signal_example.c diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex new file mode 100644 index 000000000..8cc9e3841 --- /dev/null +++ b/content/shmem_put_signal.tex @@ -0,0 +1,98 @@ +\color{Green} +\apisummary{ + The put with signal routines provide a method for copying data from a + contiguous local data object to a data object on a specified \ac{PE} + and set a remote flag to signal completion. +} + +\begin{apidefinition} + +\begin{C11synopsis} +void @\FuncDecl{shmem\_put\_signal}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_put\_signal}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +\end{C11synopsis} +where \TYPE{} is one of the standard \ac{RMA} types specified by Table \ref{stdrmatypes}. + +\begin{Csynopsis} +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_put\_signal}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_put\_signal}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +\end{Csynopsis} +where \TYPE{} is one of the standard \ac{RMA} types and has a corresponding \TYPENAME{} specified by Table \ref{stdrmatypes}. 
+ +\begin{CsynopsisCol} +void @\FuncDecl{shmem\_put\FuncParam{SIZE}\_signal}@(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_ctx\_put\FuncParam{SIZE}\_signal}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +\end{CsynopsisCol} +where \SIZE{} is one of \CONST{8, 16, 32, 64, 128}. + +\begin{CsynopsisCol} +void @\FuncDecl{shmem\_putmem\_signal}@(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_ctx\_putmem\_signal}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +\end{CsynopsisCol} + +\begin{apiarguments} + \apiargument{IN}{ctx}{The context on which to perform the operation. + When this argument is not provided, the operation is performed on + \CONST{SHMEM\_CTX\_DEFAULT}.} + \apiargument{OUT}{dest}{Data object to be updated on the remote \ac{PE}. This + data object must be remotely accessible.} + \apiargument{IN}{source}{Data object containing the data to be copied.} + \apiargument{IN}{nelems}{Number of elements in the \VAR{dest} and \VAR{source} + arrays. \VAR{nelems} must be of type \VAR{size\_t} for \Cstd. When using + \Fortran, it must be a constant, variable, or array element of default + integer type.} + \apiargument{OUT}{sig\_addr}{Signal data object to be updated on the remote + \ac{PE} to be updated as the signal. This signal data object must be + remotely accessible and it can be in the same or differnt memory segment + as the \VAR{dest} data object.} + \apiargument{IN}{signal}{Unsigned 64-bit value used to set the remote + \VAR{sig\_addr} signal data object.} + \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE}. \VAR{pe} must be + of type integer. 
When using \Fortran, it must be a constant, variable, + or array element of default integer type.} +\end{apiarguments} + +\apidescription{ + The routines return after the data has been copied out of the \source{} + array on the local \ac{PE}. The delivery of \signal flag on the remote + \ac{PE} guarantees the delivery of data words into the data object on the + remote \ac{PE}. Furthermore, two successive put with signal routines or + a successive put followed by a put with signal routine may deliver data + out of order unless a call to \FUNC{shmem\_fence} is introduced between + the two calls and the delivery of the \signal flag on the remote \ac{PE} + guarantees only the delivery of its corresponding data object on the + remote \ac{PE}. + } + +\apidesctable{ + The \dest{} and \source{} data objects must conform to certain typing + constraints, which are as follows:} + {Routine}{Data type of \VAR{dest} and \VAR{source}} + \apitablerow{shmem\_putmem}{Any data type. nelems is scaled in bytes.} + \apitablerow{shmem\_put8}{Any noncharacter type that + has a storage size equal to \CONST{8} bits.} + \apitablerow{shmem\_put16}{Any noncharacter type that + has a storage size equal to \CONST{16} bits.} + \apitablerow{shmem\_put32}{Any noncharacter type + that has a storage size equal to \CONST{32} bits.} + \apitablerow{shmem\_put64}{Any noncharacter type that + has a storage size equal to \CONST{64} bits.} + \apitablerow{shmem\_put128}{Any noncharacter type that has a + storage size equal to \CONST{128} bits.} + +\apireturnvalues{ + None. 
+} +\apinotes{ +} + +\begin{apiexamples} + +\apicexample + { The following \FUNC{shmem\_put\_signal} example is for \Cstd[11] programs:} + {./example_code/shmem_put_signal_example.c} + {} +\end{apiexamples} + +\end{apidefinition} +\color{Black} diff --git a/example_code/shmem_put_signal_example.c b/example_code/shmem_put_signal_example.c new file mode 100644 index 000000000..f43e615e3 --- /dev/null +++ b/example_code/shmem_put_signal_example.c @@ -0,0 +1,60 @@ +#include +#include +#include +#include + +#define ITERATIONS (100) +#define MAX_SIZE (2<<18) + +int +main(int argc, char* argv[]) +{ + shmem_init(); + + int me = shmem_my_pe(); + int n = shmem_n_pes(); + int r = ITERATIONS; + size_t bloat = MAX_SIZE; + size_t size; + + for (size = 1; size < bloat; size*=2) { + uint64_t* message = malloc(size * sizeof(uint64_t)); + uint64_t* data = shmem_malloc(r * size * sizeof(uint64_t)); + uint64_t* signals = shmem_malloc(r * sizeof(uint64_t)); + + memset(message, 0, size * sizeof(uint64_t)); + memset(data, 0, r * size * sizeof(uint64_t)); + memset(signals, 0, r * sizeof(uint64_t)); + shmem_barrier_all(); + + message[0] = 10; + int i; + for (i = 0; i < r; i++) { + int j = i - (me == 0); + if (j >= 0) { + shmem_long_wait_until((long *)&signals[j], + SHMEM_CMP_EQ, 1); + message[0] = data[j * size] + 10; + } + int pe = (me + 1) % n; + shmemx_putmem_signal(&data[i * size], message, + size * sizeof(uint64_t), + &signals[i], 1, pe); + } + if (me == 0) { + shmem_long_wait_until((long *)&signals[r-1], + SHMEM_CMP_EQ, 1); + printf("Final message = %lu for size %zu\n", + data[(r-1) * size], size); + } + + shmem_barrier_all(); + shmem_free(signals); + shmem_free(data); + free(message); + shmem_barrier_all(); + } + + shmem_finalize(); + return 0; +} diff --git a/main_spec.tex b/main_spec.tex index d27dae188..2f8e7c4c1 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -170,6 +170,9 @@ \subsubsection{\textbf{SHMEM\_P}}\label{subsec:shmem_p} 
\subsubsection{\textbf{SHMEM\_IPUT}}\label{subsec:shmem_iput} \input{content/shmem_iput.tex} +\subsubsection{\textbf{SHMEM\_PUT\_SIGNAL}}\label{subsec:shmem_put_signal} +\input{content/shmem_put_signal.tex} + \subsubsection{\textbf{SHMEM\_GET}}\label{subsec:shmem_get} \input{content/shmem_get.tex} diff --git a/utils/defs.tex b/utils/defs.tex index d7161ecdb..b261712bd 100644 --- a/utils/defs.tex +++ b/utils/defs.tex @@ -59,6 +59,7 @@ \newcommand{\source}{\textit{source}} \newcommand{\dest}{\textit{dest}} +\newcommand{\signal}{\textit{signal}} \newcommand{\PUT}{\textit{Put}} \newcommand{\GET}{\textit{Get}} \newcommand{\OPR}[1]{\textit{#1}} From 2c4075cb6bff0d9843ea8d70d251731314daeee5 Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Thu, 24 May 2018 13:27:01 -0500 Subject: [PATCH 02/30] Update put+signal after RMA WG meeting --- content/shmem_put_signal.tex | 57 +++++++++++++----------------------- 1 file changed, 21 insertions(+), 36 deletions(-) diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index 8cc9e3841..5efd326f2 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -38,58 +38,43 @@ data object must be remotely accessible.} \apiargument{IN}{source}{Data object containing the data to be copied.} \apiargument{IN}{nelems}{Number of elements in the \VAR{dest} and \VAR{source} - arrays. \VAR{nelems} must be of type \VAR{size\_t} for \Cstd. When using - \Fortran, it must be a constant, variable, or array element of default - integer type.} - \apiargument{OUT}{sig\_addr}{Signal data object to be updated on the remote - \ac{PE} to be updated as the signal. This signal data object must be - remotely accessible and it can be in the same or differnt memory segment - as the \VAR{dest} data object.} + arrays. \VAR{nelems} must be of type \VAR{size\_t} for \Cstd.} + \apiargument{OUT}{sig\_addr}{Data object to be updated on the remote + \ac{PE} as the signal. 
This signal data object must be + remotely accessible.} \apiargument{IN}{signal}{Unsigned 64-bit value used to set the remote \VAR{sig\_addr} signal data object.} - \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE}. \VAR{pe} must be - of type integer. When using \Fortran, it must be a constant, variable, - or array element of default integer type.} + \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE}.} \end{apiarguments} \apidescription{ The routines return after the data has been copied out of the \source{} - array on the local \ac{PE}. The delivery of \signal flag on the remote - \ac{PE} guarantees the delivery of data words into the data object on the - remote \ac{PE}. Furthermore, two successive put with signal routines or - a successive put followed by a put with signal routine may deliver data - out of order unless a call to \FUNC{shmem\_fence} is introduced between - the two calls and the delivery of the \signal flag on the remote \ac{PE} - guarantees only the delivery of its corresponding data object on the - remote \ac{PE}. - } - -\apidesctable{ - The \dest{} and \source{} data objects must conform to certain typing - constraints, which are as follows:} - {Routine}{Data type of \VAR{dest} and \VAR{source}} - \apitablerow{shmem\_putmem}{Any data type. nelems is scaled in bytes.} - \apitablerow{shmem\_put8}{Any noncharacter type that - has a storage size equal to \CONST{8} bits.} - \apitablerow{shmem\_put16}{Any noncharacter type that - has a storage size equal to \CONST{16} bits.} - \apitablerow{shmem\_put32}{Any noncharacter type - that has a storage size equal to \CONST{32} bits.} - \apitablerow{shmem\_put64}{Any noncharacter type that - has a storage size equal to \CONST{64} bits.} - \apitablerow{shmem\_put128}{Any noncharacter type that has a - storage size equal to \CONST{128} bits.} + array on the local \ac{PE}. 
The delivery of \signal{} flag on the remote + \ac{PE} guarantees the delivery of its corresponding \dest{} data words + into the data object on the remote \ac{PE}. +} \apireturnvalues{ None. } + \apinotes{ + The \VAR{sig\_addr} data object can be in the same or different memory + segment as the \VAR{dest} data object. + + The delivery of \signal{} flag on the remote \ac{PE} guarantees only the + delivery of its corresponding \dest{} data words into the data object on + the remote \ac{PE}. For example, two successive put with signal routines + or a successive put followed by a put with signal routine may deliver data + out of order unless a call to \FUNC{shmem\_fence} is introduced between + the two calls. } \begin{apiexamples} \apicexample - { The following \FUNC{shmem\_put\_signal} example is for \Cstd[11] programs:} + { The following example is for the \FUNC{shmem\_put\_signal} usage for + ping-pong programs:} {./example_code/shmem_put_signal_example.c} {} \end{apiexamples} From 83fd993cfc0b9b1f96171b0be6a2a284a20a9eee Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Sat, 26 May 2018 03:36:10 -0500 Subject: [PATCH 03/30] RM memory segment reference from put_signal notes --- content/shmem_put_signal.tex | 8 ++++---- utils/defs.tex | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index 5efd326f2..fa00ccd43 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -59,15 +59,15 @@ } \apinotes{ - The \VAR{sig\_addr} data object can be in the same or different memory - segment as the \VAR{dest} data object. + The \VAR{sig\_addr} data object can be placed in the symmetric data segment + or the symmetric heap which can be same or different from the \VAR{dest} + data object. The delivery of \signal{} flag on the remote \ac{PE} guarantees only the delivery of its corresponding \dest{} data words into the data object on the remote \ac{PE}. 
For example, two successive put with signal routines or a successive put followed by a put with signal routine may deliver data - out of order unless a call to \FUNC{shmem\_fence} is introduced between - the two calls. + out of order. } \begin{apiexamples} diff --git a/utils/defs.tex b/utils/defs.tex index b261712bd..a33d57764 100644 --- a/utils/defs.tex +++ b/utils/defs.tex @@ -409,14 +409,14 @@ \textbf{C11:} \lstset{language={C}, backgroundcolor=\color{gray}, lineskip=2pt, escapechar=@, - morekeywords={size_t, ptrdiff_t, TYPE, _Noreturn, shmem_ctx_t}, + morekeywords={size_t, ptrdiff_t, TYPE, _Noreturn, shmem_ctx_t, uint64_t}, aboveskip=0pt, belowskip=0pt}}{} \lstnewenvironment{CsynopsisCol} { \lstset{language={C}, backgroundcolor=\color{gray}, lineskip=2pt, escapechar=@, - morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, shmem_ctx_t}, + morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, shmem_ctx_t, uint64_t}, aboveskip=0pt, belowskip=0pt}}{} @@ -425,7 +425,7 @@ \textbf{C/C++:} \lstset{language={C}, backgroundcolor=\color{gray}, lineskip=2pt, escapechar=@, - morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, shmem_ctx_t}, + morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, shmem_ctx_t, uint64_t}, aboveskip=0pt, belowskip=0pt}}{} \lstnewenvironment{CsynopsisST} From 5680e752631f3703bda64df4b0d8304639c63664 Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Sat, 26 May 2018 04:13:39 -0500 Subject: [PATCH 04/30] rm ping-pong and add ring-bcast for put_sig example --- content/shmem_put_signal.tex | 4 +- example_code/shmem_put_signal_example.c | 77 +++++++++++-------------- utils/defs.tex | 2 +- 3 files changed, 36 insertions(+), 47 deletions(-) diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index fa00ccd43..c5b12772b 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -73,8 +73,8 @@ \begin{apiexamples} \apicexample - { The following example is for the \FUNC{shmem\_put\_signal} usage 
for - ping-pong programs:} + { The following example shows a simple ring-based broacast operation using + \FUNC{shmem\_put\_signal}:} {./example_code/shmem_put_signal_example.c} {} \end{apiexamples} diff --git a/example_code/shmem_put_signal_example.c b/example_code/shmem_put_signal_example.c index f43e615e3..5d0fd6c4d 100644 --- a/example_code/shmem_put_signal_example.c +++ b/example_code/shmem_put_signal_example.c @@ -1,60 +1,49 @@ #include #include -#include #include -#define ITERATIONS (100) -#define MAX_SIZE (2<<18) +#define MAX_SIZE (2<<10) +#define VAL_USED 10 int main(int argc, char* argv[]) { + int i, err_count = 0; + shmem_init(); + size_t size = MAX_SIZE; int me = shmem_my_pe(); int n = shmem_n_pes(); - int r = ITERATIONS; - size_t bloat = MAX_SIZE; - size_t size; - - for (size = 1; size < bloat; size*=2) { - uint64_t* message = malloc(size * sizeof(uint64_t)); - uint64_t* data = shmem_malloc(r * size * sizeof(uint64_t)); - uint64_t* signals = shmem_malloc(r * sizeof(uint64_t)); - - memset(message, 0, size * sizeof(uint64_t)); - memset(data, 0, r * size * sizeof(uint64_t)); - memset(signals, 0, r * sizeof(uint64_t)); - shmem_barrier_all(); - - message[0] = 10; - int i; - for (i = 0; i < r; i++) { - int j = i - (me == 0); - if (j >= 0) { - shmem_long_wait_until((long *)&signals[j], - SHMEM_CMP_EQ, 1); - message[0] = data[j * size] + 10; - } - int pe = (me + 1) % n; - shmemx_putmem_signal(&data[i * size], message, - size * sizeof(uint64_t), - &signals[i], 1, pe); - } - if (me == 0) { - shmem_long_wait_until((long *)&signals[r-1], - SHMEM_CMP_EQ, 1); - printf("Final message = %lu for size %zu\n", - data[(r-1) * size], size); - } - - shmem_barrier_all(); - shmem_free(signals); - shmem_free(data); - free(message); - shmem_barrier_all(); + int pe = (me + 1)%n; + + uint64_t* message = malloc(size * sizeof(uint64_t)); + uint64_t* data = shmem_malloc(size * sizeof(uint64_t)); + uint64_t* signals = shmem_malloc(sizeof(uint64_t)); + + signals[0] = 0; + for (i = 0; 
i < size; i++) { + message[i] = VAL_USED; + data[i] = 0; + } + shmem_barrier_all(); + + if (me != 0) { + shmem_long_wait_until((long *)&signals[0], SHMEM_CMP_EQ, 1); } + shmemx_putmem_signal(data, message, size*sizeof(uint64_t), + &signals[0], 1, pe); + + if (me == 0) { + shmem_long_wait_until((long *)&signals[0], SHMEM_CMP_EQ, 1); + printf("BCAST with put with signal is complete\n"); + } + + free(message); + shmem_free(data); + shmem_free(signals); + shmem_finalize(); - return 0; + return 0; } diff --git a/utils/defs.tex b/utils/defs.tex index a33d57764..60e8cd101 100644 --- a/utils/defs.tex +++ b/utils/defs.tex @@ -517,7 +517,7 @@ ##1 \lstinputlisting[language={C}, tabsize=2, basicstyle=\ttfamily\footnotesize, - morekeywords={size_t, ptrdiff_t, shmem_ctx_t, _Thread_local}]{##2} + morekeywords={size_t, ptrdiff_t, shmem_ctx_t, _Thread_local, uint64_t}]{##2} ##3 } \newcommand{\apifexample}[3]{ ##1 From 7c8e3bb21da5e66536c5dbf234d8864d87ee5a43 Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Sat, 26 May 2018 04:23:04 -0500 Subject: [PATCH 05/30] add changelog for blocking put_signal --- content/backmatter.tex | 3 +++ 1 file changed, 3 insertions(+) diff --git a/content/backmatter.tex b/content/backmatter.tex index 647278eac..107652419 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -510,6 +510,9 @@ \section{Version 1.5} The following list describes the specific changes in \openshmem[1.5]: \begin{itemize} % +\item Added support for blocking put with signal functions. +\\ See Section \ref{subsec:shmem_put_signal}. +% \item Specified the validity of communication contexts, added the constant \CONST{SHMEM\_CTX\_INVALID}, and clarified the behavior of \FUNC{shmem\_ctx\_*} routines on invalid contexts. 
From 86981ad9a433664c1118a082fafd1520374b5549 Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Fri, 29 Jun 2018 09:43:31 -0500 Subject: [PATCH 06/30] Implement initial reviews for put-with-sig --- content/shmem_put_signal.tex | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index c5b12772b..dd8e7c3da 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -1,8 +1,8 @@ \color{Green} \apisummary{ - The put with signal routines provide a method for copying data from a + The put-with-signal routines provide a method for copying data from a contiguous local data object to a data object on a specified \ac{PE} - and set a remote flag to signal completion. + and subsequently setting a remote flag to signal completion. } \begin{apidefinition} @@ -59,21 +59,23 @@ } \apinotes{ - The \VAR{sig\_addr} data object can be placed in the symmetric data segment - or the symmetric heap which can be same or different from the \VAR{dest} - data object. + The \VAR{dest} and \VAR{sig\_addr} data object must both be remotely + accessible, but may each be allocated from the symmetric heap or global/ + static memory. - The delivery of \signal{} flag on the remote \ac{PE} guarantees only the + The delivery of \signal{} flag on the remote \ac{PE} indicates only the delivery of its corresponding \dest{} data words into the data object on - the remote \ac{PE}. For example, two successive put with signal routines - or a successive put followed by a put with signal routine may deliver data - out of order. + the remote \ac{PE}. Without a memory-ordering operation, there is no implied + ordering between the delivery of the signal word of a put-with-signal + routine and another data transfer. 
For example, the delivery of the signal + word in a sequence consisting of a put routine followed by a put-with-signal + routine does not imply delivery of the put routine's data. } \begin{apiexamples} \apicexample - { The following example shows a simple ring-based broacast operation using + { The following example shows a simple ring-based broadcast operation using \FUNC{shmem\_put\_signal}:} {./example_code/shmem_put_signal_example.c} {} From 643f4d0d0f89351fa232faeb49665da0ae13585a Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Fri, 29 Jun 2018 09:46:13 -0500 Subject: [PATCH 07/30] Implement reviews for put-with-sig - draft 2 --- content/shmem_put_signal.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index dd8e7c3da..6e366193d 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -50,7 +50,7 @@ \apidescription{ The routines return after the data has been copied out of the \source{} array on the local \ac{PE}. The delivery of \signal{} flag on the remote - \ac{PE} guarantees the delivery of its corresponding \dest{} data words + \ac{PE} indicates the delivery of its corresponding \dest{} data words into the data object on the remote \ac{PE}. 
} From fce2079fce5cd0c7ab3ccd38655b21d6fff4697b Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Mon, 23 Jul 2018 16:01:49 -0500 Subject: [PATCH 08/30] Fix example code after put-with-signal reading --- example_code/shmem_put_signal_example.c | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/example_code/shmem_put_signal_example.c b/example_code/shmem_put_signal_example.c index 5d0fd6c4d..cd10da1f9 100644 --- a/example_code/shmem_put_signal_example.c +++ b/example_code/shmem_put_signal_example.c @@ -1,48 +1,41 @@ #include -#include #include +#include -#define MAX_SIZE (2<<10) -#define VAL_USED 10 - -int -main(int argc, char* argv[]) +int main(void) { int i, err_count = 0; shmem_init(); - size_t size = MAX_SIZE; + size_t size = (2<<10); int me = shmem_my_pe(); int n = shmem_n_pes(); int pe = (me + 1)%n; - uint64_t* message = malloc(size * sizeof(uint64_t)); uint64_t* data = shmem_malloc(size * sizeof(uint64_t)); - uint64_t* signals = shmem_malloc(sizeof(uint64_t)); + static uint64_t sig_addr = 0; - signals[0] = 0; for (i = 0; i < size; i++) { - message[i] = VAL_USED; + message[i] = me; data[i] = 0; } shmem_barrier_all(); if (me != 0) { - shmem_long_wait_until((long *)&signals[0], SHMEM_CMP_EQ, 1); + shmem_uint64_wait_until(&sig_addr, SHMEM_CMP_EQ, 1); } - shmemx_putmem_signal(data, message, size*sizeof(uint64_t), - &signals[0], 1, pe); + shmem_putmem_signal(data, message, size*sizeof(uint64_t), + &sig_addr, 1, pe); if (me == 0) { - shmem_long_wait_until((long *)&signals[0], SHMEM_CMP_EQ, 1); + shmem_uint64_wait_until(&sig_addr, SHMEM_CMP_EQ, 1); printf("BCAST with put with signal is complete\n"); } free(message); shmem_free(data); - shmem_free(signals); shmem_finalize(); return 0; From a67a5f3b084a53f6b45ffedb59c15e155392ccb6 Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Tue, 7 Aug 2018 13:09:03 -0500 Subject: [PATCH 09/30] Reword signal argument defn as per review --- 
content/shmem_put_signal.tex | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index 6e366193d..dfa214c20 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -42,8 +42,8 @@ \apiargument{OUT}{sig\_addr}{Data object to be updated on the remote \ac{PE} as the signal. This signal data object must be remotely accessible.} - \apiargument{IN}{signal}{Unsigned 64-bit value used to set the remote - \VAR{sig\_addr} signal data object.} + \apiargument{IN}{signal}{Unsigned 64-bit value that is assigned to the + remote \VAR{sig\_addr} signal data object.} \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE}.} \end{apiarguments} From 8f55c039d4a6b9891628b8a4ac70ca13ff429500 Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Wed, 8 Aug 2018 08:08:14 -0500 Subject: [PATCH 10/30] Update sig_addr dst data object kind explanation --- content/shmem_put_signal.tex | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index dfa214c20..86481bd6b 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -59,9 +59,10 @@ } \apinotes{ - The \VAR{dest} and \VAR{sig\_addr} data object must both be remotely - accessible, but may each be allocated from the symmetric heap or global/ - static memory. + The \VAR{dest} and \VAR{sig\_addr} data objects must both be remotely + accessible. The \VAR{sig\_addr} and \VAR{dest} could be of different kinds, + for example, one could be a global/static \Cstd variable and the other could + be allocated on the symmetric heap. 
The delivery of \signal{} flag on the remote \ac{PE} indicates only the delivery of its corresponding \dest{} data words into the data object on From dc36e68facac42b9b3e9a8d3530e6bc77f524d5d Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Mon, 10 Sep 2018 12:55:41 -0500 Subject: [PATCH 11/30] Add put-with-signal in p2p-sync intro --- content/p2p_sync_intro.tex | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/content/p2p_sync_intro.tex b/content/p2p_sync_intro.tex index 8855716ab..11cf4474b 100644 --- a/content/p2p_sync_intro.tex +++ b/content/p2p_sync_intro.tex @@ -3,8 +3,9 @@ object. The point-to-point synchronization routines can be used to portably ensure that memory access operations observe remote updates in the order enforced by -the initiator \ac{PE} using the \FUNC{shmem\_fence} and \FUNC{shmem\_quiet} -routines. +the initiator \ac{PE} using the put-with-signal(refer +section~\ref{subsec:shmem_put_signal}, \FUNC{shmem\_fence} and +\FUNC{shmem\_quiet} routines. Where appropriate compiler support is available, \openshmem provides type-generic point-to-point synchronization interfaces via \Cstd[11] generic From 34a33e5f8e2554be3da711ed196b97c4d4f120e7 Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Mon, 10 Sep 2018 13:06:33 -0500 Subject: [PATCH 12/30] Add restrict qualifier to sig_addr We are adding the restrict qualifier to sig_addr data objects and expect them not to overlap or be the same as the dest or source data objects. 
--- content/shmem_put_signal.tex | 21 +++++++++++---------- utils/defs.tex | 9 ++++++--- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index 86481bd6b..f4e6bb992 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -1,4 +1,3 @@ -\color{Green} \apisummary{ The put-with-signal routines provide a method for copying data from a contiguous local data object to a data object on a specified \ac{PE} @@ -8,26 +7,26 @@ \begin{apidefinition} \begin{C11synopsis} -void @\FuncDecl{shmem\_put\_signal}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); -void @\FuncDecl{shmem\_put\_signal}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_put\_signal}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_put\_signal}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); \end{C11synopsis} where \TYPE{} is one of the standard \ac{RMA} types specified by Table \ref{stdrmatypes}. 
\begin{Csynopsis} -void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_put\_signal}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); -void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_put\_signal}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_put\_signal}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_put\_signal}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); \end{Csynopsis} where \TYPE{} is one of the standard \ac{RMA} types and has a corresponding \TYPENAME{} specified by Table \ref{stdrmatypes}. \begin{CsynopsisCol} -void @\FuncDecl{shmem\_put\FuncParam{SIZE}\_signal}@(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); -void @\FuncDecl{shmem\_ctx\_put\FuncParam{SIZE}\_signal}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_put\FuncParam{SIZE}\_signal}@(void *dest, const void *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_ctx\_put\FuncParam{SIZE}\_signal}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); \end{CsynopsisCol} where \SIZE{} is one of \CONST{8, 16, 32, 64, 128}. 
\begin{CsynopsisCol} -void @\FuncDecl{shmem\_putmem\_signal}@(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); -void @\FuncDecl{shmem\_ctx\_putmem\_signal}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_putmem\_signal}@(void *dest, const void *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_ctx\_putmem\_signal}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); \end{CsynopsisCol} \begin{apiarguments} @@ -64,6 +63,9 @@ for example, one could be a global/static \Cstd variable and the other could be allocated on the symmetric heap. + The restrict qualifier in \VAR{sig\_addr} expects the data object to be + distinct from \VAR{dest} and \VAR{source} data objects. + The delivery of \signal{} flag on the remote \ac{PE} indicates only the delivery of its corresponding \dest{} data words into the data object on the remote \ac{PE}. 
Without a memory-ordering operation, there is no implied @@ -83,4 +85,3 @@ \end{apiexamples} \end{apidefinition} -\color{Black} diff --git a/utils/defs.tex b/utils/defs.tex index 60e8cd101..d39b6303e 100644 --- a/utils/defs.tex +++ b/utils/defs.tex @@ -409,14 +409,16 @@ \textbf{C11:} \lstset{language={C}, backgroundcolor=\color{gray}, lineskip=2pt, escapechar=@, - morekeywords={size_t, ptrdiff_t, TYPE, _Noreturn, shmem_ctx_t, uint64_t}, + morekeywords={size_t, ptrdiff_t, TYPE, _Noreturn, shmem_ctx_t, + uint64_t, restrict}, aboveskip=0pt, belowskip=0pt}}{} \lstnewenvironment{CsynopsisCol} { \lstset{language={C}, backgroundcolor=\color{gray}, lineskip=2pt, escapechar=@, - morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, shmem_ctx_t, uint64_t}, + morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, shmem_ctx_t, + uint64_t, restrict}, aboveskip=0pt, belowskip=0pt}}{} @@ -425,7 +427,8 @@ \textbf{C/C++:} \lstset{language={C}, backgroundcolor=\color{gray}, lineskip=2pt, escapechar=@, - morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, shmem_ctx_t, uint64_t}, + morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, shmem_ctx_t, + uint64_t, restrict}, aboveskip=0pt, belowskip=0pt}}{} \lstnewenvironment{CsynopsisST} From ecfdb213adc1116073829960390374b5da1cc131 Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Mon, 8 Oct 2018 16:19:35 -0500 Subject: [PATCH 13/30] RM reference to put-with-signal from p2p sync intro Previously, we had a statement (Refer Section...) in the p2p sync intro section. Removing that reference and just using the routine name directly. --- content/p2p_sync_intro.tex | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/content/p2p_sync_intro.tex b/content/p2p_sync_intro.tex index 11cf4474b..c2a2e1dbd 100644 --- a/content/p2p_sync_intro.tex +++ b/content/p2p_sync_intro.tex @@ -3,8 +3,7 @@ object. 
The point-to-point synchronization routines can be used to portably ensure that memory access operations observe remote updates in the order enforced by -the initiator \ac{PE} using the put-with-signal(refer -section~\ref{subsec:shmem_put_signal}, \FUNC{shmem\_fence} and +the initiator \ac{PE} using the put-with-signal, \FUNC{shmem\_fence} and \FUNC{shmem\_quiet} routines. Where appropriate compiler support is available, \openshmem provides From 5c67d334fb7c4d44cc5e3d7d382b5d245bbb19ac Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Mon, 8 Oct 2018 16:23:06 -0500 Subject: [PATCH 14/30] RM restrict qualifier from the put-with-signal usage --- content/shmem_put_signal.tex | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index f4e6bb992..f446e9b1f 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -7,26 +7,26 @@ \begin{apidefinition} \begin{C11synopsis} -void @\FuncDecl{shmem\_put\_signal}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); -void @\FuncDecl{shmem\_put\_signal}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_put\_signal}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_put\_signal}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); \end{C11synopsis} where \TYPE{} is one of the standard \ac{RMA} types specified by Table \ref{stdrmatypes}. 
\begin{Csynopsis} -void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_put\_signal}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); -void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_put\_signal}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_put\_signal}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_put\_signal}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); \end{Csynopsis} where \TYPE{} is one of the standard \ac{RMA} types and has a corresponding \TYPENAME{} specified by Table \ref{stdrmatypes}. \begin{CsynopsisCol} -void @\FuncDecl{shmem\_put\FuncParam{SIZE}\_signal}@(void *dest, const void *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); -void @\FuncDecl{shmem\_ctx\_put\FuncParam{SIZE}\_signal}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_put\FuncParam{SIZE}\_signal}@(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_ctx\_put\FuncParam{SIZE}\_signal}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); \end{CsynopsisCol} where \SIZE{} is one of \CONST{8, 16, 32, 64, 128}. 
\begin{CsynopsisCol} -void @\FuncDecl{shmem\_putmem\_signal}@(void *dest, const void *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); -void @\FuncDecl{shmem\_ctx\_putmem\_signal}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *restrict sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_putmem\_signal}@(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_ctx\_putmem\_signal}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); \end{CsynopsisCol} \begin{apiarguments} @@ -63,9 +63,6 @@ for example, one could be a global/static \Cstd variable and the other could be allocated on the symmetric heap. - The restrict qualifier in \VAR{sig\_addr} expects the data object to be - distinct from \VAR{dest} and \VAR{source} data objects. - The delivery of \signal{} flag on the remote \ac{PE} indicates only the delivery of its corresponding \dest{} data words into the data object on the remote \ac{PE}. Without a memory-ordering operation, there is no implied From 8cd7a55420435b1afeb7caec6736389cd4a8155c Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Mon, 8 Oct 2018 16:27:12 -0500 Subject: [PATCH 15/30] Add new context arg explanation in PWS --- content/shmem_put_signal.tex | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index f446e9b1f..3360d1c30 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -30,9 +30,9 @@ \end{CsynopsisCol} \begin{apiarguments} - \apiargument{IN}{ctx}{The context on which to perform the operation. - When this argument is not provided, the operation is performed on - \CONST{SHMEM\_CTX\_DEFAULT}.} + \apiargument{IN}{ctx}{A context handle specifying the context on which to + perform the operation. 
When this argument is not provided, the operation is + performed on the default context.} \apiargument{OUT}{dest}{Data object to be updated on the remote \ac{PE}. This data object must be remotely accessible.} \apiargument{IN}{source}{Data object containing the data to be copied.} From 47bc318046c3c543e64f076e3b664c105dab29cd Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Mon, 8 Oct 2018 17:02:33 -0500 Subject: [PATCH 16/30] Fix review comments for PWS example 1. Use calloc and avoid barriers from malloc and explicit calls 2. use all C11 generic shmem calls 3. Follow shmem bcast semantics - to bcast to source itself 4. convert wavefront-like transfer semantics to true bcast --- example_code/shmem_put_signal_example.c | 31 +++++++++++-------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/example_code/shmem_put_signal_example.c b/example_code/shmem_put_signal_example.c index cd10da1f9..22c1f4dff 100644 --- a/example_code/shmem_put_signal_example.c +++ b/example_code/shmem_put_signal_example.c @@ -1,37 +1,34 @@ -#include #include +#include #include +#include int main(void) { - int i, err_count = 0; + int i, err_count = 0; shmem_init(); - size_t size = (2<<10); - int me = shmem_my_pe(); - int n = shmem_n_pes(); - int pe = (me + 1)%n; - uint64_t* message = malloc(size * sizeof(uint64_t)); - uint64_t* data = shmem_malloc(size * sizeof(uint64_t)); + size_t size = 2048; + int me = shmem_my_pe(); + int n = shmem_n_pes(); + int pe = (me + 1) % n; + uint64_t * message = malloc(size * sizeof(uint64_t)); static uint64_t sig_addr = 0; for (i = 0; i < size; i++) { message[i] = me; - data[i] = 0; } - shmem_barrier_all(); + + uint64_t *data = shmem_calloc(size, sizeof(uint64_t)); if (me != 0) { - shmem_uint64_wait_until(&sig_addr, SHMEM_CMP_EQ, 1); + shmem_wait_until(&sig_addr, SHMEM_CMP_EQ, 1); + memcpy(message, data, size * sizeof(uint64_t)); } - shmem_putmem_signal(data, message, size*sizeof(uint64_t), - &sig_addr, 1, pe); - - 
if (me == 0) { - shmem_uint64_wait_until(&sig_addr, SHMEM_CMP_EQ, 1); - printf("BCAST with put with signal is complete\n"); + if (me != (n - 1)) { + shmem_put_signal(data, message, size, &sig_addr, 1, pe); } free(message); From 246605310212340f9ec0b2c6bb972e3437682554 Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Mon, 8 Oct 2018 17:11:56 -0500 Subject: [PATCH 17/30] Duplicate PSW explanation from API summary to description --- content/shmem_put_signal.tex | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index 3360d1c30..5a7a5fbd4 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -47,10 +47,13 @@ \end{apiarguments} \apidescription{ - The routines return after the data has been copied out of the \source{} - array on the local \ac{PE}. The delivery of \signal{} flag on the remote - \ac{PE} indicates the delivery of its corresponding \dest{} data words - into the data object on the remote \ac{PE}. + The put-with-signal routines provide a method for copying data from a + contiguous local data object to a data object on a specified \ac{PE} + and subsequently setting a remote flag to signal completion. The routines + return after the data has been copied out of the \source{} array on the + local \ac{PE}. The delivery of \signal{} flag on the remote \ac{PE} + indicates the delivery of its corresponding \dest{} data words into the + data object on the remote \ac{PE}. } \apireturnvalues{ From 2662d767241a080699562c44d3dd850000d7671a Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Thu, 11 Oct 2018 11:23:04 -0500 Subject: [PATCH 18/30] Update the put-with-signal example Previously, we performed bcast example with SHMEM bcast semantics, without any transfer to PE-0. Now, we perform bcast from PE-0 to all other PEs and itself. 
--- content/shmem_put_signal.tex | 6 ++++-- example_code/shmem_put_signal_example.c | 6 ++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index 5a7a5fbd4..e8edb013d 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -78,8 +78,10 @@ \begin{apiexamples} \apicexample - { The following example shows a simple ring-based broadcast operation using - \FUNC{shmem\_put\_signal}:} + {The following example demonstrates the usage of \FUNC{shmem\_put\_signal}. + It shows the implementation of a broadcast operation from \ac{PE} 0 to + itself and all other \acp{PE} in the job as a simple ring-based algorithm + using \FUNC{shmem\_put\_signal}:} {./example_code/shmem_put_signal_example.c} {} \end{apiexamples} diff --git a/example_code/shmem_put_signal_example.c b/example_code/shmem_put_signal_example.c index 22c1f4dff..202ccbf0d 100644 --- a/example_code/shmem_put_signal_example.c +++ b/example_code/shmem_put_signal_example.c @@ -24,10 +24,8 @@ int main(void) if (me != 0) { shmem_wait_until(&sig_addr, SHMEM_CMP_EQ, 1); - memcpy(message, data, size * sizeof(uint64_t)); - } - - if (me != (n - 1)) { + shmem_put_signal(data, data, size, &sig_addr, 1, pe); + } else { shmem_put_signal(data, message, size, &sig_addr, 1, pe); } From 266564e2ab17bc5c066d20888bcb9ce7c22c9dac Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Thu, 18 Oct 2018 12:30:41 -0500 Subject: [PATCH 19/30] Adding overlapping semantics in put-with-signal --- content/shmem_put_signal.tex | 2 ++ 1 file changed, 2 insertions(+) diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index e8edb013d..e657a7a00 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -66,6 +66,8 @@ for example, one could be a global/static \Cstd variable and the other could be allocated on the symmetric heap. 
+ The \VAR{sig\_addr} and \VAR{dest} may not be overlapping in memory. + The delivery of \signal{} flag on the remote \ac{PE} indicates only the delivery of its corresponding \dest{} data words into the data object on the remote \ac{PE}. Without a memory-ordering operation, there is no implied From 9e879c41709b0ffa2d8d9c9d4f6c4c6d50b0fc4b Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Mon, 22 Oct 2018 13:04:18 -0500 Subject: [PATCH 20/30] Reorder RMA operations in put-with-signal example Performing some quick cleanup on the put-with-signal example. --- example_code/shmem_put_signal_example.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/example_code/shmem_put_signal_example.c b/example_code/shmem_put_signal_example.c index 202ccbf0d..a0a4ed36b 100644 --- a/example_code/shmem_put_signal_example.c +++ b/example_code/shmem_put_signal_example.c @@ -22,11 +22,11 @@ int main(void) uint64_t *data = shmem_calloc(size, sizeof(uint64_t)); - if (me != 0) { + if (me == 0) { + shmem_put_signal(data, message, size, &sig_addr, 1, pe); + } else { shmem_wait_until(&sig_addr, SHMEM_CMP_EQ, 1); shmem_put_signal(data, data, size, &sig_addr, 1, pe); - } else { - shmem_put_signal(data, message, size, &sig_addr, 1, pe); } free(message); From 6c20a843aec34ef623af9f30eb94fd3298d28f1d Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Tue, 8 Jan 2019 12:18:58 -0600 Subject: [PATCH 21/30] Explicitly state the signal update is AMO Based on recent review comments, it looks like it would be more clear if we state that the signal update is an atomic operation. We have added this as part of the Notes to Implementers section. 
--- content/shmem_put_signal.tex | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index e657a7a00..15f9f0e4a 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -77,6 +77,13 @@ routine does not imply delivery of the put routine's data. } +\apiimpnotes{ + Implementations must ensure that put-with-signal routines are compatible + with all point-to-point synchronization interfaces. The delivery of + \signal{} flag on the remote \ac{PE} must not cause partial updates. This + requires the update on \signal{} flag to be an atomic memory operation. +} + \begin{apiexamples} \apicexample From 441d681208548d0b32a76f97424cc8fd9ff14a0f Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Wed, 9 Jan 2019 14:42:28 -0600 Subject: [PATCH 22/30] Move NBI put-with-signal apiimpnotes to apinotes Previously, we had the information about the signal update's atomicity guarantees in the notes to implementors section for put-with-signal. We are now moving this into the main notes section. We have also clarified the atomicity guarantees by referring to the atomicity section. --- content/shmem_put_signal.tex | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index 15f9f0e4a..17c461b7c 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -75,13 +75,12 @@ routine and another data transfer. For example, the delivery of the signal word in a sequence consisting of a put routine followed by a put-with-signal routine does not imply delivery of the put routine's data. -} -\apiimpnotes{ - Implementations must ensure that put-with-signal routines are compatible - with all point-to-point synchronization interfaces. The delivery of - \signal{} flag on the remote \ac{PE} must not cause partial updates. This - requires the update on \signal{} flag to be an atomic memory operation.
+ The put-with-signal routines are compatible with all point-to-point + synchronization interfaces. The delivery of \VAR{signal} flag on the remote + \ac{PE} must not cause partial updates. This requires the update on + \VAR{signal} flag to be an atomic operation, with atomicity guarantees + described in Section~\ref{subsec:amo_guarantees}. } \begin{apiexamples} From c8ab4837c7f8feccb9b36cdab8c533cbae011036 Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Sat, 12 Jan 2019 12:32:54 -0600 Subject: [PATCH 23/30] Change put-with-signal in backmatter --- content/backmatter.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/backmatter.tex b/content/backmatter.tex index 107652419..915d9947a 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -510,7 +510,7 @@ \section{Version 1.5} The following list describes the specific changes in \openshmem[1.5]: \begin{itemize} % -\item Added support for blocking put with signal functions. +\item Added support for blocking put-with-signal functions. \\ See Section \ref{subsec:shmem_put_signal}. % \item Specified the validity of communication contexts, added the constant From 45fef4a5c41bc6410676819278049070de5c1428 Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Sat, 12 Jan 2019 12:57:18 -0600 Subject: [PATCH 24/30] RM unnecessary \signal and use \VAR{signal} instead --- content/shmem_put_signal.tex | 4 ++-- utils/defs.tex | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index 17c461b7c..38ba8f51f 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -51,7 +51,7 @@ contiguous local data object to a data object on a specified \ac{PE} and subsequently setting a remote flag to signal completion. The routines return after the data has been copied out of the \source{} array on the - local \ac{PE}. 
The delivery of \signal{} flag on the remote \ac{PE} + local \ac{PE}. The delivery of \VAR{signal} flag on the remote \ac{PE} indicates the delivery of its corresponding \dest{} data words into the data object on the remote \ac{PE}. } @@ -68,7 +68,7 @@ The \VAR{sig\_addr} and \VAR{dest} may not be overlapping in memory. - The delivery of \signal{} flag on the remote \ac{PE} indicates only the + The delivery of \VAR{signal} flag on the remote \ac{PE} indicates only the delivery of its corresponding \dest{} data words into the data object on the remote \ac{PE}. Without a memory-ordering operation, there is no implied ordering between the delivery of the signal word of a put-with-signal diff --git a/utils/defs.tex b/utils/defs.tex index d39b6303e..8bc13b224 100644 --- a/utils/defs.tex +++ b/utils/defs.tex @@ -59,7 +59,6 @@ \newcommand{\source}{\textit{source}} \newcommand{\dest}{\textit{dest}} -\newcommand{\signal}{\textit{signal}} \newcommand{\PUT}{\textit{Put}} \newcommand{\GET}{\textit{Get}} \newcommand{\OPR}[1]{\textit{#1}} From 5ad32c7ec612dd170c721718f8d7b41ed0b704a0 Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Sat, 12 Jan 2019 13:00:46 -0600 Subject: [PATCH 25/30] Fix \VAR usage correctly for dest and source --- content/shmem_put_signal.tex | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index 38ba8f51f..ddffa1a00 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -36,7 +36,7 @@ \apiargument{OUT}{dest}{Data object to be updated on the remote \ac{PE}. This data object must be remotely accessible.} \apiargument{IN}{source}{Data object containing the data to be copied.} - \apiargument{IN}{nelems}{Number of elements in the \VAR{dest} and \VAR{source} + \apiargument{IN}{nelems}{Number of elements in the \dest{} and \source{} arrays. 
\VAR{nelems} must be of type \VAR{size\_t} for \Cstd.} \apiargument{OUT}{sig\_addr}{Data object to be updated on the remote \ac{PE} as the signal. This signal data object must be @@ -61,12 +61,12 @@ } \apinotes{ - The \VAR{dest} and \VAR{sig\_addr} data objects must both be remotely - accessible. The \VAR{sig\_addr} and \VAR{dest} could be of different kinds, + The \dest{} and \VAR{sig\_addr} data objects must both be remotely + accessible. The \VAR{sig\_addr} and \dest{} could be of different kinds, for example, one could be a global/static \Cstd variable and the other could be allocated on the symmetric heap. - The \VAR{sig\_addr} and \VAR{dest} may not be overlapping in memory. + The \VAR{sig\_addr} and \dest{} may not be overlapping in memory. The delivery of \VAR{signal} flag on the remote \ac{PE} indicates only the delivery of its corresponding \dest{} data words into the data object on From ada81c9327c699b72688ba586652f00b87b8b9ce Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Sat, 12 Jan 2019 13:06:34 -0600 Subject: [PATCH 26/30] RM unnecessary restrict qualifier from macros --- utils/defs.tex | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/utils/defs.tex b/utils/defs.tex index 8bc13b224..7772da02d 100644 --- a/utils/defs.tex +++ b/utils/defs.tex @@ -409,7 +409,7 @@ \lstset{language={C}, backgroundcolor=\color{gray}, lineskip=2pt, escapechar=@, morekeywords={size_t, ptrdiff_t, TYPE, _Noreturn, shmem_ctx_t, - uint64_t, restrict}, + uint64_t}, aboveskip=0pt, belowskip=0pt}}{} \lstnewenvironment{CsynopsisCol} @@ -417,7 +417,7 @@ \lstset{language={C}, backgroundcolor=\color{gray}, lineskip=2pt, escapechar=@, morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, shmem_ctx_t, - uint64_t, restrict}, + uint64_t}, aboveskip=0pt, belowskip=0pt}}{} @@ -427,7 +427,7 @@ \lstset{language={C}, backgroundcolor=\color{gray}, lineskip=2pt, escapechar=@, morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, shmem_ctx_t, - 
uint64_t, restrict}, + uint64_t}, aboveskip=0pt, belowskip=0pt}}{} \lstnewenvironment{CsynopsisST} From 0908edaacb1f0ab2d57cf5ddfeb6328942c08de7 Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Mon, 14 Jan 2019 22:27:45 -0600 Subject: [PATCH 27/30] Reframe signal-put compatibility with p2p syncs --- content/shmem_put_signal.tex | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index ddffa1a00..2956ae879 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -76,11 +76,11 @@ word in a sequence consisting of a put routine followed by a put-with-signal routine does not imply delivery of the put routine's data. - The put-with-signal routines are compatible with all point-to-point - synchronization interfaces. The delivery of \VAR{signal} flag on the remote - \ac{PE} must not cause partial updates. This requires the update on - \VAR{signal} flag to be an atomic operation, with atomicity guarantees - described in Section~\ref{subsec:amo_guarantees}. + The signal set by the put-with-signal routines is compatible + with all point-to-point synchronization interfaces. The delivery of + \VAR{signal} flag on the remote \ac{PE} must not cause partial updates. This + requires the update on \VAR{signal} flag to be an atomic operation, with + atomicity guarantees described in Section~\ref{subsec:amo_guarantees}. 
} \begin{apiexamples} From da476d8eb51badf8493f4f1bb52cf1a1f78386c4 Mon Sep 17 00:00:00 2001 From: Naveen Namashivayam Ravichandrasekaran Date: Thu, 11 Apr 2019 00:53:46 -0500 Subject: [PATCH 28/30] Add support for different sig update operations --- content/library_constants.tex | 14 ++++++ content/shmem_put_signal.tex | 62 ++++++++++++++----------- example_code/shmem_put_signal_example.c | 4 +- 3 files changed, 52 insertions(+), 28 deletions(-) diff --git a/content/library_constants.tex b/content/library_constants.tex index 6ef572170..754081a5d 100644 --- a/content/library_constants.tex +++ b/content/library_constants.tex @@ -67,6 +67,20 @@ See Section~\ref{subsec:shmem_ctx_create} for more detail about its use. \tabularnewline \hline %% +\color{ForestGreen} +\LibConstDecl{SHMEM\_SIGNAL\_SET} & +\color{ForestGreen} +An integer constant expression corresponding to the signal update set operation. +See Section~\ref{subsec:shmem_put_signal} for more detail about its use. +\tabularnewline \hline +%% +\color{ForestGreen} +\LibConstDecl{SHMEM\_SIGNAL\_ADD} & +\color{ForestGreen} +An integer constant expression corresponding to the signal update add operation. +See Section~\ref{subsec:shmem_put_signal} for more detail about its use. +\tabularnewline \hline +%% \LibConstDecl{SHMEM\_SYNC\_VALUE} \begin{DeprecateBlock} \LibConstDecl{\_SHMEM\_SYNC\_VALUE} diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index 2956ae879..c6eef2e00 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -1,32 +1,33 @@ +\color{ForestGreen} \apisummary{ The put-with-signal routines provide a method for copying data from a contiguous local data object to a data object on a specified \ac{PE} - and subsequently setting a remote flag to signal completion. + and subsequently update a remote flag to signal completion. 
} \begin{apidefinition} \begin{C11synopsis} -void @\FuncDecl{shmem\_put\_signal}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); -void @\FuncDecl{shmem\_put\_signal}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_put\_signal}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +void @\FuncDecl{shmem\_put\_signal}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); \end{C11synopsis} where \TYPE{} is one of the standard \ac{RMA} types specified by Table \ref{stdrmatypes}. \begin{Csynopsis} -void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_put\_signal}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); -void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_put\_signal}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_put\_signal}@(TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_put\_signal}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); \end{Csynopsis} where \TYPE{} is one of the standard \ac{RMA} types and has a corresponding \TYPENAME{} specified by Table \ref{stdrmatypes}. 
\begin{CsynopsisCol} -void @\FuncDecl{shmem\_put\FuncParam{SIZE}\_signal}@(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); -void @\FuncDecl{shmem\_ctx\_put\FuncParam{SIZE}\_signal}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_put\FuncParam{SIZE}\_signal}@(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +void @\FuncDecl{shmem\_ctx\_put\FuncParam{SIZE}\_signal}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); \end{CsynopsisCol} where \SIZE{} is one of \CONST{8, 16, 32, 64, 128}. \begin{CsynopsisCol} -void @\FuncDecl{shmem\_putmem\_signal}@(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); -void @\FuncDecl{shmem\_ctx\_putmem\_signal}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int pe); +void @\FuncDecl{shmem\_putmem\_signal}@(void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); +void @\FuncDecl{shmem\_ctx\_putmem\_signal}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, uint64_t *sig_addr, uint64_t signal, int sig_op, int pe); \end{CsynopsisCol} \begin{apiarguments} @@ -43,17 +44,23 @@ remotely accessible.} \apiargument{IN}{signal}{Unsigned 64-bit value that is assigned to the remote \VAR{sig\_addr} signal data object.} + \apiargument{IN}{sig\_op}{Signal operator that represents the type of update + to be performed to the remote \VAR{sig\_addr} signal data object.} \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE}.} \end{apiarguments} \apidescription{ The put-with-signal routines provide a method for copying data from a contiguous local data object to a data object on a specified \ac{PE} - and subsequently setting a remote flag to 
signal completion. The routines + and subsequently update a remote flag to signal completion. The routines return after the data has been copied out of the \source{} array on the - local \ac{PE}. The delivery of \VAR{signal} flag on the remote \ac{PE} - indicates the delivery of its corresponding \dest{} data words into the - data object on the remote \ac{PE}. + local \ac{PE}. + + The \VAR{sig\_op} signal operator determines the type of update to be + performed on the remote \VAR{sig\_addr} signal data object. The completion + of signal update based on the \VAR{sig\_op} signal operator using the + \VAR{signal} flag on the remote \ac{PE} indicates the delivery of its + corresponding \dest{} data words into the data object on the remote \ac{PE}. } \apireturnvalues{ @@ -68,19 +75,21 @@ The \VAR{sig\_addr} and \dest{} may not be overlapping in memory. - The delivery of \VAR{signal} flag on the remote \ac{PE} indicates only the - delivery of its corresponding \dest{} data words into the data object on - the remote \ac{PE}. Without a memory-ordering operation, there is no implied - ordering between the delivery of the signal word of a put-with-signal - routine and another data transfer. For example, the delivery of the signal - word in a sequence consisting of a put routine followed by a put-with-signal - routine does not imply delivery of the put routine's data. - - The signal set by the put-with-signal routines is compatible - with all point-to-point synchronization interfaces. The delivery of - \VAR{signal} flag on the remote \ac{PE} must not cause partial updates. This - requires the update on \VAR{signal} flag to be an atomic operation, with - atomicity guarantees described in Section~\ref{subsec:amo_guarantees}. + The completion of signal update using the \VAR{signal} flag on the remote + \ac{PE} indicates only the delivery of its corresponding \dest{} data words + into the data object on the remote \ac{PE}. 
Without a memory-ordering + operation, there is no implied ordering between the signal update of a + put-with-signal routine and another data transfer. For example, the + completion of the signal update in a sequence consisting of a put routine + followed by a put-with-signal routine does not imply delivery of the put + routine's data. + + The signal update by the put-with-signal routines is compatible with all + point-to-point synchronization interfaces. The delivery of \VAR{signal} flag + based on the \VAR{sig\_op} signal operator on the remote \ac{PE} must not + cause partial updates. Only concurrent accesses on \VAR{sig\_addr} by + different signal update operations using the same signal update operator is + guaranteed to be exclusive. } \begin{apiexamples} @@ -95,3 +104,4 @@ \end{apiexamples} \end{apidefinition} +\color{black} diff --git a/example_code/shmem_put_signal_example.c b/example_code/shmem_put_signal_example.c index a0a4ed36b..179da6d26 100644 --- a/example_code/shmem_put_signal_example.c +++ b/example_code/shmem_put_signal_example.c @@ -23,10 +23,10 @@ int main(void) uint64_t *data = shmem_calloc(size, sizeof(uint64_t)); if (me == 0) { - shmem_put_signal(data, message, size, &sig_addr, 1, pe); + shmem_put_signal(data, message, size, &sig_addr, 1, SHMEM_SIGNAL_SET, pe); } else { shmem_wait_until(&sig_addr, SHMEM_CMP_EQ, 1); - shmem_put_signal(data, data, size, &sig_addr, 1, pe); + shmem_put_signal(data, data, size, &sig_addr, 1, SHMEM_SIGNAL_SET, pe); } free(message); From 3d300e9f10af05057127f23292fb3673d9f0e3e3 Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Fri, 3 May 2019 15:59:26 -0500 Subject: [PATCH 29/30] Update put-with-signal operation Move the atomicity semantics to the API description section --- content/shmem_put_signal.tex | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index c6eef2e00..fed9870c3 100644 --- 
a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -40,9 +40,8 @@ \apiargument{IN}{nelems}{Number of elements in the \dest{} and \source{} arrays. \VAR{nelems} must be of type \VAR{size\_t} for \Cstd.} \apiargument{OUT}{sig\_addr}{Data object to be updated on the remote - \ac{PE} as the signal. This signal data object must be - remotely accessible.} - \apiargument{IN}{signal}{Unsigned 64-bit value that is assigned to the + \ac{PE} as the signal. This signal data object must be remotely accessible.} + \apiargument{IN}{signal}{Unsigned 64-bit value that is used to update the remote \VAR{sig\_addr} signal data object.} \apiargument{IN}{sig\_op}{Signal operator that represents the type of update to be performed to the remote \VAR{sig\_addr} signal data object.} @@ -61,6 +60,12 @@ of signal update based on the \VAR{sig\_op} signal operator using the \VAR{signal} flag on the remote \ac{PE} indicates the delivery of its corresponding \dest{} data words into the data object on the remote \ac{PE}. + + The signal update by the put-with-signal routine is compatible with all + point-to-point synchronization interfaces. The delivery of \VAR{signal} flag + based on the remote \ac{PE} must not cause partial updates. Only concurrent + accesses on \VAR{sig\_addr} by different put-with-signal operations using + the same signal update operator is guaranteed to be exclusive. } \apireturnvalues{ @@ -83,13 +88,6 @@ completion of the signal update in a sequence consisting of a put routine followed by a put-with-signal routine does not imply delivery of the put routine's data. - - The signal update by the put-with-signal routines is compatible with all - point-to-point synchronization interfaces. The delivery of \VAR{signal} flag - based on the \VAR{sig\_op} signal operator on the remote \ac{PE} must not - cause partial updates. 
Only concurrent accesses on \VAR{sig\_addr} by - different signal update operations using the same signal update operator is - guaranteed to be exclusive. } \begin{apiexamples} From cee7f53a252ff6684c6684d99f5a5cc9415388d6 Mon Sep 17 00:00:00 2001 From: Naveen Ravichandrasekaran Date: Fri, 3 May 2019 17:09:00 -0500 Subject: [PATCH 30/30] Reframe the atomicity guarantees for p-w-s Changing the text to confirm the atomicity guarantees of the put with signal operation. The signal update is atomic only with respect to itself, and other put-with-signal of the same operator, and any point-to-point synchronization routines --- content/shmem_put_signal.tex | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/content/shmem_put_signal.tex b/content/shmem_put_signal.tex index fed9870c3..95b7186c7 100644 --- a/content/shmem_put_signal.tex +++ b/content/shmem_put_signal.tex @@ -2,7 +2,7 @@ \apisummary{ The put-with-signal routines provide a method for copying data from a contiguous local data object to a data object on a specified \ac{PE} - and subsequently update a remote flag to signal completion. + and subsequently updating a remote flag to signal completion. } \begin{apidefinition} @@ -34,24 +34,24 @@ \apiargument{IN}{ctx}{A context handle specifying the context on which to perform the operation. When this argument is not provided, the operation is performed on the default context.} - \apiargument{OUT}{dest}{Data object to be updated on the remote \ac{PE}. This - data object must be remotely accessible.} + \apiargument{OUT}{dest}{Data object to be updated on the remote \ac{PE}. + This data object must be remotely accessible.} \apiargument{IN}{source}{Data object containing the data to be copied.} \apiargument{IN}{nelems}{Number of elements in the \dest{} and \source{} arrays. \VAR{nelems} must be of type \VAR{size\_t} for \Cstd.} \apiargument{OUT}{sig\_addr}{Data object to be updated on the remote \ac{PE} as the signal. 
This signal data object must be remotely accessible.} - \apiargument{IN}{signal}{Unsigned 64-bit value that is used to update the + \apiargument{IN}{signal}{Unsigned 64-bit value that is used for updating the remote \VAR{sig\_addr} signal data object.} \apiargument{IN}{sig\_op}{Signal operator that represents the type of update - to be performed to the remote \VAR{sig\_addr} signal data object.} + to be performed on the remote \VAR{sig\_addr} signal data object.} \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE}.} \end{apiarguments} \apidescription{ The put-with-signal routines provide a method for copying data from a contiguous local data object to a data object on a specified \ac{PE} - and subsequently update a remote flag to signal completion. The routines + and subsequently updating a remote flag to signal completion. The routines return after the data has been copied out of the \source{} array on the local \ac{PE}. @@ -61,11 +61,11 @@ \VAR{signal} flag on the remote \ac{PE} indicates the delivery of its corresponding \dest{} data words into the data object on the remote \ac{PE}. - The signal update by the put-with-signal routine is compatible with all - point-to-point synchronization interfaces. The delivery of \VAR{signal} flag - based on the remote \ac{PE} must not cause partial updates. Only concurrent - accesses on \VAR{sig\_addr} by different put-with-signal operations using - the same signal update operator is guaranteed to be exclusive. + An update to the \VAR{sig\_addr} signal data object through a put-with-signal + routine completes as if performed atomically with respect to any other + put-with-signal routine that updates the \VAR{sig\_addr} signal data object + using the same \VAR{sig\_op} signal update operator and any point-to-point + synchronization routine that accesses the \VAR{sig\_addr} signal data object. 
} \apireturnvalues{ @@ -78,7 +78,7 @@ for example, one could be a global/static \Cstd variable and the other could be allocated on the symmetric heap. - The \VAR{sig\_addr} and \dest{} may not be overlapping in memory. + \VAR{sig\_addr} and \dest{} must not overlap in memory. The completion of signal update using the \VAR{signal} flag on the remote \ac{PE} indicates only the delivery of its corresponding \dest{} data words