diff --git a/.clang-format b/.clang-format new file mode 100644 index 000000000..263726cd3 --- /dev/null +++ b/.clang-format @@ -0,0 +1,5 @@ +BasedOnStyle: LLVM +BreakBeforeBraces: Custom +BraceWrapping: + BeforeElse: true +ColumnLimit: 93 diff --git a/.gitignore b/.gitignore index 9bd1c40a0..e81729c9f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,3 @@ -chappage.txt main_spec.aux main_spec.idx main_spec.ilg @@ -7,5 +6,4 @@ main_spec.log main_spec.out main_spec.pdf main_spec.toc -*.log -*~ \ No newline at end of file +*~ diff --git a/Makefile b/Makefile index 0a48e04d4..9112aaf35 100644 --- a/Makefile +++ b/Makefile @@ -13,7 +13,8 @@ ${TARGET}.pdf: ${SOURCES} ${FIGURES} ${EXAMPLES} makeindex ${TARGET} pdflatex $(LATEXOPT) ${TARGET} +RM ?= rm -f +CLEAN_EXTENSIONS = aux idx ilg ind log out pdf toc .PHONY: clean clean: - rm -f ${TARGET}.{log,aux,ps,dvi,bbl,blg,log,idx,out,toc,pdf,out} chappage.txt - + for e in ${CLEAN_EXTENSIONS}; do $(RM) ${TARGET}.$$e; done diff --git a/content/backmatter.tex b/content/backmatter.tex index de73b8d08..ac9811652 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -3,8 +3,7 @@ \appendix %defining pagestyle for annex -%\pagestyle{plain} \withlinenumbers -\pagestyle{fancy} \withlinenumbers +\pagestyle{fancy} \fancyhf{} \fancyhead[RE, LO]{\leftmark} \fancyhead[RO, LE]{\thepage} @@ -32,18 +31,19 @@ \section*{Incorporating OpenSHMEM into Programs}\label{sec:writing_programs} In \openshmem, the order in which lines appear in the output is not deterministic because \acp{PE} execute asynchronously in parallel. 
-\begin{minipage}{\linewidth} -\vspace{0.1in} -\numberedlisting{caption={``Hello World'' example program in \Cstd},label=openshmem-hello,language=OSH2+C} - {example_code/hello-openshmem.c} -\outputlisting{language=bash,caption={Possible ordering of expected output with 4 \acp{PE} from the program in Listing~\ref{openshmem-hello}}} - {example_code/hello-openshmem-c.output} -\vspace{0.1in} -\end{minipage} +\SourceExample{example_code/hello-openshmem.c}{ + \label{openshmem-hello} + ``Hello World'' example program in \Cstd +} + +\ProgramOutput{example_code/hello-openshmem-c.output}{ + Possible ordering of expected output with 4~\acp{PE} from the + program in Example~\ref{openshmem-hello} +} \clearpage %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -The example in Listing~\ref{openshmem-hello-symmetric} shows a more complex +Example~\ref{openshmem-hello-symmetric} shows a more complex \openshmem program that illustrates the use of symmetric data objects. Note the declaration of the \VAR{static short dest} array and its use as the remote destination in \hyperref[subsec:shmem_put]{\FUNC{shmem\_put}}. @@ -61,17 +61,15 @@ \section*{Incorporating OpenSHMEM into Programs}\label{sec:writing_programs} The \source{} object does not need to be symmetric because \PUT{} handles the references to the \VAR{source} array only on the active (local) side. 
-\begin{minipage}{\linewidth} -\vspace{0.1in} -\numberedlisting{caption={Example program with symmetric data objects},label=openshmem-hello-symmetric,language=OSH2+C} - {example_code/writing_shmem_example.c} -\outputlisting{language=bash,caption={Possible ordering of expected output with 4 \acp{PE} from the program in Listing~\ref{openshmem-hello-symmetric}}} - {example_code/writing_shmem_example.output} -\vspace{0.1in} -\end{minipage} - - +\SourceExample{example_code/writing_shmem_example.c}{ + \label{openshmem-hello-symmetric} + Example program with symmetric data objects +} +\ProgramOutput{example_code/writing_shmem_example.output}{ + Possible ordering of expected output with 4~\acp{PE} from the + program in Example~\ref{openshmem-hello-symmetric} +} \chapter{Compiling and Running Programs}\label{sec:compiling} The \openshmem Specification does not specify how @@ -86,7 +84,7 @@ \subsection*{Programs written in \Cstd} \textbf{oshcc}, to aid in the compilation of \Cstd programs. The wrapper may be called as follows: -\begin{lstlisting}[language=bash] +\begin{lstlisting}[] oshcc -o myprogram myprogram.c \end{lstlisting} Where the $\langle\mbox{compiler options}\rangle$ are options understood by the @@ -99,7 +97,7 @@ \subsection*{Programs written in \Cpp} \textbf{oshc++}, to aid in the compilation of \Cpp programs. The wrapper may be called as follows: -\begin{lstlisting}[language=bash] +\begin{lstlisting}[] oshc++ -o myprogram myprogram.cpp \end{lstlisting} Where the $\langle\mbox{compiler options}\rangle$ are options understood by the @@ -112,7 +110,7 @@ \section{Running Programs} \textbf{oshrun}, to launch \openshmem programs. 
The wrapper may be called as follows: -\begin{lstlisting}[language=bash] +\begin{lstlisting}[] oshrun -np <#> \end{lstlisting} The arguments for \textbf{oshrun} are: diff --git a/content/chappage.txt b/content/chappage.txt deleted file mode 100644 index df1ce60d9..000000000 --- a/content/chappage.txt +++ /dev/null @@ -1,8 +0,0 @@ -Chapter 0 - 3 -Chapter A - 7 -Chapter B - 10 -Chapter C - 12 -Chapter D - 13 -Chapter E - 14 -Chapter F - 15 -Chapter G - 16 diff --git a/content/frontmatter.tex b/content/frontmatter.tex index db1b46499..1398dcb5f 100644 --- a/content/frontmatter.tex +++ b/content/frontmatter.tex @@ -23,7 +23,6 @@ % Set header/footer for main content \pagestyle{fancy} %replacing {headings} with {fancy} for customization -\withlinenumbers %adds line numbers to edges of normal pages \fancyhf{} \fancyhead[RE, LO]{\rightmark} \fancyhead[RO, LE]{\thepage} diff --git a/content/interoperability.tex b/content/interoperability.tex index 6fee69632..949885529 100644 --- a/content/interoperability.tex +++ b/content/interoperability.tex @@ -126,23 +126,22 @@ \subsection{Mapping Process Identification Numbers} \subsubsection*{Examples} \label{subsubsec:interoperability:id:example} -The following example demonstrates how to manage the mapping between \openshmem -\ac{PE} numbers and \ac{MPI} ranks in \VAR{MPI\_COMM\_WORLD} in a hybrid \openshmem -and \ac{MPI} program. - -\lstinputlisting[language={C}, tabsize=2, - basicstyle=\ttfamily\footnotesize] - {example_code/hybrid_mpi_mapping_id.c} - -The following example demonstrates an alternative approach for managing the mapping -of process identification numbers in a hybrid program. The program creates a -new MPI communicator, named \VAR{shmem\_comm}, that contains all -processes in \VAR{MPI\_COMM\_WORLD} and each process has the same \ac{MPI} rank -number as its \openshmem \ac{PE} number. 
- -\lstinputlisting[language={C}, tabsize=2, - basicstyle=\ttfamily\footnotesize] - {example_code/hybrid_mpi_mapping_id_shmem_comm.c} + +\SourceExample{example_code/hybrid_mpi_mapping_id.c}{ + The following example demonstrates how to manage the mapping between + \openshmem \ac{PE} numbers and \ac{MPI} ranks in + \VAR{MPI\_COMM\_WORLD} in a hybrid \openshmem and \ac{MPI} program. +} + + +\SourceExample{example_code/hybrid_mpi_mapping_id_shmem_comm.c}{ + The following example demonstrates an alternative approach for + managing the mapping of process identification numbers in a hybrid + program. The program creates a new \ac{MPI} communicator, named + \VAR{shmem\_comm}, that contains all processes in + \VAR{MPI\_COMM\_WORLD} and each process has the same \ac{MPI} rank + number as its \openshmem \ac{PE} number. +} \subsection{RMA Programming Models} \label{subsec:interoperability:rma} diff --git a/content/library_handles.tex b/content/library_handles.tex index 2f674b3c3..8c70ecda6 100644 --- a/content/library_handles.tex +++ b/content/library_handles.tex @@ -23,10 +23,12 @@ %% \LibHandleDecl{SHMEM\_TEAM\_SHARED} & Handle of type \CTYPE{shmem\_team\_t} that corresponds to a team of \acp{PE} -that share a memory domain. When this handle is used by some \ac{PE}, -it will refer to the team of all \acp{PE} that would return a non-null -pointer from \FUNC{shmem\_ptr} for symmetric objects on that \ac{PE}, -and vice versa. This means that symmetric objects on each \ac{PE} are +that share a memory domain. \LibHandleRef{SHMEM\_TEAM\_SHARED} refers to +the team of all \acp{PE} that would mutually return a non-null address from a +call to \FUNC{shmem\_ptr} for all symmetric heap objects. That is, +\FUNC{shmem\_ptr} must return a non-null pointer to the local \ac{PE} for all +symmetric heap objects on all target \acp{PE} in the team. This means that +symmetric heap objects on each \ac{PE} are directly load/store accessible by all \acp{PE} in the team. 
See Section~\ref{subsec:team} for more detail about its use. \tabularnewline \hline diff --git a/content/memory_model.tex b/content/memory_model.tex index 93a41c337..3b8c42d12 100644 --- a/content/memory_model.tex +++ b/content/memory_model.tex @@ -129,37 +129,37 @@ \subsection{Atomicity Guarantees}\label{subsec:amo_guarantees} with non-atomic operations, such as one-sided \OPR{put} or \OPR{get} operations, will invalidate the atomicity guarantees. -\cexample - { - The following \CorCpp example illustrates scenario 1. - In this example, different atomicity domains are used to access - the same location, resulting in undefined behavior. - The undefined behavior can be resolved by using communication - contexts in the same atomicity domain in all concurrent operations. - } {./example_code/amo_scenario_1.c} +\SourceExample{./example_code/amo_scenario_1.c}{ + The following \CorCpp example illustrates scenario 1. + In this example, different atomicity domains are used to access + the same location, resulting in undefined behavior. + The undefined behavior can be resolved by using communication + contexts in the same atomicity domain in all concurrent operations. +} -\cexample - {The following \CorCpp example illustrates scenario 2. In this example, - different datatypes are used to access the same location concurrently, - resulting in undefined behavior. The undefined behavior can be resolved by - using the same datatype in all concurrent operations. For example, the - 32-bit value can be left-shifted and a 64-bit atomic OR operation can be - used.} - {./example_code/amo_scenario_2.c} +\SourceExample{./example_code/amo_scenario_2.c}{ + The following \CorCpp example illustrates scenario 2. In this example, + different datatypes are used to access the same location concurrently, + resulting in undefined behavior. The undefined behavior can be resolved by + using the same datatype in all concurrent operations. 
For example, the + 32-bit value can be left-shifted and a 64-bit atomic OR operation can be + used. +} -\cexample - {The following \CorCpp example illustrates scenario 3. In this example, - atomic increment operations are concurrent with a non-atomic reduction - operation, resulting in undefined behavior. The undefined behavior can be - resolved by inserting a barrier operation before the reduction. The - barrier ensures that all local and remote AMOs have completed before the - reduction operation accesses $x$.} - {./example_code/amo_scenario_3.c} +\SourceExample{./example_code/amo_scenario_3.c}{ + The following \CorCpp example illustrates scenario 3. In this example, + atomic increment operations are concurrent with a non-atomic reduction + operation, resulting in undefined behavior. The undefined behavior can be + resolved by inserting a barrier operation before the reduction. The + barrier ensures that all local and remote AMOs have completed before the + reduction operation accesses $x$. +} + +\SourceExample{./example_code/amo_scenario_4.c}{ + The following \CorCpp example illustrates scenario 4. In this example, an + \openshmem atomic increment operation is concurrent with a local increment + operation, resulting in undefined behavior. The undefined behavior can be + resolved by replacing the local increment operation with an \openshmem + atomic increment. +} -\cexample - {The following \CorCpp example illustrates scenario 4. In this example, an - \openshmem atomic increment operation is concurrent with a local increment - operation, resulting in undefined behavior. 
The undefined behavior can be - resolved by replacing the local increment operation with an \openshmem - atomic increment.} - {./example_code/amo_scenario_4.c} diff --git a/content/shmem_get.tex b/content/shmem_get.tex index 7123457e6..5827598a5 100644 --- a/content/shmem_get.tex +++ b/content/shmem_get.tex @@ -18,7 +18,7 @@ \begin{CsynopsisCol} void @\FuncDecl{shmem\_get\FuncParam{SIZE}}@(void *dest, const void *source, size_t nelems, int pe); -void @\FuncDecl{shmem\_ctx\_get\FuncParam{SIZE}}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, int pe); +void @\FuncDecl{shmem\_ctx\_get\FuncParam{SIZE}}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, int pe); \end{CsynopsisCol} where \SIZE{} is one of \CONST{8, 16, 32, 64, 128}. diff --git a/content/shmem_malloc_hints.tex b/content/shmem_malloc_hints.tex index b5a6c0945..165901e06 100644 --- a/content/shmem_malloc_hints.tex +++ b/content/shmem_malloc_hints.tex @@ -86,4 +86,3 @@ load/store operations from the origin \ac{PE} or vice versa. } \end{apidefinition} -\newpage diff --git a/content/shmem_reductions.tex b/content/shmem_reductions.tex index 02ab0e8d8..7c738a0bb 100644 --- a/content/shmem_reductions.tex +++ b/content/shmem_reductions.tex @@ -315,7 +315,7 @@ \begin{apiexamples} \apicexample - {This \CorCpp reduction example gets integers from an external + {This \Cstd[11] reduction example gets integers from an external source (random generator in this example), tests to see if the \ac{PE} got a valid value, and outputs the sum of values for which all \acp{PE} got a valid value.} diff --git a/content/shmem_signal_wait_until.tex b/content/shmem_signal_wait_until.tex index 5d93ec7f3..43d6935bf 100644 --- a/content/shmem_signal_wait_until.tex +++ b/content/shmem_signal_wait_until.tex @@ -33,10 +33,6 @@ blocks until the value of \VAR{sig\_addr} at the calling \ac{PE} satisfies the wait condition specified by the comparison operator, \VAR{cmp}, and comparison value, \VAR{cmp\_value}. 
- - Implementations must ensure that \FUNC{shmem\_signal\_wait\_until} do not - return before the update of the memory indicated by \VAR{sig\_addr} is - fully complete. } \apireturnvalues{ @@ -44,4 +40,11 @@ calling \ac{PE} that satisfies the wait condition. } +\apiimpnotes{ + Implementations must ensure that \FUNC{shmem\_signal\_wait\_until} does not + return before the update of the memory indicated by \VAR{sig\_addr} is fully + complete. Partial updates to the memory must not cause + \FUNC{shmem\_signal\_wait\_until} to return. +} + \end{apidefinition} diff --git a/content/shmem_test_some.tex b/content/shmem_test_some.tex index b38c4fbfe..f4108e8e6 100644 --- a/content/shmem_test_some.tex +++ b/content/shmem_test_some.tex @@ -89,7 +89,7 @@ \begin{apiexamples} \apicexample {The following \Cstd[11] example demonstrates the use of - \FUNC{shmem\_test\_some} to process a simple all-to-all transfer of N + \FUNC{shmem\_test\_some} to process a simple all-to-all transfer of $N$ data elements via a sum reduction, while potentially overlapping communication with computation. This pattern is similar to the 
This pattern is similar to the \FUNC{shmem\_test\_any} example above, but each while loop iteration may diff --git a/example_code/.gitignore b/example_code/.gitignore new file mode 100644 index 000000000..43c1827e9 --- /dev/null +++ b/example_code/.gitignore @@ -0,0 +1 @@ +*.[cf]x diff --git a/example_code/Makefile b/example_code/Makefile index c37cfc99f..f80fc2a13 100644 --- a/example_code/Makefile +++ b/example_code/Makefile @@ -1,5 +1,5 @@ CC = oshcc -CFLAGS ?= -Wall -Wextra +CFLAGS ?= -Wall -Wextra -pedantic -Werror FC = oshfort FFLAGS ?= -Wall -Wextra @@ -13,6 +13,8 @@ C_BINS = $(C_TESTS:.c=.cx) F_TESTS = $(wildcard *.f90) F_BINS = $(F_TESTS:.f90=.fx) +shmem_ctx.cx: CFLAGS += -fopenmp + .PHONY: all run clean all: $(C_BINS) $(F_BINS) @@ -24,9 +26,10 @@ all: $(C_BINS) $(F_BINS) $(FC) $(FFLAGS) -o $@ $+ run: $(C_BINS) - @for bin in $+; do \ - echo --$$bin------------------------------; \ - $(RUNCMD) $(RUNOPT) ./$$bin || exit $$?; \ + @for bin in $+; do \ + echo -- $$bin ------------------------------; \ + $(RUNCMD) $(RUNOPT) ./$$bin; \ + echo -- exit status: $$?; \ done clean: diff --git a/example_code/amo_scenario_2.c b/example_code/amo_scenario_2.c index 0ab058cb8..9b538f40d 100644 --- a/example_code/amo_scenario_2.c +++ b/example_code/amo_scenario_2.c @@ -1,16 +1,16 @@ #include int main(void) { - static uint64_t x = 0; + static uint64_t x = 0; - shmem_init(); - /* Undefined behavior: The following AMOs access the same location concurrently using - * different types. */ - if (shmem_my_pe() > 0) - shmem_uint32_atomic_or((uint32_t*)&x, shmem_my_pe()+1, 0); - else - shmem_uint64_atomic_or(&x, shmem_my_pe()+1, 0); + shmem_init(); + /* Undefined behavior: The following AMOs access the same location + * concurrently using different types. 
*/ + if (shmem_my_pe() > 0) + shmem_uint32_atomic_or((uint32_t *)&x, shmem_my_pe() + 1, 0); + else + shmem_uint64_atomic_or(&x, shmem_my_pe() + 1, 0); - shmem_finalize(); - return 0; + shmem_finalize(); + return 0; } diff --git a/example_code/amo_scenario_3.c b/example_code/amo_scenario_3.c index d89b7696f..93586779c 100644 --- a/example_code/amo_scenario_3.c +++ b/example_code/amo_scenario_3.c @@ -1,21 +1,20 @@ #include int main(void) { - static long psync[SHMEM_REDUCE_SYNC_SIZE]; - static int pwrk[SHMEM_REDUCE_MIN_WRKDATA_SIZE]; - static int x = 0, y = 0; + static long psync[SHMEM_REDUCE_SYNC_SIZE]; + static int pwrk[SHMEM_REDUCE_MIN_WRKDATA_SIZE]; + static int x = 0, y = 0; - for (int i = 0; i < SHMEM_REDUCE_SYNC_SIZE; i++) - psync[i] = SHMEM_SYNC_VALUE; + for (int i = 0; i < SHMEM_REDUCE_SYNC_SIZE; i++) + psync[i] = SHMEM_SYNC_VALUE; - shmem_init(); - shmem_int_atomic_inc(&x, (shmem_my_pe()+1) % shmem_n_pes()); - /* Undefined behavior: The following reduction operation performs accesses to symmetric - * variable 'x' that are concurrent with previously issued atomic increment operations - * on the same variable. */ - shmem_int_sum_to_all(&y, &x, 1, 0, 0, shmem_n_pes(), pwrk, psync); + shmem_init(); + shmem_int_atomic_inc(&x, (shmem_my_pe() + 1) % shmem_n_pes()); + /* Undefined behavior: The following reduction operation performs accesses to + * symmetric variable 'x' that are concurrent with previously issued atomic + * increment operations on the same variable. 
*/ + shmem_int_sum_to_all(&y, &x, 1, 0, 0, shmem_n_pes(), pwrk, psync); - shmem_finalize(); - return 0; + shmem_finalize(); + return 0; } - diff --git a/example_code/amo_scenario_4.c b/example_code/amo_scenario_4.c index 9c5f16e97..4b0a83f48 100644 --- a/example_code/amo_scenario_4.c +++ b/example_code/amo_scenario_4.c @@ -1,16 +1,16 @@ #include int main(void) { - static int x = 0; + static int x = 0; - shmem_init(); - /* Undefined behavior: OpenSHMEM atomic increment operations are concurrent with the local - * increment of symmetric variable 'x'. */ - if (shmem_my_pe() > 0) - shmem_int_atomic_inc(&x, 0); - else - x++; + shmem_init(); + /* Undefined behavior: OpenSHMEM atomic increment operations are concurrent + * with the local increment of symmetric variable 'x'. */ + if (shmem_my_pe() > 0) + shmem_int_atomic_inc(&x, 0); + else + x++; - shmem_finalize(); - return 0; + shmem_finalize(); + return 0; } diff --git a/example_code/hello-openshmem.c b/example_code/hello-openshmem.c index 8432b20dd..1d6e0f32e 100644 --- a/example_code/hello-openshmem.c +++ b/example_code/hello-openshmem.c @@ -1,12 +1,11 @@ -#include #include /* The OpenSHMEM header file */ +#include -int main (void) -{ - shmem_init(); - int me = shmem_my_pe(); - int npes = shmem_n_pes(); - printf("Hello from %d of %d\n", me, npes); - shmem_finalize(); - return 0; +int main(void) { + shmem_init(); + int mype = shmem_my_pe(); + int npes = shmem_n_pes(); + printf("Hello from %d of %d\n", mype, npes); + shmem_finalize(); + return 0; } diff --git a/example_code/hybrid_mpi_mapping_id.c b/example_code/hybrid_mpi_mapping_id.c index e99ff5bcc..4afae6c81 100644 --- a/example_code/hybrid_mpi_mapping_id.c +++ b/example_code/hybrid_mpi_mapping_id.c @@ -1,29 +1,28 @@ -#include -#include #include +#include +#include -int main(int argc, char *argv[]) -{ - MPI_Init(&argc, &argv); - shmem_init(); +int main(int argc, char *argv[]) { + MPI_Init(&argc, &argv); + shmem_init(); - int mype = 
shmem_team_my_pe(SHMEM_TEAM_WORLD); - int npes = shmem_team_n_pes(SHMEM_TEAM_WORLD); + int mype = shmem_team_my_pe(SHMEM_TEAM_WORLD); + int npes = shmem_team_n_pes(SHMEM_TEAM_WORLD); - static int myrank; - MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + static int myrank; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); - int *mpi_ranks = shmem_calloc(npes, sizeof(int)); + int *mpi_ranks = shmem_calloc(npes, sizeof(int)); - shmem_int_collect(SHMEM_TEAM_WORLD, mpi_ranks, &myrank, 1); - if (mype == 0) - for (int i = 0; i < npes; i++) - printf("PE %d's MPI rank is %d\n", i, mpi_ranks[i]); + shmem_int_collect(SHMEM_TEAM_WORLD, mpi_ranks, &myrank, 1); + if (mype == 0) + for (int i = 0; i < npes; i++) + printf("PE %d's MPI rank is %d\n", i, mpi_ranks[i]); - shmem_free(mpi_ranks); + shmem_free(mpi_ranks); - shmem_finalize(); - MPI_Finalize(); + shmem_finalize(); + MPI_Finalize(); - return 0; + return 0; } diff --git a/example_code/hybrid_mpi_mapping_id_shmem_comm.c b/example_code/hybrid_mpi_mapping_id_shmem_comm.c index cf2b86809..54a05e98b 100644 --- a/example_code/hybrid_mpi_mapping_id_shmem_comm.c +++ b/example_code/hybrid_mpi_mapping_id_shmem_comm.c @@ -1,24 +1,23 @@ -#include -#include #include +#include +#include -int main(int argc, char *argv[]) -{ - MPI_Init(&argc, &argv); - shmem_init(); +int main(int argc, char *argv[]) { + MPI_Init(&argc, &argv); + shmem_init(); - int mype = shmem_my_pe(); + int mype = shmem_my_pe(); - MPI_Comm shmem_comm; - MPI_Comm_split(MPI_COMM_WORLD, 0, mype, &shmem_comm); + MPI_Comm shmem_comm; + MPI_Comm_split(MPI_COMM_WORLD, 0, mype, &shmem_comm); - int myrank; - MPI_Comm_rank(shmem_comm, &myrank); - printf("PE %d's MPI rank is %d\n", mype, myrank); + int myrank; + MPI_Comm_rank(shmem_comm, &myrank); + printf("PE %d's MPI rank is %d\n", mype, myrank); - MPI_Comm_free(&shmem_comm); - shmem_finalize(); - MPI_Finalize(); + MPI_Comm_free(&shmem_comm); + shmem_finalize(); + MPI_Finalize(); - return 0; + return 0; } diff --git 
a/example_code/pshmem_example.c b/example_code/pshmem_example.c index 8e14f55f4..89b707720 100644 --- a/example_code/pshmem_example.c +++ b/example_code/pshmem_example.c @@ -1,27 +1,26 @@ +#include #include #include -#include static double total_put_time = 0.0; static double avg_put_time = 0.0; static long put_count = 0; static inline double get_wtime(void) { - double wtime = 0.0; - struct timeval tv; - gettimeofday(&tv, NULL); - wtime = tv.tv_sec; - wtime += (double)tv.tv_usec / 1.0e6; - return wtime; + double wtime = 0.0; + struct timeval tv; + gettimeofday(&tv, NULL); + wtime = tv.tv_sec; + wtime += (double)tv.tv_usec / 1.0e6; + return wtime; } -void shmem_long_put(long *dest, const long *source, size_t nelems, int pe) -{ - double t_start = get_wtime(); /* Start timer */ - pshmem_long_put(dest, source, nelems, pe); /* Name shifted call to put */ - total_put_time += get_wtime() - t_start; /* Calculate total time elapsed */ - put_count += 1; /* Increment put counts */ - avg_put_time = total_put_time / (double) put_count; /* Calculate average put latency */ +void shmem_long_put(long *dest, const long *source, size_t nelems, int pe) { + double t_start = get_wtime(); /* Start timer */ + pshmem_long_put(dest, source, nelems, pe); /* Name shifted call to put */ + total_put_time += get_wtime() - t_start; /* Calculate total time elapsed */ + put_count += 1; /* Increment put counts */ + avg_put_time = total_put_time / (double)put_count; /* Calculate average put latency */ - return; + return; } diff --git a/example_code/pshmem_no_weak_symbol_1.c b/example_code/pshmem_no_weak_symbol_1.c index cde2053c0..3d27025c4 100644 --- a/example_code/pshmem_no_weak_symbol_1.c +++ b/example_code/pshmem_no_weak_symbol_1.c @@ -1,5 +1,5 @@ #ifdef BUILD_PSHMEM_INTERFACES -# define SHFN(fn) p##fn +#define SHFN(fn) p##fn #else -# define SHFN(fn) fn +#define SHFN(fn) fn #endif diff --git a/example_code/pshmem_no_weak_symbol_2.c b/example_code/pshmem_no_weak_symbol_2.c index 
c4074f773..7d0ae3a43 100644 --- a/example_code/pshmem_no_weak_symbol_2.c +++ b/example_code/pshmem_no_weak_symbol_2.c @@ -1,4 +1,2 @@ -void SHFN(shmem_example)(/* appropriate arguments */) -{ - /* function body */ +void SHFN(shmem_example)(/* appropriate arguments */) { /* function body */ } diff --git a/example_code/pshmem_weak_symbol_1.c b/example_code/pshmem_weak_symbol_1.c index a15e29d62..51182ca0e 100644 --- a/example_code/pshmem_weak_symbol_1.c +++ b/example_code/pshmem_weak_symbol_1.c @@ -1,6 +1,4 @@ #pragma weak shmem_example = pshmem_example -void pshmem_example(/* appropriate arguments */) -{ - /* function body */ +void pshmem_example(/* appropriate arguments */) { /* function body */ } diff --git a/example_code/pshmem_weak_symbol_2.c b/example_code/pshmem_weak_symbol_2.c index dde07c925..b269ceca7 100644 --- a/example_code/pshmem_weak_symbol_2.c +++ b/example_code/pshmem_weak_symbol_2.c @@ -1,6 +1,5 @@ -void pshmem_example(/* appropriate arguments */) -{ - /* function body */ +void pshmem_example(/* appropriate arguments */) { /* function body */ } -void shmem_example(/* appropriate arguments */) __attribute__ ((weak, alias("pshmem_example"))); +void shmem_example(/* appropriate arguments */) + __attribute__((weak, alias("pshmem_example"))); diff --git a/example_code/shmem_alltoall_example.c b/example_code/shmem_alltoall_example.c index a5d102545..5c5ece60a 100644 --- a/example_code/shmem_alltoall_example.c +++ b/example_code/shmem_alltoall_example.c @@ -1,42 +1,41 @@ -#include #include #include +#include -int main(void) -{ - shmem_init(); - int me = shmem_my_pe(); - int npes = shmem_n_pes(); +int main(void) { + shmem_init(); + int mype = shmem_my_pe(); + int npes = shmem_n_pes(); - const int count = 2; - int64_t* dest = (int64_t*) shmem_malloc(count * npes * sizeof(int64_t)); - int64_t* source = (int64_t*) shmem_malloc(count * npes * sizeof(int64_t)); + const int count = 2; + int64_t *dest = (int64_t *)shmem_malloc(count * npes * sizeof(int64_t)); + 
int64_t *source = (int64_t *)shmem_malloc(count * npes * sizeof(int64_t)); - /* assign source values */ - for (int pe = 0; pe < npes; pe++) { - for (int i = 0; i < count; i++) { - source[(pe * count) + i] = me + pe; - dest[(pe * count) + i] = 9999; - } - } - /* wait for all PEs to initialize source/dest */ - shmem_team_sync(SHMEM_TEAM_WORLD); + /* assign source values */ + for (int pe = 0; pe < npes; pe++) { + for (int i = 0; i < count; i++) { + source[(pe * count) + i] = mype + pe; + dest[(pe * count) + i] = 9999; + } + } + /* wait for all PEs to initialize source/dest */ + shmem_team_sync(SHMEM_TEAM_WORLD); - /* alltoall on all PES */ - shmem_int64_alltoall(SHMEM_TEAM_WORLD, dest, source, count); + /* alltoall on all PES */ + shmem_int64_alltoall(SHMEM_TEAM_WORLD, dest, source, count); - /* verify results */ - for (int pe = 0; pe < npes; pe++) { - for (int i = 0; i < count; i++) { - if (dest[(pe * count) + i] != pe + me) { - printf("[%d] ERROR: dest[%d]=%" PRId64 ", should be %d\n", - me, (pe * count) + i, dest[(pe * count) + i], pe + me); - } - } - } + /* verify results */ + for (int pe = 0; pe < npes; pe++) { + for (int i = 0; i < count; i++) { + if (dest[(pe * count) + i] != pe + mype) { + printf("[%d] ERROR: dest[%d]=%" PRId64 ", should be %d\n", mype, (pe * count) + i, + dest[(pe * count) + i], pe + mype); + } + } + } - shmem_free(dest); - shmem_free(source); - shmem_finalize(); - return 0; + shmem_free(dest); + shmem_free(source); + shmem_finalize(); + return 0; } diff --git a/example_code/shmem_alltoalls_example.c b/example_code/shmem_alltoalls_example.c index 5c135185a..f8adeab16 100644 --- a/example_code/shmem_alltoalls_example.c +++ b/example_code/shmem_alltoalls_example.c @@ -1,45 +1,44 @@ -#include #include #include +#include -int main(void) -{ - shmem_init(); - int me = shmem_my_pe(); - int npes = shmem_n_pes(); +int main(void) { + shmem_init(); + int mype = shmem_my_pe(); + int npes = shmem_n_pes(); - const int count = 2; - const ptrdiff_t dst = 2; 
- const ptrdiff_t sst = 3; - int64_t* dest = (int64_t*) shmem_malloc(count * dst * npes * sizeof(int64_t)); - int64_t* source = (int64_t*) shmem_malloc(count * sst * npes * sizeof(int64_t)); + const int count = 2; + const ptrdiff_t dst = 2; + const ptrdiff_t sst = 3; + int64_t *dest = (int64_t *)shmem_malloc(count * dst * npes * sizeof(int64_t)); + int64_t *source = (int64_t *)shmem_malloc(count * sst * npes * sizeof(int64_t)); - /* assign source values */ - for (int pe = 0; pe < npes; pe++) { - for (int i = 0; i < count; i++) { - source[sst * ((pe * count) + i)] = me + pe; - dest[dst * ((pe * count) + i)] = 9999; - } - } - /* wait for all PEs to initialize source/dest */ - shmem_team_sync(SHMEM_TEAM_WORLD); + /* assign source values */ + for (int pe = 0; pe < npes; pe++) { + for (int i = 0; i < count; i++) { + source[sst * ((pe * count) + i)] = mype + pe; + dest[dst * ((pe * count) + i)] = 9999; + } + } + /* wait for all PEs to initialize source/dest */ + shmem_team_sync(SHMEM_TEAM_WORLD); - /* alltoalls on all PES */ - shmem_int64_alltoalls(SHMEM_TEAM_WORLD, dest, source, dst, sst, count); + /* alltoalls on all PES */ + shmem_int64_alltoalls(SHMEM_TEAM_WORLD, dest, source, dst, sst, count); - /* verify results */ - for (int pe = 0; pe < npes; pe++) { - for (int i = 0; i < count; i++) { - int j = dst * ((pe * count) + i); - if (dest[j] != pe + me) { - printf("[%d] ERROR: dest[%d]=%" PRId64 ", should be %d\n", - me, j, dest[j], pe + me); - } - } - } + /* verify results */ + for (int pe = 0; pe < npes; pe++) { + for (int i = 0; i < count; i++) { + int j = dst * ((pe * count) + i); + if (dest[j] != pe + mype) { + printf("[%d] ERROR: dest[%d]=%" PRId64 ", should be %d\n", mype, j, dest[j], + pe + mype); + } + } + } - shmem_free(dest); - shmem_free(source); - shmem_finalize(); - return 0; + shmem_free(dest); + shmem_free(source); + shmem_finalize(); + return 0; } diff --git a/example_code/shmem_atomic_add_example.c b/example_code/shmem_atomic_add_example.c index 
022b8ecfc..1caf33cf1 100644 --- a/example_code/shmem_atomic_add_example.c +++ b/example_code/shmem_atomic_add_example.c @@ -1,15 +1,14 @@ -#include #include +#include -int main(void) -{ - static int dst = 22; - shmem_init(); - int me = shmem_my_pe(); - if (me == 1) - shmem_atomic_add(&dst, 44, 0); - shmem_barrier_all(); - printf("%d: dst = %d\n", me, dst); - shmem_finalize(); - return 0; +int main(void) { + static int dst = 22; + shmem_init(); + int mype = shmem_my_pe(); + if (mype == 1) + shmem_atomic_add(&dst, 44, 0); + shmem_barrier_all(); + printf("%d: dst = %d\n", mype, dst); + shmem_finalize(); + return 0; } diff --git a/example_code/shmem_atomic_compare_swap_example.c b/example_code/shmem_atomic_compare_swap_example.c index a3b7743fb..dfa4e7bb5 100644 --- a/example_code/shmem_atomic_compare_swap_example.c +++ b/example_code/shmem_atomic_compare_swap_example.c @@ -1,13 +1,13 @@ -#include #include +#include -int main(void) -{ - static int race_winner = -1; - shmem_init(); - int me = shmem_my_pe(); - int oldval = shmem_atomic_compare_swap(&race_winner, -1, me, 0); - if (oldval == -1) printf("PE %d was first\n", me); - shmem_finalize(); - return 0; +int main(void) { + static int race_winner = -1; + shmem_init(); + int mype = shmem_my_pe(); + int oldval = shmem_atomic_compare_swap(&race_winner, -1, mype, 0); + if (oldval == -1) + printf("PE %d was first\n", mype); + shmem_finalize(); + return 0; } diff --git a/example_code/shmem_atomic_fetch_add_example.c b/example_code/shmem_atomic_fetch_add_example.c index 21a32cc5b..571e8feab 100644 --- a/example_code/shmem_atomic_fetch_add_example.c +++ b/example_code/shmem_atomic_fetch_add_example.c @@ -1,16 +1,15 @@ -#include #include +#include -int main(void) -{ - int old = -1; - static int dst = 22; - shmem_init(); - int me = shmem_my_pe(); - if (me == 1) - old = shmem_atomic_fetch_add(&dst, 44, 0); - shmem_barrier_all(); - printf("%d: old = %d, dst = %d\n", me, old, dst); - shmem_finalize(); - return 0; +int main(void) { 
+ int old = -1; + static int dst = 22; + shmem_init(); + int mype = shmem_my_pe(); + if (mype == 1) + old = shmem_atomic_fetch_add(&dst, 44, 0); + shmem_barrier_all(); + printf("%d: old = %d, dst = %d\n", mype, old, dst); + shmem_finalize(); + return 0; } diff --git a/example_code/shmem_atomic_fetch_inc_example.c b/example_code/shmem_atomic_fetch_inc_example.c index f064136a4..e183bf0ea 100644 --- a/example_code/shmem_atomic_fetch_inc_example.c +++ b/example_code/shmem_atomic_fetch_inc_example.c @@ -1,16 +1,15 @@ -#include #include +#include -int main(void) -{ - int old = -1; - static int dst = 22; - shmem_init(); - int me = shmem_my_pe(); - if (me == 0) - old = shmem_atomic_fetch_inc(&dst, 1); - shmem_barrier_all(); - printf("%d: old = %d, dst = %d\n", me, old, dst); - shmem_finalize(); - return 0; +int main(void) { + int old = -1; + static int dst = 22; + shmem_init(); + int mype = shmem_my_pe(); + if (mype == 0) + old = shmem_atomic_fetch_inc(&dst, 1); + shmem_barrier_all(); + printf("%d: old = %d, dst = %d\n", mype, old, dst); + shmem_finalize(); + return 0; } diff --git a/example_code/shmem_atomic_inc_example.c b/example_code/shmem_atomic_inc_example.c index a51f59d0e..7112486b8 100644 --- a/example_code/shmem_atomic_inc_example.c +++ b/example_code/shmem_atomic_inc_example.c @@ -1,15 +1,14 @@ -#include #include +#include -int main(void) -{ - static int dst = 74; - shmem_init(); - int me = shmem_my_pe(); - if (me == 0) - shmem_atomic_inc(&dst, 1); - shmem_barrier_all(); - printf("%d: dst = %d\n", me, dst); - shmem_finalize(); - return 0; +int main(void) { + static int dst = 74; + shmem_init(); + int mype = shmem_my_pe(); + if (mype == 0) + shmem_atomic_inc(&dst, 1); + shmem_barrier_all(); + printf("%d: dst = %d\n", mype, dst); + shmem_finalize(); + return 0; } diff --git a/example_code/shmem_atomic_swap_example.c b/example_code/shmem_atomic_swap_example.c index ac817271d..710e6a5d5 100644 --- a/example_code/shmem_atomic_swap_example.c +++ 
b/example_code/shmem_atomic_swap_example.c @@ -1,19 +1,18 @@ -#include #include +#include -int main(void) -{ - static long dest; - shmem_init(); - int me = shmem_my_pe(); - int npes = shmem_n_pes(); - dest = me; - shmem_barrier_all(); - long new_val = me; - if (me & 1) { - long swapped_val = shmem_atomic_swap(&dest, new_val, (me + 1) % npes); - printf("%d: dest = %ld, swapped = %ld\n", me, dest, swapped_val); - } - shmem_finalize(); - return 0; +int main(void) { + static long dest; + shmem_init(); + int mype = shmem_my_pe(); + int npes = shmem_n_pes(); + dest = mype; + shmem_barrier_all(); + long new_val = mype; + if (mype & 1) { + long swapped_val = shmem_atomic_swap(&dest, new_val, (mype + 1) % npes); + printf("%d: dest = %ld, swapped = %ld\n", mype, dest, swapped_val); + } + shmem_finalize(); + return 0; } diff --git a/example_code/shmem_barrier_example.c b/example_code/shmem_barrier_example.c index 5fa27bec4..32aea43db 100644 --- a/example_code/shmem_barrier_example.c +++ b/example_code/shmem_barrier_example.c @@ -1,24 +1,23 @@ -#include #include +#include -int main(void) -{ - static int x = 10101; - static long pSync[SHMEM_BARRIER_SYNC_SIZE]; - for (int i = 0; i < SHMEM_BARRIER_SYNC_SIZE; i++) - pSync[i] = SHMEM_SYNC_VALUE; +int main(void) { + static int x = 10101; + static long pSync[SHMEM_BARRIER_SYNC_SIZE]; + for (int i = 0; i < SHMEM_BARRIER_SYNC_SIZE; i++) + pSync[i] = SHMEM_SYNC_VALUE; - shmem_init(); - int me = shmem_my_pe(); - int npes = shmem_n_pes(); + shmem_init(); + int mype = shmem_my_pe(); + int npes = shmem_n_pes(); - if (me % 2 == 0) { - /* put to next even PE in a circular fashion */ - shmem_p(&x, 4, (me + 2) % npes); - /* synchronize all even pes */ - shmem_barrier(0, 1, (npes / 2 + npes % 2), pSync); - } - printf("%d: x = %d\n", me, x); - shmem_finalize(); - return 0; + if (mype % 2 == 0) { + /* put to next even PE in a circular fashion */ + shmem_p(&x, 4, (mype + 2) % npes); + /* synchronize all even pes */ + shmem_barrier(0, 1, (npes / 2 + 
npes % 2), pSync); + } + printf("%d: x = %d\n", mype, x); + shmem_finalize(); + return 0; } diff --git a/example_code/shmem_barrierall_example.c b/example_code/shmem_barrierall_example.c index bf23be573..cadc93c02 100644 --- a/example_code/shmem_barrierall_example.c +++ b/example_code/shmem_barrierall_example.c @@ -1,20 +1,19 @@ -#include #include +#include -int main(void) -{ - static int x = 1010; +int main(void) { + static int x = 1010; - shmem_init(); - int me = shmem_my_pe(); - int npes = shmem_n_pes(); + shmem_init(); + int mype = shmem_my_pe(); + int npes = shmem_n_pes(); - /* put to next PE in a circular fashion */ - shmem_p(&x, 4, (me + 1) % npes); + /* put to next PE in a circular fashion */ + shmem_p(&x, 4, (mype + 1) % npes); - /* synchronize all PEs */ - shmem_barrier_all(); - printf("%d: x = %d\n", me, x); - shmem_finalize(); - return 0; + /* synchronize all PEs */ + shmem_barrier_all(); + printf("%d: x = %d\n", mype, x); + shmem_finalize(); + return 0; } diff --git a/example_code/shmem_broadcast_example.c b/example_code/shmem_broadcast_example.c index 8c0b84037..a88c2084c 100644 --- a/example_code/shmem_broadcast_example.c +++ b/example_code/shmem_broadcast_example.c @@ -1,22 +1,21 @@ +#include #include #include -#include -int main(void) -{ - static long source[4], dest[4]; +int main(void) { + static long source[4], dest[4]; - shmem_init(); - int me = shmem_my_pe(); - int npes = shmem_n_pes(); + shmem_init(); + int mype = shmem_my_pe(); + int npes = shmem_n_pes(); - if (me == 0) - for (int i = 0; i < 4; i++) - source[i] = i; + if (mype == 0) + for (int i = 0; i < 4; i++) + source[i] = i; - shmem_broadcast(SHMEM_TEAM_WORLD, dest, source, 4, 0); + shmem_broadcast(SHMEM_TEAM_WORLD, dest, source, 4, 0); - printf("%d: %ld, %ld, %ld, %ld\n", me, dest[0], dest[1], dest[2], dest[3]); - shmem_finalize(); - return 0; + printf("%d: %ld, %ld, %ld, %ld\n", mype, dest[0], dest[1], dest[2], dest[3]); + shmem_finalize(); + return 0; } diff --git 
a/example_code/shmem_collect_example.c b/example_code/shmem_collect_example.c index 9eb569627..b3329c9fd 100644 --- a/example_code/shmem_collect_example.c +++ b/example_code/shmem_collect_example.c @@ -1,36 +1,35 @@ +#include #include #include -#include -int main(void) -{ - static long lock = 0; +int main(void) { + static long lock = 0; - shmem_init(); - int me = shmem_my_pe(); - int npes = shmem_n_pes(); - int my_nelem = me + 1; /* linearly increasing number of elements with PE */ - int total_nelem = (npes * (npes + 1)) / 2; + shmem_init(); + int mype = shmem_my_pe(); + int npes = shmem_n_pes(); + int my_nelem = mype + 1; /* linearly increasing number of elements with PE */ + int total_nelem = (npes * (npes + 1)) / 2; - int* source = (int*) shmem_malloc(npes*sizeof(int)); /* symmetric alloc */ - int* dest = (int*) shmem_malloc(total_nelem*sizeof(int)); + int *source = (int *)shmem_malloc(npes * sizeof(int)); /* symmetric alloc */ + int *dest = (int *)shmem_malloc(total_nelem * sizeof(int)); - for (int i = 0; i < my_nelem; i++) - source[i] = (me * (me + 1)) / 2 + i; - for (int i = 0; i < total_nelem; i++) - dest[i] = -9999; + for (int i = 0; i < my_nelem; i++) + source[i] = (mype * (mype + 1)) / 2 + i; + for (int i = 0; i < total_nelem; i++) + dest[i] = -9999; - /* Wait for all PEs to initialize source/dest: */ - shmem_team_sync(SHMEM_TEAM_WORLD); + /* Wait for all PEs to initialize source/dest: */ + shmem_team_sync(SHMEM_TEAM_WORLD); - shmem_int_collect(SHMEM_TEAM_WORLD, dest, source, my_nelem); + shmem_int_collect(SHMEM_TEAM_WORLD, dest, source, my_nelem); - shmem_set_lock(&lock); /* Lock prevents interleaving printfs */ - printf("%d: %d", me, dest[0]); - for (int i = 1; i < total_nelem; i++) - printf(", %d", dest[i]); - printf("\n"); - shmem_clear_lock(&lock); - shmem_finalize(); - return 0; + shmem_set_lock(&lock); /* Lock prevents interleaving printfs */ + printf("%d: %d", mype, dest[0]); + for (int i = 1; i < total_nelem; i++) + printf(", %d", dest[i]); + 
printf("\n"); + shmem_clear_lock(&lock); + shmem_finalize(); + return 0; } diff --git a/example_code/shmem_ctx.c b/example_code/shmem_ctx.c index 61837f8f5..b122e874a 100644 --- a/example_code/shmem_ctx.c +++ b/example_code/shmem_ctx.c @@ -1,56 +1,57 @@ -#include #include +#include long pwrk[SHMEM_REDUCE_MIN_WRKDATA_SIZE]; long psync[SHMEM_REDUCE_SYNC_SIZE]; -long task_cntr = 0; /* Next task counter */ +long task_cntr = 0; /* Next task counter */ long tasks_done = 0; /* Tasks done by this PE */ long total_done = 0; /* Total tasks done by all PEs */ int main(void) { - int tl, i; - long ntasks = 1024; /* Total tasks per PE */ - - for (i = 0; i < SHMEM_REDUCE_SYNC_SIZE; i++) - psync[i] = SHMEM_SYNC_VALUE; - - shmem_init_thread(SHMEM_THREAD_MULTIPLE, &tl); - if (tl != SHMEM_THREAD_MULTIPLE) shmem_global_exit(1); - - int me = shmem_my_pe(); - int npes = shmem_n_pes(); - -#pragma omp parallel reduction (+:tasks_done) - { - shmem_ctx_t ctx; - int task_pe = me, pes_done = 0; - int ret = shmem_ctx_create(SHMEM_CTX_PRIVATE, &ctx); - - if (ret != 0) { - printf("%d: Error creating context (%d)\n", me, ret); - shmem_global_exit(2); - } - - /* Process tasks on all PEs, starting with the local PE. After - * all tasks on a PE are completed, help the next PE. 
*/ - while (pes_done < npes) { - long task = shmem_atomic_fetch_inc(ctx, &task_cntr, task_pe); - while (task < ntasks) { - /* Perform task (task_pe, task) */ - tasks_done++; - task = shmem_atomic_fetch_inc(ctx, &task_cntr, task_pe); - } - pes_done++; - task_pe = (task_pe + 1) % npes; - } - - shmem_ctx_destroy(ctx); + int tl, i; + long ntasks = 1024; /* Total tasks per PE */ + + for (i = 0; i < SHMEM_REDUCE_SYNC_SIZE; i++) + psync[i] = SHMEM_SYNC_VALUE; + + shmem_init_thread(SHMEM_THREAD_MULTIPLE, &tl); + if (tl != SHMEM_THREAD_MULTIPLE) + shmem_global_exit(1); + + int mype = shmem_my_pe(); + int npes = shmem_n_pes(); + +#pragma omp parallel reduction(+ : tasks_done) + { + shmem_ctx_t ctx; + int task_pe = mype, pes_done = 0; + int ret = shmem_ctx_create(SHMEM_CTX_PRIVATE, &ctx); + + if (ret != 0) { + printf("%d: Error creating context (%d)\n", mype, ret); + shmem_global_exit(2); } - shmem_long_sum_to_all(&total_done, &tasks_done, 1, 0, 0, npes, pwrk, psync); + /* Process tasks on all PEs, starting with the local PE. After + * all tasks on a PE are completed, help the next PE. 
*/ + while (pes_done < npes) { + long task = shmem_atomic_fetch_inc(ctx, &task_cntr, task_pe); + while (task < ntasks) { + /* Perform task (task_pe, task) */ + tasks_done++; + task = shmem_atomic_fetch_inc(ctx, &task_cntr, task_pe); + } + pes_done++; + task_pe = (task_pe + 1) % npes; + } + + shmem_ctx_destroy(ctx); + } + + shmem_long_sum_to_all(&total_done, &tasks_done, 1, 0, 0, npes, pwrk, psync); - int result = (total_done != ntasks * npes); - shmem_finalize(); - return result; + int result = (total_done != ntasks * npes); + shmem_finalize(); + return result; } diff --git a/example_code/shmem_ctx_invalid.c b/example_code/shmem_ctx_invalid.c index d7962d3fb..9e6226135 100644 --- a/example_code/shmem_ctx_invalid.c +++ b/example_code/shmem_ctx_invalid.c @@ -1,13 +1,12 @@ -#include -#include #include +#include +#include _Thread_local shmem_ctx_t thread_ctx = SHMEM_CTX_INVALID; void lib_thread_register(void) { if (thread_ctx == SHMEM_CTX_INVALID) - if (shmem_ctx_create(SHMEM_CTX_PRIVATE, &thread_ctx) && - shmem_ctx_create( 0, &thread_ctx)) + if (shmem_ctx_create(SHMEM_CTX_PRIVATE, &thread_ctx) && shmem_ctx_create(0, &thread_ctx)) thread_ctx = SHMEM_CTX_DEFAULT; } @@ -29,13 +28,13 @@ int main() { if (provided != SHMEM_THREAD_MULTIPLE) shmem_global_exit(2); - const int my_pe = shmem_my_pe(); - const int n_pes = shmem_n_pes(); + const int mype = shmem_my_pe(); + const int npes = shmem_n_pes(); const int count = 1 << 15; - int *src_bufs[n_pes]; - int *dst_bufs[n_pes]; - for (int i = 0; i < n_pes; i++) { + int *src_bufs[npes]; + int *dst_bufs[npes]; + for (int i = 0; i < npes; i++) { src_bufs[i] = shmem_calloc(count, sizeof(*src_bufs[i])); if (src_bufs[i] == NULL) shmem_global_exit(3); @@ -48,16 +47,15 @@ int main() { { int my_thrd = omp_get_thread_num(); #pragma omp for - for (int i = 0; i < n_pes; i++) + for (int i = 0; i < npes; i++) for (int j = 0; j < count; j++) - src_bufs[i][j] = (my_pe << 10) + my_thrd; + src_bufs[i][j] = (mype << 10) + my_thrd; 
lib_thread_register(); #pragma omp for - for (int i = 0; i < n_pes; i++) - lib_thread_putmem(dst_bufs[my_pe], src_bufs[i], - count * sizeof(*src_bufs[i]), i); + for (int i = 0; i < npes; i++) + lib_thread_putmem(dst_bufs[mype], src_bufs[i], count * sizeof(*src_bufs[i]), i); lib_thread_unregister(); } diff --git a/example_code/shmem_ctx_pipelined_reduce.c b/example_code/shmem_ctx_pipelined_reduce.c index c21d7ff89..c235af059 100644 --- a/example_code/shmem_ctx_pipelined_reduce.c +++ b/example_code/shmem_ctx_pipelined_reduce.c @@ -1,56 +1,59 @@ +#include #include #include -#include -#define LEN 8192 /* Full buffer length */ -#define PLEN 512 /* Length of each pipeline stage */ +#define LEN 8192 /* Full buffer length */ +#define PLEN 512 /* Length of each pipeline stage */ int in_buf[LEN], out_buf[LEN]; int main(void) { - int i, j, *pbuf[2]; - shmem_ctx_t ctx[2]; - - shmem_init(); - int me = shmem_my_pe(); - int npes = shmem_n_pes(); - - pbuf[0] = shmem_malloc(PLEN * npes * sizeof(int)); - pbuf[1] = shmem_malloc(PLEN * npes * sizeof(int)); - - int ret_0 = shmem_ctx_create(0, &ctx[0]); - int ret_1 = shmem_ctx_create(0, &ctx[1]); - if (ret_0 || ret_1) shmem_global_exit(1); - - for (i = 0; i < LEN; i++) { - in_buf[i] = me; out_buf[i] = 0; - } - - int p_idx = 0, p = 0; /* Index of ctx and pbuf (p_idx) for current pipeline stage (p) */ + int i, j, *pbuf[2]; + shmem_ctx_t ctx[2]; + + shmem_init(); + int mype = shmem_my_pe(); + int npes = shmem_n_pes(); + + pbuf[0] = shmem_malloc(PLEN * npes * sizeof(int)); + pbuf[1] = shmem_malloc(PLEN * npes * sizeof(int)); + + int ret_0 = shmem_ctx_create(0, &ctx[0]); + int ret_1 = shmem_ctx_create(0, &ctx[1]); + if (ret_0 || ret_1) + shmem_global_exit(1); + + for (i = 0; i < LEN; i++) { + in_buf[i] = mype; + out_buf[i] = 0; + } + + int p_idx = 0, p = 0; /* Index of ctx and pbuf (p_idx) for current pipeline stage (p) */ + for (i = 1; i <= npes; i++) + shmem_put_nbi(ctx[p_idx], &pbuf[p_idx][PLEN * mype], &in_buf[PLEN * p], PLEN, + (mype + 
i) % npes); + + /* Issue communication for pipeline stage p, then accumulate results for stage + * p-1 */ + for (p = 1; p < LEN / PLEN; p++) { + p_idx ^= 1; for (i = 1; i <= npes; i++) - shmem_put_nbi(ctx[p_idx], &pbuf[p_idx][PLEN*me], &in_buf[PLEN*p], - PLEN, (me+i) % npes); - - /* Issue communication for pipeline stage p, then accumulate results for stage p-1 */ - for (p = 1; p < LEN/PLEN; p++) { - p_idx ^= 1; - for (i = 1; i <= npes; i++) - shmem_put_nbi(ctx[p_idx], &pbuf[p_idx][PLEN*me], &in_buf[PLEN*p], - PLEN, (me+i) % npes); - - shmem_ctx_quiet(ctx[p_idx^1]); - shmem_sync_all(); - for (i = 0; i < npes; i++) - for (j = 0; j < PLEN; j++) - out_buf[PLEN*(p-1)+j] += pbuf[p_idx^1][PLEN*i+j]; - } - - shmem_ctx_quiet(ctx[p_idx]); + shmem_put_nbi(ctx[p_idx], &pbuf[p_idx][PLEN * mype], &in_buf[PLEN * p], PLEN, + (mype + i) % npes); + + shmem_ctx_quiet(ctx[p_idx ^ 1]); shmem_sync_all(); for (i = 0; i < npes; i++) - for (j = 0; j < PLEN; j++) - out_buf[PLEN*(p-1)+j] += pbuf[p_idx][PLEN*i+j]; - - shmem_finalize(); - return 0; + for (j = 0; j < PLEN; j++) + out_buf[PLEN * (p - 1) + j] += pbuf[p_idx ^ 1][PLEN * i + j]; + } + + shmem_ctx_quiet(ctx[p_idx]); + shmem_sync_all(); + for (i = 0; i < npes; i++) + for (j = 0; j < PLEN; j++) + out_buf[PLEN * (p - 1) + j] += pbuf[p_idx][PLEN * i + j]; + + shmem_finalize(); + return 0; } diff --git a/example_code/shmem_fence_example.c b/example_code/shmem_fence_example.c index 3f72c8fb1..ac34b40dd 100644 --- a/example_code/shmem_fence_example.c +++ b/example_code/shmem_fence_example.c @@ -1,23 +1,22 @@ -#include #include +#include -int main(void) -{ - int src = 99; - long source[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; - static long dest[10]; - static int targ; - shmem_init(); - int me = shmem_my_pe(); - if (me == 0) { - shmem_put(dest, source, 10, 1); /* put1 */ - shmem_put(dest, source, 10, 2); /* put2 */ - shmem_fence(); - shmem_put(&targ, &src, 1, 1); /* put3 */ - shmem_put(&targ, &src, 1, 2); /* put4 */ - } - 
shmem_barrier_all(); /* sync sender and receiver */ - printf("dest[0] on PE %d is %ld\n", me, dest[0]); - shmem_finalize(); - return 0; +int main(void) { + int src = 99; + long source[10] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + static long dest[10]; + static int targ; + shmem_init(); + int mype = shmem_my_pe(); + if (mype == 0) { + shmem_put(dest, source, 10, 1); /* put1 */ + shmem_put(dest, source, 10, 2); /* put2 */ + shmem_fence(); + shmem_put(&targ, &src, 1, 1); /* put3 */ + shmem_put(&targ, &src, 1, 2); /* put4 */ + } + shmem_barrier_all(); /* sync sender and receiver */ + printf("dest[0] on PE %d is %ld\n", mype, dest[0]); + shmem_finalize(); + return 0; } diff --git a/example_code/shmem_finalize_example.c b/example_code/shmem_finalize_example.c index 26d7860ea..370c2b26c 100644 --- a/example_code/shmem_finalize_example.c +++ b/example_code/shmem_finalize_example.c @@ -1,20 +1,19 @@ +#include #include -#include -int main(void) -{ - static long x = 10101; - long y = -1; +int main(void) { + static long x = 10101; + long y = -1; - shmem_init(); - int me = shmem_my_pe(); - int npes = shmem_n_pes(); + shmem_init(); + int mype = shmem_my_pe(); + int npes = shmem_n_pes(); - if (me == 0) - y = shmem_g(&x, npes-1); + if (mype == 0) + y = shmem_g(&x, npes - 1); - printf("%d: y = %ld\n", me, y); + printf("%d: y = %ld\n", mype, y); - shmem_finalize(); - return 0; + shmem_finalize(); + return 0; } diff --git a/example_code/shmem_g_example.c b/example_code/shmem_g_example.c index f1f49ee38..9e4e530df 100644 --- a/example_code/shmem_g_example.c +++ b/example_code/shmem_g_example.c @@ -1,16 +1,15 @@ -#include #include +#include -int main(void) -{ - long y = -1; - static long x = 10101; - shmem_init(); - int me = shmem_my_pe(); - int npes = shmem_n_pes(); - if (me == 0) - y = shmem_g(&x, npes-1); - printf("%d: y = %ld\n", me, y); - shmem_finalize(); - return 0; +int main(void) { + long y = -1; + static long x = 10101; + shmem_init(); + int mype = shmem_my_pe(); + int npes = 
shmem_n_pes(); + if (mype == 0) + y = shmem_g(&x, npes - 1); + printf("%d: y = %ld\n", mype, y); + shmem_finalize(); + return 0; } diff --git a/example_code/shmem_global_exit_example.c b/example_code/shmem_global_exit_example.c index a3c21e379..e01360764 100644 --- a/example_code/shmem_global_exit_example.c +++ b/example_code/shmem_global_exit_example.c @@ -1,18 +1,17 @@ +#include #include #include -#include -int main(void) -{ - shmem_init(); - int me = shmem_my_pe(); - if (me == 0) { - FILE *fp = fopen("input.txt", "r"); - if (fp == NULL) { /* Input file required by program is not available */ - shmem_global_exit(EXIT_FAILURE); - } - /* do something with the file */ - fclose(fp); +int main(void) { + shmem_init(); + int mype = shmem_my_pe(); + if (mype == 0) { + FILE *fp = fopen("input.txt", "r"); + if (fp == NULL) { /* Input file required by program is not available */ + shmem_global_exit(EXIT_FAILURE); + } + /* do something with the file */ + fclose(fp); } shmem_finalize(); return 0; diff --git a/example_code/shmem_init_example.c b/example_code/shmem_init_example.c index e3192e9ff..13b63df4d 100644 --- a/example_code/shmem_init_example.c +++ b/example_code/shmem_init_example.c @@ -1,23 +1,23 @@ -#include #include +#include int main(void) { - static int targ = 0; + static int targ = 0; - shmem_init(); - int me = shmem_my_pe(); - int receiver = 1 % shmem_n_pes(); + shmem_init(); + int mype = shmem_my_pe(); + int receiver = 1 % shmem_n_pes(); - if (me == 0) { - int src = 33; - shmem_put(&targ, &src, 1, receiver); - } + if (mype == 0) { + int src = 33; + shmem_put(&targ, &src, 1, receiver); + } - shmem_barrier_all(); /* Synchronizes sender and receiver */ + shmem_barrier_all(); /* Synchronizes sender and receiver */ - if (me == receiver) - printf("PE %d targ=%d (expect 33)\n", me, targ); + if (mype == receiver) + printf("PE %d targ=%d (expect 33)\n", mype, targ); - shmem_finalize(); - return 0; + shmem_finalize(); + return 0; } diff --git 
a/example_code/shmem_iput_example.c b/example_code/shmem_iput_example.c index 101533563..09f8c69c9 100644 --- a/example_code/shmem_iput_example.c +++ b/example_code/shmem_iput_example.c @@ -1,19 +1,18 @@ -#include #include +#include -int main(void) -{ - short source[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; - static short dest[10]; - shmem_init(); - int me = shmem_my_pe(); - if (me == 0) /* put 5 elements into dest on PE 1 */ - shmem_iput(dest, source, 1, 2, 5, 1); - shmem_barrier_all(); /* sync sender and receiver */ - if (me == 1) { - printf("dest on PE %d is %hd %hd %hd %hd %hd\n", me, - dest[0], dest[1], dest[2], dest[3], dest[4]); - } - shmem_finalize(); - return 0; +int main(void) { + short source[10] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + static short dest[10]; + shmem_init(); + int mype = shmem_my_pe(); + if (mype == 0) /* put 5 elements into dest on PE 1 */ + shmem_iput(dest, source, 1, 2, 5, 1); + shmem_barrier_all(); /* sync sender and receiver */ + if (mype == 1) { + printf("dest on PE %d is %hd %hd %hd %hd %hd\n", mype, dest[0], dest[1], dest[2], + dest[3], dest[4]); + } + shmem_finalize(); + return 0; } diff --git a/example_code/shmem_lock_example.c b/example_code/shmem_lock_example.c index ee0835413..57624144d 100644 --- a/example_code/shmem_lock_example.c +++ b/example_code/shmem_lock_example.c @@ -1,18 +1,17 @@ -#include #include +#include -int main(void) -{ - static long lock = 0; - static int count = 0; - shmem_init(); - int me = shmem_my_pe(); - shmem_set_lock(&lock); - int val = shmem_g(&count, 0); /* get count value on PE 0 */ - printf("%d: count is %d\n", me, val); - val++; /* incrementing and updating count on PE 0 */ - shmem_p(&count, val, 0); - shmem_clear_lock(&lock); /* ensures count update has completed before clearing the lock */ - shmem_finalize(); - return 0; +int main(void) { + static long lock = 0; + static int count = 0; + shmem_init(); + int mype = shmem_my_pe(); + shmem_set_lock(&lock); + int val = shmem_g(&count, 0); /* get count 
value on PE 0 */ + printf("%d: count is %d\n", mype, val); + val++; /* incrementing and updating count on PE 0 */ + shmem_p(&count, val, 0); + shmem_clear_lock(&lock); /* ensures count update has completed before clearing the lock */ + shmem_finalize(); + return 0; } diff --git a/example_code/shmem_npes_example.c b/example_code/shmem_npes_example.c index 6c6812841..d4158950c 100644 --- a/example_code/shmem_npes_example.c +++ b/example_code/shmem_npes_example.c @@ -1,12 +1,11 @@ -#include #include +#include -int main(void) -{ - shmem_init(); - int me = shmem_my_pe(); - int npes = shmem_n_pes(); - printf("I am #%d of %d PEs executing this program\n", me, npes); - shmem_finalize(); - return 0; +int main(void) { + shmem_init(); + int mype = shmem_my_pe(); + int npes = shmem_n_pes(); + printf("I am #%d of %d PEs executing this program\n", mype, npes); + shmem_finalize(); + return 0; } diff --git a/example_code/shmem_p_example.c b/example_code/shmem_p_example.c index 8ebfd4e61..e7bb97968 100644 --- a/example_code/shmem_p_example.c +++ b/example_code/shmem_p_example.c @@ -1,19 +1,18 @@ -#include #include #include +#include -int main(void) -{ - const double e = 2.71828182; - const double epsilon = 0.00000001; - static double f = 3.1415927; - shmem_init(); - int me = shmem_my_pe(); - if (me == 0) - shmem_p(&f, e, 1); - shmem_barrier_all(); - if (me == 1) - printf("%s\n", (fabs(f - e) < epsilon) ? "OK" : "FAIL"); - shmem_finalize(); - return 0; +int main(void) { + const double e = 2.71828182; + const double epsilon = 0.00000001; + static double f = 3.1415927; + shmem_init(); + int mype = shmem_my_pe(); + if (mype == 0) + shmem_p(&f, e, 1); + shmem_barrier_all(); + if (mype == 1) + printf("%s\n", (fabs(f - e) < epsilon) ? 
"OK" : "FAIL"); + shmem_finalize(); + return 0; } diff --git a/example_code/shmem_ptr_example.c b/example_code/shmem_ptr_example.c index 9ac26ca4d..6f4d0a2c6 100644 --- a/example_code/shmem_ptr_example.c +++ b/example_code/shmem_ptr_example.c @@ -1,23 +1,21 @@ -#include #include +#include -int main(void) -{ - static int dest[4]; - shmem_init(); - int me = shmem_my_pe(); - if (me == 0) { /* initialize PE 1's dest array */ - int* ptr = shmem_ptr(dest, 1); - if (ptr == NULL) - printf("can't use pointer to directly access PE 1's dest array\n"); - else - for (int i = 0; i < 4; i++) - *ptr++ = i + 1; - } - shmem_barrier_all(); - if (me == 1) - printf("PE 1 dest: %d, %d, %d, %d\n", - dest[0], dest[1], dest[2], dest[3]); - shmem_finalize(); - return 0; +int main(void) { + static int dest[4]; + shmem_init(); + int mype = shmem_my_pe(); + if (mype == 0) { /* initialize PE 1's dest array */ + int *ptr = shmem_ptr(dest, 1); + if (ptr == NULL) + printf("can't use pointer to directly access PE 1's dest array\n"); + else + for (int i = 0; i < 4; i++) + *ptr++ = i + 1; + } + shmem_barrier_all(); + if (mype == 1) + printf("PE 1 dest: %d, %d, %d, %d\n", dest[0], dest[1], dest[2], dest[3]); + shmem_finalize(); + return 0; } diff --git a/example_code/shmem_put_example.c b/example_code/shmem_put_example.c index a5f6ffb44..84990c6cc 100644 --- a/example_code/shmem_put_example.c +++ b/example_code/shmem_put_example.c @@ -1,16 +1,15 @@ -#include #include +#include -int main(void) -{ - long source[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; - static long dest[10]; - shmem_init(); - int me = shmem_my_pe(); - if (me == 0) /* put 10 words into dest on PE 1 */ - shmem_put(dest, source, 10, 1); - shmem_barrier_all(); /* sync sender and receiver */ - printf("dest[0] on PE %d is %ld\n", me, dest[0]); - shmem_finalize(); - return 0; +int main(void) { + long source[10] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + static long dest[10]; + shmem_init(); + int mype = shmem_my_pe(); + if (mype == 0) /* put 10 
words into dest on PE 1 */ + shmem_put(dest, source, 10, 1); + shmem_barrier_all(); /* sync sender and receiver */ + printf("dest[0] on PE %d is %ld\n", mype, dest[0]); + shmem_finalize(); + return 0; } diff --git a/example_code/shmem_put_signal_example.c b/example_code/shmem_put_signal_example.c index 179da6d26..9e463f542 100644 --- a/example_code/shmem_put_signal_example.c +++ b/example_code/shmem_put_signal_example.c @@ -3,35 +3,35 @@ #include #include -int main(void) -{ - int i, err_count = 0; +int main(void) { + int i, err_count = 0; - shmem_init(); + shmem_init(); - size_t size = 2048; - int me = shmem_my_pe(); - int n = shmem_n_pes(); - int pe = (me + 1) % n; - uint64_t * message = malloc(size * sizeof(uint64_t)); - static uint64_t sig_addr = 0; + size_t size = 2048; + int mype = shmem_my_pe(); + int npes = shmem_n_pes(); + int pe = (mype + 1) % npes; + uint64_t *message = malloc(size * sizeof(uint64_t)); + static uint64_t sig_addr = 0; - for (i = 0; i < size; i++) { - message[i] = me; - } + for (i = 0; i < size; i++) { + message[i] = mype; + } - uint64_t *data = shmem_calloc(size, sizeof(uint64_t)); + uint64_t *data = shmem_calloc(size, sizeof(uint64_t)); - if (me == 0) { - shmem_put_signal(data, message, size, &sig_addr, 1, SHMEM_SIGNAL_SET, pe); - } else { - shmem_wait_until(&sig_addr, SHMEM_CMP_EQ, 1); - shmem_put_signal(data, data, size, &sig_addr, 1, SHMEM_SIGNAL_SET, pe); - } + if (mype == 0) { + shmem_put_signal(data, message, size, &sig_addr, 1, SHMEM_SIGNAL_SET, pe); + } + else { + shmem_wait_until(&sig_addr, SHMEM_CMP_EQ, 1); + shmem_put_signal(data, data, size, &sig_addr, 1, SHMEM_SIGNAL_SET, pe); + } - free(message); - shmem_free(data); + free(message); + shmem_free(data); - shmem_finalize(); - return 0; + shmem_finalize(); + return 0; } diff --git a/example_code/shmem_quiet_example.c b/example_code/shmem_quiet_example.c index a23163eb0..c414193fe 100644 --- a/example_code/shmem_quiet_example.c +++ b/example_code/shmem_quiet_example.c @@ -1,27 
+1,26 @@ -#include #include +#include -int main(void) -{ - static long dest[3]; - static long source[3] = { 1, 2, 3 }; - static int targ; - static int src = 90; - long x[3] = { 0 }; - int y = 0; - shmem_init(); - int me = shmem_my_pe(); - if (me == 0) { - shmem_put(dest, source, 3, 1); /* put1 */ - shmem_put(&targ, &src, 1, 2); /* put2 */ - shmem_quiet(); - shmem_get(x, dest, 3, 1); /* gets updated value from dest on PE 1 to local array x */ - shmem_get(&y, &targ, 1, 2); /* gets updated value from targ on PE 2 to local variable y */ - printf("x: { %ld, %ld, %ld }\n", x[0], x[1], x[2]); /* x: { 1, 2, 3 } */ - printf("y: %d\n", y); /* y: 90 */ - shmem_put(&targ, &src, 1, 1); /* put3 */ - shmem_put(&targ, &src, 1, 2); /* put4 */ - } - shmem_finalize(); - return 0; +int main(void) { + static long dest[3]; + static long source[3] = {1, 2, 3}; + static int targ; + static int src = 90; + long x[3] = {0}; + int y = 0; + shmem_init(); + int mype = shmem_my_pe(); + if (mype == 0) { + shmem_put(dest, source, 3, 1); /* put1 */ + shmem_put(&targ, &src, 1, 2); /* put2 */ + shmem_quiet(); + shmem_get(x, dest, 3, 1); /* get updated array from dest on PE 1 to local array x */ + shmem_get(&y, &targ, 1, 2); /* get updated value from targ on PE 2 to local variable y */ + printf("x: { %ld, %ld, %ld }\n", x[0], x[1], x[2]); /* x: { 1, 2, 3 } */ + printf("y: %d\n", y); /* y: 90 */ + shmem_put(&targ, &src, 1, 1); /* put3 */ + shmem_put(&targ, &src, 1, 2); /* put4 */ + } + shmem_finalize(); + return 0; } diff --git a/example_code/shmem_reduce_example.c b/example_code/shmem_reduce_example.c index 24f8cbb2b..7d5b28b16 100644 --- a/example_code/shmem_reduce_example.c +++ b/example_code/shmem_reduce_example.c @@ -1,6 +1,6 @@ +#include #include #include -#include /* As if we receive some value from external source */ long recv_a_value(unsigned seed, int npes) { @@ -10,16 +10,14 @@ long recv_a_value(unsigned seed, int npes) { -/* Validate the value we recieved */ +/* Validate the value we received */ unsigned char is_valid(long 
value, int npes) { - if (value == (npes-1)) + if (value == (npes - 1)) return 0; return 1; } -int main(void) -{ - +int main(void) { shmem_init(); - int me = shmem_my_pe(); + int mype = shmem_my_pe(); int npes = shmem_n_pes(); size_t num = 32; @@ -29,11 +27,11 @@ int main(void) unsigned char *valid_me = shmem_malloc(num * sizeof(unsigned char)); unsigned char *valid_all = shmem_malloc(num * sizeof(unsigned char)); - values[0] = recv_a_value((unsigned)me, npes); + values[0] = recv_a_value((unsigned)mype, npes); valid_me[0] = is_valid(values[0], npes); - for (int i=1; i < num; i++) { - values[i] = recv_a_value((unsigned)values[i-1], npes); + for (int i = 1; i < num; i++) { + values[i] = recv_a_value((unsigned)values[i - 1], npes); valid_me[i] = is_valid(values[i], npes); } @@ -43,12 +41,12 @@ int main(void) shmem_and_reduce(SHMEM_TEAM_WORLD, valid_all, valid_me, num); shmem_sum_reduce(SHMEM_TEAM_WORLD, sums, values, num); - for (int i=0; i < num; i++) { + for (int i = 0; i < num; i++) { if (valid_all[i]) { - printf ("[%d] = %ld\n", i, sums[i]); + printf("[%d] = %ld\n", i, sums[i]); } else { - printf ("[%d] = invalid on one or more pe\n", i); + printf("[%d] = invalid on one or more pe\n", i); } } diff --git a/example_code/shmem_sync_example.c b/example_code/shmem_sync_example.c index 9f8f7fcf5..f551537a4 100644 --- a/example_code/shmem_sync_example.c +++ b/example_code/shmem_sync_example.c @@ -1,60 +1,61 @@ -#include #include +#include -int main(void) -{ - static int x = 10101; - - shmem_team_t twos_team = SHMEM_TEAM_INVALID; - shmem_team_t threes_team = SHMEM_TEAM_INVALID; - shmem_team_config_t *config; - - shmem_init(); - config = NULL; - int me = shmem_my_pe(); - int npes = shmem_n_pes(); - - if (npes > 2) - shmem_team_split_strided(SHMEM_TEAM_WORLD, 2, 2, (npes-1) / 2, config, - 0, &twos_team); - - if (npes > 3) - shmem_team_split_strided(SHMEM_TEAM_WORLD, 3, 3, (npes-1) / 3, config, - 0, &threes_team); - - int my_pe_twos = shmem_team_my_pe(twos_team); - int 
my_pe_threes = shmem_team_my_pe(threes_team); - int npes_twos = shmem_team_n_pes(twos_team); - int npes_threes = shmem_team_n_pes(threes_team); - - if (twos_team != SHMEM_TEAM_INVALID) { - /* put the value 2 to the next team member in a circular fashion */ - shmem_p(&x, 2, shmem_team_translate_pe(twos_team, (my_pe_twos + 1) % - npes_twos, SHMEM_TEAM_WORLD)); - shmem_quiet(); - shmem_sync(twos_team); - } - - shmem_sync(SHMEM_TEAM_WORLD); - - if (threes_team != SHMEM_TEAM_INVALID) { - /* put the value 3 to the next team member in a circular fashion */ - shmem_p(&x, 3, shmem_team_translate_pe(threes_team, (my_pe_threes + 1) % - npes_threes, SHMEM_TEAM_WORLD)); - shmem_quiet(); - shmem_sync(threes_team); - } - - if (me && me % 3 == 0) { - if (x != 3) shmem_global_exit(3); - } - else if (me && me % 2 == 0) { - if (x != 2) shmem_global_exit(2); - } - else if (x != 10101) { - shmem_global_exit(1); - } - - shmem_finalize(); - return 0; +int main(void) { + static int x = 10101; + + shmem_team_t twos_team = SHMEM_TEAM_INVALID; + shmem_team_t threes_team = SHMEM_TEAM_INVALID; + shmem_team_config_t *config = NULL; + + shmem_init(); + int mype = shmem_my_pe(); + int npes = shmem_n_pes(); + + if (npes > 2) + shmem_team_split_strided(SHMEM_TEAM_WORLD, 2, 2, (npes - 1) / 2, config, 0, &twos_team); + + if (npes > 3) + shmem_team_split_strided(SHMEM_TEAM_WORLD, 3, 3, (npes - 1) / 3, config, 0, + &threes_team); + + int mype_twos = shmem_team_my_pe(twos_team); + int mype_threes = shmem_team_my_pe(threes_team); + int npes_twos = shmem_team_n_pes(twos_team); + int npes_threes = shmem_team_n_pes(threes_team); + + if (twos_team != SHMEM_TEAM_INVALID) { + /* put the value 2 to the next team member in a circular fashion */ + shmem_p( + &x, 2, + shmem_team_translate_pe(twos_team, (mype_twos + 1) % npes_twos, SHMEM_TEAM_WORLD)); + shmem_quiet(); + shmem_sync(twos_team); + } + + shmem_sync(SHMEM_TEAM_WORLD); + + if (threes_team != SHMEM_TEAM_INVALID) { + /* put the value 3 to the next team 
member in a circular fashion */ + shmem_p(&x, 3, + shmem_team_translate_pe(threes_team, (mype_threes + 1) % npes_threes, + SHMEM_TEAM_WORLD)); + shmem_quiet(); + shmem_sync(threes_team); + } + + if (mype && mype % 3 == 0) { + if (x != 3) + shmem_global_exit(3); + } + else if (mype && mype % 2 == 0) { + if (x != 2) + shmem_global_exit(2); + } + else if (x != 10101) { + shmem_global_exit(1); + } + + shmem_finalize(); + return 0; } diff --git a/example_code/shmem_team_context.c b/example_code/shmem_team_context.c index 5b1e07ffc..0e3559946 100644 --- a/example_code/shmem_team_context.c +++ b/example_code/shmem_team_context.c @@ -3,8 +3,7 @@ int isum, ival; -int my_ctx_translate_pe(shmem_ctx_t src_ctx, int src_pe, shmem_ctx_t dest_ctx) -{ +int my_ctx_translate_pe(shmem_ctx_t src_ctx, int src_pe, shmem_ctx_t dest_ctx) { if (src_ctx == SHMEM_CTX_INVALID) { return -1; } @@ -25,33 +24,29 @@ shmem_ctx_t my_team_create_ctx(shmem_team_t team) { shmem_ctx_t ctx; if (shmem_team_create_ctx(team, 0, &ctx) != 0) { - fprintf (stderr, "Failed to create context for PE team\n"); + fprintf(stderr, "Failed to create context for PE team\n"); return SHMEM_CTX_INVALID; } return ctx; } -void my_send_to_neighbor(shmem_ctx_t ctx, int *val) -{ +void my_send_to_neighbor(shmem_ctx_t ctx, int *val) { if (ctx == SHMEM_CTX_INVALID) { - fprintf (stderr, "Send to neighbor fail due to invalid context\n"); + fprintf(stderr, "Send to neighbor fail due to invalid context\n"); return; } shmem_team_t team; shmem_ctx_get_team(ctx, &team); - int pe = shmem_team_my_pe(team); - int npes = shmem_team_n_pes(team); - int rpe = (pe + 1) % npes; + int team_mype = shmem_team_my_pe(team); + int team_npes = shmem_team_n_pes(team); + int rpe = (team_mype + 1) % team_npes; // put my pe number in the buffer on my right hand neighbor - shmem_ctx_int_put(ctx, val, &pe, 1, rpe); + shmem_ctx_int_put(ctx, val, &team_mype, 1, rpe); } - - -int main() -{ +int main() { shmem_init(); int npes = shmem_n_pes(); @@ -59,8 +54,7 @@ int main() shmem_team_t team_2s, team_3s; shmem_ctx_t
ctx_2s, ctx_3s; - shmem_team_config_t conf; - conf.num_contexts = 1; + shmem_team_config_t conf = {.num_contexts = 1}; long cmask = SHMEM_TEAM_NUM_CONTEXTS; // Create team with PEs numbered 0, 2, 4, ... @@ -85,7 +79,7 @@ int main() int pe4_of_3s_in_2s = my_ctx_translate_pe(ctx_3s, 4, ctx_2s); if (pe4_of_3s_in_2s < 0) { - fprintf (stderr, "Fail to translate pe 4 from 3s context to 2s context\n"); + fprintf(stderr, "Fail to translate pe 4 from 3s context to 2s context\n"); } else { // Add up the results on pe 4 of the 3s team, using the 2s team context @@ -98,7 +92,7 @@ int main() shmem_team_sync(SHMEM_TEAM_WORLD); if (shmem_team_my_pe(team_3s) == 4) { - printf ("The total value on PE 4 of the 3s team is %d\n", isum); + printf("The total value on PE 4 of the 3s team is %d\n", isum); } // Destroy contexts before teams @@ -109,4 +103,5 @@ int main() shmem_team_destroy(team_3s); shmem_finalize(); + return 0; } diff --git a/example_code/shmem_team_split_2D.c b/example_code/shmem_team_split_2D.c index 2059f3279..08a77c15d 100644 --- a/example_code/shmem_team_split_2D.c +++ b/example_code/shmem_team_split_2D.c @@ -1,21 +1,20 @@ -#include #include +#include -int main(void) -{ +int main(void) { int xdim = 3; int ydim = 4; shmem_init(); - int pe = shmem_my_pe(); + int mype = shmem_my_pe(); int npes = shmem_n_pes(); - if (npes < (xdim*ydim)) { - printf ("Not enough PEs to create 4x3xN layout\n"); + if (npes < (xdim * ydim)) { + printf("Not enough PEs to create 4x3xN layout\n"); exit(1); } - int zdim = (npes / (xdim*ydim)) + ( ((npes % (xdim*ydim)) > 0) ? 1 : 0 ); + int zdim = (npes / (xdim * ydim)) + (((npes % (xdim * ydim)) > 0) ? 
1 : 0); shmem_team_t xteam, yzteam, yteam, zteam; shmem_team_split_2d(SHMEM_TEAM_WORLD, xdim, NULL, 0, &xteam, NULL, 0, &yzteam); @@ -29,14 +28,17 @@ int main(void) int my_y = shmem_team_my_pe(yteam); int my_z = shmem_team_my_pe(zteam); - for (int zdx = 0; zdx < zdim; zdx++) - for (int ydx = 0; ydx < ydim; ydx++) + for (int zdx = 0; zdx < zdim; zdx++) { + for (int ydx = 0; ydx < ydim; ydx++) { for (int xdx = 0; xdx < xdim; xdx++) { if ((my_x == xdx) && (my_y == ydx) && (my_z == zdx)) { - printf ("(%d, %d, %d) is me = %d\n", my_x, my_y, my_z, pe); + printf("(%d, %d, %d) is mype = %d\n", my_x, my_y, my_z, mype); } shmem_team_sync(SHMEM_TEAM_WORLD); } + } + } shmem_finalize(); + return 0; } diff --git a/example_code/shmem_team_split_strided.c b/example_code/shmem_team_split_strided.c index 42973ba57..d141cb4ef 100644 --- a/example_code/shmem_team_split_strided.c +++ b/example_code/shmem_team_split_strided.c @@ -1,35 +1,26 @@ -/* - * OpenSHMEM shmem_team_split_strided example to create a team of all even - * ranked PEs from SHMEM_TEAM_WORLD - */ - #include #include -int main(int argc, char *argv[]) -{ - int rank, npes; - int t_pe, t_size; - shmem_team_t new_team; - shmem_team_config_t *config; +int main(void) { + shmem_team_t new_team; + shmem_team_config_t *config; - shmem_init(); - config = NULL; - rank = shmem_my_pe(); - npes = shmem_n_pes(); + shmem_init(); + config = NULL; + int mype = shmem_my_pe(); + int npes = shmem_n_pes(); - shmem_team_split_strided(SHMEM_TEAM_WORLD, 0, 2, npes / 2, config, 0, - &new_team); + shmem_team_split_strided(SHMEM_TEAM_WORLD, 0, 2, npes / 2, config, 0, &new_team); - if (new_team != SHMEM_TEAM_INVALID) { - t_size = shmem_team_n_pes(new_team); - t_pe = shmem_team_my_pe(new_team); + if (new_team != SHMEM_TEAM_INVALID) { + int team_npes = shmem_team_n_pes(new_team); + int team_mype = shmem_team_my_pe(new_team); - if ((rank % 2 != 0) || (rank / 2 != t_pe) || (npes / 2 != t_size)) { - shmem_global_exit(1); - } + if ((mype % 2 != 0) || 
(mype / 2 != team_mype) || (npes / 2 != team_npes)) { + shmem_global_exit(1); } + } - shmem_finalize(); - return 0; + shmem_finalize(); + return 0; } diff --git a/example_code/shmem_team_translate_pe.c b/example_code/shmem_team_translate_pe.c index 15aec0a6e..4d30a75eb 100644 --- a/example_code/shmem_team_translate_pe.c +++ b/example_code/shmem_team_translate_pe.c @@ -1,32 +1,24 @@ -#include #include +#include -int main(void) -{ - int my_pe; - int n_pes; - int t_pe; - int t_global; - shmem_team_t new_team; - shmem_team_config_t *config; - - shmem_init(); - config = NULL; - my_pe = shmem_my_pe(); - n_pes = shmem_n_pes(); +int main(void) { + shmem_init(); + shmem_team_config_t *config = NULL; + int mype = shmem_my_pe(); + int npes = shmem_n_pes(); - shmem_team_split_strided(SHMEM_TEAM_WORLD, 0, 2, (n_pes + 1) / 2, - config, 0, &new_team); + shmem_team_t new_team; + shmem_team_split_strided(SHMEM_TEAM_WORLD, 0, 2, (npes + 1) / 2, config, 0, &new_team); - if (new_team != SHMEM_TEAM_INVALID) { - t_pe = shmem_team_my_pe(new_team); - t_global = shmem_team_translate_pe(new_team, t_pe, SHMEM_TEAM_WORLD); + if (new_team != SHMEM_TEAM_INVALID) { + int team_mype = shmem_team_my_pe(new_team); + int global_mype = shmem_team_translate_pe(new_team, team_mype, SHMEM_TEAM_WORLD); - if (t_global != my_pe) { - shmem_global_exit(1); - } + if (global_mype != mype) { + shmem_global_exit(1); } + } - shmem_finalize(); - return 0; + shmem_finalize(); + return 0; } diff --git a/example_code/shmem_test_any_example.c b/example_code/shmem_test_any_example.c index 64c784c3e..e4c5a6941 100644 --- a/example_code/shmem_test_any_example.c +++ b/example_code/shmem_test_any_example.c @@ -1,8 +1,7 @@ #include #include -int main(void) -{ +int main(void) { shmem_init(); int mype = shmem_my_pe(); int npes = shmem_n_pes(); @@ -11,19 +10,20 @@ int main(void) int *status = calloc(npes, sizeof(int)); for (int i = 0; i < npes; i++) - shmem_atomic_set(&flags[mype], 1, i); + shmem_atomic_set(&flags[mype], 1, i); 
int ncompleted = 0; size_t completed_idx; while (ncompleted < npes) { - completed_idx = shmem_test_any(flags, npes, status, SHMEM_CMP_EQ, 1); - if (completed_idx != SIZE_MAX) { - ncompleted++; - status[completed_idx] = 1; - } else { - /* Overlap some computation here */ - } + completed_idx = shmem_test_any(flags, npes, status, SHMEM_CMP_EQ, 1); + if (completed_idx != SIZE_MAX) { + ncompleted++; + status[completed_idx] = 1; + } + else { + /* Overlap some computation here */ + } } free(status); diff --git a/example_code/shmem_test_example1.c b/example_code/shmem_test_example1.c index 8c0f5c9e9..8ed1eb0dc 100644 --- a/example_code/shmem_test_example1.c +++ b/example_code/shmem_test_example1.c @@ -1,23 +1,20 @@ -#include #include +#include -int user_wait_any(long *ivar, int count, int cmp, long value) -{ +int user_wait_any(long *ivar, int count, int cmp, long value) { int idx = 0; while (!shmem_test(&ivar[idx], cmp, value)) idx = (idx + 1) % count; return idx; } -int main(void) -{ +int main(void) { shmem_init(); const int mype = shmem_my_pe(); const int npes = shmem_n_pes(); long *wait_vars = shmem_calloc(npes, sizeof(long)); - if (mype == 0) - { + if (mype == 0) { int who = user_wait_any(wait_vars, npes, SHMEM_CMP_NE, 0); printf("PE %d observed first update from PE %d\n", mype, who); } diff --git a/example_code/shmem_test_some_example.c b/example_code/shmem_test_some_example.c index 9c73a6cdc..7c69c06f3 100644 --- a/example_code/shmem_test_some_example.c +++ b/example_code/shmem_test_some_example.c @@ -3,8 +3,7 @@ #define N 100 -int main(void) -{ +int main(void) { int total_sum = 0; shmem_init(); @@ -19,37 +18,38 @@ int main(void) int *status = calloc(npes, sizeof(int)); for (int i = 0; i < N; i++) - my_data[i] = mype*N + i; + my_data[i] = mype * N + i; for (int i = 0; i < npes; i++) - shmem_put_nbi(&all_data[mype*N], my_data, N, i); + shmem_put_nbi(&all_data[mype * N], my_data, N, i); shmem_fence(); for (int i = 0; i < npes; i++) - shmem_atomic_set(&flags[mype], 1, 
i); + shmem_atomic_set(&flags[mype], 1, i); int ncompleted = 0; while (ncompleted < npes) { - int ntested = shmem_test_some(flags, npes, indices, status, SHMEM_CMP_NE, 0); - if (ntested > 0) { - for (int i = 0; i < ntested; i++) { - for (int j = 0; j < N; j++) { - total_sum += all_data[indices[i]*N + j]; - } - status[indices[i]] = 1; - } - ncompleted += ntested; - } else { - /* Overlap some computation here */ + int ntested = shmem_test_some(flags, npes, indices, status, SHMEM_CMP_NE, 0); + if (ntested > 0) { + for (int i = 0; i < ntested; i++) { + for (int j = 0; j < N; j++) { + total_sum += all_data[indices[i] * N + j]; + } + status[indices[i]] = 1; } + ncompleted += ntested; + } + else { + /* Overlap some computation here */ + } } /* check the result */ int M = N * npes - 1; if (total_sum != M * (M + 1) / 2) { - shmem_global_exit(1); + shmem_global_exit(1); } shmem_finalize(); diff --git a/example_code/shmem_wait_until_all.c b/example_code/shmem_wait_until_all.c index ed2eacaf7..a2e9724f6 100644 --- a/example_code/shmem_wait_until_all.c +++ b/example_code/shmem_wait_until_all.c @@ -1,7 +1,6 @@ #include -int main(void) -{ +int main(void) { shmem_init(); int mype = shmem_my_pe(); int npes = shmem_n_pes(); @@ -10,7 +9,7 @@ int main(void) int *status = NULL; for (int i = 0; i < npes; i++) - shmem_atomic_set(&flags[mype], 1, i); + shmem_atomic_set(&flags[mype], 1, i); shmem_wait_until_all(flags, npes, status, SHMEM_CMP_EQ, 1); diff --git a/example_code/shmem_wait_until_any_all2all_sum.c b/example_code/shmem_wait_until_any_all2all_sum.c index 3317fec6a..ddaee5f5b 100644 --- a/example_code/shmem_wait_until_any_all2all_sum.c +++ b/example_code/shmem_wait_until_any_all2all_sum.c @@ -3,8 +3,7 @@ #define N 100 -int main(void) -{ +int main(void) { int total_sum = 0; shmem_init(); @@ -18,28 +17,28 @@ int main(void) int *status = calloc(npes, sizeof(int)); for (int i = 0; i < N; i++) - my_data[i] = mype*N + i; + my_data[i] = mype * N + i; for (int i = 0; i < npes; i++) - 
shmem_put_nbi(&all_data[mype*N], my_data, N, i); + shmem_put_nbi(&all_data[mype * N], my_data, N, i); shmem_fence(); for (int i = 0; i < npes; i++) - shmem_atomic_set(&flags[mype], 1, i); + shmem_atomic_set(&flags[mype], 1, i); for (int i = 0; i < npes; i++) { - size_t completed_idx = shmem_wait_until_any(flags, npes, status, SHMEM_CMP_NE, 0); - for (int j = 0; j < N; j++) { - total_sum += all_data[completed_idx * N + j]; - } - status[completed_idx] = 1; + size_t completed_idx = shmem_wait_until_any(flags, npes, status, SHMEM_CMP_NE, 0); + for (int j = 0; j < N; j++) { + total_sum += all_data[completed_idx * N + j]; + } + status[completed_idx] = 1; } /* check the result */ int M = N * npes - 1; if (total_sum != M * (M + 1) / 2) { - shmem_global_exit(1); + shmem_global_exit(1); } shmem_finalize(); diff --git a/example_code/shmem_wait_until_any_vector.c b/example_code/shmem_wait_until_any_vector.c index 266585e51..23c77d259 100644 --- a/example_code/shmem_wait_until_any_vector.c +++ b/example_code/shmem_wait_until_any_vector.c @@ -3,8 +3,7 @@ #define N 100 -int main(void) -{ +int main(void) { int total_sum = 0; shmem_init(); @@ -17,24 +16,24 @@ int main(void) /* All odd PEs put 2 and all even PEs put 1 */ for (int i = 0; i < npes; i++) { - shmem_atomic_set(&ivars[mype], mype % 2 + 1, i); + shmem_atomic_set(&ivars[mype], mype % 2 + 1, i); - /* Set cmp_values to the expected values coming from each PE */ - cmp_values[i] = i % 2 + 1; + /* Set cmp_values to the expected values coming from each PE */ + cmp_values[i] = i % 2 + 1; } for (int i = 0; i < npes; i++) { - size_t completed_idx = shmem_wait_until_any_vector(ivars, npes, status, - SHMEM_CMP_EQ, cmp_values); - status[completed_idx] = 1; - total_sum += ivars[completed_idx]; + size_t completed_idx = + shmem_wait_until_any_vector(ivars, npes, status, SHMEM_CMP_EQ, cmp_values); + status[completed_idx] = 1; + total_sum += ivars[completed_idx]; } /* check the result */ int correct_result = npes + npes / 2; if (total_sum 
!= correct_result) { - shmem_global_exit(1); + shmem_global_exit(1); } shmem_finalize(); diff --git a/example_code/shmem_wait_until_some_all2all_sum.c b/example_code/shmem_wait_until_some_all2all_sum.c index 1c1a2f951..e80e8984b 100644 --- a/example_code/shmem_wait_until_some_all2all_sum.c +++ b/example_code/shmem_wait_until_some_all2all_sum.c @@ -3,8 +3,7 @@ #define N 100 -int main(void) -{ +int main(void) { int total_sum = 0; shmem_init(); @@ -19,31 +18,31 @@ int main(void) int *status = calloc(npes, sizeof(int)); for (int i = 0; i < N; i++) - my_data[i] = mype*N + i; + my_data[i] = mype * N + i; for (int i = 0; i < npes; i++) - shmem_put_nbi(&all_data[mype*N], my_data, N, i); + shmem_put_nbi(&all_data[mype * N], my_data, N, i); shmem_fence(); for (int i = 0; i < npes; i++) - shmem_atomic_set(&flags[mype], 1, i); + shmem_atomic_set(&flags[mype], 1, i); size_t ncompleted; - while ((ncompleted = shmem_wait_until_some(flags, npes, indices, - status, SHMEM_CMP_NE, 0))) { - for (size_t i = 0; i < ncompleted; i++) { - for (size_t j = 0; j < N; j++) { - total_sum += all_data[indices[i]*N + j]; - } - status[indices[i]] = 1; + while ( + (ncompleted = shmem_wait_until_some(flags, npes, indices, status, SHMEM_CMP_NE, 0))) { + for (size_t i = 0; i < ncompleted; i++) { + for (size_t j = 0; j < N; j++) { + total_sum += all_data[indices[i] * N + j]; } + status[indices[i]] = 1; + } } /* check the result */ int M = N * npes - 1; if (total_sum != M * (M + 1) / 2) { - shmem_global_exit(1); + shmem_global_exit(1); } shmem_finalize(); diff --git a/example_code/writing_shmem_example.c b/example_code/writing_shmem_example.c index e04139e09..96537f3cd 100644 --- a/example_code/writing_shmem_example.c +++ b/example_code/writing_shmem_example.c @@ -1,35 +1,34 @@ -#include #include +#include -#define SIZE 16 +#define N 16 -int main(void) -{ - short source[SIZE]; - static short dest[SIZE]; - static long lock = 0; - shmem_init(); - int me = shmem_my_pe(); - int npes = shmem_n_pes(); - if (me 
== 0) { - /* initialize array */ - for (int i = 0; i < SIZE; i++) - source[i] = i; - /* local, not symmetric */ - /* static makes it symmetric */ - /* put "size" words into dest on each PE */ - for (int i = 1; i < npes; i++) - shmem_put(dest, source, SIZE, i); - } - shmem_barrier_all(); /* sync sender and receiver */ - if (me != 0) { - shmem_set_lock(&lock); - printf("dest on PE %d is \t", me); - for (int i = 0; i < SIZE; i++) - printf("%hd \t", dest[i]); - printf("\n"); - shmem_clear_lock(&lock); - } - shmem_finalize(); - return 0; +int main(void) { + short source[N]; + static short dest[N]; + static long lock = 0; + shmem_init(); + int mype = shmem_my_pe(); + int npes = shmem_n_pes(); + if (mype == 0) { + /* initialize array */ + for (int i = 0; i < N; i++) + source[i] = i; + /* local, not symmetric */ + /* static makes it symmetric */ + /* put "size" words into dest on each PE */ + for (int i = 1; i < npes; i++) + shmem_put(dest, source, N, i); + } + shmem_barrier_all(); /* sync sender and receiver */ + if (mype != 0) { + shmem_set_lock(&lock); + printf("dest on PE %d is \t", mype); + for (int i = 0; i < N; i++) + printf("%hd \t", dest[i]); + printf("\n"); + shmem_clear_lock(&lock); + } + shmem_finalize(); + return 0; } diff --git a/main_spec.tex b/main_spec.tex index d11ab6857..b0eaadc14 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -100,7 +100,6 @@ \subsection{Memory Management Routines} \subsubsection{\textbf{SHMEM\_MALLOC, SHMEM\_FREE, SHMEM\_REALLOC, SHMEM\_ALIGN}}\label{subsec:shfree} \input{content/shmem_malloc.tex} -\newpage \subsubsection{\textbf{SHMEM\_MALLOC\_WITH\_HINTS}}\label{subsec:shmmallochint} \input{content/shmem_malloc_hints.tex} diff --git a/utils/defs.tex b/utils/defs.tex index 59c0972dd..78a430972 100644 --- a/utils/defs.tex +++ b/utils/defs.tex @@ -84,7 +84,7 @@ %% Specialized declaration/creation and generate reference. 
\newcommand{\EnvVarDecl}[1]{\EnvVarRef{#1}} \newcommand{\FuncDecl}[1]{{\ListingsCurrentStyle{#1}}\FuncIndex{#1}} -\newcommand{\FuncParam}[1]{<{\ListingsKeywordStyle{#1}}>} +\newcommand{\FuncParam}[1]{{\ListingsParamStyle{#1}}} \newcommand{\LibConstDecl}[2][\CorCpp]{% \parbox[t]{5cm}{~\\[-4pt] #1: \\\hspace*{8mm} \LibConstRef{#2} \\~}} \newcommand{\LibHandleDecl}[2][\CorCpp]{% @@ -126,62 +126,8 @@ \newcommand\ListingsCurrentStyle{} \lst@AddToHook{Output}{\global\let\ListingsCurrentStyle\lst@thestyle} \lst@AddToHook{OutputOther}{\global\let\ListingsCurrentStyle\lst@thestyle} -\newcommand\ListingsKeywordStyle{} -\lst@AddToHook{Output}{\global\let\ListingsKeywordStyle\lst@keywordstyle} -\lst@AddToHook{OutputOther}{\global\let\ListingsKeywordStyle\lst@keywordstyle} \makeatother -% -% This is used to put line numbers on plain pages. Used in draft.tex -% -\makeatletter - -\def\withlinenumbers{\relax - \def\@evenfoot{\hbox to 0pt{\hss\LineNumberRuler\hskip 1.5pc}\hfil}\relax - \def\@oddfoot{\hfil\hbox to 0pt{\hskip 1.5pc\LineNumberRuler\hss}}} - -\def\LineNumberRuler{\vbox to 0pt{\vss\normalsize \baselineskip13.6pt - \lineskip 1pt \normallineskip 1pt \def\baselinestretch{1}\relax - \LNR{1}\LNR{2}\LNR{3}\LNR{4}\LNR{5}\LNR{6}\LNR{7}\LNR{8}\LNR{9} - \LNR{10}\LNR{11}\LNR{12}\LNR{13}\LNR{14} - \LNR{15}\LNR{16}\LNR{17}\LNR{18}\LNR{19} - \LNR{20}\LNR{21}\LNR{22}\LNR{23}\LNR{24} - \LNR{25}\LNR{26}\LNR{27}\LNR{28}\LNR{29} - \LNR{30}\LNR{31}\LNR{32}\LNR{33}\LNR{34}\LNR{35} - \LNR{36}\LNR{37}\LNR{38}\LNR{39} - \LNR{40}\LNR{41}\LNR{42}\LNR{43}\LNR{44} - \LNR{45}\LNR{46}\LNR{47}\LNR{48} - \vskip 31pt}} -\def\LNR#1{\hbox to 1pc{\hfil\tiny#1\hfil}} - -\def\ps@plainwithlinenumbers{\let\@mkboth\@gobbletwo - \def\@oddhead{} - \def\@oddfoot{\hfil\rm\thepage\hfil - \hbox to 0pt{\hskip 1.5pc\LineNumberRuler\hss}} - \def\@evenhead{} - \def\@evenfoot{\hbox to 0pt{\hss - \LineNumberRuler\hskip 1.5pc}\rm\hfil\thepage\hfil}} - - % Contents is done with \chapter*{Contents}, so we need to turn off 
the - % line numbers in this case. Easiest to look at def - -\newwrite\chappages -\immediate\openout\chappages=chappage.txt -\def\writespace{ } - -\def\incontents{0} -\newif\ifcontents -\contentsfalse -\def\chapter{\clearpage \ifcontents\else\thispagestyle{plainwithlinenumbers}\fi - \write\chappages{Chapter \thechapter\writespace - \the\count0} - \global\@topnum\z@ \@afterindentfalse \secdef\@chapter\@schapter} - -\makeatother - -% -% End this is used to put line numbers on plain pages. Used in draft.tex -% - % % Use Sans Serif font for sections, etc. % @@ -204,146 +150,65 @@ % % This section is for example code listings % -\definecolor{gray}{rgb}{0.92,0.92,0.92} - -\lstset{ % set defaults for languages not otherwise defined - breakatwhitespace=true, % sets if automatic breaks should only happen at whitespace - basicstyle=\ttfamily\footnotesize, - breaklines=true, % sets automatic line breaking - extendedchars=true, % lets you use non-ASCII characters; for 8-bits - % encodings only, does not work with UTF-8 - keepspaces=true, % keeps spaces in text, useful for keeping indentation of code - % (possibly needs columns=flexible) - morekeywords={*,...}, % if you want to add more keywords to the set - showspaces=false, % show spaces everywhere adding particular underscores; - % it overrides 'showstringspaces' - showstringspaces=false, % underline spaces within strings only - showtabs=false, % show tabs within strings adding particular underscores -} - -\def\StandardListing { - \lstset { - breakatwhitespace=false, % sets if automatic breaks should only happen at whitespace - basicstyle=\ttfamily\footnotesize, - breaklines=true, % sets automatic line breaking - escapeinside={\%*}{*)}, % if you want to add LaTeX within your code - extendedchars=true, % lets you use non-ASCII characters; for 8-bits - % encodings only, does not work with UTF-8 - keepspaces=true, % keeps spaces in text, useful for keeping - % indentation of code (possibly needs columns=flexible) - 
morekeywords={*,...}, % if you want to add more keywords to the set - showspaces=false, % show spaces everywhere adding particular underscores; - % it overrides 'showstringspaces' - showstringspaces=false, % underline spaces within strings only - showtabs=false, % show tabs within strings adding particular underscores - backgroundcolor=\color{gray}, - } -} - -\def\ProgramNumberedListing { - \StandardListing - \lstset { - numbers=left, - numberstyle=\footnotesize - } -} - -\newcommand{\numberedlisting}[2] { - \ProgramNumberedListing - \lstinputlisting[#1]{#2} - \StandardListing -} -\newcommand{\outputlisting}[2] { -\begin{minipage}{\linewidth} -\vspace{0.1in} - \lstinputlisting[#1]{#2} - \StandardListing -\vspace{0.1in} -\end{minipage} -} +\newcommand{\ListingsParamStyle}{\bfseries\itshape\color{CadetBlue}} -\lstdefinelanguage{OSH+C}[]{C}{ - classoffset=1, - morekeywords={ - size_t, ptrdiff_t, - SHMEM_BCAST_SYNC_SIZE, SHMEM_SYNC_VALUE, - start_pes, - my_pe, _my_pe, shmem_my_pe, - num_pes, _num_pes, shmem_n_pes, - shmem_int_p, shmem_short_p, shmem_long_p, - shmem_int_put, shmem_short_put, shmem_long_put, - shmem_barrier_all, shmem_barrier, - shmalloc, shfree, shrealloc, - shmem_broadcast32, shmem_broadcast64, - shmem_short_inc, shmem_int_inc, shmem_long_inc, - shmem_short_add, shmem_int_add, shmem_long_add, - shmem_short_finc, shmem_int_finc, shmem_long_finc, - shmem_short_fadd, shmem_int_fadd, shmem_long_fadd, - shmem_set_lock, shmem_test_lock, shmem_clear_lock, - shmem_long_sum_to_all, - shmem_complexd_sum_to_all - }, - keywordstyle=\color{black}\textbf, +\lstdefinestyle{SourceListingsDefault}{ + language={C}, + breaklines=true, + breakatwhitespace=true, + escapechar=@, + tabsize=2, + basicstyle=\ttfamily\footnotesize, + showstringspaces=false, + % Preprocessor directives classoffset=0, - sensitive=true -} - -\lstdefinelanguage{OSH2+C}[]{OSH+C}{ + keywords={\#include, \#pragma, \#if, \#ifdef, \#else, \#endif}, + keywordstyle=\bfseries\color{CadetBlue}, + % Language 
keywords classoffset=1, - morekeywords={ - shmem_init, - shmem_finalize, - shmem_malloc, - shmem_my_pe, - shmem_error, - shmem_global_exit, + keywords={ + if, else, for, while, do, return, break, continue, + static, const, volatile, + sizeof, typedef, struct, union, + switch, case, default, goto, }, - keywordstyle=\color{black}\textbf, - classoffset=0, - sensitive=true -} - -\lstdefinelanguage{OSH+F}[]{Fortran}{ - classoffset=1, - morekeywords={ - SHMEM_BCAST_SYNC_SIZE, SHMEM_SYNC_VALUE, - start_pes, - my_pe, shmem_my_pe, - num_pes, shmem_n_pes, - shmem_int_p, shmem_short_p, shmem_long_p, - shmem_int_put, shmem_short_put, shmem_long_put, - shmem_barrier_all, shmem_barrier, - shpalloc, shpdeallc, shpclmove, - shmem_broadcast32, shmem_broadcast64, - shmem_broadcast4, shmem_broadcast8, - shmem_short_inc, shmem_int_inc, shmem_long_inc, - shmem_short_add, shmem_int_add, shmem_long_add, - shmem_short_finc, shmem_int_finc, shmem_long_finc, - shmem_short_fadd, shmem_int_fadd, shmem_long_fadd, - shmem_set_lock, shmem_test_lock, shmem_clear_lock, - shmem_long_sum_to_all, + keywordstyle=\bfseries\color{Orchid}, + % Types + classoffset=2, + keywords={ + _Thread_local, _Noreturn, + void, bool, _Bool, unsigned, char, short, int, long, + float, double, _Complex, complex, size_t, ptrdiff_t, + int8_t, int16_t, int32_t, int64_t, + uint8_t, uint16_t, uint32_t, uint64_t, + shmem_ctx_t, shmem_team_t, shmem_team_config_t, + FILE, }, - keywordstyle=\color{black}\textbf, - classoffset=0, - sensitive=false + keywordstyle=\color{OliveGreen}, + % Other elements + classoffset=3, + keywords={NULL}, + keywordstyle=\color{Cyan}, + % Type-generic parameters (see FuncParam) + classoffset=4, + keywords={TYPE, TYPENAME, SIZE}, + keywordstyle=\ListingsParamStyle, + % Other syntax elements + commentstyle=\itshape\color{BrickRed}, + stringstyle=\color{DarkOrchid}, + backgroundcolor=\color{gray}, } -\lstdefinelanguage{OSH2+F}[]{OSH+F}{ - classoffset=1, - morekeywords={ - shmem_init, - shmem_finalize, - 
shmem_malloc, - shmem_my_pe, - shmem_error, - shmem_global_exit, - }, - keywordstyle=\color{black}\textbf, - classoffset=0, - sensitive=true +\definecolor{gray}{rgb}{0.92,0.92,0.92} + +\lstdefinestyle{PrototypeListingsDefault}{ + style=SourceListingsDefault, + backgroundcolor=\color{gray}, } +\lstset{style=SourceListingsDefault} + % % End this section is for example code listings % @@ -408,57 +273,34 @@ \lstnewenvironment{Cpp11synopsis} { \textbf{C++11:} - \lstset{language={C++}, backgroundcolor=\color{gray}, lineskip=2pt, - escapechar=@, - morekeywords={size_t, ptrdiff_t, TYPE, noreturn}, - aboveskip=0pt, belowskip=0pt}}{} + \lstset{style=PrototypeListingsDefault} +}{} \lstnewenvironment{C11synopsis} { \textbf{C11:} - \lstset{language={C}, backgroundcolor=\color{gray}, lineskip=2pt, - escapechar=@, - morekeywords={size_t, ptrdiff_t, TYPE, _Noreturn, shmem_ctx_t, - shmem_team_t, shmem_team_config_t, uint64_t}, - aboveskip=0pt, belowskip=0pt}}{} + \lstset{style=PrototypeListingsDefault} +}{} \lstnewenvironment{CsynopsisCol} { - \lstset{language={C}, backgroundcolor=\color{gray}, lineskip=2pt, - escapechar=@, - morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, shmem_ctx_t, - shmem_team_t, shmem_team_config_t, uint64_t}, - aboveskip=0pt, belowskip=0pt}}{} + \lstset{style=PrototypeListingsDefault} +}{} \lstnewenvironment{Csynopsis} { \textbf{C/C++:} - \lstset{language={C}, backgroundcolor=\color{gray}, lineskip=2pt, - escapechar=@, - morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, shmem_ctx_t, - shmem_team_t, shmem_team_config_t, uint64_t}, - aboveskip=0pt, belowskip=0pt}}{} + \lstset{style=PrototypeListingsDefault} +}{} \lstnewenvironment{CsynopsisST} { \textbf{C/C++:} - \color{red} - {\lstset{language={C}, backgroundcolor=\color{gray}, lineskip=2pt, - escapechar=@, - morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, shmem_ctx_t, - shmem_team_t, uint64_t}, - aboveskip=0pt, belowskip=0pt}}}{} - -\lstnewenvironment{Fsynopsis} -{ \deprecationstart \\ - 
\textbf{FORTRAN:} - \lstset{language={Fortran}, backgroundcolor=\color{gray}, lineskip=3pt, - escapechar=@, - deletekeywords=[2]{STATUS}, - deletekeywords=[3]{LOG}, aboveskip=0pt, - belowskip=0pt}} -{ \deprecationend } + \color{red}{ + \lstset{style=PrototypeListingsDefault} + } +}{} \newenvironment{apiarguments}{ \newcommand{\apiargument}[3]{ @@ -529,25 +371,47 @@ \end{description} } +\theoremstyle{definition} +\newtheorem{source_example}{Example} +\newtheorem{program_output}{Output} + \newenvironment{apiexamples}{ -\newcommand{\apicexample}[3]{ - ##1 - \lstinputlisting[language={C}, tabsize=2, - basicstyle=\ttfamily\footnotesize, - morekeywords={size_t, ptrdiff_t, shmem_ctx_t, _Thread_local, shmem_team_t, uint64_t}]{##2} - ##3 } -\vspace{-2pt} + \newcommand{\apicexample}[4][]{ + \begin{source_example} + \ifthenelse{\equal{##1}{}}{}{\label{##1}} + ##2 + \end{source_example} + \lstinputlisting[style=SourceListingsDefault]{##3} + ##4} + \vspace{-2pt} \item[EXAMPLES] \hfill \\ -\vspace{-2pt} + \vspace{-2pt} +}{} + +\newcommand{\SourceExample}[2]{ + \noindent + \begin{minipage}{\linewidth} + \vspace{0.1in} + \begin{source_example} + #2 \hfill + \lstinputlisting[style=SourceListingsDefault]{#1} + \end{source_example} + \vspace{0.1in} + \end{minipage} } -{ + +\newcommand{\ProgramOutput}[2]{ + \noindent + \begin{minipage}{\linewidth} + \vspace{0.1in} + \begin{program_output} + #2 \hfill + \lstinputlisting[style=SourceListingsDefault]{#1} + \end{program_output} + \vspace{0.1in} + \end{minipage} } -\newcommand{\cexample}[2]{ - #1 - \lstinputlisting[language={C}, tabsize=2, - basicstyle=\ttfamily\footnotesize, - morekeywords={size_t, ptrdiff_t, shmem_ctx_t}]{#2}} % % End library API description template commands % diff --git a/utils/packages.tex b/utils/packages.tex index d3f54c6c4..f4871c3c9 100644 --- a/utils/packages.tex +++ b/utils/packages.tex @@ -8,6 +8,7 @@ \usepackage{float} \usepackage[usenames,dvipsnames]{color} \usepackage{amsmath} +\usepackage{amsthm} 
\usepackage{amsfonts} \usepackage[table]{xcolor} \usepackage{xspace}