Skip to content

Commit ea99bf7

Browse files
author
ferrol aderholdt
committed
Add nonblocking collectives examples
1 parent 02a539a commit ea99bf7

File tree

5 files changed

+152
-4
lines changed

5 files changed

+152
-4
lines changed

content/execution_model.tex

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,9 @@ \subsection{Progress of OpenSHMEM Operations}\label{subsec:progress}
4040
without that \ac{PE} issuing any explicit \openshmem calls. One-sided \openshmem
4141
communication calls involving that \ac{PE} should progress regardless of when
4242
that \ac{PE} next engages in an \openshmem call. Similarly,
43-
for non-blocking collectives, consider the \acp{PE} that are part of a team
44-
issuing a non-blocking collective and overlapping collective completion with
45-
computation. Once a non-blocking collective operation is initiated by
43+
for nonblocking collectives, consider the \acp{PE} that are part of a team
44+
issuing a nonblocking collective and overlapping collective completion with
45+
computation. Once a nonblocking collective operation is initiated by
4646
all of the \acp{PE} in the team of the collective, any \ac{PE} in the team must
4747
eventually observe completion through a call to \FUNC{shmem\_req\_test} or a
4848
call to \FUNC{shmem\_req\_wait}.

content/shmem_alltoall_nb.tex

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,5 +119,13 @@
119119
Zero on successful local completion. Nonzero otherwise.
120120
}
121121

122-
\end{apidefinition}
122+
\begin{apiexamples}
123+
124+
\apicexample
125+
{This \CorCpp{} example shows two overlapping nonblocking \FUNC{shmem\_int64\_alltoall\_nb} operations, each exchanging two 64-bit integers with every \ac{PE}.}
126+
{./example_code/shmem_alltoall_nb_example.c}
127+
{}
123128

129+
\end{apiexamples}
130+
131+
\end{apidefinition}

content/shmem_broadcast_nb.tex

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,4 +100,14 @@
100100
\LibConstRef{SHMEM\_TEAM\_INVALID}.
101101
}
102102

103+
\begin{apiexamples}
104+
105+
\apicexample
106+
{In the following \Cstd[11] example, multiple calls to \FUNC{shmem\_broadcast\_nb} copy the \source{}
107+
value on each root \ac{PE} $i$ to element $i$ of \dest{} on \acp{PE} $0\dots npes-1$.}
108+
{./example_code/shmem_broadcast_nb_example.c}
109+
{}
110+
111+
\end{apiexamples}
112+
103113
\end{apidefinition}
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
#include <inttypes.h>
2+
#include <shmem.h>
3+
#include <stdio.h>
4+
#include <stdlib.h>
5+
6+
int main(void) {
7+
int status = 0;
8+
const int count = 2;
9+
const int nr_a2a = 2;
10+
int64_t **source;
11+
int64_t **dest;
12+
shmem_req_h *requests;
13+
14+
shmem_init();
15+
int mype = shmem_my_pe();
16+
int npes = shmem_n_pes();
17+
18+
source = (int64_t **)shmem_malloc(nr_a2a * sizeof(int64_t *));
19+
dest = (int64_t **)shmem_malloc(nr_a2a * sizeof(int64_t *));
20+
requests = (shmem_req_h *) malloc(npes * sizeof(shmem_req_h));
21+
for (int i = 0; i < npes; i++) {
22+
requests[i] = SHMEM_REQ_INVALID;
23+
}
24+
25+
for (int nr = 0; nr < nr_a2a; nr++) {
26+
dest[nr] = (int64_t *)shmem_malloc(count * npes * sizeof(int64_t));
27+
source[nr] = (int64_t *)shmem_malloc(count * npes * sizeof(int64_t));
28+
for (int pe = 0; pe < npes; pe++) {
29+
for (int i = 0; i < count; i++) {
30+
source[nr][(pe * count) + i] = mype + pe;
31+
dest[nr][(pe * count) + i] = 9999;
32+
}
33+
}
34+
}
35+
36+
/* wait for all PEs to update sources/dests */
37+
shmem_team_sync(SHMEM_TEAM_WORLD);
38+
39+
/* overlap alltoall operations on all PEs */
40+
for (int i = 0; i < nr_a2a; i++) {
41+
status = shmem_int64_alltoall_nb(SHMEM_TEAM_WORLD, dest[i], source[i], count, &requests[i]);
42+
if (0 != status) {
43+
fprintf(stderr, "shmem alltoall nb failed with status %d\n", status);
44+
goto out;
45+
}
46+
}
47+
48+
for (int i = 0; i < npes; i++) {
49+
status = shmem_req_wait(&requests[i]);
50+
if (0 != status) {
51+
fprintf(stderr, "shmem req wait failed on request %d\n", i);
52+
goto out;
53+
}
54+
}
55+
56+
for (int nr = 0; nr < nr_a2a; nr++) {
57+
for (int pe = 0; pe < npes; pe++) {
58+
for (int i = 0; i < count; i++) {
59+
if (dest[nr][(pe * count) + i] != pe + mype) {
60+
printf("[%d] ERROR: dest[%d]=%" PRId64 ", should be %d\n",
61+
mype, (pe * count) + i, dest[nr][(pe * count) + i], pe + mype);
62+
}
63+
}
64+
}
65+
}
66+
67+
out:
68+
for (int nr = 0; nr < nr_a2a; nr++) {
69+
shmem_free(source[nr]);
70+
shmem_free(dest[nr]);
71+
}
72+
shmem_free(source);
73+
shmem_free(dest);
74+
free(requests);
75+
shmem_finalize();
76+
return status;
77+
}
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
#include <shmem.h>
2+
#include <stdio.h>
3+
#include <stdlib.h>
4+
5+
int main(void) {
6+
int status = 0;
7+
static long source;
8+
long *dest;
9+
shmem_req_h *requests;
10+
11+
shmem_init();
12+
int mype = shmem_my_pe();
13+
int npes = shmem_n_pes();
14+
15+
dest = (long *)shmem_malloc(npes * sizeof(long));
16+
requests = (shmem_req_h *) malloc(npes * sizeof(shmem_req_h));
17+
for (int i = 0; i < npes; i++) {
18+
requests[i] = SHMEM_REQ_INVALID;
19+
}
20+
21+
source = mype;
22+
for (int i = 0; i < npes; i++) {
23+
status = shmem_broadcast_nb(SHMEM_TEAM_WORLD, &dest[i], source,
24+
1, i, &requests[i]);
25+
if (0 != status) {
26+
fprintf(stderr, "shmem broadcast nb failed with root %d and status %d\n",
27+
i, status);
28+
goto out;
29+
}
30+
}
31+
32+
for (int i = 0; i < npes; i++) {
33+
status = shmem_req_wait(&requests[i]);
34+
if (0 != status) {
35+
fprintf(stderr, "shmem req wait failed on request %d\n", i);
36+
goto out;
37+
}
38+
}
39+
40+
if (mype == 0) {
41+
for (int i = 0; i < npes; i++) {
42+
if (i > 0 && !(i % 8)) {
43+
printf("\n");
44+
}
45+
printf("%8d", dest[i]);
46+
}
47+
}
48+
out:
49+
shmem_free(dest);
50+
free(requests);
51+
shmem_finalize();
52+
return status;
53+
}

0 commit comments

Comments
 (0)