-
Notifications
You must be signed in to change notification settings - Fork 53
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix several bugs in the team_split_2D operation #920
Changes from all commits
6c4ba31
e2d15bd
918539d
a5c922d
4ecc5cd
bf626c4
8df0a83
b7c9a08
65b5163
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -249,7 +249,7 @@ int shmem_internal_team_split_strided(shmem_internal_team_t *parent_team, int PE | |
*new_team = SHMEMX_TEAM_INVALID; | ||
|
||
if (parent_team == SHMEMX_TEAM_INVALID) { | ||
return 0; | ||
return 1; | ||
} | ||
|
||
int global_PE_start = shmem_internal_team_pe(parent_team, PE_start); | ||
|
@@ -365,39 +365,62 @@ int shmem_internal_team_split_2d(shmem_internal_team_t *parent_team, int xrange, | |
shmem_internal_team_t **xaxis_team, const shmemx_team_config_t *yaxis_config, | ||
long yaxis_mask, shmem_internal_team_t **yaxis_team) | ||
{ | ||
*xaxis_team = SHMEMX_TEAM_INVALID; | ||
*yaxis_team = SHMEMX_TEAM_INVALID; | ||
|
||
if (parent_team == SHMEMX_TEAM_INVALID) { | ||
return 1; | ||
} | ||
|
||
if (xrange > parent_team->size) { | ||
xrange = parent_team->size; | ||
} | ||
|
||
const int parent_start = parent_team->start; | ||
const int parent_stride = parent_team->stride; | ||
const int parent_size = parent_team->size; | ||
const int num_xteams = ceil( parent_size / (float)xrange ); | ||
const int num_yteams = xrange; | ||
|
||
int start = parent_start; | ||
int start = 0; | ||
int ret = 0; | ||
|
||
for (int i = 0; i < num_xteams; i++) { | ||
shmem_internal_team_t *my_xteam; | ||
int xsize = (i == num_xteams - 1 && parent_size % xrange) ? parent_size % xrange : xrange; | ||
|
||
ret = shmem_internal_team_split_strided(parent_team, start, parent_stride, | ||
xsize, xaxis_config, xaxis_mask, xaxis_team); | ||
xsize, xaxis_config, xaxis_mask, &my_xteam); | ||
if (ret) { | ||
RAISE_ERROR_MSG("Creation of x-axis team %d of %d failed\n", i, num_xteams); | ||
RAISE_ERROR_MSG("Creation of x-axis team %d of %d failed\n", i+1, num_xteams); | ||
} | ||
start += xrange; | ||
|
||
if (my_xteam != SHMEMX_TEAM_INVALID) { | ||
shmem_internal_assert(*xaxis_team == SHMEMX_TEAM_INVALID); | ||
*xaxis_team = my_xteam; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wonder if we want to add an assert here to ensure that |
||
} | ||
start += xrange * parent_stride; | ||
} | ||
|
||
start = parent_start; | ||
start = 0; | ||
|
||
for (int i = 0; i < num_yteams; i++) { | ||
shmem_internal_team_t *my_yteam; | ||
int remainder = parent_size % xrange; | ||
int yrange = parent_size / xrange; | ||
int ysize = (remainder && i < remainder) ? yrange + 1 : yrange; | ||
|
||
ret = shmem_internal_team_split_strided(parent_team, start, xrange*parent_stride, | ||
ysize, yaxis_config, yaxis_mask, yaxis_team); | ||
ysize, yaxis_config, yaxis_mask, &my_yteam); | ||
if (ret) { | ||
RAISE_ERROR_MSG("Creation of y-axis team %d of %d failed\n", i, num_yteams); | ||
RAISE_ERROR_MSG("Creation of y-axis team %d of %d failed\n", i+1, num_yteams); | ||
} | ||
start += 1; | ||
|
||
if (my_yteam != SHMEMX_TEAM_INVALID) { | ||
shmem_internal_assert(*yaxis_team == SHMEMX_TEAM_INVALID); | ||
*yaxis_team = my_yteam; | ||
} | ||
start += parent_stride; | ||
} | ||
|
||
long *psync = shmem_internal_team_choose_psync(parent_team, SYNC); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,134 @@ | ||
/* | ||
* Copyright (c) 2019 Intel Corporation. All rights reserved. | ||
* This software is available to you under the BSD license below: | ||
* | ||
* Redistribution and use in source and binary forms, with or | ||
* without modification, are permitted provided that the following | ||
* conditions are met: | ||
* | ||
* - Redistributions of source code must retain the above | ||
* copyright notice, this list of conditions and the following | ||
* disclaimer. | ||
* | ||
* - Redistributions in binary form must reproduce the above | ||
* copyright notice, this list of conditions and the following | ||
* disclaimer in the documentation and/or other materials | ||
* provided with the distribution. | ||
* | ||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
* SOFTWARE. | ||
*/ | ||
|
||
#include <stdio.h> | ||
#include <shmem.h> | ||
#include <shmemx.h> | ||
|
||
static int check_2d(shmemx_team_t parent_team, int xdim) { | ||
int me = shmemx_team_my_pe(parent_team); | ||
|
||
shmemx_team_t xteam = SHMEMX_TEAM_INVALID; | ||
shmemx_team_t yteam = SHMEMX_TEAM_INVALID; | ||
|
||
int ret = shmemx_team_split_2d(parent_team, xdim, NULL, 0, &xteam, NULL, 0, &yteam); | ||
int errors = 0; | ||
|
||
if (ret == 0) { | ||
int me_x = shmemx_team_my_pe(xteam); | ||
int me_y = shmemx_team_my_pe(yteam); | ||
int npes_x = shmemx_team_n_pes(xteam); | ||
int npes_y = shmemx_team_n_pes(yteam); | ||
|
||
if (xteam == SHMEMX_TEAM_INVALID || yteam == SHMEMX_TEAM_INVALID) { | ||
printf("%d: Error, received an invalid team\n", shmem_my_pe()); | ||
++errors; | ||
} | ||
|
||
/* Try converting the PE ids from xteam and yteam to parent and global | ||
* PE indices and compare with the expected indices */ | ||
for (int i = 0; i < npes_x; i++) { | ||
int expected_parent = me_y * xdim + i; /* row (fixed) + column */ | ||
int pe_parent = shmemx_team_translate_pe(xteam, i, parent_team); | ||
int pe_world = shmemx_team_translate_pe(xteam, i, SHMEMX_TEAM_WORLD); | ||
int expected_world = shmemx_team_translate_pe(parent_team, expected_parent, SHMEMX_TEAM_WORLD); | ||
|
||
if (expected_parent != pe_parent) { | ||
printf("%d: xteam[%d] expected parent PE id %d, got %d\n", | ||
me, i, expected_parent, pe_parent); | ||
errors++; | ||
} | ||
|
||
if (expected_world != pe_world) { | ||
printf("%d: xteam[%d] expected world PE id %d, got %d\n", | ||
me, i, expected_world, pe_world); | ||
errors++; | ||
} | ||
} | ||
|
||
for (int i = 0; i < npes_y; i++) { | ||
int expected_parent = i * xdim + me_x; /* row + column (fixed) */ | ||
int pe_parent = shmemx_team_translate_pe(yteam, i, parent_team); | ||
int pe_world = shmemx_team_translate_pe(yteam, i, SHMEMX_TEAM_WORLD); | ||
int expected_world = shmemx_team_translate_pe(parent_team, expected_parent, SHMEMX_TEAM_WORLD); | ||
|
||
if (expected_parent != pe_parent) { | ||
printf("%d: yteam[%d] expected parent PE id %d, got %d\n", | ||
me, i, expected_parent, pe_parent); | ||
errors++; | ||
} | ||
|
||
if (expected_world != pe_world) { | ||
printf("%d: yteam[%d] expected world PE id %d, got %d\n", | ||
me, i, expected_world, pe_world); | ||
errors++; | ||
} | ||
} | ||
} | ||
else { | ||
printf("%d: 2d split failed\n", shmem_my_pe()); | ||
} | ||
|
||
if (xteam != SHMEMX_TEAM_INVALID) | ||
shmemx_team_destroy(xteam); | ||
if (yteam != SHMEMX_TEAM_INVALID) | ||
shmemx_team_destroy(yteam); | ||
|
||
return errors != 0; | ||
} | ||
|
||
int main(void) { | ||
int errors = 0, me, npes, ret; | ||
shmemx_team_t even_team; | ||
|
||
shmem_init(); | ||
|
||
me = shmem_my_pe(); | ||
npes = shmem_n_pes(); | ||
|
||
if (me == 0) printf("Performing 2d split test on SHMEM_TEAM_WORLD\n"); | ||
|
||
errors += check_2d(SHMEMX_TEAM_WORLD, 1); | ||
errors += check_2d(SHMEMX_TEAM_WORLD, 2); | ||
errors += check_2d(SHMEMX_TEAM_WORLD, 3); | ||
|
||
ret = shmemx_team_split_strided(SHMEMX_TEAM_WORLD, 0, 2, (npes-1)/2 + 1, | ||
NULL, 0, &even_team); | ||
|
||
if (ret == 0) { | ||
if (me == 0) printf("Performing 2d split test on even team\n"); | ||
|
||
errors += check_2d(even_team, 1); | ||
errors += check_2d(even_team, 2); | ||
errors += check_2d(even_team, 3); | ||
} else { | ||
if (me == 0) printf("Unable to create even team\n"); | ||
} | ||
|
||
shmem_finalize(); | ||
return errors != 0; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is the parent team's stride the correct thing to use here? I suspect we need some more unit tests to cover the various cases in here, for example where the parent team stride is more than 1.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah - I think it's correct. I believe the "3D" split test (
spec-examples/shmem_team_split_2D.c
) covers this case during the 2nd split: In the 1st split, the parent stride (team_world) is 1, but in the 2nd split, the parent team has a stride of 3 in the x-splits, and a stride of 12 in the y-splits.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Isn't the stride argument supposed to be relative to the PE numbers of the parent team? And isn't the parent team's stride relative to the global PE number space?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't see any unit tests (in the branch of this PR, ignoring the awful spec test) that call
shmem_team_split_2d
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, Yes, and... Yes?
This stride is relative to the parent team, which is relative to the global PE space... maybe we should discuss in person, since I don't see an alternative stride that would make sense here.
Right, there are no unit tests with
split_2d
after moving the spec example, but I'm working on a 2D heat program, which might actually be a better fit inapps
? Unfortunately, I don't believe a 2D heat example would exercise a non-unit stride in the parent, IIUC, but I believe the 3D split does.