Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

v5.0.x: Fix bugs with dynamic windows #11115

Merged
3 commits merged on Nov 29, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion ompi/mca/osc/ucx/osc_ucx.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ typedef struct ompi_osc_ucx_epoch_type {
#define OSC_UCX_STATE_COMPLETE_COUNT_OFFSET (sizeof(uint64_t) * 3)
#define OSC_UCX_STATE_POST_INDEX_OFFSET (sizeof(uint64_t) * 4)
#define OSC_UCX_STATE_POST_STATE_OFFSET (sizeof(uint64_t) * 5)
#define OSC_UCX_STATE_DYNAMIC_LOCK_OFFSET (sizeof(uint64_t) * 6)
#define OSC_UCX_STATE_DYNAMIC_LOCK_OFFSET (sizeof(uint64_t) * (5 + OMPI_OSC_UCX_POST_PEER_MAX))
#define OSC_UCX_STATE_DYNAMIC_WIN_CNT_OFFSET (sizeof(uint64_t) * (6 + OMPI_OSC_UCX_POST_PEER_MAX))

typedef struct ompi_osc_dynamic_win_info {
Expand All @@ -105,6 +105,7 @@ typedef struct ompi_osc_ucx_state {
volatile uint64_t complete_count; /* # msgs received from complete processes */
volatile uint64_t post_index;
volatile uint64_t post_state[OMPI_OSC_UCX_POST_PEER_MAX];
volatile uint64_t dynamic_lock;
volatile uint64_t dynamic_win_count;
volatile ompi_osc_dynamic_win_info_t dynamic_wins[OMPI_OSC_UCX_ATTACH_MAX];
} ompi_osc_ucx_state_t;
Expand Down
9 changes: 3 additions & 6 deletions ompi/mca/osc/ucx/osc_ucx_comm.c
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,9 @@ static inline int get_dynamic_win_info(uint64_t remote_addr,
free(temp_buf);

/* unlock the dynamic lock */
return ompi_osc_ucx_dynamic_unlock(module, target);
ompi_osc_ucx_dynamic_unlock(module, target);

return ret;
}

static inline
Expand Down Expand Up @@ -841,7 +843,6 @@ static inline int ompi_osc_ucx_acc_rputget(void *stage_addr, int stage_count,
ompi_osc_ucx_accumulate_request_t *ucx_req = NULL;
bool sync_check;
int ret = OMPI_SUCCESS;
CHECK_DYNAMIC_WIN(remote_addr, module, target, ret);

if (acc_type != NONE) {
OMPI_OSC_UCX_ACCUMULATE_REQUEST_ALLOC(win, ucx_req);
Expand Down Expand Up @@ -1392,8 +1393,6 @@ int ompi_osc_ucx_rput(const void *origin_addr, int origin_count,
return ret;
}

CHECK_DYNAMIC_WIN(remote_addr, module, target, ret);

ret = ompi_osc_ucx_put(origin_addr, origin_count, origin_dt, target, target_disp,
target_count, target_dt, win);
if (ret != OMPI_SUCCESS) {
Expand Down Expand Up @@ -1448,8 +1447,6 @@ int ompi_osc_ucx_rget(void *origin_addr, int origin_count,
return ret;
}

CHECK_DYNAMIC_WIN(remote_addr, module, target, ret);

ret = ompi_osc_ucx_get(origin_addr, origin_count, origin_dt, target, target_disp,
target_count, target_dt, win);
if (ret != OMPI_SUCCESS) {
Expand Down
1 change: 1 addition & 0 deletions ompi/mca/osc/ucx/osc_ucx_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -838,6 +838,7 @@ static int component_select(struct ompi_win_t *win, void **base, size_t size, in
module->state.complete_count = 0;
module->state.req_flag = 0;
module->state.acc_lock = TARGET_LOCK_UNLOCKED;
module->state.dynamic_lock = TARGET_LOCK_UNLOCKED;
module->state.dynamic_win_count = 0;
for (i = 0; i < OMPI_OSC_UCX_ATTACH_MAX; i++) {
module->local_dynamic_win_info[i].refcnt = 0;
Expand Down