Skip to content

Commit

Permalink
Merge pull request #6360 from Akshay-Venkatesh/topic/cuda-ipc-fix-peer-access-map-init
Browse files Browse the repository at this point in the history

UCT/CUDA_IPC: fix peer-access-map init
  • Loading branch information
yosefe authored Feb 21, 2021
2 parents 9be3d20 + 04e9f90 commit 4b94414
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 3 deletions.
2 changes: 1 addition & 1 deletion src/uct/cuda/cuda_ipc/cuda_ipc_cache.c
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ static ucs_status_t uct_cuda_ipc_open_memhandle(const uct_cuda_ipc_key_t *key,
status = UCS_OK;
} else {
cuGetErrorString(cuerr, &cu_err_str);
ucs_error("cuIpcOpenMemHandle() failed: %s", cu_err_str);
ucs_debug("cuIpcOpenMemHandle() failed: %s", cu_err_str);
status = (cuerr == CUDA_ERROR_ALREADY_MAPPED) ? UCS_ERR_ALREADY_EXISTS :
UCS_ERR_INVALID_PARAM;
}
Expand Down
7 changes: 5 additions & 2 deletions src/uct/cuda/cuda_ipc/cuda_ipc_md.c
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ ucs_status_t uct_cuda_ipc_get_unique_index_for_uuid(int* idx,
int i;
int num_devices;
int original_capacity, new_capacity;
int original_count, new_count;

for (i = 0; i < md->uuid_map_size; i++) {
if (uct_cuda_ipc_uuid_equals(&rkey->uuid, &md->uuid_map[i])) {
Expand All @@ -92,6 +93,8 @@ ucs_status_t uct_cuda_ipc_get_unique_index_for_uuid(int* idx,
original_capacity = md->uuid_map_capacity;
new_capacity = md->uuid_map_capacity ?
(md->uuid_map_capacity * 2) : 16;
original_count = original_capacity * num_devices;
new_count = new_capacity * num_devices;
md->uuid_map_capacity = new_capacity;
md->uuid_map = ucs_realloc(md->uuid_map,
new_capacity * sizeof(CUuuid),
Expand All @@ -101,14 +104,14 @@ ucs_status_t uct_cuda_ipc_get_unique_index_for_uuid(int* idx,
}

md->peer_accessible_cache = ucs_realloc(md->peer_accessible_cache,
new_capacity * num_devices *
new_count *
sizeof(ucs_ternary_auto_value_t),
"uct_cuda_ipc_peer_accessible_cache");
if (md->peer_accessible_cache == NULL) {
return UCS_ERR_NO_MEMORY;
}

for (i = original_capacity; i < new_capacity; i++) {
for (i = original_count; i < new_count; i++) {
md->peer_accessible_cache[i] = UCS_TRY;
}
}
Expand Down

0 comments on commit 4b94414

Please sign in to comment.