Skip to content

Commit

Permalink
Less hardcoding of CXI specific changes
Browse files: browse the repository at this point in the history
  • Loading branch information
franzpoeschel committed Nov 28, 2023
1 parent 82b98c3 commit 3eec910
Showing 1 changed file with 75 additions and 39 deletions.
114 changes: 75 additions & 39 deletions source/adios2/toolkit/sst/dp/rdma_dp.c
Original file line number Diff line number Diff line change
Expand Up @@ -200,26 +200,24 @@ static void init_fabric(struct fabric_state *fabric, struct _SstParams *Params,
FI_ASYNC_IOV | FI_RX_CQ_DATA;
hints->ep_attr->type = FI_EP_RDM;

// FI_ADDR_CXI is also available
// FI_ADDR_CXI_COMPAT is the one that MPICH chooses
// FI_ADDR_OPX is sneakily used to denote FI_ADDR_CXI_COMPAT
hints->addr_format = FI_ADDR_OPX;

// The following differ from the SST configuration in ADIOS2
hints->domain_attr->mr_mode = FI_MR_ENDPOINT; //
hints->domain_attr->control_progress = FI_PROGRESS_MANUAL; //
hints->domain_attr->data_progress = FI_PROGRESS_MANUAL; //

// Authentication is needed
// TODO: the first ID in SLINGSHOT_SVC_IDS is chosen, but we should rather
// choose the one corresponding with the FABRIC_IFACE
// example:
// SLINGSHOT_SVC_IDS=5,5,5,5
// SLINGSHOT_VNIS=1310,1271
// SLINGSHOT_DEVICES=cxi0,cxi1,cxi2,cxi3
// FABRIC_IFACE=cxi2 (user specified)
if(fabric->cxi_auth_key)
uint32_t fi_version;
if (fabric->cxi_auth_key)
{
fi_version = FI_VERSION(1, 11);

hints->domain_attr->mr_mode = FI_MR_ENDPOINT;
hints->domain_attr->control_progress = FI_PROGRESS_MANUAL;
hints->domain_attr->data_progress = FI_PROGRESS_MANUAL;

// Authentication is needed
// TODO: the first ID in SLINGSHOT_SVC_IDS is chosen, but we should
// rather choose the one corresponding with the FABRIC_IFACE
// example:
// SLINGSHOT_SVC_IDS=5,5,5,5
// SLINGSHOT_VNIS=1310,1271
// SLINGSHOT_DEVICES=cxi0,cxi1,cxi2,cxi3
// FABRIC_IFACE=cxi2 (user specified)

hints->ep_attr->auth_key = malloc(sizeof(struct cxi_auth_key));
memcpy(hints->ep_attr->auth_key, fabric->cxi_auth_key,
sizeof(struct cxi_auth_key));
Expand All @@ -230,6 +228,14 @@ static void init_fabric(struct fabric_state *fabric, struct _SstParams *Params,
sizeof(struct cxi_auth_key));
hints->domain_attr->auth_key_size = sizeof(struct cxi_auth_key);
}
else
{
fi_version = FI_VERSION(1, 5);

hints->domain_attr->mr_mode = FI_MR_BASIC;
hints->domain_attr->control_progress = FI_PROGRESS_AUTO;
hints->domain_attr->data_progress = FI_PROGRESS_AUTO;
}

if (Params->DataInterface)
{
Expand All @@ -243,7 +249,7 @@ static void init_fabric(struct fabric_state *fabric, struct _SstParams *Params,
fabric->info = NULL;

pthread_mutex_lock(&fabric_mutex);
fi_getinfo(FI_VERSION(1,11), NULL, NULL, 0, hints, &info);
fi_getinfo(fi_version, NULL, NULL, 0, hints, &info);
pthread_mutex_unlock(&fabric_mutex);
if (!info)
{
Expand All @@ -269,7 +275,8 @@ static void init_fabric(struct fabric_state *fabric, struct _SstParams *Params,
}
if ((((strcmp(prov_name, "verbs") == 0) && info->src_addr) ||
(strcmp(prov_name, "gni") == 0) ||
(strcmp(prov_name, "psm2") == 0)) &&
(strcmp(prov_name, "psm2") == 0) ||
(strcmp(prov_name, "cxi") == 0)) &&
(!useinfo || !ifname ||
(strcmp(useinfo->domain_attr->name, ifname) != 0)))
{
Expand All @@ -280,7 +287,8 @@ static void init_fabric(struct fabric_state *fabric, struct _SstParams *Params,
useinfo = info;
}
else if (((strstr(prov_name, "verbs") && info->src_addr) ||
strstr(prov_name, "gni") || strstr(prov_name, "psm2")) &&
strstr(prov_name, "gni") || strstr(prov_name, "psm2") ||
strstr(prov_name, "cxi")) &&
!useinfo)
{
Svcs->verbose(CP_Stream, DPTraceVerbose,
Expand Down Expand Up @@ -359,16 +367,23 @@ static void init_fabric(struct fabric_state *fabric, struct _SstParams *Params,
fabric->addr_len = info->src_addrlen;

/*
* FI_MR_ALLOCATED and FI_MR_ENDPOINT are required for the CXI provider.
* FI_MR_LOCAL and FI_MR_PROV_KEY are for compatibility
* with the rest of the legacy SST-libfabric implementation (where mr_mode
* used to be FI_MR_BASIC which is equivalent to FI_MR_VIRT_ADDR |
* FI_MR_ALLOCATED | FI_MR_PROV_KEY | FI_MR_LOCAL)
* The libfabric data-plane of SST was originally programmed to use
* FI_MR_BASIC as mr_mode, which is equivalent to
* FI_MR_VIRT_ADDR | FI_MR_ALLOCATED | FI_MR_PROV_KEY | FI_MR_LOCAL.
*
* However, HPE's CXI provider requires two changes to that:
* (1) It does not support FI_MR_VIRT_ADDR.
* (2) It requires use of FI_MR_ENDPOINT.
*
* So we propagate the bit value currently contained in the mr_mode
* for these flags.
*/
info->domain_attr->mr_mode =
FI_MR_ALLOCATED | FI_MR_ENDPOINT | FI_MR_PROV_KEY | FI_MR_LOCAL;
info->domain_attr->mr_mode = FI_MR_ALLOCATED | FI_MR_PROV_KEY |
FI_MR_LOCAL |
(FI_MR_ENDPOINT & info->domain_attr->mr_mode) |
(FI_MR_VIRT_ADDR & info->domain_attr->mr_mode);

fabric->mr_virt_addr = 0;
fabric->mr_virt_addr = info->domain_attr->mr_mode & FI_MR_VIRT_ADDR ? 1 : 0;

#ifdef SST_HAVE_CRAY_DRC
if (strstr(info->fabric_attr->prov_name, "gni") && fabric->auth_key)
Expand Down Expand Up @@ -2026,15 +2041,36 @@ static int RdmaGetPriority(CP_Services Svcs, void *CP_Stream,
FI_ASYNC_IOV | FI_RX_CQ_DATA;
hints->ep_attr->type = FI_EP_RDM;

// FI_ADDR_CXI is also available
// FI_ADDR_CXI_COMPAT is the one that MPICH chooses
// FI_ADDR_OPX is sneakily used to denote FI_ADDR_CXI_COMPAT
hints->addr_format = FI_ADDR_OPX;
char const *vni_env_str = getenv("SLINGSHOT_VNIS");

// The following differ from the SST configuration in ADIOS2
hints->domain_attr->mr_mode = FI_MR_ENDPOINT; //
hints->domain_attr->control_progress = FI_PROGRESS_MANUAL; //
hints->domain_attr->data_progress = FI_PROGRESS_MANUAL; //
uint32_t fi_version;
if (vni_env_str)
{
// try fishing for the CXI provider
Svcs->verbose(CP_Stream, DPSummaryVerbose,
"RDMA Dataplane trying to check for an available CXI "
"provider since environment variable SLINGSHOT_VNIS is "
"defined (value: '%s').\n",
vni_env_str);
fi_version = FI_VERSION(1, 11);

hints->domain_attr->mr_mode = FI_MR_ENDPOINT;
hints->domain_attr->control_progress = FI_PROGRESS_MANUAL;
hints->domain_attr->data_progress = FI_PROGRESS_MANUAL;
}
else
{
Svcs->verbose(CP_Stream, DPSummaryVerbose,
"RDMA Dataplane trying to check for an available non-CXI "
"provider since environment variable SLINGSHOT_VNIS is "
"not defined.\n");

fi_version = FI_VERSION(1, 5);

hints->domain_attr->mr_mode = FI_MR_BASIC;
hints->domain_attr->control_progress = FI_PROGRESS_AUTO;
hints->domain_attr->data_progress = FI_PROGRESS_AUTO;
}

if (Params->DataInterface)
{
Expand All @@ -2052,7 +2088,7 @@ static int RdmaGetPriority(CP_Services Svcs, void *CP_Stream,
}

pthread_mutex_lock(&fabric_mutex);
fi_getinfo(FI_VERSION(1, 11), NULL, NULL, 0, hints, &info);
fi_getinfo(fi_version, NULL, NULL, 0, hints, &info);
pthread_mutex_unlock(&fabric_mutex);
fi_freeinfo(hints);

Expand Down

0 comments on commit 3eec910

Please sign in to comment.