diff --git a/source/adios2/toolkit/sst/dp/rdma_dp.c b/source/adios2/toolkit/sst/dp/rdma_dp.c index 12d8f7c27a..5d7b542ecb 100644 --- a/source/adios2/toolkit/sst/dp/rdma_dp.c +++ b/source/adios2/toolkit/sst/dp/rdma_dp.c @@ -58,12 +58,17 @@ pthread_mutex_t wsr_mutex = PTHREAD_MUTEX_INITIALIZER; pthread_mutex_t ts_mutex = PTHREAD_MUTEX_INITIALIZER; int sst_fi_mr_reg(struct fid_domain *domain, const void *buf, size_t len, - uint64_t access, uint64_t offset, uint64_t requested_key, + uint64_t acs, uint64_t offset, uint64_t requested_key, uint64_t flags, struct fid_mr **mr, void *context, - struct fid_ep *signal) + struct fid_ep *endpoint, int mr_mode) { - int res = fi_mr_reg(domain, buf, len, access, offset, requested_key, flags, - mr, context); + int res = fi_mr_reg(domain, buf, len, acs, offset, requested_key, flags, mr, + context); + bool is_mr_endpoint = (mr_mode & FI_MR_ENDPOINT) != 0; + if (!is_mr_endpoint) + { + return res; + } if (res != FI_SUCCESS) { printf("fi_mr_reg failed with %ul (%s)\n", res, fi_strerror(res)); @@ -71,10 +76,10 @@ int sst_fi_mr_reg(struct fid_domain *domain, const void *buf, size_t len, } /* - * When the domain_attr->mr_mode includes FI_MR_ENDPOINT, the memory region + * When the domain_attr->mr_mode includes FI_MR_ENDPOINT, the memory region * needs to be bound to the endpoint and explicitly enabled after that. */ - res = fi_mr_bind(*mr, &signal->fid, 0); + res = fi_mr_bind(*mr, &endpoint->fid, 0); if (res != FI_SUCCESS) { printf("fi_mr_bind failed with %ul (%s)\n", res, fi_strerror(res)); @@ -106,6 +111,7 @@ struct fabric_state struct fi_info *info; // struct fi_info *linfo; int local_mr_req; + int mr_virt_addr; int rx_cq_data; size_t addr_len; size_t msg_prefix_size; @@ -336,13 +342,16 @@ static void init_fabric(struct fabric_state *fabric, struct _SstParams *Params, /* * FI_MR_ALLOCATED and FI_MR_ENDPOINT are required for the CXI provider. - * FI_MR_VIRT_ADDR, FI_MR_LOCAL and FI_MR_PROV_KEY are for compatibility + * FI_MR_LOCAL and FI_MR_PROV_KEY are for compatibility * with the rest of the legacy SST-libfabric implementation (where mr_mode * used to be FI_MR_BASIC which is equivalent to FI_MR_VIRT_ADDR | * FI_MR_ALLOCATED | FI_MR_PROV_KEY | FI_MR_LOCAL) */ - info->domain_attr->mr_mode = FI_MR_ALLOCATED | FI_MR_ENDPOINT | - FI_MR_VIRT_ADDR | FI_MR_PROV_KEY | FI_MR_LOCAL; + info->domain_attr->mr_mode = + FI_MR_ALLOCATED | FI_MR_ENDPOINT | FI_MR_PROV_KEY | FI_MR_LOCAL; + + fabric->mr_virt_addr = 0; + #ifdef SST_HAVE_CRAY_DRC if (strstr(info->fabric_attr->prov_name, "gni") && fabric->auth_key) { @@ -1183,7 +1192,7 @@ static DP_WSR_Stream RdmaInitWriterPerReader(CP_Services Svcs, sst_fi_mr_reg(Fabric->domain, ReaderRollHandle->Block, readerCohortSize * sizeof(struct _RdmaBuffer), FI_REMOTE_WRITE, 0, 0, 0, - &WSR_Stream->rrmr, Fabric->ctx, Fabric->signal); + &WSR_Stream->rrmr, Fabric->ctx, Fabric->signal, Fabric->info->domain_attr->mr_mode); ReaderRollHandle->Key = fi_mr_key(WSR_Stream->rrmr); WSR_Stream->WriterContactInfo = ContactInfo; @@ -1336,11 +1345,19 @@ static ssize_t PostRead(CP_Services Svcs, Rdma_RS_Stream RS_Stream, int Rank, // register dest buffer sst_fi_mr_reg(Fabric->domain, Buffer, Length, FI_READ, 0, 0, 0, &ret->LocalMR, Fabric->ctx, - Fabric->signal); + Fabric->signal, Fabric->info->domain_attr->mr_mode); LocalDesc = fi_mr_desc(ret->LocalMR); } - Addr = Info->Block + Offset; + if (Fabric->mr_virt_addr) + { + Addr = Info->Block + Offset; + } + else + { + Addr = NULL; + Addr += Offset; + } Svcs->verbose( RS_Stream->CP_Stream, DPTraceVerbose, @@ -1687,7 +1704,7 @@ static void RdmaProvideTimestep(CP_Services Svcs, DP_WS_Stream Stream_v, sst_fi_mr_reg(Fabric->domain, Data->block, Data->DataSize, FI_WRITE | FI_REMOTE_READ, 0, 0, 0, - &Entry->mr, Fabric->ctx, Fabric->signal); + &Entry->mr, Fabric->ctx, Fabric->signal, Fabric->info->domain_attr->mr_mode); Entry->Key = fi_mr_key(Entry->mr); if (Fabric->local_mr_req) { @@ -2201,16 +2218,16 @@ static void PostPreload(CP_Services Svcs, Rdma_RS_Stream Stream, long Timestep) sst_fi_mr_reg(Fabric->domain, PreloadBuffer->Handle.Block, PreloadBuffer->BufferLen, FI_REMOTE_WRITE, 0, 0, 0, &Stream->pbmr, Fabric->ctx, - Fabric->signal); + Fabric->signal, Fabric->info->domain_attr->mr_mode); PreloadKey = fi_mr_key(Stream->pbmr); SBSize = sizeof(*SendBuffer) * StepLog->WRanks; SendBuffer = malloc(SBSize); if (Fabric->local_mr_req) { - sst_fi_mr_reg(Fabric->domain, SendBuffer, SBSize, FI_WRITE, 0, - 0, 0, &sbmr, Fabric->ctx, - Fabric->signal); + sst_fi_mr_reg(Fabric->domain, SendBuffer, SBSize, FI_WRITE, 0, 0, 0, + &sbmr, Fabric->ctx, Fabric->signal, + Fabric->info->domain_attr->mr_mode); sbdesc = fi_mr_desc(sbmr); } @@ -2219,8 +2236,8 @@ static void PostPreload(CP_Services Svcs, Rdma_RS_Stream Stream, long Timestep) RBLen = 2 * StepLog->Entries * DP_DATA_RECV_SIZE; Stream->RecvDataBuffer = malloc(RBLen); sst_fi_mr_reg(Fabric->domain, Stream->RecvDataBuffer, RBLen, FI_RECV, 0, - 0, 0, &Stream->rbmr, Fabric->ctx, - Fabric->signal); + 0, 0, &Stream->rbmr, Fabric->ctx, Fabric->signal, + Fabric->info->domain_attr->mr_mode); Stream->rbdesc = fi_mr_desc(Stream->rbmr); RecvBuffer = (uint8_t *)Stream->RecvDataBuffer; for (i = 0; i < 2 * StepLog->Entries; i++) @@ -2247,7 +2264,7 @@ static void PostPreload(CP_Services Svcs, Rdma_RS_Stream Stream, long Timestep) (sizeof(struct _RdmaBuffer) * RankLog->Entries) + sizeof(uint64_t), FI_REMOTE_READ, 0, 0, 0, - &RankLog->preqbmr, Fabric->ctx, Fabric->signal); + &RankLog->preqbmr, Fabric->ctx, Fabric->signal, Fabric->info->domain_attr->mr_mode); for (j = 0; j < RankLog->Entries; j++) { ReqLog = &RankLog->ReqLog[j]; @@ -2408,7 +2425,7 @@ static void PullSelection(CP_Services Svcs, Rdma_WSR_Stream Stream) { sst_fi_mr_reg(Fabric->domain, ReqBuffer.Handle.Block, ReqBuffer.BufferLen, FI_READ, 0, 0, 0, - &rrmr, Fabric->ctx, Fabric->signal); + &rrmr, Fabric->ctx, Fabric->signal, Fabric->info->domain_attr->mr_mode); rrdesc = fi_mr_desc(rrmr); }