From eaf7877d4a83bdff6edebe12517f08efb262a085 Mon Sep 17 00:00:00 2001 From: Yossi Itigin Date: Sun, 14 Jan 2024 17:50:22 +0200 Subject: [PATCH] UCT/IB/MLX5: Refactor KSM functions and extend logging --- src/uct/ib/mlx5/dv/ib_mlx5dv_md.c | 235 +++++++++++++++--------------- 1 file changed, 120 insertions(+), 115 deletions(-) diff --git a/src/uct/ib/mlx5/dv/ib_mlx5dv_md.c b/src/uct/ib/mlx5/dv/ib_mlx5dv_md.c index 2281eeec185..658039ca88a 100644 --- a/src/uct/ib/mlx5/dv/ib_mlx5dv_md.c +++ b/src/uct/ib/mlx5/dv/ib_mlx5dv_md.c @@ -55,9 +55,9 @@ static ucs_status_t uct_ib_mlx5_alloc_mkey_inbox(int list_size, char **in_p) } static ucs_status_t -uct_ib_mlx5_devx_reg_ksm(uct_ib_mlx5_md_t *md, int atomic, uint64_t address, - size_t length, int list_size, size_t entity_size, - char *in, uint32_t mkey_index, const char *reason, +uct_ib_mlx5_devx_reg_ksm(uct_ib_mlx5_md_t *md, uint64_t address, size_t length, + int atomic, uint32_t mkey_index, const char *reason, + int list_size, size_t entity_size, char *in, struct mlx5dv_devx_obj **mr_p, uint32_t *mkey) { char out[UCT_IB_MLX5DV_ST_SZ_BYTES(create_mkey_out)] = {}; @@ -88,12 +88,10 @@ uct_ib_mlx5_devx_reg_ksm(uct_ib_mlx5_md_t *md, int atomic, uint64_t address, uct_ib_mlx5_calc_mkey_inlen(list_size), out, sizeof(out)); if (mr == NULL) { - if (reason != NULL) { - ucs_debug("mlx5dv_devx_obj_create(CREATE_MKEY, mode=KSM) failed, " - "syndrome 0x%x: %m", - UCT_IB_MLX5DV_GET(create_mkey_out, out, syndrome)); - } - + ucs_debug("%s: mlx5dv_devx_obj_create(CREATE_MKEY, mode=KSM, " + "start_addr=0x%lx length=%zu) failed, syndrome 0x%x: %m", + uct_ib_device_name(&md->super.dev), address, length, + UCT_IB_MLX5DV_GET(create_mkey_out, out, syndrome)); return UCS_ERR_UNSUPPORTED; } @@ -112,15 +110,39 @@ uct_ib_mlx5_devx_reg_ksm(uct_ib_mlx5_md_t *md, int atomic, uint64_t address, return UCS_OK; } -static void uct_ib_mlx5_devx_ksm_log(uct_ib_mlx5_md_t *md, int atomic, - void *address, size_t length, - uint64_t iova, ucs_status_t status, int mt) +static void uct_ib_mlx5_devx_ksm_log(uct_ib_mlx5_md_t *md, void *address, + size_t length, uint64_t iova, int atomic, + int mt, uint32_t mkey_index, + const char *reason, ucs_status_t status) { - ucs_debug("KSM %s-thread memory registration status \"%s\" " - "range %p..%p iova 0x%" PRIx64 "%s on %s", - mt ? "multi" : "single", ucs_status_string(status), address, + ucs_assert(reason != NULL); + ucs_debug("%s: KSM%s %s memory registration status \"%s\" " + "range %p..%p iova 0x%" PRIx64 "%s mkey_index 0x%x", + uct_ib_device_name(&md->super.dev), mt ? "-mt" : "", reason, + ucs_status_string(status), address, UCS_PTR_BYTE_OFFSET(address, length), iova, - atomic ? " atomic" : "", uct_ib_device_name(&md->super.dev)); + atomic ? " atomic" : "", mkey_index); +} + +static void uct_ib_mlx5_devx_ksm_list_log(uct_ib_mlx5_md_t *md, void *address, + size_t length, uint64_t iova, int mt, + const char *reason) +{ + ucs_trace("%s: init KSM%s list %s address %p length %zu iova 0x%" PRIx64, + uct_ib_device_name(&md->super.dev), mt ? "-mt" : "", reason, + address, length, iova); +} + +static void uct_ib_mlx5_devx_klm_entry_set(void **klm_p, size_t klm_idx, + void *address, struct ibv_mr *mr) +{ + void *klm = *klm_p; + + ucs_trace("klm[%ld] va %p mr [addr %p len %zu lkey 0x%x]", klm_idx, address, + mr->addr, mr->length, mr->lkey); + UCT_IB_MLX5DV_SET64(klm, klm, address, (uintptr_t)address); + UCT_IB_MLX5DV_SET(klm, klm, mkey, mr->lkey); + *klm_p = UCS_PTR_BYTE_OFFSET(klm, UCT_IB_MLX5DV_ST_SZ_BYTES(klm)); } /* @@ -128,86 +150,85 @@ static void uct_ib_mlx5_devx_ksm_log(uct_ib_mlx5_md_t *md, int atomic, * that were registered in multi-threaded mode. */ static ucs_status_t -uct_ib_mlx5_devx_reg_ksm_data_mt(uct_ib_mlx5_md_t *md, int atomic, - void *address, - uct_ib_mlx5_devx_ksm_data_t *ksm_data, - uint64_t iova, uint32_t mkey_index, +uct_ib_mlx5_devx_reg_ksm_data_mt(uct_ib_mlx5_md_t *md, void *address, + uint64_t iova, int atomic, uint32_t mkey_index, const char *reason, + uct_ib_mlx5_devx_ksm_data_t *ksm_data, struct mlx5dv_devx_obj **mr_p, uint32_t *mkey) { - struct ibv_mr *last_mr = ksm_data->mrs[ksm_data->mr_num - 1]; - uint64_t iova_offset = iova - (uint64_t)address; - void *mr_address = address; - size_t list_size = ksm_data->mr_num; + void *mr_address = address; + size_t list_size = ksm_data->mr_num; ucs_status_t status; + struct ibv_mr **mr; char *in; void *klm; - int i; - - /* Add offset to workaround CREATE_MKEY range check issue */ - if (iova_offset > 0) { - ++list_size; - } - status = uct_ib_mlx5_alloc_mkey_inbox(list_size, &in); + status = uct_ib_mlx5_alloc_mkey_inbox(list_size + 1, &in); if (status != UCS_OK) { goto out; } + uct_ib_mlx5_devx_ksm_list_log(md, address, ksm_data->length, iova, 1, + reason); + ucs_log_indent(+1); klm = UCT_IB_MLX5DV_ADDR_OF(create_mkey_in, in, klm_pas_mtt); - for (i = 0; i < ksm_data->mr_num; i++) { - UCT_IB_MLX5DV_SET64(klm, klm, address, (uintptr_t)mr_address); - UCT_IB_MLX5DV_SET(klm, klm, mkey, ksm_data->mrs[i]->lkey); - klm = UCS_PTR_BYTE_OFFSET(klm, UCT_IB_MLX5DV_ST_SZ_BYTES(klm)); - mr_address = UCS_PTR_BYTE_OFFSET(mr_address, ksm_data->mrs[i]->length); + ucs_carray_for_each(mr, ksm_data->mrs, ksm_data->mr_num) { + uct_ib_mlx5_devx_klm_entry_set(&klm, mr - ksm_data->mrs, mr_address, + *mr); + mr_address = UCS_PTR_BYTE_OFFSET(mr_address, (*mr)->length); } - if (iova_offset > 0) { - UCT_IB_MLX5DV_SET64(klm, klm, address, (uintptr_t)mr_address); - UCT_IB_MLX5DV_SET(klm, klm, mkey, last_mr->lkey); + if ((void*)iova != address) { + /* Add offset to workaround CREATE_MKEY range check issue */ + uct_ib_mlx5_devx_klm_entry_set(&klm, list_size, mr_address, + ksm_data->mrs[ksm_data->mr_num - 1]); + ++list_size; } + ucs_log_indent(-1); - status = uct_ib_mlx5_devx_reg_ksm(md, atomic, iova, ksm_data->length, - list_size, ksm_data->mrs[0]->length, in, - mkey_index, reason, mr_p, mkey); + status = uct_ib_mlx5_devx_reg_ksm(md, iova, ksm_data->length, atomic, + mkey_index, reason, list_size, + ksm_data->mrs[0]->length, in, mr_p, mkey); ucs_free(in); - uct_ib_mlx5_devx_ksm_log(md, atomic, address, ksm_data->length, iova, - status, 1); + uct_ib_mlx5_devx_ksm_log(md, address, ksm_data->length, iova, atomic, 1, + mkey_index, reason, status); out: return status; } -static ucs_status_t -uct_ib_mlx5_devx_reg_ksm_data_addr(uct_ib_mlx5_md_t *md, struct ibv_mr *mr, - uint64_t address, size_t length, - uint64_t iova, int atomic, int list_size, - uint32_t mkey_index, const char *reason, - struct mlx5dv_devx_obj **mr_p, - uint32_t *mkey) +static ucs_status_t uct_ib_mlx5_devx_reg_ksm_data_addr( + uct_ib_mlx5_md_t *md, void *address, size_t length, uint64_t iova, + int atomic, uint32_t mkey_index, const char *reason, struct ibv_mr *mr, + int list_size, struct mlx5dv_devx_obj **mr_p, uint32_t *mkey) { - int i; - char *in; - void *klm; ucs_status_t status; + void *klm; + char *in; + int i; status = uct_ib_mlx5_alloc_mkey_inbox(list_size, &in); if (status != UCS_OK) { return status; } + uct_ib_mlx5_devx_ksm_list_log(md, address, length, iova, 0, reason); + ucs_log_indent(+1); klm = UCT_IB_MLX5DV_ADDR_OF(create_mkey_in, in, klm_pas_mtt); for (i = 0; i < list_size; i++) { - UCT_IB_MLX5DV_SET(klm, klm, mkey, mr->lkey); - UCT_IB_MLX5DV_SET64(klm, klm, address, - address + (i * UCT_IB_MD_MAX_MR_SIZE)); - klm = UCS_PTR_BYTE_OFFSET(klm, UCT_IB_MLX5DV_ST_SZ_BYTES(klm)); + uct_ib_mlx5_devx_klm_entry_set( + &klm, i, + UCS_PTR_BYTE_OFFSET(address, i * UCT_IB_MD_MAX_MR_SIZE), mr); } + ucs_log_indent(-1); - status = uct_ib_mlx5_devx_reg_ksm(md, atomic, iova, length, list_size, - UCT_IB_MD_MAX_MR_SIZE, in, mkey_index, - reason, mr_p, mkey); + status = uct_ib_mlx5_devx_reg_ksm(md, iova, length, atomic, mkey_index, + reason, list_size, UCT_IB_MD_MAX_MR_SIZE, + in, mr_p, mkey); ucs_free(in); + + uct_ib_mlx5_devx_ksm_log(md, address, length, iova, atomic, 0, mkey_index, + reason, status); return status; } @@ -216,12 +237,11 @@ uct_ib_mlx5_devx_reg_ksm_data_addr(uct_ib_mlx5_md_t *md, struct ibv_mr *mr, * that were registered in single-threaded mode. */ static ucs_status_t uct_ib_mlx5_devx_reg_ksm_data_contig( - uct_ib_mlx5_md_t *md, uct_ib_mlx5_devx_mr_t *mr, void *address, - uint64_t iova, int atomic, uint32_t mkey_index, const char *reason, + uct_ib_mlx5_md_t *md, void *address, uint32_t iova_offset, int atomic, + uint32_t mkey_index, const char *reason, uct_ib_mlx5_devx_mr_t *mr, struct mlx5dv_devx_obj **mr_p, uint32_t *mkey) { size_t mr_length = mr->super.ib->length; - ucs_status_t status; uint64_t ksm_address; uint64_t ksm_iova; size_t ksm_length; @@ -230,7 +250,7 @@ static ucs_status_t uct_ib_mlx5_devx_reg_ksm_data_contig( /* FW requires indirect atomic MR address and length to be aligned * to max supported atomic argument size */ ksm_address = ucs_align_down_pow2((uint64_t)address, UCT_IB_MD_MAX_MR_SIZE); - ksm_iova = iova + ksm_address - (uint64_t)address; + ksm_iova = ksm_address + iova_offset; ksm_length = mr_length + (uint64_t)address - ksm_address; ksm_length = ucs_align_up(ksm_length, md->super.dev.atomic_align); @@ -238,13 +258,10 @@ static ucs_status_t uct_ib_mlx5_devx_reg_ksm_data_contig( list_size = ucs_div_round_up(ksm_length + ucs_get_page_size(), UCT_IB_MD_MAX_MR_SIZE); - status = uct_ib_mlx5_devx_reg_ksm_data_addr(md, mr->super.ib, ksm_address, - ksm_length, ksm_iova, atomic, - list_size, mkey_index, reason, - mr_p, mkey); - - uct_ib_mlx5_devx_ksm_log(md, atomic, address, mr_length, ksm_iova, status, 0); - return status; + return uct_ib_mlx5_devx_reg_ksm_data_addr(md, (void*)ksm_address, + ksm_length, ksm_iova, atomic, + mkey_index, reason, mr->super.ib, + list_size, mr_p, mkey); } static void * @@ -415,21 +432,17 @@ uct_ib_mlx5_devx_reg_ksm_data(uct_ib_mlx5_md_t *md, { uct_ib_mlx5_devx_mr_t *mr = &memh->mrs[mr_type]; void *address = uct_ib_mlx5_devx_memh_base_address(memh); - uint64_t iova = (uint64_t)address + iova_offset; - ucs_status_t status; if (memh->super.flags & UCT_IB_MEM_MULTITHREADED) { - status = uct_ib_mlx5_devx_reg_ksm_data_mt(md, atomic, address, - mr->ksm_data, iova, - mkey_index, reason, mr_p, - mkey); + return uct_ib_mlx5_devx_reg_ksm_data_mt(md, address, + (uint64_t)address + iova_offset, + atomic, mkey_index, reason, + mr->ksm_data, mr_p, mkey); } else { - status = uct_ib_mlx5_devx_reg_ksm_data_contig(md, mr, address, iova, - atomic, mkey_index, - reason, mr_p, mkey); + return uct_ib_mlx5_devx_reg_ksm_data_contig(md, address, iova_offset, + atomic, mkey_index, reason, + mr, mr_p, mkey); } - - return status; } UCS_PROFILE_FUNC_ALWAYS(ucs_status_t, uct_ib_mlx5_devx_reg_indirect_key, @@ -443,7 +456,7 @@ UCS_PROFILE_FUNC_ALWAYS(ucs_status_t, uct_ib_mlx5_devx_reg_indirect_key, do { status = uct_ib_mlx5_devx_reg_ksm_data(md, memh, UCT_IB_MR_DEFAULT, 0, - 0, 0, "indirect key", + 0, 0, "indirect-key", &memh->indirect_dvmr, &memh->indirect_rkey); if (status != UCS_OK) { @@ -504,7 +517,7 @@ UCS_PROFILE_FUNC_ALWAYS(ucs_status_t, uct_ib_mlx5_devx_reg_atomic_key, is_atomic = memh->super.flags & UCT_IB_MEM_ACCESS_REMOTE_ATOMIC; return uct_ib_mlx5_devx_reg_ksm_data(md, memh, mr_type, atomic_offset, - is_atomic, mkey_index, "atomic key", + is_atomic, mkey_index, "atomic-key", &memh->atomic_dvmr, &memh->atomic_rkey); } @@ -552,10 +565,10 @@ uct_ib_mlx5_devx_reg_mt(uct_ib_mlx5_md_t *md, void *address, size_t length, goto err_free; } - status = uct_ib_mlx5_devx_reg_ksm_data_mt(md, is_atomic, address, ksm_data, - (uint64_t)address, 0, - "multi-thread key", - &ksm_data->dvmr, mkey_p); + status = uct_ib_mlx5_devx_reg_ksm_data_mt(md, address, (uint64_t)address, + is_atomic, 0, "multi-thread-key", + ksm_data, &ksm_data->dvmr, + mkey_p); if (status != UCS_OK) { goto err_dereg; } @@ -619,17 +632,12 @@ static void uct_ib_mlx5_devx_reg_symmetric(uct_ib_mlx5_md_t *md, /* Best effort, only allocate in the range below the atomic keys. */ while (md->smkey_index < md->super.mkey_by_name_reserve.size) { status = uct_ib_mlx5_devx_reg_ksm_data_contig( - md, &memh->mrs[UCT_IB_MR_DEFAULT], address, (uint64_t)address, + md, address, 0, (memh->super.flags & UCT_IB_MEM_ACCESS_REMOTE_ATOMIC), - md->super.mkey_by_name_reserve.base + md->smkey_index, NULL, - &smkey_mr, &symmetric_rkey); + md->super.mkey_by_name_reserve.base + md->smkey_index, + "symmetric-key", &memh->mrs[UCT_IB_MR_DEFAULT], &smkey_mr, + &symmetric_rkey); if (status == UCS_OK) { - ucs_trace("%s: symmetric rkey created for addr=%p " - "mkey_index=0x%x smkey_mr=%p rkey=0x%x", - uct_ib_device_name(&md->super.dev), address, - md->super.mkey_by_name_reserve.base + start, smkey_mr, - symmetric_rkey); - memh->smkey_mr = smkey_mr; memh->super.rkey = symmetric_rkey; md->smkey_index++; @@ -643,9 +651,10 @@ static void uct_ib_mlx5_devx_reg_symmetric(uct_ib_mlx5_md_t *md, md->super.config.smkey_block_size); } - ucs_debug("%s: symmetric rkey create failed for addr=%p mkey_index=0x%x", - uct_ib_device_name(&md->super.dev), address, - md->super.mkey_by_name_reserve.base + start); + ucs_debug("%s: failed to allocate symmetric key start index 0x%x size %u", + uct_ib_device_name(&md->super.dev), + md->super.mkey_by_name_reserve.base + start, + md->super.mkey_by_name_reserve.size); } static UCS_F_ALWAYS_INLINE int @@ -1171,20 +1180,16 @@ static void uct_ib_mlx5_devx_init_flush_mr(uct_ib_mlx5_md_t *md) goto err; } - status = uct_ib_mlx5_devx_reg_ksm_data_addr(md, md->flush_mr, - (uintptr_t)md->zero_buf, + status = uct_ib_mlx5_devx_reg_ksm_data_addr(md, md->zero_buf, UCT_IB_MD_FLUSH_REMOTE_LENGTH, - 0, 0, 1, 0, "flush mr", + 0, 0, 0, "flush-mr", + md->flush_mr, 1, &md->flush_dvmr, &md->super.flush_rkey); if (status != UCS_OK) { - ucs_error("failed to create flush_remote rkey: %s", - ucs_status_string(status)); goto err_dereg_mr; } - ucs_debug("created indirect rkey 0x%x for remote flush", - md->super.flush_rkey); ucs_assert((md->super.flush_rkey & 0xff) == 0); return; @@ -1369,8 +1374,9 @@ uct_ib_mlx5_devx_device_mem_alloc(uct_md_h uct_md, size_t *length_p, /* Mapping address to the dm allocation (which is zero based) using ksm, enabling put / get operations using that address range */ status = uct_ib_mlx5_devx_reg_ksm_data_addr( - devx_md, memh->mrs[UCT_IB_MR_DEFAULT].super.ib, 0, dm_attr.length, - (uintptr_t)address, 0, 1, 0, alloc_name, &memh->dm_addr_dvmr, &mkey); + devx_md, address, dm_attr.length, 0, 0, 0, "dm", + memh->mrs[UCT_IB_MR_DEFAULT].super.ib, 1, &memh->dm_addr_dvmr, + &mkey); if (status != UCS_OK) { goto err_dereg_dm; } @@ -2069,12 +2075,11 @@ UCS_PROFILE_FUNC_ALWAYS(ucs_status_t, uct_ib_mlx5_devx_reg_exported_key, } ucs_assert(!(memh->super.flags & UCT_IB_MEM_MULTITHREADED)); - status = uct_ib_mlx5_devx_reg_ksm_data_contig(md, - &memh->mrs[UCT_IB_MR_DEFAULT], - memh->address, + status = uct_ib_mlx5_devx_reg_ksm_data_contig(md, memh->address, (uint64_t)memh->address, 0, 0, - "exported key", &cross_mr, - &exported_lkey); + "exported-key", + &memh->mrs[UCT_IB_MR_DEFAULT], + &cross_mr, &exported_lkey); if (status != UCS_OK) { return status; }