From 3bf2296621019286e1bf2576dfa737c34fe942d3 Mon Sep 17 00:00:00 2001 From: Alexander Oganezov Date: Tue, 11 Jun 2024 10:11:31 -0700 Subject: [PATCH] DAOS-15961 cart: Reorganize how envs are handled (#14504) Change to how cart deals with environment variables: - env list is now controlled by CRT_ENV_LIST macro in crt_internal_types.h - all envs are now stored in a structure generated from CRT_ENV_LIST (crt_genvs); envs are read out at crt_env_init() time and env strings deallocated at crt_env_fini(). - accessor functions/macros crt_env_init/fini/get/dump are provided. - string-type envs no longer need to be freed after retrieval. - Change cart utility function to populate crt_init_options_t instead of setting and querying env. With this change, any env to be used will need to appear on the list first, ensuring it gets dumped as well as ensuring proper name usage later. Env name typos when using crt_env_get() will now result in compile-time errors. Required-githooks: true Signed-off-by: Alexander A Oganezov --- src/cart/crt_init.c | 194 +++++++++----------------------- src/cart/crt_internal_types.h | 155 +++++++++++++++++++++++++ src/cart/utils/crt_utils.c | 156 +++++++++---------------- src/cart/utils/crt_utils.h | 4 +- src/utils/self_test/self_test.c | 14 ++- 5 files changed, 274 insertions(+), 249 deletions(-) diff --git a/src/cart/crt_init.c b/src/cart/crt_init.c index f428ae711a4..658bc34293d 100644 --- a/src/cart/crt_init.c +++ b/src/cart/crt_init.c @@ -13,53 +13,11 @@ #include "crt_internal.h" struct crt_gdata crt_gdata; +struct crt_envs crt_genvs; static volatile int gdata_init_flag; struct crt_plugin_gdata crt_plugin_gdata; static bool g_prov_settings_applied[CRT_PROV_COUNT]; -/* List of the environment variables used in CaRT */ -static const char *crt_env_names[] = { - "D_PROVIDER", - "D_INTERFACE", - "D_DOMAIN", - "D_PORT", - "CRT_PHY_ADDR_STR", - "D_LOG_STDERR_IN_LOG", - "D_LOG_SIZE", - "D_LOG_FILE", - "D_LOG_FILE_APPEND_PID", - "D_LOG_MASK", - "DD_MASK", - "DD_STDERR", - 
"DD_SUBSYS", - "CRT_TIMEOUT", - "CRT_ATTACH_INFO_PATH", - "OFI_PORT", - "OFI_INTERFACE", - "OFI_DOMAIN", - "CRT_CREDIT_EP_CTX", - "CRT_CTX_SHARE_ADDR", - "CRT_CTX_NUM", - "D_FI_CONFIG", - "FI_UNIVERSE_SIZE", - "CRT_ENABLE_MEM_PIN", - "FI_OFI_RXM_USE_SRX", - "D_LOG_FLUSH", - "CRT_MRC_ENABLE", - "CRT_SECONDARY_PROVIDER", - "D_PROVIDER_AUTH_KEY", - "D_PORT_AUTO_ADJUST", - "D_POLL_TIMEOUT", - "D_LOG_FILE_APPEND_RANK", - "D_QUOTA_RPCS", - "D_POST_INIT", - "D_POST_INCR", - "DAOS_SIGNAL_REGISTER", - "D_CLIENT_METRICS_ENABLE", - "D_CLIENT_METRICS_RETAIN", - "D_CLIENT_METRICS_DUMP_DIR", -}; - static void crt_lib_init(void) __attribute__((__constructor__)); @@ -95,6 +53,9 @@ crt_lib_init(void) crt_gdata.cg_rpcid = start_rpcid; crt_gdata.cg_num_cores = sysconf(_SC_NPROCESSORS_ONLN); crt_gdata.cg_iv_inline_limit = 19456; /* 19KB */ + + /* envs not inited until crt_init() time */ + memset(&crt_genvs, 0x0, sizeof(crt_genvs)); } /* Library deinit */ @@ -104,26 +65,6 @@ crt_lib_fini(void) D_RWLOCK_DESTROY(&crt_gdata.cg_rwlock); } -static void -dump_envariables(void) -{ - int i; - - D_INFO("-- ENVARS: --\n"); - for (i = 0; i < ARRAY_SIZE(crt_env_names); i++) { - char *val = NULL; - - d_agetenv_str(&val, crt_env_names[i]); - if (val == NULL) - continue; - if (strcmp(crt_env_names[i], "D_PROVIDER_AUTH_KEY") == 0) - D_INFO("%s = %s\n", crt_env_names[i], "********"); - else - D_INFO("%s = %s\n", crt_env_names[i], val); - d_freeenv_str(&val); - } -} - static void dump_opt(crt_init_options_t *opt) { @@ -134,6 +75,18 @@ dump_opt(crt_init_options_t *opt) D_INFO("provider = %s\n", opt->cio_provider); D_INFO("interface = %s\n", opt->cio_interface); D_INFO("domain = %s\n", opt->cio_domain); + D_INFO("port = %s\n", opt->cio_port); + D_INFO("Flags: fi: %d, use_credits: %d, use_esnsors: %d\n", opt->cio_fault_inject, + opt->cio_use_credits, opt->cio_use_sensors); + + if (opt->cio_use_expected_size) + D_INFO("max_expected_size = %d\n", opt->cio_max_expected_size); + if 
(opt->cio_use_unexpected_size) + D_INFO("max_unexpect_size = %d\n", opt->cio_max_unexpected_size); + + /* Handle similar to D_PROVIDER_AUTH_KEY */ + if (opt->cio_auth_key) + D_INFO("auth_key is set\n"); } static int @@ -207,7 +160,7 @@ prov_data_init(struct crt_prov_gdata *prov_data, crt_provider_t provider, max_num_ctx = CRT_SRV_CONTEXT_NUM; } else { /* Only limit the number of contexts for clients */ - d_getenv_uint("CRT_CTX_NUM", &ctx_num); + crt_env_get(CRT_CTX_NUM, &ctx_num); /* Default setting to the number of cores */ if (opt) @@ -271,28 +224,25 @@ static int data_init(int server, crt_init_options_t *opt) uint32_t credits; uint32_t fi_univ_size = 0; uint32_t mem_pin_enable = 0; - uint32_t is_secondary; - char ucx_ib_fork_init = 0; + uint32_t is_secondary; uint32_t post_init = CRT_HG_POST_INIT, post_incr = CRT_HG_POST_INCR; int rc = 0; D_DEBUG(DB_ALL, "initializing crt_gdata...\n"); - - dump_envariables(); - + crt_env_dump(); D_DEBUG(DB_ALL, "Starting RPCID %#lx. Num cores: %ld\n", crt_gdata.cg_rpcid, crt_gdata.cg_num_cores); /* Set context post init / post incr to tune number of pre-posted recvs */ - d_getenv_uint32_t("D_POST_INIT", &post_init); + crt_env_get(D_POST_INIT, &post_init); crt_gdata.cg_post_init = post_init; - d_getenv_uint32_t("D_POST_INCR", &post_incr); + crt_env_get(D_POST_INCR, &post_incr); crt_gdata.cg_post_incr = post_incr; is_secondary = 0; /* Apply CART-890 workaround for server side only */ if (server) { - d_getenv_uint("CRT_ENABLE_MEM_PIN", &mem_pin_enable); + crt_env_get(CRT_ENABLE_MEM_PIN, &mem_pin_enable); if (mem_pin_enable == 1) mem_pin_workaround(); } else { @@ -300,14 +250,14 @@ static int data_init(int server, crt_init_options_t *opt) * Client-side envariable to indicate that the cluster * is running using a secondary provider */ - d_getenv_uint("CRT_SECONDARY_PROVIDER", &is_secondary); + crt_env_get(CRT_SECONDARY_PROVIDER, &is_secondary); } crt_gdata.cg_provider_is_primary = (is_secondary) ? 
0 : 1; if (opt && opt->cio_crt_timeout != 0) timeout = opt->cio_crt_timeout; else - d_getenv_uint("CRT_TIMEOUT", &timeout); + crt_env_get(CRT_TIMEOUT, &timeout); if (timeout == 0 || timeout > 3600) crt_gdata.cg_timeout = CRT_DEFAULT_TIMEOUT_S; @@ -326,36 +276,26 @@ static int data_init(int server, crt_init_options_t *opt) credits = opt->cio_ep_credits; } else { credits = CRT_DEFAULT_CREDITS_PER_EP_CTX; - d_getenv_uint("CRT_CREDIT_EP_CTX", &credits); + crt_env_get(CRT_CREDIT_EP_CTX, &credits); } /* Enable quotas by default only on clients */ crt_gdata.cg_rpc_quota = server ? 0 : CRT_QUOTA_RPCS_DEFAULT; - - d_getenv_uint("D_QUOTA_RPCS", &crt_gdata.cg_rpc_quota); + crt_env_get(D_QUOTA_RPCS, &crt_gdata.cg_rpc_quota); /* Must be set on the server when using UCX, will not affect OFI */ - d_getenv_char("UCX_IB_FORK_INIT", &ucx_ib_fork_init); - if (ucx_ib_fork_init) { - if (server) { - D_INFO("UCX_IB_FORK_INIT was set to %c, setting to n\n", ucx_ib_fork_init); - } else { - D_INFO("UCX_IB_FORK_INIT was set to %c on client\n", ucx_ib_fork_init); - } - } if (server) d_setenv("UCX_IB_FORK_INIT", "n", 1); /* This is a workaround for CART-871 if universe size is not set */ - d_getenv_uint("FI_UNIVERSE_SIZE", &fi_univ_size); + crt_env_get(FI_UNIVERSE_SIZE, &fi_univ_size); if (fi_univ_size == 0) { D_INFO("FI_UNIVERSE_SIZE was not set; setting to 2048\n"); d_setenv("FI_UNIVERSE_SIZE", "2048", 1); } if (credits == 0) { - D_DEBUG(DB_ALL, "CRT_CREDIT_EP_CTX set as 0, flow control " - "disabled.\n"); + D_DEBUG(DB_ALL, "CRT_CREDIT_EP_CTX set as 0, flow control disabled.\n"); } else if (credits > CRT_MAX_CREDITS_PER_EP_CTX) { D_DEBUG(DB_ALL, "ENV CRT_CREDIT_EP_CTX's value %d exceed max " "allowed value, use %d for flow control.\n", @@ -380,15 +320,13 @@ static int data_init(int server, crt_init_options_t *opt) "total number of URI requests for self", "", "net/uri/lookup_self"); if (ret) - D_WARN("Failed to create uri self sensor: "DF_RC"\n", - DP_RC(ret)); + D_WARN("Failed to create uri 
self sensor: " DF_RC "\n", DP_RC(ret)); ret = d_tm_add_metric(&crt_gdata.cg_uri_other, D_TM_COUNTER, "total number of URI requests for other " "ranks", "", "net/uri/lookup_other"); if (ret) - D_WARN("Failed to create uri other sensor: "DF_RC"\n", - DP_RC(ret)); + D_WARN("Failed to create uri other sensor: " DF_RC "\n", DP_RC(ret)); } gdata_init_flag = 1; @@ -571,7 +509,7 @@ prov_settings_apply(bool primary, crt_provider_t prov, crt_init_options_t *opt) if (prov == CRT_PROV_OFI_CXI) mrc_enable = 1; - d_getenv_uint("CRT_MRC_ENABLE", &mrc_enable); + crt_env_get(CRT_MRC_ENABLE, &mrc_enable); if (mrc_enable == 0) { D_INFO("Disabling MR CACHE (FI_MR_CACHE_MAX_COUNT=0)\n"); d_setenv("FI_MR_CACHE_MAX_COUNT", "0", 1); @@ -607,23 +545,23 @@ warn_deprecated(const char *old_env, const char *new_env) int crt_init_opt(crt_group_id_t grpid, uint32_t flags, crt_init_options_t *opt) { - char *provider; - char *provider_env = NULL; - char *interface; - char *interface_env = NULL; - char *domain; - char *domain_env = NULL; - char *auth_key; - char *auth_key_env = NULL; - char *path; bool server = flags & CRT_FLAG_BIT_SERVER; int rc = 0; - char *provider_str0 = NULL; - char *provider_str1 = NULL; crt_provider_t primary_provider; crt_provider_t secondary_provider; crt_provider_t tmp_prov; - char *port; + char *provider = NULL; + char *provider_env = NULL; + char *interface = NULL; + char *interface_env = NULL; + char *domain = NULL; + char *domain_env = NULL; + char *auth_key = NULL; + char *auth_key_env = NULL; + char *path = NULL; + char *provider_str0 = NULL; + char *provider_str1 = NULL; + char *port = NULL; char *port_env = NULL; char *port0 = NULL; char *port1 = NULL; @@ -648,7 +586,8 @@ crt_init_opt(crt_group_id_t grpid, uint32_t flags, crt_init_options_t *opt) crt_setup_log_fac(); - D_INFO("libcart version %s initializing\n", CART_VERSION); + D_INFO("libcart (%s) v%s initializing\n", server ? 
"server" : "client", CART_VERSION); + crt_env_init(); if (opt) dump_opt(opt); @@ -680,7 +619,7 @@ crt_init_opt(crt_group_id_t grpid, uint32_t flags, crt_init_options_t *opt) crt_gdata.cg_auto_swim_disable = (flags & CRT_FLAG_BIT_AUTO_SWIM_DISABLE) ? 1 : 0; - d_agetenv_str(&path, "CRT_ATTACH_INFO_PATH"); + crt_env_get(CRT_ATTACH_INFO_PATH, &path); if (path != NULL && strlen(path) > 0) { rc = crt_group_config_path_set(path); if (rc != 0) @@ -690,65 +629,43 @@ crt_init_opt(crt_group_id_t grpid, uint32_t flags, crt_init_options_t *opt) else D_DEBUG(DB_ALL, "set group_config_path as %s.\n", path); } - d_freeenv_str(&path); if (opt && opt->cio_auth_key) auth_key = opt->cio_auth_key; else { - d_agetenv_str(&auth_key_env, "D_PROVIDER_AUTH_KEY"); + crt_env_get(D_PROVIDER_AUTH_KEY, &auth_key_env); auth_key = auth_key_env; } if (opt && opt->cio_provider) provider = opt->cio_provider; else { - d_agetenv_str(&provider_env, "D_PROVIDER"); - if (provider_env == NULL) { - d_agetenv_str(&provider_env, CRT_PHY_ADDR_ENV); - if (provider_env != NULL) - warn_deprecated(CRT_PHY_ADDR_ENV, "D_PROVIDER"); - } + crt_env_get(D_PROVIDER, &provider_env); provider = provider_env; } if (opt && opt->cio_interface) interface = opt->cio_interface; else { - d_agetenv_str(&interface_env, "D_INTERFACE"); - if (interface_env == NULL) { - d_agetenv_str(&interface_env, "OFI_INTERFACE"); - if (interface_env != NULL) - warn_deprecated("OFI_INTERFACE", "D_INTERFACE"); - } + crt_env_get(D_INTERFACE, &interface_env); interface = interface_env; } if (opt && opt->cio_domain) domain = opt->cio_domain; else { - d_agetenv_str(&domain_env, "D_DOMAIN"); - if (domain_env == NULL) { - d_agetenv_str(&domain_env, "OFI_DOMAIN"); - if (domain_env != NULL) - warn_deprecated("OFI_DOMAIN", "D_DOMAIN"); - } + crt_env_get(D_DOMAIN, &domain_env); domain = domain_env; } if (opt && opt->cio_port) port = opt->cio_port; else { - d_agetenv_str(&port_env, "D_PORT"); - if (port_env == NULL) { - d_agetenv_str(&port_env, "OFI_PORT"); - 
if (port_env != NULL) - warn_deprecated("OFI_PORT", "D_PORT"); - } + crt_env_get(D_PORT, &port_env); port = port_env; } - d_getenv_bool("D_PORT_AUTO_ADJUST", &port_auto_adjust); - + crt_env_get(D_PORT_AUTO_ADJUST, &port_auto_adjust); rc = __split_arg(provider, ",", &provider_str0, &provider_str1); if (rc != 0) D_GOTO(unlock, rc); @@ -964,11 +881,6 @@ crt_init_opt(crt_group_id_t grpid, uint32_t flags, crt_init_options_t *opt) D_FREE(domain0); D_FREE(provider_str0); D_FREE(auth_key0); - d_freeenv_str(&port_env); - d_freeenv_str(&domain_env); - d_freeenv_str(&interface_env); - d_freeenv_str(&provider_env); - d_freeenv_str(&auth_key_env); if (rc != 0) { D_ERROR("failed, "DF_RC"\n", DP_RC(rc)); @@ -1070,6 +982,8 @@ crt_finalize(void) else D_ERROR("failed, rc: "DF_RC"\n", DP_RC(rc)); + crt_env_fini(); + return rc; } diff --git a/src/cart/crt_internal_types.h b/src/cart/crt_internal_types.h index 4aefdeebc72..24291f62e63 100644 --- a/src/cart/crt_internal_types.h +++ b/src/cart/crt_internal_types.h @@ -166,6 +166,161 @@ struct crt_event_cb_priv { #define CRT_CALLBACKS_NUM (4) /* start number of CBs */ #endif +/* + * List of environment variables to read at CaRT library load time. 
+ * for integer envs use ENV() + * for string ones ENV_STR() or ENV_STR_NO_PRINT() + **/ +#define CRT_ENV_LIST \ + ENV_STR(CRT_ATTACH_INFO_PATH) \ + ENV(CRT_CREDIT_EP_CTX) \ + ENV(CRT_CTX_NUM) \ + ENV(CRT_ENABLE_MEM_PIN) \ + ENV_STR(CRT_L_GRP_CFG) \ + ENV(CRT_L_RANK) \ + ENV(CRT_MRC_ENABLE) \ + ENV(CRT_SECONDARY_PROVIDER) \ + ENV(CRT_TIMEOUT) \ + ENV(DAOS_RPC_SIZE_LIMIT) \ + ENV(DAOS_SIGNAL_REGISTER) \ + ENV_STR(DAOS_TEST_SHARED_DIR) \ + ENV_STR(DD_MASK) \ + ENV_STR(DD_STDERR) \ + ENV_STR(DD_SUBSYS) \ + ENV_STR(D_CLIENT_METRICS_DUMP_DIR) \ + ENV(D_CLIENT_METRICS_ENABLE) \ + ENV(D_CLIENT_METRICS_RETAIN) \ + ENV_STR(D_DOMAIN) \ + ENV_STR(D_FI_CONFIG) \ + ENV_STR(D_INTERFACE) \ + ENV_STR(D_LOG_FILE) \ + ENV_STR(D_LOG_FILE_APPEND_PID) \ + ENV_STR(D_LOG_FILE_APPEND_RANK) \ + ENV_STR(D_LOG_FLUSH) \ + ENV_STR(D_LOG_MASK) \ + ENV_STR(D_LOG_SIZE) \ + ENV(D_LOG_STDERR_IN_LOG) \ + ENV(D_POLL_TIMEOUT) \ + ENV_STR(D_PORT) \ + ENV(D_PORT_AUTO_ADJUST) \ + ENV(D_POST_INCR) \ + ENV(D_POST_INIT) \ + ENV_STR(D_PROVIDER) \ + ENV_STR_NO_PRINT(D_PROVIDER_AUTH_KEY) \ + ENV(D_QUOTA_RPCS) \ + ENV(FI_OFI_RXM_USE_SRX) \ + ENV(FI_UNIVERSE_SIZE) \ + ENV(SWIM_PING_TIMEOUT) \ + ENV(SWIM_PROTOCOL_PERIOD_LEN) \ + ENV(SWIM_SUSPECT_TIMEOUT) \ + ENV_STR(UCX_IB_FORK_INIT) + +/* uint env */ +#define ENV(x) \ + unsigned int _##x; \ + int _rc_##x; \ + int _no_print_##x; + +/* char* env */ +#define ENV_STR(x) \ + char *_##x; \ + int _rc_##x; \ + int _no_print_##x; + +#define ENV_STR_NO_PRINT(x) ENV_STR(x) + +struct crt_envs { + CRT_ENV_LIST; + bool inited; +}; + +#undef ENV +#undef ENV_STR +#undef ENV_STR_NO_PRINT + +extern struct crt_envs crt_genvs; + +static inline void +crt_env_fini(void); + +static inline void +crt_env_init(void) +{ + /* release strings if already inited previously */ + if (crt_genvs.inited) + crt_env_fini(); + +#define ENV(x) \ + do { \ + crt_genvs._rc_##x = d_getenv_uint(#x, &crt_genvs._##x); \ + crt_genvs._no_print_##x = 0; \ + } while (0); + +#define ENV_STR(x) \ + do { \ + 
crt_genvs._rc_##x = d_agetenv_str(&crt_genvs._##x, #x); \ + crt_genvs._no_print_##x = 0; \ + } while (0); + +#define ENV_STR_NO_PRINT(x) \ + do { \ + crt_genvs._rc_##x = d_agetenv_str(&crt_genvs._##x, #x); \ + crt_genvs._no_print_##x = 1; \ + } while (0); + + CRT_ENV_LIST; +#undef ENV +#undef ENV_STR +#undef ENV_STR_NO_PRINT + + crt_genvs.inited = true; +} + +static inline void +crt_env_fini(void) +{ +#define ENV(x) (void) +#define ENV_STR(x) d_freeenv_str(&crt_genvs._##x); +#define ENV_STR_NO_PRINT ENV_STR + + CRT_ENV_LIST + +#undef ENV +#undef ENV_STR +#undef ENV_STR_NO_PRINT + + crt_genvs.inited = false; +} + +/* Returns value if env was present at load time */ +#define crt_env_get(name, val) \ + D_ASSERT(crt_genvs.inited); \ + if (crt_genvs._rc_##name == 0) \ + *val = crt_genvs._##name; + +static inline void +crt_env_dump(void) +{ + D_INFO("--- ENV ---\n"); + + /* Only dump envariables that were set */ +#define ENV(x) \ + if (!crt_genvs._rc_##x && crt_genvs._no_print_##x == 0) \ + D_INFO("%s = %d\n", #x, crt_genvs._##x); + +#define ENV_STR(x) \ + if (!crt_genvs._rc_##x) \ + D_INFO("%s = %s\n", #x, crt_genvs._no_print_##x ? 
"****" : crt_genvs._##x); + +#define ENV_STR_NO_PRINT ENV_STR + + CRT_ENV_LIST; + +#undef ENV +#undef ENV_STR +#undef ENV_STR_NO_PRINT +} + /* structure of global fault tolerance data */ struct crt_plugin_gdata { /* list of progress callbacks */ diff --git a/src/cart/utils/crt_utils.c b/src/cart/utils/crt_utils.c index a522afc4e66..bf9c279d166 100644 --- a/src/cart/utils/crt_utils.c +++ b/src/cart/utils/crt_utils.c @@ -98,15 +98,14 @@ void write_completion_file(void) { FILE *fptr; - char *dir; + char *dir = NULL; char *completion_file = NULL; - d_agetenv_str(&dir, "DAOS_TEST_SHARED_DIR"); + crt_env_get(DAOS_TEST_SHARED_DIR, &dir); + D_ASSERTF(dir != NULL, - "DAOS_TEST_SHARED_DIR must be set for --write_completion_file " - "option.\n"); + "DAOS_TEST_SHARED_DIR must be set for --write_completion_file option.\n"); D_ASPRINTF(completion_file, "%s/test-servers-completed.txt.%d", dir, getpid()); - d_freeenv_str(&dir); D_ASSERTF(completion_file != NULL, "Error allocating completion_file string\n"); unlink(completion_file); @@ -411,22 +410,21 @@ crtu_dc_mgmt_net_cfg_rank_add(const char *name, crt_group_t *group, } int -crtu_dc_mgmt_net_cfg_setenv(const char *name) +crtu_dc_mgmt_net_cfg_setenv(const char *name, crt_init_options_t *opt) { int rc; - char *provider; - char *cli_srx_set = NULL; - char *crt_timeout = NULL; - char *d_interface; - char *d_interface_env = NULL; - char *d_domain; - char *d_domain_env = NULL; + int cli_srx_set = 0; struct dc_mgmt_sys_info crt_net_cfg_info = {0}; + Mgmt__GetAttachInfoResp *crt_net_cfg_resp = NULL; + if (opt == NULL) { + D_ERROR("Wrong NULL opt\n"); + return -DER_INVAL; + } + /* Query the agent for the CaRT network configuration parameters */ - rc = dc_get_attach_info(name, true /* all_ranks */, - &crt_net_cfg_info, &crt_net_cfg_resp); + rc = dc_get_attach_info(name, true, &crt_net_cfg_info, &crt_net_cfg_resp); if (opts.assert_on_error) D_ASSERTF(rc == 0, "dc_get_attach_info() failed, rc=%d\n", rc); @@ -435,87 +433,31 @@ 
crtu_dc_mgmt_net_cfg_setenv(const char *name) D_GOTO(cleanup, rc); } - /* These two are always set */ - provider = crt_net_cfg_info.provider; - D_INFO("setenv D_PROVIDER=%s\n", provider); - rc = d_setenv("D_PROVIDER", provider, 1); - if (rc != 0) - D_GOTO(cleanup, rc = d_errno2der(errno)); + D_STRNDUP(opt->cio_provider, crt_net_cfg_info.provider, DAOS_SYS_INFO_STRING_MAX); + D_STRNDUP(opt->cio_interface, crt_net_cfg_info.interface, DAOS_SYS_INFO_STRING_MAX); + D_STRNDUP(opt->cio_domain, crt_net_cfg_info.domain, DAOS_SYS_INFO_STRING_MAX); - D_INFO("Agent response: provider: '%s', domain: '%s', interface: '%s'\n", - crt_net_cfg_info.provider, crt_net_cfg_info.domain, crt_net_cfg_info.interface); + if (!opt->cio_provider || !opt->cio_interface || !opt->cio_domain) + D_GOTO(cleanup, rc = -DER_NOMEM); /* If the server has set this, the client must use the same value. */ - if (crt_net_cfg_info.srv_srx_set != -1) { - rc = asprintf(&cli_srx_set, "%d", crt_net_cfg_info.srv_srx_set); - if (rc < 0) { - cli_srx_set = NULL; - D_GOTO(cleanup, rc = -DER_NOMEM); - } - D_INFO("setenv FI_OFI_RXM_USE_SRX=%s\n", cli_srx_set); - rc = d_setenv("FI_OFI_RXM_USE_SRX", cli_srx_set, 1); - if (rc != 0) - D_GOTO(cleanup, rc = d_errno2der(errno)); - } else { - /* Client may not set it if the server hasn't. 
*/ - d_agetenv_str(&cli_srx_set, "FI_OFI_RXM_USE_SRX"); - if (cli_srx_set) { - D_ERROR("Client set FI_OFI_RXM_USE_SRX to %s, " - "but server is unset!\n", cli_srx_set); - D_GOTO(cleanup, rc = -DER_INVAL); - } - } - - /* Allow client env overrides for these three */ - d_agetenv_str(&crt_timeout, "CRT_TIMEOUT"); - if (!crt_timeout) { - rc = asprintf(&crt_timeout, "%d", crt_net_cfg_info.crt_timeout); - if (rc < 0) { - crt_timeout = NULL; - D_GOTO(cleanup, rc = -DER_NOMEM); - } - D_INFO("setenv CRT_TIMEOUT=%s\n", crt_timeout); - rc = d_setenv("CRT_TIMEOUT", crt_timeout, 1); - if (rc != 0) - D_GOTO(cleanup, rc = d_errno2der(errno)); - } else { - D_DEBUG(DB_MGMT, "Using client provided CRT_TIMEOUT: %s\n", crt_timeout); - } + if (crt_net_cfg_info.srv_srx_set != -1) + cli_srx_set = crt_net_cfg_info.srv_srx_set; + else + cli_srx_set = 0; - d_agetenv_str(&d_interface_env, "D_INTERFACE"); - if (!d_interface_env) { - d_interface = crt_net_cfg_info.interface; - D_INFO("Setting D_INTERFACE=%s\n", d_interface); - rc = d_setenv("D_INTERFACE", d_interface, 1); - if (rc != 0) - D_GOTO(cleanup, rc = d_errno2der(errno)); - } else { - d_interface = d_interface_env; - D_DEBUG(DB_MGMT, "Using client provided D_INTERFACE: %s\n", d_interface); - } - - d_agetenv_str(&d_domain_env, "D_DOMAIN"); - if (!d_domain_env) { - d_domain = crt_net_cfg_info.domain; - D_INFO("Setting D_DOMAIN=%s\n", d_domain); - rc = d_setenv("D_DOMAIN", d_domain, 1); - if (rc != 0) - D_GOTO(cleanup, rc = d_errno2der(errno)); - } else { - d_domain = d_domain_env; - D_DEBUG(DB_MGMT, "Using client provided D_DOMAIN: %s\n", d_domain); - } + rc = d_setenv("FI_OFI_RXM_USE_SRX", cli_srx_set ? 
"1" : "0", 1); + if (rc != 0) + D_GOTO(cleanup, rc = d_errno2der(errno)); - D_INFO("CaRT env setup with:\n" - "\tD_INTERFACE=%s, D_DOMAIN: %s, D_PROVIDER: %s, CRT_TIMEOUT: %s\n", - d_interface, d_domain, provider, crt_timeout); + opt->cio_crt_timeout = crt_net_cfg_info.crt_timeout; cleanup: - d_freeenv_str(&d_domain_env); - d_freeenv_str(&d_interface_env); - d_freeenv_str(&crt_timeout); - d_freeenv_str(&cli_srx_set); dc_put_attach_info(&crt_net_cfg_info, crt_net_cfg_resp); + D_FREE(opt->cio_provider); + D_FREE(opt->cio_interface); + D_FREE(opt->cio_domain); + return rc; } @@ -526,9 +468,10 @@ crtu_cli_start_basic(char *local_group_name, char *srv_group_name, unsigned int total_srv_ctx, bool use_cfg, crt_init_options_t *init_opt, bool use_daos_agent_env) { - char *grp_cfg_file; - uint32_t grp_size; - int rc = 0; + char *grp_cfg_file = NULL; + uint32_t grp_size = 0; + int rc = 0; + crt_init_options_t local_opt = {0}; if (opts.assert_on_error) D_ASSERTF(opts.is_initialized == true, "crtu_test_init not called.\n"); @@ -538,9 +481,10 @@ crtu_cli_start_basic(char *local_group_name, char *srv_group_name, D_GOTO(out, rc); if (use_daos_agent_env) { - rc = crtu_dc_mgmt_net_cfg_setenv(srv_group_name); + rc = crtu_dc_mgmt_net_cfg_setenv(srv_group_name, &local_opt); if (rc != 0) D_GOTO(out, rc); + init_opt = &local_opt; } rc = crt_init_opt(local_group_name, 0, init_opt); @@ -587,15 +531,12 @@ crtu_cli_start_basic(char *local_group_name, char *srv_group_name, if (*grp == NULL) D_GOTO(out, rc = -DER_INVAL); - d_agetenv_str(&grp_cfg_file, "CRT_L_GRP_CFG"); + crt_env_get(CRT_L_GRP_CFG, &grp_cfg_file); /* load group info from a config file and * delete file upon return */ - rc = crtu_load_group_from_file(grp_cfg_file, - *crt_ctx, *grp, - -1, true); - d_freeenv_str(&grp_cfg_file); + rc = crtu_load_group_from_file(grp_cfg_file, *crt_ctx, *grp, -1, true); if (rc != 0) D_GOTO(out, rc); } @@ -649,6 +590,9 @@ crtu_cli_start_basic(char *local_group_name, char *srv_group_name, assert(0); } 
+ D_FREE(local_opt.cio_provider); + D_FREE(local_opt.cio_interface); + D_FREE(local_opt.cio_domain); return rc; } @@ -657,17 +601,14 @@ crtu_srv_start_basic(char *srv_group_name, crt_context_t *crt_ctx, pthread_t *progress_thread, crt_group_t **grp, uint32_t *grp_size, crt_init_options_t *init_opt) { - char *grp_cfg_file; - char *my_uri; - d_rank_t my_rank; + char *grp_cfg_file = NULL; + char *my_uri = NULL; + d_rank_t my_rank = CRT_NO_RANK; int rc = 0; if (opts.assert_on_error) D_ASSERTF(opts.is_initialized == true, "crtu_test_init not called.\n"); - rc = d_getenv_uint32_t("CRT_L_RANK", &my_rank); - D_ASSERTF(rc == DER_SUCCESS, "Rank can not be retrieve: " DF_RC "\n", DP_RC(rc)); - rc = d_log_init(); if (rc != 0) D_GOTO(out, rc); @@ -683,6 +624,12 @@ crtu_srv_start_basic(char *srv_group_name, crt_context_t *crt_ctx, if (rc != 0) D_GOTO(out, rc); + crt_env_get(CRT_L_RANK, &my_rank); + if (my_rank == CRT_NO_RANK) { + D_ERROR("CRT_L_RANK environment variable should have been set by crt_launch\n"); + D_GOTO(out, rc = -DER_INVAL); + } + *grp = crt_group_lookup(NULL); if (!(*grp)) { D_ERROR("Group lookup failed\n"); @@ -712,11 +659,10 @@ crtu_srv_start_basic(char *srv_group_name, crt_context_t *crt_ctx, D_GOTO(out, rc); D_FREE(my_uri); - rc = d_agetenv_str(&grp_cfg_file, "CRT_L_GRP_CFG"); + crt_env_get(CRT_L_GRP_CFG, &grp_cfg_file); /* load group info from a config file and delete file upon return */ rc = crtu_load_group_from_file(grp_cfg_file, crt_ctx[0], *grp, my_rank, true); - d_freeenv_str(&grp_cfg_file); if (rc != 0) D_GOTO(out, rc); diff --git a/src/cart/utils/crt_utils.h b/src/cart/utils/crt_utils.h index 6f32c531224..fb65947fac9 100644 --- a/src/cart/utils/crt_utils.h +++ b/src/cart/utils/crt_utils.h @@ -1,5 +1,5 @@ /* - * (C) Copyright 2019-2022 Intel Corporation. + * (C) Copyright 2019-2024 Intel Corporation. 
* * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -75,7 +75,7 @@ int crtu_dc_mgmt_net_cfg_rank_add(const char *name, crt_group_t *group, crt_context_t *context); int -crtu_dc_mgmt_net_cfg_setenv(const char *name); +crtu_dc_mgmt_net_cfg_setenv(const char *name, crt_init_options_t *opt); int crtu_cli_start_basic(char *local_group_name, char *srv_group_name, diff --git a/src/utils/self_test/self_test.c b/src/utils/self_test/self_test.c index e4ebdaa076e..e1833f00db3 100644 --- a/src/utils/self_test/self_test.c +++ b/src/utils/self_test/self_test.c @@ -95,6 +95,8 @@ self_test_init(char *dest_name, crt_context_t *crt_ctx, crt_group_t **srv_grp, p int i; d_rank_t max_rank = 0; int ret; + crt_init_options_t opt = {0}; + crt_init_options_t *init_opt; /* rank, num_attach_retries, is_server, assert_on_error */ crtu_test_init(0, attach_retries, false, false); @@ -105,19 +107,27 @@ self_test_init(char *dest_name, crt_context_t *crt_ctx, crt_group_t **srv_grp, p fprintf(stderr, "dc_agent_init() failed. ret: %d\n", ret); return ret; } - ret = crtu_dc_mgmt_net_cfg_setenv(dest_name); + ret = crtu_dc_mgmt_net_cfg_setenv(dest_name, &opt); if (ret != 0) { D_ERROR("crtu_dc_mgmt_net_cfg_setenv() failed; ret = %d\n", ret); return ret; } + + init_opt = &opt; + } else { + init_opt = NULL; } if (listen) init_flags |= (CRT_FLAG_BIT_SERVER | CRT_FLAG_BIT_AUTO_SWIM_DISABLE); - ret = crt_init(CRT_SELF_TEST_GROUP_NAME, init_flags); + + ret = crt_init_opt(CRT_SELF_TEST_GROUP_NAME, init_flags, init_opt); if (ret != 0) return ret; + D_FREE(opt.cio_provider); + D_FREE(opt.cio_interface); + D_FREE(opt.cio_domain); g_cart_inited = true; if (attach_info_path) {