From 663457e3c99acd2ebc79154001f9e1e31971e1e0 Mon Sep 17 00:00:00 2001 From: Jerome Soumagne Date: Mon, 1 Jul 2024 10:43:14 -0500 Subject: [PATCH] HG/NA: fix init info changes made to prevent ABI breakage Fix HG NA init info to v4.0 for now and duplicate tclass info --- src/mercury_core.c | 22 ++++++++++++++------ src/mercury_core_types.h | 43 +++++++++++++++++++++------------------- src/mercury_private.h | 18 +++++++++-------- src/na/na.c | 8 ++++---- src/na/na.h | 20 +++---------------- src/na/na_types.h | 39 +++++++++++++++++++++++++++++------- 6 files changed, 88 insertions(+), 62 deletions(-) diff --git a/src/mercury_core.c b/src/mercury_core.c index 885ae059..4389cd6b 100644 --- a/src/mercury_core.c +++ b/src/mercury_core.c @@ -1164,6 +1164,9 @@ hg_core_init(const char *na_info_string, bool na_listen, unsigned int version, struct hg_core_private_class **class_p) { struct hg_init_info hg_init_info = HG_INIT_INFO_INITIALIZER; + struct na_init_info na_init_info = NA_INIT_INFO_INITIALIZER; + unsigned int na_version = NA_VERSION(NA_VERSION_MAJOR, NA_VERSION_MINOR); + struct na_init_info *na_init_info_p = NULL; struct hg_core_private_class *hg_core_class = NULL; #ifdef NA_HAS_SM const char *na_class_name; @@ -1171,6 +1174,11 @@ hg_core_init(const char *na_info_string, bool na_listen, unsigned int version, hg_return_t ret; int rc; + /* Prevent newer versions */ + HG_CHECK_SUBSYS_ERROR(cls, + HG_VERSION_LT(HG_VERSION(HG_VERSION_MAJOR, HG_VERSION_MINOR), version), + error, ret, HG_INVALID_ARG, "API version cannot be newer than library"); + /* Create new HG class */ hg_core_class = (struct hg_core_private_class *) calloc(1, sizeof(*hg_core_class)); @@ -1203,11 +1211,13 @@ hg_core_init(const char *na_info_string, bool na_listen, unsigned int version, "API version cannot be 0"); HG_LOG_SUBSYS_DEBUG(cls, "Init info version used: v%d.%d", HG_MAJOR(version), HG_MINOR(version)); + na_init_info_p = &na_init_info; /* Get init info and overwrite defaults */ - if (HG_VERSION_GE(version, HG_VERSION(2, 4))) + if (HG_VERSION_GE(version, HG_VERSION(2, 4))) { hg_init_info = *hg_init_info_p; - else if (HG_VERSION_GE(version, HG_VERSION(2, 3))) + na_init_info.traffic_class = hg_init_info.traffic_class; + } else if (HG_VERSION_GE(version, HG_VERSION(2, 3))) hg_init_info_dup_2_3(&hg_init_info, (const struct hg_init_info_2_3 *) hg_init_info_p); else @@ -1271,8 +1281,8 @@ hg_core_init(const char *na_info_string, bool na_listen, unsigned int version, hg_core_class->init_info.na_ext_init = true; } else { /* Initialize NA if not provided externally */ - hg_core_class->core_class.na_class = NA_Initialize_opt(na_info_string, - hg_core_class->init_info.listen, &hg_init_info.na_init_info); + hg_core_class->core_class.na_class = NA_Initialize_opt2(na_info_string, + hg_core_class->init_info.listen, na_version, na_init_info_p); HG_CHECK_SUBSYS_ERROR(cls, hg_core_class->core_class.na_class == NULL, error, ret, HG_NA_ERROR, "Could not initialize NA class (info_string=%s, listen=%d)", @@ -1325,8 +1335,8 @@ hg_core_init(const char *na_info_string, bool na_listen, unsigned int version, info_string_p = "na+sm"; /* Initialize NA SM first so that tmp directories are created */ - hg_core_class->core_class.na_sm_class = NA_Initialize_opt(info_string_p, - hg_core_class->init_info.listen, &hg_init_info.na_init_info); + hg_core_class->core_class.na_sm_class = NA_Initialize_opt2(info_string_p, + hg_core_class->init_info.listen, na_version, na_init_info_p); HG_CHECK_SUBSYS_ERROR(cls, hg_core_class->core_class.na_sm_class == NULL, error, ret, HG_NA_ERROR, diff --git a/src/mercury_core_types.h b/src/mercury_core_types.h index 9be834b3..a557fd9d 100644 --- a/src/mercury_core_types.h +++ b/src/mercury_core_types.h @@ -32,7 +32,7 @@ typedef enum hg_checksum_level { */ struct hg_init_info { /* NA init info struct, see na_types.h for documentation */ - struct na_init_info na_init_info; + struct na_init_info_4_0 na_init_info; /* Optional NA class that can be used for initializing an HG class. Using * that option makes the init string passed to HG_Init() ignored. @@ -54,17 +54,10 @@ struct hg_init_info { * Default value is: 512 */ int32_t request_post_incr; - /* Controls the number of multi-recv buffers that are posted. Incrementing - * this value may be beneficial in cases where RPC handles remain in use for - * longer periods of time and release_input_early is not set, preventing - * existing buffers from being reposted. - * Default value is: 4 */ - unsigned int multi_recv_op_max; - /* Controls whether the NA shared-memory interface should be automatically * used if/when the RPC target address shares the same node as its origin. * Default is: false */ - bool auto_sm; + uint8_t auto_sm; /* Overrides the default info string used to initialize the NA shared-memory * interface when auto_sm is set to true (e.g., "foo-bar" will create @@ -80,33 +73,43 @@ struct hg_init_info { /* Controls whether mercury should _NOT_ attempt to transfer small bulk data * along with the RPC request. * Default is: false */ - bool no_bulk_eager; + uint8_t no_bulk_eager; /* Disable internal loopback interface that enables forwarding of RPC * requests to self addresses. Doing so will force traffic to be routed * through NA. For performance reasons, users should be cautious when using * that option. * Default is: false */ - bool no_loopback; + uint8_t no_loopback; /* (Debug) Print stats at exit. * Default is: false */ - bool stats; + uint8_t stats; /* Disable use of multi_recv when available and post separate buffers. * Default is: false */ - bool no_multi_recv; + uint8_t no_multi_recv; /* Release input buffers as early as possible (usually after HG_Get_input()) * as opposed to releasing them after a call to handle destroy. This may be * beneficial in cases where the RPC execution time is longer than usual. * Default is: false */ - bool release_input_early; + uint8_t release_input_early; + + /* Preferred traffic class. Default is NA_TC_UNSPEC */ + enum na_traffic_class traffic_class; /* Disable use of overflow buffers when RPC message size is above the eager * message size threshold. * Default is: false */ bool no_overflow; + + /* Controls the number of multi-recv buffers that are posted. Incrementing + * this value may be beneficial in cases where RPC handles remain in use for + * longer periods of time and release_input_early is not set, preventing + * existing buffers from being reposted. + * Default value is: 4 */ + unsigned int multi_recv_op_max; }; /* Error return codes: @@ -196,12 +199,12 @@ typedef enum { #define HG_INIT_INFO_INITIALIZER \ (struct hg_init_info) \ { \ - .na_init_info = NA_INIT_INFO_INITIALIZER, .na_class = NULL, \ - .request_post_init = 0, .request_post_incr = 0, \ - .multi_recv_op_max = 0, .auto_sm = false, .sm_info_string = NULL, \ - .checksum_level = HG_CHECKSUM_NONE, .no_bulk_eager = false, \ - .no_loopback = false, .stats = false, .no_multi_recv = false, \ - .release_input_early = false, .no_overflow = false \ + .na_init_info = NA_INIT_INFO_INITIALIZER_4_0, .na_class = NULL, \ + .request_post_init = 0, .request_post_incr = 0, .auto_sm = false, \ + .sm_info_string = NULL, .checksum_level = HG_CHECKSUM_NONE, \ + .no_bulk_eager = false, .no_loopback = false, .stats = false, \ + .no_multi_recv = false, .release_input_early = false, \ + .no_overflow = false, .multi_recv_op_max = 0 \ } #endif /* MERCURY_CORE_TYPES_H */ diff --git a/src/mercury_private.h b/src/mercury_private.h index 016ad9f1..4883f24d 100644 --- a/src/mercury_private.h +++ b/src/mercury_private.h @@ -157,10 +157,10 @@ static HG_INLINE void hg_init_info_dup_2_3( struct hg_init_info *new_info, const struct hg_init_info_2_3 *old_info) { - *new_info = (struct hg_init_info){.na_class = old_info->na_class, + *new_info = (struct hg_init_info){.na_init_info = old_info->na_init_info, + .na_class = old_info->na_class, .request_post_init = old_info->request_post_init, .request_post_incr = (int32_t) old_info->request_post_incr, - .multi_recv_op_max = 0, .auto_sm = old_info->auto_sm, .sm_info_string = old_info->sm_info_string, .checksum_level = old_info->checksum_level, @@ -169,8 +169,9 @@ hg_init_info_dup_2_3( .stats = old_info->stats, .no_multi_recv = old_info->no_multi_recv, .release_input_early = old_info->release_input_early, - .no_overflow = false}; - na_init_info_dup_4_0(&new_info->na_init_info, &old_info->na_init_info); + .traffic_class = NA_TC_UNSPEC, + .no_overflow = false, + .multi_recv_op_max = 0}; } /*---------------------------------------------------------------------------*/ @@ -178,10 +179,10 @@ static HG_INLINE void hg_init_info_dup_2_2( struct hg_init_info *new_info, const struct hg_init_info_2_2 *old_info) { - *new_info = (struct hg_init_info){.na_class = old_info->na_class, + *new_info = (struct hg_init_info){.na_init_info = old_info->na_init_info, + .na_class = old_info->na_class, .request_post_init = old_info->request_post_init, .request_post_incr = (int32_t) old_info->request_post_incr, - .multi_recv_op_max = 0, .auto_sm = old_info->auto_sm, .sm_info_string = old_info->sm_info_string, .checksum_level = old_info->checksum_level, @@ -190,8 +191,9 @@ hg_init_info_dup_2_2( .stats = old_info->stats, .no_multi_recv = false, .release_input_early = false, - .no_overflow = false}; - na_init_info_dup_4_0(&new_info->na_init_info, &old_info->na_init_info); + .traffic_class = NA_TC_UNSPEC, + .no_overflow = false, + .multi_recv_op_max = 0}; } #ifdef __cplusplus diff --git a/src/na/na.c b/src/na/na.c index 098e1e16..27e5a0cd 100644 --- a/src/na/na.c +++ b/src/na/na.c @@ -767,9 +767,9 @@ na_class_t * NA_Initialize_opt(const char *info_string, bool listen, const struct na_init_info *na_init_info) { - /* Keep as latest version until info struct is modified */ - return NA_Initialize_opt2(info_string, listen, - NA_VERSION(NA_VERSION_MAJOR, NA_VERSION_MINOR), na_init_info); + /* v4.0 is latest version for which init struct was not versioned */ + return NA_Initialize_opt2( + info_string, listen, NA_VERSION(4, 0), na_init_info); } /*---------------------------------------------------------------------------*/ @@ -802,7 +802,7 @@ NA_Initialize_opt2(const char *info_string, bool listen, unsigned int version, NA_MAJOR(version), NA_MINOR(version)); /* Get init info and overwrite defaults */ - if (NA_VERSION_GE(version, NA_VERSION(4, 1))) + if (NA_VERSION_GE(version, NA_VERSION(5, 0))) na_info->na_init_info = *na_init_info; else na_init_info_dup_4_0(&na_info->na_init_info, diff --git a/src/na/na.h b/src/na/na.h index 8379da1b..779f3ef3 100644 --- a/src/na/na.h +++ b/src/na/na.h @@ -984,20 +984,6 @@ NA_Error_to_string(na_return_t errnum) NA_WARN_UNUSED_RESULT; /* Local Type and Struct Definition */ /************************************/ -/* Previous versions of init info to keep compatiblity with older versions, - * see public definition in na_types.h */ -struct na_init_info_4_0 { - const char *ip_subnet; - const char *auth_key; - size_t max_unexpected_size; - size_t max_expected_size; - uint8_t progress_mode; - enum na_addr_format addr_format; - uint8_t max_contexts; - uint8_t thread_mode; - bool request_mem_device; -}; - /* NA info definition */ struct na_info { char *protocol_name; /* Protocol (e.g., tcp, ib) */ @@ -1129,12 +1115,12 @@ na_init_info_dup_4_0( .auth_key = old_info->auth_key, .max_unexpected_size = old_info->max_unexpected_size, .max_expected_size = old_info->max_expected_size, - .addr_format = old_info->addr_format, - .traffic_class = NA_TC_UNSPEC, .progress_mode = old_info->progress_mode, + .addr_format = old_info->addr_format, .max_contexts = old_info->max_contexts, .thread_mode = old_info->thread_mode, - .request_mem_device = old_info->request_mem_device}; + .request_mem_device = old_info->request_mem_device, + .traffic_class = NA_TC_UNSPEC}; } /*---------------------------------------------------------------------------*/ diff --git a/src/na/na_types.h b/src/na/na_types.h index 0c3797b0..e436f4dd 100644 --- a/src/na/na_types.h +++ b/src/na/na_types.h @@ -76,16 +76,13 @@ struct na_init_info { * used. */ size_t max_expected_size; - /* Preferred address format. Default is NA_ADDR_UNSPEC. */ - enum na_addr_format addr_format; - - /* Preferred traffic class. Default is NA_TC_UNSPEC */ - enum na_traffic_class traffic_class; - /* Progress mode flag. Setting NA_NO_BLOCK will force busy-spin on progress * and remove any wait/notification calls. */ uint8_t progress_mode; + /* Preferred address format. Default is NA_ADDR_UNSPEC. */ + enum na_addr_format addr_format; + /* Maximum number of contexts that are expected to be created. */ uint8_t max_contexts; @@ -97,6 +94,22 @@ struct na_init_info { /* Request support for tranfers to/from memory devices (e.g., GPU, etc). * Default is: false. */ bool request_mem_device; + + /* Preferred traffic class. Default is NA_TC_UNSPEC */ + enum na_traffic_class traffic_class; +}; + +/* Previous versions of init info to keep compatiblity with older versions */ +struct na_init_info_4_0 { + const char *ip_subnet; + const char *auth_key; + size_t max_unexpected_size; + size_t max_expected_size; + uint8_t progress_mode; + enum na_addr_format addr_format; + uint8_t max_contexts; + uint8_t thread_mode; + bool request_mem_device; }; /* Segment */ @@ -253,9 +266,21 @@ typedef void (*na_cb_t)(const struct na_cb_info *callback_info); .auth_key = NULL, \ .max_unexpected_size = 0, \ .max_expected_size = 0, \ + .progress_mode = 0, \ .addr_format = NA_ADDR_UNSPEC, \ - .traffic_class = NA_TC_UNSPEC, \ + .max_contexts = 1, \ + .thread_mode = 0, \ + .request_mem_device = false, \ + .traffic_class = NA_TC_UNSPEC}) + +/* NA init info initializer */ +#define NA_INIT_INFO_INITIALIZER_4_0 \ + ((struct na_init_info_4_0){.ip_subnet = NULL, \ + .auth_key = NULL, \ + .max_unexpected_size = 0, \ + .max_expected_size = 0, \ .progress_mode = 0, \ + .addr_format = NA_ADDR_UNSPEC, \ .max_contexts = 1, \ .thread_mode = 0, \ .request_mem_device = false})