From 91f8a2fbb7b776baed2bb3822adf3c897206443d Mon Sep 17 00:00:00 2001 From: Johann Lombardi Date: Fri, 27 Sep 2024 15:53:06 +0200 Subject: [PATCH 01/11] DAOS-16636 cart: force port range for tcp provider Force port range on the client side for the tcp provider and document what ports to open up on firewall. Signed-off-by: Johann Lombardi --- docs/QSG/build_from_scratch.md | 8 ++++---- docs/admin/predeployment_check.md | 30 +++++++++++++++++++++++++++--- src/cart/crt_init.c | 17 +++++++++++++++++ src/cart/crt_internal_types.h | 4 +++- 4 files changed, 51 insertions(+), 8 deletions(-) diff --git a/docs/QSG/build_from_scratch.md b/docs/QSG/build_from_scratch.md index a3bd9c69add..b6a00f9b961 100644 --- a/docs/QSG/build_from_scratch.md +++ b/docs/QSG/build_from_scratch.md @@ -81,10 +81,10 @@ the user outside of a virtual environment, in which case `~/.local/bin` will nee PATH. ```bash - $ python3 -m venv venv - $ source venv/bin/activate - $ python3 -m pip --no-cache-dir install --upgrade pip - $ python3 -m pip install -r requirements-build.txt +$ python3 -m venv venv +$ source venv/bin/activate +$ python3 -m pip --no-cache-dir install --upgrade pip +$ python3 -m pip install -r requirements-build.txt ``` ## Build DAOS diff --git a/docs/admin/predeployment_check.md b/docs/admin/predeployment_check.md index 8b5a391d8a9..6c1a7adee76 100644 --- a/docs/admin/predeployment_check.md +++ b/docs/admin/predeployment_check.md @@ -87,9 +87,9 @@ The DAOS Agent (running on the client nodes) is responsible for resolving a user UID/GID to user/group names, which are then added to a signed credential and sent to the DAOS storage nodes. -## HPC Fabric setup +## Network Setup -DAOS depends on the HPC fabric software stack and drivers. Depending on the type of HPC fabric +DAOS depends on the network fabric software stack and drivers. Depending on the type of fabric that is used, a supported version of the fabric stack needs to be installed. 
Note that for InfiniBand fabrics, DAOS is only supported with the MLNX\_OFED stack that is @@ -162,9 +162,33 @@ Some distributions install a firewall as part of the base OS installation. DAOS for its management service. If this port is blocked by firewall rules, neither `dmg` nor the `daos_agent` on a remote node will be able to contact the DAOS server(s). -Either configure the firewall to allow traffic for this port, or disable the firewall +If telemetry is enabled in the server configuration file, the telemetry port (9191 by default) +must also be accessible on the DAOS server nodes. + +Depending of the provider that is used, ech engine might also listens on a range of ports. This is +the case for the tcp provider. This range will start at the fabric_iface_port specificed in the +server yaml file and use 2 ports for management, 1 port per target and helper xstream. For instance, +with fabric_iface_port set to 20000, 16 targets and 4 helper streams, the engine will listen on port +range from 20000 to 20021 for a total of 22 ports. + +Moreover, there are cases where a engine might have to initiate a connection to a running application. +In this case, inbound connection from the storage nodes to the compute nodes must be allowed. +The default port range use by applications is 20100-21100 with the tcp provider. This can be modified +by setting the FI_TCP_PORT_LOW_RANGE and FI_TCP_PORT_HIGH_RANGE environment variables before running +the application. + +Either configure the firewall to allow traffic for these ports, or disable the firewall (for example, by running `systemctl stop firewalld; systemctl disable firewalld`). 
+The table below summarizes all ports that should be opened on the firewall: + +| Node Type | Component | Process | Settings | Default | +| --------- | --------------|-------------|-------------------------------------------------------|-------------| +| Server | Control plane | daos_server | port: | 10001 | +| Server | Telemetry | daos_server | telemetry_port: | 9191 | +| Server | Data plane | daos_engine | fabric_iface_port: + 2 + targets: + nr_xs_helpers: | 20000-20019 | +| Client | libdaos | application | FI_TCP_PORT_LOW_RANGE/FI_TCP_PORT_HIGH_RANGE env vars | 20100-21100 | + ## Install from Source When DAOS is installed from source (and not from pre-built packages), extra manual diff --git a/src/cart/crt_init.c b/src/cart/crt_init.c index d341a2a6dac..9d2feeb5576 100644 --- a/src/cart/crt_init.c +++ b/src/cart/crt_init.c @@ -523,6 +523,23 @@ prov_settings_apply(bool primary, crt_provider_t prov, crt_init_options_t *opt) if (prov != CRT_PROV_OFI_CXI && prov != CRT_PROV_OFI_TCP) d_setenv("NA_OFI_UNEXPECTED_TAG_MSG", "1", 0); + /** + * Force specific port range for application when using tcp provider to know what + * ports to open when firewall is used. 
+ */ + if (crt_is_service() && (prov == CRT_PROV_OFI_TCP || prov = CRT_PROV_OFI_TCP_RXM)) { + uint32_t port_low_range = UINT32_MAX; + uint32_t port_high_range = UINT32_MAX; + + crt_env_get(FI_TCP_PORT_LOW_RANGE, &port_low_range); + crt_env_get(FI_TCP_PORT_HIGH_RANGE, &port_high_range); + + if (port_low_range != UINT32_MAX && port_high_range != UINT32_MAX) { + d_setenv("FI_TCP_PORT_LOW_RANGE", "20100", 0); + d_setenv("FI_TCP_PORT_HIGH_RANGE", "21100", 0); + } + } + g_prov_settings_applied[prov] = true; } diff --git a/src/cart/crt_internal_types.h b/src/cart/crt_internal_types.h index 857c1a4522d..53b0b9cbcdf 100644 --- a/src/cart/crt_internal_types.h +++ b/src/cart/crt_internal_types.h @@ -220,7 +220,9 @@ struct crt_event_cb_priv { ENV(SWIM_PING_TIMEOUT) \ ENV(SWIM_PROTOCOL_PERIOD_LEN) \ ENV(SWIM_SUSPECT_TIMEOUT) \ - ENV_STR(UCX_IB_FORK_INIT) + ENV_STR(UCX_IB_FORK_INIT) \ + ENV(FI_TCP_PORT_LOW_RANGE) \ + ENV(FI_TCP_PORT_HIGH_RANGE) /* uint env */ #define ENV(x) \ From 206def2e2663df7d7874da0260a9304a203b0071 Mon Sep 17 00:00:00 2001 From: Johann Lombardi Date: Fri, 27 Sep 2024 15:56:41 +0200 Subject: [PATCH 02/11] DAOS-16636 cart: apply default tcp port range on clients only, when unset Signed-off-by: Johann Lombardi --- src/cart/crt_init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cart/crt_init.c b/src/cart/crt_init.c index 9d2feeb5576..429a00e6904 100644 --- a/src/cart/crt_init.c +++ b/src/cart/crt_init.c @@ -527,14 +527,14 @@ prov_settings_apply(bool primary, crt_provider_t prov, crt_init_options_t *opt) * Force specific port range for application when using tcp provider to know what * ports to open when firewall is used. 
*/ - if (crt_is_service() && (prov == CRT_PROV_OFI_TCP || prov = CRT_PROV_OFI_TCP_RXM)) { + if (!crt_is_service() && (prov == CRT_PROV_OFI_TCP || prov = CRT_PROV_OFI_TCP_RXM)) { uint32_t port_low_range = UINT32_MAX; uint32_t port_high_range = UINT32_MAX; crt_env_get(FI_TCP_PORT_LOW_RANGE, &port_low_range); crt_env_get(FI_TCP_PORT_HIGH_RANGE, &port_high_range); - if (port_low_range != UINT32_MAX && port_high_range != UINT32_MAX) { + if (port_low_range == UINT32_MAX && port_high_range == UINT32_MAX) { d_setenv("FI_TCP_PORT_LOW_RANGE", "20100", 0); d_setenv("FI_TCP_PORT_HIGH_RANGE", "21100", 0); } From a919e10e2233ae39801d6154e645f56198c1fd95 Mon Sep 17 00:00:00 2001 From: Johann Lombardi Date: Fri, 27 Sep 2024 16:56:18 +0200 Subject: [PATCH 03/11] DAOS-16636 cart: fix '=' vs '==' in tcp provider check and doc typo Signed-off-by: Johann Lombardi --- docs/admin/predeployment_check.md | 2 +- src/cart/crt_init.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/admin/predeployment_check.md b/docs/admin/predeployment_check.md index 6c1a7adee76..f2a3ed1d8f5 100644 --- a/docs/admin/predeployment_check.md +++ b/docs/admin/predeployment_check.md @@ -166,7 +166,7 @@ If telemetry is enabled in the server configuration file, the telemetry port (91 must also be accessible on the DAOS server nodes. Depending of the provider that is used, ech engine might also listens on a range of ports. This is -the case for the tcp provider. This range will start at the fabric_iface_port specificed in the +the case for the tcp provider. This range will start at the fabric_iface_port specified in the server yaml file and use 2 ports for management, 1 port per target and helper xstream. For instance, with fabric_iface_port set to 20000, 16 targets and 4 helper streams, the engine will listen on port range from 20000 to 20021 for a total of 22 ports. 
diff --git a/src/cart/crt_init.c b/src/cart/crt_init.c index 429a00e6904..9bf03e91749 100644 --- a/src/cart/crt_init.c +++ b/src/cart/crt_init.c @@ -527,8 +527,8 @@ prov_settings_apply(bool primary, crt_provider_t prov, crt_init_options_t *opt) * Force specific port range for application when using tcp provider to know what * ports to open when firewall is used. */ - if (!crt_is_service() && (prov == CRT_PROV_OFI_TCP || prov = CRT_PROV_OFI_TCP_RXM)) { - uint32_t port_low_range = UINT32_MAX; + if (!crt_is_service() && (prov == CRT_PROV_OFI_TCP || prov == CRT_PROV_OFI_TCP_RXM)) { + uint32_t port_low_range = UINT32_MAX; uint32_t port_high_range = UINT32_MAX; crt_env_get(FI_TCP_PORT_LOW_RANGE, &port_low_range); From 9ce00cab8bbf9526f82613f3e67e162e03e70c86 Mon Sep 17 00:00:00 2001 From: Johann Lombardi Date: Fri, 27 Sep 2024 17:14:40 +0200 Subject: [PATCH 04/11] DAOS-16636 cart: keep ENV_STR(UCX_IB_FORK_INIT) last in env list Signed-off-by: Johann Lombardi --- src/cart/crt_internal_types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cart/crt_internal_types.h b/src/cart/crt_internal_types.h index 53b0b9cbcdf..db896ef0638 100644 --- a/src/cart/crt_internal_types.h +++ b/src/cart/crt_internal_types.h @@ -220,9 +220,9 @@ struct crt_event_cb_priv { ENV(SWIM_PING_TIMEOUT) \ ENV(SWIM_PROTOCOL_PERIOD_LEN) \ ENV(SWIM_SUSPECT_TIMEOUT) \ - ENV_STR(UCX_IB_FORK_INIT) \ ENV(FI_TCP_PORT_LOW_RANGE) \ - ENV(FI_TCP_PORT_HIGH_RANGE) + ENV(FI_TCP_PORT_HIGH_RANGE) \ + ENV_STR(UCX_IB_FORK_INIT) /* uint env */ #define ENV(x) \ From c76d96dbb5a1e479cbc1bf99544e105963f59cfa Mon Sep 17 00:00:00 2001 From: Johann Lombardi Date: Tue, 1 Oct 2024 11:44:35 +0200 Subject: [PATCH 05/11] Update docs/admin/predeployment_check.md Co-authored-by: cdavis28 Signed-off-by: Johann Lombardi --- docs/admin/predeployment_check.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/admin/predeployment_check.md b/docs/admin/predeployment_check.md index f2a3ed1d8f5..2d5c39b987c 100644 --- 
a/docs/admin/predeployment_check.md +++ b/docs/admin/predeployment_check.md @@ -165,7 +165,7 @@ for its management service. If this port is blocked by firewall rules, neither ` If telemetry is enabled in the server configuration file, the telemetry port (9191 by default) must also be accessible on the DAOS server nodes. -Depending of the provider that is used, ech engine might also listens on a range of ports. This is +Depending on the provider used, each engine might also listen on a range of ports. This is the case for the tcp provider. This range will start at the fabric_iface_port specified in the server yaml file and use 2 ports for management, 1 port per target and helper xstream. For instance, with fabric_iface_port set to 20000, 16 targets and 4 helper streams, the engine will listen on port From d8185bd1779ce7c83c94dab81afac4a42e02c045 Mon Sep 17 00:00:00 2001 From: Johann Lombardi Date: Tue, 1 Oct 2024 11:44:44 +0200 Subject: [PATCH 06/11] Update docs/admin/predeployment_check.md Co-authored-by: cdavis28 Signed-off-by: Johann Lombardi --- docs/admin/predeployment_check.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/admin/predeployment_check.md b/docs/admin/predeployment_check.md index 2d5c39b987c..362e226ead2 100644 --- a/docs/admin/predeployment_check.md +++ b/docs/admin/predeployment_check.md @@ -168,7 +168,7 @@ must also be accessible on the DAOS server nodes. Depending on the provider used, each engine might also listen on a range of ports. This is the case for the tcp provider. This range will start at the fabric_iface_port specified in the server yaml file and use 2 ports for management, 1 port per target and helper xstream. For instance, -with fabric_iface_port set to 20000, 16 targets and 4 helper streams, the engine will listen on port +with fabric_iface_port set to 20000, 16 targets and 4 helper streams, the engine will listen on ports range from 20000 to 20021 for a total of 22 ports. 
Moreover, there are cases where a engine might have to initiate a connection to a running application. From 04f2cc8d4f6fdffce48824ffe1fd2132a949ff21 Mon Sep 17 00:00:00 2001 From: Johann Lombardi Date: Tue, 1 Oct 2024 11:44:52 +0200 Subject: [PATCH 07/11] Update docs/admin/predeployment_check.md Co-authored-by: cdavis28 Signed-off-by: Johann Lombardi --- docs/admin/predeployment_check.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/admin/predeployment_check.md b/docs/admin/predeployment_check.md index 362e226ead2..01a75b5871e 100644 --- a/docs/admin/predeployment_check.md +++ b/docs/admin/predeployment_check.md @@ -171,7 +171,7 @@ server yaml file and use 2 ports for management, 1 port per target and helper xs with fabric_iface_port set to 20000, 16 targets and 4 helper streams, the engine will listen on ports range from 20000 to 20021 for a total of 22 ports. -Moreover, there are cases where a engine might have to initiate a connection to a running application. +Moreover, there are cases where an engine might have to initiate a connection to a running application. In this case, inbound connection from the storage nodes to the compute nodes must be allowed. The default port range use by applications is 20100-21100 with the tcp provider. 
This can be modified by setting the FI_TCP_PORT_LOW_RANGE and FI_TCP_PORT_HIGH_RANGE environment variables before running From 361ac2ff642b44ff20847a14847bf0375fd5c2a5 Mon Sep 17 00:00:00 2001 From: Johann Lombardi Date: Tue, 1 Oct 2024 11:44:59 +0200 Subject: [PATCH 08/11] Update docs/admin/predeployment_check.md Co-authored-by: cdavis28 Signed-off-by: Johann Lombardi --- docs/admin/predeployment_check.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/admin/predeployment_check.md b/docs/admin/predeployment_check.md index 01a75b5871e..2fd2b29e728 100644 --- a/docs/admin/predeployment_check.md +++ b/docs/admin/predeployment_check.md @@ -173,7 +173,7 @@ range from 20000 to 20021 for a total of 22 ports. Moreover, there are cases where an engine might have to initiate a connection to a running application. In this case, inbound connection from the storage nodes to the compute nodes must be allowed. -The default port range use by applications is 20100-21100 with the tcp provider. This can be modified +The default port range used by applications is 20100-21100 with the tcp provider. This can be modified by setting the FI_TCP_PORT_LOW_RANGE and FI_TCP_PORT_HIGH_RANGE environment variables before running the application. From 117b44ace11c43181346668efe145d65d2af80f3 Mon Sep 17 00:00:00 2001 From: Johann Lombardi Date: Tue, 1 Oct 2024 16:24:13 +0200 Subject: [PATCH 09/11] Update docs/admin/predeployment_check.md Co-authored-by: cdavis28 Signed-off-by: Johann Lombardi --- docs/admin/predeployment_check.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/admin/predeployment_check.md b/docs/admin/predeployment_check.md index 2fd2b29e728..78785b77a3e 100644 --- a/docs/admin/predeployment_check.md +++ b/docs/admin/predeployment_check.md @@ -167,7 +167,7 @@ must also be accessible on the DAOS server nodes. Depending on the provider used, each engine might also listen on a range of ports. This is the case for the tcp provider. 
This range will start at the fabric_iface_port specified in the -server yaml file and use 2 ports for management, 1 port per target and helper xstream. For instance, +server YAML file and use two ports for management, one port per target and helper xstream. For instance, with fabric_iface_port set to 20000, 16 targets and 4 helper streams, the engine will listen on ports range from 20000 to 20021 for a total of 22 ports. From 5dfed1e613b0e3adfff4edef22d9a96425ecf197 Mon Sep 17 00:00:00 2001 From: Johann Lombardi Date: Tue, 1 Oct 2024 16:24:40 +0200 Subject: [PATCH 10/11] Update docs/admin/predeployment_check.md Co-authored-by: cdavis28 Signed-off-by: Johann Lombardi --- docs/admin/predeployment_check.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/admin/predeployment_check.md b/docs/admin/predeployment_check.md index 78785b77a3e..c1a062c7da9 100644 --- a/docs/admin/predeployment_check.md +++ b/docs/admin/predeployment_check.md @@ -169,7 +169,7 @@ Depending on the provider used, each engine might also listen on a range of port the case for the tcp provider. This range will start at the fabric_iface_port specified in the server YAML file and use two ports for management, one port per target and helper xstream. For instance, with fabric_iface_port set to 20000, 16 targets and 4 helper streams, the engine will listen on ports -range from 20000 to 20021 for a total of 22 ports. +in the range from 20000 to 20021 for a total of 22 ports. Moreover, there are cases where an engine might have to initiate a connection to a running application. In this case, inbound connection from the storage nodes to the compute nodes must be allowed. 
From 947a35d7f679a3c5682d5a84514ccd4133e670f4 Mon Sep 17 00:00:00 2001 From: Johann Lombardi Date: Tue, 1 Oct 2024 16:24:56 +0200 Subject: [PATCH 11/11] Update docs/admin/predeployment_check.md Co-authored-by: cdavis28 Signed-off-by: Johann Lombardi --- docs/admin/predeployment_check.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/admin/predeployment_check.md b/docs/admin/predeployment_check.md index c1a062c7da9..0968bde8a0e 100644 --- a/docs/admin/predeployment_check.md +++ b/docs/admin/predeployment_check.md @@ -172,7 +172,7 @@ with fabric_iface_port set to 20000, 16 targets and 4 helper streams, the engine in the range from 20000 to 20021 for a total of 22 ports. Moreover, there are cases where an engine might have to initiate a connection to a running application. -In this case, inbound connection from the storage nodes to the compute nodes must be allowed. +In this case, inbound connections from the storage nodes to the compute nodes must be allowed. The default port range used by applications is 20100-21100 with the tcp provider. This can be modified by setting the FI_TCP_PORT_LOW_RANGE and FI_TCP_PORT_HIGH_RANGE environment variables before running the application.