From 998c76e1703ec3c46929ee156e3875def064375d Mon Sep 17 00:00:00 2001 From: Alessandro Bellina Date: Tue, 20 Sep 2022 14:08:19 -0500 Subject: [PATCH] Updates UCX to 1.13.1 in Dockerfile-blossom.ubuntu and sets UCX_TLS=^posix (#6573) Signed-off-by: Alessandro Bellina Signed-off-by: Alessandro Bellina --- jenkins/Dockerfile-blossom.ubuntu | 4 ++-- jenkins/spark-premerge-build.sh | 9 ++++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/jenkins/Dockerfile-blossom.ubuntu b/jenkins/Dockerfile-blossom.ubuntu index e30551c4a6d..1011eaf20d3 100644 --- a/jenkins/Dockerfile-blossom.ubuntu +++ b/jenkins/Dockerfile-blossom.ubuntu @@ -22,12 +22,12 @@ # CUDA_VER=11.0+ # UBUNTU_VER=18.04 or 20.04 # UCX_CUDA_VER=11 (major CUDA version) -# UCX_VER=1.12.1 +# UCX_VER=1.13.1 ### ARG CUDA_VER=11.0 ARG UBUNTU_VER=18.04 -ARG UCX_VER=1.12.1 +ARG UCX_VER=1.13.1 ARG UCX_CUDA_VER=11 FROM nvidia/cuda:${CUDA_VER}-runtime-ubuntu${UBUNTU_VER} ARG CUDA_VER diff --git a/jenkins/spark-premerge-build.sh b/jenkins/spark-premerge-build.sh index 87b54c30145..d874cfe5d10 100755 --- a/jenkins/spark-premerge-build.sh +++ b/jenkins/spark-premerge-build.sh @@ -109,9 +109,12 @@ rapids_shuffle_smoke_test() { } # using UCX shuffle - # Disabled temporarily due to: https://github.com/NVIDIA/spark-rapids/issues/6572 - # PYSP_TEST_spark_executorEnv_UCX_ERROR_SIGNALS="" \ - # invoke_shuffle_integration_test + # The UCX_TLS=^posix config is removing posix from the list of memory transports + # so that IPC regions are obtained using SysV API instead. This was done because of + # intermittent test failures. See: https://github.com/NVIDIA/spark-rapids/issues/6572 + PYSP_TEST_spark_executorEnv_UCX_ERROR_SIGNALS="" \ + PYSP_TEST_spark_executorEnv_UCX_TLS="^posix" \ + invoke_shuffle_integration_test # using MULTITHREADED shuffle PYSP_TEST_spark_rapids_shuffle_mode=MULTITHREADED \