diff --git a/jenkins/Dockerfile-blossom.ubuntu b/jenkins/Dockerfile-blossom.ubuntu index e30551c4a6d..1011eaf20d3 100644 --- a/jenkins/Dockerfile-blossom.ubuntu +++ b/jenkins/Dockerfile-blossom.ubuntu @@ -22,12 +22,12 @@ # CUDA_VER=11.0+ # UBUNTU_VER=18.04 or 20.04 # UCX_CUDA_VER=11 (major CUDA version) -# UCX_VER=1.12.1 +# UCX_VER=1.13.1 ### ARG CUDA_VER=11.0 ARG UBUNTU_VER=18.04 -ARG UCX_VER=1.12.1 +ARG UCX_VER=1.13.1 ARG UCX_CUDA_VER=11 FROM nvidia/cuda:${CUDA_VER}-runtime-ubuntu${UBUNTU_VER} ARG CUDA_VER diff --git a/jenkins/spark-premerge-build.sh b/jenkins/spark-premerge-build.sh index 87b54c30145..d874cfe5d10 100755 --- a/jenkins/spark-premerge-build.sh +++ b/jenkins/spark-premerge-build.sh @@ -109,9 +109,12 @@ rapids_shuffle_smoke_test() { } # using UCX shuffle - # Disabled temporarily due to: https://github.com/NVIDIA/spark-rapids/issues/6572 - # PYSP_TEST_spark_executorEnv_UCX_ERROR_SIGNALS="" \ - # invoke_shuffle_integration_test + # The UCX_TLS=^posix config is removing posix from the list of memory transports + # so that IPC regions are obtained using SysV API instead. This was done because of + # intermittent test failures. See: https://github.com/NVIDIA/spark-rapids/issues/6572 + PYSP_TEST_spark_executorEnv_UCX_ERROR_SIGNALS="" \ + PYSP_TEST_spark_executorEnv_UCX_TLS="^posix" \ + invoke_shuffle_integration_test # using MULTITHREADED shuffle PYSP_TEST_spark_rapids_shuffle_mode=MULTITHREADED \