From 2cd96289ec7d76f6b8e4d11eadd7ed1dfbb13a3b Mon Sep 17 00:00:00 2001
From: MoFHeka
Date: Sat, 16 Dec 2023 03:19:11 +0800
Subject: [PATCH] [fix] Lack of HorovodJoin CPU kernels when install Horovod with NCCL, which make unable to run horovod_sync_train_test.

---
 tools/testing/build_and_run_tests.sh | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/tools/testing/build_and_run_tests.sh b/tools/testing/build_and_run_tests.sh
index f3971a361..575a92d26 100644
--- a/tools/testing/build_and_run_tests.sh
+++ b/tools/testing/build_and_run_tests.sh
@@ -46,8 +46,19 @@ if ! [ -x "$(command -v nvidia-smi)" ]; then
   EXTRA_ARGS="-n auto"
 fi
 
+# Horovod installed with NCCL lacks the HorovodJoin CPU kernels; reinstall it with MPI only for this test.
+python -m pip uninstall horovod -y
+HOROVOD_WITH_TENSORFLOW=1 \
+HOROVOD_WITHOUT_PYTORCH=1 \
+HOROVOD_WITHOUT_MXNET=1 \
+HOROVOD_WITH_MPI=1 \
+HOROVOD_WITHOUT_GLOO=1 \
+python -m pip install "horovod==${HOROVOD_VERSION}"
 # TODO(jamesrong): Test on GPU.
 CUDA_VISIBLE_DEVICES="" mpirun -np 2 -H localhost:2 --allow-run-as-root pytest -v ./tensorflow_recommenders_addons/dynamic_embedding/python/kernel_tests/horovod_sync_train_test.py
+# Reinstall Horovod through /install/install_horovod.sh once the MPI test has run.
+python -m pip uninstall horovod -y
+bash /install/install_horovod.sh "${HOROVOD_VERSION}"
 
 # Only use GPU 0 if available.
 if [ -x "$(command -v nvidia-smi)" ]; then