From e39ba45b886ae3e232016628cd231ef043a7f904 Mon Sep 17 00:00:00 2001 From: MoFHeka Date: Wed, 31 Jan 2024 02:51:46 +0800 Subject: [PATCH] [fix] get_temp_dir may return a different directory on each Horovod rank, and filesystem operations may complete later than the file assertion checks run. --- .../python/kernel_tests/horovod_sync_train_test.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tensorflow_recommenders_addons/dynamic_embedding/python/kernel_tests/horovod_sync_train_test.py b/tensorflow_recommenders_addons/dynamic_embedding/python/kernel_tests/horovod_sync_train_test.py index 22c69ae8f..1f05c37a1 100644 --- a/tensorflow_recommenders_addons/dynamic_embedding/python/kernel_tests/horovod_sync_train_test.py +++ b/tensorflow_recommenders_addons/dynamic_embedding/python/kernel_tests/horovod_sync_train_test.py @@ -22,6 +22,7 @@ import os import numpy as np import shutil +from time import sleep import tensorflow as tf @@ -407,6 +408,7 @@ def call(self, x): def check_TFRADynamicEmbedding_directory(save_dir, save_it=None, should_be_exist=True): + sleep(4) # Wait for filesystem operation hvd_size = hvd.size() if hvd_size <= 1: hvd_size = 1 @@ -439,6 +441,10 @@ def check_TFRADynamicEmbedding_directory(save_dir, new_de_opt_compared = {} save_dir = self.get_temp_dir() + # Need to use same directory when horovod save + save_dir = hvd.broadcast_object(save_dir, + root_rank=0, + name=f'{__file__}_broadcast_save_dir') model = NoCompileModel('ones') base_opt = Adam(1.0)