Skip to content

Commit

Permalink
Add a global variable as a flag to ensure DDP never runs twice
Browse files Browse the repository at this point in the history
  • Loading branch information
FANGAreNotGnu committed Oct 23, 2023
1 parent 18c0088 commit d47df62
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 1 deletion.
3 changes: 3 additions & 0 deletions multimodal/src/autogluon/multimodal/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,9 @@

# strategies
DDP = "ddp"
DDP_FIND_UNUSED_PARAMETERS_FALSE = "ddp_find_unused_parameters_false"
DDP_FIND_UNUSED_PARAMETERS_TRUE = "ddp_find_unused_parameters_true"
DDP_STRATEGIES = [DDP, DDP_FIND_UNUSED_PARAMETERS_FALSE, DDP_FIND_UNUSED_PARAMETERS_TRUE]

# torch constants
TORCH_COMPILE_MIN_VERSION = "2.2.0.dev20230908"
5 changes: 5 additions & 0 deletions multimodal/src/autogluon/multimodal/matcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@
init_df_preprocessor,
load_text_tokenizers,
predict,
run_ddp_only_once,
save_pretrained_model_configs,
save_text_tokenizers,
select_model,
Expand Down Expand Up @@ -944,6 +945,8 @@ def _fit(
# save artifacts for the current running, except for model checkpoint, which will be saved in trainer
self.save(save_path)

num_gpus = run_ddp_only_once(num_gpus, strategy)

blacklist_msgs = ["already configured with model summary"]
log_filter = LogFilter(blacklist_msgs)
with apply_log_filter(log_filter):
Expand Down Expand Up @@ -1278,6 +1281,8 @@ def _default_predict(
match_label=match_label,
)

num_gpus = run_ddp_only_once(num_gpus, strategy)

blacklist_msgs = []
if self._verbosity <= 3: # turn off logging in prediction
blacklist_msgs.append("Automatic Mixed Precision")
Expand Down
5 changes: 5 additions & 0 deletions multimodal/src/autogluon/multimodal/predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@
modify_duplicate_model_names,
object_detection_data_to_df,
predict,
run_ddp_only_once,
save_ovd_result_df,
save_pretrained_model_configs,
save_result_df,
Expand Down Expand Up @@ -1505,6 +1506,8 @@ def _fit(
# save artifacts for the current running, except for model checkpoint, which will be saved in trainer
self.save(save_path, standalone=standalone)

num_gpus = run_ddp_only_once(num_gpus, strategy)

blacklist_msgs = ["already configured with model summary"]
log_filter = LogFilter(blacklist_msgs)
with apply_log_filter(log_filter):
Expand Down Expand Up @@ -1801,6 +1804,8 @@ def _default_predict(
**optimization_kwargs,
)

num_gpus = run_ddp_only_once(num_gpus, strategy)

blacklist_msgs = []
if self._verbosity <= 3: # turn off logging in prediction
blacklist_msgs.append("Automatic Mixed Precision")
Expand Down
14 changes: 13 additions & 1 deletion multimodal/src/autogluon/multimodal/utils/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from autogluon.common.utils.resource_utils import ResourceManager

from ..constants import DDP, OBJECT_DETECTION, OCR
from ..constants import DDP_STRATEGIES, OBJECT_DETECTION, OCR

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -347,3 +347,15 @@ def _get_mmlab_installation_guide(package_name):
raise ValueError("Available package_name are: mmdet, mmcv, mmengine.")

return err_msg


def run_ddp_only_once(num_gpus, strategy):
    """Limit multi-GPU DDP to a single launch per Python process.

    The first call that requests a DDP strategy with more than one GPU
    records a module-level marker named ``FIRST_TIME_DDP``. Every later call
    that requests a DDP strategy sees the marker and is told to use one
    device instead.

    Parameters
    ----------
    num_gpus
        Number of GPUs the caller intends to use.
    strategy
        Name of the training/inference strategy; compared against
        ``DDP_STRATEGIES``.

    Returns
    -------
    ``1`` if ``strategy`` is a DDP strategy and the marker already exists in
    this module's globals; otherwise ``num_gpus`` unchanged.
    """
    global FIRST_TIME_DDP

    # Strategies outside DDP_STRATEGIES are passed through untouched.
    if strategy not in DDP_STRATEGIES:
        return num_gpus

    # The marker's mere existence (not its value) means a multi-GPU DDP run
    # was already started from this process, so fall back to a single device.
    if "FIRST_TIME_DDP" in globals():
        return 1

    # Only an actual multi-GPU request counts as "DDP has been run".
    if num_gpus > 1:
        FIRST_TIME_DDP = False
    return num_gpus

0 comments on commit d47df62

Please sign in to comment.