Skip to content

Commit

Permalink
Fix bug on non-DLTS infra when no output path is set (#523)
Browse files: browse the repository at this point in the history
  • Loading branch information
jeffra authored Nov 11, 2020
1 parent be1147c commit eea1c28
Showing 1 changed file with 10 additions and 3 deletions.
13 changes: 10 additions & 3 deletions deepspeed/runtime/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@
from .utils import ensure_directory_exists

MEMORY_OPT_ALLREDUCE_SIZE = 500000000
SUMMARY_WRITER_DIR_NAME = "JobId"

try:
from apex_C import flatten
Expand Down Expand Up @@ -271,9 +270,17 @@ def get_summary_writer(self,
else:
if self.tensorboard_job_name():
name = self.tensorboard_job_name()

# Infrastructure-specific job-id
if 'DLWS_JOB_ID' in os.environ:
SUMMARY_WRITER_DIR_NAME = os.path.join(os.environ['DLWS_JOB_ID'], "logs")
log_dir = os.path.join(base, SUMMARY_WRITER_DIR_NAME, name)
infra_job_id = os.environ['DLWS_JOB_ID']
elif 'DLTS_JOB_ID' in os.environ:
infra_job_id = os.environ['DLTS_JOB_ID']
else:
infra_job_id = 'unknown-job-id'

summary_writer_dir_name = os.path.join(infra_job_id, "logs")
log_dir = os.path.join(base, summary_writer_dir_name, name)

os.makedirs(log_dir, exist_ok=True)

Expand Down

0 comments on commit eea1c28

Please sign in to comment.