Skip to content

Commit

Permalink
Make update job status more readable
Browse files Browse the repository at this point in the history
  • Loading branch information
Michaelvll committed Jun 6, 2023
1 parent 46b7c11 commit eb65949
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 11 deletions.
17 changes: 7 additions & 10 deletions sky/skylet/job_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ def colored_str(self):
# to avoid race condition with `ray job` to make sure the job has been
# correctly updated.
# TODO(zhwu): This number should be tuned based on heuristics.
_PENDING_SUBMIT_TIMEOUT = 5
_PENDING_SUBMIT_TIMEOUT = 60

_PRE_RESOURCE_STATUSES = [JobStatus.PENDING]

Expand Down Expand Up @@ -541,19 +541,16 @@ def update_job_status(job_owner: str,
ray_status = job_details[ray_job_id].status
job_statuses[i] = _RAY_TO_JOB_STATUS_MAP[ray_status]
if job_id in pending_jobs:
# Gives a 5 second timeout between the job being submitted from the
# pending queue and appearing in ray jobs.
if pending_jobs[job_id]['submit'] > 0 and pending_jobs[job_id][
'submit'] < time.time() - _PENDING_SUBMIT_TIMEOUT:
continue
if pending_jobs[job_id]['created_time'] < psutil.boot_time():
# The job is stale as it is created before the instance
# is booted, e.g. the instance is rebooted.
job_statuses[i] = JobStatus.FAILED
else:
# Set the job status to PENDING even though the job can be
# in any later status, because the code will take the max
# of this status and the status in the jobs table.
elif (pending_jobs[job_id]['submit'] >
max(0, time.time() - _PENDING_SUBMIT_TIMEOUT)):
# Gives a 60 second timeout between the job being submitted from the
# pending queue and appearing in ray jobs.
# Reset the job status to PENDING even though it may not appear
# in the ray jobs, so that it will not be considered as stale.
job_statuses[i] = JobStatus.PENDING

assert len(job_statuses) == len(job_ids), (job_statuses, job_ids)
Expand Down
5 changes: 4 additions & 1 deletion sky/skylet/skylet.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@
from sky import sky_logging
from sky.skylet import events

logger = sky_logging.init_logger(__name__)
# Use the explicit logger name so that the logger is under the
# `sky.skylet.skylet` namespace when executed directly, so as
# to inherit the setup from the `sky` logger.
logger = sky_logging.init_logger('sky.skylet.skylet')
logger.info('skylet started')

EVENTS = [
Expand Down

0 comments on commit eb65949

Please sign in to comment.