From d6b2b72f8e9de85f3faefc7fa057d86fcd8dbac2 Mon Sep 17 00:00:00 2001 From: Yuan Fengbo Date: Sat, 31 Jul 2021 14:48:57 +0800 Subject: [PATCH] better error message in handle_unexpected_submission_state. --- dpdispatcher/submission.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/dpdispatcher/submission.py b/dpdispatcher/submission.py index df26addc..ceb8fa1c 100644 --- a/dpdispatcher/submission.py +++ b/dpdispatcher/submission.py @@ -215,7 +215,12 @@ def handle_unexpected_submission_state(self): job.handle_unexpected_job_state() except Exception as e: self.submission_to_json() - raise e + raise RuntimeError( + f"Meet errors will handle unexpected submission state.\n" + f"Debug information: remote_root=={self.remote_root}.\n" + f"Debug information: submission_hash=={self.submission_hash}.\n" + f"Please check the dirs and scripts in remote_root" + ) from e # not used here, submitting job is in handle_unexpected_submission_state. @@ -522,8 +527,8 @@ def handle_unexpected_job_state(self): self.fail_count += 1 dlog.info(f"job: {self.job_hash} {self.job_id} terminated;" f"fail_cout is {self.fail_count}; resubmitting job") - if ( self.fail_count ) > 0 and ( self.fail_count % 3 == 0 ) : - raise RuntimeError(f"job:{self.job_hash} {self.job_id} failed {self.fail_count} times.job_detail:{self}") + if self.fail_count > 3: + raise RuntimeError(f"job:{self.job_hash}failed 3 times.job_detail:{self}") self.submit_job() dlog.info("job:{job_hash} re-submit after terminated; new job_id is {job_id}".format(job_hash=self.job_hash, job_id=self.job_id)) self.get_job_state() @@ -534,8 +539,8 @@ def handle_unexpected_job_state(self): if job_state == JobStatus.unsubmitted: dlog.info(f"job: {self.job_hash} unsubmitted; submit it") - # if ( self.fail_count > 0 ) and (self.fail_cound % 3 == 0): - # raise RuntimeError(f"job:job {self} failed {self.fail_count} times") + if self.fail_count > 3: + raise RuntimeError("job:job {job} failed 3 times".format(job=self)) # self.fail_count += 1 self.submit_job() dlog.info("job: {job_hash} submit; job_id is {job_id}".format(job_hash=self.job_hash, job_id=self.job_id))