Skip to content

Commit

Permalink
fix:retry_count
Browse files Browse the repository at this point in the history
  • Loading branch information
xiaoyeqiannian committed Aug 25, 2023
1 parent 53ba2e0 commit c106c3a
Show file tree
Hide file tree
Showing 3 changed files with 3 additions and 2 deletions.
1 change: 1 addition & 0 deletions dpdispatcher/dp_cloud_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,5 +282,6 @@ def _parse_job_id(self, str_job_id: str) -> int:
job_id, _ = int(ids[0]), int(ids[1])
return job_id


DpCloudServer = Bohrium
Lebesgue = Bohrium
2 changes: 1 addition & 1 deletion dpdispatcher/dp_cloud_server_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ def machine_subfields(cls) -> List[Argument]:
"retry_count",
[int, type(None)],
optional=True,
- default=3,
+ default=2,
doc=doc_retry_count,
),
Argument(
Expand Down
2 changes: 1 addition & 1 deletion dpdispatcher/submission.py
Original file line number Diff line number Diff line change
Expand Up @@ -841,7 +841,7 @@ def handle_unexpected_job_state(self):
retry_count = 3
assert self.machine is not None
if hasattr(self.machine, "retry_count") and self.machine.retry_count > 0:
- retry_count = self.machine.retry_count
+ retry_count = self.machine.retry_count + 1
if (self.fail_count) > 0 and (self.fail_count % retry_count == 0):
raise RuntimeError(
f"job:{self.job_hash} {self.job_id} failed {self.fail_count} times.job_detail:{self}"
Expand Down

0 comments on commit c106c3a

Please sign in to comment.