diff --git a/buildbot_nix/__init__.py b/buildbot_nix/__init__.py
index bd8f2704e..ecb631aef 100644
--- a/buildbot_nix/__init__.py
+++ b/buildbot_nix/__init__.py
@@ -7,6 +7,7 @@
 from collections import defaultdict
 from collections.abc import Generator
 from dataclasses import dataclass
+from datetime import UTC, datetime
 from pathlib import Path
 from typing import TYPE_CHECKING, Any
 
@@ -194,13 +195,14 @@ def schedule_eval_failure(
         return (self.failed_eval_scheduler, props)
 
     def schedule_cached_failure(
-        self, job: NixEvalJobSuccess, report_status: bool
+        self, job: NixEvalJobSuccess, report_status: bool, first_failure: datetime
     ) -> tuple[str, Properties]:
         source = "nix-eval-nix"
 
         props = BuildTrigger.set_common_properties(
             Properties(), source, job, report_status
         )
+        props.setProperty("first_failure", str(first_failure), source)
 
         return (self.cached_failure_scheduler, props)
 
@@ -440,32 +442,29 @@ def run(self) -> Generator[Any, Any, None]:
             # check which jobs should be scheduled now
             schedule_now = []
             for build in list(build_schedule_order):
+                failed_build = failed_builds.check_build(build.drvPath)
                 if job_closures.get(build.drvPath):
                     pass
-                elif (
-                    failed_builds.check_build(build.drvPath)
-                    and self.build.reason != "rebuild"
-                ):
-                    failed_builds.remove_build(build.drvPath)
+                elif failed_build is not None and self.build.reason != "rebuild":
                     scheduler_log.addStdout(
-                        f"\t- skipping {build.attr} due to cached failure\n"
+                        f"\t- skipping {build.attr} due to cached failure, first failed at {failed_build.time}\n"
                     )
                     build_schedule_order.remove(build)
 
                     brids, results_deferred = yield self.schedule(
                         ss_for_trigger,
-                        *self.schedule_cached_failure(build, self.report_status),
+                        *self.schedule_cached_failure(
+                            build, self.report_status, failed_build.time
+                        ),
                     )
                     scheduled.append(
                         BuildTrigger.ScheduledJob(build, brids, results_deferred)
                     )
                     self.brids.extend(brids.values())
-                elif (
-                    failed_builds.check_build(build.drvPath)
-                    and self.build.reason == "rebuild"
-                ):
+                elif failed_build is not None and self.build.reason == "rebuild":
+                    failed_builds.remove_build(build.drvPath)
                     scheduler_log.addStdout(
-                        f"\t- not skipping {build.attr} with cached failure due to rebuild\n"
+                        f"\t- not skipping {build.attr} with cached failure due to rebuild, first failed at {failed_build.time}\n"
                     )
                     build_schedule_order.remove(build)
 
@@ -514,7 +513,7 @@ def run(self) -> Generator[Any, Any, None]:
 
             # if it failed, remove all dependent jobs, schedule placeholders and add them to the list of scheduled jobs
             if result != SUCCESS:
-                failed_builds.add_build(job.drvPath)
+                failed_builds.add_build(job.drvPath, datetime.now(tz=UTC))
 
                 removed = self.get_failed_dependents(
                     job, build_schedule_order, job_closures
@@ -705,7 +704,13 @@ def run(self) -> Generator[Any, object, int]:
         # show eval error
         error_log: StreamLog = yield self.addLog("nix_error")
         error_log.addStderr(
-            f"{attr} was failed because it has failed previous and its failure has been cached.\n"
+            "\n".join(
+                [
+                    f"{attr} was failed because it has failed previously and its failure has been cached.",
+                    f" first failure time: {self.getProperty('first_failure')}",
+                ]
+            )
+            + "\n"
         )
 
         return util.FAILURE
diff --git a/buildbot_nix/failed_builds.py b/buildbot_nix/failed_builds.py
new file mode 100644
index 000000000..1b004b16c
--- /dev/null
+++ b/buildbot_nix/failed_builds.py
@@ -0,0 +1,86 @@
+"""Persistent record of failed derivation builds, stored in a dbm file."""
+
+import dbm
+from datetime import datetime
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+
+from pydantic import BaseModel
+
+# Module-wide handle to the open dbm file; stays None until
+# initialize_database() has been called.
+if TYPE_CHECKING:
+    database: None | dbm._Database = None
+else:
+    database: Any = None
+
+
+class FailedBuildsError(Exception):
+    """Raised when the failed-build store is used before it is initialized."""
+
+
+class FailedBuild(BaseModel):
+    """Stored record for one failed derivation."""
+
+    derivation: str
+    time: datetime
+
+
+DB_NOT_INIT_MSG = "Database not initialized"
+
+
+def initialize_database(db_path: Path) -> None:
+    """Open the dbm file at *db_path*, creating it if necessary.
+
+    Calls made after the database has been opened are no-ops.
+    """
+    global database  # noqa: PLW0603
+
+    # Compare against None rather than using truthiness: dbm objects define
+    # len(), so an open but empty database is falsy and would be opened a
+    # second time while the first handle is still open.
+    if database is None:
+        database = dbm.open(str(db_path), "c")
+
+
+def add_build(derivation: str, time: datetime) -> None:
+    """Record *derivation* as failed at *time*, replacing any existing entry.
+
+    Raises FailedBuildsError if the database has not been initialized.
+    """
+    global database  # noqa: PLW0602
+
+    if database is not None:
+        database[derivation] = FailedBuild(
+            derivation=derivation, time=time
+        ).model_dump_json()
+    else:
+        raise FailedBuildsError(DB_NOT_INIT_MSG)
+
+
+def check_build(derivation: str) -> FailedBuild | None:
+    """Return the stored failure for *derivation*, or None if there is none.
+
+    Raises FailedBuildsError if the database has not been initialized.
+    """
+    global database  # noqa: PLW0602
+
+    if database is not None:
+        if derivation in database:
+            # TODO create dummy if deser fails?
+            return FailedBuild.model_validate_json(database[derivation])
+        return None
+    raise FailedBuildsError(DB_NOT_INIT_MSG)
+
+
+def remove_build(derivation: str) -> None:
+    """Delete the stored failure for *derivation*.
+
+    Raises FailedBuildsError if the database has not been initialized.
+    """
+    global database  # noqa: PLW0602
+
+    if database is not None:
+        del database[derivation]
+    else:
+        raise FailedBuildsError(DB_NOT_INIT_MSG)