Epistimio · bouthilx · Dec 19, 2022 · Nov 14, 2022 · Nov 14, 2022 · Nov 8, 2022
diff --git a/docs/src/user/web_api.rst b/docs/src/user/web_api.rst
@@ -161,6 +161,57 @@ retrieve individual experiments as well as a list of all your experiments.
    :statuscode 400: When an invalid query parameter is passed in the request.
    :statuscode 404: When the specified experiment doesn't exist in the database.
 
+.. http:get:: /experiments/status/:name
+
+   Retrieve the stats of the existing experiment named ``name``.
+
+   **Example response**
+
+   .. sourcecode:: http
+
+      HTTP/1.1 200 OK
+      Content-Type: text/javascript
+
+   .. code-block:: json
+
+      {
+        "trials_completed": 40,
+        "best_trials_id": "955c77e7f567c2625f48546188a6cda1",
+        "best_evaluation": -0.788720013597263,
+        "start_time": "2019-11-25 16:02:02.872583",
+        "finish_time": "2019-11-27 21:13:27.043519",
+        "max_trials": 40,
+        "nb_trials": 40,
+        "progress": 1,
+        "trial_status_count": {
+          "completed": 40
+        },
+        "duration": "2 days, 5:11:24.006755",
+        "whole_clock_time": "8 days, 23:15:15.594405",
+        "eta": "0:00:00",
+        "eta_milliseconds": 0
+      }
+
+   :query version: Optional version of the experiment to retrieve. If unspecified, the latest
+      version of the experiment is retrieved.
+
+   :>json trials_completed: The number of trials completed.
+   :>json best_trials_id: The ID of the best trial, i.e. the completed trial with the lowest objective.
+   :>json best_evaluation: The objective value of the best trial.
+   :>json start_time: The timestamp when the experiment started.
+   :>json finish_time: The latest end time among completed trials (same as ``start_time`` if no trial has completed yet).
+   :>json max_trials: The number of max trials for this experiment.
+   :>json nb_trials: The current number of trials in this experiment.
+   :>json progress: Floating value between 0 and 1 representing experiment progression.
+   :>json trial_status_count: A dictionary mapping trial status to number of trials with this status in the experiment.
+   :>json duration: The time elapsed between the earliest trial start and the latest trial end or heartbeat.
+   :>json whole_clock_time: The sum of the execution times of all trials.
+   :>json eta: The estimated time remaining for the experiment to finish.
+   :>json eta_milliseconds: The ETA in milliseconds (convenient for use in JavaScript).
+
+   :statuscode 400: When an invalid query parameter is passed in the request.
+   :statuscode 404: When the specified experiment doesn't exist in the database.
+
 Trials
 ------
 

diff --git a/src/orion/core/utils/format_terminal.py b/src/orion/core/utils/format_terminal.py
@@ -391,7 +391,7 @@ def format_stats(experiment):
 
     """
     stats = experiment.stats
-    if not stats:
+    if not stats.trials_completed:
         return NO_STATS_TEMPLATE.format(title=format_title("Stats"))
 
     best_params = get_trial_params(stats.best_trials_id, experiment)

diff --git a/src/orion/core/worker/experiment.py b/src/orion/core/worker/experiment.py
@@ -1,4 +1,4 @@
-# pylint:disable=protected-access,too-many-public-methods,too-many-lines
+# pylint:disable=protected-access,too-many-public-methods,too-many-lines,too-many-branches
 """
 Description of an optimization attempt
 ======================================
@@ -10,10 +10,13 @@
 
 import contextlib
 import copy
+import dataclasses
 import datetime
 import inspect
 import logging
+import math
 import typing
+from collections import Counter
 from dataclasses import dataclass, field
 from typing import Generator, Generic, TypeVar
 
@@ -57,14 +60,46 @@ class ExperimentStats:
        When Experiment reached terminating condition and stopped running.
     duration: `datetime.timedelta`
        Elapsed time.
+    max_trials: int
+        Maximum number of trials for the experiment
+    nb_trials: int
+        Number of trials in experiment
+    progress: float
+        Experiment progression (between 0 and 1).
+    trial_status_count: Dict[str, int]
+        Dictionary mapping trial status to number of trials that have this status
+    whole_clock_time: `datetime.timedelta`
+        Sum of the durations of all trials
+    eta: `datetime.timedelta`
+        Estimated remaining time
+    eta_milliseconds: float
+        ETA in milliseconds (used to get ETA in other programming languages, e.g. Javascript)
     """
 
     trials_completed: int
     best_trials_id: int
     best_evaluation: float
     start_time: datetime.datetime
     finish_time: datetime.datetime
+    max_trials: int = 0
+    nb_trials: int = 0
+    progress: float = 0
+    trial_status_count: dict = field(default_factory=dict)
     duration: datetime.timedelta = field(default_factory=datetime.timedelta)
+    whole_clock_time: datetime.timedelta = field(default_factory=datetime.timedelta)
+    eta: datetime.timedelta = field(default_factory=datetime.timedelta)
+    eta_milliseconds: float = 0
+
+    def to_json(self):
+        """Return a JSON-compatible dictionary of stats."""
+        return {
+            key: (
+                str(value)
+                if isinstance(value, (datetime.datetime, datetime.timedelta))
+                else value
+            )
+            for key, value in dataclasses.asdict(self).items()
+        }
 
 
 # pylint: disable=too-many-public-methods
@@ -615,35 +650,101 @@ def stats(self):
         """Calculate :py:class:`orion.core.worker.experiment.ExperimentStats` for this particular
         experiment.
         """
+        trials = self.fetch_trials(with_evc_tree=False)
         completed_trials = self.fetch_trials_by_status("completed")
 
-        if not completed_trials:
-            return {}
-        trials_completed = len(completed_trials)
+        # Retrieve the best evaluation, best trial ID, start time and finish time
+        # TODO: should we compute start time as min(completed_trials.start_time)
+        # instead of metadata["datetime"]? For duration below, we do not use metadata["datetime"]
+        best_evaluation = None
         best_trials_id = None
-        trial = completed_trials[0]
-        best_evaluation = trial.objective.value
-        best_trials_id = trial.id
-        start_time = self.metadata["datetime"]
+        start_time = self.metadata.get("datetime", None)
         finish_time = start_time
-        for trial in completed_trials:
-            # All trials are going to finish certainly after the start date
-            # of the experiment they belong to
-            if trial.end_time > finish_time:  # pylint:disable=no-member
-                finish_time = trial.end_time
-            objective = trial.objective.value
-            if objective < best_evaluation:
-                best_evaluation = objective
-                best_trials_id = trial.id
-        duration = finish_time - start_time
+        if start_time and completed_trials:
+            trial = completed_trials[0]
+            best_evaluation = trial.objective.value
+            best_trials_id = trial.id
+            for trial in completed_trials:
+                # All trials are going to finish certainly after the start date
+                # of the experiment they belong to
+                if trial.end_time > finish_time:  # pylint:disable=no-member
+                    finish_time = trial.end_time
+                objective = trial.objective.value
+                if objective < best_evaluation:
+                    best_evaluation = objective
+                    best_trials_id = trial.id
+
+        # Compute duration using all finished/stopped/running trials
+        # i.e. all trials that have an execution interval
+        # (from a start time to an end time or heartbeat)
+        intervals = []
+        for trial in trials:
+            interval = trial.execution_interval
+            if interval:
+                intervals.append(interval)
+        if intervals:
+            min_start_time = min(interval[0] for interval in intervals)
+            max_end_time = max(interval[1] for interval in intervals)
+            duration = max_end_time - min_start_time
+        else:
+            duration = datetime.timedelta()
+
+        # Compute ETA
+        if not self.max_trials or math.isinf(self.max_trials):
+            # If max_trials is None, 0 or infinite, we cannot compute ETA
+            eta = None
+        elif len(completed_trials) > self.max_trials:
+            # If there are more completed trials than max trials, then ETA should be 0
+            eta = datetime.timedelta()
+        elif not completed_trials:
+            # If there are no completed trials, then we set ETA to infinite
+            # NB: float("inf") may lead to wrong JSON syntax, so we just write "infinite"
+            eta = "infinite"
+        else:
+            # Compute ETA using duration of completed trials
+            completed_intervals = [
+                trial.execution_interval for trial in completed_trials
+            ]
+            min_start_time = min(interval[0] for interval in completed_intervals)
+            max_end_time = max(interval[1] for interval in completed_intervals)
+            completed_duration = max_end_time - min_start_time
+            eta = (completed_duration / len(completed_trials)) * (
+                self.max_trials - len(completed_trials)
+            )
+
+        # Compute progress
+        if self.max_trials is None or math.isinf(self.max_trials):
+            progress = None
+        else:
+            progress_base = max(self.max_trials, len(trials))
+            if progress_base == 0:
+                progress = None
+            else:
+                progress = len(completed_trials) / progress_base
 
         return ExperimentStats(
-            trials_completed=trials_completed,
+            trials_completed=len(completed_trials),
             best_trials_id=best_trials_id,
             best_evaluation=best_evaluation,
             start_time=start_time,
             finish_time=finish_time,
             duration=duration,
+            whole_clock_time=sum(
+                (trial.duration for trial in trials),
+                datetime.timedelta(),
+            ),
+            nb_trials=len(trials),
+            eta=eta,
+            eta_milliseconds=eta.total_seconds() * 1000
+            if isinstance(eta, datetime.timedelta)
+            else None,
+            trial_status_count={**Counter(trial.status for trial in trials)},
+            progress=progress,
+            max_trials=(
+                "infinite"
+                if self.max_trials is not None and math.isinf(self.max_trials)
+                else self.max_trials
+            ),
         )
 
     def __repr__(self):

diff --git a/src/orion/core/worker/trial.py b/src/orion/core/worker/trial.py
@@ -13,6 +13,7 @@
 import logging
 import os
 import warnings
+from datetime import timedelta
 
 from orion.core.utils.exceptions import InvalidResult
 from orion.core.utils.flatten import unflatten
@@ -477,6 +478,26 @@ def full_name(self):
             )
         return self.format_values(self._params, sep="-").replace("/", ".")
 
+    @property
+    def duration(self):
+        """Return trial duration as a timedelta() object"""
+        execution_interval = self.execution_interval
+        if execution_interval:
+            from_time, to_time = execution_interval
+            return to_time - from_time
+        else:
+            return timedelta()
+
+    @property
+    def execution_interval(self):
+        """Return execution interval, or None if unavailable"""
+        if self.start_time:
+            if self.end_time:
+                return self.start_time, self.end_time
+            elif self.heartbeat:
+                return self.start_time, self.heartbeat
+        return None
+
     def _repr_values(self, values, sep=","):
         """Represent with a string the given values."""
         return Trial.format_values(values, sep)

diff --git a/src/orion/serving/experiments_resource.py b/src/orion/serving/experiments_resource.py
@@ -49,6 +49,16 @@ def on_get_experiment(self, req: Request, resp: Response, name: str):
         response = build_experiment_response(experiment, status, algorithm, best_trial)
         resp.body = json.dumps(response)
 
+    def on_get_experiment_status(self, req: Request, resp: Response, name: str):
+        """
+        Handle GET requests for experiments/status/:name where `name` is
+        the user-defined name of the experiment
+        """
+        verify_query_parameters(req.params, ["version"])
+        version = req.get_param_as_int("version")
+        experiment = retrieve_experiment(self.storage, name, version)
+        resp.body = json.dumps(experiment.stats.to_json())
+
 
 def _find_latest_versions(experiments):
     """Find the latest versions of the experiments"""
@@ -86,7 +96,7 @@ def _retrieve_algorithm(experiment: Experiment) -> dict:
 
 def _retrieve_best_trial(experiment: Experiment) -> Optional[Trial]:
     """Constructs the view of the best trial if there is one"""
-    if not experiment.stats:
+    if not experiment.stats.trials_completed:
         return None
 
     return experiment.get_trial(uid=experiment.stats.best_trials_id)
diff --git a/src/orion/serving/responses.py b/src/orion/serving/responses.py
@@ -42,6 +42,7 @@ def build_trial_response(trial: Trial) -> dict:
         "statistics": {
             statistic.name: statistic.value for statistic in trial.statistics
         },
+        "status": trial.status,
     }
 
 
@@ -87,7 +88,7 @@ def build_experiment_response(
     }
 
     stats = experiment.stats
-    if stats:
+    if stats.trials_completed:
         data["trialsCompleted"] = stats.trials_completed
         data["startTime"] = str(stats.start_time)
         data["endTime"] = str(stats.finish_time)

diff --git a/src/orion/serving/webapi.py b/src/orion/serving/webapi.py
@@ -120,6 +120,11 @@ def __init__(self, storage, config=None):
         self.add_route("/experiments/{name}", experiments_resource, suffix="experiment")
         self.add_route("/benchmarks", benchmarks_resource)
         self.add_route("/benchmarks/{name}", benchmarks_resource, suffix="benchmark")
+        self.add_route(
+            "/experiments/status/{name}",
+            experiments_resource,
+            suffix="experiment_status",
+        )
         self.add_route(
             "/trials/{experiment_name}", trials_resource, suffix="trials_in_experiment"
         )