-
Notifications
You must be signed in to change notification settings - Fork 50
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Improve experiment stats #1038
Improve experiment stats #1038
Changes from 37 commits
0f4f079
066739c
3b3fabf
5ea026c
15aa04f
d091ce3
585eb6a
c1c722c
88400db
e43c344
c762498
a9bea8f
7f3cd1f
401d30a
3ba9c0d
fcabfa6
2c4e01b
7c01198
0db2c34
216e254
3b84803
041b7a8
588527a
b07db38
a79eed5
5eb5829
f45cc84
e3d513f
077619b
2ef1dac
64ff509
d09ead9
d553a52
61b89a2
bb042f5
ef660f1
8d125df
f9f4e8d
eab42e6
ea16bb6
da60151
a28c66b
babbf8b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -1,4 +1,4 @@ | ||||||
# pylint:disable=protected-access,too-many-public-methods,too-many-lines | ||||||
# pylint:disable=protected-access,too-many-public-methods,too-many-lines,too-many-branches | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this exception specific to [inline code reference lost in extraction]? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ok! |
||||||
""" | ||||||
Description of an optimization attempt | ||||||
====================================== | ||||||
|
@@ -10,10 +10,13 @@ | |||||
|
||||||
import contextlib | ||||||
import copy | ||||||
import dataclasses | ||||||
import datetime | ||||||
import inspect | ||||||
import logging | ||||||
import math | ||||||
import typing | ||||||
from collections import Counter | ||||||
from dataclasses import dataclass, field | ||||||
from typing import Generator, Generic, TypeVar | ||||||
|
||||||
|
@@ -57,14 +60,46 @@ class ExperimentStats: | |||||
When Experiment reached terminating condition and stopped running. | ||||||
duration: `datetime.timedelta` | ||||||
Elapsed time. | ||||||
max_trials: int | ||||||
Experiment max_trials | ||||||
nb_trials: int | ||||||
Number of trials in experiment | ||||||
progress: float | ||||||
Experiment progression (between 0 and 1). | ||||||
trial_status_count: Dict[str, int] | ||||||
Dictionary mapping trial status to number of trials that have this status | ||||||
whole_clock_time: `datetime.timedelta` | ||||||
Sum of trial duration | ||||||
eta: `datetime.timedelta` | ||||||
Estimated remaining time | ||||||
eta_milliseconds: float | ||||||
ETA in milliseconds (used to get ETA in other programming languages, e.g. Javascript) | ||||||
""" | ||||||
|
||||||
trials_completed: int | ||||||
best_trials_id: int | ||||||
best_evaluation: float | ||||||
start_time: datetime.datetime | ||||||
finish_time: datetime.datetime | ||||||
max_trials: int = 0 | ||||||
nb_trials: int = 0 | ||||||
progress: float = 0 | ||||||
trial_status_count: dict = field(default_factory=dict) | ||||||
duration: datetime.timedelta = field(default_factory=datetime.timedelta) | ||||||
whole_clock_time: datetime.timedelta = field(default_factory=datetime.timedelta) | ||||||
eta: datetime.timedelta = field(default_factory=datetime.timedelta) | ||||||
eta_milliseconds: float = 0 | ||||||
|
||||||
def to_json(self): | ||||||
"""Return a JSON-compatible dictionary of stats.""" | ||||||
return { | ||||||
key: ( | ||||||
str(value) | ||||||
if isinstance(value, (datetime.datetime, datetime.timedelta)) | ||||||
else value | ||||||
) | ||||||
for key, value in dataclasses.asdict(self).items() | ||||||
} | ||||||
|
||||||
|
||||||
# pylint: disable=too-many-public-methods | ||||||
|
@@ -615,35 +650,101 @@ def stats(self): | |||||
"""Calculate :py:class:`orion.core.worker.experiment.ExperimentStats` for this particular | ||||||
experiment. | ||||||
""" | ||||||
trials = self.fetch_trials(with_evc_tree=False) | ||||||
completed_trials = self.fetch_trials_by_status("completed") | ||||||
|
||||||
if not completed_trials: | ||||||
return {} | ||||||
trials_completed = len(completed_trials) | ||||||
# Retrieve the best evaluation, best trial ID, start time and finish time | ||||||
# TODO: should we compute start time as min(completed_trials.start_time) | ||||||
# instead of metadata["datetime"]? For duration below, we do not use metadata["datetime"] | ||||||
best_evaluation = None | ||||||
best_trials_id = None | ||||||
trial = completed_trials[0] | ||||||
best_evaluation = trial.objective.value | ||||||
best_trials_id = trial.id | ||||||
start_time = self.metadata["datetime"] | ||||||
start_time = self.metadata.get("datetime", None) | ||||||
finish_time = start_time | ||||||
for trial in completed_trials: | ||||||
# All trials are going to finish certainly after the start date | ||||||
# of the experiment they belong to | ||||||
if trial.end_time > finish_time: # pylint:disable=no-member | ||||||
finish_time = trial.end_time | ||||||
objective = trial.objective.value | ||||||
if objective < best_evaluation: | ||||||
best_evaluation = objective | ||||||
best_trials_id = trial.id | ||||||
duration = finish_time - start_time | ||||||
if start_time and completed_trials: | ||||||
trial = completed_trials[0] | ||||||
best_evaluation = trial.objective.value | ||||||
best_trials_id = trial.id | ||||||
for trial in completed_trials: | ||||||
# All trials are going to finish certainly after the start date | ||||||
# of the experiment they belong to | ||||||
if trial.end_time > finish_time: # pylint:disable=no-member | ||||||
finish_time = trial.end_time | ||||||
objective = trial.objective.value | ||||||
if objective < best_evaluation: | ||||||
best_evaluation = objective | ||||||
best_trials_id = trial.id | ||||||
|
||||||
# Compute duration using all finished/stopped/running experiments | ||||||
# i.e. all trials that have an execution interval | ||||||
# (from a start time to an end time or heartbeat) | ||||||
intervals = [] | ||||||
for trial in trials: | ||||||
interval = trial.execution_interval | ||||||
if interval: | ||||||
intervals.append(interval) | ||||||
if intervals: | ||||||
min_start_time = min(interval[0] for interval in intervals) | ||||||
max_end_time = max(interval[1] for interval in intervals) | ||||||
duration = max_end_time - min_start_time | ||||||
else: | ||||||
duration = datetime.timedelta() | ||||||
|
||||||
# Compute ETA | ||||||
if not self.max_trials or math.isinf(self.max_trials): | ||||||
# If max_trials is None, 0 or infinite, we cannot compute ETA | ||||||
eta = None | ||||||
elif len(completed_trials) > self.max_trials: | ||||||
# If there are more completed trials than max trials, then ETA should be 0 (?) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
Yep! There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ok! |
||||||
eta = datetime.timedelta() | ||||||
elif not completed_trials: | ||||||
# If there are no completed trials, then we set ETA to infinite | ||||||
# NB: float("inf") may lead to wrong JSON syntax, so we just write "infinite" | ||||||
eta = "infinite" | ||||||
else: | ||||||
# Compute ETA using duration of completed trials | ||||||
completed_intervals = [ | ||||||
trial.execution_interval for trial in completed_trials | ||||||
] | ||||||
min_start_time = min(interval[0] for interval in completed_intervals) | ||||||
max_end_time = max(interval[1] for interval in completed_intervals) | ||||||
completed_duration = max_end_time - min_start_time | ||||||
eta = (completed_duration / len(completed_trials)) * ( | ||||||
self.max_trials - len(completed_trials) | ||||||
) | ||||||
|
||||||
# Compute progress | ||||||
if self.max_trials is None or math.isinf(self.max_trials): | ||||||
progress = None | ||||||
else: | ||||||
progress_base = max(self.max_trials, len(trials)) | ||||||
if progress_base == 0: | ||||||
progress = None | ||||||
else: | ||||||
progress = len(completed_trials) / progress_base | ||||||
|
||||||
return ExperimentStats( | ||||||
trials_completed=trials_completed, | ||||||
trials_completed=len(completed_trials), | ||||||
best_trials_id=best_trials_id, | ||||||
best_evaluation=best_evaluation, | ||||||
start_time=start_time, | ||||||
finish_time=finish_time, | ||||||
duration=duration, | ||||||
whole_clock_time=sum( | ||||||
(trial.duration for trial in trials), | ||||||
datetime.timedelta(), | ||||||
), | ||||||
nb_trials=len(trials), | ||||||
eta=eta, | ||||||
eta_milliseconds=eta.total_seconds() * 1000 | ||||||
if isinstance(eta, datetime.timedelta) | ||||||
else None, | ||||||
trial_status_count={**Counter(trial.status for trial in trials)}, | ||||||
progress=progress, | ||||||
max_trials=( | ||||||
"infinite" | ||||||
if self.max_trials is not None and math.isinf(self.max_trials) | ||||||
bouthilx marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
else self.max_trials | ||||||
), | ||||||
) | ||||||
|
||||||
def __repr__(self): | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It would be good to normalize these names with the labels in the frontend before we release it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok, done! `duration` -> `elapsed_time` and `whole_clock_time` -> `sum_of_trials_time`. I note that `duration` was already used elsewhere in the code (e.g. in module `format_terminal`). I updated it everywhere.