Use batch index at optimum #329

Merged (6 commits) on Aug 29, 2024
45 changes: 23 additions & 22 deletions alphadia/workflow/optimization.py
@@ -134,6 +134,7 @@ def step(
"score_cutoff": self.workflow.optimization_manager.score_cutoff,
"fwhm_rt": self.workflow.optimization_manager.fwhm_rt,
"fwhm_mobility": self.workflow.optimization_manager.fwhm_mobility,
"batch_idx": self.workflow.optlock.batch_idx,
}
]
)
@@ -143,7 +144,7 @@
if just_converged:
self.has_converged = True

- self._update_optimization_manager()
+ self._update_workflow()

self.reporter.log_string(
f"✅ {self.parameter_name:<15}: optimization complete. Optimal parameter {self.workflow.optimization_manager.__dict__[self.parameter_name]:.4f} found after {len(self.history_df)} searches.",
@@ -306,14 +307,14 @@ def _find_index_of_optimum(self):
else:
return self.history_df[self.feature_name].idxmax()

- def _update_optimization_manager(self):
+ def _update_workflow(self):
"""Updates the optimization manager with the results of the optimization, namely:
the classifier version,
the optimal parameter,
score cutoff,
FWHM_RT,
and FWHM_mobility
- at the optimal parameter.
+ at the optimal parameter. Also updates the optlock with the batch index at the optimum.

"""
index_of_optimum = self._find_index_of_optimum()
@@ -343,6 +344,11 @@ def _update_optimization_manager(self):
{"fwhm_mobility": fwhm_mobility_at_optimum}
)

+ batch_index_at_optimum = self.history_df["batch_idx"].loc[index_of_optimum]
+ # Take the batch index of the optimum, at the cost of potentially getting the batch library twice if this is the same as the current batch index.
+ # The time impact of this is negligible and the benefits can be significant.
+ self.workflow.optlock.batch_idx = batch_index_at_optimum

@abstractmethod
def _get_feature_value(
self, precursors_df: pd.DataFrame, fragments_df: pd.DataFrame
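To make the intent of these hunks concrete: each optimization step now records which optlock batch produced the trial, and on convergence `_update_workflow` rewinds the lock to the batch at the optimum. A minimal, self-contained sketch of that idea, where the frame contents and the `feature` column are illustrative stand-ins for the workflow's actual `history_df`:

```python
import pandas as pd

# Illustrative stand-in for the per-step history that step() accumulates;
# the real history_df also carries score_cutoff, fwhm_rt, fwhm_mobility, etc.
history_df = pd.DataFrame(
    {
        "parameter": [10.0, 7.0, 5.0],
        "feature": [900, 1200, 1100],  # e.g. precursors at 1% FDR per trial
        "batch_idx": [0, 1, 2],  # optlock batch used for each trial
    }
)

# The optimum is the trial that maximized the feature (row 1 here).
index_of_optimum = history_df["feature"].idxmax()

# Rewind to the batch that produced the optimum. This may rebuild the same
# batch library once if the optimum is the current batch, but per the diff's
# comment that cost is negligible compared to the benefit.
batch_index_at_optimum = history_df["batch_idx"].loc[index_of_optimum]
print(batch_index_at_optimum)  # -> 1
```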
@@ -612,7 +618,7 @@ def __init__(


class OptimizationLock:
- def __init__(self, library, config: dict):
+ def __init__(self, library: SpecLibFlat, config: dict):
"""Sets and updates the optimization lock, which is the data used for calibration and optimization of the search parameters.

Parameters
@@ -670,7 +676,9 @@ def set_batch_plan(self):

self.batch_plan = plan

- def update_with_extraction(self, feature_df, fragment_df):
+ def update_with_extraction(
+     self, feature_df: pd.DataFrame, fragment_df: pd.DataFrame
+ ):
"""Extract features and fragments from the current batch of the optimization lock.

Parameters
@@ -687,7 +695,7 @@ def update_with_extraction(self, feature_df, fragment_df):

self.total_elution_groups = self.features_df.elution_group_idx.nunique()

- def update_with_fdr(self, precursor_df):
+ def update_with_fdr(self, precursor_df: pd.DataFrame):
"""Calculates the number of precursors at 1% FDR for the current optimization lock and determines if it is sufficient to perform calibration and optimization.

Parameters
@@ -747,33 +755,26 @@ def update(self):
"""
if not self.has_target_num_precursors:
self.increase_batch_idx()
- eg_idxes = self.elution_group_order[
-     self.start_idx : self.stop_idx
- ]  # Take only additional elution groups in the new step of the batch plan.
# The target has not been reached and hence recalibration and optimization have not been performed, so previous extractions can be re-used.

else:
self.decrease_batch_idx()
- eg_idxes = self.elution_group_order[
-     : self.stop_idx
- ]  # Take all elution groups up the the stop index.
# The target has been reached and hence the recalibration and optimization have been performed, so everything must be extracted afresh.

self.feature_dfs = []
self.fragment_dfs = []

+ eg_idxes = self.elution_group_order[self.start_idx : self.stop_idx]
self.set_batch_dfs(eg_idxes)

- def set_batch_dfs(self, eg_idxes):
+ def set_batch_dfs(self, eg_idxes: None | np.ndarray = None):
"""
Sets the batch library to use for the next round of optimization, either adjusting it upwards or downwards depending on whether the target has been reached.

Parameters
----------
- eg_idxes: np.ndarray
-     The elution group indexes to use for the next round of optimization.
+ eg_idxes: None | np.ndarray
+     The elution group indexes to use for the next round of optimization. If None, the eg_idxes for the current self.start_idx and self.stop_idx are used.
"""
+ if eg_idxes is None:
+     eg_idxes = self.elution_group_order[self.start_idx : self.stop_idx]
self.batch_library = SpecLibFlat()
self.batch_library._precursor_df, (self.batch_library._fragment_df,) = (
remove_unused_fragments(
@@ -787,15 +788,15 @@ def set_batch_dfs(self, eg_idxes):
)

@property
- def features_df(self):
+ def features_df(self) -> pd.DataFrame:
return pd.concat(self.feature_dfs)

@property
- def fragments_df(self):
+ def fragments_df(self) -> pd.DataFrame:
return pd.concat(self.fragment_dfs)

@property
- def start_idx(self):
+ def start_idx(self) -> int:
if self.has_target_num_precursors:
return 0
elif self.batch_idx >= len(self.batch_plan):
@@ -804,5 +805,5 @@ def start_idx(self):
return self.batch_plan[self.batch_idx][0]

@property
- def stop_idx(self):
+ def stop_idx(self) -> int:
return self.batch_plan[self.batch_idx][1]
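Taken together, the `update` and `start_idx` changes mean one slice expression now covers both directions of the batch plan: `start_idx` returns 0 once the precursor target is reached, so the same `elution_group_order[start_idx:stop_idx]` either extends the library or rebuilds it from scratch. A toy sketch of that control flow, with a hypothetical `batch_plan` and with the clamping for a `batch_idx` past the end of the plan omitted:

```python
import numpy as np

# Hypothetical batch plan of (start, stop) pairs into elution_group_order;
# the real plan is built by set_batch_plan from the config.
batch_plan = [(0, 1000), (1000, 3000), (3000, 7000)]
elution_group_order = np.random.permutation(10000)


def batch_slice(batch_idx: int, has_target_num_precursors: bool) -> np.ndarray:
    """Sketch of the unified start_idx/stop_idx logic: once the precursor
    target is reached, start from 0 so the whole accumulated library is
    re-extracted after recalibration; otherwise take only the elution
    groups added by the current step of the plan."""
    start = 0 if has_target_num_precursors else batch_plan[batch_idx][0]
    stop = batch_plan[batch_idx][1]
    return elution_group_order[start:stop]


print(len(batch_slice(1, has_target_num_precursors=False)))  # 2000 new groups
print(len(batch_slice(1, has_target_num_precursors=True)))   # 3000, from scratch
```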
11 changes: 10 additions & 1 deletion alphadia/workflow/peptidecentric.py
@@ -285,9 +285,15 @@ def get_ordered_optimizers(self):
)

if config_search["target_rt_tolerance"] > 0:
+ gradient_length = self.dia_data.rt_values.max()
+ target_rt_error = (
+     config_search["target_rt_tolerance"]
+     if config_search["target_rt_tolerance"] > 1
+     else config_search["target_rt_tolerance"] * gradient_length
+ )
rt_optimizer = optimization.TargetedRTOptimizer(
self.optimization_manager.rt_error,
config_search["target_rt_tolerance"],
target_rt_error,
self,
)
else:
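The new `target_rt_error` expression makes `target_rt_tolerance` dual-purpose: values above 1 are taken as absolute seconds, values at or below 1 as a fraction of the gradient length (the maximum retention time). A standalone restatement of that rule, with a hypothetical function name and illustrative numbers:

```python
def resolve_rt_tolerance(target_rt_tolerance: float, gradient_length: float) -> float:
    """Values above 1 are treated as absolute seconds; values in (0, 1]
    as a fraction of the gradient length."""
    if target_rt_tolerance > 1:
        return target_rt_tolerance
    return target_rt_tolerance * gradient_length


# A 0.05 setting on a 60-minute (3600 s) gradient becomes a 180 s tolerance:
print(resolve_rt_tolerance(0.05, 3600.0))   # -> 180.0
print(resolve_rt_tolerance(200.0, 3600.0))  # -> 200.0, already in seconds
```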
@@ -413,6 +419,9 @@ def search_parameter_optimization(self):
verbosity="progress",
)

+ self.optlock.set_batch_dfs()
+ self.optlock.update_with_calibration(self.calibration_manager)

for optimizer in optimizers:
optimizer.plot()

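The two calls added at the end of `search_parameter_optimization` exist because an optimizer may have rewound `optlock.batch_idx`: the batch library must be rebuilt from the lock's current indices before the final calibration is applied, and with the new `eg_idxes=None` default that is a plain no-argument call. A hypothetical minimal stand-in showing the call order, not the real workflow classes:

```python
class OptlockSketch:
    """Toy stand-in for OptimizationLock, reduced to the rebuild behaviour."""

    def __init__(self, plan: list[tuple[int, int]]):
        self.batch_plan = plan
        self.batch_idx = len(plan) - 1  # where the search left off

    def set_batch_dfs(self, eg_idxes=None):
        # eg_idxes=None -> rebuild from the lock's current batch_idx,
        # which an optimizer may have rewound to the optimum.
        start, stop = self.batch_plan[self.batch_idx]
        print(f"rebuilding batch library from elution groups [{start}, {stop})")


lock = OptlockSketch([(0, 1000), (1000, 3000), (3000, 7000)])
lock.batch_idx = 1   # an optimizer found its optimum in batch 1
lock.set_batch_dfs() # -> rebuilding batch library from elution groups [1000, 3000)
```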
1 change: 1 addition & 0 deletions tests/unit_tests/test_workflow.py
@@ -450,6 +450,7 @@ def create_workflow_instance():

class MockOptlock:
total_elution_groups = 2000
+ batch_idx = 1

workflow.optlock = MockOptlock()
