Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#230: ruff rule B006: auto fixes #242

Merged
merged 1 commit into from
Jun 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions alphadia/calibration/property.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ def __init__(
self,
name: str = "",
function: object = None,
input_columns: list[str] = [],
target_columns: list[str] = [],
output_columns: list[str] = [],
input_columns: list[str] | None = None,
target_columns: list[str] | None = None,
output_columns: list[str] | None = None,
transform_deviation: None | float = None,
**kwargs,
):
Expand Down Expand Up @@ -59,7 +59,12 @@ def __init__(
If set to None, the deviation is expressed in absolute units.

"""

if output_columns is None:
output_columns = []
if target_columns is None:
target_columns = []
if input_columns is None:
input_columns = []
self.name = name
self.function = function
self.input_columns = input_columns
Expand Down
4 changes: 3 additions & 1 deletion alphadia/fdr.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ def perform_fdr(
def keep_best(
df: pd.DataFrame,
score_column: str = "proba",
group_columns: list[str] = ["channel", "precursor_idx"],
group_columns: list[str] | None = None,
):
"""Keep the best PSM for each group of PSMs with the same precursor_idx.
This function is used to select the best candidate PSM for each precursor.
Expand All @@ -196,6 +196,8 @@ def keep_best(
pd.DataFrame
The dataframe containing the best PSM for each group.
"""
if group_columns is None:
group_columns = ["channel", "precursor_idx"]
temp_df = df.reset_index(drop=True)
temp_df = temp_df.sort_values(score_column, ascending=True)
temp_df = temp_df.groupby(group_columns).head(1)
Expand Down
18 changes: 12 additions & 6 deletions alphadia/fdrexperimental.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ def __init__(
epochs: int = 10,
learning_rate: float = 0.0002,
weight_decay: float = 0.00001,
layers: list[int] = [100, 50, 20, 5],
layers: list[int] | None = None,
dropout: float = 0.001,
calculate_metrics: bool = True,
metric_interval: int = 1,
Expand Down Expand Up @@ -186,6 +186,8 @@ def __init__(
Whether to use GPU acceleration if available.
"""

if layers is None:
layers = [100, 50, 20, 5]
self.test_size = test_size
self.max_batch_size = max_batch_size
self.min_batch_number = min_batch_number
Expand Down Expand Up @@ -605,7 +607,7 @@ def __init__(
epochs: int = 10,
learning_rate: float = 0.0002,
weight_decay: float = 0.00001,
layers: list[int] = [100, 50, 20, 5],
layers: list[int] | None = None,
dropout: float = 0.001,
metric_interval: int = 1000,
**kwargs,
Expand Down Expand Up @@ -646,7 +648,8 @@ def __init__(
Interval for logging metrics during training.

"""

if layers is None:
layers = [100, 50, 20, 5]
self.test_size = test_size
self.batch_size = batch_size
self.epochs = epochs
Expand Down Expand Up @@ -919,7 +922,7 @@ def __init__(
epochs: int = 10,
learning_rate: float = 0.0002,
weight_decay: float = 0.00001,
layers: list[int] = [100, 50, 20, 5],
layers: list[int] | None = None,
dropout: float = 0.001,
metric_interval: int = 1000,
**kwargs,
Expand Down Expand Up @@ -960,7 +963,8 @@ def __init__(
Interval for logging metrics during training.

"""

if layers is None:
layers = [100, 50, 20, 5]
self.test_size = test_size
self.batch_size = batch_size
self.epochs = epochs
Expand Down Expand Up @@ -1236,13 +1240,15 @@ def __init__(
self,
input_dim,
output_dim=2,
layers=[20, 10, 5],
layers: list[int] | None = None,
dropout=0.5,
):
"""
Build a simple feed-forward network for FDR estimation.

"""
if layers is None:
layers = [20, 10, 5]
super().__init__()
self.input_dim = input_dim
self.output_dim = output_dim
Expand Down
4 changes: 3 additions & 1 deletion alphadia/fdrx/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ def fdr_to_q_values(fdr_values: np.ndarray):
def keep_best(
df: pd.DataFrame,
score_column: str = "decoy_proba",
group_columns: list[str] = ["channel", "mod_seq_charge_hash"],
group_columns: list[str] | None = None,
):
"""Keep the best PSM for each group of PSMs that share the same `group_columns` values (by default `channel` and `mod_seq_charge_hash`).
This function is used to select the best candidate PSM for each precursor.
Expand All @@ -159,6 +159,8 @@ def keep_best(
pd.DataFrame
The dataframe containing the best PSM for each group.
"""
if group_columns is None:
group_columns = ["channel", "mod_seq_charge_hash"]
df = df.reset_index(drop=True)
df = df.sort_values(score_column, ascending=True)
df = df.groupby(group_columns).head(1)
Expand Down
35 changes: 24 additions & 11 deletions alphadia/libtransform.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def __call__(self, input: typing.Any) -> typing.Any:


class DynamicLoader(ProcessingStep):
def __init__(self, modification_mapping={}) -> None:
def __init__(self, modification_mapping: dict | None = None) -> None:
"""Load a spectral library from a file. The file type is dynamically inferred from the file ending.
Expects a `str` as input and will return a `SpecLibBase` object.

Expand All @@ -98,6 +98,8 @@ def __init__(self, modification_mapping={}) -> None:
The classical spectral library format as returned by MSFragger.
It will be imported and converted to a `SpecLibBase` format. This might require additional parsing information.
"""
if modification_mapping is None:
modification_mapping = {}
self.modification_mapping = modification_mapping

def validate(self, input: str) -> bool:
Expand Down Expand Up @@ -137,20 +139,27 @@ class FastaDigest(ProcessingStep):
def __init__(
self,
enzyme: str = "trypsin",
fixed_modifications: list[str] = ["Carbamidomethyl@C"],
variable_modifications: list[str] = [
"Oxidation@M",
"Acetyl@Prot N-term",
],
fixed_modifications: list[str] | None = None,
variable_modifications: list[str] | None = None,
missed_cleavages: int = 1,
precursor_len: list[int] = [7, 35],
precursor_charge: list[int] = [2, 4],
precursor_mz: list[int] = [400, 1200],
precursor_len: list[int] | None = None,
precursor_charge: list[int] | None = None,
precursor_mz: list[int] | None = None,
max_var_mod_num: int = 1,
) -> None:
"""Digest a FASTA file into a spectral library.
Expects a `List[str]` object as input and will return a `SpecLibBase` object.
"""
if precursor_mz is None:
precursor_mz = [400, 1200]
if precursor_charge is None:
precursor_charge = [2, 4]
if precursor_len is None:
precursor_len = [7, 35]
if variable_modifications is None:
variable_modifications = ["Oxidation@M", "Acetyl@Prot N-term"]
if fixed_modifications is None:
fixed_modifications = ["Carbamidomethyl@C"]
super().__init__()
self.enzyme = enzyme
self.fixed_modifications = fixed_modifications
Expand Down Expand Up @@ -242,11 +251,11 @@ def __init__(
self,
use_gpu: bool = True,
mp_process_num: int = 8,
fragment_mz: list[int] = [100, 2000],
fragment_mz: list[int] | None = None,
nce: int = 25,
instrument: str = "Lumos",
checkpoint_folder_path: str | None = None,
fragment_types: list[str] = ["b", "y"],
fragment_types: list[str] | None = None,
max_fragment_charge: int = 2,
) -> None:
"""Predict the retention time of a spectral library using PeptDeep.
Expand Down Expand Up @@ -278,6 +287,10 @@ def __init__(
max_fragment_charge : int, optional
Maximum charge state to predict. Default is 2.
"""
if fragment_types is None:
fragment_types = ["b", "y"]
if fragment_mz is None:
fragment_mz = [100, 2000]
super().__init__()
self.use_gpu = use_gpu
self.fragment_mz = fragment_mz
Expand Down
44 changes: 23 additions & 21 deletions alphadia/outputaccumulator.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,27 +80,7 @@ def _calculate_fragment_position(self):
def parse_output_folder(
self,
folder: str,
selected_precursor_columns: list[str] = [
"precursor_idx",
"sequence",
"flat_frag_start_idx",
"flat_frag_stop_idx",
"charge",
"rt_library",
"rt_observed",
"rt_calibrated",
"mobility_library",
"mobility_observed",
"mz_library",
"mz_observed",
"mz_calibrated",
"proteins",
"genes",
"mods",
"mod_sites",
"proba",
"decoy",
],
selected_precursor_columns: list[str] | None = None,
) -> tuple[pd.DataFrame, pd.DataFrame]:
"""
Parse the output folder to get a precursor and fragment dataframe in the flat format.
Expand All @@ -121,6 +101,28 @@ def parse_output_folder(


"""
if selected_precursor_columns is None:
selected_precursor_columns = [
"precursor_idx",
"sequence",
"flat_frag_start_idx",
"flat_frag_stop_idx",
"charge",
"rt_library",
"rt_observed",
"rt_calibrated",
"mobility_library",
"mobility_observed",
"mz_library",
"mz_observed",
"mz_calibrated",
"proteins",
"genes",
"mods",
"mod_sites",
"proba",
"decoy",
]
psm_df = pd.read_parquet(os.path.join(folder, "psm.parquet"))
frag_df = pd.read_parquet(os.path.join(folder, "frag.parquet"))

Expand Down
10 changes: 7 additions & 3 deletions alphadia/outputtransform.py
Original file line number Diff line number Diff line change
Expand Up @@ -829,7 +829,9 @@ def build_library(
return mbr_spec_lib


def _build_run_stat_df(raw_name: str, run_df: pd.DataFrame, channels: list[int] = [0]):
def _build_run_stat_df(
raw_name: str, run_df: pd.DataFrame, channels: list[int] | None = None
):
"""Build stat dataframe for a single run.

Parameters
Expand All @@ -841,8 +843,8 @@ def _build_run_stat_df(raw_name: str, run_df: pd.DataFrame, channels: list[int]
run_df: pd.DataFrame
Dataframe containing the precursor data

channels: List[int]
List of channels to include in the output
channels: List[int], optional
List of channels to include in the output, default=[0]

Returns
-------
Expand All @@ -851,6 +853,8 @@ def _build_run_stat_df(raw_name: str, run_df: pd.DataFrame, channels: list[int]

"""

if channels is None:
channels = [0]
out_df = []

for channel in channels:
Expand Down
16 changes: 12 additions & 4 deletions alphadia/planning.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,10 @@ class Plan:
def __init__(
self,
output_folder: str,
raw_path_list: list[str] = [],
raw_path_list: list[str] | None = None,
library_path: str | None = None,
fasta_path_list: list[str] = [],
config: dict | None = {},
fasta_path_list: list[str] | None = None,
config: dict | None = None,
config_base_path: str | None = None,
) -> None:
"""Highest level class to plan a DIA Search.
Expand All @@ -75,6 +75,12 @@ def __init__(
dict to update the default config. Can be used for debugging purposes etc.

"""
if config is None:
config = {}
if fasta_path_list is None:
fasta_path_list = []
if raw_path_list is None:
raw_path_list = []
self.output_folder = output_folder
reporting.init_logging(self.output_folder)

Expand Down Expand Up @@ -288,10 +294,12 @@ def run(
self,
figure_path=None,
neptune_token=None,
neptune_tags=[],
neptune_tags=None,
keep_decoys=False,
fdr=0.01,
):
if neptune_tags is None:
neptune_tags = []
logger.progress("Starting Search Workflows")

workflow_folder_list = []
Expand Down
9 changes: 6 additions & 3 deletions alphadia/plexscoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@

def candidate_features_to_candidates(
candidate_features_df: pd.DataFrame,
optional_columns: list[str] = ["proba"],
optional_columns: list[str] | None = None,
):
"""create candidates_df from candidate_features_df

Expand All @@ -50,6 +50,8 @@ def candidate_features_to_candidates(
"""

# validate candidate_features_df input
if optional_columns is None:
optional_columns = ["proba"]
validate.candidate_features_df(candidate_features_df.copy())

required_columns = [
Expand All @@ -76,7 +78,7 @@ def multiplex_candidates(
candidates_df: pd.DataFrame,
precursors_flat_df: pd.DataFrame,
remove_decoys: bool = True,
channels: list[int] = [0, 4, 8, 12],
channels: list[int] | None = None,
):
"""Takes a candidates dataframe and a precursors dataframe and returns a multiplexed candidates dataframe.
All original candidates will be retained. For missing candidates, the best scoring candidate in the elution group will be used and multiplexed across all missing channels.
Expand All @@ -103,7 +105,8 @@ def multiplex_candidates(
Multiplexed candidates dataframe

"""

if channels is None:
channels = [0, 4, 8, 12]
precursors_flat_view = precursors_flat_df.copy()
best_candidate_view = candidates_df.copy()

Expand Down
7 changes: 6 additions & 1 deletion alphadia/transferlearning/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,8 +215,13 @@ class FinetuneManager(ModelManager):
"""

def __init__(
self, mask_modloss: bool = False, device: str = "gpu", settings: dict = {}
self,
mask_modloss: bool = False,
device: str = "gpu",
settings: dict | None = None,
):
if settings is None:
settings = {}
super().__init__(mask_modloss, device)
self.device = device
self.settings = settings
Expand Down
Loading
Loading