Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

230 basic linting rule sim #238

Merged
merged 14 commits on Jun 19, 2024 (source and target branch names were not captured)
Merged
9 changes: 4 additions & 5 deletions alphadia/calibration/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,11 +145,10 @@ def fit(self, x: np.ndarray, y: np.ndarray):
# === start === sanity checks ===
# Does not yet work with more than one input dimension
# axis-wise scaling and improved distance function need to be implemented
if len(x.shape) > 1:
if x.shape[1] > 1:
raise ValueError(
"Input arrays with more than one feature not yet supported. Please provide a matrix of shape (n_datapoints, 1) or (n_datapoints,)"
)
if len(x.shape) > 1 and x.shape[1] > 1:
raise ValueError(
"Input arrays with more than one feature not yet supported. Please provide a matrix of shape (n_datapoints, 1) or (n_datapoints,)"
)

# at least two datapoints required
if len(x.flat) < 2:
Expand Down
6 changes: 3 additions & 3 deletions alphadia/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,13 +190,13 @@ def parse_raw_path_list(args: argparse.Namespace, config: dict) -> list:
raw_path_list : list
List of raw files.
"""
config_raw_path_list = config["raw_path_list"] if "raw_path_list" in config else []
config_raw_path_list = config.get("raw_path_list", [])
raw_path_list = (
utils.windows_to_wsl(config_raw_path_list) if args.wsl else config_raw_path_list
)
raw_path_list += utils.windows_to_wsl(args.file) if args.wsl else args.file

config_directory = config["directory"] if "directory" in config else None
config_directory = config.get("directory")
directory = utils.windows_to_wsl(config_directory) if args.wsl else config_directory
if directory is not None:
raw_path_list += [os.path.join(directory, f) for f in os.listdir(directory)]
Expand Down Expand Up @@ -274,7 +274,7 @@ def parse_fasta(args: argparse.Namespace, config: dict) -> list:
List of fasta files.
"""

config_fasta_path_list = config["fasta_list"] if "fasta_list" in config else []
config_fasta_path_list = config.get("fasta_list", [])
fasta_path_list = (
utils.windows_to_wsl(config_fasta_path_list)
if args.wsl
Expand Down
17 changes: 8 additions & 9 deletions alphadia/data/alpharaw.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,15 +383,14 @@ def filter_spectra(self, cv: float = None, astral_ms1: bool = False, **kwargs):
]

# filter for cv values if multiple cv values are present
if cv is not None:
if "cv" in self.spectrum_df.columns:
# use np.isclose to account for floating point errors
logger.info(f"Filtering for CV {cv}")
logger.info(f"Before: {len(self.spectrum_df)}")
self.spectrum_df = self.spectrum_df[
np.isclose(self.spectrum_df["cv"], cv, atol=0.1)
]
logger.info(f"After: {len(self.spectrum_df)}")
if cv is not None and "cv" in self.spectrum_df.columns:
# use np.isclose to account for floating point errors
logger.info(f"Filtering for CV {cv}")
logger.info(f"Before: {len(self.spectrum_df)}")
self.spectrum_df = self.spectrum_df[
np.isclose(self.spectrum_df["cv"], cv, atol=0.1)
]
logger.info(f"After: {len(self.spectrum_df)}")

self.spectrum_df["spec_idx"] = np.arange(len(self.spectrum_df))

Expand Down
33 changes: 16 additions & 17 deletions alphadia/fdr.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,24 +134,23 @@ def perform_fdr(

psm_df = get_q_values(psm_df, "proba", "_decoy")

if dia_cycle is not None:
if dia_cycle.shape[2] <= 2:
# use a FDR of 10% as starting point
# if there are no PSMs with a FDR < 10% use all PSMs
start_idx = psm_df["qval"].searchsorted(fdr_heuristic, side="left")
if start_idx == 0:
start_idx = len(psm_df)

# make sure fragments are not reused
if df_fragments is not None:
if dia_cycle is None:
raise ValueError(
"dia_cycle must be provided if reuse_fragments is False"
)
fragment_competition = fragcomp.FragmentCompetition()
psm_df = fragment_competition(
psm_df.iloc[:start_idx], df_fragments, dia_cycle
if dia_cycle is not None and dia_cycle.shape[2] <= 2:
# use a FDR of 10% as starting point
# if there are no PSMs with a FDR < 10% use all PSMs
start_idx = psm_df["qval"].searchsorted(fdr_heuristic, side="left")
if start_idx == 0:
start_idx = len(psm_df)

# make sure fragments are not reused
if df_fragments is not None:
if dia_cycle is None:
raise ValueError(
"dia_cycle must be provided if reuse_fragments is False"
)
fragment_competition = fragcomp.FragmentCompetition()
psm_df = fragment_competition(
psm_df.iloc[:start_idx], df_fragments, dia_cycle
)

psm_df = keep_best(psm_df, group_columns=group_columns)
psm_df = get_q_values(psm_df, "proba", "_decoy")
Expand Down
51 changes: 27 additions & 24 deletions alphadia/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,14 +557,13 @@ def build_features(
features["mean_observation_score"] = 0
features["var_observation_score"] = 1

if np.sum(peak_fragment_mask_1d) > 0:
if n_observations > 1:
observation_score = cosine_similarity_a1(
total_template_intensity,
observed_fragment_intensity[peak_fragment_mask_1d],
).astype(np.float32)
features["mean_observation_score"] = np.mean(observation_score)
features["var_observation_score"] = np.var(observation_score)
if np.sum(peak_fragment_mask_1d) > 0 and n_observations > 1:
observation_score = cosine_similarity_a1(
total_template_intensity,
observed_fragment_intensity[peak_fragment_mask_1d],
).astype(np.float32)
features["mean_observation_score"] = np.mean(observation_score)
features["var_observation_score"] = np.var(observation_score)

fragment_features["mz_library"] = fragments.mz_library[fragment_mask_1d]
fragment_features["mz_observed"] = observed_fragment_mz_mean[
Expand Down Expand Up @@ -1336,14 +1335,16 @@ def rank_features(current_candidate_idx, candidate_list):
count = 0

for i_candidate in range(len(candidate_list)):
if i_candidate != current_candidate_idx:
if feature in candidate_list[i_candidate].features:
if (
candidate_list[i_candidate].features[feature]
< candidate_list[current_candidate_idx].features[feature]
):
rank += 1
count += 1
if (
i_candidate != current_candidate_idx
and feature in candidate_list[i_candidate].features
):
if (
candidate_list[i_candidate].features[feature]
< candidate_list[current_candidate_idx].features[feature]
):
rank += 1
count += 1

if count > 0:
feature_dict[feature + "_rank"] = rank / count
Expand All @@ -1356,14 +1357,16 @@ def rank_features(current_candidate_idx, candidate_list):
count = 0

for i_candidate in range(len(candidate_list)):
if i_candidate != current_candidate_idx:
if feature in candidate_list[i_candidate].features:
if (
candidate_list[i_candidate].features[feature]
> candidate_list[current_candidate_idx].features[feature]
):
rank += 1
count += 1
if (
i_candidate != current_candidate_idx
and feature in candidate_list[i_candidate].features
):
if (
candidate_list[i_candidate].features[feature]
> candidate_list[current_candidate_idx].features[feature]
):
rank += 1
count += 1

if count > 0:
feature_dict[feature + "_rank"] = rank / count
Expand Down
19 changes: 9 additions & 10 deletions alphadia/libtransform.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def __init__(self, modification_mapping={}) -> None:
def validate(self, input: str) -> bool:
"""Validate the input object. It is expected that the input is a path to a file which exists."""
valid = True
valid &= isinstance(input, str) or isinstance(input, Path)
valid &= isinstance(input, str | Path)

if not os.path.exists(input):
logger.error(f"Input path {input} does not exist")
Expand Down Expand Up @@ -569,15 +569,14 @@ def forward(self, input: SpecLibBase) -> SpecLibBase:
)
return input

if "rt" not in input.precursor_df.columns:
if (
"rt_norm" in input.precursor_df.columns
or "rt_norm_pred" in input.precursor_df.columns
):
logger.warning(
"Input library already contains normalized RT information. Skipping RT normalization"
)
return input
if "rt" not in input.precursor_df.columns and (
"rt_norm" in input.precursor_df.columns
or "rt_norm_pred" in input.precursor_df.columns
):
logger.warning(
"Input library already contains normalized RT information. Skipping RT normalization"
)
return input

percentiles = np.percentile(input.precursor_df["rt"], [0.1, 99.9])
input._precursor_df["rt"] = np.clip(
Expand Down
34 changes: 19 additions & 15 deletions alphadia/numba/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,26 +147,30 @@ def __init__(self):
)

# check if dtype matches
if isinstance(value, np.ndarray):
if value.dtype != getattr(self, key).dtype:
try:
value = value.astype(getattr(self, key).dtype)
except Exception:
self.reporter.log_string(
f"Parameter {key} has wrong dtype {value.dtype}",
verbosity="error",
)
continue

# check if dimensions match
if isinstance(value, np.ndarray):
if value.shape != getattr(self, key).shape:
if (
isinstance(value, np.ndarray)
and value.dtype != getattr(self, key).dtype
):
try:
value = value.astype(getattr(self, key).dtype)
except Exception:
self.reporter.log_string(
f"Parameter {key} has wrong shape {value.shape}",
f"Parameter {key} has wrong dtype {value.dtype}",
verbosity="error",
)
continue

# check if dimensions match
if (
isinstance(value, np.ndarray)
and value.shape != getattr(self, key).shape
):
self.reporter.log_string(
f"Parameter {key} has wrong shape {value.shape}",
verbosity="error",
)
continue

# update attribute
setattr(self, key, value)

Expand Down
5 changes: 1 addition & 4 deletions alphadia/numba/fragments.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,10 +336,7 @@ def get_ion_group_mapping(
):
# score_group_idx = precursor_group[precursor]

if len(grouped_mz) == 0:
grouped_mz.append(mz)

elif np.abs(grouped_mz[-1] - mz) > EPSILON:
if len(grouped_mz) == 0 or np.abs(grouped_mz[-1] - mz) > EPSILON:
grouped_mz.append(mz)

idx = len(grouped_mz) - 1
Expand Down
5 changes: 1 addition & 4 deletions alphadia/numba/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,7 @@ def get_mean_sparse0(dense, scan, cycle, threshold):
mask = mz_window < threshold
fraction_nonzero = np.mean(mask.astype("int8"))

if fraction_nonzero > 0:
values = np.mean(mz_window[mask])
else:
values = threshold
values = np.mean(mz_window[mask]) if fraction_nonzero > 0 else threshold

return values

Expand Down
13 changes: 5 additions & 8 deletions alphadia/peakgroup/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,11 +394,11 @@ def select_candidates(
# "Empty dense precursor matrix"
return

if not dense_fragments.shape[2] % 2 == 0:
if dense_fragments.shape[2] % 2 != 0:
# "Dense fragment matrix not divisible by 2"
return

if not dense_fragments.shape[2] % 2 == 0:
if dense_fragments.shape[2] % 2 != 0:
# "Dense fragment matrix not divisible by 2"
return

Expand Down Expand Up @@ -628,20 +628,17 @@ def build_candidates(
):
cycle_length = jit_data.cycle.shape[1]

if weights is None:
feature_weights = np.ones(1)
else:
feature_weights = weights
feature_weights = np.ones(1) if weights is None else weights

feature_weights = feature_weights.reshape(-1, 1, 1)

smooth_precursor = fft.convolve_fourier(dense_precursors, kernel)
smooth_fragment = fft.convolve_fourier(dense_fragments, kernel)

if not smooth_precursor.shape == dense_precursors.shape:
if smooth_precursor.shape != dense_precursors.shape:
print(smooth_precursor.shape, dense_precursors.shape)
print("smooth_precursor shape does not match dense_precursors shape")
if not smooth_fragment.shape == dense_fragments.shape:
if smooth_fragment.shape != dense_fragments.shape:
print(smooth_fragment.shape, dense_fragments.shape)
print("smooth_fragment shape does not match dense_fragments shape")

Expand Down
2 changes: 1 addition & 1 deletion alphadia/plexscoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -1110,7 +1110,7 @@ def get_feature_columns(self):
if len(candidate.features) not in known_feature_lengths:
known_feature_lengths += [len(candidate.features)]
# add all new features to the list of known columns
for key in candidate.features.keys():
for key in candidate.features.keys(): # noqa: SIM118
[Review thread: mschwoer marked this conversation as resolved.]
if key not in known_columns:
known_columns += [key]
return known_columns
Expand Down
2 changes: 1 addition & 1 deletion alphadia/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ def recursive_update(full_dict: dict, update_dict: dict):

"""
for key, value in update_dict.items():
if key in full_dict.keys():
if key in full_dict:
if isinstance(value, dict):
recursive_update(full_dict[key], update_dict[key])
else:
Expand Down
5 changes: 2 additions & 3 deletions alphadia/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,8 @@ def __call__(self, df, logging=True):
If True, log the validation results
"""

if self.name in df.columns:
if df[self.name].dtype != self.type:
df[self.name] = df[self.name].astype(self.type)
if self.name in df.columns and df[self.name].dtype != self.type:
df[self.name] = df[self.name].astype(self.type)

return True

Expand Down
17 changes: 6 additions & 11 deletions alphadia/workflow/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,14 +176,7 @@ def _get_dia_data_object(
shutil.copyfile(dia_data_path, tmp_dia_data_path)
dia_data_path = tmp_dia_data_path

if file_extension.lower() == ".d":
self.reporter.log_metric("raw_data_type", "bruker")
dia_data = bruker.TimsTOFTranspose(
dia_data_path,
mmap_detector_events=self.config["general"]["mmap_detector_events"],
)

elif file_extension.lower() == ".hdf":
if file_extension.lower() == ".d" or file_extension.lower() == ".hdf":
self.reporter.log_metric("raw_data_type", "bruker")
dia_data = bruker.TimsTOFTranspose(
dia_data_path,
Expand All @@ -194,9 +187,11 @@ def _get_dia_data_object(
self.reporter.log_metric("raw_data_type", "thermo")
# check if cv selection exists
cv = None
if "raw_data_loading" in self.config:
if "cv" in self.config["raw_data_loading"]:
cv = self.config["raw_data_loading"]["cv"]
if (
"raw_data_loading" in self.config
and "cv" in self.config["raw_data_loading"]
):
cv = self.config["raw_data_loading"]["cv"]

dia_data = alpharaw.Thermo(
dia_data_path,
Expand Down
2 changes: 1 addition & 1 deletion alphadia/workflow/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ def print_recursively(
)
continue

elif isinstance(value, list) or isinstance(value, dict):
elif isinstance(value, list | dict):
print_w_style(
f"{key}",
style=style,
Expand Down
Loading
Loading