From 793ffffab0fce83a61b14a0712ad98ad4cc8d2b8 Mon Sep 17 00:00:00 2001 From: Christian Michelsen Date: Fri, 12 Aug 2022 09:53:28 +0200 Subject: [PATCH] feat: Better fits for low-damage groups --- src/metaDMG/fit/fits.py | 14 ++++++++++---- src/metaDMG/fit/frequentist.py | 13 ++++++++++--- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/src/metaDMG/fit/fits.py b/src/metaDMG/fit/fits.py index 3d763d4..0637bfc 100644 --- a/src/metaDMG/fit/fits.py +++ b/src/metaDMG/fit/fits.py @@ -482,15 +482,21 @@ def compute(config, df_mismatches): # filter out tax_id's with too few reads df_mismatches = filter_tax_ids(config, df_stats, df_stat_cut, df_mismatches) - # filter out tax_id's with 0 k_sum_total - df_mismatches = filter_k_sum(config, df_mismatches) - if len(df_mismatches) == 0: - s = f"{config['sample']} df_mismatches.query('k_sum_total > 0') is empty." + s = f"{config['sample']} cut on min reads > {config['min_reads']} made data empty." logger.debug("WARNING: BadDataError") logger.debug(s) raise BadDataError(s) + # # filter out tax_id's with 0 k_sum_total + # df_mismatches = filter_k_sum(config, df_mismatches) + + # if len(df_mismatches) == 0: + # s = f"{config['sample']} df_mismatches.query('k_sum_total > 0') is empty." + # logger.debug("WARNING: BadDataError") + # logger.debug(s) + # raise BadDataError(s) + # find unique tax_ids (when compairing the mismatches matrices) # such that only those are fitted unique, duplicates = compute_duplicates(df_mismatches) diff --git a/src/metaDMG/fit/frequentist.py b/src/metaDMG/fit/frequentist.py index ccce3ad..235a515 100644 --- a/src/metaDMG/fit/frequentist.py +++ b/src/metaDMG/fit/frequentist.py @@ -167,7 +167,14 @@ def fit(self): if not self.m.valid: self.m.migrad() + # second try time with a totally flat guess + if not self.m.valid: + p0_flat = {"q": 0.0, "A": 0.0, "c": 0.01, "phi": 100} + self._setup_p0(p0_flat) + self.m.migrad() + # if not working, continue with new guesses + MAX_GUESSES = 100 if not self.m.valid: self.i = 0 while True: @@ -175,10 +182,10 @@ def fit(self): for key, val in p0.items(): self.m.values[key] = val self.m.migrad() - if self.m.valid or self.i >= 100: + if self.m.valid or self.i >= MAX_GUESSES: break self.m.migrad() - if self.m.valid or self.i >= 100: + if self.m.valid or self.i >= MAX_GUESSES: break self.i += 1 @@ -623,7 +630,7 @@ def make_forward_reverse_fits(fit_result, data, sample, tax_id): fit_result[f"{var}"] = getattr(fit_all, var) numerator = fit_forward.D_max - fit_reverse.D_max - denominator = np.sqrt(fit_forward.D_max_std ** 2 + fit_reverse.D_max_std ** 2) + denominator = np.sqrt(fit_forward.D_max_std**2 + fit_reverse.D_max_std**2) fit_result["asymmetry"] = np.abs(numerator) / denominator for var in vars_to_keep: