From 89670fa84efedf5460804fc55abf1c10e17d619c Mon Sep 17 00:00:00 2001 From: Christian Michelsen Date: Mon, 13 Jun 2022 18:15:28 +0200 Subject: [PATCH] fix: Fix bug with mismatch-to-mapDamage in forward-only mode --- src/metaDMG/fit/mismatch_to_mapDamage.py | 27 +++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/src/metaDMG/fit/mismatch_to_mapDamage.py b/src/metaDMG/fit/mismatch_to_mapDamage.py index 7f3d5f9..53b1f58 100644 --- a/src/metaDMG/fit/mismatch_to_mapDamage.py +++ b/src/metaDMG/fit/mismatch_to_mapDamage.py @@ -10,10 +10,34 @@ #%% +def make_reverse_group(group): + group_reverse = pd.DataFrame(0, index=group.index, columns=group.columns) + group_reverse.loc[:, "tax_id"] = group.loc[:, "tax_id"] + group_reverse.loc[:, "direction"] = "3'" + group_reverse.loc[:, "position"] = -group.loc[:, "position"] + group_reverse.loc[:, "|x|"] = group.loc[:, "|x|"] + group_reverse.loc[:, "sample"] = group.loc[:, "sample"] + return group_reverse + + +def append_reverse_groups(df_mismatch): + groups = [] + for i, group in df_mismatch.groupby(["tax_id"], sort=False): + group_reverse = make_reverse_group(group) + group_combined = pd.concat([group, group_reverse]) + groups.append(group_combined) + df_mismatch = pd.concat(groups) + return df_mismatch + + def df_mismatch_to_mapDamage(df_mismatch): df_mapDamage = df_mismatch.copy() + # if forward only, fill in zeros for reverse direction + if "3'" not in df_mapDamage["direction"].unique(): + df_mapDamage = append_reverse_groups(df_mapDamage) + bases = ["A", "C", "G", "T"] for base in bases: if base not in df_mapDamage.columns: @@ -73,6 +97,3 @@ def convert(filename, csv_out): with open(csv_out, "w", encoding="utf-8") as file: file.write(out) - - -# %%