Skip to content

Commit

Permalink
fix: Fix bug with mismatch-to-mapDamage when forward-only mode
Browse files Browse the repository at this point in the history
  • Loading branch information
ChristianMichelsen committed Jun 13, 2022
1 parent fd5a4cd commit 89670fa
Showing 1 changed file with 24 additions and 3 deletions.
27 changes: 24 additions & 3 deletions src/metaDMG/fit/mismatch_to_mapDamage.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,34 @@
#%%


def make_reverse_group(group):
    """Build the zero-filled 3' counterpart of a forward-only group.

    The result has the same index and the same columns as ``group``.
    ``tax_id``, ``|x|`` and ``sample`` are copied over unchanged,
    ``direction`` is set to ``"3'"`` and ``position`` is negated. Every
    other column is filled with integer zeros.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows to mirror. Must contain the columns ``tax_id``, ``direction``,
        ``position``, ``|x|`` and ``sample``.

    Returns
    -------
    pandas.DataFrame
        A new frame; ``group`` itself is not modified.

    Raises
    ------
    KeyError
        If ``group`` lacks one of the copied columns.
    """
    # Start from an all-zero frame so every column not handled below
    # stays at 0 in the mirrored rows.
    group_reverse = pd.DataFrame(0, index=group.index, columns=group.columns)

    # Replace whole columns rather than writing through ``.loc[:, col]``:
    # an in-place write of strings into the int64 zero columns is a
    # dtype-incompatible setitem, which pandas has deprecated and is
    # turning into an error. Column replacement lets each column take the
    # dtype of the assigned values.
    for column in ("tax_id", "|x|", "sample"):
        group_reverse[column] = group[column]
    group_reverse["direction"] = "3'"
    group_reverse["position"] = -group["position"]
    return group_reverse


def append_reverse_groups(df_mismatch):
    """Append a zero-filled 3' block after each ``tax_id`` group.

    The frame is split by ``tax_id`` in order of first appearance. For
    each group, the rows produced by ``make_reverse_group`` are placed
    directly after the group's own rows, and all pieces are concatenated.
    The original index labels are kept, so each label appears twice in
    the result.

    Parameters
    ----------
    df_mismatch : pandas.DataFrame
        Table with at least a ``tax_id`` column plus whatever columns
        ``make_reverse_group`` requires.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the original and mirrored rows. A zero-row
        input yields a zero-row copy.
    """
    # With no rows there are no groups, and ``pd.concat([])`` would raise
    # "No objects to concatenate"; an empty table simply stays empty.
    if len(df_mismatch.index) == 0:
        return df_mismatch.copy()

    # sort=False keeps the groups in order of first appearance. The group
    # key itself is not needed, only the rows.
    pieces = []
    for _, group in df_mismatch.groupby("tax_id", sort=False):
        pieces.append(group)
        pieces.append(make_reverse_group(group))
    return pd.concat(pieces)


def df_mismatch_to_mapDamage(df_mismatch):

df_mapDamage = df_mismatch.copy()

# if forward only, fill in zeros for reverse direction
if "3'" not in df_mapDamage["direction"].unique():
df_mapDamage = append_reverse_groups(df_mapDamage)

bases = ["A", "C", "G", "T"]
for base in bases:
if base not in df_mapDamage.columns:
Expand Down Expand Up @@ -73,6 +97,3 @@ def convert(filename, csv_out):

with open(csv_out, "w", encoding="utf-8") as file:
file.write(out)


# %%

0 comments on commit 89670fa

Please sign in to comment.