Skip to content

Commit

Permalink
Merge pull request #73 from alneberg/umi_format_explore_fix
Browse files Browse the repository at this point in the history
Umi format explore fix
  • Loading branch information
alneberg authored Feb 14, 2024
2 parents 580fc97 + de9ff8b commit a82f3fc
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 16 deletions.
46 changes: 33 additions & 13 deletions anglerfish/demux/adaptor.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,24 +90,44 @@ def __init__(self, sequence, name, delim, index):
self.name = name
self.delim = delim
self.index = index
self.umi_after = 0
self.umi_before = 0
self.len_after_index = 0
self.len_before_index = 0

# Dynamically assign attributes
self.umi = re.findall(udelim, self.sequence)

# TODO Duplicated from Adaptor class, will be merged later
# Check if UMI is before or after index
if len(self.umi) > 0 and ">" + self.umi[0] in self.sequence:
# The index region is INDEX+UMI
self.umi_after = int(re.search(ulen, self.umi[0]).group(1))
self.len_before_index = len(idelim.split(self.sequence)[0])
self.len_after_index = len(udelim.split(self.sequence)[-1])
elif len(self.umi) > 0 and self.umi[0] + "<" in self.sequence:
# The index region is UMI+INDEX
self.umi_before = int(re.search(ulen, self.umi[0]).group(1))
self.len_before_index = len(udelim.split(self.sequence)[0])
self.len_after_index = len(idelim.split(self.sequence)[-1])
elif len(self.umi) > 0:
# TODO: report which adaptor has the problem in the error message
raise UserWarning(
"Found adaptor with UMI but it does not flank an index. This is not supported."
)
# Non UMI cases
elif has_match(idelim, self.sequence):
self.len_before_index = len(idelim.split(self.sequence)[0])
self.len_after_index = len(idelim.split(self.sequence)[-1])

def has_index(self):
    """Report whether the index delimiter occurs in the adaptor sequence.

    Returns:
        bool: True when ``self.delim`` is a substring of ``self.sequence``,
        False otherwise.
    """
    return self.delim in self.sequence

def len_before_index(self):
    """Return the number of characters preceding the index delimiter.

    Returns:
        int: Offset of the first occurrence of ``self.delim`` within
        ``self.sequence``, or 0 when the delimiter does not occur.
    """
    # str.find() signals "not found" with -1, which is not a meaningful
    # length; clamp it to 0 so callers never derive a negative value from it.
    return max(self.sequence.find(self.delim), 0)
def len_before_index_region(self):
    """Return the value held in the ``len_before_index`` attribute."""
    length_before = self.len_before_index
    return length_before

def len_after_index(self):
    """Return the number of characters following the index delimiter.

    Returns:
        int: Count of characters in ``self.sequence`` after the first
        occurrence of ``self.delim``, or 0 when the delimiter does not occur.
    """
    delim_start = self.sequence.find(self.delim)
    if delim_start < 0:
        # Without this guard the -1 sentinel from str.find() would yield
        # len(sequence) + 1 - len(delim), which is not a real length.
        return 0
    return len(self.sequence) - delim_start - len(self.delim)

def get_mask(self, insert_Ns):
    """Return the adaptor sequence with the index delimiter masked out.

    Args:
        insert_Ns: When truthy, every occurrence of ``self.delim`` is replaced
            by a run of "N" as long as ``self.index``; otherwise the
            delimiter is simply removed.

    Returns:
        The masked sequence, or ``self.sequence`` unchanged when
        ``self.has_index()`` is false.
    """
    if not self.has_index():
        return self.sequence
    # len(self.index) is only evaluated when Ns are actually requested.
    filler = "N" * len(self.index) if insert_Ns else ""
    return self.sequence.replace(self.delim, filler)
def len_after_index_region(self):
    """Return the value held in the ``len_after_index`` attribute."""
    length_after = self.len_after_index
    return length_after


# General function to check if a string contains a pattern
Expand Down
11 changes: 8 additions & 3 deletions anglerfish/explore/explore.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,11 @@ def run_explore(

# Alignment thresholds
before_thres = round(
adaptor_end.len_before_index() * good_hit_threshold
adaptor_end.len_before_index_region() * good_hit_threshold
)
after_thres = round(
adaptor_end.len_after_index_region() * good_hit_threshold
)
after_thres = round(adaptor_end.len_after_index() * good_hit_threshold)
insert_thres_low = insert_thres_low
insert_thres_high = insert_thres_high

Expand All @@ -133,7 +135,10 @@ def run_explore(
] = match_col_df

thres = round(
(adaptor_end.len_before_index() + adaptor_end.len_after_index())
(
adaptor_end.len_before_index_region()
+ adaptor_end.len_after_index_region()
)
* good_hit_threshold
)
df_good_hits = df_good_hits[df_good_hits["match_1_len"] >= thres]
Expand Down

0 comments on commit a82f3fc

Please sign in to comment.