Skip to content

Commit

Permalink
Merge pull request #276 from replikation/add_n-in-spike
Browse files Browse the repository at this point in the history
added `nextclade_percentN_spike` column to datatables
  • Loading branch information
replikation authored Aug 14, 2024
2 parents f86681e + d6333ab commit 0bf2ee7
Showing 1 changed file with 19 additions and 0 deletions.
19 changes: 19 additions & 0 deletions bin/summary_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -516,6 +516,25 @@ def add_nextclade_results(self, nextclade_results):
# N information
self.add_column_raw('nextclade_missing', res_data["missing"])

# N percentage information in spike
# inspired by https://github.com/nextstrain/nextclade/issues/715
def get_percent_N_in_region(positions_and_ranges, positions_of_interest=frozenset(range(21563, 25384 + 1))):
    """Return the percentage of positions of interest that are missing (N).

    Args:
        positions_and_ranges: Comma-separated string of single positions
            and ``start-end`` ranges, as taken from the Nextclade
            ``missing`` column (e.g. ``"5,100-120"``). Nextclade ranges
            are closed, i.e. both boundaries belong to the range.
        positions_of_interest: Set of positions the percentage refers to.
            Defaults to positions 21563 to 25384 inclusive (the spike
            region used by the caller).

    Returns:
        ``''`` when the input is ``'nan'`` or the empty string, otherwise
        a float in ``[0, 100]``.

    Raises:
        ValueError: If a non-empty token is not an integer or an
            integer range.
        ZeroDivisionError: If ``positions_of_interest`` is empty.
    """
    # Missing values arrive stringified by the caller; keep the empty-string
    # sentinel for them so the resulting table column stays unchanged.
    if positions_and_ranges in ('nan', ''):
        return ''

    missing_set = set()
    for region in positions_and_ranges.split(','):
        region = region.strip()
        if not region:
            # Tolerate stray separators such as a trailing or doubled comma.
            continue
        if '-' in region:
            bounds = region.split('-')
            # Python ranges exclude the end, Nextclade ranges include it.
            missing_set.update(range(int(bounds[0]), int(bounds[1]) + 1))
        else:
            missing_set.add(int(region))

    # Intersect to keep only the missing positions inside the region.
    return len(missing_set & positions_of_interest) / len(positions_of_interest) * 100
self.add_column_raw('nextclade_percentN_spike', res_data['missing'].apply(lambda x: get_percent_N_in_region(str(x))))

res_data['mutations_formatted'] = [m.replace(',', ', ') if type(m) == str else '-' for m in res_data['aaSubstitutions']]
res_data['deletions_formatted'] = [m.replace(',', ', ') if type(m) == str else '-' for m in res_data['aaDeletions']]
res_data['insertions_formatted'] = [m.replace(',', ', ') if type(m) == str else '-' for m in res_data['aaInsertions']]
Expand Down

0 comments on commit 0bf2ee7

Please sign in to comment.