diff --git a/snipit/command.py b/snipit/command.py index 25ff9a7..1423ebe 100644 --- a/snipit/command.py +++ b/snipit/command.py @@ -4,7 +4,6 @@ import sys import os import argparse -import textwrap import pkg_resources import collections @@ -28,7 +27,6 @@ def main(sysargs = sys.argv[1:]): i_group = parser.add_argument_group('Input options') i_group.add_argument('alignment',help="Input alignment fasta file") - i_group.add_argument("-t","--sequence-type", choices=['nt','aa'], action="store",help="Input sequence type: aa or nt", default="nt", dest="sequence_type") i_group.add_argument("-r","--reference", action="store",help="Indicates which sequence in the alignment is\nthe reference (by sequence ID).\nDefault: first sequence in alignment", dest="reference") i_group.add_argument("-l","--labels", action="store",help="Optional csv file of labels to show in output snipit plot. Default: sequence names", dest="labels") i_group.add_argument("--l-header", action="store",help="Comma separated string of column headers in label csv. First field indicates sequence name column, second the label column. Default: 'name,label'", dest="label_headers",default="name,label") @@ -49,10 +47,7 @@ def main(sysargs = sys.argv[1:]): f_group.add_argument("--width",action="store",type=float,help="Overwrite the default figure width",default=0) f_group.add_argument("--size-option",action="store",help="Specify options for sizing. Options: expand, scale",dest="size_option",default="scale") f_group.add_argument("--solid-background",action="store_true",help="Force the plot to have a solid background, rather than a transparent one.",dest="solid_background") - f_group.add_argument("-c","--colour-palette",dest="colour_palette",action="store", - help="Specify colour palette. Options: [classic, classic_extended, primary, purine-pyrimidine, greyscale, wes, verity, ugene]. Use ugene for protein alignments.",default="classic", - choices=["classic","classic_extended","primary","purine-pyrimidine","greyscale","wes","verity","ugene"], - metavar='') + f_group.add_argument("-c","--colour-palette",dest="colour_palette",action="store",help="Specify colour palette. Options: primary, classic, purine-pyrimidine, greyscale, wes, verity",default="classic") f_group.add_argument("--flip-vertical",action='store_true',help="Flip the orientation of the plot so sequences are below the reference rather than above it.",dest="flip_vertical") f_group.add_argument("--sort-by-mutation-number", action='store_true', help="Render the graph with sequences sorted by the number of SNPs relative to the reference (fewest to most). Default: False", dest="sort_by_mutation_number") @@ -68,11 +63,8 @@ def main(sysargs = sys.argv[1:]): s_group.add_argument("--show-indels",action='store_true',help="Include insertion and deletion mutations in snipit plot.",dest="show_indels") s_group.add_argument('--include-positions', dest='included_positions', type=sfunks.bp_range, nargs='+', default=None, help="One or more range (closed, inclusive; one-indexed) or specific position only included in the output. Ex. '100-150' or Ex. '100 101' Considered before '--exclude-positions'.") s_group.add_argument('--exclude-positions', dest='excluded_positions', type=sfunks.bp_range, nargs='+', default=None, help="One or more range (closed, inclusive; one-indexed) or specific position to exclude in the output. Ex. '100-150' or Ex. '100 101' Considered after '--include-positions'.") - s_group.add_argument("--ambig-mode", dest="ambig_mode",choices=['all', 'snps', 'exclude'], default='snpsambi', - help=textwrap.dedent('''Controls how ambiguous bases are handled - - [all] include all ambig such as N,Y,B in all positions; - [snps] only include ambig if a snp is present at the same position; - [exclude] remove all ambig, same as depreciated --exclude-ambig-pos''')) + s_group.add_argument("--exclude-ambig-pos",dest="exclude_ambig_pos",action='store_true',help="Exclude positions with ambig base in any sequences. Considered after '--include-positions'") + misc_group = parser.add_argument_group('Misc options') misc_group.add_argument("-v","--version", action='version', version=f"snipit {__version__}") @@ -106,9 +98,9 @@ def main(sysargs = sys.argv[1:]): reference,alignment = sfunks.get_ref_and_alignment(args.alignment,ref_input,label_map) - snp_dict,record_snps,num_snps = sfunks.find_snps(reference,alignment,args.show_indels,args.sequence_type,args.ambig_mode) + snp_dict,record_snps,num_snps = sfunks.find_snps(reference,alignment,args.show_indels) - record_ambs = sfunks.find_ambiguities(alignment, snp_dict, args.sequence_type) + record_ambs = sfunks.find_ambiguities(alignment, snp_dict) colours = sfunks.get_colours(args.colour_palette) @@ -134,13 +126,14 @@ def main(sysargs = sys.argv[1:]): args.flip_vertical, args.included_positions, args.excluded_positions, + args.exclude_ambig_pos, args.sort_by_mutation_number, args.high_to_low, args.sort_by_id, args.sort_by_mutations, args.recombi_mode, args.recombi_references) - print(sfunks.green(f"Snipping Complete: {output}")) + if __name__ == '__main__': main() diff --git a/snipit/scripts/snp_functions.py b/snipit/scripts/snp_functions.py index 2fcae8a..3558230 100644 --- a/snipit/scripts/snp_functions.py +++ b/snipit/scripts/snp_functions.py @@ -14,6 +14,7 @@ import warnings warnings.filterwarnings('ignore') + # imports from other modules from Bio import SeqIO from Bio.Seq import Seq @@ -34,11 +35,6 @@ CYAN = '\u001b[36m' DIM = '\033[2m' -NT_BASES = ["A","T","G","C"] -NT_AMBIG = ["W","S","M","K","R","Y","B","D","H","V","N"] -AA_BASES = ["A","R","N","D","C","Q","E","G","H","I","L","K","M","F","P","S","T","W","Y","V"] -AA_AMBIG = ["X","B","Z","J"] - def bp_range(s): """ @@ -228,24 +224,8 @@ def merge_indels(indel_list,prefix): return indel_list -def find_snps(reference_seq,input_seqs,show_indels,sequence_type,ambig_mode): - - # set the appropriate genetic code to use for snp calling - if sequence_type == 'nt': - if ambig_mode == 'snps': - gcode = NT_BASES - elif ambig_mode == 'all': - gcode = NT_BASES + NT_AMBIG - else: # exclude - gcode = NT_BASES - if sequence_type == 'aa': - if ambig_mode == 'snps': - gcode = AA_BASES - elif ambig_mode == 'all': - gcode = AA_BASES + AA_AMBIG - else: #exclude - gcode = AA_BASES - +def find_snps(reference_seq,input_seqs,show_indels): + non_amb = ["A","T","G","C"] snp_dict = {} record_snps = {} @@ -257,7 +237,7 @@ def find_snps(reference_seq,input_seqs,show_indels,sequence_type,ambig_mode): for i in range(len(query_seq)): bases = [query_seq[i],reference_seq[i]] if bases[0] != bases[1]: - if bases[0] in gcode and bases[1] in gcode: + if bases[0] in non_amb and bases[1] in non_amb: snp = f"{i+1}:{bases[1]}{bases[0]}" # position-reference-query @@ -288,6 +268,7 @@ def find_snps(reference_seq,input_seqs,show_indels,sequence_type,ambig_mode): return snp_dict,record_snps,len(var_counter) + def find_ambiguities(alignment, snp_dict,sequence_type): if sequence_type == "nt": @@ -295,6 +276,7 @@ def find_ambiguities(alignment, snp_dict,sequence_type): if sequence_type == "aa": amb = AA_AMBIG + snp_sites = collections.defaultdict(list) for seq in snp_dict: snps = snp_dict[seq] @@ -313,7 +295,7 @@ def find_ambiguities(alignment, snp_dict,sequence_type): for i in snp_sites: bases = [query_seq[i],snp_sites[i]] #if query not same as ref allele if bases[0] != bases[1]: - if bases[0] not in amb: + if bases[0] not in ["A","T","G","C"]: snp = f"{i+1}:{bases[1]}{bases[0]}" # position-outgroup-query snps.append(snp) @@ -381,6 +363,7 @@ def make_graph(num_seqs, flip_vertical=False, included_positions=None, excluded_positions=None, + exclude_ambig_pos=False, sort_by_mutation_number=False, high_to_low=True, sort_by_id=False, @@ -481,7 +464,7 @@ def make_graph(num_seqs, x_position = int(pos) # if positions with any ambiguities should be ignored, note the position - if ambig_mode == 'exclude': + if exclude_ambig_pos: excluded_positions.add(x_position) else: ref = var[0] @@ -666,21 +649,13 @@ def make_graph(num_seqs, def get_colours(colour_palette): palettes = {"classic": {"A":"steelblue","C":"indianred","T":"darkseagreen","G":"skyblue"}, - "classic_extended": {"A":"steelblue","C":"indianred","T":"darkseagreen", - "G":"skyblue","W":"#FFCC00","S":"#66FF00","M":"#6600FF", - "K":"#66FFCC","R":"#FF00FF","Y":"#FFFF99","B":"#CCFF99", - "D":"#FFFF00","H":"##33FF00","V":"#FF6699","N":"#333333"}, "wes": {"A":"#CC8B3C","C":"#456355","T":"#541F12","G":"#B62A3D"}, "primary": {"A":"green","C":"goldenrod","T":"steelblue","G":"indianred"}, "purine-pyrimidine":{"A":"indianred","C":"teal","T":"teal","G":"indianred"}, "greyscale":{"A":"#CCCCCC","C":"#999999","T":"#666666","G":"#333333"}, "blues":{"A":"#3DB19D","C":"#76C5BF","T":"#423761","G":"steelblue"}, "verity":{"A":"#EC799A","C":"#df6eb7","T":"#FF0080","G":"#9F0251"}, - "recombi":{"lineage_1":"steelblue","lineage_2":"#EA5463","Both":"darkseagreen","Private":"goldenrod"}, - "ugene":{"A":"#00ccff","R":"#d5c700","N":"#33ff00","D":"#ffff00","C":"#6600ff","Q":"#3399ff", - "E":"#c0bdbb","G":"#ff5082","H":"#fff233","I":"#00abed","L":"#008fc6","K":"#ffee00", - "M":"#1dc0ff","F":"#3df490","P":"#d5426c","S":"#ff83a7","T":"#ffd0dd","W":"#33cc78", - "Y":"#65ffab","V":"#ff6699","X":"#999999","B":"#999999","Z":"#999999","J":"#999999"} + "recombi":{"lineage_1":"steelblue","lineage_2":"#EA5463","Both":"darkseagreen","Private":"goldenrod"} } if colour_palette not in palettes: sys.stderr.write(red(f"Error: please select one of {palettes} for --colour-palette option\n"))