From 936d08360ddc990f10b57a52f867ce83aba49aab Mon Sep 17 00:00:00 2001 From: luponzo86 Date: Sat, 29 Feb 2020 11:37:57 -0800 Subject: [PATCH] add "sequence" kwarg to Uniprot.seqScanning() --- rhapsody/features/Uniprot.py | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/rhapsody/features/Uniprot.py b/rhapsody/features/Uniprot.py index 4c016cb..0974a92 100644 --- a/rhapsody/features/Uniprot.py +++ b/rhapsody/features/Uniprot.py @@ -865,26 +865,44 @@ def mapSAVs2PDB(SAV_coords, custom_PDB=None, refresh=False, return mapped_SAVs -def seqScanning(Uniprot_coord): - '''Returns a list of SAVs. If the string 'Uniprot_coord' is just a Uniprot ID, - the list will contain all possible amino acid substitutions at all positions - in the sequence. If 'Uniprot_coord' also includes a specific position, the list - will only contain all possible amino acid variants at that position. +def seqScanning(Uniprot_coord, sequence=None): + '''Returns a list of SAVs. If the string 'Uniprot_coord' is just a + Uniprot ID, the list will contain all possible amino acid substitutions + at all positions in the sequence. If 'Uniprot_coord' also includes a + specific position, the list will only contain all possible amino acid + variants at that position. If 'sequence' is 'None' (default), the + sequence will be downloaded from Uniprot. ''' assert isinstance(Uniprot_coord, str), "Must be a string." coord = Uniprot_coord.strip().split() assert len(coord) < 3, "Invalid format. Examples: 'Q9BW27' or 'Q9BW27 10'." - Uniprot_record = pd.queryUniprot(coord[0]) - sequence = Uniprot_record['sequence 0'].replace("\n", "") + aa_list = 'ACDEFGHIKLMNPQRSTVWY' + if sequence is None: + Uniprot_record = pd.queryUniprot(coord[0]) + sequence = Uniprot_record['sequence 0'].replace("\n", "") + else: + assert isinstance(sequence, str), "Must be a string." + sequence = sequence.upper() + assert set(sequence).issubset(aa_list), "Invalid list of amino acids." if len(coord) == 1: + # user asks for full-sequence scanning positions = range(len(sequence)) else: - positions = [int(coord[1]) - 1] + # user asks for single-site scanning + site = int(coord[1]) + positions = [site - 1] + # if user provides only one amino acid as 'sequence', interpret it + # as the amino acid at the specified position + if len(sequence) == 1: + sequence = sequence*site + else: + assert len(sequence) >= site, ("Requested position is not found " + "in input sequence.") SAV_list = [] acc = coord[0] for i in positions: wt_aa = sequence[i] - for aa in 'ACDEFGHIKLMNPQRSTVWY': + for aa in aa_list: if aa == wt_aa: continue s = ' '.join([acc, str(i+1), wt_aa, aa])