Skip to content

Commit

Permalink
add "sequence" kwarg to Uniprot.seqScanning()
Browse files: browse the repository at this point in the history
  • Loading branch information
luponzo86 committed Feb 29, 2020
1 parent eb4e2f3 commit 936d083
Showing 1 changed file with 27 additions and 9 deletions.
36 changes: 27 additions & 9 deletions rhapsody/features/Uniprot.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -865,26 +865,44 @@ def mapSAVs2PDB(SAV_coords, custom_PDB=None, refresh=False,
return mapped_SAVs


def seqScanning(Uniprot_coord):
'''Returns a list of SAVs. If the string 'Uniprot_coord' is just a Uniprot ID,
the list will contain all possible amino acid substitutions at all positions
in the sequence. If 'Uniprot_coord' also includes a specific position, the list
will only contain all possible amino acid variants at that position.
def seqScanning(Uniprot_coord, sequence=None):
'''Returns a list of SAVs. If the string 'Uniprot_coord' is just a
Uniprot ID, the list will contain all possible amino acid substitutions
at all positions in the sequence. If 'Uniprot_coord' also includes a
specific position, the list will only contain all possible amino acid
variants at that position. If 'sequence' is 'None' (default), the
sequence will be downloaded from Uniprot.
'''
assert isinstance(Uniprot_coord, str), "Must be a string."
coord = Uniprot_coord.strip().split()
assert len(coord) < 3, "Invalid format. Examples: 'Q9BW27' or 'Q9BW27 10'."
Uniprot_record = pd.queryUniprot(coord[0])
sequence = Uniprot_record['sequence 0'].replace("\n", "")
aa_list = 'ACDEFGHIKLMNPQRSTVWY'
if sequence is None:
Uniprot_record = pd.queryUniprot(coord[0])
sequence = Uniprot_record['sequence 0'].replace("\n", "")
else:
assert isinstance(sequence, str), "Must be a string."
sequence = sequence.upper()
assert set(sequence).issubset(aa_list), "Invalid list of amino acids."
if len(coord) == 1:
# user asks for full-sequence scanning
positions = range(len(sequence))
else:
positions = [int(coord[1]) - 1]
# user asks for single-site scanning
site = int(coord[1])
positions = [site - 1]
# if user provides only one amino acid as 'sequence', interpret it
# as the amino acid at the specified position
if len(sequence) == 1:
sequence = sequence*site
else:
assert len(sequence) >= site, ("Requested position is not found "
"in input sequence.")
SAV_list = []
acc = coord[0]
for i in positions:
wt_aa = sequence[i]
for aa in 'ACDEFGHIKLMNPQRSTVWY':
for aa in aa_list:
if aa == wt_aa:
continue
s = ' '.join([acc, str(i+1), wt_aa, aa])
Expand Down

0 comments on commit 936d083

Please sign in to comment.