Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Code files for protein contact files generation #1

Open
wants to merge 1 commit into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions generate_contacts_multi_dimers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Apr 20 22:56:26 2020

@author: anes
"""

import concurrent.futures
import os
import sys
import copy
import time

#time.sleep(30)
start_time = time.time()

file1=open("homodimers_missing.txt")
Pairs_file=file1.readlines()
file1.close()

start_part=int(sys.argv[1])
distance=6
part_range=10

def in_contact(Pr):
Pair=copy.copy(Pr)
Pair=Pair.strip()
Pair_pdb=Pair.split(",")
Pdb1="reindexed_pdb/"+Pair_pdb[0]+".pdb"
Pdb2="reindexed_pdb/"+Pair_pdb[1]+".pdb"
Dt=6
Pdb1_out=Pair_pdb[0]#.split(".")[0]
Pdb2_out=Pair_pdb[1]#.split(".")[0]
outfile="contacts_homodimers_952/"+Pdb1_out+"_"+Pdb2_out+"_contact"

Rxt_cd=os.system("python pdb2distance_inter_heavy.py "+Pdb1+" "+Pdb2+" "+str(Dt)+" "+outfile)
line=[Pair,Rxt_cd]

return line

with concurrent.futures.ProcessPoolExecutor() as executor:
for lists in list(range(1)):
print("part: ",start_part+lists)
start_pair=(start_part+lists)*part_range
end_pair=start_pair+part_range

if end_pair>=len(Pairs_file):
end_pair=Pairs_file

list_part=Pairs_file[start_pair:end_pair]

line_result=executor.map(in_contact,list_part)



print("--- %s seconds ---" % (time.time() - start_time))
121 changes: 121 additions & 0 deletions pdb2distance_inter_heavy_contact_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Dec 17 13:28:19 2019

@author: farhan
"""

#this script creates a contact map text file for all atoms
#usage: python pdb2distance_inter_heavy.py <chainA.atom> <chainB.atom> <distance_threshold> <PDB_ID>

import os,sys
from readPDBColumns import readPDB,readAtom,write2File,contents2Info,addColumn,reassembleLines,getChain,getName
import numpy as np

#%%

def pdb2FastaFromSplitContents(split_contents):
fst=""
letters = {'ALA':'A','ARG':'R','ASN':'N','ASP':'D','CYS':'C','GLU':'E','GLN':'Q','GLY':'G','HIS':'H',
'ILE':'I','LEU':'L','LYS':'K','MET':'M','PHE':'F','PRO':'P','SER':'S','THR':'T','TRP':'W',
'TYR':'Y','VAL':'V'}
prev_res_num=split_contents[0]["res_num"]
fst+=letters[split_contents[0]["res_name"]]
for items in split_contents:
if (items["res_num"]==prev_res_num): continue
fst+=letters[items["res_name"]]
prev_res_num=items["res_num"]
#fst+=letters[items["res_name"]]
return fst

def getCoordinate(atom):
coordinate={}
coordinate["x"]=atom["x"]
coordinate["y"]=atom["y"]
coordinate["z"]=atom["z"]
return coordinate

def distance(coord1, coord2):
x1=float(coord1["x"])
y1=float(coord1["y"])
z1=float(coord1["z"])
x2=float(coord2["x"])
y2=float(coord2["y"])
z2=float(coord2["z"])
d=np.sqrt((x1-x2)**2+(y1-y2)**2+(z1-z2)**2)

return d

def createDistanceMapAllAtoms(atom_list_A,atom_list_B,dist):
result_list_string=[]

for atom_A in atom_list_A:
for atom_B in atom_list_B:
string=""
if (atom_A["chain"]==" " or atom_A["chain"]==" " or atom_A["chain"]==""): atom_A["chain"]=chain_1
if (atom_B["chain"]==" " or atom_B["chain"]==" " or atom_B["chain"]==""): atom_B["chain"]=chain_2

string+=atom_A["chain"]+" "+atom_A["serial"]+" "+atom_A["res_name"]+" "+atom_A["res_num"]+" "+atom_A["x"]+" "+atom_A["y"]+" "+atom_A["z"]+" "+atom_A["atom_name"]+" | "+ atom_B["atom_name"]+" "+atom_B["chain"]+" "+atom_B["serial"]+" "+atom_B["res_name"]+" "+atom_B["res_num"]+" "+atom_B["x"]+" "+atom_B["y"]+" "+atom_B["z"]+" | " + str(distance(getCoordinate(atom_A),getCoordinate(atom_B)))


if (distance(getCoordinate(atom_A),getCoordinate(atom_B))<dist): result_list_string.append(string+"\n")

return result_list_string

def removeRedundantContacts(contact_list):
new_list=[]
contact_dict={}
for contact in contact_list:
x_y=contact.split()[0]+" "+contact.split()[1]
contact_dict[x_y]=contact
#contact_dict=dict(set(contact_dict))
for key in contact_dict.keys():
new_list.append(contact_dict[key])

return new_list

def createDistanceMapHeavyAtoms(atom_list_A,atom_list_B,dist=6.0): #new version. Chose all atoms but the hydrogens
result_list_string=[]
distance_list_string=[]
rr_list_string=[]
for atom_A in atom_list_A:
for atom_B in atom_list_B:
string=""

if (atom_A["element"].strip()=="H" or atom_A["element"].strip()=="D"): continue
if (atom_B["element"].strip()=="H" or atom_A["element"].strip()=="D"): continue
if (distance(getCoordinate(atom_A),getCoordinate(atom_B))<=dist):
sys.exit(0)

distance_list_string=removeRedundantContacts(distance_list_string)
rr_list_string=removeRedundantContacts(rr_list_string)
return result_list_string,distance_list_string,rr_list_string

#%%

pdbfile_A=sys.argv[1]

pdbfile_B=sys.argv[2]

dist=sys.argv[3]

pdb=getName(pdbfile_A)

fasta_dict_file="fasta_dictionary.txt"

split_contents_A=contents2Info(readPDB(pdbfile_A))
split_contents_B=contents2Info(readPDB(pdbfile_B))
chain_1=getChain(pdbfile_A)
chain_2=getChain(pdbfile_B)


atom_list_A=split_contents_A
atom_list_B=split_contents_B


result_list,pw_dist,pw_rr=createDistanceMapHeavyAtoms(atom_list_A,atom_list_B,float(dist))

if (len(pw_dist)==0):

sys.exit("-2")
108 changes: 108 additions & 0 deletions pdbPairwizeTaskList.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@

import sys
import pickle

list_file = sys.argv[1]
#file1 = open("1_AB_all_pdb_atom.lst","r")

file1 = open(list_file,"r")
PDBs_list = file1.readlines()
file1.close()

############################## oligomer selection #############################

Eliminate_list = [] # proteins that are eliminated because the protein name does not have any chain indication
monomer_list = [] # proteins with single chain
Oligomer_list = [] # list of lists where each individual list contains chains that belong to the same protein
dimer = [] # protiens that have two chains only (this is kept for the sake of statistics only)
multimer = [] # proteins with more than two chains
dimer_num = 0
multimer_num = 0
flag = 0
Id_num_temp = 1
Id_num = 0

while Id_num <= len(PDBs_list):
Id = PDBs_list[Id_num]
Id = Id.strip()
pdb_id = Id.split(".")[0]
temp_list = [] # list of chains that belong to the same protein
PDB1 = pdb_id[0:4]

if len(pdb_id) == 4: # proteins to be ingored
Eliminate_list.append(Id)
Id_num+ = 1
else:
temp_list.append(Id)
flag = 1
Id_num_temp = 0
while flag == 1:
if Id_num == len(PDBs_list)-1:# last protein in the list
flag = 0
if len(temp_list) == 1:
monomer_list.append(Id)
else:
Oligomer_list.append(temp_list)
break

else:
Id_num_temp = Id_num+1
Id_temp = PDBs_list[Id_num_temp]
Id_temp = Id_temp.strip()
pdb_id_temp = Id_temp.split(".")[0]

if len(pdb_id_temp) == 4: # proteins to be ingored
Eliminate_list.append(pdb_id_temp)
Id_num+= 1
continue
else:
PDB2 = pdb_id_temp[0:4]

if PDB1 == PDB2:
temp_list.append(Id_temp)
flag = 1
Id_num = Id_num_temp
#print("end of iteration Id_num = ",Id_num)
else:
Id_num = Id_num_temp
flag=0
#print("end of iteration Id_num= ",Id_num)

if len(temp_list) == 1:
monomer_list.append(Id)

else:
Oligomer_list.append(temp_list)
if len(temp_list) == 2:
dimer.append(temp_list)
dimer_num+= 1
else:
multimer.append(temp_list)
multimer_num+= 1

if Id_num == len(PDBs_list)-1:
break

############################## dimer arrangement ##############################

All_pairs_list = [] # list that contains all possible pairs for each oligomer without repetition
Total_pairs_number = 0
line_counter = 0
fout = open("pairs.txt","w")

for Oligo_list in Oligomer_list:
List_len = len(Oligo_list)
Pair_num = sum(list(range(1,List_len)))
Total_pairs_number+= Pair_num
#print("list length : ",list_len," number of pairs ", pair_num)
List_current = []
for i in list(range(List_len)):
for j in list(range(i,List_len)):
if i == j:
continue
else:
List_current.append([Oligo_list[i],Oligo_list[j]])
fout.write(Oligo_list[i]+","+Oligo_list[j]+"\n" )
All_pairs_list = All_pairs_list+List_current
fout.close()

13 changes: 13 additions & 0 deletions pdb_unzip.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/bash

input="proteins_to_extract.txt"

while IFS= read -r line
do
echo "$line"
gunzip -c "atom/$line.atom.gz" > "pdb_atom/$line.atom"

done < "$input"

echo -e "\n"

36 changes: 36 additions & 0 deletions test_common_chains.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Apr 11 23:34:09 2020

@author: anes
"""
import os
import sys
import time

start_time = time.time()
file1=open("pairs2.txt")
Pairs_file=file1.readlines()
file1.close()
distance=6
#path=sys.argv[1]
path="/home/anes/Desktop/Cheng_new_project/"
for Pair in Pairs_file:

Pair=Pair.strip()
Pair_pdb=Pair.split(",")
Pdb1=Pair_pdb[0]
Pdb2=Pair_pdb[1]

pdb1=Pair_pdb[0].split(".")[0]
pdb2=Pair_pdb[1].split(".")[0]
outfile=path+pdb1+"_"+pdb2+".txt"

exit_code=os.system("python pdb2distance_inter_heavy_contact_test.py "+Pdb1+" "+Pdb2+" "+str(distance)+" "+outfile)
print(exit_code)
if exit_code== 0:
print("in contact")
else:
print("not in contact")
print("--- %s seconds ---" % (time.time() - start_time))
Loading