forked from anastasiaangelo/ProjectAnastasia
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Ising_Ham_filtered.py
168 lines (118 loc) · 6.03 KB
/
Ising_Ham_filtered.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
### Take input pdb, score, repack and extract one and two body energies
import pyrosetta;
pyrosetta.init()
from pyrosetta.teaching import *
from pyrosetta import *
import csv
import sys
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from pyrosetta.rosetta.core.pack.rotamer_set import RotamerSets
from pyrosetta.rosetta.core.pack.task import TaskFactory
from pyrosetta.rosetta.core.pack.rotamer_set import *
from pyrosetta.rosetta.core.pack.interaction_graph import InteractionGraphFactory
from pyrosetta.rosetta.core.pack.task import *
from pyrosetta import PyMOLMover
# Initiate structure, scorefunction
pose = pyrosetta.pose_from_pdb("input_files/test1.pdb")
N_res = 4
num_rot = 4
residue_count = pose.total_residue()
sfxn = get_score_function(True)
print(pose.sequence())
print(residue_count)
relax_protocol = pyrosetta.rosetta.protocols.relax.FastRelax()
relax_protocol.set_scorefxn(sfxn)
relax_protocol.apply(pose)
# Define task, interaction graph and rotamer sets (model_protein_csv.py)
task_pack = TaskFactory.create_packer_task(pose)
rotsets = RotamerSets()
pose.update_residue_neighbors()
sfxn.setup_for_packing(pose, task_pack.designing_residues(), task_pack.designing_residues())
packer_neighbor_graph = pyrosetta.rosetta.core.pack.create_packer_graph(pose, sfxn, task_pack)
rotsets.set_task(task_pack)
rotsets.build_rotamers(pose, sfxn, packer_neighbor_graph)
rotsets.prepare_sets_for_packing(pose, sfxn)
ig = InteractionGraphFactory.create_interaction_graph(task_pack, rotsets, pose, sfxn, packer_neighbor_graph)
print("built", rotsets.nrotamers(), "rotamers at", rotsets.nmoltenres(), "positions.")
rotsets.compute_energies(pose, sfxn, packer_neighbor_graph, ig, 1)
# Output structure to be visualised in pymol
pose.dump_pdb("output_repacked.pdb")
# Define dimension for matrix
max_rotamers = 0
for residue_number in range(1, residue_count+1):
n_rots = rotsets.nrotamers_for_moltenres(residue_number)
print(f"Residue {residue_number} has {n_rots} rotamers.")
if n_rots > max_rotamers:
max_rotamers = n_rots
print("Maximum number of rotamers:", max_rotamers)
E = np.zeros((max_rotamers, max_rotamers))
Hamiltonian = np.zeros((max_rotamers, max_rotamers))
E1 = np.zeros((max_rotamers, max_rotamers))
Hamiltonian1 = np.zeros((max_rotamers, max_rotamers))
output_file = "energy_files/two_body_terms.csv"
output_file1 = "energy_files/one_body_terms.csv"
data_list = []
data_list1 = []
df = pd.DataFrame(columns=['res i', 'res j', 'rot A_i', 'rot B_j', 'E_ij'])
df1 = pd.DataFrame(columns=['res i', 'rot A_i', 'E_ii'])
# Loop to find Hamiltonian values Jij - interaction of rotamers on NN residues
for residue_number in range(1, residue_count):
rotamer_set_i = rotsets.rotamer_set_for_residue(residue_number)
if rotamer_set_i == None: # skip if no rotamers for the residue
continue
residue_number2 = residue_number + 1
residue2 = pose.residue(residue_number2)
rotamer_set_j = rotsets.rotamer_set_for_residue(residue_number2)
if rotamer_set_j == None:
continue
molten_res_i = rotsets.resid_2_moltenres(residue_number)
molten_res_j = rotsets.resid_2_moltenres(residue_number2)
edge_exists = ig.find_edge(molten_res_i, molten_res_j)
if not edge_exists:
continue
for rot_i in range(1, rotamer_set_i.num_rotamers() + 1):
for rot_j in range(1, rotamer_set_j.num_rotamers() + 1):
E[rot_i-1, rot_j-1] = ig.get_two_body_energy_for_edge(molten_res_i, molten_res_j, rot_i, rot_j)
Hamiltonian[rot_i-1, rot_j-1] = E[rot_i-1, rot_j-1]
for rot_i in range(1, rotamer_set_i.num_rotamers() + 1):
for rot_j in range(1, rotamer_set_j.num_rotamers() + 1):
# print(f"Interaction energy between rotamers of residue {residue_number} rotamer {rot_i} and residue {residue_number2} rotamer {rot_j} :", Hamiltonian[rot_i-1, rot_j-1])
data = {'res i': residue_number, 'res j': residue_number2, 'rot A_i': rot_i, 'rot B_j': rot_j, 'E_ij': Hamiltonian[rot_i-1, rot_j-1]}
data_list.append(data)
# Save the two-body energies to a csv file
df = pd.DataFrame(data_list)
df.to_csv('energy_files/two_body_terms.csv', index=False)
df = pd.read_csv('energy_files/two_body_terms.csv')
mean_two = df['E_ij'].mean()
std_two = df['E_ij'].std()
upper_bound2 = mean_two + std_two
filtered_df = df[(df['E_ij'] <= upper_bound2)]
filtered_df.to_csv('energy_files/filtered_file_two.csv', index=False)
# to choose the two rotamers with the largest energy in absolute value
# filtered_df.assign(abs_E=df['E_ij'].abs()).nlargest((num_rot ** 2) * (N_res-1), 'abs_E').drop(columns=['abs_E']).to_csv('two_body_terms.csv', index=False)
# Loop to find Hamiltonian values Jii
for residue_number in range(1, residue_count + 1):
residue1 = pose.residue(residue_number)
rotamer_set_i = rotsets.rotamer_set_for_residue(residue_number)
if rotamer_set_i == None:
continue
molten_res_i = rotsets.resid_2_moltenres(residue_number)
for rot_i in range(1, rotamer_set_i.num_rotamers() + 1):
E1[rot_i-1, rot_i-1] = ig.get_one_body_energy_for_node_state(molten_res_i, rot_i)
Hamiltonian1[rot_i-1, rot_i-1] = E1[rot_i-1, rot_i-1]
# print(f"Interaction score values of {residue1.name3()} rotamer {rot_i} with itself {Hamiltonian[rot_i-1,rot_i-1]}")
data1 = {'res i': residue_number, 'rot A_i': rot_i, 'E_ii': Hamiltonian1[rot_i-1, rot_i-1]}
data_list1.append(data1)
# Save the one-body energies to a csv file
df1 = pd.DataFrame(data_list1)
df1.to_csv('energy_files/one_body_terms.csv', index=False)
df1 = pd.read_csv('energy_files/one_body_terms.csv')
mean_one = df1['E_ii'].mean()
std_one = df1['E_ii'].std()
upper_bound1 = mean_one + 6 * std_one
filtered_df1 = df1[(df1['E_ii'] <= upper_bound1)]
filtered_df1.to_csv('energy_files/filtered_file_one.csv', index=False)
# to choose the two rotamers with the largest energy in absolute value
# filtered_df1.assign(abs_Ei=df1['E_ii'].abs()).nlargest(N_res * num_rot, 'abs_Ei').drop(columns=['abs_Ei']).to_csv('one_body_terms.csv', index=False)