-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbench.py
executable file
·120 lines (94 loc) · 4.75 KB
/
bench.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#!/usr/bin/env python3
"""
Benchmarking script.

If bench.csv does not exist yet, run the benchmark on the RIPC dataset;
otherwise, display plots and other summary information about the results
stored in bench.csv.
"""
import sys
import os
import re
import numpy as np
import pandas as pd
import subprocess as sub
import src.external as ext
import src.plotting as plot
# MAIN:
if __name__ == "__main__":
if not os.path.isfile('bench.csv'):
# Get dataset table:
RIPC_txt = pd.read_csv('data/RIPC_dataset.txt', sep='\t')
# Write header of the file containing the results of the bench:
with open('bench.csv', 'w') as bench_file:
bench_file.write("peel_pdb_id-ref_pdb_id;TMscore_ref;TM_parMATT;" +
"best_peel_TM_rev;best_peel_TM;max_peel_TM;best_peel_gdt;" +
"best_peel_gdt_rev;max_peel_gdt\n")
for idx, row in RIPC_txt.iterrows():
len_dom1, len_dom2 = row['Length1'],row['Length2']
dom1_sid, dom2_sid = row['Domain1'], row['Domain2']
pdb_id_dom1, pdb_id_dom2 = dom1_sid[1:5], dom2_sid[1:5]
chainID_sid1, chainID_sid2 = dom1_sid[5], dom2_sid[5]
# If the PDB is absent from the data/ folder, it is downloaded:
if not os.path.isfile("data/" + dom1_sid + '.pdb'):
url_dom1 = ext.get_url_dom(dom1_sid)
ext.dl_pdb(url_dom1, pdb_id_dom1, dom1_sid)
if not os.path.isfile("data/" + dom2_sid + '.pdb'):
url_dom2 = ext.get_url_dom(dom2_sid)
ext.dl_pdb(url_dom2, pdb_id_dom2, dom2_sid)
cmd_main = ("./main.py -p data/" + dom1_sid + '.pdb -r data/' +
dom2_sid + ".pdb -b t")
os.system(cmd_main)
else: # Display plots and information about results:
results = pd.read_csv('bench.csv', sep=';', index_col=0)
all_means = results.loc[:, ['TMscore_ref', 'TM_parMATT', 'max_peel_TM',
'max_peel_gdt']].mean()
plot.disp_barplot(all_means)
print("Couples dont le peeled-TMscore est meilleur que celui de reference:")
TM_sup_ref = results.index[results['max_peel_TM'] > results['TMscore_ref']]
print(results.loc[TM_sup_ref, ['TMscore_ref', 'max_peel_TM']])
print('\n')
print("Couples dont le peeled-gdt-TMscore est meilleur que celui de " +
"reference:")
gdt_sup_ref = results.index[results['max_peel_gdt'] > results['TMscore_ref']]
print(results.loc[gdt_sup_ref, ['TMscore_ref', 'max_peel_gdt']])
# Couples with high TMscores, according the different methods:
cutoff_high = 0.5
print('\n\n')
print("Couples dont le peeled-TMscore est supérieur à " +
str(cutoff_high) + ":")
TM_high = results.index[results['max_peel_TM'] > cutoff_high]
print(results.loc[TM_high, ['TMscore_ref', 'max_peel_gdt',
'max_peel_TM']])
print('\n')
print("Couples dont le peeled-gdt-TMscore est supérieur à " +
str(cutoff_high) + ":")
gdt_high = results.index[results['max_peel_gdt'] > cutoff_high]
print(results.loc[gdt_high, ['TMscore_ref', 'max_peel_gdt',
'max_peel_TM']])
print('\n')
print("Couples dont le TMscore de reference est supérieur à " +
str(cutoff_high) + ":")
TM_ref_high = results.index[results['TMscore_ref'] > cutoff_high]
print(results.loc[TM_ref_high, ['TMscore_ref', 'max_peel_gdt',
'max_peel_TM']])
# Couples with low TMscores, according the different methods:
cutoff_low = 0.17
print('\n\n')
print("Couples dont le peeled-TMscore est inferieur à " + str(cutoff_low) +
":")
TM_low = results.index[results['max_peel_TM'] < cutoff_low]
print(results.loc[TM_low, ['TMscore_ref', 'max_peel_gdt',
'max_peel_TM']])
print("NB mauvais peeled-TMscore:", len(TM_low))
print('\n')
print("Couples dont le peeled-gdt-TMscore est inferieur à " +
str(cutoff_low) + ":")
gdt_low = results.index[results['max_peel_gdt'] < cutoff_low]
print(results.loc[gdt_low, ['TMscore_ref', 'max_peel_gdt',
'max_peel_TM']])
print("NB mauvais peeled-gdt-TMscore:", len(gdt_low))
print('\n')
print("Couples dont le TMscore de reference est inferieur à " +
str(cutoff_low) + ":")
TM_ref_low = results.index[results['TMscore_ref'] < cutoff_low]
print(results.loc[TM_ref_low, ['TMscore_ref', 'max_peel_gdt',
'max_peel_TM']], '\n')