-
Notifications
You must be signed in to change notification settings - Fork 0
/
create_features.py
51 lines (41 loc) · 1.41 KB
/
create_features.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import os
import pandas
import features_database
import dssp
import psiblast
from pconpy import pconpy
def add_structure_to_db(db, structure_id, filename):
    """Compute the features of one structure file and save them in ``db``.

    The steps, in order:

    1. ``dssp.assign(filename)`` produces the DSSP result.
    2. ``psiblast.pssm`` is called on that result's ``sequence`` attribute.
    3. ``pconpy.get_residues(filename)`` is fed to
       ``pconpy.calc_dist_matrix`` and ``.filled(0)`` is called on the
       outcome (presumably a NumPy masked array -- TODO confirm).
    4. Everything is passed to ``db.save`` under ``structure_id``.

    Args:
        db: Object exposing ``save(structure_id, dssp_result, pssm,
            distance_map)``.
        structure_id: Key under which the features are stored.
        filename: Path of the structure file to process.
    """
    dssp_out = dssp.assign(filename)
    sequence_pssm = psiblast.pssm(dssp_out.sequence)
    raw_distances = pconpy.calc_dist_matrix(pconpy.get_residues(filename))
    db.save(structure_id, dssp_out, sequence_pssm, raw_distances.filled(0))
def _replace_id_suffix(filename, char):
    """Return ``filename`` with its fifth-from-last character set to ``char``.

    For a name ending in a four-character extension such as ``.ent`` this is
    the last character before the extension.
    """
    return filename[:-5] + char + filename[-4:]


def add_structure(db, structure_id, astral_dir=None):
    """Locate the ``.ent`` file of ``structure_id`` and add it to ``db``.

    The primary path is ``<astral_dir>/<structure_id[2:4]>/<structure_id>.ent``.
    When that file does not exist, the last character of the identifier in
    the file name is replaced in turn by ``1`` .. ``9`` and finally by ``_``;
    the first existing candidate is used. Progress and failures are reported
    with ``print``; nothing is raised when no file is found.

    Args:
        db: Database object forwarded to ``add_structure_to_db``.
        structure_id: Domain identifier; characters 2-3 select the
            sub-directory and the full identifier is the file stem.
        astral_dir: Root directory of the structure files. Defaults to the
            module-level ``astral`` value, which keeps existing two-argument
            calls working unchanged.

    Returns:
        None.
    """
    if astral_dir is None:
        # Fall back to the module-level setting used by the original script.
        astral_dir = astral

    filename = os.path.join(astral_dir, structure_id[2:4], structure_id + ".ent")

    if not os.path.isfile(filename):
        print('File not found: ' + filename)
        for digit in range(1, 10):
            filename = _replace_id_suffix(filename, str(digit))
            print('Trying to use ' + filename)
            if os.path.isfile(filename):
                break
        else:
            # No numbered variant exists: last resort is the "_" suffix.
            filename = _replace_id_suffix(filename, "_")
            print('Trying to use ' + filename)

    # Single exit point for the primary path and every fallback candidate.
    if os.path.isfile(filename):
        add_structure_to_db(db, structure_id, filename)
        print('Added structure ' + structure_id)
    else:
        print('File not found: ' + filename)
# Create the features database that every processed structure is saved into.
db = features_database.SpatialFeaturesDatabase("spatial_features.db")
db.create()

# The dataset file is parsed as whitespace-separated text. The separator is a
# raw string because "\s" in a normal literal is an invalid escape sequence.
dataset = pandas.read_csv('astral/dataset.xls', sep=r'\s+')
structures = dataset['DOMAIN'].tolist()

# Root directory of the structure files. This must stay a module-level name:
# add_structure() reads it as a global.
astral = "astral/pdbstyle-2.06/"

for structure_id in structures:
    add_structure(db, structure_id)