main.py
# -*- coding: utf-8 -*-
# Copyright (c) 2018 João Martins
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import glob
import json
import os
import random
import sys
import time

import tensorflow as tf

from utils.SparseGenerator import SparseGenerator
# See the __init__ script in the models folder;
# `make_model` is a helper function to load any model you have
from models import make_model
from models import available_models
# from hpsearch import hyperband, randomsearch
# I personally always like to make my paths absolute
# to be independent of where the python binary is called
script_dir = os.path.dirname(os.path.realpath(__file__))

def main(_):
    # config = flags.FLAGS.__flags.copy()
    # fixed_params must be a string to be passed in the shell; we use JSON
    config.fixed_params = json.loads(config.fixed_params)
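    # Example (hypothetical value): passing --fixed_params '{"lr": 0.001}' on the
    # command line yields config.fixed_params == {'lr': 0.001} at this point.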
    high_res_protein_feature_filenames = sorted(
        glob.glob(os.path.join(config.data_dir, "*protein_features.npz")))
    high_res_grid_feature_filenames = sorted(
        glob.glob(os.path.join(config.data_dir, "*residue_features.npz")))
    validation_end = int(len(high_res_protein_feature_filenames) * (1. - config.test_fraction))
    train_end = validation_start = int(validation_end * (1. - config.validation_fraction))
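    # Worked example with the default fractions: for 100 protein files,
    # test_fraction=0.10 gives validation_end = 90, and validation_fraction=0.10
    # gives train_end = validation_start = 81, i.e. files [0:81] are train,
    # [81:90] validation and [90:100] test.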
    if config.mode not in ('infer', 'test'):
        train_data = SparseGenerator()
        train_data.load_data(high_res_protein_feature_filenames[:train_end],
                             high_res_grid_feature_filenames[:train_end])
        validation_data = SparseGenerator()
        validation_data.load_data(high_res_protein_feature_filenames[validation_start:validation_end],
                                  high_res_grid_feature_filenames[validation_start:validation_end])
    elif config.mode == 'test':
        test_data = SparseGenerator()
        test_data.load_data(high_res_protein_feature_filenames[validation_end:],
                            high_res_grid_feature_filenames[validation_end:])
    if config.fullsearch:
        # Some code for HP search ...
        pass
    elif config.dry_run:
        # A dry run only builds the model, without training it
        model = make_model(config)
    else:
        model = make_model(config)
        if config.mode == 'infer':
            high_res_protein_feature_filenames = sorted(
                glob.glob(os.path.join(config.pdb_folder, "*protein_features.npz")))
            high_res_grid_feature_filenames = sorted(
                glob.glob(os.path.join(config.pdb_folder, "*residue_features.npz")))
            infer_data = SparseGenerator()
            infer_data.load_data(high_res_protein_feature_filenames,
                                 high_res_grid_feature_filenames)
            model.infer(infer_data, config.residue_index)
        else:
            if config.mode == 'test':
                model.test(test_data)
            elif config.mode == 'train':
                model.train(train_data, validation_data)
                model.save('end')
            # No need to handle a wrong mode here; argparse restricts the choices
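
# Example invocations (hypothetical paths and values), using the flags defined below:
#   python main.py --mode train --model_name CNN --lr 1e-3 --max_iter 1000
#   python main.py --mode infer --pdb-folder ./pdbs --residue-index 42 --chain-id A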
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='''\t\t+-------------------------------------+
\t\t|                                     |
\t\t|               ProtNets              |
\t\t|                                     |
\t\t|         Neural networks for         |
\t\t|  protein spherical representations  |
\t\t|                                     |
\t\t|                                     |
\t\t+-------------------------------------+
''', formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--fullsearch',
                        action='store_true',
                        help='Perform a full search of the hyperparameter space, e.g. hyperband > lr search > '
                             'hyperband with best lr (default: %(default)s)')
    parser.add_argument('--dry_run',
                        action='store_true',
                        help='Perform a dry run: build the model without training it (default: %(default)s)')
    parser.add_argument('--nb_process',
                        default=4,
                        type=int,
                        help='Number of parallel processes for a HP search (default: %(default)s)')
    # fixed_params is a trick I use to be able to fix some parameters inside the model's
    # random-config function. For example, one might want to explore different models
    # while fixing the learning rate; see the basic_model get_random_config function.
    parser.add_argument('--fixed_params',
                        default='{}',
                        type=str,
                        help='JSON input to fix some params in a HP search, e.g. {"lr": 0.001} (default: %(default)s)')
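    # Example (hypothetical): --fixed_params '{"lr": 0.001}' pins the learning rate
    # while the remaining hyperparameters are still sampled during the search.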
    # Agent configuration
    parser.add_argument('--model_name',
                        default='CNN',
                        type=str,
                        choices=available_models,
                        help='Unique name of the model (default: %(default)s)')
    parser.add_argument('--optimizer',
                        default='Adam',
                        type=str,
                        choices=['Adam', 'Nesterov', 'AdaDelta'],
                        help='Model optimizer (default: %(default)s)')
    parser.add_argument('--best',
                        action='store_true',
                        help='Force use of the best known configuration (default: %(default)s)')
    parser.add_argument('--l2_beta',
                        default=0.001,
                        type=float,
                        help='L2 regularization beta (default: %(default)s)')
    parser.add_argument('--initial_stddev',
                        default=0.1,
                        type=float,
                        help='Initial standard deviation for the neural network weights (default: %(default)s)')
    parser.add_argument('--lr',
                        default=1e-3,
                        type=float,
                        help='The learning rate of SGD (default: %(default)s)')
    parser.add_argument('--dropout',
                        default=0.5,
                        type=float,
                        help='Dropout value for training (default: %(default)s)')
    parser.add_argument('--no_batch_norm',
                        action='store_true',
                        help='Disable batch normalization (default: %(default)s)')
    # Environment configuration
    parser.add_argument('--debug',
                        action='store_true',
                        help='Debug mode (default: %(default)s)')
    parser.add_argument('--max_iter',
                        default=10,
                        type=int,
                        help='Number of training steps (default: %(default)s)')
    parser.add_argument('--mode',
                        type=str,
                        default='train',  # FIXME: Change back to infer when finished
                        choices=['train', 'test', 'infer'],
                        help='Run mode: train, test, or infer from a single data input (default: %(default)s)')
    # This is very important for TensorBoard:
    # each model ends up in its own unique folder, named via the time module.
    # Obviously one can also choose to name the output folder explicitly.
    parser.add_argument('--result_dir',
                        default=os.path.join(script_dir, 'results', str(int(time.time()))),
                        help='Directory to store/log the model (if it exists, the model will be loaded '
                             'from it) (default: %(default)s)')
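    # For example (hypothetical timestamp), a run started at Unix time 1528312345
    # logs to results/1528312345/, so TensorBoard can be pointed at the parent:
    #   tensorboard --logdir results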
    parser.add_argument('--data_dir',
                        default=os.path.join(script_dir, 'data', 'culled_pc30', 'atomistic_features_spherical'),
                        type=str,
                        help='Directory with the data for training and testing (default: %(default)s)')
    # Another important point: you must provide access to the random seed
    # to be able to fully reproduce an experiment
    parser.add_argument('--seed',
                        default=random.randint(0, sys.maxsize),
                        type=int,
                        help='Explicit value for the dropout seed; otherwise a random integer')
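    # Example: running `python main.py --seed 42` twice should reproduce a run
    # (up to any nondeterminism in the underlying GPU ops).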
    # Data division
    parser.add_argument('--validation_fraction',
                        default=0.10,
                        type=float,
                        help='Validation data fraction, taken from the train set (default: %(default)s)')
    parser.add_argument('--test_fraction',
                        default=0.10,
                        type=float,
                        help='Test data fraction, taken from the full dataset (default: %(default)s)')
    parser.add_argument('--data_type',
                        default='aa',
                        type=str,
                        choices=['aa', 'ss'],
                        help='Data type to be trained on (default: %(default)s)')
    parser.add_argument('--batch-size',
                        default=25,
                        type=int,
                        help='Maximum batch size used for gradient calculation (default: %(default)s)')
    parser.add_argument('--residue-index',
                        type=int,
                        help='Residue index for inference mode')
    parser.add_argument('--chain-id',
                        type=str,
                        help='Chain ID for inference mode')
    parser.add_argument('--pdb-folder',
                        type=str,
                        help='PDB folder for inference mode')
    config = parser.parse_args()
    infer_args_given = (config.residue_index is not None or config.chain_id is not None
                        or config.pdb_folder is not None)
    if config.mode != 'infer' and infer_args_given:
        parser.error('--residue-index, --chain-id and --pdb-folder are only used in inference mode.')
    elif config.mode == 'infer' and not infer_args_given:
        parser.error('--residue-index, --chain-id and/or --pdb-folder must be set in inference mode.')
    tf.app.run(main=main)