-
Notifications
You must be signed in to change notification settings - Fork 6
/
nas_bench_macro.py
157 lines (121 loc) · 6.55 KB
/
nas_bench_macro.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import os
import json
import openai
import numpy as np
from decimal import Decimal
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('--openai_key', type=str, required=True)
parser.add_argument('--openai_organization', type=str, required=True)
args = parser.parse_args()
print(args)
openai.api_key = args.openai_key
openai.organization = args.openai_organization
benchmark_file = open('benchmark/nas-bench-macro_cifar10.json')
data = json.load(benchmark_file)
keys = list(data.keys())
rank = np.array([data[k]['mean_acc'] for k in keys]).argsort().argsort()
for k, r in zip(keys, rank):
data[k]['rank'] = (3 ** 8) - r
system_content = "You are an expert in the field of neural architecture search."
user_input = '''Your task is to assist me in selecting the best operations for a given model architecture, which includes some undefined layers and available operations. The model will be trained and tested on CIFAR10, and your objective will be to maximize the model's performance on CIFAR10.
We define the 3 available operations as the following:
0: Identity(in_channels, out_channels, stride)
1: InvertedResidual(in_channels, out_channels, stride expansion=3, kernel_size=3)
2: InvertedResidual(in_channels, out_channels, stride expansion=6, kernel_size=5)
The implementation of the Identity is as follows:
class Identity(nn.Module):
def __init__(self, in_channels, out_channels, stride):
super(Identity, self).__init__()
if stride != 1 or in_channels != out_channels:
self.downsample = nn.Sequential(
nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(out_channels),
)
else:
self.downsample = None
def forward(self, x):
if self.downsample is not None:
x = self.downsample(x)
return x
The implementation of the InvertedResidual is as follows:
class InvertedResidual(nn.Module):
def __init__(self, in_channels, out_channels, stride, expansion, kernel_size):
super(InvertedResidual, self).__init__()
hidden_dim = in_channels * expansion
self.conv = nn.Sequential(
nn.Conv2d(in_channels, hidden_dim, kernel_size=1, stride=1, padding=0, bias=False),
nn.BatchNorm2d(hidden_dim),
nn.ReLU(inplace=True),
nn.Conv2d(hidden_dim, hidden_dim, kernel_size=kernel_size, stride=stride, padding=kernel_size//2, groups=hidden_dim, bias=False),
nn.BatchNorm2d(hidden_dim),
nn.ReLU(inplace=True),
nn.Conv2d(hidden_dim, out_channels, 1, 1, 0, bias=False),
nn.BatchNorm2d(out_channels),
)
self.use_shortcut = in_channels == out_channels and stride == 1
def forward(self, x):
if self.use_shortcut:
return self.conv(x) + x
return self.conv(x)
The model architecture will be defined as the following.
{
layer1: {defined: True, operation: nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1, bias=False)},
layer2: {defined: False, downsample: True , in_channels: 32, out_channels: 64 , stride: 2},
layer3: {defined: False, downsample: False, in_channels: 64, out_channels: 64 , stride: 1},
layer4: {defined: False, downsample: True , in_channels: 64, out_channels: 128, stride: 2},
layer5: {defined: False, downsample: False, in_channels: 128, out_channels: 128, stride: 1},
layer6: {defined: False, downsample: False, in_channels: 128, out_channels: 128, stride: 1},
layer7: {defined: False, downsample: True , in_channels: 128, out_channels: 256, stride: 2},
layer8: {defined: False, downsample: False, in_channels: 256, out_channels: 256, stride: 1},
layer9: {defined: False, downsample: False, in_channels: 256, out_channels: 256, stride: 1},
layer10: {defined: True, operation: nn.Conv2d(in_channels=256, out_channels=1280, kernel_size=1, bias=False, stride=1)},
layer11: {defined: True, operation: nn.AdaptiveAvgPool2d(output_size=1)},
layer12: {defined: True, operation: nn.Linear(in_features=1280, out_features=10)},
}
The currently undefined layers are layer2 - layer9, and the in_channels and out_channels have already been defined for each layer. To maximize the model's performance on CIFAR10, please provide me with your suggested operation for the undefined layers only.
Your response should be an operation ID list for the undefined layers. For example:
[1, 2, ..., 0] means we use operation 1 for layer2, operation 2 for layer3, ..., operation 0 for layer9.
'''
experiments_prompt = lambda arch_list, acc_list : '''Here are some experimental results that you can use as a reference:
{}
Please suggest a better operation ID list that can improve the model's performance on CIFAR10 beyond the experimental results provided above.
'''.format(''.join(['{} gives an accuracy of {:.2f}%\n'.format(arch, acc) for arch, acc in zip(arch_list, acc_list)]))
suffix = '''Please do not include anything other than the operation ID list in your response.'''
arch_list = []
acc_list = []
messages = [
{"role": "system", "content": system_content},
{"role": "user", "content": user_input + suffix},
]
performance_history = []
messages_history = []
if not os.path.exists('history'):
os.makedirs('history')
for iteration in range(10):
res = openai.ChatCompletion.create(model='gpt-4', messages=messages, temperature=0, n=1)['choices'][0]['message']
messages.append(res)
messages_history.append(messages)
print(res['content'])
operation_id_list = json.loads(res['content'])
operation_id_list_str = ''.join(str(opid) for opid in operation_id_list)
accuracy = data[operation_id_list_str]['mean_acc']
accuracy = float(Decimal(accuracy).quantize(Decimal("0.01"), rounding = "ROUND_HALF_UP"))
arch_list.append(operation_id_list)
acc_list.append(accuracy)
performance = {
'arch' : operation_id_list_str,
'rank' : str(data[operation_id_list_str]['rank']),
'acc' : str(data[operation_id_list_str]['mean_acc']),
'flops': str(data[operation_id_list_str]['flops']),
}
print(iteration+1, performance)
performance_history.append(performance)
with open('history/nas_bench_macro_messages.json', 'w') as f:
json.dump(messages_history, f)
with open('history/nas_bench_macro_performance.json', 'w') as f:
json.dump(performance_history, f)
messages = [
{"role": "system", "content": system_content},
{"role": "user", "content": user_input + experiments_prompt(arch_list, acc_list) + suffix},
]