-
Notifications
You must be signed in to change notification settings - Fork 4k
/
hetzner_manager.go
283 lines (239 loc) · 7.79 KB
/
hetzner_manager.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package hetzner
import (
"context"
"encoding/base64"
"encoding/json"
"errors"
"fmt"
"net/http"
"os"
"strconv"
"strings"
"time"
apiv1 "k8s.io/api/core/v1"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/hetzner/hcloud-go/hcloud"
"k8s.io/autoscaler/cluster-autoscaler/version"
)
// httpClient is the package-wide HTTP client that newManager hands to the
// hcloud client. Its transport is whatever instrumentedRoundTripper returns.
var httpClient = &http.Client{Transport: instrumentedRoundTripper()}
// hetznerManager handles Hetzner communication and data caching of
// node groups. It is constructed by newManager, which fills every field
// from environment variables and the Hetzner Cloud API.
type hetznerManager struct {
// client is the Hetzner Cloud API client used for all remote calls.
client *hcloud.Client
// nodeGroups holds the known node groups keyed by their ID. newManager
// seeds it with the draining pool under drainingNodePoolId.
nodeGroups map[string]*hetznerNodeGroup
// apiCallContext is the context passed to API calls (e.g. server deletion).
apiCallContext context.Context
// clusterConfig is the node pool configuration parsed from the environment.
clusterConfig *ClusterConfig
// sshKey is the key resolved from HCLOUD_SSH_KEY; nil when the variable is unset.
sshKey *hcloud.SSHKey
// network is the network resolved from HCLOUD_NETWORK; nil when the variable is unset.
network *hcloud.Network
// firewall is the firewall resolved from HCLOUD_FIREWALL; nil when the variable is unset.
firewall *hcloud.Firewall
// createTimeout comes from HCLOUD_SERVER_CREATION_TIMEOUT (minutes) and
// falls back to serverCreateTimeoutDefault.
createTimeout time.Duration
// publicIPv4 mirrors HCLOUD_PUBLIC_IPV4; defaults to true when unset.
publicIPv4 bool
// publicIPv6 mirrors HCLOUD_PUBLIC_IPV6; defaults to true when unset.
publicIPv6 bool
// cachedServerType is the cache created by newServerTypeCache.
cachedServerType *serverTypeCache
// cachedServers is the cache created by newServersCache; allServers and
// serverForNode read servers through it.
cachedServers *serversCache
}
// ClusterConfig holds the configuration for all the nodepools. In the new
// format it is decoded from the base64-encoded JSON in HCLOUD_CLUSTER_CONFIG;
// the fields carry no JSON tags, so the field names are the JSON keys.
type ClusterConfig struct {
// ImagesForArch holds the image to use per CPU architecture.
ImagesForArch ImageList
// NodeConfigs holds one NodeConfig per map key.
// NOTE(review): keys are presumably node pool names — confirm against the consumers.
NodeConfigs map[string]*NodeConfig
// IsUsingNewFormat is set by newManager when HCLOUD_CLUSTER_CONFIG is
// provided; otherwise LegacyConfig is populated instead.
IsUsingNewFormat bool
// LegacyConfig is filled from HCLOUD_CLOUD_INIT and HCLOUD_IMAGE when the
// new format is not in use.
LegacyConfig LegacyConfig
}
// ImageList holds the image id/names for the different architectures
type ImageList struct {
// Arm64 is the image id or name for arm64 servers.
Arm64 string
// Amd64 is the image id or name for amd64 servers.
Amd64 string
}
// NodeConfig holds the configuration for a single nodepool
type NodeConfig struct {
// CloudInit is the cloud-init payload for this nodepool.
// NOTE(review): presumably plain text rather than base64 — confirm against the consumer.
CloudInit string
// Taints lists the Kubernetes taints configured for this nodepool.
Taints []apiv1.Taint
// Labels lists the labels configured for this nodepool.
Labels map[string]string
}
// LegacyConfig holds the configuration in the legacy format, i.e. when
// HCLOUD_CLUSTER_CONFIG is not provided.
type LegacyConfig struct {
// CloudInit is the base64-decoded content of HCLOUD_CLOUD_INIT.
CloudInit string
// ImageName is the value of HCLOUD_IMAGE, or "ubuntu-20.04" when that
// variable is empty.
ImageName string
}
// newManager builds a hetznerManager from the process environment.
//
// Environment variables read:
//   - HCLOUD_TOKEN (required): API token for the Hetzner Cloud client.
//   - HCLOUD_CLUSTER_CONFIG: base64-encoded JSON ClusterConfig (new format).
//   - HCLOUD_CLOUD_INIT: base64-encoded cloud-init, used only when
//     HCLOUD_CLUSTER_CONFIG is empty (legacy format). One of the two must be set.
//   - HCLOUD_IMAGE: image name for the legacy format, default "ubuntu-20.04".
//   - HCLOUD_PUBLIC_IPV4 / HCLOUD_PUBLIC_IPV6: booleans, default true.
//   - HCLOUD_SSH_KEY / HCLOUD_NETWORK / HCLOUD_FIREWALL: optional ID or name
//     of an existing resource; when set, the resource must exist.
//   - HCLOUD_SERVER_CREATION_TIMEOUT: timeout in minutes; unset, unparsable
//     or non-positive values fall back to serverCreateTimeoutDefault.
//
// It returns an error when a required variable is missing, a value cannot be
// parsed, an API lookup fails, or a configured resource does not exist.
func newManager() (*hetznerManager, error) {
	token := os.Getenv("HCLOUD_TOKEN")
	if token == "" {
		return nil, errors.New("`HCLOUD_TOKEN` is not specified")
	}

	client := hcloud.NewClient(
		hcloud.WithToken(token),
		hcloud.WithHTTPClient(httpClient),
		hcloud.WithApplication("cluster-autoscaler", version.ClusterAutoscalerVersion),
		hcloud.WithPollBackoffFunc(hcloud.ExponentialBackoff(2, 500*time.Millisecond)),
		hcloud.WithDebugWriter(&debugWriter{}),
	)

	ctx := context.Background()
	var err error

	clusterConfigBase64 := os.Getenv("HCLOUD_CLUSTER_CONFIG")
	cloudInitBase64 := os.Getenv("HCLOUD_CLOUD_INIT")
	if clusterConfigBase64 == "" && cloudInitBase64 == "" {
		return nil, errors.New("`HCLOUD_CLUSTER_CONFIG` or `HCLOUD_CLOUD_INIT` is not specified")
	}

	clusterConfig := &ClusterConfig{}
	if clusterConfigBase64 != "" {
		var clusterConfigJSON []byte
		clusterConfigJSON, err = base64.StdEncoding.DecodeString(clusterConfigBase64)
		if err != nil {
			return nil, fmt.Errorf("failed to parse cluster config error: %w", err)
		}
		// Decode into the struct itself, not into a pointer to the pointer:
		// with a **ClusterConfig target a JSON `null` payload would reset the
		// pointer to nil and every later field access would panic.
		if err = json.Unmarshal(clusterConfigJSON, clusterConfig); err != nil {
			return nil, fmt.Errorf("failed to unmarshal cluster config JSON: %w", err)
		}
		// Set the flag after decoding so the payload cannot override it and
		// silently push us onto the legacy path.
		clusterConfig.IsUsingNewFormat = true
	} else {
		var cloudInit []byte
		cloudInit, err = base64.StdEncoding.DecodeString(cloudInitBase64)
		if err != nil {
			return nil, fmt.Errorf("failed to parse cloud init error: %w", err)
		}

		imageName := os.Getenv("HCLOUD_IMAGE")
		if imageName == "" {
			imageName = "ubuntu-20.04"
		}

		clusterConfig.LegacyConfig.CloudInit = string(cloudInit)
		clusterConfig.LegacyConfig.ImageName = imageName
	}

	publicIPv4 := true
	if publicIPv4Str := os.Getenv("HCLOUD_PUBLIC_IPV4"); publicIPv4Str != "" {
		publicIPv4, err = strconv.ParseBool(publicIPv4Str)
		if err != nil {
			return nil, fmt.Errorf("failed to parse HCLOUD_PUBLIC_IPV4: %w", err)
		}
	}

	publicIPv6 := true
	if publicIPv6Str := os.Getenv("HCLOUD_PUBLIC_IPV6"); publicIPv6Str != "" {
		publicIPv6, err = strconv.ParseBool(publicIPv6Str)
		if err != nil {
			return nil, fmt.Errorf("failed to parse HCLOUD_PUBLIC_IPV6: %w", err)
		}
	}

	// The hcloud Get helpers report "no such resource" as a nil result with a
	// nil error. Treat that as a configuration error instead of silently
	// creating servers without the requested key, network or firewall.
	var sshKey *hcloud.SSHKey
	if sshKeyIDOrName := os.Getenv("HCLOUD_SSH_KEY"); sshKeyIDOrName != "" {
		sshKey, _, err = client.SSHKey.Get(ctx, sshKeyIDOrName)
		if err != nil {
			return nil, fmt.Errorf("failed to get ssh key error: %w", err)
		}
		if sshKey == nil {
			return nil, fmt.Errorf("failed to get ssh key error: %q not found", sshKeyIDOrName)
		}
	}

	var network *hcloud.Network
	if networkIDOrName := os.Getenv("HCLOUD_NETWORK"); networkIDOrName != "" {
		network, _, err = client.Network.Get(ctx, networkIDOrName)
		if err != nil {
			return nil, fmt.Errorf("failed to get network error: %w", err)
		}
		if network == nil {
			return nil, fmt.Errorf("failed to get network error: %q not found", networkIDOrName)
		}
	}

	// Best effort: an unset or unparsable value keeps the default. Only
	// strictly positive values are accepted, because a negative duration
	// would make every server creation time out immediately.
	createTimeout := serverCreateTimeoutDefault
	if minutes, convErr := strconv.Atoi(os.Getenv("HCLOUD_SERVER_CREATION_TIMEOUT")); convErr == nil && minutes > 0 {
		createTimeout = time.Duration(minutes) * time.Minute
	}

	var firewall *hcloud.Firewall
	if firewallIDOrName := os.Getenv("HCLOUD_FIREWALL"); firewallIDOrName != "" {
		firewall, _, err = client.Firewall.Get(ctx, firewallIDOrName)
		if err != nil {
			return nil, fmt.Errorf("failed to get firewall error: %w", err)
		}
		if firewall == nil {
			return nil, fmt.Errorf("failed to get firewall error: %q not found", firewallIDOrName)
		}
	}

	m := &hetznerManager{
		client:           client,
		nodeGroups:       make(map[string]*hetznerNodeGroup),
		sshKey:           sshKey,
		network:          network,
		firewall:         firewall,
		createTimeout:    createTimeout,
		apiCallContext:   ctx,
		publicIPv4:       publicIPv4,
		publicIPv6:       publicIPv6,
		clusterConfig:    clusterConfig,
		cachedServerType: newServerTypeCache(ctx, client),
		cachedServers:    newServersCache(ctx, client),
	}

	// The draining pool is registered up front with all sizes at zero;
	// addNodeToDrainingPool bumps its targetSize as nodes are moved into it.
	m.nodeGroups[drainingNodePoolId] = &hetznerNodeGroup{
		manager:      m,
		instanceType: "cx11",
		region:       "fsn1",
		targetSize:   0,
		maxSize:      0,
		minSize:      0,
		id:           drainingNodePoolId,
	}

	return m, nil
}
// Refresh is the hook the CA calls on every `--scan-interval` (10 seconds by
// default) to refresh the node group cache. In this manager it is a no-op:
// it performs no work and always returns nil.
func (m *hetznerManager) Refresh() error {
return nil
}
// allServers returns the servers the server cache reports for the node group
// with the given name. A cache lookup failure is returned as an error with a
// nil slice.
func (m *hetznerManager) allServers(nodeGroup string) ([]*hcloud.Server, error) {
	found, lookupErr := m.cachedServers.getServersByNodeGroupName(nodeGroup)
	if lookupErr == nil {
		return found, nil
	}
	return nil, fmt.Errorf("failed to get servers for hcloud: %v", lookupErr)
}
// deleteByNode resolves the server behind the given Kubernetes node and
// deletes it. It fails when the lookup errors or when no server matches the
// node.
func (m *hetznerManager) deleteByNode(node *apiv1.Node) error {
	target, lookupErr := m.serverForNode(node)
	switch {
	case lookupErr != nil:
		return fmt.Errorf("failed to delete node %s error: %v", node.Name, lookupErr)
	case target == nil:
		return fmt.Errorf("failed to delete node %s server not found", node.Name)
	default:
		return m.deleteServer(target)
	}
}
// deleteServer asks the Hetzner Cloud API to delete the given server, using
// the manager's API call context, and returns the API error unchanged.
func (m *hetznerManager) deleteServer(server *hcloud.Server) error {
	if _, deleteErr := m.client.Server.Delete(m.apiCallContext, server); deleteErr != nil {
		return deleteErr
	}
	return nil
}
// addNodeToDrainingPool increments the target size of the draining node pool
// by one and returns that pool. The node argument is not inspected and the
// returned error is always nil.
func (m *hetznerManager) addNodeToDrainingPool(node *apiv1.Node) (*hetznerNodeGroup, error) {
	drainingPool := m.nodeGroups[drainingNodePoolId]
	drainingPool.targetSize++
	return drainingPool, nil
}
// validProviderID reports whether providerID starts with providerIDPrefix,
// i.e. whether it names a server this provider is responsible for.
func (m *hetznerManager) validProviderID(providerID string) bool {
	ownedByProvider := strings.HasPrefix(providerID, providerIDPrefix)
	return ownedByProvider
}
// serverForNode looks up the server backing the given node in the server
// cache. The lookup key is the node's provider ID with providerIDPrefix
// stripped, or the node name when no provider ID is set.
//
// It returns (nil, nil) for nodes whose provider ID does not carry
// providerIDPrefix, and an error when the cache lookup fails.
func (m *hetznerManager) serverForNode(node *apiv1.Node) (*hcloud.Server, error) {
	// Fall back to the node name unless a provider ID is present.
	lookupKey := node.Name
	if providerID := node.Spec.ProviderID; providerID != "" {
		if !m.validProviderID(providerID) {
			// This cluster-autoscaler provider only handles Hetzner Cloud
			// servers. Any other provider ID prefix is invalid, and we return
			// no server. Returning an error here breaks hybrid clusters with
			// nodes from Hetzner Cloud & Robot (or other providers).
			return nil, nil
		}
		lookupKey = strings.TrimPrefix(providerID, providerIDPrefix)
	}

	found, lookupErr := m.cachedServers.getServer(lookupKey)
	if lookupErr != nil {
		return nil, fmt.Errorf("failed to get servers for node %s error: %v", node.Name, lookupErr)
	}
	return found, nil
}