-
Notifications
You must be signed in to change notification settings - Fork 150
/
Copy pathmain.go
326 lines (276 loc) · 15.6 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package main is the GCE PD CSI Driver entrypoint.
package main
import (
"context"
"errors"
"flag"
"fmt"
"math/rand"
"net/url"
"os"
"runtime"
"strings"
"time"
"k8s.io/klog/v2"
"k8s.io/utils/strings/slices"
"sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/common"
"sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/deviceutils"
gce "sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/gce-cloud-provider/compute"
metadataservice "sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/gce-cloud-provider/metadata"
driver "sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/gce-pd-csi-driver"
"sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/metrics"
mountmanager "sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/mount-manager"
)
var (
cloudConfigFilePath = flag.String("cloud-config", "", "Path to GCE cloud provider config")
endpoint = flag.String("endpoint", "unix:/tmp/csi.sock", "CSI endpoint")
runControllerService = flag.Bool("run-controller-service", true, "If set to false then the CSI driver does not activate its controller service (default: true)")
runNodeService = flag.Bool("run-node-service", true, "If set to false then the CSI driver does not activate its node service (default: true)")
httpEndpoint = flag.String("http-endpoint", "", "The TCP network address where the prometheus metrics endpoint will listen (example: `:8080`). The default is empty string, which means metrics endpoint is disabled.")
metricsPath = flag.String("metrics-path", "/metrics", "The HTTP path where prometheus metrics will be exposed. Default is `/metrics`.")
grpcLogCharCap = flag.Int("grpc-log-char-cap", 10000, "The maximum amount of characters logged for every grpc responses")
enableOtelTracing = flag.Bool("enable-otel-tracing", false, "If set, enable opentelemetry tracing for the driver. The tracing is disabled by default. Configure the exporter endpoint with OTEL_EXPORTER_OTLP_ENDPOINT and other env variables, see https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/#general-sdk-configuration.")
errorBackoffInitialDurationMs = flag.Int("backoff-initial-duration-ms", 200, "The amount of ms for the initial duration of the backoff condition for controller publish/unpublish CSI operations. Default is 200.")
errorBackoffMaxDurationMs = flag.Int("backoff-max-duration-ms", 300000, "The amount of ms for the max duration of the backoff condition for controller publish/unpublish CSI operations. Default is 300000 (5m).")
extraVolumeLabelsStr = flag.String("extra-labels", "", "Extra labels to attach to each PD created. It is a comma separated list of key value pairs like '<key1>=<value1>,<key2>=<value2>'. See https://cloud.google.com/compute/docs/labeling-resources for details")
attachDiskBackoffDuration = flag.Duration("attach-disk-backoff-duration", 5*time.Second, "Duration for attachDisk backoff")
attachDiskBackoffFactor = flag.Float64("attach-disk-backoff-factor", 0.0, "Factor for attachDisk backoff")
attachDiskBackoffJitter = flag.Float64("attach-disk-backoff-jitter", 0.0, "Jitter for attachDisk backoff")
attachDiskBackoffSteps = flag.Int("attach-disk-backoff-steps", 24, "Steps for attachDisk backoff")
attachDiskBackoffCap = flag.Duration("attach-disk-backoff-cap", 0, "Cap for attachDisk backoff")
waitForOpBackoffDuration = flag.Duration("wait-op-backoff-duration", 3*time.Second, "Duration for wait for operation backoff")
waitForOpBackoffFactor = flag.Float64("wait-op-backoff-factor", 0.0, "Factor for wait for operation backoff")
waitForOpBackoffJitter = flag.Float64("wait-op-backoff-jitter", 0.0, "Jitter for wait for operation backoff")
waitForOpBackoffSteps = flag.Int("wait-op-backoff-steps", 100, "Steps for wait for operation backoff")
waitForOpBackoffCap = flag.Duration("wait-op-backoff-cap", 0, "Cap for wait for operation backoff")
maxProcs = flag.Int("maxprocs", 1, "GOMAXPROCS override")
maxConcurrentFormat = flag.Int("max-concurrent-format", 1, "The maximum number of concurrent format exec calls")
concurrentFormatTimeout = flag.Duration("concurrent-format-timeout", 1*time.Minute, "The maximum duration of a format operation before its concurrency token is released")
maxConcurrentFormatAndMount = flag.Int("max-concurrent-format-and-mount", 1, "If set then format and mount operations are serialized on each node. This is stronger than max-concurrent-format as it includes fsck and other mount operations")
formatAndMountTimeout = flag.Duration("format-and-mount-timeout", 1*time.Minute, "The maximum duration of a format and mount operation before another such operation will be started. Used only if --serialize-format-and-mount")
fallbackRequisiteZonesFlag = flag.String("fallback-requisite-zones", "", "Comma separated list of requisite zones that will be used if there are not sufficient zones present in requisite topologies when provisioning a disk")
enableStoragePoolsFlag = flag.Bool("enable-storage-pools", false, "If set to true, the CSI Driver will allow volumes to be provisioned in Storage Pools")
multiZoneVolumeHandleDiskTypesFlag = flag.String("multi-zone-volume-handle-disk-types", "", "Comma separated list of allowed disk types that can use the multi-zone volumeHandle. Used only if --multi-zone-volume-handle-enable")
multiZoneVolumeHandleEnableFlag = flag.Bool("multi-zone-volume-handle-enable", false, "If set to true, the multi-zone volumeHandle feature will be enabled")
computeEnvironment gce.Environment = gce.EnvironmentProduction
computeEndpoint *url.URL
allowedComputeEnvironment = []gce.Environment{gce.EnvironmentStaging, gce.EnvironmentProduction}
useInstanceAPIOnWaitForAttachDiskTypesFlag = flag.String("use-instance-api-to-poll-attachment-disk-types", "", "Comma separated list of disk types that should use instances.get API when polling for disk attach during ControllerPublish")
useInstanceAPIForListVolumesPublishedNodesFlag = flag.Bool("use-instance-api-to-list-volumes-published-nodes", false, "Enables using the instances.list API to determine published_node_ids in ListVolumes. When false (default), the disks.list API is used")
instancesListFiltersFlag = flag.String("instances-list-filters", "", "Comma separated list of filters to use when calling the instances.list API. By default instances.list fetches all instances in a region")
diskSupportsIopsChangeFlag = flag.String("supports-dynamic-iops-provisioning", "", "Comma separated list of disk types that support dynamic IOPS provisioning")
diskSupportsThroughputChangeFlag = flag.String("supports-dynamic-throughput-provisioning", "", "Comma separated list of disk types that support dynamic throughput provisioning")
extraTagsStr = flag.String("extra-tags", "", "Extra tags to attach to each Compute Disk, Image, Snapshot created. It is a comma separated list of parent id, key and value like '<parent_id1>/<tag_key1>/<tag_value1>,...,<parent_idN>/<tag_keyN>/<tag_valueN>'. parent_id is the Organization or the Project ID or Project name where the tag key and the tag value resources exist. A maximum of 50 tags bindings is allowed for a resource. See https://cloud.google.com/resource-manager/docs/tags/tags-overview, https://cloud.google.com/resource-manager/docs/tags/tags-creating-and-managing for details")
version string
)
const (
driverName = "pd.csi.storage.gke.io"
)
func init() {
// klog verbosity guide for this package
// Use V(2) for one time config information
// Use V(4) for general debug information logging
// Use V(5) for GCE Cloud Provider Call informational logging
// Use V(6) for extra repeated/polling information
stringEnumFlag(&computeEnvironment, "compute-environment", allowedComputeEnvironment, "Operating compute environment")
urlFlag(&computeEndpoint, "compute-endpoint", "Compute endpoint")
klog.InitFlags(flag.CommandLine)
flag.Set("logtostderr", "true")
}
func main() {
flag.Parse()
rand.Seed(time.Now().UnixNano())
klog.Infof("Operating compute environment set to: %s and computeEndpoint is set to: %v", computeEnvironment, computeEndpoint)
handle()
os.Exit(0)
}
func handle() {
var err error
runtime.GOMAXPROCS(*maxProcs)
klog.Infof("Sys info: NumCPU: %v MAXPROC: %v", runtime.NumCPU(), runtime.GOMAXPROCS(0))
if version == "" {
klog.Fatalf("version must be set at compile time")
}
klog.V(2).Infof("Driver vendor version %v", version)
// Start tracing as soon as possible
if *enableOtelTracing {
exporter, err := driver.InitOtelTracing()
if err != nil {
klog.Fatalf("Failed to initialize otel tracing: %v", err.Error())
}
// Exporter will flush traces on shutdown
defer func() {
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
if err := exporter.Shutdown(ctx); err != nil {
klog.Errorf("Could not shutdown otel exporter: %v", err.Error())
}
}()
}
var metricsManager *metrics.MetricsManager = nil
if *runControllerService && *httpEndpoint != "" {
mm := metrics.NewMetricsManager()
mm.InitializeHttpHandler(*httpEndpoint, *metricsPath)
mm.RegisterPDCSIMetric()
if metrics.IsGKEComponentVersionAvailable() {
mm.EmitGKEComponentVersion()
}
metricsManager = &mm
}
if len(*extraVolumeLabelsStr) > 0 && !*runControllerService {
klog.Fatalf("Extra volume labels provided but not running controller")
}
extraVolumeLabels, err := common.ConvertLabelsStringToMap(*extraVolumeLabelsStr)
if err != nil {
klog.Fatalf("Bad extra volume labels: %v", err.Error())
}
if len(*extraTagsStr) > 0 && !*runControllerService {
klog.Fatalf("Extra tags provided but not running controller")
}
extraTags, err := common.ConvertTagsStringToMap(*extraTagsStr)
if err != nil {
klog.Fatalf("Bad extra tags: %v", err.Error())
}
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
// Initialize driver
gceDriver := driver.GetGCEDriver()
// Initialize identity server
identityServer := driver.NewIdentityServer(gceDriver)
// Initialize requisite zones
fallbackRequisiteZones := parseCSVFlag(*fallbackRequisiteZonesFlag)
// Initialize multi-zone disk types
multiZoneVolumeHandleDiskTypes := parseCSVFlag(*multiZoneVolumeHandleDiskTypesFlag)
multiZoneVolumeHandleConfig := driver.MultiZoneVolumeHandleConfig{
Enable: *multiZoneVolumeHandleEnableFlag,
DiskTypes: multiZoneVolumeHandleDiskTypes,
}
// Initialize waitForAttach config
useInstanceAPIOnWaitForAttachDiskTypes := parseCSVFlag(*useInstanceAPIOnWaitForAttachDiskTypesFlag)
waitForAttachConfig := gce.WaitForAttachConfig{
UseInstancesAPIForDiskTypes: useInstanceAPIOnWaitForAttachDiskTypes,
}
// Initialize listVolumes config
instancesListFilters := parseCSVFlag(*instancesListFiltersFlag)
listInstancesConfig := gce.ListInstancesConfig{
Filters: instancesListFilters,
}
listVolumesConfig := driver.ListVolumesConfig{
UseInstancesAPIForPublishedNodes: *useInstanceAPIForListVolumesPublishedNodesFlag,
}
// Initialize provisionableDisks config
supportsIopsChange := parseCSVFlag(*diskSupportsIopsChangeFlag)
supportsThroughputChange := parseCSVFlag(*diskSupportsThroughputChangeFlag)
provisionableDisksConfig := driver.ProvisionableDisksConfig{
SupportsIopsChange: supportsIopsChange,
SupportsThroughputChange: supportsThroughputChange,
}
// Initialize requirements for the controller service
var controllerServer *driver.GCEControllerServer
if *runControllerService {
cloudProvider, err := gce.CreateCloudProvider(ctx, version, *cloudConfigFilePath, computeEndpoint, computeEnvironment, waitForAttachConfig, listInstancesConfig)
if err != nil {
klog.Fatalf("Failed to get cloud provider: %v", err.Error())
}
initialBackoffDuration := time.Duration(*errorBackoffInitialDurationMs) * time.Millisecond
maxBackoffDuration := time.Duration(*errorBackoffMaxDurationMs) * time.Millisecond
controllerServer = driver.NewControllerServer(gceDriver, cloudProvider, initialBackoffDuration, maxBackoffDuration, fallbackRequisiteZones, *enableStoragePoolsFlag, multiZoneVolumeHandleConfig, listVolumesConfig, provisionableDisksConfig)
} else if *cloudConfigFilePath != "" {
klog.Warningf("controller service is disabled but cloud config given - it has no effect")
}
// Initialize requirements for the node service
var nodeServer *driver.GCENodeServer
if *runNodeService {
mounter, err := mountmanager.NewSafeMounter(*maxConcurrentFormat, *concurrentFormatTimeout)
if err != nil {
klog.Fatalf("Failed to get safe mounter: %v", err.Error())
}
deviceUtils := deviceutils.NewDeviceUtils()
statter := mountmanager.NewStatter(mounter)
meta, err := metadataservice.NewMetadataService()
if err != nil {
klog.Fatalf("Failed to set up metadata service: %v", err.Error())
}
nodeServer = driver.NewNodeServer(gceDriver, mounter, deviceUtils, meta, statter)
if *maxConcurrentFormatAndMount > 0 {
nodeServer = nodeServer.WithSerializedFormatAndMount(*formatAndMountTimeout, *maxConcurrentFormatAndMount)
}
}
err = gceDriver.SetupGCEDriver(driverName, version, extraVolumeLabels, extraTags, identityServer, controllerServer, nodeServer)
if err != nil {
klog.Fatalf("Failed to initialize GCE CSI Driver: %v", err.Error())
}
gce.AttachDiskBackoff.Duration = *attachDiskBackoffDuration
gce.AttachDiskBackoff.Factor = *attachDiskBackoffFactor
gce.AttachDiskBackoff.Jitter = *attachDiskBackoffJitter
gce.AttachDiskBackoff.Steps = *attachDiskBackoffSteps
gce.AttachDiskBackoff.Cap = *attachDiskBackoffCap
gce.WaitForOpBackoff.Duration = *waitForOpBackoffDuration
gce.WaitForOpBackoff.Factor = *waitForOpBackoffFactor
gce.WaitForOpBackoff.Jitter = *waitForOpBackoffJitter
gce.WaitForOpBackoff.Steps = *waitForOpBackoffSteps
gce.WaitForOpBackoff.Cap = *waitForOpBackoffCap
gceDriver.Run(*endpoint, *grpcLogCharCap, *enableOtelTracing, metricsManager)
}
func notEmpty(v string) bool {
return v != ""
}
func parseCSVFlag(list string) []string {
return slices.Filter(nil, strings.Split(list, ","), notEmpty)
}
type enumConverter[T any] interface {
convert(v string) (T, error)
eq(a, b T) bool
}
type stringConverter[T ~string] struct{}
func (s stringConverter[T]) convert(v string) (T, error) {
return T(v), nil
}
func (s stringConverter[T]) eq(a, b T) bool {
return a == b
}
func stringEnumFlag[T ~string](target *T, name string, allowed []T, usage string) {
enumFlag(target, name, stringConverter[T]{}, allowed, usage)
}
func enumFlag[T any](target *T, name string, converter enumConverter[T], allowed []T, usage string) {
flag.Func(name, usage, func(flagValue string) error {
tValue, err := converter.convert(flagValue)
if err != nil {
return err
}
for _, allowedValue := range allowed {
if converter.eq(allowedValue, tValue) {
*target = tValue
return nil
}
}
errMsg := fmt.Sprintf(`must be one of %v`, allowedComputeEnvironment)
return errors.New(errMsg)
})
}
func urlFlag(target **url.URL, name string, usage string) {
flag.Func(name, usage, func(flagValue string) error {
if flagValue == "" {
return nil
}
computeURL, err := url.ParseRequestURI(flagValue)
if err == nil {
*target = computeURL
return nil
}
klog.Errorf("Error parsing endpoint compute endpoint %v", err)
return err
})
}