-
Notifications
You must be signed in to change notification settings - Fork 2k
/
client.go
3609 lines (3097 loc) · 111 KB
/
client.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: BUSL-1.1
package client
import (
"context"
"errors"
"fmt"
"maps"
"net"
"net/rpc"
"os"
"path/filepath"
"sort"
"strconv"
"strings"
"sync"
"time"
metrics "github.com/armon/go-metrics"
consulapi "github.com/hashicorp/consul/api"
hclog "github.com/hashicorp/go-hclog"
multierror "github.com/hashicorp/go-multierror"
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/client/allocrunner"
"github.com/hashicorp/nomad/client/allocrunner/interfaces"
arstate "github.com/hashicorp/nomad/client/allocrunner/state"
"github.com/hashicorp/nomad/client/allocrunner/taskrunner/getter"
"github.com/hashicorp/nomad/client/allocwatcher"
"github.com/hashicorp/nomad/client/config"
consulApiShim "github.com/hashicorp/nomad/client/consul"
"github.com/hashicorp/nomad/client/devicemanager"
"github.com/hashicorp/nomad/client/dynamicplugins"
"github.com/hashicorp/nomad/client/fingerprint"
"github.com/hashicorp/nomad/client/hoststats"
hvm "github.com/hashicorp/nomad/client/hostvolumemanager"
cinterfaces "github.com/hashicorp/nomad/client/interfaces"
"github.com/hashicorp/nomad/client/lib/cgroupslib"
"github.com/hashicorp/nomad/client/lib/numalib"
"github.com/hashicorp/nomad/client/lib/proclib"
"github.com/hashicorp/nomad/client/pluginmanager"
"github.com/hashicorp/nomad/client/pluginmanager/csimanager"
"github.com/hashicorp/nomad/client/pluginmanager/drivermanager"
"github.com/hashicorp/nomad/client/servers"
"github.com/hashicorp/nomad/client/serviceregistration"
"github.com/hashicorp/nomad/client/serviceregistration/checks/checkstore"
"github.com/hashicorp/nomad/client/serviceregistration/nsd"
"github.com/hashicorp/nomad/client/serviceregistration/wrapper"
"github.com/hashicorp/nomad/client/state"
cstructs "github.com/hashicorp/nomad/client/structs"
"github.com/hashicorp/nomad/client/vaultclient"
"github.com/hashicorp/nomad/client/widmgr"
"github.com/hashicorp/nomad/command/agent/consul"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/helper/envoy"
"github.com/hashicorp/nomad/helper/goruntime"
"github.com/hashicorp/nomad/helper/group"
"github.com/hashicorp/nomad/helper/pointer"
"github.com/hashicorp/nomad/helper/pool"
"github.com/hashicorp/nomad/helper/tlsutil"
"github.com/hashicorp/nomad/helper/users/dynamic"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/lib/lang"
"github.com/hashicorp/nomad/nomad/structs"
nconfig "github.com/hashicorp/nomad/nomad/structs/config"
"github.com/hashicorp/nomad/plugins/csi"
"github.com/hashicorp/nomad/plugins/device"
vaultapi "github.com/hashicorp/vault/api"
"github.com/shirou/gopsutil/v3/host"
)
const (
// clientRPCCache controls how long we keep an idle connection
// open to a server
clientRPCCache = 5 * time.Minute
// clientMaxStreams controls how many idle streams we keep
// open to a server
clientMaxStreams = 2
// datacenterQueryLimit searches through up to this many adjacent
// datacenters looking for the Nomad server service.
datacenterQueryLimit = 9
// registerRetryIntv is minimum interval on which we retry
// registration. We pick a value between this and 2x this.
registerRetryIntv = 15 * time.Second
// getAllocRetryIntv is minimum interval on which we retry
// to fetch allocations. We pick a value between this and 2x this.
getAllocRetryIntv = 30 * time.Second
// devModeRetryIntv is the retry interval used for development
devModeRetryIntv = time.Second
// noServerRetryIntv is the retry interval used when client has not
// connected to server yet
noServerRetryIntv = time.Second
// stateSnapshotIntv is how often the client snapshots state
stateSnapshotIntv = 60 * time.Second
// initialHeartbeatStagger is used to stagger the interval between
// starting and the initial heartbeat. After the initial heartbeat,
// we switch to using the TTL specified by the servers.
initialHeartbeatStagger = 10 * time.Second
// nodeUpdateRetryIntv is how often the client checks for updates to the
// node attributes or meta map.
nodeUpdateRetryIntv = 5 * time.Second
// allocSyncIntv is the batching period of allocation updates before they
// are synced with the server.
allocSyncIntv = 200 * time.Millisecond
// allocSyncRetryIntv is the interval on which we retry updating
// the status of the allocation
allocSyncRetryIntv = 5 * time.Second
)
var (
// grace period to allow for batch fingerprint processing
batchFirstFingerprintsProcessingGrace = batchFirstFingerprintsTimeout + 5*time.Second
)
// ClientStatsReporter exposes all the APIs related to resource usage of a Nomad
// Client
type ClientStatsReporter interface {
// GetAllocStats returns the AllocStatsReporter for the passed allocation.
// If it does not exist an error is reported.
GetAllocStats(allocID string) (interfaces.AllocStatsReporter, error)
// LatestHostStats returns the latest resource usage stats for the host
LatestHostStats() *hoststats.HostStats
}
// Client is used to implement the client interaction with Nomad. Clients
// are expected to register as a schedule-able node to the servers, and to
// run allocations as determined by the servers.
type Client struct {
start time.Time
// stateDB is used to efficiently store client state.
stateDB state.StateDB
// config must only be accessed with lock held. To update the config, use the
// Client.UpdateConfig() helper. If you need more fine grained control use
// the following pattern:
//
// c.configLock.Lock()
// newConfig := c.config.Copy()
// // <mutate newConfig>
// c.config = newConfig
// c.configLock.Unlock()
configLock sync.Mutex
config *config.Config
metaDynamic map[string]*string // dynamic node metadata
// metaStatic are the Node's static metadata set via the agent configuration
// and defaults during client initialization. Since this map is never updated
// at runtime it may be accessed outside of locks.
metaStatic map[string]string
logger hclog.InterceptLogger
rpcLogger hclog.Logger
connPool *pool.ConnPool
// tlsWrap is used to wrap outbound connections using TLS. It should be
// accessed using the lock.
tlsWrap tlsutil.RegionWrapper
tlsWrapLock sync.RWMutex
// servers is the list of nomad servers
servers *servers.Manager
// heartbeat related times for tracking how often to heartbeat
heartbeatTTL time.Duration
haveHeartbeated bool
heartbeatLock sync.Mutex
heartbeatStop *heartbeatStop
// triggerDiscoveryCh triggers Consul discovery; see triggerDiscovery
triggerDiscoveryCh chan struct{}
// triggerNodeUpdate triggers the client to mark the Node as changed and
// update it.
triggerNodeUpdate chan struct{}
// triggerEmitNodeEvent sends an event and triggers the client to update the
// server for the node event
triggerEmitNodeEvent chan *structs.NodeEvent
// rpcRetryCh is closed when there an event such as server discovery or a
// successful RPC occurring happens such that a retry should happen. Access
// should only occur via the getter method
rpcRetryCh chan struct{}
rpcRetryLock sync.Mutex
// allocs maps alloc IDs to their AllocRunner. This map includes all
// AllocRunners - running and GC'd - until the server GCs them.
allocs map[string]interfaces.AllocRunner
allocLock sync.RWMutex
// allocrunnerFactory is the function called to create new allocrunners
allocrunnerFactory config.AllocRunnerFactory
// invalidAllocs is a map that tracks allocations that failed because
// the client couldn't initialize alloc or task runners for it. This can
// happen due to driver errors
invalidAllocs map[string]struct{}
invalidAllocsLock sync.Mutex
// pendingUpdates stores allocations that need to be synced to the server.
pendingUpdates *pendingClientUpdates
// consulServices gets a Consul handler implementation for managing
// services and checks.
consulServices serviceregistration.Handler
// nomadService is the Nomad handler implementation for managing service
// registrations.
nomadService serviceregistration.Handler
// checkStore is used to store group and task checks and their current pass/fail
// status.
checkStore checkstore.Shim
// serviceRegWrapper wraps the consulService and nomadService
// implementations so that the alloc and task runner service hooks can call
// this without needing to identify which backend provider should be used.
serviceRegWrapper *wrapper.HandlerWrapper
// consulProxiesFunc gets an interface to Nomad's custom Consul client for
// looking up supported envoy versions
consulProxiesFunc consulApiShim.SupportedProxiesAPIFunc
// consulCatalog is the subset of Consul's Catalog API Nomad uses for self
// service discovery
consulCatalog consul.CatalogAPI
// HostStatsCollector collects host resource usage stats
hostStatsCollector *hoststats.HostStatsCollector
// shutdown is true when the Client has been shutdown. Must hold
// shutdownLock to access.
shutdown bool
// shutdownCh is closed to signal the Client is shutting down.
shutdownCh chan struct{}
shutdownLock sync.Mutex
// shutdownGroup are goroutines that exit when shutdownCh is closed.
// Shutdown() blocks on Wait() after closing shutdownCh.
shutdownGroup group.Group
// tokensClient is Nomad Client's custom Consul client for requesting Consul
// Service Identity tokens through Nomad Server.
tokensClient consulApiShim.ServiceIdentityAPI
// vaultClients is used to interact with Vault for token and secret renewals
vaultClients map[string]vaultclient.VaultClient
// garbageCollector is used to garbage collect terminal allocations present
// in the node automatically
garbageCollector *AllocGarbageCollector
// clientACLResolver holds the ACL resolution state
clientACLResolver
// rpcServer is used to serve RPCs by the local agent.
rpcServer *rpc.Server
endpoints rpcEndpoints
streamingRpcs *structs.StreamingRpcRegistry
// fingerprintManager is the FingerprintManager registered by the client
fingerprintManager *FingerprintManager
// pluginManagers is the set of PluginManagers registered by the client
pluginManagers *pluginmanager.PluginGroup
// csimanager is responsible for managing csi plugins.
csimanager csimanager.Manager
// devicemanger is responsible for managing device plugins.
devicemanager devicemanager.Manager
// drivermanager is responsible for managing driver plugins
drivermanager drivermanager.Manager
hostVolumeManager *hvm.HostVolumeManager
// baseLabels are used when emitting tagged metrics. All client metrics will
// have these tags, and optionally more.
baseLabels []metrics.Label
// batchNodeUpdates is used to batch initial updates to the node
batchNodeUpdates *batchNodeUpdates
// fpInitialized chan is closed when the first batch of fingerprints are
// applied to the node
fpInitialized chan struct{}
// registeredCh is closed when Node.Register has successfully run once.
registeredCh chan struct{}
registeredOnce sync.Once
// serversContactedCh is closed when GetClientAllocs and runAllocs have
// successfully run once.
serversContactedCh chan struct{}
serversContactedOnce sync.Once
// dynamicRegistry provides access to plugins that are dynamically registered
// with a nomad client. Currently only used for CSI.
dynamicRegistry dynamicplugins.Registry
// EnterpriseClient is used to set and check enterprise features for clients
EnterpriseClient *EnterpriseClient
// getter is an interface for retrieving artifacts.
getter cinterfaces.ArtifactGetter
// wranglers is used to keep track of processes and manage their interaction
// with drivers and stuff
wranglers *proclib.Wranglers
// topology represents the system memory / cpu topology detected via
// fingerprinting
topology *numalib.Topology
// partitions is used for managing cpuset partitioning on linux systems
partitions cgroupslib.Partition
// widsigner signs workload identities
widsigner widmgr.IdentitySigner
// users is a pool of dynamic workload users
users dynamic.Pool
}
var (
// noServersErr is returned by the RPC method when the client has no
// configured servers. This is used to trigger Consul discovery if
// enabled.
noServersErr = errors.New("no servers")
)
// NewClient is used to create a new client from the given configuration.
// `rpcs` is a map of RPC names to RPC structs that, if non-nil, will be
// registered via https://golang.org/pkg/net/rpc/#Server.RegisterName in place
// of the client's normal RPC handlers. This allows server tests to override
// the behavior of the client.
func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulProxiesFunc consulApiShim.SupportedProxiesAPIFunc, consulServices serviceregistration.Handler, rpcs map[string]interface{}) (*Client, error) {
// Create the tls wrapper
var tlsWrap tlsutil.RegionWrapper
if cfg.TLSConfig.EnableRPC {
tw, err := tlsutil.NewTLSConfiguration(cfg.TLSConfig, true, true)
if err != nil {
return nil, err
}
tlsWrap, err = tw.OutgoingTLSWrapper()
if err != nil {
return nil, err
}
}
if cfg.StateDBFactory == nil {
cfg.StateDBFactory = state.GetStateDBFactory(cfg.DevMode)
}
// Create the logger
logger := cfg.Logger.ResetNamedIntercept("client")
// Create the client
c := &Client{
config: cfg,
consulCatalog: consulCatalog,
consulProxiesFunc: consulProxiesFunc,
consulServices: consulServices,
start: time.Now(),
connPool: pool.NewPool(logger, clientRPCCache, clientMaxStreams, tlsWrap),
tlsWrap: tlsWrap,
streamingRpcs: structs.NewStreamingRpcRegistry(),
logger: logger,
rpcLogger: logger.Named("rpc"),
allocs: make(map[string]interfaces.AllocRunner),
pendingUpdates: newPendingClientUpdates(),
shutdownCh: make(chan struct{}),
triggerDiscoveryCh: make(chan struct{}),
triggerNodeUpdate: make(chan struct{}, 8),
triggerEmitNodeEvent: make(chan *structs.NodeEvent, 8),
fpInitialized: make(chan struct{}),
invalidAllocs: make(map[string]struct{}),
serversContactedCh: make(chan struct{}),
serversContactedOnce: sync.Once{},
registeredCh: make(chan struct{}),
registeredOnce: sync.Once{},
getter: getter.New(cfg.Artifact, logger),
EnterpriseClient: newEnterpriseClient(logger),
allocrunnerFactory: cfg.AllocRunnerFactory,
}
// we can't have this set in the default Config because of import cycles
if c.allocrunnerFactory == nil {
c.allocrunnerFactory = allocrunner.NewAllocRunner
}
c.batchNodeUpdates = newBatchNodeUpdates(
c.updateNodeFromDriver,
c.updateNodeFromDevices,
c.updateNodeFromCSI,
c.updateNodeFromHostVol,
)
// Initialize the server manager
c.servers = servers.New(c.logger, c.shutdownCh, c)
// Start server manager rebalancing go routine
go c.servers.Start()
// initialize the client
if err := c.init(); err != nil {
return nil, fmt.Errorf("failed to initialize client: %v", err)
}
// initialize the dynamic registry (needs to happen after init)
c.dynamicRegistry =
dynamicplugins.NewRegistry(c.stateDB, map[string]dynamicplugins.PluginDispenser{
dynamicplugins.PluginTypeCSIController: func(info *dynamicplugins.PluginInfo) (interface{}, error) {
return csi.NewClient(info.ConnectionInfo.SocketPath, logger.Named("csi_client").With("plugin.name", info.Name, "plugin.type", "controller")), nil
},
dynamicplugins.PluginTypeCSINode: func(info *dynamicplugins.PluginInfo) (interface{}, error) {
return csi.NewClient(info.ConnectionInfo.SocketPath, logger.Named("csi_client").With("plugin.name", info.Name, "plugin.type", "client")), nil
},
})
// Setup the clients RPC server
c.setupClientRpc(rpcs)
// Initialize the ACL state
c.clientACLResolver.init()
// Setup the node
if err := c.setupNode(); err != nil {
return nil, fmt.Errorf("node setup failed: %v", err)
}
// Add workload identity signer after node secret has been generated/loaded
c.widsigner = widmgr.NewSigner(widmgr.SignerConfig{
NodeSecret: c.secretNodeID(),
Region: cfg.Region,
RPC: c,
})
c.fingerprintManager = NewFingerprintManager(
cfg.PluginSingletonLoader,
c.GetConfig,
cfg.Node,
c.shutdownCh,
c.updateNodeFromFingerprint,
c.logger,
)
c.pluginManagers = pluginmanager.New(c.logger)
// Fingerprint the node and scan for drivers
if ir, err := c.fingerprintManager.Run(); err != nil {
return nil, fmt.Errorf("fingerprinting failed: %v", err)
} else {
c.topology = numalib.NoImpl(ir.Topology)
}
// Create the dynamic workload users pool
c.users = dynamic.New(&dynamic.PoolConfig{
MinUGID: cfg.Users.MinDynamicUser,
MaxUGID: cfg.Users.MaxDynamicUser,
})
// Create the cpu core partition manager
c.partitions = cgroupslib.GetPartition(c.logger.Named("partitions"),
c.topology.UsableCores(),
)
// Create the process wranglers
wranglers, err := proclib.New(&proclib.Configs{
UsableCores: c.topology.UsableCores(),
Logger: c.logger.Named("proclib"),
})
if err != nil {
return nil, fmt.Errorf("failed to initialize process manager: %w", err)
}
c.wranglers = wranglers
// Build the allow/denylists of drivers.
// COMPAT(1.0) uses inclusive language. white/blacklist are there for backward compatible reasons only.
allowlistDrivers := cfg.ReadStringListToMap("driver.allowlist", "driver.whitelist")
blocklistDrivers := cfg.ReadStringListToMap("driver.denylist", "driver.blacklist")
// Setup the csi manager
csiConfig := &csimanager.Config{
Logger: c.logger,
DynamicRegistry: c.dynamicRegistry,
UpdateNodeCSIInfoFunc: c.batchNodeUpdates.updateNodeFromCSI,
TriggerNodeEvent: c.triggerNodeEvent,
}
csiManager := csimanager.New(csiConfig)
c.csimanager = csiManager
c.pluginManagers.RegisterAndRun(csiManager.PluginManager())
// Setup the driver manager
driverConfig := &drivermanager.Config{
Logger: c.logger,
Loader: cfg.PluginSingletonLoader,
PluginConfig: cfg.NomadPluginConfig(c.topology),
Updater: c.batchNodeUpdates.updateNodeFromDriver,
EventHandlerFactory: c.GetTaskEventHandler,
State: c.stateDB,
AllowedDrivers: allowlistDrivers,
BlockedDrivers: blocklistDrivers,
}
drvManager := drivermanager.New(driverConfig)
c.drivermanager = drvManager
c.pluginManagers.RegisterAndRun(drvManager)
// Setup the device manager
devConfig := &devicemanager.Config{
Logger: c.logger,
Loader: cfg.PluginSingletonLoader,
PluginConfig: cfg.NomadPluginConfig(c.topology),
Updater: c.batchNodeUpdates.updateNodeFromDevices,
StatsInterval: cfg.StatsCollectionInterval,
State: c.stateDB,
}
devManager := devicemanager.New(devConfig)
c.devicemanager = devManager
c.pluginManagers.RegisterAndRun(devManager)
// set up dynamic host volume manager
c.hostVolumeManager = hvm.NewHostVolumeManager(logger, hvm.Config{
PluginDir: cfg.HostVolumePluginDir,
SharedMountDir: cfg.AllocMountsDir,
StateMgr: c.stateDB,
UpdateNodeVols: c.batchNodeUpdates.updateNodeFromHostVolume,
})
c.pluginManagers.RegisterAndRun(c.hostVolumeManager)
// Set up the service registration wrapper using the Consul and Nomad
// implementations. The Nomad implementation is only ever used on the
// client, so we do that here rather than within the agent.
c.setupNomadServiceRegistrationHandler()
c.serviceRegWrapper = wrapper.NewHandlerWrapper(c.logger, c.consulServices, c.nomadService)
// Batching of initial fingerprints is done to reduce the number of node
// updates sent to the server on startup.
go c.batchFirstFingerprints()
// create heartbeatStop. We go after the first attempt to connect to the server, so
// that our grace period for connection goes for the full time
c.heartbeatStop = newHeartbeatStop(c.getAllocRunner, batchFirstFingerprintsTimeout, logger, c.shutdownCh)
// Watch for disconnection, and heartbeatStopAllocs configured to have a maximum
// lifetime when out of touch with the server
go c.heartbeatStop.watch()
// Add the stats collector
statsCollector := hoststats.NewHostStatsCollector(c.logger, c.topology, c.GetConfig().AllocDir, c.devicemanager.AllStats)
c.hostStatsCollector = statsCollector
// Add the garbage collector
gcConfig := &GCConfig{
MaxAllocs: cfg.GCMaxAllocs,
DiskUsageThreshold: cfg.GCDiskUsageThreshold,
InodeUsageThreshold: cfg.GCInodeUsageThreshold,
Interval: cfg.GCInterval,
ParallelDestroys: cfg.GCParallelDestroys,
ReservedDiskMB: cfg.Node.Reserved.DiskMB,
}
c.garbageCollector = NewAllocGarbageCollector(c.logger, statsCollector, c, gcConfig)
go c.garbageCollector.Run()
// Set the preconfigured list of static servers
if len(cfg.Servers) > 0 {
if _, err := c.setServersImpl(cfg.Servers, true); err != nil {
logger.Warn("none of the configured servers are valid", "error", err)
}
}
// Setup Consul discovery if enabled
if cfg.GetDefaultConsul().ClientAutoJoin != nil && *cfg.GetDefaultConsul().ClientAutoJoin {
c.shutdownGroup.Go(c.consulDiscovery)
if c.servers.NumServers() == 0 {
// No configured servers; trigger discovery manually
c.triggerDiscoveryCh <- struct{}{}
}
}
if err := c.setupConsulTokenClient(); err != nil {
return nil, fmt.Errorf("failed to setup consul tokens client: %w", err)
}
// Setup the vault client for token and secret renewals
if err := c.setupVaultClients(); err != nil {
return nil, fmt.Errorf("failed to setup vault client: %v", err)
}
// wait until drivers are healthy before restoring or registering with servers
select {
case <-c.fpInitialized:
case <-time.After(batchFirstFingerprintsProcessingGrace):
logger.Warn("batch fingerprint operation timed out; proceeding to register with fingerprinted plugins so far")
}
// Register and then start heartbeating to the servers.
c.shutdownGroup.Go(c.registerAndHeartbeat)
// Restore the state
if err := c.restoreState(); err != nil {
logger.Error("failed to restore state", "error", err)
logger.Error("Nomad is unable to start due to corrupt state. "+
"The safest way to proceed is to manually stop running task processes "+
"and remove Nomad's state and alloc directories before "+
"restarting. Lost allocations will be rescheduled.",
"state_dir", cfg.StateDir, "alloc_dir", cfg.AllocDir)
logger.Error("Corrupt state is often caused by a bug. Please " +
"report as much information as possible to " +
"https://github.com/hashicorp/nomad/issues")
return nil, fmt.Errorf("failed to restore state")
}
// Begin periodic snapshotting of state.
c.shutdownGroup.Go(c.periodicSnapshot)
// Begin syncing allocations to the server
c.shutdownGroup.Go(c.allocSync)
// Ensure our base labels are generated and stored before we start the
// client and begin emitting stats.
c.setupStatsLabels()
// Start the client! Don't use the shutdownGroup as run handles
// shutdowns manually to prevent updates from being applied during
// shutdown.
go c.run()
// Start collecting stats
c.shutdownGroup.Go(c.emitStats)
c.logger.Info("started client", "node_id", c.NodeID())
return c, nil
}
// Ready returns a chan that is closed when the client is fully initialized
func (c *Client) Ready() <-chan struct{} {
return c.serversContactedCh
}
// init is used to initialize the client and perform any setup
// needed before we begin starting its various components.
func (c *Client) init() error {
// Ensure the state dir exists if we have one
conf := c.GetConfig()
if conf.StateDir != "" {
if err := os.MkdirAll(conf.StateDir, 0700); err != nil {
return fmt.Errorf("failed creating state dir: %s", err)
}
} else {
// Otherwise make a temp directory to use.
p, err := os.MkdirTemp("", "NomadClient")
if err != nil {
return fmt.Errorf("failed creating temporary directory for the StateDir: %v", err)
}
p, err = filepath.EvalSymlinks(p)
if err != nil {
return fmt.Errorf("failed to find temporary directory for the StateDir: %v", err)
}
conf = c.UpdateConfig(func(c *config.Config) {
c.StateDir = p
})
}
c.logger.Info("using state directory", "state_dir", conf.StateDir)
// Open the state database
db, err := conf.StateDBFactory(c.logger, conf.StateDir)
if err != nil {
return fmt.Errorf("failed to open state database: %v", err)
}
// Upgrade the state database
if err := db.Upgrade(); err != nil {
// Upgrade only returns an error on critical persistence
// failures in which an operator should intervene before the
// node is accessible. Upgrade drops and logs corrupt state it
// encounters, so failing to start the agent should be extremely
// rare.
return fmt.Errorf("failed to upgrade state database: %v", err)
}
c.stateDB = db
// Ensure the alloc mounts dir exists if we are configured with a custom path.
if conf.AllocMountsDir != "" {
if err := os.MkdirAll(conf.AllocMountsDir, 0o711); err != nil {
return fmt.Errorf("failed creating alloc mounts dir: %w", err)
}
}
// Ensure the alloc dir exists if we are configured with a custom path.
if conf.AllocDir != "" {
if err := os.MkdirAll(conf.AllocDir, 0o711); err != nil {
return fmt.Errorf("failed creating alloc dir: %w", err)
}
} else {
// Otherwise make a temp directory to use.
p, err := os.MkdirTemp("", "NomadClient")
if err != nil {
return fmt.Errorf("failed creating temporary directory for the AllocDir: %v", err)
}
p, err = filepath.EvalSymlinks(p)
if err != nil {
return fmt.Errorf("failed to find temporary directory for the AllocDir: %v", err)
}
// Change the permissions to have the execute bit
if err := os.Chmod(p, 0o711); err != nil {
return fmt.Errorf("failed to change directory permissions for the AllocDir: %v", err)
}
conf = c.UpdateConfig(func(c *config.Config) {
c.AllocDir = p
c.AllocMountsDir = p
})
}
c.logger.Info("using alloc directory", "alloc_dir", conf.AllocDir)
reserved := "<none>"
if conf.Node != nil && conf.Node.ReservedResources != nil {
// Node should always be non-nil due to initialization in the
// agent package, but don't risk a panic just for a long line.
reserved = conf.Node.ReservedResources.Networks.ReservedHostPorts
}
c.logger.Info("using dynamic ports",
"min", conf.MinDynamicPort,
"max", conf.MaxDynamicPort,
"reserved", reserved,
)
// setup the nsd check store
c.checkStore = checkstore.NewStore(c.logger, c.stateDB)
return nil
}
// reloadTLSConnections allows a client to reload its TLS configuration on the fly
func (c *Client) reloadTLSConnections(newConfig *nconfig.TLSConfig) error {
var tlsWrap tlsutil.RegionWrapper
if newConfig != nil && newConfig.EnableRPC {
tw, err := tlsutil.NewTLSConfiguration(newConfig, true, true)
if err != nil {
return err
}
twWrap, err := tw.OutgoingTLSWrapper()
if err != nil {
return err
}
tlsWrap = twWrap
}
// Store the new tls wrapper.
c.tlsWrapLock.Lock()
c.tlsWrap = tlsWrap
c.tlsWrapLock.Unlock()
// Keep the client configuration up to date as we use configuration values to
// decide on what type of connections to accept
c.UpdateConfig(func(c *config.Config) {
c.TLSConfig = newConfig
})
c.connPool.ReloadTLS(tlsWrap)
return nil
}
// Reload allows a client to reload parts of its configuration on the fly
func (c *Client) Reload(newConfig *config.Config) error {
existing := c.GetConfig()
shouldReloadTLS, err := tlsutil.ShouldReloadRPCConnections(existing.TLSConfig, newConfig.TLSConfig)
if err != nil {
c.logger.Error("error parsing TLS configuration", "error", err)
return err
}
if shouldReloadTLS {
if err := c.reloadTLSConnections(newConfig.TLSConfig); err != nil {
return err
}
}
c.fingerprintManager.Reload()
return nil
}
// Leave is used to prepare the client to leave the cluster
func (c *Client) Leave() error {
if c.GetConfig().DevMode {
return nil
}
// In normal mode optionally drain the node
return c.DrainSelf()
}
// GetConfig returns the config of the client. Do *not* mutate without first
// calling Copy().
func (c *Client) GetConfig() *config.Config {
c.configLock.Lock()
defer c.configLock.Unlock()
return c.config
}
// UpdateConfig allows mutating the configuration. The updated configuration is
// returned.
func (c *Client) UpdateConfig(cb func(*config.Config)) *config.Config {
c.configLock.Lock()
defer c.configLock.Unlock()
// Create a copy of the active config
newConfig := c.config.Copy()
// Pass the copy to the supplied callback for mutation
cb(newConfig)
// Set new config struct
c.config = newConfig
return newConfig
}
// UpdateNode allows mutating just the Node portion of the client
// configuration. The updated Node is returned.
//
// This is similar to UpdateConfig but avoids deep copying the entier Config
// struct when only the Node is updated.
func (c *Client) UpdateNode(cb func(*structs.Node)) *structs.Node {
c.configLock.Lock()
defer c.configLock.Unlock()
// Create a new copy of Node for updating
newNode := c.config.Node.Copy()
// newNode is now a fresh unshared copy, mutate away!
cb(newNode)
// Shallow copy config before mutating Node pointer which might have
// concurrent readers
newConfig := *c.config
newConfig.Node = newNode
c.config = &newConfig
return newNode
}
// Datacenter returns the datacenter for the given client
func (c *Client) Datacenter() string {
return c.GetConfig().Node.Datacenter
}
// Region returns the region for the given client
func (c *Client) Region() string {
return c.GetConfig().Region
}
// NodeID returns the node ID for the given client
func (c *Client) NodeID() string {
return c.GetConfig().Node.ID
}
// secretNodeID returns the secret node ID for the given client
func (c *Client) secretNodeID() string {
return c.GetConfig().Node.SecretID
}
// Shutdown is used to tear down the client
func (c *Client) Shutdown() error {
c.shutdownLock.Lock()
defer c.shutdownLock.Unlock()
if c.shutdown {
c.logger.Info("already shutdown")
return nil
}
c.logger.Info("shutting down")
// Stop renewing tokens and secrets
for _, vaultClient := range c.vaultClients {
vaultClient.Stop()
}
// Stop Garbage collector
c.garbageCollector.Stop()
arGroup := group.Group{}
if c.GetConfig().DevMode {
// In DevMode destroy all the running allocations.
for _, ar := range c.getAllocRunners() {
ar.Destroy()
arGroup.AddCh(ar.DestroyCh())
}
} else {
// In normal mode call shutdown
for _, ar := range c.getAllocRunners() {
ar.Shutdown()
arGroup.AddCh(ar.ShutdownCh())
}
}
arGroup.Wait()
// Assert the implementation, so we can trigger the shutdown call. This is
// the only place this occurs, so it's OK to store the interface rather
// than the implementation.
if h, ok := c.nomadService.(*nsd.ServiceRegistrationHandler); ok {
h.Shutdown()
}
// Shutdown the plugin managers
c.pluginManagers.Shutdown()
c.shutdown = true
close(c.shutdownCh)
// Must close connection pool to unblock alloc watcher
c.connPool.Shutdown()
// Wait for goroutines to stop
c.shutdownGroup.Wait()
// One final save state
c.saveState()
return c.stateDB.Close()
}
// Stats is used to return statistics for debugging and insight
// for various sub-systems
func (c *Client) Stats() map[string]map[string]string {
c.heartbeatLock.Lock()
defer c.heartbeatLock.Unlock()
stats := map[string]map[string]string{
"client": {
"node_id": c.NodeID(),
"known_servers": strings.Join(c.GetServers(), ","),
"num_allocations": strconv.Itoa(c.NumAllocs()),
"last_heartbeat": fmt.Sprintf("%v", time.Since(c.lastHeartbeat())),
"heartbeat_ttl": fmt.Sprintf("%v", c.heartbeatTTL),
},
"runtime": goruntime.RuntimeStats(),
}
return stats
}
// GetAlloc returns an allocation or an error.
func (c *Client) GetAlloc(allocID string) (*structs.Allocation, error) {
ar, err := c.getAllocRunner(allocID)
if err != nil {
return nil, err
}
return ar.Alloc(), nil
}
// SignalAllocation sends a signal to the tasks within an allocation.
// If the provided task is empty, then every allocation will be signalled.
// If a task is provided, then only an exactly matching task will be signalled.
func (c *Client) SignalAllocation(allocID, task, signal string) error {
ar, err := c.getAllocRunner(allocID)
if err != nil {
return err
}
return ar.Signal(task, signal)
}
// PauseAllocation sets the pause state of the given task for the allocation.
func (c *Client) PauseAllocation(allocID, task string, scheduleState structs.TaskScheduleState) error {
ar, err := c.getAllocRunner(allocID)
if err != nil {
return err
}
return ar.SetTaskPauseState(task, scheduleState)
}
// GetPauseAllocation gets the pause state of the given task for the allocation.
func (c *Client) GetPauseAllocation(allocID, task string) (structs.TaskScheduleState, error) {