diff --git a/.changelog/24601.txt b/.changelog/24601.txt new file mode 100644 index 00000000000..e8b8807f300 --- /dev/null +++ b/.changelog/24601.txt @@ -0,0 +1,3 @@ +```release-note:breaking-change +sentinel: The sentinel apply command now requires the -scope option +``` diff --git a/acl/acl_test.go b/acl/acl_test.go index cf0c4bda3f4..24ccf2b4103 100644 --- a/acl/acl_test.go +++ b/acl/acl_test.go @@ -79,10 +79,12 @@ func TestACLManagement(t *testing.T) { // Check default namespace rights must.True(t, acl.AllowNamespaceOperation("default", NamespaceCapabilityListJobs)) must.True(t, acl.AllowNamespaceOperation("default", NamespaceCapabilitySubmitJob)) + must.True(t, acl.AllowNamespaceOperation("default", NamespaceCapabilityHostVolumeCreate)) must.True(t, acl.AllowNamespace("default")) // Check non-specified namespace must.True(t, acl.AllowNamespaceOperation("foo", NamespaceCapabilityListJobs)) + must.True(t, acl.AllowNamespaceOperation("foo", NamespaceCapabilityHostVolumeCreate)) must.True(t, acl.AllowNamespace("foo")) // Check node pool rights. @@ -155,9 +157,11 @@ func TestACLMerge(t *testing.T) { // Check default namespace rights must.True(t, acl.AllowNamespaceOperation("default", NamespaceCapabilityListJobs)) must.False(t, acl.AllowNamespaceOperation("default", NamespaceCapabilitySubmitJob)) + must.False(t, acl.AllowNamespaceOperation("default", NamespaceCapabilityHostVolumeRegister)) // Check non-specified namespace must.False(t, acl.AllowNamespaceOperation("foo", NamespaceCapabilityListJobs)) + must.False(t, acl.AllowNamespaceOperation("foo", NamespaceCapabilityHostVolumeCreate)) // Check rights in the node pool specified in policies. must.True(t, acl.AllowNodePoolOperation("my-pool", NodePoolCapabilityRead)) diff --git a/acl/policy.go b/acl/policy.go index c4fe9e4d673..17a7aed2170 100644 --- a/acl/policy.go +++ b/acl/policy.go @@ -47,6 +47,11 @@ const ( NamespaceCapabilityCSIReadVolume = "csi-read-volume" NamespaceCapabilityCSIListVolume = "csi-list-volume" NamespaceCapabilityCSIMountVolume = "csi-mount-volume" + NamespaceCapabilityHostVolumeCreate = "host-volume-create" + NamespaceCapabilityHostVolumeRegister = "host-volume-register" + NamespaceCapabilityHostVolumeRead = "host-volume-read" + NamespaceCapabilityHostVolumeWrite = "host-volume-write" + NamespaceCapabilityHostVolumeDelete = "host-volume-delete" NamespaceCapabilityListScalingPolicies = "list-scaling-policies" NamespaceCapabilityReadScalingPolicy = "read-scaling-policy" NamespaceCapabilityReadJobScaling = "read-job-scaling" @@ -207,7 +212,7 @@ func isNamespaceCapabilityValid(cap string) bool { NamespaceCapabilityReadFS, NamespaceCapabilityAllocLifecycle, NamespaceCapabilityAllocExec, NamespaceCapabilityAllocNodeExec, NamespaceCapabilityCSIReadVolume, NamespaceCapabilityCSIWriteVolume, NamespaceCapabilityCSIListVolume, NamespaceCapabilityCSIMountVolume, NamespaceCapabilityCSIRegisterPlugin, - NamespaceCapabilityListScalingPolicies, NamespaceCapabilityReadScalingPolicy, NamespaceCapabilityReadJobScaling, NamespaceCapabilityScaleJob: + NamespaceCapabilityListScalingPolicies, NamespaceCapabilityReadScalingPolicy, NamespaceCapabilityReadJobScaling, NamespaceCapabilityScaleJob, NamespaceCapabilityHostVolumeCreate, NamespaceCapabilityHostVolumeRegister, NamespaceCapabilityHostVolumeWrite, NamespaceCapabilityHostVolumeRead: return true // Separate the enterprise-only capabilities case NamespaceCapabilitySentinelOverride, NamespaceCapabilitySubmitRecommendation: @@ -241,6 +246,7 @@ func expandNamespacePolicy(policy string) 
[]string { NamespaceCapabilityReadJobScaling, NamespaceCapabilityListScalingPolicies, NamespaceCapabilityReadScalingPolicy, + NamespaceCapabilityHostVolumeRead, } write := make([]string, len(read)) @@ -257,6 +263,7 @@ func expandNamespacePolicy(policy string) []string { NamespaceCapabilityCSIMountVolume, NamespaceCapabilityCSIWriteVolume, NamespaceCapabilitySubmitRecommendation, + NamespaceCapabilityHostVolumeCreate, }...) switch policy { @@ -278,6 +285,32 @@ func expandNamespacePolicy(policy string) []string { } } +// expandNamespaceCapabilities adds extra capabilities implied by fine-grained +// capabilities. +func expandNamespaceCapabilities(ns *NamespacePolicy) { + extraCaps := []string{} + for _, cap := range ns.Capabilities { + switch cap { + case NamespaceCapabilityHostVolumeWrite: + extraCaps = append(extraCaps, + NamespaceCapabilityHostVolumeRegister, + NamespaceCapabilityHostVolumeCreate, + NamespaceCapabilityHostVolumeDelete, + NamespaceCapabilityHostVolumeRead) + case NamespaceCapabilityHostVolumeRegister: + extraCaps = append(extraCaps, + NamespaceCapabilityHostVolumeCreate, + NamespaceCapabilityHostVolumeRead) + case NamespaceCapabilityHostVolumeCreate: + extraCaps = append(extraCaps, NamespaceCapabilityHostVolumeRead) + } + } + + // These may end up being duplicated, but they'll get deduplicated in NewACL + // when inserted into the radix tree. + ns.Capabilities = append(ns.Capabilities, extraCaps...) +} + func isNodePoolCapabilityValid(cap string) bool { switch cap { case NodePoolCapabilityDelete, NodePoolCapabilityRead, NodePoolCapabilityWrite, @@ -388,6 +421,9 @@ func Parse(rules string) (*Policy, error) { ns.Capabilities = append(ns.Capabilities, extraCap...) } + // Expand implicit capabilities + expandNamespaceCapabilities(ns) + if ns.Variables != nil { if len(ns.Variables.Paths) == 0 { return nil, fmt.Errorf("Invalid variable policy: no variable paths in namespace %s", ns.Name) diff --git a/acl/policy_test.go b/acl/policy_test.go index 117b82ba3d6..938557aa08a 100644 --- a/acl/policy_test.go +++ b/acl/policy_test.go @@ -5,7 +5,6 @@ package acl import ( "fmt" - "strings" "testing" "github.com/hashicorp/nomad/ci" @@ -17,9 +16,9 @@ func TestParse(t *testing.T) { ci.Parallel(t) type tcase struct { - Raw string - ErrStr string - Expect *Policy + Raw string + ExpectErr string + Expect *Policy } tcases := []tcase{ { @@ -43,6 +42,7 @@ func TestParse(t *testing.T) { NamespaceCapabilityReadJobScaling, NamespaceCapabilityListScalingPolicies, NamespaceCapabilityReadScalingPolicy, + NamespaceCapabilityHostVolumeRead, }, }, }, @@ -118,6 +118,7 @@ func TestParse(t *testing.T) { NamespaceCapabilityReadJobScaling, NamespaceCapabilityListScalingPolicies, NamespaceCapabilityReadScalingPolicy, + NamespaceCapabilityHostVolumeRead, }, }, { @@ -132,6 +133,7 @@ func TestParse(t *testing.T) { NamespaceCapabilityReadJobScaling, NamespaceCapabilityListScalingPolicies, NamespaceCapabilityReadScalingPolicy, + NamespaceCapabilityHostVolumeRead, NamespaceCapabilityScaleJob, NamespaceCapabilitySubmitJob, NamespaceCapabilityDispatchJob, @@ -142,6 +144,8 @@ func TestParse(t *testing.T) { NamespaceCapabilityCSIMountVolume, NamespaceCapabilityCSIWriteVolume, NamespaceCapabilitySubmitRecommendation, + NamespaceCapabilityHostVolumeCreate, + NamespaceCapabilityHostVolumeRead, }, }, { @@ -338,6 +342,7 @@ func TestParse(t *testing.T) { NamespaceCapabilityReadJobScaling, NamespaceCapabilityListScalingPolicies, NamespaceCapabilityReadScalingPolicy, + NamespaceCapabilityHostVolumeRead, }, }, { @@ -352,6 
+357,7 @@ func TestParse(t *testing.T) { NamespaceCapabilityReadJobScaling, NamespaceCapabilityListScalingPolicies, NamespaceCapabilityReadScalingPolicy, + NamespaceCapabilityHostVolumeRead, NamespaceCapabilityScaleJob, NamespaceCapabilitySubmitJob, NamespaceCapabilityDispatchJob, @@ -362,6 +368,8 @@ func TestParse(t *testing.T) { NamespaceCapabilityCSIMountVolume, NamespaceCapabilityCSIWriteVolume, NamespaceCapabilitySubmitRecommendation, + NamespaceCapabilityHostVolumeCreate, + NamespaceCapabilityHostVolumeRead, }, }, { @@ -638,6 +646,54 @@ func TestParse(t *testing.T) { }, }, }, + { + ` + namespace "default" { + capabilities = ["host-volume-register"] + } + + namespace "other" { + capabilities = ["host-volume-create"] + } + + namespace "foo" { + capabilities = ["host-volume-write"] + } + `, + "", + &Policy{ + Namespaces: []*NamespacePolicy{ + { + Name: "default", + Policy: "", + Capabilities: []string{ + NamespaceCapabilityHostVolumeRegister, + NamespaceCapabilityHostVolumeCreate, + NamespaceCapabilityHostVolumeRead, + }, + }, + { + Name: "other", + Policy: "", + Capabilities: []string{ + NamespaceCapabilityHostVolumeCreate, + NamespaceCapabilityHostVolumeRead, + }, + }, + { + Name: "foo", + Policy: "", + Capabilities: []string{ + NamespaceCapabilityHostVolumeWrite, + NamespaceCapabilityHostVolumeRegister, + NamespaceCapabilityHostVolumeCreate, + NamespaceCapabilityHostVolumeDelete, + NamespaceCapabilityHostVolumeRead, + }, + }, + }, + }, + }, { ` node_pool "pool-read-only" { @@ -878,22 +934,18 @@ func TestParse(t *testing.T) { } for idx, tc := range tcases { - t.Run(fmt.Sprintf("%d", idx), func(t *testing.T) { + t.Run(fmt.Sprintf("%02d", idx), func(t *testing.T) { p, err := Parse(tc.Raw) - if err != nil { - if tc.ErrStr == "" { - t.Fatalf("Unexpected err: %v", err) - } - if !strings.Contains(err.Error(), tc.ErrStr) { - t.Fatalf("Unexpected err: %v", err) - } - return + if tc.ExpectErr == "" { + must.NoError(t, err) + } else { + must.ErrorContains(t, err, tc.ExpectErr) } - if err == nil && tc.ErrStr != "" { - t.Fatalf("Missing expected err") + + if tc.Expect != nil { + tc.Expect.Raw = tc.Raw + must.Eq(t, tc.Expect, p) } - tc.Expect.Raw = tc.Raw - assert.EqualValues(t, tc.Expect, p) }) } } diff --git a/api/allocations.go b/api/allocations.go index b35e338c559..bf8059d32c2 100644 --- a/api/allocations.go +++ b/api/allocations.go @@ -278,6 +278,8 @@ type Allocation struct { Resources *Resources TaskResources map[string]*Resources AllocatedResources *AllocatedResources + HostVolumeIDs []string + CSIVolumeIDs []string Services map[string]string Metrics *AllocationMetric DesiredStatus string diff --git a/api/contexts/contexts.go b/api/contexts/contexts.go index 5176f5b8290..20f099a38e7 100644 --- a/api/contexts/contexts.go +++ b/api/contexts/contexts.go @@ -23,6 +23,7 @@ const ( Plugins Context = "plugins" Variables Context = "vars" Volumes Context = "volumes" + HostVolumes Context = "host_volumes" // These Context types are used to associate a search result from a lower // level Nomad object with one of the higher level Context types above. diff --git a/api/host_volumes.go b/api/host_volumes.go new file mode 100644 index 00000000000..661ac77c183 --- /dev/null +++ b/api/host_volumes.go @@ -0,0 +1,248 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +package api + +import "net/url" + +// HostVolume represents a Dynamic Host Volume: a volume associated with a +// specific Nomad client agent but created via API. 
+type HostVolume struct { + // Namespace is the Nomad namespace for the host volume, which constrains + // which jobs can mount it. + Namespace string `mapstructure:"namespace" hcl:"namespace"` + + // ID is a UUID-like string generated by the server. + ID string `mapstructure:"id" hcl:"id"` + + // Name is the name that group.volume will use to identify the volume + // source. Not expected to be unique. + Name string `mapstructure:"name" hcl:"name"` + + // PluginID is the name of the host volume plugin on the client that will be + // used for creating the volume. If omitted, the client will use its default + // built-in plugin. + PluginID string `mapstructure:"plugin_id" hcl:"plugin_id"` + + // NodePool is the node pool of the node where the volume is placed. If the + // user doesn't provide a node ID, a node will be selected using the + // NodePool and Constraints. If the user provides both NodePool and NodeID, + // NodePool will be used to validate the request. If omitted, the server + // will populate this value in before writing the volume to Raft. + NodePool string `mapstructure:"node_pool" hcl:"node_pool"` + + // NodeID is the node where the volume is placed. If the user doesn't + // provide a NodeID, one will be selected using the NodePool and + // Constraints. If omitted, this field will then be populated by the server + // before writing the volume to Raft. + NodeID string `mapstructure:"node_id" hcl:"node_id"` + + // Constraints are optional. If the NodeID is not provided, the NodePool and + // Constraints are used to select a node. If the NodeID is provided, + // Constraints are used to validate that the node meets those constraints at + // the time of volume creation. + Constraints []*Constraint `json:",omitempty" hcl:"constraint"` + + // Because storage may allow only specific intervals of size, we accept a + // min and max and return the actual capacity when the volume is created or + // updated on the client + RequestedCapacityMinBytes int64 `mapstructure:"capacity_min" hcl:"capacity_min"` + RequestedCapacityMaxBytes int64 `mapstructure:"capacity_max" hcl:"capacity_max"` + CapacityBytes int64 + + // RequestedCapabilities defines the options available to group.volume + // blocks. The scheduler checks against the listed capability blocks and + // selects a node for placement if *any* capability block works. + RequestedCapabilities []*HostVolumeCapability `hcl:"capability"` + + // Parameters are an opaque map of parameters for the host volume plugin. + Parameters map[string]string `json:",omitempty"` + + // HostPath is the path on disk where the volume's mount point was + // created. We record this to make debugging easier. + HostPath string `mapstructure:"host_path" hcl:"host_path"` + + // State represents the overall state of the volume. One of pending, ready, + // deleted. + State HostVolumeState + + CreateIndex uint64 + CreateTime int64 + + ModifyIndex uint64 + ModifyTime int64 + + // Allocations is the list of non-client-terminal allocations with claims on + // this host volume. 
They are denormalized on read and this field will
+	// never be written to Raft.
+	Allocations []*AllocationListStub `json:",omitempty" mapstructure:"-" hcl:"-"`
+}
+
+// HostVolumeState reports the current status of the host volume.
+type HostVolumeState string
+
+const (
+	HostVolumeStatePending HostVolumeState = "pending"
+	HostVolumeStateReady   HostVolumeState = "ready"
+	HostVolumeStateDeleted HostVolumeState = "deleted"
+)
+
+// HostVolumeCapability is the requested attachment and access mode for a volume
+type HostVolumeCapability struct {
+	AttachmentMode HostVolumeAttachmentMode `mapstructure:"attachment_mode" hcl:"attachment_mode"`
+	AccessMode     HostVolumeAccessMode     `mapstructure:"access_mode" hcl:"access_mode"`
+}
+
+// HostVolumeAttachmentMode chooses the type of storage API that will be used to
+// interact with the device.
+type HostVolumeAttachmentMode string
+
+const (
+	HostVolumeAttachmentModeUnknown     HostVolumeAttachmentMode = ""
+	HostVolumeAttachmentModeBlockDevice HostVolumeAttachmentMode = "block-device"
+	HostVolumeAttachmentModeFilesystem  HostVolumeAttachmentMode = "file-system"
+)
+
+// HostVolumeAccessMode indicates how Nomad should make the volume available to
+// concurrent allocations.
+type HostVolumeAccessMode string
+
+const (
+	HostVolumeAccessModeUnknown HostVolumeAccessMode = ""
+
+	HostVolumeAccessModeSingleNodeReader HostVolumeAccessMode = "single-node-reader-only"
+	HostVolumeAccessModeSingleNodeWriter HostVolumeAccessMode = "single-node-writer"
+
+	HostVolumeAccessModeMultiNodeReader       HostVolumeAccessMode = "multi-node-reader-only"
+	HostVolumeAccessModeMultiNodeSingleWriter HostVolumeAccessMode = "multi-node-single-writer"
+	HostVolumeAccessModeMultiNodeMultiWriter  HostVolumeAccessMode = "multi-node-multi-writer"
+)
+
+// HostVolumeStub is used for responses for the List Volumes endpoint
+type HostVolumeStub struct {
+	Namespace     string
+	ID            string
+	Name          string
+	PluginID      string
+	NodePool      string
+	NodeID        string
+	CapacityBytes int64
+	State         HostVolumeState
+
+	CreateIndex uint64
+	CreateTime  int64
+
+	ModifyIndex uint64
+	ModifyTime  int64
+}
+
+// HostVolumes is used to access the host volumes API.
+type HostVolumes struct {
+	client *Client
+}
+
+// HostVolumes returns a new handle on the host volumes API.
+func (c *Client) HostVolumes() *HostVolumes {
+	return &HostVolumes{client: c}
+}
+
+type HostVolumeCreateRequest struct {
+	Volume *HostVolume
+
+	// PolicyOverride overrides Sentinel soft-mandatory policy enforcement
+	PolicyOverride bool
+}
+
+type HostVolumeRegisterRequest struct {
+	Volume *HostVolume
+
+	// PolicyOverride overrides Sentinel soft-mandatory policy enforcement
+	PolicyOverride bool
+}
+
+type HostVolumeCreateResponse struct {
+	Volume   *HostVolume
+	Warnings string
+}
+
+type HostVolumeRegisterResponse struct {
+	Volume   *HostVolume
+	Warnings string
+}
+
+type HostVolumeListRequest struct {
+	NodeID   string
+	NodePool string
+}
+
+type HostVolumeDeleteRequest struct {
+	ID string
+}
+
+// Create forwards to client agents so a host volume can be created on those
+// hosts, and registers the volume with Nomad servers.
+func (hv *HostVolumes) Create(req *HostVolumeCreateRequest, opts *WriteOptions) (*HostVolumeCreateResponse, *WriteMeta, error) {
+	var out *HostVolumeCreateResponse
+	wm, err := hv.client.put("/v1/volume/host/create", req, &out, opts)
+	if err != nil {
+		return nil, wm, err
+	}
+	return out, wm, nil
+}
+
+// Register registers a host volume that was created out-of-band with the Nomad
+// servers.
+func (hv *HostVolumes) Register(req *HostVolumeRegisterRequest, opts *WriteOptions) (*HostVolumeRegisterResponse, *WriteMeta, error) { + var out *HostVolumeRegisterResponse + wm, err := hv.client.put("/v1/volume/host/register", req, &out, opts) + if err != nil { + return nil, wm, err + } + return out, wm, nil +} + +// Get queries for a single host volume, by ID +func (hv *HostVolumes) Get(id string, opts *QueryOptions) (*HostVolume, *QueryMeta, error) { + var out *HostVolume + path, err := url.JoinPath("/v1/volume/host/", url.PathEscape(id)) + if err != nil { + return nil, nil, err + } + qm, err := hv.client.query(path, &out, opts) + if err != nil { + return nil, qm, err + } + return out, qm, nil +} + +// List queries for a set of host volumes, by namespace, node, node pool, or +// name prefix. +func (hv *HostVolumes) List(req *HostVolumeListRequest, opts *QueryOptions) ([]*HostVolumeStub, *QueryMeta, error) { + var out []*HostVolumeStub + qv := url.Values{} + qv.Set("type", "host") + if req != nil { + if req.NodeID != "" { + qv.Set("node_id", req.NodeID) + } + if req.NodePool != "" { + qv.Set("node_pool", req.NodePool) + } + } + + qm, err := hv.client.query("/v1/volumes?"+qv.Encode(), &out, opts) + if err != nil { + return nil, qm, err + } + return out, qm, nil +} + +// Delete deletes a host volume +func (hv *HostVolumes) Delete(req *HostVolumeDeleteRequest, opts *WriteOptions) (*WriteMeta, error) { + path, err := url.JoinPath("/v1/volume/host/", url.PathEscape(req.ID)) + if err != nil { + return nil, err + } + wm, err := hv.client.delete(path, nil, nil, opts) + return wm, err +} diff --git a/api/nodes.go b/api/nodes.go index 809382bf79b..1d4cf4e65d9 100644 --- a/api/nodes.go +++ b/api/nodes.go @@ -517,6 +517,8 @@ type DriverInfo struct { type HostVolumeInfo struct { Path string ReadOnly bool + // ID is set for dynamic host volumes only. 
+ ID string } // HostNetworkInfo is used to return metadata about a given HostNetwork diff --git a/api/sentinel.go b/api/sentinel.go index e8a0644ae16..1e93308847d 100644 --- a/api/sentinel.go +++ b/api/sentinel.go @@ -82,3 +82,9 @@ type SentinelPolicyListStub struct { CreateIndex uint64 ModifyIndex uint64 } + +// Possible Sentinel scopes +const ( + SentinelScopeSubmitJob = "submit-job" + SentinelScopeSubmitHostVolume = "submit-host-volume" +) diff --git a/api/tasks.go b/api/tasks.go index 21d99bf4c2c..b39c55ad56b 100644 --- a/api/tasks.go +++ b/api/tasks.go @@ -455,6 +455,7 @@ type VolumeRequest struct { Type string `hcl:"type,optional"` Source string `hcl:"source,optional"` ReadOnly bool `hcl:"read_only,optional"` + Sticky bool `hcl:"sticky,optional"` AccessMode string `hcl:"access_mode,optional"` AttachmentMode string `hcl:"attachment_mode,optional"` MountOptions *CSIMountOptions `hcl:"mount_options,block"` diff --git a/ci/test-core.json b/ci/test-core.json index 95f354fbe99..5ec461809ed 100644 --- a/ci/test-core.json +++ b/ci/test-core.json @@ -17,6 +17,7 @@ "client/dynamicplugins/...", "client/fingerprint/...", "client/hoststats/...", + "client/hostvolumemanager/...", "client/interfaces/...", "client/lib/...", "client/logmon/...", diff --git a/client/client.go b/client/client.go index fa539f51ed2..40453f1ab77 100644 --- a/client/client.go +++ b/client/client.go @@ -34,6 +34,7 @@ import ( "github.com/hashicorp/nomad/client/dynamicplugins" "github.com/hashicorp/nomad/client/fingerprint" "github.com/hashicorp/nomad/client/hoststats" + hvm "github.com/hashicorp/nomad/client/hostvolumemanager" cinterfaces "github.com/hashicorp/nomad/client/interfaces" "github.com/hashicorp/nomad/client/lib/cgroupslib" "github.com/hashicorp/nomad/client/lib/numalib" @@ -289,6 +290,8 @@ type Client struct { // drivermanager is responsible for managing driver plugins drivermanager drivermanager.Manager + hostVolumeManager *hvm.HostVolumeManager + // baseLabels are used when emitting tagged metrics. All client metrics will // have these tags, and optionally more. baseLabels []metrics.Label @@ -408,6 +411,7 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulProxie c.updateNodeFromDriver, c.updateNodeFromDevices, c.updateNodeFromCSI, + c.updateNodeFromHostVol, ) // Initialize the server manager @@ -532,6 +536,15 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulProxie c.devicemanager = devManager c.pluginManagers.RegisterAndRun(devManager) + // set up dynamic host volume manager + c.hostVolumeManager = hvm.NewHostVolumeManager(logger, hvm.Config{ + PluginDir: cfg.HostVolumePluginDir, + SharedMountDir: cfg.AllocMountsDir, + StateMgr: c.stateDB, + UpdateNodeVols: c.batchNodeUpdates.updateNodeFromHostVolume, + }) + c.pluginManagers.RegisterAndRun(c.hostVolumeManager) + // Set up the service registration wrapper using the Consul and Nomad // implementations. The Nomad implementation is only ever used on the // client, so we do that here rather than within the agent. diff --git a/client/config/config.go b/client/config/config.go index c6b18bcd08f..1fe177432e8 100644 --- a/client/config/config.go +++ b/client/config/config.go @@ -314,6 +314,9 @@ type Config struct { // HostVolumes is a map of the configured host volumes by name. HostVolumes map[string]*structs.ClientHostVolumeConfig + // HostVolumePluginDir is the directory with dynamic host volume plugins. 
+ HostVolumePluginDir string + // HostNetworks is a map of the conigured host networks by name. HostNetworks map[string]*structs.ClientHostNetworkConfig diff --git a/client/fingerprint/dynamic_host_volumes.go b/client/fingerprint/dynamic_host_volumes.go new file mode 100644 index 00000000000..73cdd4fe4a2 --- /dev/null +++ b/client/fingerprint/dynamic_host_volumes.go @@ -0,0 +1,120 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package fingerprint + +import ( + "context" + "os" + "strings" + "sync" + "time" + + "github.com/hashicorp/go-hclog" + hvm "github.com/hashicorp/nomad/client/hostvolumemanager" + "github.com/hashicorp/nomad/helper" +) + +func NewPluginsHostVolumeFingerprint(logger hclog.Logger) Fingerprint { + return &DynamicHostVolumePluginFingerprint{ + logger: logger.Named("host_volume_plugins"), + } +} + +var _ ReloadableFingerprint = &DynamicHostVolumePluginFingerprint{} + +type DynamicHostVolumePluginFingerprint struct { + logger hclog.Logger +} + +func (h *DynamicHostVolumePluginFingerprint) Reload() { + // host volume plugins are re-detected on agent reload +} + +func (h *DynamicHostVolumePluginFingerprint) Fingerprint(request *FingerprintRequest, response *FingerprintResponse) error { + // always add "mkdir" plugin + h.logger.Debug("detected plugin built-in", + "plugin_id", hvm.HostVolumePluginMkdirID, "version", hvm.HostVolumePluginMkdirVersion) + defer response.AddAttribute("plugins.host_volume."+hvm.HostVolumePluginMkdirID+".version", hvm.HostVolumePluginMkdirVersion) + response.Detected = true + + // this config value will be empty in -dev mode + pluginDir := request.Config.HostVolumePluginDir + if pluginDir == "" { + return nil + } + + plugins, err := GetHostVolumePluginVersions(h.logger, pluginDir) + if err != nil { + if os.IsNotExist(err) { + h.logger.Debug("plugin dir does not exist", "dir", pluginDir) + } else { + h.logger.Warn("error finding plugins", "dir", pluginDir, "error", err) + } + return nil // don't halt agent start + } + + // if this was a reload, wipe what was there before + for k := range request.Node.Attributes { + if strings.HasPrefix(k, "plugins.host_volume.") { + response.RemoveAttribute(k) + } + } + + // set the attribute(s) + for plugin, version := range plugins { + h.logger.Debug("detected plugin", "plugin_id", plugin, "version", version) + response.AddAttribute("plugins.host_volume."+plugin+".version", version) + } + + return nil +} + +func (h *DynamicHostVolumePluginFingerprint) Periodic() (bool, time.Duration) { + return false, 0 +} + +// GetHostVolumePluginVersions finds all the executable files on disk +// that respond to a Version call (arg $1 = 'version' / env $OPERATION = 'version') +// The return map's keys are plugin IDs, and the values are version strings. 
+func GetHostVolumePluginVersions(log hclog.Logger, pluginDir string) (map[string]string, error) { + files, err := helper.FindExecutableFiles(pluginDir) + if err != nil { + return nil, err + } + + plugins := make(map[string]string) + mut := sync.Mutex{} + var wg sync.WaitGroup + + for file, fullPath := range files { + wg.Add(1) + go func(file, fullPath string) { + defer wg.Done() + // really should take way less than a second + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + defer cancel() + + log := log.With("plugin_id", file) + + p, err := hvm.NewHostVolumePluginExternal(log, file, fullPath, "") + if err != nil { + log.Warn("error getting plugin", "error", err) + return + } + + fprint, err := p.Fingerprint(ctx) + if err != nil { + log.Debug("failed to get version from plugin", "error", err) + return + } + + mut.Lock() + plugins[file] = fprint.Version.String() + mut.Unlock() + }(file, fullPath) + } + + wg.Wait() + return plugins, nil +} diff --git a/client/fingerprint/dynamic_host_volumes_test.go b/client/fingerprint/dynamic_host_volumes_test.go new file mode 100644 index 00000000000..28b331bcfc1 --- /dev/null +++ b/client/fingerprint/dynamic_host_volumes_test.go @@ -0,0 +1,89 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package fingerprint + +import ( + "os" + "path/filepath" + "runtime" + "testing" + + "github.com/hashicorp/nomad/client/config" + hvm "github.com/hashicorp/nomad/client/hostvolumemanager" + "github.com/hashicorp/nomad/helper/testlog" + "github.com/hashicorp/nomad/nomad/structs" + "github.com/shoenig/test/must" +) + +// this is more of a full integration test of: +// fingerprint <- find plugins <- find executables +func TestPluginsHostVolumeFingerprint(t *testing.T) { + cfg := &config.Config{HostVolumePluginDir: ""} + node := &structs.Node{Attributes: map[string]string{}} + req := &FingerprintRequest{Config: cfg, Node: node} + fp := NewPluginsHostVolumeFingerprint(testlog.HCLogger(t)) + + // this fingerprint is not mandatory, so no error should be returned + for name, path := range map[string]string{ + "empty": "", + "non-existent": "/nowhere", + "impossible": "dynamic_host_volumes_test.go", + } { + t.Run(name, func(t *testing.T) { + resp := FingerprintResponse{} + cfg.HostVolumePluginDir = path + err := fp.Fingerprint(req, &resp) + must.NoError(t, err) + must.True(t, resp.Detected) // always true due to "mkdir" built-in + }) + } + + if runtime.GOOS == "windows" { + t.Skip("test scripts not built for windows") // db TODO(1.10.0) + } + + // happy path: dir exists. this one will contain a single valid plugin. 
+ tmp := t.TempDir() + cfg.HostVolumePluginDir = tmp + + files := []struct { + name string + contents string + perm os.FileMode + }{ + // only this first one should be detected as a valid plugin + {"happy-plugin", "#!/usr/bin/env sh\necho '{\"version\": \"0.0.1\"}'", 0700}, + {"not-a-plugin", "#!/usr/bin/env sh\necho 'not a version'", 0700}, + {"unhappy-plugin", "#!/usr/bin/env sh\necho 'sad plugin is sad'; exit 1", 0700}, + {"not-executable", "do not execute me", 0400}, + } + for _, f := range files { + must.NoError(t, os.WriteFile(filepath.Join(tmp, f.name), []byte(f.contents), f.perm)) + } + // directories should be ignored + must.NoError(t, os.Mkdir(filepath.Join(tmp, "a-directory"), 0700)) + + // do the fingerprint + resp := FingerprintResponse{} + err := fp.Fingerprint(req, &resp) + must.NoError(t, err) + must.Eq(t, map[string]string{ + "plugins.host_volume.mkdir.version": hvm.HostVolumePluginMkdirVersion, // built-in + "plugins.host_volume.happy-plugin.version": "0.0.1", + }, resp.Attributes) + + // do it again after deleting our one good plugin. + // repeat runs should wipe attributes, so nothing should remain. + node.Attributes = resp.Attributes + must.NoError(t, os.Remove(filepath.Join(tmp, "happy-plugin"))) + + resp = FingerprintResponse{} + err = fp.Fingerprint(req, &resp) + must.NoError(t, err) + must.Eq(t, map[string]string{ + "plugins.host_volume.happy-plugin.version": "", // empty value means removed + + "plugins.host_volume.mkdir.version": hvm.HostVolumePluginMkdirVersion, // built-in + }, resp.Attributes) +} diff --git a/client/fingerprint/fingerprint.go b/client/fingerprint/fingerprint.go index 3654db07830..5eb638009ec 100644 --- a/client/fingerprint/fingerprint.go +++ b/client/fingerprint/fingerprint.go @@ -32,19 +32,20 @@ var ( // hostFingerprinters contains the host fingerprints which are available for a // given platform. hostFingerprinters = map[string]Factory{ - "arch": NewArchFingerprint, - "consul": NewConsulFingerprint, - "cni": NewCNIFingerprint, // networks - "cpu": NewCPUFingerprint, - "host": NewHostFingerprint, - "landlock": NewLandlockFingerprint, - "memory": NewMemoryFingerprint, - "network": NewNetworkFingerprint, - "nomad": NewNomadFingerprint, - "plugins_cni": NewPluginsCNIFingerprint, - "signal": NewSignalFingerprint, - "storage": NewStorageFingerprint, - "vault": NewVaultFingerprint, + "arch": NewArchFingerprint, + "consul": NewConsulFingerprint, + "cni": NewCNIFingerprint, // networks + "cpu": NewCPUFingerprint, + "host": NewHostFingerprint, + "landlock": NewLandlockFingerprint, + "memory": NewMemoryFingerprint, + "network": NewNetworkFingerprint, + "nomad": NewNomadFingerprint, + "plugins_cni": NewPluginsCNIFingerprint, + "host_volume_plugins": NewPluginsHostVolumeFingerprint, + "signal": NewSignalFingerprint, + "storage": NewStorageFingerprint, + "vault": NewVaultFingerprint, } // envFingerprinters contains the fingerprints that are environment specific. diff --git a/client/host_volume_endpoint.go b/client/host_volume_endpoint.go new file mode 100644 index 00000000000..5c73ee4c394 --- /dev/null +++ b/client/host_volume_endpoint.go @@ -0,0 +1,65 @@ +// Copyright (c) HashiCorp, Inc. 
+// SPDX-License-Identifier: BUSL-1.1
+
+package client
+
+import (
+	"context"
+	"time"
+
+	metrics "github.com/armon/go-metrics"
+	cstructs "github.com/hashicorp/nomad/client/structs"
+)
+
+type HostVolume struct {
+	c *Client
+}
+
+func newHostVolumesEndpoint(c *Client) *HostVolume {
+	v := &HostVolume{c: c}
+	return v
+}
+
+var hostVolumeRequestTimeout = time.Minute
+
+func (v *HostVolume) Create(
+	req *cstructs.ClientHostVolumeCreateRequest,
+	resp *cstructs.ClientHostVolumeCreateResponse) error {
+
+	defer metrics.MeasureSince([]string{"client", "host_volume", "create"}, time.Now())
+	ctx, cancelFn := v.requestContext()
+	defer cancelFn()
+
+	cresp, err := v.c.hostVolumeManager.Create(ctx, req)
+	if err != nil {
+		v.c.logger.Error("failed to create host volume", "name", req.Name, "error", err)
+		return err
+	}
+
+	resp.CapacityBytes = cresp.CapacityBytes
+	resp.HostPath = cresp.HostPath
+
+	v.c.logger.Info("created host volume", "id", req.ID, "path", resp.HostPath)
+	return nil
+}
+
+func (v *HostVolume) Delete(
+	req *cstructs.ClientHostVolumeDeleteRequest,
+	resp *cstructs.ClientHostVolumeDeleteResponse) error {
+	defer metrics.MeasureSince([]string{"client", "host_volume", "delete"}, time.Now())
+	ctx, cancelFn := v.requestContext()
+	defer cancelFn()
+
+	_, err := v.c.hostVolumeManager.Delete(ctx, req)
+	if err != nil {
+		v.c.logger.Error("failed to delete host volume", "id", req.ID, "error", err)
+		return err
+	}
+
+	v.c.logger.Info("deleted host volume", "id", req.ID, "path", req.HostPath)
+	return nil
+}
+
+func (v *HostVolume) requestContext() (context.Context, context.CancelFunc) {
+	return context.WithTimeout(context.Background(), hostVolumeRequestTimeout)
+}
diff --git a/client/host_volume_endpoint_test.go b/client/host_volume_endpoint_test.go
new file mode 100644
index 00000000000..a85eae0b1f9
--- /dev/null
+++ b/client/host_volume_endpoint_test.go
@@ -0,0 +1,138 @@
+// Copyright (c) HashiCorp, Inc.
+// SPDX-License-Identifier: BUSL-1.1 + +package client + +import ( + "path/filepath" + "testing" + + "github.com/hashicorp/nomad/ci" + hvm "github.com/hashicorp/nomad/client/hostvolumemanager" + "github.com/hashicorp/nomad/client/state" + cstructs "github.com/hashicorp/nomad/client/structs" + "github.com/hashicorp/nomad/helper/testlog" + "github.com/hashicorp/nomad/nomad/structs" + "github.com/shoenig/test/must" +) + +func TestHostVolume(t *testing.T) { + ci.Parallel(t) + + client, cleanup := TestClient(t, nil) + defer cleanup() + + memdb := state.NewMemDB(testlog.HCLogger(t)) + client.stateDB = memdb + + tmp := t.TempDir() + manager := hvm.NewHostVolumeManager(testlog.HCLogger(t), hvm.Config{ + StateMgr: client.stateDB, + UpdateNodeVols: client.updateNodeFromHostVol, + PluginDir: "/no/ext/plugins", + SharedMountDir: tmp, + }) + client.hostVolumeManager = manager + expectDir := filepath.Join(tmp, "test-vol-id") + + t.Run("happy", func(t *testing.T) { + + /* create */ + + req := &cstructs.ClientHostVolumeCreateRequest{ + Name: "test-vol-name", + ID: "test-vol-id", + PluginID: "mkdir", // real plugin really makes a dir + } + var resp cstructs.ClientHostVolumeCreateResponse + err := client.ClientRPC("HostVolume.Create", req, &resp) + must.NoError(t, err) + must.Eq(t, cstructs.ClientHostVolumeCreateResponse{ + HostPath: expectDir, + CapacityBytes: 0, // "mkdir" always returns zero + }, resp) + // technically this is testing "mkdir" more than the RPC + must.DirExists(t, expectDir) + // ensure we saved to client state + vols, err := memdb.GetDynamicHostVolumes() + must.NoError(t, err) + must.Len(t, 1, vols) + expectState := &cstructs.HostVolumeState{ + ID: req.ID, + CreateReq: req, + } + must.Eq(t, expectState, vols[0]) + // and should be fingerprinted + must.Eq(t, hvm.VolumeMap{ + req.Name: { + ID: req.ID, + Name: req.Name, + Path: expectDir, + }, + }, client.Node().HostVolumes) + + /* delete */ + + delReq := &cstructs.ClientHostVolumeDeleteRequest{ + Name: "test-vol-name", + ID: "test-vol-id", + PluginID: "mkdir", + HostPath: expectDir, + } + var delResp cstructs.ClientHostVolumeDeleteResponse + err = client.ClientRPC("HostVolume.Delete", delReq, &delResp) + must.NoError(t, err) + must.NotNil(t, delResp) + // again, actually testing the "mkdir" plugin + must.DirNotExists(t, expectDir) + // client state should be deleted + vols, err = memdb.GetDynamicHostVolumes() + must.NoError(t, err) + must.Len(t, 0, vols) + // and the fingerprint, too + must.Eq(t, map[string]*structs.ClientHostVolumeConfig{}, client.Node().HostVolumes) + }) + + t.Run("missing plugin", func(t *testing.T) { + req := &cstructs.ClientHostVolumeCreateRequest{ + PluginID: "non-existent", + } + var resp cstructs.ClientHostVolumeCreateResponse + err := client.ClientRPC("HostVolume.Create", req, &resp) + must.EqError(t, err, `no such plugin: "non-existent"`) + + delReq := &cstructs.ClientHostVolumeDeleteRequest{ + PluginID: "non-existent", + } + var delResp cstructs.ClientHostVolumeDeleteResponse + err = client.ClientRPC("HostVolume.Delete", delReq, &delResp) + must.EqError(t, err, `no such plugin: "non-existent"`) + }) + + t.Run("error from plugin", func(t *testing.T) { + // "mkdir" plugin can't create a directory within a file + client.hostVolumeManager = hvm.NewHostVolumeManager(testlog.HCLogger(t), hvm.Config{ + StateMgr: client.stateDB, + UpdateNodeVols: client.updateNodeFromHostVol, + PluginDir: "/no/ext/plugins", + SharedMountDir: "host_volume_endpoint_test.go", + }) + 
+ req := &cstructs.ClientHostVolumeCreateRequest{ + ID: "test-vol-id", + Name: "test-vol-name", + PluginID: "mkdir", + } + var resp cstructs.ClientHostVolumeCreateResponse + err := client.ClientRPC("HostVolume.Create", req, &resp) + must.ErrorContains(t, err, "host_volume_endpoint_test.go/test-vol-id: not a directory") + + delReq := &cstructs.ClientHostVolumeDeleteRequest{ + ID: "test-vol-id", + PluginID: "mkdir", + } + var delResp cstructs.ClientHostVolumeDeleteResponse + err = client.ClientRPC("HostVolume.Delete", delReq, &delResp) + must.ErrorContains(t, err, "host_volume_endpoint_test.go/test-vol-id: not a directory") + }) +} diff --git a/client/hostvolumemanager/host_volume_plugin.go b/client/hostvolumemanager/host_volume_plugin.go new file mode 100644 index 00000000000..961466b1223 --- /dev/null +++ b/client/hostvolumemanager/host_volume_plugin.go @@ -0,0 +1,255 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package hostvolumemanager + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "os" + "os/exec" + "path/filepath" + + "github.com/hashicorp/go-hclog" + "github.com/hashicorp/go-multierror" + "github.com/hashicorp/go-version" + cstructs "github.com/hashicorp/nomad/client/structs" + "github.com/hashicorp/nomad/helper" +) + +type PluginFingerprint struct { + Version *version.Version `json:"version"` +} + +type HostVolumePlugin interface { + Fingerprint(ctx context.Context) (*PluginFingerprint, error) + Create(ctx context.Context, req *cstructs.ClientHostVolumeCreateRequest) (*HostVolumePluginCreateResponse, error) + Delete(ctx context.Context, req *cstructs.ClientHostVolumeDeleteRequest) error + // db TODO(1.10.0): update? resize? ?? +} + +type HostVolumePluginCreateResponse struct { + Path string `json:"path"` + SizeBytes int64 `json:"bytes"` +} + +const HostVolumePluginMkdirID = "mkdir" +const HostVolumePluginMkdirVersion = "0.0.1" + +var _ HostVolumePlugin = &HostVolumePluginMkdir{} + +type HostVolumePluginMkdir struct { + ID string + TargetPath string + + log hclog.Logger +} + +func (p *HostVolumePluginMkdir) Fingerprint(_ context.Context) (*PluginFingerprint, error) { + v, err := version.NewVersion(HostVolumePluginMkdirVersion) + return &PluginFingerprint{ + Version: v, + }, err +} + +func (p *HostVolumePluginMkdir) Create(_ context.Context, + req *cstructs.ClientHostVolumeCreateRequest) (*HostVolumePluginCreateResponse, error) { + + path := filepath.Join(p.TargetPath, req.ID) + log := p.log.With( + "operation", "create", + "volume_id", req.ID, + "path", path) + log.Debug("running plugin") + + resp := &HostVolumePluginCreateResponse{ + Path: path, + SizeBytes: 0, + } + + if _, err := os.Stat(path); err == nil { + // already exists + return resp, nil + } else if !os.IsNotExist(err) { + // doesn't exist, but some other path error + log.Debug("error with plugin", "error", err) + return nil, err + } + + err := os.Mkdir(path, 0o700) + if err != nil { + log.Debug("error with plugin", "error", err) + return nil, err + } + + log.Debug("plugin ran successfully") + return resp, nil +} + +func (p *HostVolumePluginMkdir) Delete(_ context.Context, req *cstructs.ClientHostVolumeDeleteRequest) error { + path := filepath.Join(p.TargetPath, req.ID) + log := p.log.With( + "operation", "delete", + "volume_id", req.ID, + "path", path) + log.Debug("running plugin") + + err := os.RemoveAll(path) + if err != nil { + log.Debug("error with plugin", "error", err) + return err + } + + log.Debug("plugin ran 
successfully")
+	return nil
+}
+
+var _ HostVolumePlugin = &HostVolumePluginExternal{}
+
+func NewHostVolumePluginExternal(log hclog.Logger,
+	id, executable, targetPath string) (*HostVolumePluginExternal, error) {
+	// this should only be called with already-detected executables,
+	// but we'll double-check it anyway, so we can provide a tidy error message
+	// if it has changed between fingerprinting and execution.
+	f, err := os.Stat(executable)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return nil, fmt.Errorf("%w: %q", ErrPluginNotExists, id)
+		}
+		return nil, err
+	}
+	if !helper.IsExecutable(f) {
+		return nil, fmt.Errorf("%w: %q", ErrPluginNotExecutable, id)
+	}
+	return &HostVolumePluginExternal{
+		ID:         id,
+		Executable: executable,
+		TargetPath: targetPath,
+		log:        log,
+	}, nil
+}
+
+type HostVolumePluginExternal struct {
+	ID         string
+	Executable string
+	TargetPath string
+
+	log hclog.Logger
+}
+
+func (p *HostVolumePluginExternal) Fingerprint(ctx context.Context) (*PluginFingerprint, error) {
+	cmd := exec.CommandContext(ctx, p.Executable, "fingerprint")
+	cmd.Env = []string{"OPERATION=fingerprint"}
+	stdout, stderr, err := runCommand(cmd)
+	if err != nil {
+		p.log.Debug("error with plugin",
+			"operation", "version",
+			"stdout", string(stdout),
+			"stderr", string(stderr),
+			"error", err)
+		return nil, fmt.Errorf("error getting version from plugin %q: %w", p.ID, err)
+	}
+	fprint := &PluginFingerprint{}
+	if err := json.Unmarshal(stdout, fprint); err != nil {
+		return nil, fmt.Errorf("error parsing fingerprint output as json: %w", err)
+	}
+	return fprint, nil
+}
+
+func (p *HostVolumePluginExternal) Create(ctx context.Context,
+	req *cstructs.ClientHostVolumeCreateRequest) (*HostVolumePluginCreateResponse, error) {
+
+	params, err := json.Marshal(req.Parameters) // db TODO(1.10.0): document that if this is nil, the PARAMETERS env var will be "null"
+	if err != nil {
+		// this is a proper error, because users can set this in the volume spec
+		return nil, fmt.Errorf("error marshaling volume parameters: %w", err)
+	}
+	envVars := []string{
+		"NODE_ID=" + req.NodeID,
+		"VOLUME_NAME=" + req.Name,
+		fmt.Sprintf("CAPACITY_MIN_BYTES=%d", req.RequestedCapacityMinBytes),
+		fmt.Sprintf("CAPACITY_MAX_BYTES=%d", req.RequestedCapacityMaxBytes),
+		"PARAMETERS=" + string(params),
+	}
+
+	stdout, _, err := p.runPlugin(ctx, "create", req.ID, envVars)
+	if err != nil {
+		return nil, fmt.Errorf("error creating volume %q with plugin %q: %w", req.ID, p.ID, err)
+	}
+
+	var pluginResp HostVolumePluginCreateResponse
+	err = json.Unmarshal(stdout, &pluginResp) // db TODO(1.10.0): if this fails, then the volume may have been created, according to the plugin, but Nomad will not save it
+	if err != nil {
+		return nil, err
+	}
+	return &pluginResp, nil
+}
+
+func (p *HostVolumePluginExternal) Delete(ctx context.Context,
+	req *cstructs.ClientHostVolumeDeleteRequest) error {
+
+	params, err := json.Marshal(req.Parameters)
+	if err != nil {
+		return fmt.Errorf("error marshaling volume parameters: %w", err)
+	}
+	envVars := []string{
+		"NODE_ID=" + req.NodeID,
+		"PARAMETERS=" + string(params),
+	}
+
+	_, _, err = p.runPlugin(ctx, "delete", req.ID, envVars)
+	if err != nil {
+		return fmt.Errorf("error deleting volume %q with plugin %q: %w", req.ID, p.ID, err)
+	}
+	return nil
+}
+
+func (p *HostVolumePluginExternal) runPlugin(ctx context.Context,
+	op, volID string, env []string) (stdout, stderr []byte, err error) {
+
+	path := filepath.Join(p.TargetPath, volID)
+	log := p.log.With(
+		"operation", op,
+		"volume_id",
volID, + "path", path) + log.Debug("running plugin") + + // set up plugin execution + cmd := exec.CommandContext(ctx, p.Executable, op, path) + + cmd.Env = append([]string{ + "OPERATION=" + op, + "HOST_PATH=" + path, + }, env...) + + stdout, stderr, err = runCommand(cmd) + + log = log.With( + "stdout", string(stdout), + "stderr", string(stderr), + ) + if err != nil { + log.Debug("error with plugin", "error", err) + return stdout, stderr, err + } + log.Debug("plugin ran successfully") + return stdout, stderr, nil +} + +func runCommand(cmd *exec.Cmd) (stdout, stderr []byte, err error) { + var errBuf bytes.Buffer + cmd.Stderr = io.Writer(&errBuf) + mErr := &multierror.Error{} + stdout, err = cmd.Output() + if err != nil { + mErr = multierror.Append(mErr, err) + } + stderr, err = io.ReadAll(&errBuf) + if err != nil { + mErr = multierror.Append(mErr, err) + } + return stdout, stderr, helper.FlattenMultierror(mErr.ErrorOrNil()) +} diff --git a/client/hostvolumemanager/host_volume_plugin_test.go b/client/hostvolumemanager/host_volume_plugin_test.go new file mode 100644 index 00000000000..0552810bb84 --- /dev/null +++ b/client/hostvolumemanager/host_volume_plugin_test.go @@ -0,0 +1,215 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package hostvolumemanager + +import ( + "bytes" + "context" + "io" + "path/filepath" + "runtime" + "testing" + "time" + + "github.com/hashicorp/go-hclog" + "github.com/hashicorp/go-version" + cstructs "github.com/hashicorp/nomad/client/structs" + "github.com/hashicorp/nomad/helper/testlog" + "github.com/shoenig/test" + "github.com/shoenig/test/must" +) + +func TestHostVolumePluginMkdir(t *testing.T) { + volID := "test-vol-id" + tmp := t.TempDir() + target := filepath.Join(tmp, volID) + + plug := &HostVolumePluginMkdir{ + ID: "test-mkdir-plugin", + TargetPath: tmp, + log: testlog.HCLogger(t), + } + + // contexts don't matter here, since they're thrown away by this plugin, + // but sending timeout contexts anyway, in case the plugin changes later. 
+ _, err := plug.Fingerprint(timeout(t)) + must.NoError(t, err) + + t.Run("happy", func(t *testing.T) { + resp, err := plug.Create(timeout(t), + &cstructs.ClientHostVolumeCreateRequest{ + ID: volID, // minimum required by this plugin + }) + must.NoError(t, err) + must.Eq(t, &HostVolumePluginCreateResponse{ + Path: target, + SizeBytes: 0, + }, resp) + must.DirExists(t, target) + + err = plug.Delete(timeout(t), + &cstructs.ClientHostVolumeDeleteRequest{ + ID: volID, + }) + must.NoError(t, err) + must.DirNotExists(t, target) + }) + + t.Run("sad", func(t *testing.T) { + // can't mkdir inside a file + plug.TargetPath = "host_volume_plugin_test.go" + + resp, err := plug.Create(timeout(t), + &cstructs.ClientHostVolumeCreateRequest{ + ID: volID, // minimum required by this plugin + }) + must.ErrorContains(t, err, "host_volume_plugin_test.go/test-vol-id: not a directory") + must.Nil(t, resp) + + err = plug.Delete(timeout(t), + &cstructs.ClientHostVolumeDeleteRequest{ + ID: volID, + }) + must.ErrorContains(t, err, "host_volume_plugin_test.go/test-vol-id: not a directory") + }) +} + +func TestHostVolumePluginExternal(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("skipped because windows") // db TODO(1.10.0) + } + + volID := "test-vol-id" + tmp := t.TempDir() + target := filepath.Join(tmp, volID) + + expectVersion, err := version.NewVersion("0.0.2") + must.NoError(t, err) + + t.Run("happy", func(t *testing.T) { + + log, getLogs := logRecorder(t) + plug := &HostVolumePluginExternal{ + ID: "test-external-plugin", + Executable: "./test_fixtures/test_plugin.sh", + TargetPath: tmp, + log: log, + } + + v, err := plug.Fingerprint(timeout(t)) + must.NoError(t, err) + must.Eq(t, expectVersion, v.Version) + + resp, err := plug.Create(timeout(t), + &cstructs.ClientHostVolumeCreateRequest{ + ID: volID, + NodeID: "test-node", + RequestedCapacityMinBytes: 5, + RequestedCapacityMaxBytes: 10, + Parameters: map[string]string{"key": "val"}, + }) + must.NoError(t, err) + + must.Eq(t, &HostVolumePluginCreateResponse{ + Path: target, + SizeBytes: 5, + }, resp) + must.DirExists(t, target) + logged := getLogs() + must.StrContains(t, logged, "OPERATION=create") // stderr from `env` + must.StrContains(t, logged, `stdout="{`) // stdout from printf + + // reset logger for next call + log, getLogs = logRecorder(t) + plug.log = log + + err = plug.Delete(timeout(t), + &cstructs.ClientHostVolumeDeleteRequest{ + ID: volID, + NodeID: "test-node", + Parameters: map[string]string{"key": "val"}, + }) + must.NoError(t, err) + must.DirNotExists(t, target) + logged = getLogs() + must.StrContains(t, logged, "OPERATION=delete") // stderr from `env` + must.StrContains(t, logged, "removed directory") // stdout from `rm -v` + }) + + t.Run("sad", func(t *testing.T) { + + log, getLogs := logRecorder(t) + plug := &HostVolumePluginExternal{ + ID: "test-external-plugin-sad", + Executable: "./test_fixtures/test_plugin_sad.sh", + TargetPath: tmp, + log: log, + } + + v, err := plug.Fingerprint(timeout(t)) + must.EqError(t, err, `error getting version from plugin "test-external-plugin-sad": exit status 1`) + must.Nil(t, v) + logged := getLogs() + must.StrContains(t, logged, "fingerprint: sad plugin is sad") + must.StrContains(t, logged, "fingerprint: it tells you all about it in stderr") + + // reset logger + log, getLogs = logRecorder(t) + plug.log = log + + resp, err := plug.Create(timeout(t), + &cstructs.ClientHostVolumeCreateRequest{ + ID: volID, + NodeID: "test-node", + RequestedCapacityMinBytes: 5, + RequestedCapacityMaxBytes: 10, + 
Parameters: map[string]string{"key": "val"}, + }) + must.EqError(t, err, `error creating volume "test-vol-id" with plugin "test-external-plugin-sad": exit status 1`) + must.Nil(t, resp) + logged = getLogs() + must.StrContains(t, logged, "create: sad plugin is sad") + must.StrContains(t, logged, "create: it tells you all about it in stderr") + + log, getLogs = logRecorder(t) + plug.log = log + + err = plug.Delete(timeout(t), + &cstructs.ClientHostVolumeDeleteRequest{ + ID: volID, + NodeID: "test-node", + Parameters: map[string]string{"key": "val"}, + }) + must.EqError(t, err, `error deleting volume "test-vol-id" with plugin "test-external-plugin-sad": exit status 1`) + logged = getLogs() + must.StrContains(t, logged, "delete: sad plugin is sad") + must.StrContains(t, logged, "delete: it tells you all about it in stderr") + }) +} + +// timeout provides a context that times out in 1 second +func timeout(t *testing.T) context.Context { + t.Helper() + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + t.Cleanup(cancel) + return ctx +} + +// logRecorder is here so we can assert that stdout/stderr appear in logs +func logRecorder(t *testing.T) (hclog.Logger, func() string) { + t.Helper() + buf := &bytes.Buffer{} + logger := hclog.New(&hclog.LoggerOptions{ + Name: "log-recorder", + Output: buf, + Level: hclog.Debug, + IncludeLocation: true, + DisableTime: true, + }) + return logger, func() string { + bts, err := io.ReadAll(buf) + test.NoError(t, err) + return string(bts) + } +} diff --git a/client/hostvolumemanager/host_volumes.go b/client/hostvolumemanager/host_volumes.go new file mode 100644 index 00000000000..82ccb8f47a4 --- /dev/null +++ b/client/hostvolumemanager/host_volumes.go @@ -0,0 +1,210 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package hostvolumemanager + +import ( + "context" + "errors" + "path/filepath" + "sync" + + "github.com/hashicorp/go-hclog" + "github.com/hashicorp/go-multierror" + cstructs "github.com/hashicorp/nomad/client/structs" + "github.com/hashicorp/nomad/helper" + "github.com/hashicorp/nomad/nomad/structs" +) + +var ( + ErrPluginNotExists = errors.New("no such plugin") + ErrPluginNotExecutable = errors.New("plugin not executable") +) + +type HostVolumeStateManager interface { + PutDynamicHostVolume(*cstructs.HostVolumeState) error + GetDynamicHostVolumes() ([]*cstructs.HostVolumeState, error) + DeleteDynamicHostVolume(string) error +} + +type Config struct { + // PluginDir is where external plugins may be found. + PluginDir string + + // SharedMountDir is where plugins should place the directory + // that will later become a volume HostPath + SharedMountDir string + + // StateMgr manages client state to restore on agent restarts. + StateMgr HostVolumeStateManager + + // UpdateNodeVols is run to update the node when a volume is created + // or deleted. 
+ UpdateNodeVols HostVolumeNodeUpdater +} + +type HostVolumeManager struct { + pluginDir string + sharedMountDir string + stateMgr HostVolumeStateManager + updateNodeVols HostVolumeNodeUpdater + log hclog.Logger +} + +func NewHostVolumeManager(logger hclog.Logger, config Config) *HostVolumeManager { + // db TODO(1.10.0): document plugin config options + return &HostVolumeManager{ + pluginDir: config.PluginDir, + sharedMountDir: config.SharedMountDir, + stateMgr: config.StateMgr, + updateNodeVols: config.UpdateNodeVols, + log: logger.Named("host_volume_manager"), + } +} + +func genVolConfig(req *cstructs.ClientHostVolumeCreateRequest, resp *HostVolumePluginCreateResponse) *structs.ClientHostVolumeConfig { + if req == nil || resp == nil { + return nil + } + return &structs.ClientHostVolumeConfig{ + Name: req.Name, + ID: req.ID, + Path: resp.Path, + + // dynamic volumes, like CSI, have more robust `capabilities`, + // so we always set ReadOnly to false, and let the scheduler + // decide when to ignore this and check capabilities instead. + ReadOnly: false, + } +} + +func (hvm *HostVolumeManager) restoreFromState(ctx context.Context) (VolumeMap, error) { + vols, err := hvm.stateMgr.GetDynamicHostVolumes() + if err != nil { + return nil, err + } + + volumes := make(VolumeMap) + var mut sync.Mutex + + if len(vols) == 0 { + return volumes, nil // nothing to do + } + + // re-"create" the volumes - plugins have the best knowledge of their + // side effects, and they must be idempotent. + group := multierror.Group{} + for _, vol := range vols { + group.Go(func() error { // db TODO(1.10.0): document that plugins must be safe to run concurrently + // missing plugins with associated volumes in state are considered + // client-stopping errors. they need to be fixed by cluster admins. + plug, err := hvm.getPlugin(vol.CreateReq.PluginID) + if err != nil { + return err + } + + resp, err := plug.Create(ctx, vol.CreateReq) + if err != nil { + // plugin execution errors are only logged + hvm.log.Error("failed to restore", "plugin_id", vol.CreateReq.PluginID, "volume_id", vol.ID, "error", err) + return nil + } + mut.Lock() + volumes[vol.CreateReq.Name] = genVolConfig(vol.CreateReq, resp) + mut.Unlock() + return nil + }) + } + mErr := group.Wait() + return volumes, helper.FlattenMultierror(mErr.ErrorOrNil()) +} + +func (hvm *HostVolumeManager) getPlugin(id string) (HostVolumePlugin, error) { + log := hvm.log.With("plugin_id", id) + + if id == HostVolumePluginMkdirID { + return &HostVolumePluginMkdir{ + ID: HostVolumePluginMkdirID, + TargetPath: hvm.sharedMountDir, + log: log, + }, nil + } + + path := filepath.Join(hvm.pluginDir, id) + return NewHostVolumePluginExternal(log, id, path, hvm.sharedMountDir) +} + +func (hvm *HostVolumeManager) Create(ctx context.Context, + req *cstructs.ClientHostVolumeCreateRequest) (*cstructs.ClientHostVolumeCreateResponse, error) { + + plug, err := hvm.getPlugin(req.PluginID) + if err != nil { + return nil, err + } + + pluginResp, err := plug.Create(ctx, req) + if err != nil { + return nil, err + } + + volState := &cstructs.HostVolumeState{ + ID: req.ID, + CreateReq: req, + } + if err := hvm.stateMgr.PutDynamicHostVolume(volState); err != nil { + // if we fail to write to state, delete the volume so it isn't left + // lying around without Nomad knowing about it. 
+ hvm.log.Error("failed to save volume in state, so deleting", "volume_id", req.ID, "error", err) + delErr := plug.Delete(ctx, &cstructs.ClientHostVolumeDeleteRequest{ + ID: req.ID, + PluginID: req.PluginID, + NodeID: req.NodeID, + HostPath: hvm.sharedMountDir, + Parameters: req.Parameters, + }) + if delErr != nil { + hvm.log.Warn("error deleting volume after state store failure", "volume_id", req.ID, "error", delErr) + err = multierror.Append(err, delErr) + } + return nil, helper.FlattenMultierror(err) + } + + hvm.updateNodeVols(req.Name, genVolConfig(req, pluginResp)) + + resp := &cstructs.ClientHostVolumeCreateResponse{ + VolumeName: req.Name, + VolumeID: req.ID, + HostPath: pluginResp.Path, + CapacityBytes: pluginResp.SizeBytes, + } + + return resp, nil +} + +func (hvm *HostVolumeManager) Delete(ctx context.Context, + req *cstructs.ClientHostVolumeDeleteRequest) (*cstructs.ClientHostVolumeDeleteResponse, error) { + + plug, err := hvm.getPlugin(req.PluginID) + if err != nil { + return nil, err + } + + err = plug.Delete(ctx, req) + if err != nil { + return nil, err + } + + if err := hvm.stateMgr.DeleteDynamicHostVolume(req.ID); err != nil { + hvm.log.Error("failed to delete volume in state", "volume_id", req.ID, "error", err) + return nil, err // bail so a user may retry + } + + hvm.updateNodeVols(req.Name, nil) + + resp := &cstructs.ClientHostVolumeDeleteResponse{ + VolumeName: req.Name, + VolumeID: req.ID, + } + + return resp, nil +} diff --git a/client/hostvolumemanager/host_volumes_test.go b/client/hostvolumemanager/host_volumes_test.go new file mode 100644 index 00000000000..15a3a2fca2e --- /dev/null +++ b/client/hostvolumemanager/host_volumes_test.go @@ -0,0 +1,99 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package hostvolumemanager + +import ( + "context" + "path/filepath" + "testing" + "time" + + cstate "github.com/hashicorp/nomad/client/state" + cstructs "github.com/hashicorp/nomad/client/structs" + "github.com/hashicorp/nomad/helper/testlog" + "github.com/hashicorp/nomad/nomad/structs" + "github.com/shoenig/test/must" +) + +// db TODO(1.10.0): improve hostvolumemanager tests. + +func TestNewHostVolumeManager_restoreState(t *testing.T) { + log := testlog.HCLogger(t) + vol := &cstructs.HostVolumeState{ + ID: "test-vol-id", + CreateReq: &cstructs.ClientHostVolumeCreateRequest{ + Name: "test-vol-name", + ID: "test-vol-id", + PluginID: "mkdir", + }, + } + fNode := newFakeNode() + + t.Run("happy", func(t *testing.T) { + // put our volume in state + state := cstate.NewMemDB(log) + must.NoError(t, state.PutDynamicHostVolume(vol)) + + // new volume manager should load it from state and run Create, + // resulting in a volume directory in this mountDir. 
+ mountDir := t.TempDir() + volPath := filepath.Join(mountDir, vol.ID) + + hvm := NewHostVolumeManager(log, Config{ + StateMgr: state, + UpdateNodeVols: fNode.updateVol, + PluginDir: "/wherever", + SharedMountDir: mountDir, + }) + + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + defer cancel() + vols, err := hvm.restoreFromState(ctx) + must.NoError(t, err) + + expect := map[string]*structs.ClientHostVolumeConfig{ + "test-vol-name": { + Name: "test-vol-name", + ID: "test-vol-id", + Path: volPath, + ReadOnly: false, + }, + } + must.Eq(t, expect, vols) + + must.DirExists(t, volPath) + }) + + t.Run("get error", func(t *testing.T) { + state := &cstate.ErrDB{} + hvm := NewHostVolumeManager(log, Config{ + StateMgr: state, + UpdateNodeVols: fNode.updateVol, + PluginDir: "/wherever", + SharedMountDir: "/wherever", + }) + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + defer cancel() + vols, err := hvm.restoreFromState(ctx) + // error loading state should break the world + must.ErrorIs(t, err, cstate.ErrDBError) + must.Nil(t, vols) + }) + + // db TODO: test plugin error +} + +type fakeNode struct { + vols VolumeMap +} + +func (n *fakeNode) updateVol(name string, volume *structs.ClientHostVolumeConfig) { + UpdateVolumeMap(n.vols, name, volume) +} + +func newFakeNode() *fakeNode { + return &fakeNode{ + vols: make(VolumeMap), + } +} diff --git a/client/hostvolumemanager/test_fixtures/test_plugin.sh b/client/hostvolumemanager/test_fixtures/test_plugin.sh new file mode 100755 index 00000000000..e93e37bbc76 --- /dev/null +++ b/client/hostvolumemanager/test_fixtures/test_plugin.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: BUSL-1.1 + +# plugin for host_volume_plugin_test.go +set -xeuo pipefail + +env 1>&2 + +test "$1" == "$OPERATION" + +echo 'all operations should ignore stderr' 1>&2 + +case $1 in + fingerprint) + echo '{"version": "0.0.2"}' ;; + create) + test "$2" == "$HOST_PATH" + test "$NODE_ID" == 'test-node' + test "$PARAMETERS" == '{"key":"val"}' + test "$CAPACITY_MIN_BYTES" -eq 5 + test "$CAPACITY_MAX_BYTES" -eq 10 + mkdir "$2" + printf '{"path": "%s", "bytes": 5, "context": %s}' "$2" "$PARAMETERS" + ;; + delete) + test "$2" == "$HOST_PATH" + test "$NODE_ID" == 'test-node' + test "$PARAMETERS" == '{"key":"val"}' + rm -rfv "$2" ;; + *) + echo "unknown operation $1" + exit 1 ;; +esac diff --git a/client/hostvolumemanager/test_fixtures/test_plugin_sad.sh b/client/hostvolumemanager/test_fixtures/test_plugin_sad.sh new file mode 100755 index 00000000000..6f883297a98 --- /dev/null +++ b/client/hostvolumemanager/test_fixtures/test_plugin_sad.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: BUSL-1.1 + +echo "$1: sad plugin is sad" +echo "$1: it tells you all about it in stderr" 1>&2 +exit 1 diff --git a/client/hostvolumemanager/volume_fingerprint.go b/client/hostvolumemanager/volume_fingerprint.go new file mode 100644 index 00000000000..37b0c84fbfb --- /dev/null +++ b/client/hostvolumemanager/volume_fingerprint.go @@ -0,0 +1,65 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package hostvolumemanager + +import ( + "context" + + "github.com/hashicorp/nomad/nomad/structs" +) + +// this file is for fingerprinting *volumes* +// *plugins* are detected in client/fingerprint/dynamic_host_volumes.go + +// HostVolumeNodeUpdater is used to add or remove volumes from the Node. 
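+// A nil volume removes the entry for name; a non-nil volume adds or replaces
+// it, matching the semantics of UpdateVolumeMap below.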
+type HostVolumeNodeUpdater func(name string, volume *structs.ClientHostVolumeConfig) + +// VolumeMap keys are volume `name`s, identical to Node.HostVolumes. +type VolumeMap map[string]*structs.ClientHostVolumeConfig + +// UpdateVolumeMap returns true if it changes the provided `volumes` map. +// If `vol` is nil, key `name` will be removed from the map, if present. +// If it is not nil, `name: vol` will be set on the map, if different. +// +// Since it may mutate the map, the caller should make a copy +// or acquire a lock as appropriate for their context. +func UpdateVolumeMap(volumes VolumeMap, name string, vol *structs.ClientHostVolumeConfig) (changed bool) { + current, exists := volumes[name] + if vol == nil { + if exists { + delete(volumes, name) + changed = true + } + } else { + if !exists || !vol.Equal(current) { + volumes[name] = vol + changed = true + } + } + return changed +} + +// WaitForFirstFingerprint implements client.FingerprintingPluginManager +func (hvm *HostVolumeManager) WaitForFirstFingerprint(ctx context.Context) <-chan struct{} { + // the fingerprint manager puts batchFirstFingerprintsTimeout (50 seconds) + // on the context that it sends to us here so we don't need another + // timeout. we just need to cancel to report when we are done. + ctx, cancel := context.WithCancel(ctx) + defer cancel() + volumes, err := hvm.restoreFromState(ctx) + if err != nil { + hvm.log.Error("failed to restore state", "error", err) + return ctx.Done() + } + for name, vol := range volumes { + hvm.updateNodeVols(name, vol) // => batchNodeUpdates.updateNodeFromHostVolume() + } + return ctx.Done() +} +func (hvm *HostVolumeManager) Run() {} +func (hvm *HostVolumeManager) Shutdown() {} +func (hvm *HostVolumeManager) PluginType() string { + // "Plugin"Type is misleading, because this is for *volumes* but ok. + return "dynamic_host_volume" +} diff --git a/client/hostvolumemanager/volume_fingerprint_test.go b/client/hostvolumemanager/volume_fingerprint_test.go new file mode 100644 index 00000000000..c5198eb7c71 --- /dev/null +++ b/client/hostvolumemanager/volume_fingerprint_test.go @@ -0,0 +1,81 @@ +// Copyright (c) HashiCorp, Inc. 
+// SPDX-License-Identifier: BUSL-1.1 + +package hostvolumemanager + +import ( + "testing" + + "github.com/hashicorp/nomad/nomad/structs" + "github.com/shoenig/test/must" +) + +func TestUpdateVolumeMap(t *testing.T) { + cases := []struct { + name string + + vols VolumeMap + volName string + vol *structs.ClientHostVolumeConfig + + expectMap VolumeMap + expectChange bool + }{ + { + name: "delete absent", + vols: VolumeMap{}, + volName: "anything", + vol: nil, + expectMap: VolumeMap{}, + expectChange: false, + }, + { + name: "delete present", + vols: VolumeMap{"deleteme": {}}, + volName: "deleteme", + vol: nil, + expectMap: VolumeMap{}, + expectChange: true, + }, + { + name: "add absent", + vols: VolumeMap{}, + volName: "addme", + vol: &structs.ClientHostVolumeConfig{}, + expectMap: VolumeMap{"addme": {}}, + expectChange: true, + }, + { + name: "add present", + vols: VolumeMap{"ignoreme": {}}, + volName: "ignoreme", + vol: &structs.ClientHostVolumeConfig{}, + expectMap: VolumeMap{"ignoreme": {}}, + expectChange: false, + }, + { + // this should not happen, but test anyway + name: "change present", + vols: VolumeMap{"changeme": {Path: "before"}}, + volName: "changeme", + vol: &structs.ClientHostVolumeConfig{Path: "after"}, + expectMap: VolumeMap{"changeme": {Path: "after"}}, + expectChange: true, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + + changed := UpdateVolumeMap(tc.vols, tc.volName, tc.vol) + must.Eq(t, tc.expectMap, tc.vols) + + if tc.expectChange { + must.True(t, changed, must.Sprint("expect volume to have been changed")) + } else { + must.False(t, changed, must.Sprint("expect volume not to have been changed")) + } + + }) + } +} diff --git a/client/node_updater.go b/client/node_updater.go index 6fe51cdf56e..c02a2dd9950 100644 --- a/client/node_updater.go +++ b/client/node_updater.go @@ -10,6 +10,7 @@ import ( "time" "github.com/hashicorp/nomad/client/devicemanager" + hvm "github.com/hashicorp/nomad/client/hostvolumemanager" "github.com/hashicorp/nomad/client/pluginmanager/csimanager" "github.com/hashicorp/nomad/client/pluginmanager/drivermanager" "github.com/hashicorp/nomad/nomad/structs" @@ -30,7 +31,7 @@ func (c *Client) batchFirstFingerprints() { ch, err := c.pluginManagers.WaitForFirstFingerprint(ctx) if err != nil { - c.logger.Warn("failed to batch initial fingerprint updates, switching to incemental updates") + c.logger.Warn("failed to batch initial fingerprint updates, switching to incremental updates") goto SEND_BATCH } @@ -46,6 +47,12 @@ SEND_BATCH: newConfig := c.config.Copy() + // host volume updates + var hostVolChanged bool + c.batchNodeUpdates.batchHostVolumeUpdates(func(name string, vol *structs.ClientHostVolumeConfig) { + hostVolChanged = hvm.UpdateVolumeMap(newConfig.Node.HostVolumes, name, vol) + }) + // csi updates var csiChanged bool c.batchNodeUpdates.batchCSIUpdates(func(name string, info *structs.CSIInfo) { @@ -85,7 +92,7 @@ SEND_BATCH: }) // only update the node if changes occurred - if driverChanged || devicesChanged || csiChanged { + if driverChanged || devicesChanged || csiChanged || hostVolChanged { c.config = newConfig c.updateNode() } @@ -123,6 +130,23 @@ func (c *Client) updateNodeFromCSI(name string, info *structs.CSIInfo) { } } +func (c *Client) updateNodeFromHostVol(name string, vol *structs.ClientHostVolumeConfig) { + c.configLock.Lock() + defer c.configLock.Unlock() + + newConfig := c.config.Copy() + + if newConfig.Node.HostVolumes == nil { + 
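+		// the node may not have any host volumes configured yet, so initialize
+		// the map before recording this update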
newConfig.Node.HostVolumes = make(map[string]*structs.ClientHostVolumeConfig) + } + + changed := hvm.UpdateVolumeMap(newConfig.Node.HostVolumes, name, vol) + if changed { + c.config = newConfig + c.updateNode() + } +} + // updateNodeFromCSIControllerLocked makes the changes to the node from a csi // update but does not send the update to the server. c.configLock must be held // before calling this func. @@ -336,12 +360,18 @@ type batchNodeUpdates struct { csiBatched bool csiCB csimanager.UpdateNodeCSIInfoFunc csiMu sync.Mutex + + hostVolumes hvm.VolumeMap + hostVolumesBatched bool + hostVolumeCB hvm.HostVolumeNodeUpdater + hostVolumeMu sync.Mutex } func newBatchNodeUpdates( driverCB drivermanager.UpdateNodeDriverInfoFn, devicesCB devicemanager.UpdateNodeDevicesFn, - csiCB csimanager.UpdateNodeCSIInfoFunc) *batchNodeUpdates { + csiCB csimanager.UpdateNodeCSIInfoFunc, + hostVolumeCB hvm.HostVolumeNodeUpdater) *batchNodeUpdates { return &batchNodeUpdates{ drivers: make(map[string]*structs.DriverInfo), @@ -351,7 +381,34 @@ func newBatchNodeUpdates( csiNodePlugins: make(map[string]*structs.CSIInfo), csiControllerPlugins: make(map[string]*structs.CSIInfo), csiCB: csiCB, + hostVolumes: make(hvm.VolumeMap), + hostVolumeCB: hostVolumeCB, + } +} + +// this is the one that the volume manager runs +func (b *batchNodeUpdates) updateNodeFromHostVolume(name string, vol *structs.ClientHostVolumeConfig) { + b.hostVolumeMu.Lock() + defer b.hostVolumeMu.Unlock() + if b.hostVolumesBatched { + b.hostVolumeCB(name, vol) // => Client.updateNodeFromHostVol() + return + } + hvm.UpdateVolumeMap(b.hostVolumes, name, vol) +} + +// this one runs on client start +func (b *batchNodeUpdates) batchHostVolumeUpdates(f hvm.HostVolumeNodeUpdater) error { + b.hostVolumeMu.Lock() + defer b.hostVolumeMu.Unlock() + if b.hostVolumesBatched { + return fmt.Errorf("host volume updates already batched") + } + b.hostVolumesBatched = true + for name, vol := range b.hostVolumes { + f(name, vol) // => c.batchNodeUpdates.batchHostVolumeUpdates(FUNC } + return nil } // updateNodeFromCSI implements csimanager.UpdateNodeCSIInfoFunc and is used in diff --git a/client/rpc.go b/client/rpc.go index bfdf051c121..8d2525d5af1 100644 --- a/client/rpc.go +++ b/client/rpc.go @@ -28,6 +28,7 @@ type rpcEndpoints struct { Allocations *Allocations Agent *Agent NodeMeta *NodeMeta + HostVolume *HostVolume } // ClientRPC is used to make a local, client only RPC call @@ -293,6 +294,7 @@ func (c *Client) setupClientRpc(rpcs map[string]interface{}) { c.endpoints.Allocations = NewAllocationsEndpoint(c) c.endpoints.Agent = NewAgentEndpoint(c) c.endpoints.NodeMeta = newNodeMetaEndpoint(c) + c.endpoints.HostVolume = newHostVolumesEndpoint(c) c.setupClientRpcServer(c.rpcServer) } @@ -308,6 +310,7 @@ func (c *Client) setupClientRpcServer(server *rpc.Server) { server.Register(c.endpoints.Allocations) server.Register(c.endpoints.Agent) server.Register(c.endpoints.NodeMeta) + server.Register(c.endpoints.HostVolume) } // rpcConnListener is a long lived function that listens for new connections diff --git a/client/state/db_bolt.go b/client/state/db_bolt.go index 2471cda3d14..bef111f6e9a 100644 --- a/client/state/db_bolt.go +++ b/client/state/db_bolt.go @@ -138,6 +138,8 @@ var ( // nodeRegistrationKey is the key at which node registration data is stored. nodeRegistrationKey = []byte("node_registration") + + hostVolBucket = []byte("host_volumes_to_create") ) // taskBucketName returns the bucket name for the given task name. 
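The hunk below adds the Bolt-backed implementation of the new dynamic host volume state methods, keyed by volume ID under a dedicated `host_volumes_to_create` bucket. As a rough sketch of the Put/Get/Delete round-trip those methods implement, here is the same StateDB surface exercised against the in-memory `MemDB` added elsewhere in this diff (the import aliases and null logger are assumptions, not part of the change):

```go
package main

import (
	"fmt"

	hclog "github.com/hashicorp/go-hclog"
	cstate "github.com/hashicorp/nomad/client/state"
	cstructs "github.com/hashicorp/nomad/client/structs"
)

func main() {
	// MemDB and BoltStateDB both satisfy the StateDB interface extended below.
	db := cstate.NewMemDB(hclog.NewNullLogger())

	vol := &cstructs.HostVolumeState{
		ID: "example-vol-id",
		CreateReq: &cstructs.ClientHostVolumeCreateRequest{
			ID:       "example-vol-id",
			Name:     "example-vol",
			PluginID: "mkdir",
		},
	}

	// persist on create, read back on client restart, remove on delete
	if err := db.PutDynamicHostVolume(vol); err != nil {
		panic(err)
	}
	vols, err := db.GetDynamicHostVolumes()
	if err != nil {
		panic(err)
	}
	fmt.Println(len(vols)) // 1

	if err := db.DeleteDynamicHostVolume(vol.ID); err != nil {
		panic(err)
	}
}
```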
@@ -1048,6 +1050,45 @@ func (s *BoltStateDB) GetNodeRegistration() (*cstructs.NodeRegistration, error) return ®, err } +func (s *BoltStateDB) PutDynamicHostVolume(vol *cstructs.HostVolumeState) error { + return s.db.Update(func(tx *boltdd.Tx) error { + b, err := tx.CreateBucketIfNotExists(hostVolBucket) + if err != nil { + return err + } + return b.Put([]byte(vol.ID), vol) + }) +} + +func (s *BoltStateDB) GetDynamicHostVolumes() ([]*cstructs.HostVolumeState, error) { + var vols []*cstructs.HostVolumeState + err := s.db.View(func(tx *boltdd.Tx) error { + b := tx.Bucket(hostVolBucket) + if b == nil { + return nil + } + return b.BoltBucket().ForEach(func(k, v []byte) error { + var vol cstructs.HostVolumeState + err := b.Get(k, &vol) + if err != nil { + return err + } + vols = append(vols, &vol) + return nil + }) + }) + if boltdd.IsErrNotFound(err) { + return nil, nil + } + return vols, err +} + +func (s *BoltStateDB) DeleteDynamicHostVolume(id string) error { + return s.db.Update(func(tx *boltdd.Tx) error { + return tx.Bucket(hostVolBucket).Delete([]byte(id)) + }) +} + // init initializes metadata entries in a newly created state database. func (s *BoltStateDB) init() error { return s.db.Update(func(tx *boltdd.Tx) error { diff --git a/client/state/db_error.go b/client/state/db_error.go index 78ef01b7850..6c99defa2ad 100644 --- a/client/state/db_error.go +++ b/client/state/db_error.go @@ -4,6 +4,7 @@ package state import ( + "errors" "fmt" arstate "github.com/hashicorp/nomad/client/allocrunner/state" @@ -16,6 +17,10 @@ import ( "github.com/hashicorp/nomad/nomad/structs" ) +var _ StateDB = &ErrDB{} + +var ErrDBError = errors.New("Error!") + // ErrDB implements a StateDB that returns errors on restore methods, used for testing type ErrDB struct { // Allocs is a preset slice of allocations used in GetAllAllocations @@ -154,6 +159,16 @@ func (m *ErrDB) GetNodeRegistration() (*cstructs.NodeRegistration, error) { return nil, fmt.Errorf("Error!") } +func (m *ErrDB) PutDynamicHostVolume(_ *cstructs.HostVolumeState) error { + return ErrDBError +} +func (m *ErrDB) GetDynamicHostVolumes() ([]*cstructs.HostVolumeState, error) { + return nil, ErrDBError +} +func (m *ErrDB) DeleteDynamicHostVolume(_ string) error { + return ErrDBError +} + func (m *ErrDB) Close() error { return fmt.Errorf("Error!") } diff --git a/client/state/db_mem.go b/client/state/db_mem.go index 91e6481b4c9..32abd883e77 100644 --- a/client/state/db_mem.go +++ b/client/state/db_mem.go @@ -60,6 +60,8 @@ type MemDB struct { nodeRegistration *cstructs.NodeRegistration + dynamicHostVolumes map[string]*cstructs.HostVolumeState + logger hclog.Logger mu sync.RWMutex @@ -68,15 +70,16 @@ type MemDB struct { func NewMemDB(logger hclog.Logger) *MemDB { logger = logger.Named("memdb") return &MemDB{ - allocs: make(map[string]*structs.Allocation), - deployStatus: make(map[string]*structs.AllocDeploymentStatus), - networkStatus: make(map[string]*structs.AllocNetworkStatus), - acknowledgedState: make(map[string]*arstate.State), - localTaskState: make(map[string]map[string]*state.LocalState), - taskState: make(map[string]map[string]*structs.TaskState), - checks: make(checks.ClientResults), - identities: make(map[string][]*structs.SignedWorkloadIdentity), - logger: logger, + allocs: make(map[string]*structs.Allocation), + deployStatus: make(map[string]*structs.AllocDeploymentStatus), + networkStatus: make(map[string]*structs.AllocNetworkStatus), + acknowledgedState: make(map[string]*arstate.State), + localTaskState: 
make(map[string]map[string]*state.LocalState), + taskState: make(map[string]map[string]*structs.TaskState), + checks: make(checks.ClientResults), + identities: make(map[string][]*structs.SignedWorkloadIdentity), + dynamicHostVolumes: make(map[string]*cstructs.HostVolumeState), + logger: logger, } } @@ -354,6 +357,28 @@ func (m *MemDB) GetNodeRegistration() (*cstructs.NodeRegistration, error) { return m.nodeRegistration, nil } +func (m *MemDB) PutDynamicHostVolume(vol *cstructs.HostVolumeState) error { + m.mu.Lock() + defer m.mu.Unlock() + m.dynamicHostVolumes[vol.ID] = vol + return nil +} +func (m *MemDB) GetDynamicHostVolumes() ([]*cstructs.HostVolumeState, error) { + m.mu.Lock() + defer m.mu.Unlock() + var vols []*cstructs.HostVolumeState + for _, vol := range m.dynamicHostVolumes { + vols = append(vols, vol) + } + return vols, nil +} +func (m *MemDB) DeleteDynamicHostVolume(s string) error { + m.mu.Lock() + defer m.mu.Unlock() + delete(m.dynamicHostVolumes, s) + return nil +} + func (m *MemDB) Close() error { m.mu.Lock() defer m.mu.Unlock() diff --git a/client/state/db_noop.go b/client/state/db_noop.go index 345025a4d52..09488c181a1 100644 --- a/client/state/db_noop.go +++ b/client/state/db_noop.go @@ -14,6 +14,8 @@ import ( "github.com/hashicorp/nomad/nomad/structs" ) +var _ StateDB = &NoopDB{} + // NoopDB implements a StateDB that does not persist any data. type NoopDB struct{} @@ -145,6 +147,16 @@ func (n NoopDB) GetNodeRegistration() (*cstructs.NodeRegistration, error) { return nil, nil } +func (n NoopDB) PutDynamicHostVolume(_ *cstructs.HostVolumeState) error { + return nil +} +func (n NoopDB) GetDynamicHostVolumes() ([]*cstructs.HostVolumeState, error) { + return nil, nil +} +func (n NoopDB) DeleteDynamicHostVolume(_ string) error { + return nil +} + func (n NoopDB) Close() error { return nil } diff --git a/client/state/db_test.go b/client/state/db_test.go index d13431a6207..3a03cf3a2cc 100644 --- a/client/state/db_test.go +++ b/client/state/db_test.go @@ -15,6 +15,7 @@ import ( dmstate "github.com/hashicorp/nomad/client/devicemanager/state" "github.com/hashicorp/nomad/client/dynamicplugins" driverstate "github.com/hashicorp/nomad/client/pluginmanager/drivermanager/state" + cstructs "github.com/hashicorp/nomad/client/structs" "github.com/hashicorp/nomad/helper/testlog" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" @@ -384,6 +385,41 @@ func TestStateDB_DynamicRegistry(t *testing.T) { }) } +// TestStateDB_HostVolumes asserts the behavior of dynamic host volume state. 
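+// It round-trips a single volume through Put, Get, and Delete against each
+// StateDB implementation exercised by the testDB helper.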
+func TestStateDB_HostVolumes(t *testing.T) { + ci.Parallel(t) + + testDB(t, func(t *testing.T, db StateDB) { + vols, err := db.GetDynamicHostVolumes() + must.NoError(t, err) + must.Len(t, 0, vols) + + vol := &cstructs.HostVolumeState{ + ID: "test-vol-id", + CreateReq: &cstructs.ClientHostVolumeCreateRequest{ + ID: "test-vol-id", + Name: "test-vol-name", + PluginID: "test-plugin-id", + NodeID: "test-node-id", + RequestedCapacityMinBytes: 5, + RequestedCapacityMaxBytes: 10, + Parameters: map[string]string{"test": "ing"}, + }, + } + + must.NoError(t, db.PutDynamicHostVolume(vol)) + vols, err = db.GetDynamicHostVolumes() + must.NoError(t, err) + must.Len(t, 1, vols) + must.Eq(t, vol, vols[0]) + + must.NoError(t, db.DeleteDynamicHostVolume(vol.ID)) + vols, err = db.GetDynamicHostVolumes() + must.NoError(t, err) + must.Len(t, 0, vols) + }) +} + func TestStateDB_CheckResult_keyForCheck(t *testing.T) { ci.Parallel(t) diff --git a/client/state/interface.go b/client/state/interface.go index a9cd4845038..0460a75e20f 100644 --- a/client/state/interface.go +++ b/client/state/interface.go @@ -137,6 +137,10 @@ type StateDB interface { PutNodeRegistration(*cstructs.NodeRegistration) error GetNodeRegistration() (*cstructs.NodeRegistration, error) + PutDynamicHostVolume(*cstructs.HostVolumeState) error + GetDynamicHostVolumes() ([]*cstructs.HostVolumeState, error) + DeleteDynamicHostVolume(string) error + // Close the database. Unsafe for further use after calling regardless // of return value. Close() error diff --git a/client/structs/host_volumes.go b/client/structs/host_volumes.go new file mode 100644 index 00000000000..bff543588f4 --- /dev/null +++ b/client/structs/host_volumes.go @@ -0,0 +1,79 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package structs + +type HostVolumeState struct { + ID string + CreateReq *ClientHostVolumeCreateRequest +} + +type ClientHostVolumeCreateRequest struct { + // ID is a UUID-like string generated by the server. + ID string + + // Name is the name that group.volume will use to identify the volume + // source. Not expected to be unique. + Name string + + // PluginID is the name of the host volume plugin on the client that will be + // used for creating the volume. If omitted, the client will use its default + // built-in plugin. + PluginID string + + // NodeID is the node where the volume is placed. It's included in the + // client RPC request so that the server can route the request to the + // correct node. + NodeID string + + // Because storage may allow only specific intervals of size, we accept a + // min and max and return the actual capacity when the volume is created or + // updated on the client + RequestedCapacityMinBytes int64 + RequestedCapacityMaxBytes int64 + + // Parameters are an opaque map of parameters for the host volume plugin. + Parameters map[string]string +} + +type ClientHostVolumeCreateResponse struct { + VolumeName string + VolumeID string + + // Path is the host path where the volume's mount point was created. We send + // this back to the server to make debugging easier. + HostPath string + + // Capacity is the size in bytes that was actually provisioned by the host + // volume plugin. + CapacityBytes int64 +} + +type ClientHostVolumeDeleteRequest struct { + // ID is a UUID-like string generated by the server. + ID string + + Name string + + // PluginID is the name of the host volume plugin on the client that will be + // used for deleting the volume. 
If omitted, the client will use its default + // built-in plugin. + PluginID string + + // NodeID is the node where the volume is placed. It's included in the + // client RPC request so that the server can route the request to the + // correct node. + NodeID string + + // Path is the host path where the volume's mount point was created. We send + // this from the server to allow verification by plugins + HostPath string + + // Parameters are an opaque map of parameters for the host volume plugin. + Parameters map[string]string +} + +type ClientHostVolumeDeleteResponse struct { + VolumeName string + VolumeID string +} diff --git a/command/agent/agent.go b/command/agent/agent.go index 6095e2dce99..40d467e23d5 100644 --- a/command/agent/agent.go +++ b/command/agent/agent.go @@ -724,6 +724,7 @@ func convertClientConfig(agentConfig *Config) (*clientconfig.Config, error) { if agentConfig.DataDir != "" { conf.StateDir = filepath.Join(agentConfig.DataDir, "client") conf.AllocDir = filepath.Join(agentConfig.DataDir, "alloc") + conf.HostVolumePluginDir = filepath.Join(agentConfig.DataDir, "host_volume_plugins") dataParent := filepath.Dir(agentConfig.DataDir) conf.AllocMountsDir = filepath.Join(dataParent, "alloc_mounts") } @@ -736,6 +737,9 @@ func convertClientConfig(agentConfig *Config) (*clientconfig.Config, error) { if agentConfig.Client.AllocMountsDir != "" { conf.AllocMountsDir = agentConfig.Client.AllocMountsDir } + if agentConfig.Client.HostVolumePluginDir != "" { + conf.HostVolumePluginDir = agentConfig.Client.HostVolumePluginDir + } if agentConfig.Client.NetworkInterface != "" { conf.NetworkInterface = agentConfig.Client.NetworkInterface } diff --git a/command/agent/command.go b/command/agent/command.go index 088bf9a819c..8184a067ac1 100644 --- a/command/agent/command.go +++ b/command/agent/command.go @@ -111,6 +111,7 @@ func (c *Command) readConfig() *Config { flags.StringVar(&cmdConfig.Client.StateDir, "state-dir", "", "") flags.StringVar(&cmdConfig.Client.AllocDir, "alloc-dir", "", "") flags.StringVar(&cmdConfig.Client.AllocMountsDir, "alloc-mounts-dir", "", "") + flags.StringVar(&cmdConfig.Client.HostVolumePluginDir, "host-volume-plugin-dir", "", "") flags.StringVar(&cmdConfig.Client.NodeClass, "node-class", "", "") flags.StringVar(&cmdConfig.Client.NodePool, "node-pool", "", "") flags.StringVar(&servers, "servers", "", "") @@ -384,11 +385,12 @@ func (c *Command) IsValidConfig(config, cmdConfig *Config) bool { // Verify the paths are absolute. dirs := map[string]string{ - "data-dir": config.DataDir, - "plugin-dir": config.PluginDir, - "alloc-dir": config.Client.AllocDir, - "alloc-mounts-dir": config.Client.AllocMountsDir, - "state-dir": config.Client.StateDir, + "data-dir": config.DataDir, + "plugin-dir": config.PluginDir, + "alloc-dir": config.Client.AllocDir, + "alloc-mounts-dir": config.Client.AllocMountsDir, + "host-volume-plugin-dir": config.Client.HostVolumePluginDir, + "state-dir": config.Client.StateDir, } for k, dir := range dirs { if dir == "" { @@ -735,6 +737,7 @@ func (c *Command) AutocompleteFlags() complete.Flags { "-region": complete.PredictAnything, "-data-dir": complete.PredictDirs("*"), "-plugin-dir": complete.PredictDirs("*"), + "-host-volume-plugin-dir": complete.PredictDirs("*"), "-dc": complete.PredictAnything, "-log-level": complete.PredictAnything, "-json-logs": complete.PredictNothing, @@ -1568,6 +1571,10 @@ Client Options: The default speed for network interfaces in MBits if the link speed can not be determined dynamically. 
+ -host-volume-plugin-dir + Directory containing dynamic host volume plugins. The default is + /host_volume_plugins. + ACL Options: -acl-enabled diff --git a/command/agent/config.go b/command/agent/config.go index 4f8e41f02c8..acfb9bc6344 100644 --- a/command/agent/config.go +++ b/command/agent/config.go @@ -229,6 +229,10 @@ type ClientConfig struct { // AllocMountsDir is the directory for storing mounts into allocation data AllocMountsDir string `hcl:"alloc_mounts_dir"` + // HostVolumePluginDir directory contains dynamic host volume plugins + // db TODO(1.10.0): document default directory is alongside alloc_mounts + HostVolumePluginDir string `hcl:"host_volume_plugin_dir"` + // Servers is a list of known server addresses. These are as "host:port" Servers []string `hcl:"servers"` @@ -2316,6 +2320,9 @@ func (a *ClientConfig) Merge(b *ClientConfig) *ClientConfig { if b.AllocMountsDir != "" { result.AllocMountsDir = b.AllocMountsDir } + if b.HostVolumePluginDir != "" { + result.HostVolumePluginDir = b.HostVolumePluginDir + } if b.NodeClass != "" { result.NodeClass = b.NodeClass } diff --git a/command/agent/csi_endpoint.go b/command/agent/csi_endpoint.go index 2f57aef8865..325ce5bb6d0 100644 --- a/command/agent/csi_endpoint.go +++ b/command/agent/csi_endpoint.go @@ -20,22 +20,12 @@ func (s *HTTPServer) CSIVolumesRequest(resp http.ResponseWriter, req *http.Reque return nil, CodedError(405, ErrInvalidMethod) } - // Type filters volume lists to a specific type. When support for non-CSI volumes is - // introduced, we'll need to dispatch here - query := req.URL.Query() - qtype, ok := query["type"] - if !ok { - return []*structs.CSIVolListStub{}, nil - } - if qtype[0] != "csi" { - return nil, nil - } - args := structs.CSIVolumeListRequest{} if s.parse(resp, req, &args.Region, &args.QueryOptions) { return nil, nil } + query := req.URL.Query() args.Prefix = query.Get("prefix") args.PluginID = query.Get("plugin_id") args.NodeID = query.Get("node_id") diff --git a/command/agent/host_volume_endpoint.go b/command/agent/host_volume_endpoint.go new file mode 100644 index 00000000000..db12cca929f --- /dev/null +++ b/command/agent/host_volume_endpoint.go @@ -0,0 +1,143 @@ +// Copyright (c) HashiCorp, Inc. 
+// SPDX-License-Identifier: BUSL-1.1 + +package agent + +import ( + "net/http" + "strings" + + "github.com/hashicorp/nomad/nomad/structs" +) + +func (s *HTTPServer) HostVolumesListRequest(resp http.ResponseWriter, req *http.Request) (any, error) { + args := structs.HostVolumeListRequest{} + if s.parse(resp, req, &args.Region, &args.QueryOptions) { + return nil, nil + } + + query := req.URL.Query() + args.Prefix = query.Get("prefix") + args.NodePool = query.Get("node_pool") + args.NodeID = query.Get("node_id") + + var out structs.HostVolumeListResponse + if err := s.agent.RPC("HostVolume.List", &args, &out); err != nil { + return nil, err + } + + setMeta(resp, &out.QueryMeta) + return out.Volumes, nil +} + +// HostVolumeSpecificRequest dispatches GET and PUT +func (s *HTTPServer) HostVolumeSpecificRequest(resp http.ResponseWriter, req *http.Request) (any, error) { + // Tokenize the suffix of the path to get the volume id, tolerating a + // present or missing trailing slash + reqSuffix := strings.TrimPrefix(req.URL.Path, "/v1/volume/host/") + tokens := strings.FieldsFunc(reqSuffix, func(c rune) bool { return c == '/' }) + + if len(tokens) == 0 { + return nil, CodedError(404, resourceNotFoundErr) + } + + switch req.Method { + + // PUT /v1/volume/host/create + // POST /v1/volume/host/create + // PUT /v1/volume/host/register + // POST /v1/volume/host/register + case http.MethodPut, http.MethodPost: + switch tokens[0] { + case "create", "": + return s.hostVolumeCreate(resp, req) + case "register": + return s.hostVolumeRegister(resp, req) + default: + return nil, CodedError(404, resourceNotFoundErr) + } + + // DELETE /v1/volume/host/:id + case http.MethodDelete: + return s.hostVolumeDelete(tokens[0], resp, req) + + // GET /v1/volume/host/:id + case http.MethodGet: + return s.hostVolumeGet(tokens[0], resp, req) + } + + return nil, CodedError(404, resourceNotFoundErr) +} + +func (s *HTTPServer) hostVolumeGet(id string, resp http.ResponseWriter, req *http.Request) (any, error) { + args := structs.HostVolumeGetRequest{ + ID: id, + } + if s.parse(resp, req, &args.Region, &args.QueryOptions) { + return nil, nil + } + + var out structs.HostVolumeGetResponse + if err := s.agent.RPC("HostVolume.Get", &args, &out); err != nil { + return nil, err + } + + setMeta(resp, &out.QueryMeta) + if out.Volume == nil { + return nil, CodedError(404, "volume not found") + } + + return out.Volume, nil +} + +func (s *HTTPServer) hostVolumeRegister(resp http.ResponseWriter, req *http.Request) (any, error) { + + args := structs.HostVolumeRegisterRequest{} + if err := decodeBody(req, &args); err != nil { + return err, CodedError(400, err.Error()) + } + s.parseWriteRequest(req, &args.WriteRequest) + + var out structs.HostVolumeRegisterResponse + if err := s.agent.RPC("HostVolume.Register", &args, &out); err != nil { + return nil, err + } + + setIndex(resp, out.Index) + + return &out, nil +} + +func (s *HTTPServer) hostVolumeCreate(resp http.ResponseWriter, req *http.Request) (any, error) { + + args := structs.HostVolumeCreateRequest{} + if err := decodeBody(req, &args); err != nil { + return err, CodedError(400, err.Error()) + } + s.parseWriteRequest(req, &args.WriteRequest) + + var out structs.HostVolumeCreateResponse + if err := s.agent.RPC("HostVolume.Create", &args, &out); err != nil { + return nil, err + } + + setIndex(resp, out.Index) + + return &out, nil +} + +func (s *HTTPServer) hostVolumeDelete(id string, resp http.ResponseWriter, req *http.Request) (any, error) { + // HTTP API only supports deleting a 
single ID because of compatibility with + // the existing HTTP routes for CSI + args := structs.HostVolumeDeleteRequest{VolumeID: id} + s.parseWriteRequest(req, &args.WriteRequest) + + var out structs.HostVolumeDeleteResponse + if err := s.agent.RPC("HostVolume.Delete", &args, &out); err != nil { + return nil, err + } + + setIndex(resp, out.Index) + + return nil, nil +} diff --git a/command/agent/host_volume_endpoint_test.go b/command/agent/host_volume_endpoint_test.go new file mode 100644 index 00000000000..ddff7a33fbb --- /dev/null +++ b/command/agent/host_volume_endpoint_test.go @@ -0,0 +1,103 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package agent + +import ( + "fmt" + "net/http" + "net/http/httptest" + "net/url" + "testing" + + "github.com/hashicorp/nomad/nomad/mock" + "github.com/hashicorp/nomad/nomad/structs" + "github.com/shoenig/test/must" +) + +func TestHostVolumeEndpoint_CRUD(t *testing.T) { + httpTest(t, nil, func(s *TestAgent) { + + // Create a volume on the test node + + vol := mock.HostVolumeRequest(structs.DefaultNamespace) + vol.NodePool = "" + vol.Constraints = nil + reqBody := struct { + Volume *structs.HostVolume + }{Volume: vol} + buf := encodeReq(reqBody) + req, err := http.NewRequest(http.MethodPut, "/v1/volume/host/create", buf) + must.NoError(t, err) + respW := httptest.NewRecorder() + + // Make the request and verify we got a valid volume back + + obj, err := s.Server.HostVolumeSpecificRequest(respW, req) + must.NoError(t, err) + must.NotNil(t, obj) + resp := obj.(*structs.HostVolumeCreateResponse) + must.NotNil(t, resp.Volume) + must.Eq(t, vol.Name, resp.Volume.Name) + must.Eq(t, s.client.NodeID(), resp.Volume.NodeID) + must.NotEq(t, "", respW.Result().Header.Get("X-Nomad-Index")) + + volID := resp.Volume.ID + + // Verify volume was created + + path, err := url.JoinPath("/v1/volume/host/", volID) + must.NoError(t, err) + req, err = http.NewRequest(http.MethodGet, path, nil) + must.NoError(t, err) + obj, err = s.Server.HostVolumeSpecificRequest(respW, req) + must.NoError(t, err) + must.NotNil(t, obj) + respVol := obj.(*structs.HostVolume) + must.Eq(t, s.client.NodeID(), respVol.NodeID) + + // Update the volume (note: this doesn't update the volume on the client) + + vol = respVol.Copy() + vol.Parameters = map[string]string{"bar": "foo"} // swaps key and value + reqBody = struct { + Volume *structs.HostVolume + }{Volume: vol} + buf = encodeReq(reqBody) + req, err = http.NewRequest(http.MethodPut, "/v1/volume/host/register", buf) + must.NoError(t, err) + obj, err = s.Server.HostVolumeSpecificRequest(respW, req) + must.NoError(t, err) + must.NotNil(t, obj) + regResp := obj.(*structs.HostVolumeRegisterResponse) + must.NotNil(t, regResp.Volume) + must.Eq(t, map[string]string{"bar": "foo"}, regResp.Volume.Parameters) + + // Verify volume was updated + + path = fmt.Sprintf("/v1/volumes?type=host&node_id=%s", s.client.NodeID()) + req, err = http.NewRequest(http.MethodGet, path, nil) + must.NoError(t, err) + obj, err = s.Server.HostVolumesListRequest(respW, req) + must.NoError(t, err) + vols := obj.([]*structs.HostVolumeStub) + must.Len(t, 1, vols) + + // Delete the volume + + req, err = http.NewRequest(http.MethodDelete, fmt.Sprintf("/v1/volume/host/%s", volID), nil) + must.NoError(t, err) + _, err = s.Server.HostVolumeSpecificRequest(respW, req) + must.NoError(t, err) + + // Verify volume was deleted + + path, err = url.JoinPath("/v1/volume/host/", volID) + must.NoError(t, err) + req, err = 
http.NewRequest(http.MethodGet, path, nil) + must.NoError(t, err) + obj, err = s.Server.HostVolumeSpecificRequest(respW, req) + must.EqError(t, err, "volume not found") + must.Nil(t, obj) + }) +} diff --git a/command/agent/http.go b/command/agent/http.go index 3f4db49d65c..6d47e4e78dd 100644 --- a/command/agent/http.go +++ b/command/agent/http.go @@ -404,12 +404,14 @@ func (s *HTTPServer) registerHandlers(enableDebug bool) { s.mux.HandleFunc("/v1/deployments", s.wrap(s.DeploymentsRequest)) s.mux.HandleFunc("/v1/deployment/", s.wrap(s.DeploymentSpecificRequest)) + s.mux.HandleFunc("GET /v1/volumes", s.wrap(s.ListVolumesRequest)) s.mux.HandleFunc("/v1/volumes", s.wrap(s.CSIVolumesRequest)) s.mux.HandleFunc("/v1/volumes/external", s.wrap(s.CSIExternalVolumesRequest)) s.mux.HandleFunc("/v1/volumes/snapshot", s.wrap(s.CSISnapshotsRequest)) s.mux.HandleFunc("/v1/volume/csi/", s.wrap(s.CSIVolumeSpecificRequest)) s.mux.HandleFunc("/v1/plugins", s.wrap(s.CSIPluginsRequest)) s.mux.HandleFunc("/v1/plugin/csi/", s.wrap(s.CSIPluginSpecificRequest)) + s.mux.HandleFunc("/v1/volume/host/", s.wrap(s.HostVolumeSpecificRequest)) s.mux.HandleFunc("/v1/acl/policies", s.wrap(s.ACLPoliciesRequest)) s.mux.HandleFunc("/v1/acl/policy/", s.wrap(s.ACLPolicySpecificRequest)) diff --git a/command/agent/job_endpoint.go b/command/agent/job_endpoint.go index fd0c0c03501..5ebe21e1eb3 100644 --- a/command/agent/job_endpoint.go +++ b/command/agent/job_endpoint.go @@ -1334,6 +1334,7 @@ func ApiTgToStructsTG(job *structs.Job, taskGroup *api.TaskGroup, tg *structs.Ta Name: v.Name, Type: v.Type, ReadOnly: v.ReadOnly, + Sticky: v.Sticky, Source: v.Source, AttachmentMode: structs.CSIVolumeAttachmentMode(v.AttachmentMode), AccessMode: structs.CSIVolumeAccessMode(v.AccessMode), diff --git a/command/agent/volumes_endpoint.go b/command/agent/volumes_endpoint.go new file mode 100644 index 00000000000..3ee84eceb7f --- /dev/null +++ b/command/agent/volumes_endpoint.go @@ -0,0 +1,27 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package agent + +import ( + "net/http" + + "github.com/hashicorp/nomad/nomad/structs" +) + +// ListVolumesRequest dispatches requests for listing volumes to a specific type. +func (s *HTTPServer) ListVolumesRequest(resp http.ResponseWriter, req *http.Request) (interface{}, error) { + query := req.URL.Query() + qtype, ok := query["type"] + if !ok { + return []*structs.CSIVolListStub{}, nil + } + switch qtype[0] { + case "host": + return s.HostVolumesListRequest(resp, req) + case "csi": + return s.CSIVolumesRequest(resp, req) + default: + return nil, CodedError(404, resourceNotFoundErr) + } +} diff --git a/command/asset/asset.go b/command/asset/asset.go index b6c1d9112f6..3570cb78b05 100644 --- a/command/asset/asset.go +++ b/command/asset/asset.go @@ -22,3 +22,15 @@ var NodePoolSpec []byte //go:embed pool.nomad.json var NodePoolSpecJSON []byte + +//go:embed volume.csi.hcl +var CSIVolumeSpecHCL []byte + +//go:embed volume.csi.json +var CSIVolumeSpecJSON []byte + +//go:embed volume.host.hcl +var HostVolumeSpecHCL []byte + +//go:embed volume.host.json +var HostVolumeSpecJSON []byte diff --git a/command/asset/volume.csi.hcl b/command/asset/volume.csi.hcl new file mode 100644 index 00000000000..998edadeff9 --- /dev/null +++ b/command/asset/volume.csi.hcl @@ -0,0 +1,70 @@ +id = "ebs_prod_db1" +namespace = "default" +name = "database" +type = "csi" +plugin_id = "plugin_id" + +# For 'nomad volume register', provide the external ID from the storage +# provider. 
This field should be omitted when creating a volume with +# 'nomad volume create' +external_id = "vol-23452345" + +# For 'nomad volume create', specify a snapshot ID or volume to clone. You can +# specify only one of these two fields. +snapshot_id = "snap-12345" +# clone_id = "vol-abcdef" + +# Optional: for 'nomad volume create', specify a maximum and minimum capacity. +# Registering an existing volume will record but ignore these fields. +capacity_min = "10GiB" +capacity_max = "20G" + +# Required (at least one): for 'nomad volume create', specify one or more +# capabilities to validate. Registering an existing volume will record but +# ignore these fields. +capability { + access_mode = "single-node-writer" + attachment_mode = "file-system" +} + +capability { + access_mode = "single-node-reader" + attachment_mode = "block-device" +} + +# Optional: for 'nomad volume create', specify mount options to validate for +# 'attachment_mode = "file-system". Registering an existing volume will record +# but ignore these fields. +mount_options { + fs_type = "ext4" + mount_flags = ["ro"] +} + +# Optional: specify one or more locations where the volume must be accessible +# from. Refer to the plugin documentation for what segment values are supported. +topology_request { + preferred { + topology { segments { rack = "R1" } } + } + required { + topology { segments { rack = "R1" } } + topology { segments { rack = "R2", zone = "us-east-1a" } } + } +} + +# Optional: provide any secrets specified by the plugin. +secrets { + example_secret = "xyzzy" +} + +# Optional: provide a map of keys to string values expected by the plugin. +parameters { + skuname = "Premium_LRS" +} + +# Optional: for 'nomad volume register', provide a map of keys to string +# values expected by the plugin. This field will populated automatically by +# 'nomad volume create'. +context { + endpoint = "http://192.168.1.101:9425" +} diff --git a/command/asset/volume.csi.json b/command/asset/volume.csi.json new file mode 100644 index 00000000000..722433ed60e --- /dev/null +++ b/command/asset/volume.csi.json @@ -0,0 +1,72 @@ +{ + "id": "ebs_prod_db1", + "namespace": "default", + "name": "database", + "type": "csi", + "plugin_id": "plugin_id", + "external_id": "vol-23452345", + "snapshot_id": "snap-12345", + "capacity_min": "10GiB", + "capacity_max": "20G", + "capability": [ + { + "access_mode": "single-node-writer", + "attachment_mode": "file-system" + }, + { + "access_mode": "single-node-reader", + "attachment_mode": "block-device" + } + ], + "context": [ + { + "endpoint": "http://192.168.1.101:9425" + } + ], + "mount_options": [ + { + "fs_type": "ext4", + "mount_flags": [ + "ro" + ] + } + ], + "topology_request": { + "preferred": [ + { + "topology": { + "segments": { + "rack": "R1" + } + } + } + ], + "required": [ + { + "topology": { + "segments": { + "rack": "R1" + } + } + }, + { + "topology": { + "segments": { + "rack": "R2", + "zone": "us-east-1a" + } + } + } + ] + }, + "parameters": [ + { + "skuname": "Premium_LRS" + } + ], + "secrets": [ + { + "example_secret": "xyzzy" + } + ] +} diff --git a/command/asset/volume.host.hcl b/command/asset/volume.host.hcl new file mode 100644 index 00000000000..3447eef998f --- /dev/null +++ b/command/asset/volume.host.hcl @@ -0,0 +1,28 @@ +id = "disk_prod_db1" +namespace = "default" +name = "database" +type = "host" +plugin_id = "plugin_id" + +# Optional: for 'nomad volume create', specify a maximum and minimum capacity. +# Registering an existing volume will record but ignore these fields. 
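+# Capacity values accept human-friendly size suffixes, for example "10GiB"
+# (binary units) or "20G" (decimal units).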
+capacity_min = "10GiB" +capacity_max = "20G" + +# Required (at least one): for 'nomad volume create', specify one or more +# capabilities to validate. Registering an existing volume will record but +# ignore these fields. +capability { + access_mode = "single-node-writer" + attachment_mode = "file-system" +} + +capability { + access_mode = "single-node-reader" + attachment_mode = "block-device" +} + +# Optional: provide a map of keys to string values expected by the plugin. +parameters { + skuname = "Premium_LRS" +} diff --git a/command/asset/volume.host.json b/command/asset/volume.host.json new file mode 100644 index 00000000000..59a4f4ef0ec --- /dev/null +++ b/command/asset/volume.host.json @@ -0,0 +1,24 @@ +{ + "id": "disk_prod_db1", + "namespace": "default", + "name": "database", + "type": "host", + "plugin_id": "plugin_id", + "capacity_min": "10GiB", + "capacity_max": "20G", + "capability": [ + { + "access_mode": "single-node-writer", + "attachment_mode": "file-system" + }, + { + "access_mode": "single-node-reader", + "attachment_mode": "block-device" + } + ], + "parameters": [ + { + "skuname": "Premium_LRS" + } + ] +} diff --git a/command/node_status.go b/command/node_status.go index f7f7b587802..9538e90622b 100644 --- a/command/node_status.go +++ b/command/node_status.go @@ -13,6 +13,7 @@ import ( "time" humanize "github.com/dustin/go-humanize" + "github.com/hashicorp/go-set/v3" "github.com/hashicorp/nomad/api" "github.com/hashicorp/nomad/api/contexts" "github.com/hashicorp/nomad/helper/pointer" @@ -129,8 +130,12 @@ func (c *NodeStatusCommand) AutocompleteFlags() complete.Flags { } func (c *NodeStatusCommand) AutocompleteArgs() complete.Predictor { + return nodePredictor(c.Client, nil) +} + +func nodePredictor(factory ApiClientFactory, filter *set.Set[string]) complete.Predictor { return complete.PredictFunc(func(a complete.Args) []string { - client, err := c.Meta.Client() + client, err := factory() if err != nil { return nil } diff --git a/command/plugin_status.go b/command/plugin_status.go index 02c61c65a9a..92dbdc7f26b 100644 --- a/command/plugin_status.go +++ b/command/plugin_status.go @@ -58,21 +58,10 @@ func (c *PluginStatusCommand) Synopsis() string { return "Display status information about a plugin" } -// predictVolumeType is also used in volume_status -var predictVolumeType = complete.PredictFunc(func(a complete.Args) []string { - types := []string{"csi"} - for _, t := range types { - if strings.Contains(t, a.Last) { - return []string{t} - } - } - return nil -}) - func (c *PluginStatusCommand) AutocompleteFlags() complete.Flags { return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient), complete.Flags{ - "-type": predictVolumeType, + "-type": complete.PredictSet("csi"), "-short": complete.PredictNothing, "-verbose": complete.PredictNothing, "-json": complete.PredictNothing, diff --git a/command/sentinel_apply.go b/command/sentinel_apply.go index 7d43c0e6c88..7db40022bd7 100644 --- a/command/sentinel_apply.go +++ b/command/sentinel_apply.go @@ -37,8 +37,9 @@ Apply Options: -description Sets a human readable description for the policy. - -scope (default: submit-job) - Sets the scope of the policy and when it should be enforced. + -scope + Sets the scope of the policy and when it should be enforced. One of + "submit-job" or "submit-host-volume". -level (default: advisory) Sets the enforcement level of the policy. 
Must be one of advisory, @@ -73,7 +74,7 @@ func (c *SentinelApplyCommand) Run(args []string) int { flags := c.Meta.FlagSet(c.Name(), FlagSetClient) flags.Usage = func() { c.Ui.Output(c.Help()) } flags.StringVar(&description, "description", "", "") - flags.StringVar(&scope, "scope", "submit-job", "") + flags.StringVar(&scope, "scope", "", "") flags.StringVar(&enfLevel, "level", "advisory", "") if err := flags.Parse(args); err != nil { return 1 @@ -107,6 +108,16 @@ func (c *SentinelApplyCommand) Run(args []string) int { } } + switch scope { + case api.SentinelScopeSubmitJob, api.SentinelScopeSubmitHostVolume: + case "": + c.Ui.Error("-scope flag is required") + return 1 + default: + c.Ui.Error(fmt.Sprintf("Error: invalid -scope value: %q", scope)) + return 1 + } + // Construct the policy sp := &api.SentinelPolicy{ Name: policyName, diff --git a/command/volume_create.go b/command/volume_create.go index c7d32fbe808..5a4254e52b0 100644 --- a/command/volume_create.go +++ b/command/volume_create.go @@ -25,18 +25,42 @@ Usage: nomad volume create [options] If the supplied path is "-" the volume file is read from stdin. Otherwise, it is read from the file at the supplied path. - When ACLs are enabled, this command requires a token with the - 'csi-write-volume' capability for the volume's namespace. + When ACLs are enabled, this command requires a token with the appropriate + capability in the volume's namespace: the 'csi-write-volume' capability for + CSI volumes or 'host-volume-create' for dynamic host volumes. General Options: - ` + generalOptionsUsage(usageOptsDefault) + ` + generalOptionsUsage(usageOptsDefault) + ` + +Create Options: + + -detach + Return immediately instead of entering monitor mode for dynamic host + volumes. After creating a volume, the volume ID will be printed to the + screen, which can be used to examine the volume using the volume status + command. If -detach is omitted or false, the command will monitor the state + of the volume until it is ready to be scheduled. + + -verbose + Display full information when monitoring volume state. Used for dynamic host + volumes only. + + -policy-override + Sets the flag to force override any soft mandatory Sentinel policies. Used + for dynamic host volumes only. 
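The `hostVolumeCreate` implementation that follows reduces to a short sequence of Go API calls. A minimal sketch of the same create flow driven through the `api` package directly, with the HCL decoding, Sentinel override, and monitor loop omitted (the volume field values are placeholders):

```go
package main

import (
	"fmt"

	"github.com/hashicorp/nomad/api"
)

func main() {
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		panic(err)
	}

	// roughly equivalent to `nomad volume create -detach` for a host volume
	req := &api.HostVolumeCreateRequest{
		Volume: &api.HostVolume{
			Namespace: "default",
			Name:      "database",
			PluginID:  "mkdir",
		},
	}
	resp, _, err := client.HostVolumes().Create(req, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(resp.Volume.ID, resp.Volume.State)
}
```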
+` return strings.TrimSpace(helpText) } func (c *VolumeCreateCommand) AutocompleteFlags() complete.Flags { - return c.Meta.AutocompleteFlags(FlagSetClient) + return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient), + complete.Flags{ + "-detach": complete.PredictNothing, + "-verbose": complete.PredictNothing, + "-policy-override": complete.PredictNothing, + }) } func (c *VolumeCreateCommand) AutocompleteArgs() complete.Predictor { @@ -50,7 +74,11 @@ func (c *VolumeCreateCommand) Synopsis() string { func (c *VolumeCreateCommand) Name() string { return "volume create" } func (c *VolumeCreateCommand) Run(args []string) int { + var detach, verbose, override bool flags := c.Meta.FlagSet(c.Name(), FlagSetClient) + flags.BoolVar(&detach, "detach", false, "detach from monitor") + flags.BoolVar(&verbose, "verbose", false, "display full volume IDs") + flags.BoolVar(&override, "policy-override", false, "override soft mandatory Sentinel policies") flags.Usage = func() { c.Ui.Output(c.Help()) } if err := flags.Parse(args); err != nil { @@ -99,8 +127,9 @@ func (c *VolumeCreateCommand) Run(args []string) int { switch strings.ToLower(volType) { case "csi": - code := c.csiCreate(client, ast) - return code + return c.csiCreate(client, ast) + case "host": + return c.hostVolumeCreate(client, ast, detach, verbose, override) default: c.Ui.Error(fmt.Sprintf("Error unknown volume type: %s", volType)) return 1 diff --git a/command/volume_create_host.go b/command/volume_create_host.go new file mode 100644 index 00000000000..dc0d1e1aef2 --- /dev/null +++ b/command/volume_create_host.go @@ -0,0 +1,375 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package command + +import ( + "context" + "fmt" + "strconv" + "time" + + "github.com/hashicorp/hcl" + "github.com/hashicorp/hcl/hcl/ast" + "github.com/hashicorp/nomad/api" + "github.com/hashicorp/nomad/helper" + "github.com/mitchellh/go-glint" + "github.com/mitchellh/go-glint/components" + "github.com/mitchellh/mapstructure" +) + +func (c *VolumeCreateCommand) hostVolumeCreate( + client *api.Client, ast *ast.File, detach, verbose, override bool) int { + + vol, err := decodeHostVolume(ast) + if err != nil { + c.Ui.Error(fmt.Sprintf("Error decoding the volume definition: %s", err)) + return 1 + } + + req := &api.HostVolumeCreateRequest{ + Volume: vol, + PolicyOverride: override, + } + resp, _, err := client.HostVolumes().Create(req, nil) + if err != nil { + c.Ui.Error(fmt.Sprintf("Error creating volume: %s", err)) + return 1 + } + vol = resp.Volume + + if resp.Warnings != "" { + c.Ui.Output( + c.Colorize().Color( + fmt.Sprintf("[bold][yellow]Volume Warnings:\n%s[reset]\n", resp.Warnings))) + } + + var volID string + var lastIndex uint64 + + if detach || vol.State == api.HostVolumeStateReady { + c.Ui.Output(fmt.Sprintf( + "Created host volume %s with ID %s", vol.Name, vol.ID)) + return 0 + } else { + c.Ui.Output(fmt.Sprintf( + "==> Created host volume %s with ID %s", vol.Name, vol.ID)) + volID = vol.ID + lastIndex = vol.ModifyIndex + } + + if vol.Namespace != "" { + client.SetNamespace(vol.Namespace) + } + + err = c.monitorHostVolume(client, volID, lastIndex, verbose) + if err != nil { + c.Ui.Error(fmt.Sprintf("==> %s: %v", formatTime(time.Now()), err.Error())) + return 1 + } + return 0 +} + +func (c *VolumeCreateCommand) monitorHostVolume(client *api.Client, id string, lastIndex uint64, verbose bool) error { + length := shortId + if verbose { + length = fullId + } + + opts := 
formatOpts{ + verbose: verbose, + short: !verbose, + length: length, + } + + if isStdoutTerminal() { + return c.ttyMonitor(client, id, lastIndex, opts) + } else { + return c.nottyMonitor(client, id, lastIndex, opts) + } +} + +func (c *VolumeCreateCommand) ttyMonitor(client *api.Client, id string, lastIndex uint64, opts formatOpts) error { + + gUi := glint.New() + spinner := glint.Layout( + components.Spinner(), + glint.Text(fmt.Sprintf(" Monitoring volume %q in progress...", limit(id, opts.length))), + ).Row().MarginLeft(2) + refreshRate := 100 * time.Millisecond + + gUi.SetRefreshRate(refreshRate) + gUi.Set(spinner) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + go gUi.Render(ctx) + + qOpts := &api.QueryOptions{ + AllowStale: true, + WaitIndex: lastIndex, + WaitTime: time.Second * 5, + } + + var statusComponent *glint.LayoutComponent + var endSpinner *glint.LayoutComponent + +DONE: + for { + vol, meta, err := client.HostVolumes().Get(id, qOpts) + if err != nil { + return err + } + str, err := formatHostVolume(vol, opts) + if err != nil { + // should never happen b/c we don't pass json/template via opts here + return err + } + statusComponent = glint.Layout( + glint.Text(""), + glint.Text(formatTime(time.Now())), + glint.Text(c.Colorize().Color(str)), + ).MarginLeft(4) + + statusComponent = glint.Layout(statusComponent) + gUi.Set(spinner, statusComponent) + + endSpinner = glint.Layout( + components.Spinner(), + glint.Text(fmt.Sprintf(" Host volume %q %s", limit(id, opts.length), vol.State)), + ).Row().MarginLeft(2) + + switch vol.State { + case api.HostVolumeStateReady: + endSpinner = glint.Layout( + glint.Text(fmt.Sprintf("✓ Host volume %q %s", limit(id, opts.length), vol.State)), + ).Row().MarginLeft(2) + break DONE + + case api.HostVolumeStateDeleted: + endSpinner = glint.Layout( + glint.Text(fmt.Sprintf("! 
Host volume %q %s", limit(id, opts.length), vol.State)), + ).Row().MarginLeft(2) + break DONE + + default: + qOpts.WaitIndex = meta.LastIndex + continue + } + + } + + // Render one final time with completion message + gUi.Set(endSpinner, statusComponent, glint.Text("")) + gUi.RenderFrame() + return nil +} + +func (c *VolumeCreateCommand) nottyMonitor(client *api.Client, id string, lastIndex uint64, opts formatOpts) error { + + c.Ui.Info(fmt.Sprintf("==> %s: Monitoring volume %q...", + formatTime(time.Now()), limit(id, opts.length))) + + for { + vol, _, err := client.HostVolumes().Get(id, &api.QueryOptions{ + WaitIndex: lastIndex, + WaitTime: time.Second * 5, + }) + if err != nil { + return err + } + if vol.State == api.HostVolumeStateReady { + c.Ui.Info(fmt.Sprintf("==> %s: Volume %q ready", + formatTime(time.Now()), limit(vol.Name, opts.length))) + return nil + } + } +} + +func decodeHostVolume(input *ast.File) (*api.HostVolume, error) { + var err error + vol := &api.HostVolume{} + + list, ok := input.Node.(*ast.ObjectList) + if !ok { + return nil, fmt.Errorf("error parsing: root should be an object") + } + + // Decode the full thing into a map[string]interface for ease + var m map[string]any + err = hcl.DecodeObject(&m, list) + if err != nil { + return nil, err + } + + // Need to manually parse these fields/blocks + delete(m, "capability") + delete(m, "constraint") + delete(m, "capacity_max") + delete(m, "capacity_min") + delete(m, "type") + + // Decode the rest + err = mapstructure.WeakDecode(m, vol) + if err != nil { + return nil, err + } + + capacityMin, err := parseCapacityBytes(list.Filter("capacity_min")) + if err != nil { + return nil, fmt.Errorf("invalid capacity_min: %v", err) + } + vol.RequestedCapacityMinBytes = capacityMin + capacityMax, err := parseCapacityBytes(list.Filter("capacity_max")) + if err != nil { + return nil, fmt.Errorf("invalid capacity_max: %v", err) + } + vol.RequestedCapacityMaxBytes = capacityMax + + if o := list.Filter("constraint"); len(o.Items) > 0 { + if err := parseConstraints(&vol.Constraints, o); err != nil { + return nil, fmt.Errorf("invalid constraint: %v", err) + } + } + if o := list.Filter("capability"); len(o.Items) > 0 { + if err := parseHostVolumeCapabilities(&vol.RequestedCapabilities, o); err != nil { + return nil, fmt.Errorf("invalid capability: %v", err) + } + } + + return vol, nil +} + +func parseHostVolumeCapabilities(result *[]*api.HostVolumeCapability, list *ast.ObjectList) error { + for _, o := range list.Elem().Items { + valid := []string{"access_mode", "attachment_mode"} + if err := helper.CheckHCLKeys(o.Val, valid); err != nil { + return err + } + + ot, ok := o.Val.(*ast.ObjectType) + if !ok { + break + } + + var m map[string]any + if err := hcl.DecodeObject(&m, ot.List); err != nil { + return err + } + var cap *api.HostVolumeCapability + if err := mapstructure.WeakDecode(&m, &cap); err != nil { + return err + } + + *result = append(*result, cap) + } + + return nil +} + +func parseConstraints(result *[]*api.Constraint, list *ast.ObjectList) error { + for _, o := range list.Elem().Items { + valid := []string{ + "attribute", + "distinct_hosts", + "distinct_property", + "operator", + "regexp", + "set_contains", + "value", + "version", + "semver", + } + if err := helper.CheckHCLKeys(o.Val, valid); err != nil { + return err + } + + var m map[string]any + if err := hcl.DecodeObject(&m, o.Val); err != nil { + return err + } + + m["LTarget"] = m["attribute"] + m["RTarget"] = m["value"] + m["Operand"] = m["operator"] + + // If "version" is 
provided, set the operand + // to "version" and the value to the "RTarget" + if constraint, ok := m[api.ConstraintVersion]; ok { + m["Operand"] = api.ConstraintVersion + m["RTarget"] = constraint + } + + // If "semver" is provided, set the operand + // to "semver" and the value to the "RTarget" + if constraint, ok := m[api.ConstraintSemver]; ok { + m["Operand"] = api.ConstraintSemver + m["RTarget"] = constraint + } + + // If "regexp" is provided, set the operand + // to "regexp" and the value to the "RTarget" + if constraint, ok := m[api.ConstraintRegex]; ok { + m["Operand"] = api.ConstraintRegex + m["RTarget"] = constraint + } + + // If "set_contains" is provided, set the operand + // to "set_contains" and the value to the "RTarget" + if constraint, ok := m[api.ConstraintSetContains]; ok { + m["Operand"] = api.ConstraintSetContains + m["RTarget"] = constraint + } + + if value, ok := m[api.ConstraintDistinctHosts]; ok { + enabled, err := parseBool(value) + if err != nil { + return fmt.Errorf("distinct_hosts should be set to true or false; %v", err) + } + + // If it is not enabled, skip the constraint. + if !enabled { + continue + } + + m["Operand"] = api.ConstraintDistinctHosts + m["RTarget"] = strconv.FormatBool(enabled) + } + + if property, ok := m[api.ConstraintDistinctProperty]; ok { + m["Operand"] = api.ConstraintDistinctProperty + m["LTarget"] = property + } + + // Build the constraint + var c api.Constraint + if err := mapstructure.WeakDecode(m, &c); err != nil { + return err + } + if c.Operand == "" { + c.Operand = "=" + } + + *result = append(*result, &c) + } + + return nil +} + +// parseBool takes an interface value and tries to convert it to a boolean and +// returns an error if the type can't be converted. +func parseBool(value any) (bool, error) { + var enabled bool + var err error + switch data := value.(type) { + case string: + enabled, err = strconv.ParseBool(data) + case bool: + enabled = data + default: + err = fmt.Errorf("%v couldn't be converted to boolean value", value) + } + + return enabled, err +} diff --git a/command/volume_create_host_test.go b/command/volume_create_host_test.go new file mode 100644 index 00000000000..4ef92dc02a8 --- /dev/null +++ b/command/volume_create_host_test.go @@ -0,0 +1,225 @@ +// Copyright (c) HashiCorp, Inc. 
+// SPDX-License-Identifier: BUSL-1.1 + +package command + +import ( + "os" + "strings" + "testing" + + "github.com/hashicorp/hcl" + "github.com/hashicorp/nomad/api" + "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/command/agent" + "github.com/mitchellh/cli" + "github.com/shoenig/test/must" +) + +func TestHostVolumeCreateCommand_Run(t *testing.T) { + ci.Parallel(t) + srv, client, url := testServer(t, true, func(c *agent.Config) { + c.Client.Meta = map[string]string{"rack": "foo"} + }) + t.Cleanup(srv.Shutdown) + + waitForNodes(t, client) + + _, err := client.Namespaces().Register(&api.Namespace{Name: "prod"}, nil) + must.NoError(t, err) + + ui := cli.NewMockUi() + cmd := &VolumeCreateCommand{Meta: Meta{Ui: ui}} + + hclTestFile := ` +namespace = "prod" +name = "database" +type = "host" +plugin_id = "mkdir" +node_pool = "default" + +capacity_min = "10GiB" +capacity_max = "20G" + +constraint { + attribute = "${meta.rack}" + value = "foo" +} + +capability { + access_mode = "single-node-writer" + attachment_mode = "file-system" +} + +capability { + access_mode = "single-node-reader-only" + attachment_mode = "block-device" +} + +parameters { + foo = "bar" +} +` + + file, err := os.CreateTemp(t.TempDir(), "volume-test-*.hcl") + must.NoError(t, err) + _, err = file.WriteString(hclTestFile) + must.NoError(t, err) + + args := []string{"-address", url, "-detach", file.Name()} + + code := cmd.Run(args) + must.Eq(t, 0, code, must.Sprintf("got error: %s", ui.ErrorWriter.String())) + + out := ui.OutputWriter.String() + must.StrContains(t, out, "Created host volume") + parts := strings.Split(out, " ") + id := strings.TrimSpace(parts[len(parts)-1]) + + // Verify volume was created + got, _, err := client.HostVolumes().Get(id, &api.QueryOptions{Namespace: "prod"}) + must.NoError(t, err) + must.NotNil(t, got) +} + +func TestHostVolume_HCLDecode(t *testing.T) { + ci.Parallel(t) + + cases := []struct { + name string + hcl string + expected *api.HostVolume + errMsg string + }{ + { + name: "full spec", + hcl: ` +namespace = "prod" +name = "database" +type = "host" +plugin_id = "mkdir" +node_pool = "default" + +capacity_min = "10GiB" +capacity_max = "20G" + +constraint { + attribute = "${attr.kernel.name}" + value = "linux" +} + +constraint { + attribute = "${meta.rack}" + value = "foo" +} + +capability { + access_mode = "single-node-writer" + attachment_mode = "file-system" +} + +capability { + access_mode = "single-node-reader-only" + attachment_mode = "block-device" +} + +parameters { + foo = "bar" +} +`, + expected: &api.HostVolume{ + Namespace: "prod", + Name: "database", + PluginID: "mkdir", + NodePool: "default", + Constraints: []*api.Constraint{{ + LTarget: "${attr.kernel.name}", + RTarget: "linux", + Operand: "=", + }, { + LTarget: "${meta.rack}", + RTarget: "foo", + Operand: "=", + }}, + RequestedCapacityMinBytes: 10737418240, + RequestedCapacityMaxBytes: 20000000000, + RequestedCapabilities: []*api.HostVolumeCapability{ + { + AttachmentMode: api.HostVolumeAttachmentModeFilesystem, + AccessMode: api.HostVolumeAccessModeSingleNodeWriter, + }, + { + AttachmentMode: api.HostVolumeAttachmentModeBlockDevice, + AccessMode: api.HostVolumeAccessModeSingleNodeReader, + }, + }, + Parameters: map[string]string{"foo": "bar"}, + }, + }, + + { + name: "mostly empty spec", + hcl: ` +namespace = "prod" +name = "database" +type = "host" +plugin_id = "mkdir" +node_pool = "default" +`, + expected: &api.HostVolume{ + Namespace: "prod", + Name: "database", + 
PluginID: "mkdir", + NodePool: "default", + }, + }, + + { + name: "invalid capacity", + hcl: ` +namespace = "prod" +name = "database" +type = "host" +plugin_id = "mkdir" +node_pool = "default" + +capacity_min = "a" +`, + expected: nil, + errMsg: "invalid capacity_min: could not parse value as bytes: strconv.ParseFloat: parsing \"\": invalid syntax", + }, + + { + name: "invalid constraint", + hcl: ` +namespace = "prod" +name = "database" +type = "host" +plugin_id = "mkdir" +node_pool = "default" + +constraint { + distinct_hosts = "foo" +} + +`, + expected: nil, + errMsg: "invalid constraint: distinct_hosts should be set to true or false; strconv.ParseBool: parsing \"foo\": invalid syntax", + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + ast, err := hcl.ParseString(tc.hcl) + must.NoError(t, err) + vol, err := decodeHostVolume(ast) + if tc.errMsg == "" { + must.NoError(t, err) + } else { + must.EqError(t, err, tc.errMsg) + } + must.Eq(t, tc.expected, vol) + }) + } + +} diff --git a/command/volume_delete.go b/command/volume_delete.go index 7dc3df1e128..23a82dbe01a 100644 --- a/command/volume_delete.go +++ b/command/volume_delete.go @@ -40,14 +40,20 @@ Delete Options: -secret Secrets to pass to the plugin to delete the snapshot. Accepts multiple - flags in the form -secret key=value + flags in the form -secret key=value. Only available for CSI volumes. + + -type + Type of volume to delete. Must be one of "csi" or "host". Defaults to "csi". ` return strings.TrimSpace(helpText) } func (c *VolumeDeleteCommand) AutocompleteFlags() complete.Flags { return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient), - complete.Flags{}) + complete.Flags{ + "-type": complete.PredictSet("csi", "host"), + "-secret": complete.PredictNothing, + }) } func (c *VolumeDeleteCommand) AutocompleteArgs() complete.Predictor { @@ -63,11 +69,11 @@ func (c *VolumeDeleteCommand) AutocompleteArgs() complete.Predictor { } matches := resp.Matches[contexts.Volumes] - resp, _, err = client.Search().PrefixSearch(a.Last, contexts.Nodes, nil) + resp, _, err = client.Search().PrefixSearch(a.Last, contexts.HostVolumes, nil) if err != nil { return []string{} } - matches = append(matches, resp.Matches[contexts.Nodes]...) + matches = append(matches, resp.Matches[contexts.HostVolumes]...) return matches }) } @@ -80,9 +86,11 @@ func (c *VolumeDeleteCommand) Name() string { return "volume delete" } func (c *VolumeDeleteCommand) Run(args []string) int { var secretsArgs flaghelper.StringFlag + var typeArg string flags := c.Meta.FlagSet(c.Name(), FlagSetClient) flags.Usage = func() { c.Ui.Output(c.Help()) } flags.Var(&secretsArgs, "secret", "secrets for snapshot, ex. 
-secret key=value") + flags.StringVar(&typeArg, "type", "csi", "type of volume (csi or host)") if err := flags.Parse(args); err != nil { c.Ui.Error(fmt.Sprintf("Error parsing arguments %s", err)) @@ -105,6 +113,19 @@ func (c *VolumeDeleteCommand) Run(args []string) int { return 1 } + switch typeArg { + case "csi": + return c.deleteCSIVolume(client, volID, secretsArgs) + case "host": + return c.deleteHostVolume(client, volID) + default: + c.Ui.Error(fmt.Sprintf("No such volume type %q", typeArg)) + return 1 + } +} + +func (c *VolumeDeleteCommand) deleteCSIVolume(client *api.Client, volID string, secretsArgs flaghelper.StringFlag) int { + secrets := api.CSISecrets{} for _, kv := range secretsArgs { if key, value, found := strings.Cut(kv, "="); found { @@ -115,7 +136,7 @@ func (c *VolumeDeleteCommand) Run(args []string) int { } } - err = client.CSIVolumes().DeleteOpts(&api.CSIVolumeDeleteRequest{ + err := client.CSIVolumes().DeleteOpts(&api.CSIVolumeDeleteRequest{ ExternalVolumeID: volID, Secrets: secrets, }, nil) @@ -127,3 +148,14 @@ func (c *VolumeDeleteCommand) Run(args []string) int { c.Ui.Output(fmt.Sprintf("Successfully deleted volume %q!", volID)) return 0 } + +func (c *VolumeDeleteCommand) deleteHostVolume(client *api.Client, volID string) int { + _, err := client.HostVolumes().Delete(&api.HostVolumeDeleteRequest{ID: volID}, nil) + if err != nil { + c.Ui.Error(fmt.Sprintf("Error deleting volume: %s", err)) + return 1 + } + + c.Ui.Output(fmt.Sprintf("Successfully deleted volume %q!", volID)) + return 0 +} diff --git a/command/volume_delete_host_test.go b/command/volume_delete_host_test.go new file mode 100644 index 00000000000..353063c34cc --- /dev/null +++ b/command/volume_delete_host_test.go @@ -0,0 +1,91 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package command + +import ( + "fmt" + "os" + "strings" + "testing" + + "github.com/hashicorp/nomad/api" + "github.com/hashicorp/nomad/ci" + "github.com/mitchellh/cli" + "github.com/posener/complete" + "github.com/shoenig/test/must" +) + +func TestHostVolumeDeleteCommand(t *testing.T) { + ci.Parallel(t) + srv, client, url := testServer(t, true, nil) + t.Cleanup(srv.Shutdown) + + waitForNodes(t, client) + + _, err := client.Namespaces().Register(&api.Namespace{Name: "prod"}, nil) + must.NoError(t, err) + + nodes, _, err := client.Nodes().List(nil) + must.NoError(t, err) + must.Len(t, 1, nodes) + nodeID := nodes[0].ID + + ui := cli.NewMockUi() + + hclTestFile := fmt.Sprintf(` +namespace = "prod" +name = "example" +type = "host" +plugin_id = "mkdir" +node_id = "%s" +node_pool = "default" +capability { + access_mode = "single-node-reader-only" + attachment_mode = "file-system" +} +`, nodeID) + + file, err := os.CreateTemp(t.TempDir(), "volume-test-*.hcl") + must.NoError(t, err) + _, err = file.WriteString(hclTestFile) + must.NoError(t, err) + + args := []string{"-address", url, file.Name()} + regCmd := &VolumeRegisterCommand{Meta: Meta{Ui: ui}} + code := regCmd.Run(args) + must.Eq(t, 0, code, must.Sprintf("got error: %s", ui.ErrorWriter.String())) + + out := ui.OutputWriter.String() + must.StrContains(t, out, "Registered host volume") + parts := strings.Split(out, " ") + id := strings.TrimSpace(parts[len(parts)-1]) + + ui.OutputWriter.Reset() + + // autocomplete + cmd := &VolumeDeleteCommand{Meta: Meta{Ui: ui, namespace: "*", flagAddress: url}} + prefix := id[:len(id)-5] + cargs := complete.Args{Last: prefix} + predictor := cmd.AutocompleteArgs() + + res := 
predictor.Predict(cargs) + must.SliceLen(t, 1, res) + must.Eq(t, id, res[0]) + + // missing the namespace + cmd = &VolumeDeleteCommand{Meta: Meta{Ui: ui}} + args = []string{"-address", url, "-type", "host", id} + code = cmd.Run(args) + must.Eq(t, 1, code) + must.StrContains(t, ui.ErrorWriter.String(), "no such volume") + ui.ErrorWriter.Reset() + + // fix the namespace + args = []string{"-address", url, "-type", "host", "-namespace", "prod", id} + code = cmd.Run(args) + must.Eq(t, 0, code, must.Sprintf("got error: %s", ui.ErrorWriter.String())) + out = ui.OutputWriter.String() + must.StrContains(t, out, fmt.Sprintf("Successfully deleted volume %q!", id)) + +} diff --git a/command/volume_deregister.go b/command/volume_deregister.go index c4c78cfcc2d..60851e66954 100644 --- a/command/volume_deregister.go +++ b/command/volume_deregister.go @@ -53,7 +53,6 @@ func (c *VolumeDeregisterCommand) AutocompleteArgs() complete.Predictor { return nil } - // When multiple volume types are implemented, this search should merge contexts resp, _, err := client.Search().PrefixSearch(a.Last, contexts.Volumes, nil) if err != nil { return []string{} diff --git a/command/volume_init.go b/command/volume_init.go index bd37df8cfa1..93cd9e17321 100644 --- a/command/volume_init.go +++ b/command/volume_init.go @@ -8,17 +8,18 @@ import ( "os" "strings" + "github.com/hashicorp/nomad/command/asset" "github.com/posener/complete" ) const ( - // DefaultHclVolumeInitName is the default name we use when initializing + // defaultHclVolumeInitName is the default name we use when initializing // the example volume file in HCL format - DefaultHclVolumeInitName = "volume.hcl" + defaultHclVolumeInitName = "volume.hcl" // DefaultHclVolumeInitName is the default name we use when initializing // the example volume file in JSON format - DefaultJsonVolumeInitName = "volume.json" + defaultJsonVolumeInitName = "volume.json" ) // VolumeInitCommand generates a new volume spec that you can customize to @@ -39,6 +40,11 @@ Init Options: -json Create an example JSON volume specification. + + -type + Create an example for a specific type of volume (one of "csi" or "host", + defaults to "csi"). 
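+
+    For example, 'nomad volume init -type=host' writes an example dynamic
+    host volume specification to volume.hcl in the current directory.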
+ ` return strings.TrimSpace(helpText) } @@ -50,6 +56,7 @@ func (c *VolumeInitCommand) Synopsis() string { func (c *VolumeInitCommand) AutocompleteFlags() complete.Flags { return complete.Flags{ "-json": complete.PredictNothing, + "-type": complete.PredictSet("host", "csi"), } } @@ -61,9 +68,11 @@ func (c *VolumeInitCommand) Name() string { return "volume init" } func (c *VolumeInitCommand) Run(args []string) int { var jsonOutput bool + var volType string flags := c.Meta.FlagSet(c.Name(), FlagSetClient) flags.Usage = func() { c.Ui.Output(c.Help()) } flags.BoolVar(&jsonOutput, "json", false, "") + flags.StringVar(&volType, "type", "csi", "type of volume") if err := flags.Parse(args); err != nil { return 1 @@ -77,11 +86,17 @@ func (c *VolumeInitCommand) Run(args []string) int { return 1 } - fileName := DefaultHclVolumeInitName - fileContent := defaultHclVolumeSpec - if jsonOutput { - fileName = DefaultJsonVolumeInitName - fileContent = defaultJsonVolumeSpec + fileName := defaultHclVolumeInitName + fileContent := asset.CSIVolumeSpecHCL + + if volType == "host" && !jsonOutput { + fileContent = asset.HostVolumeSpecHCL + } else if volType == "host" && jsonOutput { + fileName = defaultJsonVolumeInitName + fileContent = asset.HostVolumeSpecJSON + } else if jsonOutput { + fileName = defaultJsonVolumeInitName + fileContent = asset.CSIVolumeSpecJSON } if len(args) == 1 { fileName = args[0] @@ -99,7 +114,7 @@ func (c *VolumeInitCommand) Run(args []string) int { } // Write out the example - err = os.WriteFile(fileName, []byte(fileContent), 0660) + err = os.WriteFile(fileName, fileContent, 0660) if err != nil { c.Ui.Error(fmt.Sprintf("Failed to write %q: %v", fileName, err)) return 1 @@ -109,151 +124,3 @@ func (c *VolumeInitCommand) Run(args []string) int { c.Ui.Output(fmt.Sprintf("Example volume specification written to %s", fileName)) return 0 } - -var defaultHclVolumeSpec = strings.TrimSpace(` -id = "ebs_prod_db1" -namespace = "default" -name = "database" -type = "csi" -plugin_id = "plugin_id" - -# For 'nomad volume register', provide the external ID from the storage -# provider. This field should be omitted when creating a volume with -# 'nomad volume create' -external_id = "vol-23452345" - -# For 'nomad volume create', specify a snapshot ID or volume to clone. You can -# specify only one of these two fields. -snapshot_id = "snap-12345" -# clone_id = "vol-abcdef" - -# Optional: for 'nomad volume create', specify a maximum and minimum capacity. -# Registering an existing volume will record but ignore these fields. -capacity_min = "10GiB" -capacity_max = "20G" - -# Required (at least one): for 'nomad volume create', specify one or more -# capabilities to validate. Registering an existing volume will record but -# ignore these fields. -capability { - access_mode = "single-node-writer" - attachment_mode = "file-system" -} - -capability { - access_mode = "single-node-reader" - attachment_mode = "block-device" -} - -# Optional: for 'nomad volume create', specify mount options to validate for -# 'attachment_mode = "file-system". Registering an existing volume will record -# but ignore these fields. -mount_options { - fs_type = "ext4" - mount_flags = ["ro"] -} - -# Optional: specify one or more locations where the volume must be accessible -# from. Refer to the plugin documentation for what segment values are supported. 
-topology_request { - preferred { - topology { segments { rack = "R1" } } - } - required { - topology { segments { rack = "R1" } } - topology { segments { rack = "R2", zone = "us-east-1a" } } - } -} - -# Optional: provide any secrets specified by the plugin. -secrets { - example_secret = "xyzzy" -} - -# Optional: provide a map of keys to string values expected by the plugin. -parameters { - skuname = "Premium_LRS" -} - -# Optional: for 'nomad volume register', provide a map of keys to string -# values expected by the plugin. This field will populated automatically by -# 'nomad volume create'. -context { - endpoint = "http://192.168.1.101:9425" -} -`) - -var defaultJsonVolumeSpec = strings.TrimSpace(` -{ - "id": "ebs_prod_db1", - "namespace": "default", - "name": "database", - "type": "csi", - "plugin_id": "plugin_id", - "external_id": "vol-23452345", - "snapshot_id": "snap-12345", - "capacity_min": "10GiB", - "capacity_max": "20G", - "capability": [ - { - "access_mode": "single-node-writer", - "attachment_mode": "file-system" - }, - { - "access_mode": "single-node-reader", - "attachment_mode": "block-device" - } - ], - "context": [ - { - "endpoint": "http://192.168.1.101:9425" - } - ], - "mount_options": [ - { - "fs_type": "ext4", - "mount_flags": [ - "ro" - ] - } - ], - "topology_request": { - "preferred": [ - { - "topology": { - "segments": { - "rack": "R1" - } - } - } - ], - "required": [ - { - "topology": { - "segments": { - "rack": "R1" - } - } - }, - { - "topology": { - "segments": { - "rack": "R2", - "zone": "us-east-1a" - } - } - } - ] - }, - "parameters": [ - { - "skuname": "Premium_LRS" - } - ], - "secrets": [ - { - "example_secret": "xyzzy" - } - ] -} -`) diff --git a/command/volume_register.go b/command/volume_register.go index 3a8815347ff..d47c93b2232 100644 --- a/command/volume_register.go +++ b/command/volume_register.go @@ -28,18 +28,29 @@ Usage: nomad volume register [options] If the supplied path is "-" the volume file is read from stdin. Otherwise, it is read from the file at the supplied path. - When ACLs are enabled, this command requires a token with the - 'csi-write-volume' capability for the volume's namespace. + When ACLs are enabled, this command requires a token with the appropriate + capability in the volume's namespace: the 'csi-write-volume' capability for + CSI volumes or 'host-volume-register' for dynamic host volumes. General Options: - ` + generalOptionsUsage(usageOptsDefault) + ` + generalOptionsUsage(usageOptsDefault) + ` + +Register Options: + + -policy-override + Sets the flag to force override any soft mandatory Sentinel policies. Used + for dynamic host volumes only. 
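+
+    For example, 'nomad volume register -policy-override volume.hcl' registers
+    the volume even if a soft mandatory Sentinel policy would otherwise fail.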
+` return strings.TrimSpace(helpText) } func (c *VolumeRegisterCommand) AutocompleteFlags() complete.Flags { - return c.Meta.AutocompleteFlags(FlagSetClient) + return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient), + complete.Flags{ + "-policy-override": complete.PredictNothing, + }) } func (c *VolumeRegisterCommand) AutocompleteArgs() complete.Predictor { @@ -53,7 +64,9 @@ func (c *VolumeRegisterCommand) Synopsis() string { func (c *VolumeRegisterCommand) Name() string { return "volume register" } func (c *VolumeRegisterCommand) Run(args []string) int { + var override bool flags := c.Meta.FlagSet(c.Name(), FlagSetClient) + flags.BoolVar(&override, "policy-override", false, "override soft mandatory Sentinel policies") flags.Usage = func() { c.Ui.Output(c.Help()) } if err := flags.Parse(args); err != nil { @@ -103,16 +116,13 @@ func (c *VolumeRegisterCommand) Run(args []string) int { switch volType { case "csi": - code := c.csiRegister(client, ast) - if code != 0 { - return code - } + return c.csiRegister(client, ast) + case "host": + return c.hostVolumeRegister(client, ast, override) default: c.Ui.Error(fmt.Sprintf("Error unknown volume type: %s", volType)) return 1 } - - return 0 } // parseVolume is used to parse the quota specification from HCL diff --git a/command/volume_register_test.go b/command/volume_register_csi_test.go similarity index 100% rename from command/volume_register_test.go rename to command/volume_register_csi_test.go diff --git a/command/volume_register_host.go b/command/volume_register_host.go new file mode 100644 index 00000000000..b6cb213caac --- /dev/null +++ b/command/volume_register_host.go @@ -0,0 +1,41 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package command + +import ( + "fmt" + + "github.com/hashicorp/hcl/hcl/ast" + "github.com/hashicorp/nomad/api" +) + +func (c *VolumeRegisterCommand) hostVolumeRegister(client *api.Client, ast *ast.File, override bool) int { + vol, err := decodeHostVolume(ast) + if err != nil { + c.Ui.Error(fmt.Sprintf("Error decoding the volume definition: %s", err)) + return 1 + } + + req := &api.HostVolumeRegisterRequest{ + Volume: vol, + PolicyOverride: override, + } + resp, _, err := client.HostVolumes().Register(req, nil) + if err != nil { + c.Ui.Error(fmt.Sprintf("Error registering volume: %s", err)) + return 1 + } + vol = resp.Volume + + if resp.Warnings != "" { + c.Ui.Output( + c.Colorize().Color( + fmt.Sprintf("[bold][yellow]Volume Warnings:\n%s[reset]\n", resp.Warnings))) + } + + c.Ui.Output(fmt.Sprintf( + "Registered host volume %s with ID %s", vol.Name, vol.ID)) + + return 0 +} diff --git a/command/volume_register_host_test.go b/command/volume_register_host_test.go new file mode 100644 index 00000000000..0ce33770197 --- /dev/null +++ b/command/volume_register_host_test.go @@ -0,0 +1,93 @@ +// Copyright (c) HashiCorp, Inc. 
+// SPDX-License-Identifier: BUSL-1.1 + +package command + +import ( + "fmt" + "os" + "strings" + "testing" + + "github.com/hashicorp/nomad/api" + "github.com/hashicorp/nomad/ci" + "github.com/mitchellh/cli" + "github.com/shoenig/test/must" +) + +func TestHostVolumeRegisterCommand_Run(t *testing.T) { + ci.Parallel(t) + srv, client, url := testServer(t, true, nil) + t.Cleanup(srv.Shutdown) + + waitForNodes(t, client) + + _, err := client.Namespaces().Register(&api.Namespace{Name: "prod"}, nil) + must.NoError(t, err) + + nodes, _, err := client.Nodes().List(nil) + must.NoError(t, err) + must.Len(t, 1, nodes) + nodeID := nodes[0].ID + + ui := cli.NewMockUi() + cmd := &VolumeRegisterCommand{Meta: Meta{Ui: ui}} + + hclTestFile := fmt.Sprintf(` +namespace = "prod" +name = "database" +type = "host" +plugin_id = "plugin_id" +node_id = "%s" +node_pool = "default" + +capacity = 150000000 +host_path = "/var/nomad/alloc_mounts/example" +capacity_min = "10GiB" +capacity_max = "20G" + +constraint { + attribute = "${attr.kernel.name}" + value = "linux" +} + +constraint { + attribute = "${meta.rack}" + value = "foo" +} + +capability { + access_mode = "single-node-writer" + attachment_mode = "file-system" +} + +capability { + access_mode = "single-node-reader-only" + attachment_mode = "block-device" +} + +parameters { + foo = "bar" +} +`, nodeID) + + file, err := os.CreateTemp(t.TempDir(), "volume-test-*.hcl") + must.NoError(t, err) + _, err = file.WriteString(hclTestFile) + must.NoError(t, err) + + args := []string{"-address", url, file.Name()} + + code := cmd.Run(args) + must.Eq(t, 0, code, must.Sprintf("got error: %s", ui.ErrorWriter.String())) + + out := ui.OutputWriter.String() + must.StrContains(t, out, "Registered host volume") + parts := strings.Split(out, " ") + id := strings.TrimSpace(parts[len(parts)-1]) + + // Verify volume was registered + got, _, err := client.HostVolumes().Get(id, &api.QueryOptions{Namespace: "prod"}) + must.NoError(t, err) + must.NotNil(t, got) +} diff --git a/command/volume_status.go b/command/volume_status.go index 22fc6afc225..d599e349e83 100644 --- a/command/volume_status.go +++ b/command/volume_status.go @@ -52,6 +52,12 @@ Status Options: -t Format and display volumes using a Go template. + + -node-pool + Filter results by node pool, when no volume ID is provided and -type=host. + + -node + Filter results by node ID, when no volume ID is provided and -type=host. 
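+
+    For example, 'nomad volume status -type=host -node <node-id>' lists only
+    the dynamic host volumes on the given node.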
` return strings.TrimSpace(helpText) } @@ -63,11 +69,13 @@ func (c *VolumeStatusCommand) Synopsis() string { func (c *VolumeStatusCommand) AutocompleteFlags() complete.Flags { return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient), complete.Flags{ - "-type": predictVolumeType, - "-short": complete.PredictNothing, - "-verbose": complete.PredictNothing, - "-json": complete.PredictNothing, - "-t": complete.PredictAnything, + "-type": complete.PredictSet("csi", "host"), + "-short": complete.PredictNothing, + "-verbose": complete.PredictNothing, + "-json": complete.PredictNothing, + "-t": complete.PredictAnything, + "-node": nodePredictor(c.Client, nil), + "-node-pool": nodePoolPredictor(c.Client, nil), }) } @@ -82,14 +90,21 @@ func (c *VolumeStatusCommand) AutocompleteArgs() complete.Predictor { if err != nil { return []string{} } - return resp.Matches[contexts.Volumes] + matches := resp.Matches[contexts.Volumes] + + resp, _, err = client.Search().PrefixSearch(a.Last, contexts.HostVolumes, nil) + if err != nil { + return []string{} + } + matches = append(matches, resp.Matches[contexts.HostVolumes]...) + return matches }) } func (c *VolumeStatusCommand) Name() string { return "volume status" } func (c *VolumeStatusCommand) Run(args []string) int { - var typeArg string + var typeArg, nodeID, nodePool string flags := c.Meta.FlagSet(c.Name(), FlagSetClient) flags.Usage = func() { c.Ui.Output(c.Help()) } @@ -98,6 +113,8 @@ func (c *VolumeStatusCommand) Run(args []string) int { flags.BoolVar(&c.verbose, "verbose", false, "") flags.BoolVar(&c.json, "json", false, "") flags.StringVar(&c.template, "t", "", "") + flags.StringVar(&nodeID, "node", "", "") + flags.StringVar(&nodePool, "node-pool", "", "") if err := flags.Parse(args); err != nil { c.Ui.Error(fmt.Sprintf("Error parsing arguments %s", err)) @@ -130,12 +147,17 @@ func (c *VolumeStatusCommand) Run(args []string) int { id = args[0] } - code := c.csiStatus(client, id) - if code != 0 { - return code + switch typeArg { + case "csi", "": + if nodeID != "" || nodePool != "" { + c.Ui.Error("-node and -node-pool can only be used with -type host") + return 1 + } + return c.csiStatus(client, id) + case "host": + return c.hostVolumeStatus(client, id, nodeID, nodePool) + default: + c.Ui.Error(fmt.Sprintf("No such volume type %q", typeArg)) + return 1 } - - // Extend this section with other volume implementations - - return 0 } diff --git a/command/volume_status_csi.go b/command/volume_status_csi.go index 31fdeeb2331..01644b513d8 100644 --- a/command/volume_status_csi.go +++ b/command/volume_status_csi.go @@ -23,7 +23,7 @@ func (c *VolumeStatusCommand) csiBanner() { func (c *VolumeStatusCommand) csiStatus(client *api.Client, id string) int { // Invoke list mode if no volume id if id == "" { - return c.listVolumes(client) + return c.listCSIVolumes(client) } // get a CSI volume that matches the given prefix or a list of all matches if an @@ -55,7 +55,7 @@ func (c *VolumeStatusCommand) csiStatus(client *api.Client, id string) int { return 1 } - str, err := c.formatBasic(vol) + str, err := c.formatCSIBasic(vol) if err != nil { c.Ui.Error(fmt.Sprintf("Error formatting volume: %s", err)) return 1 @@ -65,7 +65,7 @@ func (c *VolumeStatusCommand) csiStatus(client *api.Client, id string) int { return 0 } -func (c *VolumeStatusCommand) listVolumes(client *api.Client) int { +func (c *VolumeStatusCommand) listCSIVolumes(client *api.Client) int { c.csiBanner() vols, _, err := client.CSIVolumes().List(nil) @@ -182,7 +182,7 @@ func 
csiFormatSortedVolumes(vols []*api.CSIVolumeListStub) (string, error) { return formatList(rows), nil } -func (c *VolumeStatusCommand) formatBasic(vol *api.CSIVolume) (string, error) { +func (c *VolumeStatusCommand) formatCSIBasic(vol *api.CSIVolume) (string, error) { if c.json || len(c.template) > 0 { out, err := Format(c.json, c.template, vol) if err != nil { diff --git a/command/volume_status_test.go b/command/volume_status_csi_test.go similarity index 100% rename from command/volume_status_test.go rename to command/volume_status_csi_test.go diff --git a/command/volume_status_host.go b/command/volume_status_host.go new file mode 100644 index 00000000000..ebe035ddb87 --- /dev/null +++ b/command/volume_status_host.go @@ -0,0 +1,198 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package command + +import ( + "fmt" + "sort" + "strings" + + humanize "github.com/dustin/go-humanize" + "github.com/hashicorp/nomad/api" +) + +func (c *VolumeStatusCommand) hostVolumeStatus(client *api.Client, id, nodeID, nodePool string) int { + if id == "" { + return c.listHostVolumes(client, nodeID, nodePool) + } + + if nodeID != "" || nodePool != "" { + c.Ui.Error("-node or -node-pool options can only be used when no ID is provided") + return 1 + } + + opts := formatOpts{ + verbose: c.verbose, + short: c.short, + length: c.length, + json: c.json, + template: c.template, + } + + // get a host volume that matches the given prefix or a list of all matches + // if an exact match is not found. note we can't use the shared getByPrefix + // helper here because the List API doesn't match the required signature + + volStub, possible, err := c.getByPrefix(client, id) + if err != nil { + c.Ui.Error(fmt.Sprintf("Error listing volumes: %s", err)) + return 1 + } + if len(possible) > 0 { + out, err := formatHostVolumes(possible, opts) + if err != nil { + c.Ui.Error(fmt.Sprintf("Error formatting: %s", err)) + return 1 + } + c.Ui.Error(fmt.Sprintf("Prefix matched multiple volumes\n\n%s", out)) + return 1 + } + + vol, _, err := client.HostVolumes().Get(volStub.ID, nil) + if err != nil { + c.Ui.Error(fmt.Sprintf("Error querying volume: %s", err)) + return 1 + } + + str, err := formatHostVolume(vol, opts) + if err != nil { + c.Ui.Error(fmt.Sprintf("Error formatting volume: %s", err)) + return 1 + } + c.Ui.Output(c.Colorize().Color(str)) + return 0 +} + +func (c *VolumeStatusCommand) listHostVolumes(client *api.Client, nodeID, nodePool string) int { + vols, _, err := client.HostVolumes().List(&api.HostVolumeListRequest{ + NodeID: nodeID, + NodePool: nodePool, + }, nil) + if err != nil { + c.Ui.Error(fmt.Sprintf("Error querying volumes: %s", err)) + return 1 + } + + opts := formatOpts{ + verbose: c.verbose, + short: c.short, + length: c.length, + json: c.json, + template: c.template, + } + + str, err := formatHostVolumes(vols, opts) + if err != nil { + c.Ui.Error(fmt.Sprintf("Error formatting volumes: %s", err)) + return 1 + } + c.Ui.Output(c.Colorize().Color(str)) + return 0 +} + +func (c *VolumeStatusCommand) getByPrefix(client *api.Client, prefix string) (*api.HostVolumeStub, []*api.HostVolumeStub, error) { + vols, _, err := client.HostVolumes().List(nil, &api.QueryOptions{ + Prefix: prefix, + Namespace: c.namespace, + }) + + if err != nil { + return nil, nil, fmt.Errorf("error querying volumes: %s", err) + } + switch len(vols) { + case 0: + return nil, nil, fmt.Errorf("no volumes with prefix or ID %q found", prefix) + case 1: + return vols[0], nil, nil + default: + // search for 
exact matches to account for multiple exact ID or name + // matches across namespaces + var match *api.HostVolumeStub + exactMatchesCount := 0 + for _, vol := range vols { + if vol.ID == prefix || vol.Name == prefix { + exactMatchesCount++ + match = vol + } + } + if exactMatchesCount == 1 { + return match, nil, nil + } + return nil, vols, nil + } +} + +func formatHostVolume(vol *api.HostVolume, opts formatOpts) (string, error) { + if opts.json || len(opts.template) > 0 { + out, err := Format(opts.json, opts.template, vol) + if err != nil { + return "", fmt.Errorf("format error: %v", err) + } + return out, nil + } + + output := []string{ + fmt.Sprintf("ID|%s", vol.ID), + fmt.Sprintf("Name|%s", vol.Name), + fmt.Sprintf("Namespace|%s", vol.Namespace), + fmt.Sprintf("Plugin ID|%s", vol.PluginID), + fmt.Sprintf("Node ID|%s", vol.NodeID), + fmt.Sprintf("Node Pool|%s", vol.NodePool), + fmt.Sprintf("Capacity|%s", humanize.IBytes(uint64(vol.CapacityBytes))), + fmt.Sprintf("State|%s", vol.State), + fmt.Sprintf("Host Path|%s", vol.HostPath), + } + + // Exit early + if opts.short { + return formatKV(output), nil + } + + full := []string{formatKV(output)} + + // Format the allocs + banner := "\n[bold]Allocations[reset]" + allocs := formatAllocListStubs(vol.Allocations, opts.verbose, opts.length) + full = append(full, banner) + full = append(full, allocs) + + return strings.Join(full, "\n"), nil +} + +// TODO: we could make a bunch more formatters into shared functions using this +type formatOpts struct { + verbose bool + short bool + length int + json bool + template string +} + +func formatHostVolumes(vols []*api.HostVolumeStub, opts formatOpts) (string, error) { + // Sort the output by volume ID + sort.Slice(vols, func(i, j int) bool { return vols[i].ID < vols[j].ID }) + + if opts.json || len(opts.template) > 0 { + out, err := Format(opts.json, opts.template, vols) + if err != nil { + return "", fmt.Errorf("format error: %v", err) + } + return out, nil + } + + rows := make([]string, len(vols)+1) + rows[0] = "ID|Name|Namespace|Plugin ID|Node ID|Node Pool|State" + for i, v := range vols { + rows[i+1] = fmt.Sprintf("%s|%s|%s|%s|%s|%s|%s", + limit(v.ID, opts.length), + v.Name, + v.Namespace, + v.PluginID, + limit(v.NodeID, opts.length), + v.NodePool, + v.State, + ) + } + return formatList(rows), nil +} diff --git a/command/volume_status_host_test.go b/command/volume_status_host_test.go new file mode 100644 index 00000000000..c51e931096b --- /dev/null +++ b/command/volume_status_host_test.go @@ -0,0 +1,170 @@ +// Copyright (c) HashiCorp, Inc. 
+// SPDX-License-Identifier: BUSL-1.1 + +package command + +import ( + "fmt" + "os" + "strings" + "testing" + + "github.com/hashicorp/nomad/api" + "github.com/hashicorp/nomad/ci" + "github.com/mitchellh/cli" + "github.com/posener/complete" + "github.com/shoenig/test/must" +) + +func TestHostVolumeStatusCommand_Args(t *testing.T) { + ci.Parallel(t) + ui := cli.NewMockUi() + cmd := &VolumeStatusCommand{Meta: Meta{Ui: ui}} + + code := cmd.Run([]string{ + "-type", "host", + "-node", "6063016a-9d4c-11ef-85fc-9be98efe7e76", + "-node-pool", "prod", + "6e3e80f2-9d4c-11ef-97b1-d38cf64416a4", + }) + must.One(t, code) + + out := ui.ErrorWriter.String() + must.StrContains(t, out, "-node or -node-pool options can only be used when no ID is provided") +} + +func TestHostVolumeStatusCommand_List(t *testing.T) { + ci.Parallel(t) + srv, client, url := testServer(t, true, nil) + t.Cleanup(srv.Shutdown) + + waitForNodes(t, client) + + _, err := client.Namespaces().Register(&api.Namespace{Name: "prod"}, nil) + must.NoError(t, err) + + nodes, _, err := client.Nodes().List(nil) + must.NoError(t, err) + must.Len(t, 1, nodes) + nodeID := nodes[0].ID + + ui := cli.NewMockUi() + + vols := []api.NamespacedID{ + {Namespace: "prod", ID: "database"}, + {Namespace: "prod", ID: "certs"}, + {Namespace: "default", ID: "example"}, + } + + for _, vol := range vols { + hclTestFile := fmt.Sprintf(` +namespace = "%s" +name = "%s" +type = "host" +plugin_id = "mkdir" +node_id = "%s" +node_pool = "default" +capability { + access_mode = "single-node-reader-only" + attachment_mode = "file-system" +} +`, vol.Namespace, vol.ID, nodeID) + + file, err := os.CreateTemp(t.TempDir(), "volume-test-*.hcl") + must.NoError(t, err) + _, err = file.WriteString(hclTestFile) + must.NoError(t, err) + + args := []string{"-address", url, "-detach", file.Name()} + cmd := &VolumeCreateCommand{Meta: Meta{Ui: ui}} + code := cmd.Run(args) + must.Eq(t, 0, code, must.Sprintf("got error: %s", ui.ErrorWriter.String())) + + out := ui.OutputWriter.String() + must.StrContains(t, out, "Created host volume") + ui.OutputWriter.Reset() + } + + cmd := &VolumeStatusCommand{Meta: Meta{Ui: ui}} + args := []string{"-address", url, "-type", "host", "-namespace", "prod"} + code := cmd.Run(args) + must.Eq(t, 0, code, must.Sprintf("got error: %s", ui.ErrorWriter.String())) + out := ui.OutputWriter.String() + must.StrContains(t, out, "certs") + must.StrContains(t, out, "database") + must.StrNotContains(t, out, "example") +} + +func TestHostVolumeStatusCommand_Get(t *testing.T) { + ci.Parallel(t) + srv, client, url := testServer(t, true, nil) + t.Cleanup(srv.Shutdown) + + waitForNodes(t, client) + + _, err := client.Namespaces().Register(&api.Namespace{Name: "prod"}, nil) + must.NoError(t, err) + + nodes, _, err := client.Nodes().List(nil) + must.NoError(t, err) + must.Len(t, 1, nodes) + nodeID := nodes[0].ID + + ui := cli.NewMockUi() + + hclTestFile := fmt.Sprintf(` +namespace = "prod" +name = "example" +type = "host" +plugin_id = "mkdir" +node_id = "%s" +node_pool = "default" +capability { + access_mode = "single-node-reader-only" + attachment_mode = "file-system" +} +`, nodeID) + + file, err := os.CreateTemp(t.TempDir(), "volume-test-*.hcl") + must.NoError(t, err) + _, err = file.WriteString(hclTestFile) + must.NoError(t, err) + + args := []string{"-address", url, file.Name()} + regCmd := &VolumeRegisterCommand{Meta: Meta{Ui: ui}} + code := regCmd.Run(args) + must.Eq(t, 0, code, must.Sprintf("got error: %s", ui.ErrorWriter.String())) 
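+
+	// the register command prints "Registered host volume <name> with ID <id>",
+	// so the volume ID is the last whitespace-separated token of the output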
+ + out := ui.OutputWriter.String() + must.StrContains(t, out, "Registered host volume") + parts := strings.Split(out, " ") + id := strings.TrimSpace(parts[len(parts)-1]) + + ui.OutputWriter.Reset() + + // autocomplete + cmd := &VolumeStatusCommand{Meta: Meta{Ui: ui, namespace: "*", flagAddress: url}} + cmd.Meta.namespace = "*" + prefix := id[:len(id)-5] + cargs := complete.Args{Last: prefix} + predictor := cmd.AutocompleteArgs() + + res := predictor.Predict(cargs) + must.SliceLen(t, 1, res) + must.Eq(t, id, res[0]) + + // missing the namespace + cmd = &VolumeStatusCommand{Meta: Meta{Ui: ui}} + args = []string{"-address", url, "-type", "host", id} + code = cmd.Run(args) + must.Eq(t, 1, code) + must.StrContains(t, ui.ErrorWriter.String(), + "Error listing volumes: no volumes with prefix or ID") + ui.ErrorWriter.Reset() + + args = []string{"-address", url, "-type", "host", "-namespace", "prod", id} + code = cmd.Run(args) + must.Eq(t, 0, code, must.Sprintf("got error: %s", ui.ErrorWriter.String())) + out = ui.OutputWriter.String() + must.StrContains(t, out, "example") +} diff --git a/demo/hostvolume/_test-plugin.sh b/demo/hostvolume/_test-plugin.sh new file mode 100755 index 00000000000..864680e237d --- /dev/null +++ b/demo/hostvolume/_test-plugin.sh @@ -0,0 +1,75 @@ +#!/usr/bin/env bash +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: BUSL-1.1 + +set -euo pipefail + +help() { + cat < [target dir] [uuid] + +Args: + plugin: path to plugin executable + operation: fingerprint, create, or delete + create and delete must be idempotent. + any other operation will be passed into the plugin, + to see how it handles invalid operations. + target dir: directory to create the volume (defaults to /tmp) + uuid: volume id to use (usually assigned by Nomad; + defaults to 74564d17-ce50-0bc1-48e5-6feaa41ede48) + +Examples: + $0 ./example-plugin-mkfs fingerprint + $0 ./example-plugin-mkfs create + $0 ./example-plugin-mkfs create /some/other/place + $0 ./example-plugin-mkfs delete +EOF +} + +if [[ $# -eq 0 || "$*" =~ -h ]]; then + help + exit +fi +if [ $# -lt 2 ]; then + help + exit 1 +fi + +plugin="$1" +op="$2" +alloc_mounts="${3:-/tmp}" +uuid="${4:-74564d17-ce50-0bc1-48e5-6feaa41ede48}" + +case $op in + fingerprint) + args='fingerprint' + ;; + + create) + args="create $alloc_mounts/$uuid" + export HOST_PATH="$alloc_mounts/$uuid" + export VOLUME_NAME=test + export NODE_ID=0b62d807-6101-a80f-374d-e1c430abbf47 + export CAPACITY_MAX_BYTES=50000000 # 50mb + export CAPACITY_MIN_BYTES=50000000 # 50mb + export PARAMETERS='{"a": "ayy"}' + # db TODO(1.10.0): check stdout + ;; + + delete) + args="delete $alloc_mounts/$uuid" + export HOST_PATH="$alloc_mounts/$uuid" + export PARAMETERS='{"a": "ayy"}' + ;; + + *) + args="$*" + ;; +esac + +export OPERATION="$op" +set -x +eval "$plugin $args" diff --git a/demo/hostvolume/check.sh b/demo/hostvolume/check.sh new file mode 100755 index 00000000000..c89a36c5a54 --- /dev/null +++ b/demo/hostvolume/check.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: BUSL-1.1 + +set -xeuo pipefail + +nomad volume status -type=host -verbose +nomad operator api /v1/nodes | jq '.[].HostVolumes' + +addr="$(nomad service info -json job | jq -r '.[0].Address'):8000" +curl -sS "$addr/external/" | grep hi +curl -sS "$addr/internal/" | grep hi + +echo '💚 looks good! 
💚' diff --git a/demo/hostvolume/e2e.sh b/demo/hostvolume/e2e.sh new file mode 100755 index 00000000000..d27070cafac --- /dev/null +++ b/demo/hostvolume/e2e.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: BUSL-1.1 + +set -xeuo pipefail + +./setup.sh +./check.sh +./teardown.sh diff --git a/demo/hostvolume/example-plugin-mkfs b/demo/hostvolume/example-plugin-mkfs new file mode 100755 index 00000000000..5bfaa4e47fa --- /dev/null +++ b/demo/hostvolume/example-plugin-mkfs @@ -0,0 +1,135 @@ +#!/usr/bin/env bash +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: BUSL-1.1 + +# db TODO(1.10.0): where does PATH come from here? somewhere implicit? /sbin/ and /bin/ and ...? + +set -euo pipefail + +version='0.0.1' +fingerprint() { + printf '{"version": "%s"}' "$version" +} + +help() { + cat < [path] + +Options: + -v|--verbose: Show shell commands (set -x) + -h|--help: Print this help text and exit + +Operations: + create: Creates and mounts the device at path (required) + required environment: + CAPACITY_MIN_BYTES + delete: Unmounts and deletes the device at path (required) + version: Outputs this plugin's version: $version + fingerprint: Outputs plugin metadata: $(fingerprint) + +EOF +} + +# parse args +[ $# -eq 0 ] && { help; exit 1; } +for arg in "$@"; do + case $arg in + -h|-help|--help) help; exit 0 ;; + fingerprint|fingerprint) fingerprint; exit 0 ;; + version|version) echo "$version"; exit 0 ;; + -v|--verbose) set -x; shift; ;; + esac +done + +# path is required for everything else +[ $# -lt 2 ] && { echo 'path required; seek --help' 1>&2; exit 1; } +host_path="$2" + +# OS detect +if [[ "$OSTYPE" == "linux-"* ]]; then + ext=ext4 + mount=/usr/bin/mount + mkfsExec() { + dd if=/dev/zero of="$1".$ext bs=1M count="$2" + mkfs.ext4 "$1".$ext 1>&2 + } + mountExec() { + $mount "$1".$ext "$1" + } + st() { + stat --format='%s' "$1" + } +elif [[ "$OSTYPE" == "darwin"* ]]; then + ext=dmg + mount=/sbin/mount + mkfsExec() { + hdiutil create -megabytes "$2" -layout NONE -fs apfs -volname "$1" "$1" 1>&2 + } + mountExec() { + hdiutil attach "$1".$ext 1>&2 + } + st() { + stat -f %z "$1" + } +else + echo "$OSTYPE is an unsupported OS" + exit 1 +fi + +validate_path() { + local path="$1" + if [[ ! "$path" =~ [0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12} ]]; then + 1>&2 echo "expected uuid-lookin ID in the HOST_PATH; got: '$path'" + return 1 + fi +} + +is_mounted() { + $mount | grep -q " $1 " +} + +create_volume() { + local path="$1" + validate_path "$path" + local bytes="$2" + + # translate to mb for dd block size + local megs=$((bytes / 1024 / 1024)) # lazy, approximate + + # the extra conditionals are for idempotency + if [ ! -f "$path.$ext" ]; then + mkfsExec "$path" $megs + fi + if ! 
is_mounted "$path"; then + mkdir -p "$path" + mountExec "$path" + fi +} + +delete_volume() { + local path="$1" + validate_path "$path" + is_mounted "$path" && umount "$path" + rm -rf "$path" + rm -f "$path"."$ext" +} + +case "$1" in + "create") + create_volume "$host_path" "$CAPACITY_MIN_BYTES" + # output what Nomad expects + bytes="$(st "$host_path".$ext)" + printf '{"path": "%s", "bytes": %s}' "$host_path" "$bytes" + ;; + "delete") + delete_volume "$host_path" ;; + *) + echo "unknown operation: $1" 1>&2 + exit 1 ;; +esac diff --git a/demo/hostvolume/external-plugin.volume.hcl b/demo/hostvolume/external-plugin.volume.hcl new file mode 100644 index 00000000000..6c9f17e8d50 --- /dev/null +++ b/demo/hostvolume/external-plugin.volume.hcl @@ -0,0 +1,22 @@ +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: BUSL-1.1 + +name = "external-plugin" +type = "host" +# the executable named `example-plugin-mkfs` must be placed in the +# -host-volume-plugin-dir (config: client.host_volume_plugin_dir) +# or you will get an error creating the volume: +# * could not place volume "external-plugin": no node meets constraints +# The default location is /host_volume_plugins +plugin_id = "example-plugin-mkfs" +capacity_min = "50mb" +capacity_max = "50mb" + +capability { + access_mode = "single-node-writer" + attachment_mode = "file-system" +} + +parameters { + a = "ayy" +} diff --git a/demo/hostvolume/internal-plugin.volume.hcl b/demo/hostvolume/internal-plugin.volume.hcl new file mode 100644 index 00000000000..bbea133e1a6 --- /dev/null +++ b/demo/hostvolume/internal-plugin.volume.hcl @@ -0,0 +1,14 @@ +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: BUSL-1.1 + +name = "internal-plugin" +type = "host" +# this plugin is built into Nomad; +# it simply creates a directory. +plugin_id = "mkdir" + +capability { + access_mode = "single-node-writer" + attachment_mode = "file-system" +} + diff --git a/demo/hostvolume/job.nomad.hcl b/demo/hostvolume/job.nomad.hcl new file mode 100644 index 00000000000..1b0e0162665 --- /dev/null +++ b/demo/hostvolume/job.nomad.hcl @@ -0,0 +1,48 @@ +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: BUSL-1.1 + +job "job" { + group "g" { + task "t" { + driver = "docker" + config { + image = "python:slim" + command = "bash" + args = ["-xc", <<-EOF + for dir in internal external; do + touch ${NOMAD_TASK_DIR}/$dir/hiii + done + python -m http.server -d ${NOMAD_TASK_DIR} --bind=:: + EOF + ] + ports = ["http"] + } + volume_mount { + volume = "int" + destination = "${NOMAD_TASK_DIR}/internal" + } + volume_mount { + volume = "ext" + destination = "${NOMAD_TASK_DIR}/external" + } + } + volume "int" { + type = "host" + source = "internal-plugin" + } + volume "ext" { + type = "host" + source = "external-plugin" + } + network { + port "http" { + static = 8000 + } + } + service { + name = "job" + port = "http" + provider = "nomad" + } + } +} diff --git a/demo/hostvolume/setup.sh b/demo/hostvolume/setup.sh new file mode 100755 index 00000000000..9a9fc7be719 --- /dev/null +++ b/demo/hostvolume/setup.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +# Copyright (c) HashiCorp, Inc. 
+# SPDX-License-Identifier: BUSL-1.1 + +set -xeuo pipefail + +nomad volume create external-plugin.volume.hcl +nomad volume create internal-plugin.volume.hcl + +nomad job run job.nomad.hcl + +nomad volume status -type=host -verbose +nomad operator api /v1/nodes | jq '.[].HostVolumes' + diff --git a/demo/hostvolume/teardown.sh b/demo/hostvolume/teardown.sh new file mode 100755 index 00000000000..d4d17d67fa4 --- /dev/null +++ b/demo/hostvolume/teardown.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: BUSL-1.1 + +set -xeuo pipefail + +nomad job stop job || true + +for _ in {1..5}; do + sleep 3 + ids="$(nomad volume status -type=host -verbose | awk '/ternal-plugin/ {print$1}')" + test -z "$ids" && break + for id in $ids; do + nomad volume delete -type=host "$id" || continue + done +done + diff --git a/helper/funcs.go b/helper/funcs.go index e251328f697..2695540ffa7 100644 --- a/helper/funcs.go +++ b/helper/funcs.go @@ -9,6 +9,7 @@ import ( "maps" "math" "net/http" + "os" "path/filepath" "reflect" "regexp" @@ -525,3 +526,50 @@ func Merge[T comparable](a, b T) T { } return a } + +// FlattenMultierror takes a multierror and unwraps it if there's only one error +// in the output, otherwise returning the multierror or nil. +func FlattenMultierror(err error) error { + mErr, ok := err.(*multierror.Error) + if !ok { + return err + } + // note: mErr is a pointer so we still need to nil-check even after the cast + if mErr == nil { + return nil + } + if mErr.Len() == 1 { + return mErr.Errors[0] + } + return mErr.ErrorOrNil() +} + +// FindExecutableFiles looks in the provided path for executables and returns +// a map where keys are filenames and values are the absolute path. +func FindExecutableFiles(path string) (map[string]string, error) { + executables := make(map[string]string) + entries, err := os.ReadDir(path) + if err != nil { + return executables, err + } + for _, e := range entries { + i, err := e.Info() + if err != nil { + return executables, err + } + if !IsExecutable(i) { + continue + } + p := filepath.Join(path, i.Name()) + abs, err := filepath.Abs(p) + if err != nil { + return executables, err + } + executables[i.Name()] = abs + } + return executables, nil +} + +func IsExecutable(i os.FileInfo) bool { + return !i.IsDir() && i.Mode()&0o111 != 0 +} diff --git a/helper/funcs_test.go b/helper/funcs_test.go index 4e1947f28d4..86e6fd4c8e8 100644 --- a/helper/funcs_test.go +++ b/helper/funcs_test.go @@ -4,12 +4,14 @@ package helper import ( + "errors" "fmt" "maps" "reflect" "sort" "testing" + multierror "github.com/hashicorp/go-multierror" "github.com/hashicorp/go-set/v3" "github.com/shoenig/test/must" "github.com/stretchr/testify/require" @@ -483,3 +485,46 @@ func Test_SliceSetEq(t *testing.T) { must.True(t, SliceSetEq(a, b)) }) } + +func TestFlattenMultiError(t *testing.T) { + + err := FlattenMultierror(nil) + must.Nil(t, err) + + err0 := errors.New("oh no!") + err = FlattenMultierror(err0) + must.Eq(t, `oh no!`, err.Error()) + + var mErr0 *multierror.Error + err = FlattenMultierror(mErr0) + must.Nil(t, err) + + mErr0 = multierror.Append(mErr0, func() error { + return nil + }()) + err = FlattenMultierror(mErr0) + must.Nil(t, err) + + var mErr1 *multierror.Error + mErr1 = multierror.Append(mErr1, func() error { + var mErr *multierror.Error + mErr = multierror.Append(mErr, errors.New("inner1")) + return mErr + }()) + err = FlattenMultierror(mErr1) + must.Eq(t, `inner1`, err.Error()) + + var mErr2 *multierror.Error 
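+	// with more than one wrapped error, FlattenMultierror returns the
+	// multierror itself rather than unwrapping it to a single error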
+ mErr2 = multierror.Append(mErr2, func() error { + var mErr *multierror.Error + mErr = multierror.Append(mErr, errors.New("inner1")) + mErr = multierror.Append(mErr, errors.New("inner2")) + return mErr + }()) + err = FlattenMultierror(mErr2) + must.Eq(t, `2 errors occurred: + * inner1 + * inner2 + +`, err.Error()) +} diff --git a/helper/raftutil/msgtypes.go b/helper/raftutil/msgtypes.go index 615881173c9..af4d7e5f41c 100644 --- a/helper/raftutil/msgtypes.go +++ b/helper/raftutil/msgtypes.go @@ -68,4 +68,6 @@ var msgTypeNames = map[structs.MessageType]string{ structs.WrappedRootKeysUpsertRequestType: "WrappedRootKeysUpsertRequestType", structs.NamespaceUpsertRequestType: "NamespaceUpsertRequestType", structs.NamespaceDeleteRequestType: "NamespaceDeleteRequestType", + structs.HostVolumeRegisterRequestType: "HostVolumeRegisterRequestType", + structs.HostVolumeDeleteRequestType: "HostVolumeDeleteRequestType", } diff --git a/nomad/client_host_volume_endpoint.go b/nomad/client_host_volume_endpoint.go new file mode 100644 index 00000000000..5749643d255 --- /dev/null +++ b/nomad/client_host_volume_endpoint.go @@ -0,0 +1,84 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package nomad + +import ( + "fmt" + "time" + + "github.com/armon/go-metrics" + log "github.com/hashicorp/go-hclog" + cstructs "github.com/hashicorp/nomad/client/structs" + "github.com/hashicorp/nomad/nomad/structs" +) + +// ClientHostVolume is the client RPC endpoint for host volumes +type ClientHostVolume struct { + srv *Server + ctx *RPCContext + logger log.Logger +} + +func NewClientHostVolumeEndpoint(srv *Server, ctx *RPCContext) *ClientHostVolume { + return &ClientHostVolume{srv: srv, ctx: ctx, logger: srv.logger.Named("client_host_volume")} +} + +func (c *ClientHostVolume) Create(args *cstructs.ClientHostVolumeCreateRequest, reply *cstructs.ClientHostVolumeCreateResponse) error { + defer metrics.MeasureSince([]string{"nomad", "client_host_node", "create"}, time.Now()) + return c.sendVolumeRPC( + args.NodeID, + "HostVolume.Create", + "ClientHostVolume.Create", + structs.RateMetricWrite, + args, + reply, + ) +} + +func (c *ClientHostVolume) Delete(args *cstructs.ClientHostVolumeDeleteRequest, reply *cstructs.ClientHostVolumeDeleteResponse) error { + defer metrics.MeasureSince([]string{"nomad", "client_host_volume", "delete"}, time.Now()) + return c.sendVolumeRPC( + args.NodeID, + "HostVolume.Delete", + "ClientHostVolume.Delete", + structs.RateMetricWrite, + args, + reply, + ) +} + +func (c *ClientHostVolume) sendVolumeRPC(nodeID, method, fwdMethod, op string, args any, reply any) error { + // client requests aren't RequestWithIdentity, so we use a placeholder here + // to populate the identity data for metrics + identityReq := &structs.GenericRequest{} + aclObj, err := c.srv.AuthenticateServerOnly(c.ctx, identityReq) + c.srv.MeasureRPCRate("client_host_volume", op, identityReq) + + if err != nil || !aclObj.AllowServerOp() { + return structs.ErrPermissionDenied + } + + // Make sure Node is valid and new enough to support RPC + snap, err := c.srv.State().Snapshot() + if err != nil { + return err + } + + _, err = getNodeForRpc(snap, nodeID) + if err != nil { + return err + } + + // Get the connection to the client + state, ok := c.srv.getNodeConn(nodeID) + if !ok { + return findNodeConnAndForward(c.srv, nodeID, fwdMethod, args, reply) + } + + // Make the RPC + if err := NodeRpc(state.Session, method, args, reply); err != nil { + return fmt.Errorf("%s error: %w", 
method, err) + } + return nil +} diff --git a/nomad/fsm.go b/nomad/fsm.go index 2ae3a2341b0..3cc4bf101b4 100644 --- a/nomad/fsm.go +++ b/nomad/fsm.go @@ -57,6 +57,7 @@ const ( NodePoolSnapshot SnapshotType = 28 JobSubmissionSnapshot SnapshotType = 29 RootKeySnapshot SnapshotType = 30 + HostVolumeSnapshot SnapshotType = 31 // TimeTableSnapshot // Deprecated: Nomad no longer supports TimeTable snapshots since 1.9.2 @@ -102,6 +103,7 @@ var snapshotTypeStrings = map[SnapshotType]string{ NodePoolSnapshot: "NodePool", JobSubmissionSnapshot: "JobSubmission", RootKeySnapshot: "WrappedRootKeys", + HostVolumeSnapshot: "HostVolumeSnapshot", NamespaceSnapshot: "Namespace", } @@ -381,9 +383,12 @@ func (n *nomadFSM) Apply(log *raft.Log) interface{} { return n.applyACLBindingRulesDelete(buf[1:], log.Index) case structs.WrappedRootKeysUpsertRequestType: return n.applyWrappedRootKeysUpsert(msgType, buf[1:], log.Index) - case structs.JobVersionTagRequestType: return n.applyJobVersionTag(buf[1:], log.Index) + case structs.HostVolumeRegisterRequestType: + return n.applyHostVolumeRegister(msgType, buf[1:], log.Index) + case structs.HostVolumeDeleteRequestType: + return n.applyHostVolumeDelete(msgType, buf[1:], log.Index) } // Check enterprise only message types. @@ -1936,6 +1941,17 @@ func (n *nomadFSM) restoreImpl(old io.ReadCloser, filter *FSMFilter) error { return err } + case HostVolumeSnapshot: + vol := new(structs.HostVolume) + if err := dec.Decode(vol); err != nil { + return err + } + if filter.Include(vol) { + if err := restore.HostVolumeRestore(vol); err != nil { + return err + } + } + default: // Check if this is an enterprise only object being restored restorer, ok := n.enterpriseRestorers[snapType] @@ -2404,6 +2420,36 @@ func (n *nomadFSM) applyWrappedRootKeysDelete(msgType structs.MessageType, buf [ return nil } +func (n *nomadFSM) applyHostVolumeRegister(msgType structs.MessageType, buf []byte, index uint64) interface{} { + defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_host_volume_register"}, time.Now()) + + var req structs.HostVolumeRegisterRequest + if err := structs.Decode(buf, &req); err != nil { + panic(fmt.Errorf("failed to decode request: %v", err)) + } + + if err := n.state.UpsertHostVolume(index, req.Volume); err != nil { + n.logger.Error("UpsertHostVolumes failed", "error", err) + return err + } + return nil +} + +func (n *nomadFSM) applyHostVolumeDelete(msgType structs.MessageType, buf []byte, index uint64) interface{} { + defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_host_volume_delete"}, time.Now()) + + var req structs.HostVolumeDeleteRequest + if err := structs.Decode(buf, &req); err != nil { + panic(fmt.Errorf("failed to decode request: %v", err)) + } + + if err := n.state.DeleteHostVolume(index, req.RequestNamespace(), req.VolumeID); err != nil { + n.logger.Error("DeleteHostVolumes failed", "error", err) + return err + } + return nil +} + func (s *nomadSnapshot) Persist(sink raft.SnapshotSink) error { defer metrics.MeasureSince([]string{"nomad", "fsm", "persist"}, time.Now()) // Register the nodes @@ -2537,6 +2583,10 @@ func (s *nomadSnapshot) Persist(sink raft.SnapshotSink) error { sink.Cancel() return err } + if err := s.persistHostVolumes(sink, encoder); err != nil { + sink.Cancel() + return err + } return nil } @@ -3274,6 +3324,22 @@ func (s *nomadSnapshot) persistJobSubmissions(sink raft.SnapshotSink, encoder *c return nil } +func (s *nomadSnapshot) persistHostVolumes(sink raft.SnapshotSink, encoder *codec.Encoder) error { + iter, err := 
s.snap.HostVolumes(nil, state.SortDefault) + if err != nil { + return err + } + for raw := iter.Next(); raw != nil; raw = iter.Next() { + vol := raw.(*structs.HostVolume) + + sink.Write([]byte{byte(HostVolumeSnapshot)}) + if err := encoder.Encode(vol); err != nil { + return err + } + } + return nil +} + // Release is a no-op, as we just need to GC the pointer // to the state store snapshot. There is nothing to explicitly // cleanup. diff --git a/nomad/host_volume_endpoint.go b/nomad/host_volume_endpoint.go new file mode 100644 index 00000000000..e33b0e8a42b --- /dev/null +++ b/nomad/host_volume_endpoint.go @@ -0,0 +1,626 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package nomad + +import ( + "fmt" + "net/http" + "regexp" + "strings" + "time" + + "github.com/armon/go-metrics" + "github.com/hashicorp/go-hclog" + "github.com/hashicorp/go-memdb" + "github.com/hashicorp/nomad/acl" + cstructs "github.com/hashicorp/nomad/client/structs" + "github.com/hashicorp/nomad/helper" + "github.com/hashicorp/nomad/nomad/state" + "github.com/hashicorp/nomad/nomad/state/paginator" + "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/scheduler" +) + +// HostVolume is the server RPC endpoint for host volumes +type HostVolume struct { + srv *Server + ctx *RPCContext + logger hclog.Logger +} + +func NewHostVolumeEndpoint(srv *Server, ctx *RPCContext) *HostVolume { + return &HostVolume{srv: srv, ctx: ctx, logger: srv.logger.Named("host_volume")} +} + +func (v *HostVolume) Get(args *structs.HostVolumeGetRequest, reply *structs.HostVolumeGetResponse) error { + authErr := v.srv.Authenticate(v.ctx, args) + if done, err := v.srv.forward("HostVolume.Get", args, args, reply); done { + return err + } + v.srv.MeasureRPCRate("host_volume", structs.RateMetricRead, args) + if authErr != nil { + return structs.ErrPermissionDenied + } + defer metrics.MeasureSince([]string{"nomad", "host_volume", "get"}, time.Now()) + + allowVolume := acl.NamespaceValidator(acl.NamespaceCapabilityHostVolumeRead) + aclObj, err := v.srv.ResolveACL(args) + if err != nil { + return err + } + if !allowVolume(aclObj, args.RequestNamespace()) { + return structs.ErrPermissionDenied + } + + opts := blockingOptions{ + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + run: func(ws memdb.WatchSet, store *state.StateStore) error { + + vol, err := store.HostVolumeByID(ws, args.Namespace, args.ID, true) + if err != nil { + return err + } + + reply.Volume = vol + if vol != nil { + reply.Index = vol.ModifyIndex + } else { + index, err := store.Index(state.TableHostVolumes) + if err != nil { + return err + } + + // Ensure we never set the index to zero, otherwise a blocking + // query cannot be used. We floor the index at one, since + // realistically the first write must have a higher index. 
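+		// Clients pass the index they last saw back as MinQueryIndex, and a
+		// MinQueryIndex of zero is effectively "don't block", so handing back
+		// a zero index here would make follow-up queries return immediately
+		// and spin instead of waiting for the first write.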
+ if index == 0 { + index = 1 + } + reply.Index = index + } + return nil + }} + return v.srv.blockingRPC(&opts) +} + +func (v *HostVolume) List(args *structs.HostVolumeListRequest, reply *structs.HostVolumeListResponse) error { + authErr := v.srv.Authenticate(v.ctx, args) + if done, err := v.srv.forward("HostVolume.List", args, args, reply); done { + return err + } + v.srv.MeasureRPCRate("host_volume", structs.RateMetricList, args) + if authErr != nil { + return structs.ErrPermissionDenied + } + defer metrics.MeasureSince([]string{"nomad", "host_volume", "list"}, time.Now()) + + aclObj, err := v.srv.ResolveACL(args) + if err != nil { + return err + } + + ns := args.RequestNamespace() + + sort := state.SortOption(args.Reverse) + opts := blockingOptions{ + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + run: func(ws memdb.WatchSet, store *state.StateStore) error { + + var iter memdb.ResultIterator + var err error + + switch { + case args.NodeID != "": + iter, err = store.HostVolumesByNodeID(ws, args.NodeID, sort) + case args.NodePool != "": + iter, err = store.HostVolumesByNodePool(ws, args.NodePool, sort) + default: + iter, err = store.HostVolumes(ws, sort) + } + if err != nil { + return err + } + + // Generate the tokenizer to use for pagination using namespace and + // ID to ensure complete uniqueness. + tokenizer := paginator.NewStructsTokenizer(iter, + paginator.StructsTokenizerOptions{ + WithNamespace: true, + WithID: true, + }, + ) + + filters := []paginator.Filter{ + paginator.GenericFilter{ + Allow: func(raw any) (bool, error) { + vol := raw.(*structs.HostVolume) + // empty prefix doesn't filter + if !strings.HasPrefix(vol.Name, args.Prefix) && + !strings.HasPrefix(vol.ID, args.Prefix) { + return false, nil + } + if args.NodeID != "" && vol.NodeID != args.NodeID { + return false, nil + } + if args.NodePool != "" && vol.NodePool != args.NodePool { + return false, nil + } + + if ns != structs.AllNamespacesSentinel && + vol.Namespace != ns { + return false, nil + } + + allowVolume := acl.NamespaceValidator(acl.NamespaceCapabilityHostVolumeRead) + return allowVolume(aclObj, ns), nil + }, + }, + } + + // Set up our output after we have checked the error. + var vols []*structs.HostVolumeStub + + // Build the paginator. This includes the function that is + // responsible for appending a variable to the variables + // stubs slice. + paginatorImpl, err := paginator.NewPaginator(iter, tokenizer, filters, args.QueryOptions, + func(raw any) error { + vol := raw.(*structs.HostVolume) + vols = append(vols, vol.Stub()) + return nil + }) + if err != nil { + return structs.NewErrRPCCodedf( + http.StatusBadRequest, "failed to create result paginator: %v", err) + } + + // Calling page populates our output variable stub array as well as + // returns the next token. + nextToken, err := paginatorImpl.Page() + if err != nil { + return structs.NewErrRPCCodedf( + http.StatusBadRequest, "failed to read result page: %v", err) + } + + reply.Volumes = vols + reply.NextToken = nextToken + + // Use the index table to populate the query meta as we have no way + // of tracking the max index on deletes. 
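+			// Deleted volumes no longer carry a ModifyIndex, so the table's
+			// entry in the index table (bumped by DeleteHostVolume on every
+			// removal) is what lets a blocking List unblock after a delete.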
+ return v.srv.setReplyQueryMeta(store, state.TableHostVolumes, &reply.QueryMeta) + }, + } + + return v.srv.blockingRPC(&opts) +} + +func (v *HostVolume) Create(args *structs.HostVolumeCreateRequest, reply *structs.HostVolumeCreateResponse) error { + + authErr := v.srv.Authenticate(v.ctx, args) + if done, err := v.srv.forward("HostVolume.Create", args, args, reply); done { + return err + } + v.srv.MeasureRPCRate("host_volume", structs.RateMetricWrite, args) + if authErr != nil { + return structs.ErrPermissionDenied + } + defer metrics.MeasureSince([]string{"nomad", "host_volume", "create"}, time.Now()) + + allowVolume := acl.NamespaceValidator(acl.NamespaceCapabilityHostVolumeCreate) + aclObj, err := v.srv.ResolveACL(args) + if err != nil { + return err + } + + if args.Volume == nil { + return fmt.Errorf("missing volume definition") + } + + vol := args.Volume + if vol.Namespace == "" { + vol.Namespace = args.RequestNamespace() + } + if !allowVolume(aclObj, vol.Namespace) { + return structs.ErrPermissionDenied + } + + // ensure we only try to create a valid volume or make valid updates to a + // volume + now := time.Now() + snap, err := v.srv.State().Snapshot() + if err != nil { + return err + } + + vol, err = v.validateVolumeUpdate(vol, snap, now) + if err != nil { + return err + } + + _, err = v.placeHostVolume(snap, vol) + if err != nil { + return fmt.Errorf("could not place volume %q: %w", vol.Name, err) + } + + warn, err := v.enforceEnterprisePolicy( + snap, vol, args.GetIdentity().GetACLToken(), args.PolicyOverride) + if warn != nil { + reply.Warnings = warn.Error() + } + if err != nil { + return err + } + + // Attempt to create the volume on the client. + // + // NOTE: creating the volume on the client via the plugin can't be made + // atomic with the registration, and creating the volume provides values we + // want to write on the Volume in raft anyways. + err = v.createVolume(vol) + if err != nil { + return err + } + + // Write a newly created or modified volume to raft. We create a new request + // here because we've likely mutated the volume. 
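+	// (Placement filled in NodeID and NodePool, CanonicalizeForUpdate set any
+	// zero values from the existing volume, and the client's create response
+	// populated HostPath, CapacityBytes, and possibly State.)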
+ _, index, err := v.srv.raftApply(structs.HostVolumeRegisterRequestType, + &structs.HostVolumeRegisterRequest{ + Volume: vol, + WriteRequest: args.WriteRequest, + }) + if err != nil { + v.logger.Error("raft apply failed", "error", err, "method", "register") + return err + } + + reply.Volume = vol + reply.Index = index + return nil +} + +func (v *HostVolume) Register(args *structs.HostVolumeRegisterRequest, reply *structs.HostVolumeRegisterResponse) error { + + authErr := v.srv.Authenticate(v.ctx, args) + if done, err := v.srv.forward("HostVolume.Register", args, args, reply); done { + return err + } + v.srv.MeasureRPCRate("host_volume", structs.RateMetricWrite, args) + if authErr != nil { + return structs.ErrPermissionDenied + } + defer metrics.MeasureSince([]string{"nomad", "host_volume", "register"}, time.Now()) + + allowVolume := acl.NamespaceValidator(acl.NamespaceCapabilityHostVolumeRegister) + aclObj, err := v.srv.ResolveACL(args) + if err != nil { + return err + } + + if args.Volume == nil { + return fmt.Errorf("missing volume definition") + } + + vol := args.Volume + if vol.Namespace == "" { + vol.Namespace = args.RequestNamespace() + } + if !allowVolume(aclObj, vol.Namespace) { + return structs.ErrPermissionDenied + } + + snap, err := v.srv.State().Snapshot() + if err != nil { + return err + } + + now := time.Now() + vol, err = v.validateVolumeUpdate(vol, snap, now) + if err != nil { + return err + } + + warn, err := v.enforceEnterprisePolicy( + snap, vol, args.GetIdentity().GetACLToken(), args.PolicyOverride) + if warn != nil { + reply.Warnings = warn.Error() + } + if err != nil { + return err + } + + // Write a newly created or modified volume to raft. We create a new request + // here because we've likely mutated the volume. + _, index, err := v.srv.raftApply(structs.HostVolumeRegisterRequestType, + &structs.HostVolumeRegisterRequest{ + Volume: vol, + WriteRequest: args.WriteRequest, + }) + if err != nil { + v.logger.Error("raft apply failed", "error", err, "method", "register") + return err + } + + reply.Volume = vol + reply.Index = index + return nil +} + +func (v *HostVolume) validateVolumeUpdate( + vol *structs.HostVolume, + snap *state.StateSnapshot, + now time.Time) (*structs.HostVolume, error) { + + // validate the volume spec + err := vol.Validate() + if err != nil { + return nil, fmt.Errorf("volume validation failed: %v", err) + } + + // validate any update we're making + var existing *structs.HostVolume + volID := vol.ID + if vol.ID != "" { + existing, err = snap.HostVolumeByID(nil, vol.Namespace, vol.ID, true) + if err != nil { + return nil, err // should never hit, bail out + } + if existing == nil { + return nil, fmt.Errorf("cannot update volume %q: volume does not exist", vol.ID) + + } + err = vol.ValidateUpdate(existing) + if err != nil { + return nil, fmt.Errorf("validating volume %q update failed: %v", vol.ID, err) + } + } else { + // capture this for nicer error messages later + volID = vol.Name + } + + // set zero values as needed, possibly from existing + vol.CanonicalizeForUpdate(existing, now) + + // make sure any nodes or pools actually exist + err = v.validateVolumeForState(vol, snap) + if err != nil { + return nil, fmt.Errorf("validating volume %q against state failed: %v", volID, err) + } + + return vol, nil +} + +// validateVolumeForState ensures that any references to node IDs or node pools are valid +func (v *HostVolume) validateVolumeForState(vol *structs.HostVolume, snap *state.StateSnapshot) error { + var poolFromExistingNode string + if 
vol.NodeID != "" { + node, err := snap.NodeByID(nil, vol.NodeID) + if err != nil { + return err // should never hit, bail out + } + if node == nil { + return fmt.Errorf("node %q does not exist", vol.NodeID) + } + poolFromExistingNode = node.NodePool + } + + if vol.NodePool != "" { + pool, err := snap.NodePoolByName(nil, vol.NodePool) + if err != nil { + return err // should never hit, bail out + } + if pool == nil { + return fmt.Errorf("node pool %q does not exist", vol.NodePool) + } + if poolFromExistingNode != "" && poolFromExistingNode != pool.Name { + return fmt.Errorf("node ID %q is not in pool %q", vol.NodeID, vol.NodePool) + } + } + + return nil +} + +func (v *HostVolume) createVolume(vol *structs.HostVolume) error { + + method := "ClientHostVolume.Create" + cReq := &cstructs.ClientHostVolumeCreateRequest{ + ID: vol.ID, + Name: vol.Name, + PluginID: vol.PluginID, + NodeID: vol.NodeID, + RequestedCapacityMinBytes: vol.RequestedCapacityMinBytes, + RequestedCapacityMaxBytes: vol.RequestedCapacityMaxBytes, + Parameters: vol.Parameters, + } + cResp := &cstructs.ClientHostVolumeCreateResponse{} + err := v.srv.RPC(method, cReq, cResp) + if err != nil { + return err + } + + if vol.State == structs.HostVolumeStateUnknown { + vol.State = structs.HostVolumeStatePending + } + + vol.HostPath = cResp.HostPath + vol.CapacityBytes = cResp.CapacityBytes + + return nil +} + +// placeHostVolume adds a node to volumes that don't already have one. The node +// will match the node pool and constraints, which doesn't already have a volume +// by that name. It returns the node (for testing) and an error indicating +// placement failed. +func (v *HostVolume) placeHostVolume(snap *state.StateSnapshot, vol *structs.HostVolume) (*structs.Node, error) { + if vol.NodeID != "" { + node, err := snap.NodeByID(nil, vol.NodeID) + if err != nil { + return nil, err + } + if node == nil { + return nil, fmt.Errorf("no such node %s", vol.NodeID) + } + vol.NodePool = node.NodePool + return node, nil + } + + var iter memdb.ResultIterator + var err error + if vol.NodePool != "" { + iter, err = snap.NodesByNodePool(nil, vol.NodePool) + } else { + iter, err = snap.Nodes(nil) + } + if err != nil { + return nil, err + } + + var checker *scheduler.ConstraintChecker + ctx := &placementContext{ + regexpCache: make(map[string]*regexp.Regexp), + versionCache: make(map[string]scheduler.VerConstraints), + semverCache: make(map[string]scheduler.VerConstraints), + } + constraints := []*structs.Constraint{{ + LTarget: fmt.Sprintf("${attr.plugins.host_volume.%s.version}", vol.PluginID), + Operand: "is_set", + }} + constraints = append(constraints, vol.Constraints...) + checker = scheduler.NewConstraintChecker(ctx, constraints) + + for { + raw := iter.Next() + if raw == nil { + break + } + candidate := raw.(*structs.Node) + + // note: this is a race if multiple users create volumes of the same + // name concurrently, but we can't solve it on the server because we + // haven't yet written to state. The client will reject requests to + // create/register a volume with the same name with a different ID. 
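+		//
+		// Skip any candidate that already exposes a host volume under the
+		// same name, then apply the implicit plugin constraint plus any
+		// user-supplied constraints: for example, a volume with PluginID
+		// "mkdir" can only land on nodes that fingerprint
+		// ${attr.plugins.host_volume.mkdir.version}.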
+ if _, hasVol := candidate.HostVolumes[vol.Name]; hasVol { + continue + } + + if checker != nil { + if ok := checker.Feasible(candidate); !ok { + continue + } + } + + vol.NodeID = candidate.ID + vol.NodePool = candidate.NodePool + return candidate, nil + + } + + return nil, fmt.Errorf("no node meets constraints") +} + +// placementContext implements the scheduler.ConstraintContext interface, a +// minimal subset of the scheduler.Context interface that we need to create a +// feasibility checker for constraints +type placementContext struct { + regexpCache map[string]*regexp.Regexp + versionCache map[string]scheduler.VerConstraints + semverCache map[string]scheduler.VerConstraints +} + +func (ctx *placementContext) Metrics() *structs.AllocMetric { return &structs.AllocMetric{} } +func (ctx *placementContext) RegexpCache() map[string]*regexp.Regexp { return ctx.regexpCache } + +func (ctx *placementContext) VersionConstraintCache() map[string]scheduler.VerConstraints { + return ctx.versionCache +} + +func (ctx *placementContext) SemverConstraintCache() map[string]scheduler.VerConstraints { + return ctx.semverCache +} + +func (v *HostVolume) Delete(args *structs.HostVolumeDeleteRequest, reply *structs.HostVolumeDeleteResponse) error { + + authErr := v.srv.Authenticate(v.ctx, args) + if done, err := v.srv.forward("HostVolume.Delete", args, args, reply); done { + return err + } + v.srv.MeasureRPCRate("host_volume", structs.RateMetricWrite, args) + if authErr != nil { + return structs.ErrPermissionDenied + } + defer metrics.MeasureSince([]string{"nomad", "host_volume", "delete"}, time.Now()) + + // Note that all deleted volumes need to be in the same namespace + allowVolume := acl.NamespaceValidator(acl.NamespaceCapabilityHostVolumeDelete) + aclObj, err := v.srv.ResolveACL(args) + if err != nil { + return err + } + if !allowVolume(aclObj, args.RequestNamespace()) { + return structs.ErrPermissionDenied + } + + if args.VolumeID == "" { + return fmt.Errorf("missing volume ID to delete") + } + + var index uint64 + + snap, err := v.srv.State().Snapshot() + if err != nil { + return err + } + + ns := args.RequestNamespace() + id := args.VolumeID + + vol, err := snap.HostVolumeByID(nil, ns, id, true) + if err != nil { + return fmt.Errorf("could not query host volume: %w", err) + } + if vol == nil { + return fmt.Errorf("no such volume: %s", id) + } + if len(vol.Allocations) > 0 { + allocIDs := helper.ConvertSlice(vol.Allocations, + func(a *structs.AllocListStub) string { return a.ID }) + return fmt.Errorf("volume %s in use by allocations: %v", id, allocIDs) + } + + err = v.deleteVolume(vol) + if err != nil { + return err + } + + _, index, err = v.srv.raftApply(structs.HostVolumeDeleteRequestType, args) + if err != nil { + v.logger.Error("raft apply failed", "error", err, "method", "delete") + return err + } + + reply.Index = index + return nil +} + +func (v *HostVolume) deleteVolume(vol *structs.HostVolume) error { + + method := "ClientHostVolume.Delete" + cReq := &cstructs.ClientHostVolumeDeleteRequest{ + ID: vol.ID, + Name: vol.Name, + PluginID: vol.PluginID, + NodeID: vol.NodeID, + HostPath: vol.HostPath, + Parameters: vol.Parameters, + } + cResp := &cstructs.ClientHostVolumeDeleteResponse{} + err := v.srv.RPC(method, cReq, cResp) + if err != nil { + return err + } + + return nil +} diff --git a/nomad/host_volume_endpoint_ce.go b/nomad/host_volume_endpoint_ce.go new file mode 100644 index 00000000000..756df5f4298 --- /dev/null +++ b/nomad/host_volume_endpoint_ce.go @@ -0,0 +1,23 @@ +// Copyright (c) 
HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +//go:build !ent +// +build !ent + +package nomad + +import ( + "github.com/hashicorp/nomad/nomad/state" + "github.com/hashicorp/nomad/nomad/structs" +) + +// enforceEnterprisePolicy is the CE stub for Enterprise governance via +// Sentinel policy, quotas, and node pools +func (v *HostVolume) enforceEnterprisePolicy( + _ *state.StateSnapshot, + _ *structs.HostVolume, + _ *structs.ACLToken, + _ bool, +) (error, error) { + return nil, nil +} diff --git a/nomad/host_volume_endpoint_test.go b/nomad/host_volume_endpoint_test.go new file mode 100644 index 00000000000..e523ae29ae3 --- /dev/null +++ b/nomad/host_volume_endpoint_test.go @@ -0,0 +1,842 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package nomad + +import ( + "context" + "errors" + "fmt" + "sync" + "testing" + "time" + + msgpackrpc "github.com/hashicorp/net-rpc-msgpackrpc/v2" + "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/client" + "github.com/hashicorp/nomad/client/config" + cstructs "github.com/hashicorp/nomad/client/structs" + "github.com/hashicorp/nomad/helper" + "github.com/hashicorp/nomad/helper/testlog" + "github.com/hashicorp/nomad/helper/uuid" + "github.com/hashicorp/nomad/nomad/mock" + "github.com/hashicorp/nomad/nomad/state" + "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/testutil" + "github.com/hashicorp/nomad/version" + "github.com/shoenig/test/must" + "github.com/shoenig/test/wait" +) + +func TestHostVolumeEndpoint_CreateRegisterGetDelete(t *testing.T) { + ci.Parallel(t) + + srv, _, cleanupSrv := TestACLServer(t, func(c *Config) { + c.NumSchedulers = 0 + }) + t.Cleanup(cleanupSrv) + testutil.WaitForLeader(t, srv.RPC) + store := srv.fsm.State() + + c1, node1 := newMockHostVolumeClient(t, srv, "prod") + c2, _ := newMockHostVolumeClient(t, srv, "default") + c2.setCreate(nil, errors.New("this node should never receive create RPC")) + c2.setDelete("this node should never receive delete RPC") + + index := uint64(1001) + + token := mock.CreatePolicyAndToken(t, store, index, "volume-manager", + `namespace "apps" { capabilities = ["host-volume-register"] } + node { policy = "read" }`).SecretID + + index++ + otherToken := mock.CreatePolicyAndToken(t, store, index, "other", + `namespace "foo" { capabilities = ["host-volume-register"] } + node { policy = "read" }`).SecretID + + index++ + powerToken := mock.CreatePolicyAndToken(t, store, index, "cluster-admin", + `namespace "*" { capabilities = ["host-volume-write"] } + node { policy = "read" }`).SecretID + + index++ + ns := "apps" + nspace := mock.Namespace() + nspace.Name = ns + must.NoError(t, store.UpsertNamespaces(index, []*structs.Namespace{nspace})) + + codec := rpcClient(t, srv) + + req := &structs.HostVolumeCreateRequest{ + WriteRequest: structs.WriteRequest{ + Region: srv.Region(), + AuthToken: token}, + } + + t.Run("invalid create", func(t *testing.T) { + + req.Namespace = ns + var resp structs.HostVolumeCreateResponse + err := msgpackrpc.CallWithCodec(codec, "HostVolume.Create", req, &resp) + must.EqError(t, err, "missing volume definition") + + req.Volume = &structs.HostVolume{} + err = msgpackrpc.CallWithCodec(codec, "HostVolume.Create", req, &resp) + must.EqError(t, err, `volume validation failed: 2 errors occurred: + * missing name + * must include at least one capability block + +`) + + req.Volume = 
&structs.HostVolume{ + Name: "example", + PluginID: "example_plugin", + Constraints: []*structs.Constraint{{ + RTarget: "r1", + Operand: "=", + }}, + RequestedCapacityMinBytes: 200000, + RequestedCapacityMaxBytes: 100000, + RequestedCapabilities: []*structs.HostVolumeCapability{ + { + AttachmentMode: structs.HostVolumeAttachmentModeFilesystem, + AccessMode: structs.HostVolumeAccessModeSingleNodeWriter, + }, + { + AttachmentMode: "bad", + AccessMode: "invalid", + }, + }, + } + + err = msgpackrpc.CallWithCodec(codec, "HostVolume.Create", req, &resp) + must.EqError(t, err, `volume validation failed: 3 errors occurred: + * capacity_max (100000) must be larger than capacity_min (200000) + * invalid attachment mode: "bad" + * invalid constraint: 1 error occurred: + * No LTarget provided but is required by constraint + + + +`) + + invalidNode := &structs.Node{ID: uuid.Generate(), NodePool: "does-not-exist"} + volOnInvalidNode := mock.HostVolumeRequestForNode(ns, invalidNode) + req.Volume = volOnInvalidNode + err = msgpackrpc.CallWithCodec(codec, "HostVolume.Create", req, &resp) + must.EqError(t, err, fmt.Sprintf( + `validating volume "example" against state failed: node %q does not exist`, + invalidNode.ID)) + }) + + var expectIndex uint64 + + c1.setCreate(&cstructs.ClientHostVolumeCreateResponse{ + HostPath: "/var/nomad/alloc_mounts/foo", + CapacityBytes: 150000, + }, nil) + + vol1 := mock.HostVolumeRequest("apps") + vol1.Name = "example1" + vol1.NodePool = "prod" + vol2 := mock.HostVolumeRequest("apps") + vol2.Name = "example2" + vol2.NodePool = "prod" + + t.Run("invalid permissions", func(t *testing.T) { + var resp structs.HostVolumeCreateResponse + req.AuthToken = otherToken + + req.Volume = vol1 + err := msgpackrpc.CallWithCodec(codec, "HostVolume.Create", req, &resp) + must.EqError(t, err, "Permission denied") + }) + + t.Run("invalid node constraints", func(t *testing.T) { + vol1.Constraints[0].RTarget = "r2" + vol2.Constraints[0].RTarget = "r2" + + defer func() { + vol1.Constraints[0].RTarget = "r1" + vol2.Constraints[0].RTarget = "r1" + }() + + req.Volume = vol1.Copy() + var resp structs.HostVolumeCreateResponse + req.AuthToken = token + err := msgpackrpc.CallWithCodec(codec, "HostVolume.Create", req, &resp) + must.EqError(t, err, `could not place volume "example1": no node meets constraints`) + + req.Volume = vol2.Copy() + resp = structs.HostVolumeCreateResponse{} + err = msgpackrpc.CallWithCodec(codec, "HostVolume.Create", req, &resp) + must.EqError(t, err, `could not place volume "example2": no node meets constraints`) + }) + + t.Run("valid create", func(t *testing.T) { + var resp structs.HostVolumeCreateResponse + req.AuthToken = token + req.Volume = vol1.Copy() + err := msgpackrpc.CallWithCodec(codec, "HostVolume.Create", req, &resp) + must.NoError(t, err) + must.NotNil(t, resp.Volume) + vol1 = resp.Volume + + expectIndex = resp.Index + req.Volume = vol2.Copy() + resp = structs.HostVolumeCreateResponse{} + err = msgpackrpc.CallWithCodec(codec, "HostVolume.Create", req, &resp) + must.NoError(t, err) + must.NotNil(t, resp.Volume) + vol2 = resp.Volume + + getReq := &structs.HostVolumeGetRequest{ + ID: vol1.ID, + QueryOptions: structs.QueryOptions{ + Region: srv.Region(), + Namespace: ns, + AuthToken: otherToken, + }, + } + var getResp structs.HostVolumeGetResponse + err = msgpackrpc.CallWithCodec(codec, "HostVolume.Get", getReq, &getResp) + must.EqError(t, err, "Permission denied") + + getReq.AuthToken = token + err = msgpackrpc.CallWithCodec(codec, "HostVolume.Get", getReq, &getResp) + 
must.NoError(t, err) + must.NotNil(t, getResp.Volume) + }) + + t.Run("invalid updates", func(t *testing.T) { + + invalidVol1 := vol1.Copy() + invalidVol2 := &structs.HostVolume{} + + createReq := &structs.HostVolumeCreateRequest{ + Volume: invalidVol2, + WriteRequest: structs.WriteRequest{ + Region: srv.Region(), + Namespace: ns, + AuthToken: token}, + } + c1.setCreate(nil, errors.New("should not call this endpoint on invalid RPCs")) + var createResp structs.HostVolumeCreateResponse + err := msgpackrpc.CallWithCodec(codec, "HostVolume.Create", createReq, &createResp) + must.EqError(t, err, `volume validation failed: 2 errors occurred: + * missing name + * must include at least one capability block + +`, must.Sprint("initial validation failures should exit early")) + + invalidVol1.NodeID = uuid.Generate() + invalidVol1.RequestedCapacityMinBytes = 100 + invalidVol1.RequestedCapacityMaxBytes = 200 + registerReq := &structs.HostVolumeRegisterRequest{ + Volume: invalidVol1, + WriteRequest: structs.WriteRequest{ + Region: srv.Region(), + Namespace: ns, + AuthToken: token}, + } + var registerResp structs.HostVolumeRegisterResponse + err = msgpackrpc.CallWithCodec(codec, "HostVolume.Register", registerReq, ®isterResp) + must.EqError(t, err, fmt.Sprintf(`validating volume %q update failed: 2 errors occurred: + * node ID cannot be updated + * capacity_max (200) cannot be less than existing provisioned capacity (150000) + +`, invalidVol1.ID), must.Sprint("update validation checks should have failed")) + + }) + + t.Run("blocking Get unblocks on write", func(t *testing.T) { + nextVol1 := vol1.Copy() + nextVol1.RequestedCapacityMaxBytes = 300000 + registerReq := &structs.HostVolumeRegisterRequest{ + Volume: nextVol1, + WriteRequest: structs.WriteRequest{ + Region: srv.Region(), + Namespace: ns, + AuthToken: token}, + } + + c1.setCreate(nil, errors.New("should not call this endpoint on register RPC")) + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + t.Cleanup(cancel) + volCh := make(chan *structs.HostVolume) + errCh := make(chan error) + + getReq := &structs.HostVolumeGetRequest{ + ID: vol1.ID, + QueryOptions: structs.QueryOptions{ + Region: srv.Region(), + Namespace: ns, + AuthToken: token, + MinQueryIndex: expectIndex, + }, + } + + go func() { + codec := rpcClient(t, srv) + var getResp structs.HostVolumeGetResponse + err := msgpackrpc.CallWithCodec(codec, "HostVolume.Get", getReq, &getResp) + if err != nil { + errCh <- err + } + volCh <- getResp.Volume + }() + + // re-register the volume long enough later that we can be sure we won't + // win a race with the get RPC goroutine + time.AfterFunc(200*time.Millisecond, func() { + codec := rpcClient(t, srv) + var registerResp structs.HostVolumeRegisterResponse + err := msgpackrpc.CallWithCodec(codec, "HostVolume.Register", registerReq, ®isterResp) + must.NoError(t, err) + }) + + select { + case <-ctx.Done(): + t.Fatal("timeout or cancelled") + case vol := <-volCh: + must.Greater(t, expectIndex, vol.ModifyIndex) + case err := <-errCh: + t.Fatalf("unexpected error: %v", err) + } + }) + + t.Run("delete blocked by allocation claims", func(t *testing.T) { + + // claim one of the volumes with a pending allocation + alloc := mock.MinAlloc() + alloc.NodeID = node1.ID + alloc.Job.TaskGroups[0].Volumes = map[string]*structs.VolumeRequest{"example": { + Name: "example", + Type: structs.VolumeTypeHost, + Source: vol2.Name, + }} + index++ + must.NoError(t, store.UpsertAllocs(structs.MsgTypeTestSetup, + index, []*structs.Allocation{alloc})) + 
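+		// The alloc above claims vol2 by name via its volume request Source,
+		// so deleting it must fail (even with sufficient permissions) until
+		// the alloc is marked client-terminal.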
+ delReq := &structs.HostVolumeDeleteRequest{ + VolumeID: vol2.ID, + WriteRequest: structs.WriteRequest{ + Region: srv.Region(), + Namespace: ns, + AuthToken: token}, + } + var delResp structs.HostVolumeDeleteResponse + + err := msgpackrpc.CallWithCodec(codec, "HostVolume.Delete", delReq, &delResp) + must.EqError(t, err, "Permission denied") + + delReq.AuthToken = powerToken + err = msgpackrpc.CallWithCodec(codec, "HostVolume.Delete", delReq, &delResp) + must.EqError(t, err, fmt.Sprintf("volume %s in use by allocations: [%s]", vol2.ID, alloc.ID)) + + // update the allocations terminal so the delete works + alloc = alloc.Copy() + alloc.ClientStatus = structs.AllocClientStatusFailed + nArgs := &structs.AllocUpdateRequest{ + Alloc: []*structs.Allocation{alloc}, + WriteRequest: structs.WriteRequest{ + Region: srv.Region(), + AuthToken: node1.SecretID}, + } + err = msgpackrpc.CallWithCodec(codec, "Node.UpdateAlloc", nArgs, &structs.GenericResponse{}) + + err = msgpackrpc.CallWithCodec(codec, "HostVolume.Delete", delReq, &delResp) + must.NoError(t, err) + + getReq := &structs.HostVolumeGetRequest{ + ID: vol2.ID, + QueryOptions: structs.QueryOptions{ + Region: srv.Region(), + Namespace: ns, + AuthToken: token, + }, + } + var getResp structs.HostVolumeGetResponse + err = msgpackrpc.CallWithCodec(codec, "HostVolume.Get", getReq, &getResp) + must.NoError(t, err) + must.Nil(t, getResp.Volume) + }) + + // delete vol1 to finish cleaning up + var delResp structs.HostVolumeDeleteResponse + err := msgpackrpc.CallWithCodec(codec, "HostVolume.Delete", &structs.HostVolumeDeleteRequest{ + VolumeID: vol1.ID, + WriteRequest: structs.WriteRequest{ + Region: srv.Region(), + Namespace: vol1.Namespace, + AuthToken: powerToken, + }, + }, &delResp) + must.NoError(t, err) + + // should be no volumes left + var listResp structs.HostVolumeListResponse + err = msgpackrpc.CallWithCodec(codec, "HostVolume.List", &structs.HostVolumeListRequest{ + QueryOptions: structs.QueryOptions{ + Region: srv.Region(), + Namespace: "*", + AuthToken: token, + }, + }, &listResp) + must.NoError(t, err) + must.Len(t, 0, listResp.Volumes, must.Sprintf("expect no volumes to remain, got: %+v", listResp)) +} + +func TestHostVolumeEndpoint_List(t *testing.T) { + ci.Parallel(t) + + srv, rootToken, cleanupSrv := TestACLServer(t, func(c *Config) { + c.NumSchedulers = 0 + }) + t.Cleanup(cleanupSrv) + testutil.WaitForLeader(t, srv.RPC) + store := srv.fsm.State() + codec := rpcClient(t, srv) + + index := uint64(1001) + + token := mock.CreatePolicyAndToken(t, store, index, "volume-manager", + `namespace "apps" { capabilities = ["host-volume-register"] } + node { policy = "read" }`).SecretID + + index++ + otherToken := mock.CreatePolicyAndToken(t, store, index, "other", + `namespace "foo" { capabilities = ["host-volume-read"] } + node { policy = "read" }`).SecretID + + index++ + ns1 := "apps" + ns2 := "system" + nspace1, nspace2 := mock.Namespace(), mock.Namespace() + nspace1.Name = ns1 + nspace2.Name = ns2 + must.NoError(t, store.UpsertNamespaces(index, []*structs.Namespace{nspace1, nspace2})) + + nodes := []*structs.Node{ + mock.Node(), + mock.Node(), + mock.Node(), + } + nodes[2].NodePool = "prod" + index++ + must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, + index, nodes[0], state.NodeUpsertWithNodePool)) + must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, + index, nodes[1], state.NodeUpsertWithNodePool)) + must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, + index, nodes[2], state.NodeUpsertWithNodePool)) + + vol1 := 
mock.HostVolumeRequestForNode(ns1, nodes[0]) + vol1.Name = "foobar-example" + + vol2 := mock.HostVolumeRequestForNode(ns1, nodes[1]) + vol2.Name = "foobaz-example" + + vol3 := mock.HostVolumeRequestForNode(ns2, nodes[2]) + vol3.Name = "foobar-example" + + vol4 := mock.HostVolumeRequestForNode(ns2, nodes[1]) + vol4.Name = "foobaz-example" + + // we need to register these rather than upsert them so we have the correct + // indexes for unblocking later. + registerReq := &structs.HostVolumeRegisterRequest{ + WriteRequest: structs.WriteRequest{ + Region: srv.Region(), + AuthToken: rootToken.SecretID}, + } + + var registerResp structs.HostVolumeRegisterResponse + + // write the volumes in reverse order so our later test can get a blocking + // query index from a Get it has access to + + registerReq.Volume = vol4 + err := msgpackrpc.CallWithCodec(codec, "HostVolume.Register", registerReq, ®isterResp) + must.NoError(t, err) + vol4 = registerResp.Volume + + registerReq.Volume = vol3 + registerResp = structs.HostVolumeRegisterResponse{} + err = msgpackrpc.CallWithCodec(codec, "HostVolume.Register", registerReq, ®isterResp) + must.NoError(t, err) + vol3 = registerResp.Volume + + registerReq.Volume = vol2 + registerResp = structs.HostVolumeRegisterResponse{} + err = msgpackrpc.CallWithCodec(codec, "HostVolume.Register", registerReq, ®isterResp) + must.NoError(t, err) + vol2 = registerResp.Volume + + registerReq.Volume = vol1 + registerResp = structs.HostVolumeRegisterResponse{} + err = msgpackrpc.CallWithCodec(codec, "HostVolume.Register", registerReq, ®isterResp) + must.NoError(t, err) + vol1 = registerResp.Volume + + testCases := []struct { + name string + req *structs.HostVolumeListRequest + expectVolIDs []string + }{ + { + name: "wrong namespace for token", + req: &structs.HostVolumeListRequest{ + QueryOptions: structs.QueryOptions{ + Region: srv.Region(), + Namespace: ns1, + AuthToken: otherToken, + }, + }, + expectVolIDs: []string{}, + }, + { + name: "query by namespace", + req: &structs.HostVolumeListRequest{ + QueryOptions: structs.QueryOptions{ + Region: srv.Region(), + Namespace: ns1, + AuthToken: token, + }, + }, + expectVolIDs: []string{vol1.ID, vol2.ID}, + }, + { + name: "wildcard namespace", + req: &structs.HostVolumeListRequest{ + QueryOptions: structs.QueryOptions{ + Region: srv.Region(), + Namespace: structs.AllNamespacesSentinel, + AuthToken: token, + }, + }, + expectVolIDs: []string{vol1.ID, vol2.ID, vol3.ID, vol4.ID}, + }, + { + name: "query by prefix", + req: &structs.HostVolumeListRequest{ + QueryOptions: structs.QueryOptions{ + Region: srv.Region(), + Namespace: ns1, + AuthToken: token, + Prefix: "foobar", + }, + }, + expectVolIDs: []string{vol1.ID}, + }, + { + name: "query by node", + req: &structs.HostVolumeListRequest{ + NodeID: nodes[1].ID, + QueryOptions: structs.QueryOptions{ + Region: srv.Region(), + Namespace: structs.AllNamespacesSentinel, + AuthToken: token, + }, + }, + expectVolIDs: []string{vol2.ID, vol4.ID}, + }, + { + name: "query by node pool", + req: &structs.HostVolumeListRequest{ + NodePool: "prod", + QueryOptions: structs.QueryOptions{ + Region: srv.Region(), + Namespace: structs.AllNamespacesSentinel, + AuthToken: token, + }, + }, + expectVolIDs: []string{vol3.ID}, + }, + { + name: "query by incompatible node ID and pool", + req: &structs.HostVolumeListRequest{ + NodeID: nodes[1].ID, + NodePool: "prod", + QueryOptions: structs.QueryOptions{ + Region: srv.Region(), + Namespace: structs.AllNamespacesSentinel, + AuthToken: token, + }, + }, + expectVolIDs: 
[]string{}, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + var resp structs.HostVolumeListResponse + err := msgpackrpc.CallWithCodec(codec, "HostVolume.List", tc.req, &resp) + must.NoError(t, err) + + gotIDs := helper.ConvertSlice(resp.Volumes, + func(v *structs.HostVolumeStub) string { return v.ID }) + must.SliceContainsAll(t, tc.expectVolIDs, gotIDs, + must.Sprintf("got: %v", gotIDs)) + }) + } + + t.Run("blocking query unblocks", func(t *testing.T) { + + // the Get response from the most-recently written volume will have the + // index we want to block on + getReq := &structs.HostVolumeGetRequest{ + ID: vol1.ID, + QueryOptions: structs.QueryOptions{ + Region: srv.Region(), + Namespace: ns1, + AuthToken: token, + }, + } + var getResp structs.HostVolumeGetResponse + err = msgpackrpc.CallWithCodec(codec, "HostVolume.Get", getReq, &getResp) + must.NoError(t, err) + must.NotNil(t, getResp.Volume) + + nextVol := getResp.Volume.Copy() + nextVol.RequestedCapacityMaxBytes = 300000 + registerReq.Volume = nextVol + registerReq.Namespace = nextVol.Namespace + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + t.Cleanup(cancel) + respCh := make(chan *structs.HostVolumeListResponse) + errCh := make(chan error) + + // prepare the blocking List query + + req := &structs.HostVolumeListRequest{ + QueryOptions: structs.QueryOptions{ + Region: srv.Region(), + Namespace: ns1, + AuthToken: token, + MinQueryIndex: getResp.Index, + }, + } + + go func() { + codec := rpcClient(t, srv) + var listResp structs.HostVolumeListResponse + err := msgpackrpc.CallWithCodec(codec, "HostVolume.List", req, &listResp) + if err != nil { + errCh <- err + } + respCh <- &listResp + }() + + // re-register the volume long enough later that we can be sure we won't + // win a race with the get RPC goroutine + time.AfterFunc(200*time.Millisecond, func() { + codec := rpcClient(t, srv) + var registerResp structs.HostVolumeRegisterResponse + err = msgpackrpc.CallWithCodec(codec, "HostVolume.Register", registerReq, ®isterResp) + must.NoError(t, err) + }) + + select { + case <-ctx.Done(): + t.Fatal("timeout or cancelled") + case listResp := <-respCh: + must.Greater(t, req.MinQueryIndex, listResp.Index) + case err := <-errCh: + t.Fatalf("unexpected error: %v", err) + } + }) +} + +func TestHostVolumeEndpoint_placeVolume(t *testing.T) { + srv, _, cleanupSrv := TestACLServer(t, func(c *Config) { + c.NumSchedulers = 0 + }) + t.Cleanup(cleanupSrv) + testutil.WaitForLeader(t, srv.RPC) + store := srv.fsm.State() + + endpoint := &HostVolume{ + srv: srv, + logger: testlog.HCLogger(t), + } + + node0, node1, node2, node3 := mock.Node(), mock.Node(), mock.Node(), mock.Node() + node0.NodePool = structs.NodePoolDefault + node0.Attributes["plugins.host_volume.mkdir.version"] = "0.0.1" + + node1.NodePool = "dev" + node1.Meta["rack"] = "r2" + node1.Attributes["plugins.host_volume.mkdir.version"] = "0.0.1" + + node2.NodePool = "prod" + node2.Attributes["plugins.host_volume.mkdir.version"] = "0.0.1" + + node3.NodePool = "prod" + node3.Meta["rack"] = "r3" + node3.HostVolumes = map[string]*structs.ClientHostVolumeConfig{"example": { + Name: "example", + Path: "/srv", + }} + node3.Attributes["plugins.host_volume.mkdir.version"] = "0.0.1" + + must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, 1000, node0)) + must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, 1000, node1)) + must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, 1000, node2)) + must.NoError(t, 
store.UpsertNode(structs.MsgTypeTestSetup, 1000, node3)) + + testCases := []struct { + name string + vol *structs.HostVolume + expect *structs.Node + expectErr string + }{ + { + name: "only one in node pool", + vol: &structs.HostVolume{NodePool: "default", PluginID: "mkdir"}, + expect: node0, + }, + { + name: "only one that matches constraints", + vol: &structs.HostVolume{ + PluginID: "mkdir", + Constraints: []*structs.Constraint{ + { + LTarget: "${meta.rack}", + RTarget: "r2", + Operand: "=", + }, + }}, + expect: node1, + }, + { + name: "only one available in pool", + vol: &structs.HostVolume{NodePool: "prod", Name: "example", PluginID: "mkdir"}, + expect: node2, + }, + { + name: "no matching constraint", + vol: &structs.HostVolume{ + PluginID: "mkdir", + Constraints: []*structs.Constraint{ + { + LTarget: "${meta.rack}", + RTarget: "r6", + Operand: "=", + }, + }}, + expectErr: "no node meets constraints", + }, + { + name: "no matching plugin", + vol: &structs.HostVolume{PluginID: "not-mkdir"}, + expectErr: "no node meets constraints", + }, + { + name: "match already has a volume with the same name", + vol: &structs.HostVolume{ + Name: "example", + PluginID: "mkdir", + Constraints: []*structs.Constraint{ + { + LTarget: "${meta.rack}", + RTarget: "r3", + Operand: "=", + }, + }}, + expectErr: "no node meets constraints", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + snap, _ := store.Snapshot() + node, err := endpoint.placeHostVolume(snap, tc.vol) + if tc.expectErr == "" { + must.NoError(t, err) + must.Eq(t, tc.expect, node) + } else { + must.EqError(t, err, tc.expectErr) + must.Nil(t, node) + } + }) + } +} + +// mockHostVolumeClient models client RPCs that have side-effects on the +// client host +type mockHostVolumeClient struct { + lock sync.Mutex + nextCreateResponse *cstructs.ClientHostVolumeCreateResponse + nextCreateErr error + nextDeleteErr error +} + +// newMockHostVolumeClient configures a RPC-only Nomad test agent and returns a +// mockHostVolumeClient so we can send it client RPCs +func newMockHostVolumeClient(t *testing.T, srv *Server, pool string) (*mockHostVolumeClient, *structs.Node) { + t.Helper() + + mockClientEndpoint := &mockHostVolumeClient{} + + c1, cleanup := client.TestRPCOnlyClient(t, func(c *config.Config) { + c.Node.NodePool = pool + c.Node.Attributes["nomad.version"] = version.Version + c.Node.Attributes["plugins.host_volume.mkdir.version"] = "0.0.1" + c.Node.Meta["rack"] = "r1" + }, srv.config.RPCAddr, map[string]any{"HostVolume": mockClientEndpoint}) + t.Cleanup(cleanup) + + must.Wait(t, wait.InitialSuccess(wait.BoolFunc(func() bool { + node, err := srv.fsm.State().NodeByID(nil, c1.NodeID()) + if err != nil { + return false + } + if node != nil && node.Status == structs.NodeStatusReady { + return true + } + return false + }), + wait.Timeout(time.Second*5), + wait.Gap(time.Millisecond), + ), must.Sprint("client did not fingerprint before timeout")) + + return mockClientEndpoint, c1.Node() +} + +func (v *mockHostVolumeClient) setCreate( + resp *cstructs.ClientHostVolumeCreateResponse, err error) { + v.lock.Lock() + defer v.lock.Unlock() + v.nextCreateResponse = resp + v.nextCreateErr = err +} + +func (v *mockHostVolumeClient) setDelete(errMsg string) { + v.lock.Lock() + defer v.lock.Unlock() + v.nextDeleteErr = errors.New(errMsg) +} + +func (v *mockHostVolumeClient) Create( + req *cstructs.ClientHostVolumeCreateRequest, + resp *cstructs.ClientHostVolumeCreateResponse) error { + v.lock.Lock() + defer v.lock.Unlock() + if 
v.nextCreateResponse == nil { + return nil // prevents panics from incorrect tests + } + *resp = *v.nextCreateResponse + return v.nextCreateErr +} + +func (v *mockHostVolumeClient) Delete( + req *cstructs.ClientHostVolumeDeleteRequest, + resp *cstructs.ClientHostVolumeDeleteResponse) error { + v.lock.Lock() + defer v.lock.Unlock() + return v.nextDeleteErr +} diff --git a/nomad/mock/host_volumes.go b/nomad/mock/host_volumes.go new file mode 100644 index 00000000000..a87b084dad3 --- /dev/null +++ b/nomad/mock/host_volumes.go @@ -0,0 +1,54 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package mock + +import ( + "github.com/hashicorp/nomad/helper/uuid" + "github.com/hashicorp/nomad/nomad/structs" +) + +func HostVolumeRequest(ns string) *structs.HostVolume { + vol := &structs.HostVolume{ + Namespace: ns, + Name: "example", + PluginID: "mkdir", + NodePool: structs.NodePoolDefault, + Constraints: []*structs.Constraint{ + { + LTarget: "${meta.rack}", + RTarget: "r1", + Operand: "=", + }, + }, + RequestedCapacityMinBytes: 100000, + RequestedCapacityMaxBytes: 200000, + RequestedCapabilities: []*structs.HostVolumeCapability{ + { + AttachmentMode: structs.HostVolumeAttachmentModeFilesystem, + AccessMode: structs.HostVolumeAccessModeSingleNodeWriter, + }, + }, + Parameters: map[string]string{"foo": "bar"}, + State: structs.HostVolumeStatePending, + } + return vol + +} + +func HostVolumeRequestForNode(ns string, node *structs.Node) *structs.HostVolume { + vol := HostVolumeRequest(ns) + vol.NodeID = node.ID + vol.NodePool = node.NodePool + return vol +} + +func HostVolume() *structs.HostVolume { + volID := uuid.Generate() + vol := HostVolumeRequest(structs.DefaultNamespace) + vol.ID = volID + vol.NodeID = uuid.Generate() + vol.CapacityBytes = 150000 + vol.HostPath = "/var/data/nomad/alloc_mounts/" + volID + return vol +} diff --git a/nomad/search_endpoint.go b/nomad/search_endpoint.go index 4a66e939238..b6743c42369 100644 --- a/nomad/search_endpoint.go +++ b/nomad/search_endpoint.go @@ -41,6 +41,7 @@ var ( structs.ScalingPolicies, structs.Variables, structs.Namespaces, + structs.HostVolumes, } ) @@ -84,6 +85,8 @@ func (s *Search) getPrefixMatches(iter memdb.ResultIterator, prefix string) ([]s id = t.ID case *structs.CSIVolume: id = t.ID + case *structs.HostVolume: + id = t.ID case *structs.ScalingPolicy: id = t.ID case *structs.Namespace: @@ -405,6 +408,8 @@ func getResourceIter(context structs.Context, aclObj *acl.ACL, namespace, prefix return store.ScalingPoliciesByIDPrefix(ws, namespace, prefix) case structs.Volumes: return store.CSIVolumesByIDPrefix(ws, namespace, prefix) + case structs.HostVolumes: + return store.HostVolumesByIDPrefix(ws, namespace, prefix, state.SortDefault) case structs.Namespaces: iter, err := store.NamespacesByNamePrefix(ws, prefix) if err != nil { @@ -684,6 +689,8 @@ func sufficientSearchPerms(aclObj *acl.ACL, namespace string, context structs.Co acl.NamespaceCapabilityCSIReadVolume, acl.NamespaceCapabilityListJobs, acl.NamespaceCapabilityReadJob)(aclObj, namespace) + case structs.HostVolumes: + return acl.NamespaceValidator(acl.NamespaceCapabilityHostVolumeRead)(aclObj, namespace) case structs.Variables: return aclObj.AllowVariableSearch(namespace) case structs.Plugins: @@ -774,7 +781,8 @@ func (s *Search) FuzzySearch(args *structs.FuzzySearchRequest, reply *structs.Fu for _, ctx := range prefixContexts { switch ctx { // only apply on the types that use UUID prefix searching - case structs.Evals, structs.Deployments, 
structs.ScalingPolicies, structs.Volumes, structs.Quotas, structs.Recommendations: + case structs.Evals, structs.Deployments, structs.ScalingPolicies, + structs.Volumes, structs.HostVolumes, structs.Quotas, structs.Recommendations: iter, err := getResourceIter(ctx, aclObj, namespace, roundUUIDDownIfOdd(args.Prefix, args.Context), ws, state) if err != nil { if !s.silenceError(err) { @@ -790,7 +798,9 @@ func (s *Search) FuzzySearch(args *structs.FuzzySearchRequest, reply *structs.Fu for _, ctx := range fuzzyContexts { switch ctx { // skip the types that use UUID prefix searching - case structs.Evals, structs.Deployments, structs.ScalingPolicies, structs.Volumes, structs.Quotas, structs.Recommendations: + case structs.Evals, structs.Deployments, structs.ScalingPolicies, + structs.Volumes, structs.HostVolumes, structs.Quotas, + structs.Recommendations: continue default: iter, err := getFuzzyResourceIterator(ctx, aclObj, namespace, ws, state) @@ -927,6 +937,11 @@ func filteredSearchContexts(aclObj *acl.ACL, namespace string, context structs.C if volRead { available = append(available, c) } + case structs.HostVolumes: + if acl.NamespaceValidator( + acl.NamespaceCapabilityHostVolumeRead)(aclObj, namespace) { + available = append(available, c) + } case structs.Plugins: if aclObj.AllowPluginList() { available = append(available, c) diff --git a/nomad/search_endpoint_test.go b/nomad/search_endpoint_test.go index e06688ac927..5f9695f3d20 100644 --- a/nomad/search_endpoint_test.go +++ b/nomad/search_endpoint_test.go @@ -1039,6 +1039,53 @@ func TestSearch_PrefixSearch_CSIVolume(t *testing.T) { require.False(t, resp.Truncations[structs.Volumes]) } +func TestSearch_PrefixSearch_HostVolume(t *testing.T) { + ci.Parallel(t) + + srv, cleanup := TestServer(t, func(c *Config) { + c.NumSchedulers = 0 + }) + defer cleanup() + codec := rpcClient(t, srv) + testutil.WaitForLeader(t, srv.RPC) + + store := srv.fsm.State() + index, _ := store.LatestIndex() + + node := mock.Node() + index++ + must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, index, node)) + + id := uuid.Generate() + index++ + err := store.UpsertHostVolume(index, &structs.HostVolume{ + ID: id, + Name: "example", + Namespace: structs.DefaultNamespace, + PluginID: "glade", + NodeID: node.ID, + NodePool: node.NodePool, + }) + must.NoError(t, err) + + req := &structs.SearchRequest{ + Prefix: id[:6], + Context: structs.HostVolumes, + QueryOptions: structs.QueryOptions{ + Region: "global", + Namespace: structs.DefaultNamespace, + }, + } + + var resp structs.SearchResponse + must.NoError(t, msgpackrpc.CallWithCodec(codec, "Search.PrefixSearch", req, &resp)) + + must.Len(t, 1, resp.Matches[structs.HostVolumes]) + must.Len(t, 0, resp.Matches[structs.Volumes]) + must.Eq(t, id, resp.Matches[structs.HostVolumes][0]) + must.False(t, resp.Truncations[structs.HostVolumes]) +} + func TestSearch_PrefixSearch_Namespace(t *testing.T) { ci.Parallel(t) @@ -1932,6 +1979,52 @@ func TestSearch_FuzzySearch_CSIVolume(t *testing.T) { require.False(t, resp.Truncations[structs.Volumes]) } +func TestSearch_FuzzySearch_HostVolume(t *testing.T) { + ci.Parallel(t) + + srv, cleanup := TestServer(t, func(c *Config) { + c.NumSchedulers = 0 + }) + defer cleanup() + codec := rpcClient(t, srv) + testutil.WaitForLeader(t, srv.RPC) + + store := srv.fsm.State() + index, _ := store.LatestIndex() + + node := mock.Node() + index++ + must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, index, node)) + + id := uuid.Generate() + index++ + err := store.UpsertHostVolume(index, 
&structs.HostVolume{ + ID: id, + Name: "example", + Namespace: structs.DefaultNamespace, + PluginID: "glade", + NodeID: node.ID, + NodePool: node.NodePool, + }) + must.NoError(t, err) + + req := &structs.FuzzySearchRequest{ + Text: id[0:3], // volumes are prefix searched + Context: structs.HostVolumes, + QueryOptions: structs.QueryOptions{ + Region: "global", + Namespace: structs.DefaultNamespace, + }, + } + + var resp structs.FuzzySearchResponse + must.NoError(t, msgpackrpc.CallWithCodec(codec, "Search.FuzzySearch", req, &resp)) + + must.Len(t, 1, resp.Matches[structs.HostVolumes]) + must.Eq(t, id, resp.Matches[structs.HostVolumes][0].ID) + must.False(t, resp.Truncations[structs.HostVolumes]) +} + func TestSearch_FuzzySearch_Namespace(t *testing.T) { ci.Parallel(t) diff --git a/nomad/server.go b/nomad/server.go index d69cb2b8fc7..58a611da886 100644 --- a/nomad/server.go +++ b/nomad/server.go @@ -1357,6 +1357,8 @@ func (s *Server) setupRpcServer(server *rpc.Server, ctx *RPCContext) { _ = server.Register(NewStatusEndpoint(s, ctx)) _ = server.Register(NewSystemEndpoint(s, ctx)) _ = server.Register(NewVariablesEndpoint(s, ctx, s.encrypter)) + _ = server.Register(NewHostVolumeEndpoint(s, ctx)) + _ = server.Register(NewClientHostVolumeEndpoint(s, ctx)) // Register non-streaming diff --git a/nomad/state/schema.go b/nomad/state/schema.go index 2c798b06fbe..7ee83e074eb 100644 --- a/nomad/state/schema.go +++ b/nomad/state/schema.go @@ -26,6 +26,7 @@ const ( TableACLBindingRules = "acl_binding_rules" TableAllocs = "allocs" TableJobSubmission = "job_submission" + TableHostVolumes = "host_volumes" ) const ( @@ -41,6 +42,7 @@ const ( indexName = "name" indexSigningKey = "signing_key" indexAuthMethod = "auth_method" + indexNodePool = "node_pool" ) var ( @@ -97,6 +99,7 @@ func init() { aclRolesTableSchema, aclAuthMethodsTableSchema, bindingRulesTableSchema, + hostVolumeTableSchema, }...) 
} @@ -161,8 +164,8 @@ func nodeTableSchema() *memdb.TableSchema { Field: "SecretID", }, }, - "node_pool": { - Name: "node_pool", + indexNodePool: { + Name: indexNodePool, AllowMissing: false, Unique: false, Indexer: &memdb.StringFieldIndex{ @@ -844,8 +847,8 @@ func vaultAccessorTableSchema() *memdb.TableSchema { }, }, - "node_id": { - Name: "node_id", + indexNodeID: { + Name: indexNodeID, AllowMissing: false, Unique: false, Indexer: &memdb.StringFieldIndex{ @@ -882,8 +885,8 @@ func siTokenAccessorTableSchema() *memdb.TableSchema { }, }, - "node_id": { - Name: "node_id", + indexNodeID: { + Name: indexNodeID, AllowMissing: false, Unique: false, Indexer: &memdb.StringFieldIndex{ @@ -1643,3 +1646,61 @@ func bindingRulesTableSchema() *memdb.TableSchema { }, } } + +// HostVolumes are identified by id globally, and searchable by namespace+name, +// node, or node_pool +func hostVolumeTableSchema() *memdb.TableSchema { + return &memdb.TableSchema{ + Name: TableHostVolumes, + Indexes: map[string]*memdb.IndexSchema{ + indexID: { + Name: indexID, + AllowMissing: false, + Unique: true, + Indexer: &memdb.CompoundIndex{ + Indexes: []memdb.Indexer{ + &memdb.StringFieldIndex{ + Field: "Namespace", + }, + &memdb.StringFieldIndex{ + Field: "ID", + Lowercase: true, + }, + }, + }, + }, + indexName: { + Name: indexName, + AllowMissing: false, + Unique: false, + Indexer: &memdb.CompoundIndex{ + Indexes: []memdb.Indexer{ + &memdb.StringFieldIndex{ + Field: "Namespace", + }, + &memdb.StringFieldIndex{ + Field: "Name", + }, + }, + }, + }, + indexNodeID: { + Name: indexNodeID, + AllowMissing: false, + Unique: false, + Indexer: &memdb.StringFieldIndex{ + Field: "NodeID", + Lowercase: true, + }, + }, + indexNodePool: { + Name: indexNodePool, + AllowMissing: false, + Unique: false, + Indexer: &memdb.StringFieldIndex{ + Field: "NodePool", + }, + }, + }, + } +} diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 545c3f3201b..65ce87813db 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -1048,6 +1048,9 @@ func upsertNodeTxn(txn *txn, index uint64, node *structs.Node) error { if err := upsertCSIPluginsForNode(txn, node, index); err != nil { return fmt.Errorf("csi plugin update failed: %v", err) } + if err := upsertHostVolumeForNode(txn, node, index); err != nil { + return fmt.Errorf("dynamic host volumes update failed: %v", err) + } return nil } diff --git a/nomad/state/state_store_host_volumes.go b/nomad/state/state_store_host_volumes.go new file mode 100644 index 00000000000..7e55e6ced43 --- /dev/null +++ b/nomad/state/state_store_host_volumes.go @@ -0,0 +1,252 @@ +// Copyright (c) HashiCorp, Inc. 
+// SPDX-License-Identifier: BUSL-1.1 + +package state + +import ( + "fmt" + "strings" + + memdb "github.com/hashicorp/go-memdb" + "github.com/hashicorp/nomad/nomad/structs" +) + +// HostVolumeByID retrieve a specific host volume +func (s *StateStore) HostVolumeByID(ws memdb.WatchSet, ns, id string, withAllocs bool) (*structs.HostVolume, error) { + txn := s.db.ReadTxn() + watchCh, obj, err := txn.FirstWatch(TableHostVolumes, indexID, ns, id) + if err != nil { + return nil, err + } + ws.Add(watchCh) + + if obj == nil { + return nil, nil + } + vol := obj.(*structs.HostVolume) + if !withAllocs { + return vol, nil + } + + vol = vol.Copy() + vol.Allocations = []*structs.AllocListStub{} + + // we can't use AllocsByNodeTerminal because we only want to filter out + // allocs that are client-terminal, not server-terminal + allocs, err := s.AllocsByNode(nil, vol.NodeID) + if err != nil { + return nil, fmt.Errorf("could not query allocs to check for host volume claims: %w", err) + } + for _, alloc := range allocs { + if alloc.ClientTerminalStatus() { + continue + } + for _, volClaim := range alloc.Job.LookupTaskGroup(alloc.TaskGroup).Volumes { + if volClaim.Type == structs.VolumeTypeHost && volClaim.Source == vol.Name { + vol.Allocations = append(vol.Allocations, alloc.Stub(nil)) + } + } + } + + return vol, nil +} + +// UpsertHostVolume upserts a host volume +func (s *StateStore) UpsertHostVolume(index uint64, vol *structs.HostVolume) error { + txn := s.db.WriteTxn(index) + defer txn.Abort() + + if exists, err := s.namespaceExists(txn, vol.Namespace); err != nil { + return err + } else if !exists { + return fmt.Errorf("host volume %s is in nonexistent namespace %s", vol.ID, vol.Namespace) + } + + obj, err := txn.First(TableHostVolumes, indexID, vol.Namespace, vol.ID) + if err != nil { + return err + } + if obj != nil { + old := obj.(*structs.HostVolume) + vol.CreateIndex = old.CreateIndex + vol.CreateTime = old.CreateTime + } else { + vol.CreateIndex = index + } + + // If the fingerprint is written from the node before the create RPC handler + // completes, we'll never update from the initial pending, so reconcile that + // here + node, err := s.NodeByID(nil, vol.NodeID) + if err != nil { + return err + } + if node == nil { + return fmt.Errorf("host volume %s has nonexistent node ID %s", vol.ID, vol.NodeID) + } + switch vol.State { + case structs.HostVolumeStateDeleted: + // no-op: don't allow soft-deletes to resurrect a previously fingerprinted volume + default: + // prevent a race between node fingerprint and create RPC that could + // switch a ready volume back to pending + if _, ok := node.HostVolumes[vol.Name]; ok { + vol.State = structs.HostVolumeStateReady + } + } + + // Register RPCs for new volumes may not have the node pool set + vol.NodePool = node.NodePool + + // Allocations are denormalized on read, so we don't want these to be + // written to the state store. 
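+	// HostVolumeByID rebuilds the alloc stubs from AllocsByNode when a caller
+	// asks for them, so a persisted copy would only go stale.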
+ vol.Allocations = nil + vol.ModifyIndex = index + + err = txn.Insert(TableHostVolumes, vol) + if err != nil { + return fmt.Errorf("host volume insert: %w", err) + } + + if err := txn.Insert(tableIndex, &IndexEntry{TableHostVolumes, index}); err != nil { + return fmt.Errorf("index update failed: %w", err) + } + + return txn.Commit() +} + +// DeleteHostVolume deletes a host volume +func (s *StateStore) DeleteHostVolume(index uint64, ns string, id string) error { + txn := s.db.WriteTxn(index) + defer txn.Abort() + + obj, err := txn.First(TableHostVolumes, indexID, ns, id) + if err != nil { + return err + } + if obj != nil { + vol := obj.(*structs.HostVolume) + + allocs, err := s.AllocsByNodeTerminal(nil, vol.NodeID, false) + if err != nil { + return fmt.Errorf("could not query allocs to check for host volume claims: %w", err) + } + for _, alloc := range allocs { + for _, volClaim := range alloc.Job.LookupTaskGroup(alloc.TaskGroup).Volumes { + if volClaim.Type == structs.VolumeTypeHost && volClaim.Name == vol.Name { + return fmt.Errorf("could not delete volume %s in use by alloc %s", + vol.ID, alloc.ID) + } + } + } + + err = txn.Delete(TableHostVolumes, vol) + if err != nil { + return fmt.Errorf("host volume delete: %w", err) + } + } + + if err := txn.Insert(tableIndex, &IndexEntry{TableHostVolumes, index}); err != nil { + return fmt.Errorf("index update failed: %w", err) + } + + return txn.Commit() + +} + +// HostVolumes queries all the host volumes and is mostly used for +// snapshot/restore +func (s *StateStore) HostVolumes(ws memdb.WatchSet, sort SortOption) (memdb.ResultIterator, error) { + return s.hostVolumesIter(ws, indexID, sort) +} + +// HostVolumesByIDPrefix retrieves all host volumes by ID prefix. Because the ID +// index is namespaced, we need to handle the wildcard namespace here as well. 
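//
// For illustration (values are examples only): the table's ID index is a
// memdb CompoundIndex over (Namespace, ID), so a namespaced lookup can use
// the derived "id_prefix" index directly, while the wildcard namespace falls
// back to scanning every volume and filtering by prefix:
//
//	// single namespace: indexed prefix lookup
//	iter, _ := store.HostVolumesByIDPrefix(nil, "default", "82f357d6", SortDefault)
//
//	// wildcard namespace: full scan, keep IDs matching the prefix
//	iter, _ = store.HostVolumesByIDPrefix(nil, structs.AllNamespacesSentinel, "82f357d6", SortDefault)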
+func (s *StateStore) HostVolumesByIDPrefix(ws memdb.WatchSet, ns, prefix string, sort SortOption) (memdb.ResultIterator, error) { + + if ns != structs.AllNamespacesSentinel { + return s.hostVolumesIter(ws, "id_prefix", sort, ns, prefix) + } + + // for wildcard namespace, wrap the iterator in a filter function that + // filters all volumes by prefix + iter, err := s.hostVolumesIter(ws, indexID, sort) + if err != nil { + return nil, err + } + wrappedIter := memdb.NewFilterIterator(iter, func(raw any) bool { + vol, ok := raw.(*structs.HostVolume) + if !ok { + return true + } + return !strings.HasPrefix(vol.ID, prefix) + }) + return wrappedIter, nil +} + +// HostVolumesByName retrieves all host volumes of the same name +func (s *StateStore) HostVolumesByName(ws memdb.WatchSet, ns, name string, sort SortOption) (memdb.ResultIterator, error) { + return s.hostVolumesIter(ws, "name_prefix", sort, ns, name) +} + +// HostVolumesByNodeID retrieves all host volumes on the same node +func (s *StateStore) HostVolumesByNodeID(ws memdb.WatchSet, nodeID string, sort SortOption) (memdb.ResultIterator, error) { + return s.hostVolumesIter(ws, indexNodeID, sort, nodeID) +} + +// HostVolumesByNodePool retrieves all host volumes in the same node pool +func (s *StateStore) HostVolumesByNodePool(ws memdb.WatchSet, nodePool string, sort SortOption) (memdb.ResultIterator, error) { + return s.hostVolumesIter(ws, indexNodePool, sort, nodePool) +} + +func (s *StateStore) hostVolumesIter(ws memdb.WatchSet, index string, sort SortOption, args ...any) (memdb.ResultIterator, error) { + txn := s.db.ReadTxn() + + var iter memdb.ResultIterator + var err error + + switch sort { + case SortReverse: + iter, err = txn.GetReverse(TableHostVolumes, index, args...) + default: + iter, err = txn.Get(TableHostVolumes, index, args...) + } + if err != nil { + return nil, err + } + + ws.Add(iter.WatchCh()) + return iter, nil +} + +// upsertHostVolumeForNode sets newly fingerprinted host volumes to ready state +func upsertHostVolumeForNode(txn *txn, node *structs.Node, index uint64) error { + if len(node.HostVolumes) == 0 { + return nil + } + iter, err := txn.Get(TableHostVolumes, indexNodeID, node.ID) + if err != nil { + return err + } + for { + raw := iter.Next() + if raw == nil { + return nil + } + vol := raw.(*structs.HostVolume) + switch vol.State { + case structs.HostVolumeStateUnknown, structs.HostVolumeStatePending: + if _, ok := node.HostVolumes[vol.Name]; ok { + vol = vol.Copy() + vol.State = structs.HostVolumeStateReady + vol.ModifyIndex = index + err = txn.Insert(TableHostVolumes, vol) + if err != nil { + return fmt.Errorf("host volume insert: %w", err) + } + } + default: + // don't touch ready or soft-deleted volumes + } + } +} diff --git a/nomad/state/state_store_host_volumes_test.go b/nomad/state/state_store_host_volumes_test.go new file mode 100644 index 00000000000..04e18e4016e --- /dev/null +++ b/nomad/state/state_store_host_volumes_test.go @@ -0,0 +1,281 @@ +// Copyright (c) HashiCorp, Inc. 
+// SPDX-License-Identifier: BUSL-1.1 + +package state + +import ( + "fmt" + "testing" + + memdb "github.com/hashicorp/go-memdb" + "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/helper/uuid" + "github.com/hashicorp/nomad/nomad/mock" + "github.com/hashicorp/nomad/nomad/structs" + "github.com/shoenig/test/must" +) + +func TestStateStore_HostVolumes_CRUD(t *testing.T) { + ci.Parallel(t) + store := testStateStore(t) + index, err := store.LatestIndex() + must.NoError(t, err) + + nodes := []*structs.Node{ + mock.Node(), + mock.Node(), + mock.Node(), + } + nodes[2].NodePool = "prod" + index++ + must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, + index, nodes[0], NodeUpsertWithNodePool)) + must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, + index, nodes[1], NodeUpsertWithNodePool)) + must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, + index, nodes[2], NodeUpsertWithNodePool)) + + ns := mock.Namespace() + must.NoError(t, store.UpsertNamespaces(index, []*structs.Namespace{ns})) + + vols := []*structs.HostVolume{ + mock.HostVolume(), + mock.HostVolume(), + mock.HostVolume(), + mock.HostVolume(), + } + vols[0].NodeID = nodes[0].ID + vols[1].NodeID = nodes[1].ID + vols[1].Name = "another-example" + vols[2].NodeID = nodes[2].ID + vols[2].NodePool = nodes[2].NodePool + vols[3].Namespace = ns.Name + vols[3].NodeID = nodes[2].ID + vols[3].NodePool = nodes[2].NodePool + + index++ + must.NoError(t, store.UpsertHostVolume(index, vols[0])) + must.NoError(t, store.UpsertHostVolume(index, vols[1])) + must.NoError(t, store.UpsertHostVolume(index, vols[2])) + must.NoError(t, store.UpsertHostVolume(index, vols[3])) + + vol, err := store.HostVolumeByID(nil, vols[0].Namespace, vols[0].ID, true) + must.NoError(t, err) + must.NotNil(t, vol) + must.Eq(t, vols[0].ID, vol.ID) + must.NotNil(t, vol.Allocations) + must.Len(t, 0, vol.Allocations) + + vol, err = store.HostVolumeByID(nil, vols[0].Namespace, vols[0].ID, false) + must.NoError(t, err) + must.NotNil(t, vol) + must.Nil(t, vol.Allocations) + + consumeIter := func(iter memdb.ResultIterator) map[string]*structs.HostVolume { + got := map[string]*structs.HostVolume{} + for raw := iter.Next(); raw != nil; raw = iter.Next() { + vol := raw.(*structs.HostVolume) + got[vol.ID] = vol + } + return got + } + + iter, err := store.HostVolumesByName(nil, structs.DefaultNamespace, "example", SortDefault) + must.NoError(t, err) + got := consumeIter(iter) + must.NotNil(t, got[vols[0].ID], must.Sprint("expected vol0")) + must.NotNil(t, got[vols[2].ID], must.Sprint("expected vol2")) + must.MapLen(t, 2, got, must.Sprint(`expected 2 volumes named "example" in default namespace`)) + + iter, err = store.HostVolumesByNodePool(nil, nodes[2].NodePool, SortDefault) + must.NoError(t, err) + got = consumeIter(iter) + must.NotNil(t, got[vols[2].ID], must.Sprint("expected vol2")) + must.NotNil(t, got[vols[3].ID], must.Sprint("expected vol3")) + must.MapLen(t, 2, got, must.Sprint(`expected 2 volumes in prod node pool`)) + + iter, err = store.HostVolumesByNodeID(nil, nodes[2].ID, SortDefault) + must.NoError(t, err) + got = consumeIter(iter) + must.NotNil(t, got[vols[2].ID], must.Sprint("expected vol2")) + must.NotNil(t, got[vols[3].ID], must.Sprint("expected vol3")) + must.MapLen(t, 2, got, must.Sprint(`expected 2 volumes on node 2`)) + + // simulate a node registering one of the volumes + nodes[2] = nodes[2].Copy() + nodes[2].HostVolumes = map[string]*structs.ClientHostVolumeConfig{"example": { + Name: 
vols[2].Name, + Path: vols[2].HostPath, + }} + index++ + must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, index, nodes[2])) + + // update all the volumes, which should update the state of vol2 as well + index++ + for i, vol := range vols { + vol = vol.Copy() + vol.RequestedCapacityMaxBytes = 300000 + vols[i] = vol + must.NoError(t, store.UpsertHostVolume(index, vol)) + } + + iter, err = store.HostVolumesByName(nil, structs.DefaultNamespace, "example", SortDefault) + must.NoError(t, err) + got = consumeIter(iter) + must.MapLen(t, 2, got, must.Sprint(`expected 2 volumes named "example" in default namespace`)) + + vol0 := got[vols[0].ID] + must.NotNil(t, vol0) + must.Eq(t, index, vol0.ModifyIndex) + vol2 := got[vols[2].ID] + must.NotNil(t, vol2) + must.Eq(t, index, vol2.ModifyIndex) + must.Eq(t, structs.HostVolumeStateReady, vol2.State, must.Sprint( + "expected volume state to be updated because its been fingerprinted by a node")) + + alloc := mock.AllocForNode(nodes[2]) + alloc.Job.TaskGroups[0].Volumes = map[string]*structs.VolumeRequest{"example": { + Name: "example", + Type: structs.VolumeTypeHost, + Source: vols[2].Name, + }} + index++ + must.NoError(t, store.UpsertAllocs(structs.MsgTypeTestSetup, + index, []*structs.Allocation{alloc})) + + index++ + err = store.DeleteHostVolume(index, vol2.Namespace, vols[2].ID) + must.EqError(t, err, fmt.Sprintf( + "could not delete volume %s in use by alloc %s", vols[2].ID, alloc.ID)) + + err = store.DeleteHostVolume(index, vol2.Namespace, vols[1].ID) + must.NoError(t, err) + vol, err = store.HostVolumeByID(nil, vols[1].Namespace, vols[1].ID, true) + must.NoError(t, err) + must.Nil(t, vol) + + vol, err = store.HostVolumeByID(nil, vols[2].Namespace, vols[2].ID, true) + must.NoError(t, err) + must.NotNil(t, vol) + must.Len(t, 1, vol.Allocations) + + iter, err = store.HostVolumes(nil, SortReverse) + must.NoError(t, err) + got = consumeIter(iter) + must.MapLen(t, 3, got, must.Sprint(`expected 3 volumes remain`)) + + prefix := vol.ID[:30] // sufficiently long prefix to avoid flakes + iter, err = store.HostVolumesByIDPrefix(nil, "*", prefix, SortDefault) + must.NoError(t, err) + got = consumeIter(iter) + must.MapLen(t, 1, got, must.Sprint(`expected only one volume to match prefix`)) + + iter, err = store.HostVolumesByIDPrefix(nil, vol.Namespace, prefix, SortDefault) + must.NoError(t, err) + got = consumeIter(iter) + must.MapLen(t, 1, got, must.Sprint(`expected only one volume to match prefix`)) + + alloc = alloc.Copy() + alloc.ClientStatus = structs.AllocClientStatusComplete + index++ + must.NoError(t, store.UpdateAllocsFromClient(structs.MsgTypeTestSetup, + index, []*structs.Allocation{alloc})) + for _, v := range vols { + index++ + must.NoError(t, store.DeleteHostVolume(index, v.Namespace, v.ID)) + } + iter, err = store.HostVolumes(nil, SortDefault) + got = consumeIter(iter) + must.MapLen(t, 0, got, must.Sprint(`expected no volumes to remain`)) +} + +func TestStateStore_UpdateHostVolumesFromFingerprint(t *testing.T) { + ci.Parallel(t) + store := testStateStore(t) + index, err := store.LatestIndex() + must.NoError(t, err) + + node := mock.Node() + node.HostVolumes = map[string]*structs.ClientHostVolumeConfig{ + "static-vol": {Name: "static-vol", Path: "/srv/static"}, + "dhv-zero": {Name: "dhv-zero", Path: "/var/nomad/alloc_mounts" + uuid.Generate()}, + } + index++ + must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, + index, node, NodeUpsertWithNodePool)) + otherNode := mock.Node() + must.NoError(t, 
store.UpsertNode(structs.MsgTypeTestSetup, + index, otherNode, NodeUpsertWithNodePool)) + + ns := structs.DefaultNamespace + + vols := []*structs.HostVolume{ + mock.HostVolume(), + mock.HostVolume(), + mock.HostVolume(), + mock.HostVolume(), + } + + // a volume that's been fingerprinted before we can write it to state + vols[0].Name = "dhv-zero" + vols[0].NodeID = node.ID + + // a volume that will match the new fingerprint + vols[1].Name = "dhv-one" + vols[1].NodeID = node.ID + + // a volume that matches the new fingerprint but on the wrong node + vols[2].Name = "dhv-one" + vols[2].NodeID = otherNode.ID + + // a volume that won't be fingerprinted + vols[3].Name = "dhv-two" + vols[3].NodeID = node.ID + + index++ + oldIndex := index + must.NoError(t, store.UpsertHostVolume(index, vols[0])) + must.NoError(t, store.UpsertHostVolume(index, vols[1])) + must.NoError(t, store.UpsertHostVolume(index, vols[2])) + must.NoError(t, store.UpsertHostVolume(index, vols[3])) + + vol0, err := store.HostVolumeByID(nil, ns, vols[0].ID, false) + must.NoError(t, err) + must.Eq(t, structs.HostVolumeStateReady, vol0.State, + must.Sprint("previously-fingerprinted volume should be in ready state")) + + // update the fingerprint + + node = node.Copy() + node.HostVolumes["dhv-one"] = &structs.ClientHostVolumeConfig{ + Name: "dhv-one", + Path: "/var/nomad/alloc_mounts" + uuid.Generate(), + } + + index++ + must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, index, node)) + + vol0, err = store.HostVolumeByID(nil, ns, vols[0].ID, false) + must.NoError(t, err) + must.Eq(t, oldIndex, vol0.ModifyIndex, must.Sprint("expected no further update")) + must.Eq(t, structs.HostVolumeStateReady, vol0.State) + + vol1, err := store.HostVolumeByID(nil, ns, vols[1].ID, false) + must.NoError(t, err) + must.Eq(t, index, vol1.ModifyIndex, + must.Sprint("fingerprint should update pending volume")) + must.Eq(t, structs.HostVolumeStateReady, vol1.State) + + vol2, err := store.HostVolumeByID(nil, ns, vols[2].ID, false) + must.NoError(t, err) + must.Eq(t, oldIndex, vol2.ModifyIndex, + must.Sprint("volume on other node should not change")) + must.Eq(t, structs.HostVolumeStatePending, vol2.State) + + vol3, err := store.HostVolumeByID(nil, ns, vols[3].ID, false) + must.NoError(t, err) + must.Eq(t, oldIndex, vol3.ModifyIndex, + must.Sprint("volume not fingerprinted should not change")) + must.Eq(t, structs.HostVolumeStatePending, vol3.State) + +} diff --git a/nomad/state/state_store_restore.go b/nomad/state/state_store_restore.go index 2072ca727d7..0a1638422f2 100644 --- a/nomad/state/state_store_restore.go +++ b/nomad/state/state_store_restore.go @@ -291,3 +291,11 @@ func (r *StateRestore) JobSubmissionRestore(jobSubmission *structs.JobSubmission } return nil } + +// HostVolumeRestore restores a single host volume into the host_volumes table +func (r *StateRestore) HostVolumeRestore(vol *structs.HostVolume) error { + if err := r.txn.Insert(TableHostVolumes, vol); err != nil { + return fmt.Errorf("host volume insert failed: %w", err) + } + return nil +} diff --git a/nomad/structs/diff_test.go b/nomad/structs/diff_test.go index 0f87387ff6c..e394545ad1d 100644 --- a/nomad/structs/diff_test.go +++ b/nomad/structs/diff_test.go @@ -10,7 +10,7 @@ import ( "github.com/hashicorp/nomad/ci" "github.com/hashicorp/nomad/helper/pointer" - "github.com/stretchr/testify/require" + "github.com/shoenig/test/must" ) func TestJobDiff(t *testing.T) { @@ -4864,6 +4864,12 @@ func TestTaskGroupDiff(t *testing.T) { Old: "", New: 
"foo-src", }, + { + Type: DiffTypeAdded, + Name: "Sticky", + Old: "", + New: "false", + }, { Type: DiffTypeAdded, Name: "Type", @@ -5475,17 +5481,17 @@ func TestTaskGroupDiff(t *testing.T) { } for i, c := range cases { - require.NotEmpty(t, c.TestCase, "case #%d needs a name", i+1) + must.NotEq(t, c.TestCase, "", must.Sprintf("case #%d needs a name", i+1)) t.Run(c.TestCase, func(t *testing.T) { result, err := c.Old.Diff(c.New, c.Contextual) switch c.ExpErr { case true: - require.Error(t, err, "case %q expected error", c.TestCase) + must.Error(t, err, must.Sprintf("case %q expected error", c.TestCase)) case false: - require.NoError(t, err, "case %q expected no error", c.TestCase) - require.Equal(t, c.Expected, result) + must.NoError(t, err, must.Sprintf("case %q expected no error", c.TestCase)) + must.Eq(t, c.Expected, result) } }) } @@ -9870,10 +9876,10 @@ func TestTaskDiff(t *testing.T) { t.Run(c.Name, func(t *testing.T) { actual, err := c.Old.Diff(c.New, c.Contextual) if c.Error { - require.Error(t, err) + must.Error(t, err) } else { - require.NoError(t, err) - require.Equal(t, c.Expected, actual) + must.NoError(t, err) + must.Eq(t, c.Expected, actual) } }) } @@ -10848,7 +10854,7 @@ func TestServicesDiff(t *testing.T) { for _, c := range cases { t.Run(c.Name, func(t *testing.T) { actual := serviceDiffs(c.Old, c.New, c.Contextual) - require.Equal(t, c.Expected, actual) + must.Eq(t, c.Expected, actual) }) } } diff --git a/nomad/structs/host_volumes.go b/nomad/structs/host_volumes.go new file mode 100644 index 00000000000..440ad956512 --- /dev/null +++ b/nomad/structs/host_volumes.go @@ -0,0 +1,392 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package structs + +import ( + "errors" + "fmt" + "maps" + "strings" + "time" + + "github.com/hashicorp/go-multierror" + "github.com/hashicorp/nomad/helper" + "github.com/hashicorp/nomad/helper/uuid" +) + +type HostVolume struct { + // Namespace is the Nomad namespace for the host volume, which constrains + // which jobs can mount it. + Namespace string + + // ID is a UUID-like string generated by the server. + ID string + + // Name is the name that group.volume will use to identify the volume + // source. Not expected to be unique. + Name string + + // PluginID is the name of the host volume plugin on the client that will be + // used for creating the volume. If omitted, the client will use its default + // built-in plugin. + PluginID string + + // NodePool is the node pool of the node where the volume is placed. If the + // user doesn't provide a node ID, a node will be selected using the + // NodePool and Constraints. If the user provides both NodePool and NodeID, + // NodePool will be used to validate the request. If omitted, the server + // will populate this value in before writing the volume to Raft. + NodePool string + + // NodeID is the node where the volume is placed. If the user doesn't + // provide a NodeID, one will be selected using the NodePool and + // Constraints. If omitted, this field will then be populated by the server + // before writing the volume to Raft. + NodeID string + + // Constraints are optional. If the NodeID is not provided, the NodePool and + // Constraints are used to select a node. If the NodeID is provided, + // Constraints are used to validate that the node meets those constraints at + // the time of volume creation. 
+ Constraints []*Constraint `json:",omitempty"` + + // Because storage may allow only specific intervals of size, we accept a + // min and max and return the actual capacity when the volume is created or + // updated on the client + RequestedCapacityMinBytes int64 + RequestedCapacityMaxBytes int64 + CapacityBytes int64 + + // RequestedCapabilities defines the options available to group.volume + // blocks. The scheduler checks against the listed capability blocks and + // selects a node for placement if *any* capability block works. + RequestedCapabilities []*HostVolumeCapability + + // Parameters are an opaque map of parameters for the host volume plugin. + Parameters map[string]string `json:",omitempty"` + + // HostPath is the path on disk where the volume's mount point was + // created. We record this to make debugging easier. + HostPath string + + // State represents the overall state of the volume. One of pending, ready, + // deleted. + State HostVolumeState + + CreateIndex uint64 + CreateTime int64 // Unix timestamp in nanoseconds since epoch + + ModifyIndex uint64 + ModifyTime int64 // Unix timestamp in nanoseconds since epoch + + // Allocations is the list of non-client-terminal allocations with claims on + // this host volume. They are denormalized on read and this field will be + // never written to Raft + Allocations []*AllocListStub `json:",omitempty"` +} + +type HostVolumeState string + +const ( + HostVolumeStateUnknown HostVolumeState = "" // never write this to Raft + HostVolumeStatePending HostVolumeState = "pending" + HostVolumeStateReady HostVolumeState = "ready" + HostVolumeStateDeleted HostVolumeState = "deleted" +) + +func (hv *HostVolume) Copy() *HostVolume { + if hv == nil { + return nil + } + + nhv := *hv + nhv.Constraints = helper.CopySlice(hv.Constraints) + nhv.RequestedCapabilities = helper.CopySlice(hv.RequestedCapabilities) + nhv.Parameters = maps.Clone(hv.Parameters) + return &nhv +} + +func (hv *HostVolume) Stub() *HostVolumeStub { + if hv == nil { + return nil + } + + return &HostVolumeStub{ + Namespace: hv.Namespace, + ID: hv.ID, + Name: hv.Name, + PluginID: hv.PluginID, + NodePool: hv.NodePool, + NodeID: hv.NodeID, + CapacityBytes: hv.CapacityBytes, + State: hv.State, + CreateIndex: hv.CreateIndex, + CreateTime: hv.CreateTime, + ModifyIndex: hv.ModifyIndex, + ModifyTime: hv.ModifyTime, + } +} + +// Validate verifies that the submitted HostVolume spec has valid field values, +// without validating any changes or state (see ValidateUpdate). 
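//
// As a sketch, the smallest spec that passes is a name plus one well-formed
// capability block (the capacity bounds may both be zero, and an empty ID
// skips the UUID check):
//
//	vol := &HostVolume{
//		Name: "example",
//		RequestedCapabilities: []*HostVolumeCapability{{
//			AttachmentMode: HostVolumeAttachmentModeFilesystem,
//			AccessMode:     HostVolumeAccessModeSingleNodeWriter,
//		}},
//	}
//	err := vol.Validate() // nil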
+func (hv *HostVolume) Validate() error { + + var mErr *multierror.Error + + if hv.ID != "" && !helper.IsUUID(hv.ID) { + mErr = multierror.Append(mErr, errors.New("invalid ID")) + } + + if hv.Name == "" { + mErr = multierror.Append(mErr, errors.New("missing name")) + } + + if hv.RequestedCapacityMaxBytes < hv.RequestedCapacityMinBytes { + mErr = multierror.Append(mErr, fmt.Errorf( + "capacity_max (%d) must be larger than capacity_min (%d)", + hv.RequestedCapacityMaxBytes, hv.RequestedCapacityMinBytes)) + } + + if len(hv.RequestedCapabilities) == 0 { + mErr = multierror.Append(mErr, errors.New("must include at least one capability block")) + } else { + for _, cap := range hv.RequestedCapabilities { + err := cap.Validate() + if err != nil { + mErr = multierror.Append(mErr, err) + } + } + } + + for _, constraint := range hv.Constraints { + if err := constraint.Validate(); err != nil { + mErr = multierror.Append(mErr, fmt.Errorf("invalid constraint: %v", err)) + } + switch constraint.Operand { + case ConstraintDistinctHosts, ConstraintDistinctProperty: + mErr = multierror.Append(mErr, fmt.Errorf( + "invalid constraint %s: host volumes of the same name are always on distinct hosts", constraint.Operand)) + default: + } + } + + return helper.FlattenMultierror(mErr.ErrorOrNil()) +} + +// ValidateUpdate verifies that an update to a volume is safe to make. +func (hv *HostVolume) ValidateUpdate(existing *HostVolume) error { + if existing == nil { + return nil + } + + var mErr *multierror.Error + if len(existing.Allocations) > 0 { + allocIDs := helper.ConvertSlice(existing.Allocations, + func(a *AllocListStub) string { return a.ID }) + mErr = multierror.Append(mErr, fmt.Errorf( + "cannot update a volume in use: claimed by allocs (%s)", + strings.Join(allocIDs, ", "))) + } + + if hv.NodeID != "" && hv.NodeID != existing.NodeID { + mErr = multierror.Append(mErr, errors.New("node ID cannot be updated")) + } + if hv.NodePool != "" && hv.NodePool != existing.NodePool { + mErr = multierror.Append(mErr, errors.New("node pool cannot be updated")) + } + + if hv.RequestedCapacityMaxBytes < existing.CapacityBytes { + mErr = multierror.Append(mErr, fmt.Errorf( + "capacity_max (%d) cannot be less than existing provisioned capacity (%d)", + hv.RequestedCapacityMaxBytes, existing.CapacityBytes)) + } + + return mErr.ErrorOrNil() +} + +const DefaultHostVolumePlugin = "default" + +// CanonicalizeForUpdate is called in the RPC handler to ensure we call client +// RPCs with correctly populated fields from the existing volume, even if the +// RPC request includes otherwise valid zero-values. This method should be +// called on request objects or a copy, never on a state store object directly. 
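//
// Roughly: on create (existing == nil) a fresh UUID is assigned, the plugin
// ID defaults to "default", and the plugin-reported fields (CapacityBytes,
// HostPath) are zeroed; on update the server-owned fields (PluginID,
// NodePool, NodeID, Constraints, CapacityBytes, HostPath, CreateTime) are
// carried over from the existing volume so a sparse register request cannot
// clear them. In both cases the state is reset to pending, Allocations are
// dropped, and ModifyTime is set to now.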
+func (hv *HostVolume) CanonicalizeForUpdate(existing *HostVolume, now time.Time) { + if existing == nil { + hv.ID = uuid.Generate() + if hv.PluginID == "" { + hv.PluginID = DefaultHostVolumePlugin + } + hv.CapacityBytes = 0 // returned by plugin + hv.HostPath = "" // returned by plugin + hv.CreateTime = now.UnixNano() + } else { + hv.PluginID = existing.PluginID + hv.NodePool = existing.NodePool + hv.NodeID = existing.NodeID + hv.Constraints = existing.Constraints + hv.CapacityBytes = existing.CapacityBytes + hv.HostPath = existing.HostPath + hv.CreateTime = existing.CreateTime + } + + hv.State = HostVolumeStatePending // reset on any change + hv.ModifyTime = now.UnixNano() + hv.Allocations = nil // set on read only +} + +// GetNamespace implements the paginator.NamespaceGetter interface +func (hv *HostVolume) GetNamespace() string { + return hv.Namespace +} + +// GetID implements the paginator.IDGetter interface +func (hv *HostVolume) GetID() string { + return hv.ID +} + +// HostVolumeCapability is the requested attachment and access mode for a volume +type HostVolumeCapability struct { + AttachmentMode HostVolumeAttachmentMode + AccessMode HostVolumeAccessMode +} + +func (hvc *HostVolumeCapability) Copy() *HostVolumeCapability { + if hvc == nil { + return nil + } + + nhvc := *hvc + return &nhvc +} + +func (hvc *HostVolumeCapability) Validate() error { + if hvc == nil { + return errors.New("validate called on nil host volume capability") + } + + switch hvc.AttachmentMode { + case HostVolumeAttachmentModeBlockDevice, + HostVolumeAttachmentModeFilesystem: + default: + return fmt.Errorf("invalid attachment mode: %q", hvc.AttachmentMode) + } + + switch hvc.AccessMode { + case HostVolumeAccessModeSingleNodeReader, + HostVolumeAccessModeSingleNodeWriter, + HostVolumeAccessModeSingleNodeSingleWriter, + HostVolumeAccessModeSingleNodeMultiWriter: + default: + return fmt.Errorf("invalid access mode: %q", hvc.AccessMode) + } + + return nil +} + +// HostVolumeAttachmentMode chooses the type of storage API that will be used to +// interact with the device. +type HostVolumeAttachmentMode string + +const ( + HostVolumeAttachmentModeUnknown HostVolumeAttachmentMode = "" + HostVolumeAttachmentModeBlockDevice HostVolumeAttachmentMode = "block-device" + HostVolumeAttachmentModeFilesystem HostVolumeAttachmentMode = "file-system" +) + +// HostVolumeAccessMode indicates how Nomad should make the volume available to +// concurrent allocations. 
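//
// As enforced by the scheduler's hostVolumeIsAvailable check in this change:
// the volume must be in the ready state; single-node-reader placements must
// be read-only; single-node-writer placements must not be read-only;
// single-node-single-writer rejects a placement when another proposed
// allocation on the node already claims the volume read-write; and
// single-node-multi-writer is unconstrained. A request that omits the access
// mode defaults to single-node-reader when read-only and single-node-writer
// otherwise, and an omitted attachment mode defaults to file-system.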
+type HostVolumeAccessMode string + +const ( + HostVolumeAccessModeUnknown HostVolumeAccessMode = "" + + HostVolumeAccessModeSingleNodeReader HostVolumeAccessMode = "single-node-reader-only" + HostVolumeAccessModeSingleNodeWriter HostVolumeAccessMode = "single-node-writer" + HostVolumeAccessModeSingleNodeSingleWriter HostVolumeAccessMode = "single-node-single-writer" + HostVolumeAccessModeSingleNodeMultiWriter HostVolumeAccessMode = "single-node-multi-writer" +) + +// HostVolumeStub is used for responses for the list volumes endpoint +type HostVolumeStub struct { + Namespace string + ID string + Name string + PluginID string + NodePool string + NodeID string + CapacityBytes int64 + State HostVolumeState + + CreateIndex uint64 + CreateTime int64 + + ModifyIndex uint64 + ModifyTime int64 +} + +type HostVolumeCreateRequest struct { + Volume *HostVolume + + // PolicyOverride is set when the user is attempting to override any + // Enterprise policy enforcement + PolicyOverride bool + + WriteRequest +} + +type HostVolumeCreateResponse struct { + Volume *HostVolume + + // Warnings are non-fatal messages from Enterprise policy enforcement + Warnings string + WriteMeta +} + +type HostVolumeRegisterRequest struct { + Volume *HostVolume + + // PolicyOverride is set when the user is attempting to override any + // Enterprise policy enforcement + PolicyOverride bool + + WriteRequest +} + +type HostVolumeRegisterResponse struct { + Volume *HostVolume + + // Warnings are non-fatal messages from Enterprise policy enforcement + Warnings string + WriteMeta +} + +type HostVolumeDeleteRequest struct { + VolumeID string + WriteRequest +} + +type HostVolumeDeleteResponse struct { + WriteMeta +} + +type HostVolumeGetRequest struct { + ID string + QueryOptions +} + +type HostVolumeGetResponse struct { + Volume *HostVolume + QueryMeta +} + +type HostVolumeListRequest struct { + NodeID string // filter + NodePool string // filter + QueryOptions +} + +type HostVolumeListResponse struct { + Volumes []*HostVolumeStub + QueryMeta +} diff --git a/nomad/structs/host_volumes_test.go b/nomad/structs/host_volumes_test.go new file mode 100644 index 00000000000..2a03e838daf --- /dev/null +++ b/nomad/structs/host_volumes_test.go @@ -0,0 +1,253 @@ +// Copyright (c) HashiCorp, Inc. 
+// SPDX-License-Identifier: BUSL-1.1 + +package structs + +import ( + "testing" + "time" + + "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/helper/uuid" + "github.com/shoenig/test/must" +) + +func TestHostVolume_Copy(t *testing.T) { + ci.Parallel(t) + + out := (*HostVolume)(nil).Copy() + must.Nil(t, out) + + vol := &HostVolume{ + Namespace: DefaultNamespace, + ID: uuid.Generate(), + Name: "example", + PluginID: "example-plugin", + NodePool: NodePoolDefault, + NodeID: uuid.Generate(), + Constraints: []*Constraint{{ + LTarget: "${meta.rack}", + RTarget: "r1", + Operand: "=", + }}, + CapacityBytes: 150000, + RequestedCapabilities: []*HostVolumeCapability{{ + AttachmentMode: HostVolumeAttachmentModeFilesystem, + AccessMode: HostVolumeAccessModeSingleNodeWriter, + }}, + Parameters: map[string]string{"foo": "bar"}, + } + + out = vol.Copy() + must.Eq(t, vol, out) + + out.Allocations = []*AllocListStub{{ID: uuid.Generate()}} + out.Constraints[0].LTarget = "${meta.node_class}" + out.RequestedCapabilities = append(out.RequestedCapabilities, &HostVolumeCapability{ + AttachmentMode: HostVolumeAttachmentModeBlockDevice, + AccessMode: HostVolumeAccessModeSingleNodeMultiWriter, + }) + out.Parameters["foo"] = "baz" + + must.Nil(t, vol.Allocations) + must.Eq(t, "${meta.rack}", vol.Constraints[0].LTarget) + must.Len(t, 1, vol.RequestedCapabilities) + must.Eq(t, "bar", vol.Parameters["foo"]) +} + +func TestHostVolume_Validate(t *testing.T) { + ci.Parallel(t) + + invalid := &HostVolume{} + err := invalid.Validate() + must.EqError(t, err, `2 errors occurred: + * missing name + * must include at least one capability block + +`) + + invalid = &HostVolume{Name: "example"} + err = invalid.Validate() + // single error should be flattened + must.EqError(t, err, "must include at least one capability block") + + invalid = &HostVolume{ + ID: "../../not-a-uuid", + Name: "example", + PluginID: "example-plugin", + Constraints: []*Constraint{{ + RTarget: "r1", + Operand: "=", + }}, + RequestedCapacityMinBytes: 200000, + RequestedCapacityMaxBytes: 100000, + RequestedCapabilities: []*HostVolumeCapability{ + { + AttachmentMode: HostVolumeAttachmentModeFilesystem, + AccessMode: HostVolumeAccessModeSingleNodeWriter, + }, + { + AttachmentMode: "bad", + AccessMode: "invalid", + }, + }, + } + err = invalid.Validate() + must.EqError(t, err, `4 errors occurred: + * invalid ID + * capacity_max (100000) must be larger than capacity_min (200000) + * invalid attachment mode: "bad" + * invalid constraint: 1 error occurred: + * No LTarget provided but is required by constraint + + + +`) + + vol := &HostVolume{ + Namespace: DefaultNamespace, + ID: uuid.Generate(), + Name: "example", + PluginID: "example-plugin", + NodePool: NodePoolDefault, + NodeID: uuid.Generate(), + Constraints: []*Constraint{{ + LTarget: "${meta.rack}", + RTarget: "r1", + Operand: "=", + }}, + RequestedCapacityMinBytes: 100000, + RequestedCapacityMaxBytes: 200000, + CapacityBytes: 150000, + RequestedCapabilities: []*HostVolumeCapability{{ + AttachmentMode: HostVolumeAttachmentModeFilesystem, + AccessMode: HostVolumeAccessModeSingleNodeWriter, + }}, + Parameters: map[string]string{"foo": "bar"}, + } + must.NoError(t, vol.Validate()) +} + +func TestHostVolume_ValidateUpdate(t *testing.T) { + ci.Parallel(t) + + vol := &HostVolume{ + NodePool: NodePoolDefault, + NodeID: uuid.Generate(), + RequestedCapacityMinBytes: 100000, + RequestedCapacityMaxBytes: 120000, + Parameters: map[string]string{"baz": "qux"}, + } + err := 
vol.ValidateUpdate(nil) + must.NoError(t, err) + + existing := &HostVolume{ + NodePool: "prod", + NodeID: uuid.Generate(), + RequestedCapacityMinBytes: 100000, + RequestedCapacityMaxBytes: 200000, + CapacityBytes: 150000, + RequestedCapabilities: []*HostVolumeCapability{{ + AttachmentMode: HostVolumeAttachmentModeFilesystem, + AccessMode: HostVolumeAccessModeSingleNodeWriter, + }}, + Parameters: map[string]string{"foo": "bar"}, + Allocations: []*AllocListStub{ + {ID: "6bd66bfa"}, + {ID: "7032e570"}, + }, + } + + err = vol.ValidateUpdate(existing) + must.EqError(t, err, `4 errors occurred: + * cannot update a volume in use: claimed by allocs (6bd66bfa, 7032e570) + * node ID cannot be updated + * node pool cannot be updated + * capacity_max (120000) cannot be less than existing provisioned capacity (150000) + +`) + +} + +func TestHostVolume_CanonicalizeForUpdate(t *testing.T) { + now := time.Now() + vol := &HostVolume{ + CapacityBytes: 100000, + HostPath: "/etc/passwd", + Allocations: []*AllocListStub{ + {ID: "6bd66bfa"}, + {ID: "7032e570"}, + }, + } + vol.CanonicalizeForUpdate(nil, now) + + must.NotEq(t, "", vol.ID) + must.Eq(t, now.UnixNano(), vol.CreateTime) + must.Eq(t, now.UnixNano(), vol.ModifyTime) + must.Eq(t, HostVolumeStatePending, vol.State) + must.Nil(t, vol.Allocations) + must.Eq(t, "", vol.HostPath) + must.Zero(t, vol.CapacityBytes) + + vol = &HostVolume{ + ID: "82f357d6-a5ec-11ef-9e36-3f9884222736", + RequestedCapacityMinBytes: 100000, + RequestedCapacityMaxBytes: 500000, + RequestedCapabilities: []*HostVolumeCapability{{ + AttachmentMode: HostVolumeAttachmentModeFilesystem, + AccessMode: HostVolumeAccessModeSingleNodeMultiWriter, + }}, + } + existing := &HostVolume{ + ID: "82f357d6-a5ec-11ef-9e36-3f9884222736", + PluginID: "example_plugin", + NodePool: "prod", + NodeID: uuid.Generate(), + RequestedCapacityMinBytes: 100000, + RequestedCapacityMaxBytes: 200000, + CapacityBytes: 150000, + RequestedCapabilities: []*HostVolumeCapability{{ + AttachmentMode: HostVolumeAttachmentModeFilesystem, + AccessMode: HostVolumeAccessModeSingleNodeWriter, + }}, + Constraints: []*Constraint{{ + LTarget: "${meta.rack}", + RTarget: "r1", + Operand: "=", + }}, + Parameters: map[string]string{"foo": "bar"}, + Allocations: []*AllocListStub{ + {ID: "6bd66bfa"}, + {ID: "7032e570"}, + }, + HostPath: "/var/nomad/alloc_mounts/82f357d6.ext4", + CreateTime: 1, + } + + vol.CanonicalizeForUpdate(existing, now) + must.Eq(t, existing.ID, vol.ID) + must.Eq(t, existing.PluginID, vol.PluginID) + must.Eq(t, existing.NodePool, vol.NodePool) + must.Eq(t, existing.NodeID, vol.NodeID) + must.Eq(t, []*Constraint{{ + LTarget: "${meta.rack}", + RTarget: "r1", + Operand: "=", + }}, vol.Constraints) + must.Eq(t, 100000, vol.RequestedCapacityMinBytes) + must.Eq(t, 500000, vol.RequestedCapacityMaxBytes) + must.Eq(t, 150000, vol.CapacityBytes) + + must.Eq(t, []*HostVolumeCapability{{ + AttachmentMode: HostVolumeAttachmentModeFilesystem, + AccessMode: HostVolumeAccessModeSingleNodeMultiWriter, + }}, vol.RequestedCapabilities) + + must.Eq(t, "/var/nomad/alloc_mounts/82f357d6.ext4", vol.HostPath) + must.Eq(t, HostVolumeStatePending, vol.State) + + must.Eq(t, existing.CreateTime, vol.CreateTime) + must.Eq(t, now.UnixNano(), vol.ModifyTime) + must.Nil(t, vol.Allocations) + +} diff --git a/nomad/structs/search.go b/nomad/structs/search.go index b71798c2194..53aebc01e2a 100644 --- a/nomad/structs/search.go +++ b/nomad/structs/search.go @@ -22,6 +22,7 @@ const ( Plugins Context = "plugins" Variables Context = "vars" Volumes Context = 
"volumes" + HostVolumes Context = "host_volumes" // Subtypes used in fuzzy matching. Groups Context = "groups" diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index b3f0e8e4ac2..e55089f3e12 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -132,6 +132,10 @@ const ( NamespaceUpsertRequestType MessageType = 64 NamespaceDeleteRequestType MessageType = 65 + // MessageTypes 66-74 are in Nomad Enterprise + HostVolumeRegisterRequestType MessageType = 75 + HostVolumeDeleteRequestType MessageType = 76 + // NOTE: MessageTypes are shared between CE and ENT. If you need to add a // new type, check that ENT is not already using that value. ) @@ -11110,6 +11114,13 @@ type Allocation struct { // AllocatedResources is the total resources allocated for the task group. AllocatedResources *AllocatedResources + // HostVolumeIDs is a list of host volume IDs that this allocation + // has claimed. + HostVolumeIDs []string + + // CSIVolumeIDs is a list of CSI volume IDs that this allocation has claimed. + CSIVolumeIDs []string + // Metrics associated with this allocation Metrics *AllocMetric diff --git a/nomad/structs/volumes.go b/nomad/structs/volumes.go index daacd5d8670..f474fe1d28e 100644 --- a/nomad/structs/volumes.go +++ b/nomad/structs/volumes.go @@ -31,6 +31,18 @@ type ClientHostVolumeConfig struct { Name string `hcl:",key"` Path string `hcl:"path"` ReadOnly bool `hcl:"read_only"` + // ID is set for dynamic host volumes only. + ID string `hcl:"-"` +} + +func (p *ClientHostVolumeConfig) Equal(o *ClientHostVolumeConfig) bool { + if p == nil && o == nil { + return true + } + if p == nil || o == nil { + return false + } + return *p == *o } func (p *ClientHostVolumeConfig) Copy() *ClientHostVolumeConfig { @@ -91,12 +103,14 @@ func HostVolumeSliceMerge(a, b []*ClientHostVolumeConfig) []*ClientHostVolumeCon return n } -// VolumeRequest is a representation of a storage volume that a TaskGroup wishes to use. +// VolumeRequest is a representation of a storage volume that a TaskGroup wishes +// to use. 
type VolumeRequest struct { Name string Type string Source string ReadOnly bool + Sticky bool AccessMode CSIVolumeAccessMode AttachmentMode CSIVolumeAttachmentMode MountOptions *CSIMountOptions @@ -116,6 +130,8 @@ func (v *VolumeRequest) Equal(o *VolumeRequest) bool { return false case v.ReadOnly != o.ReadOnly: return false + case v.Sticky != o.Sticky: + return false case v.AccessMode != o.AccessMode: return false case v.AttachmentMode != o.AttachmentMode: @@ -149,21 +165,30 @@ func (v *VolumeRequest) Validate(jobType string, taskGroupCount, canaries int) e if canaries > 0 { addErr("volume cannot be per_alloc when canaries are in use") } + if v.Sticky { + addErr("volume cannot be per_alloc and sticky at the same time") + } } switch v.Type { case VolumeTypeHost: - if v.AttachmentMode != CSIVolumeAttachmentModeUnknown { - addErr("host volumes cannot have an attachment mode") - } - if v.AccessMode != CSIVolumeAccessModeUnknown { - addErr("host volumes cannot have an access mode") - } if v.MountOptions != nil { + // TODO(1.10.0): support mount options for dynamic host volumes addErr("host volumes cannot have mount options") } + switch v.AccessMode { + case CSIVolumeAccessModeSingleNodeReader, CSIVolumeAccessModeMultiNodeReader: + if !v.ReadOnly { + addErr("%s volumes must be read-only", v.AccessMode) + } + default: + // dynamic host volumes are all "per node" so there's no way to + // validate that other access modes work for a given volume until we + // have access to other allocations (in the scheduler) + } + case VolumeTypeCSI: switch v.AttachmentMode { diff --git a/nomad/structs/volume_test.go b/nomad/structs/volumes_test.go similarity index 93% rename from nomad/structs/volume_test.go rename to nomad/structs/volumes_test.go index 02e0715d1a3..fb5a1a04d64 100644 --- a/nomad/structs/volume_test.go +++ b/nomad/structs/volumes_test.go @@ -9,7 +9,6 @@ import ( "github.com/hashicorp/nomad/ci" "github.com/shoenig/test/must" - "github.com/stretchr/testify/require" ) func TestVolumeRequest_Validate(t *testing.T) { @@ -32,9 +31,9 @@ func TestVolumeRequest_Validate(t *testing.T) { { name: "host volume with CSI volume config", expected: []string{ - "host volumes cannot have an access mode", - "host volumes cannot have an attachment mode", + "volume has an empty source", "host volumes cannot have mount options", + "single-node-reader-only volumes must be read-only", "volume cannot be per_alloc for system or sysbatch jobs", "volume cannot be per_alloc when canaries are in use", }, @@ -86,13 +85,24 @@ func TestVolumeRequest_Validate(t *testing.T) { PerAlloc: true, }, }, + { + name: "per_alloc sticky", + expected: []string{ + "volume cannot be per_alloc and sticky at the same time", + }, + req: &VolumeRequest{ + Type: VolumeTypeCSI, + PerAlloc: true, + Sticky: true, + }, + }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { err := tc.req.Validate(JobTypeSystem, tc.taskGroupCount, tc.canariesCount) for _, expected := range tc.expected { - require.Contains(t, err.Error(), expected) + must.StrContains(t, err.Error(), expected) } }) } diff --git a/scheduler/context.go b/scheduler/context.go index 887607cf3be..e48cefc3918 100644 --- a/scheduler/context.go +++ b/scheduler/context.go @@ -51,6 +51,13 @@ type Context interface { SendEvent(event interface{}) } +type ConstraintContext interface { + Metrics() *structs.AllocMetric + RegexpCache() map[string]*regexp.Regexp + VersionConstraintCache() map[string]VerConstraints + SemverConstraintCache() 
map[string]VerConstraints +} + // EvalCache is used to cache certain things during an evaluation type EvalCache struct { reCache map[string]*regexp.Regexp diff --git a/scheduler/feasible.go b/scheduler/feasible.go index 9ff3878baac..fa1800b2ae0 100644 --- a/scheduler/feasible.go +++ b/scheduler/feasible.go @@ -8,6 +8,7 @@ import ( "fmt" "reflect" "regexp" + "slices" "strconv" "strings" @@ -137,40 +138,43 @@ func NewRandomIterator(ctx Context, nodes []*structs.Node) *StaticIterator { // HostVolumeChecker is a FeasibilityChecker which returns whether a node has // the host volumes necessary to schedule a task group. type HostVolumeChecker struct { - ctx Context - - // volumes is a map[HostVolumeName][]RequestedVolume. The requested volumes are - // a slice because a single task group may request the same volume multiple times. - volumes map[string][]*structs.VolumeRequest + ctx Context + volumeReqs []*structs.VolumeRequest + hostVolumeIDs []string + namespace string } // NewHostVolumeChecker creates a HostVolumeChecker from a set of volumes func NewHostVolumeChecker(ctx Context) *HostVolumeChecker { return &HostVolumeChecker{ - ctx: ctx, + ctx: ctx, + volumeReqs: []*structs.VolumeRequest{}, + hostVolumeIDs: []string{}, } } // SetVolumes takes the volumes required by a task group and updates the checker. -func (h *HostVolumeChecker) SetVolumes(allocName string, volumes map[string]*structs.VolumeRequest) { - lookupMap := make(map[string][]*structs.VolumeRequest) - // Convert the map from map[DesiredName]Request to map[Source][]Request to improve - // lookup performance. Also filter non-host volumes. +func (h *HostVolumeChecker) SetVolumes( + allocName, ns string, volumes map[string]*structs.VolumeRequest, allocHostVolumeIDs []string, +) { + h.namespace = ns + h.volumeReqs = []*structs.VolumeRequest{} + h.hostVolumeIDs = allocHostVolumeIDs for _, req := range volumes { if req.Type != structs.VolumeTypeHost { - continue + continue // filter CSI volumes } if req.PerAlloc { // provide a unique volume source per allocation copied := req.Copy() copied.Source = copied.Source + structs.AllocSuffix(allocName) - lookupMap[copied.Source] = append(lookupMap[copied.Source], copied) + h.volumeReqs = append(h.volumeReqs, copied) + } else { - lookupMap[req.Source] = append(lookupMap[req.Source], req) + h.volumeReqs = append(h.volumeReqs, req) } } - h.volumes = lookupMap } func (h *HostVolumeChecker) Feasible(candidate *structs.Node) bool { @@ -183,38 +187,135 @@ func (h *HostVolumeChecker) Feasible(candidate *structs.Node) bool { } func (h *HostVolumeChecker) hasVolumes(n *structs.Node) bool { - rLen := len(h.volumes) - hLen := len(n.HostVolumes) - // Fast path: Requested no volumes. No need to check further. - if rLen == 0 { + if len(h.volumeReqs) == 0 { return true } - // Fast path: Requesting more volumes than the node has, can't meet the criteria. - if rLen > hLen { - return false + proposed, err := h.ctx.ProposedAllocs(n.ID) + if err != nil { + return false // only hit this on state store invariant failure } - for source, requests := range h.volumes { - nodeVolume, ok := n.HostVolumes[source] + for _, req := range h.volumeReqs { + volCfg, ok := n.HostVolumes[req.Source] if !ok { return false } - // If the volume supports being mounted as ReadWrite, we do not need to - // do further validation for readonly placement. 
- if !nodeVolume.ReadOnly { - continue + if volCfg.ID != "" { // dynamic host volume + vol, err := h.ctx.State().HostVolumeByID(nil, h.namespace, volCfg.ID, false) + if err != nil || vol == nil { + // node fingerprint has a dynamic volume that's no longer in the + // state store; this is only possible if the batched fingerprint + // update from a delete RPC is written before the delete RPC's + // raft entry completes + return false + } + if !h.hostVolumeIsAvailable(vol, + structs.HostVolumeAccessMode(req.AccessMode), + structs.HostVolumeAttachmentMode(req.AttachmentMode), + req.ReadOnly, + proposed, + ) { + return false + } + + if req.Sticky { + if slices.Contains(h.hostVolumeIDs, vol.ID) || len(h.hostVolumeIDs) == 0 { + return true + } + + return false + } + + } else if !req.ReadOnly { + // this is a static host volume and can only be mounted ReadOnly, + // validate that no requests for it are ReadWrite. + if volCfg.ReadOnly { + return false + } } + } - // The Volume can only be mounted ReadOnly, validate that no requests for - // it are ReadWrite. - for _, req := range requests { - if !req.ReadOnly { + return true +} + +// hostVolumeIsAvailable determines if a dynamic host volume is available for a request +func (h *HostVolumeChecker) hostVolumeIsAvailable( + vol *structs.HostVolume, + reqAccess structs.HostVolumeAccessMode, + reqAttach structs.HostVolumeAttachmentMode, + readOnly bool, + proposed []*structs.Allocation) bool { + + if vol.State != structs.HostVolumeStateReady { + return false + } + + // pick a default capability based on the read-only flag. this happens here + // in the scheduler rather than job submit because we don't know whether a + // host volume is dynamic or not until we try to schedule it (ex. the same + // name could be static on one node and dynamic on another) + if reqAccess == structs.HostVolumeAccessModeUnknown { + if readOnly { + reqAccess = structs.HostVolumeAccessModeSingleNodeReader + } else { + reqAccess = structs.HostVolumeAccessModeSingleNodeWriter + } + } + if reqAttach == structs.HostVolumeAttachmentModeUnknown { + reqAttach = structs.HostVolumeAttachmentModeFilesystem + } + + // check that the volume has the requested capability at all + var capOk bool + for _, cap := range vol.RequestedCapabilities { + if reqAccess == cap.AccessMode && + reqAttach == cap.AttachmentMode { + capOk = true + break + } + } + if !capOk { + return false + } + + switch reqAccess { + case structs.HostVolumeAccessModeSingleNodeReader: + return readOnly + case structs.HostVolumeAccessModeSingleNodeWriter: + return !readOnly + case structs.HostVolumeAccessModeSingleNodeSingleWriter: + // examine all proposed allocs on the node, including those that might + // not have yet been persisted. 
they have nil pointers to their Job, so + // we have to go back to the state store to get them + seen := map[string]struct{}{} + for _, alloc := range proposed { + uniqueGroup := alloc.JobNamespacedID().String() + alloc.TaskGroup + if _, ok := seen[uniqueGroup]; ok { + // all allocs for the same group will have the same read-only + // flag and capabilities, so we only need to check a given group + // once + continue + } + seen[uniqueGroup] = struct{}{} + job, err := h.ctx.State().JobByID(nil, alloc.Namespace, alloc.JobID) + if err != nil { return false } + tg := job.LookupTaskGroup(alloc.TaskGroup) + for _, req := range tg.Volumes { + if req.Type == structs.VolumeTypeHost && req.Source == vol.Name { + if !req.ReadOnly { + return false + } + } + } } + + case structs.HostVolumeAccessModeSingleNodeMultiWriter: + // no contraint } return true @@ -752,12 +853,12 @@ func (iter *DistinctPropertyIterator) Reset() { // given set of constraints. This is used to filter on job, task group, and task // constraints. type ConstraintChecker struct { - ctx Context + ctx ConstraintContext constraints []*structs.Constraint } // NewConstraintChecker creates a ConstraintChecker for a set of constraints -func NewConstraintChecker(ctx Context, constraints []*structs.Constraint) *ConstraintChecker { +func NewConstraintChecker(ctx ConstraintContext, constraints []*structs.Constraint) *ConstraintChecker { return &ConstraintChecker{ ctx: ctx, constraints: constraints, @@ -830,7 +931,7 @@ func resolveTarget(target string, node *structs.Node) (string, bool) { // checkConstraint checks if a constraint is satisfied. The lVal and rVal // interfaces may be nil. -func checkConstraint(ctx Context, operand string, lVal, rVal interface{}, lFound, rFound bool) bool { +func checkConstraint(ctx ConstraintContext, operand string, lVal, rVal interface{}, lFound, rFound bool) bool { // Check for constraints not handled by this checker. 
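	// (Refactor note: constraint checking only needs the metrics and the
	// regexp/version/semver caches, which is what the new ConstraintContext
	// interface exposes; helpers that never used the context at all, such as
	// checkVersionMatch, checkAttributeVersionMatch, and checkSetContainsAll,
	// drop the parameter entirely.)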
switch operand { case structs.ConstraintDistinctHosts, structs.ConstraintDistinctProperty: @@ -852,14 +953,14 @@ func checkConstraint(ctx Context, operand string, lVal, rVal interface{}, lFound return !lFound case structs.ConstraintVersion: parser := newVersionConstraintParser(ctx) - return lFound && rFound && checkVersionMatch(ctx, parser, lVal, rVal) + return lFound && rFound && checkVersionMatch(parser, lVal, rVal) case structs.ConstraintSemver: parser := newSemverConstraintParser(ctx) - return lFound && rFound && checkVersionMatch(ctx, parser, lVal, rVal) + return lFound && rFound && checkVersionMatch(parser, lVal, rVal) case structs.ConstraintRegex: return lFound && rFound && checkRegexpMatch(ctx, lVal, rVal) case structs.ConstraintSetContains, structs.ConstraintSetContainsAll: - return lFound && rFound && checkSetContainsAll(ctx, lVal, rVal) + return lFound && rFound && checkSetContainsAll(lVal, rVal) case structs.ConstraintSetContainsAny: return lFound && rFound && checkSetContainsAny(lVal, rVal) default: @@ -943,7 +1044,7 @@ func compareOrder[T cmp.Ordered](op string, left, right T) bool { // checkVersionMatch is used to compare a version on the // left hand side with a set of constraints on the right hand side -func checkVersionMatch(_ Context, parse verConstraintParser, lVal, rVal interface{}) bool { +func checkVersionMatch(parse verConstraintParser, lVal, rVal interface{}) bool { // Parse the version var versionStr string switch v := lVal.(type) { @@ -979,7 +1080,7 @@ func checkVersionMatch(_ Context, parse verConstraintParser, lVal, rVal interfac // checkAttributeVersionMatch is used to compare a version on the // left hand side with a set of constraints on the right hand side -func checkAttributeVersionMatch(_ Context, parse verConstraintParser, lVal, rVal *psstructs.Attribute) bool { +func checkAttributeVersionMatch(parse verConstraintParser, lVal, rVal *psstructs.Attribute) bool { // Parse the version var versionStr string if s, ok := lVal.GetString(); ok { @@ -1014,7 +1115,7 @@ func checkAttributeVersionMatch(_ Context, parse verConstraintParser, lVal, rVal // checkRegexpMatch is used to compare a value on the // left hand side with a regexp on the right hand side -func checkRegexpMatch(ctx Context, lVal, rVal interface{}) bool { +func checkRegexpMatch(ctx ConstraintContext, lVal, rVal interface{}) bool { // Ensure left-hand is string lStr, ok := lVal.(string) if !ok { @@ -1047,7 +1148,7 @@ func checkRegexpMatch(ctx Context, lVal, rVal interface{}) bool { // checkSetContainsAll is used to see if the left hand side contains the // string on the right hand side -func checkSetContainsAll(_ Context, lVal, rVal interface{}) bool { +func checkSetContainsAll(lVal, rVal interface{}) bool { // Ensure left-hand is string lStr, ok := lVal.(string) if !ok { @@ -1424,7 +1525,7 @@ func resolveDeviceTarget(target string, d *structs.NodeDeviceResource) (*psstruc // checkAttributeConstraint checks if a constraint is satisfied. nil equality // comparisons are considered to be false. -func checkAttributeConstraint(ctx Context, operand string, lVal, rVal *psstructs.Attribute, lFound, rFound bool) bool { +func checkAttributeConstraint(ctx ConstraintContext, operand string, lVal, rVal *psstructs.Attribute, lFound, rFound bool) bool { // Check for constraints not handled by this checker. 
switch operand { case structs.ConstraintDistinctHosts, structs.ConstraintDistinctProperty: @@ -1484,7 +1585,7 @@ func checkAttributeConstraint(ctx Context, operand string, lVal, rVal *psstructs } parser := newVersionConstraintParser(ctx) - return checkAttributeVersionMatch(ctx, parser, lVal, rVal) + return checkAttributeVersionMatch(parser, lVal, rVal) case structs.ConstraintSemver: if !(lFound && rFound) { @@ -1492,7 +1593,7 @@ func checkAttributeConstraint(ctx Context, operand string, lVal, rVal *psstructs } parser := newSemverConstraintParser(ctx) - return checkAttributeVersionMatch(ctx, parser, lVal, rVal) + return checkAttributeVersionMatch(parser, lVal, rVal) case structs.ConstraintRegex: if !(lFound && rFound) { @@ -1516,7 +1617,7 @@ func checkAttributeConstraint(ctx Context, operand string, lVal, rVal *psstructs return false } - return checkSetContainsAll(ctx, ls, rs) + return checkSetContainsAll(ls, rs) case structs.ConstraintSetContainsAny: if !(lFound && rFound) { return false @@ -1550,7 +1651,7 @@ type VerConstraints interface { // or semver). type verConstraintParser func(verConstraint string) VerConstraints -func newVersionConstraintParser(ctx Context) verConstraintParser { +func newVersionConstraintParser(ctx ConstraintContext) verConstraintParser { cache := ctx.VersionConstraintCache() return func(cstr string) VerConstraints { @@ -1568,7 +1669,7 @@ func newVersionConstraintParser(ctx Context) verConstraintParser { } } -func newSemverConstraintParser(ctx Context) verConstraintParser { +func newSemverConstraintParser(ctx ConstraintContext) verConstraintParser { cache := ctx.SemverConstraintCache() return func(cstr string) VerConstraints { diff --git a/scheduler/feasible_test.go b/scheduler/feasible_test.go index f552b70c9f3..18a8153e83c 100644 --- a/scheduler/feasible_test.go +++ b/scheduler/feasible_test.go @@ -91,7 +91,7 @@ func TestRandomIterator(t *testing.T) { } } -func TestHostVolumeChecker(t *testing.T) { +func TestHostVolumeChecker_Static(t *testing.T) { ci.Parallel(t) _, ctx := testContext(t) @@ -177,20 +177,64 @@ func TestHostVolumeChecker(t *testing.T) { alloc.NodeID = nodes[2].ID for i, c := range cases { - checker.SetVolumes(alloc.Name, c.RequestedVolumes) + checker.SetVolumes(alloc.Name, structs.DefaultNamespace, c.RequestedVolumes, alloc.HostVolumeIDs) if act := checker.Feasible(c.Node); act != c.Result { t.Fatalf("case(%d) failed: got %v; want %v", i, act, c.Result) } } } -func TestHostVolumeChecker_ReadOnly(t *testing.T) { +func TestHostVolumeChecker_Dynamic(t *testing.T) { ci.Parallel(t) - _, ctx := testContext(t) + store, ctx := testContext(t) + nodes := []*structs.Node{ mock.Node(), mock.Node(), + mock.Node(), + mock.Node(), + mock.Node(), + } + + hostVolCapsReadWrite := []*structs.HostVolumeCapability{ + { + AttachmentMode: structs.HostVolumeAttachmentModeFilesystem, + AccessMode: structs.HostVolumeAccessModeSingleNodeReader, + }, + { + AttachmentMode: structs.HostVolumeAttachmentModeFilesystem, + AccessMode: structs.HostVolumeAccessModeSingleNodeWriter, + }, + } + hostVolCapsReadOnly := []*structs.HostVolumeCapability{{ + AttachmentMode: structs.HostVolumeAttachmentModeFilesystem, + AccessMode: structs.HostVolumeAccessModeSingleNodeReader, + }} + + dhvNotReady := &structs.HostVolume{ + Namespace: structs.DefaultNamespace, + ID: uuid.Generate(), + Name: "foo", + NodeID: nodes[2].ID, + RequestedCapabilities: hostVolCapsReadOnly, + State: structs.HostVolumeStateDeleted, + } + dhvReadOnly := &structs.HostVolume{ + Namespace: structs.DefaultNamespace, + ID: 
uuid.Generate(), + Name: "foo", + NodeID: nodes[3].ID, + RequestedCapabilities: hostVolCapsReadOnly, + State: structs.HostVolumeStateReady, + } + dhvReadWrite := &structs.HostVolume{ + Namespace: structs.DefaultNamespace, + ID: uuid.Generate(), + Name: "foo", + NodeID: nodes[4].ID, + RequestedCapabilities: hostVolCapsReadWrite, + State: structs.HostVolumeStateReady, } nodes[0].HostVolumes = map[string]*structs.ClientHostVolumeConfig{ @@ -203,6 +247,23 @@ func TestHostVolumeChecker_ReadOnly(t *testing.T) { ReadOnly: false, }, } + nodes[2].HostVolumes = map[string]*structs.ClientHostVolumeConfig{ + "foo": {ID: dhvNotReady.ID}, + } + nodes[3].HostVolumes = map[string]*structs.ClientHostVolumeConfig{ + "foo": {ID: dhvReadOnly.ID}, + } + nodes[4].HostVolumes = map[string]*structs.ClientHostVolumeConfig{ + "foo": {ID: dhvReadWrite.ID}, + } + + for _, node := range nodes { + must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, 1000, node)) + } + + must.NoError(t, store.UpsertHostVolume(1000, dhvNotReady)) + must.NoError(t, store.UpsertHostVolume(1000, dhvReadOnly)) + must.NoError(t, store.UpsertHostVolume(1000, dhvReadWrite)) readwriteRequest := map[string]*structs.VolumeRequest{ "foo": { @@ -219,43 +280,346 @@ func TestHostVolumeChecker_ReadOnly(t *testing.T) { }, } + dhvReadOnlyRequest := map[string]*structs.VolumeRequest{ + "foo": { + Type: "host", + Source: "foo", + ReadOnly: true, + AccessMode: structs.CSIVolumeAccessModeSingleNodeReader, + AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem, + }, + } + dhvReadWriteRequest := map[string]*structs.VolumeRequest{ + "foo": { + Type: "host", + Source: "foo", + AccessMode: structs.CSIVolumeAccessModeSingleNodeWriter, + AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem, + }, + } + checker := NewHostVolumeChecker(ctx) cases := []struct { - Node *structs.Node - RequestedVolumes map[string]*structs.VolumeRequest - Result bool + name string + node *structs.Node + requestedVolumes map[string]*structs.VolumeRequest + expect bool }{ - { // ReadWrite Request, ReadOnly Host - Node: nodes[0], - RequestedVolumes: readwriteRequest, - Result: false, + { + name: "read-write request / read-only host", + node: nodes[0], + requestedVolumes: readwriteRequest, + expect: false, }, - { // ReadOnly Request, ReadOnly Host - Node: nodes[0], - RequestedVolumes: readonlyRequest, - Result: true, + { + name: "read-only request / read-only host", + node: nodes[0], + requestedVolumes: readonlyRequest, + expect: true, }, - { // ReadOnly Request, ReadWrite Host - Node: nodes[1], - RequestedVolumes: readonlyRequest, - Result: true, + { + name: "read-only request / read-write host", + node: nodes[1], + requestedVolumes: readonlyRequest, + expect: true, }, - { // ReadWrite Request, ReadWrite Host - Node: nodes[1], - RequestedVolumes: readwriteRequest, - Result: true, + { + name: "read-write request / read-write host", + node: nodes[1], + requestedVolumes: readwriteRequest, + expect: true, + }, + { + name: "dynamic single-reader request / host not ready", + node: nodes[2], + requestedVolumes: dhvReadOnlyRequest, + expect: false, + }, + { + name: "dynamic single-reader request / caps match", + node: nodes[3], + requestedVolumes: dhvReadOnlyRequest, + expect: true, + }, + { + name: "dynamic single-reader request / no matching cap", + node: nodes[4], + requestedVolumes: dhvReadOnlyRequest, + expect: true, + }, + { + name: "dynamic single-writer request / caps match", + node: nodes[4], + requestedVolumes: dhvReadWriteRequest, + expect: true, }, } alloc := mock.Alloc() 
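	// SetVolumes now also takes the request namespace and the alloc's
	// previously claimed dynamic host volume IDs; this alloc has no prior
	// claims, so an empty claim list flows through below.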
alloc.NodeID = nodes[1].ID - for i, c := range cases { - checker.SetVolumes(alloc.Name, c.RequestedVolumes) - if act := checker.Feasible(c.Node); act != c.Result { - t.Fatalf("case(%d) failed: got %v; want %v", i, act, c.Result) + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + checker.SetVolumes(alloc.Name, structs.DefaultNamespace, tc.requestedVolumes, alloc.HostVolumeIDs) + actual := checker.Feasible(tc.node) + must.Eq(t, tc.expect, actual) + }) + } +} + +func TestHostVolumeChecker_Sticky(t *testing.T) { + ci.Parallel(t) + + store, ctx := testContext(t) + + nodes := []*structs.Node{ + mock.Node(), + mock.Node(), + } + + hostVolCapsReadWrite := []*structs.HostVolumeCapability{ + { + AttachmentMode: structs.HostVolumeAttachmentModeFilesystem, + AccessMode: structs.HostVolumeAccessModeSingleNodeReader, + }, + { + AttachmentMode: structs.HostVolumeAttachmentModeFilesystem, + AccessMode: structs.HostVolumeAccessModeSingleNodeWriter, + }, + } + + dhv := &structs.HostVolume{ + Namespace: structs.DefaultNamespace, + ID: uuid.Generate(), + Name: "foo", + NodeID: nodes[1].ID, + RequestedCapabilities: hostVolCapsReadWrite, + State: structs.HostVolumeStateReady, + } + + nodes[0].HostVolumes = map[string]*structs.ClientHostVolumeConfig{} + nodes[1].HostVolumes = map[string]*structs.ClientHostVolumeConfig{"foo": {ID: dhv.ID}} + + for _, node := range nodes { + must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, 1000, node)) + } + must.NoError(t, store.UpsertHostVolume(1000, dhv)) + + stickyRequest := map[string]*structs.VolumeRequest{ + "foo": { + Type: "host", + Source: "foo", + Sticky: true, + AccessMode: structs.CSIVolumeAccessModeSingleNodeWriter, + AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem, + }, + } + + checker := NewHostVolumeChecker(ctx) + + // alloc0 wants a previously registered volume ID that's available on node1 + alloc0 := mock.Alloc() + alloc0.NodeID = nodes[1].ID + alloc0.HostVolumeIDs = []string{dhv.ID} + + // alloc1 wants a volume ID that's available on node1 but hasn't used it + // before + alloc1 := mock.Alloc() + alloc1.NodeID = nodes[1].ID + + // alloc2 wants a volume ID that's unrelated + alloc2 := mock.Alloc() + alloc2.NodeID = nodes[1].ID + alloc2.HostVolumeIDs = []string{uuid.Generate()} + + // insert all the allocs into the state + must.NoError(t, store.UpsertAllocs(structs.MsgTypeTestSetup, 1000, []*structs.Allocation{alloc0, alloc1, alloc2})) + + cases := []struct { + name string + node *structs.Node + alloc *structs.Allocation + expect bool + }{ + { + "alloc asking for a sticky volume on an infeasible node", + nodes[0], + alloc0, + false, + }, + { + "alloc asking for a sticky volume on a feasible node", + nodes[1], + alloc0, + true, + }, + { + "alloc asking for a sticky volume on a feasible node for the first time", + nodes[1], + alloc1, + true, + }, + { + "alloc asking for an unrelated volume", + nodes[1], + alloc2, + false, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + checker.SetVolumes(tc.alloc.Name, structs.DefaultNamespace, stickyRequest, tc.alloc.HostVolumeIDs) + actual := checker.Feasible(tc.node) + must.Eq(t, tc.expect, actual) + }) + } +} + +// TestDynamicHostVolumeIsAvailable provides fine-grained coverage of the +// hostVolumeIsAvailable method +func TestDynamicHostVolumeIsAvailable(t *testing.T) { + + store, ctx := testContext(t) + + allCaps := []*structs.HostVolumeCapability{} + + for _, accessMode := range []structs.HostVolumeAccessMode{ + 
structs.HostVolumeAccessModeSingleNodeReader, + structs.HostVolumeAccessModeSingleNodeWriter, + structs.HostVolumeAccessModeSingleNodeSingleWriter, + structs.HostVolumeAccessModeSingleNodeMultiWriter, + } { + for _, attachMode := range []structs.HostVolumeAttachmentMode{ + structs.HostVolumeAttachmentModeFilesystem, + structs.HostVolumeAttachmentModeBlockDevice, + } { + allCaps = append(allCaps, &structs.HostVolumeCapability{ + AttachmentMode: attachMode, + AccessMode: accessMode, + }) } } + + jobReader, jobWriter := mock.Job(), mock.Job() + jobReader.TaskGroups[0].Volumes = map[string]*structs.VolumeRequest{ + "example": { + Type: structs.VolumeTypeHost, + Source: "example", + ReadOnly: true, + }, + } + jobWriter.TaskGroups[0].Volumes = map[string]*structs.VolumeRequest{ + "example": { + Type: structs.VolumeTypeHost, + Source: "example", + }, + } + index, _ := store.LatestIndex() + index++ + must.NoError(t, store.UpsertJob(structs.MsgTypeTestSetup, index, nil, jobReader)) + index++ + must.NoError(t, store.UpsertJob(structs.MsgTypeTestSetup, index, nil, jobWriter)) + + allocReader0, allocReader1 := mock.Alloc(), mock.Alloc() + allocReader0.JobID = jobReader.ID + allocReader1.JobID = jobReader.ID + + allocWriter0, allocWriter1 := mock.Alloc(), mock.Alloc() + allocWriter0.JobID = jobWriter.ID + allocWriter1.JobID = jobWriter.ID + + index++ + must.NoError(t, store.UpsertAllocs(structs.MsgTypeTestSetup, index, + []*structs.Allocation{allocReader0, allocReader1, allocWriter0, allocWriter1})) + + testCases := []struct { + name string + hasProposed []*structs.Allocation + hasCaps []*structs.HostVolumeCapability + wantAccess structs.HostVolumeAccessMode + wantAttach structs.HostVolumeAttachmentMode + readOnly bool + expect bool + }{ + { + name: "enforce attachment mode", + hasCaps: []*structs.HostVolumeCapability{{ + AttachmentMode: structs.HostVolumeAttachmentModeBlockDevice, + AccessMode: structs.HostVolumeAccessModeSingleNodeSingleWriter, + }}, + wantAttach: structs.HostVolumeAttachmentModeFilesystem, + wantAccess: structs.HostVolumeAccessModeSingleNodeSingleWriter, + expect: false, + }, + { + name: "enforce read only", + hasProposed: []*structs.Allocation{allocReader0, allocReader1}, + wantAttach: structs.HostVolumeAttachmentModeFilesystem, + wantAccess: structs.HostVolumeAccessModeSingleNodeReader, + expect: false, + }, + { + name: "enforce read only ok", + hasProposed: []*structs.Allocation{allocReader0, allocReader1}, + wantAttach: structs.HostVolumeAttachmentModeFilesystem, + wantAccess: structs.HostVolumeAccessModeSingleNodeReader, + readOnly: true, + expect: true, + }, + { + name: "enforce single writer", + hasProposed: []*structs.Allocation{allocReader0, allocReader1, allocWriter0}, + wantAttach: structs.HostVolumeAttachmentModeFilesystem, + wantAccess: structs.HostVolumeAccessModeSingleNodeSingleWriter, + expect: false, + }, + { + name: "enforce single writer ok", + hasProposed: []*structs.Allocation{allocReader0, allocReader1}, + wantAttach: structs.HostVolumeAttachmentModeFilesystem, + wantAccess: structs.HostVolumeAccessModeSingleNodeSingleWriter, + expect: true, + }, + { + name: "multi writer is always ok", + hasProposed: []*structs.Allocation{allocReader0, allocWriter0, allocWriter1}, + wantAttach: structs.HostVolumeAttachmentModeFilesystem, + wantAccess: structs.HostVolumeAccessModeSingleNodeMultiWriter, + expect: true, + }, + { + name: "default capabilities ok", + expect: true, + }, + { + name: "default capabilities fail", + readOnly: true, + hasCaps: 
[]*structs.HostVolumeCapability{{ + AttachmentMode: structs.HostVolumeAttachmentModeBlockDevice, + AccessMode: structs.HostVolumeAccessModeSingleNodeSingleWriter, + }}, + expect: false, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + vol := &structs.HostVolume{ + Name: "example", + State: structs.HostVolumeStateReady, + } + if len(tc.hasCaps) > 0 { + vol.RequestedCapabilities = tc.hasCaps + } else { + vol.RequestedCapabilities = allCaps + } + checker := NewHostVolumeChecker(ctx) + must.Eq(t, tc.expect, checker.hostVolumeIsAvailable( + vol, tc.wantAccess, tc.wantAttach, tc.readOnly, tc.hasProposed)) + }) + } + } func TestCSIVolumeChecker(t *testing.T) { @@ -1263,7 +1627,7 @@ func TestCheckVersionConstraint(t *testing.T) { for _, tc := range cases { _, ctx := testContext(t) p := newVersionConstraintParser(ctx) - if res := checkVersionMatch(ctx, p, tc.lVal, tc.rVal); res != tc.result { + if res := checkVersionMatch(p, tc.lVal, tc.rVal); res != tc.result { t.Fatalf("TC: %#v, Result: %v", tc, res) } } @@ -1345,7 +1709,7 @@ func TestCheckSemverConstraint(t *testing.T) { t.Run(tc.name, func(t *testing.T) { _, ctx := testContext(t) p := newSemverConstraintParser(ctx) - actual := checkVersionMatch(ctx, p, tc.lVal, tc.rVal) + actual := checkVersionMatch(p, tc.lVal, tc.rVal) must.Eq(t, tc.result, actual) }) } diff --git a/scheduler/generic_sched.go b/scheduler/generic_sched.go index f9fd669e592..60b4f7f1eed 100644 --- a/scheduler/generic_sched.go +++ b/scheduler/generic_sched.go @@ -6,6 +6,7 @@ package scheduler import ( "fmt" "runtime/debug" + "slices" "sort" "time" @@ -657,6 +658,18 @@ func (s *GenericScheduler) computePlacements(destructive, place []placementResul "old_alloc_name", oldAllocName, "new_alloc_name", newAllocName) } + // Are there sticky volumes requested by the task group for the first time? If + // yes, make sure the allocation stores their IDs for future reschedules.
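+ // If the previous allocation already tracked host volume IDs, nothing new
+ // is recorded here; those IDs are copied from the previous allocation onto
+ // the replacement further below. Only first-time placements capture the ID
+ // of the requested volume on the selected node.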
+ var newHostVolumeIDs []string + for _, v := range tg.Volumes { + if v.Sticky { + if missing.PreviousAllocation() != nil && len(missing.PreviousAllocation().HostVolumeIDs) > 0 { + continue + } + newHostVolumeIDs = append(newHostVolumeIDs, option.Node.HostVolumes[v.Source].ID) + } + } + // Create an allocation for this alloc := &structs.Allocation{ ID: uuid.Generate(), @@ -681,6 +694,10 @@ func (s *GenericScheduler) computePlacements(destructive, place []placementResul }, } + if len(newHostVolumeIDs) > 0 { + alloc.HostVolumeIDs = newHostVolumeIDs + } + // If the new allocation is replacing an older allocation then we // set the record the older allocation id so that they are chained if prevAllocation != nil { @@ -689,6 +706,10 @@ func (s *GenericScheduler) computePlacements(destructive, place []placementResul updateRescheduleTracker(alloc, prevAllocation, now) } + if len(prevAllocation.HostVolumeIDs) > 0 { + alloc.HostVolumeIDs = prevAllocation.HostVolumeIDs + } + // If the allocation has task handles, // copy them to the new allocation propagateTaskState(alloc, prevAllocation, missing.PreviousLost()) @@ -838,6 +859,10 @@ func getSelectOptions(prevAllocation *structs.Allocation, preferredNode *structs } } selectOptions.PenaltyNodeIDs = penaltyNodes + + if prevAllocation.HostVolumeIDs != nil { + selectOptions.AllocationHostVolumeIDs = prevAllocation.HostVolumeIDs + } } if preferredNode != nil { selectOptions.PreferredNodes = []*structs.Node{preferredNode} @@ -910,6 +935,29 @@ func (s *GenericScheduler) findPreferredNode(place placementResult) (*structs.No return preferredNode, nil } } + + for _, vol := range place.TaskGroup().Volumes { + if !vol.Sticky { + continue + } + + var preferredNode *structs.Node + preferredNode, err := s.state.NodeByID(nil, prev.NodeID) + if err != nil { + return nil, err + } + + if preferredNode != nil && preferredNode.Ready() { + // if this node has at least one of the allocation volumes, it's a + // preferred one + for _, vol := range preferredNode.HostVolumes { + if slices.Contains(prev.HostVolumeIDs, vol.ID) { + return preferredNode, nil + } + } + } + } + return nil, nil } diff --git a/scheduler/generic_sched_test.go b/scheduler/generic_sched_test.go index adda5e2cb2a..3d236b5d289 100644 --- a/scheduler/generic_sched_test.go +++ b/scheduler/generic_sched_test.go @@ -218,6 +218,121 @@ func TestServiceSched_JobRegister_StickyAllocs(t *testing.T) { } } +func TestServiceSched_JobRegister_StickyHostVolumes(t *testing.T) { + ci.Parallel(t) + + h := NewHarness(t) + + nodes := []*structs.Node{ + mock.Node(), + mock.Node(), + } + + hostVolCapsReadWrite := []*structs.HostVolumeCapability{ + { + AttachmentMode: structs.HostVolumeAttachmentModeFilesystem, + AccessMode: structs.HostVolumeAccessModeSingleNodeReader, + }, + { + AttachmentMode: structs.HostVolumeAttachmentModeFilesystem, + AccessMode: structs.HostVolumeAccessModeSingleNodeWriter, + }, + } + + dhv := &structs.HostVolume{ + Namespace: structs.DefaultNamespace, + ID: uuid.Generate(), + Name: "foo", + NodeID: nodes[1].ID, + RequestedCapabilities: hostVolCapsReadWrite, + State: structs.HostVolumeStateReady, + } + + nodes[0].HostVolumes = map[string]*structs.ClientHostVolumeConfig{} + nodes[1].HostVolumes = map[string]*structs.ClientHostVolumeConfig{"foo": {ID: dhv.ID}} + + for _, node := range nodes { + must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, 1000, node)) + } + must.NoError(t, h.State.UpsertHostVolume(1000, dhv)) + + stickyRequest := map[string]*structs.VolumeRequest{ + "foo": { + Type: 
"host", + Source: "foo", + Sticky: true, + AccessMode: structs.CSIVolumeAccessModeSingleNodeWriter, + AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem, + }, + } + + // Create a job + job := mock.Job() + job.TaskGroups[0].Volumes = stickyRequest + must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job)) + + // Create a mock evaluation to register the job + eval := &structs.Evaluation{ + Namespace: structs.DefaultNamespace, + ID: uuid.Generate(), + Priority: job.Priority, + TriggeredBy: structs.EvalTriggerJobRegister, + JobID: job.ID, + Status: structs.EvalStatusPending, + } + must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval})) + + // Process the evaluation + must.NoError(t, h.Process(NewServiceScheduler, eval)) + + // Ensure the plan allocated + plan := h.Plans[0] + planned := make(map[string]*structs.Allocation) + for _, allocList := range plan.NodeAllocation { + for _, alloc := range allocList { + planned[alloc.ID] = alloc + } + } + must.MapLen(t, 10, planned) + + // Ensure that the allocations got the host volume ID added + for _, p := range planned { + must.Eq(t, p.PreviousAllocation, "") + must.Eq(t, p.HostVolumeIDs[0], dhv.ID) + } + + // Update the job to force a rolling upgrade + updated := job.Copy() + updated.TaskGroups[0].Tasks[0].Resources.CPU += 10 + must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, updated)) + + // Create a mock evaluation to handle the update + eval = &structs.Evaluation{ + Namespace: structs.DefaultNamespace, + ID: uuid.Generate(), + Priority: job.Priority, + TriggeredBy: structs.EvalTriggerNodeUpdate, + JobID: job.ID, + Status: structs.EvalStatusPending, + } + must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval})) + must.NoError(t, h.Process(NewServiceScheduler, eval)) + + // Ensure we have created only one new allocation + must.SliceLen(t, 2, h.Plans) + plan = h.Plans[0] + var newPlanned []*structs.Allocation + for _, allocList := range plan.NodeAllocation { + newPlanned = append(newPlanned, allocList...) + } + must.SliceLen(t, 10, newPlanned) + + // Ensure that the new allocations retain the host volume ID + for _, new := range newPlanned { + must.Eq(t, new.HostVolumeIDs[0], dhv.ID) + } +} + func TestServiceSched_JobRegister_DiskConstraints(t *testing.T) { ci.Parallel(t) diff --git a/scheduler/scheduler.go b/scheduler/scheduler.go index 7e22070966f..27f87e79745 100644 --- a/scheduler/scheduler.go +++ b/scheduler/scheduler.go @@ -118,6 +118,13 @@ type State interface { // CSIVolumeByID fetch CSI volumes, containing controller jobs CSIVolumesByNodeID(memdb.WatchSet, string, string) (memdb.ResultIterator, error) + // HostVolumeByID fetches host volume by its ID + HostVolumeByID(memdb.WatchSet, string, string, bool) (*structs.HostVolume, error) + + // HostVolumesByNodeID gets an iterator with all the volumes attached to a + // given node + HostVolumesByNodeID(memdb.WatchSet, string, state.SortOption) (memdb.ResultIterator, error) + // LatestIndex returns the greatest index value for all indexes. 
LatestIndex() (uint64, error) } diff --git a/scheduler/stack.go b/scheduler/stack.go index 5c897ddf2de..f978c753f68 100644 --- a/scheduler/stack.go +++ b/scheduler/stack.go @@ -35,10 +35,11 @@ type Stack interface { } type SelectOptions struct { - PenaltyNodeIDs map[string]struct{} - PreferredNodes []*structs.Node - Preempt bool - AllocName string + PenaltyNodeIDs map[string]struct{} + PreferredNodes []*structs.Node + Preempt bool + AllocName string + AllocationHostVolumeIDs []string } // GenericStack is the Stack used for the Generic scheduler. It is @@ -51,6 +52,7 @@ type GenericStack struct { wrappedChecks *FeasibilityWrapper quota FeasibleIterator jobVersion *uint64 + jobNamespace string jobConstraint *ConstraintChecker taskGroupDrivers *DriverChecker taskGroupConstraint *ConstraintChecker @@ -101,6 +103,7 @@ func (s *GenericStack) SetJob(job *structs.Job) { jobVer := job.Version s.jobVersion = &jobVer + s.jobNamespace = job.Namespace s.jobConstraint.SetConstraints(job.Constraints) s.distinctHostsConstraint.SetJob(job) @@ -154,7 +157,7 @@ func (s *GenericStack) Select(tg *structs.TaskGroup, options *SelectOptions) *Ra s.taskGroupDrivers.SetDrivers(tgConstr.drivers) s.taskGroupConstraint.SetConstraints(tgConstr.constraints) s.taskGroupDevices.SetTaskGroup(tg) - s.taskGroupHostVolumes.SetVolumes(options.AllocName, tg.Volumes) + s.taskGroupHostVolumes.SetVolumes(options.AllocName, s.jobNamespace, tg.Volumes, options.AllocationHostVolumeIDs) s.taskGroupCSIVolumes.SetVolumes(options.AllocName, tg.Volumes) if len(tg.Networks) > 0 { s.taskGroupNetwork.SetNetwork(tg.Networks[0]) @@ -202,6 +205,7 @@ type SystemStack struct { ctx Context source *StaticIterator + jobNamespace string wrappedChecks *FeasibilityWrapper quota FeasibleIterator jobConstraint *ConstraintChecker @@ -313,6 +317,7 @@ func (s *SystemStack) SetNodes(baseNodes []*structs.Node) { } func (s *SystemStack) SetJob(job *structs.Job) { + s.jobNamespace = job.Namespace s.jobConstraint.SetConstraints(job.Constraints) s.distinctPropertyConstraint.SetJob(job) s.binPack.SetJob(job) @@ -345,7 +350,7 @@ func (s *SystemStack) Select(tg *structs.TaskGroup, options *SelectOptions) *Ran s.taskGroupDrivers.SetDrivers(tgConstr.drivers) s.taskGroupConstraint.SetConstraints(tgConstr.constraints) s.taskGroupDevices.SetTaskGroup(tg) - s.taskGroupHostVolumes.SetVolumes(options.AllocName, tg.Volumes) + s.taskGroupHostVolumes.SetVolumes(options.AllocName, s.jobNamespace, tg.Volumes, options.AllocationHostVolumeIDs) s.taskGroupCSIVolumes.SetVolumes(options.AllocName, tg.Volumes) if len(tg.Networks) > 0 { s.taskGroupNetwork.SetNetwork(tg.Networks[0])
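
A minimal, standalone sketch of the feasibility rules the host volume tests above exercise: a dynamic host volume is only placeable when it is in the ready state, when it advertises a capability matching the requested attachment and access modes, and, for sticky requests, when its ID matches one the allocation recorded previously. All identifiers below (capability, volumeSketch, isFeasible) are invented for illustration; they do not mirror the actual HostVolumeChecker or hostVolumeIsAvailable implementation, and they deliberately leave out the finer semantics that TestDynamicHostVolumeIsAvailable covers, such as read-only requests, single-writer accounting against proposed allocations, and multi-writer access.

```go
package main

import "fmt"

// capability pairs an attachment mode with an access mode, loosely
// following the shape of structs.HostVolumeCapability in the diff above.
type capability struct {
	attachment string // e.g. "file-system" or "block-device"
	access     string // e.g. "single-node-reader-only" or "single-node-writer"
}

// volumeSketch is a cut-down stand-in for a dynamic host volume.
type volumeSketch struct {
	id    string
	ready bool
	caps  []capability
}

// isFeasible applies three checks: readiness, capability match, and
// (for sticky requests) affinity to a previously recorded volume ID.
func isFeasible(vol volumeSketch, wantAttach, wantAccess string, previousIDs []string) bool {
	if !vol.ready {
		return false // volumes that are still being created are never placeable
	}
	capOK := false
	for _, c := range vol.caps {
		if c.attachment == wantAttach && c.access == wantAccess {
			capOK = true
			break
		}
	}
	if !capOK {
		return false
	}
	// Sticky requests must land on a volume the allocation has used before.
	if len(previousIDs) > 0 {
		for _, id := range previousIDs {
			if id == vol.id {
				return true
			}
		}
		return false
	}
	return true
}

func main() {
	vol := volumeSketch{
		id:    "vol-1",
		ready: true,
		caps:  []capability{{attachment: "file-system", access: "single-node-writer"}},
	}
	fmt.Println(isFeasible(vol, "file-system", "single-node-writer", nil))           // true
	fmt.Println(isFeasible(vol, "block-device", "single-node-writer", nil))          // false: no matching capability
	fmt.Println(isFeasible(vol, "file-system", "single-node-writer", []string{"x"})) // false: sticky ID mismatch
}
```

The table-driven tests in feasible_test.go work through roughly the same axes (readiness, capability match, sticky IDs), but against the real state store and checker rather than a toy model.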
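
The findPreferredNode change gives a rescheduled allocation an affinity for the node that still hosts one of its recorded sticky volume IDs, provided that node is ready. The sketch below models just that rule under simplified types; nodeSketch and preferredNodeFor are hypothetical names, and the real code looks the previous node and its volumes up in the state store instead of receiving them as arguments.

```go
package main

import (
	"fmt"
	"slices"
)

// nodeSketch is a minimal stand-in for a node that serves host volumes.
type nodeSketch struct {
	id      string
	ready   bool
	volumes map[string]string // volume name -> volume ID
}

// preferredNodeFor returns the previous node only if the task group has a
// sticky volume request, the node is ready, and it still hosts one of the
// volume IDs the previous allocation recorded.
func preferredNodeFor(prevNode *nodeSketch, prevVolumeIDs []string, sticky bool) *nodeSketch {
	if !sticky || prevNode == nil || !prevNode.ready {
		return nil
	}
	for _, id := range prevNode.volumes {
		if slices.Contains(prevVolumeIDs, id) {
			return prevNode
		}
	}
	return nil
}

func main() {
	node := &nodeSketch{
		id:      "node-1",
		ready:   true,
		volumes: map[string]string{"foo": "vol-1"},
	}
	fmt.Println(preferredNodeFor(node, []string{"vol-1"}, true) != nil) // true: node still hosts vol-1
	fmt.Println(preferredNodeFor(node, []string{"vol-9"}, true) != nil) // false: the recorded volume lives elsewhere
}
```

In the diff, the recorded IDs come from Allocation.HostVolumeIDs, which computePlacements sets on first placement and which getSelectOptions forwards to the feasibility checker through SelectOptions.AllocationHostVolumeIDs.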