From d8c901570b6282213726a4f979d095d38f669a46 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Tue, 5 Nov 2024 15:19:34 -0500 Subject: [PATCH 01/35] dynamic host volumes: ACL policies (#24356) This changeset implements the ACLs required for dynamic host volumes RPCs: * `host-volume-write` is a coarse-grained policy that implies all operations. * `host-volume-register` is the highest fine-grained privilege because it potentially bypasses quotas. * `host-volume-create` is implicitly granted by `host-volume-register` * `host-volume-delete` is implicitly granted only by `host-volume-write` * `host-volume-read` is implicitly granted by `policy = "read"`, These are namespaced operations, so the testing here is predominantly around parsing and granting of implicit capabilities rather than the well-tested `AllowNamespaceOperation` method. This changeset does not include any changes to the `host_volumes` policy which we'll need for claiming volumes on job submit. That'll be covered in a later PR. Ref: https://hashicorp.atlassian.net/browse/NET-11549 --- acl/acl_test.go | 4 +++ acl/policy.go | 38 +++++++++++++++++++- acl/policy_test.go | 86 +++++++++++++++++++++++++++++++++++++--------- 3 files changed, 110 insertions(+), 18 deletions(-) diff --git a/acl/acl_test.go b/acl/acl_test.go index cf0c4bda3f4..24ccf2b4103 100644 --- a/acl/acl_test.go +++ b/acl/acl_test.go @@ -79,10 +79,12 @@ func TestACLManagement(t *testing.T) { // Check default namespace rights must.True(t, acl.AllowNamespaceOperation("default", NamespaceCapabilityListJobs)) must.True(t, acl.AllowNamespaceOperation("default", NamespaceCapabilitySubmitJob)) + must.True(t, acl.AllowNamespaceOperation("default", NamespaceCapabilityHostVolumeCreate)) must.True(t, acl.AllowNamespace("default")) // Check non-specified namespace must.True(t, acl.AllowNamespaceOperation("foo", NamespaceCapabilityListJobs)) + must.True(t, acl.AllowNamespaceOperation("foo", NamespaceCapabilityHostVolumeCreate)) must.True(t, acl.AllowNamespace("foo")) // Check node pool rights. @@ -155,9 +157,11 @@ func TestACLMerge(t *testing.T) { // Check default namespace rights must.True(t, acl.AllowNamespaceOperation("default", NamespaceCapabilityListJobs)) must.False(t, acl.AllowNamespaceOperation("default", NamespaceCapabilitySubmitJob)) + must.False(t, acl.AllowNamespaceOperation("default", NamespaceCapabilityHostVolumeRegister)) // Check non-specified namespace must.False(t, acl.AllowNamespaceOperation("foo", NamespaceCapabilityListJobs)) + must.False(t, acl.AllowNamespaceOperation("foo", NamespaceCapabilityHostVolumeCreate)) // Check rights in the node pool specified in policies. 
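
To make the implication chain above concrete, here is a minimal sketch in the style of these tests (an editor's illustration, not part of the patch; it assumes the package's existing NewACL constructor that builds an ACL object from parsed policies). A token holding only `host-volume-register` can create and read volumes but not delete them, while the coarse `policy = "write"` grants create and read but neither register nor delete:

    func TestHostVolumeCapabilityExpansion(t *testing.T) {
        p, err := Parse(`
    namespace "default" {
        capabilities = ["host-volume-register"]
    }`)
        must.NoError(t, err)
        aclObj, err := NewACL(false, []*Policy{p})
        must.NoError(t, err)

        // host-volume-register implies create and read ...
        must.True(t, aclObj.AllowNamespaceOperation("default", NamespaceCapabilityHostVolumeCreate))
        must.True(t, aclObj.AllowNamespaceOperation("default", NamespaceCapabilityHostVolumeRead))
        // ... but delete is implied only by host-volume-write
        must.False(t, aclObj.AllowNamespaceOperation("default", NamespaceCapabilityHostVolumeDelete))

        p, err = Parse(`
    namespace "default" {
        policy = "write"
    }`)
        must.NoError(t, err)
        aclObj, err = NewACL(false, []*Policy{p})
        must.NoError(t, err)

        // the coarse write policy grants create (and therefore read), but
        // register and delete must still be granted explicitly
        must.True(t, aclObj.AllowNamespaceOperation("default", NamespaceCapabilityHostVolumeCreate))
        must.False(t, aclObj.AllowNamespaceOperation("default", NamespaceCapabilityHostVolumeRegister))
        must.False(t, aclObj.AllowNamespaceOperation("default", NamespaceCapabilityHostVolumeDelete))
    }
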
must.True(t, acl.AllowNodePoolOperation("my-pool", NodePoolCapabilityRead)) diff --git a/acl/policy.go b/acl/policy.go index c4fe9e4d673..17a7aed2170 100644 --- a/acl/policy.go +++ b/acl/policy.go @@ -47,6 +47,11 @@ const ( NamespaceCapabilityCSIReadVolume = "csi-read-volume" NamespaceCapabilityCSIListVolume = "csi-list-volume" NamespaceCapabilityCSIMountVolume = "csi-mount-volume" + NamespaceCapabilityHostVolumeCreate = "host-volume-create" + NamespaceCapabilityHostVolumeRegister = "host-volume-register" + NamespaceCapabilityHostVolumeRead = "host-volume-read" + NamespaceCapabilityHostVolumeWrite = "host-volume-write" + NamespaceCapabilityHostVolumeDelete = "host-volume-delete" NamespaceCapabilityListScalingPolicies = "list-scaling-policies" NamespaceCapabilityReadScalingPolicy = "read-scaling-policy" NamespaceCapabilityReadJobScaling = "read-job-scaling" @@ -207,7 +212,7 @@ func isNamespaceCapabilityValid(cap string) bool { NamespaceCapabilityReadFS, NamespaceCapabilityAllocLifecycle, NamespaceCapabilityAllocExec, NamespaceCapabilityAllocNodeExec, NamespaceCapabilityCSIReadVolume, NamespaceCapabilityCSIWriteVolume, NamespaceCapabilityCSIListVolume, NamespaceCapabilityCSIMountVolume, NamespaceCapabilityCSIRegisterPlugin, - NamespaceCapabilityListScalingPolicies, NamespaceCapabilityReadScalingPolicy, NamespaceCapabilityReadJobScaling, NamespaceCapabilityScaleJob: + NamespaceCapabilityListScalingPolicies, NamespaceCapabilityReadScalingPolicy, NamespaceCapabilityReadJobScaling, NamespaceCapabilityScaleJob, NamespaceCapabilityHostVolumeCreate, NamespaceCapabilityHostVolumeRegister, NamespaceCapabilityHostVolumeWrite, NamespaceCapabilityHostVolumeRead: return true // Separate the enterprise-only capabilities case NamespaceCapabilitySentinelOverride, NamespaceCapabilitySubmitRecommendation: @@ -241,6 +246,7 @@ func expandNamespacePolicy(policy string) []string { NamespaceCapabilityReadJobScaling, NamespaceCapabilityListScalingPolicies, NamespaceCapabilityReadScalingPolicy, + NamespaceCapabilityHostVolumeRead, } write := make([]string, len(read)) @@ -257,6 +263,7 @@ func expandNamespacePolicy(policy string) []string { NamespaceCapabilityCSIMountVolume, NamespaceCapabilityCSIWriteVolume, NamespaceCapabilitySubmitRecommendation, + NamespaceCapabilityHostVolumeCreate, }...) switch policy { @@ -278,6 +285,32 @@ func expandNamespacePolicy(policy string) []string { } } +// expandNamespaceCapabilities adds extra capabilities implied by fine-grained +// capabilities. +func expandNamespaceCapabilities(ns *NamespacePolicy) { + extraCaps := []string{} + for _, cap := range ns.Capabilities { + switch cap { + case NamespaceCapabilityHostVolumeWrite: + extraCaps = append(extraCaps, + NamespaceCapabilityHostVolumeRegister, + NamespaceCapabilityHostVolumeCreate, + NamespaceCapabilityHostVolumeDelete, + NamespaceCapabilityHostVolumeRead) + case NamespaceCapabilityHostVolumeRegister: + extraCaps = append(extraCaps, + NamespaceCapabilityHostVolumeCreate, + NamespaceCapabilityHostVolumeRead) + case NamespaceCapabilityHostVolumeCreate: + extraCaps = append(extraCaps, NamespaceCapabilityHostVolumeRead) + } + } + + // These may end up being duplicated, but they'll get deduplicated in NewACL + // when inserted into the radix tree. + ns.Capabilities = append(ns.Capabilities, extraCaps...) 
+} + func isNodePoolCapabilityValid(cap string) bool { switch cap { case NodePoolCapabilityDelete, NodePoolCapabilityRead, NodePoolCapabilityWrite, @@ -388,6 +421,9 @@ func Parse(rules string) (*Policy, error) { ns.Capabilities = append(ns.Capabilities, extraCap...) } + // Expand implicit capabilities + expandNamespaceCapabilities(ns) + if ns.Variables != nil { if len(ns.Variables.Paths) == 0 { return nil, fmt.Errorf("Invalid variable policy: no variable paths in namespace %s", ns.Name) diff --git a/acl/policy_test.go b/acl/policy_test.go index 117b82ba3d6..938557aa08a 100644 --- a/acl/policy_test.go +++ b/acl/policy_test.go @@ -5,7 +5,6 @@ package acl import ( "fmt" - "strings" "testing" "github.com/hashicorp/nomad/ci" @@ -17,9 +16,9 @@ func TestParse(t *testing.T) { ci.Parallel(t) type tcase struct { - Raw string - ErrStr string - Expect *Policy + Raw string + ExpectErr string + Expect *Policy } tcases := []tcase{ { @@ -43,6 +42,7 @@ func TestParse(t *testing.T) { NamespaceCapabilityReadJobScaling, NamespaceCapabilityListScalingPolicies, NamespaceCapabilityReadScalingPolicy, + NamespaceCapabilityHostVolumeRead, }, }, }, @@ -118,6 +118,7 @@ func TestParse(t *testing.T) { NamespaceCapabilityReadJobScaling, NamespaceCapabilityListScalingPolicies, NamespaceCapabilityReadScalingPolicy, + NamespaceCapabilityHostVolumeRead, }, }, { @@ -132,6 +133,7 @@ func TestParse(t *testing.T) { NamespaceCapabilityReadJobScaling, NamespaceCapabilityListScalingPolicies, NamespaceCapabilityReadScalingPolicy, + NamespaceCapabilityHostVolumeRead, NamespaceCapabilityScaleJob, NamespaceCapabilitySubmitJob, NamespaceCapabilityDispatchJob, @@ -142,6 +144,8 @@ func TestParse(t *testing.T) { NamespaceCapabilityCSIMountVolume, NamespaceCapabilityCSIWriteVolume, NamespaceCapabilitySubmitRecommendation, + NamespaceCapabilityHostVolumeCreate, + NamespaceCapabilityHostVolumeRead, }, }, { @@ -338,6 +342,7 @@ func TestParse(t *testing.T) { NamespaceCapabilityReadJobScaling, NamespaceCapabilityListScalingPolicies, NamespaceCapabilityReadScalingPolicy, + NamespaceCapabilityHostVolumeRead, }, }, { @@ -352,6 +357,7 @@ func TestParse(t *testing.T) { NamespaceCapabilityReadJobScaling, NamespaceCapabilityListScalingPolicies, NamespaceCapabilityReadScalingPolicy, + NamespaceCapabilityHostVolumeRead, NamespaceCapabilityScaleJob, NamespaceCapabilitySubmitJob, NamespaceCapabilityDispatchJob, @@ -362,6 +368,8 @@ func TestParse(t *testing.T) { NamespaceCapabilityCSIMountVolume, NamespaceCapabilityCSIWriteVolume, NamespaceCapabilitySubmitRecommendation, + NamespaceCapabilityHostVolumeCreate, + NamespaceCapabilityHostVolumeRead, }, }, { @@ -638,6 +646,54 @@ func TestParse(t *testing.T) { }, }, }, + { + ` + namespace "default" { + capabilities = ["host-volume-register"] + } + + namespace "other" { + capabilities = ["host-volume-create"] + } + + namespace "foo" { + capabilities = ["host-volume-write"] + } + `, + "", + &Policy{ + Namespaces: []*NamespacePolicy{ + { + Name: "default", + Policy: "", + Capabilities: []string{ + NamespaceCapabilityHostVolumeRegister, + NamespaceCapabilityHostVolumeCreate, + NamespaceCapabilityHostVolumeRead, + }, + }, + { + Name: "other", + Policy: "", + Capabilities: []string{ + NamespaceCapabilityHostVolumeCreate, + NamespaceCapabilityHostVolumeRead, + }, + }, + { + Name: "foo", + Policy: "", + Capabilities: []string{ + NamespaceCapabilityHostVolumeWrite, + NamespaceCapabilityHostVolumeRegister, + NamespaceCapabilityHostVolumeCreate, + NamespaceCapabilityHostVolumeDelete, + 
NamespaceCapabilityHostVolumeRead, + }, + }, + }, + }, + }, { ` node_pool "pool-read-only" { @@ -878,22 +934,18 @@ func TestParse(t *testing.T) { } for idx, tc := range tcases { - t.Run(fmt.Sprintf("%d", idx), func(t *testing.T) { + t.Run(fmt.Sprintf("%02d", idx), func(t *testing.T) { p, err := Parse(tc.Raw) - if err != nil { - if tc.ErrStr == "" { - t.Fatalf("Unexpected err: %v", err) - } - if !strings.Contains(err.Error(), tc.ErrStr) { - t.Fatalf("Unexpected err: %v", err) - } - return + if tc.ExpectErr == "" { + must.NoError(t, err) + } else { + must.ErrorContains(t, err, tc.ExpectErr) } - if err == nil && tc.ErrStr != "" { - t.Fatalf("Missing expected err") + + if tc.Expect != nil { + tc.Expect.Raw = tc.Raw + must.Eq(t, tc.Expect, p) } - tc.Expect.Raw = tc.Raw - assert.EqualValues(t, tc.Expect, p) }) } } From 75c142ff40e033c4da7db03fdea72e61c4394b53 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Thu, 7 Nov 2024 09:38:09 -0500 Subject: [PATCH 02/35] dynamic host volumes: initial state store implementation (#24353) This changeset implements the state store schema for Dynamic Host Volumes, and methods used to query the state for RPCs. Ref: https://hashicorp.atlassian.net/browse/NET-11549 --- nomad/mock/host_volumes.go | 42 ++++ nomad/state/schema.go | 73 ++++++- nomad/state/state_store_host_volumes.go | 188 ++++++++++++++++++ nomad/state/state_store_host_volumes_test.go | 165 ++++++++++++++++ nomad/state/state_store_restore.go | 8 + nomad/structs/host_volumes.go | 197 +++++++++++++++++++ nomad/structs/host_volumes_test.go | 55 ++++++ 7 files changed, 722 insertions(+), 6 deletions(-) create mode 100644 nomad/mock/host_volumes.go create mode 100644 nomad/state/state_store_host_volumes.go create mode 100644 nomad/state/state_store_host_volumes_test.go create mode 100644 nomad/structs/host_volumes.go create mode 100644 nomad/structs/host_volumes_test.go diff --git a/nomad/mock/host_volumes.go b/nomad/mock/host_volumes.go new file mode 100644 index 00000000000..748e693fe10 --- /dev/null +++ b/nomad/mock/host_volumes.go @@ -0,0 +1,42 @@ +// Copyright (c) HashiCorp, Inc. 
+// SPDX-License-Identifier: BUSL-1.1 + +package mock + +import ( + "github.com/hashicorp/nomad/helper/uuid" + "github.com/hashicorp/nomad/nomad/structs" +) + +func HostVolume() *structs.HostVolume { + + volID := uuid.Generate() + vol := &structs.HostVolume{ + Namespace: structs.DefaultNamespace, + ID: volID, + Name: "example", + PluginID: "example-plugin", + NodePool: structs.NodePoolDefault, + NodeID: uuid.Generate(), + Constraints: []*structs.Constraint{ + { + LTarget: "${meta.rack}", + RTarget: "r1", + Operand: "=", + }, + }, + RequestedCapacityMin: 100000, + RequestedCapacityMax: 200000, + Capacity: 150000, + RequestedCapabilities: []*structs.HostVolumeCapability{ + { + AttachmentMode: structs.HostVolumeAttachmentModeFilesystem, + AccessMode: structs.HostVolumeAccessModeSingleNodeWriter, + }, + }, + Parameters: map[string]string{"foo": "bar"}, + HostPath: "/var/data/nomad/alloc_mounts/" + volID, + State: structs.HostVolumeStatePending, + } + return vol +} diff --git a/nomad/state/schema.go b/nomad/state/schema.go index 2c798b06fbe..7ee83e074eb 100644 --- a/nomad/state/schema.go +++ b/nomad/state/schema.go @@ -26,6 +26,7 @@ const ( TableACLBindingRules = "acl_binding_rules" TableAllocs = "allocs" TableJobSubmission = "job_submission" + TableHostVolumes = "host_volumes" ) const ( @@ -41,6 +42,7 @@ const ( indexName = "name" indexSigningKey = "signing_key" indexAuthMethod = "auth_method" + indexNodePool = "node_pool" ) var ( @@ -97,6 +99,7 @@ func init() { aclRolesTableSchema, aclAuthMethodsTableSchema, bindingRulesTableSchema, + hostVolumeTableSchema, }...) } @@ -161,8 +164,8 @@ func nodeTableSchema() *memdb.TableSchema { Field: "SecretID", }, }, - "node_pool": { - Name: "node_pool", + indexNodePool: { + Name: indexNodePool, AllowMissing: false, Unique: false, Indexer: &memdb.StringFieldIndex{ @@ -844,8 +847,8 @@ func vaultAccessorTableSchema() *memdb.TableSchema { }, }, - "node_id": { - Name: "node_id", + indexNodeID: { + Name: indexNodeID, AllowMissing: false, Unique: false, Indexer: &memdb.StringFieldIndex{ @@ -882,8 +885,8 @@ func siTokenAccessorTableSchema() *memdb.TableSchema { }, }, - "node_id": { - Name: "node_id", + indexNodeID: { + Name: indexNodeID, AllowMissing: false, Unique: false, Indexer: &memdb.StringFieldIndex{ @@ -1643,3 +1646,61 @@ func bindingRulesTableSchema() *memdb.TableSchema { }, } } + +// HostVolumes are identified by id globally, and searchable by namespace+name, +// node, or node_pool +func hostVolumeTableSchema() *memdb.TableSchema { + return &memdb.TableSchema{ + Name: TableHostVolumes, + Indexes: map[string]*memdb.IndexSchema{ + indexID: { + Name: indexID, + AllowMissing: false, + Unique: true, + Indexer: &memdb.CompoundIndex{ + Indexes: []memdb.Indexer{ + &memdb.StringFieldIndex{ + Field: "Namespace", + }, + &memdb.StringFieldIndex{ + Field: "ID", + Lowercase: true, + }, + }, + }, + }, + indexName: { + Name: indexName, + AllowMissing: false, + Unique: false, + Indexer: &memdb.CompoundIndex{ + Indexes: []memdb.Indexer{ + &memdb.StringFieldIndex{ + Field: "Namespace", + }, + &memdb.StringFieldIndex{ + Field: "Name", + }, + }, + }, + }, + indexNodeID: { + Name: indexNodeID, + AllowMissing: false, + Unique: false, + Indexer: &memdb.StringFieldIndex{ + Field: "NodeID", + Lowercase: true, + }, + }, + indexNodePool: { + Name: indexNodePool, + AllowMissing: false, + Unique: false, + Indexer: &memdb.StringFieldIndex{ + Field: "NodePool", + }, + }, + }, + } +} diff --git a/nomad/state/state_store_host_volumes.go 
b/nomad/state/state_store_host_volumes.go new file mode 100644 index 00000000000..6805314ff97 --- /dev/null +++ b/nomad/state/state_store_host_volumes.go @@ -0,0 +1,188 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package state + +import ( + "fmt" + + memdb "github.com/hashicorp/go-memdb" + "github.com/hashicorp/nomad/nomad/structs" +) + +// HostVolumeByID retrieve a specific host volume +func (s *StateStore) HostVolumeByID(ws memdb.WatchSet, ns, id string, withAllocs bool) (*structs.HostVolume, error) { + txn := s.db.ReadTxn() + obj, err := txn.First(TableHostVolumes, indexID, ns, id) + if err != nil { + return nil, err + } + if obj == nil { + return nil, nil + } + vol := obj.(*structs.HostVolume) + if !withAllocs { + return vol, nil + } + + vol = vol.Copy() + vol.Allocations = []*structs.AllocListStub{} + + // we can't use AllocsByNodeTerminal because we only want to filter out + // allocs that are client-terminal, not server-terminal + allocs, err := s.AllocsByNode(nil, vol.NodeID) + if err != nil { + return nil, fmt.Errorf("could not query allocs to check for host volume claims: %w", err) + } + for _, alloc := range allocs { + if alloc.ClientTerminalStatus() { + continue + } + for _, volClaim := range alloc.Job.LookupTaskGroup(alloc.TaskGroup).Volumes { + if volClaim.Type == structs.VolumeTypeHost && volClaim.Source == vol.Name { + vol.Allocations = append(vol.Allocations, alloc.Stub(nil)) + } + } + } + + return vol, nil +} + +// UpsertHostVolumes upserts a set of host volumes +func (s *StateStore) UpsertHostVolumes(index uint64, volumes []*structs.HostVolume) error { + txn := s.db.WriteTxn(index) + defer txn.Abort() + + for _, v := range volumes { + if exists, err := s.namespaceExists(txn, v.Namespace); err != nil { + return err + } else if !exists { + return fmt.Errorf("host volume %s is in nonexistent namespace %s", v.ID, v.Namespace) + } + + obj, err := txn.First(TableHostVolumes, indexID, v.Namespace, v.ID) + if err != nil { + return err + } + if obj != nil { + old := obj.(*structs.HostVolume) + v.CreateIndex = old.CreateIndex + v.CreateTime = old.CreateTime + } else { + v.CreateIndex = index + } + + // If the fingerprint is written from the node before the create RPC + // handler completes, we'll never update from the initial pending , so + // reconcile that here + node, err := s.NodeByID(nil, v.NodeID) + if err != nil { + return err + } + if node == nil { + return fmt.Errorf("host volume %s has nonexistent node ID %s", v.ID, v.NodeID) + } + if _, ok := node.HostVolumes[v.Name]; ok { + v.State = structs.HostVolumeStateReady + } + + // Allocations are denormalized on read, so we don't want these to be + // written to the state store. 
+ v.Allocations = nil + v.ModifyIndex = index + + err = txn.Insert(TableHostVolumes, v) + if err != nil { + return fmt.Errorf("host volume insert: %w", err) + } + } + + if err := txn.Insert(tableIndex, &IndexEntry{TableHostVolumes, index}); err != nil { + return fmt.Errorf("index update failed: %w", err) + } + + return txn.Commit() +} + +// DeleteHostVolumes deletes a set of host volumes in the same namespace +func (s *StateStore) DeleteHostVolumes(index uint64, ns string, ids []string) error { + txn := s.db.WriteTxn(index) + defer txn.Abort() + + for _, id := range ids { + + obj, err := txn.First(TableHostVolumes, indexID, ns, id) + if err != nil { + return err + } + if obj != nil { + vol := obj.(*structs.HostVolume) + + allocs, err := s.AllocsByNodeTerminal(nil, vol.NodeID, false) + if err != nil { + return fmt.Errorf("could not query allocs to check for host volume claims: %w", err) + } + for _, alloc := range allocs { + for _, volClaim := range alloc.Job.LookupTaskGroup(alloc.TaskGroup).Volumes { + if volClaim.Type == structs.VolumeTypeHost && volClaim.Name == vol.Name { + return fmt.Errorf("could not delete volume %s in use by alloc %s", + vol.ID, alloc.ID) + } + } + } + + err = txn.Delete(TableHostVolumes, vol) + if err != nil { + return fmt.Errorf("host volume delete: %w", err) + } + } + } + + if err := txn.Insert(tableIndex, &IndexEntry{TableHostVolumes, index}); err != nil { + return fmt.Errorf("index update failed: %w", err) + } + + return txn.Commit() + +} + +// HostVolumes queries all the host volumes and is mostly used for +// snapshot/restore +func (s *StateStore) HostVolumes(ws memdb.WatchSet, sort SortOption) (memdb.ResultIterator, error) { + return s.hostVolumesIter(ws, indexID, sort) +} + +// HostVolumesByName retrieves all host volumes of the same name +func (s *StateStore) HostVolumesByName(ws memdb.WatchSet, ns, name string, sort SortOption) (memdb.ResultIterator, error) { + return s.hostVolumesIter(ws, "name_prefix", sort, ns, name) +} + +// HostVolumesByNodeID retrieves all host volumes on the same node +func (s *StateStore) HostVolumesByNodeID(ws memdb.WatchSet, nodeID string, sort SortOption) (memdb.ResultIterator, error) { + return s.hostVolumesIter(ws, indexNodeID, sort, nodeID) +} + +// HostVolumesByNodePool retrieves all host volumes in the same node pool +func (s *StateStore) HostVolumesByNodePool(ws memdb.WatchSet, nodePool string, sort SortOption) (memdb.ResultIterator, error) { + return s.hostVolumesIter(ws, indexNodePool, sort, nodePool) +} + +func (s *StateStore) hostVolumesIter(ws memdb.WatchSet, index string, sort SortOption, args ...any) (memdb.ResultIterator, error) { + txn := s.db.ReadTxn() + + var iter memdb.ResultIterator + var err error + + switch sort { + case SortReverse: + iter, err = txn.GetReverse(TableHostVolumes, index, args...) + default: + iter, err = txn.Get(TableHostVolumes, index, args...) + } + if err != nil { + return nil, err + } + + ws.Add(iter.WatchCh()) + return iter, nil +} diff --git a/nomad/state/state_store_host_volumes_test.go b/nomad/state/state_store_host_volumes_test.go new file mode 100644 index 00000000000..327c54a1634 --- /dev/null +++ b/nomad/state/state_store_host_volumes_test.go @@ -0,0 +1,165 @@ +// Copyright (c) HashiCorp, Inc. 
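
Two details of the queries above are worth calling out (editor's note, not part of the patch). The primary index is a compound (Namespace, ID) key, so HostVolumeByID always takes both values, and HostVolumesByName goes through the name_prefix variant that go-memdb exposes for the compound (Namespace, Name) index. A minimal sketch of consuming one of these iterators, mirroring the test that follows (assumes store is a *StateStore):

    iter, err := store.HostVolumesByNodePool(nil, "prod", SortDefault)
    if err != nil {
        return err
    }
    for raw := iter.Next(); raw != nil; raw = iter.Next() {
        vol := raw.(*structs.HostVolume)
        fmt.Printf("%s/%s on node %s\n", vol.Namespace, vol.Name, vol.NodeID)
    }
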
+// SPDX-License-Identifier: BUSL-1.1 + +package state + +import ( + "fmt" + "testing" + + memdb "github.com/hashicorp/go-memdb" + "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/nomad/mock" + "github.com/hashicorp/nomad/nomad/structs" + "github.com/shoenig/test/must" +) + +func TestStateStore_HostVolumes_CRUD(t *testing.T) { + ci.Parallel(t) + store := testStateStore(t) + index, err := store.LatestIndex() + must.NoError(t, err) + + nodes := []*structs.Node{ + mock.Node(), + mock.Node(), + mock.Node(), + } + nodes[2].NodePool = "prod" + index++ + must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, + index, nodes[0], NodeUpsertWithNodePool)) + must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, + index, nodes[1], NodeUpsertWithNodePool)) + must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, + index, nodes[2], NodeUpsertWithNodePool)) + + ns := mock.Namespace() + must.NoError(t, store.UpsertNamespaces(index, []*structs.Namespace{ns})) + + vols := []*structs.HostVolume{ + mock.HostVolume(), + mock.HostVolume(), + mock.HostVolume(), + mock.HostVolume(), + } + vols[0].NodeID = nodes[0].ID + vols[1].NodeID = nodes[1].ID + vols[1].Name = "another-example" + vols[2].NodeID = nodes[2].ID + vols[2].NodePool = nodes[2].NodePool + vols[3].Namespace = ns.Name + vols[3].NodeID = nodes[2].ID + vols[3].NodePool = nodes[2].NodePool + + index++ + must.NoError(t, store.UpsertHostVolumes(index, vols)) + + vol, err := store.HostVolumeByID(nil, vols[0].Namespace, vols[0].ID, true) + must.NoError(t, err) + must.NotNil(t, vol) + must.Eq(t, vols[0].ID, vol.ID) + must.NotNil(t, vol.Allocations) + must.Len(t, 0, vol.Allocations) + + vol, err = store.HostVolumeByID(nil, vols[0].Namespace, vols[0].ID, false) + must.NoError(t, err) + must.NotNil(t, vol) + must.Nil(t, vol.Allocations) + + consumeIter := func(iter memdb.ResultIterator) map[string]*structs.HostVolume { + got := map[string]*structs.HostVolume{} + for raw := iter.Next(); raw != nil; raw = iter.Next() { + vol := raw.(*structs.HostVolume) + got[vol.ID] = vol + } + return got + } + + iter, err := store.HostVolumesByName(nil, structs.DefaultNamespace, "example", SortDefault) + must.NoError(t, err) + got := consumeIter(iter) + must.NotNil(t, got[vols[0].ID], must.Sprint("expected vol0")) + must.NotNil(t, got[vols[2].ID], must.Sprint("expected vol2")) + must.MapLen(t, 2, got, must.Sprint(`expected 2 volumes named "example" in default namespace`)) + + iter, err = store.HostVolumesByNodePool(nil, nodes[2].NodePool, SortDefault) + must.NoError(t, err) + got = consumeIter(iter) + must.NotNil(t, got[vols[2].ID], must.Sprint("expected vol2")) + must.NotNil(t, got[vols[3].ID], must.Sprint("expected vol3")) + must.MapLen(t, 2, got, must.Sprint(`expected 2 volumes in prod node pool`)) + + iter, err = store.HostVolumesByNodeID(nil, nodes[2].ID, SortDefault) + must.NoError(t, err) + got = consumeIter(iter) + must.NotNil(t, got[vols[2].ID], must.Sprint("expected vol2")) + must.NotNil(t, got[vols[3].ID], must.Sprint("expected vol3")) + must.MapLen(t, 2, got, must.Sprint(`expected 2 volumes on node 2`)) + + // simulate a node registering one of the volumes + nodes[2] = nodes[2].Copy() + nodes[2].HostVolumes = map[string]*structs.ClientHostVolumeConfig{"example": { + Name: vols[2].Name, + Path: vols[2].HostPath, + }} + index++ + must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, index, nodes[2])) + + // update all the volumes, which should update the state of vol2 as well + for i, vol := range 
vols { + vol = vol.Copy() + vol.RequestedCapacityMax = 300000 + vols[i] = vol + } + index++ + must.NoError(t, store.UpsertHostVolumes(index, vols)) + + iter, err = store.HostVolumesByName(nil, structs.DefaultNamespace, "example", SortDefault) + must.NoError(t, err) + got = consumeIter(iter) + must.MapLen(t, 2, got, must.Sprint(`expected 2 volumes named "example" in default namespace`)) + + vol0 := got[vols[0].ID] + must.NotNil(t, vol0) + must.Eq(t, index, vol0.ModifyIndex) + vol2 := got[vols[2].ID] + must.NotNil(t, vol2) + must.Eq(t, index, vol2.ModifyIndex) + must.Eq(t, structs.HostVolumeStateReady, vol2.State, must.Sprint( + "expected volume state to be updated because its been fingerprinted by a node")) + + alloc := mock.AllocForNode(nodes[2]) + alloc.Job.TaskGroups[0].Volumes = map[string]*structs.VolumeRequest{"example": { + Name: "example", + Type: structs.VolumeTypeHost, + Source: vols[2].Name, + }} + index++ + must.NoError(t, store.UpsertAllocs(structs.MsgTypeTestSetup, + index, []*structs.Allocation{alloc})) + + index++ + err = store.DeleteHostVolumes(index, vol2.Namespace, []string{vols[1].ID, vols[2].ID}) + must.EqError(t, err, fmt.Sprintf( + "could not delete volume %s in use by alloc %s", vols[2].ID, alloc.ID)) + vol, err = store.HostVolumeByID(nil, vols[1].Namespace, vols[1].ID, true) + must.NoError(t, err) + must.NotNil(t, vol, must.Sprint("volume that didn't error should not be deleted")) + + err = store.DeleteHostVolumes(index, vol2.Namespace, []string{vols[1].ID}) + must.NoError(t, err) + vol, err = store.HostVolumeByID(nil, vols[1].Namespace, vols[1].ID, true) + must.NoError(t, err) + must.Nil(t, vol) + + vol, err = store.HostVolumeByID(nil, vols[2].Namespace, vols[2].ID, true) + must.NoError(t, err) + must.NotNil(t, vol) + must.Len(t, 1, vol.Allocations) + + iter, err = store.HostVolumes(nil, SortReverse) + must.NoError(t, err) + got = consumeIter(iter) + must.MapLen(t, 3, got, must.Sprint(`expected 3 volumes remain`)) +} diff --git a/nomad/state/state_store_restore.go b/nomad/state/state_store_restore.go index 2072ca727d7..0a1638422f2 100644 --- a/nomad/state/state_store_restore.go +++ b/nomad/state/state_store_restore.go @@ -291,3 +291,11 @@ func (r *StateRestore) JobSubmissionRestore(jobSubmission *structs.JobSubmission } return nil } + +// HostVolumeRestore restores a single host volume into the host_volumes table +func (r *StateRestore) HostVolumeRestore(vol *structs.HostVolume) error { + if err := r.txn.Insert(TableHostVolumes, vol); err != nil { + return fmt.Errorf("host volume insert failed: %w", err) + } + return nil +} diff --git a/nomad/structs/host_volumes.go b/nomad/structs/host_volumes.go new file mode 100644 index 00000000000..ade40647608 --- /dev/null +++ b/nomad/structs/host_volumes.go @@ -0,0 +1,197 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package structs + +import ( + "maps" + + "github.com/hashicorp/nomad/helper" +) + +type HostVolume struct { + // Namespace is the Nomad namespace for the host volume, which constrains + // which jobs can mount it. + Namespace string + + // ID is a UUID-like string generated by the server. + ID string + + // Name is the name that group.volume will use to identify the volume + // source. Not expected to be unique. + Name string + + // PluginID is the name of the host volume plugin on the client that will be + // used for creating the volume. If omitted, the client will use its default + // built-in plugin. 
+ PluginID string + + // NodePool is the node pool of the node where the volume is placed. If the + // user doesn't provide a node ID, a node will be selected using the + // NodePool and Constraints. If the user provides both NodePool and NodeID, + // NodePool will be used to validate the request. If omitted, the server + // will populate this value in before writing the volume to Raft. + NodePool string + + // NodeID is the node where the volume is placed. If the user doesn't + // provide a NodeID, one will be selected using the NodePool and + // Constraints. If omitted, this field will then be populated by the server + // before writing the volume to Raft. + NodeID string + + // Constraints are optional. If the NodeID is not provided, the NodePool and + // Constraints are used to select a node. If the NodeID is provided, + // Constraints are used to validate that the node meets those constraints at + // the time of volume creation. + Constraints []*Constraint + + // Because storage may allow only specific intervals of size, we accept a + // min and max and return the actual capacity when the volume is created or + // updated on the client + RequestedCapacityMin int64 // bytes + RequestedCapacityMax int64 // bytes + Capacity int64 // bytes + + // RequestedCapabilities defines the options available to group.volume + // blocks. The scheduler checks against the listed capability blocks and + // selects a node for placement if *any* capability block works. + RequestedCapabilities []*HostVolumeCapability + + // Parameters are an opaque map of parameters for the host volume plugin. + Parameters map[string]string + + // HostPath is the path on disk where the volume's mount point was + // created. We record this to make debugging easier. + HostPath string + + // State represents the overall state of the volume. One of pending, ready, + // deleted. + State HostVolumeState + + CreateIndex uint64 + CreateTime int64 // Unix timestamp in nanoseconds since epoch + + ModifyIndex uint64 + ModifyTime int64 // Unix timestamp in nanoseconds since epoch + + // Allocations is the list of non-client-terminal allocations with claims on + // this host volume. 
They are denormalized on read and this field will be + // never written to Raft + Allocations []*AllocListStub +} + +type HostVolumeState string + +const ( + HostVolumeStateUnknown HostVolumeState = "" // never write this to Raft + HostVolumeStatePending HostVolumeState = "pending" + HostVolumeStateReady HostVolumeState = "ready" + HostVolumeStateDeleted HostVolumeState = "deleted" +) + +func (hv *HostVolume) Copy() *HostVolume { + if hv == nil { + return nil + } + + nhv := *hv + nhv.Constraints = helper.CopySlice(hv.Constraints) + nhv.RequestedCapabilities = helper.CopySlice(hv.RequestedCapabilities) + nhv.Parameters = maps.Clone(hv.Parameters) + return &nhv +} + +func (hv *HostVolume) Stub() *HostVolumeStub { + if hv == nil { + return nil + } + + return &HostVolumeStub{ + Namespace: hv.Namespace, + ID: hv.ID, + Name: hv.Name, + PluginID: hv.PluginID, + NodePool: hv.NodePool, + NodeID: hv.NodeID, + Capacity: hv.Capacity, + State: hv.State, + CreateIndex: hv.CreateIndex, + CreateTime: hv.CreateTime, + ModifyIndex: hv.ModifyIndex, + ModifyTime: hv.ModifyTime, + } +} + +func (hv *HostVolume) Validate(existing *HostVolume) error { + // TODO(1.10.0): validate a host volume is validate or that changes to a + // host volume are valid + return nil +} + +// GetNamespace implements the paginator.NamespaceGetter interface +func (hv *HostVolume) GetNamespace() string { + return hv.Namespace +} + +// GetID implements the paginator.IDGetter interface +func (hv *HostVolume) GetID() string { + return hv.ID +} + +// HostVolumeCapability is the requested attachment and access mode for a volume +type HostVolumeCapability struct { + AttachmentMode HostVolumeAttachmentMode + AccessMode HostVolumeAccessMode +} + +func (hvc *HostVolumeCapability) Copy() *HostVolumeCapability { + if hvc == nil { + return nil + } + + nhvc := *hvc + return &nhvc +} + +// HostVolumeAttachmentMode chooses the type of storage API that will be used to +// interact with the device. +type HostVolumeAttachmentMode string + +const ( + HostVolumeAttachmentModeUnknown HostVolumeAttachmentMode = "" + HostVolumeAttachmentModeBlockDevice HostVolumeAttachmentMode = "block-device" + HostVolumeAttachmentModeFilesystem HostVolumeAttachmentMode = "file-system" +) + +// HostVolumeAccessMode indicates how Nomad should make the volume available to +// concurrent allocations. +type HostVolumeAccessMode string + +const ( + HostVolumeAccessModeUnknown HostVolumeAccessMode = "" + + HostVolumeAccessModeSingleNodeReader HostVolumeAccessMode = "single-node-reader-only" + HostVolumeAccessModeSingleNodeWriter HostVolumeAccessMode = "single-node-writer" + + HostVolumeAccessModeMultiNodeReader HostVolumeAccessMode = "multi-node-reader-only" + HostVolumeAccessModeMultiNodeSingleWriter HostVolumeAccessMode = "multi-node-single-writer" + HostVolumeAccessModeMultiNodeMultiWriter HostVolumeAccessMode = "multi-node-multi-writer" +) + +// HostVolumeStub is used for responses for the list volumes endpoint +type HostVolumeStub struct { + Namespace string + ID string + Name string + PluginID string + NodePool string + NodeID string + Capacity int64 // bytes + State HostVolumeState + + CreateIndex uint64 + CreateTime int64 + + ModifyIndex uint64 + ModifyTime int64 +} diff --git a/nomad/structs/host_volumes_test.go b/nomad/structs/host_volumes_test.go new file mode 100644 index 00000000000..a07af92da38 --- /dev/null +++ b/nomad/structs/host_volumes_test.go @@ -0,0 +1,55 @@ +// Copyright (c) HashiCorp, Inc. 
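
Because go-memdb returns pointers shared with the state store, callers must never modify a volume they read back; that is why Copy above deep-copies the constraint and capability slices and the parameter map, and why the state store test earlier copies each volume before changing it. The intended update pattern is roughly the following (editor's sketch, assuming a *StateStore named store and the next raft index):

    vol, err := store.HostVolumeByID(nil, ns, id, false)
    if err != nil {
        return err
    }
    if vol == nil {
        return nil // unknown volume
    }
    updated := vol.Copy() // never mutate the object owned by the state store
    updated.RequestedCapacityMax = 300000
    return store.UpsertHostVolumes(nextIndex, []*structs.HostVolume{updated})
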
+// SPDX-License-Identifier: BUSL-1.1 + +package structs + +import ( + "testing" + + "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/helper/uuid" + "github.com/shoenig/test/must" +) + +func TestHostVolume_Copy(t *testing.T) { + ci.Parallel(t) + + out := (*HostVolume)(nil).Copy() + must.Nil(t, out) + + vol := &HostVolume{ + Namespace: DefaultNamespace, + ID: uuid.Generate(), + Name: "example", + PluginID: "example-plugin", + NodePool: NodePoolDefault, + NodeID: uuid.Generate(), + Constraints: []*Constraint{{ + LTarget: "${meta.rack}", + RTarget: "r1", + Operand: "=", + }}, + Capacity: 150000, + RequestedCapabilities: []*HostVolumeCapability{{ + AttachmentMode: HostVolumeAttachmentModeFilesystem, + AccessMode: HostVolumeAccessModeSingleNodeWriter, + }}, + Parameters: map[string]string{"foo": "bar"}, + } + + out = vol.Copy() + must.Eq(t, vol, out) + + out.Allocations = []*AllocListStub{{ID: uuid.Generate()}} + out.Constraints[0].LTarget = "${meta.node_class}" + out.RequestedCapabilities = append(out.RequestedCapabilities, &HostVolumeCapability{ + AttachmentMode: HostVolumeAttachmentModeBlockDevice, + AccessMode: HostVolumeAccessModeMultiNodeReader, + }) + out.Parameters["foo"] = "baz" + + must.Nil(t, vol.Allocations) + must.Eq(t, "${meta.rack}", vol.Constraints[0].LTarget) + must.Len(t, 1, vol.RequestedCapabilities) + must.Eq(t, "bar", vol.Parameters["foo"]) +} From 6a3803c31ef18c732e88677c453175fce823ed49 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Fri, 8 Nov 2024 15:21:58 -0500 Subject: [PATCH 03/35] dynamic host volumes: RPC handlers (#24373) This changeset implements the RPC handlers for Dynamic Host Volumes, including the plumbing needed to forward requests to clients. The client-side implementation is stubbed and will be done under a separate PR. Ref: https://hashicorp.atlassian.net/browse/NET-11549 --- client/host_volume_endpoint.go | 53 ++ client/rpc.go | 3 + client/structs/host_volumes.go | 61 ++ helper/funcs.go | 12 + helper/funcs_test.go | 35 ++ helper/raftutil/msgtypes.go | 2 + nomad/client_host_volume_endpoint.go | 84 +++ nomad/fsm.go | 68 ++- nomad/host_volume_endpoint.go | 515 +++++++++++++++++ nomad/host_volume_endpoint_test.go | 566 +++++++++++++++++++ nomad/mock/host_volumes.go | 23 +- nomad/server.go | 2 + nomad/state/state_store_host_volumes_test.go | 2 +- nomad/structs/host_volumes.go | 106 +++- nomad/structs/host_volumes_test.go | 2 +- nomad/structs/structs.go | 4 + 16 files changed, 1500 insertions(+), 38 deletions(-) create mode 100644 client/host_volume_endpoint.go create mode 100644 client/structs/host_volumes.go create mode 100644 nomad/client_host_volume_endpoint.go create mode 100644 nomad/host_volume_endpoint.go create mode 100644 nomad/host_volume_endpoint_test.go diff --git a/client/host_volume_endpoint.go b/client/host_volume_endpoint.go new file mode 100644 index 00000000000..c22d22efde0 --- /dev/null +++ b/client/host_volume_endpoint.go @@ -0,0 +1,53 @@ +// Copyright (c) HashiCorp, Inc. 
+// SPDX-License-Identifier: BUSL-1.1 + +package client + +import ( + "context" + "path/filepath" + "time" + + metrics "github.com/armon/go-metrics" + cstructs "github.com/hashicorp/nomad/client/structs" +) + +type HostVolume struct { + c *Client +} + +func newHostVolumesEndpoint(c *Client) *HostVolume { + v := &HostVolume{c: c} + return v +} + +var hostVolumeRequestTimeout = time.Minute + +func (v *HostVolume) requestContext() (context.Context, context.CancelFunc) { + return context.WithTimeout(context.Background(), hostVolumeRequestTimeout) +} + +func (v *HostVolume) Create(req *cstructs.ClientHostVolumeCreateRequest, resp *cstructs.ClientHostVolumeCreateResponse) error { + defer metrics.MeasureSince([]string{"client", "host_volume", "create"}, time.Now()) + _, cancelFn := v.requestContext() + defer cancelFn() + + // TODO(1.10.0): call into Client's host volume manager to create the work here + + resp.CapacityBytes = req.RequestedCapacityMinBytes + resp.HostPath = filepath.Join(v.c.config.AllocMountsDir, req.ID) + + v.c.logger.Debug("created host volume", "id", req.ID, "path", resp.HostPath) + return nil +} + +func (v *HostVolume) Delete(req *cstructs.ClientHostVolumeDeleteRequest, resp *cstructs.ClientHostVolumeDeleteResponse) error { + defer metrics.MeasureSince([]string{"client", "host_volume", "create"}, time.Now()) + _, cancelFn := v.requestContext() + defer cancelFn() + + // TODO(1.10.0): call into Client's host volume manager to delete the volume here + + v.c.logger.Debug("deleted host volume", "id", req.ID, "path", req.HostPath) + return nil +} diff --git a/client/rpc.go b/client/rpc.go index bfdf051c121..8d2525d5af1 100644 --- a/client/rpc.go +++ b/client/rpc.go @@ -28,6 +28,7 @@ type rpcEndpoints struct { Allocations *Allocations Agent *Agent NodeMeta *NodeMeta + HostVolume *HostVolume } // ClientRPC is used to make a local, client only RPC call @@ -293,6 +294,7 @@ func (c *Client) setupClientRpc(rpcs map[string]interface{}) { c.endpoints.Allocations = NewAllocationsEndpoint(c) c.endpoints.Agent = NewAgentEndpoint(c) c.endpoints.NodeMeta = newNodeMetaEndpoint(c) + c.endpoints.HostVolume = newHostVolumesEndpoint(c) c.setupClientRpcServer(c.rpcServer) } @@ -308,6 +310,7 @@ func (c *Client) setupClientRpcServer(server *rpc.Server) { server.Register(c.endpoints.Allocations) server.Register(c.endpoints.Agent) server.Register(c.endpoints.NodeMeta) + server.Register(c.endpoints.HostVolume) } // rpcConnListener is a long lived function that listens for new connections diff --git a/client/structs/host_volumes.go b/client/structs/host_volumes.go new file mode 100644 index 00000000000..ba6806051aa --- /dev/null +++ b/client/structs/host_volumes.go @@ -0,0 +1,61 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package structs + +type ClientHostVolumeCreateRequest struct { + // ID is a UUID-like string generated by the server. + ID string + + // Name is the name that group.volume will use to identify the volume + // source. Not expected to be unique. + Name string + + // PluginID is the name of the host volume plugin on the client that will be + // used for creating the volume. If omitted, the client will use its default + // built-in plugin. + PluginID string + + // NodeID is the node where the volume is placed. It's included in the + // client RPC request so that the server can route the request to the + // correct node. 
+ NodeID string + + // Because storage may allow only specific intervals of size, we accept a + // min and max and return the actual capacity when the volume is created or + // updated on the client + RequestedCapacityMinBytes int64 + RequestedCapacityMaxBytes int64 + + // Parameters are an opaque map of parameters for the host volume plugin. + Parameters map[string]string +} + +type ClientHostVolumeCreateResponse struct { + // Path is the host path where the volume's mount point was created. We send + // this back to the server to make debugging easier. + HostPath string + + // Capacity is the size in bytes that was actually provisioned by the host + // volume plugin. + CapacityBytes int64 +} + +type ClientHostVolumeDeleteRequest struct { + // ID is a UUID-like string generated by the server. + ID string + + // NodeID is the node where the volume is placed. It's included in the + // client RPC request so that the server can route the request to the + // correct node. + NodeID string + + // Path is the host path where the volume's mount point was created. We send + // this from the server to allow verification by plugins + HostPath string + + // Parameters are an opaque map of parameters for the host volume plugin. + Parameters map[string]string +} + +type ClientHostVolumeDeleteResponse struct{} diff --git a/helper/funcs.go b/helper/funcs.go index e251328f697..4dbf1223c62 100644 --- a/helper/funcs.go +++ b/helper/funcs.go @@ -525,3 +525,15 @@ func Merge[T comparable](a, b T) T { } return a } + +// FlattenMultierror takes a multierror and unwraps it if there's only one error +// in the output, otherwise returning the multierror or nil. +func FlattenMultierror(mErr *multierror.Error) error { + if mErr == nil { + return nil + } + if mErr.Len() == 1 { + return mErr.Errors[0] + } + return mErr.ErrorOrNil() +} diff --git a/helper/funcs_test.go b/helper/funcs_test.go index 4e1947f28d4..9b4e5426558 100644 --- a/helper/funcs_test.go +++ b/helper/funcs_test.go @@ -4,12 +4,14 @@ package helper import ( + "errors" "fmt" "maps" "reflect" "sort" "testing" + multierror "github.com/hashicorp/go-multierror" "github.com/hashicorp/go-set/v3" "github.com/shoenig/test/must" "github.com/stretchr/testify/require" @@ -483,3 +485,36 @@ func Test_SliceSetEq(t *testing.T) { must.True(t, SliceSetEq(a, b)) }) } + +func TestFlattenMultiError(t *testing.T) { + + var mErr0 *multierror.Error + mErr0 = multierror.Append(mErr0, func() error { + return nil + }()) + err := FlattenMultierror(mErr0) + must.Nil(t, err) + + var mErr1 *multierror.Error + mErr1 = multierror.Append(mErr1, func() error { + var mErr *multierror.Error + mErr = multierror.Append(mErr, errors.New("inner1")) + return mErr + }()) + err = FlattenMultierror(mErr1) + must.Eq(t, `inner1`, err.Error()) + + var mErr2 *multierror.Error + mErr2 = multierror.Append(mErr2, func() error { + var mErr *multierror.Error + mErr = multierror.Append(mErr, errors.New("inner1")) + mErr = multierror.Append(mErr, errors.New("inner2")) + return mErr + }()) + err = FlattenMultierror(mErr2) + must.Eq(t, `2 errors occurred: + * inner1 + * inner2 + +`, err.Error()) +} diff --git a/helper/raftutil/msgtypes.go b/helper/raftutil/msgtypes.go index 615881173c9..af4d7e5f41c 100644 --- a/helper/raftutil/msgtypes.go +++ b/helper/raftutil/msgtypes.go @@ -68,4 +68,6 @@ var msgTypeNames = map[structs.MessageType]string{ structs.WrappedRootKeysUpsertRequestType: "WrappedRootKeysUpsertRequestType", structs.NamespaceUpsertRequestType: "NamespaceUpsertRequestType", 
structs.NamespaceDeleteRequestType: "NamespaceDeleteRequestType", + structs.HostVolumeRegisterRequestType: "HostVolumeRegisterRequestType", + structs.HostVolumeDeleteRequestType: "HostVolumeDeleteRequestType", } diff --git a/nomad/client_host_volume_endpoint.go b/nomad/client_host_volume_endpoint.go new file mode 100644 index 00000000000..5749643d255 --- /dev/null +++ b/nomad/client_host_volume_endpoint.go @@ -0,0 +1,84 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package nomad + +import ( + "fmt" + "time" + + "github.com/armon/go-metrics" + log "github.com/hashicorp/go-hclog" + cstructs "github.com/hashicorp/nomad/client/structs" + "github.com/hashicorp/nomad/nomad/structs" +) + +// ClientHostVolume is the client RPC endpoint for host volumes +type ClientHostVolume struct { + srv *Server + ctx *RPCContext + logger log.Logger +} + +func NewClientHostVolumeEndpoint(srv *Server, ctx *RPCContext) *ClientHostVolume { + return &ClientHostVolume{srv: srv, ctx: ctx, logger: srv.logger.Named("client_host_volume")} +} + +func (c *ClientHostVolume) Create(args *cstructs.ClientHostVolumeCreateRequest, reply *cstructs.ClientHostVolumeCreateResponse) error { + defer metrics.MeasureSince([]string{"nomad", "client_host_node", "create"}, time.Now()) + return c.sendVolumeRPC( + args.NodeID, + "HostVolume.Create", + "ClientHostVolume.Create", + structs.RateMetricWrite, + args, + reply, + ) +} + +func (c *ClientHostVolume) Delete(args *cstructs.ClientHostVolumeDeleteRequest, reply *cstructs.ClientHostVolumeDeleteResponse) error { + defer metrics.MeasureSince([]string{"nomad", "client_host_volume", "delete"}, time.Now()) + return c.sendVolumeRPC( + args.NodeID, + "HostVolume.Delete", + "ClientHostVolume.Delete", + structs.RateMetricWrite, + args, + reply, + ) +} + +func (c *ClientHostVolume) sendVolumeRPC(nodeID, method, fwdMethod, op string, args any, reply any) error { + // client requests aren't RequestWithIdentity, so we use a placeholder here + // to populate the identity data for metrics + identityReq := &structs.GenericRequest{} + aclObj, err := c.srv.AuthenticateServerOnly(c.ctx, identityReq) + c.srv.MeasureRPCRate("client_host_volume", op, identityReq) + + if err != nil || !aclObj.AllowServerOp() { + return structs.ErrPermissionDenied + } + + // Make sure Node is valid and new enough to support RPC + snap, err := c.srv.State().Snapshot() + if err != nil { + return err + } + + _, err = getNodeForRpc(snap, nodeID) + if err != nil { + return err + } + + // Get the connection to the client + state, ok := c.srv.getNodeConn(nodeID) + if !ok { + return findNodeConnAndForward(c.srv, nodeID, fwdMethod, args, reply) + } + + // Make the RPC + if err := NodeRpc(state.Session, method, args, reply); err != nil { + return fmt.Errorf("%s error: %w", method, err) + } + return nil +} diff --git a/nomad/fsm.go b/nomad/fsm.go index 2ae3a2341b0..16a52e0810f 100644 --- a/nomad/fsm.go +++ b/nomad/fsm.go @@ -57,6 +57,7 @@ const ( NodePoolSnapshot SnapshotType = 28 JobSubmissionSnapshot SnapshotType = 29 RootKeySnapshot SnapshotType = 30 + HostVolumeSnapshot SnapshotType = 31 // TimeTableSnapshot // Deprecated: Nomad no longer supports TimeTable snapshots since 1.9.2 @@ -102,6 +103,7 @@ var snapshotTypeStrings = map[SnapshotType]string{ NodePoolSnapshot: "NodePool", JobSubmissionSnapshot: "JobSubmission", RootKeySnapshot: "WrappedRootKeys", + HostVolumeSnapshot: "HostVolumeSnapshot", NamespaceSnapshot: "Namespace", } @@ -381,9 +383,12 @@ func (n *nomadFSM) 
Apply(log *raft.Log) interface{} { return n.applyACLBindingRulesDelete(buf[1:], log.Index) case structs.WrappedRootKeysUpsertRequestType: return n.applyWrappedRootKeysUpsert(msgType, buf[1:], log.Index) - case structs.JobVersionTagRequestType: return n.applyJobVersionTag(buf[1:], log.Index) + case structs.HostVolumeRegisterRequestType: + return n.applyHostVolumeRegister(msgType, buf[1:], log.Index) + case structs.HostVolumeDeleteRequestType: + return n.applyHostVolumeDelete(msgType, buf[1:], log.Index) } // Check enterprise only message types. @@ -1936,6 +1941,17 @@ func (n *nomadFSM) restoreImpl(old io.ReadCloser, filter *FSMFilter) error { return err } + case HostVolumeSnapshot: + vol := new(structs.HostVolume) + if err := dec.Decode(vol); err != nil { + return err + } + if filter.Include(vol) { + if err := restore.HostVolumeRestore(vol); err != nil { + return err + } + } + default: // Check if this is an enterprise only object being restored restorer, ok := n.enterpriseRestorers[snapType] @@ -2404,6 +2420,36 @@ func (n *nomadFSM) applyWrappedRootKeysDelete(msgType structs.MessageType, buf [ return nil } +func (n *nomadFSM) applyHostVolumeRegister(msgType structs.MessageType, buf []byte, index uint64) interface{} { + defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_host_volume_register"}, time.Now()) + + var req structs.HostVolumeRegisterRequest + if err := structs.Decode(buf, &req); err != nil { + panic(fmt.Errorf("failed to decode request: %v", err)) + } + + if err := n.state.UpsertHostVolumes(index, req.Volumes); err != nil { + n.logger.Error("UpsertHostVolumes failed", "error", err) + return err + } + return nil +} + +func (n *nomadFSM) applyHostVolumeDelete(msgType structs.MessageType, buf []byte, index uint64) interface{} { + defer metrics.MeasureSince([]string{"nomad", "fsm", "apply_host_volume_delete"}, time.Now()) + + var req structs.HostVolumeDeleteRequest + if err := structs.Decode(buf, &req); err != nil { + panic(fmt.Errorf("failed to decode request: %v", err)) + } + + if err := n.state.DeleteHostVolumes(index, req.RequestNamespace(), req.VolumeIDs); err != nil { + n.logger.Error("DeleteHostVolumes failed", "error", err) + return err + } + return nil +} + func (s *nomadSnapshot) Persist(sink raft.SnapshotSink) error { defer metrics.MeasureSince([]string{"nomad", "fsm", "persist"}, time.Now()) // Register the nodes @@ -2537,6 +2583,10 @@ func (s *nomadSnapshot) Persist(sink raft.SnapshotSink) error { sink.Cancel() return err } + if err := s.persistHostVolumes(sink, encoder); err != nil { + sink.Cancel() + return err + } return nil } @@ -3274,6 +3324,22 @@ func (s *nomadSnapshot) persistJobSubmissions(sink raft.SnapshotSink, encoder *c return nil } +func (s *nomadSnapshot) persistHostVolumes(sink raft.SnapshotSink, encoder *codec.Encoder) error { + iter, err := s.snap.HostVolumes(nil, state.SortDefault) + if err != nil { + return err + } + for raw := iter.Next(); raw != nil; raw = iter.Next() { + vol := raw.(*structs.HostVolume) + + sink.Write([]byte{byte(HostVolumeSnapshot)}) + if err := encoder.Encode(vol); err != nil { + return err + } + } + return nil +} + // Release is a no-op, as we just need to GC the pointer // to the state store snapshot. There is nothing to explicitly // cleanup. diff --git a/nomad/host_volume_endpoint.go b/nomad/host_volume_endpoint.go new file mode 100644 index 00000000000..3d7f1a720e4 --- /dev/null +++ b/nomad/host_volume_endpoint.go @@ -0,0 +1,515 @@ +// Copyright (c) HashiCorp, Inc. 
+// SPDX-License-Identifier: BUSL-1.1 + +package nomad + +import ( + "fmt" + "net/http" + "strings" + "time" + + "github.com/armon/go-metrics" + "github.com/hashicorp/go-hclog" + "github.com/hashicorp/go-memdb" + multierror "github.com/hashicorp/go-multierror" + "github.com/hashicorp/nomad/acl" + cstructs "github.com/hashicorp/nomad/client/structs" + "github.com/hashicorp/nomad/helper" + "github.com/hashicorp/nomad/helper/uuid" + "github.com/hashicorp/nomad/nomad/state" + "github.com/hashicorp/nomad/nomad/state/paginator" + "github.com/hashicorp/nomad/nomad/structs" +) + +// HostVolume is the server RPC endpoint for host volumes +type HostVolume struct { + srv *Server + ctx *RPCContext + logger hclog.Logger +} + +func NewHostVolumeEndpoint(srv *Server, ctx *RPCContext) *HostVolume { + return &HostVolume{srv: srv, ctx: ctx, logger: srv.logger.Named("host_volume")} +} + +func (v *HostVolume) Get(args *structs.HostVolumeGetRequest, reply *structs.HostVolumeGetResponse) error { + authErr := v.srv.Authenticate(v.ctx, args) + if done, err := v.srv.forward("HostVolume.Get", args, args, reply); done { + return err + } + v.srv.MeasureRPCRate("host_volume", structs.RateMetricRead, args) + if authErr != nil { + return structs.ErrPermissionDenied + } + defer metrics.MeasureSince([]string{"nomad", "host_volume", "get"}, time.Now()) + + allowVolume := acl.NamespaceValidator(acl.NamespaceCapabilityHostVolumeRead) + aclObj, err := v.srv.ResolveACL(args) + if err != nil { + return err + } + if !allowVolume(aclObj, args.RequestNamespace()) { + return structs.ErrPermissionDenied + } + + opts := blockingOptions{ + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + run: func(ws memdb.WatchSet, store *state.StateStore) error { + + vol, err := store.HostVolumeByID(ws, args.Namespace, args.ID, true) + if err != nil { + return err + } + + reply.Volume = vol + if vol != nil { + reply.Index = vol.ModifyIndex + } else { + index, err := store.Index(state.TableHostVolumes) + if err != nil { + return err + } + + // Ensure we never set the index to zero, otherwise a blocking + // query cannot be used. We floor the index at one, since + // realistically the first write must have a higher index. 
+ if index == 0 { + index = 1 + } + reply.Index = index + } + return nil + }} + return v.srv.blockingRPC(&opts) +} + +func (v *HostVolume) List(args *structs.HostVolumeListRequest, reply *structs.HostVolumeListResponse) error { + authErr := v.srv.Authenticate(v.ctx, args) + if done, err := v.srv.forward("HostVolume.List", args, args, reply); done { + return err + } + v.srv.MeasureRPCRate("host_volume", structs.RateMetricList, args) + if authErr != nil { + return structs.ErrPermissionDenied + } + defer metrics.MeasureSince([]string{"nomad", "host_volume", "list"}, time.Now()) + + aclObj, err := v.srv.ResolveACL(args) + if err != nil { + return err + } + + ns := args.RequestNamespace() + + sort := state.SortOption(args.Reverse) + opts := blockingOptions{ + queryOpts: &args.QueryOptions, + queryMeta: &reply.QueryMeta, + run: func(ws memdb.WatchSet, store *state.StateStore) error { + + var iter memdb.ResultIterator + var err error + + switch { + case args.NodeID != "": + iter, err = store.HostVolumesByNodeID(ws, args.NodeID, sort) + case args.NodePool != "": + iter, err = store.HostVolumesByNodePool(ws, args.NodePool, sort) + default: + iter, err = store.HostVolumes(ws, sort) + } + if err != nil { + return err + } + + // Generate the tokenizer to use for pagination using namespace and + // ID to ensure complete uniqueness. + tokenizer := paginator.NewStructsTokenizer(iter, + paginator.StructsTokenizerOptions{ + WithNamespace: true, + WithID: true, + }, + ) + + filters := []paginator.Filter{ + paginator.GenericFilter{ + Allow: func(raw any) (bool, error) { + vol := raw.(*structs.HostVolume) + // empty prefix doesn't filter + if !strings.HasPrefix(vol.Name, args.Prefix) && + !strings.HasPrefix(vol.ID, args.Prefix) { + return false, nil + } + if args.NodeID != "" && vol.NodeID != args.NodeID { + return false, nil + } + if args.NodePool != "" && vol.NodePool != args.NodePool { + return false, nil + } + + if ns != structs.AllNamespacesSentinel && + vol.Namespace != ns { + return false, nil + } + + allowVolume := acl.NamespaceValidator(acl.NamespaceCapabilityHostVolumeRead) + return allowVolume(aclObj, ns), nil + }, + }, + } + + // Set up our output after we have checked the error. + var vols []*structs.HostVolumeStub + + // Build the paginator. This includes the function that is + // responsible for appending a variable to the variables + // stubs slice. + paginatorImpl, err := paginator.NewPaginator(iter, tokenizer, filters, args.QueryOptions, + func(raw any) error { + vol := raw.(*structs.HostVolume) + vols = append(vols, vol.Stub()) + return nil + }) + if err != nil { + return structs.NewErrRPCCodedf( + http.StatusBadRequest, "failed to create result paginator: %v", err) + } + + // Calling page populates our output variable stub array as well as + // returns the next token. + nextToken, err := paginatorImpl.Page() + if err != nil { + return structs.NewErrRPCCodedf( + http.StatusBadRequest, "failed to read result page: %v", err) + } + + reply.Volumes = vols + reply.NextToken = nextToken + + // Use the index table to populate the query meta as we have no way + // of tracking the max index on deletes. 
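
From the consumer side, the two read endpoints behave like Nomad's other blocking, paginated APIs (editor's sketch, not part of the patch; it assumes a server RPC handle srv, a previously observed lastIndex, and the standard fields on structs.QueryOptions). Get blocks on MinQueryIndex thanks to the index floor above, and List is driven by NextToken until it comes back empty:

    // blocking read of a single volume
    getReq := &structs.HostVolumeGetRequest{
        ID: volID,
        QueryOptions: structs.QueryOptions{
            Region:        "global",
            Namespace:     "default",
            MinQueryIndex: lastIndex, // returns once the index moves past lastIndex
        },
    }
    var getResp structs.HostVolumeGetResponse
    if err := srv.RPC("HostVolume.Get", getReq, &getResp); err != nil {
        return err
    }
    lastIndex = getResp.Index

    // paginated list across namespaces
    listReq := &structs.HostVolumeListRequest{
        QueryOptions: structs.QueryOptions{
            Region:    "global",
            Namespace: structs.AllNamespacesSentinel,
            PerPage:   50,
        },
    }
    for {
        var listResp structs.HostVolumeListResponse
        if err := srv.RPC("HostVolume.List", listReq, &listResp); err != nil {
            return err
        }
        for _, stub := range listResp.Volumes {
            fmt.Println(stub.ID, stub.Name, stub.NodeID)
        }
        if listResp.NextToken == "" {
            break
        }
        listReq.QueryOptions.NextToken = listResp.NextToken
    }
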
+ return v.srv.setReplyQueryMeta(store, state.TableHostVolumes, &reply.QueryMeta) + }, + } + + return v.srv.blockingRPC(&opts) +} + +func (v *HostVolume) Create(args *structs.HostVolumeCreateRequest, reply *structs.HostVolumeCreateResponse) error { + + authErr := v.srv.Authenticate(v.ctx, args) + if done, err := v.srv.forward("HostVolume.Create", args, args, reply); done { + return err + } + v.srv.MeasureRPCRate("host_volume", structs.RateMetricWrite, args) + if authErr != nil { + return structs.ErrPermissionDenied + } + defer metrics.MeasureSince([]string{"nomad", "host_volume", "create"}, time.Now()) + + allowVolume := acl.NamespaceValidator(acl.NamespaceCapabilityHostVolumeCreate) + aclObj, err := v.srv.ResolveACL(args) + if err != nil { + return err + } + + if len(args.Volumes) == 0 { + return fmt.Errorf("missing volume definition") + } + + for _, vol := range args.Volumes { + if vol.Namespace == "" { + vol.Namespace = args.RequestNamespace() + } + if !allowVolume(aclObj, vol.Namespace) { + return structs.ErrPermissionDenied + } + } + + // ensure we only try to create valid volumes or make valid updates to + // volumes + validVols, err := v.validateVolumeUpdates(args.Volumes) + if err != nil { + return err + } + + // Attempt to create all the validated volumes and write only successfully + // created volumes to raft. And we'll report errors for any failed volumes + // + // NOTE: creating the volume on the client via the plugin can't be made + // atomic with the registration, and creating the volume provides values we + // want to write on the Volume in raft anyways. + + // This can't reuse the validVols slice because we only want to write + // volumes we've successfully created or updated on the client to get + // updated in Raft. + raftArgs := &structs.HostVolumeRegisterRequest{ + Volumes: []*structs.HostVolume{}, + WriteRequest: args.WriteRequest, + } + + var mErr *multierror.Error + for _, vol := range validVols { + err = v.createVolume(vol) // mutates the vol + if err != nil { + mErr = multierror.Append(mErr, err) + } else { + raftArgs.Volumes = append(raftArgs.Volumes, vol) + } + } + + // if we created or updated any volumes, apply them to raft. 
+ var index uint64 + if len(raftArgs.Volumes) > 0 { + _, index, err = v.srv.raftApply(structs.HostVolumeRegisterRequestType, raftArgs) + if err != nil { + v.logger.Error("raft apply failed", "error", err, "method", "register") + mErr = multierror.Append(mErr, err) + } + } + + reply.Volumes = raftArgs.Volumes + reply.Index = index + return helper.FlattenMultierror(mErr) +} + +func (v *HostVolume) Register(args *structs.HostVolumeRegisterRequest, reply *structs.HostVolumeRegisterResponse) error { + + authErr := v.srv.Authenticate(v.ctx, args) + if done, err := v.srv.forward("HostVolume.Register", args, args, reply); done { + return err + } + v.srv.MeasureRPCRate("host_volume", structs.RateMetricWrite, args) + if authErr != nil { + return structs.ErrPermissionDenied + } + defer metrics.MeasureSince([]string{"nomad", "host_volume", "register"}, time.Now()) + + allowVolume := acl.NamespaceValidator(acl.NamespaceCapabilityHostVolumeRegister) + aclObj, err := v.srv.ResolveACL(args) + if err != nil { + return err + } + + if len(args.Volumes) == 0 { + return fmt.Errorf("missing volume definition") + } + + for _, vol := range args.Volumes { + if vol.Namespace == "" { + vol.Namespace = args.RequestNamespace() + } + if !allowVolume(aclObj, vol.Namespace) { + return structs.ErrPermissionDenied + } + } + + // ensure we only try to create valid volumes or make valid updates to + // volumes + validVols, err := v.validateVolumeUpdates(args.Volumes) + if err != nil { + return err + } + + raftArgs := &structs.HostVolumeRegisterRequest{ + Volumes: validVols, + WriteRequest: args.WriteRequest, + } + + var mErr *multierror.Error + var index uint64 + if len(raftArgs.Volumes) > 0 { + _, index, err = v.srv.raftApply(structs.HostVolumeRegisterRequestType, raftArgs) + if err != nil { + v.logger.Error("raft apply failed", "error", err, "method", "register") + mErr = multierror.Append(mErr, err) + } + } + + reply.Volumes = raftArgs.Volumes + reply.Index = index + return helper.FlattenMultierror(mErr) +} + +func (v *HostVolume) validateVolumeUpdates(requested []*structs.HostVolume) ([]*structs.HostVolume, error) { + + now := time.Now().UnixNano() + var vols []*structs.HostVolume + + snap, err := v.srv.State().Snapshot() + if err != nil { + return nil, err + } + + var mErr *multierror.Error + for _, vol := range requested { + vol.ModifyTime = now + + if vol.ID == "" { + vol.ID = uuid.Generate() + vol.CreateTime = now + } + + // if the volume already exists, we'll ensure we're validating the + // update + current, err := snap.HostVolumeByID(nil, vol.Namespace, vol.ID, false) + if err != nil { + mErr = multierror.Append(mErr, err) + continue + } + if err = vol.Validate(current); err != nil { + mErr = multierror.Append(mErr, err) + continue + } + + vols = append(vols, vol.Copy()) + } + + return vols, mErr.ErrorOrNil() +} + +func (v *HostVolume) createVolume(vol *structs.HostVolume) error { + + // TODO(1.10.0): proper node selection based on constraints and node + // pool. Also, should we move this into the validator step? 
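+ // The placeholder selection below just takes the first node returned for
+ // the requested node pool (or any node when no pool is given) and does
+ // not evaluate the volume's Constraints at all.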
+ if vol.NodeID == "" { + var iter memdb.ResultIterator + var err error + var raw any + if vol.NodePool != "" { + iter, err = v.srv.State().NodesByNodePool(nil, vol.NodePool) + } else { + iter, err = v.srv.State().Nodes(nil) + } + if err != nil { + return err + } + raw = iter.Next() + if raw == nil { + return fmt.Errorf("no node meets constraints for volume") + } + + node := raw.(*structs.Node) + vol.NodeID = node.ID + } + + method := "ClientHostVolume.Create" + cReq := &cstructs.ClientHostVolumeCreateRequest{ + ID: vol.ID, + Name: vol.Name, + PluginID: vol.PluginID, + NodeID: vol.NodeID, + RequestedCapacityMinBytes: vol.RequestedCapacityMinBytes, + RequestedCapacityMaxBytes: vol.RequestedCapacityMaxBytes, + Parameters: vol.Parameters, + } + cResp := &cstructs.ClientHostVolumeCreateResponse{} + err := v.srv.RPC(method, cReq, cResp) + if err != nil { + return err + } + + if vol.State == structs.HostVolumeStateUnknown { + vol.State = structs.HostVolumeStatePending + } + + vol.HostPath = cResp.HostPath + vol.CapacityBytes = cResp.CapacityBytes + + return nil +} + +func (v *HostVolume) Delete(args *structs.HostVolumeDeleteRequest, reply *structs.HostVolumeDeleteResponse) error { + + authErr := v.srv.Authenticate(v.ctx, args) + if done, err := v.srv.forward("HostVolume.Delete", args, args, reply); done { + return err + } + v.srv.MeasureRPCRate("host_volume", structs.RateMetricWrite, args) + if authErr != nil { + return structs.ErrPermissionDenied + } + defer metrics.MeasureSince([]string{"nomad", "host_volume", "delete"}, time.Now()) + + // Note that all deleted volumes need to be in the same namespace + allowVolume := acl.NamespaceValidator(acl.NamespaceCapabilityHostVolumeDelete) + aclObj, err := v.srv.ResolveACL(args) + if err != nil { + return err + } + if !allowVolume(aclObj, args.RequestNamespace()) { + return structs.ErrPermissionDenied + } + + if len(args.VolumeIDs) == 0 { + return fmt.Errorf("missing volumes to delete") + } + + var deletedVols []string + var index uint64 + + snap, err := v.srv.State().Snapshot() + if err != nil { + return err + } + + var mErr *multierror.Error + ns := args.RequestNamespace() + + for _, id := range args.VolumeIDs { + vol, err := snap.HostVolumeByID(nil, ns, id, true) + if err != nil { + return fmt.Errorf("could not query host volume: %w", err) + } + if vol == nil { + return fmt.Errorf("no such volume: %s", id) + } + if len(vol.Allocations) > 0 { + allocIDs := helper.ConvertSlice(vol.Allocations, + func(a *structs.AllocListStub) string { return a.ID }) + mErr = multierror.Append(mErr, + fmt.Errorf("volume %s in use by allocations: %v", id, allocIDs)) + continue + } + + err = v.deleteVolume(vol) + if err != nil { + mErr = multierror.Append(mErr, err) + } else { + deletedVols = append(deletedVols, id) + } + } + + if len(deletedVols) > 0 { + args.VolumeIDs = deletedVols + _, index, err = v.srv.raftApply(structs.HostVolumeDeleteRequestType, args) + if err != nil { + v.logger.Error("raft apply failed", "error", err, "method", "delete") + mErr = multierror.Append(mErr, err) + } + } + + reply.VolumeIDs = deletedVols + reply.Index = index + return helper.FlattenMultierror(mErr) +} + +func (v *HostVolume) deleteVolume(vol *structs.HostVolume) error { + + method := "ClientHostVolume.Delete" + cReq := &cstructs.ClientHostVolumeDeleteRequest{ + ID: vol.ID, + NodeID: vol.NodeID, + HostPath: vol.HostPath, + Parameters: vol.Parameters, + } + cResp := &cstructs.ClientHostVolumeDeleteResponse{} + err := v.srv.RPC(method, cReq, cResp) + if err != nil { + return err + } + 
+ return nil +} diff --git a/nomad/host_volume_endpoint_test.go b/nomad/host_volume_endpoint_test.go new file mode 100644 index 00000000000..2aff4fc001e --- /dev/null +++ b/nomad/host_volume_endpoint_test.go @@ -0,0 +1,566 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package nomad + +import ( + "context" + "errors" + "fmt" + "sync" + "testing" + "time" + + msgpackrpc "github.com/hashicorp/net-rpc-msgpackrpc/v2" + "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/client" + "github.com/hashicorp/nomad/client/config" + cstructs "github.com/hashicorp/nomad/client/structs" + "github.com/hashicorp/nomad/helper" + "github.com/hashicorp/nomad/nomad/mock" + "github.com/hashicorp/nomad/nomad/state" + "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/testutil" + "github.com/hashicorp/nomad/version" + "github.com/shoenig/test/must" + "github.com/shoenig/test/wait" +) + +func TestHostVolumeEndpoint_CreateRegisterGetDelete(t *testing.T) { + ci.Parallel(t) + + srv, _, cleanupSrv := TestACLServer(t, func(c *Config) { + c.NumSchedulers = 0 + }) + t.Cleanup(cleanupSrv) + testutil.WaitForLeader(t, srv.RPC) + store := srv.fsm.State() + + c1, node1 := newMockHostVolumeClient(t, srv, "prod") + c2, _ := newMockHostVolumeClient(t, srv, "default") + c2.setCreate(nil, errors.New("this node should never receive create RPC")) + c2.setDelete("this node should never receive delete RPC") + + index := uint64(1001) + + token := mock.CreatePolicyAndToken(t, store, index, "volume-manager", + `namespace "apps" { capabilities = ["host-volume-register"] } + node { policy = "read" }`).SecretID + + index++ + otherToken := mock.CreatePolicyAndToken(t, store, index, "other", + `namespace "foo" { capabilities = ["host-volume-register"] } + node { policy = "read" }`).SecretID + + index++ + powerToken := mock.CreatePolicyAndToken(t, store, index, "cluster-admin", + `namespace "*" { capabilities = ["host-volume-write"] } + node { policy = "read" }`).SecretID + + index++ + ns := "apps" + nspace := mock.Namespace() + nspace.Name = ns + must.NoError(t, store.UpsertNamespaces(index, []*structs.Namespace{nspace})) + + codec := rpcClient(t, srv) + + req := &structs.HostVolumeCreateRequest{ + Volumes: []*structs.HostVolume{}, + WriteRequest: structs.WriteRequest{ + Region: srv.Region(), + AuthToken: token}, + } + + t.Run("invalid create", func(t *testing.T) { + + // TODO(1.10.0): once validation logic for updating an existing volume is in + // place, fully test it here + + req.Namespace = ns + var resp structs.HostVolumeCreateResponse + err := msgpackrpc.CallWithCodec(codec, "HostVolume.Create", req, &resp) + must.EqError(t, err, "missing volume definition") + }) + + var vol1ID, vol2ID string + var expectIndex uint64 + + c1.setCreate(&cstructs.ClientHostVolumeCreateResponse{ + HostPath: "/var/nomad/alloc_mounts/foo", + CapacityBytes: 150000, + }, nil) + + vol1 := mock.HostVolumeRequest() + vol1.Namespace = "apps" + vol1.Name = "example1" + vol1.NodePool = "prod" + vol2 := mock.HostVolumeRequest() + vol2.Namespace = "apps" + vol2.Name = "example2" + vol2.NodePool = "prod" + req.Volumes = []*structs.HostVolume{vol1, vol2} + + t.Run("invalid permissions", func(t *testing.T) { + var resp structs.HostVolumeCreateResponse + req.AuthToken = otherToken + err := msgpackrpc.CallWithCodec(codec, "HostVolume.Create", req, &resp) + must.EqError(t, err, "Permission denied") + }) + + 
t.Run("valid create", func(t *testing.T) { + var resp structs.HostVolumeCreateResponse + req.AuthToken = token + err := msgpackrpc.CallWithCodec(codec, "HostVolume.Create", req, &resp) + must.NoError(t, err) + must.Len(t, 2, resp.Volumes) + vol1ID = resp.Volumes[0].ID + vol2ID = resp.Volumes[1].ID + expectIndex = resp.Index + + getReq := &structs.HostVolumeGetRequest{ + ID: vol1ID, + QueryOptions: structs.QueryOptions{ + Region: srv.Region(), + Namespace: ns, + AuthToken: otherToken, + }, + } + var getResp structs.HostVolumeGetResponse + err = msgpackrpc.CallWithCodec(codec, "HostVolume.Get", getReq, &getResp) + must.EqError(t, err, "Permission denied") + + getReq.AuthToken = token + err = msgpackrpc.CallWithCodec(codec, "HostVolume.Get", getReq, &getResp) + must.NoError(t, err) + must.NotNil(t, getResp.Volume) + }) + + t.Run("blocking Get unblocks on write", func(t *testing.T) { + vol1, err := store.HostVolumeByID(nil, ns, vol1ID, false) + must.NoError(t, err) + must.NotNil(t, vol1) + nextVol1 := vol1.Copy() + nextVol1.RequestedCapacityMaxBytes = 300000 + registerReq := &structs.HostVolumeRegisterRequest{ + Volumes: []*structs.HostVolume{nextVol1}, + WriteRequest: structs.WriteRequest{ + Region: srv.Region(), + Namespace: ns, + AuthToken: token}, + } + + c1.setCreate(nil, errors.New("should not call this endpoint on register RPC")) + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + t.Cleanup(cancel) + volCh := make(chan *structs.HostVolume) + errCh := make(chan error) + + getReq := &structs.HostVolumeGetRequest{ + ID: vol1ID, + QueryOptions: structs.QueryOptions{ + Region: srv.Region(), + Namespace: ns, + AuthToken: token, + MinQueryIndex: expectIndex, + }, + } + + go func() { + codec := rpcClient(t, srv) + var getResp structs.HostVolumeGetResponse + err := msgpackrpc.CallWithCodec(codec, "HostVolume.Get", getReq, &getResp) + if err != nil { + errCh <- err + } + volCh <- getResp.Volume + }() + + var registerResp structs.HostVolumeRegisterResponse + err = msgpackrpc.CallWithCodec(codec, "HostVolume.Register", registerReq, ®isterResp) + must.NoError(t, err) + + select { + case <-ctx.Done(): + t.Fatal("timeout or cancelled") + case vol := <-volCh: + must.Greater(t, expectIndex, vol.ModifyIndex) + case err := <-errCh: + t.Fatalf("unexpected error: %v", err) + } + }) + + t.Run("delete blocked by allocation claims", func(t *testing.T) { + vol2, err := store.HostVolumeByID(nil, ns, vol2ID, false) + must.NoError(t, err) + must.NotNil(t, vol2) + + // claim one of the volumes with a pending allocation + alloc := mock.MinAlloc() + alloc.NodeID = node1.ID + alloc.Job.TaskGroups[0].Volumes = map[string]*structs.VolumeRequest{"example": { + Name: "example", + Type: structs.VolumeTypeHost, + Source: vol2.Name, + }} + index++ + must.NoError(t, store.UpsertAllocs(structs.MsgTypeTestSetup, + index, []*structs.Allocation{alloc})) + + delReq := &structs.HostVolumeDeleteRequest{ + VolumeIDs: []string{vol1ID, vol2ID}, + WriteRequest: structs.WriteRequest{ + Region: srv.Region(), + Namespace: ns, + AuthToken: token}, + } + var delResp structs.HostVolumeDeleteResponse + + err = msgpackrpc.CallWithCodec(codec, "HostVolume.Delete", delReq, &delResp) + must.EqError(t, err, "Permission denied") + + delReq.AuthToken = powerToken + err = msgpackrpc.CallWithCodec(codec, "HostVolume.Delete", delReq, &delResp) + must.EqError(t, err, fmt.Sprintf("volume %s in use by allocations: [%s]", vol2ID, alloc.ID)) + + // volume not in use will be deleted even if we got an error + getReq := 
&structs.HostVolumeGetRequest{ + ID: vol1ID, + QueryOptions: structs.QueryOptions{ + Region: srv.Region(), + Namespace: ns, + AuthToken: token, + }, + } + var getResp structs.HostVolumeGetResponse + err = msgpackrpc.CallWithCodec(codec, "HostVolume.Get", getReq, &getResp) + must.NoError(t, err) + must.Nil(t, getResp.Volume) + + // update the allocations terminal so the delete works + alloc = alloc.Copy() + alloc.ClientStatus = structs.AllocClientStatusFailed + nArgs := &structs.AllocUpdateRequest{ + Alloc: []*structs.Allocation{alloc}, + WriteRequest: structs.WriteRequest{ + Region: srv.Region(), + AuthToken: node1.SecretID}, + } + err = msgpackrpc.CallWithCodec(codec, "Node.UpdateAlloc", nArgs, &structs.GenericResponse{}) + + delReq.VolumeIDs = []string{vol2ID} + err = msgpackrpc.CallWithCodec(codec, "HostVolume.Delete", delReq, &delResp) + must.NoError(t, err) + + getReq.ID = vol2ID + err = msgpackrpc.CallWithCodec(codec, "HostVolume.Get", getReq, &getResp) + must.NoError(t, err) + must.Nil(t, getResp.Volume) + }) +} + +func TestHostVolumeEndpoint_List(t *testing.T) { + + srv, rootToken, cleanupSrv := TestACLServer(t, func(c *Config) { + c.NumSchedulers = 0 + }) + t.Cleanup(cleanupSrv) + testutil.WaitForLeader(t, srv.RPC) + store := srv.fsm.State() + codec := rpcClient(t, srv) + + index := uint64(1001) + + token := mock.CreatePolicyAndToken(t, store, index, "volume-manager", + `namespace "apps" { capabilities = ["host-volume-register"] } + node { policy = "read" }`).SecretID + + index++ + otherToken := mock.CreatePolicyAndToken(t, store, index, "other", + `namespace "foo" { capabilities = ["host-volume-read"] } + node { policy = "read" }`).SecretID + + index++ + ns1 := "apps" + ns2 := "system" + nspace1, nspace2 := mock.Namespace(), mock.Namespace() + nspace1.Name = ns1 + nspace2.Name = ns2 + must.NoError(t, store.UpsertNamespaces(index, []*structs.Namespace{nspace1, nspace2})) + + nodes := []*structs.Node{ + mock.Node(), + mock.Node(), + mock.Node(), + } + nodes[2].NodePool = "prod" + index++ + must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, + index, nodes[0], state.NodeUpsertWithNodePool)) + must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, + index, nodes[1], state.NodeUpsertWithNodePool)) + must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, + index, nodes[2], state.NodeUpsertWithNodePool)) + + vol1, vol2 := mock.HostVolume(), mock.HostVolume() + vol1.NodeID = nodes[0].ID + vol1.Name = "foobar-example" + vol1.Namespace = ns1 + vol2.NodeID = nodes[1].ID + vol2.Name = "foobaz-example" + vol2.Namespace = ns1 + + vol3, vol4 := mock.HostVolume(), mock.HostVolume() + vol3.NodeID = nodes[2].ID + vol3.NodePool = "prod" + vol3.Namespace = ns2 + vol3.Name = "foobar-example" + vol4.Namespace = ns2 + vol4.NodeID = nodes[1].ID + vol4.Name = "foobaz-example" + + // we need to register these rather than upsert them so we have the correct + // indexes for unblocking later + registerReq := &structs.HostVolumeRegisterRequest{ + Volumes: []*structs.HostVolume{vol1, vol2, vol3, vol4}, + WriteRequest: structs.WriteRequest{ + Region: srv.Region(), + AuthToken: rootToken.SecretID}, + } + + var registerResp structs.HostVolumeRegisterResponse + err := msgpackrpc.CallWithCodec(codec, "HostVolume.Register", registerReq, ®isterResp) + must.NoError(t, err) + + testCases := []struct { + name string + req *structs.HostVolumeListRequest + expectVolIDs []string + }{ + { + name: "wrong namespace for token", + req: &structs.HostVolumeListRequest{ + QueryOptions: structs.QueryOptions{ + 
Region: srv.Region(), + Namespace: ns1, + AuthToken: otherToken, + }, + }, + expectVolIDs: []string{}, + }, + { + name: "query by namespace", + req: &structs.HostVolumeListRequest{ + QueryOptions: structs.QueryOptions{ + Region: srv.Region(), + Namespace: ns1, + AuthToken: token, + }, + }, + expectVolIDs: []string{vol1.ID, vol2.ID}, + }, + { + name: "wildcard namespace", + req: &structs.HostVolumeListRequest{ + QueryOptions: structs.QueryOptions{ + Region: srv.Region(), + Namespace: structs.AllNamespacesSentinel, + AuthToken: token, + }, + }, + expectVolIDs: []string{vol1.ID, vol2.ID, vol3.ID, vol4.ID}, + }, + { + name: "query by prefix", + req: &structs.HostVolumeListRequest{ + QueryOptions: structs.QueryOptions{ + Region: srv.Region(), + Namespace: ns1, + AuthToken: token, + Prefix: "foobar", + }, + }, + expectVolIDs: []string{vol1.ID}, + }, + { + name: "query by node", + req: &structs.HostVolumeListRequest{ + NodeID: nodes[1].ID, + QueryOptions: structs.QueryOptions{ + Region: srv.Region(), + Namespace: structs.AllNamespacesSentinel, + AuthToken: token, + }, + }, + expectVolIDs: []string{vol2.ID, vol4.ID}, + }, + { + name: "query by node pool", + req: &structs.HostVolumeListRequest{ + NodePool: "prod", + QueryOptions: structs.QueryOptions{ + Region: srv.Region(), + Namespace: structs.AllNamespacesSentinel, + AuthToken: token, + }, + }, + expectVolIDs: []string{vol3.ID}, + }, + { + name: "query by incompatible node ID and pool", + req: &structs.HostVolumeListRequest{ + NodeID: nodes[1].ID, + NodePool: "prod", + QueryOptions: structs.QueryOptions{ + Region: srv.Region(), + Namespace: structs.AllNamespacesSentinel, + AuthToken: token, + }, + }, + expectVolIDs: []string{}, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + var resp structs.HostVolumeListResponse + err := msgpackrpc.CallWithCodec(codec, "HostVolume.List", tc.req, &resp) + must.NoError(t, err) + + gotIDs := helper.ConvertSlice(resp.Volumes, + func(v *structs.HostVolumeStub) string { return v.ID }) + must.SliceContainsAll(t, tc.expectVolIDs, gotIDs, + must.Sprintf("got: %v", gotIDs)) + }) + } + + t.Run("blocking query unblocks", func(t *testing.T) { + + // Get response will include the volume's Index to block on + getReq := &structs.HostVolumeGetRequest{ + ID: vol1.ID, + QueryOptions: structs.QueryOptions{ + Region: srv.Region(), + Namespace: vol1.Namespace, + AuthToken: token, + }, + } + var getResp structs.HostVolumeGetResponse + err = msgpackrpc.CallWithCodec(codec, "HostVolume.Get", getReq, &getResp) + + nextVol := getResp.Volume.Copy() + nextVol.RequestedCapacityMaxBytes = 300000 + registerReq.Volumes = []*structs.HostVolume{nextVol} + registerReq.Namespace = nextVol.Namespace + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + t.Cleanup(cancel) + respCh := make(chan *structs.HostVolumeListResponse) + errCh := make(chan error) + + // prepare the blocking List query + + req := &structs.HostVolumeListRequest{ + QueryOptions: structs.QueryOptions{ + Region: srv.Region(), + Namespace: ns1, + AuthToken: token, + MinQueryIndex: getResp.Index, + }, + } + + go func() { + codec := rpcClient(t, srv) + var listResp structs.HostVolumeListResponse + err := msgpackrpc.CallWithCodec(codec, "HostVolume.List", req, &listResp) + if err != nil { + errCh <- err + } + respCh <- &listResp + }() + + err = msgpackrpc.CallWithCodec(codec, "HostVolume.Register", registerReq, ®isterResp) + must.NoError(t, err) + + select { + case <-ctx.Done(): + t.Fatal("timeout or cancelled") + case 
listResp := <-respCh: + must.Greater(t, req.MinQueryIndex, listResp.Index) + case err := <-errCh: + t.Fatalf("unexpected error: %v", err) + } + }) +} + +// mockHostVolumeClient models client RPCs that have side-effects on the +// client host +type mockHostVolumeClient struct { + lock sync.Mutex + nextCreateResponse *cstructs.ClientHostVolumeCreateResponse + nextCreateErr error + nextDeleteErr error +} + +// newMockHostVolumeClient configures a RPC-only Nomad test agent and returns a +// mockHostVolumeClient so we can send it client RPCs +func newMockHostVolumeClient(t *testing.T, srv *Server, pool string) (*mockHostVolumeClient, *structs.Node) { + t.Helper() + + mockClientEndpoint := &mockHostVolumeClient{} + + c1, cleanup := client.TestRPCOnlyClient(t, func(c *config.Config) { + c.Node.NodePool = pool + // TODO(1.10.0): we'll want to have a version gate for this feature + c.Node.Attributes["nomad.version"] = version.Version + }, srv.config.RPCAddr, map[string]any{"HostVolume": mockClientEndpoint}) + t.Cleanup(cleanup) + + must.Wait(t, wait.InitialSuccess(wait.BoolFunc(func() bool { + node, err := srv.fsm.State().NodeByID(nil, c1.NodeID()) + if err != nil { + return false + } + if node != nil && node.Status == structs.NodeStatusReady { + return true + } + return false + }), + wait.Timeout(time.Second*5), + wait.Gap(time.Millisecond), + ), must.Sprint("client did not fingerprint before timeout")) + + return mockClientEndpoint, c1.Node() +} + +func (v *mockHostVolumeClient) setCreate( + resp *cstructs.ClientHostVolumeCreateResponse, err error) { + v.lock.Lock() + defer v.lock.Unlock() + v.nextCreateResponse = resp + v.nextCreateErr = err +} + +func (v *mockHostVolumeClient) setDelete(errMsg string) { + v.lock.Lock() + defer v.lock.Unlock() + v.nextDeleteErr = errors.New(errMsg) +} + +func (v *mockHostVolumeClient) Create( + req *cstructs.ClientHostVolumeCreateRequest, + resp *cstructs.ClientHostVolumeCreateResponse) error { + v.lock.Lock() + defer v.lock.Unlock() + *resp = *v.nextCreateResponse + return v.nextCreateErr +} + +func (v *mockHostVolumeClient) Delete( + req *cstructs.ClientHostVolumeDeleteRequest, + resp *cstructs.ClientHostVolumeDeleteResponse) error { + v.lock.Lock() + defer v.lock.Unlock() + return v.nextDeleteErr +} diff --git a/nomad/mock/host_volumes.go b/nomad/mock/host_volumes.go index 748e693fe10..ddec8b7ae82 100644 --- a/nomad/mock/host_volumes.go +++ b/nomad/mock/host_volumes.go @@ -8,16 +8,12 @@ import ( "github.com/hashicorp/nomad/nomad/structs" ) -func HostVolume() *structs.HostVolume { - - volID := uuid.Generate() +func HostVolumeRequest() *structs.HostVolume { vol := &structs.HostVolume{ Namespace: structs.DefaultNamespace, - ID: volID, Name: "example", PluginID: "example-plugin", NodePool: structs.NodePoolDefault, - NodeID: uuid.Generate(), Constraints: []*structs.Constraint{ { LTarget: "${meta.rack}", @@ -25,9 +21,8 @@ func HostVolume() *structs.HostVolume { Operand: "=", }, }, - RequestedCapacityMin: 100000, - RequestedCapacityMax: 200000, - Capacity: 150000, + RequestedCapacityMinBytes: 100000, + RequestedCapacityMaxBytes: 200000, RequestedCapabilities: []*structs.HostVolumeCapability{ { AttachmentMode: structs.HostVolumeAttachmentModeFilesystem, @@ -35,8 +30,18 @@ func HostVolume() *structs.HostVolume { }, }, Parameters: map[string]string{"foo": "bar"}, - HostPath: "/var/data/nomad/alloc_mounts/" + volID, State: structs.HostVolumeStatePending, } return vol + +} + +func HostVolume() *structs.HostVolume { + volID := uuid.Generate() + vol := 
HostVolumeRequest() + vol.ID = volID + vol.NodeID = uuid.Generate() + vol.CapacityBytes = 150000 + vol.HostPath = "/var/data/nomad/alloc_mounts/" + volID + return vol } diff --git a/nomad/server.go b/nomad/server.go index d69cb2b8fc7..58a611da886 100644 --- a/nomad/server.go +++ b/nomad/server.go @@ -1357,6 +1357,8 @@ func (s *Server) setupRpcServer(server *rpc.Server, ctx *RPCContext) { _ = server.Register(NewStatusEndpoint(s, ctx)) _ = server.Register(NewSystemEndpoint(s, ctx)) _ = server.Register(NewVariablesEndpoint(s, ctx, s.encrypter)) + _ = server.Register(NewHostVolumeEndpoint(s, ctx)) + _ = server.Register(NewClientHostVolumeEndpoint(s, ctx)) // Register non-streaming diff --git a/nomad/state/state_store_host_volumes_test.go b/nomad/state/state_store_host_volumes_test.go index 327c54a1634..11b8371152e 100644 --- a/nomad/state/state_store_host_volumes_test.go +++ b/nomad/state/state_store_host_volumes_test.go @@ -109,7 +109,7 @@ func TestStateStore_HostVolumes_CRUD(t *testing.T) { // update all the volumes, which should update the state of vol2 as well for i, vol := range vols { vol = vol.Copy() - vol.RequestedCapacityMax = 300000 + vol.RequestedCapacityMaxBytes = 300000 vols[i] = vol } index++ diff --git a/nomad/structs/host_volumes.go b/nomad/structs/host_volumes.go index ade40647608..fbeca0430ee 100644 --- a/nomad/structs/host_volumes.go +++ b/nomad/structs/host_volumes.go @@ -43,14 +43,14 @@ type HostVolume struct { // Constraints are used to select a node. If the NodeID is provided, // Constraints are used to validate that the node meets those constraints at // the time of volume creation. - Constraints []*Constraint + Constraints []*Constraint `json:",omitempty"` // Because storage may allow only specific intervals of size, we accept a // min and max and return the actual capacity when the volume is created or // updated on the client - RequestedCapacityMin int64 // bytes - RequestedCapacityMax int64 // bytes - Capacity int64 // bytes + RequestedCapacityMinBytes int64 + RequestedCapacityMaxBytes int64 + CapacityBytes int64 // RequestedCapabilities defines the options available to group.volume // blocks. The scheduler checks against the listed capability blocks and @@ -58,7 +58,7 @@ type HostVolume struct { RequestedCapabilities []*HostVolumeCapability // Parameters are an opaque map of parameters for the host volume plugin. - Parameters map[string]string + Parameters map[string]string `json:",omitempty"` // HostPath is the path on disk where the volume's mount point was // created. We record this to make debugging easier. @@ -77,7 +77,7 @@ type HostVolume struct { // Allocations is the list of non-client-terminal allocations with claims on // this host volume. 
They are denormalized on read and this field will be // never written to Raft - Allocations []*AllocListStub + Allocations []*AllocListStub `json:",omitempty"` } type HostVolumeState string @@ -107,24 +107,27 @@ func (hv *HostVolume) Stub() *HostVolumeStub { } return &HostVolumeStub{ - Namespace: hv.Namespace, - ID: hv.ID, - Name: hv.Name, - PluginID: hv.PluginID, - NodePool: hv.NodePool, - NodeID: hv.NodeID, - Capacity: hv.Capacity, - State: hv.State, - CreateIndex: hv.CreateIndex, - CreateTime: hv.CreateTime, - ModifyIndex: hv.ModifyIndex, - ModifyTime: hv.ModifyTime, + Namespace: hv.Namespace, + ID: hv.ID, + Name: hv.Name, + PluginID: hv.PluginID, + NodePool: hv.NodePool, + NodeID: hv.NodeID, + CapacityBytes: hv.CapacityBytes, + State: hv.State, + CreateIndex: hv.CreateIndex, + CreateTime: hv.CreateTime, + ModifyIndex: hv.ModifyIndex, + ModifyTime: hv.ModifyTime, } } func (hv *HostVolume) Validate(existing *HostVolume) error { // TODO(1.10.0): validate a host volume is validate or that changes to a // host volume are valid + + // TODO(1.10.0): note that we have to handle nil existing *HostVolume + // parameter safely return nil } @@ -180,14 +183,14 @@ const ( // HostVolumeStub is used for responses for the list volumes endpoint type HostVolumeStub struct { - Namespace string - ID string - Name string - PluginID string - NodePool string - NodeID string - Capacity int64 // bytes - State HostVolumeState + Namespace string + ID string + Name string + PluginID string + NodePool string + NodeID string + CapacityBytes int64 + State HostVolumeState CreateIndex uint64 CreateTime int64 @@ -195,3 +198,54 @@ type HostVolumeStub struct { ModifyIndex uint64 ModifyTime int64 } + +type HostVolumeCreateRequest struct { + Volumes []*HostVolume + WriteRequest +} + +type HostVolumeCreateResponse struct { + Volumes []*HostVolume + WriteMeta +} + +type HostVolumeRegisterRequest struct { + Volumes []*HostVolume + WriteRequest +} + +type HostVolumeRegisterResponse struct { + Volumes []*HostVolume + WriteMeta +} + +type HostVolumeDeleteRequest struct { + VolumeIDs []string + WriteRequest +} + +type HostVolumeDeleteResponse struct { + VolumeIDs []string // volumes actually deleted + WriteMeta +} + +type HostVolumeGetRequest struct { + ID string + QueryOptions +} + +type HostVolumeGetResponse struct { + Volume *HostVolume + QueryMeta +} + +type HostVolumeListRequest struct { + NodeID string // filter + NodePool string // filter + QueryOptions +} + +type HostVolumeListResponse struct { + Volumes []*HostVolumeStub + QueryMeta +} diff --git a/nomad/structs/host_volumes_test.go b/nomad/structs/host_volumes_test.go index a07af92da38..a7c0aa9bc1e 100644 --- a/nomad/structs/host_volumes_test.go +++ b/nomad/structs/host_volumes_test.go @@ -29,7 +29,7 @@ func TestHostVolume_Copy(t *testing.T) { RTarget: "r1", Operand: "=", }}, - Capacity: 150000, + CapacityBytes: 150000, RequestedCapabilities: []*HostVolumeCapability{{ AttachmentMode: HostVolumeAttachmentModeFilesystem, AccessMode: HostVolumeAccessModeSingleNodeWriter, diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index b3f0e8e4ac2..f3d0f5dc1b0 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -132,6 +132,10 @@ const ( NamespaceUpsertRequestType MessageType = 64 NamespaceDeleteRequestType MessageType = 65 + // MessageTypes 66-74 are in Nomad Enterprise + HostVolumeRegisterRequestType MessageType = 75 + HostVolumeDeleteRequestType MessageType = 76 + // NOTE: MessageTypes are shared between CE and ENT. 
If you need to add a // new type, check that ENT is not already using that value. ) From a65358da7b173f45b88a4be8495296efb42e8333 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Mon, 11 Nov 2024 11:47:38 -0500 Subject: [PATCH 04/35] dynamic host volumes: HTTP API endpoint (#24380) This changeset implements the HTTP API endpoints for Dynamic Host Volumes. The `GET /v1/volumes` endpoint is shared between CSI and DHV with a query parameter for the type. In the interest of getting some working handlers available for use in development (and minimizing the size of the diff to review), this changeset doesn't do any sort of refactoring of how the existing List Volumes CSI endpoint works. That will come in a later PR, as will the corresponding `api` package updates we need to support the CLI. Ref: https://hashicorp.atlassian.net/browse/NET-11549 --- command/agent/csi_endpoint.go | 8 +- command/agent/host_volume_endpoint.go | 143 +++++++++++++++++++++ command/agent/host_volume_endpoint_test.go | 101 +++++++++++++++ command/agent/http.go | 1 + 4 files changed, 252 insertions(+), 1 deletion(-) create mode 100644 command/agent/host_volume_endpoint.go create mode 100644 command/agent/host_volume_endpoint_test.go diff --git a/command/agent/csi_endpoint.go b/command/agent/csi_endpoint.go index 2f57aef8865..9f4b87aa0fc 100644 --- a/command/agent/csi_endpoint.go +++ b/command/agent/csi_endpoint.go @@ -27,7 +27,13 @@ func (s *HTTPServer) CSIVolumesRequest(resp http.ResponseWriter, req *http.Reque if !ok { return []*structs.CSIVolListStub{}, nil } - if qtype[0] != "csi" { + // TODO(1.10.0): move handling of GET /v1/volumes/ out so that we're not + // co-mingling the call for listing host volume here + switch qtype[0] { + case "host": + return s.HostVolumesListRequest(resp, req) + case "csi": + default: return nil, nil } diff --git a/command/agent/host_volume_endpoint.go b/command/agent/host_volume_endpoint.go new file mode 100644 index 00000000000..288d44bfc0f --- /dev/null +++ b/command/agent/host_volume_endpoint.go @@ -0,0 +1,143 @@ +// Copyright (c) HashiCorp, Inc. 
+// SPDX-License-Identifier: BUSL-1.1 + +package agent + +import ( + "net/http" + "strings" + + "github.com/hashicorp/nomad/nomad/structs" +) + +func (s *HTTPServer) HostVolumesListRequest(resp http.ResponseWriter, req *http.Request) (any, error) { + args := structs.HostVolumeListRequest{} + if s.parse(resp, req, &args.Region, &args.QueryOptions) { + return nil, nil + } + + query := req.URL.Query() + args.Prefix = query.Get("prefix") + args.NodePool = query.Get("node_pool") + args.NodeID = query.Get("node_id") + + var out structs.HostVolumeListResponse + if err := s.agent.RPC("HostVolume.List", &args, &out); err != nil { + return nil, err + } + + setMeta(resp, &out.QueryMeta) + return out.Volumes, nil +} + +// HostVolumeSpecificRequest dispatches GET and PUT +func (s *HTTPServer) HostVolumeSpecificRequest(resp http.ResponseWriter, req *http.Request) (any, error) { + // Tokenize the suffix of the path to get the volume id, tolerating a + // present or missing trailing slash + reqSuffix := strings.TrimPrefix(req.URL.Path, "/v1/volume/host/") + tokens := strings.FieldsFunc(reqSuffix, func(c rune) bool { return c == '/' }) + + if len(tokens) == 0 { + return nil, CodedError(404, resourceNotFoundErr) + } + + switch req.Method { + + // PUT /v1/volume/host/create + // POST /v1/volume/host/create + // PUT /v1/volume/host/register + // POST /v1/volume/host/register + case http.MethodPut, http.MethodPost: + switch tokens[0] { + case "create", "": + return s.hostVolumeCreate(resp, req) + case "register": + return s.hostVolumeRegister(resp, req) + default: + return nil, CodedError(404, resourceNotFoundErr) + } + + // DELETE /v1/volume/host/:id + case http.MethodDelete: + return s.hostVolumeDelete(tokens[0], resp, req) + + // GET /v1/volume/host/:id + case http.MethodGet: + return s.hostVolumeGet(tokens[0], resp, req) + } + + return nil, CodedError(404, resourceNotFoundErr) +} + +func (s *HTTPServer) hostVolumeGet(id string, resp http.ResponseWriter, req *http.Request) (any, error) { + args := structs.HostVolumeGetRequest{ + ID: id, + } + if s.parse(resp, req, &args.Region, &args.QueryOptions) { + return nil, nil + } + + var out structs.HostVolumeGetResponse + if err := s.agent.RPC("HostVolume.Get", &args, &out); err != nil { + return nil, err + } + + setMeta(resp, &out.QueryMeta) + if out.Volume == nil { + return nil, CodedError(404, "volume not found") + } + + return out.Volume, nil +} + +func (s *HTTPServer) hostVolumeRegister(resp http.ResponseWriter, req *http.Request) (any, error) { + + args := structs.HostVolumeRegisterRequest{} + if err := decodeBody(req, &args); err != nil { + return err, CodedError(400, err.Error()) + } + s.parseWriteRequest(req, &args.WriteRequest) + + var out structs.HostVolumeRegisterResponse + if err := s.agent.RPC("HostVolume.Register", &args, &out); err != nil { + return nil, err + } + + setIndex(resp, out.Index) + + return &out, nil +} + +func (s *HTTPServer) hostVolumeCreate(resp http.ResponseWriter, req *http.Request) (any, error) { + + args := structs.HostVolumeCreateRequest{} + if err := decodeBody(req, &args); err != nil { + return err, CodedError(400, err.Error()) + } + s.parseWriteRequest(req, &args.WriteRequest) + + var out structs.HostVolumeCreateResponse + if err := s.agent.RPC("HostVolume.Create", &args, &out); err != nil { + return nil, err + } + + setIndex(resp, out.Index) + + return &out, nil +} + +func (s *HTTPServer) hostVolumeDelete(id string, resp http.ResponseWriter, req *http.Request) (any, error) { + // HTTP API only supports deleting a 
single ID because of compatibility with + // the existing HTTP routes for CSI + args := structs.HostVolumeDeleteRequest{VolumeIDs: []string{id}} + s.parseWriteRequest(req, &args.WriteRequest) + + var out structs.HostVolumeDeleteResponse + if err := s.agent.RPC("HostVolume.Delete", &args, &out); err != nil { + return nil, err + } + + setIndex(resp, out.Index) + + return nil, nil +} diff --git a/command/agent/host_volume_endpoint_test.go b/command/agent/host_volume_endpoint_test.go new file mode 100644 index 00000000000..b9144a736d9 --- /dev/null +++ b/command/agent/host_volume_endpoint_test.go @@ -0,0 +1,101 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package agent + +import ( + "fmt" + "net/http" + "net/http/httptest" + "net/url" + "testing" + + "github.com/hashicorp/nomad/nomad/mock" + "github.com/hashicorp/nomad/nomad/structs" + "github.com/shoenig/test/must" +) + +func TestHostVolumeEndpoint_CRUD(t *testing.T) { + httpTest(t, nil, func(s *TestAgent) { + + // Create a volume on the test node + + vol := mock.HostVolumeRequest() + reqBody := struct { + Volumes []*structs.HostVolume + }{Volumes: []*structs.HostVolume{vol}} + buf := encodeReq(reqBody) + req, err := http.NewRequest(http.MethodPut, "/v1/volume/host/create", buf) + must.NoError(t, err) + respW := httptest.NewRecorder() + + // Make the request and verify we got a valid volume back + + obj, err := s.Server.HostVolumeSpecificRequest(respW, req) + must.NoError(t, err) + must.NotNil(t, obj) + resp := obj.(*structs.HostVolumeCreateResponse) + must.Len(t, 1, resp.Volumes) + must.Eq(t, vol.Name, resp.Volumes[0].Name) + must.Eq(t, s.client.NodeID(), resp.Volumes[0].NodeID) + must.NotEq(t, "", respW.Result().Header.Get("X-Nomad-Index")) + + volID := resp.Volumes[0].ID + + // Verify volume was created + + path, err := url.JoinPath("/v1/volume/host/", volID) + must.NoError(t, err) + req, err = http.NewRequest(http.MethodGet, path, nil) + must.NoError(t, err) + obj, err = s.Server.HostVolumeSpecificRequest(respW, req) + must.NoError(t, err) + must.NotNil(t, obj) + respVol := obj.(*structs.HostVolume) + must.Eq(t, s.client.NodeID(), respVol.NodeID) + + // Update the volume (note: this doesn't update the volume on the client) + + vol = respVol.Copy() + vol.Parameters = map[string]string{"bar": "foo"} // swaps key and value + reqBody = struct { + Volumes []*structs.HostVolume + }{Volumes: []*structs.HostVolume{vol}} + buf = encodeReq(reqBody) + req, err = http.NewRequest(http.MethodPut, "/v1/volume/host/register", buf) + must.NoError(t, err) + obj, err = s.Server.HostVolumeSpecificRequest(respW, req) + must.NoError(t, err) + must.NotNil(t, obj) + regResp := obj.(*structs.HostVolumeRegisterResponse) + must.Len(t, 1, regResp.Volumes) + must.Eq(t, map[string]string{"bar": "foo"}, regResp.Volumes[0].Parameters) + + // Verify volume was updated + + path = fmt.Sprintf("/v1/volumes?type=host&node_id=%s", s.client.NodeID()) + req, err = http.NewRequest(http.MethodGet, path, nil) + must.NoError(t, err) + obj, err = s.Server.HostVolumesListRequest(respW, req) + must.NoError(t, err) + vols := obj.([]*structs.HostVolumeStub) + must.Len(t, 1, vols) + + // Delete the volume + + req, err = http.NewRequest(http.MethodDelete, fmt.Sprintf("/v1/volume/host/%s", volID), nil) + must.NoError(t, err) + _, err = s.Server.HostVolumeSpecificRequest(respW, req) + must.NoError(t, err) + + // Verify volume was deleted + + path, err = url.JoinPath("/v1/volume/host/", volID) + must.NoError(t, err) + req, err = 
http.NewRequest(http.MethodGet, path, nil) + must.NoError(t, err) + obj, err = s.Server.HostVolumeSpecificRequest(respW, req) + must.EqError(t, err, "volume not found") + must.Nil(t, obj) + }) +} diff --git a/command/agent/http.go b/command/agent/http.go index 3f4db49d65c..cb1b9359a5e 100644 --- a/command/agent/http.go +++ b/command/agent/http.go @@ -410,6 +410,7 @@ func (s *HTTPServer) registerHandlers(enableDebug bool) { s.mux.HandleFunc("/v1/volume/csi/", s.wrap(s.CSIVolumeSpecificRequest)) s.mux.HandleFunc("/v1/plugins", s.wrap(s.CSIPluginsRequest)) s.mux.HandleFunc("/v1/plugin/csi/", s.wrap(s.CSIPluginSpecificRequest)) + s.mux.HandleFunc("/v1/volume/host/", s.wrap(s.HostVolumeSpecificRequest)) s.mux.HandleFunc("/v1/acl/policies", s.wrap(s.ACLPoliciesRequest)) s.mux.HandleFunc("/v1/acl/policy/", s.wrap(s.ACLPolicySpecificRequest)) From 7c85176059bcdd36e42536dc7da43121456da6c8 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Mon, 11 Nov 2024 15:51:03 -0500 Subject: [PATCH 05/35] dynamic host volumes: basic CLI CRUD operations (#24382) This changeset implements a first pass at the CLI for Dynamic Host Volumes. Ref: https://hashicorp.atlassian.net/browse/NET-11549 --- api/host_volumes.go | 236 ++++++++++++++++++ command/volume_create.go | 10 +- command/volume_create_host.go | 228 +++++++++++++++++ command/volume_create_host_test.go | 227 +++++++++++++++++ command/volume_delete.go | 31 ++- command/volume_delete_host_test.go | 75 ++++++ command/volume_register.go | 14 +- ...er_test.go => volume_register_csi_test.go} | 0 command/volume_register_host.go | 35 +++ command/volume_register_host_test.go | 93 +++++++ command/volume_status.go | 33 ++- command/volume_status_csi.go | 8 +- ...atus_test.go => volume_status_csi_test.go} | 0 command/volume_status_host.go | 180 +++++++++++++ command/volume_status_host_test.go | 150 +++++++++++ 15 files changed, 1295 insertions(+), 25 deletions(-) create mode 100644 api/host_volumes.go create mode 100644 command/volume_create_host.go create mode 100644 command/volume_create_host_test.go create mode 100644 command/volume_delete_host_test.go rename command/{volume_register_test.go => volume_register_csi_test.go} (100%) create mode 100644 command/volume_register_host.go create mode 100644 command/volume_register_host_test.go rename command/{volume_status_test.go => volume_status_csi_test.go} (100%) create mode 100644 command/volume_status_host.go create mode 100644 command/volume_status_host_test.go diff --git a/api/host_volumes.go b/api/host_volumes.go new file mode 100644 index 00000000000..dae11afc68a --- /dev/null +++ b/api/host_volumes.go @@ -0,0 +1,236 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +package api + +import "net/url" + +// HostVolume represents a Dynamic Host Volume: a volume associated with a +// specific Nomad client agent but created via API. +type HostVolume struct { + // Namespace is the Nomad namespace for the host volume, which constrains + // which jobs can mount it. + Namespace string `mapstructure:"namespace" hcl:"namespace"` + + // ID is a UUID-like string generated by the server. + ID string `mapstructure:"id" hcl:"id"` + + // Name is the name that group.volume will use to identify the volume + // source. Not expected to be unique. + Name string `mapstructure:"name" hcl:"name"` + + // PluginID is the name of the host volume plugin on the client that will be + // used for creating the volume. If omitted, the client will use its default + // built-in plugin. 
+ PluginID string `mapstructure:"plugin_id" hcl:"plugin_id"` + + // NodePool is the node pool of the node where the volume is placed. If the + // user doesn't provide a node ID, a node will be selected using the + // NodePool and Constraints. If the user provides both NodePool and NodeID, + // NodePool will be used to validate the request. If omitted, the server + // will populate this value in before writing the volume to Raft. + NodePool string `mapstructure:"node_pool" hcl:"node_pool"` + + // NodeID is the node where the volume is placed. If the user doesn't + // provide a NodeID, one will be selected using the NodePool and + // Constraints. If omitted, this field will then be populated by the server + // before writing the volume to Raft. + NodeID string `mapstructure:"node_id" hcl:"node_id"` + + // Constraints are optional. If the NodeID is not provided, the NodePool and + // Constraints are used to select a node. If the NodeID is provided, + // Constraints are used to validate that the node meets those constraints at + // the time of volume creation. + Constraints []*Constraint `json:",omitempty" hcl:"constraint"` + + // Because storage may allow only specific intervals of size, we accept a + // min and max and return the actual capacity when the volume is created or + // updated on the client + RequestedCapacityMinBytes int64 `mapstructure:"capacity_min" hcl:"capacity_min"` + RequestedCapacityMaxBytes int64 `mapstructure:"capacity_max" hcl:"capacity_max"` + CapacityBytes int64 + + // RequestedCapabilities defines the options available to group.volume + // blocks. The scheduler checks against the listed capability blocks and + // selects a node for placement if *any* capability block works. + RequestedCapabilities []*HostVolumeCapability `hcl:"capability"` + + // Parameters are an opaque map of parameters for the host volume plugin. + Parameters map[string]string `json:",omitempty"` + + // HostPath is the path on disk where the volume's mount point was + // created. We record this to make debugging easier. + HostPath string `mapstructure:"host_path" hcl:"host_path"` + + // State represents the overall state of the volume. One of pending, ready, + // deleted. + State HostVolumeState + + CreateIndex uint64 + CreateTime int64 + + ModifyIndex uint64 + ModifyTime int64 + + // Allocations is the list of non-client-terminal allocations with claims on + // this host volume. They are denormalized on read and this field will be + // never written to Raft + Allocations []*AllocationListStub `json:",omitempty" mapstructure:"-" hcl:"-"` +} + +// HostVolume state reports the current status of the host volume +type HostVolumeState string + +const ( + HostVolumeStatePending HostVolumeState = "pending" + HostVolumeStateReady HostVolumeState = "ready" + HostVolumeStateDeleted HostVolumeState = "deleted" +) + +// HostVolumeCapability is the requested attachment and access mode for a volume +type HostVolumeCapability struct { + AttachmentMode HostVolumeAttachmentMode `mapstructure:"attachment_mode" hcl:"attachment_mode"` + AccessMode HostVolumeAccessMode `mapstructure:"access_mode" hcl:"access_mode"` +} + +// HostVolumeAttachmentMode chooses the type of storage API that will be used to +// interact with the device. 
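+// The attachment and access mode values below mirror the CSI volume
+// attachment and access modes.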
+type HostVolumeAttachmentMode string + +const ( + HostVolumeAttachmentModeUnknown HostVolumeAttachmentMode = "" + HostVolumeAttachmentModeBlockDevice HostVolumeAttachmentMode = "block-device" + HostVolumeAttachmentModeFilesystem HostVolumeAttachmentMode = "file-system" +) + +// HostVolumeAccessMode indicates how Nomad should make the volume available to +// concurrent allocations. +type HostVolumeAccessMode string + +const ( + HostVolumeAccessModeUnknown HostVolumeAccessMode = "" + + HostVolumeAccessModeSingleNodeReader HostVolumeAccessMode = "single-node-reader-only" + HostVolumeAccessModeSingleNodeWriter HostVolumeAccessMode = "single-node-writer" + + HostVolumeAccessModeMultiNodeReader HostVolumeAccessMode = "multi-node-reader-only" + HostVolumeAccessModeMultiNodeSingleWriter HostVolumeAccessMode = "multi-node-single-writer" + HostVolumeAccessModeMultiNodeMultiWriter HostVolumeAccessMode = "multi-node-multi-writer" +) + +// HostVolumeStub is used for responses for the List Volumes endpoint +type HostVolumeStub struct { + Namespace string + ID string + Name string + PluginID string + NodePool string + NodeID string + CapacityBytes int64 + State HostVolumeState + + CreateIndex uint64 + CreateTime int64 + + ModifyIndex uint64 + ModifyTime int64 +} + +// HostVolumes is used to access the host volumes API. +type HostVolumes struct { + client *Client +} + +// HostVolumes returns a new handle on the host volumes API. +func (c *Client) HostVolumes() *HostVolumes { + return &HostVolumes{client: c} +} + +type HostVolumeCreateRequest struct { + Volumes []*HostVolume +} + +type HostVolumeRegisterRequest struct { + Volumes []*HostVolume +} + +type HostVolumeListRequest struct { + NodeID string + NodePool string +} + +type HostVolumeDeleteRequest struct { + VolumeIDs []string +} + +// Create forwards to client agents so host volumes can be created on those +// hosts, and registers the volumes with Nomad servers. +func (hv *HostVolumes) Create(req *HostVolumeCreateRequest, opts *WriteOptions) ([]*HostVolume, *WriteMeta, error) { + var out struct { + Volumes []*HostVolume + } + wm, err := hv.client.put("/v1/volume/host/create", req, &out, opts) + if err != nil { + return nil, wm, err + } + return out.Volumes, wm, nil +} + +// Register registers host volumes that were created out-of-band with the Nomad +// servers. +func (hv *HostVolumes) Register(req *HostVolumeRegisterRequest, opts *WriteOptions) ([]*HostVolume, *WriteMeta, error) { + var out struct { + Volumes []*HostVolume + } + wm, err := hv.client.put("/v1/volume/host/register", req, &out, opts) + if err != nil { + return nil, wm, err + } + return out.Volumes, wm, nil +} + +// Get queries for a single host volume, by ID +func (hv *HostVolumes) Get(id string, opts *QueryOptions) (*HostVolume, *QueryMeta, error) { + var out *HostVolume + path, err := url.JoinPath("/v1/volume/host/", url.PathEscape(id)) + if err != nil { + return nil, nil, err + } + qm, err := hv.client.query(path, &out, opts) + if err != nil { + return nil, qm, err + } + return out, qm, nil +} + +// List queries for a set of host volumes, by namespace, node, node pool, or +// name prefix. 
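+//
+// A minimal usage sketch (the namespace and node pool values are illustrative,
+// and "client" is assumed to be an already-configured *Client):
+//
+//	stubs, _, err := client.HostVolumes().List(
+//		&HostVolumeListRequest{NodePool: "prod"},
+//		&QueryOptions{Namespace: "apps"})
+//	if err != nil {
+//		return err
+//	}
+//	for _, stub := range stubs {
+//		fmt.Println(stub.ID, stub.Name, stub.State)
+//	}
+//
+// List returns stubs only; use Get to retrieve a single volume's full
+// definition.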
+func (hv *HostVolumes) List(req *HostVolumeListRequest, opts *QueryOptions) ([]*HostVolumeStub, *QueryMeta, error) { + var out []*HostVolumeStub + qv := url.Values{} + qv.Set("type", "host") + if req != nil { + if req.NodeID != "" { + qv.Set("node_id", req.NodeID) + } + if req.NodePool != "" { + qv.Set("node_pool", req.NodePool) + } + } + + qm, err := hv.client.query("/v1/volumes?"+qv.Encode(), &out, opts) + if err != nil { + return nil, qm, err + } + return out, qm, nil +} + +// Delete deletes a host volume +func (hv *HostVolumes) Delete(id string, opts *WriteOptions) (*WriteMeta, error) { + path, err := url.JoinPath("/v1/volume/host/", url.PathEscape(id)) + if err != nil { + return nil, err + } + wm, err := hv.client.delete(path, nil, nil, opts) + return wm, err +} diff --git a/command/volume_create.go b/command/volume_create.go index c7d32fbe808..258e37b1f47 100644 --- a/command/volume_create.go +++ b/command/volume_create.go @@ -25,8 +25,9 @@ Usage: nomad volume create [options] If the supplied path is "-" the volume file is read from stdin. Otherwise, it is read from the file at the supplied path. - When ACLs are enabled, this command requires a token with the - 'csi-write-volume' capability for the volume's namespace. + When ACLs are enabled, this command requires a token with the appropriate + capability in the volume's namespace: the 'csi-write-volume' capability for + CSI volumes or 'host-volume-create' for dynamic host volumes. General Options: @@ -99,8 +100,9 @@ func (c *VolumeCreateCommand) Run(args []string) int { switch strings.ToLower(volType) { case "csi": - code := c.csiCreate(client, ast) - return code + return c.csiCreate(client, ast) + case "host": + return c.hostVolumeCreate(client, ast) default: c.Ui.Error(fmt.Sprintf("Error unknown volume type: %s", volType)) return 1 diff --git a/command/volume_create_host.go b/command/volume_create_host.go new file mode 100644 index 00000000000..32205610740 --- /dev/null +++ b/command/volume_create_host.go @@ -0,0 +1,228 @@ +// Copyright (c) HashiCorp, Inc. 
+// SPDX-License-Identifier: BUSL-1.1 + +package command + +import ( + "fmt" + "strconv" + + "github.com/hashicorp/hcl" + "github.com/hashicorp/hcl/hcl/ast" + "github.com/hashicorp/nomad/api" + "github.com/hashicorp/nomad/helper" + "github.com/mitchellh/mapstructure" +) + +func (c *VolumeCreateCommand) hostVolumeCreate(client *api.Client, ast *ast.File) int { + vol, err := decodeHostVolume(ast) + if err != nil { + c.Ui.Error(fmt.Sprintf("Error decoding the volume definition: %s", err)) + return 1 + } + + req := &api.HostVolumeCreateRequest{ + Volumes: []*api.HostVolume{vol}, + } + vols, _, err := client.HostVolumes().Create(req, nil) + if err != nil { + c.Ui.Error(fmt.Sprintf("Error creating volume: %s", err)) + return 1 + } + for _, vol := range vols { + // note: the command only ever returns 1 volume from the API + c.Ui.Output(fmt.Sprintf( + "Created host volume %s with ID %s", vol.Name, vol.ID)) + } + + // TODO(1.10.0): monitor so we can report when the node has fingerprinted + + return 0 +} + +func decodeHostVolume(input *ast.File) (*api.HostVolume, error) { + var err error + vol := &api.HostVolume{} + + list, ok := input.Node.(*ast.ObjectList) + if !ok { + return nil, fmt.Errorf("error parsing: root should be an object") + } + + // Decode the full thing into a map[string]interface for ease + var m map[string]any + err = hcl.DecodeObject(&m, list) + if err != nil { + return nil, err + } + + // Need to manually parse these fields/blocks + delete(m, "capability") + delete(m, "constraint") + delete(m, "capacity_max") + delete(m, "capacity_min") + delete(m, "type") + + // Decode the rest + err = mapstructure.WeakDecode(m, vol) + if err != nil { + return nil, err + } + + capacityMin, err := parseCapacityBytes(list.Filter("capacity_min")) + if err != nil { + return nil, fmt.Errorf("invalid capacity_min: %v", err) + } + vol.RequestedCapacityMinBytes = capacityMin + capacityMax, err := parseCapacityBytes(list.Filter("capacity_max")) + if err != nil { + return nil, fmt.Errorf("invalid capacity_max: %v", err) + } + vol.RequestedCapacityMaxBytes = capacityMax + + if o := list.Filter("constraint"); len(o.Items) > 0 { + if err := parseConstraints(&vol.Constraints, o); err != nil { + return nil, fmt.Errorf("invalid constraint: %v", err) + } + } + if o := list.Filter("capability"); len(o.Items) > 0 { + if err := parseHostVolumeCapabilities(&vol.RequestedCapabilities, o); err != nil { + return nil, fmt.Errorf("invalid capability: %v", err) + } + } + + return vol, nil +} + +func parseHostVolumeCapabilities(result *[]*api.HostVolumeCapability, list *ast.ObjectList) error { + for _, o := range list.Elem().Items { + valid := []string{"access_mode", "attachment_mode"} + if err := helper.CheckHCLKeys(o.Val, valid); err != nil { + return err + } + + ot, ok := o.Val.(*ast.ObjectType) + if !ok { + break + } + + var m map[string]any + if err := hcl.DecodeObject(&m, ot.List); err != nil { + return err + } + var cap *api.HostVolumeCapability + if err := mapstructure.WeakDecode(&m, &cap); err != nil { + return err + } + + *result = append(*result, cap) + } + + return nil +} + +func parseConstraints(result *[]*api.Constraint, list *ast.ObjectList) error { + for _, o := range list.Elem().Items { + valid := []string{ + "attribute", + "distinct_hosts", + "distinct_property", + "operator", + "regexp", + "set_contains", + "value", + "version", + "semver", + } + if err := helper.CheckHCLKeys(o.Val, valid); err != nil { + return err + } + + var m map[string]any + if err := 
hcl.DecodeObject(&m, o.Val); err != nil { + return err + } + + m["LTarget"] = m["attribute"] + m["RTarget"] = m["value"] + m["Operand"] = m["operator"] + + // If "version" is provided, set the operand + // to "version" and the value to the "RTarget" + if constraint, ok := m[api.ConstraintVersion]; ok { + m["Operand"] = api.ConstraintVersion + m["RTarget"] = constraint + } + + // If "semver" is provided, set the operand + // to "semver" and the value to the "RTarget" + if constraint, ok := m[api.ConstraintSemver]; ok { + m["Operand"] = api.ConstraintSemver + m["RTarget"] = constraint + } + + // If "regexp" is provided, set the operand + // to "regexp" and the value to the "RTarget" + if constraint, ok := m[api.ConstraintRegex]; ok { + m["Operand"] = api.ConstraintRegex + m["RTarget"] = constraint + } + + // If "set_contains" is provided, set the operand + // to "set_contains" and the value to the "RTarget" + if constraint, ok := m[api.ConstraintSetContains]; ok { + m["Operand"] = api.ConstraintSetContains + m["RTarget"] = constraint + } + + if value, ok := m[api.ConstraintDistinctHosts]; ok { + enabled, err := parseBool(value) + if err != nil { + return fmt.Errorf("distinct_hosts should be set to true or false; %v", err) + } + + // If it is not enabled, skip the constraint. + if !enabled { + continue + } + + m["Operand"] = api.ConstraintDistinctHosts + m["RTarget"] = strconv.FormatBool(enabled) + } + + if property, ok := m[api.ConstraintDistinctProperty]; ok { + m["Operand"] = api.ConstraintDistinctProperty + m["LTarget"] = property + } + + // Build the constraint + var c api.Constraint + if err := mapstructure.WeakDecode(m, &c); err != nil { + return err + } + if c.Operand == "" { + c.Operand = "=" + } + + *result = append(*result, &c) + } + + return nil +} + +// parseBool takes an interface value and tries to convert it to a boolean and +// returns an error if the type can't be converted. +func parseBool(value any) (bool, error) { + var enabled bool + var err error + switch data := value.(type) { + case string: + enabled, err = strconv.ParseBool(data) + case bool: + enabled = data + default: + err = fmt.Errorf("%v couldn't be converted to boolean value", value) + } + + return enabled, err +} diff --git a/command/volume_create_host_test.go b/command/volume_create_host_test.go new file mode 100644 index 00000000000..81e59367ff1 --- /dev/null +++ b/command/volume_create_host_test.go @@ -0,0 +1,227 @@ +// Copyright (c) HashiCorp, Inc. 
+// SPDX-License-Identifier: BUSL-1.1 + +package command + +import ( + "os" + "strings" + "testing" + + "github.com/hashicorp/hcl" + "github.com/hashicorp/nomad/api" + "github.com/hashicorp/nomad/ci" + "github.com/mitchellh/cli" + "github.com/shoenig/test/must" +) + +func TestHostVolumeCreateCommand_Run(t *testing.T) { + ci.Parallel(t) + srv, client, url := testServer(t, true, nil) + t.Cleanup(srv.Shutdown) + + waitForNodes(t, client) + + _, err := client.Namespaces().Register(&api.Namespace{Name: "prod"}, nil) + must.NoError(t, err) + + ui := cli.NewMockUi() + cmd := &VolumeCreateCommand{Meta: Meta{Ui: ui}} + + hclTestFile := ` +namespace = "prod" +name = "database" +type = "host" +plugin_id = "plugin_id" +node_pool = "default" + +capacity_min = "10GiB" +capacity_max = "20G" + +constraint { + attribute = "${attr.kernel.name}" + value = "linux" +} + +constraint { + attribute = "${meta.rack}" + value = "foo" +} + +capability { + access_mode = "single-node-writer" + attachment_mode = "file-system" +} + +capability { + access_mode = "single-node-reader" + attachment_mode = "block-device" +} + +parameters { + foo = "bar" +} +` + + file, err := os.CreateTemp(t.TempDir(), "volume-test-*.hcl") + must.NoError(t, err) + _, err = file.WriteString(hclTestFile) + must.NoError(t, err) + + args := []string{"-address", url, file.Name()} + + code := cmd.Run(args) + must.Eq(t, 0, code, must.Sprintf("got error: %s", ui.ErrorWriter.String())) + + out := ui.OutputWriter.String() + must.StrContains(t, out, "Created host volume") + parts := strings.Split(out, " ") + id := strings.TrimSpace(parts[len(parts)-1]) + + // Verify volume was created + got, _, err := client.HostVolumes().Get(id, &api.QueryOptions{Namespace: "prod"}) + must.NoError(t, err) + must.NotNil(t, got) +} + +func TestHostVolume_HCLDecode(t *testing.T) { + ci.Parallel(t) + + cases := []struct { + name string + hcl string + expected *api.HostVolume + errMsg string + }{ + { + name: "full spec", + hcl: ` +namespace = "prod" +name = "database" +type = "host" +plugin_id = "plugin_id" +node_pool = "default" + +capacity_min = "10GiB" +capacity_max = "20G" + +constraint { + attribute = "${attr.kernel.name}" + value = "linux" +} + +constraint { + attribute = "${meta.rack}" + value = "foo" +} + +capability { + access_mode = "single-node-writer" + attachment_mode = "file-system" +} + +capability { + access_mode = "single-node-reader-only" + attachment_mode = "block-device" +} + +parameters { + foo = "bar" +} +`, + expected: &api.HostVolume{ + Namespace: "prod", + Name: "database", + PluginID: "plugin_id", + NodePool: "default", + Constraints: []*api.Constraint{{ + LTarget: "${attr.kernel.name}", + RTarget: "linux", + Operand: "=", + }, { + LTarget: "${meta.rack}", + RTarget: "foo", + Operand: "=", + }}, + RequestedCapacityMinBytes: 10737418240, + RequestedCapacityMaxBytes: 20000000000, + RequestedCapabilities: []*api.HostVolumeCapability{ + { + AttachmentMode: api.HostVolumeAttachmentModeFilesystem, + AccessMode: api.HostVolumeAccessModeSingleNodeWriter, + }, + { + AttachmentMode: api.HostVolumeAttachmentModeBlockDevice, + AccessMode: api.HostVolumeAccessModeSingleNodeReader, + }, + }, + Parameters: map[string]string{"foo": "bar"}, + }, + }, + + { + name: "mostly empty spec", + hcl: ` +namespace = "prod" +name = "database" +type = "host" +plugin_id = "plugin_id" +node_pool = "default" +`, + expected: &api.HostVolume{ + Namespace: "prod", + Name: "database", + PluginID: "plugin_id", + NodePool: "default", + }, + }, + 
+ { + name: "invalid capacity", + hcl: ` +namespace = "prod" +name = "database" +type = "host" +plugin_id = "plugin_id" +node_pool = "default" + +capacity_min = "a" +`, + expected: nil, + errMsg: "invalid capacity_min: could not parse value as bytes: strconv.ParseFloat: parsing \"\": invalid syntax", + }, + + { + name: "invalid constraint", + hcl: ` +namespace = "prod" +name = "database" +type = "host" +plugin_id = "plugin_id" +node_pool = "default" + +constraint { + distinct_hosts = "foo" +} + +`, + expected: nil, + errMsg: "invalid constraint: distinct_hosts should be set to true or false; strconv.ParseBool: parsing \"foo\": invalid syntax", + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + ast, err := hcl.ParseString(tc.hcl) + must.NoError(t, err) + vol, err := decodeHostVolume(ast) + if tc.errMsg == "" { + must.NoError(t, err) + } else { + must.EqError(t, err, tc.errMsg) + } + must.Eq(t, tc.expected, vol) + }) + } + +} diff --git a/command/volume_delete.go b/command/volume_delete.go index 7dc3df1e128..ab8be61104b 100644 --- a/command/volume_delete.go +++ b/command/volume_delete.go @@ -41,6 +41,9 @@ Delete Options: -secret Secrets to pass to the plugin to delete the snapshot. Accepts multiple flags in the form -secret key=value + + -type + Type of volume to delete. Must be one of "csi" or "host". Defaults to "csi". ` return strings.TrimSpace(helpText) } @@ -80,9 +83,11 @@ func (c *VolumeDeleteCommand) Name() string { return "volume delete" } func (c *VolumeDeleteCommand) Run(args []string) int { var secretsArgs flaghelper.StringFlag + var typeArg string flags := c.Meta.FlagSet(c.Name(), FlagSetClient) flags.Usage = func() { c.Ui.Output(c.Help()) } flags.Var(&secretsArgs, "secret", "secrets for snapshot, ex. -secret key=value") + flags.StringVar(&typeArg, "type", "csi", "type of volume (csi or host)") if err := flags.Parse(args); err != nil { c.Ui.Error(fmt.Sprintf("Error parsing arguments %s", err)) @@ -105,6 +110,19 @@ func (c *VolumeDeleteCommand) Run(args []string) int { return 1 } + switch typeArg { + case "csi": + return c.deleteCSIVolume(client, volID, secretsArgs) + case "host": + return c.deleteHostVolume(client, volID) + default: + c.Ui.Error(fmt.Sprintf("No such volume type %q", typeArg)) + return 1 + } +} + +func (c *VolumeDeleteCommand) deleteCSIVolume(client *api.Client, volID string, secretsArgs flaghelper.StringFlag) int { + secrets := api.CSISecrets{} for _, kv := range secretsArgs { if key, value, found := strings.Cut(kv, "="); found { @@ -115,7 +133,7 @@ func (c *VolumeDeleteCommand) Run(args []string) int { } } - err = client.CSIVolumes().DeleteOpts(&api.CSIVolumeDeleteRequest{ + err := client.CSIVolumes().DeleteOpts(&api.CSIVolumeDeleteRequest{ ExternalVolumeID: volID, Secrets: secrets, }, nil) @@ -127,3 +145,14 @@ func (c *VolumeDeleteCommand) Run(args []string) int { c.Ui.Output(fmt.Sprintf("Successfully deleted volume %q!", volID)) return 0 } + +func (c *VolumeDeleteCommand) deleteHostVolume(client *api.Client, volID string) int { + _, err := client.HostVolumes().Delete(volID, nil) + if err != nil { + c.Ui.Error(fmt.Sprintf("Error deleting volume: %s", err)) + return 1 + } + + c.Ui.Output(fmt.Sprintf("Successfully deleted volume %q!", volID)) + return 0 +} diff --git a/command/volume_delete_host_test.go b/command/volume_delete_host_test.go new file mode 100644 index 00000000000..4da028d7085 --- /dev/null +++ b/command/volume_delete_host_test.go @@ -0,0 +1,75 @@ +// Copyright (c) HashiCorp, Inc. 
+// SPDX-License-Identifier: BUSL-1.1 + +package command + +import ( + "fmt" + "os" + "strings" + "testing" + + "github.com/hashicorp/nomad/api" + "github.com/hashicorp/nomad/ci" + "github.com/mitchellh/cli" + "github.com/shoenig/test/must" +) + +func TestHostVolumeDeleteCommand(t *testing.T) { + ci.Parallel(t) + srv, client, url := testServer(t, true, nil) + t.Cleanup(srv.Shutdown) + + waitForNodes(t, client) + + _, err := client.Namespaces().Register(&api.Namespace{Name: "prod"}, nil) + must.NoError(t, err) + + nodes, _, err := client.Nodes().List(nil) + must.NoError(t, err) + must.Len(t, 1, nodes) + nodeID := nodes[0].ID + + ui := cli.NewMockUi() + + hclTestFile := fmt.Sprintf(` +namespace = "prod" +name = "example" +type = "host" +plugin_id = "plugin_id" +node_id = "%s" +node_pool = "default" +`, nodeID) + + file, err := os.CreateTemp(t.TempDir(), "volume-test-*.hcl") + must.NoError(t, err) + _, err = file.WriteString(hclTestFile) + must.NoError(t, err) + + args := []string{"-address", url, file.Name()} + regCmd := &VolumeRegisterCommand{Meta: Meta{Ui: ui}} + code := regCmd.Run(args) + must.Eq(t, 0, code, must.Sprintf("got error: %s", ui.ErrorWriter.String())) + + out := ui.OutputWriter.String() + must.StrContains(t, out, "Registered host volume") + parts := strings.Split(out, " ") + id := strings.TrimSpace(parts[len(parts)-1]) + + ui.OutputWriter.Reset() + + // missing the namespace + cmd := &VolumeDeleteCommand{Meta: Meta{Ui: ui}} + args = []string{"-address", url, "-type", "host", id} + code = cmd.Run(args) + must.Eq(t, 1, code) + must.StrContains(t, ui.ErrorWriter.String(), "no such volume") + ui.ErrorWriter.Reset() + + // fix the namespace + args = []string{"-address", url, "-type", "host", "-namespace", "prod", id} + code = cmd.Run(args) + must.Eq(t, 0, code, must.Sprintf("got error: %s", ui.ErrorWriter.String())) + out = ui.OutputWriter.String() + must.StrContains(t, out, fmt.Sprintf("Successfully deleted volume %q!", id)) +} diff --git a/command/volume_register.go b/command/volume_register.go index 3a8815347ff..19527cf6b1f 100644 --- a/command/volume_register.go +++ b/command/volume_register.go @@ -28,8 +28,9 @@ Usage: nomad volume register [options] If the supplied path is "-" the volume file is read from stdin. Otherwise, it is read from the file at the supplied path. - When ACLs are enabled, this command requires a token with the - 'csi-write-volume' capability for the volume's namespace. + When ACLs are enabled, this command requires a token with the appropriate + capability in the volume's namespace: the 'csi-write-volume' capability for + CSI volumes or 'host-volume-register' for dynamic host volumes. 
General Options: @@ -103,16 +104,13 @@ func (c *VolumeRegisterCommand) Run(args []string) int { switch volType { case "csi": - code := c.csiRegister(client, ast) - if code != 0 { - return code - } + return c.csiRegister(client, ast) + case "host": + return c.hostVolumeRegister(client, ast) default: c.Ui.Error(fmt.Sprintf("Error unknown volume type: %s", volType)) return 1 } - - return 0 } // parseVolume is used to parse the quota specification from HCL diff --git a/command/volume_register_test.go b/command/volume_register_csi_test.go similarity index 100% rename from command/volume_register_test.go rename to command/volume_register_csi_test.go diff --git a/command/volume_register_host.go b/command/volume_register_host.go new file mode 100644 index 00000000000..705f2faaf26 --- /dev/null +++ b/command/volume_register_host.go @@ -0,0 +1,35 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package command + +import ( + "fmt" + + "github.com/hashicorp/hcl/hcl/ast" + "github.com/hashicorp/nomad/api" +) + +func (c *VolumeRegisterCommand) hostVolumeRegister(client *api.Client, ast *ast.File) int { + vol, err := decodeHostVolume(ast) + if err != nil { + c.Ui.Error(fmt.Sprintf("Error decoding the volume definition: %s", err)) + return 1 + } + + req := &api.HostVolumeRegisterRequest{ + Volumes: []*api.HostVolume{vol}, + } + vols, _, err := client.HostVolumes().Register(req, nil) + if err != nil { + c.Ui.Error(fmt.Sprintf("Error registering volume: %s", err)) + return 1 + } + for _, vol := range vols { + // note: the command only ever returns 1 volume from the API + c.Ui.Output(fmt.Sprintf( + "Registered host volume %s with ID %s", vol.Name, vol.ID)) + } + + return 0 +} diff --git a/command/volume_register_host_test.go b/command/volume_register_host_test.go new file mode 100644 index 00000000000..0ce33770197 --- /dev/null +++ b/command/volume_register_host_test.go @@ -0,0 +1,93 @@ +// Copyright (c) HashiCorp, Inc. 
+// SPDX-License-Identifier: BUSL-1.1 + +package command + +import ( + "fmt" + "os" + "strings" + "testing" + + "github.com/hashicorp/nomad/api" + "github.com/hashicorp/nomad/ci" + "github.com/mitchellh/cli" + "github.com/shoenig/test/must" +) + +func TestHostVolumeRegisterCommand_Run(t *testing.T) { + ci.Parallel(t) + srv, client, url := testServer(t, true, nil) + t.Cleanup(srv.Shutdown) + + waitForNodes(t, client) + + _, err := client.Namespaces().Register(&api.Namespace{Name: "prod"}, nil) + must.NoError(t, err) + + nodes, _, err := client.Nodes().List(nil) + must.NoError(t, err) + must.Len(t, 1, nodes) + nodeID := nodes[0].ID + + ui := cli.NewMockUi() + cmd := &VolumeRegisterCommand{Meta: Meta{Ui: ui}} + + hclTestFile := fmt.Sprintf(` +namespace = "prod" +name = "database" +type = "host" +plugin_id = "plugin_id" +node_id = "%s" +node_pool = "default" + +capacity = 150000000 +host_path = "/var/nomad/alloc_mounts/example" +capacity_min = "10GiB" +capacity_max = "20G" + +constraint { + attribute = "${attr.kernel.name}" + value = "linux" +} + +constraint { + attribute = "${meta.rack}" + value = "foo" +} + +capability { + access_mode = "single-node-writer" + attachment_mode = "file-system" +} + +capability { + access_mode = "single-node-reader-only" + attachment_mode = "block-device" +} + +parameters { + foo = "bar" +} +`, nodeID) + + file, err := os.CreateTemp(t.TempDir(), "volume-test-*.hcl") + must.NoError(t, err) + _, err = file.WriteString(hclTestFile) + must.NoError(t, err) + + args := []string{"-address", url, file.Name()} + + code := cmd.Run(args) + must.Eq(t, 0, code, must.Sprintf("got error: %s", ui.ErrorWriter.String())) + + out := ui.OutputWriter.String() + must.StrContains(t, out, "Registered host volume") + parts := strings.Split(out, " ") + id := strings.TrimSpace(parts[len(parts)-1]) + + // Verify volume was registered + got, _, err := client.HostVolumes().Get(id, &api.QueryOptions{Namespace: "prod"}) + must.NoError(t, err) + must.NotNil(t, got) +} diff --git a/command/volume_status.go b/command/volume_status.go index 22fc6afc225..b6163cca6f4 100644 --- a/command/volume_status.go +++ b/command/volume_status.go @@ -52,6 +52,12 @@ Status Options: -t Format and display volumes using a Go template. + + -node-pool + Filter results by node pool, when no volume ID is provided and -type=host. + + -node + Filter results by node ID, when no volume ID is provided and -type=host. 
` return strings.TrimSpace(helpText) } @@ -68,6 +74,10 @@ func (c *VolumeStatusCommand) AutocompleteFlags() complete.Flags { "-verbose": complete.PredictNothing, "-json": complete.PredictNothing, "-t": complete.PredictAnything, + + // TODO(1.10.0): wire-up predictions for nodes and node pools + "-node": complete.PredictNothing, + "-node-pool": complete.PredictNothing, }) } @@ -89,7 +99,7 @@ func (c *VolumeStatusCommand) AutocompleteArgs() complete.Predictor { func (c *VolumeStatusCommand) Name() string { return "volume status" } func (c *VolumeStatusCommand) Run(args []string) int { - var typeArg string + var typeArg, nodeID, nodePool string flags := c.Meta.FlagSet(c.Name(), FlagSetClient) flags.Usage = func() { c.Ui.Output(c.Help()) } @@ -98,6 +108,8 @@ func (c *VolumeStatusCommand) Run(args []string) int { flags.BoolVar(&c.verbose, "verbose", false, "") flags.BoolVar(&c.json, "json", false, "") flags.StringVar(&c.template, "t", "", "") + flags.StringVar(&nodeID, "node", "", "") + flags.StringVar(&nodePool, "node-pool", "", "") if err := flags.Parse(args); err != nil { c.Ui.Error(fmt.Sprintf("Error parsing arguments %s", err)) @@ -130,12 +142,17 @@ func (c *VolumeStatusCommand) Run(args []string) int { id = args[0] } - code := c.csiStatus(client, id) - if code != 0 { - return code + switch typeArg { + case "csi", "": + if nodeID != "" || nodePool != "" { + c.Ui.Error("-node and -node-pool can only be used with -type host") + return 1 + } + return c.csiStatus(client, id) + case "host": + return c.hostVolumeStatus(client, id, nodeID, nodePool) + default: + c.Ui.Error(fmt.Sprintf("No such volume type %q", typeArg)) + return 1 } - - // Extend this section with other volume implementations - - return 0 } diff --git a/command/volume_status_csi.go b/command/volume_status_csi.go index 31fdeeb2331..01644b513d8 100644 --- a/command/volume_status_csi.go +++ b/command/volume_status_csi.go @@ -23,7 +23,7 @@ func (c *VolumeStatusCommand) csiBanner() { func (c *VolumeStatusCommand) csiStatus(client *api.Client, id string) int { // Invoke list mode if no volume id if id == "" { - return c.listVolumes(client) + return c.listCSIVolumes(client) } // get a CSI volume that matches the given prefix or a list of all matches if an @@ -55,7 +55,7 @@ func (c *VolumeStatusCommand) csiStatus(client *api.Client, id string) int { return 1 } - str, err := c.formatBasic(vol) + str, err := c.formatCSIBasic(vol) if err != nil { c.Ui.Error(fmt.Sprintf("Error formatting volume: %s", err)) return 1 @@ -65,7 +65,7 @@ func (c *VolumeStatusCommand) csiStatus(client *api.Client, id string) int { return 0 } -func (c *VolumeStatusCommand) listVolumes(client *api.Client) int { +func (c *VolumeStatusCommand) listCSIVolumes(client *api.Client) int { c.csiBanner() vols, _, err := client.CSIVolumes().List(nil) @@ -182,7 +182,7 @@ func csiFormatSortedVolumes(vols []*api.CSIVolumeListStub) (string, error) { return formatList(rows), nil } -func (c *VolumeStatusCommand) formatBasic(vol *api.CSIVolume) (string, error) { +func (c *VolumeStatusCommand) formatCSIBasic(vol *api.CSIVolume) (string, error) { if c.json || len(c.template) > 0 { out, err := Format(c.json, c.template, vol) if err != nil { diff --git a/command/volume_status_test.go b/command/volume_status_csi_test.go similarity index 100% rename from command/volume_status_test.go rename to command/volume_status_csi_test.go diff --git a/command/volume_status_host.go b/command/volume_status_host.go new file mode 100644 index 00000000000..7878afc55ce --- /dev/null +++ 
b/command/volume_status_host.go @@ -0,0 +1,180 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package command + +import ( + "fmt" + "sort" + "strings" + + humanize "github.com/dustin/go-humanize" + "github.com/hashicorp/nomad/api" +) + +func (c *VolumeStatusCommand) hostVolumeStatus(client *api.Client, id, nodeID, nodePool string) int { + if id == "" { + return c.listHostVolumes(client, nodeID, nodePool) + } + + if nodeID != "" || nodePool != "" { + c.Ui.Error("-node or -node-pool options can only be used when no ID is provided") + return 1 + } + + // get a host volume that matches the given prefix or a list of all matches + // if an exact match is not found. note we can't use the shared getByPrefix + // helper here because the List API doesn't match the required signature + + volStub, possible, err := c.getByPrefix(client, id) + if err != nil { + c.Ui.Error(fmt.Sprintf("Error listing volumes: %s", err)) + return 1 + } + if len(possible) > 0 { + out, err := c.formatHostVolumes(possible) + if err != nil { + c.Ui.Error(fmt.Sprintf("Error formatting: %s", err)) + return 1 + } + c.Ui.Error(fmt.Sprintf("Prefix matched multiple volumes\n\n%s", out)) + return 1 + } + + vol, _, err := client.HostVolumes().Get(volStub.ID, nil) + if err != nil { + c.Ui.Error(fmt.Sprintf("Error querying volume: %s", err)) + return 1 + } + + str, err := c.formatHostVolume(vol) + if err != nil { + c.Ui.Error(fmt.Sprintf("Error formatting volume: %s", err)) + return 1 + } + c.Ui.Output(str) + return 0 +} + +func (c *VolumeStatusCommand) listHostVolumes(client *api.Client, nodeID, nodePool string) int { + vols, _, err := client.HostVolumes().List(&api.HostVolumeListRequest{ + NodeID: nodeID, + NodePool: nodePool, + }, nil) + if err != nil { + c.Ui.Error(fmt.Sprintf("Error querying volumes: %s", err)) + return 1 + } + + str, err := c.formatHostVolumes(vols) + if err != nil { + c.Ui.Error(fmt.Sprintf("Error formatting volumes: %s", err)) + return 1 + } + c.Ui.Output(str) + + return 0 +} + +func (c *VolumeStatusCommand) getByPrefix(client *api.Client, prefix string) (*api.HostVolumeStub, []*api.HostVolumeStub, error) { + vols, _, err := client.HostVolumes().List(nil, &api.QueryOptions{ + Prefix: prefix, + Namespace: c.namespace, + }) + + if err != nil { + return nil, nil, fmt.Errorf("error querying volumes: %s", err) + } + switch len(vols) { + case 0: + return nil, nil, fmt.Errorf("no volumes with prefix or ID %q found", prefix) + case 1: + return vols[0], nil, nil + default: + // search for exact matches to account for multiple exact ID or name + // matches across namespaces + var match *api.HostVolumeStub + exactMatchesCount := 0 + for _, vol := range vols { + if vol.ID == prefix || vol.Name == prefix { + exactMatchesCount++ + match = vol + } + } + if exactMatchesCount == 1 { + return match, nil, nil + } + return nil, vols, nil + } +} + +func (c *VolumeStatusCommand) formatHostVolume(vol *api.HostVolume) (string, error) { + if c.json || len(c.template) > 0 { + out, err := Format(c.json, c.template, vol) + if err != nil { + return "", fmt.Errorf("format error: %v", err) + } + return out, nil + } + + output := []string{ + fmt.Sprintf("ID|%s", vol.ID), + fmt.Sprintf("Name|%s", vol.Name), + fmt.Sprintf("Namespace|%s", vol.Namespace), + fmt.Sprintf("Plugin ID|%s", vol.PluginID), + fmt.Sprintf("Node ID|%s", vol.NodeID), + fmt.Sprintf("Node Pool|%s", vol.NodePool), + fmt.Sprintf("Capacity|%s", humanize.IBytes(uint64(vol.CapacityBytes))), + fmt.Sprintf("State|%s", vol.State), + 
fmt.Sprintf("Host Path|%s", vol.HostPath), + } + + // Exit early + if c.short { + return formatKV(output), nil + } + + full := []string{formatKV(output)} + + // Format the allocs + banner := c.Colorize().Color("\n[bold]Allocations[reset]") + allocs := formatAllocListStubs(vol.Allocations, c.verbose, c.length) + full = append(full, banner) + full = append(full, allocs) + + return strings.Join(full, "\n"), nil +} + +func (c *VolumeStatusCommand) formatHostVolumes(vols []*api.HostVolumeStub) (string, error) { + // Sort the output by volume ID + sort.Slice(vols, func(i, j int) bool { return vols[i].ID < vols[j].ID }) + + if c.json || len(c.template) > 0 { + out, err := Format(c.json, c.template, vols) + if err != nil { + return "", fmt.Errorf("format error: %v", err) + } + return out, nil + } + + // Truncate the id unless full length is requested + length := shortId + if c.verbose { + length = fullId + } + + rows := make([]string, len(vols)+1) + rows[0] = "ID|Name|Namespace|Plugin ID|Node ID|Node Pool|State" + for i, v := range vols { + rows[i+1] = fmt.Sprintf("%s|%s|%s|%s|%s|%s|%s", + limit(v.ID, length), + v.Name, + v.Namespace, + v.PluginID, + limit(v.NodeID, length), + v.NodePool, + v.State, + ) + } + return formatList(rows), nil +} diff --git a/command/volume_status_host_test.go b/command/volume_status_host_test.go new file mode 100644 index 00000000000..608a7a64d36 --- /dev/null +++ b/command/volume_status_host_test.go @@ -0,0 +1,150 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package command + +import ( + "fmt" + "os" + "strings" + "testing" + + "github.com/hashicorp/nomad/api" + "github.com/hashicorp/nomad/ci" + "github.com/mitchellh/cli" + "github.com/shoenig/test/must" +) + +func TestHostVolumeStatusCommand_Args(t *testing.T) { + ci.Parallel(t) + ui := cli.NewMockUi() + cmd := &VolumeStatusCommand{Meta: Meta{Ui: ui}} + + code := cmd.Run([]string{ + "-type", "host", + "-node", "6063016a-9d4c-11ef-85fc-9be98efe7e76", + "-node-pool", "prod", + "6e3e80f2-9d4c-11ef-97b1-d38cf64416a4", + }) + must.One(t, code) + + out := ui.ErrorWriter.String() + must.StrContains(t, out, "-node or -node-pool options can only be used when no ID is provided") +} + +func TestHostVolumeStatusCommand_List(t *testing.T) { + ci.Parallel(t) + srv, client, url := testServer(t, true, nil) + t.Cleanup(srv.Shutdown) + + waitForNodes(t, client) + + _, err := client.Namespaces().Register(&api.Namespace{Name: "prod"}, nil) + must.NoError(t, err) + + nodes, _, err := client.Nodes().List(nil) + must.NoError(t, err) + must.Len(t, 1, nodes) + nodeID := nodes[0].ID + + ui := cli.NewMockUi() + + vols := []api.NamespacedID{ + {Namespace: "prod", ID: "database"}, + {Namespace: "prod", ID: "certs"}, + {Namespace: "default", ID: "example"}, + } + + for _, vol := range vols { + hclTestFile := fmt.Sprintf(` +namespace = "%s" +name = "%s" +type = "host" +plugin_id = "plugin_id" +node_id = "%s" +node_pool = "default" +`, vol.Namespace, vol.ID, nodeID) + + file, err := os.CreateTemp(t.TempDir(), "volume-test-*.hcl") + must.NoError(t, err) + _, err = file.WriteString(hclTestFile) + must.NoError(t, err) + + args := []string{"-address", url, file.Name()} + cmd := &VolumeCreateCommand{Meta: Meta{Ui: ui}} + code := cmd.Run(args) + must.Eq(t, 0, code, must.Sprintf("got error: %s", ui.ErrorWriter.String())) + + out := ui.OutputWriter.String() + must.StrContains(t, out, "Created host volume") + ui.OutputWriter.Reset() + } + + cmd := &VolumeStatusCommand{Meta: Meta{Ui: ui}} + 
args := []string{"-address", url, "-type", "host", "-namespace", "prod"} + code := cmd.Run(args) + must.Eq(t, 0, code, must.Sprintf("got error: %s", ui.ErrorWriter.String())) + out := ui.OutputWriter.String() + must.StrContains(t, out, "certs") + must.StrContains(t, out, "database") + must.StrNotContains(t, out, "example") +} + +func TestHostVolumeStatusCommand_Get(t *testing.T) { + ci.Parallel(t) + srv, client, url := testServer(t, true, nil) + t.Cleanup(srv.Shutdown) + + waitForNodes(t, client) + + _, err := client.Namespaces().Register(&api.Namespace{Name: "prod"}, nil) + must.NoError(t, err) + + nodes, _, err := client.Nodes().List(nil) + must.NoError(t, err) + must.Len(t, 1, nodes) + nodeID := nodes[0].ID + + ui := cli.NewMockUi() + + hclTestFile := fmt.Sprintf(` +namespace = "prod" +name = "example" +type = "host" +plugin_id = "plugin_id" +node_id = "%s" +node_pool = "default" +`, nodeID) + + file, err := os.CreateTemp(t.TempDir(), "volume-test-*.hcl") + must.NoError(t, err) + _, err = file.WriteString(hclTestFile) + must.NoError(t, err) + + args := []string{"-address", url, file.Name()} + regCmd := &VolumeRegisterCommand{Meta: Meta{Ui: ui}} + code := regCmd.Run(args) + must.Eq(t, 0, code, must.Sprintf("got error: %s", ui.ErrorWriter.String())) + + out := ui.OutputWriter.String() + must.StrContains(t, out, "Registered host volume") + parts := strings.Split(out, " ") + id := strings.TrimSpace(parts[len(parts)-1]) + + ui.OutputWriter.Reset() + + // missing the namespace + cmd := &VolumeStatusCommand{Meta: Meta{Ui: ui}} + args = []string{"-address", url, "-type", "host", id} + code = cmd.Run(args) + must.Eq(t, 1, code) + must.StrContains(t, ui.ErrorWriter.String(), + "Error listing volumes: no volumes with prefix or ID") + ui.ErrorWriter.Reset() + + args = []string{"-address", url, "-type", "host", "-namespace", "prod", id} + code = cmd.Run(args) + must.Eq(t, 0, code, must.Sprintf("got error: %s", ui.ErrorWriter.String())) + out = ui.OutputWriter.String() + must.StrContains(t, out, "example") +} From c3735127ae69bac2201a51810f20507e0136b14f Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Tue, 19 Nov 2024 16:50:12 -0500 Subject: [PATCH 06/35] allow FlattenMultierror to accept standard error --- helper/funcs.go | 7 ++++++- helper/funcs_test.go | 12 +++++++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/helper/funcs.go b/helper/funcs.go index 4dbf1223c62..5b394c09fa6 100644 --- a/helper/funcs.go +++ b/helper/funcs.go @@ -528,7 +528,12 @@ func Merge[T comparable](a, b T) T { // FlattenMultierror takes a multierror and unwraps it if there's only one error // in the output, otherwise returning the multierror or nil. 
-func FlattenMultierror(mErr *multierror.Error) error { +func FlattenMultierror(err error) error { + mErr, ok := err.(*multierror.Error) + if !ok { + return err + } + // note: mErr is a pointer so we still need to nil-check even after the cast if mErr == nil { return nil } diff --git a/helper/funcs_test.go b/helper/funcs_test.go index 9b4e5426558..86e6fd4c8e8 100644 --- a/helper/funcs_test.go +++ b/helper/funcs_test.go @@ -488,11 +488,21 @@ func Test_SliceSetEq(t *testing.T) { func TestFlattenMultiError(t *testing.T) { + err := FlattenMultierror(nil) + must.Nil(t, err) + + err0 := errors.New("oh no!") + err = FlattenMultierror(err0) + must.Eq(t, `oh no!`, err.Error()) + var mErr0 *multierror.Error + err = FlattenMultierror(mErr0) + must.Nil(t, err) + mErr0 = multierror.Append(mErr0, func() error { return nil }()) - err := FlattenMultierror(mErr0) + err = FlattenMultierror(mErr0) must.Nil(t, err) var mErr1 *multierror.Error From 0f92134a7f661b31b47671e81b3ce7050364f87c Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Tue, 19 Nov 2024 11:48:29 -0500 Subject: [PATCH 07/35] dynamic host volumes: fix unblocking bug in state store The `HostVolumeByID` state store method didn't add a watch channel to the watchset, which meant that it would never unblock. The tests missed this because they were racy, so move the updates for unblocking tests into a `time.After` call to ensure the queries are blocked before the update happens. --- nomad/host_volume_endpoint_test.go | 21 ++++++++++++++++----- nomad/state/state_store_host_volumes.go | 4 +++- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/nomad/host_volume_endpoint_test.go b/nomad/host_volume_endpoint_test.go index 2aff4fc001e..13a0ef6e94e 100644 --- a/nomad/host_volume_endpoint_test.go +++ b/nomad/host_volume_endpoint_test.go @@ -177,9 +177,14 @@ func TestHostVolumeEndpoint_CreateRegisterGetDelete(t *testing.T) { volCh <- getResp.Volume }() - var registerResp structs.HostVolumeRegisterResponse - err = msgpackrpc.CallWithCodec(codec, "HostVolume.Register", registerReq, ®isterResp) - must.NoError(t, err) + // re-register the volume long enough later that we can be sure we won't + // win a race with the get RPC goroutine + time.AfterFunc(200*time.Millisecond, func() { + codec := rpcClient(t, srv) + var registerResp structs.HostVolumeRegisterResponse + err = msgpackrpc.CallWithCodec(codec, "HostVolume.Register", registerReq, ®isterResp) + must.NoError(t, err) + }) select { case <-ctx.Done(): @@ -480,8 +485,14 @@ func TestHostVolumeEndpoint_List(t *testing.T) { respCh <- &listResp }() - err = msgpackrpc.CallWithCodec(codec, "HostVolume.Register", registerReq, ®isterResp) - must.NoError(t, err) + // re-register the volume long enough later that we can be sure we won't + // win a race with the get RPC goroutine + time.AfterFunc(200*time.Millisecond, func() { + codec := rpcClient(t, srv) + var registerResp structs.HostVolumeRegisterResponse + err = msgpackrpc.CallWithCodec(codec, "HostVolume.Register", registerReq, ®isterResp) + must.NoError(t, err) + }) select { case <-ctx.Done(): diff --git a/nomad/state/state_store_host_volumes.go b/nomad/state/state_store_host_volumes.go index 6805314ff97..732399ffb46 100644 --- a/nomad/state/state_store_host_volumes.go +++ b/nomad/state/state_store_host_volumes.go @@ -13,10 +13,12 @@ import ( // HostVolumeByID retrieve a specific host volume func (s *StateStore) HostVolumeByID(ws memdb.WatchSet, ns, id string, withAllocs bool) (*structs.HostVolume, error) { txn := s.db.ReadTxn() - obj, err := 
txn.First(TableHostVolumes, indexID, ns, id) + watchCh, obj, err := txn.FirstWatch(TableHostVolumes, indexID, ns, id) if err != nil { return nil, err } + ws.Add(watchCh) + if obj == nil { return nil, nil } From 10a5f4861fc361514e4176b29775f3a7a88df7f4 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Mon, 18 Nov 2024 15:06:50 -0500 Subject: [PATCH 08/35] dynamic host volumes: create/register RPC validation Add several validation steps in the create/register RPCs for dynamic host volumes. We first check that submitted volumes are self-consistent (ex. max capacity is more than min capacity), then that any updates we've made are valid. And we validate against state: preventing claimed volumes from being updated and preventing placement requests for nodes that don't exist. Ref: https://github.com/hashicorp/nomad/issues/15489 --- command/agent/host_volume_endpoint_test.go | 2 +- command/volume_create_host_test.go | 2 +- command/volume_delete_host_test.go | 4 + command/volume_status_host_test.go | 8 + nomad/host_volume_endpoint.go | 87 ++++++++-- nomad/host_volume_endpoint_test.go | 144 ++++++++++++++-- nomad/mock/host_volumes.go | 13 +- nomad/structs/host_volumes.go | 131 +++++++++++++- nomad/structs/host_volumes_test.go | 191 +++++++++++++++++++++ 9 files changed, 535 insertions(+), 47 deletions(-) diff --git a/command/agent/host_volume_endpoint_test.go b/command/agent/host_volume_endpoint_test.go index b9144a736d9..a328b85e42e 100644 --- a/command/agent/host_volume_endpoint_test.go +++ b/command/agent/host_volume_endpoint_test.go @@ -20,7 +20,7 @@ func TestHostVolumeEndpoint_CRUD(t *testing.T) { // Create a volume on the test node - vol := mock.HostVolumeRequest() + vol := mock.HostVolumeRequest(structs.DefaultNamespace) reqBody := struct { Volumes []*structs.HostVolume }{Volumes: []*structs.HostVolume{vol}} diff --git a/command/volume_create_host_test.go b/command/volume_create_host_test.go index 81e59367ff1..f7f1cd5a57f 100644 --- a/command/volume_create_host_test.go +++ b/command/volume_create_host_test.go @@ -54,7 +54,7 @@ capability { } capability { - access_mode = "single-node-reader" + access_mode = "single-node-reader-only" attachment_mode = "block-device" } diff --git a/command/volume_delete_host_test.go b/command/volume_delete_host_test.go index 4da028d7085..0cc24645376 100644 --- a/command/volume_delete_host_test.go +++ b/command/volume_delete_host_test.go @@ -39,6 +39,10 @@ type = "host" plugin_id = "plugin_id" node_id = "%s" node_pool = "default" +capability { + access_mode = "single-node-reader-only" + attachment_mode = "file-system" +} `, nodeID) file, err := os.CreateTemp(t.TempDir(), "volume-test-*.hcl") diff --git a/command/volume_status_host_test.go b/command/volume_status_host_test.go index 608a7a64d36..d4784029555 100644 --- a/command/volume_status_host_test.go +++ b/command/volume_status_host_test.go @@ -63,6 +63,10 @@ type = "host" plugin_id = "plugin_id" node_id = "%s" node_pool = "default" +capability { + access_mode = "single-node-reader-only" + attachment_mode = "file-system" +} `, vol.Namespace, vol.ID, nodeID) file, err := os.CreateTemp(t.TempDir(), "volume-test-*.hcl") @@ -114,6 +118,10 @@ type = "host" plugin_id = "plugin_id" node_id = "%s" node_pool = "default" +capability { + access_mode = "single-node-reader-only" + attachment_mode = "file-system" +} `, nodeID) file, err := os.CreateTemp(t.TempDir(), "volume-test-*.hcl") diff --git a/nomad/host_volume_endpoint.go b/nomad/host_volume_endpoint.go index 3d7f1a720e4..3998dabc3f2 100644 --- 
a/nomad/host_volume_endpoint.go +++ b/nomad/host_volume_endpoint.go @@ -16,7 +16,6 @@ import ( "github.com/hashicorp/nomad/acl" cstructs "github.com/hashicorp/nomad/client/structs" "github.com/hashicorp/nomad/helper" - "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/state" "github.com/hashicorp/nomad/nomad/state/paginator" "github.com/hashicorp/nomad/nomad/structs" @@ -232,7 +231,7 @@ func (v *HostVolume) Create(args *structs.HostVolumeCreateRequest, reply *struct // volumes validVols, err := v.validateVolumeUpdates(args.Volumes) if err != nil { - return err + return helper.FlattenMultierror(err) } // Attempt to create all the validated volumes and write only successfully @@ -310,7 +309,7 @@ func (v *HostVolume) Register(args *structs.HostVolumeRegisterRequest, reply *st // volumes validVols, err := v.validateVolumeUpdates(args.Volumes) if err != nil { - return err + return helper.FlattenMultierror(err) } raftArgs := &structs.HostVolumeRegisterRequest{ @@ -335,7 +334,7 @@ func (v *HostVolume) Register(args *structs.HostVolumeRegisterRequest, reply *st func (v *HostVolume) validateVolumeUpdates(requested []*structs.HostVolume) ([]*structs.HostVolume, error) { - now := time.Now().UnixNano() + now := time.Now() var vols []*structs.HostVolume snap, err := v.srv.State().Snapshot() @@ -345,31 +344,85 @@ func (v *HostVolume) validateVolumeUpdates(requested []*structs.HostVolume) ([]* var mErr *multierror.Error for _, vol := range requested { - vol.ModifyTime = now - - if vol.ID == "" { - vol.ID = uuid.Generate() - vol.CreateTime = now - } - // if the volume already exists, we'll ensure we're validating the - // update - current, err := snap.HostVolumeByID(nil, vol.Namespace, vol.ID, false) + // validate the volume spec + err := vol.Validate() if err != nil { - mErr = multierror.Append(mErr, err) + mErr = multierror.Append(mErr, fmt.Errorf("volume validation failed: %v", err)) continue } - if err = vol.Validate(current); err != nil { - mErr = multierror.Append(mErr, err) + + // validate any update we're making + var existing *structs.HostVolume + volID := vol.ID + if vol.ID != "" { + existing, err = snap.HostVolumeByID(nil, vol.Namespace, vol.ID, true) + if err != nil { + return nil, err // should never hit, bail out + } + if existing == nil { + mErr = multierror.Append(mErr, + fmt.Errorf("cannot update volume %q: volume does not exist", vol.ID)) + continue + } + err = vol.ValidateUpdate(existing) + if err != nil { + mErr = multierror.Append(mErr, + fmt.Errorf("validating volume %q update failed: %v", vol.ID, err)) + continue + } + } else { + // capture this for nicer error messages later + volID = vol.Name + } + + // set zero values as needed, possibly from existing + vol.CanonicalizeForUpdate(existing, now) + + // make sure any nodes or pools actually exist + err = v.validateVolumeForState(vol, snap) + if err != nil { + mErr = multierror.Append(mErr, + fmt.Errorf("validating volume %q against state failed: %v", volID, err)) continue } - vols = append(vols, vol.Copy()) + vols = append(vols, vol) } return vols, mErr.ErrorOrNil() } +// validateVolumeForState ensures that any references to node IDs or node pools are valid +func (v *HostVolume) validateVolumeForState(vol *structs.HostVolume, snap *state.StateSnapshot) error { + var poolFromExistingNode string + if vol.NodeID != "" { + node, err := snap.NodeByID(nil, vol.NodeID) + if err != nil { + return err // should never hit, bail out + } + if node == nil { + return 
fmt.Errorf("node %q does not exist", vol.NodeID) + } + poolFromExistingNode = node.NodePool + } + + if vol.NodePool != "" { + pool, err := snap.NodePoolByName(nil, vol.NodePool) + if err != nil { + return err // should never hit, bail out + } + if pool == nil { + return fmt.Errorf("node pool %q does not exist", vol.NodePool) + } + if poolFromExistingNode != "" && poolFromExistingNode != pool.Name { + return fmt.Errorf("node ID %q is not in pool %q", vol.NodeID, vol.NodePool) + } + } + + return nil +} + func (v *HostVolume) createVolume(vol *structs.HostVolume) error { // TODO(1.10.0): proper node selection based on constraints and node diff --git a/nomad/host_volume_endpoint_test.go b/nomad/host_volume_endpoint_test.go index 13a0ef6e94e..098cb0f26f6 100644 --- a/nomad/host_volume_endpoint_test.go +++ b/nomad/host_volume_endpoint_test.go @@ -17,6 +17,7 @@ import ( "github.com/hashicorp/nomad/client/config" cstructs "github.com/hashicorp/nomad/client/structs" "github.com/hashicorp/nomad/helper" + "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/state" "github.com/hashicorp/nomad/nomad/structs" @@ -74,13 +75,62 @@ func TestHostVolumeEndpoint_CreateRegisterGetDelete(t *testing.T) { t.Run("invalid create", func(t *testing.T) { - // TODO(1.10.0): once validation logic for updating an existing volume is in - // place, fully test it here - req.Namespace = ns var resp structs.HostVolumeCreateResponse err := msgpackrpc.CallWithCodec(codec, "HostVolume.Create", req, &resp) must.EqError(t, err, "missing volume definition") + + req.Volumes = []*structs.HostVolume{ + {}, // missing basic fields + { + Name: "example", + PluginID: "example_plugin", + Constraints: []*structs.Constraint{{ + RTarget: "r1", + Operand: "=", + }}, + RequestedCapacityMinBytes: 200000, + RequestedCapacityMaxBytes: 100000, + RequestedCapabilities: []*structs.HostVolumeCapability{ + { + AttachmentMode: structs.HostVolumeAttachmentModeFilesystem, + AccessMode: structs.HostVolumeAccessModeSingleNodeWriter, + }, + { + AttachmentMode: "bad", + AccessMode: "invalid", + }, + }, + }, // fails other field validations + } + err = msgpackrpc.CallWithCodec(codec, "HostVolume.Create", req, &resp) + // TODO(1.10.0): nested multierrors are really ugly, we could really use + // some helper functions to make these nicer everywhere they pop up + must.EqError(t, err, `2 errors occurred: + * volume validation failed: 2 errors occurred: + * missing name + * must include at least one capability block + + + * volume validation failed: 3 errors occurred: + * capacity_max (100000) must be larger than capacity_min (200000) + * invalid attachment mode: "bad" + * invalid constraint: 1 error occurred: + * No LTarget provided but is required by constraint + + + + + +`) + + invalidNode := &structs.Node{ID: uuid.Generate(), NodePool: "does-not-exist"} + volOnInvalidNode := mock.HostVolumeRequestForNode(ns, invalidNode) + req.Volumes = []*structs.HostVolume{volOnInvalidNode} + err = msgpackrpc.CallWithCodec(codec, "HostVolume.Create", req, &resp) + must.EqError(t, err, fmt.Sprintf( + `validating volume "example" against state failed: node %q does not exist`, + invalidNode.ID)) }) var vol1ID, vol2ID string @@ -91,12 +141,10 @@ func TestHostVolumeEndpoint_CreateRegisterGetDelete(t *testing.T) { CapacityBytes: 150000, }, nil) - vol1 := mock.HostVolumeRequest() - vol1.Namespace = "apps" + vol1 := mock.HostVolumeRequest("apps") vol1.Name = 
"example1" vol1.NodePool = "prod" - vol2 := mock.HostVolumeRequest() - vol2.Namespace = "apps" + vol2 := mock.HostVolumeRequest("apps") vol2.Name = "example2" vol2.NodePool = "prod" req.Volumes = []*structs.HostVolume{vol1, vol2} @@ -136,6 +184,50 @@ func TestHostVolumeEndpoint_CreateRegisterGetDelete(t *testing.T) { must.NotNil(t, getResp.Volume) }) + t.Run("invalid updates", func(t *testing.T) { + + vol1, err := store.HostVolumeByID(nil, ns, vol1ID, false) + must.NoError(t, err) + must.NotNil(t, vol1) + invalidVol1 := vol1.Copy() + invalidVol2 := &structs.HostVolume{} + + createReq := &structs.HostVolumeCreateRequest{ + Volumes: []*structs.HostVolume{invalidVol1, invalidVol2}, + WriteRequest: structs.WriteRequest{ + Region: srv.Region(), + Namespace: ns, + AuthToken: token}, + } + c1.setCreate(nil, errors.New("should not call this endpoint on invalid RPCs")) + var createResp structs.HostVolumeCreateResponse + err = msgpackrpc.CallWithCodec(codec, "HostVolume.Create", createReq, &createResp) + must.EqError(t, err, `volume validation failed: 2 errors occurred: + * missing name + * must include at least one capability block + +`, must.Sprint("initial validation failures should exit early even if there's another valid vol")) + + invalidVol1.NodeID = uuid.Generate() + invalidVol1.RequestedCapacityMinBytes = 100 + invalidVol1.RequestedCapacityMaxBytes = 200 + registerReq := &structs.HostVolumeRegisterRequest{ + Volumes: []*structs.HostVolume{invalidVol1}, + WriteRequest: structs.WriteRequest{ + Region: srv.Region(), + Namespace: ns, + AuthToken: token}, + } + var registerResp structs.HostVolumeRegisterResponse + err = msgpackrpc.CallWithCodec(codec, "HostVolume.Register", registerReq, ®isterResp) + must.EqError(t, err, fmt.Sprintf(`validating volume %q update failed: 2 errors occurred: + * node ID cannot be updated + * capacity_max (200) cannot be less than existing provisioned capacity (150000) + +`, invalidVol1.ID), must.Sprint("update validation checks should have failed")) + + }) + t.Run("blocking Get unblocks on write", func(t *testing.T) { vol1, err := store.HostVolumeByID(nil, ns, vol1ID, false) must.NoError(t, err) @@ -308,25 +400,24 @@ func TestHostVolumeEndpoint_List(t *testing.T) { must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, index, nodes[2], state.NodeUpsertWithNodePool)) - vol1, vol2 := mock.HostVolume(), mock.HostVolume() - vol1.NodeID = nodes[0].ID + vol1 := mock.HostVolumeRequestForNode(ns1, nodes[0]) vol1.Name = "foobar-example" - vol1.Namespace = ns1 - vol2.NodeID = nodes[1].ID + vol1.Parameters = map[string]string{"mockID": "vol1"} + + vol2 := mock.HostVolumeRequestForNode(ns1, nodes[1]) vol2.Name = "foobaz-example" - vol2.Namespace = ns1 + vol2.Parameters = map[string]string{"mockID": "vol2"} - vol3, vol4 := mock.HostVolume(), mock.HostVolume() - vol3.NodeID = nodes[2].ID - vol3.NodePool = "prod" - vol3.Namespace = ns2 + vol3 := mock.HostVolumeRequestForNode(ns2, nodes[2]) vol3.Name = "foobar-example" - vol4.Namespace = ns2 - vol4.NodeID = nodes[1].ID + vol3.Parameters = map[string]string{"mockID": "vol3"} + + vol4 := mock.HostVolumeRequestForNode(ns2, nodes[1]) vol4.Name = "foobaz-example" + vol4.Parameters = map[string]string{"mockID": "vol4"} // we need to register these rather than upsert them so we have the correct - // indexes for unblocking later + // indexes for unblocking later. 
registerReq := &structs.HostVolumeRegisterRequest{ Volumes: []*structs.HostVolume{vol1, vol2, vol3, vol4}, WriteRequest: structs.WriteRequest{ @@ -338,6 +429,21 @@ func TestHostVolumeEndpoint_List(t *testing.T) { err := msgpackrpc.CallWithCodec(codec, "HostVolume.Register", registerReq, ®isterResp) must.NoError(t, err) + // IDs are generated by the server, so we need to read them back to figure + // out which mock got which ID + for _, vol := range registerResp.Volumes { + switch vol.Parameters["mockID"] { + case "vol1": + vol1 = vol + case "vol2": + vol2 = vol + case "vol3": + vol3 = vol + case "vol4": + vol4 = vol + } + } + testCases := []struct { name string req *structs.HostVolumeListRequest diff --git a/nomad/mock/host_volumes.go b/nomad/mock/host_volumes.go index ddec8b7ae82..7cec8e2884a 100644 --- a/nomad/mock/host_volumes.go +++ b/nomad/mock/host_volumes.go @@ -8,9 +8,9 @@ import ( "github.com/hashicorp/nomad/nomad/structs" ) -func HostVolumeRequest() *structs.HostVolume { +func HostVolumeRequest(ns string) *structs.HostVolume { vol := &structs.HostVolume{ - Namespace: structs.DefaultNamespace, + Namespace: ns, Name: "example", PluginID: "example-plugin", NodePool: structs.NodePoolDefault, @@ -36,9 +36,16 @@ func HostVolumeRequest() *structs.HostVolume { } +func HostVolumeRequestForNode(ns string, node *structs.Node) *structs.HostVolume { + vol := HostVolumeRequest(ns) + vol.NodeID = node.ID + vol.NodePool = node.NodePool + return vol +} + func HostVolume() *structs.HostVolume { volID := uuid.Generate() - vol := HostVolumeRequest() + vol := HostVolumeRequest(structs.DefaultNamespace) vol.ID = volID vol.NodeID = uuid.Generate() vol.CapacityBytes = 150000 diff --git a/nomad/structs/host_volumes.go b/nomad/structs/host_volumes.go index fbeca0430ee..66a178a3423 100644 --- a/nomad/structs/host_volumes.go +++ b/nomad/structs/host_volumes.go @@ -4,9 +4,15 @@ package structs import ( + "errors" + "fmt" "maps" + "strings" + "time" + "github.com/hashicorp/go-multierror" "github.com/hashicorp/nomad/helper" + "github.com/hashicorp/nomad/helper/uuid" ) type HostVolume struct { @@ -122,13 +128,101 @@ func (hv *HostVolume) Stub() *HostVolumeStub { } } -func (hv *HostVolume) Validate(existing *HostVolume) error { - // TODO(1.10.0): validate a host volume is validate or that changes to a - // host volume are valid +// Validate verifies that the submitted HostVolume spec has valid field values, +// without validating any changes or state (see ValidateUpdate). 
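+// Field-level checks here cover the name, capacity bounds, capability blocks,
+// and constraints; references to nodes or node pools are checked against state
+// separately in the RPC handler.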
+func (hv *HostVolume) Validate() error { - // TODO(1.10.0): note that we have to handle nil existing *HostVolume - // parameter safely - return nil + var mErr *multierror.Error + + if hv.Name == "" { + mErr = multierror.Append(mErr, errors.New("missing name")) + } + + if hv.RequestedCapacityMaxBytes < hv.RequestedCapacityMinBytes { + mErr = multierror.Append(mErr, fmt.Errorf( + "capacity_max (%d) must be larger than capacity_min (%d)", + hv.RequestedCapacityMaxBytes, hv.RequestedCapacityMinBytes)) + } + + if len(hv.RequestedCapabilities) == 0 { + mErr = multierror.Append(mErr, errors.New("must include at least one capability block")) + } else { + for _, cap := range hv.RequestedCapabilities { + err := cap.Validate() + if err != nil { + mErr = multierror.Append(mErr, err) + } + } + } + + for _, constraint := range hv.Constraints { + if err := constraint.Validate(); err != nil { + mErr = multierror.Append(mErr, fmt.Errorf("invalid constraint: %v", err)) + } + } + + return mErr.ErrorOrNil() +} + +// ValidateUpdate verifies that an update to a volume is safe to make. +func (hv *HostVolume) ValidateUpdate(existing *HostVolume) error { + if existing == nil { + return nil + } + + var mErr *multierror.Error + if len(existing.Allocations) > 0 { + allocIDs := helper.ConvertSlice(existing.Allocations, + func(a *AllocListStub) string { return a.ID }) + mErr = multierror.Append(mErr, fmt.Errorf( + "cannot update a volume in use: claimed by allocs (%s)", + strings.Join(allocIDs, ", "))) + } + + if hv.NodeID != "" && hv.NodeID != existing.NodeID { + mErr = multierror.Append(mErr, errors.New("node ID cannot be updated")) + } + if hv.NodePool != "" && hv.NodePool != existing.NodePool { + mErr = multierror.Append(mErr, errors.New("node pool cannot be updated")) + } + + if hv.RequestedCapacityMaxBytes < existing.CapacityBytes { + mErr = multierror.Append(mErr, fmt.Errorf( + "capacity_max (%d) cannot be less than existing provisioned capacity (%d)", + hv.RequestedCapacityMaxBytes, existing.CapacityBytes)) + } + + return mErr.ErrorOrNil() +} + +const DefaultHostVolumePlugin = "default" + +// CanonicalizeForUpdate is called in the RPC handler to ensure we call client +// RPCs with correctly populated fields from the existing volume, even if the +// RPC request includes otherwise valid zero-values. This method should be +// called on request objects or a copy, never on a state store object directly. 
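+// For example, a register request for an existing volume inherits the stored
+// node ID, node pool, and host path even if the request leaves them empty, and
+// the state is always reset to pending on any change.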
+func (hv *HostVolume) CanonicalizeForUpdate(existing *HostVolume, now time.Time) { + if existing == nil { + hv.ID = uuid.Generate() + if hv.PluginID == "" { + hv.PluginID = DefaultHostVolumePlugin + } + hv.CapacityBytes = 0 // returned by plugin + hv.HostPath = "" // returned by plugin + hv.CreateTime = now.UnixNano() + } else { + hv.PluginID = existing.PluginID + hv.NodePool = existing.NodePool + hv.NodeID = existing.NodeID + hv.Constraints = existing.Constraints + hv.CapacityBytes = existing.CapacityBytes + hv.HostPath = existing.HostPath + hv.CreateTime = existing.CreateTime + } + + hv.State = HostVolumeStatePending // reset on any change + hv.ModifyTime = now.UnixNano() + hv.Allocations = nil // set on read only } // GetNamespace implements the paginator.NamespaceGetter interface @@ -156,6 +250,31 @@ func (hvc *HostVolumeCapability) Copy() *HostVolumeCapability { return &nhvc } +func (hvc *HostVolumeCapability) Validate() error { + if hvc == nil { + return errors.New("validate called on nil host volume capability") + } + + switch hvc.AttachmentMode { + case HostVolumeAttachmentModeBlockDevice, + HostVolumeAttachmentModeFilesystem: + default: + return fmt.Errorf("invalid attachment mode: %q", hvc.AttachmentMode) + } + + switch hvc.AccessMode { + case HostVolumeAccessModeSingleNodeReader, + HostVolumeAccessModeSingleNodeWriter, + HostVolumeAccessModeMultiNodeReader, + HostVolumeAccessModeMultiNodeSingleWriter, + HostVolumeAccessModeMultiNodeMultiWriter: + default: + return fmt.Errorf("invalid access mode: %q", hvc.AccessMode) + } + + return nil +} + // HostVolumeAttachmentMode chooses the type of storage API that will be used to // interact with the device. type HostVolumeAttachmentMode string diff --git a/nomad/structs/host_volumes_test.go b/nomad/structs/host_volumes_test.go index a7c0aa9bc1e..da25ad3cbc4 100644 --- a/nomad/structs/host_volumes_test.go +++ b/nomad/structs/host_volumes_test.go @@ -5,6 +5,7 @@ package structs import ( "testing" + "time" "github.com/hashicorp/nomad/ci" "github.com/hashicorp/nomad/helper/uuid" @@ -53,3 +54,193 @@ func TestHostVolume_Copy(t *testing.T) { must.Len(t, 1, vol.RequestedCapabilities) must.Eq(t, "bar", vol.Parameters["foo"]) } + +func TestHostVolume_Validate(t *testing.T) { + ci.Parallel(t) + + invalid := &HostVolume{} + err := invalid.Validate() + must.EqError(t, err, `2 errors occurred: + * missing name + * must include at least one capability block + +`) + + invalid = &HostVolume{ + Name: "example", + PluginID: "example-plugin", + Constraints: []*Constraint{{ + RTarget: "r1", + Operand: "=", + }}, + RequestedCapacityMinBytes: 200000, + RequestedCapacityMaxBytes: 100000, + RequestedCapabilities: []*HostVolumeCapability{ + { + AttachmentMode: HostVolumeAttachmentModeFilesystem, + AccessMode: HostVolumeAccessModeSingleNodeWriter, + }, + { + AttachmentMode: "bad", + AccessMode: "invalid", + }, + }, + } + err = invalid.Validate() + must.EqError(t, err, `3 errors occurred: + * capacity_max (100000) must be larger than capacity_min (200000) + * invalid attachment mode: "bad" + * invalid constraint: 1 error occurred: + * No LTarget provided but is required by constraint + + + +`) + + vol := &HostVolume{ + Namespace: DefaultNamespace, + ID: uuid.Generate(), + Name: "example", + PluginID: "example-plugin", + NodePool: NodePoolDefault, + NodeID: uuid.Generate(), + Constraints: []*Constraint{{ + LTarget: "${meta.rack}", + RTarget: "r1", + Operand: "=", + }}, + RequestedCapacityMinBytes: 100000, + RequestedCapacityMaxBytes: 200000, + 
CapacityBytes: 150000, + RequestedCapabilities: []*HostVolumeCapability{{ + AttachmentMode: HostVolumeAttachmentModeFilesystem, + AccessMode: HostVolumeAccessModeSingleNodeWriter, + }}, + Parameters: map[string]string{"foo": "bar"}, + } + must.NoError(t, vol.Validate()) +} + +func TestHostVolume_ValidateUpdate(t *testing.T) { + ci.Parallel(t) + + vol := &HostVolume{ + NodePool: NodePoolDefault, + NodeID: uuid.Generate(), + RequestedCapacityMinBytes: 100000, + RequestedCapacityMaxBytes: 120000, + Parameters: map[string]string{"baz": "qux"}, + } + err := vol.ValidateUpdate(nil) + must.NoError(t, err) + + existing := &HostVolume{ + NodePool: "prod", + NodeID: uuid.Generate(), + RequestedCapacityMinBytes: 100000, + RequestedCapacityMaxBytes: 200000, + CapacityBytes: 150000, + RequestedCapabilities: []*HostVolumeCapability{{ + AttachmentMode: HostVolumeAttachmentModeFilesystem, + AccessMode: HostVolumeAccessModeSingleNodeWriter, + }}, + Parameters: map[string]string{"foo": "bar"}, + Allocations: []*AllocListStub{ + {ID: "6bd66bfa"}, + {ID: "7032e570"}, + }, + } + + err = vol.ValidateUpdate(existing) + must.EqError(t, err, `4 errors occurred: + * cannot update a volume in use: claimed by allocs (6bd66bfa, 7032e570) + * node ID cannot be updated + * node pool cannot be updated + * capacity_max (120000) cannot be less than existing provisioned capacity (150000) + +`) + +} + +func TestHostVolume_CanonicalizeForUpdate(t *testing.T) { + now := time.Now() + vol := &HostVolume{ + CapacityBytes: 100000, + HostPath: "/etc/passwd", + Allocations: []*AllocListStub{ + {ID: "6bd66bfa"}, + {ID: "7032e570"}, + }, + } + vol.CanonicalizeForUpdate(nil, now) + + must.NotEq(t, "", vol.ID) + must.Eq(t, now.UnixNano(), vol.CreateTime) + must.Eq(t, now.UnixNano(), vol.ModifyTime) + must.Eq(t, HostVolumeStatePending, vol.State) + must.Nil(t, vol.Allocations) + must.Eq(t, "", vol.HostPath) + must.Zero(t, vol.CapacityBytes) + + vol = &HostVolume{ + ID: "82f357d6-a5ec-11ef-9e36-3f9884222736", + RequestedCapacityMinBytes: 100000, + RequestedCapacityMaxBytes: 500000, + RequestedCapabilities: []*HostVolumeCapability{{ + AttachmentMode: HostVolumeAttachmentModeFilesystem, + AccessMode: HostVolumeAccessModeMultiNodeMultiWriter, + }}, + } + existing := &HostVolume{ + ID: "82f357d6-a5ec-11ef-9e36-3f9884222736", + PluginID: "example_plugin", + NodePool: "prod", + NodeID: uuid.Generate(), + RequestedCapacityMinBytes: 100000, + RequestedCapacityMaxBytes: 200000, + CapacityBytes: 150000, + RequestedCapabilities: []*HostVolumeCapability{{ + AttachmentMode: HostVolumeAttachmentModeFilesystem, + AccessMode: HostVolumeAccessModeSingleNodeWriter, + }}, + Constraints: []*Constraint{{ + LTarget: "${meta.rack}", + RTarget: "r1", + Operand: "=", + }}, + Parameters: map[string]string{"foo": "bar"}, + Allocations: []*AllocListStub{ + {ID: "6bd66bfa"}, + {ID: "7032e570"}, + }, + HostPath: "/var/nomad/alloc_mounts/82f357d6.ext4", + CreateTime: 1, + } + + vol.CanonicalizeForUpdate(existing, now) + must.Eq(t, existing.ID, vol.ID) + must.Eq(t, existing.PluginID, vol.PluginID) + must.Eq(t, existing.NodePool, vol.NodePool) + must.Eq(t, existing.NodeID, vol.NodeID) + must.Eq(t, []*Constraint{{ + LTarget: "${meta.rack}", + RTarget: "r1", + Operand: "=", + }}, vol.Constraints) + must.Eq(t, 100000, vol.RequestedCapacityMinBytes) + must.Eq(t, 500000, vol.RequestedCapacityMaxBytes) + must.Eq(t, 150000, vol.CapacityBytes) + + must.Eq(t, []*HostVolumeCapability{{ + AttachmentMode: HostVolumeAttachmentModeFilesystem, + AccessMode: 
HostVolumeAccessModeMultiNodeMultiWriter, + }}, vol.RequestedCapabilities) + + must.Eq(t, "/var/nomad/alloc_mounts/82f357d6.ext4", vol.HostPath) + must.Eq(t, HostVolumeStatePending, vol.State) + + must.Eq(t, existing.CreateTime, vol.CreateTime) + must.Eq(t, now.UnixNano(), vol.ModifyTime) + must.Nil(t, vol.Allocations) + +} From c2dd97dee7c2b61c215df9c2815c4f2719a576d9 Mon Sep 17 00:00:00 2001 From: Daniel Bennett Date: Wed, 20 Nov 2024 17:03:14 -0500 Subject: [PATCH 09/35] HostVolumePlugin interface and two implementations (#24497) * mkdir: HostVolumePluginMkdir: just creates a directory * example-host-volume: HostVolumePluginExternal: plugin script that does mkfs and mount loopback Co-authored-by: Tim Gross --- ci/test-core.json | 1 + client/client.go | 5 + client/host_volume_endpoint.go | 40 ++-- .../hostvolumemanager/host_volume_plugin.go | 194 ++++++++++++++++++ client/hostvolumemanager/host_volumes.go | 98 +++++++++ client/structs/host_volumes.go | 5 + command/volume_create_host_test.go | 14 +- command/volume_delete_host_test.go | 2 +- command/volume_status_host_test.go | 4 +- demo/hostvolume/_test-plugin.sh | 62 ++++++ demo/hostvolume/example-host-volume | 104 ++++++++++ demo/hostvolume/host.volume.hcl | 19 ++ nomad/host_volume_endpoint.go | 1 + nomad/mock/host_volumes.go | 2 +- 14 files changed, 526 insertions(+), 25 deletions(-) create mode 100644 client/hostvolumemanager/host_volume_plugin.go create mode 100644 client/hostvolumemanager/host_volumes.go create mode 100755 demo/hostvolume/_test-plugin.sh create mode 100755 demo/hostvolume/example-host-volume create mode 100644 demo/hostvolume/host.volume.hcl diff --git a/ci/test-core.json b/ci/test-core.json index 95f354fbe99..5ec461809ed 100644 --- a/ci/test-core.json +++ b/ci/test-core.json @@ -17,6 +17,7 @@ "client/dynamicplugins/...", "client/fingerprint/...", "client/hoststats/...", + "client/hostvolumemanager/...", "client/interfaces/...", "client/lib/...", "client/logmon/...", diff --git a/client/client.go b/client/client.go index fa539f51ed2..1e9a87dd7ea 100644 --- a/client/client.go +++ b/client/client.go @@ -34,6 +34,7 @@ import ( "github.com/hashicorp/nomad/client/dynamicplugins" "github.com/hashicorp/nomad/client/fingerprint" "github.com/hashicorp/nomad/client/hoststats" + "github.com/hashicorp/nomad/client/hostvolumemanager" cinterfaces "github.com/hashicorp/nomad/client/interfaces" "github.com/hashicorp/nomad/client/lib/cgroupslib" "github.com/hashicorp/nomad/client/lib/numalib" @@ -289,6 +290,8 @@ type Client struct { // drivermanager is responsible for managing driver plugins drivermanager drivermanager.Manager + hostVolumeManager *hostvolumemanager.HostVolumeManager + // baseLabels are used when emitting tagged metrics. All client metrics will // have these tags, and optionally more. baseLabels []metrics.Label @@ -532,6 +535,8 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulProxie c.devicemanager = devManager c.pluginManagers.RegisterAndRun(devManager) + c.hostVolumeManager = hostvolumemanager.NewHostVolumeManager(cfg.AllocMountsDir, logger) + // Set up the service registration wrapper using the Consul and Nomad // implementations. The Nomad implementation is only ever used on the // client, so we do that here rather than within the agent. 
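The hostVolumeManager wired in above is defined later in this patch under client/hostvolumemanager, and the client RPC endpoint in the next file simply routes requests into it. As a rough standalone sketch only (not code from this changeset; the mount directory and volume ID are made up), driving the manager directly with the built-in "mkdir" plugin looks like:

    package main

    import (
        "context"
        "fmt"

        "github.com/hashicorp/go-hclog"
        "github.com/hashicorp/nomad/client/hostvolumemanager"
        cstructs "github.com/hashicorp/nomad/client/structs"
    )

    func main() {
        // the client passes cfg.AllocMountsDir as the shared target path;
        // this directory is illustrative only
        hvm := hostvolumemanager.NewHostVolumeManager("/var/nomad/alloc_mounts", hclog.Default())

        // the server normally generates the ID; "mkdir" is one of the
        // built-in plugins the manager registers
        resp, err := hvm.Create(context.Background(), &cstructs.ClientHostVolumeCreateRequest{
            ID:       "74564d17-ce50-0bc1-48e5-6feaa41ede48",
            Name:     "example",
            PluginID: "mkdir",
        })
        if err != nil {
            panic(err)
        }
        fmt.Println(resp.HostPath, resp.CapacityBytes) // the mkdir plugin reports zero capacity
    }
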
diff --git a/client/host_volume_endpoint.go b/client/host_volume_endpoint.go index c22d22efde0..622fff3c492 100644 --- a/client/host_volume_endpoint.go +++ b/client/host_volume_endpoint.go @@ -5,7 +5,6 @@ package client import ( "context" - "path/filepath" "time" metrics "github.com/armon/go-metrics" @@ -23,31 +22,44 @@ func newHostVolumesEndpoint(c *Client) *HostVolume { var hostVolumeRequestTimeout = time.Minute -func (v *HostVolume) requestContext() (context.Context, context.CancelFunc) { - return context.WithTimeout(context.Background(), hostVolumeRequestTimeout) -} +func (v *HostVolume) Create( + req *cstructs.ClientHostVolumeCreateRequest, + resp *cstructs.ClientHostVolumeCreateResponse) error { -func (v *HostVolume) Create(req *cstructs.ClientHostVolumeCreateRequest, resp *cstructs.ClientHostVolumeCreateResponse) error { defer metrics.MeasureSince([]string{"client", "host_volume", "create"}, time.Now()) - _, cancelFn := v.requestContext() + ctx, cancelFn := v.requestContext() defer cancelFn() - // TODO(1.10.0): call into Client's host volume manager to create the work here + cresp, err := v.c.hostVolumeManager.Create(ctx, req) + if err != nil { + v.c.logger.Error("failed to create host volume", "name", req.Name, "error", err) + return err + } - resp.CapacityBytes = req.RequestedCapacityMinBytes - resp.HostPath = filepath.Join(v.c.config.AllocMountsDir, req.ID) + resp.CapacityBytes = cresp.CapacityBytes + resp.HostPath = cresp.HostPath - v.c.logger.Debug("created host volume", "id", req.ID, "path", resp.HostPath) + v.c.logger.Info("created host volume", "id", req.ID, "path", resp.HostPath) return nil } -func (v *HostVolume) Delete(req *cstructs.ClientHostVolumeDeleteRequest, resp *cstructs.ClientHostVolumeDeleteResponse) error { +func (v *HostVolume) Delete( + req *cstructs.ClientHostVolumeDeleteRequest, + resp *cstructs.ClientHostVolumeDeleteResponse) error { defer metrics.MeasureSince([]string{"client", "host_volume", "create"}, time.Now()) - _, cancelFn := v.requestContext() + ctx, cancelFn := v.requestContext() defer cancelFn() - // TODO(1.10.0): call into Client's host volume manager to delete the volume here + _, err := v.c.hostVolumeManager.Delete(ctx, req) // db TODO(1.10.0): cresp is empty... why return it? + if err != nil { + v.c.logger.Error("failed to delete host volume", "ID", req.ID, "error", err) + return err + } - v.c.logger.Debug("deleted host volume", "id", req.ID, "path", req.HostPath) + v.c.logger.Info("deleted host volume", "id", req.ID, "path", req.HostPath) return nil } + +func (v *HostVolume) requestContext() (context.Context, context.CancelFunc) { + return context.WithTimeout(context.Background(), hostVolumeRequestTimeout) +} diff --git a/client/hostvolumemanager/host_volume_plugin.go b/client/hostvolumemanager/host_volume_plugin.go new file mode 100644 index 00000000000..357eb2ef61e --- /dev/null +++ b/client/hostvolumemanager/host_volume_plugin.go @@ -0,0 +1,194 @@ +// Copyright (c) HashiCorp, Inc. 
+// SPDX-License-Identifier: BUSL-1.1 + +package hostvolumemanager + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "os" + "os/exec" + "path/filepath" + + "github.com/hashicorp/go-hclog" + "github.com/hashicorp/go-multierror" + cstructs "github.com/hashicorp/nomad/client/structs" + "github.com/hashicorp/nomad/helper" +) + +type HostVolumePlugin interface { + Version(ctx context.Context) (string, error) + Create(ctx context.Context, req *cstructs.ClientHostVolumeCreateRequest) (*HostVolumePluginCreateResponse, error) + Delete(ctx context.Context, req *cstructs.ClientHostVolumeDeleteRequest) error + // db TODO(1.10.0): update? resize? ?? +} + +type HostVolumePluginCreateResponse struct { + Path string `json:"path"` + SizeBytes int64 `json:"bytes"` + Context map[string]string `json:"context"` // metadata +} + +var _ HostVolumePlugin = &HostVolumePluginMkdir{} + +type HostVolumePluginMkdir struct { + ID string + TargetPath string + + log hclog.Logger +} + +func (p *HostVolumePluginMkdir) Version(_ context.Context) (string, error) { + return "0.0.1", nil +} + +func (p *HostVolumePluginMkdir) Create(_ context.Context, + req *cstructs.ClientHostVolumeCreateRequest) (*HostVolumePluginCreateResponse, error) { + + path := filepath.Join(p.TargetPath, req.ID) + log := p.log.With( + "operation", "create", + "volume_id", req.ID, + "path", path) + log.Debug("running plugin") + + err := os.Mkdir(path, 0o700) + if err != nil { + log.Debug("error with plugin", "error", err) + return nil, err + } + + log.Debug("plugin ran successfully") + return &HostVolumePluginCreateResponse{ + Path: path, + SizeBytes: 0, + Context: map[string]string{}, + }, nil +} + +func (p *HostVolumePluginMkdir) Delete(_ context.Context, req *cstructs.ClientHostVolumeDeleteRequest) error { + path := filepath.Join(p.TargetPath, req.ID) + log := p.log.With( + "operation", "delete", + "volume_id", req.ID, + "path", path) + log.Debug("running plugin") + + err := os.RemoveAll(path) + if err != nil { + log.Debug("error with plugin", "error", err) + return err + } + + log.Debug("plugin ran successfully") + return nil +} + +var _ HostVolumePlugin = &HostVolumePluginExternal{} + +type HostVolumePluginExternal struct { + ID string + Executable string + TargetPath string + + log hclog.Logger +} + +func (p *HostVolumePluginExternal) Version(_ context.Context) (string, error) { + return "0.0.1", nil // db TODO(1.10.0): call the plugin, use in fingerprint +} + +func (p *HostVolumePluginExternal) Create(ctx context.Context, + req *cstructs.ClientHostVolumeCreateRequest) (*HostVolumePluginCreateResponse, error) { + + params, err := json.Marshal(req.Parameters) // db TODO(1.10.0): if this is nil, then PARAMETERS env will be "null" + if err != nil { + return nil, fmt.Errorf("error marshaling volume pramaters: %w", err) + } + envVars := []string{ + "NODE_ID=" + req.NodeID, + "VOLUME_NAME=" + req.Name, + fmt.Sprintf("CAPACITY_MIN_BYTES=%d", req.RequestedCapacityMinBytes), + fmt.Sprintf("CAPACITY_MAX_BYTES=%d", req.RequestedCapacityMaxBytes), + "PARAMETERS=" + string(params), + } + + stdout, _, err := p.runPlugin(ctx, "create", req.ID, envVars) + if err != nil { + return nil, fmt.Errorf("error creating volume %q with plugin %q: %w", req.ID, req.PluginID, err) + } + + var pluginResp HostVolumePluginCreateResponse + err = json.Unmarshal(stdout, &pluginResp) + if err != nil { + return nil, err + } + return &pluginResp, nil +} + +func (p *HostVolumePluginExternal) Delete(ctx context.Context, + req 
*cstructs.ClientHostVolumeDeleteRequest) error { + + params, err := json.Marshal(req.Parameters) + if err != nil { + return fmt.Errorf("error marshaling volume pramaters: %w", err) + } + envVars := []string{ + "NODE_ID=" + req.NodeID, + "PARAMETERS=" + string(params), + } + + _, _, err = p.runPlugin(ctx, "delete", req.ID, envVars) + if err != nil { + return fmt.Errorf("error deleting volume %q with plugin %q: %w", req.ID, req.PluginID, err) + } + return nil +} + +func (p *HostVolumePluginExternal) runPlugin(ctx context.Context, + op, volID string, env []string) (stdout, stderr []byte, err error) { + + path := filepath.Join(p.TargetPath, volID) + log := p.log.With( + "operation", op, + "volume_id", volID, + "path", path) + log.Debug("running plugin") + + // set up plugin execution + cmd := exec.CommandContext(ctx, p.Executable, op, path) + + cmd.Env = append([]string{ + "OPERATION=" + op, + "HOST_PATH=" + path, + }, env...) + + var errBuf bytes.Buffer + cmd.Stderr = io.Writer(&errBuf) + + // run the command and capture output + mErr := &multierror.Error{} + stdout, err = cmd.Output() + if err != nil { + mErr = multierror.Append(mErr, err) + } + stderr, err = io.ReadAll(&errBuf) + if err != nil { + mErr = multierror.Append(mErr, err) + } + + log = log.With( + "stdout", string(stdout), + "stderr", string(stderr), + ) + if mErr.ErrorOrNil() != nil { + err = helper.FlattenMultierror(mErr) + log.Debug("error with plugin", "error", err) + return stdout, stderr, err + } + log.Debug("plugin ran successfully") + return stdout, stderr, nil +} diff --git a/client/hostvolumemanager/host_volumes.go b/client/hostvolumemanager/host_volumes.go new file mode 100644 index 00000000000..4d7da7d1ea4 --- /dev/null +++ b/client/hostvolumemanager/host_volumes.go @@ -0,0 +1,98 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package hostvolumemanager + +import ( + "context" + "fmt" + "sync" + + "github.com/hashicorp/go-hclog" + cstructs "github.com/hashicorp/nomad/client/structs" +) + +type HostVolumeManager struct { + log hclog.Logger + plugins *sync.Map +} + +func NewHostVolumeManager(sharedMountDir string, logger hclog.Logger) *HostVolumeManager { + log := logger.Named("host_volumes") + + mgr := &HostVolumeManager{ + log: log, + plugins: &sync.Map{}, + } + // db TODO(1.10.0): discover plugins on disk, need a new plugin dir + // TODO: how do we define the external mounter plugins? plugin configs? + mgr.setPlugin("mkdir", &HostVolumePluginMkdir{ + ID: "mkdir", + TargetPath: sharedMountDir, + log: log.With("plugin_id", "mkdir"), + }) + mgr.setPlugin("example-host-volume", &HostVolumePluginExternal{ + ID: "example-host-volume", + Executable: "/opt/nomad/hostvolumeplugins/example-host-volume", + TargetPath: sharedMountDir, + log: log.With("plugin_id", "example-host-volume"), + }) + return mgr +} + +// db TODO(1.10.0): fingerprint elsewhere / on sighup, and SetPlugin from afar? 
+func (hvm *HostVolumeManager) setPlugin(id string, plug HostVolumePlugin) { + hvm.plugins.Store(id, plug) +} + +func (hvm *HostVolumeManager) getPlugin(id string) (HostVolumePlugin, bool) { + obj, ok := hvm.plugins.Load(id) + if !ok { + return nil, false + } + return obj.(HostVolumePlugin), true +} + +func (hvm *HostVolumeManager) Create(ctx context.Context, + req *cstructs.ClientHostVolumeCreateRequest) (*cstructs.ClientHostVolumeCreateResponse, error) { + + plug, ok := hvm.getPlugin(req.PluginID) + if !ok { + return nil, fmt.Errorf("no such plugin %q", req.PluginID) + } + + pluginResp, err := plug.Create(ctx, req) + if err != nil { + return nil, err + } + + resp := &cstructs.ClientHostVolumeCreateResponse{ + HostPath: pluginResp.Path, + CapacityBytes: pluginResp.SizeBytes, + } + + // db TODO(1.10.0): now we need to add it to the node fingerprint! + // db TODO(1.10.0): and save it in client state! + + return resp, nil +} + +func (hvm *HostVolumeManager) Delete(ctx context.Context, + req *cstructs.ClientHostVolumeDeleteRequest) (*cstructs.ClientHostVolumeDeleteResponse, error) { + + plug, ok := hvm.getPlugin(req.PluginID) + if !ok { + return nil, fmt.Errorf("no such plugin %q", req.PluginID) + } + + err := plug.Delete(ctx, req) + if err != nil { + return nil, err + } + + resp := &cstructs.ClientHostVolumeDeleteResponse{} + + // db TODO(1.10.0): save the client state! + + return resp, nil +} diff --git a/client/structs/host_volumes.go b/client/structs/host_volumes.go index ba6806051aa..38d3cb2d770 100644 --- a/client/structs/host_volumes.go +++ b/client/structs/host_volumes.go @@ -45,6 +45,11 @@ type ClientHostVolumeDeleteRequest struct { // ID is a UUID-like string generated by the server. ID string + // PluginID is the name of the host volume plugin on the client that will be + // used for deleting the volume. If omitted, the client will use its default + // built-in plugin. + PluginID string + // NodeID is the node where the volume is placed. It's included in the // client RPC request so that the server can route the request to the // correct node. 
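Taken together, the wrapper above and the demo script added below define a simple contract for external plugins: the executable is invoked as `<executable> <operation> <host-path>` with OPERATION, HOST_PATH, and the request fields exported as environment variables, and a successful create must print JSON on stdout matching HostVolumePluginCreateResponse. A small self-contained sketch of decoding such output the same way the manager does; the sample path and size are made up:

    package main

    import (
        "encoding/json"
        "fmt"
    )

    // mirrors HostVolumePluginCreateResponse from host_volume_plugin.go above
    type createResponse struct {
        Path      string            `json:"path"`
        SizeBytes int64             `json:"bytes"`
        Context   map[string]string `json:"context"`
    }

    func main() {
        // stdout a plugin might print after a successful "create"
        stdout := []byte(`{"path": "/var/nomad/alloc_mounts/74564d17-ce50-0bc1-48e5-6feaa41ede48", "bytes": 50000000, "context": {}}`)

        var resp createResponse
        if err := json.Unmarshal(stdout, &resp); err != nil {
            panic(err)
        }
        fmt.Println(resp.Path, resp.SizeBytes)
    }
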
diff --git a/command/volume_create_host_test.go b/command/volume_create_host_test.go index f7f1cd5a57f..bd4fff6f46a 100644 --- a/command/volume_create_host_test.go +++ b/command/volume_create_host_test.go @@ -32,7 +32,7 @@ func TestHostVolumeCreateCommand_Run(t *testing.T) { namespace = "prod" name = "database" type = "host" -plugin_id = "plugin_id" +plugin_id = "mkdir" node_pool = "default" capacity_min = "10GiB" @@ -99,7 +99,7 @@ func TestHostVolume_HCLDecode(t *testing.T) { namespace = "prod" name = "database" type = "host" -plugin_id = "plugin_id" +plugin_id = "mkdir" node_pool = "default" capacity_min = "10GiB" @@ -132,7 +132,7 @@ parameters { expected: &api.HostVolume{ Namespace: "prod", Name: "database", - PluginID: "plugin_id", + PluginID: "mkdir", NodePool: "default", Constraints: []*api.Constraint{{ LTarget: "${attr.kernel.name}", @@ -165,13 +165,13 @@ parameters { namespace = "prod" name = "database" type = "host" -plugin_id = "plugin_id" +plugin_id = "mkdir" node_pool = "default" `, expected: &api.HostVolume{ Namespace: "prod", Name: "database", - PluginID: "plugin_id", + PluginID: "mkdir", NodePool: "default", }, }, @@ -182,7 +182,7 @@ node_pool = "default" namespace = "prod" name = "database" type = "host" -plugin_id = "plugin_id" +plugin_id = "mkdir" node_pool = "default" capacity_min = "a" @@ -197,7 +197,7 @@ capacity_min = "a" namespace = "prod" name = "database" type = "host" -plugin_id = "plugin_id" +plugin_id = "mkdir" node_pool = "default" constraint { diff --git a/command/volume_delete_host_test.go b/command/volume_delete_host_test.go index 0cc24645376..fde8994df33 100644 --- a/command/volume_delete_host_test.go +++ b/command/volume_delete_host_test.go @@ -36,7 +36,7 @@ func TestHostVolumeDeleteCommand(t *testing.T) { namespace = "prod" name = "example" type = "host" -plugin_id = "plugin_id" +plugin_id = "mkdir" node_id = "%s" node_pool = "default" capability { diff --git a/command/volume_status_host_test.go b/command/volume_status_host_test.go index d4784029555..f150b1f9985 100644 --- a/command/volume_status_host_test.go +++ b/command/volume_status_host_test.go @@ -60,7 +60,7 @@ func TestHostVolumeStatusCommand_List(t *testing.T) { namespace = "%s" name = "%s" type = "host" -plugin_id = "plugin_id" +plugin_id = "mkdir" node_id = "%s" node_pool = "default" capability { @@ -115,7 +115,7 @@ func TestHostVolumeStatusCommand_Get(t *testing.T) { namespace = "prod" name = "example" type = "host" -plugin_id = "plugin_id" +plugin_id = "mkdir" node_id = "%s" node_pool = "default" capability { diff --git a/demo/hostvolume/_test-plugin.sh b/demo/hostvolume/_test-plugin.sh new file mode 100755 index 00000000000..5ccd1f28a72 --- /dev/null +++ b/demo/hostvolume/_test-plugin.sh @@ -0,0 +1,62 @@ +#!/usr/bin/env bash +# Copyright (c) HashiCorp, Inc. 
+# SPDX-License-Identifier: BUSL-1.1
+
+set -euo pipefail
+
+if [[ $# -eq 0 || "$*" =~ -h ]]; then
+  cat <<EOF
+Usage: $0 <operation>
+
+Operations:
+  create, delete, version
+  any other operation will be passed to the plugin
+
+Environment variables:
+  PLUGIN: executable to run (default ./example-host-volume)
+  TARGET_DIR: path to place the mount dir (default /tmp,
+    usually {nomad data dir}/alloc_mounts)
+EOF
+  exit
+fi
+
+op="$1"
+shift
+
+plugin="${PLUGIN:-./example-host-volume}"
+alloc_mounts="${TARGET_DIR:-/tmp}"
+uuid='74564d17-ce50-0bc1-48e5-6feaa41ede48'
+
+case $op in
+  version)
+    args='version'
+    ;;
+
+  create)
+    args="create $alloc_mounts/$uuid"
+    export HOST_PATH="$alloc_mounts/$uuid"
+    export VOLUME_NAME=test
+    export NODE_ID=0b62d807-6101-a80f-374d-e1c430abbf47
+    export CAPACITY_MAX_BYTES=50000000 # 50mb
+    export CAPACITY_MIN_BYTES=50000000 # 50mb
+    export PARAMETERS='{"a": "ayy"}'
+    # db TODO(1.10.0): check stdout
+    ;;
+
+  delete)
+    args="delete $alloc_mounts/$uuid"
+    export HOST_PATH="$alloc_mounts/$uuid"
+    export PARAMETERS='{"a": "ayy"}'
+    ;;
+
+  *)
+    args="$*"
+    ;;
+esac
+
+export OPERATION="$op"
+set -x
+eval "$plugin $* $args"
diff --git a/demo/hostvolume/example-host-volume b/demo/hostvolume/example-host-volume
new file mode 100755
index 00000000000..ae0f7711326
--- /dev/null
+++ b/demo/hostvolume/example-host-volume
@@ -0,0 +1,104 @@
+#!/usr/bin/env bash
+# Copyright (c) HashiCorp, Inc.
+# SPDX-License-Identifier: BUSL-1.1
+
+# db TODO(1.10.0): where does PATH come from here? somewhere implicit? /sbin/ and /bin/ and ...?
+
+set -euo pipefail
+
+help() {
+  cat <<EOF
+Usage: $0 [options] <operation> [path]
+
+Options:
+  -v|--verbose: Show shell commands (set -x)
+  -h|--help: Print this help text and exit
+
+Operations:
+  create: Creates and mounts the device at path (required)
+    required environment:
+      CAPACITY_MIN_BYTES
+  delete: Unmounts and deletes the device at path (required)
+  version: Outputs this plugin's version
+
+EOF
+}
+
+version() {
+  echo "0.0.1"
+}
+
+# parse args
+[ $# -eq 0 ] && { help; exit 1; }
+for arg in "$@"; do
+  case $arg in
+    -h|-help|--help) help; exit 0 ;;
+    version|--version) version; exit 0 ;;
+    -v|--verbose) set -x; shift; ;;
+  esac
+done
+
+# path is required for everything else
+[ $# -lt 2 ] && { echo 'path required; seek --help' 1>&2; exit 1; }
+host_path="$2"
+
+validate_path() {
+  local path="$1"
+  if [[ ! "$path" =~ [0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12} ]]; then
+    1>&2 echo "expected uuid-lookin ID in the HOST_PATH; got: '$path'"
+    return 1
+  fi
+}
+
+is_mounted() {
+  awk '{print $2}' /proc/mounts | grep -q "^$1$"
+}
+
+create_volume() {
+  local path="$1"
+  validate_path "$path"
+  local bytes="$2"
+
+  # translate to mb for dd block size
+  local megs=$((bytes / 1024 / 1024)) # lazy, approximate
+
+  # the extra conditionals are for idempotency
+  if [ ! -f "$path.ext4" ]; then
+    dd if=/dev/zero of="$path.ext4" bs=1M count="$megs"
+    # mkfs is noisy on stdout, so we send it to stderr
+    # to avoid breaking the JSON parsing on the client
+    mkfs.ext4 "$path.ext4" 1>&2
+  fi
+  if ! is_mounted "$path"; then
+    mkdir -p "$path"
+    mount "$path.ext4" "$path"
+  fi
+}
+
+delete_volume() {
+  local path="$1"
+  validate_path "$path"
+  is_mounted "$path" && umount "$path"
+  rm -rf "$path"
+  rm -f "$path.ext4"
+}
+
+case "$1" in
+  "create")
+    create_volume "$host_path" "$CAPACITY_MIN_BYTES"
+    # output what Nomad expects
+    bytes="$(stat --format='%s' "$host_path.ext4")"
+    printf '{"path": "%s", "bytes": %s}' "$host_path" "$bytes"
+    ;;
+  "delete")
+    delete_volume "$host_path" ;;
+  *)
+    echo "unknown operation: $1" 1>&2
+    exit 1 ;;
+esac
diff --git a/demo/hostvolume/host.volume.hcl b/demo/hostvolume/host.volume.hcl
new file mode 100644
index 00000000000..cb0774b94e7
--- /dev/null
+++ b/demo/hostvolume/host.volume.hcl
@@ -0,0 +1,19 @@
+# Copyright (c) HashiCorp, Inc.
+# SPDX-License-Identifier: BUSL-1.1
+name = "test"
+type = "host"
+plugin_id = "example-host-volume"
+capacity_min = "50mb"
+capacity_max = "50mb"
+
+capability {
+  access_mode     = "single-node-writer"
+  attachment_mode = "file-system"
+}
+
+parameters {
+  a = "ayy"
+}
+
+# TODO(1.10.0): don't require node_pool
+node_pool = "default"
diff --git a/nomad/host_volume_endpoint.go b/nomad/host_volume_endpoint.go
index 3998dabc3f2..4b4d09deac3 100644
--- a/nomad/host_volume_endpoint.go
+++ b/nomad/host_volume_endpoint.go
@@ -554,6 +554,7 @@ func (v *HostVolume) deleteVolume(vol *structs.HostVolume) error {
 	method := "ClientHostVolume.Delete"
 	cReq := &cstructs.ClientHostVolumeDeleteRequest{
 		ID:         vol.ID,
+		PluginID:   vol.PluginID,
 		NodeID:     vol.NodeID,
 		HostPath:   vol.HostPath,
 		Parameters: vol.Parameters,
diff --git a/nomad/mock/host_volumes.go b/nomad/mock/host_volumes.go
index 7cec8e2884a..a87b084dad3 100644
--- a/nomad/mock/host_volumes.go
+++ b/nomad/mock/host_volumes.go
@@ -12,7 +12,7 @@ func HostVolumeRequest(ns string) *structs.HostVolume {
 	vol := &structs.HostVolume{
 		Namespace: ns,
 		Name:      "example",
-		PluginID:  "example-plugin",
+		PluginID:  "mkdir",
 		NodePool:  structs.NodePoolDefault,
 		Constraints: []*structs.Constraint{
 			{

From bbf49a90504b97cd51e2fe96d898d1b277868204 Mon Sep 17 00:00:00 2001
From: Tim Gross
Date: Thu, 21 Nov 2024 09:28:13 -0500
Subject: [PATCH 10/35] dynamic host volumes: node selection via constraints
 (#24518)

When making a request to create a dynamic host volume, users can pass a
node pool and constraints instead of a specific node ID. This changeset
implements node scheduling logic by instantiating a node pool filter and a
constraint checker borrowed from the scheduler package. Because host
volumes with the same name can't land on the same host, we don't need to
support `distinct_hosts`/`distinct_property`; that would be challenging
anyway without building out a much larger node iteration mechanism to keep
track of usage across multiple hosts.
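As a concrete illustration (not part of this change), a volume spec can now rely on placement instead of naming a node; the field values below are hypothetical and use the api package types exercised in the command tests:

    package main

    import (
        "fmt"

        "github.com/hashicorp/nomad/api"
    )

    func main() {
        // no node ID is set: the server filters candidate nodes by node pool,
        // then by these constraints, and skips any node that already has a
        // volume with the same name
        vol := &api.HostVolume{
            Namespace: "prod",
            Name:      "database",
            PluginID:  "mkdir",
            NodePool:  "prod",
            Constraints: []*api.Constraint{{
                LTarget: "${meta.rack}",
                RTarget: "r1",
                Operand: "=",
            }},
        }
        fmt.Println(vol.Name, vol.NodePool)
    }
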
Ref: https://github.com/hashicorp/nomad/pull/24479 --- command/agent/host_volume_endpoint_test.go | 2 + command/volume_create_host_test.go | 10 +- nomad/host_volume_endpoint.go | 104 ++++++++++++++---- nomad/host_volume_endpoint_test.go | 118 +++++++++++++++++++++ nomad/state/state_store_host_volumes.go | 2 + nomad/structs/host_volumes.go | 6 ++ scheduler/context.go | 7 ++ scheduler/feasible.go | 32 +++--- scheduler/feasible_test.go | 4 +- 9 files changed, 239 insertions(+), 46 deletions(-) diff --git a/command/agent/host_volume_endpoint_test.go b/command/agent/host_volume_endpoint_test.go index a328b85e42e..8a939a86582 100644 --- a/command/agent/host_volume_endpoint_test.go +++ b/command/agent/host_volume_endpoint_test.go @@ -21,6 +21,8 @@ func TestHostVolumeEndpoint_CRUD(t *testing.T) { // Create a volume on the test node vol := mock.HostVolumeRequest(structs.DefaultNamespace) + vol.NodePool = "" + vol.Constraints = nil reqBody := struct { Volumes []*structs.HostVolume }{Volumes: []*structs.HostVolume{vol}} diff --git a/command/volume_create_host_test.go b/command/volume_create_host_test.go index bd4fff6f46a..ce713b20d5d 100644 --- a/command/volume_create_host_test.go +++ b/command/volume_create_host_test.go @@ -11,13 +11,16 @@ import ( "github.com/hashicorp/hcl" "github.com/hashicorp/nomad/api" "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/command/agent" "github.com/mitchellh/cli" "github.com/shoenig/test/must" ) func TestHostVolumeCreateCommand_Run(t *testing.T) { ci.Parallel(t) - srv, client, url := testServer(t, true, nil) + srv, client, url := testServer(t, true, func(c *agent.Config) { + c.Client.Meta = map[string]string{"rack": "foo"} + }) t.Cleanup(srv.Shutdown) waitForNodes(t, client) @@ -38,11 +41,6 @@ node_pool = "default" capacity_min = "10GiB" capacity_max = "20G" -constraint { - attribute = "${attr.kernel.name}" - value = "linux" -} - constraint { attribute = "${meta.rack}" value = "foo" diff --git a/nomad/host_volume_endpoint.go b/nomad/host_volume_endpoint.go index 4b4d09deac3..0f3fa457a65 100644 --- a/nomad/host_volume_endpoint.go +++ b/nomad/host_volume_endpoint.go @@ -6,6 +6,7 @@ package nomad import ( "fmt" "net/http" + "regexp" "strings" "time" @@ -19,6 +20,7 @@ import ( "github.com/hashicorp/nomad/nomad/state" "github.com/hashicorp/nomad/nomad/state/paginator" "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/scheduler" ) // HostVolume is the server RPC endpoint for host volumes @@ -425,28 +427,12 @@ func (v *HostVolume) validateVolumeForState(vol *structs.HostVolume, snap *state func (v *HostVolume) createVolume(vol *structs.HostVolume) error { - // TODO(1.10.0): proper node selection based on constraints and node - // pool. Also, should we move this into the validator step? 
- if vol.NodeID == "" { - var iter memdb.ResultIterator - var err error - var raw any - if vol.NodePool != "" { - iter, err = v.srv.State().NodesByNodePool(nil, vol.NodePool) - } else { - iter, err = v.srv.State().Nodes(nil) - } - if err != nil { - return err - } - raw = iter.Next() - if raw == nil { - return fmt.Errorf("no node meets constraints for volume") - } - - node := raw.(*structs.Node) - vol.NodeID = node.ID + node, err := v.placeHostVolume(vol) + if err != nil { + return fmt.Errorf("could not place volume %q: %w", vol.Name, err) } + vol.NodeID = node.ID + vol.NodePool = node.NodePool method := "ClientHostVolume.Create" cReq := &cstructs.ClientHostVolumeCreateRequest{ @@ -459,7 +445,7 @@ func (v *HostVolume) createVolume(vol *structs.HostVolume) error { Parameters: vol.Parameters, } cResp := &cstructs.ClientHostVolumeCreateResponse{} - err := v.srv.RPC(method, cReq, cResp) + err = v.srv.RPC(method, cReq, cResp) if err != nil { return err } @@ -474,6 +460,80 @@ func (v *HostVolume) createVolume(vol *structs.HostVolume) error { return nil } +// placeHostVolume finds a node that matches the node pool and constraints, +// which doesn't already have a volume by that name. It returns a non-nil Node +// or an error indicating placement failed. +func (v *HostVolume) placeHostVolume(vol *structs.HostVolume) (*structs.Node, error) { + + var iter memdb.ResultIterator + var err error + if vol.NodePool != "" { + iter, err = v.srv.State().NodesByNodePool(nil, vol.NodePool) + } else { + iter, err = v.srv.State().Nodes(nil) + } + if err != nil { + return nil, err + } + + var checker *scheduler.ConstraintChecker + + if len(vol.Constraints) > 0 { + ctx := &placementContext{ + regexpCache: make(map[string]*regexp.Regexp), + versionCache: make(map[string]scheduler.VerConstraints), + semverCache: make(map[string]scheduler.VerConstraints), + } + checker = scheduler.NewConstraintChecker(ctx, vol.Constraints) + } + + for { + raw := iter.Next() + if raw == nil { + break + } + candidate := raw.(*structs.Node) + + // note: this is a race if multiple users create volumes of the same + // name concurrently, but we can't solve it on the server because we + // haven't yet written to state. The client will reject requests to + // create/register a volume with the same name with a different ID. 
+ if _, hasVol := candidate.HostVolumes[vol.Name]; hasVol { + continue + } + + if checker != nil { + if ok := checker.Feasible(candidate); !ok { + continue + } + } + + return candidate, nil + } + + return nil, fmt.Errorf("no node meets constraints") +} + +// placementContext implements the scheduler.ConstraintContext interface, a +// minimal subset of the scheduler.Context interface that we need to create a +// feasibility checker for constraints +type placementContext struct { + regexpCache map[string]*regexp.Regexp + versionCache map[string]scheduler.VerConstraints + semverCache map[string]scheduler.VerConstraints +} + +func (ctx *placementContext) Metrics() *structs.AllocMetric { return &structs.AllocMetric{} } +func (ctx *placementContext) RegexpCache() map[string]*regexp.Regexp { return ctx.regexpCache } + +func (ctx *placementContext) VersionConstraintCache() map[string]scheduler.VerConstraints { + return ctx.versionCache +} + +func (ctx *placementContext) SemverConstraintCache() map[string]scheduler.VerConstraints { + return ctx.semverCache +} + func (v *HostVolume) Delete(args *structs.HostVolumeDeleteRequest, reply *structs.HostVolumeDeleteResponse) error { authErr := v.srv.Authenticate(v.ctx, args) diff --git a/nomad/host_volume_endpoint_test.go b/nomad/host_volume_endpoint_test.go index 098cb0f26f6..2a432d961cf 100644 --- a/nomad/host_volume_endpoint_test.go +++ b/nomad/host_volume_endpoint_test.go @@ -17,6 +17,7 @@ import ( "github.com/hashicorp/nomad/client/config" cstructs "github.com/hashicorp/nomad/client/structs" "github.com/hashicorp/nomad/helper" + "github.com/hashicorp/nomad/helper/testlog" "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/state" @@ -156,6 +157,25 @@ func TestHostVolumeEndpoint_CreateRegisterGetDelete(t *testing.T) { must.EqError(t, err, "Permission denied") }) + t.Run("invalid node constraints", func(t *testing.T) { + req.Volumes[0].Constraints[0].RTarget = "r2" + req.Volumes[1].Constraints[0].RTarget = "r2" + + defer func() { + req.Volumes[0].Constraints[0].RTarget = "r1" + req.Volumes[1].Constraints[0].RTarget = "r1" + }() + + var resp structs.HostVolumeCreateResponse + req.AuthToken = token + err := msgpackrpc.CallWithCodec(codec, "HostVolume.Create", req, &resp) + must.EqError(t, err, `2 errors occurred: + * could not place volume "example1": no node meets constraints + * could not place volume "example2": no node meets constraints + +`) + }) + t.Run("valid create", func(t *testing.T) { var resp structs.HostVolumeCreateResponse req.AuthToken = token @@ -611,6 +631,103 @@ func TestHostVolumeEndpoint_List(t *testing.T) { }) } +func TestHostVolumeEndpoint_placeVolume(t *testing.T) { + srv, _, cleanupSrv := TestACLServer(t, func(c *Config) { + c.NumSchedulers = 0 + }) + t.Cleanup(cleanupSrv) + testutil.WaitForLeader(t, srv.RPC) + store := srv.fsm.State() + + endpoint := &HostVolume{ + srv: srv, + logger: testlog.HCLogger(t), + } + + node0, node1, node2, node3 := mock.Node(), mock.Node(), mock.Node(), mock.Node() + node0.NodePool = structs.NodePoolDefault + node1.NodePool = "dev" + node1.Meta["rack"] = "r2" + node2.NodePool = "prod" + node3.NodePool = "prod" + node3.Meta["rack"] = "r3" + node3.HostVolumes = map[string]*structs.ClientHostVolumeConfig{"example": { + Name: "example", + Path: "/srv", + }} + + must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, 1000, node0)) + must.NoError(t, 
store.UpsertNode(structs.MsgTypeTestSetup, 1000, node1)) + must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, 1000, node2)) + must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, 1000, node3)) + + testCases := []struct { + name string + vol *structs.HostVolume + expect *structs.Node + expectErr string + }{ + { + name: "only one in node pool", + vol: &structs.HostVolume{NodePool: "default"}, + expect: node0, + }, + { + name: "only one that matches constraints", + vol: &structs.HostVolume{Constraints: []*structs.Constraint{ + { + LTarget: "${meta.rack}", + RTarget: "r2", + Operand: "=", + }, + }}, + expect: node1, + }, + { + name: "only one available in pool", + vol: &structs.HostVolume{NodePool: "prod", Name: "example"}, + expect: node2, + }, + { + name: "no match", + vol: &structs.HostVolume{Constraints: []*structs.Constraint{ + { + LTarget: "${meta.rack}", + RTarget: "r6", + Operand: "=", + }, + }}, + expectErr: "no node meets constraints", + }, + { + name: "match already has a volume with the same name", + vol: &structs.HostVolume{ + Name: "example", + Constraints: []*structs.Constraint{ + { + LTarget: "${meta.rack}", + RTarget: "r3", + Operand: "=", + }, + }}, + expectErr: "no node meets constraints", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + node, err := endpoint.placeHostVolume(tc.vol) + if tc.expectErr == "" { + must.NoError(t, err) + must.Eq(t, tc.expect, node) + } else { + must.EqError(t, err, tc.expectErr) + must.Nil(t, node) + } + }) + } +} + // mockHostVolumeClient models client RPCs that have side-effects on the // client host type mockHostVolumeClient struct { @@ -631,6 +748,7 @@ func newMockHostVolumeClient(t *testing.T, srv *Server, pool string) (*mockHostV c.Node.NodePool = pool // TODO(1.10.0): we'll want to have a version gate for this feature c.Node.Attributes["nomad.version"] = version.Version + c.Node.Meta["rack"] = "r1" }, srv.config.RPCAddr, map[string]any{"HostVolume": mockClientEndpoint}) t.Cleanup(cleanup) diff --git a/nomad/state/state_store_host_volumes.go b/nomad/state/state_store_host_volumes.go index 732399ffb46..27013b05d90 100644 --- a/nomad/state/state_store_host_volumes.go +++ b/nomad/state/state_store_host_volumes.go @@ -87,6 +87,8 @@ func (s *StateStore) UpsertHostVolumes(index uint64, volumes []*structs.HostVolu if _, ok := node.HostVolumes[v.Name]; ok { v.State = structs.HostVolumeStateReady } + // Register RPCs for new volumes may not have the node pool set + v.NodePool = node.NodePool // Allocations are denormalized on read, so we don't want these to be // written to the state store. 
diff --git a/nomad/structs/host_volumes.go b/nomad/structs/host_volumes.go index 66a178a3423..21e827c3a9d 100644 --- a/nomad/structs/host_volumes.go +++ b/nomad/structs/host_volumes.go @@ -159,6 +159,12 @@ func (hv *HostVolume) Validate() error { if err := constraint.Validate(); err != nil { mErr = multierror.Append(mErr, fmt.Errorf("invalid constraint: %v", err)) } + switch constraint.Operand { + case ConstraintDistinctHosts, ConstraintDistinctProperty: + mErr = multierror.Append(mErr, fmt.Errorf( + "invalid constraint %s: host volumes of the same name are always on distinct hosts", constraint.Operand)) + default: + } } return mErr.ErrorOrNil() diff --git a/scheduler/context.go b/scheduler/context.go index 887607cf3be..e48cefc3918 100644 --- a/scheduler/context.go +++ b/scheduler/context.go @@ -51,6 +51,13 @@ type Context interface { SendEvent(event interface{}) } +type ConstraintContext interface { + Metrics() *structs.AllocMetric + RegexpCache() map[string]*regexp.Regexp + VersionConstraintCache() map[string]VerConstraints + SemverConstraintCache() map[string]VerConstraints +} + // EvalCache is used to cache certain things during an evaluation type EvalCache struct { reCache map[string]*regexp.Regexp diff --git a/scheduler/feasible.go b/scheduler/feasible.go index 9ff3878baac..e6e7c81d4a3 100644 --- a/scheduler/feasible.go +++ b/scheduler/feasible.go @@ -752,12 +752,12 @@ func (iter *DistinctPropertyIterator) Reset() { // given set of constraints. This is used to filter on job, task group, and task // constraints. type ConstraintChecker struct { - ctx Context + ctx ConstraintContext constraints []*structs.Constraint } // NewConstraintChecker creates a ConstraintChecker for a set of constraints -func NewConstraintChecker(ctx Context, constraints []*structs.Constraint) *ConstraintChecker { +func NewConstraintChecker(ctx ConstraintContext, constraints []*structs.Constraint) *ConstraintChecker { return &ConstraintChecker{ ctx: ctx, constraints: constraints, @@ -830,7 +830,7 @@ func resolveTarget(target string, node *structs.Node) (string, bool) { // checkConstraint checks if a constraint is satisfied. The lVal and rVal // interfaces may be nil. -func checkConstraint(ctx Context, operand string, lVal, rVal interface{}, lFound, rFound bool) bool { +func checkConstraint(ctx ConstraintContext, operand string, lVal, rVal interface{}, lFound, rFound bool) bool { // Check for constraints not handled by this checker. 
switch operand { case structs.ConstraintDistinctHosts, structs.ConstraintDistinctProperty: @@ -852,14 +852,14 @@ func checkConstraint(ctx Context, operand string, lVal, rVal interface{}, lFound return !lFound case structs.ConstraintVersion: parser := newVersionConstraintParser(ctx) - return lFound && rFound && checkVersionMatch(ctx, parser, lVal, rVal) + return lFound && rFound && checkVersionMatch(parser, lVal, rVal) case structs.ConstraintSemver: parser := newSemverConstraintParser(ctx) - return lFound && rFound && checkVersionMatch(ctx, parser, lVal, rVal) + return lFound && rFound && checkVersionMatch(parser, lVal, rVal) case structs.ConstraintRegex: return lFound && rFound && checkRegexpMatch(ctx, lVal, rVal) case structs.ConstraintSetContains, structs.ConstraintSetContainsAll: - return lFound && rFound && checkSetContainsAll(ctx, lVal, rVal) + return lFound && rFound && checkSetContainsAll(lVal, rVal) case structs.ConstraintSetContainsAny: return lFound && rFound && checkSetContainsAny(lVal, rVal) default: @@ -943,7 +943,7 @@ func compareOrder[T cmp.Ordered](op string, left, right T) bool { // checkVersionMatch is used to compare a version on the // left hand side with a set of constraints on the right hand side -func checkVersionMatch(_ Context, parse verConstraintParser, lVal, rVal interface{}) bool { +func checkVersionMatch(parse verConstraintParser, lVal, rVal interface{}) bool { // Parse the version var versionStr string switch v := lVal.(type) { @@ -979,7 +979,7 @@ func checkVersionMatch(_ Context, parse verConstraintParser, lVal, rVal interfac // checkAttributeVersionMatch is used to compare a version on the // left hand side with a set of constraints on the right hand side -func checkAttributeVersionMatch(_ Context, parse verConstraintParser, lVal, rVal *psstructs.Attribute) bool { +func checkAttributeVersionMatch(parse verConstraintParser, lVal, rVal *psstructs.Attribute) bool { // Parse the version var versionStr string if s, ok := lVal.GetString(); ok { @@ -1014,7 +1014,7 @@ func checkAttributeVersionMatch(_ Context, parse verConstraintParser, lVal, rVal // checkRegexpMatch is used to compare a value on the // left hand side with a regexp on the right hand side -func checkRegexpMatch(ctx Context, lVal, rVal interface{}) bool { +func checkRegexpMatch(ctx ConstraintContext, lVal, rVal interface{}) bool { // Ensure left-hand is string lStr, ok := lVal.(string) if !ok { @@ -1047,7 +1047,7 @@ func checkRegexpMatch(ctx Context, lVal, rVal interface{}) bool { // checkSetContainsAll is used to see if the left hand side contains the // string on the right hand side -func checkSetContainsAll(_ Context, lVal, rVal interface{}) bool { +func checkSetContainsAll(lVal, rVal interface{}) bool { // Ensure left-hand is string lStr, ok := lVal.(string) if !ok { @@ -1424,7 +1424,7 @@ func resolveDeviceTarget(target string, d *structs.NodeDeviceResource) (*psstruc // checkAttributeConstraint checks if a constraint is satisfied. nil equality // comparisons are considered to be false. -func checkAttributeConstraint(ctx Context, operand string, lVal, rVal *psstructs.Attribute, lFound, rFound bool) bool { +func checkAttributeConstraint(ctx ConstraintContext, operand string, lVal, rVal *psstructs.Attribute, lFound, rFound bool) bool { // Check for constraints not handled by this checker. 
switch operand { case structs.ConstraintDistinctHosts, structs.ConstraintDistinctProperty: @@ -1484,7 +1484,7 @@ func checkAttributeConstraint(ctx Context, operand string, lVal, rVal *psstructs } parser := newVersionConstraintParser(ctx) - return checkAttributeVersionMatch(ctx, parser, lVal, rVal) + return checkAttributeVersionMatch(parser, lVal, rVal) case structs.ConstraintSemver: if !(lFound && rFound) { @@ -1492,7 +1492,7 @@ func checkAttributeConstraint(ctx Context, operand string, lVal, rVal *psstructs } parser := newSemverConstraintParser(ctx) - return checkAttributeVersionMatch(ctx, parser, lVal, rVal) + return checkAttributeVersionMatch(parser, lVal, rVal) case structs.ConstraintRegex: if !(lFound && rFound) { @@ -1516,7 +1516,7 @@ func checkAttributeConstraint(ctx Context, operand string, lVal, rVal *psstructs return false } - return checkSetContainsAll(ctx, ls, rs) + return checkSetContainsAll(ls, rs) case structs.ConstraintSetContainsAny: if !(lFound && rFound) { return false @@ -1550,7 +1550,7 @@ type VerConstraints interface { // or semver). type verConstraintParser func(verConstraint string) VerConstraints -func newVersionConstraintParser(ctx Context) verConstraintParser { +func newVersionConstraintParser(ctx ConstraintContext) verConstraintParser { cache := ctx.VersionConstraintCache() return func(cstr string) VerConstraints { @@ -1568,7 +1568,7 @@ func newVersionConstraintParser(ctx Context) verConstraintParser { } } -func newSemverConstraintParser(ctx Context) verConstraintParser { +func newSemverConstraintParser(ctx ConstraintContext) verConstraintParser { cache := ctx.SemverConstraintCache() return func(cstr string) VerConstraints { diff --git a/scheduler/feasible_test.go b/scheduler/feasible_test.go index f552b70c9f3..4e887752989 100644 --- a/scheduler/feasible_test.go +++ b/scheduler/feasible_test.go @@ -1263,7 +1263,7 @@ func TestCheckVersionConstraint(t *testing.T) { for _, tc := range cases { _, ctx := testContext(t) p := newVersionConstraintParser(ctx) - if res := checkVersionMatch(ctx, p, tc.lVal, tc.rVal); res != tc.result { + if res := checkVersionMatch(p, tc.lVal, tc.rVal); res != tc.result { t.Fatalf("TC: %#v, Result: %v", tc, res) } } @@ -1345,7 +1345,7 @@ func TestCheckSemverConstraint(t *testing.T) { t.Run(tc.name, func(t *testing.T) { _, ctx := testContext(t) p := newSemverConstraintParser(ctx) - actual := checkVersionMatch(ctx, p, tc.lVal, tc.rVal) + actual := checkVersionMatch(p, tc.lVal, tc.rVal) must.Eq(t, tc.result, actual) }) } From 0a08ddb08352f7ffac2852c0a94fda680ff90781 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Thu, 21 Nov 2024 11:45:59 -0500 Subject: [PATCH 11/35] dynamic host volumes: update volume from node fingerprint (#24521) When dynamic host volumes are created, they're written to the state store in a "pending" state. Once the client fingerprints the volume it's eligible for scheduling, so we mark the state as ready at that point. Because the fingerprint could potentially be returned before the RPC handler has a chance to write to the state store, this changeset adds test coverage to verify that upserts of pending volumes check the node for a previously-fingerprinted volume as well. 
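Stripped of transaction and index bookkeeping, the reconciliation rule this change adds can be sketched as the following simplified, hypothetical helper; the real implementation below also copies the volume and bumps its ModifyIndex before reinserting it:

    package sketch

    import "github.com/hashicorp/nomad/nomad/structs"

    // markFingerprinted is a simplified version of the rule described above:
    // a pending volume whose name shows up in the node's fingerprinted
    // HostVolumes map becomes ready and therefore eligible for scheduling.
    func markFingerprinted(node *structs.Node, vols []*structs.HostVolume) {
        for _, vol := range vols {
            if vol.NodeID != node.ID || vol.State != structs.HostVolumeStatePending {
                continue // don't touch ready or soft-deleted volumes
            }
            if _, ok := node.HostVolumes[vol.Name]; ok {
                vol.State = structs.HostVolumeStateReady
            }
        }
    }
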
Ref: https://github.com/hashicorp/nomad/pull/24479 --- nomad/state/state_store.go | 3 + nomad/state/state_store_host_volumes.go | 34 +++++++- nomad/state/state_store_host_volumes_test.go | 89 ++++++++++++++++++++ 3 files changed, 125 insertions(+), 1 deletion(-) diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 545c3f3201b..65ce87813db 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -1048,6 +1048,9 @@ func upsertNodeTxn(txn *txn, index uint64, node *structs.Node) error { if err := upsertCSIPluginsForNode(txn, node, index); err != nil { return fmt.Errorf("csi plugin update failed: %v", err) } + if err := upsertHostVolumeForNode(txn, node, index); err != nil { + return fmt.Errorf("dynamic host volumes update failed: %v", err) + } return nil } diff --git a/nomad/state/state_store_host_volumes.go b/nomad/state/state_store_host_volumes.go index 27013b05d90..dd5a68040f2 100644 --- a/nomad/state/state_store_host_volumes.go +++ b/nomad/state/state_store_host_volumes.go @@ -75,7 +75,7 @@ func (s *StateStore) UpsertHostVolumes(index uint64, volumes []*structs.HostVolu } // If the fingerprint is written from the node before the create RPC - // handler completes, we'll never update from the initial pending , so + // handler completes, we'll never update from the initial pending, so // reconcile that here node, err := s.NodeByID(nil, v.NodeID) if err != nil { @@ -190,3 +190,35 @@ func (s *StateStore) hostVolumesIter(ws memdb.WatchSet, index string, sort SortO ws.Add(iter.WatchCh()) return iter, nil } + +// upsertHostVolumeForNode sets newly fingerprinted host volumes to ready state +func upsertHostVolumeForNode(txn *txn, node *structs.Node, index uint64) error { + if len(node.HostVolumes) == 0 { + return nil + } + iter, err := txn.Get(TableHostVolumes, indexNodeID, node.ID) + if err != nil { + return err + } + for { + raw := iter.Next() + if raw == nil { + return nil + } + vol := raw.(*structs.HostVolume) + switch vol.State { + case structs.HostVolumeStateUnknown, structs.HostVolumeStatePending: + if _, ok := node.HostVolumes[vol.Name]; ok { + vol = vol.Copy() + vol.State = structs.HostVolumeStateReady + vol.ModifyIndex = index + err = txn.Insert(TableHostVolumes, vol) + if err != nil { + return fmt.Errorf("host volume insert: %w", err) + } + } + default: + // don't touch ready or soft-deleted volumes + } + } +} diff --git a/nomad/state/state_store_host_volumes_test.go b/nomad/state/state_store_host_volumes_test.go index 11b8371152e..af4c77a729b 100644 --- a/nomad/state/state_store_host_volumes_test.go +++ b/nomad/state/state_store_host_volumes_test.go @@ -9,6 +9,7 @@ import ( memdb "github.com/hashicorp/go-memdb" "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" "github.com/shoenig/test/must" @@ -163,3 +164,91 @@ func TestStateStore_HostVolumes_CRUD(t *testing.T) { got = consumeIter(iter) must.MapLen(t, 3, got, must.Sprint(`expected 3 volumes remain`)) } + +func TestStateStore_UpdateHostVolumesFromFingerprint(t *testing.T) { + ci.Parallel(t) + store := testStateStore(t) + index, err := store.LatestIndex() + must.NoError(t, err) + + node := mock.Node() + node.HostVolumes = map[string]*structs.ClientHostVolumeConfig{ + "static-vol": {Name: "static-vol", Path: "/srv/static"}, + "dhv-zero": {Name: "dhv-zero", Path: "/var/nomad/alloc_mounts" + uuid.Generate()}, + } + index++ + must.NoError(t, 
store.UpsertNode(structs.MsgTypeTestSetup, + index, node, NodeUpsertWithNodePool)) + otherNode := mock.Node() + must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, + index, otherNode, NodeUpsertWithNodePool)) + + ns := structs.DefaultNamespace + + vols := []*structs.HostVolume{ + mock.HostVolume(), + mock.HostVolume(), + mock.HostVolume(), + mock.HostVolume(), + } + + // a volume that's been fingerprinted before we can write it to state + vols[0].Name = "dhv-zero" + vols[0].NodeID = node.ID + + // a volume that will match the new fingerprint + vols[1].Name = "dhv-one" + vols[1].NodeID = node.ID + + // a volume that matches the new fingerprint but on the wrong node + vols[2].Name = "dhv-one" + vols[2].NodeID = otherNode.ID + + // a volume that won't be fingerprinted + vols[3].Name = "dhv-two" + vols[3].NodeID = node.ID + + index++ + oldIndex := index + must.NoError(t, store.UpsertHostVolumes(index, vols)) + + vol0, err := store.HostVolumeByID(nil, ns, vols[0].ID, false) + must.NoError(t, err) + must.Eq(t, structs.HostVolumeStateReady, vol0.State, + must.Sprint("previously-fingerprinted volume should be in ready state")) + + // update the fingerprint + + node = node.Copy() + node.HostVolumes["dhv-one"] = &structs.ClientHostVolumeConfig{ + Name: "dhv-one", + Path: "/var/nomad/alloc_mounts" + uuid.Generate(), + } + + index++ + must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, index, node)) + + vol0, err = store.HostVolumeByID(nil, ns, vols[0].ID, false) + must.NoError(t, err) + must.Eq(t, oldIndex, vol0.ModifyIndex, must.Sprint("expected no further update")) + must.Eq(t, structs.HostVolumeStateReady, vol0.State) + + vol1, err := store.HostVolumeByID(nil, ns, vols[1].ID, false) + must.NoError(t, err) + must.Eq(t, index, vol1.ModifyIndex, + must.Sprint("fingerprint should update pending volume")) + must.Eq(t, structs.HostVolumeStateReady, vol1.State) + + vol2, err := store.HostVolumeByID(nil, ns, vols[2].ID, false) + must.NoError(t, err) + must.Eq(t, oldIndex, vol2.ModifyIndex, + must.Sprint("volume on other node should not change")) + must.Eq(t, structs.HostVolumeStatePending, vol2.State) + + vol3, err := store.HostVolumeByID(nil, ns, vols[3].ID, false) + must.NoError(t, err) + must.Eq(t, oldIndex, vol3.ModifyIndex, + must.Sprint("volume not fingerprinted should not change")) + must.Eq(t, structs.HostVolumeStatePending, vol3.State) + +} From 2b04d47ac2e3f1d6911eddbb9fb9dfffab3f1eef Mon Sep 17 00:00:00 2001 From: Daniel Bennett Date: Fri, 22 Nov 2024 12:22:50 -0500 Subject: [PATCH 12/35] dynamic host volumes: test client RPC and plugins (#24535) also ensure that volume ID is uuid-shaped so user-provided input like `id = "../../../"` which is used as part of the target directory can not find its way very far into the volume submission process --- client/host_volume_endpoint_test.go | 94 ++++++++ .../hostvolumemanager/host_volume_plugin.go | 61 +++-- .../host_volume_plugin_test.go | 217 ++++++++++++++++++ .../test_fixtures/test_plugin.sh | 34 +++ .../test_fixtures/test_plugin_sad.sh | 7 + nomad/structs/host_volumes.go | 6 +- nomad/structs/host_volumes_test.go | 9 +- 7 files changed, 405 insertions(+), 23 deletions(-) create mode 100644 client/host_volume_endpoint_test.go create mode 100644 client/hostvolumemanager/host_volume_plugin_test.go create mode 100755 client/hostvolumemanager/test_fixtures/test_plugin.sh create mode 100755 client/hostvolumemanager/test_fixtures/test_plugin_sad.sh diff --git a/client/host_volume_endpoint_test.go b/client/host_volume_endpoint_test.go new file 
mode 100644 index 00000000000..c3a4ae83835 --- /dev/null +++ b/client/host_volume_endpoint_test.go @@ -0,0 +1,94 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package client + +import ( + "path/filepath" + "testing" + + "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/client/hostvolumemanager" + cstructs "github.com/hashicorp/nomad/client/structs" + "github.com/hashicorp/nomad/helper/testlog" + "github.com/shoenig/test/must" +) + +func TestHostVolume(t *testing.T) { + ci.Parallel(t) + + client, cleanup := TestClient(t, nil) + defer cleanup() + + tmp := t.TempDir() + expectDir := filepath.Join(tmp, "test-vol-id") + hvm := hostvolumemanager.NewHostVolumeManager(tmp, testlog.HCLogger(t)) + client.hostVolumeManager = hvm + + t.Run("happy", func(t *testing.T) { + req := &cstructs.ClientHostVolumeCreateRequest{ + ID: "test-vol-id", + Name: "test-vol-name", + PluginID: "mkdir", // real plugin really makes a dir + } + var resp cstructs.ClientHostVolumeCreateResponse + err := client.ClientRPC("HostVolume.Create", req, &resp) + must.NoError(t, err) + must.Eq(t, cstructs.ClientHostVolumeCreateResponse{ + HostPath: expectDir, + CapacityBytes: 0, // "mkdir" always returns zero + }, resp) + // technically this is testing "mkdir" more than the RPC + must.DirExists(t, expectDir) + + delReq := &cstructs.ClientHostVolumeDeleteRequest{ + ID: "test-vol-id", + PluginID: "mkdir", + HostPath: expectDir, + } + var delResp cstructs.ClientHostVolumeDeleteResponse + err = client.ClientRPC("HostVolume.Delete", delReq, &delResp) + must.NoError(t, err) + must.NotNil(t, delResp) + // again, actually testing the "mkdir" plugin + must.DirNotExists(t, expectDir) + }) + + t.Run("missing plugin", func(t *testing.T) { + req := &cstructs.ClientHostVolumeCreateRequest{ + PluginID: "non-existent", + } + var resp cstructs.ClientHostVolumeCreateResponse + err := client.ClientRPC("HostVolume.Create", req, &resp) + must.EqError(t, err, `no such plugin "non-existent"`) + + delReq := &cstructs.ClientHostVolumeDeleteRequest{ + PluginID: "non-existent", + } + var delResp cstructs.ClientHostVolumeDeleteResponse + err = client.ClientRPC("HostVolume.Delete", delReq, &delResp) + must.EqError(t, err, `no such plugin "non-existent"`) + }) + + t.Run("error from plugin", func(t *testing.T) { + // "mkdir" plugin can't create a directory within a file + client.hostVolumeManager = hostvolumemanager.NewHostVolumeManager("host_volume_endpoint_test.go", testlog.HCLogger(t)) + + req := &cstructs.ClientHostVolumeCreateRequest{ + ID: "test-vol-id", + Name: "test-vol-name", + PluginID: "mkdir", + } + var resp cstructs.ClientHostVolumeCreateResponse + err := client.ClientRPC("HostVolume.Create", req, &resp) + must.ErrorContains(t, err, "host_volume_endpoint_test.go/test-vol-id: not a directory") + + delReq := &cstructs.ClientHostVolumeDeleteRequest{ + ID: "test-vol-id", + PluginID: "mkdir", + } + var delResp cstructs.ClientHostVolumeDeleteResponse + err = client.ClientRPC("HostVolume.Delete", delReq, &delResp) + must.ErrorContains(t, err, "host_volume_endpoint_test.go/test-vol-id: not a directory") + }) +} diff --git a/client/hostvolumemanager/host_volume_plugin.go b/client/hostvolumemanager/host_volume_plugin.go index 357eb2ef61e..e8297a32f80 100644 --- a/client/hostvolumemanager/host_volume_plugin.go +++ b/client/hostvolumemanager/host_volume_plugin.go @@ -12,15 +12,17 @@ import ( "os" "os/exec" "path/filepath" + "strings" "github.com/hashicorp/go-hclog" 
"github.com/hashicorp/go-multierror" + "github.com/hashicorp/go-version" cstructs "github.com/hashicorp/nomad/client/structs" "github.com/hashicorp/nomad/helper" ) type HostVolumePlugin interface { - Version(ctx context.Context) (string, error) + Version(ctx context.Context) (*version.Version, error) Create(ctx context.Context, req *cstructs.ClientHostVolumeCreateRequest) (*HostVolumePluginCreateResponse, error) Delete(ctx context.Context, req *cstructs.ClientHostVolumeDeleteRequest) error // db TODO(1.10.0): update? resize? ?? @@ -41,8 +43,8 @@ type HostVolumePluginMkdir struct { log hclog.Logger } -func (p *HostVolumePluginMkdir) Version(_ context.Context) (string, error) { - return "0.0.1", nil +func (p *HostVolumePluginMkdir) Version(_ context.Context) (*version.Version, error) { + return version.NewVersion("0.0.1") } func (p *HostVolumePluginMkdir) Create(_ context.Context, @@ -97,8 +99,23 @@ type HostVolumePluginExternal struct { log hclog.Logger } -func (p *HostVolumePluginExternal) Version(_ context.Context) (string, error) { - return "0.0.1", nil // db TODO(1.10.0): call the plugin, use in fingerprint +func (p *HostVolumePluginExternal) Version(ctx context.Context) (*version.Version, error) { + cmd := exec.CommandContext(ctx, p.Executable, "version") + cmd.Env = []string{"OPERATION=version"} + stdout, stderr, err := runCommand(cmd) + if err != nil { + p.log.Debug("error with plugin", + "operation", "version", + "stdout", string(stdout), + "stderr", string(stderr), + "error", err) + return nil, fmt.Errorf("error getting version from plugin %q: %w", p.ID, err) + } + v, err := version.NewVersion(strings.TrimSpace(string(stdout))) + if err != nil { + return nil, fmt.Errorf("error with version from plugin: %w", err) + } + return v, nil } func (p *HostVolumePluginExternal) Create(ctx context.Context, @@ -118,7 +135,7 @@ func (p *HostVolumePluginExternal) Create(ctx context.Context, stdout, _, err := p.runPlugin(ctx, "create", req.ID, envVars) if err != nil { - return nil, fmt.Errorf("error creating volume %q with plugin %q: %w", req.ID, req.PluginID, err) + return nil, fmt.Errorf("error creating volume %q with plugin %q: %w", req.ID, p.ID, err) } var pluginResp HostVolumePluginCreateResponse @@ -143,7 +160,7 @@ func (p *HostVolumePluginExternal) Delete(ctx context.Context, _, _, err = p.runPlugin(ctx, "delete", req.ID, envVars) if err != nil { - return fmt.Errorf("error deleting volume %q with plugin %q: %w", req.ID, req.PluginID, err) + return fmt.Errorf("error deleting volume %q with plugin %q: %w", req.ID, p.ID, err) } return nil } @@ -166,10 +183,23 @@ func (p *HostVolumePluginExternal) runPlugin(ctx context.Context, "HOST_PATH=" + path, }, env...) 
+ stdout, stderr, err = runCommand(cmd) + + log = log.With( + "stdout", string(stdout), + "stderr", string(stderr), + ) + if err != nil { + log.Debug("error with plugin", "error", err) + return stdout, stderr, err + } + log.Debug("plugin ran successfully") + return stdout, stderr, nil +} + +func runCommand(cmd *exec.Cmd) (stdout, stderr []byte, err error) { var errBuf bytes.Buffer cmd.Stderr = io.Writer(&errBuf) - - // run the command and capture output mErr := &multierror.Error{} stdout, err = cmd.Output() if err != nil { @@ -179,16 +209,5 @@ func (p *HostVolumePluginExternal) runPlugin(ctx context.Context, if err != nil { mErr = multierror.Append(mErr, err) } - - log = log.With( - "stdout", string(stdout), - "stderr", string(stderr), - ) - if mErr.ErrorOrNil() != nil { - err = helper.FlattenMultierror(mErr) - log.Debug("error with plugin", "error", err) - return stdout, stderr, err - } - log.Debug("plugin ran successfully") - return stdout, stderr, nil + return stdout, stderr, helper.FlattenMultierror(mErr.ErrorOrNil()) } diff --git a/client/hostvolumemanager/host_volume_plugin_test.go b/client/hostvolumemanager/host_volume_plugin_test.go new file mode 100644 index 00000000000..954686d7443 --- /dev/null +++ b/client/hostvolumemanager/host_volume_plugin_test.go @@ -0,0 +1,217 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package hostvolumemanager + +import ( + "bytes" + "context" + "io" + "path/filepath" + "runtime" + "testing" + "time" + + "github.com/hashicorp/go-hclog" + "github.com/hashicorp/go-version" + cstructs "github.com/hashicorp/nomad/client/structs" + "github.com/hashicorp/nomad/helper/testlog" + "github.com/shoenig/test" + "github.com/shoenig/test/must" +) + +func TestHostVolumePluginMkdir(t *testing.T) { + volID := "test-vol-id" + tmp := t.TempDir() + target := filepath.Join(tmp, volID) + + plug := &HostVolumePluginMkdir{ + ID: "test-mkdir-plugin", + TargetPath: tmp, + log: testlog.HCLogger(t), + } + + // contexts don't matter here, since they're thrown away by this plugin, + // but sending timeout contexts anyway, in case the plugin changes later. 
+ _, err := plug.Version(timeout(t)) + must.NoError(t, err) + + t.Run("happy", func(t *testing.T) { + resp, err := plug.Create(timeout(t), + &cstructs.ClientHostVolumeCreateRequest{ + ID: volID, // minimum required by this plugin + }) + must.NoError(t, err) + must.Eq(t, &HostVolumePluginCreateResponse{ + Path: target, + SizeBytes: 0, + Context: map[string]string{}, + }, resp) + must.DirExists(t, target) + + err = plug.Delete(timeout(t), + &cstructs.ClientHostVolumeDeleteRequest{ + ID: volID, + }) + must.NoError(t, err) + must.DirNotExists(t, target) + }) + + t.Run("sad", func(t *testing.T) { + // can't mkdir inside a file + plug.TargetPath = "host_volume_plugin_test.go" + + resp, err := plug.Create(timeout(t), + &cstructs.ClientHostVolumeCreateRequest{ + ID: volID, // minimum required by this plugin + }) + must.ErrorContains(t, err, "host_volume_plugin_test.go/test-vol-id: not a directory") + must.Nil(t, resp) + + err = plug.Delete(timeout(t), + &cstructs.ClientHostVolumeDeleteRequest{ + ID: volID, + }) + must.ErrorContains(t, err, "host_volume_plugin_test.go/test-vol-id: not a directory") + }) +} + +func TestHostVolumePluginExternal(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("skipped because windows") // db TODO(1.10.0) + } + + volID := "test-vol-id" + tmp := t.TempDir() + target := filepath.Join(tmp, volID) + + expectVersion, err := version.NewVersion("0.0.2") + must.NoError(t, err) + + t.Run("happy", func(t *testing.T) { + + log, getLogs := logRecorder(t) + plug := &HostVolumePluginExternal{ + ID: "test-external-plugin", + Executable: "./test_fixtures/test_plugin.sh", + TargetPath: tmp, + log: log, + } + + v, err := plug.Version(timeout(t)) + must.NoError(t, err) + must.Eq(t, expectVersion, v) + + resp, err := plug.Create(timeout(t), + &cstructs.ClientHostVolumeCreateRequest{ + ID: volID, + NodeID: "test-node", + RequestedCapacityMinBytes: 5, + RequestedCapacityMaxBytes: 10, + Parameters: map[string]string{"key": "val"}, + }) + must.NoError(t, err) + + must.Eq(t, &HostVolumePluginCreateResponse{ + Path: target, + SizeBytes: 5, + Context: map[string]string{"key": "val"}, + }, resp) + must.DirExists(t, target) + logged := getLogs() + must.StrContains(t, logged, "OPERATION=create") // stderr from `env` + must.StrContains(t, logged, `stdout="{`) // stdout from printf + + // reset logger for next call + log, getLogs = logRecorder(t) + plug.log = log + + err = plug.Delete(timeout(t), + &cstructs.ClientHostVolumeDeleteRequest{ + ID: volID, + NodeID: "test-node", + Parameters: map[string]string{"key": "val"}, + }) + must.NoError(t, err) + must.DirNotExists(t, target) + logged = getLogs() + must.StrContains(t, logged, "OPERATION=delete") // stderr from `env` + must.StrContains(t, logged, "removed directory") // stdout from `rm -v` + }) + + t.Run("sad", func(t *testing.T) { + + log, getLogs := logRecorder(t) + plug := &HostVolumePluginExternal{ + ID: "test-external-plugin-sad", + Executable: "./test_fixtures/test_plugin_sad.sh", + TargetPath: tmp, + log: log, + } + + v, err := plug.Version(timeout(t)) + must.EqError(t, err, `error getting version from plugin "test-external-plugin-sad": exit status 1`) + must.Nil(t, v) + logged := getLogs() + must.StrContains(t, logged, "version: sad plugin is sad") + must.StrContains(t, logged, "version: it tells you all about it in stderr") + + // reset logger + log, getLogs = logRecorder(t) + plug.log = log + + resp, err := plug.Create(timeout(t), + &cstructs.ClientHostVolumeCreateRequest{ + ID: volID, + NodeID: "test-node", + 
RequestedCapacityMinBytes: 5, + RequestedCapacityMaxBytes: 10, + Parameters: map[string]string{"key": "val"}, + }) + must.EqError(t, err, `error creating volume "test-vol-id" with plugin "test-external-plugin-sad": exit status 1`) + must.Nil(t, resp) + logged = getLogs() + must.StrContains(t, logged, "create: sad plugin is sad") + must.StrContains(t, logged, "create: it tells you all about it in stderr") + + log, getLogs = logRecorder(t) + plug.log = log + + err = plug.Delete(timeout(t), + &cstructs.ClientHostVolumeDeleteRequest{ + ID: volID, + NodeID: "test-node", + Parameters: map[string]string{"key": "val"}, + }) + must.EqError(t, err, `error deleting volume "test-vol-id" with plugin "test-external-plugin-sad": exit status 1`) + logged = getLogs() + must.StrContains(t, logged, "delete: sad plugin is sad") + must.StrContains(t, logged, "delete: it tells you all about it in stderr") + }) +} + +// timeout provides a context that times out in 1 second +func timeout(t *testing.T) context.Context { + t.Helper() + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + t.Cleanup(cancel) + return ctx +} + +// logRecorder is here so we can assert that stdout/stderr appear in logs +func logRecorder(t *testing.T) (hclog.Logger, func() string) { + t.Helper() + buf := &bytes.Buffer{} + logger := hclog.New(&hclog.LoggerOptions{ + Name: "log-recorder", + Output: buf, + Level: hclog.Debug, + IncludeLocation: true, + DisableTime: true, + }) + return logger, func() string { + bts, err := io.ReadAll(buf) + test.NoError(t, err) + return string(bts) + } +} diff --git a/client/hostvolumemanager/test_fixtures/test_plugin.sh b/client/hostvolumemanager/test_fixtures/test_plugin.sh new file mode 100755 index 00000000000..b60229fd34d --- /dev/null +++ b/client/hostvolumemanager/test_fixtures/test_plugin.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: BUSL-1.1 + +# plugin for host_volume_plugin_test.go +set -xeuo pipefail + +env 1>&2 + +test "$1" == "$OPERATION" + +echo 'all operations should ignore stderr' 1>&2 + +case $1 in + create) + test "$2" == "$HOST_PATH" + test "$NODE_ID" == 'test-node' + test "$PARAMETERS" == '{"key":"val"}' + test "$CAPACITY_MIN_BYTES" -eq 5 + test "$CAPACITY_MAX_BYTES" -eq 10 + mkdir "$2" + printf '{"path": "%s", "bytes": 5, "context": %s}' "$2" "$PARAMETERS" + ;; + delete) + test "$2" == "$HOST_PATH" + test "$NODE_ID" == 'test-node' + test "$PARAMETERS" == '{"key":"val"}' + rm -rfv "$2" ;; + version) + echo '0.0.2' ;; + *) + echo "unknown operation $1" + exit 1 ;; +esac diff --git a/client/hostvolumemanager/test_fixtures/test_plugin_sad.sh b/client/hostvolumemanager/test_fixtures/test_plugin_sad.sh new file mode 100755 index 00000000000..6f883297a98 --- /dev/null +++ b/client/hostvolumemanager/test_fixtures/test_plugin_sad.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash +# Copyright (c) HashiCorp, Inc. 
+# SPDX-License-Identifier: BUSL-1.1 + +echo "$1: sad plugin is sad" +echo "$1: it tells you all about it in stderr" 1>&2 +exit 1 diff --git a/nomad/structs/host_volumes.go b/nomad/structs/host_volumes.go index 21e827c3a9d..11745526aa5 100644 --- a/nomad/structs/host_volumes.go +++ b/nomad/structs/host_volumes.go @@ -134,6 +134,10 @@ func (hv *HostVolume) Validate() error { var mErr *multierror.Error + if hv.ID != "" && !helper.IsUUID(hv.ID) { + mErr = multierror.Append(mErr, errors.New("invalid ID")) + } + if hv.Name == "" { mErr = multierror.Append(mErr, errors.New("missing name")) } @@ -167,7 +171,7 @@ func (hv *HostVolume) Validate() error { } } - return mErr.ErrorOrNil() + return helper.FlattenMultierror(mErr.ErrorOrNil()) } // ValidateUpdate verifies that an update to a volume is safe to make. diff --git a/nomad/structs/host_volumes_test.go b/nomad/structs/host_volumes_test.go index da25ad3cbc4..499bc27d1c8 100644 --- a/nomad/structs/host_volumes_test.go +++ b/nomad/structs/host_volumes_test.go @@ -66,7 +66,13 @@ func TestHostVolume_Validate(t *testing.T) { `) + invalid = &HostVolume{Name: "example"} + err = invalid.Validate() + // single error should be flattened + must.EqError(t, err, "must include at least one capability block") + invalid = &HostVolume{ + ID: "../../not-a-uuid", Name: "example", PluginID: "example-plugin", Constraints: []*Constraint{{ @@ -87,7 +93,8 @@ func TestHostVolume_Validate(t *testing.T) { }, } err = invalid.Validate() - must.EqError(t, err, `3 errors occurred: + must.EqError(t, err, `4 errors occurred: + * invalid ID * capacity_max (100000) must be larger than capacity_min (200000) * invalid attachment mode: "bad" * invalid constraint: 1 error occurred: From 298460dcd96e027a37cbbbe7734ff23e58d7e3ec Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Fri, 22 Nov 2024 12:53:31 -0500 Subject: [PATCH 13/35] dynamic host volumes: monitor readiness from CLI (#24528) When creating a dynamic host volumes, set up an optional monitor that waits for the node to fingerprint the volume as healthy. Ref: https://github.com/hashicorp/nomad/pull/24479 --- command/volume_create.go | 22 +++- command/volume_create_host.go | 151 ++++++++++++++++++++++++++-- command/volume_create_host_test.go | 2 +- command/volume_status_host.go | 64 +++++++----- command/volume_status_host_test.go | 2 +- demo/hostvolume/example-host-volume | 2 +- 6 files changed, 209 insertions(+), 34 deletions(-) diff --git a/command/volume_create.go b/command/volume_create.go index 258e37b1f47..f566cfa8f2d 100644 --- a/command/volume_create.go +++ b/command/volume_create.go @@ -31,7 +31,22 @@ Usage: nomad volume create [options] General Options: - ` + generalOptionsUsage(usageOptsDefault) + ` + generalOptionsUsage(usageOptsDefault) + ` + +Create Options: + + -detach + Return immediately instead of entering monitor mode for dynamic host + volumes. After creating a volume, the volume ID will be printed to the + screen, which can be used to examine the volume using the volume status + command. If -detach is omitted or false, the command will monitor the state + of the volume until it is ready to be scheduled. + + -verbose + Display full information when monitoring volume state. Used for dynamic host + volumes only. 
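At a high level the new monitor is just a blocking-query loop against the host volume Get API; a minimal sketch in Go, assuming only the api package calls and constants used later in this patch (HostVolumes().Get, HostVolumeStateReady), could look like:

// Sketch only: poll a volume until the node fingerprints it as ready.
// No overall deadline is enforced here; each Get call is a blocking query
// bounded by WaitTime, as in the non-TTY monitor below.
func waitForVolumeReady(client *api.Client, id string, lastIndex uint64) error {
    qOpts := &api.QueryOptions{
        WaitIndex: lastIndex,
        WaitTime:  5 * time.Second,
    }
    for {
        vol, meta, err := client.HostVolumes().Get(id, qOpts)
        if err != nil {
            return err
        }
        if vol.State == api.HostVolumeStateReady {
            return nil
        }
        qOpts.WaitIndex = meta.LastIndex // block until the next state change
    }
}

The real ttyMonitor layers glint spinner rendering on top of this same loop.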
+ +` return strings.TrimSpace(helpText) } @@ -51,7 +66,10 @@ func (c *VolumeCreateCommand) Synopsis() string { func (c *VolumeCreateCommand) Name() string { return "volume create" } func (c *VolumeCreateCommand) Run(args []string) int { + var detach, verbose bool flags := c.Meta.FlagSet(c.Name(), FlagSetClient) + flags.BoolVar(&detach, "detach", false, "detach from monitor") + flags.BoolVar(&verbose, "verbose", false, "display full volume IDs") flags.Usage = func() { c.Ui.Output(c.Help()) } if err := flags.Parse(args); err != nil { @@ -102,7 +120,7 @@ func (c *VolumeCreateCommand) Run(args []string) int { case "csi": return c.csiCreate(client, ast) case "host": - return c.hostVolumeCreate(client, ast) + return c.hostVolumeCreate(client, ast, detach, verbose) default: c.Ui.Error(fmt.Sprintf("Error unknown volume type: %s", volType)) return 1 diff --git a/command/volume_create_host.go b/command/volume_create_host.go index 32205610740..8947244aba0 100644 --- a/command/volume_create_host.go +++ b/command/volume_create_host.go @@ -4,17 +4,23 @@ package command import ( + "context" "fmt" "strconv" + "time" "github.com/hashicorp/hcl" "github.com/hashicorp/hcl/hcl/ast" "github.com/hashicorp/nomad/api" "github.com/hashicorp/nomad/helper" + "github.com/mitchellh/go-glint" + "github.com/mitchellh/go-glint/components" "github.com/mitchellh/mapstructure" ) -func (c *VolumeCreateCommand) hostVolumeCreate(client *api.Client, ast *ast.File) int { +func (c *VolumeCreateCommand) hostVolumeCreate( + client *api.Client, ast *ast.File, detach, verbose bool) int { + vol, err := decodeHostVolume(ast) if err != nil { c.Ui.Error(fmt.Sprintf("Error decoding the volume definition: %s", err)) @@ -29,17 +35,150 @@ func (c *VolumeCreateCommand) hostVolumeCreate(client *api.Client, ast *ast.File c.Ui.Error(fmt.Sprintf("Error creating volume: %s", err)) return 1 } + + var volID string + var lastIndex uint64 + + // note: the command only ever returns 1 volume from the API for _, vol := range vols { - // note: the command only ever returns 1 volume from the API - c.Ui.Output(fmt.Sprintf( - "Created host volume %s with ID %s", vol.Name, vol.ID)) + if detach || vol.State == api.HostVolumeStateReady { + c.Ui.Output(fmt.Sprintf( + "Created host volume %s with ID %s", vol.Name, vol.ID)) + return 0 + } else { + c.Ui.Output(fmt.Sprintf( + "==> Created host volume %s with ID %s", vol.Name, vol.ID)) + volID = vol.ID + lastIndex = vol.ModifyIndex + break + } } - // TODO(1.10.0): monitor so we can report when the node has fingerprinted - + err = c.monitorHostVolume(client, volID, lastIndex, verbose) + if err != nil { + c.Ui.Error(fmt.Sprintf("==> %s: %v", formatTime(time.Now()), err.Error())) + return 1 + } return 0 } +func (c *VolumeCreateCommand) monitorHostVolume(client *api.Client, id string, lastIndex uint64, verbose bool) error { + length := shortId + if verbose { + length = fullId + } + + opts := formatOpts{ + verbose: verbose, + short: !verbose, + length: length, + } + + if isStdoutTerminal() { + return c.ttyMonitor(client, id, lastIndex, opts) + } else { + return c.nottyMonitor(client, id, lastIndex, opts) + } +} + +func (c *VolumeCreateCommand) ttyMonitor(client *api.Client, id string, lastIndex uint64, opts formatOpts) error { + + gUi := glint.New() + spinner := glint.Layout( + components.Spinner(), + glint.Text(fmt.Sprintf(" Monitoring volume %q in progress...", limit(id, opts.length))), + ).Row().MarginLeft(2) + refreshRate := 100 * time.Millisecond + + 
gUi.SetRefreshRate(refreshRate) + gUi.Set(spinner) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + go gUi.Render(ctx) + + qOpts := &api.QueryOptions{ + AllowStale: true, + WaitIndex: lastIndex, + WaitTime: time.Second * 5, + } + + var statusComponent *glint.LayoutComponent + var endSpinner *glint.LayoutComponent + +DONE: + for { + vol, meta, err := client.HostVolumes().Get(id, qOpts) + if err != nil { + return err + } + str, err := formatHostVolume(vol, opts) + if err != nil { + // should never happen b/c we don't pass json/template via opts here + return err + } + statusComponent = glint.Layout( + glint.Text(""), + glint.Text(formatTime(time.Now())), + glint.Text(c.Colorize().Color(str)), + ).MarginLeft(4) + + statusComponent = glint.Layout(statusComponent) + gUi.Set(spinner, statusComponent) + + endSpinner = glint.Layout( + components.Spinner(), + glint.Text(fmt.Sprintf(" Host volume %q %s", limit(id, opts.length), vol.State)), + ).Row().MarginLeft(2) + + switch vol.State { + case api.HostVolumeStateReady: + endSpinner = glint.Layout( + glint.Text(fmt.Sprintf("✓ Host volume %q %s", limit(id, opts.length), vol.State)), + ).Row().MarginLeft(2) + break DONE + + case api.HostVolumeStateDeleted: + endSpinner = glint.Layout( + glint.Text(fmt.Sprintf("! Host volume %q %s", limit(id, opts.length), vol.State)), + ).Row().MarginLeft(2) + break DONE + + default: + qOpts.WaitIndex = meta.LastIndex + continue + } + + } + + // Render one final time with completion message + gUi.Set(endSpinner, statusComponent, glint.Text("")) + gUi.RenderFrame() + return nil +} + +func (c *VolumeCreateCommand) nottyMonitor(client *api.Client, id string, lastIndex uint64, opts formatOpts) error { + + c.Ui.Info(fmt.Sprintf("==> %s: Monitoring volume %q...", + formatTime(time.Now()), limit(id, opts.length))) + + for { + vol, _, err := client.HostVolumes().Get(id, &api.QueryOptions{ + WaitIndex: lastIndex, + WaitTime: time.Second * 5, + }) + if err != nil { + return err + } + if vol.State == api.HostVolumeStateReady { + c.Ui.Info(fmt.Sprintf("==> %s: Volume %q ready", + formatTime(time.Now()), limit(vol.Name, opts.length))) + return nil + } + } +} + func decodeHostVolume(input *ast.File) (*api.HostVolume, error) { var err error vol := &api.HostVolume{} diff --git a/command/volume_create_host_test.go b/command/volume_create_host_test.go index ce713b20d5d..4ef92dc02a8 100644 --- a/command/volume_create_host_test.go +++ b/command/volume_create_host_test.go @@ -66,7 +66,7 @@ parameters { _, err = file.WriteString(hclTestFile) must.NoError(t, err) - args := []string{"-address", url, file.Name()} + args := []string{"-address", url, "-detach", file.Name()} code := cmd.Run(args) must.Eq(t, 0, code, must.Sprintf("got error: %s", ui.ErrorWriter.String())) diff --git a/command/volume_status_host.go b/command/volume_status_host.go index 7878afc55ce..ebe035ddb87 100644 --- a/command/volume_status_host.go +++ b/command/volume_status_host.go @@ -22,6 +22,14 @@ func (c *VolumeStatusCommand) hostVolumeStatus(client *api.Client, id, nodeID, n return 1 } + opts := formatOpts{ + verbose: c.verbose, + short: c.short, + length: c.length, + json: c.json, + template: c.template, + } + // get a host volume that matches the given prefix or a list of all matches // if an exact match is not found. 
note we can't use the shared getByPrefix // helper here because the List API doesn't match the required signature @@ -32,7 +40,7 @@ func (c *VolumeStatusCommand) hostVolumeStatus(client *api.Client, id, nodeID, n return 1 } if len(possible) > 0 { - out, err := c.formatHostVolumes(possible) + out, err := formatHostVolumes(possible, opts) if err != nil { c.Ui.Error(fmt.Sprintf("Error formatting: %s", err)) return 1 @@ -47,12 +55,12 @@ func (c *VolumeStatusCommand) hostVolumeStatus(client *api.Client, id, nodeID, n return 1 } - str, err := c.formatHostVolume(vol) + str, err := formatHostVolume(vol, opts) if err != nil { c.Ui.Error(fmt.Sprintf("Error formatting volume: %s", err)) return 1 } - c.Ui.Output(str) + c.Ui.Output(c.Colorize().Color(str)) return 0 } @@ -66,13 +74,20 @@ func (c *VolumeStatusCommand) listHostVolumes(client *api.Client, nodeID, nodePo return 1 } - str, err := c.formatHostVolumes(vols) + opts := formatOpts{ + verbose: c.verbose, + short: c.short, + length: c.length, + json: c.json, + template: c.template, + } + + str, err := formatHostVolumes(vols, opts) if err != nil { c.Ui.Error(fmt.Sprintf("Error formatting volumes: %s", err)) return 1 } - c.Ui.Output(str) - + c.Ui.Output(c.Colorize().Color(str)) return 0 } @@ -108,9 +123,9 @@ func (c *VolumeStatusCommand) getByPrefix(client *api.Client, prefix string) (*a } } -func (c *VolumeStatusCommand) formatHostVolume(vol *api.HostVolume) (string, error) { - if c.json || len(c.template) > 0 { - out, err := Format(c.json, c.template, vol) +func formatHostVolume(vol *api.HostVolume, opts formatOpts) (string, error) { + if opts.json || len(opts.template) > 0 { + out, err := Format(opts.json, opts.template, vol) if err != nil { return "", fmt.Errorf("format error: %v", err) } @@ -130,48 +145,51 @@ func (c *VolumeStatusCommand) formatHostVolume(vol *api.HostVolume) (string, err } // Exit early - if c.short { + if opts.short { return formatKV(output), nil } full := []string{formatKV(output)} // Format the allocs - banner := c.Colorize().Color("\n[bold]Allocations[reset]") - allocs := formatAllocListStubs(vol.Allocations, c.verbose, c.length) + banner := "\n[bold]Allocations[reset]" + allocs := formatAllocListStubs(vol.Allocations, opts.verbose, opts.length) full = append(full, banner) full = append(full, allocs) return strings.Join(full, "\n"), nil } -func (c *VolumeStatusCommand) formatHostVolumes(vols []*api.HostVolumeStub) (string, error) { +// TODO: we could make a bunch more formatters into shared functions using this +type formatOpts struct { + verbose bool + short bool + length int + json bool + template string +} + +func formatHostVolumes(vols []*api.HostVolumeStub, opts formatOpts) (string, error) { // Sort the output by volume ID sort.Slice(vols, func(i, j int) bool { return vols[i].ID < vols[j].ID }) - if c.json || len(c.template) > 0 { - out, err := Format(c.json, c.template, vols) + if opts.json || len(opts.template) > 0 { + out, err := Format(opts.json, opts.template, vols) if err != nil { return "", fmt.Errorf("format error: %v", err) } return out, nil } - // Truncate the id unless full length is requested - length := shortId - if c.verbose { - length = fullId - } - rows := make([]string, len(vols)+1) rows[0] = "ID|Name|Namespace|Plugin ID|Node ID|Node Pool|State" for i, v := range vols { rows[i+1] = fmt.Sprintf("%s|%s|%s|%s|%s|%s|%s", - limit(v.ID, length), + limit(v.ID, opts.length), v.Name, v.Namespace, v.PluginID, - limit(v.NodeID, length), + limit(v.NodeID, opts.length), v.NodePool, v.State, ) diff --git 
a/command/volume_status_host_test.go b/command/volume_status_host_test.go index f150b1f9985..0ec8d930550 100644 --- a/command/volume_status_host_test.go +++ b/command/volume_status_host_test.go @@ -74,7 +74,7 @@ capability { _, err = file.WriteString(hclTestFile) must.NoError(t, err) - args := []string{"-address", url, file.Name()} + args := []string{"-address", url, "-detach", file.Name()} cmd := &VolumeCreateCommand{Meta: Meta{Ui: ui}} code := cmd.Run(args) must.Eq(t, 0, code, must.Sprintf("got error: %s", ui.ErrorWriter.String())) diff --git a/demo/hostvolume/example-host-volume b/demo/hostvolume/example-host-volume index ae0f7711326..0d726cd3c01 100755 --- a/demo/hostvolume/example-host-volume +++ b/demo/hostvolume/example-host-volume @@ -94,7 +94,7 @@ case "$1" in create_volume "$host_path" "$CAPACITY_MIN_BYTES" # output what Nomad expects bytes="$(stat --format='%s' "$host_path.ext4")" - printf '{"path": "%s", "bytes": %s}' "$host_path", "$bytes" + printf '{"path": "%s", "bytes": %s}' "$host_path" "$bytes" ;; "delete") delete_volume "$host_path" ;; From 926925ba1624142b3c59320579129caa51badf6e Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Fri, 22 Nov 2024 13:23:19 -0500 Subject: [PATCH 14/35] dynamic host volumes: search endpoint (#24531) Add support for dynamic host volumes to the search endpoint. Like many other objects with UUID identifiers, we're not supporting fuzzy search here, just prefix search on the fuzzy search endpoint. Because the search endpoint only returns IDs, we need to seperate CSI volumes and host volumes for it to be useful. The new context is called `"host_volumes"` to disambiguate it from `"volumes"`. In future versions of Nomad we should consider deprecating the `"volumes"` context in lieu of a `"csi_volumes"` context. Ref: https://github.com/hashicorp/nomad/pull/24479 --- api/contexts/contexts.go | 1 + nomad/search_endpoint.go | 19 +++- nomad/search_endpoint_test.go | 93 ++++++++++++++++++++ nomad/state/state_store_host_volumes.go | 25 ++++++ nomad/state/state_store_host_volumes_test.go | 11 +++ nomad/structs/search.go | 1 + 6 files changed, 148 insertions(+), 2 deletions(-) diff --git a/api/contexts/contexts.go b/api/contexts/contexts.go index 5176f5b8290..20f099a38e7 100644 --- a/api/contexts/contexts.go +++ b/api/contexts/contexts.go @@ -23,6 +23,7 @@ const ( Plugins Context = "plugins" Variables Context = "vars" Volumes Context = "volumes" + HostVolumes Context = "host_volumes" // These Context types are used to associate a search result from a lower // level Nomad object with one of the higher level Context types above. 
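Because host volumes get their own search context, callers can prefix-search them directly; a sketch of the RPC request shape, mirroring the tests added later in this patch (the server handle and the prefix length are illustrative), might look like:

req := &structs.SearchRequest{
    Prefix:  volID[:8], // host volumes are UUID prefix-searched, not fuzzy-matched
    Context: structs.HostVolumes,
    QueryOptions: structs.QueryOptions{
        Region:    "global",
        Namespace: structs.DefaultNamespace,
    },
}
var resp structs.SearchResponse
if err := srv.RPC("Search.PrefixSearch", req, &resp); err != nil {
    return err
}
matches := resp.Matches[structs.HostVolumes] // kept separate from structs.Volumes (CSI)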
diff --git a/nomad/search_endpoint.go b/nomad/search_endpoint.go index 4a66e939238..b6743c42369 100644 --- a/nomad/search_endpoint.go +++ b/nomad/search_endpoint.go @@ -41,6 +41,7 @@ var ( structs.ScalingPolicies, structs.Variables, structs.Namespaces, + structs.HostVolumes, } ) @@ -84,6 +85,8 @@ func (s *Search) getPrefixMatches(iter memdb.ResultIterator, prefix string) ([]s id = t.ID case *structs.CSIVolume: id = t.ID + case *structs.HostVolume: + id = t.ID case *structs.ScalingPolicy: id = t.ID case *structs.Namespace: @@ -405,6 +408,8 @@ func getResourceIter(context structs.Context, aclObj *acl.ACL, namespace, prefix return store.ScalingPoliciesByIDPrefix(ws, namespace, prefix) case structs.Volumes: return store.CSIVolumesByIDPrefix(ws, namespace, prefix) + case structs.HostVolumes: + return store.HostVolumesByIDPrefix(ws, namespace, prefix, state.SortDefault) case structs.Namespaces: iter, err := store.NamespacesByNamePrefix(ws, prefix) if err != nil { @@ -684,6 +689,8 @@ func sufficientSearchPerms(aclObj *acl.ACL, namespace string, context structs.Co acl.NamespaceCapabilityCSIReadVolume, acl.NamespaceCapabilityListJobs, acl.NamespaceCapabilityReadJob)(aclObj, namespace) + case structs.HostVolumes: + return acl.NamespaceValidator(acl.NamespaceCapabilityHostVolumeRead)(aclObj, namespace) case structs.Variables: return aclObj.AllowVariableSearch(namespace) case structs.Plugins: @@ -774,7 +781,8 @@ func (s *Search) FuzzySearch(args *structs.FuzzySearchRequest, reply *structs.Fu for _, ctx := range prefixContexts { switch ctx { // only apply on the types that use UUID prefix searching - case structs.Evals, structs.Deployments, structs.ScalingPolicies, structs.Volumes, structs.Quotas, structs.Recommendations: + case structs.Evals, structs.Deployments, structs.ScalingPolicies, + structs.Volumes, structs.HostVolumes, structs.Quotas, structs.Recommendations: iter, err := getResourceIter(ctx, aclObj, namespace, roundUUIDDownIfOdd(args.Prefix, args.Context), ws, state) if err != nil { if !s.silenceError(err) { @@ -790,7 +798,9 @@ func (s *Search) FuzzySearch(args *structs.FuzzySearchRequest, reply *structs.Fu for _, ctx := range fuzzyContexts { switch ctx { // skip the types that use UUID prefix searching - case structs.Evals, structs.Deployments, structs.ScalingPolicies, structs.Volumes, structs.Quotas, structs.Recommendations: + case structs.Evals, structs.Deployments, structs.ScalingPolicies, + structs.Volumes, structs.HostVolumes, structs.Quotas, + structs.Recommendations: continue default: iter, err := getFuzzyResourceIterator(ctx, aclObj, namespace, ws, state) @@ -927,6 +937,11 @@ func filteredSearchContexts(aclObj *acl.ACL, namespace string, context structs.C if volRead { available = append(available, c) } + case structs.HostVolumes: + if acl.NamespaceValidator( + acl.NamespaceCapabilityHostVolumeRead)(aclObj, namespace) { + available = append(available, c) + } case structs.Plugins: if aclObj.AllowPluginList() { available = append(available, c) diff --git a/nomad/search_endpoint_test.go b/nomad/search_endpoint_test.go index e06688ac927..ae9e10e33ff 100644 --- a/nomad/search_endpoint_test.go +++ b/nomad/search_endpoint_test.go @@ -1039,6 +1039,53 @@ func TestSearch_PrefixSearch_CSIVolume(t *testing.T) { require.False(t, resp.Truncations[structs.Volumes]) } +func TestSearch_PrefixSearch_HostVolume(t *testing.T) { + ci.Parallel(t) + + srv, cleanup := TestServer(t, func(c *Config) { + c.NumSchedulers = 0 + }) + defer cleanup() + codec := rpcClient(t, srv) + testutil.WaitForLeader(t, srv.RPC) 
+ + store := srv.fsm.State() + index, _ := store.LatestIndex() + + node := mock.Node() + index++ + must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, index, node)) + + id := uuid.Generate() + index++ + err := store.UpsertHostVolumes(index, []*structs.HostVolume{{ + ID: id, + Name: "example", + Namespace: structs.DefaultNamespace, + PluginID: "glade", + NodeID: node.ID, + NodePool: node.NodePool, + }}) + must.NoError(t, err) + + req := &structs.SearchRequest{ + Prefix: id[:6], + Context: structs.HostVolumes, + QueryOptions: structs.QueryOptions{ + Region: "global", + Namespace: structs.DefaultNamespace, + }, + } + + var resp structs.SearchResponse + must.NoError(t, msgpackrpc.CallWithCodec(codec, "Search.PrefixSearch", req, &resp)) + + must.Len(t, 1, resp.Matches[structs.HostVolumes]) + must.Len(t, 0, resp.Matches[structs.Volumes]) + must.Eq(t, id, resp.Matches[structs.HostVolumes][0]) + must.False(t, resp.Truncations[structs.HostVolumes]) +} + func TestSearch_PrefixSearch_Namespace(t *testing.T) { ci.Parallel(t) @@ -1932,6 +1979,52 @@ func TestSearch_FuzzySearch_CSIVolume(t *testing.T) { require.False(t, resp.Truncations[structs.Volumes]) } +func TestSearch_FuzzySearch_HostVolume(t *testing.T) { + ci.Parallel(t) + + srv, cleanup := TestServer(t, func(c *Config) { + c.NumSchedulers = 0 + }) + defer cleanup() + codec := rpcClient(t, srv) + testutil.WaitForLeader(t, srv.RPC) + + store := srv.fsm.State() + index, _ := store.LatestIndex() + + node := mock.Node() + index++ + must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, index, node)) + + id := uuid.Generate() + index++ + err := store.UpsertHostVolumes(index, []*structs.HostVolume{{ + ID: id, + Name: "example", + Namespace: structs.DefaultNamespace, + PluginID: "glade", + NodeID: node.ID, + NodePool: node.NodePool, + }}) + must.NoError(t, err) + + req := &structs.FuzzySearchRequest{ + Text: id[0:3], // volumes are prefix searched + Context: structs.HostVolumes, + QueryOptions: structs.QueryOptions{ + Region: "global", + Namespace: structs.DefaultNamespace, + }, + } + + var resp structs.FuzzySearchResponse + must.NoError(t, msgpackrpc.CallWithCodec(codec, "Search.FuzzySearch", req, &resp)) + + must.Len(t, 1, resp.Matches[structs.HostVolumes]) + must.Eq(t, id, resp.Matches[structs.HostVolumes][0].ID) + must.False(t, resp.Truncations[structs.HostVolumes]) +} + func TestSearch_FuzzySearch_Namespace(t *testing.T) { ci.Parallel(t) diff --git a/nomad/state/state_store_host_volumes.go b/nomad/state/state_store_host_volumes.go index dd5a68040f2..522d1d19468 100644 --- a/nomad/state/state_store_host_volumes.go +++ b/nomad/state/state_store_host_volumes.go @@ -5,6 +5,7 @@ package state import ( "fmt" + "strings" memdb "github.com/hashicorp/go-memdb" "github.com/hashicorp/nomad/nomad/structs" @@ -156,6 +157,30 @@ func (s *StateStore) HostVolumes(ws memdb.WatchSet, sort SortOption) (memdb.Resu return s.hostVolumesIter(ws, indexID, sort) } +// HostVolumesByIDPrefix retrieves all host volumes by ID prefix. Because the ID +// index is namespaced, we need to handle the wildcard namespace here as well. 
+func (s *StateStore) HostVolumesByIDPrefix(ws memdb.WatchSet, ns, prefix string, sort SortOption) (memdb.ResultIterator, error) { + + if ns != structs.AllNamespacesSentinel { + return s.hostVolumesIter(ws, "id_prefix", sort, ns, prefix) + } + + // for wildcard namespace, wrap the iterator in a filter function that + // filters all volumes by prefix + iter, err := s.hostVolumesIter(ws, indexID, sort) + if err != nil { + return nil, err + } + wrappedIter := memdb.NewFilterIterator(iter, func(raw any) bool { + vol, ok := raw.(*structs.HostVolume) + if !ok { + return true + } + return !strings.HasPrefix(vol.ID, prefix) + }) + return wrappedIter, nil +} + // HostVolumesByName retrieves all host volumes of the same name func (s *StateStore) HostVolumesByName(ws memdb.WatchSet, ns, name string, sort SortOption) (memdb.ResultIterator, error) { return s.hostVolumesIter(ws, "name_prefix", sort, ns, name) diff --git a/nomad/state/state_store_host_volumes_test.go b/nomad/state/state_store_host_volumes_test.go index af4c77a729b..358591160c6 100644 --- a/nomad/state/state_store_host_volumes_test.go +++ b/nomad/state/state_store_host_volumes_test.go @@ -163,6 +163,17 @@ func TestStateStore_HostVolumes_CRUD(t *testing.T) { must.NoError(t, err) got = consumeIter(iter) must.MapLen(t, 3, got, must.Sprint(`expected 3 volumes remain`)) + + prefix := vol.ID[:30] // sufficiently long prefix to avoid flakes + iter, err = store.HostVolumesByIDPrefix(nil, "*", prefix, SortDefault) + must.NoError(t, err) + got = consumeIter(iter) + must.MapLen(t, 1, got, must.Sprint(`expected only one volume to match prefix`)) + + iter, err = store.HostVolumesByIDPrefix(nil, vol.Namespace, prefix, SortDefault) + must.NoError(t, err) + got = consumeIter(iter) + must.MapLen(t, 1, got, must.Sprint(`expected only one volume to match prefix`)) } func TestStateStore_UpdateHostVolumesFromFingerprint(t *testing.T) { diff --git a/nomad/structs/search.go b/nomad/structs/search.go index b71798c2194..53aebc01e2a 100644 --- a/nomad/structs/search.go +++ b/nomad/structs/search.go @@ -22,6 +22,7 @@ const ( Plugins Context = "plugins" Variables Context = "vars" Volumes Context = "volumes" + HostVolumes Context = "host_volumes" // Subtypes used in fuzzy matching. Groups Context = "groups" From d1352b285d24c15798b5c52c44cb05a3e7512537 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Mon, 2 Dec 2024 09:11:05 -0500 Subject: [PATCH 15/35] dynamic host volumes: Enterprise stubs and refactor API (#24545) Most Nomad upsert RPCs accept a single object with the notable exception of CSI. But in CSI we don't actually expose this to users except through the Go API. It deeply complicates how we present errors to users, especially once Sentinel policy enforcement enters the mix. Refactor the `HostVolume.Create` and `HostVolume.Register` RPCs to take a single volume instead of a slice of volumes. Add a stub function for Enterprise policy enforcement. This requires splitting out placement from the `createVolume` function so that we can ensure we've completed placement before trying to enforce policy. 
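For Go API consumers the visible change is that create and register now take and return a single volume rather than a slice; a sketch of the new call site, with illustrative error handling and assuming only the api types changed in the diff below:

req := &api.HostVolumeCreateRequest{Volume: vol} // previously Volumes: []*api.HostVolume{vol}
created, _, err := client.HostVolumes().Create(req, nil)
if err != nil {
    return fmt.Errorf("error creating volume: %w", err)
}
fmt.Printf("Created host volume %s with ID %s\n", created.Name, created.ID)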
Ref: https://github.com/hashicorp/nomad/pull/24479 --- api/host_volumes.go | 22 +- command/agent/host_volume_endpoint_test.go | 20 +- command/volume_create_host.go | 26 +- command/volume_register_host.go | 11 +- nomad/fsm.go | 2 +- nomad/host_volume_endpoint.go | 258 ++++++++++--------- nomad/host_volume_endpoint_ce.go | 23 ++ nomad/host_volume_endpoint_test.go | 189 +++++++------- nomad/search_endpoint_test.go | 8 +- nomad/state/state_store_host_volumes.go | 82 +++--- nomad/state/state_store_host_volumes_test.go | 14 +- nomad/structs/host_volumes.go | 24 +- 12 files changed, 364 insertions(+), 315 deletions(-) create mode 100644 nomad/host_volume_endpoint_ce.go diff --git a/api/host_volumes.go b/api/host_volumes.go index dae11afc68a..985695fa706 100644 --- a/api/host_volumes.go +++ b/api/host_volumes.go @@ -147,11 +147,11 @@ func (c *Client) HostVolumes() *HostVolumes { } type HostVolumeCreateRequest struct { - Volumes []*HostVolume + Volume *HostVolume } type HostVolumeRegisterRequest struct { - Volumes []*HostVolume + Volume *HostVolume } type HostVolumeListRequest struct { @@ -163,30 +163,30 @@ type HostVolumeDeleteRequest struct { VolumeIDs []string } -// Create forwards to client agents so host volumes can be created on those -// hosts, and registers the volumes with Nomad servers. -func (hv *HostVolumes) Create(req *HostVolumeCreateRequest, opts *WriteOptions) ([]*HostVolume, *WriteMeta, error) { +// Create forwards to client agents so a host volume can be created on those +// hosts, and registers the volume with Nomad servers. +func (hv *HostVolumes) Create(req *HostVolumeCreateRequest, opts *WriteOptions) (*HostVolume, *WriteMeta, error) { var out struct { - Volumes []*HostVolume + Volume *HostVolume } wm, err := hv.client.put("/v1/volume/host/create", req, &out, opts) if err != nil { return nil, wm, err } - return out.Volumes, wm, nil + return out.Volume, wm, nil } -// Register registers host volumes that were created out-of-band with the Nomad +// Register registers a host volume that was created out-of-band with the Nomad // servers. 
-func (hv *HostVolumes) Register(req *HostVolumeRegisterRequest, opts *WriteOptions) ([]*HostVolume, *WriteMeta, error) { +func (hv *HostVolumes) Register(req *HostVolumeRegisterRequest, opts *WriteOptions) (*HostVolume, *WriteMeta, error) { var out struct { - Volumes []*HostVolume + Volume *HostVolume } wm, err := hv.client.put("/v1/volume/host/register", req, &out, opts) if err != nil { return nil, wm, err } - return out.Volumes, wm, nil + return out.Volume, wm, nil } // Get queries for a single host volume, by ID diff --git a/command/agent/host_volume_endpoint_test.go b/command/agent/host_volume_endpoint_test.go index 8a939a86582..ddff7a33fbb 100644 --- a/command/agent/host_volume_endpoint_test.go +++ b/command/agent/host_volume_endpoint_test.go @@ -24,8 +24,8 @@ func TestHostVolumeEndpoint_CRUD(t *testing.T) { vol.NodePool = "" vol.Constraints = nil reqBody := struct { - Volumes []*structs.HostVolume - }{Volumes: []*structs.HostVolume{vol}} + Volume *structs.HostVolume + }{Volume: vol} buf := encodeReq(reqBody) req, err := http.NewRequest(http.MethodPut, "/v1/volume/host/create", buf) must.NoError(t, err) @@ -37,12 +37,12 @@ func TestHostVolumeEndpoint_CRUD(t *testing.T) { must.NoError(t, err) must.NotNil(t, obj) resp := obj.(*structs.HostVolumeCreateResponse) - must.Len(t, 1, resp.Volumes) - must.Eq(t, vol.Name, resp.Volumes[0].Name) - must.Eq(t, s.client.NodeID(), resp.Volumes[0].NodeID) + must.NotNil(t, resp.Volume) + must.Eq(t, vol.Name, resp.Volume.Name) + must.Eq(t, s.client.NodeID(), resp.Volume.NodeID) must.NotEq(t, "", respW.Result().Header.Get("X-Nomad-Index")) - volID := resp.Volumes[0].ID + volID := resp.Volume.ID // Verify volume was created @@ -61,8 +61,8 @@ func TestHostVolumeEndpoint_CRUD(t *testing.T) { vol = respVol.Copy() vol.Parameters = map[string]string{"bar": "foo"} // swaps key and value reqBody = struct { - Volumes []*structs.HostVolume - }{Volumes: []*structs.HostVolume{vol}} + Volume *structs.HostVolume + }{Volume: vol} buf = encodeReq(reqBody) req, err = http.NewRequest(http.MethodPut, "/v1/volume/host/register", buf) must.NoError(t, err) @@ -70,8 +70,8 @@ func TestHostVolumeEndpoint_CRUD(t *testing.T) { must.NoError(t, err) must.NotNil(t, obj) regResp := obj.(*structs.HostVolumeRegisterResponse) - must.Len(t, 1, regResp.Volumes) - must.Eq(t, map[string]string{"bar": "foo"}, regResp.Volumes[0].Parameters) + must.NotNil(t, regResp.Volume) + must.Eq(t, map[string]string{"bar": "foo"}, regResp.Volume.Parameters) // Verify volume was updated diff --git a/command/volume_create_host.go b/command/volume_create_host.go index 8947244aba0..62ccf1a4189 100644 --- a/command/volume_create_host.go +++ b/command/volume_create_host.go @@ -28,9 +28,9 @@ func (c *VolumeCreateCommand) hostVolumeCreate( } req := &api.HostVolumeCreateRequest{ - Volumes: []*api.HostVolume{vol}, + Volume: vol, } - vols, _, err := client.HostVolumes().Create(req, nil) + vol, _, err = client.HostVolumes().Create(req, nil) if err != nil { c.Ui.Error(fmt.Sprintf("Error creating volume: %s", err)) return 1 @@ -39,19 +39,15 @@ func (c *VolumeCreateCommand) hostVolumeCreate( var volID string var lastIndex uint64 - // note: the command only ever returns 1 volume from the API - for _, vol := range vols { - if detach || vol.State == api.HostVolumeStateReady { - c.Ui.Output(fmt.Sprintf( - "Created host volume %s with ID %s", vol.Name, vol.ID)) - return 0 - } else { - c.Ui.Output(fmt.Sprintf( - "==> Created host volume %s with ID %s", vol.Name, vol.ID)) - volID = vol.ID - lastIndex = vol.ModifyIndex - break - 
} + if detach || vol.State == api.HostVolumeStateReady { + c.Ui.Output(fmt.Sprintf( + "Created host volume %s with ID %s", vol.Name, vol.ID)) + return 0 + } else { + c.Ui.Output(fmt.Sprintf( + "==> Created host volume %s with ID %s", vol.Name, vol.ID)) + volID = vol.ID + lastIndex = vol.ModifyIndex } err = c.monitorHostVolume(client, volID, lastIndex, verbose) diff --git a/command/volume_register_host.go b/command/volume_register_host.go index 705f2faaf26..4e3ce6ccddb 100644 --- a/command/volume_register_host.go +++ b/command/volume_register_host.go @@ -18,18 +18,15 @@ func (c *VolumeRegisterCommand) hostVolumeRegister(client *api.Client, ast *ast. } req := &api.HostVolumeRegisterRequest{ - Volumes: []*api.HostVolume{vol}, + Volume: vol, } - vols, _, err := client.HostVolumes().Register(req, nil) + vol, _, err = client.HostVolumes().Register(req, nil) if err != nil { c.Ui.Error(fmt.Sprintf("Error registering volume: %s", err)) return 1 } - for _, vol := range vols { - // note: the command only ever returns 1 volume from the API - c.Ui.Output(fmt.Sprintf( - "Registered host volume %s with ID %s", vol.Name, vol.ID)) - } + c.Ui.Output(fmt.Sprintf( + "Registered host volume %s with ID %s", vol.Name, vol.ID)) return 0 } diff --git a/nomad/fsm.go b/nomad/fsm.go index 16a52e0810f..9ea3267457f 100644 --- a/nomad/fsm.go +++ b/nomad/fsm.go @@ -2428,7 +2428,7 @@ func (n *nomadFSM) applyHostVolumeRegister(msgType structs.MessageType, buf []by panic(fmt.Errorf("failed to decode request: %v", err)) } - if err := n.state.UpsertHostVolumes(index, req.Volumes); err != nil { + if err := n.state.UpsertHostVolume(index, req.Volume); err != nil { n.logger.Error("UpsertHostVolumes failed", "error", err) return err } diff --git a/nomad/host_volume_endpoint.go b/nomad/host_volume_endpoint.go index 0f3fa457a65..cd7b629890f 100644 --- a/nomad/host_volume_endpoint.go +++ b/nomad/host_volume_endpoint.go @@ -216,64 +216,70 @@ func (v *HostVolume) Create(args *structs.HostVolumeCreateRequest, reply *struct return err } - if len(args.Volumes) == 0 { + if args.Volume == nil { return fmt.Errorf("missing volume definition") } - for _, vol := range args.Volumes { - if vol.Namespace == "" { - vol.Namespace = args.RequestNamespace() - } - if !allowVolume(aclObj, vol.Namespace) { - return structs.ErrPermissionDenied - } + vol := args.Volume + if vol.Namespace == "" { + vol.Namespace = args.RequestNamespace() + } + if !allowVolume(aclObj, vol.Namespace) { + return structs.ErrPermissionDenied } - // ensure we only try to create valid volumes or make valid updates to - // volumes - validVols, err := v.validateVolumeUpdates(args.Volumes) + // ensure we only try to create a valid volume or make valid updates to a + // volume + now := time.Now() + snap, err := v.srv.State().Snapshot() if err != nil { - return helper.FlattenMultierror(err) + return err } - // Attempt to create all the validated volumes and write only successfully - // created volumes to raft. And we'll report errors for any failed volumes + vol, err = v.validateVolumeUpdate(vol, snap, now) + if err != nil { + return err + } + + _, err = v.placeHostVolume(snap, vol) + if err != nil { + return fmt.Errorf("could not place volume %q: %w", vol.Name, err) + } + + warn, err := v.enforceEnterprisePolicy( + snap, vol, args.GetIdentity().GetACLToken(), args.PolicyOverride) + if warn != nil { + reply.Warnings = warn.Error() + } + if err != nil { + return err + } + + // Attempt to create the volume on the client. 
// // NOTE: creating the volume on the client via the plugin can't be made // atomic with the registration, and creating the volume provides values we // want to write on the Volume in raft anyways. - - // This can't reuse the validVols slice because we only want to write - // volumes we've successfully created or updated on the client to get - // updated in Raft. - raftArgs := &structs.HostVolumeRegisterRequest{ - Volumes: []*structs.HostVolume{}, - WriteRequest: args.WriteRequest, - } - - var mErr *multierror.Error - for _, vol := range validVols { - err = v.createVolume(vol) // mutates the vol - if err != nil { - mErr = multierror.Append(mErr, err) - } else { - raftArgs.Volumes = append(raftArgs.Volumes, vol) - } + err = v.createVolume(vol) + if err != nil { + return err } - // if we created or updated any volumes, apply them to raft. - var index uint64 - if len(raftArgs.Volumes) > 0 { - _, index, err = v.srv.raftApply(structs.HostVolumeRegisterRequestType, raftArgs) - if err != nil { - v.logger.Error("raft apply failed", "error", err, "method", "register") - mErr = multierror.Append(mErr, err) - } + // Write a newly created or modified volume to raft. We create a new request + // here because we've likely mutated the volume. + _, index, err := v.srv.raftApply(structs.HostVolumeRegisterRequestType, + &structs.HostVolumeRegisterRequest{ + Volume: vol, + WriteRequest: args.WriteRequest, + }) + if err != nil { + v.logger.Error("raft apply failed", "error", err, "method", "register") + return err } - reply.Volumes = raftArgs.Volumes + reply.Volume = vol reply.Index = index - return helper.FlattenMultierror(mErr) + return nil } func (v *HostVolume) Register(args *structs.HostVolumeRegisterRequest, reply *structs.HostVolumeRegisterResponse) error { @@ -294,105 +300,97 @@ func (v *HostVolume) Register(args *structs.HostVolumeRegisterRequest, reply *st return err } - if len(args.Volumes) == 0 { + if args.Volume == nil { return fmt.Errorf("missing volume definition") } - for _, vol := range args.Volumes { - if vol.Namespace == "" { - vol.Namespace = args.RequestNamespace() - } - if !allowVolume(aclObj, vol.Namespace) { - return structs.ErrPermissionDenied - } + vol := args.Volume + if vol.Namespace == "" { + vol.Namespace = args.RequestNamespace() + } + if !allowVolume(aclObj, vol.Namespace) { + return structs.ErrPermissionDenied } - // ensure we only try to create valid volumes or make valid updates to - // volumes - validVols, err := v.validateVolumeUpdates(args.Volumes) + snap, err := v.srv.State().Snapshot() if err != nil { - return helper.FlattenMultierror(err) + return err } - raftArgs := &structs.HostVolumeRegisterRequest{ - Volumes: validVols, - WriteRequest: args.WriteRequest, + now := time.Now() + vol, err = v.validateVolumeUpdate(vol, snap, now) + if err != nil { + return err } - var mErr *multierror.Error - var index uint64 - if len(raftArgs.Volumes) > 0 { - _, index, err = v.srv.raftApply(structs.HostVolumeRegisterRequestType, raftArgs) - if err != nil { - v.logger.Error("raft apply failed", "error", err, "method", "register") - mErr = multierror.Append(mErr, err) - } + warn, err := v.enforceEnterprisePolicy( + snap, vol, args.GetIdentity().GetACLToken(), args.PolicyOverride) + if warn != nil { + reply.Warnings = warn.Error() + } + if err != nil { + return err } - reply.Volumes = raftArgs.Volumes + // Write a newly created or modified volume to raft. We create a new request + // here because we've likely mutated the volume. 
+ _, index, err := v.srv.raftApply(structs.HostVolumeRegisterRequestType, + &structs.HostVolumeRegisterRequest{ + Volume: vol, + WriteRequest: args.WriteRequest, + }) + if err != nil { + v.logger.Error("raft apply failed", "error", err, "method", "register") + return err + } + + reply.Volume = vol reply.Index = index - return helper.FlattenMultierror(mErr) + return nil } -func (v *HostVolume) validateVolumeUpdates(requested []*structs.HostVolume) ([]*structs.HostVolume, error) { +func (v *HostVolume) validateVolumeUpdate( + vol *structs.HostVolume, + snap *state.StateSnapshot, + now time.Time) (*structs.HostVolume, error) { - now := time.Now() - var vols []*structs.HostVolume - - snap, err := v.srv.State().Snapshot() + // validate the volume spec + err := vol.Validate() if err != nil { - return nil, err + return nil, fmt.Errorf("volume validation failed: %v", err) } - var mErr *multierror.Error - for _, vol := range requested { - - // validate the volume spec - err := vol.Validate() + // validate any update we're making + var existing *structs.HostVolume + volID := vol.ID + if vol.ID != "" { + existing, err = snap.HostVolumeByID(nil, vol.Namespace, vol.ID, true) if err != nil { - mErr = multierror.Append(mErr, fmt.Errorf("volume validation failed: %v", err)) - continue + return nil, err // should never hit, bail out } + if existing == nil { + return nil, fmt.Errorf("cannot update volume %q: volume does not exist", vol.ID) - // validate any update we're making - var existing *structs.HostVolume - volID := vol.ID - if vol.ID != "" { - existing, err = snap.HostVolumeByID(nil, vol.Namespace, vol.ID, true) - if err != nil { - return nil, err // should never hit, bail out - } - if existing == nil { - mErr = multierror.Append(mErr, - fmt.Errorf("cannot update volume %q: volume does not exist", vol.ID)) - continue - } - err = vol.ValidateUpdate(existing) - if err != nil { - mErr = multierror.Append(mErr, - fmt.Errorf("validating volume %q update failed: %v", vol.ID, err)) - continue - } - } else { - // capture this for nicer error messages later - volID = vol.Name } - - // set zero values as needed, possibly from existing - vol.CanonicalizeForUpdate(existing, now) - - // make sure any nodes or pools actually exist - err = v.validateVolumeForState(vol, snap) + err = vol.ValidateUpdate(existing) if err != nil { - mErr = multierror.Append(mErr, - fmt.Errorf("validating volume %q against state failed: %v", volID, err)) - continue + return nil, fmt.Errorf("validating volume %q update failed: %v", vol.ID, err) } + } else { + // capture this for nicer error messages later + volID = vol.Name + } - vols = append(vols, vol) + // set zero values as needed, possibly from existing + vol.CanonicalizeForUpdate(existing, now) + + // make sure any nodes or pools actually exist + err = v.validateVolumeForState(vol, snap) + if err != nil { + return nil, fmt.Errorf("validating volume %q against state failed: %v", volID, err) } - return vols, mErr.ErrorOrNil() + return vol, nil } // validateVolumeForState ensures that any references to node IDs or node pools are valid @@ -427,13 +425,6 @@ func (v *HostVolume) validateVolumeForState(vol *structs.HostVolume, snap *state func (v *HostVolume) createVolume(vol *structs.HostVolume) error { - node, err := v.placeHostVolume(vol) - if err != nil { - return fmt.Errorf("could not place volume %q: %w", vol.Name, err) - } - vol.NodeID = node.ID - vol.NodePool = node.NodePool - method := "ClientHostVolume.Create" cReq := &cstructs.ClientHostVolumeCreateRequest{ ID: vol.ID, @@ 
-445,7 +436,7 @@ func (v *HostVolume) createVolume(vol *structs.HostVolume) error { Parameters: vol.Parameters, } cResp := &cstructs.ClientHostVolumeCreateResponse{} - err = v.srv.RPC(method, cReq, cResp) + err := v.srv.RPC(method, cReq, cResp) if err != nil { return err } @@ -460,17 +451,29 @@ func (v *HostVolume) createVolume(vol *structs.HostVolume) error { return nil } -// placeHostVolume finds a node that matches the node pool and constraints, -// which doesn't already have a volume by that name. It returns a non-nil Node -// or an error indicating placement failed. -func (v *HostVolume) placeHostVolume(vol *structs.HostVolume) (*structs.Node, error) { +// placeHostVolume adds a node to volumes that don't already have one. The node +// will match the node pool and constraints, which doesn't already have a volume +// by that name. It returns the node (for testing) and an error indicating +// placement failed. +func (v *HostVolume) placeHostVolume(snap *state.StateSnapshot, vol *structs.HostVolume) (*structs.Node, error) { + if vol.NodeID != "" { + node, err := snap.NodeByID(nil, vol.NodeID) + if err != nil { + return nil, err + } + if node == nil { + return nil, fmt.Errorf("no such node %s", vol.NodeID) + } + vol.NodePool = node.NodePool + return node, nil + } var iter memdb.ResultIterator var err error if vol.NodePool != "" { - iter, err = v.srv.State().NodesByNodePool(nil, vol.NodePool) + iter, err = snap.NodesByNodePool(nil, vol.NodePool) } else { - iter, err = v.srv.State().Nodes(nil) + iter, err = snap.Nodes(nil) } if err != nil { return nil, err @@ -508,7 +511,10 @@ func (v *HostVolume) placeHostVolume(vol *structs.HostVolume) (*structs.Node, er } } + vol.NodeID = candidate.ID + vol.NodePool = candidate.NodePool return candidate, nil + } return nil, fmt.Errorf("no node meets constraints") diff --git a/nomad/host_volume_endpoint_ce.go b/nomad/host_volume_endpoint_ce.go new file mode 100644 index 00000000000..756df5f4298 --- /dev/null +++ b/nomad/host_volume_endpoint_ce.go @@ -0,0 +1,23 @@ +// Copyright (c) HashiCorp, Inc. 
+// SPDX-License-Identifier: BUSL-1.1 + +//go:build !ent +// +build !ent + +package nomad + +import ( + "github.com/hashicorp/nomad/nomad/state" + "github.com/hashicorp/nomad/nomad/structs" +) + +// enforceEnterprisePolicy is the CE stub for Enterprise governance via +// Sentinel policy, quotas, and node pools +func (v *HostVolume) enforceEnterprisePolicy( + _ *state.StateSnapshot, + _ *structs.HostVolume, + _ *structs.ACLToken, + _ bool, +) (error, error) { + return nil, nil +} diff --git a/nomad/host_volume_endpoint_test.go b/nomad/host_volume_endpoint_test.go index 2a432d961cf..81cd7c55479 100644 --- a/nomad/host_volume_endpoint_test.go +++ b/nomad/host_volume_endpoint_test.go @@ -68,7 +68,6 @@ func TestHostVolumeEndpoint_CreateRegisterGetDelete(t *testing.T) { codec := rpcClient(t, srv) req := &structs.HostVolumeCreateRequest{ - Volumes: []*structs.HostVolume{}, WriteRequest: structs.WriteRequest{ Region: srv.Region(), AuthToken: token}, @@ -81,39 +80,37 @@ func TestHostVolumeEndpoint_CreateRegisterGetDelete(t *testing.T) { err := msgpackrpc.CallWithCodec(codec, "HostVolume.Create", req, &resp) must.EqError(t, err, "missing volume definition") - req.Volumes = []*structs.HostVolume{ - {}, // missing basic fields - { - Name: "example", - PluginID: "example_plugin", - Constraints: []*structs.Constraint{{ - RTarget: "r1", - Operand: "=", - }}, - RequestedCapacityMinBytes: 200000, - RequestedCapacityMaxBytes: 100000, - RequestedCapabilities: []*structs.HostVolumeCapability{ - { - AttachmentMode: structs.HostVolumeAttachmentModeFilesystem, - AccessMode: structs.HostVolumeAccessModeSingleNodeWriter, - }, - { - AttachmentMode: "bad", - AccessMode: "invalid", - }, - }, - }, // fails other field validations - } + req.Volume = &structs.HostVolume{} err = msgpackrpc.CallWithCodec(codec, "HostVolume.Create", req, &resp) - // TODO(1.10.0): nested multierrors are really ugly, we could really use - // some helper functions to make these nicer everywhere they pop up - must.EqError(t, err, `2 errors occurred: - * volume validation failed: 2 errors occurred: + must.EqError(t, err, `volume validation failed: 2 errors occurred: * missing name * must include at least one capability block +`) + + req.Volume = &structs.HostVolume{ + Name: "example", + PluginID: "example_plugin", + Constraints: []*structs.Constraint{{ + RTarget: "r1", + Operand: "=", + }}, + RequestedCapacityMinBytes: 200000, + RequestedCapacityMaxBytes: 100000, + RequestedCapabilities: []*structs.HostVolumeCapability{ + { + AttachmentMode: structs.HostVolumeAttachmentModeFilesystem, + AccessMode: structs.HostVolumeAccessModeSingleNodeWriter, + }, + { + AttachmentMode: "bad", + AccessMode: "invalid", + }, + }, + } - * volume validation failed: 3 errors occurred: + err = msgpackrpc.CallWithCodec(codec, "HostVolume.Create", req, &resp) + must.EqError(t, err, `volume validation failed: 3 errors occurred: * capacity_max (100000) must be larger than capacity_min (200000) * invalid attachment mode: "bad" * invalid constraint: 1 error occurred: @@ -121,20 +118,17 @@ func TestHostVolumeEndpoint_CreateRegisterGetDelete(t *testing.T) { - - `) invalidNode := &structs.Node{ID: uuid.Generate(), NodePool: "does-not-exist"} volOnInvalidNode := mock.HostVolumeRequestForNode(ns, invalidNode) - req.Volumes = []*structs.HostVolume{volOnInvalidNode} + req.Volume = volOnInvalidNode err = msgpackrpc.CallWithCodec(codec, "HostVolume.Create", req, &resp) must.EqError(t, err, fmt.Sprintf( `validating volume "example" against state failed: node %q does 
not exist`, invalidNode.ID)) }) - var vol1ID, vol2ID string var expectIndex uint64 c1.setCreate(&cstructs.ClientHostVolumeCreateResponse{ @@ -148,46 +142,56 @@ func TestHostVolumeEndpoint_CreateRegisterGetDelete(t *testing.T) { vol2 := mock.HostVolumeRequest("apps") vol2.Name = "example2" vol2.NodePool = "prod" - req.Volumes = []*structs.HostVolume{vol1, vol2} t.Run("invalid permissions", func(t *testing.T) { var resp structs.HostVolumeCreateResponse req.AuthToken = otherToken + + req.Volume = vol1 err := msgpackrpc.CallWithCodec(codec, "HostVolume.Create", req, &resp) must.EqError(t, err, "Permission denied") }) t.Run("invalid node constraints", func(t *testing.T) { - req.Volumes[0].Constraints[0].RTarget = "r2" - req.Volumes[1].Constraints[0].RTarget = "r2" + vol1.Constraints[0].RTarget = "r2" + vol2.Constraints[0].RTarget = "r2" defer func() { - req.Volumes[0].Constraints[0].RTarget = "r1" - req.Volumes[1].Constraints[0].RTarget = "r1" + vol1.Constraints[0].RTarget = "r1" + vol2.Constraints[0].RTarget = "r1" }() + req.Volume = vol1.Copy() var resp structs.HostVolumeCreateResponse req.AuthToken = token err := msgpackrpc.CallWithCodec(codec, "HostVolume.Create", req, &resp) - must.EqError(t, err, `2 errors occurred: - * could not place volume "example1": no node meets constraints - * could not place volume "example2": no node meets constraints + must.EqError(t, err, `could not place volume "example1": no node meets constraints`) -`) + req.Volume = vol2.Copy() + resp = structs.HostVolumeCreateResponse{} + err = msgpackrpc.CallWithCodec(codec, "HostVolume.Create", req, &resp) + must.EqError(t, err, `could not place volume "example2": no node meets constraints`) }) t.Run("valid create", func(t *testing.T) { var resp structs.HostVolumeCreateResponse req.AuthToken = token + req.Volume = vol1.Copy() err := msgpackrpc.CallWithCodec(codec, "HostVolume.Create", req, &resp) must.NoError(t, err) - must.Len(t, 2, resp.Volumes) - vol1ID = resp.Volumes[0].ID - vol2ID = resp.Volumes[1].ID + must.NotNil(t, resp.Volume) + vol1 = resp.Volume + expectIndex = resp.Index + req.Volume = vol2.Copy() + resp = structs.HostVolumeCreateResponse{} + err = msgpackrpc.CallWithCodec(codec, "HostVolume.Create", req, &resp) + must.NoError(t, err) + must.NotNil(t, resp.Volume) + vol2 = resp.Volume getReq := &structs.HostVolumeGetRequest{ - ID: vol1ID, + ID: vol1.ID, QueryOptions: structs.QueryOptions{ Region: srv.Region(), Namespace: ns, @@ -206,14 +210,11 @@ func TestHostVolumeEndpoint_CreateRegisterGetDelete(t *testing.T) { t.Run("invalid updates", func(t *testing.T) { - vol1, err := store.HostVolumeByID(nil, ns, vol1ID, false) - must.NoError(t, err) - must.NotNil(t, vol1) invalidVol1 := vol1.Copy() invalidVol2 := &structs.HostVolume{} createReq := &structs.HostVolumeCreateRequest{ - Volumes: []*structs.HostVolume{invalidVol1, invalidVol2}, + Volume: invalidVol2, WriteRequest: structs.WriteRequest{ Region: srv.Region(), Namespace: ns, @@ -221,18 +222,18 @@ func TestHostVolumeEndpoint_CreateRegisterGetDelete(t *testing.T) { } c1.setCreate(nil, errors.New("should not call this endpoint on invalid RPCs")) var createResp structs.HostVolumeCreateResponse - err = msgpackrpc.CallWithCodec(codec, "HostVolume.Create", createReq, &createResp) + err := msgpackrpc.CallWithCodec(codec, "HostVolume.Create", createReq, &createResp) must.EqError(t, err, `volume validation failed: 2 errors occurred: * missing name * must include at least one capability block -`, must.Sprint("initial validation failures should exit early even if there's 
another valid vol")) +`, must.Sprint("initial validation failures should exit early")) invalidVol1.NodeID = uuid.Generate() invalidVol1.RequestedCapacityMinBytes = 100 invalidVol1.RequestedCapacityMaxBytes = 200 registerReq := &structs.HostVolumeRegisterRequest{ - Volumes: []*structs.HostVolume{invalidVol1}, + Volume: invalidVol1, WriteRequest: structs.WriteRequest{ Region: srv.Region(), Namespace: ns, @@ -249,13 +250,10 @@ func TestHostVolumeEndpoint_CreateRegisterGetDelete(t *testing.T) { }) t.Run("blocking Get unblocks on write", func(t *testing.T) { - vol1, err := store.HostVolumeByID(nil, ns, vol1ID, false) - must.NoError(t, err) - must.NotNil(t, vol1) nextVol1 := vol1.Copy() nextVol1.RequestedCapacityMaxBytes = 300000 registerReq := &structs.HostVolumeRegisterRequest{ - Volumes: []*structs.HostVolume{nextVol1}, + Volume: nextVol1, WriteRequest: structs.WriteRequest{ Region: srv.Region(), Namespace: ns, @@ -270,7 +268,7 @@ func TestHostVolumeEndpoint_CreateRegisterGetDelete(t *testing.T) { errCh := make(chan error) getReq := &structs.HostVolumeGetRequest{ - ID: vol1ID, + ID: vol1.ID, QueryOptions: structs.QueryOptions{ Region: srv.Region(), Namespace: ns, @@ -294,7 +292,7 @@ func TestHostVolumeEndpoint_CreateRegisterGetDelete(t *testing.T) { time.AfterFunc(200*time.Millisecond, func() { codec := rpcClient(t, srv) var registerResp structs.HostVolumeRegisterResponse - err = msgpackrpc.CallWithCodec(codec, "HostVolume.Register", registerReq, ®isterResp) + err := msgpackrpc.CallWithCodec(codec, "HostVolume.Register", registerReq, ®isterResp) must.NoError(t, err) }) @@ -309,9 +307,6 @@ func TestHostVolumeEndpoint_CreateRegisterGetDelete(t *testing.T) { }) t.Run("delete blocked by allocation claims", func(t *testing.T) { - vol2, err := store.HostVolumeByID(nil, ns, vol2ID, false) - must.NoError(t, err) - must.NotNil(t, vol2) // claim one of the volumes with a pending allocation alloc := mock.MinAlloc() @@ -326,7 +321,7 @@ func TestHostVolumeEndpoint_CreateRegisterGetDelete(t *testing.T) { index, []*structs.Allocation{alloc})) delReq := &structs.HostVolumeDeleteRequest{ - VolumeIDs: []string{vol1ID, vol2ID}, + VolumeIDs: []string{vol1.ID, vol2.ID}, WriteRequest: structs.WriteRequest{ Region: srv.Region(), Namespace: ns, @@ -334,16 +329,16 @@ func TestHostVolumeEndpoint_CreateRegisterGetDelete(t *testing.T) { } var delResp structs.HostVolumeDeleteResponse - err = msgpackrpc.CallWithCodec(codec, "HostVolume.Delete", delReq, &delResp) + err := msgpackrpc.CallWithCodec(codec, "HostVolume.Delete", delReq, &delResp) must.EqError(t, err, "Permission denied") delReq.AuthToken = powerToken err = msgpackrpc.CallWithCodec(codec, "HostVolume.Delete", delReq, &delResp) - must.EqError(t, err, fmt.Sprintf("volume %s in use by allocations: [%s]", vol2ID, alloc.ID)) + must.EqError(t, err, fmt.Sprintf("volume %s in use by allocations: [%s]", vol2.ID, alloc.ID)) // volume not in use will be deleted even if we got an error getReq := &structs.HostVolumeGetRequest{ - ID: vol1ID, + ID: vol1.ID, QueryOptions: structs.QueryOptions{ Region: srv.Region(), Namespace: ns, @@ -366,11 +361,11 @@ func TestHostVolumeEndpoint_CreateRegisterGetDelete(t *testing.T) { } err = msgpackrpc.CallWithCodec(codec, "Node.UpdateAlloc", nArgs, &structs.GenericResponse{}) - delReq.VolumeIDs = []string{vol2ID} + delReq.VolumeIDs = []string{vol2.ID} err = msgpackrpc.CallWithCodec(codec, "HostVolume.Delete", delReq, &delResp) must.NoError(t, err) - getReq.ID = vol2ID + getReq.ID = vol2.ID err = msgpackrpc.CallWithCodec(codec, 
"HostVolume.Get", getReq, &getResp) must.NoError(t, err) must.Nil(t, getResp.Volume) @@ -378,6 +373,7 @@ func TestHostVolumeEndpoint_CreateRegisterGetDelete(t *testing.T) { } func TestHostVolumeEndpoint_List(t *testing.T) { + ci.Parallel(t) srv, rootToken, cleanupSrv := TestACLServer(t, func(c *Config) { c.NumSchedulers = 0 @@ -422,47 +418,51 @@ func TestHostVolumeEndpoint_List(t *testing.T) { vol1 := mock.HostVolumeRequestForNode(ns1, nodes[0]) vol1.Name = "foobar-example" - vol1.Parameters = map[string]string{"mockID": "vol1"} vol2 := mock.HostVolumeRequestForNode(ns1, nodes[1]) vol2.Name = "foobaz-example" - vol2.Parameters = map[string]string{"mockID": "vol2"} vol3 := mock.HostVolumeRequestForNode(ns2, nodes[2]) vol3.Name = "foobar-example" - vol3.Parameters = map[string]string{"mockID": "vol3"} vol4 := mock.HostVolumeRequestForNode(ns2, nodes[1]) vol4.Name = "foobaz-example" - vol4.Parameters = map[string]string{"mockID": "vol4"} // we need to register these rather than upsert them so we have the correct // indexes for unblocking later. registerReq := &structs.HostVolumeRegisterRequest{ - Volumes: []*structs.HostVolume{vol1, vol2, vol3, vol4}, WriteRequest: structs.WriteRequest{ Region: srv.Region(), AuthToken: rootToken.SecretID}, } var registerResp structs.HostVolumeRegisterResponse + + // write the volumes in reverse order so our later test can get a blocking + // query index from a Get it has access to + + registerReq.Volume = vol4 err := msgpackrpc.CallWithCodec(codec, "HostVolume.Register", registerReq, ®isterResp) must.NoError(t, err) + vol4 = registerResp.Volume - // IDs are generated by the server, so we need to read them back to figure - // out which mock got which ID - for _, vol := range registerResp.Volumes { - switch vol.Parameters["mockID"] { - case "vol1": - vol1 = vol - case "vol2": - vol2 = vol - case "vol3": - vol3 = vol - case "vol4": - vol4 = vol - } - } + registerReq.Volume = vol3 + registerResp = structs.HostVolumeRegisterResponse{} + err = msgpackrpc.CallWithCodec(codec, "HostVolume.Register", registerReq, ®isterResp) + must.NoError(t, err) + vol3 = registerResp.Volume + + registerReq.Volume = vol2 + registerResp = structs.HostVolumeRegisterResponse{} + err = msgpackrpc.CallWithCodec(codec, "HostVolume.Register", registerReq, ®isterResp) + must.NoError(t, err) + vol2 = registerResp.Volume + + registerReq.Volume = vol1 + registerResp = structs.HostVolumeRegisterResponse{} + err = msgpackrpc.CallWithCodec(codec, "HostVolume.Register", registerReq, ®isterResp) + must.NoError(t, err) + vol1 = registerResp.Volume testCases := []struct { name string @@ -568,21 +568,24 @@ func TestHostVolumeEndpoint_List(t *testing.T) { t.Run("blocking query unblocks", func(t *testing.T) { - // Get response will include the volume's Index to block on + // the Get response from the most-recently written volume will have the + // index we want to block on getReq := &structs.HostVolumeGetRequest{ ID: vol1.ID, QueryOptions: structs.QueryOptions{ Region: srv.Region(), - Namespace: vol1.Namespace, + Namespace: ns1, AuthToken: token, }, } var getResp structs.HostVolumeGetResponse err = msgpackrpc.CallWithCodec(codec, "HostVolume.Get", getReq, &getResp) + must.NoError(t, err) + must.NotNil(t, getResp.Volume) nextVol := getResp.Volume.Copy() nextVol.RequestedCapacityMaxBytes = 300000 - registerReq.Volumes = []*structs.HostVolume{nextVol} + registerReq.Volume = nextVol registerReq.Namespace = nextVol.Namespace ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) @@ -716,7 
+719,8 @@ func TestHostVolumeEndpoint_placeVolume(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - node, err := endpoint.placeHostVolume(tc.vol) + snap, _ := store.Snapshot() + node, err := endpoint.placeHostVolume(snap, tc.vol) if tc.expectErr == "" { must.NoError(t, err) must.Eq(t, tc.expect, node) @@ -788,6 +792,9 @@ func (v *mockHostVolumeClient) Create( resp *cstructs.ClientHostVolumeCreateResponse) error { v.lock.Lock() defer v.lock.Unlock() + if v.nextCreateResponse == nil { + return nil // prevents panics from incorrect tests + } *resp = *v.nextCreateResponse return v.nextCreateErr } diff --git a/nomad/search_endpoint_test.go b/nomad/search_endpoint_test.go index ae9e10e33ff..5f9695f3d20 100644 --- a/nomad/search_endpoint_test.go +++ b/nomad/search_endpoint_test.go @@ -1058,14 +1058,14 @@ func TestSearch_PrefixSearch_HostVolume(t *testing.T) { id := uuid.Generate() index++ - err := store.UpsertHostVolumes(index, []*structs.HostVolume{{ + err := store.UpsertHostVolume(index, &structs.HostVolume{ ID: id, Name: "example", Namespace: structs.DefaultNamespace, PluginID: "glade", NodeID: node.ID, NodePool: node.NodePool, - }}) + }) must.NoError(t, err) req := &structs.SearchRequest{ @@ -1998,14 +1998,14 @@ func TestSearch_FuzzySearch_HostVolume(t *testing.T) { id := uuid.Generate() index++ - err := store.UpsertHostVolumes(index, []*structs.HostVolume{{ + err := store.UpsertHostVolume(index, &structs.HostVolume{ ID: id, Name: "example", Namespace: structs.DefaultNamespace, PluginID: "glade", NodeID: node.ID, NodePool: node.NodePool, - }}) + }) must.NoError(t, err) req := &structs.FuzzySearchRequest{ diff --git a/nomad/state/state_store_host_volumes.go b/nomad/state/state_store_host_volumes.go index 522d1d19468..37d1cccd1a7 100644 --- a/nomad/state/state_store_host_volumes.go +++ b/nomad/state/state_store_host_volumes.go @@ -51,55 +51,53 @@ func (s *StateStore) HostVolumeByID(ws memdb.WatchSet, ns, id string, withAllocs return vol, nil } -// UpsertHostVolumes upserts a set of host volumes -func (s *StateStore) UpsertHostVolumes(index uint64, volumes []*structs.HostVolume) error { +// UpsertHostVolume upserts a host volume +func (s *StateStore) UpsertHostVolume(index uint64, vol *structs.HostVolume) error { txn := s.db.WriteTxn(index) defer txn.Abort() - for _, v := range volumes { - if exists, err := s.namespaceExists(txn, v.Namespace); err != nil { - return err - } else if !exists { - return fmt.Errorf("host volume %s is in nonexistent namespace %s", v.ID, v.Namespace) - } + if exists, err := s.namespaceExists(txn, vol.Namespace); err != nil { + return err + } else if !exists { + return fmt.Errorf("host volume %s is in nonexistent namespace %s", vol.ID, vol.Namespace) + } - obj, err := txn.First(TableHostVolumes, indexID, v.Namespace, v.ID) - if err != nil { - return err - } - if obj != nil { - old := obj.(*structs.HostVolume) - v.CreateIndex = old.CreateIndex - v.CreateTime = old.CreateTime - } else { - v.CreateIndex = index - } + obj, err := txn.First(TableHostVolumes, indexID, vol.Namespace, vol.ID) + if err != nil { + return err + } + if obj != nil { + old := obj.(*structs.HostVolume) + vol.CreateIndex = old.CreateIndex + vol.CreateTime = old.CreateTime + } else { + vol.CreateIndex = index + } - // If the fingerprint is written from the node before the create RPC - // handler completes, we'll never update from the initial pending, so - // reconcile that here - node, err := s.NodeByID(nil, v.NodeID) - if err != nil { - return err - } - if node == nil 
{ - return fmt.Errorf("host volume %s has nonexistent node ID %s", v.ID, v.NodeID) - } - if _, ok := node.HostVolumes[v.Name]; ok { - v.State = structs.HostVolumeStateReady - } - // Register RPCs for new volumes may not have the node pool set - v.NodePool = node.NodePool + // If the fingerprint is written from the node before the create RPC handler + // completes, we'll never update from the initial pending, so reconcile that + // here + node, err := s.NodeByID(nil, vol.NodeID) + if err != nil { + return err + } + if node == nil { + return fmt.Errorf("host volume %s has nonexistent node ID %s", vol.ID, vol.NodeID) + } + if _, ok := node.HostVolumes[vol.Name]; ok { + vol.State = structs.HostVolumeStateReady + } + // Register RPCs for new volumes may not have the node pool set + vol.NodePool = node.NodePool - // Allocations are denormalized on read, so we don't want these to be - // written to the state store. - v.Allocations = nil - v.ModifyIndex = index + // Allocations are denormalized on read, so we don't want these to be + // written to the state store. + vol.Allocations = nil + vol.ModifyIndex = index - err = txn.Insert(TableHostVolumes, v) - if err != nil { - return fmt.Errorf("host volume insert: %w", err) - } + err = txn.Insert(TableHostVolumes, vol) + if err != nil { + return fmt.Errorf("host volume insert: %w", err) } if err := txn.Insert(tableIndex, &IndexEntry{TableHostVolumes, index}); err != nil { diff --git a/nomad/state/state_store_host_volumes_test.go b/nomad/state/state_store_host_volumes_test.go index 358591160c6..87269597844 100644 --- a/nomad/state/state_store_host_volumes_test.go +++ b/nomad/state/state_store_host_volumes_test.go @@ -54,7 +54,10 @@ func TestStateStore_HostVolumes_CRUD(t *testing.T) { vols[3].NodePool = nodes[2].NodePool index++ - must.NoError(t, store.UpsertHostVolumes(index, vols)) + must.NoError(t, store.UpsertHostVolume(index, vols[0])) + must.NoError(t, store.UpsertHostVolume(index, vols[1])) + must.NoError(t, store.UpsertHostVolume(index, vols[2])) + must.NoError(t, store.UpsertHostVolume(index, vols[3])) vol, err := store.HostVolumeByID(nil, vols[0].Namespace, vols[0].ID, true) must.NoError(t, err) @@ -108,13 +111,13 @@ func TestStateStore_HostVolumes_CRUD(t *testing.T) { must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, index, nodes[2])) // update all the volumes, which should update the state of vol2 as well + index++ for i, vol := range vols { vol = vol.Copy() vol.RequestedCapacityMaxBytes = 300000 vols[i] = vol + must.NoError(t, store.UpsertHostVolume(index, vol)) } - index++ - must.NoError(t, store.UpsertHostVolumes(index, vols)) iter, err = store.HostVolumesByName(nil, structs.DefaultNamespace, "example", SortDefault) must.NoError(t, err) @@ -221,7 +224,10 @@ func TestStateStore_UpdateHostVolumesFromFingerprint(t *testing.T) { index++ oldIndex := index - must.NoError(t, store.UpsertHostVolumes(index, vols)) + must.NoError(t, store.UpsertHostVolume(index, vols[0])) + must.NoError(t, store.UpsertHostVolume(index, vols[1])) + must.NoError(t, store.UpsertHostVolume(index, vols[2])) + must.NoError(t, store.UpsertHostVolume(index, vols[3])) vol0, err := store.HostVolumeByID(nil, ns, vols[0].ID, false) must.NoError(t, err) diff --git a/nomad/structs/host_volumes.go b/nomad/structs/host_volumes.go index 11745526aa5..2c8e6cf2372 100644 --- a/nomad/structs/host_volumes.go +++ b/nomad/structs/host_volumes.go @@ -329,22 +329,38 @@ type HostVolumeStub struct { } type HostVolumeCreateRequest struct { - Volumes []*HostVolume + Volume *HostVolume 
+ + // PolicyOverride is set when the user is attempting to override any + // Enterprise policy enforcement + PolicyOverride bool + WriteRequest } type HostVolumeCreateResponse struct { - Volumes []*HostVolume + Volume *HostVolume + + // Warnings are non-fatal messages from Enterprise policy enforcement + Warnings string WriteMeta } type HostVolumeRegisterRequest struct { - Volumes []*HostVolume + Volume *HostVolume + + // PolicyOverride is set when the user is attempting to override any + // Enterprise policy enforcement + PolicyOverride bool + WriteRequest } type HostVolumeRegisterResponse struct { - Volumes []*HostVolume + Volume *HostVolume + + // Warnings are non-fatal messages from Enterprise policy enforcement + Warnings string WriteMeta } From e3864a5f4a861e7e667c1e47929858f709d57acb Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Mon, 2 Dec 2024 09:11:18 -0500 Subject: [PATCH 16/35] dynamic host volumes: autocomplete for CLI (#24533) Adds dynamic host volumes to argument autocomplete for the `volume status` and `volume delete` commands. Adds flag autocompletion for those commands plus `volume create`. Ref: https://github.com/hashicorp/nomad/pull/24479 --- command/node_status.go | 7 ++++++- command/plugin_status.go | 13 +------------ command/volume_create.go | 6 +++++- command/volume_delete.go | 11 +++++++---- command/volume_delete_host_test.go | 14 +++++++++++++- command/volume_deregister.go | 1 - command/volume_status.go | 25 +++++++++++++++---------- command/volume_status_host_test.go | 14 +++++++++++++- 8 files changed, 60 insertions(+), 31 deletions(-) diff --git a/command/node_status.go b/command/node_status.go index f7f7b587802..9538e90622b 100644 --- a/command/node_status.go +++ b/command/node_status.go @@ -13,6 +13,7 @@ import ( "time" humanize "github.com/dustin/go-humanize" + "github.com/hashicorp/go-set/v3" "github.com/hashicorp/nomad/api" "github.com/hashicorp/nomad/api/contexts" "github.com/hashicorp/nomad/helper/pointer" @@ -129,8 +130,12 @@ func (c *NodeStatusCommand) AutocompleteFlags() complete.Flags { } func (c *NodeStatusCommand) AutocompleteArgs() complete.Predictor { + return nodePredictor(c.Client, nil) +} + +func nodePredictor(factory ApiClientFactory, filter *set.Set[string]) complete.Predictor { return complete.PredictFunc(func(a complete.Args) []string { - client, err := c.Meta.Client() + client, err := factory() if err != nil { return nil } diff --git a/command/plugin_status.go b/command/plugin_status.go index 02c61c65a9a..92dbdc7f26b 100644 --- a/command/plugin_status.go +++ b/command/plugin_status.go @@ -58,21 +58,10 @@ func (c *PluginStatusCommand) Synopsis() string { return "Display status information about a plugin" } -// predictVolumeType is also used in volume_status -var predictVolumeType = complete.PredictFunc(func(a complete.Args) []string { - types := []string{"csi"} - for _, t := range types { - if strings.Contains(t, a.Last) { - return []string{t} - } - } - return nil -}) - func (c *PluginStatusCommand) AutocompleteFlags() complete.Flags { return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient), complete.Flags{ - "-type": predictVolumeType, + "-type": complete.PredictSet("csi"), "-short": complete.PredictNothing, "-verbose": complete.PredictNothing, "-json": complete.PredictNothing, diff --git a/command/volume_create.go b/command/volume_create.go index f566cfa8f2d..1e3dcbad02a 100644 --- a/command/volume_create.go +++ b/command/volume_create.go @@ -52,7 +52,11 @@ Create Options: } 
func (c *VolumeCreateCommand) AutocompleteFlags() complete.Flags { - return c.Meta.AutocompleteFlags(FlagSetClient) + return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient), + complete.Flags{ + "-detach": complete.PredictNothing, + "-verbose": complete.PredictNothing, + }) } func (c *VolumeCreateCommand) AutocompleteArgs() complete.Predictor { diff --git a/command/volume_delete.go b/command/volume_delete.go index ab8be61104b..678dd181c95 100644 --- a/command/volume_delete.go +++ b/command/volume_delete.go @@ -40,7 +40,7 @@ Delete Options: -secret Secrets to pass to the plugin to delete the snapshot. Accepts multiple - flags in the form -secret key=value + flags in the form -secret key=value. Only available for CSI volumes. -type Type of volume to delete. Must be one of "csi" or "host". Defaults to "csi". @@ -50,7 +50,10 @@ Delete Options: func (c *VolumeDeleteCommand) AutocompleteFlags() complete.Flags { return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient), - complete.Flags{}) + complete.Flags{ + "-type": complete.PredictSet("csi", "host"), + "-secret": complete.PredictNothing, + }) } func (c *VolumeDeleteCommand) AutocompleteArgs() complete.Predictor { @@ -66,11 +69,11 @@ func (c *VolumeDeleteCommand) AutocompleteArgs() complete.Predictor { } matches := resp.Matches[contexts.Volumes] - resp, _, err = client.Search().PrefixSearch(a.Last, contexts.Nodes, nil) + resp, _, err = client.Search().PrefixSearch(a.Last, contexts.HostVolumes, nil) if err != nil { return []string{} } - matches = append(matches, resp.Matches[contexts.Nodes]...) + matches = append(matches, resp.Matches[contexts.HostVolumes]...) return matches }) } diff --git a/command/volume_delete_host_test.go b/command/volume_delete_host_test.go index fde8994df33..353063c34cc 100644 --- a/command/volume_delete_host_test.go +++ b/command/volume_delete_host_test.go @@ -12,6 +12,7 @@ import ( "github.com/hashicorp/nomad/api" "github.com/hashicorp/nomad/ci" "github.com/mitchellh/cli" + "github.com/posener/complete" "github.com/shoenig/test/must" ) @@ -62,8 +63,18 @@ capability { ui.OutputWriter.Reset() + // autocomplete + cmd := &VolumeDeleteCommand{Meta: Meta{Ui: ui, namespace: "*", flagAddress: url}} + prefix := id[:len(id)-5] + cargs := complete.Args{Last: prefix} + predictor := cmd.AutocompleteArgs() + + res := predictor.Predict(cargs) + must.SliceLen(t, 1, res) + must.Eq(t, id, res[0]) + // missing the namespace - cmd := &VolumeDeleteCommand{Meta: Meta{Ui: ui}} + cmd = &VolumeDeleteCommand{Meta: Meta{Ui: ui}} args = []string{"-address", url, "-type", "host", id} code = cmd.Run(args) must.Eq(t, 1, code) @@ -76,4 +87,5 @@ capability { must.Eq(t, 0, code, must.Sprintf("got error: %s", ui.ErrorWriter.String())) out = ui.OutputWriter.String() must.StrContains(t, out, fmt.Sprintf("Successfully deleted volume %q!", id)) + } diff --git a/command/volume_deregister.go b/command/volume_deregister.go index c4c78cfcc2d..60851e66954 100644 --- a/command/volume_deregister.go +++ b/command/volume_deregister.go @@ -53,7 +53,6 @@ func (c *VolumeDeregisterCommand) AutocompleteArgs() complete.Predictor { return nil } - // When multiple volume types are implemented, this search should merge contexts resp, _, err := client.Search().PrefixSearch(a.Last, contexts.Volumes, nil) if err != nil { return []string{} diff --git a/command/volume_status.go b/command/volume_status.go index b6163cca6f4..d599e349e83 100644 --- a/command/volume_status.go +++ b/command/volume_status.go @@ -69,15 
+69,13 @@ func (c *VolumeStatusCommand) Synopsis() string { func (c *VolumeStatusCommand) AutocompleteFlags() complete.Flags { return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient), complete.Flags{ - "-type": predictVolumeType, - "-short": complete.PredictNothing, - "-verbose": complete.PredictNothing, - "-json": complete.PredictNothing, - "-t": complete.PredictAnything, - - // TODO(1.10.0): wire-up predictions for nodes and node pools - "-node": complete.PredictNothing, - "-node-pool": complete.PredictNothing, + "-type": complete.PredictSet("csi", "host"), + "-short": complete.PredictNothing, + "-verbose": complete.PredictNothing, + "-json": complete.PredictNothing, + "-t": complete.PredictAnything, + "-node": nodePredictor(c.Client, nil), + "-node-pool": nodePoolPredictor(c.Client, nil), }) } @@ -92,7 +90,14 @@ func (c *VolumeStatusCommand) AutocompleteArgs() complete.Predictor { if err != nil { return []string{} } - return resp.Matches[contexts.Volumes] + matches := resp.Matches[contexts.Volumes] + + resp, _, err = client.Search().PrefixSearch(a.Last, contexts.HostVolumes, nil) + if err != nil { + return []string{} + } + matches = append(matches, resp.Matches[contexts.HostVolumes]...) + return matches }) } diff --git a/command/volume_status_host_test.go b/command/volume_status_host_test.go index 0ec8d930550..c51e931096b 100644 --- a/command/volume_status_host_test.go +++ b/command/volume_status_host_test.go @@ -12,6 +12,7 @@ import ( "github.com/hashicorp/nomad/api" "github.com/hashicorp/nomad/ci" "github.com/mitchellh/cli" + "github.com/posener/complete" "github.com/shoenig/test/must" ) @@ -141,8 +142,19 @@ capability { ui.OutputWriter.Reset() + // autocomplete + cmd := &VolumeStatusCommand{Meta: Meta{Ui: ui, namespace: "*", flagAddress: url}} + cmd.Meta.namespace = "*" + prefix := id[:len(id)-5] + cargs := complete.Args{Last: prefix} + predictor := cmd.AutocompleteArgs() + + res := predictor.Predict(cargs) + must.SliceLen(t, 1, res) + must.Eq(t, id, res[0]) + // missing the namespace - cmd := &VolumeStatusCommand{Meta: Meta{Ui: ui}} + cmd = &VolumeStatusCommand{Meta: Meta{Ui: ui}} args = []string{"-address", url, "-type", "host", id} code = cmd.Run(args) must.Eq(t, 1, code) From f0b89fc4840a309023b22ffabc9cfc4c55f0c00b Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Mon, 2 Dec 2024 16:10:59 +0100 Subject: [PATCH 17/35] dynamic host volumes: make example-host-volume plugin run on macOS (#24563) This adapts the shell script for darwin, making it easier to test. 
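The darwin branch added below has to preserve the same external contract as the existing Linux path: the script receives the operation as its first argument and the host path as its second, reads the requested capacity from the environment, keeps mkfs/hdiutil noise on stderr, and prints a single JSON object on stdout for `create`. The Go sketch that follows only illustrates that contract from a caller's point of view; the `createOutput` struct, the binary path, and the volume path are placeholders for this sketch, not Nomad's actual client code.

package main

import (
	"encoding/json"
	"fmt"
	"os"
	"os/exec"
)

// createOutput mirrors the JSON the demo script prints for "create";
// it is a local stand-in for this sketch, not a Nomad type.
type createOutput struct {
	Path  string `json:"path"`
	Bytes int64  `json:"bytes"`
}

func main() {
	// placeholder paths; the script expects a UUID somewhere in the host path
	cmd := exec.Command("./example-host-volume", "create",
		"/tmp/volumes/0f63ba10-964d-4e97-87f1-e9c8eb1b1f42")
	cmd.Env = append(os.Environ(), "CAPACITY_MIN_BYTES=1048576")
	cmd.Stderr = os.Stderr // diagnostics stay off stdout so the JSON stays parseable
	out, err := cmd.Output()
	if err != nil {
		panic(err)
	}
	var resp createOutput
	if err := json.Unmarshal(out, &resp); err != nil {
		panic(err)
	}
	fmt.Printf("created %s (%d bytes)\n", resp.Path, resp.Bytes)
}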
--- demo/hostvolume/example-host-volume | 46 +++++++++++++++++++++++------ 1 file changed, 37 insertions(+), 9 deletions(-) diff --git a/demo/hostvolume/example-host-volume b/demo/hostvolume/example-host-volume index 0d726cd3c01..d93978ea6cd 100755 --- a/demo/hostvolume/example-host-volume +++ b/demo/hostvolume/example-host-volume @@ -48,6 +48,37 @@ done [ $# -lt 2 ] && { echo 'path required; seek --help' 1>&2; exit 1; } host_path="$2" +# OS detect +if [[ "$OSTYPE" == "linux-"* ]]; then + ext=ext4 + mount=/usr/bin/mount + mkfsExec() { + dd if=/dev/zero of="$1".$ext bs=1M count="$2" + mkfs.ext4 "$1".$ext 1>&2 + } + mountExec() { + $mount "$1".$ext "$1" + } + st() { + stat --format='%s' "$1" + } +elif [[ "$OSTYPE" == "darwin"* ]]; then + ext=dmg + mount=/sbin/mount + mkfsExec() { + hdiutil create -megabytes "$2" -layout NONE -fs apfs -volname "$1" "$1" 1>&2 + } + mountExec() { + hdiutil attach "$1".$ext 1>&2 + } + st() { + stat -f %z "$1" + } +else + echo "$OSTYPE is an unsupported OS" + return 1 +fi + validate_path() { local path="$1" if [[ ! "$path" =~ [0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12} ]]; then @@ -57,7 +88,7 @@ validate_path() { } is_mounted() { - awk '{print $2}' /proc/mounts | grep -q "^$1$" + $mount | grep -q "^$1$" } create_volume() { @@ -69,15 +100,12 @@ create_volume() { local megs=$((bytes / 1024 / 1024)) # lazy, approximate # the extra conditionals are for idempotency - if [ ! -f "$path.ext4" ]; then - dd if=/dev/zero of="$path.ext4" bs=1M count="$megs" - # mkfs is noisy on stdout, so we send it to stderr - # to avoid breaking the JSON parsing on the client - mkfs.ext4 "$path.ext4" 1>&2 + if [ ! -f "$path.$ext" ]; then + mkfsExec "$path" $megs fi if ! is_mounted "$path"; then mkdir -p "$path" - mount "$path.ext4" "$path" + mountExec "$path" fi } @@ -86,14 +114,14 @@ delete_volume() { validate_path "$path" is_mounted "$path" && umount "$path" rm -rf "$path" - rm -f "$path.ext4" + rm -f "$path"."$ext" } case "$1" in "create") create_volume "$host_path" "$CAPACITY_MIN_BYTES" # output what Nomad expects - bytes="$(stat --format='%s' "$host_path.ext4")" + bytes="$(st "$host_path".$ext)" printf '{"path": "%s", "bytes": %s}' "$host_path" "$bytes" ;; "delete") From df258ac02a8971489bc9ab2ab8e52209048cd902 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Mon, 2 Dec 2024 13:27:01 -0500 Subject: [PATCH 18/35] dynamic host volumes: set namespace from volume spec when monitoring (#24586) In #24528 we added monitoring to the CLI for dynamic host volume creation. But when the volume's namespace is set by the volume specification instead of the `-namespace` flag, the API client doesn't have the right namespace and gets a 404 when setting up the monitoring. The specification always overrides the `-namespace` flag, so use that when available for all subsequent API calls. 
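The change itself is small: once the server returns the created volume, the CLI adopts that volume's namespace for every later API call instead of whatever `-namespace` was (or wasn't) passed. A minimal sketch of the idea, assuming the Go `api` package's `Client.SetNamespace` helper; the helper function name here is illustrative and not part of the patch.

package example

import "github.com/hashicorp/nomad/api"

// adoptVolumeNamespace switches the client to the namespace carried by the
// volume specification, so follow-up Get and monitoring calls don't 404 when
// the spec rather than the -namespace flag set the namespace.
func adoptVolumeNamespace(client *api.Client, vol *api.HostVolume) {
	if vol != nil && vol.Namespace != "" {
		client.SetNamespace(vol.Namespace)
	}
}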
Ref: https://github.com/hashicorp/nomad/pull/24479 --- command/volume_create_host.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/command/volume_create_host.go b/command/volume_create_host.go index 62ccf1a4189..54cd31efb97 100644 --- a/command/volume_create_host.go +++ b/command/volume_create_host.go @@ -50,6 +50,10 @@ func (c *VolumeCreateCommand) hostVolumeCreate( lastIndex = vol.ModifyIndex } + if vol.Namespace != "" { + client.SetNamespace(vol.Namespace) + } + err = c.monitorHostVolume(client, volID, lastIndex, verbose) if err != nil { c.Ui.Error(fmt.Sprintf("==> %s: %v", formatTime(time.Now()), err.Error())) From 46a39560bb996a8b6df2619b0c90717771225e05 Mon Sep 17 00:00:00 2001 From: Daniel Bennett Date: Mon, 2 Dec 2024 16:27:10 -0500 Subject: [PATCH 19/35] dynamic host volumes: fingerprint client plugins (#24589) --- client/client.go | 8 +- client/config/config.go | 3 + client/fingerprint/dynamic_host_volumes.go | 120 ++++++++++++++++++ .../fingerprint/dynamic_host_volumes_test.go | 89 +++++++++++++ client/fingerprint/fingerprint.go | 27 ++-- client/host_volume_endpoint_test.go | 13 +- .../hostvolumemanager/host_volume_plugin.go | 28 +++- client/hostvolumemanager/host_volumes.go | 75 ++++++----- command/agent/agent.go | 4 + command/agent/command.go | 17 ++- command/agent/config.go | 7 + demo/hostvolume/example-host-volume | 8 +- helper/funcs.go | 31 +++++ 13 files changed, 359 insertions(+), 71 deletions(-) create mode 100644 client/fingerprint/dynamic_host_volumes.go create mode 100644 client/fingerprint/dynamic_host_volumes_test.go diff --git a/client/client.go b/client/client.go index 1e9a87dd7ea..4fc808eca2b 100644 --- a/client/client.go +++ b/client/client.go @@ -34,7 +34,7 @@ import ( "github.com/hashicorp/nomad/client/dynamicplugins" "github.com/hashicorp/nomad/client/fingerprint" "github.com/hashicorp/nomad/client/hoststats" - "github.com/hashicorp/nomad/client/hostvolumemanager" + hvm "github.com/hashicorp/nomad/client/hostvolumemanager" cinterfaces "github.com/hashicorp/nomad/client/interfaces" "github.com/hashicorp/nomad/client/lib/cgroupslib" "github.com/hashicorp/nomad/client/lib/numalib" @@ -290,7 +290,7 @@ type Client struct { // drivermanager is responsible for managing driver plugins drivermanager drivermanager.Manager - hostVolumeManager *hostvolumemanager.HostVolumeManager + hostVolumeManager *hvm.HostVolumeManager // baseLabels are used when emitting tagged metrics. All client metrics will // have these tags, and optionally more. @@ -535,7 +535,9 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulProxie c.devicemanager = devManager c.pluginManagers.RegisterAndRun(devManager) - c.hostVolumeManager = hostvolumemanager.NewHostVolumeManager(cfg.AllocMountsDir, logger) + c.hostVolumeManager = hvm.NewHostVolumeManager(logger, + cfg.HostVolumePluginDir, + cfg.AllocMountsDir) // Set up the service registration wrapper using the Consul and Nomad // implementations. The Nomad implementation is only ever used on the diff --git a/client/config/config.go b/client/config/config.go index c6b18bcd08f..1fe177432e8 100644 --- a/client/config/config.go +++ b/client/config/config.go @@ -314,6 +314,9 @@ type Config struct { // HostVolumes is a map of the configured host volumes by name. HostVolumes map[string]*structs.ClientHostVolumeConfig + // HostVolumePluginDir is the directory with dynamic host volume plugins. 
+ HostVolumePluginDir string + // HostNetworks is a map of the conigured host networks by name. HostNetworks map[string]*structs.ClientHostNetworkConfig diff --git a/client/fingerprint/dynamic_host_volumes.go b/client/fingerprint/dynamic_host_volumes.go new file mode 100644 index 00000000000..e15a3a8f0c6 --- /dev/null +++ b/client/fingerprint/dynamic_host_volumes.go @@ -0,0 +1,120 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package fingerprint + +import ( + "context" + "os" + "strings" + "sync" + "time" + + "github.com/hashicorp/go-hclog" + hvm "github.com/hashicorp/nomad/client/hostvolumemanager" + "github.com/hashicorp/nomad/helper" +) + +func NewPluginsHostVolumeFingerprint(logger hclog.Logger) Fingerprint { + return &DynamicHostVolumePluginFingerprint{ + logger: logger.Named("host_volume_plugins"), + } +} + +var _ ReloadableFingerprint = &DynamicHostVolumePluginFingerprint{} + +type DynamicHostVolumePluginFingerprint struct { + logger hclog.Logger +} + +func (h *DynamicHostVolumePluginFingerprint) Reload() { + // host volume plugins are re-detected on agent reload +} + +func (h *DynamicHostVolumePluginFingerprint) Fingerprint(request *FingerprintRequest, response *FingerprintResponse) error { + // always add "mkdir" plugin + h.logger.Debug("detected plugin built-in", + "plugin_id", hvm.HostVolumePluginMkdirID, "version", hvm.HostVolumePluginMkdirVersion) + defer response.AddAttribute("plugins.host_volume.version."+hvm.HostVolumePluginMkdirID, hvm.HostVolumePluginMkdirVersion) + response.Detected = true + + // this config value will be empty in -dev mode + pluginDir := request.Config.HostVolumePluginDir + if pluginDir == "" { + return nil + } + + plugins, err := GetHostVolumePluginVersions(h.logger, pluginDir) + if err != nil { + if os.IsNotExist(err) { + h.logger.Debug("plugin dir does not exist", "dir", pluginDir) + } else { + h.logger.Warn("error finding plugins", "dir", pluginDir, "error", err) + } + return nil // don't halt agent start + } + + // if this was a reload, wipe what was there before + for k := range request.Node.Attributes { + if strings.HasPrefix(k, "plugins.host_volume.") { + response.RemoveAttribute(k) + } + } + + // set the attribute(s) + for plugin, version := range plugins { + h.logger.Debug("detected plugin", "plugin_id", plugin, "version", version) + response.AddAttribute("plugins.host_volume.version."+plugin, version) + } + + return nil +} + +func (h *DynamicHostVolumePluginFingerprint) Periodic() (bool, time.Duration) { + return false, 0 +} + +// GetHostVolumePluginVersions finds all the executable files on disk +// that respond to a Version call (arg $1 = 'version' / env $OPERATION = 'version') +// The return map's keys are plugin IDs, and the values are version strings. 
+func GetHostVolumePluginVersions(log hclog.Logger, pluginDir string) (map[string]string, error) { + files, err := helper.FindExecutableFiles(pluginDir) + if err != nil { + return nil, err + } + + plugins := make(map[string]string) + mut := sync.Mutex{} + var wg sync.WaitGroup + + for file, fullPath := range files { + wg.Add(1) + go func(file, fullPath string) { + defer wg.Done() + // really should take way less than a second + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + defer cancel() + + log := log.With("plugin_id", file) + + p, err := hvm.NewHostVolumePluginExternal(log, file, fullPath, "") + if err != nil { + log.Warn("error getting plugin", "error", err) + return + } + + version, err := p.Version(ctx) + if err != nil { + log.Debug("failed to get version from plugin", "error", err) + return + } + + mut.Lock() + plugins[file] = version.String() + mut.Unlock() + }(file, fullPath) + } + + wg.Wait() + return plugins, nil +} diff --git a/client/fingerprint/dynamic_host_volumes_test.go b/client/fingerprint/dynamic_host_volumes_test.go new file mode 100644 index 00000000000..4be9d69d305 --- /dev/null +++ b/client/fingerprint/dynamic_host_volumes_test.go @@ -0,0 +1,89 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package fingerprint + +import ( + "os" + "path/filepath" + "runtime" + "testing" + + "github.com/hashicorp/nomad/client/config" + hvm "github.com/hashicorp/nomad/client/hostvolumemanager" + "github.com/hashicorp/nomad/helper/testlog" + "github.com/hashicorp/nomad/nomad/structs" + "github.com/shoenig/test/must" +) + +// this is more of a full integration test of: +// fingerprint <- find plugins <- find executables +func TestPluginsHostVolumeFingerprint(t *testing.T) { + cfg := &config.Config{HostVolumePluginDir: ""} + node := &structs.Node{Attributes: map[string]string{}} + req := &FingerprintRequest{Config: cfg, Node: node} + fp := NewPluginsHostVolumeFingerprint(testlog.HCLogger(t)) + + // this fingerprint is not mandatory, so no error should be returned + for name, path := range map[string]string{ + "empty": "", + "non-existent": "/nowhere", + "impossible": "dynamic_host_volumes_test.go", + } { + t.Run(name, func(t *testing.T) { + resp := FingerprintResponse{} + cfg.HostVolumePluginDir = path + err := fp.Fingerprint(req, &resp) + must.NoError(t, err) + must.True(t, resp.Detected) // always true due to "mkdir" built-in + }) + } + + if runtime.GOOS == "windows" { + t.Skip("test scripts not built for windows") // db TODO(1.10.0) + } + + // happy path: dir exists. this one will contain a single valid plugin. 
+ tmp := t.TempDir() + cfg.HostVolumePluginDir = tmp + + files := []struct { + name string + contents string + perm os.FileMode + }{ + // only this first one should be detected as a valid plugin + {"happy-plugin", "#!/usr/bin/env sh\necho '0.0.1'", 0700}, + {"not-a-plugin", "#!/usr/bin/env sh\necho 'not-a-version'", 0700}, + {"unhappy-plugin", "#!/usr/bin/env sh\necho '0.0.2'; exit 1", 0700}, + {"not-executable", "hello", 0400}, + } + for _, f := range files { + must.NoError(t, os.WriteFile(filepath.Join(tmp, f.name), []byte(f.contents), f.perm)) + } + // directories should be ignored + must.NoError(t, os.Mkdir(filepath.Join(tmp, "a-directory"), 0700)) + + // do the fingerprint + resp := FingerprintResponse{} + err := fp.Fingerprint(req, &resp) + must.NoError(t, err) + must.Eq(t, map[string]string{ + "plugins.host_volume.version.happy-plugin": "0.0.1", + "plugins.host_volume.version.mkdir": hvm.HostVolumePluginMkdirVersion, // built-in + }, resp.Attributes) + + // do it again after deleting our one good plugin. + // repeat runs should wipe attributes, so nothing should remain. + node.Attributes = resp.Attributes + must.NoError(t, os.Remove(filepath.Join(tmp, "happy-plugin"))) + + resp = FingerprintResponse{} + err = fp.Fingerprint(req, &resp) + must.NoError(t, err) + must.Eq(t, map[string]string{ + "plugins.host_volume.version.happy-plugin": "", // empty value means removed + + "plugins.host_volume.version.mkdir": hvm.HostVolumePluginMkdirVersion, // built-in + }, resp.Attributes) +} diff --git a/client/fingerprint/fingerprint.go b/client/fingerprint/fingerprint.go index 3654db07830..5eb638009ec 100644 --- a/client/fingerprint/fingerprint.go +++ b/client/fingerprint/fingerprint.go @@ -32,19 +32,20 @@ var ( // hostFingerprinters contains the host fingerprints which are available for a // given platform. hostFingerprinters = map[string]Factory{ - "arch": NewArchFingerprint, - "consul": NewConsulFingerprint, - "cni": NewCNIFingerprint, // networks - "cpu": NewCPUFingerprint, - "host": NewHostFingerprint, - "landlock": NewLandlockFingerprint, - "memory": NewMemoryFingerprint, - "network": NewNetworkFingerprint, - "nomad": NewNomadFingerprint, - "plugins_cni": NewPluginsCNIFingerprint, - "signal": NewSignalFingerprint, - "storage": NewStorageFingerprint, - "vault": NewVaultFingerprint, + "arch": NewArchFingerprint, + "consul": NewConsulFingerprint, + "cni": NewCNIFingerprint, // networks + "cpu": NewCPUFingerprint, + "host": NewHostFingerprint, + "landlock": NewLandlockFingerprint, + "memory": NewMemoryFingerprint, + "network": NewNetworkFingerprint, + "nomad": NewNomadFingerprint, + "plugins_cni": NewPluginsCNIFingerprint, + "host_volume_plugins": NewPluginsHostVolumeFingerprint, + "signal": NewSignalFingerprint, + "storage": NewStorageFingerprint, + "vault": NewVaultFingerprint, } // envFingerprinters contains the fingerprints that are environment specific. 
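Once the fingerprinter above runs, each detected plugin shows up as a node attribute named `plugins.host_volume.version.<plugin_id>`, with the built-in `mkdir` plugin always present. The sketch below reads those attributes back through the Go API, for example to check what a node advertises; the node ID is a placeholder and the loop is illustrative, not part of the patch.

package main

import (
	"fmt"
	"strings"

	"github.com/hashicorp/nomad/api"
)

func main() {
	client, err := api.NewClient(api.DefaultConfig())
	if err != nil {
		panic(err)
	}
	// placeholder node ID
	node, _, err := client.Nodes().Info("9d5741c1-3899-498a-98dd-eb3c05665863", nil)
	if err != nil {
		panic(err)
	}
	const prefix = "plugins.host_volume.version."
	for attr, version := range node.Attributes {
		if strings.HasPrefix(attr, prefix) {
			fmt.Printf("host volume plugin %q -> version %s\n",
				strings.TrimPrefix(attr, prefix), version)
		}
	}
}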
diff --git a/client/host_volume_endpoint_test.go b/client/host_volume_endpoint_test.go index c3a4ae83835..037880246e9 100644 --- a/client/host_volume_endpoint_test.go +++ b/client/host_volume_endpoint_test.go @@ -8,7 +8,7 @@ import ( "testing" "github.com/hashicorp/nomad/ci" - "github.com/hashicorp/nomad/client/hostvolumemanager" + hvm "github.com/hashicorp/nomad/client/hostvolumemanager" cstructs "github.com/hashicorp/nomad/client/structs" "github.com/hashicorp/nomad/helper/testlog" "github.com/shoenig/test/must" @@ -22,8 +22,8 @@ func TestHostVolume(t *testing.T) { tmp := t.TempDir() expectDir := filepath.Join(tmp, "test-vol-id") - hvm := hostvolumemanager.NewHostVolumeManager(tmp, testlog.HCLogger(t)) - client.hostVolumeManager = hvm + client.hostVolumeManager = hvm.NewHostVolumeManager(testlog.HCLogger(t), + "/no/ext/plugins", tmp) t.Run("happy", func(t *testing.T) { req := &cstructs.ClientHostVolumeCreateRequest{ @@ -60,19 +60,20 @@ func TestHostVolume(t *testing.T) { } var resp cstructs.ClientHostVolumeCreateResponse err := client.ClientRPC("HostVolume.Create", req, &resp) - must.EqError(t, err, `no such plugin "non-existent"`) + must.EqError(t, err, `no such plugin: "non-existent"`) delReq := &cstructs.ClientHostVolumeDeleteRequest{ PluginID: "non-existent", } var delResp cstructs.ClientHostVolumeDeleteResponse err = client.ClientRPC("HostVolume.Delete", delReq, &delResp) - must.EqError(t, err, `no such plugin "non-existent"`) + must.EqError(t, err, `no such plugin: "non-existent"`) }) t.Run("error from plugin", func(t *testing.T) { // "mkdir" plugin can't create a directory within a file - client.hostVolumeManager = hostvolumemanager.NewHostVolumeManager("host_volume_endpoint_test.go", testlog.HCLogger(t)) + client.hostVolumeManager = hvm.NewHostVolumeManager(testlog.HCLogger(t), + "/no/ext/plugins", "host_volume_endpoint_test.go") req := &cstructs.ClientHostVolumeCreateRequest{ ID: "test-vol-id", diff --git a/client/hostvolumemanager/host_volume_plugin.go b/client/hostvolumemanager/host_volume_plugin.go index e8297a32f80..0616ce3377e 100644 --- a/client/hostvolumemanager/host_volume_plugin.go +++ b/client/hostvolumemanager/host_volume_plugin.go @@ -34,6 +34,9 @@ type HostVolumePluginCreateResponse struct { Context map[string]string `json:"context"` // metadata } +const HostVolumePluginMkdirID = "mkdir" +const HostVolumePluginMkdirVersion = "0.0.1" + var _ HostVolumePlugin = &HostVolumePluginMkdir{} type HostVolumePluginMkdir struct { @@ -44,7 +47,7 @@ type HostVolumePluginMkdir struct { } func (p *HostVolumePluginMkdir) Version(_ context.Context) (*version.Version, error) { - return version.NewVersion("0.0.1") + return version.NewVersion(HostVolumePluginMkdirVersion) } func (p *HostVolumePluginMkdir) Create(_ context.Context, @@ -91,6 +94,29 @@ func (p *HostVolumePluginMkdir) Delete(_ context.Context, req *cstructs.ClientHo var _ HostVolumePlugin = &HostVolumePluginExternal{} +func NewHostVolumePluginExternal(log hclog.Logger, + id, executable, targetPath string) (*HostVolumePluginExternal, error) { + // this should only be called with already-detected executables, + // but we'll double-check it anyway, so we can provide a tidy error message + // if it has changed between fingerprinting and execution. 
+ f, err := os.Stat(executable) + if err != nil { + if os.IsNotExist(err) { + return nil, fmt.Errorf("%w: %q", ErrPluginNotExists, id) + } + return nil, err + } + if !helper.IsExecutable(f) { + return nil, fmt.Errorf("%w: %q", ErrPluginNotExecutable, id) + } + return &HostVolumePluginExternal{ + ID: id, + Executable: executable, + TargetPath: targetPath, + log: log, + }, nil +} + type HostVolumePluginExternal struct { ID string Executable string diff --git a/client/hostvolumemanager/host_volumes.go b/client/hostvolumemanager/host_volumes.go index 4d7da7d1ea4..39ab9bb890f 100644 --- a/client/hostvolumemanager/host_volumes.go +++ b/client/hostvolumemanager/host_volumes.go @@ -5,60 +5,57 @@ package hostvolumemanager import ( "context" - "fmt" - "sync" + "errors" + "path/filepath" "github.com/hashicorp/go-hclog" cstructs "github.com/hashicorp/nomad/client/structs" ) +var ( + ErrPluginNotExists = errors.New("no such plugin") + ErrPluginNotExecutable = errors.New("plugin not executable") +) + type HostVolumeManager struct { - log hclog.Logger - plugins *sync.Map + pluginDir string + sharedMountDir string + + log hclog.Logger } -func NewHostVolumeManager(sharedMountDir string, logger hclog.Logger) *HostVolumeManager { - log := logger.Named("host_volumes") +func NewHostVolumeManager(logger hclog.Logger, pluginDir, sharedMountDir string) *HostVolumeManager { + log := logger.Named("host_volume_mgr") - mgr := &HostVolumeManager{ - log: log, - plugins: &sync.Map{}, + // db TODO(1.10.0): how do we define the external mounter plugins? plugin configs? + return &HostVolumeManager{ + log: log, + pluginDir: pluginDir, + sharedMountDir: sharedMountDir, } - // db TODO(1.10.0): discover plugins on disk, need a new plugin dir - // TODO: how do we define the external mounter plugins? plugin configs? - mgr.setPlugin("mkdir", &HostVolumePluginMkdir{ - ID: "mkdir", - TargetPath: sharedMountDir, - log: log.With("plugin_id", "mkdir"), - }) - mgr.setPlugin("example-host-volume", &HostVolumePluginExternal{ - ID: "example-host-volume", - Executable: "/opt/nomad/hostvolumeplugins/example-host-volume", - TargetPath: sharedMountDir, - log: log.With("plugin_id", "example-host-volume"), - }) - return mgr } -// db TODO(1.10.0): fingerprint elsewhere / on sighup, and SetPlugin from afar? 
-func (hvm *HostVolumeManager) setPlugin(id string, plug HostVolumePlugin) { - hvm.plugins.Store(id, plug) -} +func (hvm *HostVolumeManager) getPlugin(id string) (HostVolumePlugin, error) { + log := hvm.log.With("plugin_id", id) -func (hvm *HostVolumeManager) getPlugin(id string) (HostVolumePlugin, bool) { - obj, ok := hvm.plugins.Load(id) - if !ok { - return nil, false + if id == HostVolumePluginMkdirID { + return &HostVolumePluginMkdir{ + ID: HostVolumePluginMkdirID, + TargetPath: hvm.sharedMountDir, + log: log, + }, nil } - return obj.(HostVolumePlugin), true + + path := filepath.Join(hvm.pluginDir, id) + return NewHostVolumePluginExternal(log, id, path, hvm.sharedMountDir) } func (hvm *HostVolumeManager) Create(ctx context.Context, req *cstructs.ClientHostVolumeCreateRequest) (*cstructs.ClientHostVolumeCreateResponse, error) { - plug, ok := hvm.getPlugin(req.PluginID) - if !ok { - return nil, fmt.Errorf("no such plugin %q", req.PluginID) + plug, err := hvm.getPlugin(req.PluginID) + if err != nil { + return nil, err } pluginResp, err := plug.Create(ctx, req) @@ -80,12 +77,12 @@ func (hvm *HostVolumeManager) Create(ctx context.Context, func (hvm *HostVolumeManager) Delete(ctx context.Context, req *cstructs.ClientHostVolumeDeleteRequest) (*cstructs.ClientHostVolumeDeleteResponse, error) { - plug, ok := hvm.getPlugin(req.PluginID) - if !ok { - return nil, fmt.Errorf("no such plugin %q", req.PluginID) + plug, err := hvm.getPlugin(req.PluginID) + if err != nil { + return nil, err } - err := plug.Delete(ctx, req) + err = plug.Delete(ctx, req) if err != nil { return nil, err } diff --git a/command/agent/agent.go b/command/agent/agent.go index 6095e2dce99..40d467e23d5 100644 --- a/command/agent/agent.go +++ b/command/agent/agent.go @@ -724,6 +724,7 @@ func convertClientConfig(agentConfig *Config) (*clientconfig.Config, error) { if agentConfig.DataDir != "" { conf.StateDir = filepath.Join(agentConfig.DataDir, "client") conf.AllocDir = filepath.Join(agentConfig.DataDir, "alloc") + conf.HostVolumePluginDir = filepath.Join(agentConfig.DataDir, "host_volume_plugins") dataParent := filepath.Dir(agentConfig.DataDir) conf.AllocMountsDir = filepath.Join(dataParent, "alloc_mounts") } @@ -736,6 +737,9 @@ func convertClientConfig(agentConfig *Config) (*clientconfig.Config, error) { if agentConfig.Client.AllocMountsDir != "" { conf.AllocMountsDir = agentConfig.Client.AllocMountsDir } + if agentConfig.Client.HostVolumePluginDir != "" { + conf.HostVolumePluginDir = agentConfig.Client.HostVolumePluginDir + } if agentConfig.Client.NetworkInterface != "" { conf.NetworkInterface = agentConfig.Client.NetworkInterface } diff --git a/command/agent/command.go b/command/agent/command.go index 088bf9a819c..8184a067ac1 100644 --- a/command/agent/command.go +++ b/command/agent/command.go @@ -111,6 +111,7 @@ func (c *Command) readConfig() *Config { flags.StringVar(&cmdConfig.Client.StateDir, "state-dir", "", "") flags.StringVar(&cmdConfig.Client.AllocDir, "alloc-dir", "", "") flags.StringVar(&cmdConfig.Client.AllocMountsDir, "alloc-mounts-dir", "", "") + flags.StringVar(&cmdConfig.Client.HostVolumePluginDir, "host-volume-plugin-dir", "", "") flags.StringVar(&cmdConfig.Client.NodeClass, "node-class", "", "") flags.StringVar(&cmdConfig.Client.NodePool, "node-pool", "", "") flags.StringVar(&servers, "servers", "", "") @@ -384,11 +385,12 @@ func (c *Command) IsValidConfig(config, cmdConfig *Config) bool { // Verify the paths are absolute. 
dirs := map[string]string{ - "data-dir": config.DataDir, - "plugin-dir": config.PluginDir, - "alloc-dir": config.Client.AllocDir, - "alloc-mounts-dir": config.Client.AllocMountsDir, - "state-dir": config.Client.StateDir, + "data-dir": config.DataDir, + "plugin-dir": config.PluginDir, + "alloc-dir": config.Client.AllocDir, + "alloc-mounts-dir": config.Client.AllocMountsDir, + "host-volume-plugin-dir": config.Client.HostVolumePluginDir, + "state-dir": config.Client.StateDir, } for k, dir := range dirs { if dir == "" { @@ -735,6 +737,7 @@ func (c *Command) AutocompleteFlags() complete.Flags { "-region": complete.PredictAnything, "-data-dir": complete.PredictDirs("*"), "-plugin-dir": complete.PredictDirs("*"), + "-host-volume-plugin-dir": complete.PredictDirs("*"), "-dc": complete.PredictAnything, "-log-level": complete.PredictAnything, "-json-logs": complete.PredictNothing, @@ -1568,6 +1571,10 @@ Client Options: The default speed for network interfaces in MBits if the link speed can not be determined dynamically. + -host-volume-plugin-dir + Directory containing dynamic host volume plugins. The default is + /host_volume_plugins. + ACL Options: -acl-enabled diff --git a/command/agent/config.go b/command/agent/config.go index 4f8e41f02c8..acfb9bc6344 100644 --- a/command/agent/config.go +++ b/command/agent/config.go @@ -229,6 +229,10 @@ type ClientConfig struct { // AllocMountsDir is the directory for storing mounts into allocation data AllocMountsDir string `hcl:"alloc_mounts_dir"` + // HostVolumePluginDir directory contains dynamic host volume plugins + // db TODO(1.10.0): document default directory is alongside alloc_mounts + HostVolumePluginDir string `hcl:"host_volume_plugin_dir"` + // Servers is a list of known server addresses. These are as "host:port" Servers []string `hcl:"servers"` @@ -2316,6 +2320,9 @@ func (a *ClientConfig) Merge(b *ClientConfig) *ClientConfig { if b.AllocMountsDir != "" { result.AllocMountsDir = b.AllocMountsDir } + if b.HostVolumePluginDir != "" { + result.HostVolumePluginDir = b.HostVolumePluginDir + } if b.NodeClass != "" { result.NodeClass = b.NodeClass } diff --git a/demo/hostvolume/example-host-volume b/demo/hostvolume/example-host-volume index d93978ea6cd..918f97748fb 100755 --- a/demo/hostvolume/example-host-volume +++ b/demo/hostvolume/example-host-volume @@ -53,8 +53,8 @@ if [[ "$OSTYPE" == "linux-"* ]]; then ext=ext4 mount=/usr/bin/mount mkfsExec() { - dd if=/dev/zero of="$1".$ext bs=1M count="$2" - mkfs.ext4 "$1".$ext 1>&2 + dd if=/dev/zero of="$1".$ext bs=1M count="$2" + mkfs.ext4 "$1".$ext 1>&2 } mountExec() { $mount "$1".$ext "$1" @@ -76,7 +76,7 @@ elif [[ "$OSTYPE" == "darwin"* ]]; then } else echo "$OSTYPE is an unsupported OS" - return 1 + exit 1 fi validate_path() { @@ -88,7 +88,7 @@ validate_path() { } is_mounted() { - $mount | grep -q "^$1$" + $mount | grep -q " $1 " } create_volume() { diff --git a/helper/funcs.go b/helper/funcs.go index 5b394c09fa6..2695540ffa7 100644 --- a/helper/funcs.go +++ b/helper/funcs.go @@ -9,6 +9,7 @@ import ( "maps" "math" "net/http" + "os" "path/filepath" "reflect" "regexp" @@ -542,3 +543,33 @@ func FlattenMultierror(err error) error { } return mErr.ErrorOrNil() } + +// FindExecutableFiles looks in the provided path for executables and returns +// a map where keys are filenames and values are the absolute path. 
+func FindExecutableFiles(path string) (map[string]string, error) { + executables := make(map[string]string) + entries, err := os.ReadDir(path) + if err != nil { + return executables, err + } + for _, e := range entries { + i, err := e.Info() + if err != nil { + return executables, err + } + if !IsExecutable(i) { + continue + } + p := filepath.Join(path, i.Name()) + abs, err := filepath.Abs(p) + if err != nil { + return executables, err + } + executables[i.Name()] = abs + } + return executables, nil +} + +func IsExecutable(i os.FileInfo) bool { + return !i.IsDir() && i.Mode()&0o111 != 0 +} From d700538921293ac595054a3e7eb761a1da06f242 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Tue, 3 Dec 2024 09:48:36 -0500 Subject: [PATCH 20/35] dynamic host volumes: Sentinel improvements for CLI (#24592) The create/register volume RPCs support a policy override flag for soft-mandatory Sentinel policies, but the CLI and Go API were missing support for it. Also add support for Sentinel warnings to the Go API and CLI. Ref: https://github.com/hashicorp/nomad/pull/24479 --- api/host_volumes.go | 32 ++++++++++++++++++++++---------- command/volume_create.go | 13 +++++++++---- command/volume_create_host.go | 14 +++++++++++--- command/volume_register.go | 18 +++++++++++++++--- command/volume_register_host.go | 15 ++++++++++++--- 5 files changed, 69 insertions(+), 23 deletions(-) diff --git a/api/host_volumes.go b/api/host_volumes.go index 985695fa706..e417177d29c 100644 --- a/api/host_volumes.go +++ b/api/host_volumes.go @@ -148,10 +148,26 @@ func (c *Client) HostVolumes() *HostVolumes { type HostVolumeCreateRequest struct { Volume *HostVolume + + // PolicyOverride overrides Sentinel soft-mandatory policy enforcement + PolicyOverride bool } type HostVolumeRegisterRequest struct { Volume *HostVolume + + // PolicyOverride overrides Sentinel soft-mandatory policy enforcement + PolicyOverride bool +} + +type HostVolumeCreateResponse struct { + Volume *HostVolume + Warnings string +} + +type HostVolumeRegisterResponse struct { + Volume *HostVolume + Warnings string } type HostVolumeListRequest struct { @@ -165,28 +181,24 @@ type HostVolumeDeleteRequest struct { // Create forwards to client agents so a host volume can be created on those // hosts, and registers the volume with Nomad servers. -func (hv *HostVolumes) Create(req *HostVolumeCreateRequest, opts *WriteOptions) (*HostVolume, *WriteMeta, error) { - var out struct { - Volume *HostVolume - } +func (hv *HostVolumes) Create(req *HostVolumeCreateRequest, opts *WriteOptions) (*HostVolumeCreateResponse, *WriteMeta, error) { + var out *HostVolumeCreateResponse wm, err := hv.client.put("/v1/volume/host/create", req, &out, opts) if err != nil { return nil, wm, err } - return out.Volume, wm, nil + return out, wm, nil } // Register registers a host volume that was created out-of-band with the Nomad // servers. 
-func (hv *HostVolumes) Register(req *HostVolumeRegisterRequest, opts *WriteOptions) (*HostVolume, *WriteMeta, error) { - var out struct { - Volume *HostVolume - } +func (hv *HostVolumes) Register(req *HostVolumeRegisterRequest, opts *WriteOptions) (*HostVolumeRegisterResponse, *WriteMeta, error) { + var out *HostVolumeRegisterResponse wm, err := hv.client.put("/v1/volume/host/register", req, &out, opts) if err != nil { return nil, wm, err } - return out.Volume, wm, nil + return out, wm, nil } // Get queries for a single host volume, by ID diff --git a/command/volume_create.go b/command/volume_create.go index 1e3dcbad02a..5a4254e52b0 100644 --- a/command/volume_create.go +++ b/command/volume_create.go @@ -46,6 +46,9 @@ Create Options: Display full information when monitoring volume state. Used for dynamic host volumes only. + -policy-override + Sets the flag to force override any soft mandatory Sentinel policies. Used + for dynamic host volumes only. ` return strings.TrimSpace(helpText) @@ -54,8 +57,9 @@ Create Options: func (c *VolumeCreateCommand) AutocompleteFlags() complete.Flags { return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient), complete.Flags{ - "-detach": complete.PredictNothing, - "-verbose": complete.PredictNothing, + "-detach": complete.PredictNothing, + "-verbose": complete.PredictNothing, + "-policy-override": complete.PredictNothing, }) } @@ -70,10 +74,11 @@ func (c *VolumeCreateCommand) Synopsis() string { func (c *VolumeCreateCommand) Name() string { return "volume create" } func (c *VolumeCreateCommand) Run(args []string) int { - var detach, verbose bool + var detach, verbose, override bool flags := c.Meta.FlagSet(c.Name(), FlagSetClient) flags.BoolVar(&detach, "detach", false, "detach from monitor") flags.BoolVar(&verbose, "verbose", false, "display full volume IDs") + flags.BoolVar(&override, "policy-override", false, "override soft mandatory Sentinel policies") flags.Usage = func() { c.Ui.Output(c.Help()) } if err := flags.Parse(args); err != nil { @@ -124,7 +129,7 @@ func (c *VolumeCreateCommand) Run(args []string) int { case "csi": return c.csiCreate(client, ast) case "host": - return c.hostVolumeCreate(client, ast, detach, verbose) + return c.hostVolumeCreate(client, ast, detach, verbose, override) default: c.Ui.Error(fmt.Sprintf("Error unknown volume type: %s", volType)) return 1 diff --git a/command/volume_create_host.go b/command/volume_create_host.go index 54cd31efb97..dc0d1e1aef2 100644 --- a/command/volume_create_host.go +++ b/command/volume_create_host.go @@ -19,7 +19,7 @@ import ( ) func (c *VolumeCreateCommand) hostVolumeCreate( - client *api.Client, ast *ast.File, detach, verbose bool) int { + client *api.Client, ast *ast.File, detach, verbose, override bool) int { vol, err := decodeHostVolume(ast) if err != nil { @@ -28,13 +28,21 @@ func (c *VolumeCreateCommand) hostVolumeCreate( } req := &api.HostVolumeCreateRequest{ - Volume: vol, + Volume: vol, + PolicyOverride: override, } - vol, _, err = client.HostVolumes().Create(req, nil) + resp, _, err := client.HostVolumes().Create(req, nil) if err != nil { c.Ui.Error(fmt.Sprintf("Error creating volume: %s", err)) return 1 } + vol = resp.Volume + + if resp.Warnings != "" { + c.Ui.Output( + c.Colorize().Color( + fmt.Sprintf("[bold][yellow]Volume Warnings:\n%s[reset]\n", resp.Warnings))) + } var volID string var lastIndex uint64 diff --git a/command/volume_register.go b/command/volume_register.go index 19527cf6b1f..d47c93b2232 100644 --- a/command/volume_register.go +++ 
b/command/volume_register.go @@ -34,13 +34,23 @@ Usage: nomad volume register [options] General Options: - ` + generalOptionsUsage(usageOptsDefault) + ` + generalOptionsUsage(usageOptsDefault) + ` + +Register Options: + + -policy-override + Sets the flag to force override any soft mandatory Sentinel policies. Used + for dynamic host volumes only. +` return strings.TrimSpace(helpText) } func (c *VolumeRegisterCommand) AutocompleteFlags() complete.Flags { - return c.Meta.AutocompleteFlags(FlagSetClient) + return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient), + complete.Flags{ + "-policy-override": complete.PredictNothing, + }) } func (c *VolumeRegisterCommand) AutocompleteArgs() complete.Predictor { @@ -54,7 +64,9 @@ func (c *VolumeRegisterCommand) Synopsis() string { func (c *VolumeRegisterCommand) Name() string { return "volume register" } func (c *VolumeRegisterCommand) Run(args []string) int { + var override bool flags := c.Meta.FlagSet(c.Name(), FlagSetClient) + flags.BoolVar(&override, "policy-override", false, "override soft mandatory Sentinel policies") flags.Usage = func() { c.Ui.Output(c.Help()) } if err := flags.Parse(args); err != nil { @@ -106,7 +118,7 @@ func (c *VolumeRegisterCommand) Run(args []string) int { case "csi": return c.csiRegister(client, ast) case "host": - return c.hostVolumeRegister(client, ast) + return c.hostVolumeRegister(client, ast, override) default: c.Ui.Error(fmt.Sprintf("Error unknown volume type: %s", volType)) return 1 diff --git a/command/volume_register_host.go b/command/volume_register_host.go index 4e3ce6ccddb..b6cb213caac 100644 --- a/command/volume_register_host.go +++ b/command/volume_register_host.go @@ -10,7 +10,7 @@ import ( "github.com/hashicorp/nomad/api" ) -func (c *VolumeRegisterCommand) hostVolumeRegister(client *api.Client, ast *ast.File) int { +func (c *VolumeRegisterCommand) hostVolumeRegister(client *api.Client, ast *ast.File, override bool) int { vol, err := decodeHostVolume(ast) if err != nil { c.Ui.Error(fmt.Sprintf("Error decoding the volume definition: %s", err)) @@ -18,13 +18,22 @@ func (c *VolumeRegisterCommand) hostVolumeRegister(client *api.Client, ast *ast. } req := &api.HostVolumeRegisterRequest{ - Volume: vol, + Volume: vol, + PolicyOverride: override, } - vol, _, err = client.HostVolumes().Register(req, nil) + resp, _, err := client.HostVolumes().Register(req, nil) if err != nil { c.Ui.Error(fmt.Sprintf("Error registering volume: %s", err)) return 1 } + vol = resp.Volume + + if resp.Warnings != "" { + c.Ui.Output( + c.Colorize().Color( + fmt.Sprintf("[bold][yellow]Volume Warnings:\n%s[reset]\n", resp.Warnings))) + } + c.Ui.Output(fmt.Sprintf( "Registered host volume %s with ID %s", vol.Name, vol.ID)) From 787fbbe67116f3c33cd575a2c4eb208ed161dea3 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Tue, 3 Dec 2024 14:30:15 -0500 Subject: [PATCH 21/35] sentinel: remove default scope for Sentinel apply command (#24601) When we add a Sentinel scope for dynamic host volumes, having a default `-scope` value for `sentinel apply` risks accidentally adding policies for volumes to the job scope. This would immediately prevent any job from being submitted. Forcing the administrator to pass a `-scope` will prevent accidental misuse. 
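As a minimal sketch of the new requirement, the snippet below submits a policy with an explicit scope through the Go API; the `SentinelPolicy` field values and the `SentinelPolicies().Upsert` signature are assumed from the existing `api` package, and the policy name and body are placeholders.

```go
// Sketch only: choosing a scope explicitly when submitting a policy.
package sketch

import "github.com/hashicorp/nomad/api"

func applyHostVolumePolicy(client *api.Client) error {
	policy := &api.SentinelPolicy{
		Name:             "host-volume-policy",              // placeholder name
		Description:      "example policy for volume submission",
		Scope:            api.SentinelScopeSubmitHostVolume, // constant added in this patch
		EnforcementLevel: "soft-mandatory",
		Policy:           `main = rule { true }`, // placeholder Sentinel body
	}
	_, err := client.SentinelPolicies().Upsert(policy, nil)
	return err
}
```

The same choice has to be made on the command line, since `-scope` no longer defaults to `submit-job`.
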
Ref: https://github.com/hashicorp/nomad-enterprise/pull/2087 Ref: https://github.com/hashicorp/nomad/pull/24479 --- .changelog/24601.txt | 3 +++ api/sentinel.go | 6 ++++++ command/sentinel_apply.go | 17 ++++++++++++++--- 3 files changed, 23 insertions(+), 3 deletions(-) create mode 100644 .changelog/24601.txt diff --git a/.changelog/24601.txt b/.changelog/24601.txt new file mode 100644 index 00000000000..e8b8807f300 --- /dev/null +++ b/.changelog/24601.txt @@ -0,0 +1,3 @@ +```release-note:breaking-change +sentinel: The sentinel apply command now requires the -scope option +``` diff --git a/api/sentinel.go b/api/sentinel.go index e8a0644ae16..1e93308847d 100644 --- a/api/sentinel.go +++ b/api/sentinel.go @@ -82,3 +82,9 @@ type SentinelPolicyListStub struct { CreateIndex uint64 ModifyIndex uint64 } + +// Possible Sentinel scopes +const ( + SentinelScopeSubmitJob = "submit-job" + SentinelScopeSubmitHostVolume = "submit-host-volume" +) diff --git a/command/sentinel_apply.go b/command/sentinel_apply.go index 7d43c0e6c88..7db40022bd7 100644 --- a/command/sentinel_apply.go +++ b/command/sentinel_apply.go @@ -37,8 +37,9 @@ Apply Options: -description Sets a human readable description for the policy. - -scope (default: submit-job) - Sets the scope of the policy and when it should be enforced. + -scope + Sets the scope of the policy and when it should be enforced. One of + "submit-job" or "submit-host-volume". -level (default: advisory) Sets the enforcement level of the policy. Must be one of advisory, @@ -73,7 +74,7 @@ func (c *SentinelApplyCommand) Run(args []string) int { flags := c.Meta.FlagSet(c.Name(), FlagSetClient) flags.Usage = func() { c.Ui.Output(c.Help()) } flags.StringVar(&description, "description", "", "") - flags.StringVar(&scope, "scope", "submit-job", "") + flags.StringVar(&scope, "scope", "", "") flags.StringVar(&enfLevel, "level", "advisory", "") if err := flags.Parse(args); err != nil { return 1 @@ -107,6 +108,16 @@ func (c *SentinelApplyCommand) Run(args []string) int { } } + switch scope { + case api.SentinelScopeSubmitJob, api.SentinelScopeSubmitHostVolume: + case "": + c.Ui.Error("-scope flag is required") + return 1 + default: + c.Ui.Error(fmt.Sprintf("Error: invalid -scope value: %q", scope)) + return 1 + } + // Construct the policy sp := &api.SentinelPolicy{ Name: policyName, From 05f1cda594634edf99f1ffff00f5e21b41870253 Mon Sep 17 00:00:00 2001 From: Daniel Bennett Date: Tue, 3 Dec 2024 16:47:37 -0500 Subject: [PATCH 22/35] dynamic host volumes: client state (#24595) store dynamic host volume creations in client state, so they can be "restored" on agent restart. restore works by repeating the same Create operation as initial creation, and expecting the plugin to be idempotent. this is (potentially) especially important after host restarts, which may have dropped mount points or such. 
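A minimal sketch of the idempotent Create shape this restore strategy assumes (the helper name and error wording are illustrative, not from the patch): re-running it against a volume that already exists must succeed cleanly, because restore re-invokes Create for every volume held in client state.

```go
// Sketch of an idempotent, mkdir-style Create.
package sketch

import (
	"fmt"
	"os"
)

func createVolumeDir(path string) error {
	if info, err := os.Stat(path); err == nil {
		if info.IsDir() {
			return nil // already provisioned on an earlier run; nothing to do
		}
		return fmt.Errorf("%s exists but is not a directory", path)
	} else if !os.IsNotExist(err) {
		return err // unexpected stat error
	}
	return os.Mkdir(path, 0o700) // first run: actually create it
}
```

The built-in `mkdir` plugin gains a check along these lines later in this series, once volume fingerprinting lands.
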
--- client/client.go | 9 +- client/host_volume_endpoint_test.go | 29 +++++- .../hostvolumemanager/host_volume_plugin.go | 11 +-- .../host_volume_plugin_test.go | 2 - client/hostvolumemanager/host_volumes.go | 92 +++++++++++++++++-- client/hostvolumemanager/host_volumes_test.go | 53 +++++++++++ client/state/db_bolt.go | 41 +++++++++ client/state/db_error.go | 15 +++ client/state/db_mem.go | 43 +++++++-- client/state/db_noop.go | 12 +++ client/state/db_test.go | 36 ++++++++ client/state/interface.go | 4 + client/structs/host_volumes.go | 5 + 13 files changed, 323 insertions(+), 29 deletions(-) create mode 100644 client/hostvolumemanager/host_volumes_test.go diff --git a/client/client.go b/client/client.go index 4fc808eca2b..ac4a7dad700 100644 --- a/client/client.go +++ b/client/client.go @@ -535,9 +535,16 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulProxie c.devicemanager = devManager c.pluginManagers.RegisterAndRun(devManager) - c.hostVolumeManager = hvm.NewHostVolumeManager(logger, + c.hostVolumeManager, err = hvm.NewHostVolumeManager(logger, + c.stateDB, hostVolumeRequestTimeout, cfg.HostVolumePluginDir, cfg.AllocMountsDir) + if err != nil { + // NewHostVolumeManager will only err if it fails to read state store, + // or if one or more required plugins do not exist, so halt the client + // because something needs to be fixed by a cluster admin. + return nil, err + } // Set up the service registration wrapper using the Consul and Nomad // implementations. The Nomad implementation is only ever used on the diff --git a/client/host_volume_endpoint_test.go b/client/host_volume_endpoint_test.go index 037880246e9..7fbc042d8ea 100644 --- a/client/host_volume_endpoint_test.go +++ b/client/host_volume_endpoint_test.go @@ -6,9 +6,11 @@ package client import ( "path/filepath" "testing" + "time" "github.com/hashicorp/nomad/ci" hvm "github.com/hashicorp/nomad/client/hostvolumemanager" + "github.com/hashicorp/nomad/client/state" cstructs "github.com/hashicorp/nomad/client/structs" "github.com/hashicorp/nomad/helper/testlog" "github.com/shoenig/test/must" @@ -20,10 +22,15 @@ func TestHostVolume(t *testing.T) { client, cleanup := TestClient(t, nil) defer cleanup() + memdb := state.NewMemDB(testlog.HCLogger(t)) + client.stateDB = memdb + tmp := t.TempDir() + var err error expectDir := filepath.Join(tmp, "test-vol-id") - client.hostVolumeManager = hvm.NewHostVolumeManager(testlog.HCLogger(t), - "/no/ext/plugins", tmp) + client.hostVolumeManager, err = hvm.NewHostVolumeManager(testlog.HCLogger(t), + client.stateDB, time.Second, "/no/ext/plugins", tmp) + must.NoError(t, err) t.Run("happy", func(t *testing.T) { req := &cstructs.ClientHostVolumeCreateRequest{ @@ -40,6 +47,15 @@ func TestHostVolume(t *testing.T) { }, resp) // technically this is testing "mkdir" more than the RPC must.DirExists(t, expectDir) + // ensure we saved to client state + vols, err := memdb.GetDynamicHostVolumes() + must.NoError(t, err) + must.Len(t, 1, vols) + expectState := &cstructs.HostVolumeState{ + ID: req.ID, + CreateReq: req, + } + must.Eq(t, expectState, vols[0]) delReq := &cstructs.ClientHostVolumeDeleteRequest{ ID: "test-vol-id", @@ -52,6 +68,10 @@ func TestHostVolume(t *testing.T) { must.NotNil(t, delResp) // again, actually testing the "mkdir" plugin must.DirNotExists(t, expectDir) + // client state should be deleted + vols, err = memdb.GetDynamicHostVolumes() + must.NoError(t, err) + must.Len(t, 0, vols) }) t.Run("missing plugin", func(t 
*testing.T) { @@ -72,8 +92,9 @@ func TestHostVolume(t *testing.T) { t.Run("error from plugin", func(t *testing.T) { // "mkdir" plugin can't create a directory within a file - client.hostVolumeManager = hvm.NewHostVolumeManager(testlog.HCLogger(t), - "/no/ext/plugins", "host_volume_endpoint_test.go") + client.hostVolumeManager, err = hvm.NewHostVolumeManager(testlog.HCLogger(t), + client.stateDB, time.Second, "/no/ext/plugins", "host_volume_endpoint_test.go") + must.NoError(t, err) req := &cstructs.ClientHostVolumeCreateRequest{ ID: "test-vol-id", diff --git a/client/hostvolumemanager/host_volume_plugin.go b/client/hostvolumemanager/host_volume_plugin.go index 0616ce3377e..17cdf02790c 100644 --- a/client/hostvolumemanager/host_volume_plugin.go +++ b/client/hostvolumemanager/host_volume_plugin.go @@ -29,9 +29,8 @@ type HostVolumePlugin interface { } type HostVolumePluginCreateResponse struct { - Path string `json:"path"` - SizeBytes int64 `json:"bytes"` - Context map[string]string `json:"context"` // metadata + Path string `json:"path"` + SizeBytes int64 `json:"bytes"` } const HostVolumePluginMkdirID = "mkdir" @@ -70,7 +69,6 @@ func (p *HostVolumePluginMkdir) Create(_ context.Context, return &HostVolumePluginCreateResponse{ Path: path, SizeBytes: 0, - Context: map[string]string{}, }, nil } @@ -147,8 +145,9 @@ func (p *HostVolumePluginExternal) Version(ctx context.Context) (*version.Versio func (p *HostVolumePluginExternal) Create(ctx context.Context, req *cstructs.ClientHostVolumeCreateRequest) (*HostVolumePluginCreateResponse, error) { - params, err := json.Marshal(req.Parameters) // db TODO(1.10.0): if this is nil, then PARAMETERS env will be "null" + params, err := json.Marshal(req.Parameters) // db TODO(1.10.0): document if this is nil, then PARAMETERS env will be "null" if err != nil { + // this is a proper error, because users can set this in the volume spec return nil, fmt.Errorf("error marshaling volume pramaters: %w", err) } envVars := []string{ @@ -165,7 +164,7 @@ func (p *HostVolumePluginExternal) Create(ctx context.Context, } var pluginResp HostVolumePluginCreateResponse - err = json.Unmarshal(stdout, &pluginResp) + err = json.Unmarshal(stdout, &pluginResp) // db TODO(1.10.0): if this fails, then the volume may have been created, according to the plugin, but Nomad will not save it if err != nil { return nil, err } diff --git a/client/hostvolumemanager/host_volume_plugin_test.go b/client/hostvolumemanager/host_volume_plugin_test.go index 954686d7443..18de2e1f381 100644 --- a/client/hostvolumemanager/host_volume_plugin_test.go +++ b/client/hostvolumemanager/host_volume_plugin_test.go @@ -45,7 +45,6 @@ func TestHostVolumePluginMkdir(t *testing.T) { must.Eq(t, &HostVolumePluginCreateResponse{ Path: target, SizeBytes: 0, - Context: map[string]string{}, }, resp) must.DirExists(t, target) @@ -115,7 +114,6 @@ func TestHostVolumePluginExternal(t *testing.T) { must.Eq(t, &HostVolumePluginCreateResponse{ Path: target, SizeBytes: 5, - Context: map[string]string{"key": "val"}, }, resp) must.DirExists(t, target) logged := getLogs() diff --git a/client/hostvolumemanager/host_volumes.go b/client/hostvolumemanager/host_volumes.go index 39ab9bb890f..9827084a74b 100644 --- a/client/hostvolumemanager/host_volumes.go +++ b/client/hostvolumemanager/host_volumes.go @@ -7,9 +7,12 @@ import ( "context" "errors" "path/filepath" + "time" "github.com/hashicorp/go-hclog" + "github.com/hashicorp/go-multierror" cstructs "github.com/hashicorp/nomad/client/structs" + 
"github.com/hashicorp/nomad/helper" ) var ( @@ -17,22 +20,73 @@ var ( ErrPluginNotExecutable = errors.New("plugin not executable") ) +type HostVolumeStateManager interface { + PutDynamicHostVolume(*cstructs.HostVolumeState) error + GetDynamicHostVolumes() ([]*cstructs.HostVolumeState, error) + DeleteDynamicHostVolume(string) error +} + type HostVolumeManager struct { pluginDir string sharedMountDir string + stateMgr HostVolumeStateManager log hclog.Logger } -func NewHostVolumeManager(logger hclog.Logger, pluginDir, sharedMountDir string) *HostVolumeManager { +func NewHostVolumeManager(logger hclog.Logger, + state HostVolumeStateManager, restoreTimeout time.Duration, + pluginDir, sharedMountDir string) (*HostVolumeManager, error) { + log := logger.Named("host_volume_mgr") // db TODO(1.10.0): how do we define the external mounter plugins? plugin configs? - return &HostVolumeManager{ - log: log, + hvm := &HostVolumeManager{ pluginDir: pluginDir, sharedMountDir: sharedMountDir, + stateMgr: state, + log: log, + } + + if err := hvm.restoreState(state, restoreTimeout); err != nil { + return nil, err + } + + return hvm, nil +} + +func (hvm *HostVolumeManager) restoreState(state HostVolumeStateManager, timeout time.Duration) error { + vols, err := state.GetDynamicHostVolumes() + if err != nil { + return err + } + if len(vols) == 0 { + return nil // nothing to do } + + // re-"create" the volumes - plugins have the best knowledge of their + // side effects, and they must be idempotent. + group := multierror.Group{} + for _, vol := range vols { + group.Go(func() error { // db TODO(1.10.0): document that plugins must be safe to run concurrently + // missing plugins with associated volumes in state are considered + // client-stopping errors. they need to be fixed by cluster admins. + plug, err := hvm.getPlugin(vol.CreateReq.PluginID) + if err != nil { + return err + } + + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + if _, err := plug.Create(ctx, vol.CreateReq); err != nil { + // plugin execution errors are only logged + hvm.log.Error("failed to restore", "plugin_id", vol.CreateReq.PluginID, "volume_id", vol.ID, "error", err) + } + return nil + }) + } + mErr := group.Wait() + return helper.FlattenMultierror(mErr.ErrorOrNil()) } func (hvm *HostVolumeManager) getPlugin(id string) (HostVolumePlugin, error) { @@ -63,14 +117,35 @@ func (hvm *HostVolumeManager) Create(ctx context.Context, return nil, err } + volState := &cstructs.HostVolumeState{ + ID: req.ID, + CreateReq: req, + } + if err := hvm.stateMgr.PutDynamicHostVolume(volState); err != nil { + // if we fail to write to state, delete the volume so it isn't left + // lying around without Nomad knowing about it. + hvm.log.Error("failed to save volume in state, so deleting", "volume_id", req.ID, "error", err) + delErr := plug.Delete(ctx, &cstructs.ClientHostVolumeDeleteRequest{ + ID: req.ID, + PluginID: req.PluginID, + NodeID: req.NodeID, + HostPath: hvm.sharedMountDir, + Parameters: req.Parameters, + }) + if delErr != nil { + hvm.log.Warn("error deleting volume after state store failure", "volume_id", req.ID, "error", delErr) + err = multierror.Append(err, delErr) + } + return nil, helper.FlattenMultierror(err) + } + + // db TODO(1.10.0): now we need to add the volume to the node fingerprint! + resp := &cstructs.ClientHostVolumeCreateResponse{ HostPath: pluginResp.Path, CapacityBytes: pluginResp.SizeBytes, } - // db TODO(1.10.0): now we need to add it to the node fingerprint! 
- // db TODO(1.10.0): and save it in client state! - return resp, nil } @@ -89,7 +164,10 @@ func (hvm *HostVolumeManager) Delete(ctx context.Context, resp := &cstructs.ClientHostVolumeDeleteResponse{} - // db TODO(1.10.0): save the client state! + if err := hvm.stateMgr.DeleteDynamicHostVolume(req.ID); err != nil { + hvm.log.Error("failed to delete volume in state", "volume_id", req.ID, "error", err) + return nil, err // bail so a user may retry + } return resp, nil } diff --git a/client/hostvolumemanager/host_volumes_test.go b/client/hostvolumemanager/host_volumes_test.go new file mode 100644 index 00000000000..a94526416d7 --- /dev/null +++ b/client/hostvolumemanager/host_volumes_test.go @@ -0,0 +1,53 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package hostvolumemanager + +import ( + "path/filepath" + "testing" + "time" + + cstate "github.com/hashicorp/nomad/client/state" + cstructs "github.com/hashicorp/nomad/client/structs" + "github.com/hashicorp/nomad/helper/testlog" + "github.com/shoenig/test/must" +) + +// db TODO(1.10.0): improve hostvolumemanager tests. + +func TestNewHostVolumeManager_restoreState(t *testing.T) { + log := testlog.HCLogger(t) + vol := &cstructs.HostVolumeState{ + ID: "test-vol-id", + CreateReq: &cstructs.ClientHostVolumeCreateRequest{ + ID: "test-vol-id", + PluginID: "mkdir", + }, + } + + t.Run("happy", func(t *testing.T) { + // put our volume in state + state := cstate.NewMemDB(log) + must.NoError(t, state.PutDynamicHostVolume(vol)) + + // new volume manager should load it from state and run Create, + // resulting in a volume directory in this mountDir. + mountDir := t.TempDir() + + _, err := NewHostVolumeManager(log, state, time.Second, "/wherever", mountDir) + must.NoError(t, err) + + volPath := filepath.Join(mountDir, vol.ID) + must.DirExists(t, volPath) + }) + + t.Run("get error", func(t *testing.T) { + state := &cstate.ErrDB{} + _, err := NewHostVolumeManager(log, state, time.Second, "/wherever", "/wherever") + // error loading state should break the world + must.ErrorIs(t, err, cstate.ErrDBError) + }) + + // db TODO: test plugin error +} diff --git a/client/state/db_bolt.go b/client/state/db_bolt.go index 2471cda3d14..bef111f6e9a 100644 --- a/client/state/db_bolt.go +++ b/client/state/db_bolt.go @@ -138,6 +138,8 @@ var ( // nodeRegistrationKey is the key at which node registration data is stored. nodeRegistrationKey = []byte("node_registration") + + hostVolBucket = []byte("host_volumes_to_create") ) // taskBucketName returns the bucket name for the given task name. 
@@ -1048,6 +1050,45 @@ func (s *BoltStateDB) GetNodeRegistration() (*cstructs.NodeRegistration, error) return ®, err } +func (s *BoltStateDB) PutDynamicHostVolume(vol *cstructs.HostVolumeState) error { + return s.db.Update(func(tx *boltdd.Tx) error { + b, err := tx.CreateBucketIfNotExists(hostVolBucket) + if err != nil { + return err + } + return b.Put([]byte(vol.ID), vol) + }) +} + +func (s *BoltStateDB) GetDynamicHostVolumes() ([]*cstructs.HostVolumeState, error) { + var vols []*cstructs.HostVolumeState + err := s.db.View(func(tx *boltdd.Tx) error { + b := tx.Bucket(hostVolBucket) + if b == nil { + return nil + } + return b.BoltBucket().ForEach(func(k, v []byte) error { + var vol cstructs.HostVolumeState + err := b.Get(k, &vol) + if err != nil { + return err + } + vols = append(vols, &vol) + return nil + }) + }) + if boltdd.IsErrNotFound(err) { + return nil, nil + } + return vols, err +} + +func (s *BoltStateDB) DeleteDynamicHostVolume(id string) error { + return s.db.Update(func(tx *boltdd.Tx) error { + return tx.Bucket(hostVolBucket).Delete([]byte(id)) + }) +} + // init initializes metadata entries in a newly created state database. func (s *BoltStateDB) init() error { return s.db.Update(func(tx *boltdd.Tx) error { diff --git a/client/state/db_error.go b/client/state/db_error.go index 78ef01b7850..6c99defa2ad 100644 --- a/client/state/db_error.go +++ b/client/state/db_error.go @@ -4,6 +4,7 @@ package state import ( + "errors" "fmt" arstate "github.com/hashicorp/nomad/client/allocrunner/state" @@ -16,6 +17,10 @@ import ( "github.com/hashicorp/nomad/nomad/structs" ) +var _ StateDB = &ErrDB{} + +var ErrDBError = errors.New("Error!") + // ErrDB implements a StateDB that returns errors on restore methods, used for testing type ErrDB struct { // Allocs is a preset slice of allocations used in GetAllAllocations @@ -154,6 +159,16 @@ func (m *ErrDB) GetNodeRegistration() (*cstructs.NodeRegistration, error) { return nil, fmt.Errorf("Error!") } +func (m *ErrDB) PutDynamicHostVolume(_ *cstructs.HostVolumeState) error { + return ErrDBError +} +func (m *ErrDB) GetDynamicHostVolumes() ([]*cstructs.HostVolumeState, error) { + return nil, ErrDBError +} +func (m *ErrDB) DeleteDynamicHostVolume(_ string) error { + return ErrDBError +} + func (m *ErrDB) Close() error { return fmt.Errorf("Error!") } diff --git a/client/state/db_mem.go b/client/state/db_mem.go index 91e6481b4c9..32abd883e77 100644 --- a/client/state/db_mem.go +++ b/client/state/db_mem.go @@ -60,6 +60,8 @@ type MemDB struct { nodeRegistration *cstructs.NodeRegistration + dynamicHostVolumes map[string]*cstructs.HostVolumeState + logger hclog.Logger mu sync.RWMutex @@ -68,15 +70,16 @@ type MemDB struct { func NewMemDB(logger hclog.Logger) *MemDB { logger = logger.Named("memdb") return &MemDB{ - allocs: make(map[string]*structs.Allocation), - deployStatus: make(map[string]*structs.AllocDeploymentStatus), - networkStatus: make(map[string]*structs.AllocNetworkStatus), - acknowledgedState: make(map[string]*arstate.State), - localTaskState: make(map[string]map[string]*state.LocalState), - taskState: make(map[string]map[string]*structs.TaskState), - checks: make(checks.ClientResults), - identities: make(map[string][]*structs.SignedWorkloadIdentity), - logger: logger, + allocs: make(map[string]*structs.Allocation), + deployStatus: make(map[string]*structs.AllocDeploymentStatus), + networkStatus: make(map[string]*structs.AllocNetworkStatus), + acknowledgedState: make(map[string]*arstate.State), + localTaskState: 
make(map[string]map[string]*state.LocalState), + taskState: make(map[string]map[string]*structs.TaskState), + checks: make(checks.ClientResults), + identities: make(map[string][]*structs.SignedWorkloadIdentity), + dynamicHostVolumes: make(map[string]*cstructs.HostVolumeState), + logger: logger, } } @@ -354,6 +357,28 @@ func (m *MemDB) GetNodeRegistration() (*cstructs.NodeRegistration, error) { return m.nodeRegistration, nil } +func (m *MemDB) PutDynamicHostVolume(vol *cstructs.HostVolumeState) error { + m.mu.Lock() + defer m.mu.Unlock() + m.dynamicHostVolumes[vol.ID] = vol + return nil +} +func (m *MemDB) GetDynamicHostVolumes() ([]*cstructs.HostVolumeState, error) { + m.mu.Lock() + defer m.mu.Unlock() + var vols []*cstructs.HostVolumeState + for _, vol := range m.dynamicHostVolumes { + vols = append(vols, vol) + } + return vols, nil +} +func (m *MemDB) DeleteDynamicHostVolume(s string) error { + m.mu.Lock() + defer m.mu.Unlock() + delete(m.dynamicHostVolumes, s) + return nil +} + func (m *MemDB) Close() error { m.mu.Lock() defer m.mu.Unlock() diff --git a/client/state/db_noop.go b/client/state/db_noop.go index 345025a4d52..09488c181a1 100644 --- a/client/state/db_noop.go +++ b/client/state/db_noop.go @@ -14,6 +14,8 @@ import ( "github.com/hashicorp/nomad/nomad/structs" ) +var _ StateDB = &NoopDB{} + // NoopDB implements a StateDB that does not persist any data. type NoopDB struct{} @@ -145,6 +147,16 @@ func (n NoopDB) GetNodeRegistration() (*cstructs.NodeRegistration, error) { return nil, nil } +func (n NoopDB) PutDynamicHostVolume(_ *cstructs.HostVolumeState) error { + return nil +} +func (n NoopDB) GetDynamicHostVolumes() ([]*cstructs.HostVolumeState, error) { + return nil, nil +} +func (n NoopDB) DeleteDynamicHostVolume(_ string) error { + return nil +} + func (n NoopDB) Close() error { return nil } diff --git a/client/state/db_test.go b/client/state/db_test.go index d13431a6207..3a03cf3a2cc 100644 --- a/client/state/db_test.go +++ b/client/state/db_test.go @@ -15,6 +15,7 @@ import ( dmstate "github.com/hashicorp/nomad/client/devicemanager/state" "github.com/hashicorp/nomad/client/dynamicplugins" driverstate "github.com/hashicorp/nomad/client/pluginmanager/drivermanager/state" + cstructs "github.com/hashicorp/nomad/client/structs" "github.com/hashicorp/nomad/helper/testlog" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" @@ -384,6 +385,41 @@ func TestStateDB_DynamicRegistry(t *testing.T) { }) } +// TestStateDB_HostVolumes asserts the behavior of dynamic host volume state. 
+func TestStateDB_HostVolumes(t *testing.T) { + ci.Parallel(t) + + testDB(t, func(t *testing.T, db StateDB) { + vols, err := db.GetDynamicHostVolumes() + must.NoError(t, err) + must.Len(t, 0, vols) + + vol := &cstructs.HostVolumeState{ + ID: "test-vol-id", + CreateReq: &cstructs.ClientHostVolumeCreateRequest{ + ID: "test-vol-id", + Name: "test-vol-name", + PluginID: "test-plugin-id", + NodeID: "test-node-id", + RequestedCapacityMinBytes: 5, + RequestedCapacityMaxBytes: 10, + Parameters: map[string]string{"test": "ing"}, + }, + } + + must.NoError(t, db.PutDynamicHostVolume(vol)) + vols, err = db.GetDynamicHostVolumes() + must.NoError(t, err) + must.Len(t, 1, vols) + must.Eq(t, vol, vols[0]) + + must.NoError(t, db.DeleteDynamicHostVolume(vol.ID)) + vols, err = db.GetDynamicHostVolumes() + must.NoError(t, err) + must.Len(t, 0, vols) + }) +} + func TestStateDB_CheckResult_keyForCheck(t *testing.T) { ci.Parallel(t) diff --git a/client/state/interface.go b/client/state/interface.go index a9cd4845038..0460a75e20f 100644 --- a/client/state/interface.go +++ b/client/state/interface.go @@ -137,6 +137,10 @@ type StateDB interface { PutNodeRegistration(*cstructs.NodeRegistration) error GetNodeRegistration() (*cstructs.NodeRegistration, error) + PutDynamicHostVolume(*cstructs.HostVolumeState) error + GetDynamicHostVolumes() ([]*cstructs.HostVolumeState, error) + DeleteDynamicHostVolume(string) error + // Close the database. Unsafe for further use after calling regardless // of return value. Close() error diff --git a/client/structs/host_volumes.go b/client/structs/host_volumes.go index 38d3cb2d770..3188e45dc0a 100644 --- a/client/structs/host_volumes.go +++ b/client/structs/host_volumes.go @@ -3,6 +3,11 @@ package structs +type HostVolumeState struct { + ID string + CreateReq *ClientHostVolumeCreateRequest +} + type ClientHostVolumeCreateRequest struct { // ID is a UUID-like string generated by the server. ID string From 67ed31db4101fac62312f3efef89d81665f4ce4b Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Wed, 4 Dec 2024 11:11:03 -0500 Subject: [PATCH 23/35] dynamic host volumes: add implicit constraints on plugin fingerprint (#24605) Node fingerprints include attributes for the host volume plugins, including the built-in plugins. Add an implicit constraint on this fingerprint during volume placement to ensure we only place volumes on hosts with the right plugins. 
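A minimal sketch of what the implicit constraint keys on, using a plain map in place of the real constraint checker (the helper names are illustrative): the client fingerprints one attribute per discovered plugin, and placement only admits nodes where that attribute is set for the requested plugin.

```go
// Simplified stand-in for the placement check on plugin fingerprints.
package sketch

import "fmt"

func pluginAttr(pluginID string) string {
	// attribute name published by the host volume plugin fingerprint
	return fmt.Sprintf("plugins.host_volume.version.%s", pluginID)
}

func nodeHasPlugin(nodeAttrs map[string]string, pluginID string) bool {
	_, ok := nodeAttrs[pluginAttr(pluginID)] // the implicit "is_set" check
	return ok
}
```

In the change below this is expressed as an `is_set` constraint on `${attr.plugins.host_volume.version.<plugin_id>}`, prepended to any user-supplied constraints before candidate nodes are evaluated.
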
Ref: https://github.com/hashicorp/nomad/pull/24479 --- nomad/host_volume_endpoint.go | 20 ++++++----- nomad/host_volume_endpoint_test.go | 55 +++++++++++++++++++----------- 2 files changed, 47 insertions(+), 28 deletions(-) diff --git a/nomad/host_volume_endpoint.go b/nomad/host_volume_endpoint.go index cd7b629890f..c6097c2a3fd 100644 --- a/nomad/host_volume_endpoint.go +++ b/nomad/host_volume_endpoint.go @@ -480,15 +480,17 @@ func (v *HostVolume) placeHostVolume(snap *state.StateSnapshot, vol *structs.Hos } var checker *scheduler.ConstraintChecker - - if len(vol.Constraints) > 0 { - ctx := &placementContext{ - regexpCache: make(map[string]*regexp.Regexp), - versionCache: make(map[string]scheduler.VerConstraints), - semverCache: make(map[string]scheduler.VerConstraints), - } - checker = scheduler.NewConstraintChecker(ctx, vol.Constraints) - } + ctx := &placementContext{ + regexpCache: make(map[string]*regexp.Regexp), + versionCache: make(map[string]scheduler.VerConstraints), + semverCache: make(map[string]scheduler.VerConstraints), + } + constraints := []*structs.Constraint{{ + LTarget: fmt.Sprintf("${attr.plugins.host_volume.version.%s}", vol.PluginID), + Operand: "is_set", + }} + constraints = append(constraints, vol.Constraints...) + checker = scheduler.NewConstraintChecker(ctx, constraints) for { raw := iter.Next() diff --git a/nomad/host_volume_endpoint_test.go b/nomad/host_volume_endpoint_test.go index 81cd7c55479..88a9cd21dfb 100644 --- a/nomad/host_volume_endpoint_test.go +++ b/nomad/host_volume_endpoint_test.go @@ -649,15 +649,22 @@ func TestHostVolumeEndpoint_placeVolume(t *testing.T) { node0, node1, node2, node3 := mock.Node(), mock.Node(), mock.Node(), mock.Node() node0.NodePool = structs.NodePoolDefault + node0.Attributes["plugins.host_volume.version.mkdir"] = "0.0.1" + node1.NodePool = "dev" node1.Meta["rack"] = "r2" + node1.Attributes["plugins.host_volume.version.mkdir"] = "0.0.1" + node2.NodePool = "prod" + node2.Attributes["plugins.host_volume.version.mkdir"] = "0.0.1" + node3.NodePool = "prod" node3.Meta["rack"] = "r3" node3.HostVolumes = map[string]*structs.ClientHostVolumeConfig{"example": { Name: "example", Path: "/srv", }} + node3.Attributes["plugins.host_volume.version.mkdir"] = "0.0.1" must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, 1000, node0)) must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, 1000, node1)) @@ -672,40 +679,50 @@ func TestHostVolumeEndpoint_placeVolume(t *testing.T) { }{ { name: "only one in node pool", - vol: &structs.HostVolume{NodePool: "default"}, + vol: &structs.HostVolume{NodePool: "default", PluginID: "mkdir"}, expect: node0, }, { name: "only one that matches constraints", - vol: &structs.HostVolume{Constraints: []*structs.Constraint{ - { - LTarget: "${meta.rack}", - RTarget: "r2", - Operand: "=", - }, - }}, + vol: &structs.HostVolume{ + PluginID: "mkdir", + Constraints: []*structs.Constraint{ + { + LTarget: "${meta.rack}", + RTarget: "r2", + Operand: "=", + }, + }}, expect: node1, }, { name: "only one available in pool", - vol: &structs.HostVolume{NodePool: "prod", Name: "example"}, + vol: &structs.HostVolume{NodePool: "prod", Name: "example", PluginID: "mkdir"}, expect: node2, }, { - name: "no match", - vol: &structs.HostVolume{Constraints: []*structs.Constraint{ - { - LTarget: "${meta.rack}", - RTarget: "r6", - Operand: "=", - }, - }}, + name: "no matching constraint", + vol: &structs.HostVolume{ + PluginID: "mkdir", + Constraints: []*structs.Constraint{ + { + LTarget: "${meta.rack}", + RTarget: "r6", + 
Operand: "=", + }, + }}, + expectErr: "no node meets constraints", + }, + { + name: "no matching plugin", + vol: &structs.HostVolume{PluginID: "not-mkdir"}, expectErr: "no node meets constraints", }, { name: "match already has a volume with the same name", vol: &structs.HostVolume{ - Name: "example", + Name: "example", + PluginID: "mkdir", Constraints: []*structs.Constraint{ { LTarget: "${meta.rack}", @@ -750,8 +767,8 @@ func newMockHostVolumeClient(t *testing.T, srv *Server, pool string) (*mockHostV c1, cleanup := client.TestRPCOnlyClient(t, func(c *config.Config) { c.Node.NodePool = pool - // TODO(1.10.0): we'll want to have a version gate for this feature c.Node.Attributes["nomad.version"] = version.Version + c.Node.Attributes["plugins.host_volume.version.mkdir"] = "0.0.1" c.Node.Meta["rack"] = "r1" }, srv.config.RPCAddr, map[string]any{"HostVolume": mockClientEndpoint}) t.Cleanup(cleanup) From 5826e92671c2f7b44e820edc12d98a8d57ebf373 Mon Sep 17 00:00:00 2001 From: Daniel Bennett Date: Wed, 4 Dec 2024 13:32:07 -0500 Subject: [PATCH 24/35] dynamic host volumes: delete by single volume ID (#24606) string instead of []string --- api/host_volumes.go | 6 +- command/agent/host_volume_endpoint.go | 2 +- command/volume_delete.go | 2 +- nomad/fsm.go | 2 +- nomad/host_volume_endpoint.go | 58 ++++++++------------ nomad/host_volume_endpoint_test.go | 51 +++++++++++------ nomad/state/state_store_host_volumes.go | 41 +++++++------- nomad/state/state_store_host_volumes_test.go | 20 +++++-- nomad/structs/host_volumes.go | 3 +- 9 files changed, 98 insertions(+), 87 deletions(-) diff --git a/api/host_volumes.go b/api/host_volumes.go index e417177d29c..661ac77c183 100644 --- a/api/host_volumes.go +++ b/api/host_volumes.go @@ -176,7 +176,7 @@ type HostVolumeListRequest struct { } type HostVolumeDeleteRequest struct { - VolumeIDs []string + ID string } // Create forwards to client agents so a host volume can be created on those @@ -238,8 +238,8 @@ func (hv *HostVolumes) List(req *HostVolumeListRequest, opts *QueryOptions) ([]* } // Delete deletes a host volume -func (hv *HostVolumes) Delete(id string, opts *WriteOptions) (*WriteMeta, error) { - path, err := url.JoinPath("/v1/volume/host/", url.PathEscape(id)) +func (hv *HostVolumes) Delete(req *HostVolumeDeleteRequest, opts *WriteOptions) (*WriteMeta, error) { + path, err := url.JoinPath("/v1/volume/host/", url.PathEscape(req.ID)) if err != nil { return nil, err } diff --git a/command/agent/host_volume_endpoint.go b/command/agent/host_volume_endpoint.go index 288d44bfc0f..db12cca929f 100644 --- a/command/agent/host_volume_endpoint.go +++ b/command/agent/host_volume_endpoint.go @@ -129,7 +129,7 @@ func (s *HTTPServer) hostVolumeCreate(resp http.ResponseWriter, req *http.Reques func (s *HTTPServer) hostVolumeDelete(id string, resp http.ResponseWriter, req *http.Request) (any, error) { // HTTP API only supports deleting a single ID because of compatibility with // the existing HTTP routes for CSI - args := structs.HostVolumeDeleteRequest{VolumeIDs: []string{id}} + args := structs.HostVolumeDeleteRequest{VolumeID: id} s.parseWriteRequest(req, &args.WriteRequest) var out structs.HostVolumeDeleteResponse diff --git a/command/volume_delete.go b/command/volume_delete.go index 678dd181c95..23a82dbe01a 100644 --- a/command/volume_delete.go +++ b/command/volume_delete.go @@ -150,7 +150,7 @@ func (c *VolumeDeleteCommand) deleteCSIVolume(client *api.Client, volID string, } func (c *VolumeDeleteCommand) deleteHostVolume(client *api.Client, volID string) int { - _, err := 
client.HostVolumes().Delete(volID, nil) + _, err := client.HostVolumes().Delete(&api.HostVolumeDeleteRequest{ID: volID}, nil) if err != nil { c.Ui.Error(fmt.Sprintf("Error deleting volume: %s", err)) return 1 diff --git a/nomad/fsm.go b/nomad/fsm.go index 9ea3267457f..3cc4bf101b4 100644 --- a/nomad/fsm.go +++ b/nomad/fsm.go @@ -2443,7 +2443,7 @@ func (n *nomadFSM) applyHostVolumeDelete(msgType structs.MessageType, buf []byte panic(fmt.Errorf("failed to decode request: %v", err)) } - if err := n.state.DeleteHostVolumes(index, req.RequestNamespace(), req.VolumeIDs); err != nil { + if err := n.state.DeleteHostVolume(index, req.RequestNamespace(), req.VolumeID); err != nil { n.logger.Error("DeleteHostVolumes failed", "error", err) return err } diff --git a/nomad/host_volume_endpoint.go b/nomad/host_volume_endpoint.go index c6097c2a3fd..e038e631a29 100644 --- a/nomad/host_volume_endpoint.go +++ b/nomad/host_volume_endpoint.go @@ -13,7 +13,6 @@ import ( "github.com/armon/go-metrics" "github.com/hashicorp/go-hclog" "github.com/hashicorp/go-memdb" - multierror "github.com/hashicorp/go-multierror" "github.com/hashicorp/nomad/acl" cstructs "github.com/hashicorp/nomad/client/structs" "github.com/hashicorp/nomad/helper" @@ -564,11 +563,10 @@ func (v *HostVolume) Delete(args *structs.HostVolumeDeleteRequest, reply *struct return structs.ErrPermissionDenied } - if len(args.VolumeIDs) == 0 { - return fmt.Errorf("missing volumes to delete") + if args.VolumeID == "" { + return fmt.Errorf("missing volume ID to delete") } - var deletedVols []string var index uint64 snap, err := v.srv.State().Snapshot() @@ -576,45 +574,35 @@ func (v *HostVolume) Delete(args *structs.HostVolumeDeleteRequest, reply *struct return err } - var mErr *multierror.Error ns := args.RequestNamespace() + id := args.VolumeID - for _, id := range args.VolumeIDs { - vol, err := snap.HostVolumeByID(nil, ns, id, true) - if err != nil { - return fmt.Errorf("could not query host volume: %w", err) - } - if vol == nil { - return fmt.Errorf("no such volume: %s", id) - } - if len(vol.Allocations) > 0 { - allocIDs := helper.ConvertSlice(vol.Allocations, - func(a *structs.AllocListStub) string { return a.ID }) - mErr = multierror.Append(mErr, - fmt.Errorf("volume %s in use by allocations: %v", id, allocIDs)) - continue - } + vol, err := snap.HostVolumeByID(nil, ns, id, true) + if err != nil { + return fmt.Errorf("could not query host volume: %w", err) + } + if vol == nil { + return fmt.Errorf("no such volume: %s", id) + } + if len(vol.Allocations) > 0 { + allocIDs := helper.ConvertSlice(vol.Allocations, + func(a *structs.AllocListStub) string { return a.ID }) + return fmt.Errorf("volume %s in use by allocations: %v", id, allocIDs) + } - err = v.deleteVolume(vol) - if err != nil { - mErr = multierror.Append(mErr, err) - } else { - deletedVols = append(deletedVols, id) - } + err = v.deleteVolume(vol) + if err != nil { + return err } - if len(deletedVols) > 0 { - args.VolumeIDs = deletedVols - _, index, err = v.srv.raftApply(structs.HostVolumeDeleteRequestType, args) - if err != nil { - v.logger.Error("raft apply failed", "error", err, "method", "delete") - mErr = multierror.Append(mErr, err) - } + _, index, err = v.srv.raftApply(structs.HostVolumeDeleteRequestType, args) + if err != nil { + v.logger.Error("raft apply failed", "error", err, "method", "delete") + return err } - reply.VolumeIDs = deletedVols reply.Index = index - return helper.FlattenMultierror(mErr) + return nil } func (v 
*HostVolume) deleteVolume(vol *structs.HostVolume) error { diff --git a/nomad/host_volume_endpoint_test.go b/nomad/host_volume_endpoint_test.go index 88a9cd21dfb..9ca0f94855b 100644 --- a/nomad/host_volume_endpoint_test.go +++ b/nomad/host_volume_endpoint_test.go @@ -321,7 +321,7 @@ func TestHostVolumeEndpoint_CreateRegisterGetDelete(t *testing.T) { index, []*structs.Allocation{alloc})) delReq := &structs.HostVolumeDeleteRequest{ - VolumeIDs: []string{vol1.ID, vol2.ID}, + VolumeID: vol2.ID, WriteRequest: structs.WriteRequest{ Region: srv.Region(), Namespace: ns, @@ -336,20 +336,6 @@ func TestHostVolumeEndpoint_CreateRegisterGetDelete(t *testing.T) { err = msgpackrpc.CallWithCodec(codec, "HostVolume.Delete", delReq, &delResp) must.EqError(t, err, fmt.Sprintf("volume %s in use by allocations: [%s]", vol2.ID, alloc.ID)) - // volume not in use will be deleted even if we got an error - getReq := &structs.HostVolumeGetRequest{ - ID: vol1.ID, - QueryOptions: structs.QueryOptions{ - Region: srv.Region(), - Namespace: ns, - AuthToken: token, - }, - } - var getResp structs.HostVolumeGetResponse - err = msgpackrpc.CallWithCodec(codec, "HostVolume.Get", getReq, &getResp) - must.NoError(t, err) - must.Nil(t, getResp.Volume) - // update the allocations terminal so the delete works alloc = alloc.Copy() alloc.ClientStatus = structs.AllocClientStatusFailed @@ -361,15 +347,46 @@ func TestHostVolumeEndpoint_CreateRegisterGetDelete(t *testing.T) { } err = msgpackrpc.CallWithCodec(codec, "Node.UpdateAlloc", nArgs, &structs.GenericResponse{}) - delReq.VolumeIDs = []string{vol2.ID} err = msgpackrpc.CallWithCodec(codec, "HostVolume.Delete", delReq, &delResp) must.NoError(t, err) - getReq.ID = vol2.ID + getReq := &structs.HostVolumeGetRequest{ + ID: vol2.ID, + QueryOptions: structs.QueryOptions{ + Region: srv.Region(), + Namespace: ns, + AuthToken: token, + }, + } + var getResp structs.HostVolumeGetResponse err = msgpackrpc.CallWithCodec(codec, "HostVolume.Get", getReq, &getResp) must.NoError(t, err) must.Nil(t, getResp.Volume) }) + + // delete vol1 to finish cleaning up + var delResp structs.HostVolumeDeleteResponse + err := msgpackrpc.CallWithCodec(codec, "HostVolume.Delete", &structs.HostVolumeDeleteRequest{ + VolumeID: vol1.ID, + WriteRequest: structs.WriteRequest{ + Region: srv.Region(), + Namespace: vol1.Namespace, + AuthToken: powerToken, + }, + }, &delResp) + must.NoError(t, err) + + // should be no volumes left + var listResp structs.HostVolumeListResponse + err = msgpackrpc.CallWithCodec(codec, "HostVolume.List", &structs.HostVolumeListRequest{ + QueryOptions: structs.QueryOptions{ + Region: srv.Region(), + Namespace: "*", + AuthToken: token, + }, + }, &listResp) + must.NoError(t, err) + must.Len(t, 0, listResp.Volumes, must.Sprintf("expect no volumes to remain, got: %+v", listResp)) } func TestHostVolumeEndpoint_List(t *testing.T) { diff --git a/nomad/state/state_store_host_volumes.go b/nomad/state/state_store_host_volumes.go index 37d1cccd1a7..bd01129f314 100644 --- a/nomad/state/state_store_host_volumes.go +++ b/nomad/state/state_store_host_volumes.go @@ -107,37 +107,34 @@ func (s *StateStore) UpsertHostVolume(index uint64, vol *structs.HostVolume) err return txn.Commit() } -// DeleteHostVolumes deletes a set of host volumes in the same namespace -func (s *StateStore) DeleteHostVolumes(index uint64, ns string, ids []string) error { +// DeleteHostVolume deletes a host volume +func (s *StateStore) DeleteHostVolume(index uint64, ns string, id string) error { txn := s.db.WriteTxn(index) defer txn.Abort() 
- for _, id := range ids { + obj, err := txn.First(TableHostVolumes, indexID, ns, id) + if err != nil { + return err + } + if obj != nil { + vol := obj.(*structs.HostVolume) - obj, err := txn.First(TableHostVolumes, indexID, ns, id) + allocs, err := s.AllocsByNodeTerminal(nil, vol.NodeID, false) if err != nil { - return err + return fmt.Errorf("could not query allocs to check for host volume claims: %w", err) } - if obj != nil { - vol := obj.(*structs.HostVolume) - - allocs, err := s.AllocsByNodeTerminal(nil, vol.NodeID, false) - if err != nil { - return fmt.Errorf("could not query allocs to check for host volume claims: %w", err) - } - for _, alloc := range allocs { - for _, volClaim := range alloc.Job.LookupTaskGroup(alloc.TaskGroup).Volumes { - if volClaim.Type == structs.VolumeTypeHost && volClaim.Name == vol.Name { - return fmt.Errorf("could not delete volume %s in use by alloc %s", - vol.ID, alloc.ID) - } + for _, alloc := range allocs { + for _, volClaim := range alloc.Job.LookupTaskGroup(alloc.TaskGroup).Volumes { + if volClaim.Type == structs.VolumeTypeHost && volClaim.Name == vol.Name { + return fmt.Errorf("could not delete volume %s in use by alloc %s", + vol.ID, alloc.ID) } } + } - err = txn.Delete(TableHostVolumes, vol) - if err != nil { - return fmt.Errorf("host volume delete: %w", err) - } + err = txn.Delete(TableHostVolumes, vol) + if err != nil { + return fmt.Errorf("host volume delete: %w", err) } } diff --git a/nomad/state/state_store_host_volumes_test.go b/nomad/state/state_store_host_volumes_test.go index 87269597844..04e18e4016e 100644 --- a/nomad/state/state_store_host_volumes_test.go +++ b/nomad/state/state_store_host_volumes_test.go @@ -144,14 +144,11 @@ func TestStateStore_HostVolumes_CRUD(t *testing.T) { index, []*structs.Allocation{alloc})) index++ - err = store.DeleteHostVolumes(index, vol2.Namespace, []string{vols[1].ID, vols[2].ID}) + err = store.DeleteHostVolume(index, vol2.Namespace, vols[2].ID) must.EqError(t, err, fmt.Sprintf( "could not delete volume %s in use by alloc %s", vols[2].ID, alloc.ID)) - vol, err = store.HostVolumeByID(nil, vols[1].Namespace, vols[1].ID, true) - must.NoError(t, err) - must.NotNil(t, vol, must.Sprint("volume that didn't error should not be deleted")) - err = store.DeleteHostVolumes(index, vol2.Namespace, []string{vols[1].ID}) + err = store.DeleteHostVolume(index, vol2.Namespace, vols[1].ID) must.NoError(t, err) vol, err = store.HostVolumeByID(nil, vols[1].Namespace, vols[1].ID, true) must.NoError(t, err) @@ -177,6 +174,19 @@ func TestStateStore_HostVolumes_CRUD(t *testing.T) { must.NoError(t, err) got = consumeIter(iter) must.MapLen(t, 1, got, must.Sprint(`expected only one volume to match prefix`)) + + alloc = alloc.Copy() + alloc.ClientStatus = structs.AllocClientStatusComplete + index++ + must.NoError(t, store.UpdateAllocsFromClient(structs.MsgTypeTestSetup, + index, []*structs.Allocation{alloc})) + for _, v := range vols { + index++ + must.NoError(t, store.DeleteHostVolume(index, v.Namespace, v.ID)) + } + iter, err = store.HostVolumes(nil, SortDefault) + got = consumeIter(iter) + must.MapLen(t, 0, got, must.Sprint(`expected no volumes to remain`)) } func TestStateStore_UpdateHostVolumesFromFingerprint(t *testing.T) { diff --git a/nomad/structs/host_volumes.go b/nomad/structs/host_volumes.go index 2c8e6cf2372..c254bf72902 100644 --- a/nomad/structs/host_volumes.go +++ b/nomad/structs/host_volumes.go @@ -365,12 +365,11 @@ type HostVolumeRegisterResponse struct { } type HostVolumeDeleteRequest struct { - VolumeIDs []string + 
VolumeID string WriteRequest } type HostVolumeDeleteResponse struct { - VolumeIDs []string // volumes actually deleted WriteMeta } From 76641c80814ed829b9f250533d27a16333abd8af Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Fri, 6 Dec 2024 08:43:55 -0500 Subject: [PATCH 25/35] dynamic host volumes: refactor HTTP routes for volumes list dispatch (#24612) The List Volumes API was originally written for CSI but assumed we'd have future volume types, dispatched on a query parameter. Dynamic host volumes uses this, but the resulting code has host volumes concerns comingled in the CSI volumes endpoint. Refactor this so that we have a top-level `GET /v1/volumes` route that's shared between CSI and DHV, and have it dispatch to the appropriate handler in the type-specific endpoints. Ref: https://github.com/hashicorp/nomad/pull/24479 --- command/agent/csi_endpoint.go | 18 +----------------- command/agent/http.go | 1 + command/agent/volumes_endpoint.go | 27 +++++++++++++++++++++++++++ 3 files changed, 29 insertions(+), 17 deletions(-) create mode 100644 command/agent/volumes_endpoint.go diff --git a/command/agent/csi_endpoint.go b/command/agent/csi_endpoint.go index 9f4b87aa0fc..325ce5bb6d0 100644 --- a/command/agent/csi_endpoint.go +++ b/command/agent/csi_endpoint.go @@ -20,28 +20,12 @@ func (s *HTTPServer) CSIVolumesRequest(resp http.ResponseWriter, req *http.Reque return nil, CodedError(405, ErrInvalidMethod) } - // Type filters volume lists to a specific type. When support for non-CSI volumes is - // introduced, we'll need to dispatch here - query := req.URL.Query() - qtype, ok := query["type"] - if !ok { - return []*structs.CSIVolListStub{}, nil - } - // TODO(1.10.0): move handling of GET /v1/volumes/ out so that we're not - // co-mingling the call for listing host volume here - switch qtype[0] { - case "host": - return s.HostVolumesListRequest(resp, req) - case "csi": - default: - return nil, nil - } - args := structs.CSIVolumeListRequest{} if s.parse(resp, req, &args.Region, &args.QueryOptions) { return nil, nil } + query := req.URL.Query() args.Prefix = query.Get("prefix") args.PluginID = query.Get("plugin_id") args.NodeID = query.Get("node_id") diff --git a/command/agent/http.go b/command/agent/http.go index cb1b9359a5e..6d47e4e78dd 100644 --- a/command/agent/http.go +++ b/command/agent/http.go @@ -404,6 +404,7 @@ func (s *HTTPServer) registerHandlers(enableDebug bool) { s.mux.HandleFunc("/v1/deployments", s.wrap(s.DeploymentsRequest)) s.mux.HandleFunc("/v1/deployment/", s.wrap(s.DeploymentSpecificRequest)) + s.mux.HandleFunc("GET /v1/volumes", s.wrap(s.ListVolumesRequest)) s.mux.HandleFunc("/v1/volumes", s.wrap(s.CSIVolumesRequest)) s.mux.HandleFunc("/v1/volumes/external", s.wrap(s.CSIExternalVolumesRequest)) s.mux.HandleFunc("/v1/volumes/snapshot", s.wrap(s.CSISnapshotsRequest)) diff --git a/command/agent/volumes_endpoint.go b/command/agent/volumes_endpoint.go new file mode 100644 index 00000000000..3ee84eceb7f --- /dev/null +++ b/command/agent/volumes_endpoint.go @@ -0,0 +1,27 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package agent + +import ( + "net/http" + + "github.com/hashicorp/nomad/nomad/structs" +) + +// ListVolumesRequest dispatches requests for listing volumes to a specific type. 
+func (s *HTTPServer) ListVolumesRequest(resp http.ResponseWriter, req *http.Request) (interface{}, error) { + query := req.URL.Query() + qtype, ok := query["type"] + if !ok { + return []*structs.CSIVolListStub{}, nil + } + switch qtype[0] { + case "host": + return s.HostVolumesListRequest(resp, req) + case "csi": + return s.CSIVolumesRequest(resp, req) + default: + return nil, CodedError(404, resourceNotFoundErr) + } +} From e76f5e0b4c7fc5377e81aa379a2893989f0a0a73 Mon Sep 17 00:00:00 2001 From: Daniel Bennett Date: Mon, 9 Dec 2024 15:26:25 -0500 Subject: [PATCH 26/35] dynamic host volumes: volume fingerprinting (#24613) and expand the demo a bit --- api/nodes.go | 2 + client/client.go | 19 ++-- client/host_volume_endpoint.go | 2 +- client/host_volume_endpoint_test.go | 40 ++++++-- .../hostvolumemanager/host_volume_plugin.go | 19 +++- client/hostvolumemanager/host_volumes.go | 97 +++++++++++++------ client/hostvolumemanager/host_volumes_test.go | 52 +++++++++- .../hostvolumemanager/volume_fingerprint.go | 65 +++++++++++++ .../volume_fingerprint_test.go | 81 ++++++++++++++++ client/node_updater.go | 63 +++++++++++- client/structs/host_volumes.go | 10 +- demo/hostvolume/check.sh | 14 +++ demo/hostvolume/e2e.sh | 9 ++ ...xample-host-volume => example-plugin-mkfs} | 0 demo/hostvolume/external-plugin.volume.hcl | 22 +++++ demo/hostvolume/host.volume.hcl | 19 ---- demo/hostvolume/internal-plugin.volume.hcl | 14 +++ demo/hostvolume/job.nomad.hcl | 48 +++++++++ demo/hostvolume/setup.sh | 14 +++ demo/hostvolume/teardown.sh | 17 ++++ nomad/host_volume_endpoint.go | 1 + nomad/structs/volumes.go | 12 +++ 22 files changed, 540 insertions(+), 80 deletions(-) create mode 100644 client/hostvolumemanager/volume_fingerprint.go create mode 100644 client/hostvolumemanager/volume_fingerprint_test.go create mode 100755 demo/hostvolume/check.sh create mode 100755 demo/hostvolume/e2e.sh rename demo/hostvolume/{example-host-volume => example-plugin-mkfs} (100%) create mode 100644 demo/hostvolume/external-plugin.volume.hcl delete mode 100644 demo/hostvolume/host.volume.hcl create mode 100644 demo/hostvolume/internal-plugin.volume.hcl create mode 100644 demo/hostvolume/job.nomad.hcl create mode 100755 demo/hostvolume/setup.sh create mode 100755 demo/hostvolume/teardown.sh diff --git a/api/nodes.go b/api/nodes.go index 809382bf79b..1d4cf4e65d9 100644 --- a/api/nodes.go +++ b/api/nodes.go @@ -517,6 +517,8 @@ type DriverInfo struct { type HostVolumeInfo struct { Path string ReadOnly bool + // ID is set for dynamic host volumes only. + ID string } // HostNetworkInfo is used to return metadata about a given HostNetwork diff --git a/client/client.go b/client/client.go index ac4a7dad700..40453f1ab77 100644 --- a/client/client.go +++ b/client/client.go @@ -411,6 +411,7 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulProxie c.updateNodeFromDriver, c.updateNodeFromDevices, c.updateNodeFromCSI, + c.updateNodeFromHostVol, ) // Initialize the server manager @@ -535,16 +536,14 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulProxie c.devicemanager = devManager c.pluginManagers.RegisterAndRun(devManager) - c.hostVolumeManager, err = hvm.NewHostVolumeManager(logger, - c.stateDB, hostVolumeRequestTimeout, - cfg.HostVolumePluginDir, - cfg.AllocMountsDir) - if err != nil { - // NewHostVolumeManager will only err if it fails to read state store, - // or if one or more required plugins do not exist, so halt the client - // because something needs to be fixed by a cluster admin. 
- return nil, err - } + // set up dynamic host volume manager + c.hostVolumeManager = hvm.NewHostVolumeManager(logger, hvm.Config{ + PluginDir: cfg.HostVolumePluginDir, + SharedMountDir: cfg.AllocMountsDir, + StateMgr: c.stateDB, + UpdateNodeVols: c.batchNodeUpdates.updateNodeFromHostVolume, + }) + c.pluginManagers.RegisterAndRun(c.hostVolumeManager) // Set up the service registration wrapper using the Consul and Nomad // implementations. The Nomad implementation is only ever used on the diff --git a/client/host_volume_endpoint.go b/client/host_volume_endpoint.go index 622fff3c492..5c73ee4c394 100644 --- a/client/host_volume_endpoint.go +++ b/client/host_volume_endpoint.go @@ -50,7 +50,7 @@ func (v *HostVolume) Delete( ctx, cancelFn := v.requestContext() defer cancelFn() - _, err := v.c.hostVolumeManager.Delete(ctx, req) // db TODO(1.10.0): cresp is empty... why return it? + _, err := v.c.hostVolumeManager.Delete(ctx, req) if err != nil { v.c.logger.Error("failed to delete host volume", "ID", req.ID, "error", err) return err diff --git a/client/host_volume_endpoint_test.go b/client/host_volume_endpoint_test.go index 7fbc042d8ea..a85eae0b1f9 100644 --- a/client/host_volume_endpoint_test.go +++ b/client/host_volume_endpoint_test.go @@ -6,13 +6,13 @@ package client import ( "path/filepath" "testing" - "time" "github.com/hashicorp/nomad/ci" hvm "github.com/hashicorp/nomad/client/hostvolumemanager" "github.com/hashicorp/nomad/client/state" cstructs "github.com/hashicorp/nomad/client/structs" "github.com/hashicorp/nomad/helper/testlog" + "github.com/hashicorp/nomad/nomad/structs" "github.com/shoenig/test/must" ) @@ -26,16 +26,22 @@ func TestHostVolume(t *testing.T) { client.stateDB = memdb tmp := t.TempDir() - var err error + manager := hvm.NewHostVolumeManager(testlog.HCLogger(t), hvm.Config{ + StateMgr: client.stateDB, + UpdateNodeVols: client.updateNodeFromHostVol, + PluginDir: "/no/ext/plugins", + SharedMountDir: tmp, + }) + client.hostVolumeManager = manager expectDir := filepath.Join(tmp, "test-vol-id") - client.hostVolumeManager, err = hvm.NewHostVolumeManager(testlog.HCLogger(t), - client.stateDB, time.Second, "/no/ext/plugins", tmp) - must.NoError(t, err) t.Run("happy", func(t *testing.T) { + + /* create */ + req := &cstructs.ClientHostVolumeCreateRequest{ - ID: "test-vol-id", Name: "test-vol-name", + ID: "test-vol-id", PluginID: "mkdir", // real plugin really makes a dir } var resp cstructs.ClientHostVolumeCreateResponse @@ -56,8 +62,19 @@ func TestHostVolume(t *testing.T) { CreateReq: req, } must.Eq(t, expectState, vols[0]) + // and should be fingerprinted + must.Eq(t, hvm.VolumeMap{ + req.Name: { + ID: req.ID, + Name: req.Name, + Path: expectDir, + }, + }, client.Node().HostVolumes) + + /* delete */ delReq := &cstructs.ClientHostVolumeDeleteRequest{ + Name: "test-vol-name", ID: "test-vol-id", PluginID: "mkdir", HostPath: expectDir, @@ -72,6 +89,8 @@ func TestHostVolume(t *testing.T) { vols, err = memdb.GetDynamicHostVolumes() must.NoError(t, err) must.Len(t, 0, vols) + // and the fingerprint, too + must.Eq(t, map[string]*structs.ClientHostVolumeConfig{}, client.Node().HostVolumes) }) t.Run("missing plugin", func(t *testing.T) { @@ -92,9 +111,12 @@ func TestHostVolume(t *testing.T) { t.Run("error from plugin", func(t *testing.T) { // "mkdir" plugin can't create a directory within a file - client.hostVolumeManager, err = hvm.NewHostVolumeManager(testlog.HCLogger(t), - client.stateDB, time.Second, "/no/ext/plugins", 
"host_volume_endpoint_test.go") - must.NoError(t, err) + client.hostVolumeManager = hvm.NewHostVolumeManager(testlog.HCLogger(t), hvm.Config{ + StateMgr: client.stateDB, + UpdateNodeVols: client.updateNodeFromHostVol, + PluginDir: "/no/ext/plugins", + SharedMountDir: "host_volume_endpoint_test.go", + }) req := &cstructs.ClientHostVolumeCreateRequest{ ID: "test-vol-id", diff --git a/client/hostvolumemanager/host_volume_plugin.go b/client/hostvolumemanager/host_volume_plugin.go index 17cdf02790c..7da5baf66d0 100644 --- a/client/hostvolumemanager/host_volume_plugin.go +++ b/client/hostvolumemanager/host_volume_plugin.go @@ -59,6 +59,20 @@ func (p *HostVolumePluginMkdir) Create(_ context.Context, "path", path) log.Debug("running plugin") + resp := &HostVolumePluginCreateResponse{ + Path: path, + SizeBytes: 0, + } + + if _, err := os.Stat(path); err == nil { + // already exists + return resp, nil + } else if !os.IsNotExist(err) { + // doesn't exist, but some other path error + log.Debug("error with plugin", "error", err) + return nil, err + } + err := os.Mkdir(path, 0o700) if err != nil { log.Debug("error with plugin", "error", err) @@ -66,10 +80,7 @@ func (p *HostVolumePluginMkdir) Create(_ context.Context, } log.Debug("plugin ran successfully") - return &HostVolumePluginCreateResponse{ - Path: path, - SizeBytes: 0, - }, nil + return resp, nil } func (p *HostVolumePluginMkdir) Delete(_ context.Context, req *cstructs.ClientHostVolumeDeleteRequest) error { diff --git a/client/hostvolumemanager/host_volumes.go b/client/hostvolumemanager/host_volumes.go index 9827084a74b..82ccb8f47a4 100644 --- a/client/hostvolumemanager/host_volumes.go +++ b/client/hostvolumemanager/host_volumes.go @@ -7,12 +7,13 @@ import ( "context" "errors" "path/filepath" - "time" + "sync" "github.com/hashicorp/go-hclog" "github.com/hashicorp/go-multierror" cstructs "github.com/hashicorp/nomad/client/structs" "github.com/hashicorp/nomad/helper" + "github.com/hashicorp/nomad/nomad/structs" ) var ( @@ -26,42 +27,68 @@ type HostVolumeStateManager interface { DeleteDynamicHostVolume(string) error } +type Config struct { + // PluginDir is where external plugins may be found. + PluginDir string + + // SharedMountDir is where plugins should place the directory + // that will later become a volume HostPath + SharedMountDir string + + // StateMgr manages client state to restore on agent restarts. + StateMgr HostVolumeStateManager + + // UpdateNodeVols is run to update the node when a volume is created + // or deleted. + UpdateNodeVols HostVolumeNodeUpdater +} + type HostVolumeManager struct { pluginDir string sharedMountDir string stateMgr HostVolumeStateManager - - log hclog.Logger + updateNodeVols HostVolumeNodeUpdater + log hclog.Logger } -func NewHostVolumeManager(logger hclog.Logger, - state HostVolumeStateManager, restoreTimeout time.Duration, - pluginDir, sharedMountDir string) (*HostVolumeManager, error) { - - log := logger.Named("host_volume_mgr") - - // db TODO(1.10.0): how do we define the external mounter plugins? plugin configs? 
- hvm := &HostVolumeManager{ - pluginDir: pluginDir, - sharedMountDir: sharedMountDir, - stateMgr: state, - log: log, +func NewHostVolumeManager(logger hclog.Logger, config Config) *HostVolumeManager { + // db TODO(1.10.0): document plugin config options + return &HostVolumeManager{ + pluginDir: config.PluginDir, + sharedMountDir: config.SharedMountDir, + stateMgr: config.StateMgr, + updateNodeVols: config.UpdateNodeVols, + log: logger.Named("host_volume_manager"), } +} - if err := hvm.restoreState(state, restoreTimeout); err != nil { - return nil, err +func genVolConfig(req *cstructs.ClientHostVolumeCreateRequest, resp *HostVolumePluginCreateResponse) *structs.ClientHostVolumeConfig { + if req == nil || resp == nil { + return nil + } + return &structs.ClientHostVolumeConfig{ + Name: req.Name, + ID: req.ID, + Path: resp.Path, + + // dynamic volumes, like CSI, have more robust `capabilities`, + // so we always set ReadOnly to false, and let the scheduler + // decide when to ignore this and check capabilities instead. + ReadOnly: false, } - - return hvm, nil } -func (hvm *HostVolumeManager) restoreState(state HostVolumeStateManager, timeout time.Duration) error { - vols, err := state.GetDynamicHostVolumes() +func (hvm *HostVolumeManager) restoreFromState(ctx context.Context) (VolumeMap, error) { + vols, err := hvm.stateMgr.GetDynamicHostVolumes() if err != nil { - return err + return nil, err } + + volumes := make(VolumeMap) + var mut sync.Mutex + if len(vols) == 0 { - return nil // nothing to do + return volumes, nil // nothing to do } // re-"create" the volumes - plugins have the best knowledge of their @@ -76,17 +103,20 @@ func (hvm *HostVolumeManager) restoreState(state HostVolumeStateManager, timeout return err } - ctx, cancel := context.WithTimeout(context.Background(), timeout) - defer cancel() - if _, err := plug.Create(ctx, vol.CreateReq); err != nil { + resp, err := plug.Create(ctx, vol.CreateReq) + if err != nil { // plugin execution errors are only logged hvm.log.Error("failed to restore", "plugin_id", vol.CreateReq.PluginID, "volume_id", vol.ID, "error", err) + return nil } + mut.Lock() + volumes[vol.CreateReq.Name] = genVolConfig(vol.CreateReq, resp) + mut.Unlock() return nil }) } mErr := group.Wait() - return helper.FlattenMultierror(mErr.ErrorOrNil()) + return volumes, helper.FlattenMultierror(mErr.ErrorOrNil()) } func (hvm *HostVolumeManager) getPlugin(id string) (HostVolumePlugin, error) { @@ -139,9 +169,11 @@ func (hvm *HostVolumeManager) Create(ctx context.Context, return nil, helper.FlattenMultierror(err) } - // db TODO(1.10.0): now we need to add the volume to the node fingerprint! 
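+	// add the new volume to the node fingerprint (UpdateNodeVols is wired
+	// up by the client when it constructs the manager's Config)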
+ hvm.updateNodeVols(req.Name, genVolConfig(req, pluginResp)) resp := &cstructs.ClientHostVolumeCreateResponse{ + VolumeName: req.Name, + VolumeID: req.ID, HostPath: pluginResp.Path, CapacityBytes: pluginResp.SizeBytes, } @@ -162,12 +194,17 @@ func (hvm *HostVolumeManager) Delete(ctx context.Context, return nil, err } - resp := &cstructs.ClientHostVolumeDeleteResponse{} - if err := hvm.stateMgr.DeleteDynamicHostVolume(req.ID); err != nil { hvm.log.Error("failed to delete volume in state", "volume_id", req.ID, "error", err) return nil, err // bail so a user may retry } + hvm.updateNodeVols(req.Name, nil) + + resp := &cstructs.ClientHostVolumeDeleteResponse{ + VolumeName: req.Name, + VolumeID: req.ID, + } + return resp, nil } diff --git a/client/hostvolumemanager/host_volumes_test.go b/client/hostvolumemanager/host_volumes_test.go index a94526416d7..15a3a2fca2e 100644 --- a/client/hostvolumemanager/host_volumes_test.go +++ b/client/hostvolumemanager/host_volumes_test.go @@ -4,6 +4,7 @@ package hostvolumemanager import ( + "context" "path/filepath" "testing" "time" @@ -11,6 +12,7 @@ import ( cstate "github.com/hashicorp/nomad/client/state" cstructs "github.com/hashicorp/nomad/client/structs" "github.com/hashicorp/nomad/helper/testlog" + "github.com/hashicorp/nomad/nomad/structs" "github.com/shoenig/test/must" ) @@ -21,10 +23,12 @@ func TestNewHostVolumeManager_restoreState(t *testing.T) { vol := &cstructs.HostVolumeState{ ID: "test-vol-id", CreateReq: &cstructs.ClientHostVolumeCreateRequest{ + Name: "test-vol-name", ID: "test-vol-id", PluginID: "mkdir", }, } + fNode := newFakeNode() t.Run("happy", func(t *testing.T) { // put our volume in state @@ -34,20 +38,62 @@ func TestNewHostVolumeManager_restoreState(t *testing.T) { // new volume manager should load it from state and run Create, // resulting in a volume directory in this mountDir. 
mountDir := t.TempDir() + volPath := filepath.Join(mountDir, vol.ID) + + hvm := NewHostVolumeManager(log, Config{ + StateMgr: state, + UpdateNodeVols: fNode.updateVol, + PluginDir: "/wherever", + SharedMountDir: mountDir, + }) - _, err := NewHostVolumeManager(log, state, time.Second, "/wherever", mountDir) + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + defer cancel() + vols, err := hvm.restoreFromState(ctx) must.NoError(t, err) - volPath := filepath.Join(mountDir, vol.ID) + expect := map[string]*structs.ClientHostVolumeConfig{ + "test-vol-name": { + Name: "test-vol-name", + ID: "test-vol-id", + Path: volPath, + ReadOnly: false, + }, + } + must.Eq(t, expect, vols) + must.DirExists(t, volPath) }) t.Run("get error", func(t *testing.T) { state := &cstate.ErrDB{} - _, err := NewHostVolumeManager(log, state, time.Second, "/wherever", "/wherever") + hvm := NewHostVolumeManager(log, Config{ + StateMgr: state, + UpdateNodeVols: fNode.updateVol, + PluginDir: "/wherever", + SharedMountDir: "/wherever", + }) + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + defer cancel() + vols, err := hvm.restoreFromState(ctx) // error loading state should break the world must.ErrorIs(t, err, cstate.ErrDBError) + must.Nil(t, vols) }) // db TODO: test plugin error } + +type fakeNode struct { + vols VolumeMap +} + +func (n *fakeNode) updateVol(name string, volume *structs.ClientHostVolumeConfig) { + UpdateVolumeMap(n.vols, name, volume) +} + +func newFakeNode() *fakeNode { + return &fakeNode{ + vols: make(VolumeMap), + } +} diff --git a/client/hostvolumemanager/volume_fingerprint.go b/client/hostvolumemanager/volume_fingerprint.go new file mode 100644 index 00000000000..37b0c84fbfb --- /dev/null +++ b/client/hostvolumemanager/volume_fingerprint.go @@ -0,0 +1,65 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package hostvolumemanager + +import ( + "context" + + "github.com/hashicorp/nomad/nomad/structs" +) + +// this file is for fingerprinting *volumes* +// *plugins* are detected in client/fingerprint/dynamic_host_volumes.go + +// HostVolumeNodeUpdater is used to add or remove volumes from the Node. +type HostVolumeNodeUpdater func(name string, volume *structs.ClientHostVolumeConfig) + +// VolumeMap keys are volume `name`s, identical to Node.HostVolumes. +type VolumeMap map[string]*structs.ClientHostVolumeConfig + +// UpdateVolumeMap returns true if it changes the provided `volumes` map. +// If `vol` is nil, key `name` will be removed from the map, if present. +// If it is not nil, `name: vol` will be set on the map, if different. +// +// Since it may mutate the map, the caller should make a copy +// or acquire a lock as appropriate for their context. +func UpdateVolumeMap(volumes VolumeMap, name string, vol *structs.ClientHostVolumeConfig) (changed bool) { + current, exists := volumes[name] + if vol == nil { + if exists { + delete(volumes, name) + changed = true + } + } else { + if !exists || !vol.Equal(current) { + volumes[name] = vol + changed = true + } + } + return changed +} + +// WaitForFirstFingerprint implements client.FingerprintingPluginManager +func (hvm *HostVolumeManager) WaitForFirstFingerprint(ctx context.Context) <-chan struct{} { + // the fingerprint manager puts batchFirstFingerprintsTimeout (50 seconds) + // on the context that it sends to us here so we don't need another + // timeout. we just need to cancel to report when we are done. 
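+	//
+	// restoreFromState re-runs each plugin's Create for the volumes found in
+	// client state, and the results are pushed into the node fingerprint via
+	// updateNodeVols below.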
+ ctx, cancel := context.WithCancel(ctx) + defer cancel() + volumes, err := hvm.restoreFromState(ctx) + if err != nil { + hvm.log.Error("failed to restore state", "error", err) + return ctx.Done() + } + for name, vol := range volumes { + hvm.updateNodeVols(name, vol) // => batchNodeUpdates.updateNodeFromHostVolume() + } + return ctx.Done() +} +func (hvm *HostVolumeManager) Run() {} +func (hvm *HostVolumeManager) Shutdown() {} +func (hvm *HostVolumeManager) PluginType() string { + // "Plugin"Type is misleading, because this is for *volumes* but ok. + return "dynamic_host_volume" +} diff --git a/client/hostvolumemanager/volume_fingerprint_test.go b/client/hostvolumemanager/volume_fingerprint_test.go new file mode 100644 index 00000000000..c5198eb7c71 --- /dev/null +++ b/client/hostvolumemanager/volume_fingerprint_test.go @@ -0,0 +1,81 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package hostvolumemanager + +import ( + "testing" + + "github.com/hashicorp/nomad/nomad/structs" + "github.com/shoenig/test/must" +) + +func TestUpdateVolumeMap(t *testing.T) { + cases := []struct { + name string + + vols VolumeMap + volName string + vol *structs.ClientHostVolumeConfig + + expectMap VolumeMap + expectChange bool + }{ + { + name: "delete absent", + vols: VolumeMap{}, + volName: "anything", + vol: nil, + expectMap: VolumeMap{}, + expectChange: false, + }, + { + name: "delete present", + vols: VolumeMap{"deleteme": {}}, + volName: "deleteme", + vol: nil, + expectMap: VolumeMap{}, + expectChange: true, + }, + { + name: "add absent", + vols: VolumeMap{}, + volName: "addme", + vol: &structs.ClientHostVolumeConfig{}, + expectMap: VolumeMap{"addme": {}}, + expectChange: true, + }, + { + name: "add present", + vols: VolumeMap{"ignoreme": {}}, + volName: "ignoreme", + vol: &structs.ClientHostVolumeConfig{}, + expectMap: VolumeMap{"ignoreme": {}}, + expectChange: false, + }, + { + // this should not happen, but test anyway + name: "change present", + vols: VolumeMap{"changeme": {Path: "before"}}, + volName: "changeme", + vol: &structs.ClientHostVolumeConfig{Path: "after"}, + expectMap: VolumeMap{"changeme": {Path: "after"}}, + expectChange: true, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + + changed := UpdateVolumeMap(tc.vols, tc.volName, tc.vol) + must.Eq(t, tc.expectMap, tc.vols) + + if tc.expectChange { + must.True(t, changed, must.Sprint("expect volume to have been changed")) + } else { + must.False(t, changed, must.Sprint("expect volume not to have been changed")) + } + + }) + } +} diff --git a/client/node_updater.go b/client/node_updater.go index 6fe51cdf56e..c02a2dd9950 100644 --- a/client/node_updater.go +++ b/client/node_updater.go @@ -10,6 +10,7 @@ import ( "time" "github.com/hashicorp/nomad/client/devicemanager" + hvm "github.com/hashicorp/nomad/client/hostvolumemanager" "github.com/hashicorp/nomad/client/pluginmanager/csimanager" "github.com/hashicorp/nomad/client/pluginmanager/drivermanager" "github.com/hashicorp/nomad/nomad/structs" @@ -30,7 +31,7 @@ func (c *Client) batchFirstFingerprints() { ch, err := c.pluginManagers.WaitForFirstFingerprint(ctx) if err != nil { - c.logger.Warn("failed to batch initial fingerprint updates, switching to incemental updates") + c.logger.Warn("failed to batch initial fingerprint updates, switching to incremental updates") goto SEND_BATCH } @@ -46,6 +47,12 @@ SEND_BATCH: newConfig := c.config.Copy() + // host volume updates + var 
hostVolChanged bool + c.batchNodeUpdates.batchHostVolumeUpdates(func(name string, vol *structs.ClientHostVolumeConfig) { + hostVolChanged = hvm.UpdateVolumeMap(newConfig.Node.HostVolumes, name, vol) + }) + // csi updates var csiChanged bool c.batchNodeUpdates.batchCSIUpdates(func(name string, info *structs.CSIInfo) { @@ -85,7 +92,7 @@ SEND_BATCH: }) // only update the node if changes occurred - if driverChanged || devicesChanged || csiChanged { + if driverChanged || devicesChanged || csiChanged || hostVolChanged { c.config = newConfig c.updateNode() } @@ -123,6 +130,23 @@ func (c *Client) updateNodeFromCSI(name string, info *structs.CSIInfo) { } } +func (c *Client) updateNodeFromHostVol(name string, vol *structs.ClientHostVolumeConfig) { + c.configLock.Lock() + defer c.configLock.Unlock() + + newConfig := c.config.Copy() + + if newConfig.Node.HostVolumes == nil { + newConfig.Node.HostVolumes = make(map[string]*structs.ClientHostVolumeConfig) + } + + changed := hvm.UpdateVolumeMap(newConfig.Node.HostVolumes, name, vol) + if changed { + c.config = newConfig + c.updateNode() + } +} + // updateNodeFromCSIControllerLocked makes the changes to the node from a csi // update but does not send the update to the server. c.configLock must be held // before calling this func. @@ -336,12 +360,18 @@ type batchNodeUpdates struct { csiBatched bool csiCB csimanager.UpdateNodeCSIInfoFunc csiMu sync.Mutex + + hostVolumes hvm.VolumeMap + hostVolumesBatched bool + hostVolumeCB hvm.HostVolumeNodeUpdater + hostVolumeMu sync.Mutex } func newBatchNodeUpdates( driverCB drivermanager.UpdateNodeDriverInfoFn, devicesCB devicemanager.UpdateNodeDevicesFn, - csiCB csimanager.UpdateNodeCSIInfoFunc) *batchNodeUpdates { + csiCB csimanager.UpdateNodeCSIInfoFunc, + hostVolumeCB hvm.HostVolumeNodeUpdater) *batchNodeUpdates { return &batchNodeUpdates{ drivers: make(map[string]*structs.DriverInfo), @@ -351,7 +381,34 @@ func newBatchNodeUpdates( csiNodePlugins: make(map[string]*structs.CSIInfo), csiControllerPlugins: make(map[string]*structs.CSIInfo), csiCB: csiCB, + hostVolumes: make(hvm.VolumeMap), + hostVolumeCB: hostVolumeCB, + } +} + +// this is the one that the volume manager runs +func (b *batchNodeUpdates) updateNodeFromHostVolume(name string, vol *structs.ClientHostVolumeConfig) { + b.hostVolumeMu.Lock() + defer b.hostVolumeMu.Unlock() + if b.hostVolumesBatched { + b.hostVolumeCB(name, vol) // => Client.updateNodeFromHostVol() + return + } + hvm.UpdateVolumeMap(b.hostVolumes, name, vol) +} + +// this one runs on client start +func (b *batchNodeUpdates) batchHostVolumeUpdates(f hvm.HostVolumeNodeUpdater) error { + b.hostVolumeMu.Lock() + defer b.hostVolumeMu.Unlock() + if b.hostVolumesBatched { + return fmt.Errorf("host volume updates already batched") + } + b.hostVolumesBatched = true + for name, vol := range b.hostVolumes { + f(name, vol) // => c.batchNodeUpdates.batchHostVolumeUpdates(FUNC } + return nil } // updateNodeFromCSI implements csimanager.UpdateNodeCSIInfoFunc and is used in diff --git a/client/structs/host_volumes.go b/client/structs/host_volumes.go index 3188e45dc0a..bff543588f4 100644 --- a/client/structs/host_volumes.go +++ b/client/structs/host_volumes.go @@ -37,6 +37,9 @@ type ClientHostVolumeCreateRequest struct { } type ClientHostVolumeCreateResponse struct { + VolumeName string + VolumeID string + // Path is the host path where the volume's mount point was created. We send // this back to the server to make debugging easier. 
HostPath string @@ -50,6 +53,8 @@ type ClientHostVolumeDeleteRequest struct { // ID is a UUID-like string generated by the server. ID string + Name string + // PluginID is the name of the host volume plugin on the client that will be // used for deleting the volume. If omitted, the client will use its default // built-in plugin. @@ -68,4 +73,7 @@ type ClientHostVolumeDeleteRequest struct { Parameters map[string]string } -type ClientHostVolumeDeleteResponse struct{} +type ClientHostVolumeDeleteResponse struct { + VolumeName string + VolumeID string +} diff --git a/demo/hostvolume/check.sh b/demo/hostvolume/check.sh new file mode 100755 index 00000000000..c89a36c5a54 --- /dev/null +++ b/demo/hostvolume/check.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: BUSL-1.1 + +set -xeuo pipefail + +nomad volume status -type=host -verbose +nomad operator api /v1/nodes | jq '.[].HostVolumes' + +addr="$(nomad service info -json job | jq -r '.[0].Address'):8000" +curl -sS "$addr/external/" | grep hi +curl -sS "$addr/internal/" | grep hi + +echo '💚 looks good! 💚' diff --git a/demo/hostvolume/e2e.sh b/demo/hostvolume/e2e.sh new file mode 100755 index 00000000000..d27070cafac --- /dev/null +++ b/demo/hostvolume/e2e.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: BUSL-1.1 + +set -xeuo pipefail + +./setup.sh +./check.sh +./teardown.sh diff --git a/demo/hostvolume/example-host-volume b/demo/hostvolume/example-plugin-mkfs similarity index 100% rename from demo/hostvolume/example-host-volume rename to demo/hostvolume/example-plugin-mkfs diff --git a/demo/hostvolume/external-plugin.volume.hcl b/demo/hostvolume/external-plugin.volume.hcl new file mode 100644 index 00000000000..6c9f17e8d50 --- /dev/null +++ b/demo/hostvolume/external-plugin.volume.hcl @@ -0,0 +1,22 @@ +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: BUSL-1.1 + +name = "external-plugin" +type = "host" +# the executable named `example-plugin-mkfs` must be placed in the +# -host-volume-plugin-dir (config: client.host_volume_plugin_dir) +# or you will get an error creating the volume: +# * could not place volume "external-plugin": no node meets constraints +# The default location is /host_volume_plugins +plugin_id = "example-plugin-mkfs" +capacity_min = "50mb" +capacity_max = "50mb" + +capability { + access_mode = "single-node-writer" + attachment_mode = "file-system" +} + +parameters { + a = "ayy" +} diff --git a/demo/hostvolume/host.volume.hcl b/demo/hostvolume/host.volume.hcl deleted file mode 100644 index cb0774b94e7..00000000000 --- a/demo/hostvolume/host.volume.hcl +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (c) HashiCorp, Inc. -# SPDX-License-Identifier: BUSL-1.1 -name = "test" -type = "host" -plugin_id = "example-host-volume" -capacity_min = "50mb" -capacity_max = "50mb" - -capability { - access_mode = "single-node-writer" - attachment_mode = "file-system" -} - -parameters { - a = "ayy" -} - -# TODO(1.10.0): don't require node_pool -node_pool = "default" diff --git a/demo/hostvolume/internal-plugin.volume.hcl b/demo/hostvolume/internal-plugin.volume.hcl new file mode 100644 index 00000000000..bbea133e1a6 --- /dev/null +++ b/demo/hostvolume/internal-plugin.volume.hcl @@ -0,0 +1,14 @@ +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: BUSL-1.1 + +name = "internal-plugin" +type = "host" +# this plugin is built into Nomad; +# it simply creates a directory. 
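+# no plugin binary needs to be installed for it.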
+plugin_id = "mkdir" + +capability { + access_mode = "single-node-writer" + attachment_mode = "file-system" +} + diff --git a/demo/hostvolume/job.nomad.hcl b/demo/hostvolume/job.nomad.hcl new file mode 100644 index 00000000000..1b0e0162665 --- /dev/null +++ b/demo/hostvolume/job.nomad.hcl @@ -0,0 +1,48 @@ +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: BUSL-1.1 + +job "job" { + group "g" { + task "t" { + driver = "docker" + config { + image = "python:slim" + command = "bash" + args = ["-xc", <<-EOF + for dir in internal external; do + touch ${NOMAD_TASK_DIR}/$dir/hiii + done + python -m http.server -d ${NOMAD_TASK_DIR} --bind=:: + EOF + ] + ports = ["http"] + } + volume_mount { + volume = "int" + destination = "${NOMAD_TASK_DIR}/internal" + } + volume_mount { + volume = "ext" + destination = "${NOMAD_TASK_DIR}/external" + } + } + volume "int" { + type = "host" + source = "internal-plugin" + } + volume "ext" { + type = "host" + source = "external-plugin" + } + network { + port "http" { + static = 8000 + } + } + service { + name = "job" + port = "http" + provider = "nomad" + } + } +} diff --git a/demo/hostvolume/setup.sh b/demo/hostvolume/setup.sh new file mode 100755 index 00000000000..9a9fc7be719 --- /dev/null +++ b/demo/hostvolume/setup.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: BUSL-1.1 + +set -xeuo pipefail + +nomad volume create external-plugin.volume.hcl +nomad volume create internal-plugin.volume.hcl + +nomad job run job.nomad.hcl + +nomad volume status -type=host -verbose +nomad operator api /v1/nodes | jq '.[].HostVolumes' + diff --git a/demo/hostvolume/teardown.sh b/demo/hostvolume/teardown.sh new file mode 100755 index 00000000000..d4d17d67fa4 --- /dev/null +++ b/demo/hostvolume/teardown.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash +# Copyright (c) HashiCorp, Inc. +# SPDX-License-Identifier: BUSL-1.1 + +set -xeuo pipefail + +nomad job stop job || true + +for _ in {1..5}; do + sleep 3 + ids="$(nomad volume status -type=host -verbose | awk '/ternal-plugin/ {print$1}')" + test -z "$ids" && break + for id in $ids; do + nomad volume delete -type=host "$id" || continue + done +done + diff --git a/nomad/host_volume_endpoint.go b/nomad/host_volume_endpoint.go index e038e631a29..8f085416826 100644 --- a/nomad/host_volume_endpoint.go +++ b/nomad/host_volume_endpoint.go @@ -610,6 +610,7 @@ func (v *HostVolume) deleteVolume(vol *structs.HostVolume) error { method := "ClientHostVolume.Delete" cReq := &cstructs.ClientHostVolumeDeleteRequest{ ID: vol.ID, + Name: vol.Name, PluginID: vol.PluginID, NodeID: vol.NodeID, HostPath: vol.HostPath, diff --git a/nomad/structs/volumes.go b/nomad/structs/volumes.go index daacd5d8670..2f292098e6a 100644 --- a/nomad/structs/volumes.go +++ b/nomad/structs/volumes.go @@ -31,6 +31,18 @@ type ClientHostVolumeConfig struct { Name string `hcl:",key"` Path string `hcl:"path"` ReadOnly bool `hcl:"read_only"` + // ID is set for dynamic host volumes only. 
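+	// Static host volumes from the client agent config leave it empty,
+	// which is how the scheduler tells the two kinds apart.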
+ ID string `hcl:"-"` +} + +func (p *ClientHostVolumeConfig) Equal(o *ClientHostVolumeConfig) bool { + if p == nil && o == nil { + return true + } + if p == nil || o == nil { + return false + } + return *p == *o } func (p *ClientHostVolumeConfig) Copy() *ClientHostVolumeConfig { From 3143019d852711378afa08e1e9d8244cc757a425 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Tue, 10 Dec 2024 13:10:40 -0500 Subject: [PATCH 27/35] dynamic host volumes: capabilities check during scheduling (#24617) Static host volumes have a simple readonly toggle, but dynamic host volumes have a more complex set of capabilities similar to CSI volumes. Update the feasibility checker to account for these capabilities and volume readiness. Also fixes a minor bug in the state store where a soft-delete (not yet implemented) could cause a volume to be marked ready again. This is needed to support testing the readiness checking in the scheduler. Ref: https://github.com/hashicorp/nomad/pull/24479 --- nomad/state/state_store_host_volumes.go | 12 +- scheduler/feasible.go | 76 ++++++----- scheduler/feasible_test.go | 160 ++++++++++++++++++++---- scheduler/scheduler.go | 4 + scheduler/stack.go | 8 +- 5 files changed, 196 insertions(+), 64 deletions(-) diff --git a/nomad/state/state_store_host_volumes.go b/nomad/state/state_store_host_volumes.go index bd01129f314..7e55e6ced43 100644 --- a/nomad/state/state_store_host_volumes.go +++ b/nomad/state/state_store_host_volumes.go @@ -84,9 +84,17 @@ func (s *StateStore) UpsertHostVolume(index uint64, vol *structs.HostVolume) err if node == nil { return fmt.Errorf("host volume %s has nonexistent node ID %s", vol.ID, vol.NodeID) } - if _, ok := node.HostVolumes[vol.Name]; ok { - vol.State = structs.HostVolumeStateReady + switch vol.State { + case structs.HostVolumeStateDeleted: + // no-op: don't allow soft-deletes to resurrect a previously fingerprinted volume + default: + // prevent a race between node fingerprint and create RPC that could + // switch a ready volume back to pending + if _, ok := node.HostVolumes[vol.Name]; ok { + vol.State = structs.HostVolumeStateReady + } } + // Register RPCs for new volumes may not have the node pool set vol.NodePool = node.NodePool diff --git a/scheduler/feasible.go b/scheduler/feasible.go index e6e7c81d4a3..60442f92e7f 100644 --- a/scheduler/feasible.go +++ b/scheduler/feasible.go @@ -137,40 +137,38 @@ func NewRandomIterator(ctx Context, nodes []*structs.Node) *StaticIterator { // HostVolumeChecker is a FeasibilityChecker which returns whether a node has // the host volumes necessary to schedule a task group. type HostVolumeChecker struct { - ctx Context - - // volumes is a map[HostVolumeName][]RequestedVolume. The requested volumes are - // a slice because a single task group may request the same volume multiple times. - volumes map[string][]*structs.VolumeRequest + ctx Context + volumeReqs []*structs.VolumeRequest + namespace string } // NewHostVolumeChecker creates a HostVolumeChecker from a set of volumes func NewHostVolumeChecker(ctx Context) *HostVolumeChecker { return &HostVolumeChecker{ - ctx: ctx, + ctx: ctx, + volumeReqs: []*structs.VolumeRequest{}, } } // SetVolumes takes the volumes required by a task group and updates the checker. -func (h *HostVolumeChecker) SetVolumes(allocName string, volumes map[string]*structs.VolumeRequest) { - lookupMap := make(map[string][]*structs.VolumeRequest) - // Convert the map from map[DesiredName]Request to map[Source][]Request to improve - // lookup performance. Also filter non-host volumes. 
+func (h *HostVolumeChecker) SetVolumes(allocName string, ns string, volumes map[string]*structs.VolumeRequest) { + h.namespace = ns + h.volumeReqs = []*structs.VolumeRequest{} for _, req := range volumes { if req.Type != structs.VolumeTypeHost { - continue + continue // filter CSI volumes } if req.PerAlloc { // provide a unique volume source per allocation copied := req.Copy() copied.Source = copied.Source + structs.AllocSuffix(allocName) - lookupMap[copied.Source] = append(lookupMap[copied.Source], copied) + h.volumeReqs = append(h.volumeReqs, copied) + } else { - lookupMap[req.Source] = append(lookupMap[req.Source], req) + h.volumeReqs = append(h.volumeReqs, req) } } - h.volumes = lookupMap } func (h *HostVolumeChecker) Feasible(candidate *structs.Node) bool { @@ -183,35 +181,45 @@ func (h *HostVolumeChecker) Feasible(candidate *structs.Node) bool { } func (h *HostVolumeChecker) hasVolumes(n *structs.Node) bool { - rLen := len(h.volumes) - hLen := len(n.HostVolumes) // Fast path: Requested no volumes. No need to check further. - if rLen == 0 { + if len(h.volumeReqs) == 0 { return true } - // Fast path: Requesting more volumes than the node has, can't meet the criteria. - if rLen > hLen { - return false - } - - for source, requests := range h.volumes { - nodeVolume, ok := n.HostVolumes[source] + for _, req := range h.volumeReqs { + volCfg, ok := n.HostVolumes[req.Source] if !ok { return false } - // If the volume supports being mounted as ReadWrite, we do not need to - // do further validation for readonly placement. - if !nodeVolume.ReadOnly { - continue - } - - // The Volume can only be mounted ReadOnly, validate that no requests for - // it are ReadWrite. - for _, req := range requests { - if !req.ReadOnly { + if volCfg.ID != "" { // dynamic host volume + vol, err := h.ctx.State().HostVolumeByID(nil, h.namespace, volCfg.ID, false) + if err != nil || vol == nil { + // node fingerprint has a dynamic volume that's no longer in the + // state store; this is only possible if the batched fingerprint + // update from a delete RPC is written before the delete RPC's + // raft entry completes + return false + } + if vol.State != structs.HostVolumeStateReady { + return false + } + var capOk bool + for _, cap := range vol.RequestedCapabilities { + if req.AccessMode == structs.CSIVolumeAccessMode(cap.AccessMode) && + req.AttachmentMode == structs.CSIVolumeAttachmentMode(cap.AttachmentMode) { + capOk = true + break + } + } + if !capOk { + return false + } + } else if !req.ReadOnly { + // this is a static host volume and can only be mounted ReadOnly, + // validate that no requests for it are ReadWrite. 
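+			// (i.e. a read-write request cannot land on a read-only volume)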
+ if volCfg.ReadOnly { return false } } diff --git a/scheduler/feasible_test.go b/scheduler/feasible_test.go index 4e887752989..9c5a9aaf1a7 100644 --- a/scheduler/feasible_test.go +++ b/scheduler/feasible_test.go @@ -177,7 +177,7 @@ func TestHostVolumeChecker(t *testing.T) { alloc.NodeID = nodes[2].ID for i, c := range cases { - checker.SetVolumes(alloc.Name, c.RequestedVolumes) + checker.SetVolumes(alloc.Name, structs.DefaultNamespace, c.RequestedVolumes) if act := checker.Feasible(c.Node); act != c.Result { t.Fatalf("case(%d) failed: got %v; want %v", i, act, c.Result) } @@ -187,10 +187,54 @@ func TestHostVolumeChecker(t *testing.T) { func TestHostVolumeChecker_ReadOnly(t *testing.T) { ci.Parallel(t) - _, ctx := testContext(t) + store, ctx := testContext(t) + nodes := []*structs.Node{ mock.Node(), mock.Node(), + mock.Node(), + mock.Node(), + mock.Node(), + } + + hostVolCapsReadWrite := []*structs.HostVolumeCapability{ + { + AttachmentMode: structs.HostVolumeAttachmentModeFilesystem, + AccessMode: structs.HostVolumeAccessModeSingleNodeReader, + }, + { + AttachmentMode: structs.HostVolumeAttachmentModeFilesystem, + AccessMode: structs.HostVolumeAccessModeSingleNodeWriter, + }, + } + hostVolCapsReadOnly := []*structs.HostVolumeCapability{{ + AttachmentMode: structs.HostVolumeAttachmentModeFilesystem, + AccessMode: structs.HostVolumeAccessModeSingleNodeReader, + }} + + dhvNotReady := &structs.HostVolume{ + Namespace: structs.DefaultNamespace, + ID: uuid.Generate(), + Name: "foo", + NodeID: nodes[2].ID, + RequestedCapabilities: hostVolCapsReadOnly, + State: structs.HostVolumeStateDeleted, + } + dhvReadOnly := &structs.HostVolume{ + Namespace: structs.DefaultNamespace, + ID: uuid.Generate(), + Name: "foo", + NodeID: nodes[3].ID, + RequestedCapabilities: hostVolCapsReadOnly, + State: structs.HostVolumeStateReady, + } + dhvReadWrite := &structs.HostVolume{ + Namespace: structs.DefaultNamespace, + ID: uuid.Generate(), + Name: "foo", + NodeID: nodes[4].ID, + RequestedCapabilities: hostVolCapsReadWrite, + State: structs.HostVolumeStateReady, } nodes[0].HostVolumes = map[string]*structs.ClientHostVolumeConfig{ @@ -203,6 +247,23 @@ func TestHostVolumeChecker_ReadOnly(t *testing.T) { ReadOnly: false, }, } + nodes[2].HostVolumes = map[string]*structs.ClientHostVolumeConfig{ + "foo": {ID: dhvNotReady.ID}, + } + nodes[3].HostVolumes = map[string]*structs.ClientHostVolumeConfig{ + "foo": {ID: dhvReadOnly.ID}, + } + nodes[4].HostVolumes = map[string]*structs.ClientHostVolumeConfig{ + "foo": {ID: dhvReadWrite.ID}, + } + + for _, node := range nodes { + must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, 1000, node)) + } + + must.NoError(t, store.UpsertHostVolume(1000, dhvNotReady)) + must.NoError(t, store.UpsertHostVolume(1000, dhvReadOnly)) + must.NoError(t, store.UpsertHostVolume(1000, dhvReadWrite)) readwriteRequest := map[string]*structs.VolumeRequest{ "foo": { @@ -219,42 +280,89 @@ func TestHostVolumeChecker_ReadOnly(t *testing.T) { }, } + dhvReadOnlyRequest := map[string]*structs.VolumeRequest{ + "foo": { + Type: "host", + Source: "foo", + AccessMode: structs.CSIVolumeAccessModeSingleNodeReader, + AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem, + }, + } + dhvReadWriteRequest := map[string]*structs.VolumeRequest{ + "foo": { + Type: "host", + Source: "foo", + AccessMode: structs.CSIVolumeAccessModeSingleNodeWriter, + AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem, + }, + } + checker := NewHostVolumeChecker(ctx) cases := []struct { - Node *structs.Node - RequestedVolumes 
map[string]*structs.VolumeRequest - Result bool + name string + node *structs.Node + requestedVolumes map[string]*structs.VolumeRequest + expect bool }{ - { // ReadWrite Request, ReadOnly Host - Node: nodes[0], - RequestedVolumes: readwriteRequest, - Result: false, + { + name: "read-write request / read-only host", + node: nodes[0], + requestedVolumes: readwriteRequest, + expect: false, }, - { // ReadOnly Request, ReadOnly Host - Node: nodes[0], - RequestedVolumes: readonlyRequest, - Result: true, + { + name: "read-only request / read-only host", + node: nodes[0], + requestedVolumes: readonlyRequest, + expect: true, }, - { // ReadOnly Request, ReadWrite Host - Node: nodes[1], - RequestedVolumes: readonlyRequest, - Result: true, + { + name: "read-only request / read-write host", + node: nodes[1], + requestedVolumes: readonlyRequest, + expect: true, }, - { // ReadWrite Request, ReadWrite Host - Node: nodes[1], - RequestedVolumes: readwriteRequest, - Result: true, + { + name: "read-write request / read-write host", + node: nodes[1], + requestedVolumes: readwriteRequest, + expect: true, + }, + { + name: "dynamic single-reader request / host not ready", + node: nodes[2], + requestedVolumes: dhvReadOnlyRequest, + expect: false, + }, + { + name: "dynamic single-reader request / caps match", + node: nodes[3], + requestedVolumes: dhvReadOnlyRequest, + expect: true, + }, + { + name: "dynamic single-reader request / no matching cap", + node: nodes[4], + requestedVolumes: dhvReadOnlyRequest, + expect: true, + }, + { + name: "dynamic single-writer request / caps match", + node: nodes[4], + requestedVolumes: dhvReadWriteRequest, + expect: true, }, } alloc := mock.Alloc() alloc.NodeID = nodes[1].ID - for i, c := range cases { - checker.SetVolumes(alloc.Name, c.RequestedVolumes) - if act := checker.Feasible(c.Node); act != c.Result { - t.Fatalf("case(%d) failed: got %v; want %v", i, act, c.Result) - } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + checker.SetVolumes(alloc.Name, structs.DefaultNamespace, tc.requestedVolumes) + actual := checker.Feasible(tc.node) + must.Eq(t, tc.expect, actual) + }) } } diff --git a/scheduler/scheduler.go b/scheduler/scheduler.go index 7e22070966f..9d46edf8801 100644 --- a/scheduler/scheduler.go +++ b/scheduler/scheduler.go @@ -118,6 +118,10 @@ type State interface { // CSIVolumeByID fetch CSI volumes, containing controller jobs CSIVolumesByNodeID(memdb.WatchSet, string, string) (memdb.ResultIterator, error) + HostVolumeByID(memdb.WatchSet, string, string, bool) (*structs.HostVolume, error) + + HostVolumesByNodeID(memdb.WatchSet, string, state.SortOption) (memdb.ResultIterator, error) + // LatestIndex returns the greatest index value for all indexes. 
LatestIndex() (uint64, error) } diff --git a/scheduler/stack.go b/scheduler/stack.go index 5c897ddf2de..1f2b6586886 100644 --- a/scheduler/stack.go +++ b/scheduler/stack.go @@ -51,6 +51,7 @@ type GenericStack struct { wrappedChecks *FeasibilityWrapper quota FeasibleIterator jobVersion *uint64 + jobNamespace string jobConstraint *ConstraintChecker taskGroupDrivers *DriverChecker taskGroupConstraint *ConstraintChecker @@ -101,6 +102,7 @@ func (s *GenericStack) SetJob(job *structs.Job) { jobVer := job.Version s.jobVersion = &jobVer + s.jobNamespace = job.Namespace s.jobConstraint.SetConstraints(job.Constraints) s.distinctHostsConstraint.SetJob(job) @@ -154,7 +156,7 @@ func (s *GenericStack) Select(tg *structs.TaskGroup, options *SelectOptions) *Ra s.taskGroupDrivers.SetDrivers(tgConstr.drivers) s.taskGroupConstraint.SetConstraints(tgConstr.constraints) s.taskGroupDevices.SetTaskGroup(tg) - s.taskGroupHostVolumes.SetVolumes(options.AllocName, tg.Volumes) + s.taskGroupHostVolumes.SetVolumes(options.AllocName, s.jobNamespace, tg.Volumes) s.taskGroupCSIVolumes.SetVolumes(options.AllocName, tg.Volumes) if len(tg.Networks) > 0 { s.taskGroupNetwork.SetNetwork(tg.Networks[0]) @@ -202,6 +204,7 @@ type SystemStack struct { ctx Context source *StaticIterator + jobNamespace string wrappedChecks *FeasibilityWrapper quota FeasibleIterator jobConstraint *ConstraintChecker @@ -313,6 +316,7 @@ func (s *SystemStack) SetNodes(baseNodes []*structs.Node) { } func (s *SystemStack) SetJob(job *structs.Job) { + s.jobNamespace = job.Namespace s.jobConstraint.SetConstraints(job.Constraints) s.distinctPropertyConstraint.SetJob(job) s.binPack.SetJob(job) @@ -345,7 +349,7 @@ func (s *SystemStack) Select(tg *structs.TaskGroup, options *SelectOptions) *Ran s.taskGroupDrivers.SetDrivers(tgConstr.drivers) s.taskGroupConstraint.SetConstraints(tgConstr.constraints) s.taskGroupDevices.SetTaskGroup(tg) - s.taskGroupHostVolumes.SetVolumes(options.AllocName, tg.Volumes) + s.taskGroupHostVolumes.SetVolumes(options.AllocName, s.jobNamespace, tg.Volumes) s.taskGroupCSIVolumes.SetVolumes(options.AllocName, tg.Volumes) if len(tg.Networks) > 0 { s.taskGroupNetwork.SetNetwork(tg.Networks[0]) From 258b159d530693ba3e493e37e8cd1d2ae7a3df1b Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Wed, 11 Dec 2024 09:33:53 +0100 Subject: [PATCH 28/35] stateful deployments: add Sticky property to task group volumes (#24641) --- nomad/structs/diff_test.go | 30 +++++++++++++------ nomad/structs/volumes.go | 9 +++++- .../{volume_test.go => volumes_test.go} | 3 +- 3 files changed, 30 insertions(+), 12 deletions(-) rename nomad/structs/{volume_test.go => volumes_test.go} (98%) diff --git a/nomad/structs/diff_test.go b/nomad/structs/diff_test.go index 0f87387ff6c..45563f23573 100644 --- a/nomad/structs/diff_test.go +++ b/nomad/structs/diff_test.go @@ -10,7 +10,7 @@ import ( "github.com/hashicorp/nomad/ci" "github.com/hashicorp/nomad/helper/pointer" - "github.com/stretchr/testify/require" + "github.com/shoenig/test/must" ) func TestJobDiff(t *testing.T) { @@ -4864,6 +4864,12 @@ func TestTaskGroupDiff(t *testing.T) { Old: "", New: "foo-src", }, + { + Type: DiffTypeAdded, + Name: "Sticky", + Old: "", + New: "false", + }, { Type: DiffTypeAdded, Name: "Type", @@ -5475,17 +5481,17 @@ func TestTaskGroupDiff(t *testing.T) { } for i, c := range cases { - require.NotEmpty(t, c.TestCase, "case #%d needs a name", i+1) + must.NotEq(t, c.TestCase, "", must.Sprintf("case #%d needs a 
name", i+1)) t.Run(c.TestCase, func(t *testing.T) { result, err := c.Old.Diff(c.New, c.Contextual) switch c.ExpErr { case true: - require.Error(t, err, "case %q expected error", c.TestCase) + must.Error(t, err, must.Sprintf("case %q expected error", c.TestCase)) case false: - require.NoError(t, err, "case %q expected no error", c.TestCase) - require.Equal(t, c.Expected, result) + must.NoError(t, err, must.Sprintf("case %q expected no error", c.TestCase)) + must.Eq(t, c.Expected, result) } }) } @@ -8370,6 +8376,12 @@ func TestTaskDiff(t *testing.T) { Old: "", New: "Z", }, + { + Type: DiffTypeAdded, + Name: "Sticky", + Old: "", + New: "false", + }, { Type: DiffTypeAdded, Name: "Volume", @@ -9870,10 +9882,10 @@ func TestTaskDiff(t *testing.T) { t.Run(c.Name, func(t *testing.T) { actual, err := c.Old.Diff(c.New, c.Contextual) if c.Error { - require.Error(t, err) + must.Error(t, err) } else { - require.NoError(t, err) - require.Equal(t, c.Expected, actual) + must.NoError(t, err) + must.Eq(t, c.Expected, actual) } }) } @@ -10848,7 +10860,7 @@ func TestServicesDiff(t *testing.T) { for _, c := range cases { t.Run(c.Name, func(t *testing.T) { actual := serviceDiffs(c.Old, c.New, c.Contextual) - require.Equal(t, c.Expected, actual) + must.Eq(t, c.Expected, actual) }) } } diff --git a/nomad/structs/volumes.go b/nomad/structs/volumes.go index 2f292098e6a..b8c95fc2862 100644 --- a/nomad/structs/volumes.go +++ b/nomad/structs/volumes.go @@ -103,12 +103,14 @@ func HostVolumeSliceMerge(a, b []*ClientHostVolumeConfig) []*ClientHostVolumeCon return n } -// VolumeRequest is a representation of a storage volume that a TaskGroup wishes to use. +// VolumeRequest is a representation of a storage volume that a TaskGroup wishes +// to use. type VolumeRequest struct { Name string Type string Source string ReadOnly bool + Sticky bool AccessMode CSIVolumeAccessMode AttachmentMode CSIVolumeAttachmentMode MountOptions *CSIMountOptions @@ -128,6 +130,8 @@ func (v *VolumeRequest) Equal(o *VolumeRequest) bool { return false case v.ReadOnly != o.ReadOnly: return false + case v.Sticky != o.Sticky: + return false case v.AccessMode != o.AccessMode: return false case v.AttachmentMode != o.AttachmentMode: @@ -259,6 +263,7 @@ type VolumeMount struct { Volume string Destination string ReadOnly bool + Sticky bool PropagationMode string SELinuxLabel string } @@ -279,6 +284,8 @@ func (v *VolumeMount) Equal(o *VolumeMount) bool { return false case v.ReadOnly != o.ReadOnly: return false + case v.Sticky != o.Sticky: + return false case v.PropagationMode != o.PropagationMode: return false case v.SELinuxLabel != o.SELinuxLabel: diff --git a/nomad/structs/volume_test.go b/nomad/structs/volumes_test.go similarity index 98% rename from nomad/structs/volume_test.go rename to nomad/structs/volumes_test.go index 02e0715d1a3..58585932d7c 100644 --- a/nomad/structs/volume_test.go +++ b/nomad/structs/volumes_test.go @@ -9,7 +9,6 @@ import ( "github.com/hashicorp/nomad/ci" "github.com/shoenig/test/must" - "github.com/stretchr/testify/require" ) func TestVolumeRequest_Validate(t *testing.T) { @@ -92,7 +91,7 @@ func TestVolumeRequest_Validate(t *testing.T) { t.Run(tc.name, func(t *testing.T) { err := tc.req.Validate(JobTypeSystem, tc.taskGroupCount, tc.canariesCount) for _, expected := range tc.expected { - require.Contains(t, err.Error(), expected) + must.StrContains(t, err.Error(), expected) } }) } From fd05e461dded64eff9064532d720f4b4cf028b92 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Mon, 16 Dec 2024 09:10:58 -0500 
Subject: [PATCH 29/35] dynamic host volumes: add -type flag to volume init (#24667) Adds a `-type` flag to the `volume init` command that generates an example volume specification with only those fields relevant to dynamic host volumes. This changeset also moves the string literals into uses of `go:embed` Ref: https://github.com/hashicorp/nomad/pull/24479 --- command/asset/asset.go | 12 +++ command/asset/volume.csi.hcl | 70 +++++++++++++ command/asset/volume.csi.json | 72 +++++++++++++ command/asset/volume.host.hcl | 28 +++++ command/asset/volume.host.json | 24 +++++ command/volume_init.go | 181 +++++---------------------------- 6 files changed, 230 insertions(+), 157 deletions(-) create mode 100644 command/asset/volume.csi.hcl create mode 100644 command/asset/volume.csi.json create mode 100644 command/asset/volume.host.hcl create mode 100644 command/asset/volume.host.json diff --git a/command/asset/asset.go b/command/asset/asset.go index b6c1d9112f6..3570cb78b05 100644 --- a/command/asset/asset.go +++ b/command/asset/asset.go @@ -22,3 +22,15 @@ var NodePoolSpec []byte //go:embed pool.nomad.json var NodePoolSpecJSON []byte + +//go:embed volume.csi.hcl +var CSIVolumeSpecHCL []byte + +//go:embed volume.csi.json +var CSIVolumeSpecJSON []byte + +//go:embed volume.host.hcl +var HostVolumeSpecHCL []byte + +//go:embed volume.host.json +var HostVolumeSpecJSON []byte diff --git a/command/asset/volume.csi.hcl b/command/asset/volume.csi.hcl new file mode 100644 index 00000000000..998edadeff9 --- /dev/null +++ b/command/asset/volume.csi.hcl @@ -0,0 +1,70 @@ +id = "ebs_prod_db1" +namespace = "default" +name = "database" +type = "csi" +plugin_id = "plugin_id" + +# For 'nomad volume register', provide the external ID from the storage +# provider. This field should be omitted when creating a volume with +# 'nomad volume create' +external_id = "vol-23452345" + +# For 'nomad volume create', specify a snapshot ID or volume to clone. You can +# specify only one of these two fields. +snapshot_id = "snap-12345" +# clone_id = "vol-abcdef" + +# Optional: for 'nomad volume create', specify a maximum and minimum capacity. +# Registering an existing volume will record but ignore these fields. +capacity_min = "10GiB" +capacity_max = "20G" + +# Required (at least one): for 'nomad volume create', specify one or more +# capabilities to validate. Registering an existing volume will record but +# ignore these fields. +capability { + access_mode = "single-node-writer" + attachment_mode = "file-system" +} + +capability { + access_mode = "single-node-reader" + attachment_mode = "block-device" +} + +# Optional: for 'nomad volume create', specify mount options to validate for +# 'attachment_mode = "file-system". Registering an existing volume will record +# but ignore these fields. +mount_options { + fs_type = "ext4" + mount_flags = ["ro"] +} + +# Optional: specify one or more locations where the volume must be accessible +# from. Refer to the plugin documentation for what segment values are supported. +topology_request { + preferred { + topology { segments { rack = "R1" } } + } + required { + topology { segments { rack = "R1" } } + topology { segments { rack = "R2", zone = "us-east-1a" } } + } +} + +# Optional: provide any secrets specified by the plugin. +secrets { + example_secret = "xyzzy" +} + +# Optional: provide a map of keys to string values expected by the plugin. 
+parameters { + skuname = "Premium_LRS" +} + +# Optional: for 'nomad volume register', provide a map of keys to string +# values expected by the plugin. This field will populated automatically by +# 'nomad volume create'. +context { + endpoint = "http://192.168.1.101:9425" +} diff --git a/command/asset/volume.csi.json b/command/asset/volume.csi.json new file mode 100644 index 00000000000..722433ed60e --- /dev/null +++ b/command/asset/volume.csi.json @@ -0,0 +1,72 @@ +{ + "id": "ebs_prod_db1", + "namespace": "default", + "name": "database", + "type": "csi", + "plugin_id": "plugin_id", + "external_id": "vol-23452345", + "snapshot_id": "snap-12345", + "capacity_min": "10GiB", + "capacity_max": "20G", + "capability": [ + { + "access_mode": "single-node-writer", + "attachment_mode": "file-system" + }, + { + "access_mode": "single-node-reader", + "attachment_mode": "block-device" + } + ], + "context": [ + { + "endpoint": "http://192.168.1.101:9425" + } + ], + "mount_options": [ + { + "fs_type": "ext4", + "mount_flags": [ + "ro" + ] + } + ], + "topology_request": { + "preferred": [ + { + "topology": { + "segments": { + "rack": "R1" + } + } + } + ], + "required": [ + { + "topology": { + "segments": { + "rack": "R1" + } + } + }, + { + "topology": { + "segments": { + "rack": "R2", + "zone": "us-east-1a" + } + } + } + ] + }, + "parameters": [ + { + "skuname": "Premium_LRS" + } + ], + "secrets": [ + { + "example_secret": "xyzzy" + } + ] +} diff --git a/command/asset/volume.host.hcl b/command/asset/volume.host.hcl new file mode 100644 index 00000000000..3447eef998f --- /dev/null +++ b/command/asset/volume.host.hcl @@ -0,0 +1,28 @@ +id = "disk_prod_db1" +namespace = "default" +name = "database" +type = "host" +plugin_id = "plugin_id" + +# Optional: for 'nomad volume create', specify a maximum and minimum capacity. +# Registering an existing volume will record but ignore these fields. +capacity_min = "10GiB" +capacity_max = "20G" + +# Required (at least one): for 'nomad volume create', specify one or more +# capabilities to validate. Registering an existing volume will record but +# ignore these fields. +capability { + access_mode = "single-node-writer" + attachment_mode = "file-system" +} + +capability { + access_mode = "single-node-reader" + attachment_mode = "block-device" +} + +# Optional: provide a map of keys to string values expected by the plugin. 
+parameters { + skuname = "Premium_LRS" +} diff --git a/command/asset/volume.host.json b/command/asset/volume.host.json new file mode 100644 index 00000000000..59a4f4ef0ec --- /dev/null +++ b/command/asset/volume.host.json @@ -0,0 +1,24 @@ +{ + "id": "disk_prod_db1", + "namespace": "default", + "name": "database", + "type": "host", + "plugin_id": "plugin_id", + "capacity_min": "10GiB", + "capacity_max": "20G", + "capability": [ + { + "access_mode": "single-node-writer", + "attachment_mode": "file-system" + }, + { + "access_mode": "single-node-reader", + "attachment_mode": "block-device" + } + ], + "parameters": [ + { + "skuname": "Premium_LRS" + } + ] +} diff --git a/command/volume_init.go b/command/volume_init.go index bd37df8cfa1..93cd9e17321 100644 --- a/command/volume_init.go +++ b/command/volume_init.go @@ -8,17 +8,18 @@ import ( "os" "strings" + "github.com/hashicorp/nomad/command/asset" "github.com/posener/complete" ) const ( - // DefaultHclVolumeInitName is the default name we use when initializing + // defaultHclVolumeInitName is the default name we use when initializing // the example volume file in HCL format - DefaultHclVolumeInitName = "volume.hcl" + defaultHclVolumeInitName = "volume.hcl" // DefaultHclVolumeInitName is the default name we use when initializing // the example volume file in JSON format - DefaultJsonVolumeInitName = "volume.json" + defaultJsonVolumeInitName = "volume.json" ) // VolumeInitCommand generates a new volume spec that you can customize to @@ -39,6 +40,11 @@ Init Options: -json Create an example JSON volume specification. + + -type + Create an example for a specific type of volume (one of "csi" or "host", + defaults to "csi"). + ` return strings.TrimSpace(helpText) } @@ -50,6 +56,7 @@ func (c *VolumeInitCommand) Synopsis() string { func (c *VolumeInitCommand) AutocompleteFlags() complete.Flags { return complete.Flags{ "-json": complete.PredictNothing, + "-type": complete.PredictSet("host", "csi"), } } @@ -61,9 +68,11 @@ func (c *VolumeInitCommand) Name() string { return "volume init" } func (c *VolumeInitCommand) Run(args []string) int { var jsonOutput bool + var volType string flags := c.Meta.FlagSet(c.Name(), FlagSetClient) flags.Usage = func() { c.Ui.Output(c.Help()) } flags.BoolVar(&jsonOutput, "json", false, "") + flags.StringVar(&volType, "type", "csi", "type of volume") if err := flags.Parse(args); err != nil { return 1 @@ -77,11 +86,17 @@ func (c *VolumeInitCommand) Run(args []string) int { return 1 } - fileName := DefaultHclVolumeInitName - fileContent := defaultHclVolumeSpec - if jsonOutput { - fileName = DefaultJsonVolumeInitName - fileContent = defaultJsonVolumeSpec + fileName := defaultHclVolumeInitName + fileContent := asset.CSIVolumeSpecHCL + + if volType == "host" && !jsonOutput { + fileContent = asset.HostVolumeSpecHCL + } else if volType == "host" && jsonOutput { + fileName = defaultJsonVolumeInitName + fileContent = asset.HostVolumeSpecJSON + } else if jsonOutput { + fileName = defaultJsonVolumeInitName + fileContent = asset.CSIVolumeSpecJSON } if len(args) == 1 { fileName = args[0] @@ -99,7 +114,7 @@ func (c *VolumeInitCommand) Run(args []string) int { } // Write out the example - err = os.WriteFile(fileName, []byte(fileContent), 0660) + err = os.WriteFile(fileName, fileContent, 0660) if err != nil { c.Ui.Error(fmt.Sprintf("Failed to write %q: %v", fileName, err)) return 1 @@ -109,151 +124,3 @@ func (c *VolumeInitCommand) Run(args []string) int { c.Ui.Output(fmt.Sprintf("Example volume specification written to %s", 
fileName)) return 0 } - -var defaultHclVolumeSpec = strings.TrimSpace(` -id = "ebs_prod_db1" -namespace = "default" -name = "database" -type = "csi" -plugin_id = "plugin_id" - -# For 'nomad volume register', provide the external ID from the storage -# provider. This field should be omitted when creating a volume with -# 'nomad volume create' -external_id = "vol-23452345" - -# For 'nomad volume create', specify a snapshot ID or volume to clone. You can -# specify only one of these two fields. -snapshot_id = "snap-12345" -# clone_id = "vol-abcdef" - -# Optional: for 'nomad volume create', specify a maximum and minimum capacity. -# Registering an existing volume will record but ignore these fields. -capacity_min = "10GiB" -capacity_max = "20G" - -# Required (at least one): for 'nomad volume create', specify one or more -# capabilities to validate. Registering an existing volume will record but -# ignore these fields. -capability { - access_mode = "single-node-writer" - attachment_mode = "file-system" -} - -capability { - access_mode = "single-node-reader" - attachment_mode = "block-device" -} - -# Optional: for 'nomad volume create', specify mount options to validate for -# 'attachment_mode = "file-system". Registering an existing volume will record -# but ignore these fields. -mount_options { - fs_type = "ext4" - mount_flags = ["ro"] -} - -# Optional: specify one or more locations where the volume must be accessible -# from. Refer to the plugin documentation for what segment values are supported. -topology_request { - preferred { - topology { segments { rack = "R1" } } - } - required { - topology { segments { rack = "R1" } } - topology { segments { rack = "R2", zone = "us-east-1a" } } - } -} - -# Optional: provide any secrets specified by the plugin. -secrets { - example_secret = "xyzzy" -} - -# Optional: provide a map of keys to string values expected by the plugin. -parameters { - skuname = "Premium_LRS" -} - -# Optional: for 'nomad volume register', provide a map of keys to string -# values expected by the plugin. This field will populated automatically by -# 'nomad volume create'. 
-context { - endpoint = "http://192.168.1.101:9425" -} -`) - -var defaultJsonVolumeSpec = strings.TrimSpace(` -{ - "id": "ebs_prod_db1", - "namespace": "default", - "name": "database", - "type": "csi", - "plugin_id": "plugin_id", - "external_id": "vol-23452345", - "snapshot_id": "snap-12345", - "capacity_min": "10GiB", - "capacity_max": "20G", - "capability": [ - { - "access_mode": "single-node-writer", - "attachment_mode": "file-system" - }, - { - "access_mode": "single-node-reader", - "attachment_mode": "block-device" - } - ], - "context": [ - { - "endpoint": "http://192.168.1.101:9425" - } - ], - "mount_options": [ - { - "fs_type": "ext4", - "mount_flags": [ - "ro" - ] - } - ], - "topology_request": { - "preferred": [ - { - "topology": { - "segments": { - "rack": "R1" - } - } - } - ], - "required": [ - { - "topology": { - "segments": { - "rack": "R1" - } - } - }, - { - "topology": { - "segments": { - "rack": "R2", - "zone": "us-east-1a" - } - } - } - ] - }, - "parameters": [ - { - "skuname": "Premium_LRS" - } - ], - "secrets": [ - { - "example_secret": "xyzzy" - } - ] -} -`) From 967addec48a16c523a98a8c83c9ad1b63d9cec32 Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Tue, 17 Dec 2024 20:34:25 +0100 Subject: [PATCH 30/35] stateful deployments: add corrections to API structs and methods (#24700) This changeset includes changes accidentally left out from 24641. --- api/tasks.go | 1 + command/agent/job_endpoint.go | 1 + nomad/structs/diff_test.go | 6 ------ nomad/structs/volumes.go | 3 --- 4 files changed, 2 insertions(+), 9 deletions(-) diff --git a/api/tasks.go b/api/tasks.go index 21d99bf4c2c..b39c55ad56b 100644 --- a/api/tasks.go +++ b/api/tasks.go @@ -455,6 +455,7 @@ type VolumeRequest struct { Type string `hcl:"type,optional"` Source string `hcl:"source,optional"` ReadOnly bool `hcl:"read_only,optional"` + Sticky bool `hcl:"sticky,optional"` AccessMode string `hcl:"access_mode,optional"` AttachmentMode string `hcl:"attachment_mode,optional"` MountOptions *CSIMountOptions `hcl:"mount_options,block"` diff --git a/command/agent/job_endpoint.go b/command/agent/job_endpoint.go index fd0c0c03501..5ebe21e1eb3 100644 --- a/command/agent/job_endpoint.go +++ b/command/agent/job_endpoint.go @@ -1334,6 +1334,7 @@ func ApiTgToStructsTG(job *structs.Job, taskGroup *api.TaskGroup, tg *structs.Ta Name: v.Name, Type: v.Type, ReadOnly: v.ReadOnly, + Sticky: v.Sticky, Source: v.Source, AttachmentMode: structs.CSIVolumeAttachmentMode(v.AttachmentMode), AccessMode: structs.CSIVolumeAccessMode(v.AccessMode), diff --git a/nomad/structs/diff_test.go b/nomad/structs/diff_test.go index 45563f23573..e394545ad1d 100644 --- a/nomad/structs/diff_test.go +++ b/nomad/structs/diff_test.go @@ -8376,12 +8376,6 @@ func TestTaskDiff(t *testing.T) { Old: "", New: "Z", }, - { - Type: DiffTypeAdded, - Name: "Sticky", - Old: "", - New: "false", - }, { Type: DiffTypeAdded, Name: "Volume", diff --git a/nomad/structs/volumes.go b/nomad/structs/volumes.go index b8c95fc2862..18d5f39289e 100644 --- a/nomad/structs/volumes.go +++ b/nomad/structs/volumes.go @@ -263,7 +263,6 @@ type VolumeMount struct { Volume string Destination string ReadOnly bool - Sticky bool PropagationMode string SELinuxLabel string } @@ -284,8 +283,6 @@ func (v *VolumeMount) Equal(o *VolumeMount) bool { return false case v.ReadOnly != o.ReadOnly: return false - case v.Sticky != o.Sticky: - return false case v.PropagationMode != o.PropagationMode: return false case v.SELinuxLabel != o.SELinuxLabel: From 
2adf6d520870e78bbc68f26869c1a397ff4e59fa Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Wed, 18 Dec 2024 08:44:08 -0500 Subject: [PATCH 31/35] dynamic host volumes: remove multi-node access modes (#24705) CSI volumes support multi-node access patterns on the same volume ID, but dynamic host volumes by nature do not. The underlying volume may actually be multi-node (ex. NFS), but Nomad is ignorant of this. Remove the CSI-specific multi-node access modes and instead include the single-node access modes intended that are currently in the alpha edition of the CSI spec but which are better suited for DHV. This PR has been extracted from #24684 to keep reviews manageable. Ref: https://github.com/hashicorp/nomad/pull/24479 Ref: https://github.com/hashicorp/nomad/pull/24684 --- nomad/structs/host_volumes.go | 15 ++++++--------- nomad/structs/host_volumes_test.go | 6 +++--- nomad/structs/volumes.go | 18 ++++++++++++------ nomad/structs/volumes_test.go | 4 ++-- 4 files changed, 23 insertions(+), 20 deletions(-) diff --git a/nomad/structs/host_volumes.go b/nomad/structs/host_volumes.go index c254bf72902..440ad956512 100644 --- a/nomad/structs/host_volumes.go +++ b/nomad/structs/host_volumes.go @@ -275,9 +275,8 @@ func (hvc *HostVolumeCapability) Validate() error { switch hvc.AccessMode { case HostVolumeAccessModeSingleNodeReader, HostVolumeAccessModeSingleNodeWriter, - HostVolumeAccessModeMultiNodeReader, - HostVolumeAccessModeMultiNodeSingleWriter, - HostVolumeAccessModeMultiNodeMultiWriter: + HostVolumeAccessModeSingleNodeSingleWriter, + HostVolumeAccessModeSingleNodeMultiWriter: default: return fmt.Errorf("invalid access mode: %q", hvc.AccessMode) } @@ -302,12 +301,10 @@ type HostVolumeAccessMode string const ( HostVolumeAccessModeUnknown HostVolumeAccessMode = "" - HostVolumeAccessModeSingleNodeReader HostVolumeAccessMode = "single-node-reader-only" - HostVolumeAccessModeSingleNodeWriter HostVolumeAccessMode = "single-node-writer" - - HostVolumeAccessModeMultiNodeReader HostVolumeAccessMode = "multi-node-reader-only" - HostVolumeAccessModeMultiNodeSingleWriter HostVolumeAccessMode = "multi-node-single-writer" - HostVolumeAccessModeMultiNodeMultiWriter HostVolumeAccessMode = "multi-node-multi-writer" + HostVolumeAccessModeSingleNodeReader HostVolumeAccessMode = "single-node-reader-only" + HostVolumeAccessModeSingleNodeWriter HostVolumeAccessMode = "single-node-writer" + HostVolumeAccessModeSingleNodeSingleWriter HostVolumeAccessMode = "single-node-single-writer" + HostVolumeAccessModeSingleNodeMultiWriter HostVolumeAccessMode = "single-node-multi-writer" ) // HostVolumeStub is used for responses for the list volumes endpoint diff --git a/nomad/structs/host_volumes_test.go b/nomad/structs/host_volumes_test.go index 499bc27d1c8..2a03e838daf 100644 --- a/nomad/structs/host_volumes_test.go +++ b/nomad/structs/host_volumes_test.go @@ -45,7 +45,7 @@ func TestHostVolume_Copy(t *testing.T) { out.Constraints[0].LTarget = "${meta.node_class}" out.RequestedCapabilities = append(out.RequestedCapabilities, &HostVolumeCapability{ AttachmentMode: HostVolumeAttachmentModeBlockDevice, - AccessMode: HostVolumeAccessModeMultiNodeReader, + AccessMode: HostVolumeAccessModeSingleNodeMultiWriter, }) out.Parameters["foo"] = "baz" @@ -195,7 +195,7 @@ func TestHostVolume_CanonicalizeForUpdate(t *testing.T) { RequestedCapacityMaxBytes: 500000, RequestedCapabilities: []*HostVolumeCapability{{ AttachmentMode: HostVolumeAttachmentModeFilesystem, - AccessMode: HostVolumeAccessModeMultiNodeMultiWriter, + 
AccessMode: HostVolumeAccessModeSingleNodeMultiWriter, }}, } existing := &HostVolume{ @@ -240,7 +240,7 @@ func TestHostVolume_CanonicalizeForUpdate(t *testing.T) { must.Eq(t, []*HostVolumeCapability{{ AttachmentMode: HostVolumeAttachmentModeFilesystem, - AccessMode: HostVolumeAccessModeMultiNodeMultiWriter, + AccessMode: HostVolumeAccessModeSingleNodeMultiWriter, }}, vol.RequestedCapabilities) must.Eq(t, "/var/nomad/alloc_mounts/82f357d6.ext4", vol.HostPath) diff --git a/nomad/structs/volumes.go b/nomad/structs/volumes.go index 18d5f39289e..58c4eefacd3 100644 --- a/nomad/structs/volumes.go +++ b/nomad/structs/volumes.go @@ -170,16 +170,22 @@ func (v *VolumeRequest) Validate(jobType string, taskGroupCount, canaries int) e switch v.Type { case VolumeTypeHost: - if v.AttachmentMode != CSIVolumeAttachmentModeUnknown { - addErr("host volumes cannot have an attachment mode") - } - if v.AccessMode != CSIVolumeAccessModeUnknown { - addErr("host volumes cannot have an access mode") - } if v.MountOptions != nil { + // TODO(1.10.0): support mount options for dynamic host volumes addErr("host volumes cannot have mount options") } + switch v.AccessMode { + case CSIVolumeAccessModeSingleNodeReader, CSIVolumeAccessModeMultiNodeReader: + if !v.ReadOnly { + addErr("%s volumes must be read-only", v.AccessMode) + } + default: + // dynamic host volumes are all "per node" so there's no way to + // validate that other access modes work for a given volume until we + // have access to other allocations (in the scheduler) + } + case VolumeTypeCSI: switch v.AttachmentMode { diff --git a/nomad/structs/volumes_test.go b/nomad/structs/volumes_test.go index 58585932d7c..9b697faf18c 100644 --- a/nomad/structs/volumes_test.go +++ b/nomad/structs/volumes_test.go @@ -31,9 +31,9 @@ func TestVolumeRequest_Validate(t *testing.T) { { name: "host volume with CSI volume config", expected: []string{ - "host volumes cannot have an access mode", - "host volumes cannot have an attachment mode", + "volume has an empty source", "host volumes cannot have mount options", + "single-node-reader-only volumes must be read-only", "volume cannot be per_alloc for system or sysbatch jobs", "volume cannot be per_alloc when canaries are in use", }, From 8cbb74786c3d3fbce29ddb44456465ee290cb79b Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Wed, 18 Dec 2024 19:52:07 +0100 Subject: [PATCH 32/35] stateful deployments: find feasible node for sticky host volumes (#24558) This changeset implements node feasibility checks for sticky host volumes. 
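For illustration, a minimal jobspec sketch of a sticky host volume request (an assumption of how the fields added in this series surface in HCL: `sticky` comes from the new `api.VolumeRequest` tag in #24700, and the access/attachment mode strings follow the existing volume block conventions):

    group "db" {
      volume "data" {
        type            = "host"
        source          = "database"
        sticky          = true
        access_mode     = "single-node-writer"
        attachment_mode = "file-system"
      }
    }

When such a request is placed, the scheduler records the claimed host volume IDs on the allocation, and on reschedule only nodes exposing one of those volume IDs pass the feasibility check below.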
--- api/allocations.go | 2 + nomad/structs/structs.go | 7 ++ scheduler/feasible.go | 28 ++++++-- scheduler/feasible_test.go | 113 ++++++++++++++++++++++++++++++- scheduler/generic_sched.go | 48 +++++++++++++ scheduler/generic_sched_test.go | 115 ++++++++++++++++++++++++++++++++ scheduler/scheduler.go | 3 + scheduler/stack.go | 13 ++-- 8 files changed, 314 insertions(+), 15 deletions(-) diff --git a/api/allocations.go b/api/allocations.go index b35e338c559..bf8059d32c2 100644 --- a/api/allocations.go +++ b/api/allocations.go @@ -278,6 +278,8 @@ type Allocation struct { Resources *Resources TaskResources map[string]*Resources AllocatedResources *AllocatedResources + HostVolumeIDs []string + CSIVolumeIDs []string Services map[string]string Metrics *AllocationMetric DesiredStatus string diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index f3d0f5dc1b0..e55089f3e12 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -11114,6 +11114,13 @@ type Allocation struct { // AllocatedResources is the total resources allocated for the task group. AllocatedResources *AllocatedResources + // HostVolumeIDs is a list of host volume IDs that this allocation + // has claimed. + HostVolumeIDs []string + + // CSIVolumeIDs is a list of CSI volume IDs that this allocation has claimed. + CSIVolumeIDs []string + // Metrics associated with this allocation Metrics *AllocMetric diff --git a/scheduler/feasible.go b/scheduler/feasible.go index 60442f92e7f..69ab03de7c3 100644 --- a/scheduler/feasible.go +++ b/scheduler/feasible.go @@ -8,6 +8,7 @@ import ( "fmt" "reflect" "regexp" + "slices" "strconv" "strings" @@ -137,23 +138,28 @@ func NewRandomIterator(ctx Context, nodes []*structs.Node) *StaticIterator { // HostVolumeChecker is a FeasibilityChecker which returns whether a node has // the host volumes necessary to schedule a task group. type HostVolumeChecker struct { - ctx Context - volumeReqs []*structs.VolumeRequest - namespace string + ctx Context + volumeReqs []*structs.VolumeRequest + hostVolumeIDs []string + namespace string } // NewHostVolumeChecker creates a HostVolumeChecker from a set of volumes func NewHostVolumeChecker(ctx Context) *HostVolumeChecker { return &HostVolumeChecker{ - ctx: ctx, - volumeReqs: []*structs.VolumeRequest{}, + ctx: ctx, + volumeReqs: []*structs.VolumeRequest{}, + hostVolumeIDs: []string{}, } } // SetVolumes takes the volumes required by a task group and updates the checker. -func (h *HostVolumeChecker) SetVolumes(allocName string, ns string, volumes map[string]*structs.VolumeRequest) { +func (h *HostVolumeChecker) SetVolumes( + allocName, ns string, volumes map[string]*structs.VolumeRequest, allocHostVolumeIDs []string, +) { h.namespace = ns h.volumeReqs = []*structs.VolumeRequest{} + h.hostVolumeIDs = allocHostVolumeIDs for _, req := range volumes { if req.Type != structs.VolumeTypeHost { continue // filter CSI volumes @@ -181,7 +187,6 @@ func (h *HostVolumeChecker) Feasible(candidate *structs.Node) bool { } func (h *HostVolumeChecker) hasVolumes(n *structs.Node) bool { - // Fast path: Requested no volumes. No need to check further. 
if len(h.volumeReqs) == 0 { return true @@ -216,6 +221,15 @@ func (h *HostVolumeChecker) hasVolumes(n *structs.Node) bool { if !capOk { return false } + + if req.Sticky { + if slices.Contains(h.hostVolumeIDs, vol.ID) || len(h.hostVolumeIDs) == 0 { + return true + } + + return false + } + } else if !req.ReadOnly { // this is a static host volume and can only be mounted ReadOnly, // validate that no requests for it are ReadWrite. diff --git a/scheduler/feasible_test.go b/scheduler/feasible_test.go index 9c5a9aaf1a7..3351210c2ee 100644 --- a/scheduler/feasible_test.go +++ b/scheduler/feasible_test.go @@ -177,7 +177,7 @@ func TestHostVolumeChecker(t *testing.T) { alloc.NodeID = nodes[2].ID for i, c := range cases { - checker.SetVolumes(alloc.Name, structs.DefaultNamespace, c.RequestedVolumes) + checker.SetVolumes(alloc.Name, structs.DefaultNamespace, c.RequestedVolumes, alloc.HostVolumeIDs) if act := checker.Feasible(c.Node); act != c.Result { t.Fatalf("case(%d) failed: got %v; want %v", i, act, c.Result) } @@ -359,7 +359,116 @@ func TestHostVolumeChecker_ReadOnly(t *testing.T) { for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { - checker.SetVolumes(alloc.Name, structs.DefaultNamespace, tc.requestedVolumes) + checker.SetVolumes(alloc.Name, structs.DefaultNamespace, tc.requestedVolumes, alloc.HostVolumeIDs) + actual := checker.Feasible(tc.node) + must.Eq(t, tc.expect, actual) + }) + } +} + +func TestHostVolumeChecker_Sticky(t *testing.T) { + ci.Parallel(t) + + store, ctx := testContext(t) + + nodes := []*structs.Node{ + mock.Node(), + mock.Node(), + } + + hostVolCapsReadWrite := []*structs.HostVolumeCapability{ + { + AttachmentMode: structs.HostVolumeAttachmentModeFilesystem, + AccessMode: structs.HostVolumeAccessModeSingleNodeReader, + }, + { + AttachmentMode: structs.HostVolumeAttachmentModeFilesystem, + AccessMode: structs.HostVolumeAccessModeSingleNodeWriter, + }, + } + + dhv := &structs.HostVolume{ + Namespace: structs.DefaultNamespace, + ID: uuid.Generate(), + Name: "foo", + NodeID: nodes[1].ID, + RequestedCapabilities: hostVolCapsReadWrite, + State: structs.HostVolumeStateReady, + } + + nodes[0].HostVolumes = map[string]*structs.ClientHostVolumeConfig{} + nodes[1].HostVolumes = map[string]*structs.ClientHostVolumeConfig{"foo": {ID: dhv.ID}} + + for _, node := range nodes { + must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, 1000, node)) + } + must.NoError(t, store.UpsertHostVolume(1000, dhv)) + + stickyRequest := map[string]*structs.VolumeRequest{ + "foo": { + Type: "host", + Source: "foo", + Sticky: true, + AccessMode: structs.CSIVolumeAccessModeSingleNodeWriter, + AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem, + }, + } + + checker := NewHostVolumeChecker(ctx) + + // alloc0 wants a previously registered volume ID that's available on node1 + alloc0 := mock.Alloc() + alloc0.NodeID = nodes[1].ID + alloc0.HostVolumeIDs = []string{dhv.ID} + + // alloc1 wants a volume ID that's available on node1 but hasn't used it + // before + alloc1 := mock.Alloc() + alloc1.NodeID = nodes[1].ID + + // alloc2 wants a volume ID that's unrelated + alloc2 := mock.Alloc() + alloc2.NodeID = nodes[1].ID + alloc2.HostVolumeIDs = []string{uuid.Generate()} + + // insert all the allocs into the state + must.NoError(t, store.UpsertAllocs(structs.MsgTypeTestSetup, 1000, []*structs.Allocation{alloc0, alloc1, alloc2})) + + cases := []struct { + name string + node *structs.Node + alloc *structs.Allocation + expect bool + }{ + { + "alloc asking for a sticky volume on an infeasible node", + 
nodes[0], + alloc0, + false, + }, + { + "alloc asking for a sticky volume on a feasible node", + nodes[1], + alloc0, + true, + }, + { + "alloc asking for a sticky volume on a feasible node for the first time", + nodes[1], + alloc1, + true, + }, + { + "alloc asking for an unrelated volume", + nodes[1], + alloc2, + false, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + checker.SetVolumes(tc.alloc.Name, structs.DefaultNamespace, stickyRequest, tc.alloc.HostVolumeIDs) actual := checker.Feasible(tc.node) must.Eq(t, tc.expect, actual) }) diff --git a/scheduler/generic_sched.go b/scheduler/generic_sched.go index f9fd669e592..60b4f7f1eed 100644 --- a/scheduler/generic_sched.go +++ b/scheduler/generic_sched.go @@ -6,6 +6,7 @@ package scheduler import ( "fmt" "runtime/debug" + "slices" "sort" "time" @@ -657,6 +658,18 @@ func (s *GenericScheduler) computePlacements(destructive, place []placementResul "old_alloc_name", oldAllocName, "new_alloc_name", newAllocName) } + // Are there sticky volumes requested by the task group for the first time? If + // yes, make sure the allocation stores their IDs for future reschedules. + var newHostVolumeIDs []string + for _, v := range tg.Volumes { + if v.Sticky { + if missing.PreviousAllocation() != nil && len(missing.PreviousAllocation().HostVolumeIDs) > 0 { + continue + } + newHostVolumeIDs = append(newHostVolumeIDs, option.Node.HostVolumes[v.Source].ID) + } + } + // Create an allocation for this alloc := &structs.Allocation{ ID: uuid.Generate(), @@ -681,6 +694,10 @@ func (s *GenericScheduler) computePlacements(destructive, place []placementResul }, } + if len(newHostVolumeIDs) > 0 { + alloc.HostVolumeIDs = newHostVolumeIDs + } + // If the new allocation is replacing an older allocation then we // set the record the older allocation id so that they are chained if prevAllocation != nil { @@ -689,6 +706,10 @@ func (s *GenericScheduler) computePlacements(destructive, place []placementResul updateRescheduleTracker(alloc, prevAllocation, now) } + if len(prevAllocation.HostVolumeIDs) > 0 { + alloc.HostVolumeIDs = prevAllocation.HostVolumeIDs + } + // If the allocation has task handles, // copy them to the new allocation propagateTaskState(alloc, prevAllocation, missing.PreviousLost()) @@ -838,6 +859,10 @@ func getSelectOptions(prevAllocation *structs.Allocation, preferredNode *structs } } selectOptions.PenaltyNodeIDs = penaltyNodes + + if prevAllocation.HostVolumeIDs != nil { + selectOptions.AllocationHostVolumeIDs = prevAllocation.HostVolumeIDs + } } if preferredNode != nil { selectOptions.PreferredNodes = []*structs.Node{preferredNode} @@ -910,6 +935,29 @@ func (s *GenericScheduler) findPreferredNode(place placementResult) (*structs.No return preferredNode, nil } } + + for _, vol := range place.TaskGroup().Volumes { + if !vol.Sticky { + continue + } + + var preferredNode *structs.Node + preferredNode, err := s.state.NodeByID(nil, prev.NodeID) + if err != nil { + return nil, err + } + + if preferredNode != nil && preferredNode.Ready() { + // if this node has at least one of the allocation volumes, it's a + // preferred one + for _, vol := range preferredNode.HostVolumes { + if slices.Contains(prev.HostVolumeIDs, vol.ID) { + return preferredNode, nil + } + } + } + } + return nil, nil } diff --git a/scheduler/generic_sched_test.go b/scheduler/generic_sched_test.go index adda5e2cb2a..5d471423136 100644 --- a/scheduler/generic_sched_test.go +++ b/scheduler/generic_sched_test.go @@ -218,6 +218,121 @@ func 
TestServiceSched_JobRegister_StickyAllocs(t *testing.T) { } } +func TestServiceSched_JobRegister_StickyVolumes(t *testing.T) { + ci.Parallel(t) + + h := NewHarness(t) + + nodes := []*structs.Node{ + mock.Node(), + mock.Node(), + } + + hostVolCapsReadWrite := []*structs.HostVolumeCapability{ + { + AttachmentMode: structs.HostVolumeAttachmentModeFilesystem, + AccessMode: structs.HostVolumeAccessModeSingleNodeReader, + }, + { + AttachmentMode: structs.HostVolumeAttachmentModeFilesystem, + AccessMode: structs.HostVolumeAccessModeSingleNodeWriter, + }, + } + + dhv := &structs.HostVolume{ + Namespace: structs.DefaultNamespace, + ID: uuid.Generate(), + Name: "foo", + NodeID: nodes[1].ID, + RequestedCapabilities: hostVolCapsReadWrite, + State: structs.HostVolumeStateReady, + } + + nodes[0].HostVolumes = map[string]*structs.ClientHostVolumeConfig{} + nodes[1].HostVolumes = map[string]*structs.ClientHostVolumeConfig{"foo": {ID: dhv.ID}} + + for _, node := range nodes { + must.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, 1000, node)) + } + must.NoError(t, h.State.UpsertHostVolume(1000, dhv)) + + stickyRequest := map[string]*structs.VolumeRequest{ + "foo": { + Type: "host", + Source: "foo", + Sticky: true, + AccessMode: structs.CSIVolumeAccessModeSingleNodeWriter, + AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem, + }, + } + + // Create a job + job := mock.Job() + job.TaskGroups[0].Volumes = stickyRequest + must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job)) + + // Create a mock evaluation to register the job + eval := &structs.Evaluation{ + Namespace: structs.DefaultNamespace, + ID: uuid.Generate(), + Priority: job.Priority, + TriggeredBy: structs.EvalTriggerJobRegister, + JobID: job.ID, + Status: structs.EvalStatusPending, + } + must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval})) + + // Process the evaluation + must.NoError(t, h.Process(NewServiceScheduler, eval)) + + // Ensure the plan allocated + plan := h.Plans[0] + planned := make(map[string]*structs.Allocation) + for _, allocList := range plan.NodeAllocation { + for _, alloc := range allocList { + planned[alloc.ID] = alloc + } + } + must.MapLen(t, 10, planned) + + // Ensure that the allocations got the host volume ID added + for _, p := range planned { + must.Eq(t, p.PreviousAllocation, "") + must.Eq(t, p.HostVolumeIDs[0], dhv.ID) + } + + // Update the job to force a rolling upgrade + updated := job.Copy() + updated.TaskGroups[0].Tasks[0].Resources.CPU += 10 + must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, updated)) + + // Create a mock evaluation to handle the update + eval = &structs.Evaluation{ + Namespace: structs.DefaultNamespace, + ID: uuid.Generate(), + Priority: job.Priority, + TriggeredBy: structs.EvalTriggerNodeUpdate, + JobID: job.ID, + Status: structs.EvalStatusPending, + } + must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval})) + must.NoError(t, h.Process(NewServiceScheduler, eval)) + + // Ensure we have created only one new allocation + must.SliceLen(t, 2, h.Plans) + plan = h.Plans[0] + var newPlanned []*structs.Allocation + for _, allocList := range plan.NodeAllocation { + newPlanned = append(newPlanned, allocList...) 
+ } + must.SliceLen(t, 10, newPlanned) + + // Ensure that the new allocations retain the host volume ID + for _, new := range newPlanned { + must.Eq(t, new.HostVolumeIDs[0], dhv.ID) + } +} + func TestServiceSched_JobRegister_DiskConstraints(t *testing.T) { ci.Parallel(t) diff --git a/scheduler/scheduler.go b/scheduler/scheduler.go index 9d46edf8801..27f87e79745 100644 --- a/scheduler/scheduler.go +++ b/scheduler/scheduler.go @@ -118,8 +118,11 @@ type State interface { // CSIVolumeByID fetch CSI volumes, containing controller jobs CSIVolumesByNodeID(memdb.WatchSet, string, string) (memdb.ResultIterator, error) + // HostVolumeByID fetches host volume by its ID HostVolumeByID(memdb.WatchSet, string, string, bool) (*structs.HostVolume, error) + // HostVolumesByNodeID gets an iterator with all the volumes attached to a + // given node HostVolumesByNodeID(memdb.WatchSet, string, state.SortOption) (memdb.ResultIterator, error) // LatestIndex returns the greatest index value for all indexes. diff --git a/scheduler/stack.go b/scheduler/stack.go index 1f2b6586886..f978c753f68 100644 --- a/scheduler/stack.go +++ b/scheduler/stack.go @@ -35,10 +35,11 @@ type Stack interface { } type SelectOptions struct { - PenaltyNodeIDs map[string]struct{} - PreferredNodes []*structs.Node - Preempt bool - AllocName string + PenaltyNodeIDs map[string]struct{} + PreferredNodes []*structs.Node + Preempt bool + AllocName string + AllocationHostVolumeIDs []string } // GenericStack is the Stack used for the Generic scheduler. It is @@ -156,7 +157,7 @@ func (s *GenericStack) Select(tg *structs.TaskGroup, options *SelectOptions) *Ra s.taskGroupDrivers.SetDrivers(tgConstr.drivers) s.taskGroupConstraint.SetConstraints(tgConstr.constraints) s.taskGroupDevices.SetTaskGroup(tg) - s.taskGroupHostVolumes.SetVolumes(options.AllocName, s.jobNamespace, tg.Volumes) + s.taskGroupHostVolumes.SetVolumes(options.AllocName, s.jobNamespace, tg.Volumes, options.AllocationHostVolumeIDs) s.taskGroupCSIVolumes.SetVolumes(options.AllocName, tg.Volumes) if len(tg.Networks) > 0 { s.taskGroupNetwork.SetNetwork(tg.Networks[0]) @@ -349,7 +350,7 @@ func (s *SystemStack) Select(tg *structs.TaskGroup, options *SelectOptions) *Ran s.taskGroupDrivers.SetDrivers(tgConstr.drivers) s.taskGroupConstraint.SetConstraints(tgConstr.constraints) s.taskGroupDevices.SetTaskGroup(tg) - s.taskGroupHostVolumes.SetVolumes(options.AllocName, s.jobNamespace, tg.Volumes) + s.taskGroupHostVolumes.SetVolumes(options.AllocName, s.jobNamespace, tg.Volumes, options.AllocationHostVolumeIDs) s.taskGroupCSIVolumes.SetVolumes(options.AllocName, tg.Volumes) if len(tg.Networks) > 0 { s.taskGroupNetwork.SetNetwork(tg.Networks[0]) From af967184a675e89ff7cec209c0a94df40773d34d Mon Sep 17 00:00:00 2001 From: Daniel Bennett Date: Wed, 18 Dec 2024 17:01:14 -0500 Subject: [PATCH 33/35] dynamic host volumes: tweak plugin fingerprint (#24711) Instead of a plugin `version` subcommand that responds with a string (established in #24497), respond to a `fingerprint` command with a data structure that we may extend in the future (such as plugin capabilities, like size constraint support?). In the immediate term, it's still just the version: `{"version": "0.0.1"}` In addition to leaving the door open for future expansion, I think it will also avoid false positives detecting executables that just happen to respond to a `version` command. 
This also reverses the ordering of the fingerprint string parts from `plugins.host_volume.version.mkdir` (which aligned with CNI) to `plugins.host_volume.mkdir.version` (makes more sense to me) --- client/fingerprint/dynamic_host_volumes.go | 8 +-- .../fingerprint/dynamic_host_volumes_test.go | 16 +++--- .../hostvolumemanager/host_volume_plugin.go | 28 ++++++----- .../host_volume_plugin_test.go | 12 ++--- .../test_fixtures/test_plugin.sh | 4 +- demo/hostvolume/_test-plugin.sh | 49 ++++++++++++------- demo/hostvolume/example-plugin-mkfs | 17 ++++--- nomad/host_volume_endpoint.go | 2 +- nomad/host_volume_endpoint_test.go | 10 ++-- 9 files changed, 84 insertions(+), 62 deletions(-) diff --git a/client/fingerprint/dynamic_host_volumes.go b/client/fingerprint/dynamic_host_volumes.go index e15a3a8f0c6..73cdd4fe4a2 100644 --- a/client/fingerprint/dynamic_host_volumes.go +++ b/client/fingerprint/dynamic_host_volumes.go @@ -35,7 +35,7 @@ func (h *DynamicHostVolumePluginFingerprint) Fingerprint(request *FingerprintReq // always add "mkdir" plugin h.logger.Debug("detected plugin built-in", "plugin_id", hvm.HostVolumePluginMkdirID, "version", hvm.HostVolumePluginMkdirVersion) - defer response.AddAttribute("plugins.host_volume.version."+hvm.HostVolumePluginMkdirID, hvm.HostVolumePluginMkdirVersion) + defer response.AddAttribute("plugins.host_volume."+hvm.HostVolumePluginMkdirID+".version", hvm.HostVolumePluginMkdirVersion) response.Detected = true // this config value will be empty in -dev mode @@ -64,7 +64,7 @@ func (h *DynamicHostVolumePluginFingerprint) Fingerprint(request *FingerprintReq // set the attribute(s) for plugin, version := range plugins { h.logger.Debug("detected plugin", "plugin_id", plugin, "version", version) - response.AddAttribute("plugins.host_volume.version."+plugin, version) + response.AddAttribute("plugins.host_volume."+plugin+".version", version) } return nil @@ -103,14 +103,14 @@ func GetHostVolumePluginVersions(log hclog.Logger, pluginDir string) (map[string return } - version, err := p.Version(ctx) + fprint, err := p.Fingerprint(ctx) if err != nil { log.Debug("failed to get version from plugin", "error", err) return } mut.Lock() - plugins[file] = version.String() + plugins[file] = fprint.Version.String() mut.Unlock() }(file, fullPath) } diff --git a/client/fingerprint/dynamic_host_volumes_test.go b/client/fingerprint/dynamic_host_volumes_test.go index 4be9d69d305..28b331bcfc1 100644 --- a/client/fingerprint/dynamic_host_volumes_test.go +++ b/client/fingerprint/dynamic_host_volumes_test.go @@ -53,10 +53,10 @@ func TestPluginsHostVolumeFingerprint(t *testing.T) { perm os.FileMode }{ // only this first one should be detected as a valid plugin - {"happy-plugin", "#!/usr/bin/env sh\necho '0.0.1'", 0700}, - {"not-a-plugin", "#!/usr/bin/env sh\necho 'not-a-version'", 0700}, - {"unhappy-plugin", "#!/usr/bin/env sh\necho '0.0.2'; exit 1", 0700}, - {"not-executable", "hello", 0400}, + {"happy-plugin", "#!/usr/bin/env sh\necho '{\"version\": \"0.0.1\"}'", 0700}, + {"not-a-plugin", "#!/usr/bin/env sh\necho 'not a version'", 0700}, + {"unhappy-plugin", "#!/usr/bin/env sh\necho 'sad plugin is sad'; exit 1", 0700}, + {"not-executable", "do not execute me", 0400}, } for _, f := range files { must.NoError(t, os.WriteFile(filepath.Join(tmp, f.name), []byte(f.contents), f.perm)) @@ -69,8 +69,8 @@ func TestPluginsHostVolumeFingerprint(t *testing.T) { err := fp.Fingerprint(req, &resp) must.NoError(t, err) must.Eq(t, map[string]string{ - "plugins.host_volume.version.happy-plugin": "0.0.1", - 
"plugins.host_volume.version.mkdir": hvm.HostVolumePluginMkdirVersion, // built-in + "plugins.host_volume.mkdir.version": hvm.HostVolumePluginMkdirVersion, // built-in + "plugins.host_volume.happy-plugin.version": "0.0.1", }, resp.Attributes) // do it again after deleting our one good plugin. @@ -82,8 +82,8 @@ func TestPluginsHostVolumeFingerprint(t *testing.T) { err = fp.Fingerprint(req, &resp) must.NoError(t, err) must.Eq(t, map[string]string{ - "plugins.host_volume.version.happy-plugin": "", // empty value means removed + "plugins.host_volume.happy-plugin.version": "", // empty value means removed - "plugins.host_volume.version.mkdir": hvm.HostVolumePluginMkdirVersion, // built-in + "plugins.host_volume.mkdir.version": hvm.HostVolumePluginMkdirVersion, // built-in }, resp.Attributes) } diff --git a/client/hostvolumemanager/host_volume_plugin.go b/client/hostvolumemanager/host_volume_plugin.go index 7da5baf66d0..961466b1223 100644 --- a/client/hostvolumemanager/host_volume_plugin.go +++ b/client/hostvolumemanager/host_volume_plugin.go @@ -12,7 +12,6 @@ import ( "os" "os/exec" "path/filepath" - "strings" "github.com/hashicorp/go-hclog" "github.com/hashicorp/go-multierror" @@ -21,8 +20,12 @@ import ( "github.com/hashicorp/nomad/helper" ) +type PluginFingerprint struct { + Version *version.Version `json:"version"` +} + type HostVolumePlugin interface { - Version(ctx context.Context) (*version.Version, error) + Fingerprint(ctx context.Context) (*PluginFingerprint, error) Create(ctx context.Context, req *cstructs.ClientHostVolumeCreateRequest) (*HostVolumePluginCreateResponse, error) Delete(ctx context.Context, req *cstructs.ClientHostVolumeDeleteRequest) error // db TODO(1.10.0): update? resize? ?? @@ -45,8 +48,11 @@ type HostVolumePluginMkdir struct { log hclog.Logger } -func (p *HostVolumePluginMkdir) Version(_ context.Context) (*version.Version, error) { - return version.NewVersion(HostVolumePluginMkdirVersion) +func (p *HostVolumePluginMkdir) Fingerprint(_ context.Context) (*PluginFingerprint, error) { + v, err := version.NewVersion(HostVolumePluginMkdirVersion) + return &PluginFingerprint{ + Version: v, + }, err } func (p *HostVolumePluginMkdir) Create(_ context.Context, @@ -134,9 +140,9 @@ type HostVolumePluginExternal struct { log hclog.Logger } -func (p *HostVolumePluginExternal) Version(ctx context.Context) (*version.Version, error) { - cmd := exec.CommandContext(ctx, p.Executable, "version") - cmd.Env = []string{"OPERATION=version"} +func (p *HostVolumePluginExternal) Fingerprint(ctx context.Context) (*PluginFingerprint, error) { + cmd := exec.CommandContext(ctx, p.Executable, "fingerprint") + cmd.Env = []string{"OPERATION=fingerprint"} stdout, stderr, err := runCommand(cmd) if err != nil { p.log.Debug("error with plugin", @@ -146,11 +152,11 @@ func (p *HostVolumePluginExternal) Version(ctx context.Context) (*version.Versio "error", err) return nil, fmt.Errorf("error getting version from plugin %q: %w", p.ID, err) } - v, err := version.NewVersion(strings.TrimSpace(string(stdout))) - if err != nil { - return nil, fmt.Errorf("error with version from plugin: %w", err) + fprint := &PluginFingerprint{} + if err := json.Unmarshal(stdout, fprint); err != nil { + return nil, fmt.Errorf("error parsing fingerprint output as json: %w", err) } - return v, nil + return fprint, nil } func (p *HostVolumePluginExternal) Create(ctx context.Context, diff --git a/client/hostvolumemanager/host_volume_plugin_test.go b/client/hostvolumemanager/host_volume_plugin_test.go index 
18de2e1f381..0552810bb84 100644 --- a/client/hostvolumemanager/host_volume_plugin_test.go +++ b/client/hostvolumemanager/host_volume_plugin_test.go @@ -33,7 +33,7 @@ func TestHostVolumePluginMkdir(t *testing.T) { // contexts don't matter here, since they're thrown away by this plugin, // but sending timeout contexts anyway, in case the plugin changes later. - _, err := plug.Version(timeout(t)) + _, err := plug.Fingerprint(timeout(t)) must.NoError(t, err) t.Run("happy", func(t *testing.T) { @@ -97,9 +97,9 @@ func TestHostVolumePluginExternal(t *testing.T) { log: log, } - v, err := plug.Version(timeout(t)) + v, err := plug.Fingerprint(timeout(t)) must.NoError(t, err) - must.Eq(t, expectVersion, v) + must.Eq(t, expectVersion, v.Version) resp, err := plug.Create(timeout(t), &cstructs.ClientHostVolumeCreateRequest{ @@ -147,12 +147,12 @@ func TestHostVolumePluginExternal(t *testing.T) { log: log, } - v, err := plug.Version(timeout(t)) + v, err := plug.Fingerprint(timeout(t)) must.EqError(t, err, `error getting version from plugin "test-external-plugin-sad": exit status 1`) must.Nil(t, v) logged := getLogs() - must.StrContains(t, logged, "version: sad plugin is sad") - must.StrContains(t, logged, "version: it tells you all about it in stderr") + must.StrContains(t, logged, "fingerprint: sad plugin is sad") + must.StrContains(t, logged, "fingerprint: it tells you all about it in stderr") // reset logger log, getLogs = logRecorder(t) diff --git a/client/hostvolumemanager/test_fixtures/test_plugin.sh b/client/hostvolumemanager/test_fixtures/test_plugin.sh index b60229fd34d..e93e37bbc76 100755 --- a/client/hostvolumemanager/test_fixtures/test_plugin.sh +++ b/client/hostvolumemanager/test_fixtures/test_plugin.sh @@ -12,6 +12,8 @@ test "$1" == "$OPERATION" echo 'all operations should ignore stderr' 1>&2 case $1 in + fingerprint) + echo '{"version": "0.0.2"}' ;; create) test "$2" == "$HOST_PATH" test "$NODE_ID" == 'test-node' @@ -26,8 +28,6 @@ case $1 in test "$NODE_ID" == 'test-node' test "$PARAMETERS" == '{"key":"val"}' rm -rfv "$2" ;; - version) - echo '0.0.2' ;; *) echo "unknown operation $1" exit 1 ;; diff --git a/demo/hostvolume/_test-plugin.sh b/demo/hostvolume/_test-plugin.sh index 5ccd1f28a72..864680e237d 100755 --- a/demo/hostvolume/_test-plugin.sh +++ b/demo/hostvolume/_test-plugin.sh @@ -4,35 +4,48 @@ set -euo pipefail -if [[ $# -eq 0 || "$*" =~ -h ]]; then +help() { cat < + $0 [target dir] [uuid] -Operations: - create, delete, version - any other operation will be passed to the plugin +Args: + plugin: path to plugin executable + operation: fingerprint, create, or delete + create and delete must be idempotent. + any other operation will be passed into the plugin, + to see how it handles invalid operations. 
+ target dir: directory to create the volume (defaults to /tmp) + uuid: volume id to use (usually assigned by Nomad; + defaults to 74564d17-ce50-0bc1-48e5-6feaa41ede48) -Environment variables: - PLUGIN: executable to run (default ./example-host-volume) - TARGET_DIR: path to place the mount dir (default /tmp, - usually {nomad data dir}/alloc_mounts) +Examples: + $0 ./example-plugin-mkfs fingerprint + $0 ./example-plugin-mkfs create + $0 ./example-plugin-mkfs create /some/other/place + $0 ./example-plugin-mkfs delete EOF +} + +if [[ $# -eq 0 || "$*" =~ -h ]]; then + help exit fi +if [ $# -lt 2 ]; then + help + exit 1 +fi -op="$1" -shift - -plugin="${PLUGIN:-./example-host-volume}" -alloc_mounts="${TARGET_DIR:-/tmp}" -uuid='74564d17-ce50-0bc1-48e5-6feaa41ede48' +plugin="$1" +op="$2" +alloc_mounts="${3:-/tmp}" +uuid="${4:-74564d17-ce50-0bc1-48e5-6feaa41ede48}" case $op in - version) - args='version' + fingerprint) + args='fingerprint' ;; create) @@ -59,4 +72,4 @@ esac export OPERATION="$op" set -x -eval "$plugin $* $args" +eval "$plugin $args" diff --git a/demo/hostvolume/example-plugin-mkfs b/demo/hostvolume/example-plugin-mkfs index 918f97748fb..5bfaa4e47fa 100755 --- a/demo/hostvolume/example-plugin-mkfs +++ b/demo/hostvolume/example-plugin-mkfs @@ -6,6 +6,11 @@ set -euo pipefail +version='0.0.1' +fingerprint() { + printf '{"version": "%s"}' "$version" +} + help() { cat < [path] + $(basename "$0") [options] [path] Options: -v|--verbose: Show shell commands (set -x) @@ -25,21 +30,19 @@ Operations: required environment: CAPACITY_MIN_BYTES delete: Unmounts and deletes the device at path (required) - version: Outputs this plugin's version + version: Outputs this plugin's version: $version + fingerprint: Outputs plugin metadata: $(fingerprint) EOF } -version() { - echo "0.0.1" -} - # parse args [ $# -eq 0 ] && { help; exit 1; } for arg in "$@"; do case $arg in -h|-help|--help) help; exit 0 ;; - version|--version) version; exit 0 ;; + fingerprint|fingerprint) fingerprint; exit 0 ;; + version|version) echo "$version"; exit 0 ;; -v|--verbose) set -x; shift; ;; esac done diff --git a/nomad/host_volume_endpoint.go b/nomad/host_volume_endpoint.go index 8f085416826..e33b0e8a42b 100644 --- a/nomad/host_volume_endpoint.go +++ b/nomad/host_volume_endpoint.go @@ -485,7 +485,7 @@ func (v *HostVolume) placeHostVolume(snap *state.StateSnapshot, vol *structs.Hos semverCache: make(map[string]scheduler.VerConstraints), } constraints := []*structs.Constraint{{ - LTarget: fmt.Sprintf("${attr.plugins.host_volume.version.%s}", vol.PluginID), + LTarget: fmt.Sprintf("${attr.plugins.host_volume.%s.version}", vol.PluginID), Operand: "is_set", }} constraints = append(constraints, vol.Constraints...) 
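For reference, the placement constraint built above for a volume using the built-in mkdir plugin is roughly equivalent to the following HCL constraint block (a sketch for illustration only; the server constructs the constraint programmatically from the volume's plugin ID):

    constraint {
      attribute = "${attr.plugins.host_volume.mkdir.version}"
      operator  = "is_set"
    }
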
diff --git a/nomad/host_volume_endpoint_test.go b/nomad/host_volume_endpoint_test.go index 9ca0f94855b..e523ae29ae3 100644 --- a/nomad/host_volume_endpoint_test.go +++ b/nomad/host_volume_endpoint_test.go @@ -666,14 +666,14 @@ func TestHostVolumeEndpoint_placeVolume(t *testing.T) { node0, node1, node2, node3 := mock.Node(), mock.Node(), mock.Node(), mock.Node() node0.NodePool = structs.NodePoolDefault - node0.Attributes["plugins.host_volume.version.mkdir"] = "0.0.1" + node0.Attributes["plugins.host_volume.mkdir.version"] = "0.0.1" node1.NodePool = "dev" node1.Meta["rack"] = "r2" - node1.Attributes["plugins.host_volume.version.mkdir"] = "0.0.1" + node1.Attributes["plugins.host_volume.mkdir.version"] = "0.0.1" node2.NodePool = "prod" - node2.Attributes["plugins.host_volume.version.mkdir"] = "0.0.1" + node2.Attributes["plugins.host_volume.mkdir.version"] = "0.0.1" node3.NodePool = "prod" node3.Meta["rack"] = "r3" @@ -681,7 +681,7 @@ Name: "example", Path: "/srv", }} - node3.Attributes["plugins.host_volume.version.mkdir"] = "0.0.1" + node3.Attributes["plugins.host_volume.mkdir.version"] = "0.0.1" must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, 1000, node0)) must.NoError(t, store.UpsertNode(structs.MsgTypeTestSetup, 1000, node1)) @@ -785,7 +785,7 @@ func newMockHostVolumeClient(t *testing.T, srv *Server, pool string) (*mockHostV c1, cleanup := client.TestRPCOnlyClient(t, func(c *config.Config) { c.Node.NodePool = pool c.Node.Attributes["nomad.version"] = version.Version - c.Node.Attributes["plugins.host_volume.version.mkdir"] = "0.0.1" + c.Node.Attributes["plugins.host_volume.mkdir.version"] = "0.0.1" c.Node.Meta["rack"] = "r1" }, srv.config.RPCAddr, map[string]any{"HostVolume": mockClientEndpoint}) t.Cleanup(cleanup) From fea846189fb92493c221e1bfff87be4615dca785 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Thu, 19 Dec 2024 09:18:57 -0500 Subject: [PATCH 34/35] dynamic host volumes: account for other claims in capability check (#24684) When we feasibility check a dynamic host volume against a volume request, we check the attachment mode and access mode. This only ensures that the capabilities match, but doesn't enforce the semantics of the capabilities against other claims that may be made on the volume. Add support for checking the requested capability against the other allocations that have claimed the volume.
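To make the enforced semantics concrete, here is a sketch of two task groups claiming the same dynamic host volume (an assumption that the `single-node-single-writer` and `single-node-reader-only` mode strings from #24705 may be set directly on a jobspec volume block):

    # writer group: exclusive read-write claim
    volume "shared" {
      type            = "host"
      source          = "example"
      access_mode     = "single-node-single-writer"
      attachment_mode = "file-system"
    }

    # reader group: read-only claim on the same volume
    volume "shared" {
      type            = "host"
      source          = "example"
      read_only       = true
      access_mode     = "single-node-reader-only"
      attachment_mode = "file-system"
    }

With this change the feasibility checker examines the other allocations proposed for the node: the read-only claim can coexist with the writer, but a second read-write claim against a `single-node-single-writer` volume makes the node infeasible.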
Ref: https://github.com/hashicorp/nomad/pull/24479 --- scheduler/feasible.go | 103 +++++++++++++++++++--- scheduler/feasible_test.go | 151 +++++++++++++++++++++++++++++++- scheduler/generic_sched_test.go | 2 +- 3 files changed, 241 insertions(+), 15 deletions(-) diff --git a/scheduler/feasible.go b/scheduler/feasible.go index 69ab03de7c3..fa1800b2ae0 100644 --- a/scheduler/feasible.go +++ b/scheduler/feasible.go @@ -192,6 +192,11 @@ func (h *HostVolumeChecker) hasVolumes(n *structs.Node) bool { return true } + proposed, err := h.ctx.ProposedAllocs(n.ID) + if err != nil { + return false // only hit this on state store invariant failure + } + for _, req := range h.volumeReqs { volCfg, ok := n.HostVolumes[req.Source] if !ok { @@ -207,18 +212,12 @@ func (h *HostVolumeChecker) hasVolumes(n *structs.Node) bool { // raft entry completes return false } - if vol.State != structs.HostVolumeStateReady { - return false - } - var capOk bool - for _, cap := range vol.RequestedCapabilities { - if req.AccessMode == structs.CSIVolumeAccessMode(cap.AccessMode) && - req.AttachmentMode == structs.CSIVolumeAttachmentMode(cap.AttachmentMode) { - capOk = true - break - } - } - if !capOk { + if !h.hostVolumeIsAvailable(vol, + structs.HostVolumeAccessMode(req.AccessMode), + structs.HostVolumeAttachmentMode(req.AttachmentMode), + req.ReadOnly, + proposed, + ) { return false } @@ -242,6 +241,86 @@ func (h *HostVolumeChecker) hasVolumes(n *structs.Node) bool { return true } +// hostVolumeIsAvailable determines if a dynamic host volume is available for a request +func (h *HostVolumeChecker) hostVolumeIsAvailable( + vol *structs.HostVolume, + reqAccess structs.HostVolumeAccessMode, + reqAttach structs.HostVolumeAttachmentMode, + readOnly bool, + proposed []*structs.Allocation) bool { + + if vol.State != structs.HostVolumeStateReady { + return false + } + + // pick a default capability based on the read-only flag. this happens here + // in the scheduler rather than job submit because we don't know whether a + // host volume is dynamic or not until we try to schedule it (ex. the same + // name could be static on one node and dynamic on another) + if reqAccess == structs.HostVolumeAccessModeUnknown { + if readOnly { + reqAccess = structs.HostVolumeAccessModeSingleNodeReader + } else { + reqAccess = structs.HostVolumeAccessModeSingleNodeWriter + } + } + if reqAttach == structs.HostVolumeAttachmentModeUnknown { + reqAttach = structs.HostVolumeAttachmentModeFilesystem + } + + // check that the volume has the requested capability at all + var capOk bool + for _, cap := range vol.RequestedCapabilities { + if reqAccess == cap.AccessMode && + reqAttach == cap.AttachmentMode { + capOk = true + break + } + } + if !capOk { + return false + } + + switch reqAccess { + case structs.HostVolumeAccessModeSingleNodeReader: + return readOnly + case structs.HostVolumeAccessModeSingleNodeWriter: + return !readOnly + case structs.HostVolumeAccessModeSingleNodeSingleWriter: + // examine all proposed allocs on the node, including those that might + // not have yet been persisted. 
they have nil pointers to their Job, so + // we have to go back to the state store to get them + seen := map[string]struct{}{} + for _, alloc := range proposed { + uniqueGroup := alloc.JobNamespacedID().String() + alloc.TaskGroup + if _, ok := seen[uniqueGroup]; ok { + // all allocs for the same group will have the same read-only + // flag and capabilities, so we only need to check a given group + // once + continue + } + seen[uniqueGroup] = struct{}{} + job, err := h.ctx.State().JobByID(nil, alloc.Namespace, alloc.JobID) + if err != nil { + return false + } + tg := job.LookupTaskGroup(alloc.TaskGroup) + for _, req := range tg.Volumes { + if req.Type == structs.VolumeTypeHost && req.Source == vol.Name { + if !req.ReadOnly { + return false + } + } + } + } + + case structs.HostVolumeAccessModeSingleNodeMultiWriter: + // no contraint + } + + return true +} + type CSIVolumeChecker struct { ctx Context namespace string diff --git a/scheduler/feasible_test.go b/scheduler/feasible_test.go index 3351210c2ee..18a8153e83c 100644 --- a/scheduler/feasible_test.go +++ b/scheduler/feasible_test.go @@ -91,7 +91,7 @@ func TestRandomIterator(t *testing.T) { } } -func TestHostVolumeChecker(t *testing.T) { +func TestHostVolumeChecker_Static(t *testing.T) { ci.Parallel(t) _, ctx := testContext(t) @@ -184,7 +184,7 @@ func TestHostVolumeChecker(t *testing.T) { } } -func TestHostVolumeChecker_ReadOnly(t *testing.T) { +func TestHostVolumeChecker_Dynamic(t *testing.T) { ci.Parallel(t) store, ctx := testContext(t) @@ -284,6 +284,7 @@ func TestHostVolumeChecker_ReadOnly(t *testing.T) { "foo": { Type: "host", Source: "foo", + ReadOnly: true, AccessMode: structs.CSIVolumeAccessModeSingleNodeReader, AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem, }, @@ -475,6 +476,152 @@ func TestHostVolumeChecker_Sticky(t *testing.T) { } } +// TestDynamicHostVolumeIsAvailable provides fine-grained coverage of the +// hostVolumeIsAvailable method +func TestDynamicHostVolumeIsAvailable(t *testing.T) { + + store, ctx := testContext(t) + + allCaps := []*structs.HostVolumeCapability{} + + for _, accessMode := range []structs.HostVolumeAccessMode{ + structs.HostVolumeAccessModeSingleNodeReader, + structs.HostVolumeAccessModeSingleNodeWriter, + structs.HostVolumeAccessModeSingleNodeSingleWriter, + structs.HostVolumeAccessModeSingleNodeMultiWriter, + } { + for _, attachMode := range []structs.HostVolumeAttachmentMode{ + structs.HostVolumeAttachmentModeFilesystem, + structs.HostVolumeAttachmentModeBlockDevice, + } { + allCaps = append(allCaps, &structs.HostVolumeCapability{ + AttachmentMode: attachMode, + AccessMode: accessMode, + }) + } + } + + jobReader, jobWriter := mock.Job(), mock.Job() + jobReader.TaskGroups[0].Volumes = map[string]*structs.VolumeRequest{ + "example": { + Type: structs.VolumeTypeHost, + Source: "example", + ReadOnly: true, + }, + } + jobWriter.TaskGroups[0].Volumes = map[string]*structs.VolumeRequest{ + "example": { + Type: structs.VolumeTypeHost, + Source: "example", + }, + } + index, _ := store.LatestIndex() + index++ + must.NoError(t, store.UpsertJob(structs.MsgTypeTestSetup, index, nil, jobReader)) + index++ + must.NoError(t, store.UpsertJob(structs.MsgTypeTestSetup, index, nil, jobWriter)) + + allocReader0, allocReader1 := mock.Alloc(), mock.Alloc() + allocReader0.JobID = jobReader.ID + allocReader1.JobID = jobReader.ID + + allocWriter0, allocWriter1 := mock.Alloc(), mock.Alloc() + allocWriter0.JobID = jobWriter.ID + allocWriter1.JobID = jobWriter.ID + + index++ + must.NoError(t, 
store.UpsertAllocs(structs.MsgTypeTestSetup, index, + []*structs.Allocation{allocReader0, allocReader1, allocWriter0, allocWriter1})) + + testCases := []struct { + name string + hasProposed []*structs.Allocation + hasCaps []*structs.HostVolumeCapability + wantAccess structs.HostVolumeAccessMode + wantAttach structs.HostVolumeAttachmentMode + readOnly bool + expect bool + }{ + { + name: "enforce attachment mode", + hasCaps: []*structs.HostVolumeCapability{{ + AttachmentMode: structs.HostVolumeAttachmentModeBlockDevice, + AccessMode: structs.HostVolumeAccessModeSingleNodeSingleWriter, + }}, + wantAttach: structs.HostVolumeAttachmentModeFilesystem, + wantAccess: structs.HostVolumeAccessModeSingleNodeSingleWriter, + expect: false, + }, + { + name: "enforce read only", + hasProposed: []*structs.Allocation{allocReader0, allocReader1}, + wantAttach: structs.HostVolumeAttachmentModeFilesystem, + wantAccess: structs.HostVolumeAccessModeSingleNodeReader, + expect: false, + }, + { + name: "enforce read only ok", + hasProposed: []*structs.Allocation{allocReader0, allocReader1}, + wantAttach: structs.HostVolumeAttachmentModeFilesystem, + wantAccess: structs.HostVolumeAccessModeSingleNodeReader, + readOnly: true, + expect: true, + }, + { + name: "enforce single writer", + hasProposed: []*structs.Allocation{allocReader0, allocReader1, allocWriter0}, + wantAttach: structs.HostVolumeAttachmentModeFilesystem, + wantAccess: structs.HostVolumeAccessModeSingleNodeSingleWriter, + expect: false, + }, + { + name: "enforce single writer ok", + hasProposed: []*structs.Allocation{allocReader0, allocReader1}, + wantAttach: structs.HostVolumeAttachmentModeFilesystem, + wantAccess: structs.HostVolumeAccessModeSingleNodeSingleWriter, + expect: true, + }, + { + name: "multi writer is always ok", + hasProposed: []*structs.Allocation{allocReader0, allocWriter0, allocWriter1}, + wantAttach: structs.HostVolumeAttachmentModeFilesystem, + wantAccess: structs.HostVolumeAccessModeSingleNodeMultiWriter, + expect: true, + }, + { + name: "default capabilities ok", + expect: true, + }, + { + name: "default capabilities fail", + readOnly: true, + hasCaps: []*structs.HostVolumeCapability{{ + AttachmentMode: structs.HostVolumeAttachmentModeBlockDevice, + AccessMode: structs.HostVolumeAccessModeSingleNodeSingleWriter, + }}, + expect: false, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + vol := &structs.HostVolume{ + Name: "example", + State: structs.HostVolumeStateReady, + } + if len(tc.hasCaps) > 0 { + vol.RequestedCapabilities = tc.hasCaps + } else { + vol.RequestedCapabilities = allCaps + } + checker := NewHostVolumeChecker(ctx) + must.Eq(t, tc.expect, checker.hostVolumeIsAvailable( + vol, tc.wantAccess, tc.wantAttach, tc.readOnly, tc.hasProposed)) + }) + } + +} + func TestCSIVolumeChecker(t *testing.T) { ci.Parallel(t) state, ctx := testContext(t) diff --git a/scheduler/generic_sched_test.go b/scheduler/generic_sched_test.go index 5d471423136..3d236b5d289 100644 --- a/scheduler/generic_sched_test.go +++ b/scheduler/generic_sched_test.go @@ -218,7 +218,7 @@ func TestServiceSched_JobRegister_StickyAllocs(t *testing.T) { } } -func TestServiceSched_JobRegister_StickyVolumes(t *testing.T) { +func TestServiceSched_JobRegister_StickyHostVolumes(t *testing.T) { ci.Parallel(t) h := NewHarness(t) From ad1e5977963fed03e49c33f53ef97613d82e1a44 Mon Sep 17 00:00:00 2001 From: Piotr Kazmierczak <470696+pkazmierczak@users.noreply.github.com> Date: Thu, 19 Dec 2024 15:20:09 +0100 Subject: [PATCH 35/35] stateful 
deployments: validate there are no sticky per_alloc volume requests (#24714) This changeset adds an additional validation that prevents users from setting both per_alloc and sticky flags on a volume request. Ref: #24479 --- nomad/structs/volumes.go | 3 +++ nomad/structs/volumes_test.go | 11 +++++++++++ 2 files changed, 14 insertions(+) diff --git a/nomad/structs/volumes.go b/nomad/structs/volumes.go index 58c4eefacd3..f474fe1d28e 100644 --- a/nomad/structs/volumes.go +++ b/nomad/structs/volumes.go @@ -165,6 +165,9 @@ func (v *VolumeRequest) Validate(jobType string, taskGroupCount, canaries int) e if canaries > 0 { addErr("volume cannot be per_alloc when canaries are in use") } + if v.Sticky { + addErr("volume cannot be per_alloc and sticky at the same time") + } } switch v.Type { diff --git a/nomad/structs/volumes_test.go b/nomad/structs/volumes_test.go index 9b697faf18c..fb5a1a04d64 100644 --- a/nomad/structs/volumes_test.go +++ b/nomad/structs/volumes_test.go @@ -85,6 +85,17 @@ func TestVolumeRequest_Validate(t *testing.T) { PerAlloc: true, }, }, + { + name: "per_alloc sticky", + expected: []string{ + "volume cannot be per_alloc and sticky at the same time", + }, + req: &VolumeRequest{ + Type: VolumeTypeCSI, + PerAlloc: true, + Sticky: true, + }, + }, } for _, tc := range testCases {