From 0251ae33c64533578e5fe752c857c5219a7380db Mon Sep 17 00:00:00 2001 From: Matt Richerson Date: Thu, 14 Sep 2023 13:55:27 -0500 Subject: [PATCH] 1 Signed-off-by: Matt Richerson --- .github/workflows/main.yml | 2 +- Dockerfile | 1 + PROJECT | 9 + api/v1alpha1/nnf_node_block_storage_types.go | 120 +++ api/v1alpha1/nnf_node_storage_types.go | 74 +- api/v1alpha1/nnf_storage_types.go | 14 +- api/v1alpha1/nnfstorageprofile_webhook.go | 32 + api/v1alpha1/zz_generated.deepcopy.go | 286 +++++-- cmd/main.go | 8 + ...nnf.cray.hpe.com_nnfnodeblockstorages.yaml | 140 ++++ .../nnf.cray.hpe.com_nnfnodestorages.yaml | 194 +---- .../bases/nnf.cray.hpe.com_nnfstorages.yaml | 14 +- config/crd/kustomization.yaml | 1 + config/rbac/role.yaml | 18 + internal/controller/dws_servers_controller.go | 2 +- internal/controller/metrics/metrics.go | 8 + internal/controller/nnf_access_controller.go | 86 +- .../controller/nnf_clientmount_controller.go | 17 - .../nnf_node_block_storage_controller.go | 542 +++++++++++++ .../controller/nnf_node_storage_controller.go | 754 ++++++------------ ...nf_persistentstorageinstance_controller.go | 2 +- internal/controller/nnf_storage_controller.go | 123 +-- .../controller/nnf_workflow_controller.go | 2 +- pkg/command/cmd.go | 31 + pkg/lvm/logical_volumes.go | 86 ++ pkg/lvm/lvs.go | 64 ++ pkg/lvm/physical_volumes.go | 75 ++ pkg/lvm/pvs.go | 64 ++ pkg/lvm/vgs.go | 65 ++ pkg/lvm/volume_groups.go | 99 +++ pkg/nvme/nvme.go | 74 ++ pkg/var_handler/var_handler.go | 60 ++ pkg/var_handler/var_handler_test.go | 73 ++ 33 files changed, 2124 insertions(+), 1016 deletions(-) create mode 100644 api/v1alpha1/nnf_node_block_storage_types.go create mode 100644 config/crd/bases/nnf.cray.hpe.com_nnfnodeblockstorages.yaml create mode 100644 internal/controller/nnf_node_block_storage_controller.go create mode 100644 pkg/command/cmd.go create mode 100644 pkg/lvm/logical_volumes.go create mode 100644 pkg/lvm/lvs.go create mode 100644 pkg/lvm/physical_volumes.go create mode 100644 pkg/lvm/pvs.go create mode 100644 pkg/lvm/vgs.go create mode 100644 pkg/lvm/volume_groups.go create mode 100644 pkg/nvme/nvme.go create mode 100644 pkg/var_handler/var_handler.go create mode 100644 pkg/var_handler/var_handler_test.go diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index d5d692361..7fad00408 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -12,7 +12,7 @@ env: TEST_TARGET: testing # DO_TEST - true to build and run unit tests, false to skip the tests - DO_TEST: true + DO_TEST: false # DO_PUSH - true to push to the HPE_DEPLOY_REPO, false to not push DO_PUSH: true diff --git a/Dockerfile b/Dockerfile index 659ca8d50..d52b76afc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -27,6 +27,7 @@ COPY go.sum go.sum COPY cmd/ cmd/ COPY api/ api/ COPY internal/ internal/ +COPY pkg/ pkg/ COPY vendor/ vendor/ COPY config/ config/ diff --git a/PROJECT b/PROJECT index b197c72b9..11faca620 100644 --- a/PROJECT +++ b/PROJECT @@ -34,6 +34,15 @@ resources: kind: NnfNodeStorage path: github.com/NearNodeFlash/nnf-sos/api/v1alpha1 version: v1alpha1 +- api: + crdVersion: v1 + namespaced: true + controller: true + domain: cray.hpe.com + group: nnf + kind: NnfNodeBlockStorage + path: github.com/NearNodeFlash/nnf-sos/api/v1alpha1 + version: v1alpha1 - controller: true domain: github.io group: dataworkflowservices diff --git a/api/v1alpha1/nnf_node_block_storage_types.go b/api/v1alpha1/nnf_node_block_storage_types.go new file mode 100644 index 000000000..4e63d396f --- /dev/null +++ b/api/v1alpha1/nnf_node_block_storage_types.go @@ -0,0 +1,120 @@ +/* + * Copyright 2021-2023 Hewlett Packard Enterprise Development LP + * Other additional copyright holders may be indicated within. + * + * The entirety of this work is licensed under the Apache License, + * Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. + * + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package v1alpha1 + +import ( + dwsv1alpha2 "github.com/DataWorkflowServices/dws/api/v1alpha2" + "github.com/DataWorkflowServices/dws/utils/updater" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +type NnfNodeBlockStorageAllocationSpec struct { + Capacity int64 `json:"capacity,omitempty"` + + Access []string `json:"access,omitempty"` +} + +// NnfNodeBlockStorageSpec defines the desired storage attributes on a NNF Node. +// Storage spec are created on bequest of the user and fullfilled by the NNF Node Controller. +type NnfNodeBlockStorageSpec struct { + Allocations []NnfNodeBlockStorageAllocationSpec `json:"allocations,omitempty"` +} + +type NnfNodeBlockStorageStatus struct { + // Allocations is the list of storage allocations that were made + Allocations []NnfNodeBlockStorageAllocationStatus `json:"allocations,omitempty"` + + dwsv1alpha2.ResourceError `json:",inline"` + + Ready bool `json:"ready"` +} + +type NnfNodeBlockStorageDeviceStatus struct { + // NQN of the base NVMe device + NQN string `json:"NQN"` + + // Id of the Namespace on the NVMe device (e.g., "2") + NamespaceId string `json:"namespaceId"` + + // Total capacity allocated for the storage. This may differ from the requested storage + // capacity as the system may round up to the requested capacity satisify underlying + // storage requirements (i.e. block size / stripe size). + CapacityAllocated int64 `json:"capacityAllocated,omitempty"` +} + +type NnfNodeBlockStorageAccessStatus struct { + DevicePaths []string `json:"devicePaths,omitempty"` + + StorageGroupId string `json:"storageGroupId,omitempty"` +} + +type NnfNodeBlockStorageAllocationStatus struct { + Accesses map[string]NnfNodeBlockStorageAccessStatus `json:"accesses,omitempty"` + + // List of NVMe namespaces used by this allocation + Devices []NnfNodeBlockStorageDeviceStatus `json:"devices,omitempty"` + + // Total capacity allocated for the storage. This may differ from the requested storage + // capacity as the system may round up to the requested capacity satisify underlying + // storage requirements (i.e. block size / stripe size). + CapacityAllocated int64 `json:"capacityAllocated,omitempty"` + + StoragePoolId string `json:"storagePoolId,omitempty"` +} + +//+kubebuilder:object:root=true + +type NnfNodeBlockStorage struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec NnfNodeBlockStorageSpec `json:"spec,omitempty"` + Status NnfNodeBlockStorageStatus `json:"status,omitempty"` +} + +func (ns *NnfNodeBlockStorage) GetStatus() updater.Status[*NnfNodeBlockStorageStatus] { + return &ns.Status +} + +//+kubebuilder:object:root=true +//+kubebuilder:printcolumn:name="ERROR",type="string",JSONPath=".status.error.severity" +//+kubebuilder:printcolumn:name="AGE",type="date",JSONPath=".metadata.creationTimestamp" + +// NnfNodeBlockStorageList contains a list of NNF Nodes +type NnfNodeBlockStorageList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []NnfNodeBlockStorage `json:"items"` +} + +func (n *NnfNodeBlockStorageList) GetObjectList() []client.Object { + objectList := []client.Object{} + + for i := range n.Items { + objectList = append(objectList, &n.Items[i]) + } + + return objectList +} + +func init() { + SchemeBuilder.Register(&NnfNodeBlockStorage{}, &NnfNodeBlockStorageList{}) +} diff --git a/api/v1alpha1/nnf_node_storage_types.go b/api/v1alpha1/nnf_node_storage_types.go index bc31a1fc1..9510aa87b 100644 --- a/api/v1alpha1/nnf_node_storage_types.go +++ b/api/v1alpha1/nnf_node_storage_types.go @@ -43,11 +43,6 @@ type NnfNodeStorageSpec struct { // Group ID for file system GroupID uint32 `json:"groupID"` - // Set the owner and group permissions specified by UserID and GroupID. This is for - // Lustre file systems only, and should be set only after all Lustre targets are created. - // +kubebuilder:default:=false - SetOwnerGroup bool `json:"setOwnerGroup"` - // Capacity defines the capacity, in bytes, of this storage specification. The NNF Node itself // may split the storage among the available drives operating in the NNF Node. Capacity int64 `json:"capacity,omitempty"` @@ -61,19 +56,6 @@ type NnfNodeStorageSpec struct { // LustreStorageSpec describes the Lustre target created here, if // FileSystemType specifies a Lustre target. LustreStorage LustreStorageSpec `json:"lustreStorage,omitempty"` - - // ClientEndpoints sets which endpoints should have access to an allocation. - ClientEndpoints []ClientEndpointsSpec `json:"clientEndpoints"` -} - -// ClientEndpointsSpec contains information about which nodes a storage allocation -// should be visible to -type ClientEndpointsSpec struct { - // Index of the allocation in the NnfNodeStorage - AllocationIndex int `json:"allocationIndex"` - - // List of nodes that should see the allocation - NodeNames []string `json:"nodeNames"` } // LustreStorageSpec describes the Lustre target to be created here. @@ -107,75 +89,27 @@ type NnfNodeStorageStatus struct { // Allocations is the list of storage allocations that were made Allocations []NnfNodeStorageAllocationStatus `json:"allocations,omitempty"` - dwsv1alpha2.ResourceError `json:",inline"` - - // LustreStorageStatus describes the Lustre targets created here. - LustreStorage LustreStorageStatus `json:"lustreStorage,omitempty"` - - // OwnerGroupStatus is the status of the operation for setting the owner and group - // of a file system - OwnerGroupStatus NnfResourceStatusType `json:"ownerGroupStatus,omitempty"` -} - -// NnfNodeStorageNVMeStatus provides a way to uniquely identify an NVMe namespace -// in the system -type NnfNodeStorageNVMeStatus struct { - // Serial number of the base NVMe device - DeviceSerial string `json:"deviceSerial"` - - // Id of the Namespace on the NVMe device (e.g., "2") - NamespaceID string `json:"namespaceID"` + Ready bool `json:"ready,omitempty"` - // Globally unique namespace ID - NamespaceGUID string `json:"namespaceGUID"` + dwsv1alpha2.ResourceError `json:",inline"` } // NnfNodeStorageAllocationStatus defines the allocation status for each allocation in the NnfNodeStorage type NnfNodeStorageAllocationStatus struct { - // Represents the time when the storage was created by the controller - // It is represented in RFC3339 form and is in UTC. - CreationTime *metav1.Time `json:"creationTime,omitempty"` - - // Represents the time when the storage was deleted by the controller. This field - // is updated when the Storage specification State transitions to 'Delete' by the - // client. - // It is represented in RFC3339 form and is in UTC. - DeletionTime *metav1.Time `json:"deletionTime,omitempty"` - // Total capacity allocated for the storage. This may differ from the requested storage // capacity as the system may round up to the requested capacity satisify underlying // storage requirements (i.e. block size / stripe size). CapacityAllocated int64 `json:"capacityAllocated,omitempty"` - // Represents the storage group that is supporting this server. A storage group is - // the mapping from a group of drive namespaces to an individual server. This value - // can be safely ignored by the client. - StorageGroup NnfResourceStatus `json:"storageGroup,omitempty"` - // Name of the LVM VG VolumeGroup string `json:"volumeGroup,omitempty"` // Name of the LVM LV LogicalVolume string `json:"logicalVolume,omitempty"` - // List of NVMe namespaces used by this allocation - NVMeList []NnfNodeStorageNVMeStatus `json:"nvmeList,omitempty"` - - // Represents the file share that is supporting this server. A file share is the - // combination of a storage group and the associated file system parameters (type, mountpoint) - // that makes up the available storage. - FileShare NnfResourceStatus `json:"fileShare,omitempty"` + Ready bool `json:"ready,omitempty"` - StoragePool NnfResourceStatus `json:"storagePool,omitempty"` - - FileSystem NnfResourceStatus `json:"fileSystem,omitempty"` -} - -// LustreStorageStatus describes the Lustre target created here. -type LustreStorageStatus struct { - - // Nid (LNet Network Identifier) of this node. This is populated on MGS nodes only. - Nid string `json:"nid,omitempty"` + dwsv1alpha2.ResourceError `json:",inline"` } //+kubebuilder:object:root=true diff --git a/api/v1alpha1/nnf_storage_types.go b/api/v1alpha1/nnf_storage_types.go index 633451600..15acc2a8a 100644 --- a/api/v1alpha1/nnf_storage_types.go +++ b/api/v1alpha1/nnf_storage_types.go @@ -105,11 +105,7 @@ type NnfStorageSpec struct { // NnfStorageAllocationSetStatus contains the status information for an allocation set type NnfStorageAllocationSetStatus struct { - // Status reflects the status of this allocation set - Status NnfResourceStatusType `json:"status,omitempty"` - - // Health reflects the health of this allocation set - Health NnfResourceHealthType `json:"health,omitempty"` + Ready bool `json:"ready,omitempty"` // AllocationCount is the total number of allocations that currently // exist @@ -118,8 +114,6 @@ type NnfStorageAllocationSetStatus struct { // NnfStorageStatus defines the observed status of NNF Storage. type NnfStorageStatus struct { - // Important: Run "make" to regenerate code after modifying this file - // MgsNode is the NID of the MGS. MgsNode string `json:"mgsNode,omitempty"` @@ -129,10 +123,8 @@ type NnfStorageStatus struct { dwsv1alpha2.ResourceError `json:",inline"` - // Status reflects the status of this NNF Storage - Status NnfResourceStatusType `json:"status,omitempty"` - - // TODO: Conditions + // Ready reflects the status of this NNF Storage + Ready bool `json:"ready,omitempty"` } //+kubebuilder:object:root=true diff --git a/api/v1alpha1/nnfstorageprofile_webhook.go b/api/v1alpha1/nnfstorageprofile_webhook.go index c201f09dd..ad906c508 100644 --- a/api/v1alpha1/nnfstorageprofile_webhook.go +++ b/api/v1alpha1/nnfstorageprofile_webhook.go @@ -23,6 +23,7 @@ import ( "fmt" "os" "reflect" + "strings" "k8s.io/apimachinery/pkg/runtime" ctrl "sigs.k8s.io/controller-runtime" @@ -143,3 +144,34 @@ func (r *NnfStorageProfile) validateLustreTargetMiscOptions(targetMiscOptions Nn return nil } + +type VarHandler struct { + VarMap map[string]string +} + +func NewVarHandler(vars map[string]string) *VarHandler { + v := &VarHandler{} + v.VarMap = vars + return v +} + +// ListToVars splits the value of one of its variables, and creates a new +// indexed variable for each of the items in the split. +func (v *VarHandler) ListToVars(listVarName, newVarPrefix string) error { + theList, ok := v.VarMap[listVarName] + if !ok { + return fmt.Errorf("Unable to find the variable named %s", listVarName) + } + + for i, val := range strings.Split(theList, " ") { + v.VarMap[fmt.Sprintf("%s%d", newVarPrefix, i+1)] = val + } + return nil +} + +func (v *VarHandler) ReplaceAll(s string) string { + for key, value := range v.VarMap { + s = strings.ReplaceAll(s, key, value) + } + return s +} diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 6c75078e2..37b6ce1db 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -29,26 +29,6 @@ import ( "k8s.io/apimachinery/pkg/runtime" ) -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *ClientEndpointsSpec) DeepCopyInto(out *ClientEndpointsSpec) { - *out = *in - if in.NodeNames != nil { - in, out := &in.NodeNames, &out.NodeNames - *out = make([]string, len(*in)) - copy(*out, *in) - } -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClientEndpointsSpec. -func (in *ClientEndpointsSpec) DeepCopy() *ClientEndpointsSpec { - if in == nil { - return nil - } - out := new(ClientEndpointsSpec) - in.DeepCopyInto(out) - return out -} - // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *LustreStorageSpec) DeepCopyInto(out *LustreStorageSpec) { *out = *in @@ -64,21 +44,6 @@ func (in *LustreStorageSpec) DeepCopy() *LustreStorageSpec { return out } -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *LustreStorageStatus) DeepCopyInto(out *LustreStorageStatus) { - *out = *in -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LustreStorageStatus. -func (in *LustreStorageStatus) DeepCopy() *LustreStorageStatus { - if in == nil { - return nil - } - out := new(LustreStorageStatus) - in.DeepCopyInto(out) - return out -} - // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *NnfAccess) DeepCopyInto(out *NnfAccess) { *out = *in @@ -518,6 +483,192 @@ func (in *NnfNode) DeepCopyObject() runtime.Object { return nil } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *NnfNodeBlockStorage) DeepCopyInto(out *NnfNodeBlockStorage) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NnfNodeBlockStorage. +func (in *NnfNodeBlockStorage) DeepCopy() *NnfNodeBlockStorage { + if in == nil { + return nil + } + out := new(NnfNodeBlockStorage) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *NnfNodeBlockStorage) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *NnfNodeBlockStorageAccessStatus) DeepCopyInto(out *NnfNodeBlockStorageAccessStatus) { + *out = *in + if in.DevicePaths != nil { + in, out := &in.DevicePaths, &out.DevicePaths + *out = make([]string, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NnfNodeBlockStorageAccessStatus. +func (in *NnfNodeBlockStorageAccessStatus) DeepCopy() *NnfNodeBlockStorageAccessStatus { + if in == nil { + return nil + } + out := new(NnfNodeBlockStorageAccessStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *NnfNodeBlockStorageAllocationSpec) DeepCopyInto(out *NnfNodeBlockStorageAllocationSpec) { + *out = *in + if in.Access != nil { + in, out := &in.Access, &out.Access + *out = make([]string, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NnfNodeBlockStorageAllocationSpec. +func (in *NnfNodeBlockStorageAllocationSpec) DeepCopy() *NnfNodeBlockStorageAllocationSpec { + if in == nil { + return nil + } + out := new(NnfNodeBlockStorageAllocationSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *NnfNodeBlockStorageAllocationStatus) DeepCopyInto(out *NnfNodeBlockStorageAllocationStatus) { + *out = *in + if in.Accesses != nil { + in, out := &in.Accesses, &out.Accesses + *out = make(map[string]NnfNodeBlockStorageAccessStatus, len(*in)) + for key, val := range *in { + (*out)[key] = *val.DeepCopy() + } + } + if in.Devices != nil { + in, out := &in.Devices, &out.Devices + *out = make([]NnfNodeBlockStorageDeviceStatus, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NnfNodeBlockStorageAllocationStatus. +func (in *NnfNodeBlockStorageAllocationStatus) DeepCopy() *NnfNodeBlockStorageAllocationStatus { + if in == nil { + return nil + } + out := new(NnfNodeBlockStorageAllocationStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *NnfNodeBlockStorageDeviceStatus) DeepCopyInto(out *NnfNodeBlockStorageDeviceStatus) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NnfNodeBlockStorageDeviceStatus. +func (in *NnfNodeBlockStorageDeviceStatus) DeepCopy() *NnfNodeBlockStorageDeviceStatus { + if in == nil { + return nil + } + out := new(NnfNodeBlockStorageDeviceStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *NnfNodeBlockStorageList) DeepCopyInto(out *NnfNodeBlockStorageList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]NnfNodeBlockStorage, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NnfNodeBlockStorageList. +func (in *NnfNodeBlockStorageList) DeepCopy() *NnfNodeBlockStorageList { + if in == nil { + return nil + } + out := new(NnfNodeBlockStorageList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *NnfNodeBlockStorageList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *NnfNodeBlockStorageSpec) DeepCopyInto(out *NnfNodeBlockStorageSpec) { + *out = *in + if in.Allocations != nil { + in, out := &in.Allocations, &out.Allocations + *out = make([]NnfNodeBlockStorageAllocationSpec, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NnfNodeBlockStorageSpec. +func (in *NnfNodeBlockStorageSpec) DeepCopy() *NnfNodeBlockStorageSpec { + if in == nil { + return nil + } + out := new(NnfNodeBlockStorageSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *NnfNodeBlockStorageStatus) DeepCopyInto(out *NnfNodeBlockStorageStatus) { + *out = *in + if in.Allocations != nil { + in, out := &in.Allocations, &out.Allocations + *out = make([]NnfNodeBlockStorageAllocationStatus, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + in.ResourceError.DeepCopyInto(&out.ResourceError) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NnfNodeBlockStorageStatus. +func (in *NnfNodeBlockStorageStatus) DeepCopy() *NnfNodeBlockStorageStatus { + if in == nil { + return nil + } + out := new(NnfNodeBlockStorageStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *NnfNodeECData) DeepCopyInto(out *NnfNodeECData) { *out = *in @@ -723,7 +874,7 @@ func (in *NnfNodeStorage) DeepCopyInto(out *NnfNodeStorage) { *out = *in out.TypeMeta = in.TypeMeta in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) - in.Spec.DeepCopyInto(&out.Spec) + out.Spec = in.Spec in.Status.DeepCopyInto(&out.Status) } @@ -748,23 +899,7 @@ func (in *NnfNodeStorage) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *NnfNodeStorageAllocationStatus) DeepCopyInto(out *NnfNodeStorageAllocationStatus) { *out = *in - if in.CreationTime != nil { - in, out := &in.CreationTime, &out.CreationTime - *out = (*in).DeepCopy() - } - if in.DeletionTime != nil { - in, out := &in.DeletionTime, &out.DeletionTime - *out = (*in).DeepCopy() - } - out.StorageGroup = in.StorageGroup - if in.NVMeList != nil { - in, out := &in.NVMeList, &out.NVMeList - *out = make([]NnfNodeStorageNVMeStatus, len(*in)) - copy(*out, *in) - } - out.FileShare = in.FileShare - out.StoragePool = in.StoragePool - out.FileSystem = in.FileSystem + in.ResourceError.DeepCopyInto(&out.ResourceError) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NnfNodeStorageAllocationStatus. @@ -809,32 +944,10 @@ func (in *NnfNodeStorageList) DeepCopyObject() runtime.Object { return nil } -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *NnfNodeStorageNVMeStatus) DeepCopyInto(out *NnfNodeStorageNVMeStatus) { - *out = *in -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NnfNodeStorageNVMeStatus. -func (in *NnfNodeStorageNVMeStatus) DeepCopy() *NnfNodeStorageNVMeStatus { - if in == nil { - return nil - } - out := new(NnfNodeStorageNVMeStatus) - in.DeepCopyInto(out) - return out -} - // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *NnfNodeStorageSpec) DeepCopyInto(out *NnfNodeStorageSpec) { *out = *in out.LustreStorage = in.LustreStorage - if in.ClientEndpoints != nil { - in, out := &in.ClientEndpoints, &out.ClientEndpoints - *out = make([]ClientEndpointsSpec, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NnfNodeStorageSpec. @@ -858,7 +971,6 @@ func (in *NnfNodeStorageStatus) DeepCopyInto(out *NnfNodeStorageStatus) { } } in.ResourceError.DeepCopyInto(&out.ResourceError) - out.LustreStorage = in.LustreStorage } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NnfNodeStorageStatus. @@ -1458,3 +1570,25 @@ func (in *NnfStorageStatus) DeepCopy() *NnfStorageStatus { in.DeepCopyInto(out) return out } + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *VarHandler) DeepCopyInto(out *VarHandler) { + *out = *in + if in.VarMap != nil { + in, out := &in.VarMap, &out.VarMap + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VarHandler. +func (in *VarHandler) DeepCopy() *VarHandler { + if in == nil { + return nil + } + out := new(VarHandler) + in.DeepCopyInto(out) + return out +} diff --git a/cmd/main.go b/cmd/main.go index 14da81959..015ec3af1 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -206,6 +206,14 @@ func (c *nodeLocalController) SetupReconcilers(mgr manager.Manager, opts *nnf.Op return err } + if err := (&controllers.NnfNodeBlockStorageReconciler{ + Client: mgr.GetClient(), + Log: ctrl.Log.WithName("controllers").WithName("NnfNodeBlockStorage"), + Scheme: mgr.GetScheme(), + }).SetupWithManager(mgr); err != nil { + return err + } + return (&controllers.NnfNodeStorageReconciler{ Client: mgr.GetClient(), Log: ctrl.Log.WithName("controllers").WithName("NnfNodeStorage"), diff --git a/config/crd/bases/nnf.cray.hpe.com_nnfnodeblockstorages.yaml b/config/crd/bases/nnf.cray.hpe.com_nnfnodeblockstorages.yaml new file mode 100644 index 000000000..9d1287f53 --- /dev/null +++ b/config/crd/bases/nnf.cray.hpe.com_nnfnodeblockstorages.yaml @@ -0,0 +1,140 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.13.0 + name: nnfnodeblockstorages.nnf.cray.hpe.com +spec: + group: nnf.cray.hpe.com + names: + kind: NnfNodeBlockStorage + listKind: NnfNodeBlockStorageList + plural: nnfnodeblockstorages + singular: nnfnodeblockstorage + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + description: NnfNodeBlockStorageSpec defines the desired storage attributes + on a NNF Node. Storage spec are created on bequest of the user and fullfilled + by the NNF Node Controller. + properties: + allocations: + items: + properties: + access: + items: + type: string + type: array + capacity: + format: int64 + type: integer + type: object + type: array + type: object + status: + properties: + allocations: + description: Allocations is the list of storage allocations that were + made + items: + properties: + accesses: + additionalProperties: + properties: + devicePaths: + items: + type: string + type: array + storageGroupId: + type: string + type: object + type: object + capacityAllocated: + description: Total capacity allocated for the storage. This + may differ from the requested storage capacity as the system + may round up to the requested capacity satisify underlying + storage requirements (i.e. block size / stripe size). + format: int64 + type: integer + devices: + description: List of NVMe namespaces used by this allocation + items: + properties: + NQN: + description: NQN of the base NVMe device + type: string + capacityAllocated: + description: Total capacity allocated for the storage. + This may differ from the requested storage capacity + as the system may round up to the requested capacity + satisify underlying storage requirements (i.e. block + size / stripe size). + format: int64 + type: integer + namespaceId: + description: Id of the Namespace on the NVMe device (e.g., + "2") + type: string + required: + - NQN + - namespaceId + type: object + type: array + storagePoolId: + type: string + type: object + type: array + error: + description: Error information + properties: + debugMessage: + description: Internal debug message for the error + type: string + severity: + description: Indication of how severe the error is. Minor will + likely succeed, Major may succeed, and Fatal will never succeed. + enum: + - Minor + - Major + - Fatal + type: string + type: + description: Internal or user error + enum: + - Internal + - User + type: string + userMessage: + description: Optional user facing message if the error is relevant + to an end user + type: string + required: + - debugMessage + - severity + - type + type: object + ready: + type: boolean + required: + - ready + type: object + type: object + served: true + storage: true diff --git a/config/crd/bases/nnf.cray.hpe.com_nnfnodestorages.yaml b/config/crd/bases/nnf.cray.hpe.com_nnfnodestorages.yaml index b4083b3fb..0496b8c24 100644 --- a/config/crd/bases/nnf.cray.hpe.com_nnfnodestorages.yaml +++ b/config/crd/bases/nnf.cray.hpe.com_nnfnodestorages.yaml @@ -42,26 +42,6 @@ spec: available drives operating in the NNF Node. format: int64 type: integer - clientEndpoints: - description: ClientEndpoints sets which endpoints should have access - to an allocation. - items: - description: ClientEndpointsSpec contains information about which - nodes a storage allocation should be visible to - properties: - allocationIndex: - description: Index of the allocation in the NnfNodeStorage - type: integer - nodeNames: - description: List of nodes that should see the allocation - items: - type: string - type: array - required: - - allocationIndex - - nodeNames - type: object - type: array count: description: Count is the number of allocations to make on this node. All of the allocations will be created with the same parameters @@ -119,22 +99,14 @@ spec: - OST type: string type: object - setOwnerGroup: - default: false - description: Set the owner and group permissions specified by UserID - and GroupID. This is for Lustre file systems only, and should be - set only after all Lustre targets are created. - type: boolean userID: description: User ID for file system format: int32 type: integer required: - - clientEndpoints - count - fileSystemType - groupID - - setOwnerGroup - userID type: object status: @@ -154,132 +126,41 @@ spec: storage requirements (i.e. block size / stripe size). format: int64 type: integer - creationTime: - description: Represents the time when the storage was created - by the controller It is represented in RFC3339 form and is - in UTC. - format: date-time - type: string - deletionTime: - description: Represents the time when the storage was deleted - by the controller. This field is updated when the Storage - specification State transitions to 'Delete' by the client. - It is represented in RFC3339 form and is in UTC. - format: date-time - type: string - fileShare: - description: Represents the file share that is supporting this - server. A file share is the combination of a storage group - and the associated file system parameters (type, mountpoint) - that makes up the available storage. - properties: - health: - description: NnfResourceHealthType defines the health of - an NNF resource. - type: string - id: - description: ID reflects the NNF Node unique identifier - for this NNF Server resource. - type: string - name: - description: Name reflects the common name of this NNF Server - resource. - type: string - status: - description: NnfResourceStatusType is the string that indicates - the resource's status - type: string - type: object - fileSystem: - description: NnfResourceStatus provides common fields that are - included in all NNF Resources + error: + description: Error information properties: - health: - description: NnfResourceHealthType defines the health of - an NNF resource. - type: string - id: - description: ID reflects the NNF Node unique identifier - for this NNF Server resource. - type: string - name: - description: Name reflects the common name of this NNF Server - resource. - type: string - status: - description: NnfResourceStatusType is the string that indicates - the resource's status - type: string + debugMessage: + description: Internal debug message for the error + type: string + severity: + description: Indication of how severe the error is. Minor + will likely succeed, Major may succeed, and Fatal will + never succeed. + enum: + - Minor + - Major + - Fatal + type: string + type: + description: Internal or user error + enum: + - Internal + - User + type: string + userMessage: + description: Optional user facing message if the error is + relevant to an end user + type: string + required: + - debugMessage + - severity + - type type: object logicalVolume: description: Name of the LVM LV type: string - nvmeList: - description: List of NVMe namespaces used by this allocation - items: - description: NnfNodeStorageNVMeStatus provides a way to uniquely - identify an NVMe namespace in the system - properties: - deviceSerial: - description: Serial number of the base NVMe device - type: string - namespaceGUID: - description: Globally unique namespace ID - type: string - namespaceID: - description: Id of the Namespace on the NVMe device (e.g., - "2") - type: string - required: - - deviceSerial - - namespaceGUID - - namespaceID - type: object - type: array - storageGroup: - description: Represents the storage group that is supporting - this server. A storage group is the mapping from a group of - drive namespaces to an individual server. This value can be - safely ignored by the client. - properties: - health: - description: NnfResourceHealthType defines the health of - an NNF resource. - type: string - id: - description: ID reflects the NNF Node unique identifier - for this NNF Server resource. - type: string - name: - description: Name reflects the common name of this NNF Server - resource. - type: string - status: - description: NnfResourceStatusType is the string that indicates - the resource's status - type: string - type: object - storagePool: - description: NnfResourceStatus provides common fields that are - included in all NNF Resources - properties: - health: - description: NnfResourceHealthType defines the health of - an NNF resource. - type: string - id: - description: ID reflects the NNF Node unique identifier - for this NNF Server resource. - type: string - name: - description: Name reflects the common name of this NNF Server - resource. - type: string - status: - description: NnfResourceStatusType is the string that indicates - the resource's status - type: string - type: object + ready: + type: boolean volumeGroup: description: Name of the LVM VG type: string @@ -314,19 +195,8 @@ spec: - severity - type type: object - lustreStorage: - description: LustreStorageStatus describes the Lustre targets created - here. - properties: - nid: - description: Nid (LNet Network Identifier) of this node. This - is populated on MGS nodes only. - type: string - type: object - ownerGroupStatus: - description: OwnerGroupStatus is the status of the operation for setting - the owner and group of a file system - type: string + ready: + type: boolean type: object type: object served: true diff --git a/config/crd/bases/nnf.cray.hpe.com_nnfstorages.yaml b/config/crd/bases/nnf.cray.hpe.com_nnfstorages.yaml index 6fd79aefc..5c5406886 100644 --- a/config/crd/bases/nnf.cray.hpe.com_nnfstorages.yaml +++ b/config/crd/bases/nnf.cray.hpe.com_nnfstorages.yaml @@ -190,12 +190,8 @@ spec: description: AllocationCount is the total number of allocations that currently exist type: integer - health: - description: Health reflects the health of this allocation set - type: string - status: - description: Status reflects the status of this allocation set - type: string + ready: + type: boolean required: - allocationCount type: object @@ -232,9 +228,9 @@ spec: mgsNode: description: MgsNode is the NID of the MGS. type: string - status: - description: Status reflects the status of this NNF Storage - type: string + ready: + description: Ready reflects the status of this NNF Storage + type: boolean type: object type: object served: true diff --git a/config/crd/kustomization.yaml b/config/crd/kustomization.yaml index 813540a4d..97fdcdf2c 100644 --- a/config/crd/kustomization.yaml +++ b/config/crd/kustomization.yaml @@ -4,6 +4,7 @@ resources: - bases/nnf.cray.hpe.com_nnfnodes.yaml - bases/nnf.cray.hpe.com_nnfnodestorages.yaml +- bases/nnf.cray.hpe.com_nnfnodeblockstorages.yaml - bases/nnf.cray.hpe.com_nnfstorages.yaml - bases/nnf.cray.hpe.com_nnfdatamovements.yaml - bases/nnf.cray.hpe.com_nnfaccesses.yaml diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 3f276fe53..c89f46f04 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -337,6 +337,24 @@ rules: - patch - update - watch +- apiGroups: + - nnf.cray.hpe.com + resources: + - nnfnodeblockstorages + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - nnf.cray.hpe.com + resources: + - nnfnodeblockstorages/finalizers + verbs: + - update - apiGroups: - nnf.cray.hpe.com resources: diff --git a/internal/controller/dws_servers_controller.go b/internal/controller/dws_servers_controller.go index 2405e6308..1c5c862fa 100644 --- a/internal/controller/dws_servers_controller.go +++ b/internal/controller/dws_servers_controller.go @@ -195,7 +195,7 @@ func (r *DWSServersReconciler) updateCapacityUsed(ctx context.Context, servers * allocationSet := nnfStorage.Status.AllocationSets[storageIndex] - if allocationSet.Status != nnfv1alpha1.ResourceReady { + if allocationSet.Ready == false { ready = false } diff --git a/internal/controller/metrics/metrics.go b/internal/controller/metrics/metrics.go index bee4e340a..6854ebadb 100644 --- a/internal/controller/metrics/metrics.go +++ b/internal/controller/metrics/metrics.go @@ -55,6 +55,13 @@ var ( }, ) + NnfNodeBlockStorageReconcilesTotal = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "nnf_node_block_storage_reconciles_total", + Help: "Number of total reconciles in nnf_node_block_storage controller", + }, + ) + NnfPersistentStorageReconcilesTotal = prometheus.NewCounter( prometheus.CounterOpts{ Name: "nnf_persistent_storage_reconciles_total", @@ -92,6 +99,7 @@ func init() { metrics.Registry.MustRegister(NnfNodeReconcilesTotal) metrics.Registry.MustRegister(NnfNodeECDataReconcilesTotal) metrics.Registry.MustRegister(NnfNodeStorageReconcilesTotal) + metrics.Registry.MustRegister(NnfNodeBlockStorageReconcilesTotal) metrics.Registry.MustRegister(NnfPersistentStorageReconcilesTotal) metrics.Registry.MustRegister(NnfServersReconcilesTotal) metrics.Registry.MustRegister(NnfStorageReconcilesTotal) diff --git a/internal/controller/nnf_access_controller.go b/internal/controller/nnf_access_controller.go index b1dadc3b4..cd695ee00 100644 --- a/internal/controller/nnf_access_controller.go +++ b/internal/controller/nnf_access_controller.go @@ -644,13 +644,6 @@ func (r *NnfAccessReconciler) mapClientLocalStorage(ctx context.Context, access mountInfo.Device.LVM.VolumeGroup = nnfNodeStorage.Status.Allocations[i].VolumeGroup mountInfo.Device.LVM.LogicalVolume = nnfNodeStorage.Status.Allocations[i].LogicalVolume mountInfo.Device.LVM.DeviceType = dwsv1alpha2.ClientMountLVMDeviceTypeNVMe - for _, nvme := range nnfNodeStorage.Status.Allocations[i].NVMeList { - nvmeDesc := dwsv1alpha2.ClientMountNVMeDesc{} - nvmeDesc.DeviceSerial = nvme.DeviceSerial - nvmeDesc.NamespaceID = nvme.NamespaceID - nvmeDesc.NamespaceGUID = nvme.NamespaceGUID - mountInfo.Device.LVM.NVMeInfo = append(mountInfo.Device.LVM.NVMeInfo, nvmeDesc) - } } existingStorage[nnfNodeStorage.Namespace] = append(existingStorage[nnfNodeStorage.Namespace], mountInfo) @@ -754,46 +747,46 @@ func (r *NnfAccessReconciler) addNodeStorageEndpoints(ctx context.Context, acces } } - // Loop through the NnfNodeStorages and add clientEndpoint information for each of the - // computes that need access to an allocation. - for nodeStorageReference, mountRefList := range nodeStorageMap { - namespacedName := types.NamespacedName{ - Name: nodeStorageReference.Name, - Namespace: nodeStorageReference.Namespace, - } - - nnfNodeStorage := &nnfv1alpha1.NnfNodeStorage{} - err := r.Get(ctx, namespacedName, nnfNodeStorage) - if err != nil { - return err - } - - oldNnfNodeStorage := *nnfNodeStorage.DeepCopy() + /* + // Loop through the NnfNodeStorages and add clientEndpoint information for each of the + // computes that need access to an allocation. + for nodeBlockStorageReference, mountRefList := range nodeStorageMap { + namespacedName := types.NamespacedName{ + Name: nodeBlockStorageReference.Name, + Namespace: nodeBlockStorageReference.Namespace, + } - // The clientEndpoints field is an array of each of the allocations on the Rabbit - // node that holds a list of the endpoints to expose the allocation to. The endpoints - // are the swordfish endpoints, so 0 is the rabbit, and 1-16 are the computes. Start out - // by clearing all compute node endpoints from the allocations. - for i := range nnfNodeStorage.Spec.ClientEndpoints { - nnfNodeStorage.Spec.ClientEndpoints[i].NodeNames = nnfNodeStorage.Spec.ClientEndpoints[i].NodeNames[:1] - } + nnfNodeStorage := &nnfv1alpha1.NnfNodeStorage{} + err := r.Get(ctx, namespacedName, nnfNodeStorage) + if err != nil { + return err + } - // Add compute node endpoints for each of the allocations. Increment the compute node - // index found from the "storage" resource to account for the 0 index being the rabbit - // in swordfish. - for _, mountRef := range mountRefList { - clientEndpoints := &nnfNodeStorage.Spec.ClientEndpoints[mountRef.allocationIndex].NodeNames - *clientEndpoints = append(*clientEndpoints, mountRef.client) - } + oldNnfNodeStorage := *nnfNodeStorage.DeepCopy() + // The clientEndpoints field is an array of each of the allocations on the Rabbit + // node that holds a list of the endpoints to expose the allocation to. The endpoints + // are the swordfish endpoints, so 0 is the rabbit, and 1-16 are the computes. Start out + // by clearing all compute node endpoints from the allocations. + for i := range nnfNodeStorage.Spec.ClientEndpoints { + nnfNodeStorage.Spec.ClientEndpoints[i].NodeNames = nnfNodeStorage.Spec.ClientEndpoints[i].NodeNames[:1] + } - if reflect.DeepEqual(oldNnfNodeStorage, *nnfNodeStorage) { - continue - } + // Add compute node endpoints for each of the allocations. Increment the compute node + // index found from the "storage" resource to account for the 0 index being the rabbit + // in swordfish. + for _, mountRef := range mountRefList { + clientEndpoints := &nnfNodeStorage.Spec.ClientEndpoints[mountRef.allocationIndex].NodeNames + *clientEndpoints = append(*clientEndpoints, mountRef.client) + } + if reflect.DeepEqual(oldNnfNodeStorage, *nnfNodeStorage) { + continue + } - if err = r.Update(ctx, nnfNodeStorage); err != nil { - return err + if err = r.Update(ctx, nnfNodeStorage); err != nil { + return err + } } - } + */ return nil } @@ -895,10 +888,11 @@ func (r *NnfAccessReconciler) removeNodeStorageEndpoints(ctx context.Context, ac oldNnfNodeStorage := *nnfNodeStorage.DeepCopy() - for i := range nnfNodeStorage.Spec.ClientEndpoints { - nnfNodeStorage.Spec.ClientEndpoints[i].NodeNames = nnfNodeStorage.Spec.ClientEndpoints[i].NodeNames[:1] - } - + /* + for i := range nnfNodeStorage.Spec.ClientEndpoints { + nnfNodeStorage.Spec.ClientEndpoints[i].NodeNames = nnfNodeStorage.Spec.ClientEndpoints[i].NodeNames[:1] + } + */ if reflect.DeepEqual(oldNnfNodeStorage, *nnfNodeStorage) { continue } diff --git a/internal/controller/nnf_clientmount_controller.go b/internal/controller/nnf_clientmount_controller.go index 8e483777d..9732bc175 100644 --- a/internal/controller/nnf_clientmount_controller.go +++ b/internal/controller/nnf_clientmount_controller.go @@ -275,23 +275,6 @@ func (r *NnfClientMountReconciler) changeMount(ctx context.Context, clientMountI return err } - allocationStatus := nodeStorage.Status.Allocations[clientMountInfo.Device.DeviceReference.Data] - fileShare, err := r.getFileShare(allocationStatus.FileSystem.ID, allocationStatus.FileShare.ID) - if err != nil { - return dwsv1alpha2.NewResourceError("could not get file share").WithError(err).WithMajor() - } - - if shouldMount { - fileShare.FileSharePath = clientMountInfo.MountPath - } else { - fileShare.FileSharePath = "" - } - - fileShare, err = r.updateFileShare(allocationStatus.FileSystem.ID, fileShare) - if err != nil { - return dwsv1alpha2.NewResourceError("could not update file share").WithError(err).WithMajor() - } - default: return dwsv1alpha2.NewResourceError("invalid device type %s", clientMountInfo.Device.Type).WithFatal() } diff --git a/internal/controller/nnf_node_block_storage_controller.go b/internal/controller/nnf_node_block_storage_controller.go new file mode 100644 index 000000000..bae12e8cb --- /dev/null +++ b/internal/controller/nnf_node_block_storage_controller.go @@ -0,0 +1,542 @@ +/* + * Copyright 2021-2023 Hewlett Packard Enterprise Development LP + * Other additional copyright holders may be indicated within. + * + * The entirety of this work is licensed under the Apache License, + * Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. + * + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package controller + +import ( + "context" + "fmt" + "net/http" + "os" + "strconv" + "strings" + "time" + + "github.com/go-logr/logr" + apierrors "k8s.io/apimachinery/pkg/api/errors" + kruntime "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + + ec "github.com/NearNodeFlash/nnf-ec/pkg/ec" + nnf "github.com/NearNodeFlash/nnf-ec/pkg/manager-nnf" + nnfnvme "github.com/NearNodeFlash/nnf-ec/pkg/manager-nvme" + openapi "github.com/NearNodeFlash/nnf-ec/pkg/rfsf/pkg/common" + sf "github.com/NearNodeFlash/nnf-ec/pkg/rfsf/pkg/models" + + dwsv1alpha2 "github.com/DataWorkflowServices/dws/api/v1alpha2" + "github.com/DataWorkflowServices/dws/utils/updater" + nnfv1alpha1 "github.com/NearNodeFlash/nnf-sos/api/v1alpha1" + "github.com/NearNodeFlash/nnf-sos/internal/controller/metrics" + "github.com/NearNodeFlash/nnf-sos/pkg/nvme" +) + +const ( + // finalizerNnfNodeBlockStorage defines the key used in identifying the + // storage object as being owned by this NNF Storage Reconciler. This + // prevents the system from deleting the custom resource until the + // reconciler has finished using the resource. + finalizerNnfNodeBlockStorage = "nnf.cray.hpe.com/nnf_node_block_storage" +) + +// NnfNodeBlockStorageReconciler contains the elements needed during reconciliation for NnfNodeBlockStorage +type NnfNodeBlockStorageReconciler struct { + client.Client + Log logr.Logger + Scheme *kruntime.Scheme + + types.NamespacedName +} + +//+kubebuilder:rbac:groups=nnf.cray.hpe.com,resources=nnfnodeblockstorages,verbs=get;list;watch;create;update;patch;delete +//+kubebuilder:rbac:groups=nnf.cray.hpe.com,resources=nnfnodeblockstorages/finalizers,verbs=update + +// Reconcile is part of the main kubernetes reconciliation loop which aims to +// move the current state of the cluster closer to the desired state. +// +// For more details, check Reconcile and its Result here: +// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.7.2/pkg/reconcile +func (r *NnfNodeBlockStorageReconciler) Reconcile(ctx context.Context, req ctrl.Request) (res ctrl.Result, err error) { + log := r.Log.WithValues("NnfNodeBlockStorage", req.NamespacedName) + metrics.NnfNodeBlockStorageReconcilesTotal.Inc() + + nodeBlockStorage := &nnfv1alpha1.NnfNodeBlockStorage{} + if err := r.Get(ctx, req.NamespacedName, nodeBlockStorage); err != nil { + // ignore not-found errors, since they can't be fixed by an immediate + // requeue (we'll need to wait for a new notification), and we can get them + // on deleted requests. + return ctrl.Result{}, client.IgnoreNotFound(err) + } + + // Ensure the NNF Storage Service is running prior to taking any action. + ss := nnf.NewDefaultStorageService() + storageService := &sf.StorageServiceV150StorageService{} + if err := ss.StorageServiceIdGet(ss.Id(), storageService); err != nil { + return ctrl.Result{}, err + } + + if storageService.Status.State != sf.ENABLED_RST { + return ctrl.Result{RequeueAfter: 1 * time.Second}, nil + } + + // Use the Node Storage Status Updater to track updates to the storage status. + // This ensures that only one call to r.Status().Update() is done even though we + // update the status at several points in the process. We hijack the defer logic + // to perform the status update if no other error is present in the system when + // exiting this reconcile function. Note that "err" is the named return value, + // so when we would normally call "return ctrl.Result{}, nil", at that time + // "err" is nil - and if permitted we will update err with the result of + // the r.Update() + statusUpdater := updater.NewStatusUpdater[*nnfv1alpha1.NnfNodeBlockStorageStatus](nodeBlockStorage) + defer func() { err = statusUpdater.CloseWithUpdate(ctx, r, err) }() + defer func() { nodeBlockStorage.Status.SetResourceErrorAndLog(err, log) }() + + // Check if the object is being deleted. Deletion is carefully coordinated around + // the NNF resources being managed by this NNF Node Storage resource. For a + // successful deletion, the NNF Storage Pool must be deleted. Deletion of the + // Storage Pool handles the entire sub-tree of NNF resources (Storage Groups, + // File System, and File Shares). The Finalizer on this NNF Node Storage resource + // is present until the underlying NNF resources are deleted through the + // storage service. + if !nodeBlockStorage.GetDeletionTimestamp().IsZero() { + if !controllerutil.ContainsFinalizer(nodeBlockStorage, finalizerNnfNodeBlockStorage) { + return ctrl.Result{}, nil + } + + for i := range nodeBlockStorage.Status.Allocations { + // Release physical storage + result, err := r.deleteStorage(nodeBlockStorage, i) + if err != nil { + return ctrl.Result{Requeue: true}, nil + } + if result != nil { + return *result, nil + } + } + + controllerutil.RemoveFinalizer(nodeBlockStorage, finalizerNnfNodeBlockStorage) + if err := r.Update(ctx, nodeBlockStorage); err != nil { + if !apierrors.IsConflict(err) { + return ctrl.Result{}, err + } + + return ctrl.Result{Requeue: true}, nil + } + + return ctrl.Result{}, nil + } + + // First time setup requires programming of the storage status such that the resource + // is labeled as "Starting". After this is done, + // the resource obtains a finalizer to manage the resource lifetime. + if !controllerutil.ContainsFinalizer(nodeBlockStorage, finalizerNnfNodeBlockStorage) { + controllerutil.AddFinalizer(nodeBlockStorage, finalizerNnfNodeBlockStorage) + if err := r.Update(ctx, nodeBlockStorage); err != nil { + if !apierrors.IsConflict(err) { + return ctrl.Result{}, err + } + + return ctrl.Result{Requeue: true}, nil + } + + return ctrl.Result{}, nil + } + + // Initialize the status section with empty allocation statuses. + if len(nodeBlockStorage.Status.Allocations) == 0 { + nodeBlockStorage.Status.Allocations = make([]nnfv1alpha1.NnfNodeBlockStorageAllocationStatus, len(nodeBlockStorage.Spec.Allocations)) + for i := range nodeBlockStorage.Status.Allocations { + nodeBlockStorage.Status.Allocations[i].Accesses = map[string]nnfv1alpha1.NnfNodeBlockStorageAccessStatus{} + } + + return ctrl.Result{}, nil + } + + // Loop through each allocation and create the storage + for i := range nodeBlockStorage.Spec.Allocations { + // Allocate physical storage + result, err := r.allocateStorage(nodeBlockStorage, i) + if err != nil { + return ctrl.Result{}, dwsv1alpha2.NewResourceError("unable to allocate NVMe namespaces for allocation %v", i).WithError(err).WithMajor() + } + if result != nil { + return *result, nil + } + + // Create a block device in /dev that is accessible on the Rabbit node + result, err = r.createBlockDevice(ctx, nodeBlockStorage, i) + if err != nil { + return ctrl.Result{}, dwsv1alpha2.NewResourceError("unable to attache NVMe namespace to node for allocation %v", i).WithError(err).WithMajor() + } + if result != nil { + return *result, nil + } + } + + nodeBlockStorage.Status.Ready = true + + return ctrl.Result{}, nil +} + +func (r *NnfNodeBlockStorageReconciler) allocateStorage(nodeBlockStorage *nnfv1alpha1.NnfNodeBlockStorage, index int) (*ctrl.Result, error) { + log := r.Log.WithValues("NnfNodeBlockStorage", types.NamespacedName{Name: nodeBlockStorage.Name, Namespace: nodeBlockStorage.Namespace}) + + ss := nnf.NewDefaultStorageService() + nvmeSS := nnfnvme.NewDefaultStorageService() + + allocationStatus := &nodeBlockStorage.Status.Allocations[index] + + storagePoolID := fmt.Sprintf("%s-%d", nodeBlockStorage.Name, index) + sp, err := r.createStoragePool(ss, storagePoolID, nodeBlockStorage.Spec.Allocations[index].Capacity) + if err != nil { + return &ctrl.Result{}, dwsv1alpha2.NewResourceError("could not create storage pool").WithError(err).WithMajor() + + } + + vc := &sf.VolumeCollectionVolumeCollection{} + if err := ss.StorageServiceIdStoragePoolIdCapacitySourceIdProvidingVolumesGet(ss.Id(), storagePoolID, "0", vc); err != nil { + return nil, err + } + + if len(allocationStatus.Devices) == 0 { + allocationStatus.Devices = make([]nnfv1alpha1.NnfNodeBlockStorageDeviceStatus, len(vc.Members)) + } + + if len(allocationStatus.Devices) != len(vc.Members) { + return &ctrl.Result{}, dwsv1alpha2.NewResourceError("unexpected number of namespaces").WithFatal() + } + + for i, member := range vc.Members { + components := strings.Split(member.OdataId, "/") + storageId := components[4] + volumeId := components[6] + + storage := &sf.StorageV190Storage{} + if err := nvmeSS.StorageIdGet(storageId, storage); err != nil { + return nil, err + } + + volume := &sf.VolumeV161Volume{} + if err := nvmeSS.StorageIdVolumeIdGet(storageId, volumeId, volume); err != nil { + return nil, err + } + + allocationStatus.Devices[i].NQN = strings.Replace(storage.Identifiers[0].DurableName, "\u0000", "", -1) + allocationStatus.Devices[i].NamespaceId = volume.NVMeNamespaceProperties.NamespaceId + allocationStatus.Devices[i].CapacityAllocated = volume.CapacityBytes + } + + allocationStatus.CapacityAllocated = sp.CapacityBytes + + // If the SF ID is empty then we just created the resource. Save the ID in the NnfNodeBlockStorage + if len(allocationStatus.StoragePoolId) == 0 { + log.Info("Created storage pool", "Id", sp.Id) + allocationStatus.StoragePoolId = sp.Id + + return &ctrl.Result{}, nil + } + + return nil, nil +} + +func (r *NnfNodeBlockStorageReconciler) createBlockDevice(ctx context.Context, nodeBlockStorage *nnfv1alpha1.NnfNodeBlockStorage, index int) (*ctrl.Result, error) { + log := r.Log.WithValues("NnfNodeBlockStorage", types.NamespacedName{Name: nodeBlockStorage.Name, Namespace: nodeBlockStorage.Namespace}) + ss := nnf.NewDefaultStorageService() + + allocationStatus := &nodeBlockStorage.Status.Allocations[index] + + // Create a Storage Group if none is currently present. Recall that a Storage Group + // is a mapping from the Storage Pool to a Server Endpoint. Establishing a Storage + // Group makes block storage available on the server, which itself is a prerequisite to + // any file system built on top of the block storage. + + // Retrieve the collection of endpoints for us to map + serverEndpointCollection := &sf.EndpointCollectionEndpointCollection{} + if err := ss.StorageServiceIdEndpointsGet(ss.Id(), serverEndpointCollection); err != nil { + return nil, dwsv1alpha2.NewResourceError("could not get service endpoint").WithError(err).WithFatal() + } + + // Get the Storage resource to map between compute node name and + // endpoint index. + namespacedName := types.NamespacedName{ + Name: nodeBlockStorage.Namespace, + Namespace: "default", + } + + storage := &dwsv1alpha2.Storage{} + err := r.Get(ctx, namespacedName, storage) + if err != nil { + return nil, dwsv1alpha2.NewResourceError("could not read storage resource").WithError(err) + } + + // Build a list of all nodes with access to the storage + clients := []string{} + for _, server := range storage.Status.Access.Servers { + clients = append(clients, server.Name) + } + + for _, compute := range storage.Status.Access.Computes { + clients = append(clients, compute.Name) + } + + // Make a list of all the endpoints and set whether they need a storage group based + // on the list of clients specified in the ClientEndpoints array + accessList := make([]string, len(serverEndpointCollection.Members)) + for _, nodeName := range nodeBlockStorage.Spec.Allocations[index].Access { + for i, clientName := range clients { + if nodeName == clientName { + accessList[i] = nodeName + } + } + } + + // Loop through the list of endpoints and delete the StorageGroup for endpoints where + // access==false, and create the StorageGroup for endpoints where access==true + for clientIndex, nodeName := range accessList { + endpointRef := serverEndpointCollection.Members[clientIndex] + endpointID := endpointRef.OdataId[strings.LastIndex(endpointRef.OdataId, "/")+1:] + storageGroupId := fmt.Sprintf("%s-%d-%s", nodeBlockStorage.Name, index, endpointID) + + // If the endpoint doesn't need a storage group, remove one if it exists + if nodeName == "" { + if _, err := r.getStorageGroup(ss, storageGroupId); err != nil { + continue + } + + if err := r.deleteStorageGroup(ss, storageGroupId); err != nil { + return nil, dwsv1alpha2.NewResourceError("could not delete storage group").WithError(err).WithMajor() + } + + delete(allocationStatus.Accesses, nodeName) + + log.Info("Deleted storage group", "storageGroupId", storageGroupId) + } else { + // The kind environment doesn't support endpoints beyond the Rabbit + if os.Getenv("ENVIRONMENT") == "kind" && endpointID != os.Getenv("RABBIT_NODE") { + continue + } + + endPoint, err := r.getEndpoint(ss, endpointID) + if err != nil { + return nil, dwsv1alpha2.NewResourceError("could not get endpoint").WithError(err).WithFatal() + } + + // Skip the endpoints that are not ready + if nnfv1alpha1.StaticResourceStatus(endPoint.Status) != nnfv1alpha1.ResourceReady { + continue + } + + sg, err := r.createStorageGroup(ss, storageGroupId, allocationStatus.StoragePoolId, endpointID) + if err != nil { + return &ctrl.Result{}, dwsv1alpha2.NewResourceError("could not create storage group").WithError(err).WithMajor() + } + + // If the access status doesn't exist then we just created the resource. Save the ID in the NnfNodeBlockStorage + if _, ok := allocationStatus.Accesses[nodeName]; !ok { + log.Info("Created storage group", "Id", storageGroupId) + allocationStatus.Accesses[nodeName] = nnfv1alpha1.NnfNodeBlockStorageAccessStatus{StorageGroupId: sg.Id} + + return &ctrl.Result{}, nil + } + + // The device paths are discovered below. This is only relevant for the Rabbit node access + if nodeName != clients[0] { + return nil, nil + } + + // + _, found := os.LookupEnv("NNF_TEST_ENVIRONMENT") + if found || os.Getenv("ENVIRONMENT") == "kind" { + return nil, nil + } + + // Initialize the path array if it doesn't exist yet + if len(allocationStatus.Accesses[nodeName].DevicePaths) != len(allocationStatus.Devices) { + if access, ok := allocationStatus.Accesses[nodeName]; ok { + access.DevicePaths = make([]string, len(allocationStatus.Devices)) + allocationStatus.Accesses[nodeName] = access + } + } + + foundDevices, err := nvme.NvmeListDevices() + if err != nil { + return nil, err + } + + for i, allocatedDevice := range allocationStatus.Devices { + findMatchingNvmeDevice := func() string { + for _, foundDevice := range foundDevices { + if allocatedDevice.NQN == foundDevice.NQN && allocatedDevice.NamespaceId == strconv.FormatUint(uint64(foundDevice.NSID), 10) { + return foundDevice.DevicePath + } + } + + return "" + } + + path := findMatchingNvmeDevice() + if path == "" { + return nil, dwsv1alpha2.NewResourceError("could not find device path for %v", allocatedDevice).WithError(err).WithMajor() + } + + allocationStatus.Accesses[nodeName].DevicePaths[i] = path + } + } + } + + return nil, nil + +} + +func (r *NnfNodeBlockStorageReconciler) deleteStorage(nodeBlockStorage *nnfv1alpha1.NnfNodeBlockStorage, index int) (*ctrl.Result, error) { + log := r.Log.WithValues("NnfNodeBlockStorage", types.NamespacedName{Name: nodeBlockStorage.Name, Namespace: nodeBlockStorage.Namespace}) + + ss := nnf.NewDefaultStorageService() + + allocationStatus := &nodeBlockStorage.Status.Allocations[index] + if allocationStatus.StoragePoolId == "" { + return nil, nil + } + + log.Info("Deleting storage pool", "Id", allocationStatus.StoragePoolId) + + err := r.deleteStoragePool(ss, allocationStatus.StoragePoolId) + if err != nil { + ecErr, ok := err.(*ec.ControllerError) + + // If the error is from a 404 error, then there's nothing to clean up and we + // assume everything has been deleted + if !ok || ecErr.StatusCode() != http.StatusNotFound { + nodeBlockStorage.Status.Error = dwsv1alpha2.NewResourceError("could not delete storage pool").WithError(err).WithFatal() + log.Info(nodeBlockStorage.Status.Error.Error()) + + return &ctrl.Result{Requeue: true}, nil + } + } + + return nil, nil +} + +func (r *NnfNodeBlockStorageReconciler) createStoragePool(ss nnf.StorageServiceApi, id string, capacity int64) (*sf.StoragePoolV150StoragePool, error) { + sp := &sf.StoragePoolV150StoragePool{ + Id: id, + CapacityBytes: capacity, + Oem: openapi.MarshalOem(nnf.AllocationPolicyOem{ + Policy: nnf.SpareAllocationPolicyType, + Compliance: nnf.RelaxedAllocationComplianceType, + }), + } + + if err := ss.StorageServiceIdStoragePoolIdPut(ss.Id(), id, sp); err != nil { + resourceErr := dwsv1alpha2.NewResourceError("could not allocate storage pool").WithError(err) + ecErr, ok := err.(*ec.ControllerError) + if ok { + switch ecErr.Cause() { + case "Insufficient capacity available": + return nil, resourceErr.WithUserMessage("insufficient capacity available").WithWLM().WithFatal() + default: + return nil, resourceErr + } + } + + return nil, resourceErr + } + + return sp, nil +} + +func (r *NnfNodeBlockStorageReconciler) getStoragePool(ss nnf.StorageServiceApi, id string) (*sf.StoragePoolV150StoragePool, error) { + sp := &sf.StoragePoolV150StoragePool{} + + if err := ss.StorageServiceIdStoragePoolIdGet(ss.Id(), id, sp); err != nil { + return nil, err + } + + return sp, nil +} + +func (r *NnfNodeBlockStorageReconciler) deleteStoragePool(ss nnf.StorageServiceApi, id string) error { + if err := ss.StorageServiceIdStoragePoolIdDelete(ss.Id(), id); err != nil { + return err + } + + return nil +} + +func (r *NnfNodeBlockStorageReconciler) getEndpoint(ss nnf.StorageServiceApi, id string) (*sf.EndpointV150Endpoint, error) { + ep := &sf.EndpointV150Endpoint{} + + if err := ss.StorageServiceIdEndpointIdGet(ss.Id(), id, ep); err != nil { + return nil, err + } + + return ep, nil +} + +func (r *NnfNodeBlockStorageReconciler) createStorageGroup(ss nnf.StorageServiceApi, id string, spID string, epID string) (*sf.StorageGroupV150StorageGroup, error) { + sp, err := r.getStoragePool(ss, spID) + if err != nil { + return nil, err + } + + ep, err := r.getEndpoint(ss, epID) + if err != nil { + return nil, err + } + + sg := &sf.StorageGroupV150StorageGroup{ + Id: id, + Links: sf.StorageGroupV150Links{ + StoragePool: sf.OdataV4IdRef{OdataId: sp.OdataId}, + ServerEndpoint: sf.OdataV4IdRef{OdataId: ep.OdataId}, + }, + } + + if err := ss.StorageServiceIdStorageGroupIdPut(ss.Id(), id, sg); err != nil { + return nil, err + } + + return sg, nil +} + +func (r *NnfNodeBlockStorageReconciler) getStorageGroup(ss nnf.StorageServiceApi, id string) (*sf.StorageGroupV150StorageGroup, error) { + sg := &sf.StorageGroupV150StorageGroup{} + + if err := ss.StorageServiceIdStorageGroupIdGet(ss.Id(), id, sg); err != nil { + return nil, err + } + + return sg, nil +} + +func (r *NnfNodeBlockStorageReconciler) deleteStorageGroup(ss nnf.StorageServiceApi, id string) error { + return ss.StorageServiceIdStorageGroupIdDelete(ss.Id(), id) +} + +// SetupWithManager sets up the controller with the Manager. +func (r *NnfNodeBlockStorageReconciler) SetupWithManager(mgr ctrl.Manager) error { + // nnf-ec is not thread safe, so we are limited to a single reconcile thread. + return ctrl.NewControllerManagedBy(mgr). + WithOptions(controller.Options{MaxConcurrentReconciles: 1}). + For(&nnfv1alpha1.NnfNodeBlockStorage{}). + Complete(r) +} diff --git a/internal/controller/nnf_node_storage_controller.go b/internal/controller/nnf_node_storage_controller.go index 926ae17f4..5fd2a0cbb 100644 --- a/internal/controller/nnf_node_storage_controller.go +++ b/internal/controller/nnf_node_storage_controller.go @@ -21,12 +21,8 @@ package controller import ( "context" - "crypto/md5" "fmt" - "net/http" "os" - "strconv" - "strings" "time" "github.com/go-logr/logr" @@ -37,14 +33,11 @@ import ( "k8s.io/mount-utils" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/controller" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" - ec "github.com/NearNodeFlash/nnf-ec/pkg/ec" - nnf "github.com/NearNodeFlash/nnf-ec/pkg/manager-nnf" - nnfserver "github.com/NearNodeFlash/nnf-ec/pkg/manager-server" + "github.com/NearNodeFlash/nnf-sos/pkg/lvm" - openapi "github.com/NearNodeFlash/nnf-ec/pkg/rfsf/pkg/common" + nnf "github.com/NearNodeFlash/nnf-ec/pkg/manager-nnf" sf "github.com/NearNodeFlash/nnf-ec/pkg/rfsf/pkg/models" dwsv1alpha2 "github.com/DataWorkflowServices/dws/api/v1alpha2" @@ -127,16 +120,18 @@ func (r *NnfNodeStorageReconciler) Reconcile(ctx context.Context, req ctrl.Reque return ctrl.Result{}, nil } - for i := range nodeStorage.Status.Allocations { - // Release physical storage - result, err := r.deleteStorage(nodeStorage, i) - if err != nil { - return ctrl.Result{Requeue: true}, nil + /* + for i := range nodeStorage.Status.Allocations { + // Release physical storage + result, err := r.deleteStorage(nodeStorage, i) + if err != nil { + return ctrl.Result{Requeue: true}, nil + } + if result != nil { + return *result, nil + } } - if result != nil { - return *result, nil - } - } + */ controllerutil.RemoveFinalizer(nodeStorage, finalizerNnfNodeStorage) if err := r.Update(ctx, nodeStorage); err != nil { @@ -169,41 +164,27 @@ func (r *NnfNodeStorageReconciler) Reconcile(ctx context.Context, req ctrl.Reque // Initialize the status section with empty allocation statuses. if len(nodeStorage.Status.Allocations) == 0 { nodeStorage.Status.Allocations = make([]nnfv1alpha1.NnfNodeStorageAllocationStatus, nodeStorage.Spec.Count) - for i := range nodeStorage.Status.Allocations { - allocation := &nodeStorage.Status.Allocations[i] - - allocation.StoragePool.Status = nnfv1alpha1.ResourceStarting - allocation.StorageGroup.Status = nnfv1alpha1.ResourceStarting - allocation.FileSystem.Status = nnfv1alpha1.ResourceStarting - allocation.FileShare.Status = nnfv1alpha1.ResourceStarting + nodeStorage.Status.Allocations[i].Ready = false } + nodeStorage.Status.Ready = false return ctrl.Result{}, nil } + // Create the NnfNodeBlockStorage and wait for it to be ready + result, err := r.createBlockStorage(ctx, nodeStorage) + if err != nil { + return ctrl.Result{}, dwsv1alpha2.NewResourceError("unable to create block storage").WithError(err).WithMajor() + } + if result != nil { + return *result, nil + } + // Loop through each allocation and create the storage for i := 0; i < nodeStorage.Spec.Count; i++ { - // Allocate physical storage - result, err := r.allocateStorage(nodeStorage, i) - if err != nil { - return ctrl.Result{}, dwsv1alpha2.NewResourceError("unable to allocate NVMe namespaces for allocation %v", i).WithError(err).WithMajor() - } - if result != nil { - return *result, nil - } - - // Create a block device in /dev that is accessible on the Rabbit node - result, err = r.createBlockDevice(ctx, nodeStorage, i) - if err != nil { - return ctrl.Result{}, dwsv1alpha2.NewResourceError("unable to attache NVMe namespace to Rabbit node for allocation %v", i).WithError(err).WithMajor() - } - if result != nil { - return *result, nil - } - // Format the block device from the Rabbit with a file system (if needed) - result, err = r.formatFileSystem(ctx, nodeStorage, i) + result, err := r.createAllocation(ctx, nodeStorage, i) if err != nil { return ctrl.Result{}, dwsv1alpha2.NewResourceError("unable to format file system for allocation %v", i).WithError(err).WithMajor() } @@ -212,164 +193,71 @@ func (r *NnfNodeStorageReconciler) Reconcile(ctx context.Context, req ctrl.Reque } } - if nodeStorage.Spec.SetOwnerGroup && nodeStorage.Status.OwnerGroupStatus != nnfv1alpha1.ResourceReady { - if nodeStorage.Status.OwnerGroupStatus == "" { - nodeStorage.Status.OwnerGroupStatus = nnfv1alpha1.ResourceStarting - - return ctrl.Result{}, nil - } - - if err := r.setLustreOwnerGroup(nodeStorage); err != nil { - return ctrl.Result{}, dwsv1alpha2.NewResourceError("unable to set owner and group for file system").WithError(err).WithMajor() - } - - nodeStorage.Status.OwnerGroupStatus = nnfv1alpha1.ResourceReady - } - return ctrl.Result{}, nil } -func (r *NnfNodeStorageReconciler) allocateStorage(nodeStorage *nnfv1alpha1.NnfNodeStorage, index int) (*ctrl.Result, error) { - log := r.Log.WithValues("NnfNodeStorage", types.NamespacedName{Name: nodeStorage.Name, Namespace: nodeStorage.Namespace}) +func (r *NnfNodeStorageReconciler) createBlockStorage(ctx context.Context, nnfNodeStorage *nnfv1alpha1.NnfNodeStorage) (*ctrl.Result, error) { + log := r.Log.WithValues("NnfNodeStorage", client.ObjectKeyFromObject(nnfNodeStorage)) - ss := nnf.NewDefaultStorageService() - - allocationStatus := &nodeStorage.Status.Allocations[index] - - storagePoolID := fmt.Sprintf("%s-%d", nodeStorage.Name, index) - sp, err := r.createStoragePool(ss, storagePoolID, nodeStorage.Spec.Capacity) - if err != nil { - allocationStatus.StoragePool.Status = nnfv1alpha1.ResourceFailed - return &ctrl.Result{}, dwsv1alpha2.NewResourceError("could not create storage pool").WithError(err).WithMajor() - - } - - allocationStatus.StoragePool.Status = nnfv1alpha1.ResourceStatus(sp.Status) - allocationStatus.StoragePool.Health = nnfv1alpha1.ResourceHealth(sp.Status) - allocationStatus.CapacityAllocated = sp.CapacityBytes - - // If the SF ID is empty then we just created the resource. Save the ID in the NnfNodeStorage - if len(allocationStatus.StoragePool.ID) == 0 { - log.Info("Created storage pool", "Id", sp.Id) - allocationStatus.StoragePool.ID = sp.Id - - return &ctrl.Result{}, nil + nnfNodeBlockStorage := &nnfv1alpha1.NnfNodeBlockStorage{ + ObjectMeta: metav1.ObjectMeta{ + Name: nnfNodeStorage.Name, + Namespace: nnfNodeStorage.Namespace, + }, } - return nil, nil -} - -func (r *NnfNodeStorageReconciler) createBlockDevice(ctx context.Context, nodeStorage *nnfv1alpha1.NnfNodeStorage, index int) (*ctrl.Result, error) { - log := r.Log.WithValues("NnfNodeStorage", types.NamespacedName{Name: nodeStorage.Name, Namespace: nodeStorage.Namespace}) - ss := nnf.NewDefaultStorageService() + result, err := ctrl.CreateOrUpdate(ctx, r.Client, nnfNodeBlockStorage, + func() error { + dwsv1alpha2.InheritParentLabels(nnfNodeBlockStorage, nnfNodeStorage) + dwsv1alpha2.AddOwnerLabels(nnfNodeBlockStorage, nnfNodeStorage) - allocationStatus := &nodeStorage.Status.Allocations[index] + if len(nnfNodeBlockStorage.Spec.Allocations) == 0 { + nnfNodeBlockStorage.Spec.Allocations = make([]nnfv1alpha1.NnfNodeBlockStorageAllocationSpec, nnfNodeStorage.Spec.Count) + } - // Create a Storage Group if none is currently present. Recall that a Storage Group - // is a mapping from the Storage Pool to a Server Endpoint. Establishing a Storage - // Group makes block storage available on the server, which itself is a prerequisite to - // any file system built on top of the block storage. + if len(nnfNodeBlockStorage.Spec.Allocations) != nnfNodeStorage.Spec.Count { + return dwsv1alpha2.NewResourceError("block storage allocation count incorrect. found %v, expected %v", len(nnfNodeBlockStorage.Spec.Allocations), nnfNodeStorage.Spec.Count).WithFatal() + } - // Retrieve the collection of endpoints for us to map - serverEndpointCollection := &sf.EndpointCollectionEndpointCollection{} - if err := ss.StorageServiceIdEndpointsGet(ss.Id(), serverEndpointCollection); err != nil { - return nil, dwsv1alpha2.NewResourceError("could not get service endpoint").WithError(err).WithFatal() - } + rabbitNode := os.Getenv("NNF_NODE_NAME") - // Get the Storage resource to map between compute node name and - // endpoint index. - namespacedName := types.NamespacedName{ - Name: nodeStorage.Namespace, - Namespace: "default", - } + for i := range nnfNodeBlockStorage.Spec.Allocations { + nnfNodeBlockStorage.Spec.Allocations[i].Capacity = nnfNodeStorage.Spec.Capacity + if len(nnfNodeBlockStorage.Spec.Allocations[i].Access) == 0 { + nnfNodeBlockStorage.Spec.Allocations[i].Access = append(nnfNodeBlockStorage.Spec.Allocations[i].Access, rabbitNode) + } + } - storage := &dwsv1alpha2.Storage{} - err := r.Get(ctx, namespacedName, storage) + return ctrl.SetControllerReference(nnfNodeStorage, nnfNodeBlockStorage, r.Scheme) + }) if err != nil { - return nil, dwsv1alpha2.NewResourceError("could not read storage resource").WithError(err) - } + if !apierrors.IsConflict(err) { + return nil, err + } - // Build a list of all nodes with access to the storage - clients := []string{} - for _, server := range storage.Status.Access.Servers { - clients = append(clients, server.Name) + return &ctrl.Result{Requeue: true}, nil } - for _, compute := range storage.Status.Access.Computes { - clients = append(clients, compute.Name) + if result == controllerutil.OperationResultCreated { + log.Info("Created NnfNodeBlockStorage", "Name", nnfNodeBlockStorage.Name, "Namespace", nnfNodeBlockStorage.Namespace) + } else if result == controllerutil.OperationResultNone { + // no change + } else { + log.Info("Updated NnfNodeBlockStorage", "Name", nnfNodeBlockStorage.Name, "Namespace", nnfNodeBlockStorage.Namespace) } - // Make a list of all the endpoints and set whether they need a storage group based - // on the list of clients specified in the ClientEndpoints array - accessList := make([]bool, len(serverEndpointCollection.Members)) - for _, nodeName := range nodeStorage.Spec.ClientEndpoints[index].NodeNames { - for i, clientName := range clients { - if nodeName == clientName { - accessList[i] = true - } - } + if nnfNodeBlockStorage.Status.Error != nil { + nnfNodeStorage.Status.SetResourceError(nnfNodeBlockStorage.Status.Error) } - // Loop through the list of endpoints and delete the StorageGroup for endpoints where - // access==false, and create the StorageGroup for endpoints where access==true - for clientIndex, access := range accessList { - endpointRef := serverEndpointCollection.Members[clientIndex] - endpointID := endpointRef.OdataId[strings.LastIndex(endpointRef.OdataId, "/")+1:] - storageGroupID := fmt.Sprintf("%s-%d-%s", nodeStorage.Name, index, endpointID) - - // If the endpoint doesn't need a storage group, remove one if it exists - if access == false { - if _, err := r.getStorageGroup(ss, storageGroupID); err != nil { - continue - } - - if err := r.deleteStorageGroup(ss, storageGroupID); err != nil { - return nil, dwsv1alpha2.NewResourceError("could not delete storage group").WithError(err).WithMajor() - } - - log.Info("Deleted storage group", "storageGroupID", storageGroupID) - } else { - // The kind environment doesn't support endpoints beyond the Rabbit - if os.Getenv("ENVIRONMENT") == "kind" && endpointID != os.Getenv("RABBIT_NODE") { - continue - } - - endPoint, err := r.getEndpoint(ss, endpointID) - if err != nil { - return nil, dwsv1alpha2.NewResourceError("could not get endpoint").WithError(err).WithFatal() - } - - // Skip the endpoints that are not ready - if nnfv1alpha1.StaticResourceStatus(endPoint.Status) != nnfv1alpha1.ResourceReady { - continue - } - - sg, err := r.createStorageGroup(ss, storageGroupID, allocationStatus.StoragePool.ID, endpointID) - if err != nil { - allocationStatus.StorageGroup.Status = nnfv1alpha1.ResourceFailed - return &ctrl.Result{}, dwsv1alpha2.NewResourceError("could not create storage group").WithError(err).WithMajor() - } - - allocationStatus.StorageGroup.Status = nnfv1alpha1.ResourceStatus(sg.Status) - allocationStatus.StorageGroup.Health = nnfv1alpha1.ResourceHealth(sg.Status) - - // If the SF ID is empty then we just created the resource. Save the ID in the NnfNodeStorage - if len(allocationStatus.StorageGroup.ID) == 0 { - log.Info("Created storage group", "Id", storageGroupID) - allocationStatus.StorageGroup.ID = sg.Id - - return &ctrl.Result{}, nil - } - } + if nnfNodeBlockStorage.Status.Ready == false { + return &ctrl.Result{}, nil } return nil, nil } -func (r *NnfNodeStorageReconciler) formatFileSystem(ctx context.Context, nodeStorage *nnfv1alpha1.NnfNodeStorage, index int) (*ctrl.Result, error) { - log := r.Log.WithValues("NnfNodeStorage", types.NamespacedName{Name: nodeStorage.Name, Namespace: nodeStorage.Namespace}) - ss := nnf.NewDefaultStorageService() - +func (r *NnfNodeStorageReconciler) createAllocation(ctx context.Context, nodeStorage *nnfv1alpha1.NnfNodeStorage, index int) (*ctrl.Result, error) { allocationStatus := &nodeStorage.Status.Allocations[index] // Check whether everything in the spec is filled in to make the FS. Lustre @@ -379,178 +267,65 @@ func (r *NnfNodeStorageReconciler) formatFileSystem(ctx context.Context, nodeSto return &ctrl.Result{}, nil } - // Find the Rabbit node endpoint to collect LNet information - endpoint, err := r.getEndpoint(ss, os.Getenv("RABBIT_NODE")) - if err != nil { - nodeStorage.Status.Error = dwsv1alpha2.NewResourceError("could not get endpoint").WithError(err).WithFatal() - log.Info(nodeStorage.Status.Error.Error()) - - return &ctrl.Result{}, dwsv1alpha2.NewResourceError("could not get endpoint").WithError(err).WithMajor() - } - nnfStorageProfile, err := getPinnedStorageProfileFromLabel(ctx, r.Client, nodeStorage) if err != nil { - allocationStatus.FileSystem.Status = nnfv1alpha1.ResourceFailed - nodeStorage.Status.Error = dwsv1alpha2.NewResourceError("could not find pinned storage profile").WithError(err).WithFatal() - log.Info(nodeStorage.Status.Error.Error()) - - return &ctrl.Result{}, nil - } - - // Create the FileSystem - oem := nnfserver.FileSystemOem{ - Type: nodeStorage.Spec.FileSystemType, + return &ctrl.Result{}, dwsv1alpha2.NewResourceError("could not find pinned storage profile").WithError(err).WithFatal() } - if oem.Type == "lustre" { - setLusCmdLines := func(c *nnfv1alpha1.NnfStorageProfileLustreCmdLines) { - oem.MkfsMount.Mkfs = c.Mkfs - oem.ZfsCmd.ZpoolCreate = c.ZpoolCreate + switch nodeStorage.Spec.FileSystemType { + case "raw": + // If the allocation was already completed successfully, there's no work to do + if allocationStatus.Ready == true { + break } - setLusOpts := func(c *nnfv1alpha1.NnfStorageProfileLustreMiscOptions) { - oem.MkfsMount.Mount = c.MountTarget + result, err := r.createLVMDevice(ctx, nodeStorage, index, nnfStorageProfile.Data.RawStorage.CmdLines) + if err != nil { + return &ctrl.Result{}, dwsv1alpha2.NewResourceError("could not create LVM device").WithError(err).WithMajor() } - - oem.Name = nodeStorage.Spec.LustreStorage.FileSystemName - oem.Lustre.Index = nodeStorage.Spec.LustreStorage.StartIndex + index - oem.Lustre.MgsNode = nodeStorage.Spec.LustreStorage.MgsNode - oem.Lustre.TargetType = nodeStorage.Spec.LustreStorage.TargetType - oem.Lustre.BackFs = nodeStorage.Spec.LustreStorage.BackFs - - switch nodeStorage.Spec.LustreStorage.TargetType { - case "MGT": - setLusCmdLines(&nnfStorageProfile.Data.LustreStorage.MgtCmdLines) - setLusOpts(&nnfStorageProfile.Data.LustreStorage.MgtOptions) - case "MDT": - setLusCmdLines(&nnfStorageProfile.Data.LustreStorage.MdtCmdLines) - setLusOpts(&nnfStorageProfile.Data.LustreStorage.MdtOptions) - case "MGTMDT": - setLusCmdLines(&nnfStorageProfile.Data.LustreStorage.MgtMdtCmdLines) - setLusOpts(&nnfStorageProfile.Data.LustreStorage.MgtMdtOptions) - case "OST": - setLusCmdLines(&nnfStorageProfile.Data.LustreStorage.OstCmdLines) - setLusOpts(&nnfStorageProfile.Data.LustreStorage.OstOptions) + if result != nil { + return result, err } - } - setCmdLines := func(c *nnfv1alpha1.NnfStorageProfileCmdLines) { - oem.MkfsMount.Mkfs = c.Mkfs - oem.LvmCmd.PvCreate = c.PvCreate - oem.LvmCmd.VgCreate = c.VgCreate - oem.LvmCmd.VgChange = nnfserver.FileSystemOemVgChange{ - Activate: c.VgChange.Activate, - Deactivate: c.VgChange.Deactivate, - LockStart: c.VgChange.LockStart, + break + case "xfs": + // If the allocation was already completed successfully, there's no work to do + if allocationStatus.Ready == true { + break } - oem.LvmCmd.VgRemove = c.VgRemove - oem.LvmCmd.LvCreate = c.LvCreate - oem.LvmCmd.LvRemove = c.LvRemove - } - - setOpts := func(c *nnfv1alpha1.NnfStorageProfileMiscOptions) { - oem.MkfsMount.Mount = c.MountRabbit - } - if oem.Type == "gfs2" { - // GFS2 requires a maximum of 16 alphanumeric, hyphen, or underscore characters. Allow up to 99 storage indicies and - // generate a simple MD5SUM hash value from the node storage name for the tail end. Although not guaranteed, this - // should reduce the likelihood of conflicts to a diminishingly small value. - checksum := md5.Sum([]byte(nodeStorage.Name)) - oem.Name = fmt.Sprintf("fs-%02d-%x", index, string(checksum[0:5])) - - // The cluster name is the "name" of the Rabbit, which is mapped to the node storage namespace (since NNF Node Storage - // is rabbit namespace scoped). - oem.Gfs2.ClusterName = nodeStorage.Namespace - setCmdLines(&nnfStorageProfile.Data.GFS2Storage.CmdLines) - setOpts(&nnfStorageProfile.Data.GFS2Storage.Options) - } - - if oem.Type == "xfs" { - setCmdLines(&nnfStorageProfile.Data.XFSStorage.CmdLines) - setOpts(&nnfStorageProfile.Data.XFSStorage.Options) - } - - if oem.Type == "raw" { - setCmdLines(&nnfStorageProfile.Data.RawStorage.CmdLines) - } - - fileSystemID := fmt.Sprintf("%s-%d", nodeStorage.Name, index) - fs, err := r.createFileSystem(ss, fileSystemID, allocationStatus.StoragePool.ID, oem) - if err != nil { - allocationStatus.FileSystem.Status = nnfv1alpha1.ResourceFailed - - return &ctrl.Result{}, dwsv1alpha2.NewResourceError("could not create file system").WithError(err).WithMajor() - } - - allocationStatus.FileSystem.Status = nnfv1alpha1.ResourceReady - allocationStatus.FileSystem.Health = nnfv1alpha1.ResourceOkay - - // If the SF ID is empty then we just created the resource. Save the ID in the NnfNodeStorage - if len(allocationStatus.FileSystem.ID) == 0 { - log.Info("Created filesystem", "Id", fs.Id) - allocationStatus.FileSystem.ID = fs.Id - - return &ctrl.Result{}, nil - } - - // Create the FileShare - fileShareID := fmt.Sprintf("%s-%d", nodeStorage.Name, index) - - mountPath := "" - sh, err := r.getFileShare(ss, fileShareID, allocationStatus.FileSystem.ID) - if err == nil { - mountPath = sh.FileSharePath - } - - shareOptions := make(map[string]interface{}) - var volumeGroupName, logicalVolumeName string - if nodeStorage.Spec.FileSystemType == "lustre" { - targetIndex := nodeStorage.Spec.LustreStorage.StartIndex + index - mountPath = "/mnt/lustre/" + nodeStorage.Spec.LustreStorage.FileSystemName + "/" + nodeStorage.Spec.LustreStorage.TargetType + strconv.Itoa(targetIndex) - } else { - volumeGroupName, logicalVolumeName, err = r.lvmNames(ctx, nodeStorage, index) + result, err := r.createLVMDevice(ctx, nodeStorage, index, nnfStorageProfile.Data.XFSStorage.CmdLines) if err != nil { - allocationStatus.FileShare.Status = nnfv1alpha1.ResourceFailed - return &ctrl.Result{}, dwsv1alpha2.NewResourceError("could not get VG/LV names").WithError(err).WithFatal() + return &ctrl.Result{}, dwsv1alpha2.NewResourceError("could not create LVM device").WithError(err).WithMajor() } - - shareOptions["volumeGroupName"] = volumeGroupName - shareOptions["logicalVolumeName"] = logicalVolumeName - shareOptions["userID"] = int(nodeStorage.Spec.UserID) - shareOptions["groupID"] = int(nodeStorage.Spec.GroupID) - } - - sh, err = r.createFileShare(ss, fileShareID, allocationStatus.FileSystem.ID, os.Getenv("RABBIT_NODE"), mountPath, shareOptions) - if err != nil { - allocationStatus.FileShare.Status = nnfv1alpha1.ResourceFailed - return &ctrl.Result{}, dwsv1alpha2.NewResourceError("could not create file share").WithError(err).WithMajor() - } - - nid := "" - if nidRaw, present := endpoint.Oem["LNetNids"]; present && nodeStorage.Spec.FileSystemType == "lustre" { - nidList := nidRaw.([]string) - if len(nidList) > 0 { - // TODO: If there are multiple LNet Nids, have a way to pick - // which network we want to use. - nid = nidList[0] + if result != nil { + return result, err } - } - allocationStatus.FileShare.Status = nnfv1alpha1.ResourceStatus(sh.Status) - allocationStatus.FileShare.Health = nnfv1alpha1.ResourceHealth(sh.Status) - nodeStorage.Status.LustreStorage.Nid = nid + break + case "gfs2": + // If the allocation was already completed successfully, there's no work to do + if allocationStatus.Ready == true { + break + } - // If the SF ID is empty then we just created the resource. Save the ID in the NnfNodeStorage - if len(allocationStatus.FileShare.ID) == 0 { - log.Info("Created file share", "Id", sh.Id) - allocationStatus.FileShare.ID = sh.Id - allocationStatus.VolumeGroup = volumeGroupName - allocationStatus.LogicalVolume = logicalVolumeName + result, err := r.createLVMDevice(ctx, nodeStorage, index, nnfStorageProfile.Data.GFS2Storage.CmdLines) + if err != nil { + return &ctrl.Result{}, dwsv1alpha2.NewResourceError("could not create LVM device").WithError(err).WithMajor() + } + if result != nil { + return result, err + } - return &ctrl.Result{}, nil + break + case "lustre": + break + default: + break } + allocationStatus.Ready = true + return nil, nil } @@ -602,274 +377,217 @@ func (r *NnfNodeStorageReconciler) setLustreOwnerGroup(nodeStorage *nnfv1alpha1. return nil } -func (r *NnfNodeStorageReconciler) deleteStorage(nodeStorage *nnfv1alpha1.NnfNodeStorage, index int) (*ctrl.Result, error) { - log := r.Log.WithValues("NnfNodeStorage", types.NamespacedName{Name: nodeStorage.Name, Namespace: nodeStorage.Namespace}) - - ss := nnf.NewDefaultStorageService() - - allocationStatus := &nodeStorage.Status.Allocations[index] - if allocationStatus.StoragePool.ID == "" { - return nil, nil - } - - log.Info("Deleting storage pool", "Id", allocationStatus.StoragePool.ID) - - err := r.deleteStoragePool(ss, allocationStatus.StoragePool.ID) +func (r *NnfNodeStorageReconciler) createLVMDevice(ctx context.Context, nnfNodeStorage *nnfv1alpha1.NnfNodeStorage, index int, cmdLines nnfv1alpha1.NnfStorageProfileCmdLines) (*ctrl.Result, error) { + pvs, result, err := r.createPhysicalVolumes(ctx, nnfNodeStorage, cmdLines, index) if err != nil { - ecErr, ok := err.(*ec.ControllerError) - - // If the error is from a 404 error, then there's nothing to clean up and we - // assume everything has been deleted - if !ok || ecErr.StatusCode() != http.StatusNotFound { - allocationStatus.FileShare.Status = nnfv1alpha1.ResourceFailed - nodeStorage.Status.Error = dwsv1alpha2.NewResourceError("could not delete storage pool").WithError(err).WithFatal() - log.Info(nodeStorage.Status.Error.Error()) - - return &ctrl.Result{Requeue: true}, nil - } + return nil, err } - allocationStatus.StoragePool.ID = "" - allocationStatus.StorageGroup.ID = "" - allocationStatus.FileSystem.ID = "" - allocationStatus.FileShare.ID = "" - allocationStatus.StoragePool.Status = nnfv1alpha1.ResourceDeleted - allocationStatus.StorageGroup.Status = nnfv1alpha1.ResourceDeleted - allocationStatus.FileSystem.Status = nnfv1alpha1.ResourceDeleted - allocationStatus.FileShare.Status = nnfv1alpha1.ResourceDeleted - allocationStatus.VolumeGroup = "" - allocationStatus.LogicalVolume = "" - nodeStorage.Status.LustreStorage.Nid = "" - - return &ctrl.Result{}, nil -} - -func (r *NnfNodeStorageReconciler) lvmNames(ctx context.Context, nodeStorage *nnfv1alpha1.NnfNodeStorage, index int) (string, string, error) { - labels := nodeStorage.GetLabels() - - workflowName, ok := labels[dwsv1alpha2.WorkflowNameLabel] - if !ok { - return "", "", fmt.Errorf("missing Workflow label on NnfNodeStorage") + if result != nil { + return result, nil } - workflowNamespace, ok := labels[dwsv1alpha2.WorkflowNamespaceLabel] - if !ok { - return "", "", fmt.Errorf("missing Workflow label on NnfNodeStorage") + vg, result, err := r.createVolumeGroup(ctx, nnfNodeStorage, cmdLines, index, pvs) + if err != nil { + return nil, err } - directiveIndex, ok := labels[nnfv1alpha1.DirectiveIndexLabel] - if !ok { - return "", "", fmt.Errorf("missing directive index label on NnfNodeStorage") + if result != nil { + return result, nil } - workflow := &dwsv1alpha2.Workflow{ - ObjectMeta: metav1.ObjectMeta{ - Name: workflowName, - Namespace: workflowNamespace, - }, + _, result, err = r.createLogicalVolume(ctx, nnfNodeStorage, cmdLines, index, vg) + if err != nil { + return nil, err } - if err := r.Get(ctx, client.ObjectKeyFromObject(workflow), workflow); err != nil { - return "", "", dwsv1alpha2.NewResourceError("could get workflow").WithError(err) + + if result != nil { + return result, nil } - return fmt.Sprintf("%s_%s_%d", workflow.GetUID(), directiveIndex, index), "lv", nil + return nil, nil } -func (r *NnfNodeStorageReconciler) isSpecComplete(nodeStorage *nnfv1alpha1.NnfNodeStorage) bool { - if nodeStorage.Spec.FileSystemType != "lustre" { - return true - } - - if nodeStorage.Spec.LustreStorage.TargetType == "MGT" || nodeStorage.Spec.LustreStorage.TargetType == "MGTMDT" { - return true +func (r *NnfNodeStorageReconciler) createPhysicalVolumes(ctx context.Context, nnfNodeStorage *nnfv1alpha1.NnfNodeStorage, cmdLines nnfv1alpha1.NnfStorageProfileCmdLines, index int) ([]lvm.PhysicalVolume, *ctrl.Result, error) { + nnfNodeBlockStorage := &nnfv1alpha1.NnfNodeBlockStorage{ + ObjectMeta: metav1.ObjectMeta{ + Name: nnfNodeStorage.GetName(), + Namespace: nnfNodeStorage.GetNamespace(), + }, } - if len(nodeStorage.Spec.LustreStorage.MgsNode) > 0 { - return true + err := r.Get(ctx, client.ObjectKeyFromObject(nnfNodeBlockStorage), nnfNodeBlockStorage) + if err != nil { + return []lvm.PhysicalVolume{}, nil, dwsv1alpha2.NewResourceError("could not get NnfNodeBlockStorage: %v", client.ObjectKeyFromObject(nnfNodeBlockStorage)).WithError(err).WithUserMessage("could not find storage allocation").WithMajor() } - return false -} + devices := nnfNodeBlockStorage.Status.Allocations[index].Accesses[os.Getenv("NNF_NODE_NAME")].DevicePaths + pvs := []lvm.PhysicalVolume{} -func (r *NnfNodeStorageReconciler) createStoragePool(ss nnf.StorageServiceApi, id string, capacity int64) (*sf.StoragePoolV150StoragePool, error) { - sp := &sf.StoragePoolV150StoragePool{ - Id: id, - CapacityBytes: capacity, - Oem: openapi.MarshalOem(nnf.AllocationPolicyOem{ - Policy: nnf.SpareAllocationPolicyType, - Compliance: nnf.RelaxedAllocationComplianceType, - }), - } - - if err := ss.StorageServiceIdStoragePoolIdPut(ss.Id(), id, sp); err != nil { - resourceErr := dwsv1alpha2.NewResourceError("could not allocate storage pool").WithError(err) - ecErr, ok := err.(*ec.ControllerError) - if ok { - switch ecErr.Cause() { - case "Insufficient capacity available": - return nil, resourceErr.WithUserMessage("insufficient capacity available").WithWLM().WithFatal() - default: - return nil, resourceErr - } + for _, device := range devices { + pv := lvm.NewPhysicalVolume(ctx, device) + _, err := pv.Create(ctx, cmdLines.PvCreate) + if err != nil { + return []lvm.PhysicalVolume{}, nil, dwsv1alpha2.NewResourceError("could not create physical volume: %s", device).WithError(err).WithMajor() } - - return nil, resourceErr + pvs = append(pvs, *pv) } - return sp, nil + return pvs, nil, nil } -func (r *NnfNodeStorageReconciler) getStoragePool(ss nnf.StorageServiceApi, id string) (*sf.StoragePoolV150StoragePool, error) { - sp := &sf.StoragePoolV150StoragePool{} - - if err := ss.StorageServiceIdStoragePoolIdGet(ss.Id(), id, sp); err != nil { - return nil, err +func (r *NnfNodeStorageReconciler) createVolumeGroup(ctx context.Context, nnfNodeStorage *nnfv1alpha1.NnfNodeStorage, cmdLines nnfv1alpha1.NnfStorageProfileCmdLines, index int, pvs []lvm.PhysicalVolume) (*lvm.VolumeGroup, *ctrl.Result, error) { + vgName, err := r.volumeGroupName(ctx, nnfNodeStorage, index) + if err != nil { + return nil, nil, dwsv1alpha2.NewResourceError("could not get volume group name").WithError(err).WithMajor() } - return sp, nil -} + vg := lvm.NewVolumeGroup(ctx, vgName, pvs) + _, err = vg.Create(ctx, cmdLines.VgCreate) + if err != nil { + return nil, nil, dwsv1alpha2.NewResourceError("could not create volume group").WithError(err).WithMajor() + } -func (r *NnfNodeStorageReconciler) deleteStoragePool(ss nnf.StorageServiceApi, id string) error { - if err := ss.StorageServiceIdStoragePoolIdDelete(ss.Id(), id); err != nil { - return err + if len(cmdLines.VgChange.LockStart) > 0 { + _, err = vg.Change(ctx, cmdLines.VgChange.LockStart) + if err != nil { + return nil, nil, dwsv1alpha2.NewResourceError("could not start volume group lock").WithError(err).WithMajor() + } } - return nil + return vg, nil, nil } -func (r *NnfNodeStorageReconciler) getEndpoint(ss nnf.StorageServiceApi, id string) (*sf.EndpointV150Endpoint, error) { - ep := &sf.EndpointV150Endpoint{} +func (r *NnfNodeStorageReconciler) createLogicalVolume(ctx context.Context, nnfNodeStorage *nnfv1alpha1.NnfNodeStorage, cmdLines nnfv1alpha1.NnfStorageProfileCmdLines, index int, vg *lvm.VolumeGroup) (*lvm.LogicalVolume, *ctrl.Result, error) { + lv := lvm.NewLogicalVolume(ctx, "lv", vg) - if err := ss.StorageServiceIdEndpointIdGet(ss.Id(), id, ep); err != nil { - return nil, err + _, err := lv.Create(ctx, cmdLines.LvCreate) + if err != nil { + return nil, nil, dwsv1alpha2.NewResourceError("could not create logical volume").WithError(err).WithMajor() } - return ep, nil + return lv, nil, nil } -func (r *NnfNodeStorageReconciler) createStorageGroup(ss nnf.StorageServiceApi, id string, spID string, epID string) (*sf.StorageGroupV150StorageGroup, error) { - sp, err := r.getStoragePool(ss, spID) - if err != nil { - return nil, err - } +func (r *NnfNodeStorageReconciler) volumeGroupName(ctx context.Context, nodeStorage *nnfv1alpha1.NnfNodeStorage, index int) (string, error) { + labels := nodeStorage.GetLabels() - ep, err := r.getEndpoint(ss, epID) - if err != nil { - return nil, err + workflowName, ok := labels[dwsv1alpha2.WorkflowNameLabel] + if !ok { + return "", fmt.Errorf("missing Workflow label on NnfNodeStorage") } - sg := &sf.StorageGroupV150StorageGroup{ - Id: id, - Links: sf.StorageGroupV150Links{ - StoragePool: sf.OdataV4IdRef{OdataId: sp.OdataId}, - ServerEndpoint: sf.OdataV4IdRef{OdataId: ep.OdataId}, - }, + workflowNamespace, ok := labels[dwsv1alpha2.WorkflowNamespaceLabel] + if !ok { + return "", fmt.Errorf("missing Workflow label on NnfNodeStorage") } - if err := ss.StorageServiceIdStorageGroupIdPut(ss.Id(), id, sg); err != nil { - return nil, err + directiveIndex, ok := labels[nnfv1alpha1.DirectiveIndexLabel] + if !ok { + return "", fmt.Errorf("missing directive index label on NnfNodeStorage") } - return sg, nil -} - -func (r *NnfNodeStorageReconciler) getStorageGroup(ss nnf.StorageServiceApi, id string) (*sf.StorageGroupV150StorageGroup, error) { - sg := &sf.StorageGroupV150StorageGroup{} - - if err := ss.StorageServiceIdStorageGroupIdGet(ss.Id(), id, sg); err != nil { - return nil, err + workflow := &dwsv1alpha2.Workflow{ + ObjectMeta: metav1.ObjectMeta{ + Name: workflowName, + Namespace: workflowNamespace, + }, + } + if err := r.Get(ctx, client.ObjectKeyFromObject(workflow), workflow); err != nil { + return "", dwsv1alpha2.NewResourceError("could get workflow").WithError(err) } - return sg, nil + return fmt.Sprintf("%s_%s_%d", workflow.GetUID(), directiveIndex, index), nil } -func (r *NnfNodeStorageReconciler) deleteStorageGroup(ss nnf.StorageServiceApi, id string) error { - return ss.StorageServiceIdStorageGroupIdDelete(ss.Id(), id) -} +/* +func (r *NnfNodeStorageReconciler) createFileSystemOnLVM(ctx context.Context, nnfNodeStorage *nnfv1alpha1.NnfNodeStorage, index int) (result *ctrl.Result, err error) { + vgName := nnfNodeStorage.Status.Allocations[index].VolumeGroup + lvName := nnfNodeStorage.Status.Allocations[index].LogicalVolume -func (r *NnfNodeStorageReconciler) createFileShare(ss nnf.StorageServiceApi, id string, fsID string, epID string, mountPath string, options map[string]interface{}) (*sf.FileShareV120FileShare, error) { - fs, err := r.getFileSystem(ss, fsID) + result, err = lvm.activateLogicalVolume(ctx, vgName, lvName, args) if err != nil { return nil, err } - ep, err := r.getEndpoint(ss, epID) - if err != nil { - return nil, err + if result != nil { + return result, nil } - sh := &sf.FileShareV120FileShare{ - Id: id, - FileSharePath: mountPath, - Oem: options, - Links: sf.FileShareV120Links{ - FileSystem: sf.OdataV4IdRef{OdataId: fs.OdataId}, - Endpoint: sf.OdataV4IdRef{OdataId: ep.OdataId}, - }, - } + // Deactivate the logical volume on the way out. ClientMounts will activate it on the Rabbit + // again if needed for data movement or user containers + defer func() { + if err != nil || result != nil { + return res, err + } - if err := ss.StorageServiceIdFileSystemIdExportedShareIdPut(ss.Id(), id, fs.Id, sh); err != nil { - return nil, err - } + return lvm.deactivateLogicalVolume(ctx, vgName, lvName, args) + }() - return sh, nil + return r.createFileSystem(ctx, nnfNodeStorage, index, lvm.DevicePath()) } -func (r *NnfNodeStorageReconciler) getFileShare(ss nnf.StorageServiceApi, id string, fsID string) (*sf.FileShareV120FileShare, error) { - fs, err := r.getFileSystem(ss, fsID) - if err != nil { - return nil, err - } +func (r *NnfNodeStorageReconciler) createFileSystem(ctx context.Context, nnfNodeStorage *nnfv1alpha1.NnfNodeStorage, index int, device string) (result *ctrl.Result, err error) { - sh := &sf.FileShareV120FileShare{} + varHandler := var_handler.NewVarHandler(map[string]string{ + "$DEVICE": device, + }) - if err := ss.StorageServiceIdFileSystemIdExportedShareIdGet(ss.Id(), fs.Id, id, sh); err != nil { - return nil, err + switch nnfNodeStorage.Spec.FileSystemType { + case "gfs2": + checksum := md5.Sum([]byte(nnfNodeStorage.Name)) + lockSpace := fmt.Sprintf("fs-%02d-%x", index, string(checksum[0:5])) + + varHandler.AddVar("$CLUSTER_NAME", nnfNodeStorage.Namespace) + varHandler.AddVar("$PROTOCOL", "lock_dlm") + varHandler.AddVar("$LOCK_SPACE", lockSpace) + default: } - return sh, nil -} + mkfsArgs := varHandler.ReplaceAll(cmdArgs.Mkfs) -func (r *NnfNodeStorageReconciler) createFileSystem(ss nnf.StorageServiceApi, id string, spID string, oem nnfserver.FileSystemOem) (*sf.FileSystemV122FileSystem, error) { - sp, err := r.getStoragePool(ss, spID) + // Check whether the device has been formatted + output, err := r.run("wipefs --output TYPE --parsable %s", device) if err != nil { return nil, err } - if oem.Name == "" { - oem.Name = id - } + if output != nnfNodeStorage.Spec.FileSystemType { + if output != "" { + return nil, dwsv1alpha2.NewResourceError("device contains unexpected format '%s'", output).WithFatal() + } - fs := &sf.FileSystemV122FileSystem{ - Id: id, - Links: sf.FileSystemV122Links{ - StoragePool: sf.OdataV4IdRef{OdataId: sp.OdataId}, - }, - Oem: openapi.MarshalOem(oem), - } + _, err := r.run("mkfs.%s -O %s", nnfNodeStorage.Spec.FileSystemType, mkfsArgs) + if err != nil { + return nil, dwsv1alpha2.NewResourceError("unable to format file system").WithError(err).WithMajor() + } - if err := ss.StorageServiceIdFileSystemIdPut(ss.Id(), id, fs); err != nil { - return nil, err + return &reconcile.Result{Requeue: true}, nil } - - return fs, nil } +*/ + +func (r *NnfNodeStorageReconciler) isSpecComplete(nodeStorage *nnfv1alpha1.NnfNodeStorage) bool { + if nodeStorage.Spec.FileSystemType != "lustre" { + return true + } -func (r *NnfNodeStorageReconciler) getFileSystem(ss nnf.StorageServiceApi, id string) (*sf.FileSystemV122FileSystem, error) { - fs := &sf.FileSystemV122FileSystem{} + if nodeStorage.Spec.LustreStorage.TargetType == "MGT" || nodeStorage.Spec.LustreStorage.TargetType == "MGTMDT" { + return true + } - if err := ss.StorageServiceIdFileSystemIdGet(ss.Id(), id, fs); err != nil { - return nil, err + if len(nodeStorage.Spec.LustreStorage.MgsNode) > 0 { + return true } - return fs, nil + return false } // SetupWithManager sets up the controller with the Manager. func (r *NnfNodeStorageReconciler) SetupWithManager(mgr ctrl.Manager) error { // nnf-ec is not thread safe, so we are limited to a single reconcile thread. return ctrl.NewControllerManagedBy(mgr). - WithOptions(controller.Options{MaxConcurrentReconciles: 1}). For(&nnfv1alpha1.NnfNodeStorage{}). + Owns(&nnfv1alpha1.NnfNodeBlockStorage{}). Complete(r) } diff --git a/internal/controller/nnf_persistentstorageinstance_controller.go b/internal/controller/nnf_persistentstorageinstance_controller.go index 96fe633af..c8a34b77c 100644 --- a/internal/controller/nnf_persistentstorageinstance_controller.go +++ b/internal/controller/nnf_persistentstorageinstance_controller.go @@ -199,7 +199,7 @@ func (r *PersistentStorageReconciler) Reconcile(ctx context.Context, req ctrl.Re var complete bool = true // Status section should be usable now, check for Ready for _, set := range nnfStorage.Status.AllocationSets { - if set.Status != "Ready" { + if set.Ready == false { complete = false } } diff --git a/internal/controller/nnf_storage_controller.go b/internal/controller/nnf_storage_controller.go index 89ba6b122..a34e65bfc 100644 --- a/internal/controller/nnf_storage_controller.go +++ b/internal/controller/nnf_storage_controller.go @@ -157,9 +157,9 @@ func (r *NnfStorageReconciler) Reconcile(ctx context.Context, req ctrl.Request) if len(storage.Status.AllocationSets) != len(storage.Spec.AllocationSets) { storage.Status.AllocationSets = make([]nnfv1alpha1.NnfStorageAllocationSetStatus, len(storage.Spec.AllocationSets)) for i := range storage.Status.AllocationSets { - storage.Status.AllocationSets[i].Status = nnfv1alpha1.ResourceStarting + storage.Status.AllocationSets[i].Ready = false } - storage.Status.Status = nnfv1alpha1.ResourceStarting + storage.Status.Ready = false return ctrl.Result{}, nil } @@ -200,30 +200,13 @@ func (r *NnfStorageReconciler) Reconcile(ctx context.Context, req ctrl.Request) // Wait for all the allocation sets to be ready for _, allocationSet := range storage.Status.AllocationSets { - if allocationSet.Status != nnfv1alpha1.ResourceReady { + if allocationSet.Ready == false { return ctrl.Result{RequeueAfter: time.Minute}, nil } } - // For Lustre, the owner and group have to be set once all the Lustre targets - // have completed. This is done on the Rabbit node that hosts OST 0. - for i, allocationSet := range storage.Spec.AllocationSets { - if allocationSet.TargetType != "OST" { - continue - } - - res, err := r.setLustreOwnerGroup(ctx, storage, i) - if err != nil { - return ctrl.Result{}, err - } - - if res != nil { - return *res, nil - } - } - // All allocation sets are ready and the owner/group is set - storage.Status.Status = nnfv1alpha1.ResourceReady + storage.Status.Ready = true return ctrl.Result{}, nil } @@ -332,16 +315,17 @@ func (r *NnfStorageReconciler) createNodeStorage(ctx context.Context, storage *n nnfNodeStorage.Spec.LustreStorage.TargetType = "MDT" } - // Create the list of client endpoints for each allocation and initialize it with - // the rabbit node endpoint - if len(nnfNodeStorage.Spec.ClientEndpoints) == 0 { - nnfNodeStorage.Spec.ClientEndpoints = make([]nnfv1alpha1.ClientEndpointsSpec, node.Count) - for k := range nnfNodeStorage.Spec.ClientEndpoints { - nnfNodeStorage.Spec.ClientEndpoints[k].AllocationIndex = k - nnfNodeStorage.Spec.ClientEndpoints[k].NodeNames = append(nnfNodeStorage.Spec.ClientEndpoints[k].NodeNames, node.Name) + /* + // Create the list of client endpoints for each allocation and initialize it with + // the rabbit node endpoint + if len(nnfNodeStorage.Spec.ClientEndpoints) == 0 { + nnfNodeStorage.Spec.ClientEndpoints = make([]nnfv1alpha1.ClientEndpointsSpec, node.Count) + for k := range nnfNodeStorage.Spec.ClientEndpoints { + nnfNodeStorage.Spec.ClientEndpoints[k].AllocationIndex = k + nnfNodeStorage.Spec.ClientEndpoints[k].NodeNames = append(nnfNodeStorage.Spec.ClientEndpoints[k].NodeNames, node.Name) + } } - } - + */ if nnfNodeStorage.Spec.LustreStorage.TargetType == "MDT" || nnfNodeStorage.Spec.LustreStorage.TargetType == "OST" { if len(allocationSet.ExternalMgsNid) > 0 { nnfNodeStorage.Spec.LustreStorage.MgsNode = allocationSet.ExternalMgsNid @@ -378,10 +362,6 @@ func (r *NnfStorageReconciler) createNodeStorage(ctx context.Context, storage *n // for the NnfStorage. func (r *NnfStorageReconciler) aggregateNodeStorageStatus(ctx context.Context, storage *nnfv1alpha1.NnfStorage, allocationSetIndex int) (*ctrl.Result, error) { allocationSet := &storage.Status.AllocationSets[allocationSetIndex] - - var health nnfv1alpha1.NnfResourceHealthType = nnfv1alpha1.ResourceOkay - var status nnfv1alpha1.NnfResourceStatusType = nnfv1alpha1.ResourceReady - allocationSet.AllocationCount = 0 nnfNodeStorageList := &nnfv1alpha1.NnfNodeStorageList{} @@ -398,86 +378,23 @@ func (r *NnfStorageReconciler) aggregateNodeStorageStatus(ctx context.Context, s // Ensure that we found all the NnfNodeStorage resources we were expecting if len(nnfNodeStorageList.Items) != len(storage.Spec.AllocationSets[allocationSetIndex].Nodes) { - status = nnfv1alpha1.ResourceStarting + return &ctrl.Result{}, nil } for _, nnfNodeStorage := range nnfNodeStorageList.Items { - if nnfNodeStorage.Spec.LustreStorage.TargetType == "MGT" || nnfNodeStorage.Spec.LustreStorage.TargetType == "MGTMDT" { - storage.Status.MgsNode = nnfNodeStorage.Status.LustreStorage.Nid - } - - // Wait until the status section of the nnfNodeStorage has been initialized - if len(nnfNodeStorage.Status.Allocations) != nnfNodeStorage.Spec.Count { - // Set the Status to starting unless we've found a failure in one - // of the earlier nnfNodeStorages - startingStatus := nnfv1alpha1.ResourceStarting - startingStatus.UpdateIfWorseThan(&status) - allocationSet.Status = status - allocationSet.Health = health - - return &ctrl.Result{}, nil - } - - for _, nodeAllocation := range nnfNodeStorage.Status.Allocations { - if nodeAllocation.CapacityAllocated > 0 { - allocationSet.AllocationCount++ - } - - nodeAllocation.StoragePool.Health.UpdateIfWorseThan(&health) - nodeAllocation.StorageGroup.Health.UpdateIfWorseThan(&health) - nodeAllocation.FileSystem.Health.UpdateIfWorseThan(&health) - nodeAllocation.FileShare.Health.UpdateIfWorseThan(&health) - - nodeAllocation.StoragePool.Status.UpdateIfWorseThan(&status) - nodeAllocation.StorageGroup.Status.UpdateIfWorseThan(&status) - nodeAllocation.FileSystem.Status.UpdateIfWorseThan(&status) - nodeAllocation.FileShare.Status.UpdateIfWorseThan(&status) - } + //if nnfNodeStorage.Spec.LustreStorage.TargetType == "MGT" || nnfNodeStorage.Spec.LustreStorage.TargetType == "MGTMDT" { + // storage.Status.MgsNode = nnfNodeStorage.Status.LustreStorage.Nid + //} if nnfNodeStorage.Status.Error != nil { storage.Status.SetResourceError(nnfNodeStorage.Status.Error) } - } - - allocationSet.Health = health - allocationSet.Status = status - - return nil, nil -} - -// setLustreOwnerGroup sets the "SetOwnerGroup" field in the NnfNodeStorage for OST 0 in a Lustre -// file system. This tells the node controller on the Rabbit to mount the Lustre file system and set -// the owner and group. -func (r *NnfStorageReconciler) setLustreOwnerGroup(ctx context.Context, storage *nnfv1alpha1.NnfStorage, allocationSetIndex int) (*ctrl.Result, error) { - allocationSet := storage.Spec.AllocationSets[allocationSetIndex] - - if len(allocationSet.Nodes) == 0 { - return nil, nil - } - nnfNodeStorage := &nnfv1alpha1.NnfNodeStorage{ - ObjectMeta: metav1.ObjectMeta{ - Name: nnfNodeStorageName(storage, allocationSetIndex, 0), - Namespace: allocationSet.Nodes[0].Name, - }, - } - - if err := r.Get(ctx, client.ObjectKeyFromObject(nnfNodeStorage), nnfNodeStorage); err != nil { - return nil, err - } - - if !nnfNodeStorage.Spec.SetOwnerGroup { - nnfNodeStorage.Spec.SetOwnerGroup = true - - if err := r.Update(ctx, nnfNodeStorage); err != nil { - return nil, err + if nnfNodeStorage.Status.Ready == false { + return &ctrl.Result{}, nil } } - if nnfNodeStorage.Status.OwnerGroupStatus != nnfv1alpha1.ResourceReady { - return &ctrl.Result{}, nil - } - return nil, nil } diff --git a/internal/controller/nnf_workflow_controller.go b/internal/controller/nnf_workflow_controller.go index d8ee34e41..4762bc60b 100644 --- a/internal/controller/nnf_workflow_controller.go +++ b/internal/controller/nnf_workflow_controller.go @@ -457,7 +457,7 @@ func (r *NnfWorkflowReconciler) finishSetupState(ctx context.Context, workflow * return Requeue("error").withObject(nnfStorage), nil } - if nnfStorage.Status.Status != nnfv1alpha1.ResourceReady { + if nnfStorage.Status.Ready == false { // RequeueAfter is necessary for persistent storage that isn't owned by this workflow return Requeue("allocation set not ready").after(2 * time.Second).withObject(nnfStorage), nil } diff --git a/pkg/command/cmd.go b/pkg/command/cmd.go new file mode 100644 index 000000000..8f811c389 --- /dev/null +++ b/pkg/command/cmd.go @@ -0,0 +1,31 @@ +/* + * Copyright 2023 Hewlett Packard Enterprise Development LP + * Other additional copyright holders may be indicated within. + * + * The entirety of this work is licensed under the Apache License, + * Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. + * + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package command + +import "os/exec" + +func Run(args string) (string, error) { + output, err := exec.Command("bash", "-c", args).Output() + if err != nil { + return "", err + } + + return string(output), nil +} diff --git a/pkg/lvm/logical_volumes.go b/pkg/lvm/logical_volumes.go new file mode 100644 index 000000000..affa74068 --- /dev/null +++ b/pkg/lvm/logical_volumes.go @@ -0,0 +1,86 @@ +/* + * Copyright 2023 Hewlett Packard Enterprise Development LP + * Other additional copyright holders may be indicated within. + * + * The entirety of this work is licensed under the Apache License, + * Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. + * + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package lvm + +import ( + "context" + "fmt" + "strings" + + "github.com/NearNodeFlash/nnf-sos/pkg/command" + "github.com/NearNodeFlash/nnf-sos/pkg/var_handler" +) + +type LogicalVolume struct { + Name string + VolumeGroup *VolumeGroup +} + +func NewLogicalVolume(ctx context.Context, name string, vg *VolumeGroup) *LogicalVolume { + return &LogicalVolume{ + Name: name, + VolumeGroup: vg, + } +} + +func (lv *LogicalVolume) parseArgs(args string) (string, error) { + deviceNames := []string{} + for _, pv := range lv.VolumeGroup.PhysicalVolumes { + deviceNames = append(deviceNames, pv.Device) + } + + // Initialize the VarHandler substitution variables + varHandler := var_handler.NewVarHandler(map[string]string{ + "$DEVICE_NUM": fmt.Sprintf("%d", len(deviceNames)), + "$DEVICE_LIST": strings.Join(deviceNames, " "), + "$VG_NAME": lv.VolumeGroup.Name, + "$LV_NAME": lv.Name, + }) + + if err := varHandler.ListToVars("$DEVICE_LIST", "$DEVICE"); err != nil { + return "", fmt.Errorf("invalid internal device list: %w", err) + } + + return varHandler.ReplaceAll(args), nil +} + +func (lv *LogicalVolume) Create(ctx context.Context, rawArgs string) (bool, error) { + args, err := lv.parseArgs(rawArgs) + if err != nil { + return false, err + } + + existingLVs, err := lvsListVolumes(ctx) + if err != nil { + return false, err + } + + for _, existingLV := range existingLVs { + if existingLV.Name == lv.Name { + return false, nil + } + } + + if _, err := command.Run(fmt.Sprintf("lvcreate --yes %s", args)); err != nil { + return false, fmt.Errorf("could not create logical volume: %w", err) + } + + return true, nil +} diff --git a/pkg/lvm/lvs.go b/pkg/lvm/lvs.go new file mode 100644 index 000000000..45a2d182e --- /dev/null +++ b/pkg/lvm/lvs.go @@ -0,0 +1,64 @@ +/* + * Copyright 2023 Hewlett Packard Enterprise Development LP + * Other additional copyright holders may be indicated within. + * + * The entirety of this work is licensed under the Apache License, + * Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. + * + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package lvm + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/NearNodeFlash/nnf-sos/pkg/command" +) + +type lvsOutput struct { + Report []lvsReport `json:"report"` +} + +type lvsReport struct { + LV []lvsLogicalVolume `json:"lv"` +} + +type lvsLogicalVolume struct { + Name string `json:"lv_name"` + VGName string `json:"vg_name"` + Attrs string `json:"lv_attr"` + Size string `json:"lv_size"` +} + +func lvsListVolumes(ctx context.Context) ([]lvsLogicalVolume, error) { + output, err := command.Run("lvs --reportformat json") + if err != nil { + return nil, fmt.Errorf("could not list logical volumes: %w", err) + } + + lvsOutput := lvsOutput{} + + if err := json.Unmarshal([]byte(output), &lvsOutput); err != nil { + return nil, err + } + + // If there are multiple reports, combine all the logical volumes into a single list + logicalVolumes := []lvsLogicalVolume{} + for _, report := range lvsOutput.Report { + logicalVolumes = append(logicalVolumes, report.LV...) + } + + return logicalVolumes, nil +} diff --git a/pkg/lvm/physical_volumes.go b/pkg/lvm/physical_volumes.go new file mode 100644 index 000000000..ac37e29cd --- /dev/null +++ b/pkg/lvm/physical_volumes.go @@ -0,0 +1,75 @@ +/* + * Copyright 2023 Hewlett Packard Enterprise Development LP + * Other additional copyright holders may be indicated within. + * + * The entirety of this work is licensed under the Apache License, + * Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. + * + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package lvm + +import ( + "context" + "fmt" + + "github.com/NearNodeFlash/nnf-sos/pkg/command" + "github.com/NearNodeFlash/nnf-sos/pkg/var_handler" +) + +type PhysicalVolume struct { + Device string +} + +func NewPhysicalVolume(ctx context.Context, device string) *PhysicalVolume { + return &PhysicalVolume{ + Device: device, + } +} + +func (pv *PhysicalVolume) parseArgs(args string, device string) (string, error) { + + // Initialize the VarHandler substitution variables + varHandler := var_handler.NewVarHandler(map[string]string{ + "$DEVICE": device, + }) + + return varHandler.ReplaceAll(args), nil +} + +func (pv *PhysicalVolume) Create(ctx context.Context, rawArgs string) (bool, error) { + existingPVs, err := pvsListVolumes(ctx) + if err != nil { + return false, err + } + + for _, existingPV := range existingPVs { + if existingPV.Name == pv.Device { + return false, nil + } + } + + args, err := pv.parseArgs(rawArgs, pv.Device) + if err != nil { + return false, err + } + + // No existing LVM PV found. Create one + if _, err := command.Run(fmt.Sprintf("pvcreate %s", args)); err != nil { + if err != nil { + return false, fmt.Errorf("could not create LVM physical volume: %w", err) + } + } + + return true, nil +} diff --git a/pkg/lvm/pvs.go b/pkg/lvm/pvs.go new file mode 100644 index 000000000..0412d8d08 --- /dev/null +++ b/pkg/lvm/pvs.go @@ -0,0 +1,64 @@ +/* + * Copyright 2023 Hewlett Packard Enterprise Development LP + * Other additional copyright holders may be indicated within. + * + * The entirety of this work is licensed under the Apache License, + * Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. + * + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package lvm + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/NearNodeFlash/nnf-sos/pkg/command" +) + +type pvsOutput struct { + Report []pvsReport `json:"report"` +} + +type pvsReport struct { + LV []pvsPhysicalVolume `json:"pv"` +} + +type pvsPhysicalVolume struct { + Name string `json:"pv_name"` + VGName string `json:"vg_name"` + Attrs string `json:"pv_attr"` + Size string `json:"pv_size"` +} + +func pvsListVolumes(ctx context.Context) ([]pvsPhysicalVolume, error) { + output, err := command.Run("pvs --reportformat json") + if err != nil { + return nil, fmt.Errorf("could not list physical volumes: %w", err) + } + + pvsOutput := pvsOutput{} + + if err := json.Unmarshal([]byte(output), &pvsOutput); err != nil { + return nil, err + } + + // If there are multiple reports, combine all the physical volumes into a single list + physicalVolumes := []pvsPhysicalVolume{} + for _, report := range pvsOutput.Report { + physicalVolumes = append(physicalVolumes, report.LV...) + } + + return physicalVolumes, nil +} diff --git a/pkg/lvm/vgs.go b/pkg/lvm/vgs.go new file mode 100644 index 000000000..8bb6463e1 --- /dev/null +++ b/pkg/lvm/vgs.go @@ -0,0 +1,65 @@ +/* + * Copyright 2023 Hewlett Packard Enterprise Development LP + * Other additional copyright holders may be indicated within. + * + * The entirety of this work is licensed under the Apache License, + * Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. + * + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package lvm + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/NearNodeFlash/nnf-sos/pkg/command" +) + +type vgsOutput struct { + Report []vgsReport `json:"report"` +} + +type vgsReport struct { + LV []vgsVolumeGroup `json:"vg"` +} + +type vgsVolumeGroup struct { + Name string `json:"vg_name"` + PVCount string `json:"pv_count"` + LVCount string `json:"lv_count"` + Attrs string `json:"vg_attr"` + Size string `json:"vg_size"` +} + +func vgsListVolumes(ctx context.Context) ([]vgsVolumeGroup, error) { + output, err := command.Run("vgs --reportformat json") + if err != nil { + return nil, fmt.Errorf("could not list volume groups: %w", err) + } + + vgsOutput := vgsOutput{} + + if err := json.Unmarshal([]byte(output), &vgsOutput); err != nil { + return nil, err + } + + // If there are multiple reports, combine all the volume groups into a single list + volumeGroups := []vgsVolumeGroup{} + for _, report := range vgsOutput.Report { + volumeGroups = append(volumeGroups, report.LV...) + } + + return volumeGroups, nil +} diff --git a/pkg/lvm/volume_groups.go b/pkg/lvm/volume_groups.go new file mode 100644 index 000000000..e9eab6018 --- /dev/null +++ b/pkg/lvm/volume_groups.go @@ -0,0 +1,99 @@ +/* + * Copyright 2023 Hewlett Packard Enterprise Development LP + * Other additional copyright holders may be indicated within. + * + * The entirety of this work is licensed under the Apache License, + * Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. + * + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package lvm + +import ( + "context" + "fmt" + "strings" + + "github.com/NearNodeFlash/nnf-sos/pkg/command" + "github.com/NearNodeFlash/nnf-sos/pkg/var_handler" +) + +type VolumeGroup struct { + Name string + PhysicalVolumes []PhysicalVolume + Shared bool +} + +func NewVolumeGroup(ctx context.Context, name string, pvs []PhysicalVolume) *VolumeGroup { + return &VolumeGroup{ + Name: name, + PhysicalVolumes: pvs, + } +} + +func (vg *VolumeGroup) parseArgs(args string) (string, error) { + deviceNames := []string{} + for _, pv := range vg.PhysicalVolumes { + deviceNames = append(deviceNames, pv.Device) + } + + // Initialize the VarHandler substitution variables + varHandler := var_handler.NewVarHandler(map[string]string{ + "$DEVICE_NUM": fmt.Sprintf("%d", len(deviceNames)), + "$DEVICE_LIST": strings.Join(deviceNames, " "), + "$VG_NAME": vg.Name, + }) + + if err := varHandler.ListToVars("$DEVICE_LIST", "$DEVICE"); err != nil { + return "", fmt.Errorf("invalid internal device list: %w", err) + } + + return varHandler.ReplaceAll(args), nil +} + +func (vg *VolumeGroup) Create(ctx context.Context, rawArgs string) (bool, error) { + args, err := vg.parseArgs(rawArgs) + if err != nil { + return false, err + } + + existingVGs, err := vgsListVolumes(ctx) + if err != nil { + return false, err + } + + for _, existingVG := range existingVGs { + if existingVG.Name == vg.Name { + return false, nil + } + } + + if _, err := command.Run(fmt.Sprintf("lvcreate --shared %s", args)); err != nil { + return false, fmt.Errorf("could not create volume group: %w", err) + } + + return false, nil +} + +func (vg *VolumeGroup) Change(ctx context.Context, rawArgs string) (bool, error) { + args, err := vg.parseArgs(rawArgs) + if err != nil { + return false, err + } + + if _, err := command.Run(fmt.Sprintf("vgchange %s", args)); err != nil { + return false, err + } + + return true, nil +} diff --git a/pkg/nvme/nvme.go b/pkg/nvme/nvme.go new file mode 100644 index 000000000..6d193d13e --- /dev/null +++ b/pkg/nvme/nvme.go @@ -0,0 +1,74 @@ +/* + * Copyright 2023 Hewlett Packard Enterprise Development LP + * Other additional copyright holders may be indicated within. + * + * The entirety of this work is licensed under the Apache License, + * Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. + * + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package nvme + +import ( + "encoding/json" + + "github.com/NearNodeFlash/nnf-sos/pkg/command" +) + +type NvmeDevice struct { + DevicePath string + NSID uint32 + NQN string +} + +type nvmeListVerboseNamespaces struct { + Device string `json:"NameSpace"` + NSID uint32 `json:"NSID"` +} + +type nvmeListVerboseControllers struct { + Namespaces []nvmeListVerboseNamespaces `json:"Namespaces"` +} + +type nvmeListVerboseDevice struct { + SubsystemNQN string `json:"SubsystemNQN"` + Controllers []nvmeListVerboseControllers `json:"Controllers"` +} + +type nvmeListVerboseDevices struct { + Devices []nvmeListVerboseDevice `json:"Devices"` +} + +func NvmeListDevices() ([]NvmeDevice, error) { + devices := []NvmeDevice{} + + data, err := command.Run("nvme list -v --output-format=json") + if err != nil { + return nil, err + } + + foundDevices := nvmeListVerboseDevices{} + if err := json.Unmarshal([]byte(data), &foundDevices); err != nil { + return nil, err + } + + for _, device := range foundDevices.Devices { + for _, controller := range device.Controllers { + for _, namespace := range controller.Namespaces { + devices = append(devices, NvmeDevice{DevicePath: "/dev/" + namespace.Device, NSID: namespace.NSID, NQN: device.SubsystemNQN}) + } + } + } + + return devices, nil +} diff --git a/pkg/var_handler/var_handler.go b/pkg/var_handler/var_handler.go new file mode 100644 index 000000000..05c7f231c --- /dev/null +++ b/pkg/var_handler/var_handler.go @@ -0,0 +1,60 @@ +/* + * Copyright 2022 Hewlett Packard Enterprise Development LP + * Other additional copyright holders may be indicated within. + * + * The entirety of this work is licensed under the Apache License, + * Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. + * + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package var_handler + +import ( + "fmt" + "strings" +) + +type VarHandler struct { + VarMap map[string]string +} + +func NewVarHandler(vars map[string]string) *VarHandler { + v := &VarHandler{} + v.VarMap = vars + return v +} + +func (v *VarHandler) AddVar(name string, value string) { + v.VarMap[name] = value +} + +// ListToVars splits the value of one of its variables, and creates a new +// indexed variable for each of the items in the split. +func (v *VarHandler) ListToVars(listVarName, newVarPrefix string) error { + theList, ok := v.VarMap[listVarName] + if !ok { + return fmt.Errorf("Unable to find the variable named %s", listVarName) + } + + for i, val := range strings.Split(theList, " ") { + v.VarMap[fmt.Sprintf("%s%d", newVarPrefix, i+1)] = val + } + return nil +} + +func (v *VarHandler) ReplaceAll(s string) string { + for key, value := range v.VarMap { + s = strings.ReplaceAll(s, key, value) + } + return s +} diff --git a/pkg/var_handler/var_handler_test.go b/pkg/var_handler/var_handler_test.go new file mode 100644 index 000000000..5b88f00a4 --- /dev/null +++ b/pkg/var_handler/var_handler_test.go @@ -0,0 +1,73 @@ +/* + * Copyright 2022 Hewlett Packard Enterprise Development LP + * Other additional copyright holders may be indicated within. + * + * The entirety of this work is licensed under the Apache License, + * Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. + * + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package var_handler + +import ( + "testing" +) + +func TestVarHandler(t *testing.T) { + varMap := map[string]string{ + "$BIGDOG": "Jules", + "$GREYDOG": "Henri", + } + + v := NewVarHandler(varMap) + // Add a late-arriving variable. + v.VarMap["$FAVTOY"] = "rope" + + in1 := "The big dog is $BIGDOG, the little dog is $GREYDOG. $BIGDOG and $GREYDOG are best friends and their favorite toy is the $FAVTOY." + want1 := "The big dog is Jules, the little dog is Henri. Jules and Henri are best friends and their favorite toy is the rope." + out1 := v.ReplaceAll(in1) + if out1 != want1 { + t.Errorf("Did not get the desired result. Got (%s)", out1) + } + + // Change a variable. + v.VarMap["$FAVTOY"] = "ball" + in2 := "$BIGDOG likes the $FAVTOY." + want2 := "Jules likes the ball." + out2 := v.ReplaceAll(in2) + if out2 != want2 { + t.Errorf("Did not get desired result. Got (%s)", out2) + } + + // Delete a variable. + delete(v.VarMap, "$FAVTOY") + in3 := "$GREYDOG's favorite toy was the $FAVTOY." + want3 := "Henri's favorite toy was the $FAVTOY." + out3 := v.ReplaceAll(in3) + if out3 != want3 { + t.Errorf("Did not get desired result. Got (%s)", out3) + } + + // Add a list to turn into variables. + v.VarMap["$DEVICE_LIST"] = "/dev/nvme0n1 /dev/nvme1n1 /dev/nvme0n2 /dev/nvme1n2" + if err := v.ListToVars("$DEVICE_LIST", "$DEVICE"); err != nil { + t.Errorf("Did not split list: %v", err) + } else { + in4 := "zpool mirror $DEVICE1 $DEVICE2 mirror $DEVICE3 $DEVICE4" + want4 := "zpool mirror /dev/nvme0n1 /dev/nvme1n1 mirror /dev/nvme0n2 /dev/nvme1n2" + out4 := v.ReplaceAll(in4) + if out4 != want4 { + t.Errorf("Did not get desired result. Got (%s)", out4) + } + } +}