From d10dcbaa32a87b22a89c5b6c509bbc2c622c5740 Mon Sep 17 00:00:00 2001 From: rbo54 Date: Fri, 8 Oct 2021 16:29:21 -0400 Subject: [PATCH 1/7] Changes for podAffinity, retry of delete volume attachments error, and processing SkipArrayConnectionCheck only on a node noexec taint. --- cmd/podmon/main.go | 2 +- internal/k8sapi/k8sapi.go | 2 +- internal/monitor/controller.go | 194 +++++++++--- internal/monitor/features/controller.feature | 55 ++-- internal/monitor/monitor_steps_test.go | 310 ++++++++++++++++++- internal/monitor/monitor_test.go | 29 ++ test/sh/scaleup-powerflex.sh | 2 +- 7 files changed, 522 insertions(+), 72 deletions(-) diff --git a/cmd/podmon/main.go b/cmd/podmon/main.go index 51cf2d6..557d0ec 100644 --- a/cmd/podmon/main.go +++ b/cmd/podmon/main.go @@ -160,7 +160,7 @@ func main() { go ArrayConnMonitorFc() } // monitor all the nodes with no label required - go StartNodeMonitorFn(K8sAPI, k8sapi.K8sClient.Client, "", "", monitor.MonitorRestartTimeDelay) + // XXX go StartNodeMonitorFn(K8sAPI, k8sapi.K8sClient.Client, "", "", monitor.MonitorRestartTimeDelay) } // monitor the pods with the designated label key/value diff --git a/internal/k8sapi/k8sapi.go b/internal/k8sapi/k8sapi.go index 1c92a6a..7cf57d6 100644 --- a/internal/k8sapi/k8sapi.go +++ b/internal/k8sapi/k8sapi.go @@ -127,7 +127,7 @@ func (api *Client) GetCachedVolumeAttachment(ctx context.Context, pvName, nodeNa vaKey := fmt.Sprintf("%s/%s", *va.Spec.Source.PersistentVolumeName, va.Spec.NodeName) api.volumeAttachmentCache[vaKey] = vaCopy api.volumeAttachmentNameToKey[vaCopy.ObjectMeta.Name] = vaKey - log.Infof("Adding VA Cache %s %s", vaCopy.ObjectMeta.Name, vaKey) + log.Debugf("Adding VA Cache %s %s", vaCopy.ObjectMeta.Name, vaKey) } } return api.volumeAttachmentCache[key], nil diff --git a/internal/monitor/controller.go b/internal/monitor/controller.go index aa38513..dbfaa0c 100644 --- a/internal/monitor/controller.go +++ b/internal/monitor/controller.go @@ -20,13 +20,14 @@ import ( "sync" "time" + "podmon/internal/k8sapi" + "github.com/container-storage-interface/spec/lib/go/csi" csiext "github.com/dell/dell-csi-extensions/podmon" log "github.com/sirupsen/logrus" v1 "k8s.io/api/core/v1" storagev1 "k8s.io/api/storage/v1" "k8s.io/apimachinery/pkg/watch" - "podmon/internal/k8sapi" ) // MaxCrashLoopBackOffRetry is the maximum number of times for a pod to be deleted in response to a CrashLoopBackOff @@ -34,12 +35,16 @@ const MaxCrashLoopBackOffRetry = 5 //ControllerPodInfo has information for tracking health of the system type ControllerPodInfo struct { // information controller keeps on hand about a pod - PodKey string // the Pod Key (namespace/name) of the pod - Node *v1.Node // the associated node structure - PodUID string // the pod container's UID - ArrayIDs []string // string of array IDs used by the pod's volumes + PodKey string // the Pod Key (namespace/name) of the pod + Node *v1.Node // the associated node structure + PodUID string // the pod container's UID + ArrayIDs []string // string of array IDs used by the pod's volumes + PodAffinityLabels map[string]string // A list of pod affinity labels for the pod } +const notFound = "not found" +const hostNameTopologyKey = "kubernetes.io/hostname" + // controllerModePodHandler handles controller mode functionality when a pod event happens func (cm *PodMonitorType) controllerModePodHandler(pod *v1.Pod, eventType watch.EventType) error { log.Debugf("podMonitorHandler-controller: name %s/%s node %s message %s reason %s event %v", @@ -106,12 +111,17 
@@ func (cm *PodMonitorType) controllerModePodHandler(pod *v1.Pod, eventType watch. if err != nil { log.Errorf("Could not determine pod to arrayIDs: %s", err) } + podAffinityLabels := cm.getPodAffinityLabels(pod) + if len(podAffinityLabels) > 0 { + log.Infof("podKey %s podAffinityLabels %v", podKey, podAffinityLabels) + } podUID := string(pod.ObjectMeta.UID) podInfo := &ControllerPodInfo{ - PodKey: podKey, - Node: node, - PodUID: podUID, - ArrayIDs: arrayIDs, + PodKey: podKey, + Node: node, + PodUID: podUID, + ArrayIDs: arrayIDs, + PodAffinityLabels: podAffinityLabels, } cm.PodKeyToControllerPodInfo.Store(podKey, podInfo) // Delete (reset) the CrashLoopBackOff counter since we're running. @@ -122,7 +132,7 @@ func (cm *PodMonitorType) controllerModePodHandler(pod *v1.Pod, eventType watch. pod.ObjectMeta.Namespace, pod.ObjectMeta.Name, pod.Spec.NodeName, initialized, ready, taintnosched, taintnoexec, taintpodmon) // TODO: option for taintnosched vs. taintnoexec if (taintnoexec || taintnosched || taintpodmon) && !ready { - go cm.controllerCleanupPod(pod, node, "NodeFailure", taintpodmon) + go cm.controllerCleanupPod(pod, node, "NodeFailure", taintnoexec, taintpodmon) } else if !ready && crashLoopBackOff { cnt, _ := cm.PodKeyToCrashLoopBackOffCount.LoadOrStore(podKey, 0) crashLoopBackOffCount := cnt.(int) @@ -144,11 +154,12 @@ func (cm *PodMonitorType) controllerModePodHandler(pod *v1.Pod, eventType watch. } // Attempts to cleanup a Pod that is in trouble. Returns true if made it all the way to deleting the pod. -func (cm *PodMonitorType) controllerCleanupPod(pod *v1.Pod, node *v1.Node, reason string, taintpodmon bool) bool { +func (cm *PodMonitorType) controllerCleanupPod(pod *v1.Pod, node *v1.Node, reason string, taintnoexec, taintpodmon bool) bool { fields := make(map[string]interface{}) fields["namespace"] = pod.ObjectMeta.Namespace fields["pod"] = pod.ObjectMeta.Name fields["node"] = node.ObjectMeta.Name + fields["reason"] = reason // Lock so that only one thread is processing pod at a time podKey := getPodKey(pod) // Single thread processing of this pod @@ -205,12 +216,17 @@ func (cm *PodMonitorType) controllerCleanupPod(pod *v1.Pod, node *v1.Node, reaso } // Call the driver to validate the volumes are not in use - if cm.CSIExtensionsPresent && !cm.SkipArrayConnectionValidation { - if CSIApi.Connected() { - log.WithFields(fields).Infof("Validating host connectivity for node %s volumes %v", node.ObjectMeta.Name, volIDs) - connected, iosInProgress, err := cm.callValidateVolumeHostConnectivity(node, volIDs, true) - // Don't consider connected status if taintpodmon is set, because the node may just have come back online. - if (connected && !taintpodmon) || iosInProgress || err != nil { + if cm.CSIExtensionsPresent && CSIApi.Connected() { + log.WithFields(fields).Infof("Validating host connectivity for node %s volumes %v", node.ObjectMeta.Name, volIDs) + connected, iosInProgress, err := cm.callValidateVolumeHostConnectivity(node, volIDs, true) + // Don't consider connected status if taintpodmon is set, because the node may just have come back online. 
+ if (connected && !taintpodmon) || iosInProgress || err != nil { + fields["connected"] = connected + fields["iosInProgress"] = iosInProgress + // If SkipArrayConnectionValidation and taintnoexec are set, proceed anyway + if cm.SkipArrayConnectionValidation && taintnoexec { + log.WithFields(fields).Info("SkipArrayConnectionValidation is set and taintnoexec is true- proceeding") + } else { log.WithFields(fields).Info("Aborting pod cleanup because array still connected and/or recently did I/O") if err = K8sAPI.CreateEvent(podmon, pod, k8sapi.EventTypeWarning, reason, "podmon aborted pod cleanup %s array connected or recent I/O", @@ -221,7 +237,7 @@ func (cm *PodMonitorType) controllerCleanupPod(pod *v1.Pod, node *v1.Node, reaso } } } else { - log.WithFields(fields).Infof("Skipped array connection validation") + log.WithFields(fields).Error("Array validation check skipped because CSIApi not connected") } // Fence all the volumes @@ -255,8 +271,11 @@ func (cm *PodMonitorType) controllerCleanupPod(pod *v1.Pod, node *v1.Node, reaso for _, vaName := range vaNamesToDelete { err = K8sAPI.DeleteVolumeAttachment(ctx, vaName) if err != nil { - log.WithFields(fields).Errorf("Couldn't delete VolumeAttachment: %s", vaName) - return false + err = K8sAPI.DeleteVolumeAttachment(ctx, vaName) + if err != nil && !strings.Contains(err.Error(), notFound) { + log.WithFields(fields).Errorf("Couldn't delete VolumeAttachment- aborting after retry: %s: %s", vaName, err.Error()) + return false + } } } @@ -354,8 +373,12 @@ func (cm *PodMonitorType) podToArrayIDs(pod *v1.Pod) ([]string, error) { // If connectivity is lost, will initiate cleanup of the pods. // This is a never ending function, intended to be called as Go routine. func (cm *PodMonitorType) ArrayConnectivityMonitor() { + // Loop through all the monitored Pods making sure they still have array access for { + podKeysToClean := make([]string, 0) + nodesToTaint := make(map[string]bool) + // Clear the connectivity cache so it will sample again. connectivityCache.ResetSampled() // Internal function for iterating PodKeyToControllerPodInfo @@ -363,8 +386,6 @@ func (cm *PodMonitorType) ArrayConnectivityMonitor() { fnPodKeyToControllerPodInfo := func(key, value interface{}) bool { controllerPodInfo := value.(*ControllerPodInfo) podKey := controllerPodInfo.PodKey - podNamespace, podName := splitPodKey(podKey) - podUID := controllerPodInfo.PodUID node := controllerPodInfo.Node // Check if we have connectivity for all our array ids @@ -377,23 +398,53 @@ func (cm *PodMonitorType) ArrayConnectivityMonitor() { } } if !connected { - // Fetch the pod. 
- ctx, cancel := K8sAPI.GetContext(MediumTimeout) - defer cancel() - pod, err := K8sAPI.GetPod(ctx, podNamespace, podName) - if err == nil { - if string(pod.ObjectMeta.UID) == podUID && pod.Spec.NodeName == node.ObjectMeta.Name { - log.Infof("Cleaning up pod %s/%s because of array connectivity loss", pod.ObjectMeta.Namespace, pod.ObjectMeta.Name) - cm.controllerCleanupPod(pod, node, "ArrayConnectionLost", false) - } else { - log.Infof("Skipping pod %s/%s podUID %s %s node %s %s", pod.ObjectMeta.Namespace, pod.ObjectMeta.Name, - string(pod.ObjectMeta.UID), podUID, pod.Spec.NodeName, node.ObjectMeta.Name) - } - } + nodesToTaint[node.ObjectMeta.Name] = true + podKeysToClean = append(podKeysToClean, podKey) } return true } + + // Process all the pods, generating the associated connectivity cache entries cm.PodKeyToControllerPodInfo.Range(fnPodKeyToControllerPodInfo) + + // Taint all the nodes that were not connected + for nodeName := range nodesToTaint { + log.Infof("Tainting node %s because of connectivity loss", nodeName) + err := taintNode(nodeName, false) + if err != nil { + log.Errorf("Unable to taint node: %s: %s", nodeName, err.Error()) + } + } + + // Cleanup pods that are on the tainted nodes. + for _, podKey := range podKeysToClean { + // Fetch the pod. + info, ok := cm.PodKeyToControllerPodInfo.Load(podKey) + if !ok { + continue + } + podInfo := info.(*ControllerPodInfo) + if len(podInfo.PodAffinityLabels) > 0 { + // Process all the pods with affinity together + log.Infof("Processing pods with affinity %v", podInfo.PodAffinityLabels) + for _, podKey := range podKeysToClean { + // Fetch the pod. + infox, ok := cm.PodKeyToControllerPodInfo.Load(podKey) + if !ok { + continue + } + podInfox := infox.(*ControllerPodInfo) + if mapEqualsMap(podInfo.PodAffinityLabels, podInfox.PodAffinityLabels) { + cm.ProcessPodInfoForCleanup(podInfox, "ArrayConnectivityLoss") + } + } + log.Infof("End Processing pods with affinity %v", podInfo.PodAffinityLabels) + } else { + cm.ProcessPodInfoForCleanup(podInfo, "ArrayConnectivityLoss") + } + } + + // Sleep according to the NODE_CONNECTIVITY_POLL_RATE pollRate := GetArrayConnectivityPollRate() time.Sleep(pollRate) if pollRate < 10*time.Millisecond { @@ -403,6 +454,23 @@ func (cm *PodMonitorType) ArrayConnectivityMonitor() { } } +// ProcessPodInfoForCleanup processes a ControllerPodInfo for cleanup, checking that the UID and object are the same, and then calling controllerCleanupPod. 
+func (cm *PodMonitorType) ProcessPodInfoForCleanup(podInfo *ControllerPodInfo, reason string) { + podNamespace, podName := splitPodKey(podInfo.PodKey) + ctx, cancel := K8sAPI.GetContext(MediumTimeout) + defer cancel() + pod, err := K8sAPI.GetPod(ctx, podNamespace, podName) + if err == nil { + if string(pod.ObjectMeta.UID) == podInfo.PodUID && pod.Spec.NodeName == podInfo.Node.ObjectMeta.Name { + log.Infof("Cleaning up pod %s/%s because of %s", reason, pod.ObjectMeta.Namespace, pod.ObjectMeta.Name) + cm.controllerCleanupPod(pod, podInfo.Node, reason, false, false) + } else { + log.Infof("Skipping pod %s/%s podUID %s %s node %s %s", pod.ObjectMeta.Namespace, pod.ObjectMeta.Name, + string(pod.ObjectMeta.UID), podInfo.PodUID, pod.Spec.NodeName, podInfo.Node.ObjectMeta.Name) + } + } +} + type nodeArrayConnectivityCache struct { initOnce sync.Once // Will be set after initialization nodeArrayConnectivitySampled map[string]bool // If true, already sampled, if need to call array to verify connectivity @@ -500,3 +568,59 @@ func nodeHasTaint(node *v1.Node, key string, taintEffect v1.TaintEffect) bool { } return false } + +// getPodAffinityLabels returns nil if no node affinity is specified. If node affinity is specified, +// podPodAffinity returns a map of podLabels for pods the specificed pod should have affinity with. +func (cm *PodMonitorType) getPodAffinityLabels(pod *v1.Pod) map[string]string { + result := make(map[string]string) + affinity := pod.Spec.Affinity + if affinity == nil { + return result + } + podAffinity := affinity.PodAffinity + if podAffinity == nil { + return result + } + requiredDuringSchedulingIgnoredDuringExecution := podAffinity.RequiredDuringSchedulingIgnoredDuringExecution + if requiredDuringSchedulingIgnoredDuringExecution == nil { + return result + } + for _, schedConstraints := range requiredDuringSchedulingIgnoredDuringExecution { + topologyKey := schedConstraints.TopologyKey + if topologyKey != hostNameTopologyKey { + continue + } + labelSelector := schedConstraints.LabelSelector + if labelSelector == nil { + continue + } + matchLabels := labelSelector.MatchLabels + for k, v := range matchLabels { + result[k] = v + } + for _, matchExpr := range labelSelector.MatchExpressions { + if matchExpr.Operator != "In" { + continue + } + for _, v := range matchExpr.Values { + result[matchExpr.Key] = v + } + } + + } + return result +} + +// mapEqualsMap returns true IFF string map1 contains the same elements as map2 +func mapEqualsMap(map1, map2 map[string]string) bool { + if len(map1) != len(map2) { + return false + } + for k1, v1 := range map1 { + v2, ok := map2[k1] + if !ok || v2 != v1 { + return false + } + } + return true +} diff --git a/internal/monitor/features/controller.feature b/internal/monitor/features/controller.feature index 4c17061..7c6caa2 100644 --- a/internal/monitor/features/controller.feature +++ b/internal/monitor/features/controller.feature @@ -28,7 +28,7 @@ Feature: Controller Monitor @controller-mode Scenario Outline: test controllerModePodHandler Given a controller monitor "vxflex" - And a pod for node with volumes condition + And a pod for node with volumes condition affinity And a node with taint And I induce error When I call controllerModePodHandler with event @@ -37,24 +37,25 @@ Feature: Controller Monitor And the last log message contains Examples: - | podnode | nvol | condition | nodetaint | error | eventtype | cleaned | info | errormsg | - | "node1" | 2 | "Initialized" | "noexec" | "none" | "Updated" | "true" | "false" | "Successfully cleaned up 
pod" | - | "node1" | 2 | "NotReady" | "noexec" | "none" | "Updated" | "true" | "false" | "Successfully cleaned up pod" | - | "node1" | 2 | "NotReady" | "nosched" | "none" | "Updated" | "true" | "false" | "Successfully cleaned up pod" | - | "node1" | 2 | "CrashLoop" | "none" | "none" | "Updated" | "false" | "false" | "cleaning up CrashLoopBackOff pod" | - | "node1" | 2 | "NotReady" | "nosched" | "none" | "Deleted" | "false" | "false" | "none" | - | "node1" | 2 | "Ready" | "none" | "none" | "Updated" | "false" | "true" | "none" | - | "node1" | 2 | "NotReady" | "noexec" | "GetPod" | "Updated" | "false" | "false" | "GetPod failed" | - | "node1" | 2 | "NotReady" | "noexec" | "GetNode" | "Updated" | "false" | "false" | "GetNode failed" | - | "node1" | 2 | "Ready" | "noexec" | "CreateEvent" | "Updated" | "false" | "true" | "none" | - | "node1" | 2 | "NotReady" | "noexec" | "CreateEvent" | "Updated" | "true" | "false" | "Successfully cleaned up pod" | - | "node1" | 2 | "CrashLoop" | "noexec" | "CreateEvent" | "Updated" | "true" | "false" | "Successfully cleaned up pod" | - | "node1" | 2 | "Initialized" | "noexec" | "CreateEvent" | "Updated" | "true" | "false" | "Successfully cleaned up pod" | + | podnode | nvol | condition | affin | nodetaint | error | eventtype | cleaned | info | errormsg | + | "node1" | 2 | "Initialized" | "false" | "noexec" | "none" | "Updated" | "true" | "false" | "Successfully cleaned up pod" | + | "node1" | 2 | "NotReady" | "false" | "noexec" | "none" | "Updated" | "true" | "false" | "Successfully cleaned up pod" | + | "node1" | 2 | "NotReady" | "false" | "nosched" | "none" | "Updated" | "true" | "false" | "Successfully cleaned up pod" | + | "node1" | 2 | "CrashLoop" | "false" | "none" | "none" | "Updated" | "false" | "false" | "cleaning up CrashLoopBackOff pod" | + | "node1" | 2 | "NotReady" | "false" | "nosched" | "none" | "Deleted" | "false" | "false" | "none" | + | "node1" | 2 | "Ready" | "false" | "none" | "none" | "Updated" | "false" | "true" | "none" | + | "node1" | 2 | "Ready" | "true" | "none" | "none" | "Updated" | "false" | "true" | "none" | + | "node1" | 2 | "NotReady" | "false" | "noexec" | "GetPod" | "Updated" | "false" | "false" | "GetPod failed" | + | "node1" | 2 | "NotReady" | "false" | "noexec" | "GetNode" | "Updated" | "false" | "false" | "GetNode failed" | + | "node1" | 2 | "Ready" | "false" | "noexec" | "CreateEvent" | "Updated" | "false" | "true" | "none" | + | "node1" | 2 | "NotReady" | "false" | "noexec" | "CreateEvent" | "Updated" | "true" | "false" | "Successfully cleaned up pod" | + | "node1" | 2 | "CrashLoop" | "false" | "noexec" | "CreateEvent" | "Updated" | "true" | "false" | "Successfully cleaned up pod" | + | "node1" | 2 | "Initialized" | "false" | "noexec" | "CreateEvent" | "Updated" | "true" | "false" | "Successfully cleaned up pod" | @controller-mode Scenario Outline: test ArrayConnectivityMonitor Given a controller monitor "vxflex" - And a pod for node with volumes condition + And a pod for node with volumes condition affinity And I induce error When I call controllerModePodHandler with event "Updated" And I call ArrayConnectivityMonitor @@ -62,8 +63,24 @@ Feature: Controller Monitor And the last log message contains Examples: - | podnode | nvol | condition | error | cleaned | errormsg | - | "node1" | 2 | "Ready" | "NodeConnected" | "false" | "Connected true" | - | "node1" | 2 | "Ready" | "NodeNotConnected" | "true" | "Successfully cleaned up pod" | - | "node1" | 2 | "Ready" | "CreateEvent" | "true" | "Successfully cleaned up pod" | + | 
podnode | nvol | condition | affin | error | cleaned | errormsg | + | "node1" | 2 | "Ready" | "true" | "NodeNotConnected" | "true" | "none" | + | "node1" | 2 | "Ready" | "false" | "NodeConnected" | "false" | "Connected true" | + | "node1" | 2 | "Ready" | "false" | "NodeNotConnected" | "true" | "Successfully cleaned up pod" | + | "node1" | 2 | "Ready" | "false" | "CreateEvent" | "true" | "Successfully cleaned up pod" | + @controller-mode + Scenario Outline: test PodAffinityLabels + Given a controller pod with podaffinitylabels + And create a pod for node with volumes condition affinity errorcase + And I induce error + When I call getPodAffinityLabels + Then the pod is cleaned + + Examples: + | podnode | nvol | condition | affin | nodetaint | error | errorcase | cleaned | + | "node1" | 2 | "Ready" | "true" | "none" | "none" | "podaffinity" | "false" | + | "node1" | 2 | "Ready" | "true" | "none" | "none" | "topology" | "false" | + | "node1" | 2 | "Ready" | "true" | "none" | "none" | "required" | "false" | + | "node1" | 2 | "Ready" | "true" | "none" | "none" | "labelselector" | "false" | + | "node1" | 2 | "Ready" | "true" | "none" | "none" | "operator" | "false" | diff --git a/internal/monitor/monitor_steps_test.go b/internal/monitor/monitor_steps_test.go index c778943..5361b01 100644 --- a/internal/monitor/monitor_steps_test.go +++ b/internal/monitor/monitor_steps_test.go @@ -15,6 +15,17 @@ import ( "context" "errors" "fmt" + "os" + "path/filepath" + "podmon/internal/criapi" + "podmon/internal/csiapi" + "podmon/internal/k8sapi" + "podmon/internal/utils" + "strconv" + "strings" + "sync" + "time" + "github.com/cucumber/godog" "github.com/dell/gofsutil" log "github.com/sirupsen/logrus" @@ -22,21 +33,12 @@ import ( "github.com/stretchr/testify/assert" v1 "k8s.io/api/core/v1" storagev1 "k8s.io/api/storage/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/uuid" "k8s.io/apimachinery/pkg/watch" "k8s.io/client-go/kubernetes/fake" cri "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" - "os" - "path/filepath" - "podmon/internal/criapi" - "podmon/internal/csiapi" - "podmon/internal/k8sapi" - "podmon/internal/utils" - "strconv" - "strings" - "sync" - "time" ) const ( @@ -49,6 +51,7 @@ type feature struct { loghook *logtest.Hook // Kubernetes objects pod *v1.Pod + pod2 *v1.Pod node *v1.Node // PodmonMonitorType podmonMonitor *PodMonitorType @@ -109,6 +112,7 @@ func (f *feature) aControllerMonitor(driver string) error { gofsutil.UseMockFS() RemoveDir = f.mockRemoveDir f.badWatchObject = false + f.pod2 = nil return nil } @@ -120,9 +124,23 @@ func (f *feature) mockRemoveDir(_ string) error { } func (f *feature) aPodForNodeWithVolumesCondition(node string, nvolumes int, condition string) error { - pod := f.createPod(node, nvolumes, condition) + pod := f.createPod(node, nvolumes, condition, "false") + f.pod = pod + f.k8sapiMock.AddPod(pod) + return nil +} + +func (f *feature) aPodForNodeWithVolumesConditionAffinity(node string, nvolumes int, condition, affinity string) error { + pod := f.createPod(node, nvolumes, condition, affinity) f.pod = pod f.k8sapiMock.AddPod(pod) + // If affinity, create a second pod with affinity to the first + if affinity == "true" { + f.pod2 = f.createPod(node, nvolumes, condition, affinity) + f.pod2.ObjectMeta.Name = "affinityPod" + f.k8sapiMock.AddPod(f.pod2) + fmt.Printf("Added affinitPod\n") + } return nil } @@ -136,7 +154,7 @@ func (f *feature) 
iHaveAPodsForNodeWithVolumesDevicesCondition(nPods int, nodeNa } }() for i := 0; i < nPods; i++ { - pod := f.createPod(nodeName, nvolumes, condition) + pod := f.createPod(nodeName, nvolumes, condition, "false") f.k8sapiMock.AddPod(pod) f.podList[i] = pod @@ -179,7 +197,7 @@ func (f *feature) iHaveAPodsForNodeWithVolumesDevicesCondition(nPods int, nodeNa func (f *feature) iCallControllerCleanupPodForNode(nodeName string) error { node, _ := f.k8sapiMock.GetNode(context.Background(), nodeName) f.node = node - f.success = f.podmonMonitor.controllerCleanupPod(f.pod, node, "Unit Test", false) + f.success = f.podmonMonitor.controllerCleanupPod(f.pod, node, "Unit Test", false, false) return nil } @@ -363,6 +381,9 @@ func (f *feature) iCallControllerModePodHandlerWithEvent(event string) error { eventType = watch.Error } f.err = f.podmonMonitor.controllerModePodHandler(f.pod, eventType) + if f.pod2 != nil { + f.podmonMonitor.controllerModePodHandler(f.pod2, eventType) + } // Wait on the go routine to finish time.Sleep(100 * time.Millisecond) @@ -376,6 +397,9 @@ func (f *feature) thePodIsCleaned(boolean string) error { lastentry := f.loghook.LastEntry() switch boolean { case "true": + if strings.Contains(lastentry.Message, "End Processing pods with affinity map") && f.pod2 != nil { + return nil + } if !strings.Contains(lastentry.Message, "Successfully cleaned up pod") { return fmt.Errorf("Expected pod to be cleaned up but it was not, last message: %s", lastentry.Message) } @@ -444,7 +468,7 @@ func (f *feature) iCallNodeModeCleanupPodsForNode(nodeName string) error { return nil } -func (f *feature) createPod(node string, nvolumes int, condition string) *v1.Pod { +func (f *feature) createPod(node string, nvolumes int, condition, affinity string) *v1.Pod { pod := &v1.Pod{} pod.ObjectMeta.UID = uuid.NewUUID() if len(f.podUID) == 0 { @@ -457,6 +481,9 @@ func (f *feature) createPod(node string, nvolumes int, condition string) *v1.Pod pod.ObjectMeta.Name = fmt.Sprintf("podname-%s", pod.ObjectMeta.UID) pod.Spec.NodeName = node pod.Spec.Volumes = make([]v1.Volume, 0) + if affinity == "true" { + f.addAffinityToPod(pod) + } pod.Status.Message = "pod updated" pod.Status.Reason = "pod reason" pod.Status.ContainerStatuses = make([]v1.ContainerStatus, 0) @@ -563,10 +590,45 @@ func (f *feature) createPod(node string, nvolumes int, condition string) *v1.Pod return pod } +// Adds a pod affinity specification based on hostname to the pod +func (f *feature) addAffinityToPod(pod *v1.Pod) { + matchLabels := make(map[string]string) + matchLabels["affinityLabel1"] = "affinityLabelValue1" + values := make([]string, 1) + values[0] = "affinityValue1" + matchExpr := metav1.LabelSelectorRequirement{ + Operator: "In", + Key: "affinityLabel2", + Values: values, + } + matchExprs := make([]metav1.LabelSelectorRequirement, 1) + matchExprs[0] = matchExpr + labelSelector := metav1.LabelSelector{ + MatchLabels: matchLabels, + MatchExpressions: matchExprs, + } + namespaces := make([]string, 1) + namespaces[0] = podns + podAffinityTerm := v1.PodAffinityTerm{ + LabelSelector: &labelSelector, + Namespaces: namespaces, + TopologyKey: hostNameTopologyKey, + } + podAffinityTerms := make([]v1.PodAffinityTerm, 1) + podAffinityTerms[0] = podAffinityTerm + podAffinity := v1.PodAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: podAffinityTerms, + } + affinity := v1.Affinity{ + PodAffinity: &podAffinity, + } + pod.Spec.Affinity = &affinity +} + func (f *feature) theControllerCleanedUpPodsForNode(cleanedUpCount int, nodeName string) error { 
node, _ := f.k8sapiMock.GetNode(context.Background(), nodeName) for i := 0; i < cleanedUpCount; i++ { - if success := f.podmonMonitor.controllerCleanupPod(f.podList[i], node, "Unit Test", false); !success { + if success := f.podmonMonitor.controllerCleanupPod(f.podList[i], node, "Unit Test", false, false); !success { return fmt.Errorf("controllerCleanPod was not successful") } } @@ -748,12 +810,227 @@ func (f *feature) iCallTestLockAndGetPodKey() error { return nil } +func (f *feature) createPodErrorCase(node string, nvolumes int, condition, affinity string, errorcase string) *v1.Pod { + pod := &v1.Pod{} + pod.ObjectMeta.UID = uuid.NewUUID() + if len(f.podUID) == 0 { + f.podUID = make([]types.UID, 0) + } + f.podCount++ + f.podUID = append(f.podUID, pod.ObjectMeta.UID) + podIndex := f.podCount - 1 + pod.ObjectMeta.Namespace = podns + pod.ObjectMeta.Name = fmt.Sprintf("podname-%s", pod.ObjectMeta.UID) + pod.Spec.NodeName = node + pod.Spec.Volumes = make([]v1.Volume, 0) + if affinity == "true" { + f.addAffinityToPodErrorCase(pod, errorcase) + } + pod.Status.Message = "pod updated" + pod.Status.Reason = "pod reason" + pod.Status.ContainerStatuses = make([]v1.ContainerStatus, 0) + containerStatus := v1.ContainerStatus{ + ContainerID: "//" + containerID, + } + containerInfo := &criapi.ContainerInfo{ + ID: containerID, + Name: "running-container", + State: cri.ContainerState_CONTAINER_EXITED, + } + f.criMock.MockContainerInfos["1234"] = containerInfo + pod.Status.ContainerStatuses = append(pod.Status.ContainerStatuses, containerStatus) + if pod.Status.Conditions == nil { + pod.Status.Conditions = make([]v1.PodCondition, 0) + } + switch condition { + case "Ready": + condition := v1.PodCondition{ + Type: "Ready", + Status: "True", + Reason: condition, + Message: condition, + } + pod.Status.Conditions = append(pod.Status.Conditions, condition) + case "NotReady": + condition := v1.PodCondition{ + Type: "Ready", + Status: "False", + Reason: condition, + Message: condition, + } + pod.Status.Conditions = append(pod.Status.Conditions, condition) + case "Initialized": + condition := v1.PodCondition{ + Type: "Initialized", + Status: "True", + Reason: condition, + Message: condition, + } + pod.Status.Conditions = append(pod.Status.Conditions, condition) + case "CrashLoop": + waiting := &v1.ContainerStateWaiting{ + Reason: crashLoopBackOffReason, + Message: "unit test condition", + } + state := v1.ContainerState{ + Waiting: waiting, + } + containerStatus := v1.ContainerStatus{ + State: state, + } + pod.Status.ContainerStatuses = append(pod.Status.ContainerStatuses, containerStatus) + // PodCondition is Ready=false + condition := v1.PodCondition{ + Type: "Ready", + Status: "False", + Reason: condition, + Message: condition, + } + pod.Status.Conditions = append(pod.Status.Conditions, condition) + } + // add a number of volumes to the pod + for i := 0; i < nvolumes; i++ { + // Create a PV + pv := &v1.PersistentVolume{} + pv.ObjectMeta.Name = fmt.Sprintf("pv-%s-%d", f.podUID[podIndex], i) + f.pvNames = append(f.pvNames, pv.ObjectMeta.Name) + claimRef := &v1.ObjectReference{} + claimRef.Kind = "PersistentVolumeClaim" + claimRef.Namespace = podns + claimRef.Name = fmt.Sprintf("pvc-%s-%d", f.podUID[podIndex], i) + pv.Spec.ClaimRef = claimRef + log.Infof("claimRef completed") + csiPVSource := &v1.CSIPersistentVolumeSource{} + csiPVSource.Driver = "csi-vxflexos.dellemc.com" + csiPVSource.VolumeHandle = fmt.Sprintf("vhandle%d", i) + pv.Spec.CSI = csiPVSource + // Create a PVC + pvc := &v1.PersistentVolumeClaim{} + 
pvc.ObjectMeta.Namespace = podns + pvc.ObjectMeta.Name = fmt.Sprintf("pvc-%s-%d", f.podUID[podIndex], i) + pvc.Spec.VolumeName = pv.ObjectMeta.Name + pvc.Status.Phase = "Bound" + // Create a VolumeAttachment + va := &storagev1.VolumeAttachment{} + va.ObjectMeta.Name = fmt.Sprintf("va%d", i) + va.Spec.NodeName = node + va.Spec.Source.PersistentVolumeName = &pv.ObjectMeta.Name + // Add the objects to the mock engine. + f.k8sapiMock.AddPV(pv) + f.k8sapiMock.AddPVC(pvc) + f.k8sapiMock.AddVA(va) + // Add a volume to the pod + vol := v1.Volume{} + vol.Name = fmt.Sprintf("pv-%s-%d", f.podUID[podIndex], i) + pvcSource := &v1.PersistentVolumeClaimVolumeSource{} + pvcSource.ClaimName = pvc.ObjectMeta.Name + volSource := v1.VolumeSource{} + volSource.PersistentVolumeClaim = pvcSource + vol.VolumeSource = volSource + pod.Spec.Volumes = append(pod.Spec.Volumes, vol) + } + return pod +} + +// Adds a pod affinity specification based on error condition +func (f *feature) addAffinityToPodErrorCase(pod *v1.Pod, errorcase string) { + matchLabels := make(map[string]string) + matchLabels["affinityLabel1"] = "affinityLabelValue1" + values := make([]string, 1) + values[0] = "affinityValue1" + matchExpr := metav1.LabelSelectorRequirement{ + Operator: "In", + Key: "affinityLabel2", + Values: values, + } + if errorcase == "operator" { + matchExpr.Operator = "Out" + } + matchExprs := make([]metav1.LabelSelectorRequirement, 1) + matchExprs[0] = matchExpr + labelSelector := metav1.LabelSelector{ + MatchLabels: matchLabels, + MatchExpressions: matchExprs, + } + namespaces := make([]string, 1) + namespaces[0] = podns + podAffinityTerm := v1.PodAffinityTerm{ + LabelSelector: &labelSelector, + Namespaces: namespaces, + TopologyKey: hostNameTopologyKey, + } + if errorcase == "topology" { + podAffinityTerm.TopologyKey = "unknown/hostname" + } + if errorcase == "labelselector" { + podAffinityTerm.LabelSelector = nil + } + podAffinityTerms := make([]v1.PodAffinityTerm, 1) + podAffinityTerms[0] = podAffinityTerm + podAffinity := v1.PodAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: podAffinityTerms, + } + if errorcase == "required" { + podAffinity.RequiredDuringSchedulingIgnoredDuringExecution = nil + } + affinity := v1.Affinity{ + PodAffinity: &podAffinity, + } + + if errorcase == "podaffinity" { + affinity.PodAffinity = nil + } + pod.Spec.Affinity = &affinity +} + +func (f *feature) aControllerPodWithPodaffinitylabels() error { + if f.loghook == nil { + f.loghook = logtest.NewGlobal() + } else { + fmt.Printf("loghook last-entry %+v\n", f.loghook.LastEntry()) + } + // This test is for error condition of func getPodAffinityLabels + // testing only for VxflexosDriver + Driver = new(VxflexDriver) + + f.k8sapiMock = new(k8sapi.K8sMock) + f.k8sapiMock.Initialize() + K8sAPI = f.k8sapiMock + f.csiapiMock = new(csiapi.CSIMock) + CSIApi = f.csiapiMock + f.criMock = new(criapi.MockClient) + f.criMock.Initialize() + getContainers = f.criMock.GetContainerInfo + f.podmonMonitor = &PodMonitorType{} + f.podmonMonitor.CSIExtensionsPresent = true + f.podmonMonitor.DriverPathStr = "csi-vxflexos.dellemc.com" + gofsutil.UseMockFS() + RemoveDir = f.mockRemoveDir + f.badWatchObject = false + f.pod2 = nil + return nil +} + +func (f *feature) createAPodForNodeWithVolumesConditionAffinityErrorcase(node string, nvolumes int, condition, affinity, errorcase string) error { + pod := f.createPodErrorCase(node, nvolumes, condition, affinity, errorcase) + f.pod = pod + f.k8sapiMock.AddPod(pod) + return nil +} + +func (f *feature) 
iCallGetPodAffinityLabels() error { + f.podmonMonitor.getPodAffinityLabels(f.pod) + return nil +} + func MonitorTestScenarioInit(context *godog.ScenarioContext) { f := &feature{} context.Step(`^a controller monitor "([^"]*)"$`, f.aControllerMonitor) context.Step(`^a controller monitor unity$`, f.aControllerMonitorUnity) context.Step(`^a controller monitor vxflex$`, f.aControllerMonitorVxflex) + //context.Step(`^a pod for node "([^"]*)" with (\d+) volumes condition "([^"]*)"$`, f.aPodForNodeWithVolumesCondition) context.Step(`^a pod for node "([^"]*)" with (\d+) volumes condition "([^"]*)"$`, f.aPodForNodeWithVolumesCondition) + context.Step(`^a pod for node "([^"]*)" with (\d+) volumes condition "([^"]*)" affinity "([^"]*)"$`, f.aPodForNodeWithVolumesConditionAffinity) context.Step(`^I call controllerCleanupPod for node "([^"]*)"$`, f.iCallControllerCleanupPodForNode) context.Step(`^I induce error "([^"]*)"$`, f.iInduceError) context.Step(`^the last log message contains "([^"]*)"$`, f.theLastLogMessageContains) @@ -781,4 +1058,7 @@ func MonitorTestScenarioInit(context *godog.ScenarioContext) { context.Step(`^I call StartNodeMonitor with key "([^"]*)" and value "([^"]*)"$`, f.iCallStartNodeMonitorWithKeyAndValue) context.Step(`^I send a node event type "([^"]*)"$`, f.iSendANodeEventType) context.Step(`^I call test lock and getPodKey$`, f.iCallTestLockAndGetPodKey) + context.Step(`^a controller pod with podaffinitylabels$`, f.aControllerPodWithPodaffinitylabels) + context.Step(`^create a pod for node "([^"]*)" with (\d+) volumes condition "([^"]*)" affinity "([^"]*)" errorcase "([^"]*)"$`, f.createAPodForNodeWithVolumesConditionAffinityErrorcase) + context.Step(`^I call getPodAffinityLabels$`, f.iCallGetPodAffinityLabels) } diff --git a/internal/monitor/monitor_test.go b/internal/monitor/monitor_test.go index 444a09f..87edaf4 100644 --- a/internal/monitor/monitor_test.go +++ b/internal/monitor/monitor_test.go @@ -81,3 +81,32 @@ func TestNodeMode(t *testing.T) { } log.Printf("Node-mode test finished") } + +func TestMapEqualsMap(t *testing.T) { + cases := []struct { + mapA, mapB map[string]string + result bool + }{ + { + map[string]string{"key": "value"}, + map[string]string{"key": "value"}, + true, + }, + { + map[string]string{"key": "value"}, + map[string]string{"value": "key"}, + false, + }, + { + map[string]string{"key": "value"}, + map[string]string{}, + false, + }, + } + for caseNum, acase := range cases { + result := mapEqualsMap(acase.mapA, acase.mapB) + if result != acase.result { + t.Errorf("Case %d: Expected %t got %t", caseNum, acase.result, result) + } + } +} diff --git a/test/sh/scaleup-powerflex.sh b/test/sh/scaleup-powerflex.sh index 749682f..5457421 100644 --- a/test/sh/scaleup-powerflex.sh +++ b/test/sh/scaleup-powerflex.sh @@ -12,7 +12,7 @@ CWD=$(pwd) NVOLUMES=2 STORAGECLASS=vxflexos -MAXPODS=81 +MAXPODS=4 # checks that all labeled pods are running, exits if not wait_on_running() { From 072756a68c43179ccf9fb9727f260782f9dd6277 Mon Sep 17 00:00:00 2001 From: rbo54 Date: Thu, 20 Jan 2022 12:20:08 -0500 Subject: [PATCH 2/7] Add rebalance changes to nway test script; add bounce.kubelet as alternate way to kill node --- test/sh/bounce.kubelet | 30 ++++++++++++++ test/sh/nway.sh | 89 ++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 116 insertions(+), 3 deletions(-) create mode 100755 test/sh/bounce.kubelet diff --git a/test/sh/bounce.kubelet b/test/sh/bounce.kubelet new file mode 100755 index 0000000..a36065c --- /dev/null +++ b/test/sh/bounce.kubelet @@ -0,0 +1,30 @@ 
+#!/bin/sh +# +# Copyright (c) 2021. Dell Inc., or its subsidiaries. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# + +seconds=600 + +for param in $*; do + case $param in + "--seconds") + shift + seconds=$1 + shift + ;; + esac +done + +echo "$(date +"%Y-%m-%d %H:%M:%S")" "-- stopping kubelet for seconds=$seconds" >>/root/bounce.kubelet.out +systemctl stop kubelet +sleep $seconds +echo "$(date +"%Y-%m-%d %H:%M:%S")" "-- starting kubelet after seconds=$seconds" >>/root/bounce.kubelet.out +systemctl start kubelet + diff --git a/test/sh/nway.sh b/test/sh/nway.sh index 74184fa..eb33176 100755 --- a/test/sh/nway.sh +++ b/test/sh/nway.sh @@ -31,6 +31,7 @@ BOUNCEIPSECONDS=240 # Bounce IP time in seconds for interface down TIMEOUT=600 # Maximum time in seconds to wait for a failure cycle (needs to be higher than EVACUATE_TIMEOUT) MAXITERATIONS=3 # Maximum number of failover iterations DRIVERNS="vxflexos" # Driver namespace +REBALANCE=1 # Do rebalance if needed for pods with affinity rm -f stop # Remove the stop file @@ -56,6 +57,10 @@ for param in $*; do TIMEOUT=$1 shift ;; + "--rebalance") + REBALANCE=1 + shift + ;; "--help") shift echo "parameters: --ns driver-namespace [--bounceipseconds value] [--maxiterations value] [--timeoutseconds value]" @@ -72,12 +77,90 @@ EVACUATE_TIMEOUT=$TIMEOUT # Doesn't really matter if most of the time is s # and aborts if we exceed TIMEOUT check_timeout() { if [ $1 -gt $TIMEOUT ]; then - echo "******************* timed out: " $1 "seconds ********************" - collect_logs.sh --ns $DRIVERNS - exit 2 + if [ $REBALANCE -gt 0 ]; then + rebalance + else + echo "******************* timed out: " $1 "seconds ********************" + collect_logs.sh --ns $DRIVERNS + exit 2 + fi fi } +# ==================================================================================================================================================== +# This part of the code rebalances pods across nodes for pod affinity. +maxPods=90 + +# nodelist returns a list of nodes( +nodelist() { + kubectl get nodes -A | grep -v master | grep -v NAME | awk '{ print $1 }' +} + +# get the number of pods on a node $1 +podsOnNode() { + # Add an extra space on match string to differentiate worker-1 from worker-10 + kubectl get pods -A -o wide | grep "$1 " | wc -l +} + +# get namespaces of the pending pods +getNSOfPendingPods() { + kubectl get pods -A -o wide | grep Pending | grep -v default | awk '{ print $1}' +} + +# cordon a k8s node $1=node id +cordon() { + echo "cordoning node $1" + kubectl cordon $1 +} + +# cordon a k8s node $1=node id +uncordon() { + echo "uncordoning node $1" + kubectl uncordon $1 +} + +# delete pod names in namespace $1=namespace +deletePodsInNS() { + pods=$(kubectl get pods -n $1 | grep -v NAME | awk '{print $1}') + echo pods "$pods to be deleted" + for pod in $pods; do + echo "kubctl delete pod -n $1 $pod" + kubectl delete pod --grace-period 0 -n $1 $pod + done +} + + +rebalance() { + echo "Rebalancing pods to nodes..." 
+ cordonedNodes="" + nodes=$(nodelist) + echo nodes: $nodes + for n in $nodes; do + pods=$(podsOnNode $n) + echo node $n has $pods pods + if [ $pods -gt $maxPods ]; then + cordon $n + cordonedNodes="$cordonedNodes $n" + fi + done + echo cordonedNodes: $cordonedNodes + namespaces=$(getNSOfPendingPods) + for ns in $namespaces; do + echo "deleting pods in namespace $ns" + deletePodsInNS $ns + done + echo "waiting for pods to get moved" + for i in 1 2 3 4 5 6 7 8 9 10; do + kubectl get pods -l podmon.dellemc.com/driver -A -o wide | grep -v NAME | grep -v Running + sleep 60 + done + for n in $cordonedNodes; do + uncordon $n + done + echo "Rebalancing complete" +} +# ==================================================================================================================================================== + print_status() { notready=$(kubectl get nodes | awk '/NotReady/ { gsub("\\..*", "", $1); print $1; }';) From 444968c9809e06603d6a0c25eba9ec2ed9ecf572 Mon Sep 17 00:00:00 2001 From: rbo54 Date: Fri, 21 Jan 2022 08:27:11 -0500 Subject: [PATCH 3/7] Add rebalance.sh which rebalances nodes with scheduler imbalance (some nodes with 110 pods) --- test/sh/rebalance.sh | 80 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 test/sh/rebalance.sh diff --git a/test/sh/rebalance.sh b/test/sh/rebalance.sh new file mode 100644 index 0000000..85ef6a8 --- /dev/null +++ b/test/sh/rebalance.sh @@ -0,0 +1,80 @@ +#!/bin/sh + +# Rebalance pods by +# 1. tainting overloaded nodes, +# 2. removing pods from partially populated namespaces, +# 3. Waiting 10 minutes for the pods to get recreated, +# 4. Removing the taints that were applied + +maxPods=90 + +# nodelist returns a list of nodes( +nodelist() { + kubectl get nodes -A | grep -v master | grep -v NAME | awk '{ print $1 }' +} + +# get the number of pods on a node $1 +podsOnNode() { + # Add an extra space on match string to differentiate worker-1 from worker-10 + kubectl get pods -A -o wide | grep "$1 " | wc -l +} + +# get namespaces of the pending pods +getNSOfPendingPods() { + kubectl get pods -A -o wide | grep Pending | grep -v default | awk '{ print $1}' +} + +# cordon a k8s node $1=node id +cordon() { + echo "cordoning node $1" + kubectl cordon $1 +} + +# cordon a k8s node $1=node id +uncordon() { + echo "uncordoning node $1" + kubectl uncordon $1 +} + +# delete pod names in namespace $1=namespace +deletePodsInNS() { + pods=$(kubectl get pods -n $1 | grep -v NAME | awk '{print $1}') + echo pods "$pods to be deleted" + for pod in $pods; do + echo "kubctl delete pod -n $1 $pod" + kubectl delete pod --grace-period 0 -n $1 $pod + done +} + + +rebalance() { + echo "Rebalancing pods to nodes..." 
+ cordonedNodes="" + nodes=$(nodelist) + echo nodes: $nodes + for n in $nodes; do + pods=$(podsOnNode $n) + echo node $n has $pods pods + if [ $pods -gt $maxPods ]; then + cordon $n + cordonedNodes="$cordonedNodes $n" + fi + done + echo cordonedNodes: $cordonedNodes + namespaces=$(getNSOfPendingPods) + for ns in $namespaces; do + echo "deleting pods in namespace $ns" + deletePodsInNS $ns + done + echo "waiting for pods to get moved" + for i in 1 2 3 4 5 6 7 8 9 10; do + kubectl get pods -l podmon.dellemc.com/driver -A -o wide | grep -v NAME | grep -v Running + sleep 60 + done + for n in $cordonedNodes; do + uncordon $n + done + echo "Rebalancing complete" +} + +rebalance From f6719fff409e38db5cd3b6f8bb365999580184a9 Mon Sep 17 00:00:00 2001 From: rbo54 Date: Fri, 21 Jan 2022 10:11:42 -0500 Subject: [PATCH 4/7] Fix prohibited word in grep describing type of k8s node --- test/sh/nway.sh | 2 +- test/sh/rebalance.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/sh/nway.sh b/test/sh/nway.sh index eb33176..7e5559c 100755 --- a/test/sh/nway.sh +++ b/test/sh/nway.sh @@ -93,7 +93,7 @@ maxPods=90 # nodelist returns a list of nodes( nodelist() { - kubectl get nodes -A | grep -v master | grep -v NAME | awk '{ print $1 }' + kubectl get nodes -A | grep -v 'mast.r' | grep -v NAME | awk '{ print $1 }' } # get the number of pods on a node $1 diff --git a/test/sh/rebalance.sh b/test/sh/rebalance.sh index 85ef6a8..7dfa6f8 100644 --- a/test/sh/rebalance.sh +++ b/test/sh/rebalance.sh @@ -10,7 +10,7 @@ maxPods=90 # nodelist returns a list of nodes( nodelist() { - kubectl get nodes -A | grep -v master | grep -v NAME | awk '{ print $1 }' + kubectl get nodes -A | grep -v 'mast.r' | grep -v NAME | awk '{ print $1 }' } # get the number of pods on a node $1 From 08e2bdad79e3e7e4be696168eb8d1475808304d4 Mon Sep 17 00:00:00 2001 From: rbo54 Date: Mon, 24 Jan 2022 10:27:21 -0500 Subject: [PATCH 5/7] set replicas for unity --- test/podmontest/insu.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/test/podmontest/insu.sh b/test/podmontest/insu.sh index 6464bd9..57d94eb 100644 --- a/test/podmontest/insu.sh +++ b/test/podmontest/insu.sh @@ -18,6 +18,7 @@ zone=${zone:-""} storageClassName=${storageClassName:-unity-nfs} image="$REGISTRY_HOST:$REGISTRY_PORT/podmontest:v0.0.54" prefix="pmtu" +replicas=1 deploymentType="statefulset" driverLabel="csi-unity" podAffinity="false" From 87420411edba40078034945d900d95c29513a5c8 Mon Sep 17 00:00:00 2001 From: rbo54 Date: Mon, 24 Jan 2022 10:38:36 -0500 Subject: [PATCH 6/7] re-enable nodeMonitorHandler --- cmd/podmon/main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/podmon/main.go b/cmd/podmon/main.go index 557d0ec..51cf2d6 100644 --- a/cmd/podmon/main.go +++ b/cmd/podmon/main.go @@ -160,7 +160,7 @@ func main() { go ArrayConnMonitorFc() } // monitor all the nodes with no label required - // XXX go StartNodeMonitorFn(K8sAPI, k8sapi.K8sClient.Client, "", "", monitor.MonitorRestartTimeDelay) + go StartNodeMonitorFn(K8sAPI, k8sapi.K8sClient.Client, "", "", monitor.MonitorRestartTimeDelay) } // monitor the pods with the designated label key/value From fe24a656040baa9033c463b4df82d20714907697 Mon Sep 17 00:00:00 2001 From: rbo54 Date: Wed, 26 Jan 2022 12:47:13 -0500 Subject: [PATCH 7/7] Add pod topology spread to helm chart and update storage class for unity-nfs --- test/podmontest/deploy/templates/test.yaml | 9 +++++++++ test/podmontest/deploy/values-unity-nfs.yaml | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git 
a/test/podmontest/deploy/templates/test.yaml b/test/podmontest/deploy/templates/test.yaml index 59beea6..916181f 100644 --- a/test/podmontest/deploy/templates/test.yaml +++ b/test/podmontest/deploy/templates/test.yaml @@ -30,6 +30,7 @@ spec: labels: app: podmontest-{{ .Release.Namespace }} podmon.dellemc.com/driver: {{ required "Must set driver label" .Values.podmonTest.driverLabel }} + affinity: affinity spec: {{- if ne .Values.podmonTest.zone "" }} affinity: @@ -54,6 +55,14 @@ spec: - podmontest-{{ .Release.Namespace }} topologyKey: "kubernetes.io/hostname" {{end}} + topologySpreadConstraints: + - maxSkew: {{ required "Number of replicas" .Values.podmonTest.replicas }} # + topologyKey: kubernetes.io/hostname + # whenUnsatisfiable: ScheduleAnyway + whenUnsatisfiable: DoNotSchedule + labelSelector: + matchLabels: + affinity: affinity serviceAccount: podmontest containers: - name: podmontest diff --git a/test/podmontest/deploy/values-unity-nfs.yaml b/test/podmontest/deploy/values-unity-nfs.yaml index d94094a..de826c1 100644 --- a/test/podmontest/deploy/values-unity-nfs.yaml +++ b/test/podmontest/deploy/values-unity-nfs.yaml @@ -2,7 +2,7 @@ podmonTest: image: "registry:port/podmontest:v0.0.54" namespace: "podmontest" driverLabel: csi-unity - storageClassName: unity-virt21048j9rzz-nfs + storageClassName: unity-nfs nvolumes: 2 ndevices: 0 # deploymentType can be "statefulset" or "deployment"
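
Note on the pod-affinity handling added in PATCH 1/7: the controller records, per pod, the labels named in any required pod-affinity term keyed on kubernetes.io/hostname (getPodAffinityLabels), and ArrayConnectivityMonitor then cleans up pods whose recorded label maps compare equal (mapEqualsMap) as a single group. The sketch below is a minimal, standalone illustration of that extraction and is not part of the patch itself; the pod spec and the "app: podmontest-ns1" label are made-up example values, and the local helper merely mirrors the logic of the method defined in internal/monitor/controller.go.

// affinity_sketch.go - standalone illustration only; mirrors getPodAffinityLabels from patch 1/7.
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

const hostNameTopologyKey = "kubernetes.io/hostname"

// podAffinityLabels collects MatchLabels and "In" MatchExpressions from required
// pod-affinity terms whose topology key is the hostname key, as the patch's helper does.
func podAffinityLabels(pod *v1.Pod) map[string]string {
	result := make(map[string]string)
	if pod.Spec.Affinity == nil || pod.Spec.Affinity.PodAffinity == nil {
		return result
	}
	for _, term := range pod.Spec.Affinity.PodAffinity.RequiredDuringSchedulingIgnoredDuringExecution {
		if term.TopologyKey != hostNameTopologyKey || term.LabelSelector == nil {
			continue
		}
		for k, v := range term.LabelSelector.MatchLabels {
			result[k] = v
		}
		for _, expr := range term.LabelSelector.MatchExpressions {
			if expr.Operator != metav1.LabelSelectorOperatorIn {
				continue
			}
			for _, v := range expr.Values {
				result[expr.Key] = v
			}
		}
	}
	return result
}

func main() {
	// Example pod with a required hostname-scoped pod-affinity term (values are hypothetical).
	pod := &v1.Pod{
		Spec: v1.PodSpec{
			Affinity: &v1.Affinity{
				PodAffinity: &v1.PodAffinity{
					RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
						{
							TopologyKey: hostNameTopologyKey,
							LabelSelector: &metav1.LabelSelector{
								MatchLabels: map[string]string{"app": "podmontest-ns1"},
							},
						},
					},
				},
			},
		},
	}
	// Pods whose extracted maps are equal are treated as one affinity group during
	// ArrayConnectivityLoss cleanup; pods with no such term yield an empty map.
	fmt.Println(podAffinityLabels(pod)) // map[app:podmontest-ns1]
}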