From 27fb76bc1795f6f2b27b6f95f1b07f18402c3683 Mon Sep 17 00:00:00 2001 From: jai kumar <8251914+nb950@users.noreply.github.com> Date: Wed, 16 Feb 2022 09:27:16 -0500 Subject: [PATCH] Fix events (#30) * try to make events show driver install status add pod error message for better serviceability * fix Dockerfile for path to yamls fix permission error for pvc -typo add pod status to logs and events * Update actions.yml * fix gosec errors * gosec exclude fileopen, fileclose * enhance pod status msg * use constant to trim string * revert image * remove harcoded ip --- .github/workflows/actions.yml | 2 + Dockerfile | 2 +- ...rage.dell.com_containerstoragemodules.yaml | 2 +- config/manager/kustomization.yaml | 2 +- config/rbac/role.yaml | 16 +- controllers/csm_controller.go | 161 ++++++++---------- deploy/operator.yaml | 16 +- main.go | 25 ++- .../driverconfig/powerscale/v2.1.0/node.yaml | 4 +- pkg/constants/constants.go | 12 ++ pkg/drivers/commonconfig.go | 8 +- pkg/drivers/powerscale.go | 1 - pkg/logger/logger.go | 4 +- pkg/utils/status.go | 118 ++++++++----- .../driverconfig/powerscale/v2.1.0/node.yaml | 5 +- test/shared/clientgoclient/fakeDaemonset.go | 2 +- test/shared/clientgoclient/fakeDeployment.go | 2 +- 17 files changed, 205 insertions(+), 177 deletions(-) diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml index e71736195..f361cf324 100644 --- a/.github/workflows/actions.yml +++ b/.github/workflows/actions.yml @@ -49,6 +49,8 @@ jobs: uses: actions/checkout@v2 - name: Run Go Security uses: securego/gosec@master + with: + args: -exclude=G304,G307 ./... malware_security_scan: name: Malware Scanner runs-on: ubuntu-latest diff --git a/Dockerfile b/Dockerfile index 1454c9f8c..0e147faac 100644 --- a/Dockerfile +++ b/Dockerfile @@ -27,7 +27,7 @@ ENV USER_UID=1001 \ X_CSM_OPERATOR_CONFIG_DIR="/etc/config/dell-csm-operator" WORKDIR / COPY --from=builder /workspace/manager . -COPY operatorconfig/ /etc/config/local/dell-csm-operator +COPY operatorconfig/ /etc/config/dell-csm-operator LABEL vendor="Dell Inc." \ name="dell-csm-operator" \ summary="Operator for installing Dell EMC CSM Modules" \ diff --git a/config/crd/bases/storage.dell.com_containerstoragemodules.yaml b/config/crd/bases/storage.dell.com_containerstoragemodules.yaml index b016cc0be..3df1f0e0d 100644 --- a/config/crd/bases/storage.dell.com_containerstoragemodules.yaml +++ b/config/crd/bases/storage.dell.com_containerstoragemodules.yaml @@ -28,7 +28,7 @@ spec: type: string - description: Version of CSIDriver jsonPath: .spec.driver.configVersion - name: CONFIGVERSION + name: ConfigVersion type: string name: v1alpha1 schema: diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml index 6dddd8c17..73c350872 100644 --- a/config/manager/kustomization.yaml +++ b/config/manager/kustomization.yaml @@ -12,5 +12,5 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization images: - name: controller - newName: 10.247.101.174:5000/csm-operator + newName: controller newTag: v0.0.2 diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 41126b191..c5005e0f0 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -20,6 +20,14 @@ rules: - services/finalizers verbs: - '*' +- apiGroups: + - "" + resourceNames: + - dell-csm-operator-controller-manager + resources: + - deployments/finalizers + verbs: + - update - apiGroups: - "" resources: @@ -74,14 +82,6 @@ rules: - patch - update - watch -- apiGroups: - - apps - resourceNames: - - dell-csm-operator-controller-manager - resources: - - deployments/finalizers - verbs: - - update - apiGroups: - coordination.k8s.io resources: diff --git a/controllers/csm_controller.go b/controllers/csm_controller.go index 8c7fa6122..5a1ab1b2d 100644 --- a/controllers/csm_controller.go +++ b/controllers/csm_controller.go @@ -13,13 +13,11 @@ package controllers import ( "context" - "errors" "fmt" "sync/atomic" "time" k8sClient "github.com/dell/csm-operator/k8s" - "github.com/dell/csm-operator/pkg/constants" "github.com/dell/csm-operator/pkg/drivers" "github.com/dell/csm-operator/pkg/modules" "k8s.io/apimachinery/pkg/runtime" @@ -53,8 +51,11 @@ import ( corev1 "k8s.io/api/core/v1" storagev1 "k8s.io/api/storage/v1" //metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sync" ) +var dMutex sync.RWMutex + // ContainerStorageModuleReconciler reconciles a ContainerStorageModule object type ContainerStorageModuleReconciler struct { // controller runtime client, responsible for create, delete, update, get etc. @@ -92,7 +93,7 @@ var configVersionKey = fmt.Sprintf("%s/%s", MetadataPrefix, "CSIDriverConfigVers // +kubebuilder:rbac:groups="rbac.authorization.k8s.io",resources=clusterroles/finalizers,verbs=get;list;watch;update;create;delete;patch // +kubebuilder:rbac:groups="rbac.authorization.k8s.io",resources=roles,verbs=get;list;watch;update;create;delete;patch // +kubebuilder:rbac:groups="monitoring.coreos.com",resources=servicemonitors,verbs=get;create -// +kubebuilder:rbac:groups="apps",resources=deployments/finalizers,resourceNames=dell-csm-operator-controller-manager,verbs=update +// +kubebuilder:rbac:groups="",resources=deployments/finalizers,resourceNames=dell-csm-operator-controller-manager,verbs=update // +kubebuilder:rbac:groups="storage.k8s.io",resources=csidrivers,verbs=get;list;watch;create;update;delete;patch // +kubebuilder:rbac:groups="storage.k8s.io",resources=storageclasses,verbs=get;list;watch;create;update;delete // +kubebuilder:rbac:groups="storage.k8s.io",resources=volumeattachments,verbs=get;list;watch;create;update;patch @@ -184,7 +185,6 @@ func (r *ContainerStorageModuleReconciler) Reconcile(ctx context.Context, req ct if err != nil { return utils.HandleValidationError(ctx, csm, r, err) } - r.EventRecorder.Eventf(csm, "Normal", "Updated", "PreChecks ok: %s", csm.Name) // Set the driver status to updating @@ -194,26 +194,24 @@ func (r *ContainerStorageModuleReconciler) Reconcile(ctx context.Context, req ct log.Error(err, "Failed to update CR status") } // Update the driver - r.EventRecorder.Eventf(csm, "Normal", "Updated", "Call install/update driver: %s", csm.Name) syncErr := r.SyncCSM(ctx, *csm, *driverConfig) if syncErr == nil { - r.EventRecorder.Eventf(csm, "Normal", "Updated", "Driver install completed: reconcile count=%d name=%s", r.updateCount, csm.Name) - return utils.LogBannerAndReturn(reconcile.Result{}, err) + return utils.LogBannerAndReturn(reconcile.Result{}, nil) } - // Failed to sync driver deployment + // Failed driver deployment r.EventRecorder.Eventf(csm, "Warning", "Updated", "Failed install: %s", syncErr.Error()) - return utils.LogBannerAndReturn(reconcile.Result{Requeue: false}, syncErr) + return utils.LogBannerAndReturn(reconcile.Result{Requeue: true}, syncErr) } func (r *ContainerStorageModuleReconciler) ignoreUpdatePredicate() predicate.Predicate { return predicate.Funcs{ UpdateFunc: func(e event.UpdateEvent) bool { - r.Log.Info("ignore Csm UpdateEvent") // Ignore updates to status in which case metadata.Generation does not change return e.ObjectOld.GetGeneration() != e.ObjectNew.GetGeneration() }, + DeleteFunc: func(e event.DeleteEvent) bool { // Evaluates to false if the object has been confirmed deleted. return !e.DeleteStateUnknown @@ -222,6 +220,9 @@ func (r *ContainerStorageModuleReconciler) ignoreUpdatePredicate() predicate.Pre } func (r *ContainerStorageModuleReconciler) handleDeploymentUpdate(oldObj interface{}, obj interface{}) { + dMutex.Lock() + defer dMutex.Unlock() + old, _ := oldObj.(*appsv1.Deployment) d, _ := obj.(*appsv1.Deployment) name := d.Spec.Template.Labels["csm"] @@ -233,6 +234,7 @@ func (r *ContainerStorageModuleReconciler) handleDeploymentUpdate(oldObj interfa } log.Debugw("deployment modified generation", fmt.Sprintf("%d", d.Generation), fmt.Sprintf("%d", old.Generation)) + desired := d.Status.Replicas available := d.Status.AvailableReplicas ready := d.Status.ReadyReplicas @@ -257,50 +259,58 @@ func (r *ContainerStorageModuleReconciler) handleDeploymentUpdate(oldObj interfa if err != nil { log.Error("deployment get csm", "error", err.Error()) } - log.Infow("csm prev status", "state", csm.Status) - state := false - if desired == available { - state = true - } - log.Infow("deployment status", "state", state) - stamp := fmt.Sprintf("at %s", time.Now().Format("2006-01-02 15:04:05")) - if !state { - err = errors.New("deployment in error") - newStatus := csm.GetCSMStatus() - newStatus.State = constants.Failed - newStatus.ControllerStatus = csmv1.PodStatus{ - Available: fmt.Sprintf("%d", available), - Failed: fmt.Sprintf("%d", numberUnavailable), - Desired: fmt.Sprintf("%d", desired), - } + newStatus := csm.GetCSMStatus() + err = utils.UpdateStatus(ctx, csm, r, newStatus) + if err != nil { + log.Debugw("deployment status ", "pods", err.Error()) + } + return +} - log.Infow("deployment in err", "err", err.Error()) - err = utils.UpdateStatus(ctx, csm, r, newStatus) - if err != nil { - log.Error("Failed to update Deployment status", "error", err.Error()) - } - r.EventRecorder.Eventf(csm, "Warning", "Updated", "%s Deployment status check Error ,pod desired:%d, unavailable:%d", stamp, desired, numberUnavailable) +func (r *ContainerStorageModuleReconciler) handlePodsUpdate(oldObj interface{}, obj interface{}) { + dMutex.Lock() + defer dMutex.Unlock() - } else { - log.Infow("csm status", "prev state", csm.Status.State) - newStatus := csm.GetCSMStatus() - newStatus.State = constants.Succeeded - newStatus.ControllerStatus = csmv1.PodStatus{ - Available: fmt.Sprintf("%d", available), - Failed: fmt.Sprintf("%d", numberUnavailable), - Desired: fmt.Sprintf("%d", desired), - } - err = utils.UpdateStatus(ctx, csm, r, newStatus) - if err != nil { - log.Error("Failed to update Deployment status", "error", err.Error()) - } - r.EventRecorder.Eventf(csm, "Normal", "Updated", "%s Deployment status check OK : %s desired pods %d, ready pods %d", stamp, d.Name, desired, available) + p, _ := obj.(*corev1.Pod) + name := p.GetLabels()["csm"] + if name == "" { + return + } + key := name + "-" + fmt.Sprintf("%d", r.GetUpdateCount()) + ctx, log := logger.GetNewContextWithLogger(key) + + log.Infow("pod labeled for csm", "name", p.Name) + + // update it to trigger status watch + ns := p.Namespace + namespacedName := t1.NamespacedName{ + Name: name, + Namespace: ns, + } + csm := new(csmv1.ContainerStorageModule) + err := r.Client.Get(ctx, namespacedName, csm) + if err != nil { + r.Log.Error("daemonset get csm", "error", err.Error()) + } + log.Infow("csm prev status ", "state", csm.Status) + newStatus := csm.GetCSMStatus() + err = utils.UpdateStatus(ctx, csm, r, newStatus) + state := csm.GetCSMStatus().NodeStatus.Failed + stamp := fmt.Sprintf("at %d", time.Now().UnixNano()) + if state != "0" && err != nil { + log.Infow("pod status ", "state", err.Error()) + r.EventRecorder.Eventf(csm, "Warning", "Updated", "%s Pod error details %s", stamp, err.Error()) + } else { + r.EventRecorder.Eventf(csm, "Normal", "Complete", "%s Driver pods running OK", stamp) } return } func (r *ContainerStorageModuleReconciler) handleDaemonsetUpdate(oldObj interface{}, obj interface{}) { + dMutex.Lock() + defer dMutex.Unlock() + old, _ := oldObj.(*appsv1.DaemonSet) d, _ := obj.(*appsv1.DaemonSet) name := d.Spec.Template.Labels["csm"] @@ -313,6 +323,7 @@ func (r *ContainerStorageModuleReconciler) handleDaemonsetUpdate(oldObj interfac ctx, log := logger.GetNewContextWithLogger(key) log.Debugw("daemonset modified generation", fmt.Sprintf("%d", d.Generation), fmt.Sprintf("%d", old.Generation)) + desired := d.Status.DesiredNumberScheduled available := d.Status.NumberAvailable ready := d.Status.NumberReady @@ -324,11 +335,6 @@ func (r *ContainerStorageModuleReconciler) handleDaemonsetUpdate(oldObj interfac log.Infow("daemonset ", "available", available) log.Infow("daemonset ", "numberUnavailable", numberUnavailable) - state := false - if desired == ready { - state = true - } - ns := d.Namespace r.Log.Debugw("daemonset ", "ns", ns, "name", name) namespacedName := t1.NamespacedName{ @@ -341,45 +347,12 @@ func (r *ContainerStorageModuleReconciler) handleDaemonsetUpdate(oldObj interfac if err != nil { r.Log.Error("daemonset get csm", "error", err.Error()) } - // get status and update csm log.Infow("csm prev status ", "state", csm.Status) - log.Infow("daemonset status", "state", state) - - stamp := fmt.Sprintf("at %s", time.Now().Format("2006-01-02 15:04:05")) - if !state { - err = errors.New("daemonset in error") - newStatus := csm.GetCSMStatus() - newStatus.State = constants.Failed - //newStatus.ControllerStatus = csmv1.PodStatus{} - newStatus.NodeStatus = csmv1.PodStatus{ - Available: fmt.Sprintf("%d", available), - Failed: fmt.Sprintf("%d", numberUnavailable), - Desired: fmt.Sprintf("%d", desired), - } - - log.Infow("daemonset in err", "err", err.Error()) - err = utils.UpdateStatus(ctx, csm, r, newStatus) - if err != nil { - log.Infow("Failed to update Daemonset status", "error", err.Error()) - } - r.EventRecorder.Eventf(csm, "Warning", "Updated", "%s DaemonSet status check Error ,node pod desired:%d, unavailable:%d", stamp, desired, numberUnavailable) - } else { - - log.Infow("csm status", "prev state", csm.Status.State) - newStatus := csm.GetCSMStatus() - newStatus.State = constants.Succeeded - newStatus.NodeStatus = csmv1.PodStatus{ - Available: fmt.Sprintf("%d", available), - Failed: fmt.Sprintf("%d", numberUnavailable), - Desired: fmt.Sprintf("%d", desired), - } - err = utils.UpdateStatus(ctx, csm, r, newStatus) - if err != nil { - log.Infow("Failed to update Daemonset status", "error", err.Error()) - } - r.EventRecorder.Eventf(csm, "Normal", "Updated", "%s Daemonset status check OK : %s desired pods %d, ready pods %d", stamp, d.Name, desired, ready) - + newStatus := csm.GetCSMStatus() + err = utils.UpdateStatus(ctx, csm, r, newStatus) + if err != nil { + log.Debugw("daemonset status ", "pods", err.Error()) } return } @@ -393,14 +366,18 @@ func (r *ContainerStorageModuleReconciler) ContentWatch() error { } sharedInformerFactory := sinformer.NewSharedInformerFactory(clientset, time.Duration(time.Hour)) - contentInformer := sharedInformerFactory.Apps().V1().DaemonSets().Informer() - contentdeploymentInformer := sharedInformerFactory.Apps().V1().Deployments().Informer() - contentInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ + daemonsetInformer := sharedInformerFactory.Apps().V1().DaemonSets().Informer() + deploymentInformer := sharedInformerFactory.Apps().V1().Deployments().Informer() + daemonsetInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ UpdateFunc: r.handleDaemonsetUpdate, }) - contentdeploymentInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ + deploymentInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ UpdateFunc: r.handleDeploymentUpdate, }) + podsInformer := sharedInformerFactory.Core().V1().Pods().Informer() + podsInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ + UpdateFunc: r.handlePodsUpdate, + }) stop := make(chan struct{}) sharedInformerFactory.Start(stop) diff --git a/deploy/operator.yaml b/deploy/operator.yaml index 5ce3ca0de..90b9716cb 100644 --- a/deploy/operator.yaml +++ b/deploy/operator.yaml @@ -64,6 +64,14 @@ rules: - services/finalizers verbs: - '*' +- apiGroups: + - "" + resourceNames: + - dell-csm-operator-controller-manager + resources: + - deployments/finalizers + verbs: + - update - apiGroups: - "" resources: @@ -118,14 +126,6 @@ rules: - patch - update - watch -- apiGroups: - - apps - resourceNames: - - dell-csm-operator-controller-manager - resources: - - deployments/finalizers - verbs: - - update - apiGroups: - coordination.k8s.io resources: diff --git a/main.go b/main.go index e7b31eadd..4e24f491e 100644 --- a/main.go +++ b/main.go @@ -12,7 +12,6 @@ You may obtain a copy of the License at package main import ( - "context" "flag" "fmt" "io/ioutil" @@ -41,6 +40,9 @@ import ( "github.com/dell/csm-operator/pkg/logger" utils "github.com/dell/csm-operator/pkg/utils" "go.uber.org/zap" + corev1 "k8s.io/api/core/v1" + typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1" + "k8s.io/client-go/tools/record" "k8s.io/client-go/util/workqueue" //+kubebuilder:scaffold:imports ) @@ -177,12 +179,13 @@ func main() { opts.BindFlags(flag.CommandLine) flag.Parse() - //logType := logger.DevelopmentLogLevel - //logger.SetLoggerLevel(logType) - //_, log := logger.GetNewContextWithLogger("main") - log := logger.GetLogger(context.Background()) + ctrl.SetLogger(crzap.New(crzap.UseFlagOptions(&opts))) - //ctrl.SetLogger(crzap.New(crzap.UseFlagOptions(&opts))) + logType := logger.DevelopmentLogLevel + logger.SetLoggerLevel(logType) + _, log := logger.GetNewContextWithLogger("main") + + ctrl.SetLogger(crzap.New(crzap.UseFlagOptions(&opts))) printVersion(log) operatorConfig := getOperatorConfig(log) @@ -201,13 +204,19 @@ func main() { os.Exit(1) } + eventBroadcaster := record.NewBroadcaster() + eventBroadcaster.StartLogging(log.Infof) + k8sClient := kubernetes.NewForConfigOrDie(restConfig) + eventBroadcaster.StartRecordingToSink(&typedcorev1.EventSinkImpl{Interface: k8sClient.CoreV1().Events("")}) + recorder := eventBroadcaster.NewRecorder(clientgoscheme.Scheme, corev1.EventSource{Component: "csm"}) + expRateLimiter := workqueue.NewItemExponentialFailureRateLimiter(5*time.Millisecond, 120*time.Second) if err = (&controllers.ContainerStorageModuleReconciler{ Client: mgr.GetClient(), - K8sClient: kubernetes.NewForConfigOrDie(restConfig), + K8sClient: k8sClient, Log: log, Scheme: mgr.GetScheme(), - EventRecorder: mgr.GetEventRecorderFor("csm"), + EventRecorder: recorder, Config: operatorConfig, }).SetupWithManager(mgr, expRateLimiter, 1); err != nil { setupLog.Error(err, "unable to create controller", "controller", "ContainerStorageModule") diff --git a/operatorconfig/driverconfig/powerscale/v2.1.0/node.yaml b/operatorconfig/driverconfig/powerscale/v2.1.0/node.yaml index 19c2bd940..f677975c9 100644 --- a/operatorconfig/driverconfig/powerscale/v2.1.0/node.yaml +++ b/operatorconfig/driverconfig/powerscale/v2.1.0/node.yaml @@ -13,7 +13,7 @@ rules: resources: ["persistentvolumes"] verbs: ["create", "delete", "get", "list", "watch", "update"] - apiGroups: [""] - resources: ["persistentvolumesclaims"] + resources: ["persistentvolumeclaims"] verbs: ["get", "list", "watch", "update"] - apiGroups: [""] resources: ["events"] @@ -191,4 +191,4 @@ spec: secretName: -creds - name: csi-isilon-config-params configMap: - name: -config-params \ No newline at end of file + name: -config-params diff --git a/pkg/constants/constants.go b/pkg/constants/constants.go index f25f6c99f..04ba6600c 100644 --- a/pkg/constants/constants.go +++ b/pkg/constants/constants.go @@ -45,3 +45,15 @@ const DriverMountName = "socket-dir" // TerminationGracePeriodSeconds - grace period in seconds var TerminationGracePeriodSeconds = int64(30) + +// Reason - pod status +var Reason = "Reason" + +// ContainerCreating - pod container +var ContainerCreating = "ContainerCreating" + +// PendingCreate - pod pending +var PendingCreate = "Pending create" + +// PodStatusRemoveString -make pod status crisp +var PodStatusRemoveString = "rpc error: code = Unknown desc = Error response from daemon:" diff --git a/pkg/drivers/commonconfig.go b/pkg/drivers/commonconfig.go index c3ab11974..d0c230b19 100644 --- a/pkg/drivers/commonconfig.go +++ b/pkg/drivers/commonconfig.go @@ -85,8 +85,8 @@ func GetController(ctx context.Context, cr csmv1.ContainerStorageModule, operato } } if !removeContainer { - utils.ReplaceAllContainerImageApply(operatorConfig.K8sVersion, &c) - utils.UpdateSideCarApply(cr.Spec.Driver.SideCars, &c) + utils.ReplaceAllContainerImageApply(operatorConfig.K8sVersion, &containers[i]) + utils.UpdateSideCarApply(cr.Spec.Driver.SideCars, &containers[i]) newcontainers = append(newcontainers, c) } @@ -167,8 +167,8 @@ func GetNode(ctx context.Context, cr csmv1.ContainerStorageModule, operatorConfi } } - utils.ReplaceAllContainerImageApply(operatorConfig.K8sVersion, &c) - utils.UpdateSideCarApply(cr.Spec.Driver.SideCars, &c) + utils.ReplaceAllContainerImageApply(operatorConfig.K8sVersion, &containers[i]) + utils.UpdateSideCarApply(cr.Spec.Driver.SideCars, &containers[i]) } diff --git a/pkg/drivers/powerscale.go b/pkg/drivers/powerscale.go index b982a3747..e77d983f6 100644 --- a/pkg/drivers/powerscale.go +++ b/pkg/drivers/powerscale.go @@ -39,7 +39,6 @@ func PrecheckPowerScale(ctx context.Context, cr *csmv1.ContainerStorageModule, r return fmt.Errorf("%s is an invalid value for X_CSI_ISI_SKIP_CERTIFICATE_VALIDATION: %v", env.Value, err) } skipCertValid = b - fmt.Printf("debug skipCertValid 1 %t\n", skipCertValid) } if env.Name == "CERT_SECRET_COUNT" { d, err := strconv.ParseInt(env.Value, 0, 8) diff --git a/pkg/logger/logger.go b/pkg/logger/logger.go index ff1b5e10a..20a5b35fc 100755 --- a/pkg/logger/logger.go +++ b/pkg/logger/logger.go @@ -41,7 +41,7 @@ func InitLogger() { encoder := getEncoder() core := zapcore.NewCore(encoder, writeSyncer, zapcore.DebugLevel) - logger := zap.New(core) + logger := zap.New(core, zap.AddCaller()) sugarLogger = logger.Sugar() } */ @@ -98,7 +98,7 @@ func newLogger() *zap.Logger { core := zapcore.NewCore(consoleEncoder, zapcore.AddSync(os.Stderr), level) - l := zap.New(core) + l := zap.New(core, zap.AddCaller()) return l } diff --git a/pkg/utils/status.go b/pkg/utils/status.go index a206e2e58..a5cca76c9 100644 --- a/pkg/utils/status.go +++ b/pkg/utils/status.go @@ -10,14 +10,15 @@ import ( appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1" + //metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" t1 "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/reconcile" + "strings" "sync" ) var dMutex sync.RWMutex -var sMutex sync.RWMutex func getInt32(pointer *int32) int32 { if pointer == nil { @@ -41,38 +42,52 @@ func getDeploymentStatus(ctx context.Context, instance *csmv1.ContainerStorageMo //app=test-isilon-controller label := instance.GetNamespace() + "-controller" - podList := &v1.PodList{} opts := []client.ListOption{ client.InNamespace(instance.GetNamespace()), client.MatchingLabels{"app": label}, } + + podList := &v1.PodList{} err = r.GetClient().List(ctx, podList, opts...) if err != nil { - return replicas, csmv1.PodStatus{}, err + return deployment.Status.ReadyReplicas, csmv1.PodStatus{}, err } + + var msg string for _, pod := range podList.Items { log.Infof("deployment pod count %d name %s status %s", readyPods, pod.Name, pod.Status.Phase) if pod.Status.Phase == corev1.PodRunning { readyPods++ - for _, containerStatus := range pod.Status.ContainerStatuses { - if containerStatus.State.Running == nil { - readyPods-- - failedCount++ - break + } else if pod.Status.Phase == corev1.PodPending { + failedCount++ + errMap := make(map[string]string) + for _, cs := range pod.Status.ContainerStatuses { + if cs.State.Waiting != nil && cs.State.Waiting.Reason != constants.ContainerCreating { + log.Infow("container", "message", cs.State.Waiting.Message, constants.Reason, cs.State.Waiting.Reason) + shortMsg := strings.Replace(cs.State.Waiting.Message, + constants.PodStatusRemoveString, "", 1) + errMap[cs.State.Waiting.Reason] = shortMsg + } + if cs.State.Waiting != nil && cs.State.Waiting.Reason == constants.ContainerCreating { + errMap[cs.State.Waiting.Reason] = constants.PendingCreate } } - } else { - failedCount++ + for k, v := range errMap { + msg += k + "=" + v + } } } + if failedCount > 0 { + err = errors.New(msg) + } return replicas, csmv1.PodStatus{ Available: fmt.Sprintf("%d", readyPods), Desired: fmt.Sprintf("%d", replicas), Failed: fmt.Sprintf("%d", failedCount), - }, nil + }, err } func getDaemonSetStatus(ctx context.Context, instance *csmv1.ContainerStorageModule, r ReconcileCSM) (int32, csmv1.PodStatus, error) { @@ -85,31 +100,55 @@ func getDaemonSetStatus(ctx context.Context, instance *csmv1.ContainerStorageMod return 0, csmv1.PodStatus{}, err } - faliedCount := 0 + failedCount := 0 podList := &v1.PodList{} + label := instance.GetNamespace() + "-node" opts := []client.ListOption{ client.InNamespace(instance.GetNamespace()), - client.MatchingLabels{"app": instance.GetNodeName()}, + client.MatchingLabels{"app": label}, } + err = r.GetClient().List(ctx, podList, opts...) if err != nil { return ds.Status.DesiredNumberScheduled, csmv1.PodStatus{}, err } - msg := "Pods ok" + + var msg string + errMap := make(map[string]string) for _, pod := range podList.Items { - if pod.Status.Phase != corev1.PodRunning { - faliedCount++ - msg += "Pod not running " + pod.Name + log.Infof("daemonset pod %s : %s", pod.Name, pod.Status.Phase) + if pod.Status.Phase == corev1.PodPending { + failedCount++ + for _, cs := range pod.Status.ContainerStatuses { + if cs.State.Waiting != nil && cs.State.Waiting.Reason != constants.ContainerCreating { + //message: Back-off pulling image "dellec/csi-isilon:v2.1.0" + //reason: ImagePullBackOff + log.Infow("daemonset pod container", "message", cs.State.Waiting.Message, constants.Reason, cs.State.Waiting.Reason) + shortMsg := strings.Replace(cs.State.Waiting.Message, + constants.PodStatusRemoveString, "", 1) + errMap[cs.State.Waiting.Reason] = shortMsg + } + if cs.State.Waiting != nil && cs.State.Waiting.Reason == constants.ContainerCreating { + log.Infof("daemonset pod container %s : %s", pod.Name, pod.Status.Phase) + errMap[cs.State.Waiting.Reason] = constants.PendingCreate + } + } } } - log.Infof("daemonset status %d", ds.Status.NumberAvailable) - if faliedCount > 0 { + for k, v := range errMap { + msg += k + "=" + v + } + + log.Infof("daemonset status available pods %d", ds.Status.NumberAvailable) + log.Infof("daemonset status failedCount pods %d", failedCount) + log.Infof("daemonset status desired pods %d", ds.Status.DesiredNumberScheduled) + if failedCount > 0 { err = errors.New(msg) } return ds.Status.DesiredNumberScheduled, csmv1.PodStatus{ Available: fmt.Sprintf("%d", ds.Status.NumberAvailable), Desired: fmt.Sprintf("%d", ds.Status.DesiredNumberScheduled), - Failed: fmt.Sprintf("%d", faliedCount), + Failed: fmt.Sprintf("%d", failedCount), }, err } @@ -142,7 +181,8 @@ func CalculateState(ctx context.Context, instance *csmv1.ContainerStorageModule, err = daemonSetErr } if daemonSetErr != nil && controllerErr != nil { - err = fmt.Errorf("controllerError: %s, daemonseterror: %s", controllerErr.Error(), daemonSetErr.Error()) + err = fmt.Errorf("ControllerError: %s, Daemonseterror: %s", controllerErr.Error(), daemonSetErr.Error()) + log.Infof("calculate overall error msg [%s]", err.Error()) } SetStatus(ctx, r, instance, newStatus) return running, err @@ -150,10 +190,8 @@ func CalculateState(ctx context.Context, instance *csmv1.ContainerStorageModule, // SetStatus of csm func SetStatus(ctx context.Context, r ReconcileCSM, instance *csmv1.ContainerStorageModule, newStatus *csmv1.ContainerStorageModuleStatus) { - log := logger.GetLogger(ctx) - sMutex.Lock() - defer sMutex.Unlock() + log := logger.GetLogger(ctx) instance.GetCSMStatus().State = newStatus.State log.Infow("Driver State", "Controller", newStatus.ControllerStatus, "Node", newStatus.NodeStatus) @@ -163,45 +201,32 @@ func SetStatus(ctx context.Context, r ReconcileCSM, instance *csmv1.ContainerSto // UpdateStatus of csm func UpdateStatus(ctx context.Context, instance *csmv1.ContainerStorageModule, r ReconcileCSM, newStatus *csmv1.ContainerStorageModuleStatus) error { - log := logger.GetLogger(ctx) - dMutex.Lock() defer dMutex.Unlock() - log.Infow("update current csm status", "status", instance.Status.State) + log := logger.GetLogger(ctx) + log.Infow("update current csm status", "status", instance.Status.State) statusString := fmt.Sprintf("update new Status: (State - %s)", newStatus.State) log.Info(statusString) log.Infow("Update State", "Controller", newStatus.ControllerStatus, "Node", newStatus.NodeStatus) - //SetStatus(ctx, r, instance, newStatus) - if newStatus.State == constants.Succeeded { - running, err := CalculateState(ctx, instance, r, newStatus) - if err != nil { - log.Errorw("Update driver status ", "error", err.Error()) - newStatus.State = constants.Failed - } - log.Infow("update CR status", "running", running) - } - namespacedName := t1.NamespacedName{ - Name: instance.Name, - Namespace: instance.Namespace, - } + _, merr := CalculateState(ctx, instance, r, newStatus) csm := new(csmv1.ContainerStorageModule) - err := r.GetClient().Get(ctx, namespacedName, csm) + err := r.GetClient().Get(ctx, t1.NamespacedName{Name: instance.Name, + Namespace: instance.GetNamespace()}, csm) if err != nil { - log.Errorw("Get CSM failed", "error", err.Error()) return err } - csm.Status = *instance.GetCSMStatus() + csm.Status = instance.Status err = r.GetClient().Status().Update(ctx, csm) if err != nil { - log.Error(err, "Failed to update CR status UpdateStatus") + log.Error(err, " Failed to update CR status") return err } log.Info("Update done") - return nil + return merr } // HandleValidationError for csm @@ -225,6 +250,9 @@ func HandleValidationError(ctx context.Context, instance *csmv1.ContainerStorage // HandleSuccess for csm func HandleSuccess(ctx context.Context, instance *csmv1.ContainerStorageModule, r ReconcileCSM, newStatus, oldStatus *csmv1.ContainerStorageModuleStatus) (reconcile.Result, error) { + dMutex.Lock() + defer dMutex.Unlock() + log := logger.GetLogger(ctx) running, err := CalculateState(ctx, instance, r, newStatus) diff --git a/test/config/driverconfig/powerscale/v2.1.0/node.yaml b/test/config/driverconfig/powerscale/v2.1.0/node.yaml index 19c2bd940..4183edcb1 100644 --- a/test/config/driverconfig/powerscale/v2.1.0/node.yaml +++ b/test/config/driverconfig/powerscale/v2.1.0/node.yaml @@ -13,7 +13,7 @@ rules: resources: ["persistentvolumes"] verbs: ["create", "delete", "get", "list", "watch", "update"] - apiGroups: [""] - resources: ["persistentvolumesclaims"] + resources: ["persistentvolumeclaims"] verbs: ["get", "list", "watch", "update"] - apiGroups: [""] resources: ["events"] @@ -191,4 +191,5 @@ spec: secretName: -creds - name: csi-isilon-config-params configMap: - name: -config-params \ No newline at end of file + name: -config-params + diff --git a/test/shared/clientgoclient/fakeDaemonset.go b/test/shared/clientgoclient/fakeDaemonset.go index 01d861dc2..99872152c 100644 --- a/test/shared/clientgoclient/fakeDaemonset.go +++ b/test/shared/clientgoclient/fakeDaemonset.go @@ -28,7 +28,7 @@ func (c *FakeDaemonSets) Apply(ctx context.Context, daemonSet *applyconfiguratio return result, err } - json.Unmarshal(data, result) + err = json.Unmarshal(data, result) _, err = c.Get(ctx, *daemonSet.Name, v1.GetOptions{}) if errors.IsNotFound(err) { diff --git a/test/shared/clientgoclient/fakeDeployment.go b/test/shared/clientgoclient/fakeDeployment.go index 45e8bbb20..8f3a2595b 100644 --- a/test/shared/clientgoclient/fakeDeployment.go +++ b/test/shared/clientgoclient/fakeDeployment.go @@ -30,7 +30,7 @@ func (c *FakeDeployments) Apply(ctx context.Context, deployment *applyconfigurat return result, err } - json.Unmarshal(data, result) + err = json.Unmarshal(data, result) _, err = c.Get(ctx, *deployment.Name, v1.GetOptions{}) if errors.IsNotFound(err) {