From b0a0f87897e27b449833120d8aa8bb00882cace5 Mon Sep 17 00:00:00 2001 From: Wen Zhou Date: Mon, 27 Jan 2025 13:53:40 +0100 Subject: [PATCH] update: clean up old code before refactor (#1494) * update: clean up old code before refactor - remove reference we explicilty check on odhdashboardconfig and kserve - remove customized config on controller QPS - remove RemoveLabel which is not called anywhere Signed-off-by: Wen Zhou * update: - remove function for upgrade cleanup introduced before 2.8 - remove flags to main: operator namespace and application namespace ( operator namespace is passing as env variable, app namespace is done by DSCI ) Signed-off-by: Wen Zhou * fix: Auth CR is cluster-scoped no need to set namespace for request - application namespace is removed from reconciler of dsci Signed-off-by: Wen Zhou * fix: wrong code for upgrade path + rebase error Signed-off-by: Wen Zhou --------- Signed-off-by: Wen Zhou --- .../dscinitialization_controller.go | 9 +- main.go | 22 +-- pkg/deploy/deploy.go | 19 +-- pkg/upgrade/upgrade.go | 156 +++--------------- 4 files changed, 33 insertions(+), 173 deletions(-) diff --git a/controllers/dscinitialization/dscinitialization_controller.go b/controllers/dscinitialization/dscinitialization_controller.go index 042a5506cc5..eaacbd4aa08 100644 --- a/controllers/dscinitialization/dscinitialization_controller.go +++ b/controllers/dscinitialization/dscinitialization_controller.go @@ -67,9 +67,8 @@ var managementStateChangeTrustedCA = false // DSCInitializationReconciler reconciles a DSCInitialization object. type DSCInitializationReconciler struct { *odhClient.Client - Scheme *runtime.Scheme - Recorder record.EventRecorder - ApplicationsNamespace string + Scheme *runtime.Scheme + Recorder record.EventRecorder } // Reconcile contains controller logic specific to DSCInitialization instance updates. @@ -249,7 +248,7 @@ func (r *DSCInitializationReconciler) Reconcile(ctx context.Context, req ctrl.Re } case cluster.ManagedRhoai: osdConfigsPath := filepath.Join(deploy.DefaultManifestPath, "osd-configs") - err = deploy.DeployManifestsFromPath(ctx, r.Client, instance, osdConfigsPath, r.ApplicationsNamespace, "osd", true) + err = deploy.DeployManifestsFromPath(ctx, r.Client, instance, osdConfigsPath, instance.Spec.ApplicationsNamespace, "osd", true) if err != nil { log.Error(err, "Failed to apply osd specific configs from manifests", "Manifests path", osdConfigsPath) r.Recorder.Eventf(instance, corev1.EventTypeWarning, "DSCInitializationReconcileError", "Failed to apply "+osdConfigsPath) @@ -445,7 +444,7 @@ func (r *DSCInitializationReconciler) watchAuthResource(ctx context.Context, a c if len(instanceList.Items) == 0 { log.Info("Found no Auth instance in cluster, reconciling to recreate") - return []reconcile.Request{{NamespacedName: types.NamespacedName{Name: "auth", Namespace: r.ApplicationsNamespace}}} + return []reconcile.Request{{NamespacedName: types.NamespacedName{Name: "auth"}}} } return nil diff --git a/main.go b/main.go index edda96dcd79..44b240f7d91 100644 --- a/main.go +++ b/main.go @@ -48,7 +48,6 @@ import ( "k8s.io/apimachinery/pkg/runtime" utilruntime "k8s.io/apimachinery/pkg/util/runtime" clientgoscheme "k8s.io/client-go/kubernetes/scheme" - "k8s.io/client-go/rest" apiregistrationv1 "k8s.io/kube-aggregator/pkg/apis/apiregistration/v1" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/cache" @@ -98,8 +97,6 @@ import ( _ "github.com/opendatahub-io/opendatahub-operator/v2/controllers/components/workbenches" ) -const controllerNum = 4 // we should keep this updated if we have new controllers to add - var ( scheme = runtime.NewScheme() setupLog = ctrl.Log.WithName("setup") @@ -146,9 +143,7 @@ func main() { //nolint:funlen,maintidx,gocyclo var metricsAddr string var enableLeaderElection bool var probeAddr string - var dscApplicationsNamespace string var dscMonitoringNamespace string - var operatorName string var logmode string flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.") @@ -156,11 +151,8 @@ func main() { //nolint:funlen,maintidx,gocyclo flag.BoolVar(&enableLeaderElection, "leader-elect", false, "Enable leader election for controller manager. "+ "Enabling this will ensure there is only one active controller manager.") - flag.StringVar(&dscApplicationsNamespace, "dsc-applications-namespace", "opendatahub", "The namespace where data science cluster"+ - "applications will be deployed") flag.StringVar(&dscMonitoringNamespace, "dsc-monitoring-namespace", "opendatahub", "The namespace where data science cluster"+ "monitoring stack will be deployed") - flag.StringVar(&operatorName, "operator-name", "opendatahub", "The name of the operator") flag.StringVar(&logmode, "log-mode", "", "Log mode ('', prod, devel), default to ''") opts := zap.Options{} @@ -179,9 +171,6 @@ func main() { //nolint:funlen,maintidx,gocyclo setupLog.Error(err, "error getting config for setup") os.Exit(1) } - // uplift default limiataions - setupCfg.QPS = rest.DefaultQPS * controllerNum // 5 * 4 controllers - setupCfg.Burst = rest.DefaultBurst * controllerNum // 10 * 4 controllers setupClient, err := client.New(setupCfg, client.Options{Scheme: scheme}) if err != nil { @@ -325,10 +314,9 @@ func main() { //nolint:funlen,maintidx,gocyclo } if err = (&dscictrl.DSCInitializationReconciler{ - Client: oc, - Scheme: mgr.GetScheme(), - Recorder: mgr.GetEventRecorderFor("dscinitialization-controller"), - ApplicationsNamespace: dscApplicationsNamespace, + Client: oc, + Scheme: mgr.GetScheme(), + Recorder: mgr.GetEventRecorderFor("dscinitialization-controller"), }).SetupWithManager(ctx, mgr); err != nil { setupLog.Error(err, "unable to create controller", "controller", "DSCInitiatlization") os.Exit(1) @@ -405,7 +393,7 @@ func main() { //nolint:funlen,maintidx,gocyclo setupLog.Info("DSCI auto creation is disabled") } else { var createDefaultDSCIFunc manager.RunnableFunc = func(ctx context.Context) error { - err := upgrade.CreateDefaultDSCI(ctx, setupClient, platform, dscApplicationsNamespace, dscMonitoringNamespace) + err := upgrade.CreateDefaultDSCI(ctx, setupClient, platform, dscMonitoringNamespace) if err != nil { setupLog.Error(err, "unable to create initial setup for the operator") } @@ -435,7 +423,7 @@ func main() { //nolint:funlen,maintidx,gocyclo } // Cleanup resources from previous v2 releases var cleanExistingResourceFunc manager.RunnableFunc = func(ctx context.Context) error { - if err = upgrade.CleanupExistingResource(ctx, setupClient, platform, dscApplicationsNamespace, dscMonitoringNamespace, oldReleaseVersion); err != nil { + if err = upgrade.CleanupExistingResource(ctx, setupClient, platform, dscMonitoringNamespace, oldReleaseVersion); err != nil { setupLog.Error(err, "unable to perform cleanup") } return err diff --git a/pkg/deploy/deploy.go b/pkg/deploy/deploy.go index cb28d2c5239..498bce3b395 100644 --- a/pkg/deploy/deploy.go +++ b/pkg/deploy/deploy.go @@ -281,12 +281,6 @@ func manageResource(ctx context.Context, cli client.Client, res *resource.Resour if err == nil { // when resource is found if enabled { - // Exception to not update kserve with managed annotation - // do not reconcile kserve resource with annotation "opendatahub.io/managed: false" - // TODO: remove this exception when we define managed annotation across odh - if found.GetAnnotations()[annotations.ManagedByODHOperator] == "false" && componentName == "kserve" { - return nil - } return updateResource(ctx, cli, res, found, owner) } // Delete resource if it exists or do nothing if not found @@ -354,20 +348,15 @@ func createResource(ctx context.Context, cli client.Client, res *resource.Resour if err != nil { return err } - if obj.GetKind() != "CustomResourceDefinition" && obj.GetKind() != "OdhDashboardConfig" { - if err := ctrl.SetControllerReference(owner, metav1.Object(obj), cli.Scheme()); err != nil { - return err - } + + if err := ctrl.SetControllerReference(owner, metav1.Object(obj), cli.Scheme()); err != nil { + return err } + return cli.Create(ctx, obj) } -// Exception to skip ODHDashboardConfig CR reconcile. func updateResource(ctx context.Context, cli client.Client, res *resource.Resource, found *unstructured.Unstructured, owner metav1.Object) error { - if found.GetKind() == "OdhDashboardConfig" { - return nil - } - // Operator reconcile allowedListfield only when resource is managed by operator(annotation is true) // all other cases: no annotation at all, required annotation not present, of annotation is non-true value, skip reconcile if managed := found.GetAnnotations()[annotations.ManagedByODHOperator]; managed != "true" { diff --git a/pkg/upgrade/upgrade.go b/pkg/upgrade/upgrade.go index 9850e9e88e0..0ca00cbb6fb 100644 --- a/pkg/upgrade/upgrade.go +++ b/pkg/upgrade/upgrade.go @@ -10,9 +10,7 @@ import ( "github.com/hashicorp/go-multierror" operatorv1 "github.com/openshift/api/operator/v1" - routev1 "github.com/openshift/api/route/v1" templatev1 "github.com/openshift/api/template/v1" - monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" appsv1 "k8s.io/api/apps/v1" batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" @@ -108,10 +106,9 @@ func CreateDefaultDSC(ctx context.Context, cli client.Client) error { // CreateDefaultDSCI creates a default instance of DSCI // If there exists default-dsci instance already, it will not update DSCISpec on it. // Note: DSCI CR modifcations are not supported, as it is the initial prereq setting for the components. -func CreateDefaultDSCI(ctx context.Context, cli client.Client, _ cluster.Platform, appNamespace, monNamespace string) error { +func CreateDefaultDSCI(ctx context.Context, cli client.Client, _ cluster.Platform, monNamespace string) error { log := logf.FromContext(ctx) defaultDsciSpec := &dsciv1.DSCInitializationSpec{ - ApplicationsNamespace: appNamespace, Monitoring: serviceApi.DSCMonitoring{ ManagementSpec: common.ManagementSpec{ManagementState: operatorv1.Managed}, MonitoringCommonSpec: serviceApi.MonitoringCommonSpec{ @@ -208,60 +205,33 @@ func getDashboardWatsonResources(ns string) []ResourceSpec { func CleanupExistingResource(ctx context.Context, cli client.Client, platform cluster.Platform, - dscApplicationsNamespace, dscMonitoringNamespace string, + dscMonitoringNamespace string, oldReleaseVersion cluster.Release, ) error { var multiErr *multierror.Error - // Special Handling of cleanup of deprecated model monitoring stack - if platform == cluster.ManagedRhoai { - deprecatedDeployments := []string{"rhods-prometheus-operator"} - multiErr = multierror.Append(multiErr, deleteDeprecatedResources(ctx, cli, dscMonitoringNamespace, deprecatedDeployments, &appsv1.DeploymentList{})) - - deprecatedStatefulsets := []string{"prometheus-rhods-model-monitoring"} - multiErr = multierror.Append(multiErr, deleteDeprecatedResources(ctx, cli, dscMonitoringNamespace, deprecatedStatefulsets, &appsv1.StatefulSetList{})) - - deprecatedServices := []string{"rhods-model-monitoring"} - multiErr = multierror.Append(multiErr, deleteDeprecatedResources(ctx, cli, dscMonitoringNamespace, deprecatedServices, &corev1.ServiceList{})) - - deprecatedRoutes := []string{"rhods-model-monitoring"} - multiErr = multierror.Append(multiErr, deleteDeprecatedResources(ctx, cli, dscMonitoringNamespace, deprecatedRoutes, &routev1.RouteList{})) - - deprecatedSecrets := []string{"rhods-monitoring-oauth-config"} - multiErr = multierror.Append(multiErr, deleteDeprecatedResources(ctx, cli, dscMonitoringNamespace, deprecatedSecrets, &corev1.SecretList{})) - - deprecatedClusterroles := []string{"rhods-namespace-read", "rhods-prometheus-operator"} - multiErr = multierror.Append(multiErr, deleteDeprecatedResources(ctx, cli, dscMonitoringNamespace, deprecatedClusterroles, &rbacv1.ClusterRoleList{})) - - deprecatedClusterrolebindings := []string{"rhods-namespace-read", "rhods-prometheus-operator"} - multiErr = multierror.Append(multiErr, deleteDeprecatedResources(ctx, cli, dscMonitoringNamespace, deprecatedClusterrolebindings, &rbacv1.ClusterRoleBindingList{})) - - deprecatedServiceAccounts := []string{"rhods-prometheus-operator"} - multiErr = multierror.Append(multiErr, deleteDeprecatedResources(ctx, cli, dscMonitoringNamespace, deprecatedServiceAccounts, &corev1.ServiceAccountList{})) - - deprecatedServicemonitors := []string{"modelmesh-federated-metrics"} - multiErr = multierror.Append(multiErr, deleteDeprecatedServiceMonitors(ctx, cli, dscMonitoringNamespace, deprecatedServicemonitors)) + // get DSCI CR to get application namespace + dsciList := &dsciv1.DSCInitializationList{} + if err := cli.List(ctx, dsciList); err != nil { + return err } - // common logic for both self-managed and managed - deprecatedOperatorSM := []string{"rhods-monitor-federation2"} - multiErr = multierror.Append(multiErr, deleteDeprecatedServiceMonitors(ctx, cli, dscMonitoringNamespace, deprecatedOperatorSM)) - - // Remove deprecated opendatahub namespace(previously owned by kuberay and Kueue) - multiErr = multierror.Append(multiErr, deleteDeprecatedNamespace(ctx, cli, "opendatahub")) - + if len(dsciList.Items) == 0 { + return nil + } + d := &dsciList.Items[0] // Handling for dashboard OdhApplication Jupyterhub CR, see jira #443 - multiErr = multierror.Append(multiErr, removOdhApplicationsCR(ctx, cli, gvk.OdhApplication, "jupyterhub", dscApplicationsNamespace)) + multiErr = multierror.Append(multiErr, removOdhApplicationsCR(ctx, cli, gvk.OdhApplication, "jupyterhub", d.Spec.ApplicationsNamespace)) // cleanup for github.com/opendatahub-io/pull/888 - deprecatedFeatureTrackers := []string{dscApplicationsNamespace + "-kserve-temporary-fixes"} - multiErr = multierror.Append(multiErr, deleteDeprecatedResources(ctx, cli, dscApplicationsNamespace, deprecatedFeatureTrackers, &featuresv1.FeatureTrackerList{})) + deprecatedFeatureTrackers := []string{d.Spec.ApplicationsNamespace + "-kserve-temporary-fixes"} + multiErr = multierror.Append(multiErr, deleteDeprecatedResources(ctx, cli, d.Spec.ApplicationsNamespace, deprecatedFeatureTrackers, &featuresv1.FeatureTrackerList{})) // Cleanup of deprecated default RoleBinding resources - deprecatedDefaultRoleBinding := []string{dscApplicationsNamespace} - multiErr = multierror.Append(multiErr, deleteDeprecatedResources(ctx, cli, dscApplicationsNamespace, deprecatedDefaultRoleBinding, &rbacv1.RoleBindingList{})) + deprecatedDefaultRoleBinding := []string{d.Spec.ApplicationsNamespace} + multiErr = multierror.Append(multiErr, deleteDeprecatedResources(ctx, cli, d.Spec.ApplicationsNamespace, deprecatedDefaultRoleBinding, &rbacv1.RoleBindingList{})) // Handling for dashboard OdhDocument Jupyterhub CR, see jira #443 comments odhDocJPH := getJPHOdhDocumentResources( - dscApplicationsNamespace, + d.Spec.ApplicationsNamespace, []string{ "jupyterhub-install-python-packages", "jupyterhub-update-server-settings", @@ -271,23 +241,23 @@ func CleanupExistingResource(ctx context.Context, multiErr = multierror.Append(multiErr, deleteResources(ctx, cli, &odhDocJPH)) // only apply on RHOAI since ODH has a different way to create this CR by dashboard if platform == cluster.SelfManagedRhoai || platform == cluster.ManagedRhoai { - if err := upgradeODCCR(ctx, cli, "odh-dashboard-config", dscApplicationsNamespace, oldReleaseVersion); err != nil { + if err := upgradeODCCR(ctx, cli, "odh-dashboard-config", d.Spec.ApplicationsNamespace, oldReleaseVersion); err != nil { return err } } // remove modelreg proxy container from deployment in ODH if platform == cluster.OpenDataHub { - if err := removeRBACProxyModelRegistry(ctx, cli, "model-registry-operator", "kube-rbac-proxy", dscApplicationsNamespace); err != nil { + if err := removeRBACProxyModelRegistry(ctx, cli, "model-registry-operator", "kube-rbac-proxy", d.Spec.ApplicationsNamespace); err != nil { return err } } // to take a reference - toDelete := getDashboardWatsonResources(dscApplicationsNamespace) + toDelete := getDashboardWatsonResources(d.Spec.ApplicationsNamespace) multiErr = multierror.Append(multiErr, deleteResources(ctx, cli, &toDelete)) // cleanup nvidia nim integration - multiErr = multierror.Append(multiErr, cleanupNimIntegration(ctx, cli, oldReleaseVersion, dscApplicationsNamespace)) + multiErr = multierror.Append(multiErr, cleanupNimIntegration(ctx, cli, oldReleaseVersion, d.Spec.ApplicationsNamespace)) return multiErr.ErrorOrNil() } @@ -369,35 +339,6 @@ func deleteDeprecatedResources(ctx context.Context, cli client.Client, namespace return multiErr.ErrorOrNil() } -// Need to handle ServiceMonitor deletion separately as the generic function does not work for ServiceMonitors because of how the package is built. -func deleteDeprecatedServiceMonitors(ctx context.Context, cli client.Client, namespace string, resourceList []string) error { - log := logf.FromContext(ctx) - var multiErr *multierror.Error - listOpts := &client.ListOptions{Namespace: namespace} - servicemonitors := &monitoringv1.ServiceMonitorList{} - if err := cli.List(ctx, servicemonitors, listOpts); err != nil { - multiErr = multierror.Append(multiErr, err) - } - - for _, servicemonitor := range servicemonitors.Items { - for _, name := range resourceList { - if name == servicemonitor.Name { - log.Info("Attempting to delete " + servicemonitor.Name + " in namespace " + namespace) - err := cli.Delete(ctx, servicemonitor) - if err != nil { - if k8serr.IsNotFound(err) { - log.Info("Could not find " + servicemonitor.Name + " in namespace " + namespace) - } else { - multiErr = multierror.Append(multiErr, err) - } - } - log.Info("Successfully deleted " + servicemonitor.Name) - } - } - } - return multiErr.ErrorOrNil() -} - func removOdhApplicationsCR(ctx context.Context, cli client.Client, gvk schema.GroupVersionKind, instanceName string, applicationNS string) error { // first check if CRD in cluster crd := &apiextv1.CustomResourceDefinition{} @@ -526,63 +467,6 @@ func removeRBACProxyModelRegistry(ctx context.Context, cli client.Client, compon return nil } -func RemoveLabel(ctx context.Context, cli client.Client, objectName string, labelKey string) error { - foundNamespace := &corev1.Namespace{} - if err := cli.Get(ctx, client.ObjectKey{Name: objectName}, foundNamespace); err != nil { - if k8serr.IsNotFound(err) { - return nil - } - return fmt.Errorf("could not get %s namespace: %w", objectName, err) - } - delete(foundNamespace.Labels, labelKey) - if err := cli.Update(ctx, foundNamespace); err != nil { - return fmt.Errorf("error removing %s from %s : %w", labelKey, objectName, err) - } - return nil -} - -func deleteDeprecatedNamespace(ctx context.Context, cli client.Client, namespace string) error { - log := logf.FromContext(ctx) - foundNamespace := &corev1.Namespace{} - if err := cli.Get(ctx, client.ObjectKey{Name: namespace}, foundNamespace); err != nil { - if k8serr.IsNotFound(err) { - return nil - } - return fmt.Errorf("could not get %s namespace: %w", namespace, err) - } - - // Check if namespace is owned by DSC - isOwnedByDSC := false - for _, owner := range foundNamespace.OwnerReferences { - if owner.Kind == "DataScienceCluster" { - isOwnedByDSC = true - } - } - if !isOwnedByDSC { - return nil - } - - // Check if namespace has pods running - podList := &corev1.PodList{} - listOpts := []client.ListOption{ - client.InNamespace(namespace), - } - if err := cli.List(ctx, podList, listOpts...); err != nil { - return fmt.Errorf("error getting pods from namespace %s: %w", namespace, err) - } - if len(podList.Items) != 0 { - log.Info("Skip deletion of namespace " + namespace + " due to running Pods in it") - return nil - } - - // Delete namespace if no pods found - if err := cli.Delete(ctx, foundNamespace); err != nil { - return fmt.Errorf("could not delete %s namespace: %w", namespace, err) - } - - return nil -} - func GetDeployedRelease(ctx context.Context, cli client.Client) (cluster.Release, error) { dsciInstance := &dsciv1.DSCInitializationList{} if err := cli.List(ctx, dsciInstance); err != nil {