Skip to content

Commit

Permalink
update: clean up old code before refactor (#1494)
Browse files Browse the repository at this point in the history
* update: clean up old code before refactor

- remove reference we explicilty check on odhdashboardconfig and kserve
- remove customized config on controller QPS
- remove RemoveLabel which is not called anywhere

Signed-off-by: Wen Zhou <[email protected]>

* update:

- remove function for upgrade cleanup introduced before 2.8
- remove flags to main: operator namespace and application namespace
  ( operator namespace is passing as env variable, app namespace is done by DSCI )

Signed-off-by: Wen Zhou <[email protected]>

* fix: Auth CR is cluster-scoped no need to set namespace for request

- application namespace is removed from reconciler of dsci

Signed-off-by: Wen Zhou <[email protected]>

* fix: wrong code for upgrade path + rebase error

Signed-off-by: Wen Zhou <[email protected]>

---------

Signed-off-by: Wen Zhou <[email protected]>
  • Loading branch information
zdtsw authored Jan 27, 2025
1 parent 7b4dfc2 commit b0a0f87
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 173 deletions.
9 changes: 4 additions & 5 deletions controllers/dscinitialization/dscinitialization_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,8 @@ var managementStateChangeTrustedCA = false
// DSCInitializationReconciler reconciles a DSCInitialization object.
type DSCInitializationReconciler struct {
*odhClient.Client
Scheme *runtime.Scheme
Recorder record.EventRecorder
ApplicationsNamespace string
Scheme *runtime.Scheme
Recorder record.EventRecorder
}

// Reconcile contains controller logic specific to DSCInitialization instance updates.
Expand Down Expand Up @@ -249,7 +248,7 @@ func (r *DSCInitializationReconciler) Reconcile(ctx context.Context, req ctrl.Re
}
case cluster.ManagedRhoai:
osdConfigsPath := filepath.Join(deploy.DefaultManifestPath, "osd-configs")
err = deploy.DeployManifestsFromPath(ctx, r.Client, instance, osdConfigsPath, r.ApplicationsNamespace, "osd", true)
err = deploy.DeployManifestsFromPath(ctx, r.Client, instance, osdConfigsPath, instance.Spec.ApplicationsNamespace, "osd", true)
if err != nil {
log.Error(err, "Failed to apply osd specific configs from manifests", "Manifests path", osdConfigsPath)
r.Recorder.Eventf(instance, corev1.EventTypeWarning, "DSCInitializationReconcileError", "Failed to apply "+osdConfigsPath)
Expand Down Expand Up @@ -445,7 +444,7 @@ func (r *DSCInitializationReconciler) watchAuthResource(ctx context.Context, a c
if len(instanceList.Items) == 0 {
log.Info("Found no Auth instance in cluster, reconciling to recreate")

return []reconcile.Request{{NamespacedName: types.NamespacedName{Name: "auth", Namespace: r.ApplicationsNamespace}}}
return []reconcile.Request{{NamespacedName: types.NamespacedName{Name: "auth"}}}
}

return nil
Expand Down
22 changes: 5 additions & 17 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ import (
"k8s.io/apimachinery/pkg/runtime"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
"k8s.io/client-go/rest"
apiregistrationv1 "k8s.io/kube-aggregator/pkg/apis/apiregistration/v1"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/cache"
Expand Down Expand Up @@ -98,8 +97,6 @@ import (
_ "github.com/opendatahub-io/opendatahub-operator/v2/controllers/components/workbenches"
)

const controllerNum = 4 // we should keep this updated if we have new controllers to add

var (
scheme = runtime.NewScheme()
setupLog = ctrl.Log.WithName("setup")
Expand Down Expand Up @@ -146,21 +143,16 @@ func main() { //nolint:funlen,maintidx,gocyclo
var metricsAddr string
var enableLeaderElection bool
var probeAddr string
var dscApplicationsNamespace string
var dscMonitoringNamespace string
var operatorName string
var logmode string

flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.")
flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
flag.BoolVar(&enableLeaderElection, "leader-elect", false,
"Enable leader election for controller manager. "+
"Enabling this will ensure there is only one active controller manager.")
flag.StringVar(&dscApplicationsNamespace, "dsc-applications-namespace", "opendatahub", "The namespace where data science cluster"+
"applications will be deployed")
flag.StringVar(&dscMonitoringNamespace, "dsc-monitoring-namespace", "opendatahub", "The namespace where data science cluster"+
"monitoring stack will be deployed")
flag.StringVar(&operatorName, "operator-name", "opendatahub", "The name of the operator")
flag.StringVar(&logmode, "log-mode", "", "Log mode ('', prod, devel), default to ''")

opts := zap.Options{}
Expand All @@ -179,9 +171,6 @@ func main() { //nolint:funlen,maintidx,gocyclo
setupLog.Error(err, "error getting config for setup")
os.Exit(1)
}
// uplift default limiataions
setupCfg.QPS = rest.DefaultQPS * controllerNum // 5 * 4 controllers
setupCfg.Burst = rest.DefaultBurst * controllerNum // 10 * 4 controllers

setupClient, err := client.New(setupCfg, client.Options{Scheme: scheme})
if err != nil {
Expand Down Expand Up @@ -325,10 +314,9 @@ func main() { //nolint:funlen,maintidx,gocyclo
}

if err = (&dscictrl.DSCInitializationReconciler{
Client: oc,
Scheme: mgr.GetScheme(),
Recorder: mgr.GetEventRecorderFor("dscinitialization-controller"),
ApplicationsNamespace: dscApplicationsNamespace,
Client: oc,
Scheme: mgr.GetScheme(),
Recorder: mgr.GetEventRecorderFor("dscinitialization-controller"),
}).SetupWithManager(ctx, mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "DSCInitiatlization")
os.Exit(1)
Expand Down Expand Up @@ -405,7 +393,7 @@ func main() { //nolint:funlen,maintidx,gocyclo
setupLog.Info("DSCI auto creation is disabled")
} else {
var createDefaultDSCIFunc manager.RunnableFunc = func(ctx context.Context) error {
err := upgrade.CreateDefaultDSCI(ctx, setupClient, platform, dscApplicationsNamespace, dscMonitoringNamespace)
err := upgrade.CreateDefaultDSCI(ctx, setupClient, platform, dscMonitoringNamespace)
if err != nil {
setupLog.Error(err, "unable to create initial setup for the operator")
}
Expand Down Expand Up @@ -435,7 +423,7 @@ func main() { //nolint:funlen,maintidx,gocyclo
}
// Cleanup resources from previous v2 releases
var cleanExistingResourceFunc manager.RunnableFunc = func(ctx context.Context) error {
if err = upgrade.CleanupExistingResource(ctx, setupClient, platform, dscApplicationsNamespace, dscMonitoringNamespace, oldReleaseVersion); err != nil {
if err = upgrade.CleanupExistingResource(ctx, setupClient, platform, dscMonitoringNamespace, oldReleaseVersion); err != nil {
setupLog.Error(err, "unable to perform cleanup")
}
return err
Expand Down
19 changes: 4 additions & 15 deletions pkg/deploy/deploy.go
Original file line number Diff line number Diff line change
Expand Up @@ -281,12 +281,6 @@ func manageResource(ctx context.Context, cli client.Client, res *resource.Resour
if err == nil {
// when resource is found
if enabled {
// Exception to not update kserve with managed annotation
// do not reconcile kserve resource with annotation "opendatahub.io/managed: false"
// TODO: remove this exception when we define managed annotation across odh
if found.GetAnnotations()[annotations.ManagedByODHOperator] == "false" && componentName == "kserve" {
return nil
}
return updateResource(ctx, cli, res, found, owner)
}
// Delete resource if it exists or do nothing if not found
Expand Down Expand Up @@ -354,20 +348,15 @@ func createResource(ctx context.Context, cli client.Client, res *resource.Resour
if err != nil {
return err
}
if obj.GetKind() != "CustomResourceDefinition" && obj.GetKind() != "OdhDashboardConfig" {
if err := ctrl.SetControllerReference(owner, metav1.Object(obj), cli.Scheme()); err != nil {
return err
}

if err := ctrl.SetControllerReference(owner, metav1.Object(obj), cli.Scheme()); err != nil {
return err
}

return cli.Create(ctx, obj)
}

// Exception to skip ODHDashboardConfig CR reconcile.
func updateResource(ctx context.Context, cli client.Client, res *resource.Resource, found *unstructured.Unstructured, owner metav1.Object) error {
if found.GetKind() == "OdhDashboardConfig" {
return nil
}

// Operator reconcile allowedListfield only when resource is managed by operator(annotation is true)
// all other cases: no annotation at all, required annotation not present, of annotation is non-true value, skip reconcile
if managed := found.GetAnnotations()[annotations.ManagedByODHOperator]; managed != "true" {
Expand Down
156 changes: 20 additions & 136 deletions pkg/upgrade/upgrade.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,7 @@ import (

"github.com/hashicorp/go-multierror"
operatorv1 "github.com/openshift/api/operator/v1"
routev1 "github.com/openshift/api/route/v1"
templatev1 "github.com/openshift/api/template/v1"
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
appsv1 "k8s.io/api/apps/v1"
batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -108,10 +106,9 @@ func CreateDefaultDSC(ctx context.Context, cli client.Client) error {
// CreateDefaultDSCI creates a default instance of DSCI
// If there exists default-dsci instance already, it will not update DSCISpec on it.
// Note: DSCI CR modifcations are not supported, as it is the initial prereq setting for the components.
func CreateDefaultDSCI(ctx context.Context, cli client.Client, _ cluster.Platform, appNamespace, monNamespace string) error {
func CreateDefaultDSCI(ctx context.Context, cli client.Client, _ cluster.Platform, monNamespace string) error {
log := logf.FromContext(ctx)
defaultDsciSpec := &dsciv1.DSCInitializationSpec{
ApplicationsNamespace: appNamespace,
Monitoring: serviceApi.DSCMonitoring{
ManagementSpec: common.ManagementSpec{ManagementState: operatorv1.Managed},
MonitoringCommonSpec: serviceApi.MonitoringCommonSpec{
Expand Down Expand Up @@ -208,60 +205,33 @@ func getDashboardWatsonResources(ns string) []ResourceSpec {
func CleanupExistingResource(ctx context.Context,
cli client.Client,
platform cluster.Platform,
dscApplicationsNamespace, dscMonitoringNamespace string,
dscMonitoringNamespace string,
oldReleaseVersion cluster.Release,
) error {
var multiErr *multierror.Error
// Special Handling of cleanup of deprecated model monitoring stack
if platform == cluster.ManagedRhoai {
deprecatedDeployments := []string{"rhods-prometheus-operator"}
multiErr = multierror.Append(multiErr, deleteDeprecatedResources(ctx, cli, dscMonitoringNamespace, deprecatedDeployments, &appsv1.DeploymentList{}))

deprecatedStatefulsets := []string{"prometheus-rhods-model-monitoring"}
multiErr = multierror.Append(multiErr, deleteDeprecatedResources(ctx, cli, dscMonitoringNamespace, deprecatedStatefulsets, &appsv1.StatefulSetList{}))

deprecatedServices := []string{"rhods-model-monitoring"}
multiErr = multierror.Append(multiErr, deleteDeprecatedResources(ctx, cli, dscMonitoringNamespace, deprecatedServices, &corev1.ServiceList{}))

deprecatedRoutes := []string{"rhods-model-monitoring"}
multiErr = multierror.Append(multiErr, deleteDeprecatedResources(ctx, cli, dscMonitoringNamespace, deprecatedRoutes, &routev1.RouteList{}))

deprecatedSecrets := []string{"rhods-monitoring-oauth-config"}
multiErr = multierror.Append(multiErr, deleteDeprecatedResources(ctx, cli, dscMonitoringNamespace, deprecatedSecrets, &corev1.SecretList{}))

deprecatedClusterroles := []string{"rhods-namespace-read", "rhods-prometheus-operator"}
multiErr = multierror.Append(multiErr, deleteDeprecatedResources(ctx, cli, dscMonitoringNamespace, deprecatedClusterroles, &rbacv1.ClusterRoleList{}))

deprecatedClusterrolebindings := []string{"rhods-namespace-read", "rhods-prometheus-operator"}
multiErr = multierror.Append(multiErr, deleteDeprecatedResources(ctx, cli, dscMonitoringNamespace, deprecatedClusterrolebindings, &rbacv1.ClusterRoleBindingList{}))

deprecatedServiceAccounts := []string{"rhods-prometheus-operator"}
multiErr = multierror.Append(multiErr, deleteDeprecatedResources(ctx, cli, dscMonitoringNamespace, deprecatedServiceAccounts, &corev1.ServiceAccountList{}))

deprecatedServicemonitors := []string{"modelmesh-federated-metrics"}
multiErr = multierror.Append(multiErr, deleteDeprecatedServiceMonitors(ctx, cli, dscMonitoringNamespace, deprecatedServicemonitors))
// get DSCI CR to get application namespace
dsciList := &dsciv1.DSCInitializationList{}
if err := cli.List(ctx, dsciList); err != nil {
return err
}
// common logic for both self-managed and managed
deprecatedOperatorSM := []string{"rhods-monitor-federation2"}
multiErr = multierror.Append(multiErr, deleteDeprecatedServiceMonitors(ctx, cli, dscMonitoringNamespace, deprecatedOperatorSM))

// Remove deprecated opendatahub namespace(previously owned by kuberay and Kueue)
multiErr = multierror.Append(multiErr, deleteDeprecatedNamespace(ctx, cli, "opendatahub"))

if len(dsciList.Items) == 0 {
return nil
}
d := &dsciList.Items[0]
// Handling for dashboard OdhApplication Jupyterhub CR, see jira #443
multiErr = multierror.Append(multiErr, removOdhApplicationsCR(ctx, cli, gvk.OdhApplication, "jupyterhub", dscApplicationsNamespace))
multiErr = multierror.Append(multiErr, removOdhApplicationsCR(ctx, cli, gvk.OdhApplication, "jupyterhub", d.Spec.ApplicationsNamespace))

// cleanup for github.com/opendatahub-io/pull/888
deprecatedFeatureTrackers := []string{dscApplicationsNamespace + "-kserve-temporary-fixes"}
multiErr = multierror.Append(multiErr, deleteDeprecatedResources(ctx, cli, dscApplicationsNamespace, deprecatedFeatureTrackers, &featuresv1.FeatureTrackerList{}))
deprecatedFeatureTrackers := []string{d.Spec.ApplicationsNamespace + "-kserve-temporary-fixes"}
multiErr = multierror.Append(multiErr, deleteDeprecatedResources(ctx, cli, d.Spec.ApplicationsNamespace, deprecatedFeatureTrackers, &featuresv1.FeatureTrackerList{}))

// Cleanup of deprecated default RoleBinding resources
deprecatedDefaultRoleBinding := []string{dscApplicationsNamespace}
multiErr = multierror.Append(multiErr, deleteDeprecatedResources(ctx, cli, dscApplicationsNamespace, deprecatedDefaultRoleBinding, &rbacv1.RoleBindingList{}))
deprecatedDefaultRoleBinding := []string{d.Spec.ApplicationsNamespace}
multiErr = multierror.Append(multiErr, deleteDeprecatedResources(ctx, cli, d.Spec.ApplicationsNamespace, deprecatedDefaultRoleBinding, &rbacv1.RoleBindingList{}))

// Handling for dashboard OdhDocument Jupyterhub CR, see jira #443 comments
odhDocJPH := getJPHOdhDocumentResources(
dscApplicationsNamespace,
d.Spec.ApplicationsNamespace,
[]string{
"jupyterhub-install-python-packages",
"jupyterhub-update-server-settings",
Expand All @@ -271,23 +241,23 @@ func CleanupExistingResource(ctx context.Context,
multiErr = multierror.Append(multiErr, deleteResources(ctx, cli, &odhDocJPH))
// only apply on RHOAI since ODH has a different way to create this CR by dashboard
if platform == cluster.SelfManagedRhoai || platform == cluster.ManagedRhoai {
if err := upgradeODCCR(ctx, cli, "odh-dashboard-config", dscApplicationsNamespace, oldReleaseVersion); err != nil {
if err := upgradeODCCR(ctx, cli, "odh-dashboard-config", d.Spec.ApplicationsNamespace, oldReleaseVersion); err != nil {
return err
}
}
// remove modelreg proxy container from deployment in ODH
if platform == cluster.OpenDataHub {
if err := removeRBACProxyModelRegistry(ctx, cli, "model-registry-operator", "kube-rbac-proxy", dscApplicationsNamespace); err != nil {
if err := removeRBACProxyModelRegistry(ctx, cli, "model-registry-operator", "kube-rbac-proxy", d.Spec.ApplicationsNamespace); err != nil {
return err
}
}

// to take a reference
toDelete := getDashboardWatsonResources(dscApplicationsNamespace)
toDelete := getDashboardWatsonResources(d.Spec.ApplicationsNamespace)
multiErr = multierror.Append(multiErr, deleteResources(ctx, cli, &toDelete))

// cleanup nvidia nim integration
multiErr = multierror.Append(multiErr, cleanupNimIntegration(ctx, cli, oldReleaseVersion, dscApplicationsNamespace))
multiErr = multierror.Append(multiErr, cleanupNimIntegration(ctx, cli, oldReleaseVersion, d.Spec.ApplicationsNamespace))

return multiErr.ErrorOrNil()
}
Expand Down Expand Up @@ -369,35 +339,6 @@ func deleteDeprecatedResources(ctx context.Context, cli client.Client, namespace
return multiErr.ErrorOrNil()
}

// Need to handle ServiceMonitor deletion separately as the generic function does not work for ServiceMonitors because of how the package is built.
func deleteDeprecatedServiceMonitors(ctx context.Context, cli client.Client, namespace string, resourceList []string) error {
log := logf.FromContext(ctx)
var multiErr *multierror.Error
listOpts := &client.ListOptions{Namespace: namespace}
servicemonitors := &monitoringv1.ServiceMonitorList{}
if err := cli.List(ctx, servicemonitors, listOpts); err != nil {
multiErr = multierror.Append(multiErr, err)
}

for _, servicemonitor := range servicemonitors.Items {
for _, name := range resourceList {
if name == servicemonitor.Name {
log.Info("Attempting to delete " + servicemonitor.Name + " in namespace " + namespace)
err := cli.Delete(ctx, servicemonitor)
if err != nil {
if k8serr.IsNotFound(err) {
log.Info("Could not find " + servicemonitor.Name + " in namespace " + namespace)
} else {
multiErr = multierror.Append(multiErr, err)
}
}
log.Info("Successfully deleted " + servicemonitor.Name)
}
}
}
return multiErr.ErrorOrNil()
}

func removOdhApplicationsCR(ctx context.Context, cli client.Client, gvk schema.GroupVersionKind, instanceName string, applicationNS string) error {
// first check if CRD in cluster
crd := &apiextv1.CustomResourceDefinition{}
Expand Down Expand Up @@ -526,63 +467,6 @@ func removeRBACProxyModelRegistry(ctx context.Context, cli client.Client, compon
return nil
}

func RemoveLabel(ctx context.Context, cli client.Client, objectName string, labelKey string) error {
foundNamespace := &corev1.Namespace{}
if err := cli.Get(ctx, client.ObjectKey{Name: objectName}, foundNamespace); err != nil {
if k8serr.IsNotFound(err) {
return nil
}
return fmt.Errorf("could not get %s namespace: %w", objectName, err)
}
delete(foundNamespace.Labels, labelKey)
if err := cli.Update(ctx, foundNamespace); err != nil {
return fmt.Errorf("error removing %s from %s : %w", labelKey, objectName, err)
}
return nil
}

func deleteDeprecatedNamespace(ctx context.Context, cli client.Client, namespace string) error {
log := logf.FromContext(ctx)
foundNamespace := &corev1.Namespace{}
if err := cli.Get(ctx, client.ObjectKey{Name: namespace}, foundNamespace); err != nil {
if k8serr.IsNotFound(err) {
return nil
}
return fmt.Errorf("could not get %s namespace: %w", namespace, err)
}

// Check if namespace is owned by DSC
isOwnedByDSC := false
for _, owner := range foundNamespace.OwnerReferences {
if owner.Kind == "DataScienceCluster" {
isOwnedByDSC = true
}
}
if !isOwnedByDSC {
return nil
}

// Check if namespace has pods running
podList := &corev1.PodList{}
listOpts := []client.ListOption{
client.InNamespace(namespace),
}
if err := cli.List(ctx, podList, listOpts...); err != nil {
return fmt.Errorf("error getting pods from namespace %s: %w", namespace, err)
}
if len(podList.Items) != 0 {
log.Info("Skip deletion of namespace " + namespace + " due to running Pods in it")
return nil
}

// Delete namespace if no pods found
if err := cli.Delete(ctx, foundNamespace); err != nil {
return fmt.Errorf("could not delete %s namespace: %w", namespace, err)
}

return nil
}

func GetDeployedRelease(ctx context.Context, cli client.Client) (cluster.Release, error) {
dsciInstance := &dsciv1.DSCInitializationList{}
if err := cli.List(ctx, dsciInstance); err != nil {
Expand Down

0 comments on commit b0a0f87

Please sign in to comment.