Skip to content

Commit

Permalink
[SECURESIGN-1047] CTLog error handler
Browse files Browse the repository at this point in the history
  • Loading branch information
bouskaJ committed Jun 21, 2024
1 parent eb641cb commit 67a119a
Show file tree
Hide file tree
Showing 8 changed files with 284 additions and 5 deletions.
2 changes: 2 additions & 0 deletions api/v1alpha1/ctlog_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ type CTlogStatus struct {
RootCertificates []SecretKeySelector `json:"rootCertificates,omitempty"`
// The ID of a Trillian tree that stores the log data.
TreeID *int64 `json:"treeID,omitempty"`
// Number of component restarts.
Restarts int `json:"restarts"`
// +listType=map
// +listMapKey=type
// +patchStrategy=merge
Expand Down
2 changes: 1 addition & 1 deletion bundle/manifests/rhtas-operator.clusterserviceversion.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ metadata:
]
capabilities: Seamless Upgrades
containerImage: registry.redhat.io/rhtas/rhtas-rhel9-operator@sha256:a21f7128694a64989bf0d84a7a7da4c1ffc89edf62d594dc8bea7bcfe9ac08d3
createdAt: "2024-06-21T09:37:48Z"
createdAt: "2024-06-21T11:18:33Z"
features.operators.openshift.io/cnf: "false"
features.operators.openshift.io/cni: "false"
features.operators.openshift.io/csi: "false"
Expand Down
5 changes: 5 additions & 0 deletions bundle/manifests/rhtas.redhat.com_ctlogs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,9 @@ spec:
- name
type: object
x-kubernetes-map-type: atomic
restarts:
description: Number of component restarts.
type: integer
rootCertificates:
items:
description: SecretKeySelector selects a key of a Secret.
Expand Down Expand Up @@ -316,6 +319,8 @@ spec:
description: The ID of a Trillian tree that stores the log data.
format: int64
type: integer
required:
- restarts
type: object
type: object
served: true
Expand Down
5 changes: 5 additions & 0 deletions config/crd/bases/rhtas.redhat.com_ctlogs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,9 @@ spec:
- name
type: object
x-kubernetes-map-type: atomic
restarts:
description: Number of component restarts.
type: integer
rootCertificates:
items:
description: SecretKeySelector selects a key of a Secret.
Expand Down Expand Up @@ -316,6 +319,8 @@ spec:
description: The ID of a Trillian tree that stores the log data.
format: int64
type: integer
required:
- restarts
type: object
type: object
served: true
Expand Down
77 changes: 77 additions & 0 deletions internal/controller/ctlog/actions/error.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
package actions

import (
"context"

rhtasv1alpha1 "github.com/securesign/operator/api/v1alpha1"
"github.com/securesign/operator/internal/controller/common/action"
"github.com/securesign/operator/internal/controller/constants"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// NewHandleErrorAction builds the CTlog action that reacts to a failed Ready
// condition by resetting the status so the reconciliation can be retried.
func NewHandleErrorAction() action.Action[rhtasv1alpha1.CTlog] {
	handler := new(handleErrorAction)
	return handler
}

// handleErrorAction is the CTlog action returned by NewHandleErrorAction.
// It carries no state of its own; the Recorder and StatusUpdate members used
// by its methods are promoted from the embedded action.BaseAction.
type handleErrorAction struct {
action.BaseAction
}

// Name returns the human-readable identifier of this action.
func (i handleErrorAction) Name() string {
	const actionName = "error handler"
	return actionName
}

// CanHandle reports whether the error handler should run for instance.
//
// It returns true only when the Ready condition exists, its reason is
// constants.Failure, and the recorded number of restarts is still below
// constants.AllowedRestarts.
func (i handleErrorAction) CanHandle(_ context.Context, instance *rhtasv1alpha1.CTlog) bool {
	ready := meta.FindStatusCondition(instance.Status.Conditions, constants.Ready)
	if ready != nil && ready.Reason == constants.Failure {
		return instance.Status.Restarts < constants.AllowedRestarts
	}
	return false
}

// Handle replaces the status of a failed CTlog instance so that the
// controller starts over, and persists the result through StatusUpdate.
//
// Every call emits a warning event and increments the restart counter. When
// the incremented counter equals constants.AllowedRestarts, the status is
// reduced to the counter plus a Ready=False condition with reason
// constants.Failure ("Restart threshold reached"); CanHandle will then refuse
// further restarts. Otherwise the already-resolved values (tree ID, key
// references and, depending on their conditions, root certificates and the
// server config reference) are carried over into a fresh status whose Ready
// condition has reason constants.Pending.
func (i handleErrorAction) Handle(ctx context.Context, instance *rhtasv1alpha1.CTlog) *action.Result {
	i.Recorder.Event(instance, v1.EventTypeWarning, constants.Failure, "Restarted by error handler")

	newStatus := rhtasv1alpha1.CTlogStatus{
		Restarts: instance.Status.Restarts + 1,
	}

	if newStatus.Restarts == constants.AllowedRestarts {
		meta.SetStatusCondition(&newStatus.Conditions, metav1.Condition{
			Type:    constants.Ready,
			Status:  metav1.ConditionFalse,
			Reason:  constants.Failure,
			Message: "Restart threshold reached",
		})
		instance.Status = newStatus
		return i.StatusUpdate(ctx, instance)
	}

	// Keep status.treeID if it was already resolved (a nil pointer stays nil).
	newStatus.TreeID = instance.Status.TreeID

	// DeepCopy is nil-safe, so unset references simply remain unset.
	newStatus.PrivateKeyRef = instance.Status.PrivateKeyRef.DeepCopy()
	newStatus.PublicKeyRef = instance.Status.PublicKeyRef.DeepCopy()
	newStatus.PrivateKeyPasswordRef = instance.Status.PrivateKeyPasswordRef.DeepCopy()

	if meta.IsStatusConditionTrue(instance.Status.Conditions, CertCondition) {
		// The built-in copy only fills min(len(dst), len(src)) elements, so
		// copying into the zero-length slice of a fresh status preserved
		// nothing. Append onto a nil slice to actually clone the entries.
		newStatus.RootCertificates = append(
			[]rhtasv1alpha1.SecretKeySelector(nil),
			instance.Status.RootCertificates...,
		)
	}

	if meta.IsStatusConditionTrue(instance.Status.Conditions, ServerCondition) {
		// DeepCopyInto would write through the nil destination pointer of the
		// fresh status; DeepCopy allocates the copy and tolerates a nil source.
		newStatus.ServerConfigRef = instance.Status.ServerConfigRef.DeepCopy()
		// Do not carry over the server condition - let the controller redeploy.
	}

	meta.SetStatusCondition(&newStatus.Conditions, metav1.Condition{
		Type:    constants.Ready,
		Status:  metav1.ConditionFalse,
		Reason:  constants.Pending,
		Message: "Restarted by error handler",
	})
	instance.Status = newStatus
	return i.StatusUpdate(ctx, instance)
}
18 changes: 17 additions & 1 deletion internal/controller/ctlog/ctlog_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,10 @@ import (

olpredicate "github.com/operator-framework/operator-lib/predicate"
"github.com/securesign/operator/internal/controller/annotations"
"github.com/securesign/operator/internal/controller/constants"
"k8s.io/apimachinery/pkg/api/meta"
"k8s.io/apimachinery/pkg/runtime/schema"
"sigs.k8s.io/controller-runtime/pkg/event"

"github.com/securesign/operator/internal/controller/ctlog/actions"
actions2 "github.com/securesign/operator/internal/controller/fulcio/actions"
Expand Down Expand Up @@ -83,6 +86,9 @@ func (r *CTlogReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl
}
target := instance.DeepCopy()
acs := []action.Action[rhtasv1alpha1.CTlog]{
// register error handler
actions.NewHandleErrorAction(),

actions.NewPendingAction(),

actions.NewHandleFulcioCertAction(),
Expand Down Expand Up @@ -144,7 +150,17 @@ func (r *CTlogReconciler) SetupWithManager(mgr ctrl.Manager) error {

return ctrl.NewControllerManagedBy(mgr).
WithEventFilter(pause).
For(&rhtasv1alpha1.CTlog{}).
For(&rhtasv1alpha1.CTlog{}, builder.WithPredicates(predicate.Or(predicate.GenerationChangedPredicate{}, predicate.Funcs{UpdateFunc: func(event event.UpdateEvent) bool {
// do not requeue failed object updates
instance, ok := event.ObjectNew.(*rhtasv1alpha1.CTlog)
if !ok {
return false
}
if c := meta.FindStatusCondition(instance.Status.Conditions, constants.Ready); c != nil {
return c.Reason != constants.Failure
}
return true
}}))).
Owns(&v1.Deployment{}).
Owns(&v12.Service{}).
WatchesMetadata(partialSecret, handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, object client.Object) []reconcile.Request {
Expand Down
5 changes: 2 additions & 3 deletions internal/controller/ctlog/ctlog_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,14 @@ var _ = Describe("CTlog controller", func() {

const (
Name = "test"
Namespace = "default"
Namespace = "test"
)

ctx := context.Background()

namespace := &corev1.Namespace{
ObjectMeta: metav1.ObjectMeta{
Name: Name,
Namespace: Namespace,
Name: Namespace,
},
}

Expand Down
175 changes: 175 additions & 0 deletions internal/controller/ctlog/ctlog_error_handler_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
/*
Copyright 2023.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package ctlog

import (
"context"
"os"
"time"

"github.com/securesign/operator/internal/controller/common/utils"
"github.com/securesign/operator/internal/controller/common/utils/kubernetes"
fulcio "github.com/securesign/operator/internal/controller/fulcio/actions"
trillian "github.com/securesign/operator/internal/controller/trillian/actions"
appsv1 "k8s.io/api/apps/v1"
runtimeClient "sigs.k8s.io/controller-runtime/pkg/client"

"github.com/securesign/operator/api/v1alpha1"
"github.com/securesign/operator/internal/controller/constants"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
)

// Integration spec for the CTlog error handler: it creates a CTlog resource,
// waits for the Ready condition to report Failure, observes the reason leave
// and re-enter Failure, then sets spec.treeID and drives the instance to
// Ready while checking that the private key reference name is unchanged.
var _ = Describe("CTlog ErrorHandler", func() {
Context("CTlog ErrorHandler test", func() {

const (
Name = "test"
Namespace = "errorhandler"
)

ctx := context.Background()

// Namespace object created before and deleted after each spec.
namespace := &corev1.Namespace{
ObjectMeta: metav1.ObjectMeta{
Name: Namespace,
},
}

// Key used for every Get of the CTlog resource under test.
typeNamespaceName := types.NamespacedName{Name: Name, Namespace: Namespace}
instance := &v1alpha1.CTlog{}

BeforeEach(func() {
// workaround - disable "host" mode in CreateTrillianTree function
Expect(os.Setenv("CONTAINER_MODE", "true")).To(Not(HaveOccurred()))

By("Creating the Namespace to perform the tests")
err := k8sClient.Create(ctx, namespace)
Expect(err).To(Not(HaveOccurred()))
})

AfterEach(func() {
By("removing the custom resource for the Kind CTlog")
found := &v1alpha1.CTlog{}
err := k8sClient.Get(ctx, typeNamespaceName, found)
Expect(err).To(Not(HaveOccurred()))

// Retry the delete once per second for up to two minutes.
Eventually(func() error {
return k8sClient.Delete(context.TODO(), found)
}, 2*time.Minute, time.Second).Should(Succeed())

// TODO(user): Attention if you improve this code by adding other context test you MUST
// be aware of the current delete namespace limitations.
// More info: https://book.kubebuilder.io/reference/envtest.html#testing-considerations
By("Deleting the Namespace to perform the tests")
// The namespace deletion error is deliberately ignored (see the TODO above).
_ = k8sClient.Delete(ctx, namespace)
})

It("should successfully reconcile a custom resource for CTlog", func() {
By("creating the custom resource for the Kind CTlog")
err := k8sClient.Get(ctx, typeNamespaceName, instance)
if err != nil && errors.IsNotFound(err) {
// Let's mock our custom resource in the same way that we would
// apply the manifest under config/samples on the cluster.
// NOTE(review): this `instance :=` shadows the outer `instance`, so the
// outer variable used below (instance.Labels) is not the created object.
instance := &v1alpha1.CTlog{
ObjectMeta: metav1.ObjectMeta{
Name: Name,
Namespace: Namespace,
},
Spec: v1alpha1.CTlogSpec{},
}
err = k8sClient.Create(ctx, instance)
Expect(err).To(Not(HaveOccurred()))
}

// Secret holding a fake certificate, labelled with the Fulcio CA label.
Expect(k8sClient.Create(ctx, kubernetes.CreateSecret("test", Namespace,
map[string][]byte{"cert": []byte("fakeCert")},
map[string]string{fulcio.FulcioCALabel: "cert"},
))).To(Succeed())

// Service named after the Trillian log server deployment, on port 8090.
// presumably only a stub with no backing server, which is why the first
// reconciliation is expected to fail - TODO confirm
err = k8sClient.Create(ctx, kubernetes.CreateService(Namespace, trillian.LogserverDeploymentName, 8090, instance.Labels))
Expect(err).To(Not(HaveOccurred()))

found := &v1alpha1.CTlog{}

By("Deployment should fail")
Eventually(func() string {

Expect(k8sClient.Get(ctx, typeNamespaceName, found)).Should(Succeed())
condition := meta.FindStatusCondition(found.Status.Conditions, constants.Ready)
if condition == nil {
return ""
}
return condition.Reason
}).Should(Equal(constants.Failure))

// Remember the generated private key secret name to compare at the end.
key := found.Status.PrivateKeyRef.Name
Expect(key).To(Not(BeEmpty()))

By("Periodically trying to restart deployment")
// First wait for the Ready reason to leave Failure, then for it to return
// to Failure. The closures below declare their own `found`, leaving the
// outer `found` untouched.
Eventually(func() string {
found := &v1alpha1.CTlog{}
Expect(k8sClient.Get(ctx, typeNamespaceName, found)).Should(Succeed())
return meta.FindStatusCondition(found.Status.Conditions, constants.Ready).Reason
}).Should(Not(Equal(constants.Failure)))
Eventually(func() string {
found := &v1alpha1.CTlog{}
Expect(k8sClient.Get(ctx, typeNamespaceName, found)).Should(Succeed())
return meta.FindStatusCondition(found.Status.Conditions, constants.Ready).Reason
}).Should(Equal(constants.Failure))

By("After fixing the problem the CTlog instance is Ready")
// Set spec.treeID to 1; the Get+Update pair is retried until Update succeeds.
Eventually(func() error {
Expect(k8sClient.Get(ctx, typeNamespaceName, found)).Should(Succeed())
found.Spec.TreeID = utils.Pointer(int64(1))
return k8sClient.Update(ctx, found)
}).Should(Succeed())

By("Waiting until CTlog instance is Initialization")
Eventually(func() string {
found := &v1alpha1.CTlog{}
Expect(k8sClient.Get(ctx, typeNamespaceName, found)).Should(Succeed())
return meta.FindStatusCondition(found.Status.Conditions, constants.Ready).Reason
}).Should(Equal(constants.Initialize))

deployments := &appsv1.DeploymentList{}
Expect(k8sClient.List(ctx, deployments, runtimeClient.InNamespace(Namespace))).To(Succeed())
By("Move to Ready phase")
// Workaround: write an Available=True condition into the status of every
// deployment in the namespace so the Ready phase can be reached.
for _, d := range deployments.Items {
d.Status.Conditions = []appsv1.DeploymentCondition{
{Status: corev1.ConditionTrue, Type: appsv1.DeploymentAvailable, Reason: constants.Ready}}
Expect(k8sClient.Status().Update(ctx, &d)).Should(Succeed())
}
// Workaround to succeed condition for Ready phase

Eventually(func() bool {
found := &v1alpha1.CTlog{}
Expect(k8sClient.Get(ctx, typeNamespaceName, found)).Should(Succeed())
return meta.IsStatusConditionTrue(found.Status.Conditions, constants.Ready)
}).Should(BeTrue())

By("Pregenerated resources are reused")
// NOTE(review): the closure above shadows `found`, so this compares against
// the outer object last populated in the treeID update step, not against
// the instance observed as Ready - confirm whether a re-fetch is intended.
Expect(key).To(Equal(found.Status.PrivateKeyRef.Name))
})
})
})

0 comments on commit 67a119a

Please sign in to comment.