From 0232b80b1885bf616d4cc5134c6422ed51f7199b Mon Sep 17 00:00:00 2001 From: Kevin Earls Date: Mon, 25 Jul 2022 11:09:21 +0200 Subject: [PATCH] =?UTF-8?q?Change=20Horizontal=20Pod=20Autoscaler=20to=20s?= =?UTF-8?q?cale=20on=20OpenTelemetry=20Collector=20=E2=80=A6=20(#984)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Change Horizontal Pod Autoscaler to scale on OpenTelemetry Collector object Signed-off-by: Kevin Earls * Update doc; move upgrade code out of reconciler Signed-off-by: Kevin Earls * Fixed doc Signed-off-by: Kevin Earls * Respond to comments, implement test Signed-off-by: Kevin Earls * respond to comments Signed-off-by: Kevin Earls --- Makefile | 6 +- apis/v1alpha1/opentelemetrycollector_types.go | 6 +- .../opentelemetrycollector_webhook.go | 9 +++ apis/v1alpha1/zz_generated.deepcopy.go | 5 ++ ...ntelemetry.io_opentelemetrycollectors.yaml | 7 +- ...ntelemetry.io_opentelemetrycollectors.yaml | 7 +- docs/api.md | 11 ++- hack/install-metrics-server.sh | 7 ++ pkg/collector/horizontalpodautoscaler.go | 9 ++- pkg/collector/reconcile/deployment.go | 5 -- .../reconcile/horizontalpodautoscaler.go | 7 +- pkg/collector/upgrade/v0_56_0.go | 80 +++++++++++++++++++ pkg/collector/upgrade/v0_56_0_test.go | 59 ++++++++++++++ pkg/collector/upgrade/versions.go | 4 + pkg/naming/main.go | 15 ++++ tests/e2e/autoscale/00-assert.yaml | 16 ++++ tests/e2e/autoscale/00-install.yaml | 32 ++++++++ tests/e2e/autoscale/01-assert.yaml | 18 +++++ tests/e2e/autoscale/01-install.yaml | 19 +++++ 19 files changed, 307 insertions(+), 15 deletions(-) create mode 100755 hack/install-metrics-server.sh create mode 100644 pkg/collector/upgrade/v0_56_0.go create mode 100644 pkg/collector/upgrade/v0_56_0_test.go create mode 100644 tests/e2e/autoscale/00-assert.yaml create mode 100644 tests/e2e/autoscale/00-install.yaml create mode 100644 tests/e2e/autoscale/01-assert.yaml create mode 100644 tests/e2e/autoscale/01-install.yaml diff --git a/Makefile b/Makefile index e62c88a34b..562e54715d 100644 --- a/Makefile +++ b/Makefile @@ -157,7 +157,7 @@ e2e: $(KUTTL) test .PHONY: prepare-e2e -prepare-e2e: kuttl set-test-image-vars set-image-controller container container-target-allocator start-kind load-image-all +prepare-e2e: kuttl set-test-image-vars set-image-controller container container-target-allocator start-kind install-metrics-server load-image-all mkdir -p tests/_build/crds tests/_build/manifests $(KUSTOMIZE) build config/default -o tests/_build/manifests/01-opentelemetry-operator.yaml $(KUSTOMIZE) build config/crd -o tests/_build/crds/ @@ -189,6 +189,10 @@ container-target-allocator: start-kind: kind create cluster --config $(KIND_CONFIG) +.PHONY: install-metrics-server +install-metrics-server: + ./hack/install-metrics-server.sh + .PHONY: load-image-all load-image-all: load-image-operator load-image-target-allocator diff --git a/apis/v1alpha1/opentelemetrycollector_types.go b/apis/v1alpha1/opentelemetrycollector_types.go index 25a0e60db7..4e5f03b889 100644 --- a/apis/v1alpha1/opentelemetrycollector_types.go +++ b/apis/v1alpha1/opentelemetrycollector_types.go @@ -33,10 +33,14 @@ type OpenTelemetryCollectorSpec struct { // +optional Args map[string]string `json:"args,omitempty"` - // Replicas is the number of pod instances for the underlying OpenTelemetry Collector + // Replicas is the number of pod instances for the underlying OpenTelemetry Collector. Set this if your are not using autoscaling // +optional Replicas *int32 `json:"replicas,omitempty"` + // MinReplicas sets a lower bound to the autoscaling feature. Set this if your are using autoscaling. It must be at least 1 + // +optional + MinReplicas *int32 `json:"minReplicas,omitempty"` + // MaxReplicas sets an upper bound to the autoscaling feature. If MaxReplicas is set autoscaling is enabled. // +optional MaxReplicas *int32 `json:"maxReplicas,omitempty"` diff --git a/apis/v1alpha1/opentelemetrycollector_webhook.go b/apis/v1alpha1/opentelemetrycollector_webhook.go index f4a6b4f068..6a4b1e936f 100644 --- a/apis/v1alpha1/opentelemetrycollector_webhook.go +++ b/apis/v1alpha1/opentelemetrycollector_webhook.go @@ -120,6 +120,15 @@ func (r *OpenTelemetryCollector) validateCRDSpec() error { if r.Spec.Replicas != nil && *r.Spec.Replicas > *r.Spec.MaxReplicas { return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, replicas must not be greater than maxReplicas") } + + if r.Spec.MinReplicas != nil && *r.Spec.MinReplicas > *r.Spec.MaxReplicas { + return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, minReplicas must not be greater than maxReplicas") + } + + if r.Spec.MinReplicas != nil && *r.Spec.MinReplicas < int32(1) { + return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, minReplicas should be one or more") + } + } return nil diff --git a/apis/v1alpha1/zz_generated.deepcopy.go b/apis/v1alpha1/zz_generated.deepcopy.go index ef246bbdf0..a1d95abb28 100644 --- a/apis/v1alpha1/zz_generated.deepcopy.go +++ b/apis/v1alpha1/zz_generated.deepcopy.go @@ -264,6 +264,11 @@ func (in *OpenTelemetryCollectorSpec) DeepCopyInto(out *OpenTelemetryCollectorSp *out = new(int32) **out = **in } + if in.MinReplicas != nil { + in, out := &in.MinReplicas, &out.MinReplicas + *out = new(int32) + **out = **in + } if in.MaxReplicas != nil { in, out := &in.MaxReplicas, &out.MaxReplicas *out = new(int32) diff --git a/bundle/manifests/opentelemetry.io_opentelemetrycollectors.yaml b/bundle/manifests/opentelemetry.io_opentelemetrycollectors.yaml index caa2add5b8..619113f228 100644 --- a/bundle/manifests/opentelemetry.io_opentelemetrycollectors.yaml +++ b/bundle/manifests/opentelemetry.io_opentelemetrycollectors.yaml @@ -223,6 +223,11 @@ spec: If MaxReplicas is set autoscaling is enabled. format: int32 type: integer + minReplicas: + description: MinReplicas sets a lower bound to the autoscaling feature. Set + this if your are using autoscaling. It must be at least 1 + format: int32 + type: integer mode: description: Mode represents how the collector should be deployed (deployment, daemonset, statefulset or sidecar) @@ -475,7 +480,7 @@ spec: x-kubernetes-list-type: atomic replicas: description: Replicas is the number of pod instances for the underlying - OpenTelemetry Collector + OpenTelemetry Collector. Set this if your are not using autoscaling format: int32 type: integer resources: diff --git a/config/crd/bases/opentelemetry.io_opentelemetrycollectors.yaml b/config/crd/bases/opentelemetry.io_opentelemetrycollectors.yaml index dcb4b80eaa..7043d13eb9 100644 --- a/config/crd/bases/opentelemetry.io_opentelemetrycollectors.yaml +++ b/config/crd/bases/opentelemetry.io_opentelemetrycollectors.yaml @@ -221,6 +221,11 @@ spec: If MaxReplicas is set autoscaling is enabled. format: int32 type: integer + minReplicas: + description: MinReplicas sets a lower bound to the autoscaling feature. Set + this if your are using autoscaling. It must be at least 1 + format: int32 + type: integer mode: description: Mode represents how the collector should be deployed (deployment, daemonset, statefulset or sidecar) @@ -473,7 +478,7 @@ spec: x-kubernetes-list-type: atomic replicas: description: Replicas is the number of pod instances for the underlying - OpenTelemetry Collector + OpenTelemetry Collector. Set this if your are not using autoscaling format: int32 type: integer resources: diff --git a/docs/api.md b/docs/api.md index c93ff7a58a..8802302ac4 100644 --- a/docs/api.md +++ b/docs/api.md @@ -1455,6 +1455,15 @@ OpenTelemetryCollectorSpec defines the desired state of OpenTelemetryCollector. Format: int32
false + + minReplicas + integer + + MinReplicas sets a lower bound to the autoscaling feature. Set this if your are using autoscaling. It must be at least 1
+
+ Format: int32
+ + false mode enum @@ -1496,7 +1505,7 @@ OpenTelemetryCollectorSpec defines the desired state of OpenTelemetryCollector. replicas integer - Replicas is the number of pod instances for the underlying OpenTelemetry Collector
+ Replicas is the number of pod instances for the underlying OpenTelemetry Collector. Set this if your are not using autoscaling

Format: int32
diff --git a/hack/install-metrics-server.sh b/hack/install-metrics-server.sh new file mode 100755 index 0000000000..d63332d77f --- /dev/null +++ b/hack/install-metrics-server.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +# Install metrics-server on kind clusters for autoscale tests. Note: This is not needed for minikube, +# you can just add --addons "metrics-server" to the start command. +kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml +kubectl patch deployment -n kube-system metrics-server --type "json" -p '[{"op": "add", "path": "/spec/template/spec/containers/0/args/-", "value": --kubelet-insecure-tls}]' +kubectl wait --for=condition=available deployment/metrics-server -n kube-system --timeout=5m diff --git a/pkg/collector/horizontalpodautoscaler.go b/pkg/collector/horizontalpodautoscaler.go index 334ae87bd6..7c7003fb48 100644 --- a/pkg/collector/horizontalpodautoscaler.go +++ b/pkg/collector/horizontalpodautoscaler.go @@ -35,17 +35,18 @@ func HorizontalPodAutoscaler(cfg config.Config, logger logr.Logger, otelcol v1al return autoscalingv1.HorizontalPodAutoscaler{ ObjectMeta: metav1.ObjectMeta{ - Name: naming.Collector(otelcol), + Name: naming.HorizontalPodAutoscaler(otelcol), Namespace: otelcol.Namespace, Labels: labels, Annotations: annotations, }, Spec: autoscalingv1.HorizontalPodAutoscalerSpec{ ScaleTargetRef: autoscalingv1.CrossVersionObjectReference{ - APIVersion: "apps/v1", - Kind: "Deployment", - Name: naming.Collector(otelcol), + APIVersion: v1alpha1.GroupVersion.String(), + Kind: "OpenTelemetryCollector", + Name: naming.OpenTelemetryCollector(otelcol), }, + MinReplicas: otelcol.Spec.Replicas, MaxReplicas: *otelcol.Spec.MaxReplicas, TargetCPUUtilizationPercentage: &cpuTarget, diff --git a/pkg/collector/reconcile/deployment.go b/pkg/collector/reconcile/deployment.go index 4049a8caae..99aa3a5a31 100644 --- a/pkg/collector/reconcile/deployment.go +++ b/pkg/collector/reconcile/deployment.go @@ -99,11 +99,6 @@ func expectedDeployments(ctx context.Context, params Params, expected []appsv1.D updated.ObjectMeta.Labels[k] = v } - if params.Instance.Spec.MaxReplicas != nil && desired.Labels["app.kubernetes.io/component"] == "opentelemetry-collector" { - currentReplicas := currentReplicasWithHPA(params.Instance.Spec, existing.Status.Replicas) - updated.Spec.Replicas = ¤tReplicas - } - // Selector is an immutable field, if set, we cannot modify it otherwise we will face reconciliation error. updated.Spec.Selector = existing.Spec.Selector.DeepCopy() diff --git a/pkg/collector/reconcile/horizontalpodautoscaler.go b/pkg/collector/reconcile/horizontalpodautoscaler.go index 6d0bca72a2..606f4dffce 100644 --- a/pkg/collector/reconcile/horizontalpodautoscaler.go +++ b/pkg/collector/reconcile/horizontalpodautoscaler.go @@ -52,6 +52,7 @@ func HorizontalPodAutoscalers(ctx context.Context, params Params) error { } func expectedHorizontalPodAutoscalers(ctx context.Context, params Params, expected []autoscalingv1.HorizontalPodAutoscaler) error { + one := int32(1) for _, obj := range expected { desired := obj @@ -81,9 +82,13 @@ func expectedHorizontalPodAutoscalers(ctx context.Context, params Params, expect } updated.OwnerReferences = desired.OwnerReferences - updated.Spec.MinReplicas = params.Instance.Spec.Replicas if params.Instance.Spec.MaxReplicas != nil { updated.Spec.MaxReplicas = *params.Instance.Spec.MaxReplicas + if params.Instance.Spec.MinReplicas != nil { + updated.Spec.MinReplicas = params.Instance.Spec.MinReplicas + } else { + updated.Spec.MinReplicas = &one + } } for k, v := range desired.Annotations { diff --git a/pkg/collector/upgrade/v0_56_0.go b/pkg/collector/upgrade/v0_56_0.go new file mode 100644 index 0000000000..9103a3d500 --- /dev/null +++ b/pkg/collector/upgrade/v0_56_0.go @@ -0,0 +1,80 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package upgrade + +import ( + "context" + "fmt" + + autoscalingv1 "k8s.io/api/autoscaling/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/open-telemetry/opentelemetry-operator/apis/v1alpha1" + "github.com/open-telemetry/opentelemetry-operator/pkg/naming" +) + +func upgrade0_56_0(u VersionUpgrade, otelcol *v1alpha1.OpenTelemetryCollector) (*v1alpha1.OpenTelemetryCollector, error) { + // return if this does not use an autoscaler + if otelcol.Spec.MaxReplicas == nil { + return otelcol, nil + } + + // Add minReplicas + one := int32(1) + otelcol.Spec.MinReplicas = &one + + // Find the existing HPA for this collector and upgrade it if necessary + listOptions := []client.ListOption{ + client.InNamespace(otelcol.Namespace), + client.MatchingLabels(map[string]string{ + "app.kubernetes.io/instance": fmt.Sprintf("%s.%s", otelcol.Namespace, otelcol.Name), + "app.kubernetes.io/managed-by": "opentelemetry-operator", + }), + } + + hpaList := &autoscalingv1.HorizontalPodAutoscalerList{} + ctx := context.Background() + if err := u.Client.List(ctx, hpaList, listOptions...); err != nil { + return nil, fmt.Errorf("couldn't upgrade to v0.56.0, failed trying to find HPA instances: %w", err) + } + + errors := []error{} + for i := range hpaList.Items { + existing := hpaList.Items[i] + // If there is an autoscaler based on Deployment, replace it with one based on OpenTelemetryCollector + if existing.Spec.ScaleTargetRef.Kind == "Deployment" { + updated := existing.DeepCopy() + updated.Spec.ScaleTargetRef = autoscalingv1.CrossVersionObjectReference{ + Kind: "OpenTelemetryCollector", + Name: naming.OpenTelemetryCollectorName(otelcol.Name), + APIVersion: v1alpha1.GroupVersion.String(), + } + patch := client.MergeFrom(&existing) + err := u.Client.Patch(ctx, updated, patch) + if err != nil { + errors = append(errors, err) + } + } + } + + if len(errors) != 0 { + return nil, fmt.Errorf("couldn't upgrade to v0.56.0, failed to recreate autoscaler: %v", errors) + } + + u.Log.Info("in upgrade0_56_0", "Otel Instance", otelcol.Name, "Upgrade version", u.Version.String()) + u.Recorder.Event(otelcol, "Normal", "Upgrade", "upgraded to v0.56.0, added minReplicas. recreated HPA instance") + + return otelcol, nil +} diff --git a/pkg/collector/upgrade/v0_56_0_test.go b/pkg/collector/upgrade/v0_56_0_test.go new file mode 100644 index 0000000000..639ceccb7d --- /dev/null +++ b/pkg/collector/upgrade/v0_56_0_test.go @@ -0,0 +1,59 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package upgrade_test + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/tools/record" + + "github.com/open-telemetry/opentelemetry-operator/apis/v1alpha1" + "github.com/open-telemetry/opentelemetry-operator/internal/version" + "github.com/open-telemetry/opentelemetry-operator/pkg/collector/upgrade" +) + +func Test0_56_0Upgrade(t *testing.T) { + one := int32(1) + three := int32(3) + + collectorInstance := v1alpha1.OpenTelemetryCollector{ + TypeMeta: metav1.TypeMeta{ + Kind: "OpenTelemetryCollector", + APIVersion: "v1alpha1", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "my-instance", + Namespace: "somewhere", + }, + Spec: v1alpha1.OpenTelemetryCollectorSpec{ + Replicas: &one, + MaxReplicas: &three, + }, + } + + collectorInstance.Status.Version = "0.55.0" + versionUpgrade := &upgrade.VersionUpgrade{ + Log: logger, + Version: version.Get(), + Client: k8sClient, + Recorder: record.NewFakeRecorder(upgrade.RecordBufferSize), + } + upgradedInstance, err := versionUpgrade.ManagedInstance(context.Background(), collectorInstance) + assert.NoError(t, err) + assert.Equal(t, one, *upgradedInstance.Spec.MinReplicas) +} diff --git a/pkg/collector/upgrade/versions.go b/pkg/collector/upgrade/versions.go index 7aec8a0f50..f78e824e4b 100644 --- a/pkg/collector/upgrade/versions.go +++ b/pkg/collector/upgrade/versions.go @@ -73,6 +73,10 @@ var ( Version: *semver.MustParse("0.43.0"), upgrade: upgrade0_43_0, }, + { + Version: *semver.MustParse("0.56.0"), + upgrade: upgrade0_56_0, + }, } // Latest represents the latest version that we need to upgrade. This is not necessarily the latest known version. diff --git a/pkg/naming/main.go b/pkg/naming/main.go index f1124fa616..f6b896212f 100644 --- a/pkg/naming/main.go +++ b/pkg/naming/main.go @@ -54,6 +54,21 @@ func Collector(otelcol v1alpha1.OpenTelemetryCollector) string { return DNSName(Truncate("%s-collector", 63, otelcol.Name)) } +// HorizontalPodAutoscaler builds the autoscaler name based on the instance. +func HorizontalPodAutoscaler(otelcol v1alpha1.OpenTelemetryCollector) string { + return DNSName(Truncate("%s-collector", 63, otelcol.Name)) +} + +// HorizontalPodAutoscaler builds the collector (deployment/daemonset) name based on the instance. +func OpenTelemetryCollector(otelcol v1alpha1.OpenTelemetryCollector) string { + return DNSName(Truncate("%s", 63, otelcol.Name)) +} + +// HorizontalPodAutoscaler builds the collector (deployment/daemonset) name based on the instance. +func OpenTelemetryCollectorName(otelcolName string) string { + return DNSName(Truncate("%s", 63, otelcolName)) +} + // TargetAllocator returns the TargetAllocator deployment resource name. func TargetAllocator(otelcol v1alpha1.OpenTelemetryCollector) string { return DNSName(Truncate("%s-targetallocator", 63, otelcol.Name)) diff --git a/tests/e2e/autoscale/00-assert.yaml b/tests/e2e/autoscale/00-assert.yaml new file mode 100644 index 0000000000..ef4b828699 --- /dev/null +++ b/tests/e2e/autoscale/00-assert.yaml @@ -0,0 +1,16 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: simplest-collector +status: + readyReplicas: 1 + +--- +apiVersion: autoscaling/v1 +kind: HorizontalPodAutoscaler +metadata: + name: simplest-collector +spec: + minReplicas: 1 + maxReplicas: 2 + diff --git a/tests/e2e/autoscale/00-install.yaml b/tests/e2e/autoscale/00-install.yaml new file mode 100644 index 0000000000..e2ba8d5a54 --- /dev/null +++ b/tests/e2e/autoscale/00-install.yaml @@ -0,0 +1,32 @@ +apiVersion: opentelemetry.io/v1alpha1 +kind: OpenTelemetryCollector +metadata: + name: simplest +spec: + minReplicas: 1 + maxReplicas: 2 + resources: + limits: + cpu: 500m + memory: 128Mi + requests: + cpu: 5m + memory: 64Mi + + config: | + receivers: + otlp: + protocols: + grpc: + http: + processors: + + exporters: + logging: + + service: + pipelines: + traces: + receivers: [otlp] + processors: [] + exporters: [logging] diff --git a/tests/e2e/autoscale/01-assert.yaml b/tests/e2e/autoscale/01-assert.yaml new file mode 100644 index 0000000000..2c2eec0238 --- /dev/null +++ b/tests/e2e/autoscale/01-assert.yaml @@ -0,0 +1,18 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: tracegen +status: + conditions: + - status: "True" + type: Complete + +--- +apiVersion: opentelemetry.io/v1alpha1 +kind: OpenTelemetryCollector + +metadata: + name: simplest +status: + scale: + replicas: 2 diff --git a/tests/e2e/autoscale/01-install.yaml b/tests/e2e/autoscale/01-install.yaml new file mode 100644 index 0000000000..eac4408366 --- /dev/null +++ b/tests/e2e/autoscale/01-install.yaml @@ -0,0 +1,19 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: tracegen +spec: + template: + spec: + containers: + - name: tracegen + image: ghcr.io/open-telemetry/opentelemetry-collector-contrib/tracegen:latest + command: + - "./tracegen" + args: + - -otlp-endpoint=simplest-collector-headless:4317 + - -otlp-insecure + - -duration=1m + - -workers=20 + restartPolicy: Never + backoffLimit: 4