From a6672cdf3c54c1329861532596b6c0012498356c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 21 Nov 2024 11:10:08 +0100 Subject: [PATCH 1/9] Bump github.com/cert-manager/cert-manager from 1.16.1 to 1.16.2 (#3480) Bumps [github.com/cert-manager/cert-manager](https://github.com/cert-manager/cert-manager) from 1.16.1 to 1.16.2. - [Release notes](https://github.com/cert-manager/cert-manager/releases) - [Changelog](https://github.com/cert-manager/cert-manager/blob/master/RELEASE.md) - [Commits](https://github.com/cert-manager/cert-manager/compare/v1.16.1...v1.16.2) --- updated-dependencies: - dependency-name: github.com/cert-manager/cert-manager dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 144ef45a4d..dde5ca3f26 100644 --- a/go.mod +++ b/go.mod @@ -75,7 +75,7 @@ require ( github.com/bytedance/sonic v1.11.6 // indirect github.com/bytedance/sonic/loader v0.1.1 // indirect github.com/cenkalti/backoff/v4 v4.3.0 // indirect - github.com/cert-manager/cert-manager v1.16.1 + github.com/cert-manager/cert-manager v1.16.2 github.com/cloudwego/base64x v0.1.4 // indirect github.com/cloudwego/iasm v0.2.0 // indirect github.com/cncf/xds/go v0.0.0-20240723142845-024c85f92f20 // indirect diff --git a/go.sum b/go.sum index a62a629ef3..311d8c6c08 100644 --- a/go.sum +++ b/go.sum @@ -103,8 +103,8 @@ github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4 github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= -github.com/cert-manager/cert-manager v1.16.1 h1:1ceFMqTtwiqY2vyfaRT85CNiVmK7pJjt3GebYCx9awY= -github.com/cert-manager/cert-manager v1.16.1/go.mod h1:MfLVTL45hFZsqmaT1O0+b2ugaNNQQZttSFV9hASHUb0= +github.com/cert-manager/cert-manager v1.16.2 h1:c9UU2E+8XWGruyvC/mdpc1wuLddtgmNr8foKdP7a8Jg= +github.com/cert-manager/cert-manager v1.16.2/go.mod h1:MfLVTL45hFZsqmaT1O0+b2ugaNNQQZttSFV9hASHUb0= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= From 5cab636edb5ed481c53a05b6e6ed7d96eaf68aaf Mon Sep 17 00:00:00 2001 From: Ishwar Kanse Date: Thu, 21 Nov 2024 15:47:37 +0530 Subject: [PATCH 2/9] Fix assert for multi-cluster test (#3481) --- tests/e2e-openshift/multi-cluster/04-assert.yaml | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tests/e2e-openshift/multi-cluster/04-assert.yaml b/tests/e2e-openshift/multi-cluster/04-assert.yaml index 922508c134..f1a66083cb 100644 --- a/tests/e2e-openshift/multi-cluster/04-assert.yaml +++ b/tests/e2e-openshift/multi-cluster/04-assert.yaml @@ -4,9 +4,7 @@ metadata: name: generate-traces-http namespace: chainsaw-multi-cluster-send status: - conditions: - - status: "True" - type: Complete + succeeded: 1 --- apiVersion: batch/v1 @@ -15,6 +13,4 @@ metadata: name: generate-traces-grpc namespace: chainsaw-multi-cluster-send status: - conditions: - - status: "True" - type: Complete + succeeded: 1 \ No newline at end of file From 548179307b4ac4131ca048a06a97dd62d77a34b4 Mon Sep 17 00:00:00 2001 From: Jorge Creixell Date: Thu, 21 Nov 2024 15:05:04 +0100 Subject: [PATCH 3/9] Bump base memory requirements for python and go (#3473) * Bump base memory requirements for python and go - When auto-instrumenting applications, I have noticed that default memory limits are too tight for some languages. This leads to the following: - Intermitent OOMKilled events in init container when auto-instrumenting python applications. Eventually the pods are able to start. - OOMKilled events for sidecar containers in go applications. The pods are not able to start. - 64Mi seems to be enough to fix these issues. While some tweaking by users may still be necessary, the operator should work out-of-the-box for all supported languages. * Add changelog * Link issue in changelog --- .../bump-base-instrumentation-mem-limit.yaml | 16 ++++++++++++++++ apis/v1alpha1/instrumentation_webhook.go | 8 ++++---- 2 files changed, 20 insertions(+), 4 deletions(-) create mode 100755 .chloggen/bump-base-instrumentation-mem-limit.yaml diff --git a/.chloggen/bump-base-instrumentation-mem-limit.yaml b/.chloggen/bump-base-instrumentation-mem-limit.yaml new file mode 100755 index 0000000000..ea7b32f88f --- /dev/null +++ b/.chloggen/bump-base-instrumentation-mem-limit.yaml @@ -0,0 +1,16 @@ +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: bug_fix + +# The name of the component, or a single word describing the area of concern, (e.g. collector, target allocator, auto-instrumentation, opamp, github action) +component: auto-instrumentation + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Bump base memory requirements for python and go + +# One or more tracking issues related to the change +issues: [3479] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: diff --git a/apis/v1alpha1/instrumentation_webhook.go b/apis/v1alpha1/instrumentation_webhook.go index 6e52e7c6a5..b4aae51c56 100644 --- a/apis/v1alpha1/instrumentation_webhook.go +++ b/apis/v1alpha1/instrumentation_webhook.go @@ -128,13 +128,13 @@ func (w InstrumentationWebhook) defaulter(r *Instrumentation) error { if r.Spec.Python.Resources.Limits == nil { r.Spec.Python.Resources.Limits = corev1.ResourceList{ corev1.ResourceCPU: resource.MustParse("500m"), - corev1.ResourceMemory: resource.MustParse("32Mi"), + corev1.ResourceMemory: resource.MustParse("64Mi"), } } if r.Spec.Python.Resources.Requests == nil { r.Spec.Python.Resources.Requests = corev1.ResourceList{ corev1.ResourceCPU: resource.MustParse("50m"), - corev1.ResourceMemory: resource.MustParse("32Mi"), + corev1.ResourceMemory: resource.MustParse("64Mi"), } } if r.Spec.DotNet.Image == "" { @@ -158,13 +158,13 @@ func (w InstrumentationWebhook) defaulter(r *Instrumentation) error { if r.Spec.Go.Resources.Limits == nil { r.Spec.Go.Resources.Limits = corev1.ResourceList{ corev1.ResourceCPU: resource.MustParse("500m"), - corev1.ResourceMemory: resource.MustParse("32Mi"), + corev1.ResourceMemory: resource.MustParse("64Mi"), } } if r.Spec.Go.Resources.Requests == nil { r.Spec.Go.Resources.Requests = corev1.ResourceList{ corev1.ResourceCPU: resource.MustParse("50m"), - corev1.ResourceMemory: resource.MustParse("32Mi"), + corev1.ResourceMemory: resource.MustParse("64Mi"), } } if r.Spec.ApacheHttpd.Image == "" { From 49710a9f966f9ac4bbcb59b0f529369c3e8b028b Mon Sep 17 00:00:00 2001 From: "Koshevy Anton (astlock)" <988910+astlock@users.noreply.github.com> Date: Thu, 21 Nov 2024 20:03:58 +0200 Subject: [PATCH 4/9] chore: replace gcr.io for kube-rbac-proxy image (#3485) * chore: replace gcr.io for kube-rbac-proxy image * chore: add Changelog --- ...re_change-kube-rbac-proxy-image-registry.yaml | 16 ++++++++++++++++ ...telemetry-operator.clusterserviceversion.yaml | 2 +- ...telemetry-operator.clusterserviceversion.yaml | 2 +- config/default/manager_auth_proxy_patch.yaml | 2 +- .../opentelemetry-operator-v0.86.0.yaml | 2 +- 5 files changed, 20 insertions(+), 4 deletions(-) create mode 100755 .chloggen/chore_change-kube-rbac-proxy-image-registry.yaml diff --git a/.chloggen/chore_change-kube-rbac-proxy-image-registry.yaml b/.chloggen/chore_change-kube-rbac-proxy-image-registry.yaml new file mode 100755 index 0000000000..8280b5bc8c --- /dev/null +++ b/.chloggen/chore_change-kube-rbac-proxy-image-registry.yaml @@ -0,0 +1,16 @@ +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. collector, target allocator, auto-instrumentation, opamp, github action) +component: operator + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Replace references to gcr.io/kubebuilder/kube-rbac-proxy with quay.io/brancz/kube-rbac-proxy + +# One or more tracking issues related to the change +issues: [3485] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: diff --git a/bundle/community/manifests/opentelemetry-operator.clusterserviceversion.yaml b/bundle/community/manifests/opentelemetry-operator.clusterserviceversion.yaml index e7b58fec20..3f454adde0 100644 --- a/bundle/community/manifests/opentelemetry-operator.clusterserviceversion.yaml +++ b/bundle/community/manifests/opentelemetry-operator.clusterserviceversion.yaml @@ -514,7 +514,7 @@ spec: - --upstream=http://127.0.0.1:8080/ - --logtostderr=true - --v=0 - image: gcr.io/kubebuilder/kube-rbac-proxy:v0.13.1 + image: quay.io/brancz/kube-rbac-proxy:v0.13.1 name: kube-rbac-proxy ports: - containerPort: 8443 diff --git a/bundle/openshift/manifests/opentelemetry-operator.clusterserviceversion.yaml b/bundle/openshift/manifests/opentelemetry-operator.clusterserviceversion.yaml index cae664cb52..016a34c1e0 100644 --- a/bundle/openshift/manifests/opentelemetry-operator.clusterserviceversion.yaml +++ b/bundle/openshift/manifests/opentelemetry-operator.clusterserviceversion.yaml @@ -522,7 +522,7 @@ spec: - --tls-private-key-file=/var/run/tls/server/tls.key - --tls-cipher-suites=TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256,TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256,TLS_RSA_WITH_AES_128_GCM_SHA256,TLS_RSA_WITH_AES_256_GCM_SHA384,TLS_RSA_WITH_AES_128_CBC_SHA256 - --tls-min-version=VersionTLS12 - image: gcr.io/kubebuilder/kube-rbac-proxy:v0.13.1 + image: quay.io/brancz/kube-rbac-proxy:v0.13.1 name: kube-rbac-proxy ports: - containerPort: 8443 diff --git a/config/default/manager_auth_proxy_patch.yaml b/config/default/manager_auth_proxy_patch.yaml index 9969c5c16e..4ac6ff2247 100644 --- a/config/default/manager_auth_proxy_patch.yaml +++ b/config/default/manager_auth_proxy_patch.yaml @@ -10,7 +10,7 @@ spec: spec: containers: - name: kube-rbac-proxy - image: gcr.io/kubebuilder/kube-rbac-proxy:v0.13.1 + image: quay.io/brancz/kube-rbac-proxy:v0.13.1 args: - "--secure-listen-address=0.0.0.0:8443" - "--upstream=http://127.0.0.1:8080/" diff --git a/tests/e2e-upgrade/upgrade-test/opentelemetry-operator-v0.86.0.yaml b/tests/e2e-upgrade/upgrade-test/opentelemetry-operator-v0.86.0.yaml index cc8a9cac64..6ab2e4ac6e 100644 --- a/tests/e2e-upgrade/upgrade-test/opentelemetry-operator-v0.86.0.yaml +++ b/tests/e2e-upgrade/upgrade-test/opentelemetry-operator-v0.86.0.yaml @@ -8348,7 +8348,7 @@ spec: - --upstream=http://127.0.0.1:8080/ - --logtostderr=true - --v=0 - image: gcr.io/kubebuilder/kube-rbac-proxy:v0.13.1 + image: quay.io/brancz/kube-rbac-proxy:v0.13.1 name: kube-rbac-proxy ports: - containerPort: 8443 From d27624565ddfb99eaa777337fb11ef5009eaf034 Mon Sep 17 00:00:00 2001 From: Ishwar Kanse Date: Fri, 22 Nov 2024 17:13:15 +0530 Subject: [PATCH 5/9] Test operator restart (#3486) --- .../operator-restart/assert-operator-pod.yaml | 16 +++++++++ tests/e2e/operator-restart/chainsaw-test.yaml | 36 +++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 tests/e2e/operator-restart/assert-operator-pod.yaml create mode 100644 tests/e2e/operator-restart/chainsaw-test.yaml diff --git a/tests/e2e/operator-restart/assert-operator-pod.yaml b/tests/e2e/operator-restart/assert-operator-pod.yaml new file mode 100644 index 0000000000..d8131db398 --- /dev/null +++ b/tests/e2e/operator-restart/assert-operator-pod.yaml @@ -0,0 +1,16 @@ +apiVersion: v1 +kind: Pod +metadata: + labels: + app.kubernetes.io/name: opentelemetry-operator + control-plane: controller-manager + namespace: ($OTEL_NAMESPACE) +status: + containerStatuses: + - name: kube-rbac-proxy + ready: true + started: true + - name: manager + ready: true + started: true + phase: Running diff --git a/tests/e2e/operator-restart/chainsaw-test.yaml b/tests/e2e/operator-restart/chainsaw-test.yaml new file mode 100644 index 0000000000..d5081d4fef --- /dev/null +++ b/tests/e2e/operator-restart/chainsaw-test.yaml @@ -0,0 +1,36 @@ +apiVersion: chainsaw.kyverno.io/v1alpha1 +kind: Test +metadata: + name: operator-restart +spec: + # Running the test serially as its disruptive causing operator pod restart + concurrent: false + steps: + - name: Delete operator pod + try: + - command: + entrypoint: kubectl + args: + - get + - pods + - -A + - -l control-plane=controller-manager + - -l app.kubernetes.io/name=opentelemetry-operator + - -o + - jsonpath={.items[0].metadata.namespace} + outputs: + - name: OTEL_NAMESPACE + value: ($stdout) + - delete: + ref: + apiVersion: v1 + kind: Pod + namespace: ($OTEL_NAMESPACE) + labels: + control-plane: controller-manager + app.kubernetes.io/name: opentelemetry-operator + # Adding 10s sleep here cause sometimes the pod will be in running state for a while but can fail later if there is any issue with the component startup. + - sleep: + duration: 10s + - assert: + file: assert-operator-pod.yaml \ No newline at end of file From 8483bfe2375e3796afe73357a5d181ee8ee63bbb Mon Sep 17 00:00:00 2001 From: Pavol Loffay Date: Fri, 22 Nov 2024 09:32:47 -0700 Subject: [PATCH 6/9] Cleanup nodejs dependencies (#3466) Signed-off-by: Pavol Loffay --- .../publish-autoinstrumentation-nodejs.yaml | 2 +- autoinstrumentation/nodejs/package.json | 13 ++----------- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/.github/workflows/publish-autoinstrumentation-nodejs.yaml b/.github/workflows/publish-autoinstrumentation-nodejs.yaml index 083b7306ae..7115105b2f 100644 --- a/.github/workflows/publish-autoinstrumentation-nodejs.yaml +++ b/.github/workflows/publish-autoinstrumentation-nodejs.yaml @@ -26,7 +26,7 @@ jobs: - uses: actions/checkout@v4 - name: Read version - run: echo VERSION=$(cat autoinstrumentation/nodejs/package.json | jq -r '.dependencies."@opentelemetry/sdk-node"') >> $GITHUB_ENV + run: echo VERSION=$(cat autoinstrumentation/nodejs/package.json | jq -r '.dependencies."@opentelemetry/auto-instrumentations-node"') >> $GITHUB_ENV - name: Docker meta id: meta diff --git a/autoinstrumentation/nodejs/package.json b/autoinstrumentation/nodejs/package.json index a36adc8fad..be98fd9155 100644 --- a/autoinstrumentation/nodejs/package.json +++ b/autoinstrumentation/nodejs/package.json @@ -14,17 +14,8 @@ "typescript": "^5.6.3" }, "dependencies": { - "@opentelemetry/api": "1.9.0", - "@opentelemetry/auto-instrumentations-node": "0.52.0", + "@opentelemetry/auto-instrumentations-node": "0.52.1", "@opentelemetry/exporter-metrics-otlp-grpc": "0.54.0", - "@opentelemetry/exporter-prometheus": "0.54.0", - "@opentelemetry/exporter-trace-otlp-grpc": "0.54.0", - "@opentelemetry/resource-detector-alibaba-cloud": "0.29.4", - "@opentelemetry/resource-detector-aws": "1.7.0", - "@opentelemetry/resource-detector-container": "0.5.0", - "@opentelemetry/resource-detector-gcp": "0.29.13", - "@opentelemetry/resources": "1.27.0", - "@opentelemetry/sdk-metrics": "1.27.0", - "@opentelemetry/sdk-node": "0.54.0" + "@opentelemetry/exporter-prometheus": "0.54.0" } } From 9a5a8cec8724541b648e5d67020c4eca84046002 Mon Sep 17 00:00:00 2001 From: lhp-nemlig <159530308+lhp-nemlig@users.noreply.github.com> Date: Fri, 22 Nov 2024 20:27:02 +0100 Subject: [PATCH 7/9] Add allocation_fallback_strategy option as fallback strategy for per-node strategy (#3482) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add least-weighted as fallback strategy for per-node strategy * Add changelog file * Change fallback strategy to consistent-hashing * Update changelog * Fix bad test condition that might pass even if target was not assigned * Make fallback strategy a config option * Update changelog * Add period to test comments * Add feature gate for enabling fallback strategy * Fix featuregate id * Update cmd/otel-allocator/allocation/per_node_test.go Co-authored-by: Mikołaj Świątek * Update cmd/otel-allocator/allocation/per_node_test.go Co-authored-by: Mikołaj Świątek * Update cmd/otel-allocator/allocation/per_node_test.go Co-authored-by: Mikołaj Świątek * Only add fallbackstrategy if nonempty * Remove unnecessary comments * Add unit test for fallbackstrategy feature gate * Update changelog --------- Co-authored-by: Mikołaj Świątek --- ...llback_strategy_for_per_node_strategy.yaml | 21 +++++ cmd/otel-allocator/allocation/allocator.go | 5 ++ .../allocation/consistent_hashing.go | 2 + .../allocation/least_weighted.go | 2 + cmd/otel-allocator/allocation/per_node.go | 18 +++- .../allocation/per_node_test.go | 83 ++++++++++++++++++- cmd/otel-allocator/allocation/strategy.go | 43 ++++++---- cmd/otel-allocator/config/config.go | 26 +++--- cmd/otel-allocator/main.go | 8 +- cmd/otel-allocator/server/mocks_test.go | 1 + .../manifests/targetallocator/configmap.go | 5 ++ .../targetallocator/configmap_test.go | 57 +++++++++++++ pkg/featuregate/featuregate.go | 8 ++ 13 files changed, 247 insertions(+), 32 deletions(-) create mode 100755 .chloggen/add_fallback_strategy_for_per_node_strategy.yaml diff --git a/.chloggen/add_fallback_strategy_for_per_node_strategy.yaml b/.chloggen/add_fallback_strategy_for_per_node_strategy.yaml new file mode 100755 index 0000000000..ba864fb365 --- /dev/null +++ b/.chloggen/add_fallback_strategy_for_per_node_strategy.yaml @@ -0,0 +1,21 @@ +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: 'enhancement' + +# The name of the component, or a single word describing the area of concern, (e.g. collector, target allocator, auto-instrumentation, opamp, github action) +component: target allocator + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Added allocation_fallback_strategy option as fallback strategy for per-node allocation strategy, can be enabled with feature flag operator.targetallocator.fallbackstrategy + +# One or more tracking issues related to the change +issues: [3477] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: | + If using per-node allocation strategy, targets that are not attached to a node will not + be allocated. As the per-node strategy is required when running as a daemonset, it is + not possible to assign some targets under a daemonset deployment. + Feature flag operator.targetallocator.fallbackstrategy has been added and results in consistent-hashing + being used as the fallback allocation strategy for "per-node" only at this time. diff --git a/cmd/otel-allocator/allocation/allocator.go b/cmd/otel-allocator/allocation/allocator.go index cbe5d1d31d..b0a9125ba9 100644 --- a/cmd/otel-allocator/allocation/allocator.go +++ b/cmd/otel-allocator/allocation/allocator.go @@ -76,6 +76,11 @@ func (a *allocator) SetFilter(filter Filter) { a.filter = filter } +// SetFallbackStrategy sets the fallback strategy to use. +func (a *allocator) SetFallbackStrategy(strategy Strategy) { + a.strategy.SetFallbackStrategy(strategy) +} + // SetTargets accepts a list of targets that will be used to make // load balancing decisions. This method should be called when there are // new targets discovered or existing targets are shutdown. diff --git a/cmd/otel-allocator/allocation/consistent_hashing.go b/cmd/otel-allocator/allocation/consistent_hashing.go index 9659b73a0d..c8a16903bc 100644 --- a/cmd/otel-allocator/allocation/consistent_hashing.go +++ b/cmd/otel-allocator/allocation/consistent_hashing.go @@ -83,3 +83,5 @@ func (s *consistentHashingStrategy) SetCollectors(collectors map[string]*Collect s.consistentHasher = consistent.New(members, s.config) } + +func (s *consistentHashingStrategy) SetFallbackStrategy(fallbackStrategy Strategy) {} diff --git a/cmd/otel-allocator/allocation/least_weighted.go b/cmd/otel-allocator/allocation/least_weighted.go index caa2febbd9..49d935715d 100644 --- a/cmd/otel-allocator/allocation/least_weighted.go +++ b/cmd/otel-allocator/allocation/least_weighted.go @@ -54,3 +54,5 @@ func (s *leastWeightedStrategy) GetCollectorForTarget(collectors map[string]*Col } func (s *leastWeightedStrategy) SetCollectors(_ map[string]*Collector) {} + +func (s *leastWeightedStrategy) SetFallbackStrategy(fallbackStrategy Strategy) {} diff --git a/cmd/otel-allocator/allocation/per_node.go b/cmd/otel-allocator/allocation/per_node.go index a5e2bfa3f8..3d9c76d90d 100644 --- a/cmd/otel-allocator/allocation/per_node.go +++ b/cmd/otel-allocator/allocation/per_node.go @@ -25,21 +25,31 @@ const perNodeStrategyName = "per-node" var _ Strategy = &perNodeStrategy{} type perNodeStrategy struct { - collectorByNode map[string]*Collector + collectorByNode map[string]*Collector + fallbackStrategy Strategy } func newPerNodeStrategy() Strategy { return &perNodeStrategy{ - collectorByNode: make(map[string]*Collector), + collectorByNode: make(map[string]*Collector), + fallbackStrategy: nil, } } +func (s *perNodeStrategy) SetFallbackStrategy(fallbackStrategy Strategy) { + s.fallbackStrategy = fallbackStrategy +} + func (s *perNodeStrategy) GetName() string { return perNodeStrategyName } func (s *perNodeStrategy) GetCollectorForTarget(collectors map[string]*Collector, item *target.Item) (*Collector, error) { targetNodeName := item.GetNodeName() + if targetNodeName == "" && s.fallbackStrategy != nil { + return s.fallbackStrategy.GetCollectorForTarget(collectors, item) + } + collector, ok := s.collectorByNode[targetNodeName] if !ok { return nil, fmt.Errorf("could not find collector for node %s", targetNodeName) @@ -54,4 +64,8 @@ func (s *perNodeStrategy) SetCollectors(collectors map[string]*Collector) { s.collectorByNode[collector.NodeName] = collector } } + + if s.fallbackStrategy != nil { + s.fallbackStrategy.SetCollectors(collectors) + } } diff --git a/cmd/otel-allocator/allocation/per_node_test.go b/cmd/otel-allocator/allocation/per_node_test.go index ebbe3f31e6..4d17e6bbb3 100644 --- a/cmd/otel-allocator/allocation/per_node_test.go +++ b/cmd/otel-allocator/allocation/per_node_test.go @@ -26,7 +26,17 @@ import ( var loggerPerNode = logf.Log.WithName("unit-tests") -// Tests that two targets with the same target url and job name but different label set are both added. +func GetTargetsWithNodeName(targets []*target.Item) (targetsWithNodeName []*target.Item) { + for _, item := range targets { + if item.GetNodeName() != "" { + targetsWithNodeName = append(targetsWithNodeName, item) + } + } + return targetsWithNodeName +} + +// Tests that four targets, with one of them lacking node labels, are assigned except for the +// target that lacks node labels. func TestAllocationPerNode(t *testing.T) { // prepare allocator with initial targets and collectors s, _ := New("per-node", loggerPerNode) @@ -93,6 +103,77 @@ func TestAllocationPerNode(t *testing.T) { } } +// Tests that four targets, with one of them missing node labels, are all assigned. +func TestAllocationPerNodeUsingFallback(t *testing.T) { + // prepare allocator with initial targets and collectors + s, _ := New("per-node", loggerPerNode, WithFallbackStrategy(consistentHashingStrategyName)) + + cols := MakeNCollectors(4, 0) + s.SetCollectors(cols) + firstLabels := labels.Labels{ + {Name: "test", Value: "test1"}, + {Name: "__meta_kubernetes_pod_node_name", Value: "node-0"}, + } + secondLabels := labels.Labels{ + {Name: "test", Value: "test2"}, + {Name: "__meta_kubernetes_node_name", Value: "node-1"}, + } + // no label, should be allocated by the fallback strategy + thirdLabels := labels.Labels{ + {Name: "test", Value: "test3"}, + } + // endpointslice target kind and name + fourthLabels := labels.Labels{ + {Name: "test", Value: "test4"}, + {Name: "__meta_kubernetes_endpointslice_address_target_kind", Value: "Node"}, + {Name: "__meta_kubernetes_endpointslice_address_target_name", Value: "node-3"}, + } + + firstTarget := target.NewItem("sample-name", "0.0.0.0:8000", firstLabels, "") + secondTarget := target.NewItem("sample-name", "0.0.0.0:8000", secondLabels, "") + thirdTarget := target.NewItem("sample-name", "0.0.0.0:8000", thirdLabels, "") + fourthTarget := target.NewItem("sample-name", "0.0.0.0:8000", fourthLabels, "") + + targetList := map[string]*target.Item{ + firstTarget.Hash(): firstTarget, + secondTarget.Hash(): secondTarget, + thirdTarget.Hash(): thirdTarget, + fourthTarget.Hash(): fourthTarget, + } + + // test that targets and collectors are added properly + s.SetTargets(targetList) + + // verify length + actualItems := s.TargetItems() + + // all targets should be allocated + expectedTargetLen := len(targetList) + assert.Len(t, actualItems, expectedTargetLen) + + // verify allocation to nodes + for targetHash, item := range targetList { + actualItem, found := actualItems[targetHash] + + assert.True(t, found, "target with hash %s not found", item.Hash()) + + itemsForCollector := s.GetTargetsForCollectorAndJob(actualItem.CollectorName, actualItem.JobName) + + // first two should be assigned one to each collector; if third target, it should be assigned + // according to the fallback strategy which may assign it to the otherwise empty collector or + // one of the others, depending on the strategy and collector loop order + if targetHash == thirdTarget.Hash() { + assert.Empty(t, item.GetNodeName()) + assert.NotZero(t, len(itemsForCollector)) + continue + } + + // Only check targets that have been assigned using the per-node (not fallback) strategy here + assert.Len(t, GetTargetsWithNodeName(itemsForCollector), 1) + assert.Equal(t, actualItem, GetTargetsWithNodeName(itemsForCollector)[0]) + } +} + func TestTargetsWithNoCollectorsPerNode(t *testing.T) { // prepare allocator with initial targets and collectors c, _ := New("per-node", loggerPerNode) diff --git a/cmd/otel-allocator/allocation/strategy.go b/cmd/otel-allocator/allocation/strategy.go index 29ae7fd99a..47fafd5662 100644 --- a/cmd/otel-allocator/allocation/strategy.go +++ b/cmd/otel-allocator/allocation/strategy.go @@ -29,6 +29,8 @@ import ( type AllocatorProvider func(log logr.Logger, opts ...AllocationOption) Allocator var ( + strategies = map[string]Strategy{} + registry = map[string]AllocatorProvider{} // TargetsPerCollector records how many targets have been assigned to each collector. @@ -67,6 +69,16 @@ func WithFilter(filter Filter) AllocationOption { } } +func WithFallbackStrategy(fallbackStrategy string) AllocationOption { + var strategy, ok = strategies[fallbackStrategy] + if fallbackStrategy != "" && !ok { + panic(fmt.Errorf("unregistered strategy used as fallback: %s", fallbackStrategy)) + } + return func(allocator Allocator) { + allocator.SetFallbackStrategy(strategy) + } +} + func RecordTargetsKept(targets map[string]*target.Item) { targetsRemaining.Add(float64(len(targets))) } @@ -101,6 +113,7 @@ type Allocator interface { Collectors() map[string]*Collector GetTargetsForCollectorAndJob(collector string, job string) []*target.Item SetFilter(filter Filter) + SetFallbackStrategy(strategy Strategy) } type Strategy interface { @@ -110,6 +123,8 @@ type Strategy interface { // SetCollectors call. Strategies which don't need this information can just ignore it. SetCollectors(map[string]*Collector) GetName() string + // Add fallback strategy for strategies whose main allocation method can sometimes leave targets unassigned + SetFallbackStrategy(Strategy) } var _ consistent.Member = Collector{} @@ -136,22 +151,18 @@ func NewCollector(name, node string) *Collector { } func init() { - err := Register(leastWeightedStrategyName, func(log logr.Logger, opts ...AllocationOption) Allocator { - return newAllocator(log, newleastWeightedStrategy(), opts...) - }) - if err != nil { - panic(err) - } - err = Register(consistentHashingStrategyName, func(log logr.Logger, opts ...AllocationOption) Allocator { - return newAllocator(log, newConsistentHashingStrategy(), opts...) - }) - if err != nil { - panic(err) + strategies = map[string]Strategy{ + leastWeightedStrategyName: newleastWeightedStrategy(), + consistentHashingStrategyName: newConsistentHashingStrategy(), + perNodeStrategyName: newPerNodeStrategy(), } - err = Register(perNodeStrategyName, func(log logr.Logger, opts ...AllocationOption) Allocator { - return newAllocator(log, newPerNodeStrategy(), opts...) - }) - if err != nil { - panic(err) + + for strategyName, strategy := range strategies { + err := Register(strategyName, func(log logr.Logger, opts ...AllocationOption) Allocator { + return newAllocator(log, strategy, opts...) + }) + if err != nil { + panic(err) + } } } diff --git a/cmd/otel-allocator/config/config.go b/cmd/otel-allocator/config/config.go index 946fba268f..ee55fe0a32 100644 --- a/cmd/otel-allocator/config/config.go +++ b/cmd/otel-allocator/config/config.go @@ -46,16 +46,17 @@ const ( ) type Config struct { - ListenAddr string `yaml:"listen_addr,omitempty"` - KubeConfigFilePath string `yaml:"kube_config_file_path,omitempty"` - ClusterConfig *rest.Config `yaml:"-"` - RootLogger logr.Logger `yaml:"-"` - CollectorSelector *metav1.LabelSelector `yaml:"collector_selector,omitempty"` - PromConfig *promconfig.Config `yaml:"config"` - AllocationStrategy string `yaml:"allocation_strategy,omitempty"` - FilterStrategy string `yaml:"filter_strategy,omitempty"` - PrometheusCR PrometheusCRConfig `yaml:"prometheus_cr,omitempty"` - HTTPS HTTPSServerConfig `yaml:"https,omitempty"` + ListenAddr string `yaml:"listen_addr,omitempty"` + KubeConfigFilePath string `yaml:"kube_config_file_path,omitempty"` + ClusterConfig *rest.Config `yaml:"-"` + RootLogger logr.Logger `yaml:"-"` + CollectorSelector *metav1.LabelSelector `yaml:"collector_selector,omitempty"` + PromConfig *promconfig.Config `yaml:"config"` + AllocationStrategy string `yaml:"allocation_strategy,omitempty"` + AllocationFallbackStrategy string `yaml:"allocation_fallback_strategy,omitempty"` + FilterStrategy string `yaml:"filter_strategy,omitempty"` + PrometheusCR PrometheusCRConfig `yaml:"prometheus_cr,omitempty"` + HTTPS HTTPSServerConfig `yaml:"https,omitempty"` } type PrometheusCRConfig struct { @@ -165,8 +166,9 @@ func unmarshal(cfg *Config, configFile string) error { func CreateDefaultConfig() Config { return Config{ - AllocationStrategy: DefaultAllocationStrategy, - FilterStrategy: DefaultFilterStrategy, + AllocationStrategy: DefaultAllocationStrategy, + AllocationFallbackStrategy: "", + FilterStrategy: DefaultFilterStrategy, PrometheusCR: PrometheusCRConfig{ ScrapeInterval: DefaultCRScrapeInterval, }, diff --git a/cmd/otel-allocator/main.go b/cmd/otel-allocator/main.go index f9531d6740..be2418902e 100644 --- a/cmd/otel-allocator/main.go +++ b/cmd/otel-allocator/main.go @@ -81,7 +81,13 @@ func main() { log := ctrl.Log.WithName("allocator") allocatorPrehook = prehook.New(cfg.FilterStrategy, log) - allocator, err = allocation.New(cfg.AllocationStrategy, log, allocation.WithFilter(allocatorPrehook)) + + var allocationOptions []allocation.AllocationOption + allocationOptions = append(allocationOptions, allocation.WithFilter(allocatorPrehook)) + if cfg.AllocationFallbackStrategy != "" { + allocationOptions = append(allocationOptions, allocation.WithFallbackStrategy(cfg.AllocationFallbackStrategy)) + } + allocator, err = allocation.New(cfg.AllocationStrategy, log, allocationOptions...) if err != nil { setupLog.Error(err, "Unable to initialize allocation strategy") os.Exit(1) diff --git a/cmd/otel-allocator/server/mocks_test.go b/cmd/otel-allocator/server/mocks_test.go index e44b178fa8..8620d70367 100644 --- a/cmd/otel-allocator/server/mocks_test.go +++ b/cmd/otel-allocator/server/mocks_test.go @@ -32,6 +32,7 @@ func (m *mockAllocator) SetTargets(_ map[string]*target.Item) func (m *mockAllocator) Collectors() map[string]*allocation.Collector { return nil } func (m *mockAllocator) GetTargetsForCollectorAndJob(_ string, _ string) []*target.Item { return nil } func (m *mockAllocator) SetFilter(_ allocation.Filter) {} +func (m *mockAllocator) SetFallbackStrategy(_ allocation.Strategy) {} func (m *mockAllocator) TargetItems() map[string]*target.Item { return m.targetItems diff --git a/internal/manifests/targetallocator/configmap.go b/internal/manifests/targetallocator/configmap.go index b17df29151..36defd088e 100644 --- a/internal/manifests/targetallocator/configmap.go +++ b/internal/manifests/targetallocator/configmap.go @@ -90,6 +90,11 @@ func ConfigMap(params Params) (*corev1.ConfigMap, error) { } else { taConfig["allocation_strategy"] = v1beta1.TargetAllocatorAllocationStrategyConsistentHashing } + + if featuregate.EnableTargetAllocatorFallbackStrategy.IsEnabled() { + taConfig["allocation_fallback_strategy"] = v1beta1.TargetAllocatorAllocationStrategyConsistentHashing + } + taConfig["filter_strategy"] = taSpec.FilterStrategy if taSpec.PrometheusCR.Enabled { diff --git a/internal/manifests/targetallocator/configmap_test.go b/internal/manifests/targetallocator/configmap_test.go index de863874db..967eef25e8 100644 --- a/internal/manifests/targetallocator/configmap_test.go +++ b/internal/manifests/targetallocator/configmap_test.go @@ -297,6 +297,63 @@ prometheus_cr: assert.Equal(t, expectedLabels, actual.Labels) assert.Equal(t, expectedData, actual.Data) }) + + t.Run("should return expected target allocator config map allocation fallback strategy", func(t *testing.T) { + expectedLabels["app.kubernetes.io/component"] = "opentelemetry-targetallocator" + expectedLabels["app.kubernetes.io/name"] = "my-instance-targetallocator" + + cfg := config.New(config.WithCertManagerAvailability(certmanager.Available)) + + flgs := featuregate.Flags(colfg.GlobalRegistry()) + err := flgs.Parse([]string{"--feature-gates=operator.targetallocator.fallbackstrategy"}) + require.NoError(t, err) + + testParams := Params{ + Collector: collector, + TargetAllocator: targetAllocator, + Config: cfg, + } + + expectedData := map[string]string{ + targetAllocatorFilename: `allocation_fallback_strategy: consistent-hashing +allocation_strategy: consistent-hashing +collector_selector: + matchlabels: + app.kubernetes.io/component: opentelemetry-collector + app.kubernetes.io/instance: default.my-instance + app.kubernetes.io/managed-by: opentelemetry-operator + app.kubernetes.io/part-of: opentelemetry + matchexpressions: [] +config: + scrape_configs: + - job_name: otel-collector + scrape_interval: 10s + static_configs: + - targets: + - 0.0.0.0:8888 + - 0.0.0.0:9999 +filter_strategy: relabel-config +https: + ca_file_path: /tls/ca.crt + enabled: true + listen_addr: :8443 + tls_cert_file_path: /tls/tls.crt + tls_key_file_path: /tls/tls.key +prometheus_cr: + enabled: true + pod_monitor_selector: null + scrape_interval: 30s + service_monitor_selector: null +`, + } + + actual, err := ConfigMap(testParams) + assert.NoError(t, err) + + assert.Equal(t, "my-instance-targetallocator", actual.Name) + assert.Equal(t, expectedLabels, actual.Labels) + assert.Equal(t, expectedData, actual.Data) + }) } func TestGetScrapeConfigsFromOtelConfig(t *testing.T) { diff --git a/pkg/featuregate/featuregate.go b/pkg/featuregate/featuregate.go index 03a6f8392a..e08b0fb0c3 100644 --- a/pkg/featuregate/featuregate.go +++ b/pkg/featuregate/featuregate.go @@ -67,6 +67,14 @@ var ( featuregate.WithRegisterDescription("enables mTLS between the target allocator and the collector"), featuregate.WithRegisterFromVersion("v0.111.0"), ) + // EnableTargetAllocatorFallbackStrategy is the feature gate that enables consistent-hashing as the fallback + // strategy for allocation strategies that might not assign all jobs (per-node). + EnableTargetAllocatorFallbackStrategy = featuregate.GlobalRegistry().MustRegister( + "operator.targetallocator.fallbackstrategy", + featuregate.StageAlpha, + featuregate.WithRegisterDescription("enables fallback allocation strategy for the target allocator"), + featuregate.WithRegisterFromVersion("v0.114.0"), + ) // EnableConfigDefaulting is the feature gate that enables the operator to default the endpoint for known components. EnableConfigDefaulting = featuregate.GlobalRegistry().MustRegister( "operator.collector.default.config", From 5e35dbb4a8c4cae8bb6e0cbad130e7d2ee0d3c2e Mon Sep 17 00:00:00 2001 From: Israel Blancas Date: Sat, 23 Nov 2024 16:46:53 +0100 Subject: [PATCH 8/9] Add automatic RBAC creation for k8scluster receiver (#3428) Signed-off-by: Israel Blancas --- .chloggen/3427.yaml | 16 ++ Makefile | 1 + internal/components/receivers/helpers.go | 3 + internal/components/receivers/k8scluster.go | 87 ++++++++++ .../components/receivers/k8scluster_test.go | 164 ++++++++++++++++++ .../single_endpoint_receiver_test.go | 1 - .../clusterresourcequotas.yaml | 11 ++ .../receiver-k8scluster/00-install.yaml | 4 + .../receiver-k8scluster/01-assert.yaml | 80 +++++++++ .../receiver-k8scluster/01-install.yaml | 18 ++ .../receiver-k8scluster/02-assert.yaml | 88 ++++++++++ .../receiver-k8scluster/02-install.yaml | 19 ++ 12 files changed, 491 insertions(+), 1 deletion(-) create mode 100755 .chloggen/3427.yaml create mode 100644 internal/components/receivers/k8scluster.go create mode 100644 internal/components/receivers/k8scluster_test.go create mode 100644 tests/e2e-automatic-rbac/extra-permissions-operator/clusterresourcequotas.yaml create mode 100644 tests/e2e-automatic-rbac/receiver-k8scluster/00-install.yaml create mode 100644 tests/e2e-automatic-rbac/receiver-k8scluster/01-assert.yaml create mode 100644 tests/e2e-automatic-rbac/receiver-k8scluster/01-install.yaml create mode 100644 tests/e2e-automatic-rbac/receiver-k8scluster/02-assert.yaml create mode 100644 tests/e2e-automatic-rbac/receiver-k8scluster/02-install.yaml diff --git a/.chloggen/3427.yaml b/.chloggen/3427.yaml new file mode 100755 index 0000000000..2961d8d2e8 --- /dev/null +++ b/.chloggen/3427.yaml @@ -0,0 +1,16 @@ +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. collector, target allocator, auto-instrumentation, opamp, github action) +component: collector + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Create RBAC rules for the k8s_cluster receiver automatically. + +# One or more tracking issues related to the change +issues: [3427] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: diff --git a/Makefile b/Makefile index bab27db662..8212a91dd7 100644 --- a/Makefile +++ b/Makefile @@ -206,6 +206,7 @@ add-rbac-permissions-to-operator: manifests kustomize # This folder is ignored by .gitignore mkdir -p config/rbac/extra-permissions-operator cp -r tests/e2e-automatic-rbac/extra-permissions-operator/* config/rbac/extra-permissions-operator + cd config/rbac && $(KUSTOMIZE) edit add patch --kind ClusterRole --name manager-role --path extra-permissions-operator/clusterresourcequotas.yaml cd config/rbac && $(KUSTOMIZE) edit add patch --kind ClusterRole --name manager-role --path extra-permissions-operator/cronjobs.yaml cd config/rbac && $(KUSTOMIZE) edit add patch --kind ClusterRole --name manager-role --path extra-permissions-operator/daemonsets.yaml cd config/rbac && $(KUSTOMIZE) edit add patch --kind ClusterRole --name manager-role --path extra-permissions-operator/events.yaml diff --git a/internal/components/receivers/helpers.go b/internal/components/receivers/helpers.go index 37cc710e39..43ebaa0d06 100644 --- a/internal/components/receivers/helpers.go +++ b/internal/components/receivers/helpers.go @@ -143,6 +143,9 @@ var ( components.NewBuilder[k8seventsConfig]().WithName("k8s_events"). WithRbacGen(generatek8seventsRbacRules). MustBuild(), + components.NewBuilder[k8sclusterConfig]().WithName("k8s_cluster"). + WithRbacGen(generatek8sclusterRbacRules). + MustBuild(), components.NewBuilder[k8sobjectsConfig]().WithName("k8sobjects"). WithRbacGen(generatek8sobjectsRbacRules). MustBuild(), diff --git a/internal/components/receivers/k8scluster.go b/internal/components/receivers/k8scluster.go new file mode 100644 index 0000000000..aa813d9642 --- /dev/null +++ b/internal/components/receivers/k8scluster.go @@ -0,0 +1,87 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package receivers + +import ( + "github.com/go-logr/logr" + rbacv1 "k8s.io/api/rbac/v1" +) + +type k8sclusterConfig struct { + Distribution string `mapstructure:"distribution"` +} + +func generatek8sclusterRbacRules(_ logr.Logger, cfg k8sclusterConfig) ([]rbacv1.PolicyRule, error) { + policyRules := []rbacv1.PolicyRule{ + { + APIGroups: []string{""}, + Resources: []string{ + "events", + "namespaces", + "namespaces/status", + "nodes", + "nodes/spec", + "pods", + "pods/status", + "replicationcontrollers", + "replicationcontrollers/status", + "resourcequotas", + "services", + }, + Verbs: []string{"get", "list", "watch"}, + }, + { + APIGroups: []string{"apps"}, + Resources: []string{ + "daemonsets", + "deployments", + "replicasets", + "statefulsets", + }, + Verbs: []string{"get", "list", "watch"}, + }, + { + APIGroups: []string{"extensions"}, + Resources: []string{ + "daemonsets", + "deployments", + "replicasets", + }, + Verbs: []string{"get", "list", "watch"}, + }, + { + APIGroups: []string{"batch"}, + Resources: []string{ + "jobs", + "cronjobs", + }, + Verbs: []string{"get", "list", "watch"}, + }, + { + APIGroups: []string{"autoscaling"}, + Resources: []string{"horizontalpodautoscalers"}, + Verbs: []string{"get", "list", "watch"}, + }, + } + + if cfg.Distribution == "openshift" { + policyRules = append(policyRules, rbacv1.PolicyRule{ + APIGroups: []string{"quota.openshift.io"}, + Resources: []string{"clusterresourcequotas"}, + Verbs: []string{"get", "list", "watch"}, + }) + } + return policyRules, nil +} diff --git a/internal/components/receivers/k8scluster_test.go b/internal/components/receivers/k8scluster_test.go new file mode 100644 index 0000000000..36890ab60e --- /dev/null +++ b/internal/components/receivers/k8scluster_test.go @@ -0,0 +1,164 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package receivers + +import ( + "testing" + + "github.com/go-logr/logr" + "github.com/stretchr/testify/assert" + rbacv1 "k8s.io/api/rbac/v1" +) + +func Test_generatek8sclusterRbacRules(t *testing.T) { + tests := []struct { + name string + cfg k8sclusterConfig + want []rbacv1.PolicyRule + wantErr bool + }{ + { + name: "default configuration", + cfg: k8sclusterConfig{}, + want: []rbacv1.PolicyRule{ + { + APIGroups: []string{""}, + Resources: []string{ + "events", + "namespaces", + "namespaces/status", + "nodes", + "nodes/spec", + "pods", + "pods/status", + "replicationcontrollers", + "replicationcontrollers/status", + "resourcequotas", + "services", + }, + Verbs: []string{"get", "list", "watch"}, + }, + { + APIGroups: []string{"apps"}, + Resources: []string{ + "daemonsets", + "deployments", + "replicasets", + "statefulsets", + }, + Verbs: []string{"get", "list", "watch"}, + }, + { + APIGroups: []string{"extensions"}, + Resources: []string{ + "daemonsets", + "deployments", + "replicasets", + }, + Verbs: []string{"get", "list", "watch"}, + }, + { + APIGroups: []string{"batch"}, + Resources: []string{ + "jobs", + "cronjobs", + }, + Verbs: []string{"get", "list", "watch"}, + }, + { + APIGroups: []string{"autoscaling"}, + Resources: []string{"horizontalpodautoscalers"}, + Verbs: []string{"get", "list", "watch"}, + }, + }, + wantErr: false, + }, + { + name: "openshift configuration", + cfg: k8sclusterConfig{ + Distribution: "openshift", + }, + want: []rbacv1.PolicyRule{ + { + APIGroups: []string{""}, + Resources: []string{ + "events", + "namespaces", + "namespaces/status", + "nodes", + "nodes/spec", + "pods", + "pods/status", + "replicationcontrollers", + "replicationcontrollers/status", + "resourcequotas", + "services", + }, + Verbs: []string{"get", "list", "watch"}, + }, + { + APIGroups: []string{"apps"}, + Resources: []string{ + "daemonsets", + "deployments", + "replicasets", + "statefulsets", + }, + Verbs: []string{"get", "list", "watch"}, + }, + { + APIGroups: []string{"extensions"}, + Resources: []string{ + "daemonsets", + "deployments", + "replicasets", + }, + Verbs: []string{"get", "list", "watch"}, + }, + { + APIGroups: []string{"batch"}, + Resources: []string{ + "jobs", + "cronjobs", + }, + Verbs: []string{"get", "list", "watch"}, + }, + { + APIGroups: []string{"autoscaling"}, + Resources: []string{"horizontalpodautoscalers"}, + Verbs: []string{"get", "list", "watch"}, + }, + { + APIGroups: []string{"quota.openshift.io"}, + Resources: []string{"clusterresourcequotas"}, + Verbs: []string{"get", "list", "watch"}, + }, + }, + wantErr: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := generatek8sclusterRbacRules(logr.Discard(), tt.cfg) + if tt.wantErr { + assert.Error(t, err) + return + } + assert.NoError(t, err) + assert.Equal(t, tt.want, got) + }) + } +} diff --git a/internal/components/receivers/single_endpoint_receiver_test.go b/internal/components/receivers/single_endpoint_receiver_test.go index faaae6dbd7..abb866a8d8 100644 --- a/internal/components/receivers/single_endpoint_receiver_test.go +++ b/internal/components/receivers/single_endpoint_receiver_test.go @@ -83,7 +83,6 @@ func TestDownstreamParsers(t *testing.T) { {"awsxray", "awsxray", "__awsxray", 2000, false}, {"tcplog", "tcplog", "__tcplog", 0, true}, {"udplog", "udplog", "__udplog", 0, true}, - {"k8s_cluster", "k8s_cluster", "__k8s_cluster", 0, false}, } { t.Run(tt.receiverName, func(t *testing.T) { t.Run("builds successfully", func(t *testing.T) { diff --git a/tests/e2e-automatic-rbac/extra-permissions-operator/clusterresourcequotas.yaml b/tests/e2e-automatic-rbac/extra-permissions-operator/clusterresourcequotas.yaml new file mode 100644 index 0000000000..89cd1ed2f4 --- /dev/null +++ b/tests/e2e-automatic-rbac/extra-permissions-operator/clusterresourcequotas.yaml @@ -0,0 +1,11 @@ +- op: add + path: /rules/- + value: + apiGroups: + - quota.openshift.io + resources: + - clusterresourcequotas + verbs: + - get + - list + - watch \ No newline at end of file diff --git a/tests/e2e-automatic-rbac/receiver-k8scluster/00-install.yaml b/tests/e2e-automatic-rbac/receiver-k8scluster/00-install.yaml new file mode 100644 index 0000000000..36737528f0 --- /dev/null +++ b/tests/e2e-automatic-rbac/receiver-k8scluster/00-install.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: chainsaw-k8s-cluster diff --git a/tests/e2e-automatic-rbac/receiver-k8scluster/01-assert.yaml b/tests/e2e-automatic-rbac/receiver-k8scluster/01-assert.yaml new file mode 100644 index 0000000000..eefc9620c0 --- /dev/null +++ b/tests/e2e-automatic-rbac/receiver-k8scluster/01-assert.yaml @@ -0,0 +1,80 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: simplest-chainsaw-k8s-cluster-cluster-role +rules: +- apiGroups: + - "" + resources: + - events + - namespaces + - namespaces/status + - nodes + - nodes/spec + - pods + - pods/status + - replicationcontrollers + - replicationcontrollers/status + - resourcequotas + - services + verbs: + - get + - list + - watch +- apiGroups: + - apps + resources: + - daemonsets + - deployments + - replicasets + - statefulsets + verbs: + - get + - list + - watch +- apiGroups: + - extensions + resources: + - daemonsets + - deployments + - replicasets + verbs: + - get + - list + - watch +- apiGroups: + - batch + resources: + - jobs + - cronjobs + verbs: + - get + - list + - watch +- apiGroups: + - autoscaling + resources: + - horizontalpodautoscalers + verbs: + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/component: opentelemetry-collector + app.kubernetes.io/instance: chainsaw-k8s-cluster.simplest + app.kubernetes.io/managed-by: opentelemetry-operator + app.kubernetes.io/name: simplest-chainsaw-k8s-cluster-collector + app.kubernetes.io/part-of: opentelemetry + name: simplest-chainsaw-k8s-cluster-collector +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: simplest-chainsaw-k8s-cluster-cluster-role +subjects: +- kind: ServiceAccount + name: simplest-collector + namespace: chainsaw-k8s-cluster diff --git a/tests/e2e-automatic-rbac/receiver-k8scluster/01-install.yaml b/tests/e2e-automatic-rbac/receiver-k8scluster/01-install.yaml new file mode 100644 index 0000000000..2cdc575046 --- /dev/null +++ b/tests/e2e-automatic-rbac/receiver-k8scluster/01-install.yaml @@ -0,0 +1,18 @@ +apiVersion: opentelemetry.io/v1alpha1 +kind: OpenTelemetryCollector +metadata: + name: simplest + namespace: chainsaw-k8s-cluster +spec: + config: | + receivers: + k8s_cluster: + processors: + exporters: + debug: + service: + pipelines: + traces: + receivers: [k8s_cluster] + processors: [] + exporters: [debug] diff --git a/tests/e2e-automatic-rbac/receiver-k8scluster/02-assert.yaml b/tests/e2e-automatic-rbac/receiver-k8scluster/02-assert.yaml new file mode 100644 index 0000000000..e95ce23092 --- /dev/null +++ b/tests/e2e-automatic-rbac/receiver-k8scluster/02-assert.yaml @@ -0,0 +1,88 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: simplest-chainsaw-k8s-cluster-cluster-role +rules: +- apiGroups: + - "" + resources: + - events + - namespaces + - namespaces/status + - nodes + - nodes/spec + - pods + - pods/status + - replicationcontrollers + - replicationcontrollers/status + - resourcequotas + - services + verbs: + - get + - list + - watch +- apiGroups: + - apps + resources: + - daemonsets + - deployments + - replicasets + - statefulsets + verbs: + - get + - list + - watch +- apiGroups: + - extensions + resources: + - daemonsets + - deployments + - replicasets + verbs: + - get + - list + - watch +- apiGroups: + - batch + resources: + - jobs + - cronjobs + verbs: + - get + - list + - watch +- apiGroups: + - autoscaling + resources: + - horizontalpodautoscalers + verbs: + - get + - list + - watch +- apiGroups: + - quota.openshift.io + resources: + - clusterresourcequotas + verbs: + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + app.kubernetes.io/component: opentelemetry-collector + app.kubernetes.io/instance: chainsaw-k8s-cluster.simplest + app.kubernetes.io/managed-by: opentelemetry-operator + app.kubernetes.io/name: simplest-chainsaw-k8s-cluster-collector + app.kubernetes.io/part-of: opentelemetry + name: simplest-chainsaw-k8s-cluster-collector +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: simplest-chainsaw-k8s-cluster-cluster-role +subjects: +- kind: ServiceAccount + name: simplest-collector + namespace: chainsaw-k8s-cluster diff --git a/tests/e2e-automatic-rbac/receiver-k8scluster/02-install.yaml b/tests/e2e-automatic-rbac/receiver-k8scluster/02-install.yaml new file mode 100644 index 0000000000..984cef98fe --- /dev/null +++ b/tests/e2e-automatic-rbac/receiver-k8scluster/02-install.yaml @@ -0,0 +1,19 @@ +apiVersion: opentelemetry.io/v1alpha1 +kind: OpenTelemetryCollector +metadata: + name: simplest + namespace: chainsaw-k8s-cluster +spec: + config: | + receivers: + k8s_cluster: + distribution: openshift + processors: + exporters: + debug: + service: + pipelines: + traces: + receivers: [k8s_cluster] + processors: [] + exporters: [debug] From bf7cdd15a1ea41fca66e3ed86a2a732dbfd6a474 Mon Sep 17 00:00:00 2001 From: Israel Blancas Date: Sat, 23 Nov 2024 16:47:40 +0100 Subject: [PATCH 9/9] Add a warning message when one created collector needs extra RBAC permissions and the service account doesn't have them (#3433) * Add a warning message when one created collector needs extra RBAC permissions and the service account doesn't have them Signed-off-by: Israel Blancas * Fix nil Signed-off-by: Israel Blancas * Show an admission warning Signed-off-by: Israel Blancas * Apply changes requested in code review Signed-off-by: Israel Blancas --------- Signed-off-by: Israel Blancas --- .chloggen/3432.yaml | 16 + .../opentelemetrycollector_controller.go | 5 + internal/manifests/collector/collector.go | 24 ++ .../manifests/collector/collector_test.go | 343 ++++++++++++++++++ internal/manifests/collector/rbac.go | 27 ++ internal/manifests/params.go | 3 + internal/rbac/access.go | 7 + main.go | 3 + 8 files changed, 428 insertions(+) create mode 100755 .chloggen/3432.yaml create mode 100644 internal/manifests/collector/collector_test.go diff --git a/.chloggen/3432.yaml b/.chloggen/3432.yaml new file mode 100755 index 0000000000..16834b17e4 --- /dev/null +++ b/.chloggen/3432.yaml @@ -0,0 +1,16 @@ +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. collector, target allocator, auto-instrumentation, opamp, github action) +component: collector + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Add a warning message when one created collector needs extra RBAC permissions and the service account doesn't have them. + +# One or more tracking issues related to the change +issues: [3432] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: diff --git a/controllers/opentelemetrycollector_controller.go b/controllers/opentelemetrycollector_controller.go index 94875f060c..1f0211f932 100644 --- a/controllers/opentelemetrycollector_controller.go +++ b/controllers/opentelemetrycollector_controller.go @@ -47,6 +47,7 @@ import ( "github.com/open-telemetry/opentelemetry-operator/internal/manifests" "github.com/open-telemetry/opentelemetry-operator/internal/manifests/collector" "github.com/open-telemetry/opentelemetry-operator/internal/manifests/manifestutils" + internalRbac "github.com/open-telemetry/opentelemetry-operator/internal/rbac" collectorStatus "github.com/open-telemetry/opentelemetry-operator/internal/status/collector" "github.com/open-telemetry/opentelemetry-operator/pkg/constants" "github.com/open-telemetry/opentelemetry-operator/pkg/featuregate" @@ -66,6 +67,7 @@ type OpenTelemetryCollectorReconciler struct { scheme *runtime.Scheme log logr.Logger config config.Config + reviewer *internalRbac.Reviewer } // Params is the set of options to build a new OpenTelemetryCollectorReconciler. @@ -75,6 +77,7 @@ type Params struct { Scheme *runtime.Scheme Log logr.Logger Config config.Config + Reviewer *internalRbac.Reviewer } func (r *OpenTelemetryCollectorReconciler) findOtelOwnedObjects(ctx context.Context, params manifests.Params) (map[types.UID]client.Object, error) { @@ -178,6 +181,7 @@ func (r *OpenTelemetryCollectorReconciler) GetParams(ctx context.Context, instan Log: r.log, Scheme: r.scheme, Recorder: r.recorder, + Reviewer: r.reviewer, } // generate the target allocator CR from the collector CR @@ -210,6 +214,7 @@ func NewReconciler(p Params) *OpenTelemetryCollectorReconciler { scheme: p.Scheme, config: p.Config, recorder: p.Recorder, + reviewer: p.Reviewer, } return r } diff --git a/internal/manifests/collector/collector.go b/internal/manifests/collector/collector.go index 01b1777276..df294d4d47 100644 --- a/internal/manifests/collector/collector.go +++ b/internal/manifests/collector/collector.go @@ -15,6 +15,9 @@ package collector import ( + "errors" + "fmt" + "sigs.k8s.io/controller-runtime/pkg/client" "github.com/open-telemetry/opentelemetry-operator/apis/v1beta1" @@ -80,6 +83,20 @@ func Build(params manifests.Params) ([]client.Object, error) { resourceManifests = append(resourceManifests, res) } } + + if needsCheckSaPermissions(params) { + warnings, err := CheckRbacRules(params, params.OtelCol.Spec.ServiceAccount) + if err != nil { + return nil, fmt.Errorf("error checking RBAC rules for serviceAccount %s: %w", params.OtelCol.Spec.ServiceAccount, err) + } + + var w []error + for _, warning := range warnings { + w = append(w, fmt.Errorf("RBAC rules are missing: %s", warning)) + } + return nil, errors.Join(w...) + } + routes, err := Routes(params) if err != nil { return nil, err @@ -90,3 +107,10 @@ func Build(params manifests.Params) ([]client.Object, error) { } return resourceManifests, nil } + +func needsCheckSaPermissions(params manifests.Params) bool { + return params.ErrorAsWarning && + params.Config.CreateRBACPermissions() == rbac.NotAvailable && + params.Reviewer != nil && + params.OtelCol.Spec.ServiceAccount != "" +} diff --git a/internal/manifests/collector/collector_test.go b/internal/manifests/collector/collector_test.go new file mode 100644 index 0000000000..473b2c6ab9 --- /dev/null +++ b/internal/manifests/collector/collector_test.go @@ -0,0 +1,343 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package collector + +import ( + "context" + "fmt" + "testing" + + "github.com/go-logr/logr" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + otelColFeatureGate "go.opentelemetry.io/collector/featuregate" + v1 "k8s.io/api/authorization/v1" + rbacv1 "k8s.io/api/rbac/v1" + + "github.com/open-telemetry/opentelemetry-operator/apis/v1beta1" + "github.com/open-telemetry/opentelemetry-operator/internal/autodetect/prometheus" + autoRbac "github.com/open-telemetry/opentelemetry-operator/internal/autodetect/rbac" + "github.com/open-telemetry/opentelemetry-operator/internal/config" + "github.com/open-telemetry/opentelemetry-operator/internal/manifests" + irbac "github.com/open-telemetry/opentelemetry-operator/internal/rbac" + "github.com/open-telemetry/opentelemetry-operator/pkg/featuregate" +) + +func TestNeedsCheckSaPermissions(t *testing.T) { + tests := []struct { + name string + params manifests.Params + expected bool + }{ + { + name: "should return true when all conditions are met", + params: manifests.Params{ + ErrorAsWarning: true, + Config: config.New(config.WithRBACPermissions(autoRbac.NotAvailable)), + Reviewer: &mockReviewer{}, + OtelCol: v1beta1.OpenTelemetryCollector{ + Spec: v1beta1.OpenTelemetryCollectorSpec{ + OpenTelemetryCommonFields: v1beta1.OpenTelemetryCommonFields{ + ServiceAccount: "test-sa", + }, + }, + }, + }, + expected: true, + }, + { + name: "should return false when ErrorAsWarning is false", + params: manifests.Params{ + ErrorAsWarning: false, + Config: config.New(config.WithRBACPermissions(autoRbac.NotAvailable)), + Reviewer: &mockReviewer{}, + OtelCol: v1beta1.OpenTelemetryCollector{ + Spec: v1beta1.OpenTelemetryCollectorSpec{ + OpenTelemetryCommonFields: v1beta1.OpenTelemetryCommonFields{ + ServiceAccount: "test-sa", + }, + }, + }, + }, + expected: false, + }, + { + name: "should return false when RBAC is available", + params: manifests.Params{ + ErrorAsWarning: true, + Config: config.New(config.WithRBACPermissions(autoRbac.Available)), + Reviewer: &mockReviewer{}, + OtelCol: v1beta1.OpenTelemetryCollector{ + Spec: v1beta1.OpenTelemetryCollectorSpec{ + OpenTelemetryCommonFields: v1beta1.OpenTelemetryCommonFields{ + ServiceAccount: "test-sa", + }, + }, + }, + }, + expected: false, + }, + { + name: "should return false when Reviewer is nil", + params: manifests.Params{ + ErrorAsWarning: true, + Config: config.New(config.WithRBACPermissions(autoRbac.NotAvailable)), + Reviewer: nil, + OtelCol: v1beta1.OpenTelemetryCollector{ + Spec: v1beta1.OpenTelemetryCollectorSpec{ + OpenTelemetryCommonFields: v1beta1.OpenTelemetryCommonFields{ + ServiceAccount: "test-sa", + }, + }, + }, + }, + expected: false, + }, + { + name: "should return false when ServiceAccount is empty", + params: manifests.Params{ + ErrorAsWarning: true, + Config: config.New(config.WithRBACPermissions(autoRbac.NotAvailable)), + Reviewer: &mockReviewer{}, + OtelCol: v1beta1.OpenTelemetryCollector{ + Spec: v1beta1.OpenTelemetryCollectorSpec{ + OpenTelemetryCommonFields: v1beta1.OpenTelemetryCommonFields{ + ServiceAccount: "", + }, + }, + }, + }, + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := needsCheckSaPermissions(tt.params) + assert.Equal(t, tt.expected, result) + }) + } +} + +type mockReviewer struct{} + +var _ irbac.SAReviewer = &mockReviewer{} + +func (m *mockReviewer) CheckPolicyRules(ctx context.Context, serviceAccount, serviceAccountNamespace string, rules ...*rbacv1.PolicyRule) ([]*v1.SubjectAccessReview, error) { + return nil, fmt.Errorf("error checking policy rules") +} + +func (m *mockReviewer) CanAccess(ctx context.Context, serviceAccount, serviceAccountNamespace string, res *v1.ResourceAttributes, nonResourceAttributes *v1.NonResourceAttributes) (*v1.SubjectAccessReview, error) { + return nil, nil +} + +func TestBuild(t *testing.T) { + logger := logr.Discard() + tests := []struct { + name string + params manifests.Params + expectedObjects int + wantErr bool + featureGate *otelColFeatureGate.Gate + }{ + { + name: "deployment mode builds expected manifests", + params: manifests.Params{ + Log: logger, + OtelCol: v1beta1.OpenTelemetryCollector{ + Spec: v1beta1.OpenTelemetryCollectorSpec{ + Mode: v1beta1.ModeDeployment, + }, + }, + Config: config.New(), + }, + expectedObjects: 5, + wantErr: false, + }, + { + name: "statefulset mode builds expected manifests", + params: manifests.Params{ + Log: logger, + OtelCol: v1beta1.OpenTelemetryCollector{ + Spec: v1beta1.OpenTelemetryCollectorSpec{ + Mode: v1beta1.ModeStatefulSet, + }, + }, + Config: config.New(), + }, + expectedObjects: 5, + wantErr: false, + }, + { + name: "sidecar mode skips deployment manifests", + params: manifests.Params{ + Log: logger, + OtelCol: v1beta1.OpenTelemetryCollector{ + Spec: v1beta1.OpenTelemetryCollectorSpec{ + Mode: v1beta1.ModeSidecar, + }, + }, + Config: config.New(), + }, + expectedObjects: 3, + wantErr: false, + }, + { + name: "rbac available adds cluster role manifests", + params: manifests.Params{ + Log: logger, + OtelCol: v1beta1.OpenTelemetryCollector{ + Spec: v1beta1.OpenTelemetryCollectorSpec{ + Mode: v1beta1.ModeDeployment, + Config: v1beta1.Config{ + Processors: &v1beta1.AnyConfig{ + Object: map[string]any{ + "k8sattributes": map[string]any{}, + }, + }, + Service: v1beta1.Service{ + Pipelines: map[string]*v1beta1.Pipeline{ + "traces": { + Processors: []string{"k8sattributes"}, + }, + }, + }, + }, + }, + }, + Config: config.New(config.WithRBACPermissions(autoRbac.Available)), + }, + expectedObjects: 7, + wantErr: false, + }, + { + name: "metrics enabled adds monitoring service monitor", + params: manifests.Params{ + Log: logger, + OtelCol: v1beta1.OpenTelemetryCollector{ + Spec: v1beta1.OpenTelemetryCollectorSpec{ + Mode: v1beta1.ModeDeployment, + Observability: v1beta1.ObservabilitySpec{ + Metrics: v1beta1.MetricsConfigSpec{ + EnableMetrics: true, + }, + }, + }, + }, + Config: config.New(config.WithPrometheusCRAvailability(prometheus.Available)), + }, + expectedObjects: 6, + wantErr: false, + featureGate: featuregate.PrometheusOperatorIsAvailable, + }, + { + name: "metrics enabled adds service monitors", + params: manifests.Params{ + Log: logger, + OtelCol: v1beta1.OpenTelemetryCollector{ + Spec: v1beta1.OpenTelemetryCollectorSpec{ + Mode: v1beta1.ModeDeployment, + Observability: v1beta1.ObservabilitySpec{ + Metrics: v1beta1.MetricsConfigSpec{ + EnableMetrics: true, + }, + }, + Config: v1beta1.Config{ + Exporters: v1beta1.AnyConfig{ + Object: map[string]any{ + "prometheus": map[string]any{ + "endpoint": "1.2.3.4:1234", + }, + }, + }, + Service: v1beta1.Service{ + Pipelines: map[string]*v1beta1.Pipeline{ + "metrics": { + Exporters: []string{"prometheus"}, + }, + }, + }, + }, + }, + }, + Config: config.New(config.WithPrometheusCRAvailability(prometheus.Available)), + }, + expectedObjects: 9, + wantErr: false, + featureGate: featuregate.PrometheusOperatorIsAvailable, + }, + { + name: "check sa permissions", + params: manifests.Params{ + ErrorAsWarning: true, + Reviewer: &mockReviewer{}, + Log: logger, + OtelCol: v1beta1.OpenTelemetryCollector{ + Spec: v1beta1.OpenTelemetryCollectorSpec{ + OpenTelemetryCommonFields: v1beta1.OpenTelemetryCommonFields{ + ServiceAccount: "test-sa", + }, + Mode: v1beta1.ModeDeployment, + Observability: v1beta1.ObservabilitySpec{ + Metrics: v1beta1.MetricsConfigSpec{ + EnableMetrics: true, + }, + }, + Config: v1beta1.Config{ + Processors: &v1beta1.AnyConfig{ + Object: map[string]any{ + "k8sattributes": map[string]any{}, + }, + }, + Service: v1beta1.Service{ + Pipelines: map[string]*v1beta1.Pipeline{ + "metrics": { + Processors: []string{"k8sattributes"}, + }, + }, + }, + }, + }, + }, + Config: config.New(config.WithPrometheusCRAvailability(prometheus.Available)), + }, + expectedObjects: 9, + wantErr: true, + featureGate: featuregate.PrometheusOperatorIsAvailable, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.featureGate != nil { + err := otelColFeatureGate.GlobalRegistry().Set(tt.featureGate.ID(), true) + require.NoError(t, err) + defer func() { + err := otelColFeatureGate.GlobalRegistry().Set(tt.featureGate.ID(), false) + require.NoError(t, err) + }() + } + + objects, err := Build(tt.params) + if tt.wantErr { + require.Error(t, err) + return + } + + require.NoError(t, err) + assert.Len(t, objects, tt.expectedObjects) + }) + } +} diff --git a/internal/manifests/collector/rbac.go b/internal/manifests/collector/rbac.go index 610d948b67..9ae0a65f1f 100644 --- a/internal/manifests/collector/rbac.go +++ b/internal/manifests/collector/rbac.go @@ -15,12 +15,16 @@ package collector import ( + "context" + "fmt" + rbacv1 "k8s.io/api/rbac/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "github.com/open-telemetry/opentelemetry-operator/internal/manifests" "github.com/open-telemetry/opentelemetry-operator/internal/manifests/manifestutils" "github.com/open-telemetry/opentelemetry-operator/internal/naming" + "github.com/open-telemetry/opentelemetry-operator/internal/rbac" ) func ClusterRole(params manifests.Params) (*rbacv1.ClusterRole, error) { @@ -85,3 +89,26 @@ func ClusterRoleBinding(params manifests.Params) (*rbacv1.ClusterRoleBinding, er }, }, nil } + +func CheckRbacRules(params manifests.Params, saName string) ([]string, error) { + ctx := context.Background() + + rules, err := params.OtelCol.Spec.Config.GetAllRbacRules(params.Log) + if err != nil { + return nil, err + } + + r := []*rbacv1.PolicyRule{} + + for _, rule := range rules { + rule := rule + r = append(r, &rule) + } + + if subjectAccessReviews, err := params.Reviewer.CheckPolicyRules(ctx, saName, params.OtelCol.Namespace, r...); err != nil { + return nil, fmt.Errorf("%s: %w", "unable to check rbac rules", err) + } else if allowed, deniedReviews := rbac.AllSubjectAccessReviewsAllowed(subjectAccessReviews); !allowed { + return rbac.WarningsGroupedByResource(deniedReviews), nil + } + return nil, nil +} diff --git a/internal/manifests/params.go b/internal/manifests/params.go index 69be71fb0b..4f18b74591 100644 --- a/internal/manifests/params.go +++ b/internal/manifests/params.go @@ -23,6 +23,7 @@ import ( "github.com/open-telemetry/opentelemetry-operator/apis/v1alpha1" "github.com/open-telemetry/opentelemetry-operator/apis/v1beta1" "github.com/open-telemetry/opentelemetry-operator/internal/config" + "github.com/open-telemetry/opentelemetry-operator/internal/rbac" ) // Params holds the reconciliation-specific parameters. @@ -35,4 +36,6 @@ type Params struct { TargetAllocator *v1alpha1.TargetAllocator OpAMPBridge v1alpha1.OpAMPBridge Config config.Config + Reviewer rbac.SAReviewer + ErrorAsWarning bool } diff --git a/internal/rbac/access.go b/internal/rbac/access.go index 5bdc9b27cf..ab34bc7485 100644 --- a/internal/rbac/access.go +++ b/internal/rbac/access.go @@ -29,6 +29,13 @@ const ( serviceAccountFmtStr = "system:serviceaccount:%s:%s" ) +type SAReviewer interface { + CheckPolicyRules(ctx context.Context, serviceAccount, serviceAccountNamespace string, rules ...*rbacv1.PolicyRule) ([]*v1.SubjectAccessReview, error) + CanAccess(ctx context.Context, serviceAccount, serviceAccountNamespace string, res *v1.ResourceAttributes, nonResourceAttributes *v1.NonResourceAttributes) (*v1.SubjectAccessReview, error) +} + +var _ SAReviewer = &Reviewer{} + type Reviewer struct { client kubernetes.Interface } diff --git a/main.go b/main.go index 62ad926d87..7c82ce103d 100644 --- a/main.go +++ b/main.go @@ -392,6 +392,7 @@ func main() { Scheme: mgr.GetScheme(), Config: cfg, Recorder: mgr.GetEventRecorderFor("opentelemetry-operator"), + Reviewer: reviewer, }) if err = collectorReconciler.SetupWithManager(mgr); err != nil { @@ -456,6 +457,8 @@ func main() { warnings = append(warnings, newErr.Error()) return warnings } + + params.ErrorAsWarning = true _, newErr = collectorManifests.Build(params) if newErr != nil { warnings = append(warnings, newErr.Error())