Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[targetallocator] PrometheusOperator CRD MVC #653

Merged
merged 26 commits into from
Apr 22, 2022
Merged
Show file tree
Hide file tree
Changes from 24 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
4a72ffb
feat(target-allocator): allow custom config file path
secustor Dec 1, 2021
626d002
feat(target-allocator): move CLI config options to config package
secustor Dec 3, 2021
b71d1f2
feat(target-allocator): allow running outside of cluster for debugging
secustor Dec 4, 2021
727493b
introduce meta watcher
secustor Dec 3, 2021
f6a1996
add event source
secustor Dec 9, 2021
f12336d
fix: log panics
secustor Dec 9, 2021
d5c90ea
fix: race condition
secustor Dec 9, 2021
24275d0
fixup! fix: log panics
secustor Dec 10, 2021
0da5fa8
feat: implement promCR retrieval
secustor Dec 11, 2021
8a29f22
feat: functioning
secustor Dec 16, 2021
077c0c4
refactor: some cleanup
secustor Dec 28, 2021
78161df
feat(target-allocator): escape job names in query parameters
secustor Dec 31, 2021
28fc0c9
feat(target-allocator): make prometheusCR lookup optional and allow u…
secustor Dec 31, 2021
bb21889
refactor(target-allocator): improve memory usage and comments
secustor Dec 31, 2021
c18ac42
chore(target-allocator): update PromOperator and Kubernetes deps
secustor Jan 2, 2022
acc07c5
refactor(target-allocator): use exclude instead of replace directive
secustor Jan 2, 2022
8b7d8bf
ci: add Makefile targets for target allocator
secustor Jan 19, 2022
1b249ca
tests: add kuttl tests for PrometheusCR feature of target allocator
secustor Jan 19, 2022
2f5e0b3
docs(targetAllocator): add README.md
secustor Jan 27, 2022
f665de5
fixup CRD docs
secustor Jan 28, 2022
e23a2c5
Merge branch 'main' into implement_prometheus_crd
secustor Feb 9, 2022
0a429a7
fix(Makefile): add missing PHONY tags
secustor Feb 9, 2022
92a55d5
implement change requests
secustor Feb 22, 2022
96cea47
Merge branch 'main' into implement_prometheus_crd
secustor Apr 7, 2022
45a0e84
Merge branch 'main' into implement_prometheus_crd
secustor Apr 9, 2022
7d3f036
go mod tidy and fix linting
secustor Apr 9, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 19 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ IMG_REPO ?= opentelemetry-operator
IMG ?= ${IMG_PREFIX}/${IMG_REPO}:${VERSION}
BUNDLE_IMG ?= ${IMG_PREFIX}/${IMG_REPO}-bundle:${VERSION}

TARGETALLOCATOR_IMG_REPO ?= target-allocator
TARGETALLOCATOR_IMG ?= ${IMG_PREFIX}/${TARGETALLOCATOR_IMG_REPO}:$(addprefix v,${VERSION})

# Options for 'bundle-build'
ifneq ($(origin CHANNELS), undefined)
BUNDLE_CHANNELS := --channels=$(CHANNELS)
Expand Down Expand Up @@ -149,7 +152,7 @@ e2e:
$(KUTTL) test

.PHONY: prepare-e2e
prepare-e2e: kuttl set-test-image-vars set-image-controller container start-kind
prepare-e2e: kuttl set-test-image-vars set-image-controller container container-target-allocator start-kind load-image-all
mkdir -p tests/_build/crds tests/_build/manifests
$(KUSTOMIZE) build config/default -o tests/_build/manifests/01-opentelemetry-operator.yaml
$(KUSTOMIZE) build config/crd -o tests/_build/crds/
Expand All @@ -161,6 +164,7 @@ scorecard-tests: operator-sdk
.PHONY: set-test-image-vars
set-test-image-vars:
$(eval IMG=local/opentelemetry-operator:e2e)
$(eval TARGETALLOCATOR_IMG=local/opentelemetry-operator-targetallocator:e2e)

# Build the container image, used only for local dev purposes
.PHONY: container
Expand All @@ -172,11 +176,25 @@ container:
container-push:
docker push ${IMG}

.PHONY: container-target-allocator
container-target-allocator:
docker build -t ${TARGETALLOCATOR_IMG} cmd/otel-allocator

.PHONY: start-kind
start-kind:
kind create cluster --config $(KIND_CONFIG)

.PHONY: load-image-all
load-image-all: load-image-operator load-image-target-allocator

.PHONY: load-image-operator
load-image-operator:
kind load docker-image local/opentelemetry-operator:e2e

.PHONY: load-image-target-allocator
load-image-target-allocator:
kind load docker-image ${TARGETALLOCATOR_IMG}

.PHONY: cert-manager
cert-manager: cmctl
# Consider using cmctl to install the cert-manager once install command is not experimental
Expand Down
12 changes: 12 additions & 0 deletions apis/v1alpha1/opentelemetrycollector_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,11 +124,22 @@ type OpenTelemetryTargetAllocator struct {
// +optional
Enabled bool `json:"enabled,omitempty"`

// PrometheusCR defines the configuration for the retrieval of PrometheusOperator CRDs ( servicemonitor.monitoring.coreos.com/v1 and podmonitor.monitoring.coreos.com/v1 ).
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

question as before, from which namespaces are these objects queried?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have added a line clearing this up.

// All CR instances which the ServiceAccount has access to will be retrieved. This includes other namespaces.
// +optional
PrometheusCR OpenTelemetryTargetAllocatorPrometheusCR `json:"prometheusCR,omitempty"`

// Image indicates the container image to use for the OpenTelemetry TargetAllocator.
// +optional
Image string `json:"image,omitempty"`
}

// OpenTelemetryTargetAllocatorPrometheusCR configures whether the TargetAllocator
// uses PrometheusOperator custom resources as a source of targets.
type OpenTelemetryTargetAllocatorPrometheusCR struct {
// Enabled indicates whether to use PrometheusOperator custom resources as targets or not.
// +optional
Enabled bool `json:"enabled,omitempty"`
}

// ScaleSubresourceStatus defines the observed state of the OpenTelemetryCollector's
// scale subresource.
type ScaleSubresourceStatus struct {
Expand All @@ -141,6 +152,7 @@ type ScaleSubresourceStatus struct {
// deployment or statefulSet pods.
// +optional
Selector string `json:"selector,omitempty"`

}

// OpenTelemetryCollectorStatus defines the observed state of OpenTelemetryCollector.
Expand Down
16 changes: 16 additions & 0 deletions apis/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions bundle/manifests/opentelemetry.io_opentelemetrycollectors.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -673,6 +673,18 @@ spec:
description: Image indicates the container image to use for the
OpenTelemetry TargetAllocator.
type: string
prometheusCR:
description: PrometheusCR defines the configuration for the retrieval
of PrometheusOperator CRDs ( servicemonitor.monitoring.coreos.com/v1
and podmonitor.monitoring.coreos.com/v1 ) retrieval. All CR
instances which the ServiceAccount has access to will be retrieved.
This includes other namespaces.
properties:
enabled:
description: Enabled indicates whether to use a PrometheusOperator
custom resources as targets or not.
type: boolean
type: object
type: object
tolerations:
description: Toleration to schedule OpenTelemetry Collector pods.
Expand Down
2 changes: 1 addition & 1 deletion cmd/otel-allocator/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@ WORKDIR /root/
# Copy the pre-built binary file from the previous stage
COPY --from=builder /app/main .

CMD ["./main"]
ENTRYPOINT ["./main"]
85 changes: 85 additions & 0 deletions cmd/otel-allocator/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# Target Allocator

The TargetAllocator is an optional separately deployed component of an OpenTelemetry Collector setup, which is used to
distribute targets of the PrometheusReceiver on all deployed Collector instances.

# Design

If the Allocator is activated, all Prometheus configurations will be transferred into a separate ConfigMap, which is in
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

all Prometheus configurations

Is it OTEL p8s receiver configuration or as well p8s CRs?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This applies to the configuration of the prometheus receiver in the collector configuration. The Prometheus operator CRs are observed separately after the target allocator has been started, at which point it further modifies the service discovery configuration it is using.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is only referencing the OTEL p8s configuration.

turn mounted to the Allocator.
This configuration will be resolved to target configurations and then split across all OpenTelemetryCollector instances.

TargetAllocators expose the results as [HTTP_SD endpoints](https://prometheus.io/docs/prometheus/latest/http_sd/)
split by collector.

#### Endpoints
`/jobs`:

```json
{
"job1": {
"_link": "/jobs/job1/targets"
},
"job2": {
"_link": "/jobs/job2/targets"
}
}

```

`/jobs/{jobID}/targets`:

```json
{
"collector-1": {
"_link": "/jobs/job1/targets?collector_id=collector-1",
"targets": [
{
"Targets": [
"10.100.100.100",
"10.100.100.101",
"10.100.100.102"
],
"Labels": {
"namespace": "a_namespace",
"pod": "a_pod"
}
}
]
}
}
```

`/jobs/{jobID}/targets?collector_id={collectorID}`:

```json
[
{
"targets": [
"10.100.100.100",
"10.100.100.101",
"10.100.100.102"
],
"labels": {
"namespace": "a_namespace",
"pod": "a_pod"
}
}
]
```


## Packages
### Watchers
Watchers are responsible for the translation of external sources into Prometheus-readable scrape configurations and
trigger updates to the DiscoveryManager.

### DiscoveryManager
Watches the Prometheus service discovery for new targets and sets targets to the Allocator

### Allocator
Shards the received targets based on the discovered Collector instances

### Collector
Client to watch for deployed Collector instances, which will then be provided to the Allocator.

5 changes: 3 additions & 2 deletions cmd/otel-allocator/allocation/allocator.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package allocation

import (
"fmt"
"net/url"
"sync"

"github.com/go-logr/logr"
Expand Down Expand Up @@ -80,7 +81,7 @@ func (allocator *Allocator) SetWaitingTargets(targets []TargetItem) {
// SetCollectors sets the set of collectors with key=collectorName, value=Collector object.
// SetCollectors is called when Collectors are added or removed
func (allocator *Allocator) SetCollectors(collectors []string) {
log := allocator.log.WithValues("opentelemetry-targetallocator")
log := allocator.log.WithValues("component", "opentelemetry-targetallocator")

allocator.m.Lock()
defer allocator.m.Unlock()
Expand Down Expand Up @@ -132,7 +133,7 @@ func (allocator *Allocator) processWaitingTargets() {
allocator.TargetItems[k] = &v
targetItem := TargetItem{
JobName: v.JobName,
Link: LinkJSON{fmt.Sprintf("/jobs/%s/targets", v.JobName)},
Link: LinkJSON{fmt.Sprintf("/jobs/%s/targets", url.QueryEscape(v.JobName))},
TargetURL: v.TargetURL,
Label: v.Label,
Collector: col,
Expand Down
3 changes: 2 additions & 1 deletion cmd/otel-allocator/allocation/http.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package allocation

import (
"fmt"
"net/url"

"github.com/prometheus/common/model"
)
Expand Down Expand Up @@ -43,7 +44,7 @@ func GetAllTargetsByJob(job string, cMap map[string][]TargetItem, allocator *All
targetGroupList = append(targetGroupList, targetGroupJSON{Targets: targets, Labels: labelSetMap[targets[0]]})

}
displayData[j.Collector.Name] = collectorJSON{Link: fmt.Sprintf("/jobs/%s/targets?collector_id=%s", j.JobName, j.Collector.Name), Jobs: targetGroupList}
displayData[j.Collector.Name] = collectorJSON{Link: fmt.Sprintf("/jobs/%s/targets?collector_id=%s", url.QueryEscape(j.JobName), j.Collector.Name), Jobs: targetGroupList}

}
}
Expand Down
15 changes: 5 additions & 10 deletions cmd/otel-allocator/collector/collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,8 @@ type Client struct {
close chan struct{}
}

func NewClient(logger logr.Logger) (*Client, error) {
config, err := rest.InClusterConfig()
if err != nil {
return &Client{}, err
}

clientset, err := kubernetes.NewForConfig(config)
func NewClient(logger logr.Logger, kubeConfig *rest.Config) (*Client, error) {
clientset, err := kubernetes.NewForConfig(kubeConfig)
if err != nil {
return &Client{}, err
}
Expand All @@ -50,7 +45,7 @@ func NewClient(logger logr.Logger) (*Client, error) {

func (k *Client) Watch(ctx context.Context, labelMap map[string]string, fn func(collectors []string)) {
collectorMap := map[string]bool{}
log := k.log.WithValues("opentelemetry-targetallocator")
log := k.log.WithValues("component", "opentelemetry-targetallocator")

opts := metav1.ListOptions{
LabelSelector: labels.SelectorFromSet(labelMap).String(),
Expand Down Expand Up @@ -83,15 +78,15 @@ func (k *Client) Watch(ctx context.Context, labelMap map[string]string, fn func(
return
}
if msg := runWatch(ctx, k, watcher.ResultChan(), collectorMap, fn); msg != "" {
log.Info("Collector pod watch event stopped", msg)
log.Info("Collector pod watch event stopped " + msg)
return
}
}
}()
}

func runWatch(ctx context.Context, k *Client, c <-chan watch.Event, collectorMap map[string]bool, fn func(collectors []string)) string {
log := k.log.WithValues("opentelemetry-targetallocator")
log := k.log.WithValues("component", "opentelemetry-targetallocator")
for {
select {
case <-k.close:
Expand Down
Loading