Kubernetes(traces): add metrics_generator #39

Merged · 2 commits · Mar 3, 2024
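In short, this change enables Tempo's metrics_generator (the service-graphs and span-metrics processors), switches the agent's otelcol memory_limiter from a fixed limit to percentage-based limits, tunes the batch processor and raises the agent's own tracing sampling_fraction from 0.1 to 0.8, routes exporter logs through a shared loki.process stage, and standardizes the log tenant on "fake". The headline Tempo override, as it appears in both Tempo configs in this diff:

overrides:
  defaults:
    metrics_generator:
      processors:
        - service-graphs
        - span-metrics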
@@ -6,7 +6,7 @@ logging {
}

tracing {
sampling_fraction = 0.1
sampling_fraction = 0.8
write_to = [otelcol.processor.batch.containers.input]
}

@@ -144,16 +144,16 @@ loki.source.docker "containers" {
}

loki.process "containers" {
forward_to = [module.file.docker_compose.exports.logs_receiver]

stage.drop {
longer_than = "8KB"
older_than = "12h"
}

stage.tenant {
value = "anonymous"
value = "fake"
}

forward_to = [module.file.docker_compose.exports.logs_receiver]
}

/********************************************
@@ -210,6 +210,10 @@ otelcol.receiver.otlp "containers" {
}

otelcol.processor.batch "containers" {
send_batch_size = 16384
send_batch_max_size = 0
timeout = "2s"

output {
metrics = [otelcol.processor.memory_limiter.containers.input]
logs = [otelcol.processor.memory_limiter.containers.input]
@@ -259,12 +263,15 @@ loki.process "autologging" {
source = "body"
}

forward_to = [module.file.docker_compose.exports.logs_receiver]
forward_to = [loki.process.containers.receiver]
}

otelcol.processor.memory_limiter "containers" {
check_interval = "1s"
limit = "256MiB"

// limit = "150MiB" // alternatively, set `limit_percentage` and `spike_limit_percentage`
limit_percentage = 50
spike_limit_percentage = 30

output {
metrics = [otelcol.exporter.prometheus.containers.input]
@@ -278,7 +285,7 @@ otelcol.exporter.prometheus "containers" {
}

otelcol.exporter.loki "containers" {
forward_to = [module.file.docker_compose.exports.logs_receiver]
forward_to = [loki.process.containers.receiver]
}

/********************************************
@@ -287,6 +294,9 @@ otelcol.exporter.loki "containers" {

pyroscope.scrape "containers" {
targets = [
{"__address__" = "mimir:8080", "service_name" = "mimir"},
{"__address__" = "loki:3100", "service_name" = "loki-all"},
{"__address__" = "tempo:3200", "service_name" = "tempo-all"},
{"__address__" = "grafana:6060", "service_name" = "grafana"},
]

26 changes: 25 additions & 1 deletion docker-compose/common/config/agent-flow/traces.river
@@ -162,6 +162,7 @@ otelcol.receiver.jaeger "containers" {

output {
metrics = [otelcol.processor.batch.containers.input]
logs = [otelcol.processor.batch.containers.input]
traces = [otelcol.processor.batch.containers.input]
}
}
@@ -181,27 +182,50 @@

output {
metrics = [otelcol.processor.batch.containers.input]
logs = [otelcol.processor.batch.containers.input]
traces = [otelcol.processor.batch.containers.input]
}
}

otelcol.processor.batch "containers" {
output {
metrics = [otelcol.processor.memory_limiter.containers.input]
logs = [otelcol.processor.memory_limiter.containers.input]
traces = [otelcol.processor.memory_limiter.containers.input]
}
}

otelcol.processor.memory_limiter "containers" {
check_interval = "1s"
limit = "256MiB"

// limit = "150MiB" // alternatively, set `limit_percentage` and `spike_limit_percentage`
limit_percentage = 50
spike_limit_percentage = 30

output {
metrics = [otelcol.exporter.prometheus.containers.input]
logs = [otelcol.exporter.loki.containers.input]
traces = [module.file.docker_compose.exports.traces_receiver]
}
}

otelcol.exporter.prometheus "containers" {
forward_to = [module.file.docker_compose.exports.metrics_receiver]
}

otelcol.exporter.loki "containers" {
forward_to = [loki.process.containers.receiver]
}

loki.process "containers" {
stage.drop {
longer_than = "8KB"
older_than = "12h"
}

stage.tenant {
value = "fake"
}

forward_to = [module.file.docker_compose.exports.logs_receiver]
}
@@ -66,6 +66,3 @@ query_range:
embedded_cache:
enabled: true
ttl: 1h

tracing:
enabled: false
3 changes: 0 additions & 3 deletions docker-compose/common/config/loki/monolithic-mode-logs.yaml
@@ -66,6 +66,3 @@ chunk_store_config:
chunk_cache_config:
embedded_cache:
enabled: true

tracing:
enabled: false
3 changes: 0 additions & 3 deletions docker-compose/common/config/loki/read-write-mode-logs.yaml
@@ -84,6 +84,3 @@ frontend:

query_scheduler:
max_outstanding_requests_per_tenant: 1024

tracing:
enabled: false
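Note that all three Loki configs above drop their explicit tracing: enabled: false block rather than flipping the value to true; this leans on Loki's default for that setting, which, as far as I can tell, enables tracing. If anyone prefers keeping it explicit, the equivalent would be a two-line block (a sketch assuming Loki's standard tracing section):

tracing:
  enabled: true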
22 changes: 4 additions & 18 deletions docker-compose/common/config/tempo/datasources.yaml
@@ -45,23 +45,21 @@ datasources:
jsonData:
search:
hide: false
lokiSearch:
datasourceUid: logs
nodeGraph:
enabled: true
serviceMap:
datasourceUid: metrics
traceQuery:
timeShiftEnabled: true
spanStartTimeShift: '-1h'
spanEndTimeShift: '1h'
spanStartTimeShift: '-30m'
spanEndTimeShift: '30m'
spanBar:
type: 'Tag'
tag: 'http.path'
tracesToMetrics:
datasourceUid: metrics
spanStartTimeShift: '-1h'
spanEndTimeShift: '1h'
spanStartTimeShift: '-30m'
spanEndTimeShift: '30m'
tags: [{ key: 'service.name', value: 'service' }, { key: 'span_name' }, { key: 'http_method' }]
queries:
- name: '(R) Rate'
@@ -70,18 +68,6 @@
query: 'sum(rate(traces_spanmetrics_calls_total{$$__tags, status_code="STATUS_CODE_ERROR"}[$$__rate_interval]))'
- name: '(D) Duration'
query: 'histogram_quantile(0.9, sum(rate(traces_spanmetrics_latency_bucket{$$__tags}[$$__rate_interval])) by (le))'
tracesToLogsV2:
datasourceUid: logs
spanStartTimeShift: '-1h'
spanEndTimeShift: '1h'
tags: [{ key: 'service.name', value: 'service_name' }, { key: 'namespace' }, { key: 'cluster' }]
filterByTraceID: false
filterBySpanID: false
tracesToProfiles:
customQuery: false
datasourceUid: "profiles"
profileTypeId: "process_cpu:cpu:nanoseconds:cpu:nanoseconds"
tags: [{ key: 'service.name', value: 'service_name' }]

# Pyroscope for profiles
- name: Profiles
@@ -18,8 +18,10 @@ distributor:
receivers:
otlp:
protocols:
http:
grpc:
endpoint: 0.0.0.0:4317
http:
endpoint: 0.0.0.0:4318

ingester:
trace_idle_period: 10s
@@ -36,9 +38,6 @@ querier:

metrics_generator:
processor:
# keep all spans in the local blocks. this will allow for traceql metrics using structural queries
local_blocks:
filter_server_spans: false
span_metrics:
# Configure extra dimensions to add as metric labels.
dimensions:
@@ -54,10 +53,6 @@
- http.target
- http.status_code
- service.version
registry:
external_labels:
cluster: docker-compose
namespace: monitoring-system
storage:
path: /tmp/tempo/generator/wal
remote_write_add_org_id_header: true
@@ -69,11 +64,8 @@
X-Scope-OrgID: "anonymous"

server:
grpc_server_max_recv_msg_size: 4194304
grpc_server_max_send_msg_size: 4194304
http_listen_port: 3100
grpc_listen_port: 9095
log_level: info


storage:
@@ -93,4 +85,6 @@
overrides:
defaults:
metrics_generator:
processors: ['service-graphs', 'span-metrics']
processors:
- service-graphs
- span-metrics
27 changes: 10 additions & 17 deletions docker-compose/common/config/tempo/monolithic-mode-traces.yaml
@@ -15,9 +15,10 @@ distributor:
receivers:
otlp:
protocols:
http:
grpc:
endpoint: 0.0.0.0:4317
http:
endpoint: 0.0.0.0:4318

ingester:
trace_idle_period: 10s
@@ -28,11 +29,12 @@ querier:
frontend_worker:
frontend_address: tempo:9095

server:
http_listen_port: 3200
grpc_listen_port: 9095

metrics_generator:
processor:
# keep all spans in the local blocks. this will allow for traceql metrics using structural queries
local_blocks:
filter_server_spans: false
span_metrics:
# Configure extra dimensions to add as metric labels.
dimensions:
@@ -48,10 +50,6 @@
- http.target
- http.status_code
- service.version
registry:
external_labels:
cluster: docker-compose
namespace: monitoring-system
storage:
path: /tmp/tempo/generator/wal
remote_write_add_org_id_header: true
@@ -62,13 +60,6 @@
headers:
X-Scope-OrgID: "anonymous"

server:
grpc_server_max_recv_msg_size: 4194304
grpc_server_max_send_msg_size: 4194304
http_listen_port: 3200
grpc_listen_port: 9095
log_level: info

storage:
trace:
backend: s3
@@ -86,4 +77,6 @@
overrides:
defaults:
metrics_generator:
processors: ['service-graphs', 'span-metrics']
processors:
- service-graphs
- span-metrics
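Both Tempo configs now pin the OTLP receiver to explicit endpoints (gRPC on 0.0.0.0:4317, HTTP on 0.0.0.0:4318). In this stack the agent appears to forward traces on to Tempo via the docker_compose module's traces_receiver export, but any container could also target those ports directly with the standard OpenTelemetry SDK environment variables. A minimal sketch, where the tempo host name and the service name are assumptions rather than part of this diff:

environment:
  - OTEL_EXPORTER_OTLP_ENDPOINT=http://tempo:4318
  - OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf
  - OTEL_RESOURCE_ATTRIBUTES=service.name=my-app  # hypothetical client service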
14 changes: 3 additions & 11 deletions docker-compose/microservices-mode/traces/docker-compose.yaml
@@ -36,11 +36,6 @@ services:
interval: 10s
timeout: 5s
retries: 5
ports:
- "3200"
- "4317"
- "4318"
- "8080"

distributor:
depends_on:
@@ -68,8 +63,6 @@ services:
image: *tempoImage
volumes:
- ../../common/config/tempo/microservices-mode-traces.yaml:/etc/tempo.yaml # Note: Tempo use microservices-mode-traces.yaml
ports:
- "3100"
command:
- -config.file=/etc/tempo.yaml
- -target=ingester
@@ -135,12 +128,11 @@ services:
- -config.expand-env=true
- -log.level=warn
environment:
- JAEGER_SERVICE_NAME=mimir
- JAEGER_AGENT_HOST=agent
- JAEGER_AGENT_PORT=6831
- JAEGER_REPORTER_MAX_QUEUE_SIZE=1000
- JAEGER_SAMPLER_PARAM=1
- JAEGER_SAMPLER_TYPE=const
- JAEGER_TAGS=cluster=docker-compose,namespace=monitoring-system
- JAEGER_SAMPLER_PARAM=1
healthcheck:
test: [ "CMD-SHELL", "wget --no-verbose --tries=1 --spider http://localhost:8080/ready || exit 1" ]
interval: 10s
@@ -174,6 +166,6 @@ services:
- GF_SECURITY_ADMIN_PASSWORD=${GF_SECURITY_ADMIN_PASSWORD:-admin_password}
- GF_FEATURE_TOGGLES_ENABLE=traceqlEditor tracesEmbeddedFlameGraph traceqlSearch correlations metricsSummary traceToMetrics traceToProfiles
- GF_TRACING_OPENTELEMETRY_OTLP_ADDRESS=agent:4317
- GF_TRACING_OPENTELEMETRY_CUSTOM_ATTRIBUTES=cluster:docker-compose,namespace:monitoring-system
- GF_TRACING_OPENTELEMETRY_CUSTOM_ATTRIBUTES=namespace:monitoring-system
ports:
- "3000:3000"