Skip to content

Commit

Permalink
Merge pull request #39 from qclaogui/metrics-generator-enabled
Browse files Browse the repository at this point in the history
Kubernetes(traces): add metrics_generator
  • Loading branch information
qclaogui authored Mar 3, 2024
2 parents 3f96999 + 7576230 commit 0bf41d0
Show file tree
Hide file tree
Showing 51 changed files with 22,711 additions and 23,982 deletions.
24 changes: 17 additions & 7 deletions docker-compose/common/config/agent-flow/monolithic-mode-all.river
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ logging {
}

tracing {
sampling_fraction = 0.1
sampling_fraction = 0.8
write_to = [otelcol.processor.batch.containers.input]
}

Expand Down Expand Up @@ -144,16 +144,16 @@ loki.source.docker "containers" {
}

loki.process "containers" {
forward_to = [module.file.docker_compose.exports.logs_receiver]

stage.drop {
longer_than = "8KB"
older_than = "12h"
}

stage.tenant {
value = "anonymous"
value = "fake"
}

forward_to = [module.file.docker_compose.exports.logs_receiver]
}

/********************************************
Expand Down Expand Up @@ -210,6 +210,10 @@ otelcol.receiver.otlp "containers" {
}

otelcol.processor.batch "containers" {
send_batch_size = 16384
send_batch_max_size = 0
timeout = "2s"

output {
metrics = [otelcol.processor.memory_limiter.containers.input]
logs = [otelcol.processor.memory_limiter.containers.input]
Expand Down Expand Up @@ -259,12 +263,15 @@ loki.process "autologging" {
source = "body"
}

forward_to = [module.file.docker_compose.exports.logs_receiver]
forward_to = [loki.process.containers.receiver]
}

otelcol.processor.memory_limiter "containers" {
check_interval = "1s"
limit = "256MiB"

// limit = "150MiB" // alternatively, set `limit_percentage` and `spike_limit_percentage`
limit_percentage = 50
spike_limit_percentage = 30

output {
metrics = [otelcol.exporter.prometheus.containers.input]
Expand All @@ -278,7 +285,7 @@ otelcol.exporter.prometheus "containers" {
}

otelcol.exporter.loki "containers" {
forward_to = [module.file.docker_compose.exports.logs_receiver]
forward_to = [loki.process.containers.receiver]
}

/********************************************
Expand All @@ -287,6 +294,9 @@ otelcol.exporter.loki "containers" {

pyroscope.scrape "containers" {
targets = [
{"__address__" = "mimir:8080", "service_name" = "mimir"},
{"__address__" = "loki:3100", "service_name" = "loki-all"},
{"__address__" = "tempo:3200", "service_name" = "tempo-all"},
{"__address__" = "grafana:6060", "service_name" = "grafana"},
]

Expand Down
26 changes: 25 additions & 1 deletion docker-compose/common/config/agent-flow/traces.river
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ otelcol.receiver.jaeger "containers" {

output {
metrics = [otelcol.processor.batch.containers.input]
logs = [otelcol.processor.batch.containers.input]
traces = [otelcol.processor.batch.containers.input]
}
}
Expand All @@ -181,27 +182,50 @@ otelcol.receiver.otlp "containers" {

output {
metrics = [otelcol.processor.batch.containers.input]
logs = [otelcol.processor.batch.containers.input]
traces = [otelcol.processor.batch.containers.input]
}
}

otelcol.processor.batch "containers" {
output {
metrics = [otelcol.processor.memory_limiter.containers.input]
logs = [otelcol.processor.memory_limiter.containers.input]
traces = [otelcol.processor.memory_limiter.containers.input]
}
}

otelcol.processor.memory_limiter "containers" {
check_interval = "1s"
limit = "256MiB"

// limit = "150MiB" // alternatively, set `limit_percentage` and `spike_limit_percentage`
limit_percentage = 50
spike_limit_percentage = 30

output {
metrics = [otelcol.exporter.prometheus.containers.input]
logs = [otelcol.exporter.loki.containers.input]
traces = [module.file.docker_compose.exports.traces_receiver]
}
}

otelcol.exporter.prometheus "containers" {
forward_to = [module.file.docker_compose.exports.metrics_receiver]
}

otelcol.exporter.loki "containers" {
forward_to = [loki.process.containers.receiver]
}

loki.process "containers" {
stage.drop {
longer_than = "8KB"
older_than = "12h"
}

stage.tenant {
value = "fake"
}

forward_to = [module.file.docker_compose.exports.logs_receiver]
}
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,3 @@ query_range:
embedded_cache:
enabled: true
ttl: 1h

tracing:
enabled: false
3 changes: 0 additions & 3 deletions docker-compose/common/config/loki/monolithic-mode-logs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,3 @@ chunk_store_config:
chunk_cache_config:
embedded_cache:
enabled: true

tracing:
enabled: false
3 changes: 0 additions & 3 deletions docker-compose/common/config/loki/read-write-mode-logs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,3 @@ frontend:

query_scheduler:
max_outstanding_requests_per_tenant: 1024

tracing:
enabled: false
22 changes: 4 additions & 18 deletions docker-compose/common/config/tempo/datasources.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,23 +45,21 @@ datasources:
jsonData:
search:
hide: false
lokiSearch:
datasourceUid: logs
nodeGraph:
enabled: true
serviceMap:
datasourceUid: metrics
traceQuery:
timeShiftEnabled: true
spanStartTimeShift: '-1h'
spanEndTimeShift: '1h'
spanStartTimeShift: '-30m'
spanEndTimeShift: '30m'
spanBar:
type: 'Tag'
tag: 'http.path'
tracesToMetrics:
datasourceUid: metrics
spanStartTimeShift: '-1h'
spanEndTimeShift: '1h'
spanStartTimeShift: '-30m'
spanEndTimeShift: '30m'
tags: [{ key: 'service.name', value: 'service' }, { key: 'span_name' }, { key: 'http_method' }]
queries:
- name: '(R) Rate'
Expand All @@ -70,18 +68,6 @@ datasources:
query: 'sum(rate(traces_spanmetrics_calls_total{$$__tags, status_code="STATUS_CODE_ERROR"}[$$__rate_interval]))'
- name: '(D) Duration'
query: 'histogram_quantile(0.9, sum(rate(traces_spanmetrics_latency_bucket{$$__tags}[$$__rate_interval])) by (le))'
tracesToLogsV2:
datasourceUid: logs
spanStartTimeShift: '-1h'
spanEndTimeShift: '1h'
tags: [{ key: 'service.name', value: 'service_name' }, { key: 'namespace' }, { key: 'cluster' }]
filterByTraceID: false
filterBySpanID: false
tracesToProfiles:
customQuery: false
datasourceUid: "profiles"
profileTypeId: "process_cpu:cpu:nanoseconds:cpu:nanoseconds"
tags: [{ key: 'service.name', value: 'service_name' }]

# Pyroscope for profiles
- name: Profiles
Expand Down
18 changes: 6 additions & 12 deletions docker-compose/common/config/tempo/microservices-mode-traces.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@ distributor:
receivers:
otlp:
protocols:
http:
grpc:
endpoint: 0.0.0.0:4317
http:
endpoint: 0.0.0.0:4318

ingester:
trace_idle_period: 10s
Expand All @@ -36,9 +38,6 @@ querier:

metrics_generator:
processor:
# Keep all spans in the local blocks. This allows TraceQL metrics to use structural queries.
local_blocks:
filter_server_spans: false
span_metrics:
# Configure extra dimensions to add as metric labels.
dimensions:
Expand All @@ -54,10 +53,6 @@ metrics_generator:
- http.target
- http.status_code
- service.version
registry:
external_labels:
cluster: docker-compose
namespace: monitoring-system
storage:
path: /tmp/tempo/generator/wal
remote_write_add_org_id_header: true
Expand All @@ -69,11 +64,8 @@ metrics_generator:
X-Scope-OrgID: "anonymous"

server:
grpc_server_max_recv_msg_size: 4194304
grpc_server_max_send_msg_size: 4194304
http_listen_port: 3100
grpc_listen_port: 9095
log_level: info


storage:
Expand All @@ -93,4 +85,6 @@ storage:
overrides:
defaults:
metrics_generator:
processors: ['service-graphs', 'span-metrics']
processors:
- service-graphs
- span-metrics
27 changes: 10 additions & 17 deletions docker-compose/common/config/tempo/monolithic-mode-traces.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,10 @@ distributor:
receivers:
otlp:
protocols:
http:
grpc:

endpoint: 0.0.0.0:4317
http:
endpoint: 0.0.0.0:4318

ingester:
trace_idle_period: 10s
Expand All @@ -28,11 +29,12 @@ querier:
frontend_worker:
frontend_address: tempo:9095

server:
http_listen_port: 3200
grpc_listen_port: 9095

metrics_generator:
processor:
# Keep all spans in the local blocks. This allows TraceQL metrics to use structural queries.
local_blocks:
filter_server_spans: false
span_metrics:
# Configure extra dimensions to add as metric labels.
dimensions:
Expand All @@ -48,10 +50,6 @@ metrics_generator:
- http.target
- http.status_code
- service.version
registry:
external_labels:
cluster: docker-compose
namespace: monitoring-system
storage:
path: /tmp/tempo/generator/wal
remote_write_add_org_id_header: true
Expand All @@ -62,13 +60,6 @@ metrics_generator:
headers:
X-Scope-OrgID: "anonymous"

server:
grpc_server_max_recv_msg_size: 4194304
grpc_server_max_send_msg_size: 4194304
http_listen_port: 3200
grpc_listen_port: 9095
log_level: info

storage:
trace:
backend: s3
Expand All @@ -86,4 +77,6 @@ storage:
overrides:
defaults:
metrics_generator:
processors: ['service-graphs', 'span-metrics']
processors:
- service-graphs
- span-metrics
14 changes: 3 additions & 11 deletions docker-compose/microservices-mode/traces/docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,6 @@ services:
interval: 10s
timeout: 5s
retries: 5
ports:
- "3200"
- "4317"
- "4318"
- "8080"

distributor:
depends_on:
Expand Down Expand Up @@ -68,8 +63,6 @@ services:
image: *tempoImage
volumes:
- ../../common/config/tempo/microservices-mode-traces.yaml:/etc/tempo.yaml # Note: Tempo uses microservices-mode-traces.yaml
ports:
- "3100"
command:
- -config.file=/etc/tempo.yaml
- -target=ingester
Expand Down Expand Up @@ -135,12 +128,11 @@ services:
- -config.expand-env=true
- -log.level=warn
environment:
- JAEGER_SERVICE_NAME=mimir
- JAEGER_AGENT_HOST=agent
- JAEGER_AGENT_PORT=6831
- JAEGER_REPORTER_MAX_QUEUE_SIZE=1000
- JAEGER_SAMPLER_PARAM=1
- JAEGER_SAMPLER_TYPE=const
- JAEGER_TAGS=cluster=docker-compose,namespace=monitoring-system
- JAEGER_SAMPLER_PARAM=1
healthcheck:
test: [ "CMD-SHELL", "wget --no-verbose --tries=1 --spider http://localhost:8080/ready || exit 1" ]
interval: 10s
Expand Down Expand Up @@ -174,6 +166,6 @@ services:
- GF_SECURITY_ADMIN_PASSWORD=${GF_SECURITY_ADMIN_PASSWORD:-admin_password}
- GF_FEATURE_TOGGLES_ENABLE=traceqlEditor tracesEmbeddedFlameGraph traceqlSearch correlations metricsSummary traceToMetrics traceToProfiles
- GF_TRACING_OPENTELEMETRY_OTLP_ADDRESS=agent:4317
- GF_TRACING_OPENTELEMETRY_CUSTOM_ATTRIBUTES=cluster:docker-compose,namespace:monitoring-system
- GF_TRACING_OPENTELEMETRY_CUSTOM_ATTRIBUTES=namespace:monitoring-system
ports:
- "3000:3000"
Loading

0 comments on commit 0bf41d0

Please sign in to comment.