From 86b0ea6dd6563deb3dab36279ea25a9efd368540 Mon Sep 17 00:00:00 2001
From: Weifeng Wang
Date: Mon, 11 Mar 2024 19:20:34 +0800
Subject: [PATCH 1/3] Docker Compose(logs): Log Ingestion via Labels

Signed-off-by: Weifeng Wang
---
 .../common/config/agent-flow/logs.river       |  72 ++-------
 .../agent-flow/modules/docker/README.md       |  24 +++
 .../agent-flow/modules/docker/logs/all.river  | 118 +++++++++++++++
 .../docker/logs/drops/level-debug.river       |  28 ++++
 .../docker/logs/drops/level-info.river        |  28 ++++
 .../docker/logs/drops/level-trace.river       |  28 ++++
 .../modules/docker/logs/drops/levels.river    |  36 +++++
 .../docker/logs/labels/keep-labels.river      |  52 +++++++
 .../docker/logs/labels/log-level.river        | 105 +++++++++++++
 .../modules/docker/logs/log-formats/all.river | 116 +++++++++++++++
 .../docker/logs/log-formats/common-log.river  |  71 +++++++++
 .../docker/logs/log-formats/dotnet.river      |  76 ++++++++++
 .../docker/logs/log-formats/istio.river       |  78 ++++++++++
 .../docker/logs/log-formats/json.river        |  72 +++++++++
 .../docker/logs/log-formats/klog.river        | 106 +++++++++++++
 .../docker/logs/log-formats/log4j.river       | 140 ++++++++++++++++++
 .../docker/logs/log-formats/logfmt.river      |  73 +++++++++
 .../docker/logs/log-formats/otel.river        |  76 ++++++++++
 .../docker/logs/log-formats/postgres.river    |  73 +++++++++
 .../docker/logs/log-formats/python.river      |  78 ++++++++++
 .../docker/logs/log-formats/spring-boot.river |  80 ++++++++++
 .../docker/logs/log-formats/syslog.river      |  46 ++++++
 .../docker/logs/log-formats/zerolog.river     | 122 +++++++++++++++
 .../modules/docker/logs/masks/all.river       |  60 ++++++++
 .../docker/logs/masks/credit-card.river       |  35 +++++
 .../modules/docker/logs/masks/email.river     |  29 ++++
 .../modules/docker/logs/masks/ipv4.river      |  29 ++++
 .../modules/docker/logs/masks/ipv6.river      |  29 ++++
 .../modules/docker/logs/masks/phone.river     |  29 ++++
 .../modules/docker/logs/masks/ssn.river       |  29 ++++
 .../metrics/post-process-bytes-lines.river    |  39 +++++
 .../metrics/pre-process-bytes-lines.river     |  74 +++++++++
 .../modules/docker/logs/relabelings.river     |  65 ++++++++
 .../modules/docker/logs/scrubs/all.river      |  28 ++++
 .../docker/logs/scrubs/json-empties.river     |  32 ++++
 .../docker/logs/scrubs/json-nulls.river       |  31 ++++
 .../logs/targets/logs-from-docker.river       |  40 +++++
 .../modules/docker/relabelings/common.river   |  87 +++++++++++
 .../agent-flow/modules/docker_compose.river   |  28 ++--
 .../monolithic-mode/logs/docker-compose.yaml  |   5 +
 40 files changed, 2295 insertions(+), 72 deletions(-)
 create mode 100644 docker-compose/common/config/agent-flow/modules/docker/README.md
 create mode 100644 docker-compose/common/config/agent-flow/modules/docker/logs/all.river
 create mode 100644 docker-compose/common/config/agent-flow/modules/docker/logs/drops/level-debug.river
 create mode 100644 docker-compose/common/config/agent-flow/modules/docker/logs/drops/level-info.river
 create mode 100644 docker-compose/common/config/agent-flow/modules/docker/logs/drops/level-trace.river
 create mode 100644 docker-compose/common/config/agent-flow/modules/docker/logs/drops/levels.river
 create mode 100644 docker-compose/common/config/agent-flow/modules/docker/logs/labels/keep-labels.river
 create mode 100644 docker-compose/common/config/agent-flow/modules/docker/logs/labels/log-level.river
 create mode 100644 docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/all.river
 create mode 100644 docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/common-log.river
 create mode 100644 docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/dotnet.river
 create mode 100644 docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/istio.river
 create mode 100644 docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/json.river
 create mode 100644 docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/klog.river
 create mode 100644 docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/log4j.river
 create mode 100644 docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/logfmt.river
 create mode 100644 docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/otel.river
 create mode 100644 docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/postgres.river
 create mode 100644 docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/python.river
 create mode 100644 docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/spring-boot.river
 create mode 100644 docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/syslog.river
 create mode 100644 docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/zerolog.river
 create mode 100644 docker-compose/common/config/agent-flow/modules/docker/logs/masks/all.river
 create mode 100644 docker-compose/common/config/agent-flow/modules/docker/logs/masks/credit-card.river
 create mode 100644 docker-compose/common/config/agent-flow/modules/docker/logs/masks/email.river
 create mode 100644 docker-compose/common/config/agent-flow/modules/docker/logs/masks/ipv4.river
 create mode 100644 docker-compose/common/config/agent-flow/modules/docker/logs/masks/ipv6.river
 create mode 100644 docker-compose/common/config/agent-flow/modules/docker/logs/masks/phone.river
 create mode 100644 docker-compose/common/config/agent-flow/modules/docker/logs/masks/ssn.river
 create mode 100644 docker-compose/common/config/agent-flow/modules/docker/logs/metrics/post-process-bytes-lines.river
 create mode 100644 docker-compose/common/config/agent-flow/modules/docker/logs/metrics/pre-process-bytes-lines.river
 create mode 100644 docker-compose/common/config/agent-flow/modules/docker/logs/relabelings.river
 create mode 100644 docker-compose/common/config/agent-flow/modules/docker/logs/scrubs/all.river
 create mode 100644 docker-compose/common/config/agent-flow/modules/docker/logs/scrubs/json-empties.river
 create mode 100644 docker-compose/common/config/agent-flow/modules/docker/logs/scrubs/json-nulls.river
 create mode 100644 docker-compose/common/config/agent-flow/modules/docker/logs/targets/logs-from-docker.river
 create mode 100644 docker-compose/common/config/agent-flow/modules/docker/relabelings/common.river

diff --git a/docker-compose/common/config/agent-flow/logs.river b/docker-compose/common/config/agent-flow/logs.river
index 825dbf43..ffa6a59c 100644
--- a/docker-compose/common/config/agent-flow/logs.river
+++ b/docker-compose/common/config/agent-flow/logs.river
@@ -1,10 +1,14 @@
 // https://github.com/grafana/agent-configurator
 
 logging {
-  level  = "warn"
+  level  = "info"
   format = "logfmt"
 }
 
+/********************************************
+ * LGTMP Receiver provider
+ ********************************************/
+
 module.file "docker_compose" {
   filename = env("AGENT_CONFIG_FOLDER") + "/modules/docker_compose.river"
 
@@ -13,72 +17,14 @@ module.file "docker_compose" {
   }
 }
 
-discovery.relabel "containers" {
-  targets = module.file.docker_compose.exports.relabelings_common.output
-}
-
 /********************************************
  * Logs
  ********************************************/
 
-loki.source.docker "containers" {
-  host          = "unix:///var/run/docker.sock"
-  targets       = discovery.relabel.containers.output
-  relabel_rules = discovery.relabel.containers.rules
-  forward_to    = [loki.process.containers.receiver]
-}
-
-loki.process "containers" {
-  forward_to = [module.file.docker_compose.exports.logs_receiver]
-
-  stage.drop {
-    longer_than = "8KB"
-    older_than  = "12h"
-  }
-
-  stage.tenant {
-    value = "anonymous"
-  }
-}
-
-/********************************************
- * Otelcol for Logs
- ********************************************/
-
-otelcol.receiver.otlp "containers" {
-  grpc {
-    endpoint = "0.0.0.0:4317"
-  }
-
-  http {
-    endpoint = "0.0.0.0:4318"
-  }
-
-  output {
-    logs = [otelcol.processor.batch.containers.input]
-  }
-}
-
-otelcol.processor.batch "containers" {
-  send_batch_size     = 16384
-  send_batch_max_size = 0
-  timeout             = "2s"
+module.file "logs_primary" {
+  filename = env("AGENT_CONFIG_FOLDER") + "/modules/docker/logs/all.river"
 
-  output {
-    logs = [otelcol.processor.memory_limiter.containers.input]
-  }
-}
-
-otelcol.processor.memory_limiter "containers" {
-  check_interval         = "1s"
-  limit_percentage       = 50
-  spike_limit_percentage = 30
-
-  output {
-    logs = [otelcol.exporter.loki.containers.input]
+  arguments {
+    forward_to = [module.file.docker_compose.exports.logs_receiver]
   }
 }
-
-otelcol.exporter.loki "containers" {
-  forward_to = [loki.process.containers.receiver]
-}
diff --git a/docker-compose/common/config/agent-flow/modules/docker/README.md b/docker-compose/common/config/agent-flow/modules/docker/README.md
new file mode 100644
index 00000000..60af20e5
--- /dev/null
+++ b/docker-compose/common/config/agent-flow/modules/docker/README.md
@@ -0,0 +1,24 @@
+# Docker Compose Modules
+
+## Logs
+
+The following service labels are supported:
+
+| Label | Description |
+| :--------------- | :-----------|
+| `logs.agent.grafana.com/scrape` | Allow a service to declare whether or not its logs should be scraped. |
+| `logs.agent.grafana.com/tenant` | Allow a service to override the tenant for its logs. |
+| `logs.agent.grafana.com/log-format` | If specified, additional processing is performed to extract details based on the specified format. This value can be a comma-delimited list, for instances where a service may have multiple containers. The currently supported formats are: `common-log` (CLF), `dotnet-json`, `istio`, `json`, `klog`, `log4j-json`, `log4j-text`, `logfmt`, `otel`, `postgres`, `python-json`, `spring-boot`, `syslog` and `zerolog`. |
+| `logs.agent.grafana.com/scrub-level` | Boolean whether or not the level should be dropped from the log message (as it is a label). |
+| `logs.agent.grafana.com/scrub-timestamp` | Boolean whether or not the timestamp should be dropped from the log message (as it is metadata). |
+| `logs.agent.grafana.com/scrub-nulls` | Boolean whether or not keys with null values should be dropped from json, reducing the size of the log message. |
+| `logs.agent.grafana.com/scrub-empties` | Boolean whether or not keys with empty values (`"", [], {}`) should be dropped from json, reducing the size of the log message. |
+| `logs.agent.grafana.com/drop-info` | Boolean whether or not info messages should be dropped (default is `false`), but a service can override this temporarily or permanently. |
+| `logs.agent.grafana.com/drop-debug` | Boolean whether or not debug messages should be dropped (default is `true`), but a service can override this temporarily or permanently. |
+| `logs.agent.grafana.com/drop-trace` | Boolean whether or not trace messages should be dropped (default is `true`), but a service can override this temporarily or permanently. |
+| `logs.agent.grafana.com/mask-ssn` | Boolean whether or not to mask SSNs in the log line; if true the data will be masked as `*SSN*salt*`. |
+| `logs.agent.grafana.com/mask-credit-card` | Boolean whether or not to mask credit cards in the log line; if true the data will be masked as `*credit-card*salt*`. |
+| `logs.agent.grafana.com/mask-email` | Boolean whether or not to mask emails in the log line; if true the data will be masked as `*email*salt*`. |
+| `logs.agent.grafana.com/mask-ipv4` | Boolean whether or not to mask IPv4 addresses in the log line; if true the data will be masked as `*ipv4*salt*`. |
+| `logs.agent.grafana.com/mask-ipv6` | Boolean whether or not to mask IPv6 addresses in the log line; if true the data will be masked as `*ipv6*salt*`. |
+| `logs.agent.grafana.com/mask-phone` | Boolean whether or not to mask phone numbers in the log line; if true the data will be masked as `*phone*salt*`. |
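For example, a Compose service can opt in to this processing entirely through labels. A minimal sketch (the service name, image, and label values here are illustrative, not defaults):

```yaml
services:
  my-app:
    image: my-app:latest                              # placeholder image
    labels:
      logs.agent.grafana.com/log-format: "json"       # parse the container's JSON logs
      logs.agent.grafana.com/scrub-timestamp: "true"  # drop the redundant timestamp field
      logs.agent.grafana.com/drop-debug: "false"      # keep debug messages for this service
```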
diff --git a/docker-compose/common/config/agent-flow/modules/docker/logs/all.river b/docker-compose/common/config/agent-flow/modules/docker/logs/all.river
new file mode 100644
index 00000000..9b21738c
--- /dev/null
+++ b/docker-compose/common/config/agent-flow/modules/docker/logs/all.river
@@ -0,0 +1,118 @@
+/*
+Module: log-all
+Description: Wrapper module to include all Docker logging modules
+*/
+argument "forward_to" {
+  // comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to"
+  optional = false
+}
+
+argument "tenant" {
+  // comment = "The tenant to filter logs to. This does not have to be the tenantId; it is the value to look for in the logs.agent.grafana.com/tenant label, and it can be a regex."
+  optional = true
+  default  = ".*"
+}
+
+argument "keep_labels" {
+  // comment = "List of labels to keep before the log message is written to Loki"
+  optional = true
+  default  = [
+    "app",
+    "cluster",
+    "component",
+    "container",
+    "deployment",
+    "env",
+    "filename",
+    "instance",
+    "job",
+    "level",
+    "log_type",
+    "namespace",
+    "region",
+    "service",
+    "squad",
+    "team",
+  ]
+}
+
+module.file "log_targets" {
+  filename = env("AGENT_CONFIG_FOLDER") + "/modules/docker/logs/targets/logs-from-docker.river"
+
+  arguments {
+    forward_to = [module.file.log_formats_all.exports.process.receiver]
+    tenant     = argument.tenant.value
+  }
+}
+
+module.file "log_formats_all" {
+  filename = env("AGENT_CONFIG_FOLDER") + "/modules/docker/logs/log-formats/all.river"
+
+  arguments {
+    forward_to = [module.file.log_level_default.exports.process.receiver]
+  }
+}
+
+module.file "log_level_default" {
+  filename = env("AGENT_CONFIG_FOLDER") + "/modules/docker/logs/labels/log-level.river"
+
+  arguments {
+    // here the pipeline forks: one branch goes to the metrics pre-processing module, the other
+    // continues to the drop-levels module.  The pre-process metrics branch needs a reduced label
+    // set, while the full set is still necessary in the downstream modules
+    forward_to = [
+      module.file.pre_process_metrics.exports.process.receiver,
+      module.file.drop_levels.exports.process.receiver,
+    ]
+  }
+}
+
+module.file "pre_process_metrics" {
+  filename = env("AGENT_CONFIG_FOLDER") + "/modules/docker/logs/metrics/pre-process-bytes-lines.river"
+
+  arguments {
+    forward_to  = [module.file.drop_levels.exports.process.receiver]
+    keep_labels = argument.keep_labels.value
+  }
+}
+
+module.file "drop_levels" {
+  filename = env("AGENT_CONFIG_FOLDER") + "/modules/docker/logs/drops/levels.river"
+
+  arguments {
+    forward_to = [module.file.scrub_all.exports.process.receiver]
+  }
+}
+
+module.file "scrub_all" {
+  filename = env("AGENT_CONFIG_FOLDER") + "/modules/docker/logs/scrubs/all.river"
+
+  arguments {
+    forward_to = [module.file.mask_all.exports.process.receiver]
+  }
+}
+
+module.file "mask_all" {
+  filename = env("AGENT_CONFIG_FOLDER") + "/modules/docker/logs/masks/all.river"
+
+  arguments {
+    forward_to = [module.file.label_keep.exports.process.receiver]
+  }
+}
+
+module.file "label_keep" {
+  filename = env("AGENT_CONFIG_FOLDER") + "/modules/docker/logs/labels/keep-labels.river"
+
+  arguments {
+    forward_to  = [module.file.post_process_metrics.exports.process.receiver]
+    keep_labels = argument.keep_labels.value
+  }
+}
+
+module.file "post_process_metrics" {
+  filename = env("AGENT_CONFIG_FOLDER") + "/modules/docker/logs/metrics/post-process-bytes-lines.river"
+
+  arguments {
+    forward_to = argument.forward_to.value
+  }
+}
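For reference, `logs.river` above loads this wrapper with only `forward_to` set; the optional arguments can be overridden the same way. A sketch (the `tenant` value and trimmed `keep_labels` list are illustrative, not defaults):

```river
module.file "logs_primary" {
  filename = env("AGENT_CONFIG_FOLDER") + "/modules/docker/logs/all.river"

  arguments {
    forward_to  = [module.file.docker_compose.exports.logs_receiver]
    tenant      = "anonymous"                          // only collect logs labeled for this tenant
    keep_labels = ["app", "container", "job", "level"] // keep a smaller label set
  }
}
```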
diff --git a/docker-compose/common/config/agent-flow/modules/docker/logs/drops/level-debug.river b/docker-compose/common/config/agent-flow/modules/docker/logs/drops/level-debug.river
new file mode 100644
index 00000000..0d3b63a3
--- /dev/null
+++ b/docker-compose/common/config/agent-flow/modules/docker/logs/drops/level-debug.river
@@ -0,0 +1,28 @@
+/*
+Module: drop-debug
+Description: The default behavior is to drop debug-level messages automatically; however, debug-level
+             messages can still be kept by adding the label:
+
+             logs.agent.grafana.com/drop-debug: false
+*/
+argument "forward_to" {
+  // comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to"
+  optional = false
+}
+
+export "process" {
+  value = loki.process.drop_debug
+}
+
+loki.process "drop_debug" {
+  forward_to = argument.forward_to.value
+
+  // check the logs.agent.grafana.com/drop-debug label; if it is not set or set to true then drop
+  // any log message with level=debug
+  stage.match {
+    pipeline_name       = "pipeline for annotation || logs.agent.grafana.com/drop-debug: true"
+    selector            = "{level=~\"(?i)debug?\",logs_agent_grafana_com_drop_debug!=\"false\"}"
+    action              = "drop"
+    drop_counter_reason = "debug"
+  }
+}
diff --git a/docker-compose/common/config/agent-flow/modules/docker/logs/drops/level-info.river b/docker-compose/common/config/agent-flow/modules/docker/logs/drops/level-info.river
new file mode 100644
index 00000000..4173934e
--- /dev/null
+++ b/docker-compose/common/config/agent-flow/modules/docker/logs/drops/level-info.river
@@ -0,0 +1,28 @@
+/*
+Module: drop-info
+Description: The default behavior is to keep info-level messages automatically; however, info-level
+             messages can be dropped by adding the label:
+
+             logs.agent.grafana.com/drop-info: true
+*/
+argument "forward_to" {
+  // comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to"
+  optional = false
+}
+
+export "process" {
+  value = loki.process.drop_info
+}
+
+loki.process "drop_info" {
+  forward_to = argument.forward_to.value
+
+  // check the logs.agent.grafana.com/drop-info label; if it is set to true then drop
+  // any log message with level=info
+  stage.match {
+    pipeline_name       = "pipeline for annotation || logs.agent.grafana.com/drop-info: true"
+    selector            = "{level=~\"(?i)info?\",logs_agent_grafana_com_drop_info=\"true\"}"
+    action              = "drop"
+    drop_counter_reason = "info"
+  }
+}
diff --git a/docker-compose/common/config/agent-flow/modules/docker/logs/drops/level-trace.river b/docker-compose/common/config/agent-flow/modules/docker/logs/drops/level-trace.river
new file mode 100644
index 00000000..6d62e77d
--- /dev/null
+++ b/docker-compose/common/config/agent-flow/modules/docker/logs/drops/level-trace.river
@@ -0,0 +1,28 @@
+/*
+Module: drop-trace
+Description: The default behavior is to drop trace-level messages automatically; however, trace-level
+             messages can still be kept by adding the label:
+
+             logs.agent.grafana.com/drop-trace: false
+*/
+argument "forward_to" {
+  // comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to"
+  optional = false
+}
+
+export "process" {
+  value = loki.process.drop_trace
+}
+
+loki.process "drop_trace" {
+  forward_to = argument.forward_to.value
+
+  // check the logs.agent.grafana.com/drop-trace label; if it is not set or set to true then drop
+  // any log message with level=trace
+  stage.match {
+    pipeline_name       = "pipeline for annotation || logs.agent.grafana.com/drop-trace: true"
+    selector            = "{level=~\"(?i)trace?\",logs_agent_grafana_com_drop_trace!=\"false\"}"
+    action              = "drop"
+    drop_counter_reason = "trace"
+  }
+}
"/modules/docker/logs/drops/level-trace.river" + + arguments { + forward_to = [module.file.drop_debug.exports.process.receiver] + } +} + +module.file "drop_debug" { + filename = env("AGENT_CONFIG_FOLDER") + "/modules/docker/logs/drops/level-debug.river" + + arguments { + forward_to = [module.file.drop_info.exports.process.receiver] + } +} + +module.file "drop_info" { + filename = env("AGENT_CONFIG_FOLDER") + "/modules/docker/logs/drops/level-info.river" + + arguments { + forward_to = argument.forward_to.value + } +} diff --git a/docker-compose/common/config/agent-flow/modules/docker/logs/labels/keep-labels.river b/docker-compose/common/config/agent-flow/modules/docker/logs/labels/keep-labels.river new file mode 100644 index 00000000..e39bd395 --- /dev/null +++ b/docker-compose/common/config/agent-flow/modules/docker/logs/labels/keep-labels.river @@ -0,0 +1,52 @@ +/* +Module: keep-labels +Description: Pre-defined set of labels to keep, this stage should always be in-place as the previous relabeing + stages make every pod label and annotation a label in the pipeline, which we do not want created + in Loki as that would have extremely high-cardinality. +*/ +argument "forward_to" { + // comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to" + optional = false +} + +argument "keep_labels" { + optional = true + // comment = "List of labels to keep before the log message is written to Loki" + default = [ + "app", + "cluster", + "component", + "container", + "deployment", + "env", + "filename", + "instance", + "job", + "level", + "log_type", + "namespace", + "region", + "service", + "squad", + "team", + ] +} + +export "process" { + value = loki.process.keep_labels +} + +/* +As all of the pod labels and annotations we transformed into labels in the previous relabelings to make +them available to the pipeline processing we need to ensure they are not automatically created in Loki. +This would result in an extremely high number of labels and values severely impacting query performance. +Not every log has to contain these labels, but this list should reflect the set of labels that you want +to explicitly allow. +*/ +loki.process "keep_labels" { + forward_to = argument.forward_to.value + + stage.label_keep { + values = argument.keep_labels.value + } +} diff --git a/docker-compose/common/config/agent-flow/modules/docker/logs/labels/log-level.river b/docker-compose/common/config/agent-flow/modules/docker/logs/labels/log-level.river new file mode 100644 index 00000000..f4af22fe --- /dev/null +++ b/docker-compose/common/config/agent-flow/modules/docker/logs/labels/log-level.river @@ -0,0 +1,105 @@ +/* +Module: log-level +Description: Sets a default log level of "unknown", then based on known patterns attempts to assign an appropriate log + log level based on the contents of the log line. This should be considered as default/initial processing + as there are modules for parsing specific log patterns. 
diff --git a/docker-compose/common/config/agent-flow/modules/docker/logs/labels/log-level.river b/docker-compose/common/config/agent-flow/modules/docker/logs/labels/log-level.river
new file mode 100644
index 00000000..f4af22fe
--- /dev/null
+++ b/docker-compose/common/config/agent-flow/modules/docker/logs/labels/log-level.river
@@ -0,0 +1,105 @@
+/*
+Module: log-level
+Description: Sets a default log level of "unknown", then based on known patterns attempts to assign
+             an appropriate log level based on the contents of the log line.  This should be
+             considered default/initial processing, as there are modules for parsing specific log
+             patterns.
+*/
+argument "forward_to" {
+  // comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to"
+  optional = false
+}
+
+export "process" {
+  value = loki.process.log_level
+}
+
+loki.process "log_level" {
+  forward_to = argument.forward_to.value
+
+  // if a log level is not set, default it to unknown
+  stage.match {
+    selector = "{level=\"\"}"
+
+    // default level to unknown
+    stage.static_labels {
+      values = {
+        level = "unknown",
+      }
+    }
+  }
+
+  // if a log_type is not set, default it to unknown
+  stage.match {
+    selector = "{log_type=\"\"}"
+
+    // default log_type to unknown
+    stage.static_labels {
+      values = {
+        log_type = "unknown",
+      }
+    }
+  }
+
+  // check to see if the log line matches the klog format (https://github.com/kubernetes/klog)
+  stage.match {
+    // unescaped regex: ([IWED][0-9]{4}\s+[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]+)
+    selector = "{level=\"unknown\"} |~ \"([IWED][0-9]{4}\\\\s+[0-9]{2}:[0-9]{2}:[0-9]{2}\\\\.[0-9]+)\""
+
+    // extract the log level, klog uses a single letter code for the level followed by the month and day i.e. I0119
+    stage.regex {
+      expression = "((?P<level>[A-Z])[0-9])"
+    }
+
+    // if the extracted level is I set INFO
+    stage.replace {
+      source     = "level"
+      expression = "(I)"
+      replace    = "INFO"
+    }
+
+    // if the extracted level is W set WARN
+    stage.replace {
+      source     = "level"
+      expression = "(W)"
+      replace    = "WARN"
+    }
+
+    // if the extracted level is E set ERROR
+    stage.replace {
+      source     = "level"
+      expression = "(E)"
+      replace    = "ERROR"
+    }
+
+    // if the extracted level is D set DEBUG
+    stage.replace {
+      source     = "level"
+      expression = "(D)"
+      replace    = "DEBUG"
+    }
+
+    // set the extracted level to be a label
+    stage.labels {
+      values = {
+        level = "",
+      }
+    }
+  }
+
+  // if the level is still unknown, do one last attempt at detecting it based on common levels
+  stage.match {
+    selector = "{level=\"unknown\"}"
+
+    // unescaped regex: (?i)(?:"(?:level|loglevel|levelname|lvl|SeverityText)":\s*"|\s+(?:level|loglevel|lvl)="?|\s+\[?)(?P<level>(DEBUG?|INFO|WARN(ING)?|ERR(OR)?|CRITICAL|FATAL|NOTICE|TRACE))("|\s+|-|\s*\])
+    stage.regex {
+      expression = "(?i)(?:\"(?:level|loglevel|levelname|lvl|SeverityText)\":\\s*\"|\\s+(?:level|loglevel|lvl)=\"?|\\s+\\[?)(?P<level>(DEBUG?|INFO|WARN(ING)?|ERR(OR)?|CRITICAL|FATAL|NOTICE|TRACE))(\"|\\s+|-|\\s*\\])"
+    }
+
+    // set the extracted level to be a label
+    stage.labels {
+      values = {
+        level = "",
+      }
+    }
+  }
+}
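To make the detection passes concrete, here are hypothetical log lines and the `level` label each would receive from this module:

```text
I0311 19:20:34.123456 ...               -> level=INFO    (klog single-letter code)
{"level":"warn","msg":"shutting down"}  -> level=warn    (generic key/value fallback)
ts=... lvl=error msg="oops"             -> level=error   (generic key/value fallback)
plain text with no level anywhere       -> level=unknown
```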
"log_format_istio" { + filename = env("AGENT_CONFIG_FOLDER") + "/modules/docker/logs/log-formats/istio.river" + + arguments { + forward_to = [module.file.log_format_json.exports.process.receiver] + } +} + +module.file "log_format_json" { + filename = env("AGENT_CONFIG_FOLDER") + "/modules/docker/logs/log-formats/json.river" + + arguments { + forward_to = [module.file.log_format_klog.exports.process.receiver] + } +} + +module.file "log_format_klog" { + filename = env("AGENT_CONFIG_FOLDER") + "/modules/docker/logs/log-formats/klog.river" + + arguments { + forward_to = [module.file.log_format_log4j.exports.process.receiver] + } +} + +module.file "log_format_log4j" { + filename = env("AGENT_CONFIG_FOLDER") + "/modules/docker/logs/log-formats/log4j.river" + + arguments { + forward_to = [module.file.log_format_logfmt.exports.process.receiver] + } +} + +module.file "log_format_logfmt" { + filename = env("AGENT_CONFIG_FOLDER") + "/modules/docker/logs/log-formats/logfmt.river" + + arguments { + forward_to = [module.file.log_format_otel.exports.process.receiver] + } +} + +module.file "log_format_otel" { + filename = env("AGENT_CONFIG_FOLDER") + "/modules/docker/logs/log-formats/otel.river" + + arguments { + forward_to = [module.file.log_format_postgres.exports.process.receiver] + } +} + +module.file "log_format_postgres" { + filename = env("AGENT_CONFIG_FOLDER") + "/modules/docker/logs/log-formats/postgres.river" + + arguments { + forward_to = [module.file.log_format_python.exports.process.receiver] + } +} + +module.file "log_format_python" { + filename = env("AGENT_CONFIG_FOLDER") + "/modules/docker/logs/log-formats/python.river" + + arguments { + forward_to = [module.file.log_format_spring_boot.exports.process.receiver] + } +} + +module.file "log_format_spring_boot" { + filename = env("AGENT_CONFIG_FOLDER") + "/modules/docker/logs/log-formats/spring-boot.river" + + arguments { + forward_to = [module.file.log_format_syslog.exports.process.receiver] + } +} + +module.file "log_format_syslog" { + filename = env("AGENT_CONFIG_FOLDER") + "/modules/docker/logs/log-formats/syslog.river" + + arguments { + forward_to = [module.file.log_format_zerolog.exports.process.receiver] + } +} + +module.file "log_format_zerolog" { + filename = env("AGENT_CONFIG_FOLDER") + "/modules/docker/logs/log-formats/zerolog.river" + + arguments { + forward_to = argument.forward_to.value + } +} diff --git a/docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/common-log.river b/docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/common-log.river new file mode 100644 index 00000000..b524fe4a --- /dev/null +++ b/docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/common-log.river @@ -0,0 +1,71 @@ +/* +Module: log-format-clf +Description: Log Processing for common-log (apache/nginx) +Docs: https://www.w3.org/Daemon/User/Config/Logging.html#common-logfile-format +*/ +argument "forward_to" { + // comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to" + optional = false +} + +export "process" { + value = loki.process.log_format_clf +} + +loki.process "log_format_clf" { + forward_to = argument.forward_to.value + + // check logs.agent.grafana.com/log-format annotation, if the log_type is empty the line hasn't been processed, if it contains clf and the line matches the format, then process the line as clf + stage.match { + pipeline_name = "pipeline for annotation || logs.agent.grafana.com/log-format: clf" + // unescaped regex: 
diff --git a/docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/common-log.river b/docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/common-log.river
new file mode 100644
index 00000000..b524fe4a
--- /dev/null
+++ b/docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/common-log.river
@@ -0,0 +1,71 @@
+/*
+Module: log-format-clf
+Description: Log Processing for common-log (apache/nginx)
+Docs: https://www.w3.org/Daemon/User/Config/Logging.html#common-logfile-format
+*/
+argument "forward_to" {
+  // comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to"
+  optional = false
+}
+
+export "process" {
+  value = loki.process.log_format_clf
+}
+
+loki.process "log_format_clf" {
+  forward_to = argument.forward_to.value
+
+  // check the logs.agent.grafana.com/log-format label; if log_type is empty the line hasn't been
+  // processed, so if the label contains clf and the line matches the format, process the line as clf
+  stage.match {
+    pipeline_name = "pipeline for annotation || logs.agent.grafana.com/log-format: clf"
+    // unescaped regex: \S+\s+\S+\s+\S+\s+\[\S+\s+\S+\]\s+"[^"]+"\s+\d+\s+\d+
+    selector = "{log_type=\"\", logs_agent_grafana_com_log_format=~\"(?i).*((apache|nginx|common-?log|clf)).*\"} |~ \"^\\\\S+\\\\s+\\\\S+\\\\s+\\\\S+\\\\s+\\\\[\\\\S+\\\\s+\\\\S+\\\\]\\\\s+\\\"[^\\\"]+\\\"\\\\s+\\\\d+\\\\s+\\\\d+$\""
+
+    // clf doesn't have a log level, set default to info, set the log_type
+    stage.static_labels {
+      values = {
+        level    = "info",
+        log_type = "clf",
+      }
+    }
+
+    // extract the http response code and request method as they might want to be used as labels
+    stage.regex {
+      // unescaped regex: (?P<response_code>\d{3}) "(?P<request_method>\S+)
+      expression = "(?P<response_code>[0-9]{3}) \"(?P<request_method>\\S+)"
+    }
+
+    // set the extracted response code and request method as labels
+    stage.labels {
+      values = {
+        response_code  = "",
+        request_method = "",
+      }
+    }
+
+    // check to see if the string "failed" or "error" is found in the log line; if so set the level to error
+    stage.match {
+      selector = "{logs_agent_grafana_com_log_format=~\"(?i)(apache|nginx|common-log|clf)\"} |~ \" (failed|error) \""
+
+      stage.static_labels {
+        values = {
+          level = "error",
+        }
+      }
+    }
+
+    // check the logs.agent.grafana.com/scrub-timestamp label; if true remove the timestamp from the log line
+    // this can reduce the overall # of bytes sent and stored in Loki
+    stage.match {
+      selector      = "{logs_agent_grafana_com_scrub_timestamp=\"true\"}"
+      pipeline_name = "pipeline for annotation || logs.agent.grafana.com/scrub-timestamp: true"
+
+      // remove timestamp from the log line
+      // unescaped regex: (\[([^\]]+)\])
+      stage.replace {
+        expression = "(\\[([^\\]]+)\\])"
+        replace    = ""
+      }
+    }
+  }
+}
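As an illustration, a hypothetical access-log line that matches the selector above:

```text
127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326
```

This line would be labeled `log_type=clf` and `level=info`, with the regex stage then attempting to pull `response_code` and `request_method` out as labels.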
diff --git a/docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/dotnet.river b/docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/dotnet.river
new file mode 100644
index 00000000..7ae1e1b6
--- /dev/null
+++ b/docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/dotnet.river
@@ -0,0 +1,76 @@
+/*
+Module: log-format-dotnet
+Description: Log Processing for .Net
+Docs: https://learn.microsoft.com/en-us/dotnet/core/extensions/console-log-formatter#json
+*/
+argument "forward_to" {
+  // comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to"
+  optional = false
+}
+
+export "process" {
+  value = loki.process.log_format_dotnet
+}
+
+loki.process "log_format_dotnet" {
+  forward_to = argument.forward_to.value
+
+  // check the logs.agent.grafana.com/log-format label; if log_type is empty the line hasn't been
+  // processed, so if the label contains dotnet-json and the line matches the format, process the line as dotnet-json
+  stage.match {
+    pipeline_name = "pipeline for annotation || logs.agent.grafana.com/log-format: dotnet-json"
+    // unescaped regex: ^\s*\{.+\}\s*$
+    selector = "{log_type=\"\", logs_agent_grafana_com_log_format=~\"(?i).*(dotnet-?json).*\"} |~ \"^\\\\s*\\\\{.+\\\\}\\\\s*$\""
+
+    // set the log_type
+    stage.static_labels {
+      values = {
+        log_type = "dotnet",
+      }
+    }
+
+    // extract the level and category if they exist
+    stage.json {
+      expressions = {
+        level    = "LogLevel",
+        category = "Category",
+      }
+    }
+
+    // set the extracted level and category as labels
+    stage.labels {
+      values = {
+        level    = "",
+        category = "",
+      }
+    }
+
+    // check the logs.agent.grafana.com/scrub-timestamp label; if true remove the timestamp from the log line
+    // this can reduce the overall # of bytes sent and stored in Loki
+    stage.match {
+      selector      = "{logs_agent_grafana_com_scrub_timestamp=\"true\"}"
+      pipeline_name = "pipeline for annotation || logs.agent.grafana.com/scrub-timestamp: true"
+
+      // remove the "Timestamp" field from the log line
+      // unescaped regex: (?i)("(Timestamp)"\s*:\s*\[?"[^"]+"\]?,?)
+      stage.replace {
+        expression = "(?i)(\"(Timestamp)\"\\s*:\\s*\\[?\"[^\"]+\"\\]?,?)"
+        replace    = ""
+      }
+    }
+
+    // check the logs.agent.grafana.com/scrub-level label; if true remove the level from the log line (it is still a label)
+    // this can reduce the overall # of bytes sent and stored in Loki
+    stage.match {
+      selector      = "{logs_agent_grafana_com_scrub_level=~\"(?i)true\"}"
+      pipeline_name = "pipeline for annotation || logs.agent.grafana.com/scrub-level: true"
+
+      // remove level from the log line
+      stage.replace {
+        // unescaped regex: (?i)"LogLevel"\s*:\s*"[^"]+",?
+        expression = "(?i)(\"LogLevel\"\\s*:\\s*\"[^\"]+\",?)"
+        replace    = ""
+      }
+    }
+  }
+}
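For example, a hypothetical .NET JSON console line:

```text
{"Timestamp":"2024-03-11T19:20:34Z","LogLevel":"Warning","Category":"Microsoft.AspNetCore.Hosting","Message":"Overriding address(es)"}
```

would be labeled `log_type=dotnet`, `level=Warning`, and `category=Microsoft.AspNetCore.Hosting`; with `scrub-timestamp: true` the `"Timestamp"` key is also removed from the stored line.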
"{logs_agent_grafana_com_scrub_level=~\"(?i)true\"}" + pipeline_name = "pipeline for annotation || logs.agent.grafana.com/scrub-level: true" + + // remove level from the log line + stage.replace { + // unescaped regex: "level"\s*:\s*"[^"]+",? + expression = "(?i)(\"level\"\\s*:\\s*\"[^\"]+\",?)" + replace = "" + } + } + } +} diff --git a/docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/json.river b/docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/json.river new file mode 100644 index 00000000..7777745c --- /dev/null +++ b/docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/json.river @@ -0,0 +1,72 @@ +/* +Module: log-format-json +Description: Log Processing for Generic JSON +*/ +argument "forward_to" { + // comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to" + optional = false +} + +export "process" { + value = loki.process.log_format_json +} + +loki.process "log_format_json" { + forward_to = argument.forward_to.value + + // check logs.agent.grafana.com/log-format annotation, if the log_type is empty the line hasn't been processed, if it contains json and the line matches the format, then process the line as json + stage.match { + pipeline_name = "pipeline for annotation || logs.agent.grafana.com/log-format: json" + selector = "{log_type=\"\", logs_agent_grafana_com_log_format=~\"(?i).*((generic-?)?json).*\"} |~ \"^\\\\s*\\\\{.+\\\\}\\\\s*$\"" + + // set the log_type + stage.static_labels { + values = { + log_type = "json", + } + } + + // extract the level + stage.json { + expressions = { + level = "level || lvl || loglevel || LogLevel || log_level || logLevel || log_lvl || logLvl || levelname || levelName || LevelName", + } + } + + // set the extracted level as a label + stage.labels { + values = { + level = "", + } + } + + // check logs.agent.grafana.com/scrub-timestamp annotation, if true remove the timestamp from the log line + // this can reduce the overall # of bytes sent and stored in Loki + // remove timestamp from the log line, depending on the entry it can be "start_time" or "time" + stage.match { + selector = "{logs_agent_grafana_com_scrub_timestamp=\"true\"}" + pipeline_name = "pipeline for annotation || logs.agent.grafana.com/scrub-timestamp: true" + + // remove timestamp from the log line + // unescaped regex: (?i)("(timestamp|ts|logdate|time)"\s*:\s*"[^"]+",?) + stage.replace { + expression = "(?i)(\"(timestamp|ts|logdate|time)\"\\s*:\\s*\"[^\"]+\",?)" + replace = "" + } + } + + // check logs.agent.grafana.com/scrub-level annotation, if true remove the level from the log line (it is still a label) + // this can reduce the overall # of bytes sent and stored in Loki + stage.match { + selector = "{logs_agent_grafana_com_scrub_level=~\"(?i)true\"}" + pipeline_name = "pipeline for annotation || logs.agent.grafana.com/scrub-level: true" + + // remove level from the log line + stage.replace { + // unescaped regex: (?i)"(log)?(level|lvl)"\s*:\s*"[^"]+",? 
diff --git a/docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/json.river b/docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/json.river
new file mode 100644
index 00000000..7777745c
--- /dev/null
+++ b/docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/json.river
@@ -0,0 +1,72 @@
+/*
+Module: log-format-json
+Description: Log Processing for Generic JSON
+*/
+argument "forward_to" {
+  // comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to"
+  optional = false
+}
+
+export "process" {
+  value = loki.process.log_format_json
+}
+
+loki.process "log_format_json" {
+  forward_to = argument.forward_to.value
+
+  // check the logs.agent.grafana.com/log-format label; if log_type is empty the line hasn't been
+  // processed, so if the label contains json and the line matches the format, process the line as json
+  stage.match {
+    pipeline_name = "pipeline for annotation || logs.agent.grafana.com/log-format: json"
+    selector = "{log_type=\"\", logs_agent_grafana_com_log_format=~\"(?i).*((generic-?)?json).*\"} |~ \"^\\\\s*\\\\{.+\\\\}\\\\s*$\""
+
+    // set the log_type
+    stage.static_labels {
+      values = {
+        log_type = "json",
+      }
+    }
+
+    // extract the level
+    stage.json {
+      expressions = {
+        level = "level || lvl || loglevel || LogLevel || log_level || logLevel || log_lvl || logLvl || levelname || levelName || LevelName",
+      }
+    }
+
+    // set the extracted level as a label
+    stage.labels {
+      values = {
+        level = "",
+      }
+    }
+
+    // check the logs.agent.grafana.com/scrub-timestamp label; if true remove the timestamp from the log line
+    // this can reduce the overall # of bytes sent and stored in Loki
+    // depending on the entry the timestamp key can be "timestamp", "ts", "logdate" or "time"
+    stage.match {
+      selector      = "{logs_agent_grafana_com_scrub_timestamp=\"true\"}"
+      pipeline_name = "pipeline for annotation || logs.agent.grafana.com/scrub-timestamp: true"
+
+      // remove timestamp from the log line
+      // unescaped regex: (?i)("(timestamp|ts|logdate|time)"\s*:\s*"[^"]+",?)
+      stage.replace {
+        expression = "(?i)(\"(timestamp|ts|logdate|time)\"\\s*:\\s*\"[^\"]+\",?)"
+        replace    = ""
+      }
+    }
+
+    // check the logs.agent.grafana.com/scrub-level label; if true remove the level from the log line (it is still a label)
+    // this can reduce the overall # of bytes sent and stored in Loki
+    stage.match {
+      selector      = "{logs_agent_grafana_com_scrub_level=~\"(?i)true\"}"
+      pipeline_name = "pipeline for annotation || logs.agent.grafana.com/scrub-level: true"
+
+      // remove level from the log line
+      stage.replace {
+        // unescaped regex: (?i)"(log)?(level|lvl)"\s*:\s*"[^"]+",?
+        expression = "(?i)(\"(log)?(level|lvl)\"\\s*:\\s*\"[^\"]+\",?)"
+        replace    = ""
+      }
+    }
+  }
+}
diff --git a/docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/klog.river b/docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/klog.river
new file mode 100644
index 00000000..f2238398
--- /dev/null
+++ b/docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/klog.river
@@ -0,0 +1,106 @@
+/*
+Module: log-format-klog
+Description: Log Processing for klog (used by kube-state-metrics and more in kube-system)
+Docs: https://github.com/kubernetes/klog
+*/
+argument "forward_to" {
+  // comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to"
+  optional = false
+}
+
+export "process" {
+  value = loki.process.log_format_klog
+}
+
+loki.process "log_format_klog" {
+  forward_to = argument.forward_to.value
+
+  // check the logs.agent.grafana.com/log-format label; if log_type is empty the line hasn't been
+  // processed, so if the label contains klog and the line matches the format, process the line as
+  // a klog (https://github.com/kubernetes/klog)
+  stage.match {
+    pipeline_name = "pipeline for annotation || logs.agent.grafana.com/log-format: klog"
+    // unescaped regex: ^[IWED]\d+\s+\d{2}:\d{2}:\d{2}\.\d+\s+\d+\s+\S+:\d+\]\s+.*$
+    selector = "{log_type=\"\", logs_agent_grafana_com_log_format=~\"(?i).*(klog).*\"} |~ \"^[IWED]\\\\d+\\\\s+\\\\d{2}:\\\\d{2}:\\\\d{2}\\\\.\\\\d+\\\\s+\\\\d+\\\\s+\\\\S+:\\\\d+\\\\]\\\\s+.*$\""
+
+    // set the log_type
+    stage.static_labels {
+      values = {
+        log_type = "klog",
+      }
+    }
+
+    // extract the log level, klog uses a single letter code for the level followed by the month and day i.e. I0119
+    stage.regex {
+      expression = "((?P<level>[A-Z])[0-9])"
+    }
+
+    // if the extracted level is I set INFO
+    stage.replace {
+      source     = "level"
+      expression = "(I)"
+      replace    = "INFO"
+    }
+
+    // if the extracted level is W set WARN
+    stage.replace {
+      source     = "level"
+      expression = "(W)"
+      replace    = "WARN"
+    }
+
+    // if the extracted level is E set ERROR
+    stage.replace {
+      source     = "level"
+      expression = "(E)"
+      replace    = "ERROR"
+    }
+
+    // if the extracted level is D set DEBUG
+    stage.replace {
+      source     = "level"
+      expression = "(D)"
+      replace    = "DEBUG"
+    }
+
+    // set the extracted level to be a label
+    stage.labels {
+      values = {
+        level = "",
+      }
+    }
+
+    // check the logs.agent.grafana.com/scrub-timestamp label; if true remove the timestamp from the log line
+    // this can reduce the overall # of bytes sent and stored in Loki
+    stage.match {
+      selector      = "{logs_agent_grafana_com_scrub_timestamp=\"true\"}"
+      pipeline_name = "pipeline for annotation || logs.agent.grafana.com/scrub-timestamp: true"
+
+      // remove timestamp from the log line
+      // unescaped regex: ([0-9]{4}\s+[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]+\s+)
+      stage.replace {
+        expression = "([0-9]{4}\\s+[0-9]{2}:[0-9]{2}:[0-9]{2}\\.[0-9]+\\s+)"
+        replace    = ""
+      }
+    }
+
+    // check the logs.agent.grafana.com/scrub-level label; if true remove the level from the log line (it is still a label)
+    // this can reduce the overall # of bytes sent and stored in Loki
+    stage.match {
+      selector      = "{logs_agent_grafana_com_scrub_level=~\"(?i)true\"}"
+      pipeline_name = "pipeline for annotation || logs.agent.grafana.com/scrub-level: true"
+
+      // remove level from the log line
+      stage.replace {
+        // unescaped regex: ^(I|W|E|D)
+        expression = "(^(I|W|E|D))"
+        replace    = ""
+      }
+    }
+  }
+}
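A hypothetical klog line:

```text
I0311 19:20:34.123456       1 controller.go:42] Starting workers
```

matches the selector above and would be labeled `log_type=klog` and `level=INFO` (from the leading `I`).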
class = "", + timestamp = "", + } + } + + // check logs.agent.grafana.com/scrub-timestamp annotation, if true remove the timestamp from the log line + // this can reduce the overall # of bytes sent and stored in Loki + // remove timestamp from the log line, depending on the entry it can be "start_time" or "time" + stage.match { + selector = "{logs_agent_grafana_com_scrub_timestamp=\"true\"}" + pipeline_name = "pipeline for annotation || logs.agent.grafana.com/scrub-timestamp: true" + + // remove timestamp from the log line + // unescaped regex: (?i)("@?timestamp"\s*:\s*"[^"]+",) + stage.replace { + expression = "(?i)(\"@?timestamp\"\\s*:\\s*\"[^\"]+\",)" + replace = "" + } + } + + // check logs.agent.grafana.com/scrub-level annotation, if true remove the level from the log line (it is still a label) + // this can reduce the overall # of bytes sent and stored in Loki + stage.match { + selector = "{logs_agent_grafana_com_scrub_level=~\"(?i)true\"}" + pipeline_name = "pipeline for annotation || logs.agent.grafana.com/scrub-level: true" + + // remove level from the log line + stage.replace { + // unescaped regex: ("level"\s*:\s*"[^"]+",) + expression = "(\"level\"\\s*:\\s*\"[^\"]+\",)" + replace = "" + } + } + } + + // check logs.agent.grafana.com/log-format annotation, if the log_type is empty the line hasn't been processed, if it contains log4j-text and the line matches the format, then process the line as log4j-text + stage.match { + pipeline_name = "pipeline for annotation || logs.agent.grafana.com/log-format: log4j-text" + // unescaped regex: ^\d{4}[^\[]+\[\S+\]\s+\w+\s+\S+\s+-\s+.*$ + selector = "{log_type=\"\", logs_agent_grafana_com_log_format=~\"(?i).*(log4j(-?te?xt)?).*\"} |~ \"^\\\\d{4}[^\\\\[]+\\\\[\\\\S+\\\\]\\\\s+\\\\w+\\\\s+\\\\S+\\\\s+-\\\\s+.*$\"" + + // set the log_type + stage.static_labels { + values = { + log_type = "log4j", + } + } + + // extract the timestamp, level, traceId, spanId, processId, thread, logger from the log line + stage.regex { + // unescaped regex: (?P[0-9]{4}-[0-9]{2}-[0-9]{2}(T|\s+)[0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]+)?(Z|(\+|-)[0-9]+)?)\s+(?P\w+)\s+\[(?P[^]]+)\] + expression = "(?P[0-9]{4}-[0-9]{2}-[0-9]{2}(T|\\s+)[0-9]{2}:[0-9]{2}:[0-9]{2}(\\.[0-9]+)?(Z|(\\+|-)[0-9]+)?)\\s+(?P\\w+)\\s+\\[(?P[^]]+)\\]" + } + + // set the extracted values as labels so they can be used by downstream components, most likely several labels + // will be dropped before being written to Loki + stage.labels { + values = { + level = "", + thread = "", + } + } + + // check logs.agent.grafana.com/scrub-timestamp annotation, if true remove the timestamp from the log line + // this can reduce the overall # of bytes sent and stored in Loki + // remove timestamp from the log line, depending on the entry it can be "start_time" or "time" + stage.match { + selector = "{logs_agent_grafana_com_scrub_timestamp=\"true\"}" + pipeline_name = "pipeline for annotation || logs.agent.grafana.com/scrub-timestamp: true" + + // remove timestamp from the log line + // unescaped regex: ([0-9]{4}-[0-9]{2}-[0-9]{2}(T|\s+)[0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]+)?(Z|(\+|-)[0-9]+)?) 
+ stage.replace { + expression = "([0-9]{4}-[0-9]{2}-[0-9]{2}(T|\\s+)[0-9]{2}:[0-9]{2}:[0-9]{2}(\\.[0-9]+)?(Z|(\\+|-)[0-9]+)?)" + replace = "" + } + } + + // check logs.agent.grafana.com/scrub-level annotation, if true remove the level from the log line (it is still a label) + // this can reduce the overall # of bytes sent and stored in Loki + stage.match { + selector = "{logs_agent_grafana_com_scrub_level=~\"(?i)true\"}" + pipeline_name = "pipeline for annotation || logs.agent.grafana.com/scrub-level: true" + + // remove level from the log line + stage.replace { + // unescaped regex: (\[?(DEBUG|INFO|WARN|ERROR|FATAL|TRACE)\]\s*) + expression = "(\\[?(DEBUG|INFO|WARN|ERROR|FATAL|TRACE)\\]\\s*)" + replace = "" + } + } + } +} diff --git a/docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/logfmt.river b/docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/logfmt.river new file mode 100644 index 00000000..09c188e8 --- /dev/null +++ b/docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/logfmt.river @@ -0,0 +1,73 @@ +/* +Module: log-format-logfmt +Description: Handles formatting for log format of logfmt which is the default Golang format +*/ +argument "forward_to" { + // comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to" + optional = false +} + +export "process" { + value = loki.process.log_format_logfmt +} + +loki.process "log_format_logfmt" { + forward_to = argument.forward_to.value + + // check logs.agent.grafana.com/log-format annotation, if the log_type is empty the line hasn't been processed, if it contains logfmt and the line matches the format, then process the line as + // a logfmt (https://github.com/go-logfmt/logfmt) + stage.match { + pipeline_name = "pipeline for annotation || logs.agent.grafana.com/log-format: logfmt" + // unescaped regex: (\w+=("[^"]*"|\S+))(\s+(\w+=("[^"]*"|\S+)))*\s* + selector = "{log_type=\"\", logs_agent_grafana_com_log_format=~\"(?i).*(logfmt).*\"} |~ \"(\\\\w+=(\\\"[^\\\"]*\\\"|\\\\S+))(\\\\s+(\\\\w+=(\\\"[^\\\"]*\\\"|\\\\S+)))*\\\\s*\"" + + // set the log_type + stage.static_labels { + values = { + log_type = "logfmt", + } + } + + // while the level could be extracted as logfmt, this allows for multiple possible log levels formats + // i.e. loglevel=info, level=info, lvl=info, loglvl=info + stage.regex { + expression = "(log)?(level|lvl)=\"?(?P\\S+)\"?" 
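For example, a hypothetical log4j JSON layout line:

```text
{"@timestamp":"2024-03-11T19:20:34.123Z","level":"ERROR","thread_name":"main","logger_name":"org.example.App","message":"startup failed"}
```

would be labeled `log_type=log4j`, `level=ERROR`, `thread=main`, and `logger=org.example.App`.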
diff --git a/docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/logfmt.river b/docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/logfmt.river
new file mode 100644
index 00000000..09c188e8
--- /dev/null
+++ b/docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/logfmt.river
@@ -0,0 +1,73 @@
+/*
+Module: log-format-logfmt
+Description: Handles the logfmt log format, which is the common default format for Go applications
+*/
+argument "forward_to" {
+  // comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to"
+  optional = false
+}
+
+export "process" {
+  value = loki.process.log_format_logfmt
+}
+
+loki.process "log_format_logfmt" {
+  forward_to = argument.forward_to.value
+
+  // check the logs.agent.grafana.com/log-format label; if log_type is empty the line hasn't been
+  // processed, so if the label contains logfmt and the line matches the format, process the line as
+  // logfmt (https://github.com/go-logfmt/logfmt)
+  stage.match {
+    pipeline_name = "pipeline for annotation || logs.agent.grafana.com/log-format: logfmt"
+    // unescaped regex: (\w+=("[^"]*"|\S+))(\s+(\w+=("[^"]*"|\S+)))*\s*
+    selector = "{log_type=\"\", logs_agent_grafana_com_log_format=~\"(?i).*(logfmt).*\"} |~ \"(\\\\w+=(\\\"[^\\\"]*\\\"|\\\\S+))(\\\\s+(\\\\w+=(\\\"[^\\\"]*\\\"|\\\\S+)))*\\\\s*\""
+
+    // set the log_type
+    stage.static_labels {
+      values = {
+        log_type = "logfmt",
+      }
+    }
+
+    // while the level could be extracted with a logfmt stage, this regex allows for multiple possible log level keys
+    // i.e. loglevel=info, level=info, lvl=info, loglvl=info
+    stage.regex {
+      expression = "(log)?(level|lvl)=\"?(?P<level>\\S+)\"?"
+    }
+
+    // set the extracted level value as a label
+    stage.labels {
+      values = {
+        level = "",
+      }
+    }
+
+    // check the logs.agent.grafana.com/scrub-timestamp label; if true remove the timestamp from the log line
+    // this can reduce the overall # of bytes sent and stored in Loki
+    stage.match {
+      selector      = "{logs_agent_grafana_com_scrub_timestamp=\"true\"}"
+      pipeline_name = "pipeline for annotation || logs.agent.grafana.com/scrub-timestamp: true"
+
+      // remove timestamp from the log line
+      // unescaped regex: ((ts?|timestamp)=\d{4}-\d{2}-\d{2}(T|\s+)\d{2}:\d{2}:\d{2}(\.\d+)?(Z|(\+|-)\d+)?\s+)
+      stage.replace {
+        expression = "((ts?|timestamp)=[0-9]{4}-[0-9]{2}-[0-9]{2}(T|\\s+)[0-9]{2}:[0-9]{2}:[0-9]{2}(\\.[0-9]+)?(Z|(\\+|-)[0-9]+)?\\s+)"
+        replace    = ""
+      }
+    }
+
+    // check the logs.agent.grafana.com/scrub-level label; if true remove the level from the log line (it is still a label)
+    // this can reduce the overall # of bytes sent and stored in Loki
+    stage.match {
+      selector      = "{logs_agent_grafana_com_scrub_level=~\"(?i)true\"}"
+      pipeline_name = "pipeline for annotation || logs.agent.grafana.com/scrub-level: true"
+
+      // remove level from the log line
+      stage.replace {
+        // unescaped regex: (log)?(lvl|level)="?[^\s]+\s"?
+        expression = "(?i)((log)?(lvl|level)=\"?[^\\s]+\\s\"?)"
+        replace    = ""
+      }
+    }
+  }
+}
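A hypothetical logfmt line:

```text
ts=2024-03-11T19:20:34.123Z level=warn msg="slow request" duration=1.2s
```

would be labeled `log_type=logfmt` and `level=warn`; with `scrub-timestamp: true` the leading `ts=...` pair is removed from the stored line.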
diff --git a/docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/otel.river b/docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/otel.river
new file mode 100644
index 00000000..6b2a50ac
--- /dev/null
+++ b/docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/otel.river
@@ -0,0 +1,76 @@
+/*
+Module: log-format-otel
+Description: Log Processing for OpenTelemetry
+Docs: https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/logs/data-model.md
+*/
+argument "forward_to" {
+  // comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to"
+  optional = false
+}
+
+export "process" {
+  value = loki.process.log_format_otel
+}
+
+loki.process "log_format_otel" {
+  forward_to = argument.forward_to.value
+
+  // check the logs.agent.grafana.com/log-format label; if log_type is empty the line hasn't been
+  // processed, so if the label contains otel and the line matches the format, process the line as otel
+  stage.match {
+    pipeline_name = "pipeline for annotation || logs.agent.grafana.com/log-format: otel"
+    selector = "{log_type=\"\", logs_agent_grafana_com_log_format=~\"(?i).*((otel|open-?telemetry)(-?json)).*\"} |~ \"^\\\\s*\\\\{.+\\\\}\\\\s*$\""
+
+    // set the log_type
+    stage.static_labels {
+      values = {
+        log_type = "otel",
+      }
+    }
+
+    // extract the SeverityText (level), and service.name
+    // Docs: https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/resource/semantic_conventions/README.md#service
+    stage.json {
+      expressions = {
+        level   = "SeverityText",
+        service = "Resource.\"service.name\"",
+      }
+    }
+
+    // set the extracted level and service as labels
+    stage.labels {
+      values = {
+        level   = "",
+        service = "",
+      }
+    }
+
+    // check the logs.agent.grafana.com/scrub-timestamp label; if true remove the timestamp from the log line
+    // this can reduce the overall # of bytes sent and stored in Loki
+    stage.match {
+      selector      = "{logs_agent_grafana_com_scrub_timestamp=\"true\"}"
+      pipeline_name = "pipeline for annotation || logs.agent.grafana.com/scrub-timestamp: true"
+
+      // remove the "Timestamp" field from the log line
+      // unescaped regex: ("Timestamp"\s*:\s*"[^"]+",)
+      stage.replace {
+        expression = "(\"Timestamp\"\\s*:\\s*\"[^\"]+\",)"
+        replace    = ""
+      }
+    }
+
+    // check the logs.agent.grafana.com/scrub-level label; if true remove the level from the log line (it is still a label)
+    // this can reduce the overall # of bytes sent and stored in Loki
+    stage.match {
+      selector      = "{logs_agent_grafana_com_scrub_level=~\"(?i)true\"}"
+      pipeline_name = "pipeline for annotation || logs.agent.grafana.com/scrub-level: true"
+
+      // remove level from the log line
+      stage.replace {
+        // unescaped regex: ("SeverityText"\s*:\s*"[^"]+",)
+        expression = "(?i)(\"SeverityText\"\\s*:\\s*\"[^\"]+\",)"
+        replace    = ""
+      }
+    }
+  }
+}
+ stage.replace { + expression = "(\\[?[0-9]{4}-[0-9]{2}-[0-9]{2}(T|\\s+)[0-9]{2}:[0-9]{2}:[0-9]{2}.[0-9]+\\s+\\w+\\]?)" + replace = "" + } + } + + // check logs.agent.grafana.com/scrub-level annotation, if true remove the level from the log line (it is still a label) + // this can reduce the overall # of bytes sent and stored in Loki + stage.match { + selector = "{logs_agent_grafana_com_scrub_level=~\"(?i)true\"}" + pipeline_name = "pipeline for annotation || logs.agent.grafana.com/scrub-level: true" + + // remove level from the log line + stage.replace { + // unescaped regex: ((INFO|NOTICE|WARNING|ERROR|LOG|FATAL|PANIC|DEBUG)\d*:\s+) + expression = "((INFO|NOTICE|WARNING|ERROR|LOG|FATAL|PANIC|DEBUG)\\d*:\\s+)" + replace = "" + } + } + } +} diff --git a/docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/python.river b/docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/python.river new file mode 100644 index 00000000..89b4e4a8 --- /dev/null +++ b/docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/python.river @@ -0,0 +1,78 @@ +/* +Module: log-format-python +Description: Log Processing for Python +*/ +argument "forward_to" { + // comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to" + optional = false +} + +export "process" { + value = loki.process.log_format_python +} + +loki.process "log_format_python" { + forward_to = argument.forward_to.value + + // check logs.agent.grafana.com/log-format annotation, if the log_type is empty the line hasn't been processed, if it contains python-json and the line matches the format, then process the line as python-json + stage.match { + pipeline_name = "pipeline for annotation || logs.agent.grafana.com/log-format: python-json" + selector = "{log_type=\"\", logs_agent_grafana_com_log_format=~\"(?i).*(python-?json).*\"} |~ \"^\\\\s*\\\\{.+\\\\}\\\\s*$\"" + + // set the log_type + stage.static_labels { + values = { + log_type = "python", + } + } + + // extract the level, process, module and func if they exist + stage.json { + expressions = { + level = "level || lvl || loglevel || log_level || logLevel || log_lvl || logLvl || levelname || levelName", + process = "processName || process_name || process", + module = "module || moduleName || module_name", + func = "funcName || func_name || func", + } + } + + // set the extracted level, process, module and func as labels + stage.labels { + values = { + level = "", + process = "", + module = "", + func = "", + } + } + + // check logs.agent.grafana.com/scrub-timestamp annotation, if true remove the timestamp from the log line + // this can reduce the overall # of bytes sent and stored in Loki + // remove timestamp from the log line, depending on the entry the key can be "timestamp", "@timestamp" or "asctime" + stage.match { + selector = "{logs_agent_grafana_com_scrub_timestamp=\"true\"}" + pipeline_name = "pipeline for annotation || logs.agent.grafana.com/scrub-timestamp: true" + + // remove timestamp from the log line + // unescaped regex: (?i)("(@?timestamp|asctime)"\s*:\s*\[?"[^"]+"\]?,?) 
+ stage.replace { + expression = "(?i)(\"(@?timestamp|asctime)\"\\s*:\\s*\\[?\"[^\"]+\"\\]?,?)" + replace = "" + } + } + + // check logs.agent.grafana.com/scrub-level annotation, if true remove the level from the log line (it is still a label) + // this can reduce the overall # of bytes sent and stored in Loki + stage.match { + selector = "{logs_agent_grafana_com_scrub_level=~\"(?i)true\"}" + pipeline_name = "pipeline for annotation || logs.agent.grafana.com/scrub-level: true" + + // remove level from the log line + stage.replace { + // unescaped regex: (?i)"(log)?(level|lvl)(name)?"\s*:\s*"[^"]+",? + expression = "(?i)(\"(log)?(level|lvl)(name)?\"\\s*:\\s*\"[^\"]+\",?)" + replace = "" + } + } + } +} diff --git a/docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/spring-boot.river b/docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/spring-boot.river new file mode 100644 index 00000000..a9a44b95 --- /dev/null +++ b/docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/spring-boot.river @@ -0,0 +1,80 @@ +/* +Module: log-format-spring-boot +Description: Log Processing for Spring Boot +Docs: https://docs.spring.io/spring-boot/docs/2.1.13.RELEASE/reference/html/boot-features-logging.html +*/ +argument "forward_to" { + // comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to" + optional = false +} + +export "process" { + value = loki.process.log_format_spring_boot +} + +loki.process "log_format_spring_boot" { + forward_to = argument.forward_to.value + + // check logs.agent.grafana.com/log-format annotation, if the log_type is empty the line hasn't been processed, if it contains springboot and the line matches the format, then process the line as spring-boot + stage.match { + pipeline_name = "pipeline for annotation || logs.agent.grafana.com/log-format: spring-boot" + // unescaped regex: ^\d{4}.+(INFO|ERROR|WARN|DEBUG|TRACE)\s+\d+\s+[^\[]+\[\S+\]\s+\S+\s+:\s+.*$ + selector = "{log_type=\"\", logs_agent_grafana_com_log_format=~\"(?i).*(spring-?boot).*\"} |~ \"^\\\\d{4}.+(INFO|ERROR|WARN|DEBUG|TRACE)\\\\s+\\\\d+\\\\s+[^\\\\[]+\\\\[\\\\S+\\\\]\\\\s+\\\\S+\\\\s+:\\\\s+.*$\"" + + // set the log_type + stage.static_labels { + values = { + log_type = "spring-boot", + } + } + + // extract the timestamp, level, traceId, spanId, processId, thread, logger from the log line + stage.regex { + // unescaped regex: (?P<timestamp>[0-9]{4}-[0-9]{2}-[0-9]{2}(T|\s+)[0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]+)?)\s+(?P<level>\w+)\s+(?P<trace>\[(\S*\-?),(?P<traceId>\S*),(?P<spanId>\S*)\])\s+(?P<processId>[0-9]+)\s+-+\s+\[\s*(?P<thread>\S+)\]\s+(?P<logger>\S+)\s+:\s+(?P<message>.+) + expression = "(?P<timestamp>[0-9]{4}-[0-9]{2}-[0-9]{2}(T|\\s+)[0-9]{2}:[0-9]{2}:[0-9]{2}(\\.[0-9]+)?)\\s+(?P<level>\\w+)\\s+(?P<trace>\\[(\\S*\\-?),(?P<traceId>\\S*),(?P<spanId>\\S*)\\])\\s+(?P<processId>[0-9]+)\\s+-+\\s+\\[\\s*(?P<thread>\\S+)\\]\\s+(?P<logger>\\S+)\\s+:\\s+(?P<message>.+)" + } + + // set the extracted values as labels so they can be used by downstream components, most likely several labels + // will be dropped before being written to Loki + stage.labels { + values = { + level = "", + trace = "", + traceId = "", + spanId = "", + processId = "", + thread = "", + logger = "", + } + } + + // check logs.agent.grafana.com/scrub-timestamp annotation, if true remove the timestamp from the log line + // this can reduce the overall # of bytes sent and stored in Loki + // remove timestamp from the log line, it is at the start of the line + stage.match { + selector = "{logs_agent_grafana_com_scrub_timestamp=\"true\"}" + pipeline_name = "pipeline for annotation || 
logs.agent.grafana.com/scrub-timestamp: true" + + // remove timestamp from the log line + // unescaped regex: ^([0-9]{4}-[0-9]{2}-[0-9]{2}(T|\s+)[0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]+)?(Z|(\+|-)[0-9:]+)?)\s+ + stage.replace { + expression = "^([0-9]{4}-[0-9]{2}-[0-9]{2}(T|\\s+)[0-9]{2}:[0-9]{2}:[0-9]{2}(\\.[0-9]+)?(Z|(\\+|-)[0-9:]+)?)\\s+" + replace = "" + } + } + + // check logs.agent.grafana.com/scrub-level annotation, if true remove the level from the log line (it is still a label) + // this can reduce the overall # of bytes sent and stored in Loki + stage.match { + selector = "{logs_agent_grafana_com_scrub_level=~\"(?i)true\"}" + pipeline_name = "pipeline for annotation || logs.agent.grafana.com/scrub-level: true" + + // remove level from the log line + stage.replace { + // unescaped regex: (ERROR|WARN|INFO|DEBUG|TRACE)\s+ + expression = "(ERROR|WARN|INFO|DEBUG|TRACE)\\s+" + replace = "" + } + } + } +} diff --git a/docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/syslog.river b/docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/syslog.river new file mode 100644 index 00000000..7a63f731 --- /dev/null +++ b/docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/syslog.river @@ -0,0 +1,46 @@ +/* +Module: log-format-syslog +Description: Handles formatting for log format of syslog +Docs: https://datatracker.ietf.org/doc/html/rfc5424 +*/ +argument "forward_to" { + // comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to" + optional = false +} + +export "process" { + value = loki.process.log_format_syslog +} + +loki.process "log_format_syslog" { + forward_to = argument.forward_to.value + + // check logs.agent.grafana.com/log-format annotation, if the log_type is empty the line hasn't been processed, if it contains syslog then process the line + stage.match { + pipeline_name = "pipeline for annotation || logs.agent.grafana.com/log-format: syslog" + // unescaped regex: ^[A-Za-z]{3}\s+\d{1,2}\s+\d{2}:\d{2}:\d{2}\s+\S+\s+\S+\[\d+\]:\s+.*$ + selector = "{log_type=\"\", logs_agent_grafana_com_log_format=~\"(?i).*(syslog).*\"} |~ \"^[A-Za-z]{3}\\\\s+\\\\d{1,2}\\\\s+\\\\d{2}:\\\\d{2}:\\\\d{2}\\\\s+\\\\S+\\\\s+\\\\S+\\\\[\\\\d+\\\\]:\\\\s+.*$\"" + + stage.static_labels { + values = { + // set the log_type + log_type = "syslog", + level = "info", + } + } + + // check logs.agent.grafana.com/scrub-timestamp annotation, if true remove the timestamp from the log line + // this can reduce the overall # of bytes sent and stored in Loki + stage.match { + selector = "{logs_agent_grafana_com_scrub_timestamp=\"true\"}" + pipeline_name = "pipeline for annotation || logs.agent.grafana.com/scrub-timestamp: true" + + // remove timestamp from the log line + // unescaped regex: ^[A-Za-z]{3}\s+\d{1,2}\s+\d{2}:\d{2}:\d{2} + stage.replace { + expression = "(^[A-Za-z]{3}\\s+\\d{1,2}\\s+\\d{2}:\\d{2}:\\d{2})" + replace = "" + } + } + } +} diff --git a/docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/zerolog.river b/docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/zerolog.river new file mode 100644 index 00000000..5c1c097d --- /dev/null +++ b/docker-compose/common/config/agent-flow/modules/docker/logs/log-formats/zerolog.river @@ -0,0 +1,122 @@ +/* +Module: log-format-zerolog +Description: Handles formatting for log format of zerolog +Docs: https://github.com/rs/zerolog +*/ +argument "forward_to" { + // comment = "Must be a list(LogsReceiver) where collected logs should be forwarded 
to" + optional = false +} + +export "process" { + value = loki.process.log_format_zerolog +} + +loki.process "log_format_zerolog" { + forward_to = argument.forward_to.value + + // check logs.agent.grafana.com/log-format annotation, if the log_type is empty the line hasn't been processed, if it contains postgres then process the line + stage.match { + pipeline_name = "pipeline for annotation || logs.agent.grafana.com/log-format: zerolog" + // unescaped regex: ^.+(TRC|DBG|INF|WRN|ERR|FTL|PNC)[^=]+(\w+=("[^"]*"|\S+))(\s+(\w+=("[^"]*"|\S+)))*\s*$ + selector = "{log_type=\"\", logs_agent_grafana_com_log_format=~\"(?i).*(zero-?log).*\"} |~ \"^.+(TRC|DBG|INF|WRN|ERR|FTL|PNC)[^=]+(\\\\w+=(\\\"[^\\\"]*\\\"|\\\\S+))(\\\\s+(\\\\w+=(\\\"[^\\\"]*\\\"|\\\\S+)))*\\\\s*$\"" + + // set the log_type + stage.static_labels { + values = { + log_type = "zerolog", + } + } + + // extract the level from the log + // unescaped regex: (?P[0-9]{4}-[0-9]{2}-[0-9]{2}(T|\s+)[0-9]{2}:[0-9]{2}:[0-9]{2}.[0-9]+[^ ]*\s+)(?P(TRC|DBG|INF|WRN|ERR|FTL|PNC)).+ + stage.regex { + expression = "(?P[0-9]{4}-[0-9]{2}-[0-9]{2}(T|\\s+)[0-9]{2}:[0-9]{2}:[0-9]{2}.[0-9]+[^ ]*\\s+)(?P(TRC|DBG|INF|WRN|ERR|FTL|PNC)).+" + } + + // if the extracted level is TRC set trace + stage.replace { + source = "level" + expression = "(TRC)" + replace = "trace" + } + + // if the extracted level is DBG set debug + stage.replace { + source = "level" + expression = "(DBG)" + replace = "debug" + } + + // if the extracted level is INF set info + stage.replace { + source = "level" + expression = "(INF)" + replace = "info" + } + + // if the extracted level is WRN set warn + stage.replace { + source = "level" + expression = "(WRN)" + replace = "warn" + } + + // if the extracted level is ERR set error + stage.replace { + source = "level" + expression = "(ERR)" + replace = "error" + } + + // if the extracted level is FTL set fatal + stage.replace { + source = "level" + expression = "(FTL)" + replace = "fatal" + } + + // if the extracted level is PNC set panic + stage.replace { + source = "level" + expression = "(PNC)" + replace = "panic" + } + + // set the extracted level as a labels + stage.labels { + values = { + level = "", + } + } + + // check logs.agent.grafana.com/scrub-timestamp annotation, if true remove the timestamp from the log line + // this can reduce the overall # of bytes sent and stored in Loki + stage.match { + selector = "{logs_agent_grafana_com_scrub_timestamp=\"true\"}" + pipeline_name = "pipeline for annotation || logs.agent.grafana.com/scrub-timestamp: true" + + // remove timestamp from the log line + + // unescaped regex: ([0-9]{4}-[0-9]{2}-[0-9]{2}(T|\s+)[0-9]{2}:[0-9]{2}:[0-9]{2}.[0-9]+[^ ]*\s+) + stage.replace { + expression = "([0-9]{4}-[0-9]{2}-[0-9]{2}(T|\\s+)[0-9]{2}:[0-9]{2}:[0-9]{2}.[0-9]+[^ ]*\\s+)" + replace = "" + } + } + + // check logs.agent.grafana.com/scrub-level annotation, if true remove the level from the log line (it is still a label) + // this can reduce the overall # of bytes sent and stored in Loki + stage.match { + selector = "{logs_agent_grafana_com_scrub_level=~\"(?i)true\"}" + pipeline_name = "pipeline for annotation || logs.agent.grafana.com/scrub-level: true" + + // remove level from the log line + stage.replace { + // unescaped regex: (TRC|DBG|INF|WRN|ERR|FTL|PNC)\s+ + expression = "(TRC|DBG|INF|WRN|ERR|FTL|PNC)\\s+" + replace = "" + } + } + } +} diff --git a/docker-compose/common/config/agent-flow/modules/docker/logs/masks/all.river b/docker-compose/common/config/agent-flow/modules/docker/logs/masks/all.river new 
file mode 100644 index 00000000..00e523ed --- /dev/null +++ b/docker-compose/common/config/agent-flow/modules/docker/logs/masks/all.river @@ -0,0 +1,60 @@ +/* +Module: mask-all +Description: Wrapper module to include all masking modules +*/ +argument "forward_to" { + // comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to" + optional = false +} + +export "process" { + value = module.file.mask_ssn.exports.process +} + +module.file "mask_ssn" { + filename = env("AGENT_CONFIG_FOLDER") + "/modules/docker/logs/masks/ssn.river" + + arguments { + forward_to = [module.file.mask_credit_card.exports.process.receiver] + } +} + +module.file "mask_credit_card" { + filename = env("AGENT_CONFIG_FOLDER") + "/modules/docker/logs/masks/credit-card.river" + + arguments { + forward_to = [module.file.mask_email.exports.process.receiver] + } +} + +module.file "mask_email" { + filename = env("AGENT_CONFIG_FOLDER") + "/modules/docker/logs/masks/email.river" + + arguments { + forward_to = [module.file.mask_phone.exports.process.receiver] + } +} + +module.file "mask_phone" { + filename = env("AGENT_CONFIG_FOLDER") + "/modules/docker/logs/masks/phone.river" + + arguments { + forward_to = [module.file.mask_ipv4.exports.process.receiver] + } +} + +module.file "mask_ipv4" { + filename = env("AGENT_CONFIG_FOLDER") + "/modules/docker/logs/masks/ipv4.river" + + arguments { + forward_to = [module.file.mask_ipv6.exports.process.receiver] + } +} + +module.file "mask_ipv6" { + filename = env("AGENT_CONFIG_FOLDER") + "/modules/docker/logs/masks/ipv6.river" + + arguments { + forward_to = argument.forward_to.value + } +} diff --git a/docker-compose/common/config/agent-flow/modules/docker/logs/masks/credit-card.river b/docker-compose/common/config/agent-flow/modules/docker/logs/masks/credit-card.river new file mode 100644 index 00000000..e0f4c8f5 --- /dev/null +++ b/docker-compose/common/config/agent-flow/modules/docker/logs/masks/credit-card.river @@ -0,0 +1,35 @@ +/* +Module: mask-credit-card +Description: Checks the logs.agent.grafana.com/mask-credit-card annotation, if set to "true" any logs that match the credit + card pattern will have the value of the credit card replaced with "*credit-card*hash*" +*/ +argument "forward_to" { + // comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to" + optional = false +} + +export "process" { + value = loki.process.mask_credit_card +} + +loki.process "mask_credit_card" { + forward_to = argument.forward_to.value + + // check logs.agent.grafana.com/mask-credit-card annotation, if true the data will be masked as *credit-card*salt* + // Formats: + // Visa: 4[0-9]{15} + // MasterCard: 5[1-5][0-9]{14} + // American Express: 3[47][0-9]{13} + // Discover: 6[0-9]{15} + // JCB: 35[0-9]{14} + stage.match { + pipeline_name = "pipeline for annotation || logs.agent.grafana.com/mask-credit-card: true" + selector = "{logs_agent_grafana_com_mask_credit_card=~\"(?i)true\"}" + + stage.replace { + // unescaped regex: (4[0-9]{15}|5[1-5][0-9]{14}|3[47][0-9]{13}|6[0-9]{15}|35[0-9]{14}) + expression = "(4[0-9]{15}|5[1-5][0-9]{14}|3[47][0-9]{13}|6[0-9]{15}|35[0-9]{14})" + replace = "*credit-card*{{ .Value | Hash \"salt\" }}*" + } + } +} diff --git a/docker-compose/common/config/agent-flow/modules/docker/logs/masks/email.river b/docker-compose/common/config/agent-flow/modules/docker/logs/masks/email.river new file mode 100644 index 00000000..bd047e36 --- /dev/null +++ 
b/docker-compose/common/config/agent-flow/modules/docker/logs/masks/email.river @@ -0,0 +1,29 @@ +/* +Module: mask-email +Description: Checks the logs.agent.grafana.com/mask-email annotation, if set to "true" any logs that match the email + pattern will have the value of the email replaced with "*email*hash* +*/ +argument "forward_to" { + // comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to" + optional = false +} + +export "process" { + value = loki.process.mask_email +} + +loki.process "mask_email" { + forward_to = argument.forward_to.value + + // check logs.agent.grafana.com/mask-email annotation, if true the data will be masked as *email*salt* + stage.match { + pipeline_name = "pipeline for annotation || logs.agent.grafana.com/mask-email: true" + selector = "{logs_agent_grafana_com_mask_email=~\"(?i)true\"}" + + stage.replace { + // unescaped regex: ([\w\.=-]+@[\w\.-]+\.[\w]{2,64}) + expression = "([\\w\\.=-]+@[\\w\\.-]+\\.[\\w]{2,64})" + replace = "*email*{{ .Value | Hash \"salt\" }}*" + } + } +} diff --git a/docker-compose/common/config/agent-flow/modules/docker/logs/masks/ipv4.river b/docker-compose/common/config/agent-flow/modules/docker/logs/masks/ipv4.river new file mode 100644 index 00000000..bfd5b35c --- /dev/null +++ b/docker-compose/common/config/agent-flow/modules/docker/logs/masks/ipv4.river @@ -0,0 +1,29 @@ +/* +Module: mask-ipv4 +Description: Checks the logs.agent.grafana.com/mask-ipv4 annotation, if set to "true" any logs that match the ipv4 + pattern will have the value of the ipv4 replaced with "*ipv4*hash* +*/ +argument "forward_to" { + // comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to" + optional = false +} + +export "process" { + value = loki.process.mask_ipv4 +} + +loki.process "mask_ipv4" { + forward_to = argument.forward_to.value + + // check logs.agent.grafana.com/mask-ipv4 annotation, if true the data will be masked as *ipv4*salt* + stage.match { + pipeline_name = "pipeline for annotation || logs.agent.grafana.com/mask-ipv4: true" + selector = "{logs_agent_grafana_com_mask_ipv4=~\"(?i)true\"}" + + stage.replace { + // unescaped regex: ((\b25[0-5]|\b2[0-4][0-9]|\b[01]?[0-9][0-9]?)(\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}) + expression = "((\\b25[0-5]|\\b2[0-4][0-9]|\\b[01]?[0-9][0-9]?)(\\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3})" + replace = "*ipv4*{{ .Value | Hash \"salt\" }}*" + } + } +} diff --git a/docker-compose/common/config/agent-flow/modules/docker/logs/masks/ipv6.river b/docker-compose/common/config/agent-flow/modules/docker/logs/masks/ipv6.river new file mode 100644 index 00000000..d02f8c37 --- /dev/null +++ b/docker-compose/common/config/agent-flow/modules/docker/logs/masks/ipv6.river @@ -0,0 +1,29 @@ +/* +Module: mask-ipv6 +Description: Checks the logs.agent.grafana.com/mask-ipv6 annotation, if set to "true" any logs that match the ipv6 + pattern will have the value of the ipv6 replaced with "*ipv6*hash* +*/ +argument "forward_to" { + // comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to" + optional = false +} + +export "process" { + value = loki.process.mask_ipv6 +} + +loki.process "mask_ipv6" { + forward_to = argument.forward_to.value + + // check logs.agent.grafana.com/mask-ipv6 annotation, if true the data will be masked as *ipv6*salt* + stage.match { + pipeline_name = "pipeline for annotation || logs.agent.grafana.com/mask-ipv6: true" + selector = "{logs_agent_grafana_com_mask_ipv6=~\"(?i)true\"}" + + stage.replace { + // unescaped 
regex: (([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])) + expression = "(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))" + replace = "*ipv6*{{ .Value | Hash \"salt\" }}*" + } + } +} diff --git a/docker-compose/common/config/agent-flow/modules/docker/logs/masks/phone.river b/docker-compose/common/config/agent-flow/modules/docker/logs/masks/phone.river new file mode 100644 index 00000000..0152c21b --- /dev/null +++ b/docker-compose/common/config/agent-flow/modules/docker/logs/masks/phone.river @@ -0,0 +1,29 @@ +/* +Module: mask-phone +Description: Checks the logs.agent.grafana.com/mask-phone annotation, if set to "true" any logs that match the phone + pattern will have the value of the phone replaced with "*phone*hash* +*/ +argument "forward_to" { + // comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to" + optional = false +} + +export "process" { + value = loki.process.mask_phone +} + +loki.process "mask_phone" { + forward_to = argument.forward_to.value + + // check logs.agent.grafana.com/mask-phone annotation, if true the data will be masked as *phone*salt* + stage.match { + pipeline_name = "pipeline for annotation || logs.agent.grafana.com/mask-phone: true" + selector = "{logs_agent_grafana_com_mask_phone=~\"(?i)true\"}" + + stage.replace { + // unescaped regex: ([\+]?[(]?[0-9]{3}[)]?[-\s\.]?[0-9]{3}[-\s\.]?[0-9]{4,6}) + expression = "([\\+]?[(]?[0-9]{3}[)]?[-\\s\\.]?[0-9]{3}[-\\s\\.]?[0-9]{4,6})" + replace = "*phone*{{ .Value | Hash \"salt\" }}*" + } + } +} diff --git a/docker-compose/common/config/agent-flow/modules/docker/logs/masks/ssn.river b/docker-compose/common/config/agent-flow/modules/docker/logs/masks/ssn.river new file mode 100644 index 00000000..f0fd58ad --- /dev/null +++ b/docker-compose/common/config/agent-flow/modules/docker/logs/masks/ssn.river @@ -0,0 +1,29 @@ +/* +Module: mask-ssn +Description: Checks the logs.agent.grafana.com/mask-ssn annotation, if set to "true" any logs that match the ssn + pattern will have the value of the ssn replaced with "*ssn*hash* +*/ +argument "forward_to" { + // comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to" + optional = false +} + +export "process" { + value = loki.process.mask_ssn +} + +loki.process "mask_ssn" { + forward_to = argument.forward_to.value + + // check 
logs.agent.grafana.com/mask-ssn annotation, if true the data will be masked as *ssn*salt* + stage.match { + pipeline_name = "pipeline for annotation || logs.agent.grafana.com/mask-ssn: true" + selector = "{logs_agent_grafana_com_mask_ssn=~\"(?i)true\"}" + + stage.replace { + // unescaped regex: ([0-9]{3}-[0-9]{2}-[0-9]{4}) + expression = "([0-9]{3}-[0-9]{2}-[0-9]{4})" + replace = "*ssn*{{ .Value | Hash \"salt\" }}*" + } + } +} diff --git a/docker-compose/common/config/agent-flow/modules/docker/logs/metrics/post-process-bytes-lines.river b/docker-compose/common/config/agent-flow/modules/docker/logs/metrics/post-process-bytes-lines.river new file mode 100644 index 00000000..5464af9c --- /dev/null +++ b/docker-compose/common/config/agent-flow/modules/docker/logs/metrics/post-process-bytes-lines.river @@ -0,0 +1,39 @@ +/* +Module: post-process-lines-bytes-metrics +Description: Generates metrics for the number of lines and bytes in the log line after all processing is complete +*/ +argument "forward_to" { + // comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to" + optional = false +} + +export "process" { + value = loki.process.post_process_lines_bytes_metrics +} + +loki.process "post_process_lines_bytes_metrics" { + forward_to = argument.forward_to.value + + stage.metrics { + metric.counter { + name = "lines_total" + description = "total number of log lines ingested, processed and forwarded for storage" + prefix = "log_" + match_all = true + action = "inc" + max_idle_duration = "24h" + } + } + + stage.metrics { + metric.counter { + name = "bytes_total" + description = "total log bytes ingested, processed and forwarded for storage" + prefix = "log_" + match_all = true + count_entry_bytes = true + action = "add" + max_idle_duration = "24h" + } + } +} diff --git a/docker-compose/common/config/agent-flow/modules/docker/logs/metrics/pre-process-bytes-lines.river b/docker-compose/common/config/agent-flow/modules/docker/logs/metrics/pre-process-bytes-lines.river new file mode 100644 index 00000000..e5ac9851 --- /dev/null +++ b/docker-compose/common/config/agent-flow/modules/docker/logs/metrics/pre-process-bytes-lines.river @@ -0,0 +1,74 @@ +/* +Module: pre-process-lines-bytes-metrics +Description: Generates metrics for the number of lines and bytes in the log line before any processing is done +*/ +argument "forward_to" { + // comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to" + optional = false +} + +argument "keep_labels" { + optional = true + // comment = "List of labels to keep before the log message is written to Loki" + default = [ + "app", + "cluster", + "component", + "container", + "deployment", + "env", + "filename", + "instance", + "job", + "level", + "log_type", + "namespace", + "region", + "service", + "squad", + "team", + ] +} + +export "process" { + value = module.file.label_keep.exports.process +} + +// drop any labels that are not in the keep_labels list +// this is because the metrics generated below will keep the full set of labels currently attached to the log line +// we want those to line up with what we're keeping +module.file "label_keep" { + filename = env("AGENT_CONFIG_FOLDER") + "/modules/docker/logs/labels/keep-labels.river" + + arguments { + forward_to = [loki.process.pre_process_lines_bytes_metrics.receiver] + keep_labels = argument.keep_labels.value + } +} + +loki.process "pre_process_lines_bytes_metrics" { + forward_to = [] // does not forward anywhere, just generates metrics + + stage.metrics { 
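+ // NOTE: with prefix = "log_" the counters below are exposed as log_lines_pre_total and log_bytes_pre_total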
+ metric.counter { + name = "lines_pre_total" + description = "total number of log lines ingested before processing" + prefix = "log_" + match_all = true + action = "inc" + max_idle_duration = "24h" + } + } + + stage.metrics { + metric.counter { + name = "bytes_pre_total" + description = "total number of log bytes ingested before processing" + prefix = "log_" + match_all = true + count_entry_bytes = true + action = "add" + max_idle_duration = "24h" + } + } +} diff --git a/docker-compose/common/config/agent-flow/modules/docker/logs/relabelings.river b/docker-compose/common/config/agent-flow/modules/docker/logs/relabelings.river new file mode 100644 index 00000000..b4667d1e --- /dev/null +++ b/docker-compose/common/config/agent-flow/modules/docker/logs/relabelings.river @@ -0,0 +1,65 @@ +/* +Module: log-relabelings +Description: Handles log relabelings +*/ +argument "targets" { + // comment = "Discovered targets to apply relabelings to" + optional = false +} + +argument "tenant" { + // comment = "The tenant to filter logs to. This does not have to be the tenantId, this is the value to look for in the logs.agent.grafana.com/tenant annotation, and this can be a regex." + optional = true + default = "^(primary|anonymous|)$" +} + +export "relabelings" { + value = discovery.relabel.logs +} + +// apply common relabelings +module.file "relabelings_common" { + filename = env("AGENT_CONFIG_FOLDER") + "/modules/docker/relabelings/common.river" + + arguments { + targets = argument.targets.value + } +} + +// apply container log specific relabelings +discovery.relabel "logs" { + targets = module.file.relabelings_common.exports.relabelings.output + + // allow services to declare their logs to be ingested or not, the following annotation is supported: + // logs.agent.grafana.com/scrape: false + rule { + action = "replace" + source_labels = ["__meta_docker_container_label_logs_agent_grafana_com_scrape"] + separator = ";" + regex = "^(?:;*)?(true|false).*$" + replacement = "$1" + target_label = "__tmp_scrape" + } + + // drop any targets that have scrape: false + rule { + action = "drop" + source_labels = ["__tmp_scrape"] + regex = "false" + } + + // allow containers to declare what tenant their logs should be written to, the following annotation is supported: + // logs.agent.grafana.com/tenant: "primary" + rule { + action = "keep" + source_labels = ["__meta_docker_container_label_logs_agent_grafana_com_tenant"] + regex = "^(" + argument.tenant.value + ")$" + } + + // make all labels on the container available to the pipeline as labels, + // they are omitted before write via labelallow unless explicitly set + rule { + action = "labelmap" + regex = "__meta_docker_container_label_(.+)" + } +} diff --git a/docker-compose/common/config/agent-flow/modules/docker/logs/scrubs/all.river b/docker-compose/common/config/agent-flow/modules/docker/logs/scrubs/all.river new file mode 100644 index 00000000..5c3980bf --- /dev/null +++ b/docker-compose/common/config/agent-flow/modules/docker/logs/scrubs/all.river @@ -0,0 +1,28 @@ +/* +Module: scrub-all +Description: Wrapper module to include all scrubbing modules +*/ +argument "forward_to" { + // comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to" + optional = false +} + +export "process" { + value = module.file.scrub_json_empties.exports.process +} + +module.file "scrub_json_empties" { + filename = env("AGENT_CONFIG_FOLDER") + "/modules/docker/logs/scrubs/json-empties.river" + + arguments { + forward_to = 
[module.file.scrub_json_nulls.exports.process.receiver] + } +} + +module.file "scrub_json_nulls" { + filename = env("AGENT_CONFIG_FOLDER") + "/modules/docker/logs/scrubs/json-nulls.river" + + arguments { + forward_to = argument.forward_to.value + } +} diff --git a/docker-compose/common/config/agent-flow/modules/docker/logs/scrubs/json-empties.river b/docker-compose/common/config/agent-flow/modules/docker/logs/scrubs/json-empties.river new file mode 100644 index 00000000..0e38270a --- /dev/null +++ b/docker-compose/common/config/agent-flow/modules/docker/logs/scrubs/json-empties.river @@ -0,0 +1,32 @@ +/* +Module: scrub-json-empties +Description: Checks for the annotation logs.agent.grafana.com/scrub-empties, if set to "true" + Removes any json properties with empty values i.e. "foo": "", "bar": [], "baz": {} +*/ +argument "forward_to" { + // comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to" + optional = false +} + +export "process" { + value = loki.process.scrub_json_empties +} + +loki.process "scrub_json_empties" { + forward_to = argument.forward_to.value + + // check logs.agent.grafana.com/scrub-empties annotation, if true remove any json property whose value is + // an empty string "", empty object {} or empty array [] + // this can reduce the overall # of bytes sent and stored in Loki + stage.match { + selector = "{logs_agent_grafana_com_log_format=~\"(?i)(dotnet-?json|istio|(generic-?)?json|log4j-?json|(otel|open-?telemetry)(-?json)?|python-?json)\",logs_agent_grafana_com_scrub_empties=~\"(?i)true\"}" + pipeline_name = "pipeline for annotation || logs.agent.grafana.com/scrub-empties: true" + + // remove empty properties + stage.replace { + // unescaped regex: (\s*,\s*("[^"]+"\s*:\s*(\[\s*\]|\{\s*\}|"\s*"))|("[^"]+"\s*:\s*(\[\s*\]|\{\s*\}|"\s*"))\s*,\s*) + expression = "(\\s*,\\s*(\"[^\"]+\"\\s*:\\s*(\\[\\s*\\]|\\{\\s*\\}|\"\\s*\"))|(\"[^\"]+\"\\s*:\\s*(\\[\\s*\\]|\\{\\s*\\}|\"\\s*\"))\\s*,\\s*)" + replace = "" + } + } +} diff --git a/docker-compose/common/config/agent-flow/modules/docker/logs/scrubs/json-nulls.river b/docker-compose/common/config/agent-flow/modules/docker/logs/scrubs/json-nulls.river new file mode 100644 index 00000000..1a6349fa --- /dev/null +++ b/docker-compose/common/config/agent-flow/modules/docker/logs/scrubs/json-nulls.river @@ -0,0 +1,31 @@ +/* +Module: scrub-json-nulls +Description: Checks for the annotation logs.agent.grafana.com/scrub-nulls, if set to "true" + Removes any json properties with a null value +*/ +argument "forward_to" { + // comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to" + optional = false +} + +export "process" { + value = loki.process.scrub_json_nulls +} + +loki.process "scrub_json_nulls" { + forward_to = argument.forward_to.value + + // check logs.agent.grafana.com/scrub-nulls annotation, if true remove any json property whose value is set to null + // this can reduce the overall # of bytes sent and stored in Loki + stage.match { + selector = "{logs_agent_grafana_com_log_format=~\"(?i)(dotnet-?json|istio|(generic-?)?json|log4j-?json|(otel|open-?telemetry)(-?json)?|python-?json)\",logs_agent_grafana_com_scrub_nulls=~\"(?i)true\"}" + pipeline_name = "pipeline for annotation || logs.agent.grafana.com/scrub-nulls: true" + + // remove null properties + stage.replace { + // unescaped regex: (\s*,\s*("[^"]+"\s*:\s*null)|("[^"]+"\s*:\s*null)\s*,\s*) + expression = "(\\s*,\\s*(\"[^\"]+\"\\s*:\\s*null)|(\"[^\"]+\"\\s*:\\s*null)\\s*,\\s*)" + replace = "" + } + } +} 
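The scrub wrapper above chains json-empties into json-nulls the same way the masks wrapper chains its modules. As a minimal usage sketch (not part of this patch; module.file.docker_compose.exports.logs_receiver is the export used elsewhere in this series), the wrapper could sit directly in front of the Loki write endpoint:

module.file "log_scrubs" {
  filename = env("AGENT_CONFIG_FOLDER") + "/modules/docker/logs/scrubs/all.river"

  arguments {
    forward_to = [module.file.docker_compose.exports.logs_receiver]
  }
}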
diff --git a/docker-compose/common/config/agent-flow/modules/docker/logs/targets/logs-from-docker.river b/docker-compose/common/config/agent-flow/modules/docker/logs/targets/logs-from-docker.river new file mode 100644 index 00000000..3e4da468 --- /dev/null +++ b/docker-compose/common/config/agent-flow/modules/docker/logs/targets/logs-from-docker.river @@ -0,0 +1,40 @@ +/* +Module: logs-from-docker +Description: Performs Docker container discovery, applies relabelings, the discovered targets' logs are + then collected from the Docker daemon +*/ +argument "forward_to" { + // comment = "Must be a list(LogsReceiver) where collected logs should be forwarded to" + optional = false +} + +argument "tenant" { + // comment = "The tenant to filter logs to. This does not have to be the tenantId, this is the value to look for in the logs.agent.grafana.com/tenant annotation, and this can be a regex." + optional = true + default = ".*" +} + +discovery.docker "containers" { + host = "unix:///var/run/docker.sock" + + filter { + name = "status" + values = ["running"] + } +} + +module.file "relabelings_log" { + filename = env("AGENT_CONFIG_FOLDER") + "/modules/docker/logs/relabelings.river" + + arguments { + targets = discovery.docker.containers.targets + tenant = argument.tenant.value + } +} + +loki.source.docker "containers" { + host = "unix:///var/run/docker.sock" + targets = module.file.relabelings_log.exports.relabelings.output + //relabel_rules = module.file.relabelings_log.exports.relabelings.rules + forward_to = argument.forward_to.value +} diff --git a/docker-compose/common/config/agent-flow/modules/docker/relabelings/common.river b/docker-compose/common/config/agent-flow/modules/docker/relabelings/common.river new file mode 100644 index 00000000..5cb43a5b --- /dev/null +++ b/docker-compose/common/config/agent-flow/modules/docker/relabelings/common.river @@ -0,0 +1,87 @@ +/* +Module: relabelings-common +Description: Handles relabelings for extracting common fields across various roles +*/ + +argument "targets" { + // comment = "Discovered targets to apply relabelings to" + optional = false +} + +argument "cluster" { + optional = true + default = "docker-compose" +} + +argument "namespace" { + optional = true + default = "monitoring-system" +} + +export "relabelings" { + value = discovery.relabel.common +} + +discovery.relabel "common" { + targets = argument.targets.value + + // filter by service name + rule { + action = "keep" + source_labels = [ + "__meta_docker_container_label_com_docker_compose_service", + ] + regex = "(agent|mimir|mimir-.*|grafana|loki|loki-.*|tempo|tempo-.*|pyroscope|distributor|ingester|query-frontend|querier|query-scheduler|ruler|compactor|store-gateway|alertmanager|overrides-exporter|index-gateway|gateway)" + } + + // set the cluster label + rule { + action = "replace" + replacement = argument.cluster.value + target_label = "cluster" + } + + // set the namespace label + rule { + action = "replace" + replacement = argument.namespace.value + target_label = "namespace" + } + + rule { + action = "replace" + source_labels = [ + "__meta_docker_container_label_com_docker_compose_service", + ] + regex = "^(?:;*)?([^;]+).*$" + replacement = argument.namespace.value + "/$1" + target_label = "job" + } + + rule { + action = "replace" + source_labels = [ + "__meta_docker_container_label_com_docker_compose_service", + ] + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "pod" + } + + rule { + action = "replace" + source_labels = [ 
"__meta_docker_container_label_com_docker_compose_service", + "__meta_docker_container_label_app", + ] + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "app" + } + + rule { + source_labels = ["__meta_docker_container_name"] + regex = "/(.*)" + target_label = "container" + } +} diff --git a/docker-compose/common/config/agent-flow/modules/docker_compose.river b/docker-compose/common/config/agent-flow/modules/docker_compose.river index 244da188..abb1a76e 100644 --- a/docker-compose/common/config/agent-flow/modules/docker_compose.river +++ b/docker-compose/common/config/agent-flow/modules/docker_compose.river @@ -44,10 +44,6 @@ argument "profiles_endpoint" { * EXPORTS ********************************************/ -export "relabelings_common" { - value = discovery.relabel.common -} - export "metrics_receiver" { value = prometheus.remote_write.docker_compose.receiver } @@ -64,6 +60,10 @@ export "profiles_receiver" { value = pyroscope.write.docker_compose.receiver } +export "relabelings_common" { + value = discovery.relabel.common +} + /******************************************** * Endpoints ********************************************/ @@ -77,8 +77,6 @@ prometheus.remote_write "docker_compose" { external_labels = { "scraped_by" = "grafana-agent", - "cluster" = argument.cluster.value, - "namespace" = argument.namespace.value, } } @@ -91,8 +89,6 @@ loki.write "docker_compose" { external_labels = { "scraped_by" = "grafana-agent", - "cluster" = argument.cluster.value, - "namespace" = argument.namespace.value, } } @@ -139,7 +135,21 @@ discovery.relabel "common" { source_labels = [ "__meta_docker_container_label_com_docker_compose_service", ] - regex = "(agent|mimir|mimir-.*|grafana|loki|loki-.*|tempo|tempo-.*|pyroscope|distributor|ingester|query-frontend|querier|query-scheduler|ruler|compactor|store-gateway|alertmanager|overrides-exporter|index-gateway|gateway)" + regex = "(agent|mimir|mimir-.*|grafana|loki|loki-.*|tempo|tempo-.*|pyroscope|distributor|ingester|query-frontend|querier|query-scheduler|ruler|compactor|store-gateway|alertmanager|overrides-exporter|index-gateway|gateway)" + } + + // set the cluster label + rule { + action = "replace" + replacement = argument.cluster.value + target_label = "cluster" + } + + // set the namespace label + rule { + action = "replace" + replacement = argument.namespace.value + target_label = "namespace" } rule { diff --git a/docker-compose/monolithic-mode/logs/docker-compose.yaml b/docker-compose/monolithic-mode/logs/docker-compose.yaml index 76889b36..ddb125f2 100644 --- a/docker-compose/monolithic-mode/logs/docker-compose.yaml +++ b/docker-compose/monolithic-mode/logs/docker-compose.yaml @@ -12,6 +12,9 @@ include: services: gateway: + labels: + - logs.agent.grafana.com/scrape=true + - logs.agent.grafana.com/log-format=nginx depends_on: loki: condition: service_healthy @@ -36,6 +39,8 @@ services: - "3100:3100" loki: + labels: + - logs.agent.grafana.com/log-format=logfmt depends_on: minio: condition: service_healthy From 7e93307f086914a06038b64c5f8ff47ca26b6ec0 Mon Sep 17 00:00:00 2001 From: Weifeng Wang Date: Tue, 12 Mar 2024 00:41:19 +0800 Subject: [PATCH 2/3] improve(logs): add labels Signed-off-by: Weifeng Wang update loki log_format Signed-off-by: Weifeng Wang update loki log_format Signed-off-by: Weifeng Wang --- .../compose-include/agent-collect-logs.yaml | 2 ++ .../common/compose-include/minio.yaml | 2 ++ .../agent-flow/monolithic-mode-all.river | 30 +++++++------------ .../config/loki/microservices-mode-logs.yaml | 1 + 
.../config/loki/monolithic-mode-logs.yaml | 1 + .../config/loki/read-write-mode-logs.yaml | 1 + .../logs/docker-compose.yaml | 15 ++++++++++ .../all-in-one/docker-compose.yaml | 12 ++++++++ .../monolithic-mode/logs/docker-compose.yaml | 6 ++-- .../read-write-mode/logs/docker-compose.yaml | 11 +++++++ 10 files changed, 60 insertions(+), 21 deletions(-) diff --git a/docker-compose/common/compose-include/agent-collect-logs.yaml b/docker-compose/common/compose-include/agent-collect-logs.yaml index 32f7ed37..0653afc0 100644 --- a/docker-compose/common/compose-include/agent-collect-logs.yaml +++ b/docker-compose/common/compose-include/agent-collect-logs.yaml @@ -1,6 +1,8 @@ services: agent: + labels: + - logs.agent.grafana.com/log-format=logfmt depends_on: gateway: condition: service_healthy diff --git a/docker-compose/common/compose-include/minio.yaml b/docker-compose/common/compose-include/minio.yaml index ace287b8..79aad21c 100644 --- a/docker-compose/common/compose-include/minio.yaml +++ b/docker-compose/common/compose-include/minio.yaml @@ -1,6 +1,8 @@ services: minio: + labels: + - logs.agent.grafana.com/scrape=false image: minio/minio:RELEASE.2024-03-05T04-48-44Z entrypoint: - sh diff --git a/docker-compose/common/config/agent-flow/monolithic-mode-all.river b/docker-compose/common/config/agent-flow/monolithic-mode-all.river index 21e37f87..d44c8639 100644 --- a/docker-compose/common/config/agent-flow/monolithic-mode-all.river +++ b/docker-compose/common/config/agent-flow/monolithic-mode-all.river @@ -1,7 +1,7 @@ // https://github.com/grafana/agent-configurator logging { - level = "warn" + level = "info" format = "logfmt" } @@ -10,6 +10,10 @@ tracing { write_to = [otelcol.processor.batch.containers.input] } +/******************************************** + * LGTMP Receiver provider + ********************************************/ + module.file "docker_compose" { filename = env("AGENT_CONFIG_FOLDER") + "/modules/docker_compose.river" } @@ -95,24 +99,12 @@ prometheus.relabel "integrations" { * Logs ********************************************/ -loki.source.docker "containers" { - host = "unix:///var/run/docker.sock" - targets = discovery.relabel.containers.output - relabel_rules = discovery.relabel.containers.rules - forward_to = [loki.process.containers.receiver] -} - -loki.process "containers" { - stage.drop { - longer_than = "8KB" - older_than = "12h" - } +module.file "logs_primary" { + filename = env("AGENT_CONFIG_FOLDER") + "/modules/docker/logs/all.river" - stage.tenant { - value = "anonymous" + arguments { + forward_to = [module.file.docker_compose.exports.logs_receiver] } - - forward_to = [module.file.docker_compose.exports.logs_receiver] } /******************************************** @@ -265,7 +257,7 @@ otelcol.exporter.prometheus "containers" { } otelcol.exporter.loki "containers" { - forward_to = [loki.process.containers.receiver] + forward_to = [module.file.docker_compose.exports.logs_receiver] } // The OpenTelemetry spanlog connector processes incoming trace spans and extracts data from them ready @@ -310,7 +302,7 @@ loki.process "autologging" { source = "body" } - forward_to = [loki.process.containers.receiver] + forward_to = [module.file.docker_compose.exports.logs_receiver] } /******************************************** diff --git a/docker-compose/common/config/loki/microservices-mode-logs.yaml b/docker-compose/common/config/loki/microservices-mode-logs.yaml index 90890535..77bf5d26 100644 --- a/docker-compose/common/config/loki/microservices-mode-logs.yaml +++ 
b/docker-compose/common/config/loki/microservices-mode-logs.yaml @@ -8,6 +8,7 @@ server: http_listen_port: 3100 grpc_listen_port: 9095 log_level: info + log_format: json common: compactor_address: http://compactor:3100 diff --git a/docker-compose/common/config/loki/monolithic-mode-logs.yaml b/docker-compose/common/config/loki/monolithic-mode-logs.yaml index b2557b9f..941f8a07 100644 --- a/docker-compose/common/config/loki/monolithic-mode-logs.yaml +++ b/docker-compose/common/config/loki/monolithic-mode-logs.yaml @@ -8,6 +8,7 @@ server: http_listen_port: 3100 grpc_listen_port: 9095 log_level: info + log_format: json common: compactor_address: loki diff --git a/docker-compose/common/config/loki/read-write-mode-logs.yaml b/docker-compose/common/config/loki/read-write-mode-logs.yaml index 672ad55f..6a3397d9 100644 --- a/docker-compose/common/config/loki/read-write-mode-logs.yaml +++ b/docker-compose/common/config/loki/read-write-mode-logs.yaml @@ -8,6 +8,7 @@ server: http_listen_port: 3100 grpc_listen_port: 9095 log_level: info + log_format: json common: compactor_address: loki-backend diff --git a/docker-compose/microservices-mode/logs/docker-compose.yaml b/docker-compose/microservices-mode/logs/docker-compose.yaml index b244b96c..89bda28a 100644 --- a/docker-compose/microservices-mode/logs/docker-compose.yaml +++ b/docker-compose/microservices-mode/logs/docker-compose.yaml @@ -10,8 +10,14 @@ include: - path: ../../common/compose-include/minio.yaml - path: ../../common/compose-include/agent-collect-logs.yaml +x-labels: &loki-labels + - logs.agent.grafana.com/log-format=json + services: gateway: + labels: + # - logs.agent.grafana.com/scrape=false + - logs.agent.grafana.com/log-format=nginx depends_on: distributor: condition: service_healthy @@ -36,6 +42,7 @@ services: - "3100:3100" distributor: + labels: *loki-labels depends_on: ingester: condition: service_started @@ -59,6 +66,7 @@ services: - loki-memberlist ingester: + labels: *loki-labels depends_on: minio: condition: service_healthy @@ -77,6 +85,7 @@ services: - loki-memberlist query-frontend: + labels: *loki-labels depends_on: query-scheduler: condition: service_started @@ -89,6 +98,7 @@ services: - -config.expand-env=true query-scheduler: + labels: *loki-labels image: *lokiImage volumes: - ../../common/config/loki/microservices-mode-logs.yaml:/etc/loki.yaml # Note: Loki use microservices-mode-logs.yaml @@ -104,6 +114,7 @@ services: - loki-memberlist querier: + labels: *loki-labels depends_on: query-scheduler: condition: service_started @@ -116,6 +127,7 @@ services: - -config.expand-env=true ruler: + labels: *loki-labels depends_on: minio: condition: service_healthy @@ -132,6 +144,7 @@ services: - loki-memberlist compactor: + labels: *loki-labels image: *lokiImage volumes: - ../../common/config/loki/microservices-mode-logs.yaml:/etc/loki.yaml # Note: Loki use microservices-mode-logs.yaml @@ -145,6 +158,8 @@ services: - loki-memberlist grafana: + labels: + - logs.agent.grafana.com/log-format=logfmt image: grafana/grafana:10.4.0 command: - --config=/etc/grafana-config/grafana.ini diff --git a/docker-compose/monolithic-mode/all-in-one/docker-compose.yaml b/docker-compose/monolithic-mode/all-in-one/docker-compose.yaml index 42030276..c3639310 100644 --- a/docker-compose/monolithic-mode/all-in-one/docker-compose.yaml +++ b/docker-compose/monolithic-mode/all-in-one/docker-compose.yaml @@ -11,6 +11,8 @@ include: services: agent: + labels: + - logs.agent.grafana.com/log-format=logfmt depends_on: tempo: condition: service_started @@ -36,6 +38,8 
@@ services: - "12345:12345" mimir: + labels: + - logs.agent.grafana.com/log-format=logfmt depends_on: minio: condition: service_healthy @@ -77,6 +81,8 @@ services: rules load /mimir-mixin/rules.yaml /mimir-mixin/alerts.yaml /loki-mixin/rules.yaml /loki-mixin/alerts.yaml loki: + labels: + - logs.agent.grafana.com/log-format=json depends_on: minio: condition: service_healthy @@ -100,6 +106,8 @@ services: - loki-memberlist tempo: + labels: + - logs.agent.grafana.com/log-format=logfmt depends_on: minio: condition: service_healthy @@ -125,6 +133,8 @@ services: retries: 5 pyroscope: + labels: + - logs.agent.grafana.com/log-format=logfmt depends_on: minio: condition: service_healthy @@ -137,6 +147,8 @@ services: - -config.expand-env=true grafana: + labels: + - logs.agent.grafana.com/log-format=logfmt image: grafana/grafana:10.4.0 command: - --config=/etc/grafana-config/grafana.ini diff --git a/docker-compose/monolithic-mode/logs/docker-compose.yaml b/docker-compose/monolithic-mode/logs/docker-compose.yaml index ddb125f2..297b6bbd 100644 --- a/docker-compose/monolithic-mode/logs/docker-compose.yaml +++ b/docker-compose/monolithic-mode/logs/docker-compose.yaml @@ -13,7 +13,7 @@ include: services: gateway: labels: - - logs.agent.grafana.com/scrape=true + # - logs.agent.grafana.com/scrape=false - logs.agent.grafana.com/log-format=nginx depends_on: loki: @@ -40,7 +40,7 @@ services: loki: labels: - - logs.agent.grafana.com/log-format=logfmt + - logs.agent.grafana.com/log-format=json depends_on: minio: condition: service_healthy @@ -66,6 +66,8 @@ services: - loki-memberlist grafana: + labels: + - logs.agent.grafana.com/log-format=logfmt image: grafana/grafana:10.4.0 command: - --config=/etc/grafana-config/grafana.ini diff --git a/docker-compose/read-write-mode/logs/docker-compose.yaml b/docker-compose/read-write-mode/logs/docker-compose.yaml index f42f0ae2..323da93f 100644 --- a/docker-compose/read-write-mode/logs/docker-compose.yaml +++ b/docker-compose/read-write-mode/logs/docker-compose.yaml @@ -10,8 +10,14 @@ include: - path: ../../common/compose-include/minio.yaml - path: ../../common/compose-include/agent-collect-logs.yaml +x-labels: &loki-labels + - logs.agent.grafana.com/log-format=json + services: gateway: + labels: + # - logs.agent.grafana.com/scrape=false + - logs.agent.grafana.com/log-format=nginx depends_on: loki-write: condition: service_healthy @@ -36,6 +42,7 @@ services: - "3100:3100" loki-read: + labels: *loki-labels depends_on: minio: condition: service_healthy @@ -60,6 +67,7 @@ services: - loki-memberlist loki-write: + labels: *loki-labels depends_on: minio: condition: service_healthy @@ -83,6 +91,7 @@ services: - loki-memberlist loki-backend: + labels: *loki-labels depends_on: minio: condition: service_healthy @@ -103,6 +112,8 @@ services: retries: 5 grafana: + labels: + - logs.agent.grafana.com/log-format=logfmt image: grafana/grafana:10.4.0 command: - --config=/etc/grafana-config/grafana.ini From 5ebfef0002133896a33bf21f43c1c30e774abf8b Mon Sep 17 00:00:00 2001 From: Weifeng Wang Date: Tue, 12 Mar 2024 12:03:41 +0800 Subject: [PATCH 3/3] gateway(nginx): enable access_log for 2xx Signed-off-by: Weifeng Wang --- docker-compose/common/config/nginx/nginx.conf.template | 2 +- docker-compose/microservices-mode/logs/docker-compose.yaml | 2 +- docker-compose/monolithic-mode/logs/docker-compose.yaml | 2 +- docker-compose/read-write-mode/logs/docker-compose.yaml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docker-compose/common/config/nginx/nginx.conf.template 
b/docker-compose/common/config/nginx/nginx.conf.template index 27da32e1..5f6863c0 100644 --- a/docker-compose/common/config/nginx/nginx.conf.template +++ b/docker-compose/common/config/nginx/nginx.conf.template @@ -28,7 +28,7 @@ http { '"$http_user_agent" "$http_x_forwarded_for"'; map $status $loggable { - ~^[23] 0; + ~^[3] 0; default 1; } diff --git a/docker-compose/microservices-mode/logs/docker-compose.yaml b/docker-compose/microservices-mode/logs/docker-compose.yaml index 89bda28a..4a2efc0d 100644 --- a/docker-compose/microservices-mode/logs/docker-compose.yaml +++ b/docker-compose/microservices-mode/logs/docker-compose.yaml @@ -16,7 +16,7 @@ x-labels: &loki-labels services: gateway: labels: - # - logs.agent.grafana.com/scrape=false + - logs.agent.grafana.com/scrape=false - logs.agent.grafana.com/log-format=nginx depends_on: distributor: diff --git a/docker-compose/monolithic-mode/logs/docker-compose.yaml b/docker-compose/monolithic-mode/logs/docker-compose.yaml index 297b6bbd..51ff5667 100644 --- a/docker-compose/monolithic-mode/logs/docker-compose.yaml +++ b/docker-compose/monolithic-mode/logs/docker-compose.yaml @@ -13,7 +13,7 @@ include: services: gateway: labels: - # - logs.agent.grafana.com/scrape=false + - logs.agent.grafana.com/scrape=false - logs.agent.grafana.com/log-format=nginx depends_on: loki: diff --git a/docker-compose/read-write-mode/logs/docker-compose.yaml b/docker-compose/read-write-mode/logs/docker-compose.yaml index 323da93f..3202d638 100644 --- a/docker-compose/read-write-mode/logs/docker-compose.yaml +++ b/docker-compose/read-write-mode/logs/docker-compose.yaml @@ -16,7 +16,7 @@ x-labels: &loki-labels services: gateway: labels: - # - logs.agent.grafana.com/scrape=false + - logs.agent.grafana.com/scrape=false - logs.agent.grafana.com/log-format=nginx depends_on: loki-write:
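Taken together, the modules in this series make log collection entirely label-driven: a service opts in (or out), declares its log format, and enables scrubbing or masking purely through Docker Compose labels. A hypothetical service definition combining several of the annotations introduced above (the service name and chosen values are illustrative only):

services:
  my-app:
    labels:
      - logs.agent.grafana.com/scrape=true
      - logs.agent.grafana.com/log-format=json
      - logs.agent.grafana.com/scrub-timestamp=true
      - logs.agent.grafana.com/scrub-nulls=true
      - logs.agent.grafana.com/mask-email=true
      - logs.agent.grafana.com/tenant=primary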