[Logs forwarder] Parse enhanced metrics from Lambda telemetry JSON logs. (#859)

* Parse enhanced metrics from Lambda telemetry JSON logs. These get emitted instead of regular REPORT logs if log format is set to JSON.
* Address review comments.
* Less try/catch
* Organize constants
* Remove unnecessary defensive JSON check
DataDog · Oct 29, 2024 · 50e34e8 · 50e34e8
1 parent 0799b46
commit 50e34e8
Show file tree

Hide file tree

Showing 6 changed files with 393 additions and 1 deletion.
diff --git a/aws/logs_monitoring/enhanced_lambda_metrics.py b/aws/logs_monitoring/enhanced_lambda_metrics.py
@@ -2,6 +2,7 @@
 # under the Apache License Version 2.0.
 # This product includes software developed at Datadog (https://www.datadoghq.com/).
 # Copyright 2021 Datadog, Inc.
+import json
 import os
 import logging
 import re
@@ -60,6 +61,19 @@
     INIT_DURATION_METRIC_NAME,
 ]
 
+# Keys that appear in Lambda telemetry records emitted when JSON logs are enabled
+# (read from the "metrics" object of a "platform.report" record).
+# Allocated memory: used as a tag value and as an input to the cost estimate.
+MEMORY_ALLOCATED_RECORD_KEY = "memorySizeMB"
+# Init duration: read with .get(); a truthy value also sets the cold_start tag.
+INIT_DURATION_RECORD_KEY = "initDurationMs"
+DURATION_RECORD_KEY = "durationMs"
+BILLED_DURATION_RECORD_KEY = "billedDurationMs"
+MAX_MEMORY_USED_RECORD_KEY = "maxMemoryUsedMB"
+# Maps each record key to the enhanced metric name it is reported under.
+# The JSON report parser iterates this dict, so its insertion order is the
+# order in which these metric points are emitted.
+RUNTIME_METRICS_BY_RECORD_KEY = {
+    # Except INIT_DURATION_RECORD_KEY which is handled separately
+    DURATION_RECORD_KEY: DURATION_METRIC_NAME,
+    BILLED_DURATION_RECORD_KEY: BILLED_DURATION_METRIC_NAME,
+    MAX_MEMORY_USED_RECORD_KEY: MAX_MEMORY_USED_METRIC_NAME,
+}
+
 # Multiply the duration metrics by 1/1000 to convert ms to seconds
 METRIC_ADJUSTMENT_FACTORS = {
     DURATION_METRIC_NAME: 0.001,
@@ -202,8 +216,12 @@ def generate_enhanced_lambda_metrics(log, tags_cache):
     if not is_lambda_log:
         return []
 
+    # Check if this is a Lambda lifecycle log that is emitted if log format is set to JSON
+    parsed_metrics = parse_metrics_from_json_report_log(log_message)
+
     # Check if this is a REPORT log
-    parsed_metrics = parse_metrics_from_report_log(log_message)
+    if not parsed_metrics:
+        parsed_metrics = parse_metrics_from_report_log(log_message)
 
     # Check if this is a timeout
     if not parsed_metrics:
@@ -254,6 +272,74 @@ def parse_lambda_tags_from_arn(arn):
     ]
 
 
+def parse_metrics_from_json_report_log(log_message):
+    """Parses and returns metrics from a Lambda `platform.report` JSON log
+
+    Lambda emits these telemetry records instead of the plain-text REPORT
+    log when the function's log format is set to JSON.
+
+    Args:
+        log_message (str): The log message to parse
+
+    Returns:
+        metrics - DatadogMetricPoint[]: duration, billed duration, max memory
+        used and estimated cost points, plus an init duration point on cold
+        starts and a timeout point when the record status is "timeout".
+        An empty list is returned when the message is not valid JSON, is not
+        a `platform.report` object, or lacks any of the required metrics.
+    """
+    try:
+        body = json.loads(log_message)
+    except json.JSONDecodeError:
+        return []
+
+    # Plain log lines such as "42", "null" or "[1, 2]" are valid JSON but do
+    # not decode to an object, so `.get` cannot be called on them.
+    if not isinstance(body, dict) or body.get("type") != "platform.report":
+        return []
+
+    record = body.get("record")
+    if not isinstance(record, dict):
+        return []
+
+    record_metrics = record.get("metrics")
+    if not isinstance(record_metrics, dict) or not record_metrics:
+        return []
+
+    # Every runtime metric and the allocated memory are needed below; treat a
+    # report that lacks any of them as unparseable instead of raising KeyError.
+    required_keys = (MEMORY_ALLOCATED_RECORD_KEY, *RUNTIME_METRICS_BY_RECORD_KEY)
+    if any(key not in record_metrics for key in required_keys):
+        return []
+
+    metrics = []
+
+    for record_key, metric_name in RUNTIME_METRICS_BY_RECORD_KEY.items():
+        metric_point_value = record_metrics[record_key]
+
+        # Duration metrics arrive in milliseconds and are reported in seconds
+        if metric_name in METRIC_ADJUSTMENT_FACTORS:
+            metric_point_value *= METRIC_ADJUSTMENT_FACTORS[metric_name]
+
+        metrics.append(
+            DatadogMetricPoint(
+                f"{ENHANCED_METRICS_NAMESPACE_PREFIX}.{metric_name}",
+                metric_point_value,
+            )
+        )
+
+    tags = [
+        f"{MEMORY_ALLOCATED_FIELD_NAME}:{record_metrics[MEMORY_ALLOCATED_RECORD_KEY]}"
+    ]
+
+    # A present, non-zero init duration marks the invocation as a cold start
+    init_duration = record_metrics.get(INIT_DURATION_RECORD_KEY)
+    if init_duration:
+        tags.append("cold_start:true")
+        metrics.append(
+            DatadogMetricPoint(
+                f"{ENHANCED_METRICS_NAMESPACE_PREFIX}.{INIT_DURATION_METRIC_NAME}",
+                init_duration * METRIC_ADJUSTMENT_FACTORS[INIT_DURATION_METRIC_NAME],
+            )
+        )
+    else:
+        tags.append("cold_start:false")
+
+    # The cost estimate takes the raw (unadjusted) billed duration and memory size
+    metrics.append(
+        DatadogMetricPoint(
+            f"{ENHANCED_METRICS_NAMESPACE_PREFIX}.{ESTIMATED_COST_METRIC_NAME}",
+            calculate_estimated_cost(
+                record_metrics[BILLED_DURATION_RECORD_KEY],
+                record_metrics[MEMORY_ALLOCATED_RECORD_KEY],
+            ),
+        )
+    )
+
+    if record.get("status") == "timeout":
+        metrics.append(
+            DatadogMetricPoint(
+                f"{ENHANCED_METRICS_NAMESPACE_PREFIX}.{TIMEOUTS_METRIC_NAME}",
+                1.0,
+            )
+        )
+
+    # Tags are attached last so that every point carries the same set
+    for metric in metrics:
+        metric.add_tags(tags)
+
+    return metrics
+
+
 def parse_metrics_from_report_log(report_log_line):
     """Parses and returns metrics from the REPORT Lambda log
 

diff --git a/..._files/TestEnhancedLambdaMetrics.test_generate_enhanced_lambda_metrics_json.approved.json b/..._files/TestEnhancedLambdaMetrics.test_generate_enhanced_lambda_metrics_json.approved.json
@@ -0,0 +1,54 @@
+[
+    {
+        "name": "aws.lambda.enhanced.duration",
+        "tags": [
+            "memorysize:128",
+            "cold_start:false",
+            "region:us-east-1",
+            "account_id:172597598159",
+            "aws_account:172597598159",
+            "functionname:post-coupon-prod-us"
+        ],
+        "timestamp": 10000,
+        "value": 3.47065
+    },
+    {
+        "name": "aws.lambda.enhanced.billed_duration",
+        "tags": [
+            "memorysize:128",
+            "cold_start:false",
+            "region:us-east-1",
+            "account_id:172597598159",
+            "aws_account:172597598159",
+            "functionname:post-coupon-prod-us"
+        ],
+        "timestamp": 10000,
+        "value": 3.5
+    },
+    {
+        "name": "aws.lambda.enhanced.max_memory_used",
+        "tags": [
+            "memorysize:128",
+            "cold_start:false",
+            "region:us-east-1",
+            "account_id:172597598159",
+            "aws_account:172597598159",
+            "functionname:post-coupon-prod-us"
+        ],
+        "timestamp": 10000,
+        "value": 89
+    },
+    {
+        "name": "aws.lambda.enhanced.estimated_cost",
+        "tags": [
+            "memorysize:128",
+            "cold_start:false",
+            "region:us-east-1",
+            "account_id:172597598159",
+            "aws_account:172597598159",
+            "functionname:post-coupon-prod-us"
+        ],
+        "timestamp": 10000,
+        "value": 7.49168125e-06
+    }
+]
diff --git a/...estEnhancedLambdaMetrics.test_parse_metrics_from_cold_start_json_report_log.approved.json b/...estEnhancedLambdaMetrics.test_parse_metrics_from_cold_start_json_report_log.approved.json
@@ -0,0 +1,47 @@
+[
+    {
+        "name": "aws.lambda.enhanced.duration",
+        "tags": [
+            "memorysize:128",
+            "cold_start:true"
+        ],
+        "timestamp": null,
+        "value": 0.0008100000000000001
+    },
+    {
+        "name": "aws.lambda.enhanced.billed_duration",
+        "tags": [
+            "memorysize:128",
+            "cold_start:true"
+        ],
+        "timestamp": null,
+        "value": 0.1
+    },
+    {
+        "name": "aws.lambda.enhanced.max_memory_used",
+        "tags": [
+            "memorysize:128",
+            "cold_start:true"
+        ],
+        "timestamp": null,
+        "value": 90
+    },
+    {
+        "name": "aws.lambda.enhanced.init_duration",
+        "tags": [
+            "memorysize:128",
+            "cold_start:true"
+        ],
+        "timestamp": null,
+        "value": 1.234
+    },
+    {
+        "name": "aws.lambda.enhanced.estimated_cost",
+        "tags": [
+            "memorysize:128",
+            "cold_start:true"
+        ],
+        "timestamp": null,
+        "value": 4.0833375e-07
+    }
+]
diff --git a/...ved_files/TestEnhancedLambdaMetrics.test_parse_metrics_from_json_report_log.approved.json b/...ved_files/TestEnhancedLambdaMetrics.test_parse_metrics_from_json_report_log.approved.json
@@ -0,0 +1,38 @@
+[
+    {
+        "name": "aws.lambda.enhanced.duration",
+        "tags": [
+            "memorysize:128",
+            "cold_start:false"
+        ],
+        "timestamp": null,
+        "value": 0.00062
+    },
+    {
+        "name": "aws.lambda.enhanced.billed_duration",
+        "tags": [
+            "memorysize:128",
+            "cold_start:false"
+        ],
+        "timestamp": null,
+        "value": 0.1
+    },
+    {
+        "name": "aws.lambda.enhanced.max_memory_used",
+        "tags": [
+            "memorysize:128",
+            "cold_start:false"
+        ],
+        "timestamp": null,
+        "value": 51
+    },
+    {
+        "name": "aws.lambda.enhanced.estimated_cost",
+        "tags": [
+            "memorysize:128",
+            "cold_start:false"
+        ],
+        "timestamp": null,
+        "value": 4.0833375e-07
+    }
+]
diff --git a/...s/TestEnhancedLambdaMetrics.test_parse_metrics_from_timeout_json_report_log.approved.json b/...s/TestEnhancedLambdaMetrics.test_parse_metrics_from_timeout_json_report_log.approved.json
@@ -0,0 +1,56 @@
+[
+    {
+        "name": "aws.lambda.enhanced.duration",
+        "tags": [
+            "memorysize:128",
+            "cold_start:true"
+        ],
+        "timestamp": null,
+        "value": 30.0
+    },
+    {
+        "name": "aws.lambda.enhanced.billed_duration",
+        "tags": [
+            "memorysize:128",
+            "cold_start:true"
+        ],
+        "timestamp": null,
+        "value": 30.0
+    },
+    {
+        "name": "aws.lambda.enhanced.max_memory_used",
+        "tags": [
+            "memorysize:128",
+            "cold_start:true"
+        ],
+        "timestamp": null,
+        "value": 74
+    },
+    {
+        "name": "aws.lambda.enhanced.init_duration",
+        "tags": [
+            "memorysize:128",
+            "cold_start:true"
+        ],
+        "timestamp": null,
+        "value": 0.985413
+    },
+    {
+        "name": "aws.lambda.enhanced.estimated_cost",
+        "tags": [
+            "memorysize:128",
+            "cold_start:true"
+        ],
+        "timestamp": null,
+        "value": 6.270012500000001e-05
+    },
+    {
+        "name": "aws.lambda.enhanced.timeouts",
+        "tags": [
+            "memorysize:128",
+            "cold_start:true"
+        ],
+        "timestamp": null,
+        "value": 1.0
+    }
+]