From 2ec2ac76071333422fd50f6762a3f3b67bbae407 Mon Sep 17 00:00:00 2001
From: Daniel Rogers
Date: Tue, 30 Aug 2022 12:36:21 -0400
Subject: [PATCH] Add support for sanitizing HTTP header values.

First step of https://github.com/open-telemetry/opentelemetry-python-contrib/issues/1184
---
 CHANGELOG.md                                  |  2 ++
 .../src/opentelemetry/util/http/__init__.py   | 22 ++++++++++++++++++-
 .../tests/test_capture_custom_headers.py      | 16 ++++++++++++++
 3 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9a2b306157..6170541d87 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -35,6 +35,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   ([#1197](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1197))
 - Add metric instumentation for flask
   ([#1186](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1186))
+- `opentelemetry-util-http` Add support for sanitizing HTTP header values.
+  ([#1253](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1253))
 
 ## [1.12.0rc2-0.32b0](https://github.com/open-telemetry/opentelemetry-python/releases/tag/v1.12.0rc2-0.32b0) - 2022-07-01
 
diff --git a/util/opentelemetry-util-http/src/opentelemetry/util/http/__init__.py b/util/opentelemetry-util-http/src/opentelemetry/util/http/__init__.py
index 9088da3a37..4796ee85cf 100644
--- a/util/opentelemetry-util-http/src/opentelemetry/util/http/__init__.py
+++ b/util/opentelemetry-util-http/src/opentelemetry/util/http/__init__.py
@@ -15,11 +15,15 @@
 from os import environ
 from re import compile as re_compile
 from re import search
+from re import IGNORECASE as RE_IGNORECASE
 from typing import Iterable, List
 from urllib.parse import urlparse, urlunparse
 
 from opentelemetry.semconv.trace import SpanAttributes
 
+OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS = (
+    "OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS"
+)
 OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST = (
     "OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST"
 )
@@ -60,6 +64,22 @@ def url_disabled(self, url: str) -> bool:
         return bool(self._excluded_urls and search(self._regex, url))
 
 
+class SanitizeValue:
+    """Class to sanitize (remove sensitive data from) certain headers (given as a list of regexes)"""
+
+    def __init__(self, sanitized_fields: Iterable[str]):
+        self._sanitized_fields = sanitized_fields
+        if self._sanitized_fields:
+            self._regex = re_compile("|".join(sanitized_fields), RE_IGNORECASE)
+
+    def sanitize_header_value(self, header: str, value: str) -> str:
+        return (
+            "[REDACTED]"
+            if (self._sanitized_fields and search(self._regex, header))
+            else value
+        )
+
+
 _root = r"OTEL_PYTHON_{}"
 
 
@@ -90,7 +110,7 @@ def get_excluded_urls(instrumentation: str) -> ExcludeList:
 
 def parse_excluded_urls(excluded_urls: str) -> ExcludeList:
     """
-    Small helper to put an arbitrary url list inside of ExcludeList
+    Small helper to put an arbitrary url list inside an ExcludeList
     """
     if excluded_urls:
         excluded_url_list = [
diff --git a/util/opentelemetry-util-http/tests/test_capture_custom_headers.py b/util/opentelemetry-util-http/tests/test_capture_custom_headers.py
index e6e1583ffb..4ce6078311 100644
--- a/util/opentelemetry-util-http/tests/test_capture_custom_headers.py
+++ b/util/opentelemetry-util-http/tests/test_capture_custom_headers.py
@@ -16,6 +16,7 @@
 from unittest.mock import patch
 
 from opentelemetry.util.http import (
+    OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS,
     OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST,
     OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE,
     get_custom_headers,
@@ -58,6 +59,21 @@ def test_get_custom_response_header(self):
             ],
         )
 
+    @patch.dict(
+        "os.environ",
+        {
+            OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS: "My-Secret-Header,My-Secret-Header-2"
+        },
+    )
+    def test_get_custom_sanitize_header(self):
+        custom_headers_to_capture = get_custom_headers(
+            OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS
+        )
+        self.assertEqual(
+            custom_headers_to_capture,
+            ["My-Secret-Header", "My-Secret-Header-2"],
+        )
+
     def test_normalise_request_header_name(self):
         key = normalise_request_header_name("Test-Header")
         self.assertEqual(key, "http.request.header.test_header")