From 4e059b15d2ac97ac926d020e928725804f89b9a5 Mon Sep 17 00:00:00 2001 From: Leighton Chen Date: Thu, 30 Mar 2023 18:10:23 -0700 Subject: [PATCH] Add exclude_list for urllib/urllib3 instrumentations (#1733) * urllib * urllib3 * Update __init__.py * readme * lint --- CHANGELOG.md | 2 + .../instrumentation/requests/__init__.py | 5 +- .../README.rst | 40 ++++++++++++++++ .../instrumentation/urllib/__init__.py | 36 +++++++++++++-- .../tests/test_urllib_integration.py | 46 +++++++++++++++++++ .../README.rst | 14 ++++++ .../instrumentation/urllib3/__init__.py | 34 +++++++++++++- .../tests/test_urllib3_integration.py | 46 +++++++++++++++++++ 8 files changed, 217 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ce84cd8a67..8b3191c27b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ([#1690](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1690)) - Add metrics instrumentation for sqlalchemy ([#1645](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1645)) +- Add `excluded_urls` functionality to `urllib` and `urllib3` instrumentations + ([#1733](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1733)) ### Fixed diff --git a/instrumentation/opentelemetry-instrumentation-requests/src/opentelemetry/instrumentation/requests/__init__.py b/instrumentation/opentelemetry-instrumentation-requests/src/opentelemetry/instrumentation/requests/__init__.py index 809957f12f..e5bb24223c 100644 --- a/instrumentation/opentelemetry-instrumentation-requests/src/opentelemetry/instrumentation/requests/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-requests/src/opentelemetry/instrumentation/requests/__init__.py @@ -51,7 +51,7 @@ import functools import types from timeit import default_timer -from typing import Callable, Collection, Iterable, Optional +from typing import Callable, Collection, Optional from 
urllib.parse import urlparse from requests.models import PreparedRequest, Response @@ -77,6 +77,7 @@ from opentelemetry.trace.span import Span from opentelemetry.trace.status import Status from opentelemetry.util.http import ( + ExcludeList, get_excluded_urls, parse_excluded_urls, remove_url_credentials, @@ -96,7 +97,7 @@ def _instrument( duration_histogram: Histogram, request_hook: _RequestHookT = None, response_hook: _ResponseHookT = None, - excluded_urls: Iterable[str] = None, + excluded_urls: ExcludeList = None, ): """Enables tracing of all requests calls that go through :code:`requests.session.Session.request` (this includes diff --git a/instrumentation/opentelemetry-instrumentation-urllib/README.rst b/instrumentation/opentelemetry-instrumentation-urllib/README.rst index aa25fbf383..c78e092fb4 100644 --- a/instrumentation/opentelemetry-instrumentation-urllib/README.rst +++ b/instrumentation/opentelemetry-instrumentation-urllib/README.rst @@ -16,6 +16,46 @@ Installation pip install opentelemetry-instrumentation-urllib +Configuration +------------- + +Request/Response hooks +********************** + +The urllib instrumentation supports extending tracing behavior with the help of +request and response hooks. These are functions that are called back by the instrumentation +right after a Span is created for a request and right before the span is finished processing a response respectively. +The hooks can be configured as follows: + +.. 
code:: python + + # `request_obj` is an instance of urllib.request.Request + def request_hook(span, request_obj): + pass + + # `request_obj` is an instance of urllib.request.Request + # `response` is an instance of http.client.HTTPResponse + def response_hook(span, request_obj, response): + pass + + URLLibInstrumentor().instrument( + request_hook=request_hook, response_hook=response_hook + ) + +Exclude lists +************* + +To exclude certain URLs from being tracked, set the environment variable ``OTEL_PYTHON_URLLIB_EXCLUDED_URLS`` +(or ``OTEL_PYTHON_EXCLUDED_URLS`` as fallback) with comma delimited regexes representing which URLs to exclude. + +For example, + +:: + + export OTEL_PYTHON_URLLIB_EXCLUDED_URLS="client/.*/info,healthcheck" + +will exclude requests such as ``https://site/client/123/info`` and ``https://site/xyz/healthcheck``. + References ---------- diff --git a/instrumentation/opentelemetry-instrumentation-urllib/src/opentelemetry/instrumentation/urllib/__init__.py b/instrumentation/opentelemetry-instrumentation-urllib/src/opentelemetry/instrumentation/urllib/__init__.py index 19a3b1fa8b..091ccf99b1 100644 --- a/instrumentation/opentelemetry-instrumentation-urllib/src/opentelemetry/instrumentation/urllib/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-urllib/src/opentelemetry/instrumentation/urllib/__init__.py @@ -56,6 +56,20 @@ def response_hook(span, request_obj, response) request_hook=request_hook, response_hook=response_hook) ) +Exclude lists +************* + +To exclude certain URLs from being tracked, set the environment variable ``OTEL_PYTHON_URLLIB_EXCLUDED_URLS`` +(or ``OTEL_PYTHON_EXCLUDED_URLS`` as fallback) with comma delimited regexes representing which URLs to exclude. + +For example, + +:: + + export OTEL_PYTHON_URLLIB_EXCLUDED_URLS="client/.*/info,healthcheck" + +will exclude requests such as ``https://site/client/123/info`` and ``https://site/xyz/healthcheck``. 
+ API --- """ @@ -88,7 +102,14 @@ def response_hook(span, request_obj, response) from opentelemetry.semconv.trace import SpanAttributes from opentelemetry.trace import Span, SpanKind, get_tracer from opentelemetry.trace.status import Status -from opentelemetry.util.http import remove_url_credentials +from opentelemetry.util.http import ( + ExcludeList, + get_excluded_urls, + parse_excluded_urls, + remove_url_credentials, +) + +_excluded_urls_from_env = get_excluded_urls("URLLIB") _RequestHookT = typing.Optional[typing.Callable[[Span, Request], None]] _ResponseHookT = typing.Optional[ @@ -112,10 +133,12 @@ def _instrument(self, **kwargs): ``tracer_provider``: a TracerProvider, defaults to global ``request_hook``: An optional callback invoked that is invoked right after a span is created. ``response_hook``: An optional callback which is invoked right before the span is finished processing a response + ``excluded_urls``: A string containing a comma-delimited + list of regexes used to exclude URLs from tracking """ tracer_provider = kwargs.get("tracer_provider") tracer = get_tracer(__name__, __version__, tracer_provider) - + excluded_urls = kwargs.get("excluded_urls") meter_provider = kwargs.get("meter_provider") meter = get_meter(__name__, __version__, meter_provider) @@ -126,6 +149,9 @@ def _instrument(self, **kwargs): histograms, request_hook=kwargs.get("request_hook"), response_hook=kwargs.get("response_hook"), + excluded_urls=_excluded_urls_from_env + if excluded_urls is None + else parse_excluded_urls(excluded_urls), ) def _uninstrument(self, **kwargs): @@ -143,6 +169,7 @@ def _instrument( histograms: Dict[str, Histogram], request_hook: _RequestHookT = None, response_hook: _ResponseHookT = None, + excluded_urls: ExcludeList = None, ): """Enables tracing of all requests calls that go through :code:`urllib.Client._make_request`""" @@ -174,8 +201,11 @@ def _instrumented_open_call( ) or context.get_value(_SUPPRESS_HTTP_INSTRUMENTATION_KEY): return call_wrapped() - 
method = request.get_method().upper() url = request.full_url + if excluded_urls and excluded_urls.url_disabled(url): + return call_wrapped() + + method = request.get_method().upper() span_name = f"HTTP {method}".strip() diff --git a/instrumentation/opentelemetry-instrumentation-urllib/tests/test_urllib_integration.py b/instrumentation/opentelemetry-instrumentation-urllib/tests/test_urllib_integration.py index 61a986fff5..9937d42176 100644 --- a/instrumentation/opentelemetry-instrumentation-urllib/tests/test_urllib_integration.py +++ b/instrumentation/opentelemetry-instrumentation-urllib/tests/test_urllib_integration.py @@ -38,6 +38,7 @@ from opentelemetry.test.mock_textmap import MockTextMapPropagator from opentelemetry.test.test_base import TestBase from opentelemetry.trace import StatusCode +from opentelemetry.util.http import get_excluded_urls # pylint: disable=too-many-public-methods @@ -52,6 +53,21 @@ class RequestsIntegrationTestBase(abc.ABC): # pylint: disable=invalid-name def setUp(self): super().setUp() + + self.env_patch = mock.patch.dict( + "os.environ", + { + "OTEL_PYTHON_URLLIB_EXCLUDED_URLS": "http://localhost/env_excluded_arg/123,env_excluded_noarg" + }, + ) + self.env_patch.start() + + self.exclude_patch = mock.patch( + "opentelemetry.instrumentation.urllib._excluded_urls_from_env", + get_excluded_urls("URLLIB"), + ) + self.exclude_patch.start() + URLLibInstrumentor().instrument() httpretty.enable() httpretty.register_uri(httpretty.GET, self.URL, body=b"Hello!") @@ -125,6 +141,36 @@ def test_basic(self): span, opentelemetry.instrumentation.urllib ) + def test_excluded_urls_explicit(self): + url_201 = "http://httpbin.org/status/201" + httpretty.register_uri( + httpretty.GET, + url_201, + status=201, + ) + + URLLibInstrumentor().uninstrument() + URLLibInstrumentor().instrument(excluded_urls=".*/201") + self.perform_request(self.URL) + self.perform_request(url_201) + + self.assert_span(num_spans=1) + + def test_excluded_urls_from_env(self): + url = 
"http://localhost/env_excluded_arg/123" + httpretty.register_uri( + httpretty.GET, + url, + status=200, + ) + + URLLibInstrumentor().uninstrument() + URLLibInstrumentor().instrument() + self.perform_request(self.URL) + self.perform_request(url) + + self.assert_span(num_spans=1) + def test_not_foundbasic(self): url_404 = "http://httpbin.org/status/404/" httpretty.register_uri( diff --git a/instrumentation/opentelemetry-instrumentation-urllib3/README.rst b/instrumentation/opentelemetry-instrumentation-urllib3/README.rst index 61817d631f..0c53c299a0 100644 --- a/instrumentation/opentelemetry-instrumentation-urllib3/README.rst +++ b/instrumentation/opentelemetry-instrumentation-urllib3/README.rst @@ -42,6 +42,20 @@ The hooks can be configured as follows: request_hook=request_hook, response_hook=response_hook) ) +Exclude lists +************* + +To exclude certain URLs from being tracked, set the environment variable ``OTEL_PYTHON_URLLIB3_EXCLUDED_URLS`` +(or ``OTEL_PYTHON_EXCLUDED_URLS`` as fallback) with comma delimited regexes representing which URLs to exclude. + +For example, + +:: + + export OTEL_PYTHON_URLLIB3_EXCLUDED_URLS="client/.*/info,healthcheck" + +will exclude requests such as ``https://site/client/123/info`` and ``https://site/xyz/healthcheck``. 
+ References ---------- diff --git a/instrumentation/opentelemetry-instrumentation-urllib3/src/opentelemetry/instrumentation/urllib3/__init__.py b/instrumentation/opentelemetry-instrumentation-urllib3/src/opentelemetry/instrumentation/urllib3/__init__.py index 6e0da3be36..91d5576fc0 100644 --- a/instrumentation/opentelemetry-instrumentation-urllib3/src/opentelemetry/instrumentation/urllib3/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-urllib3/src/opentelemetry/instrumentation/urllib3/__init__.py @@ -60,6 +60,20 @@ def response_hook(span, request, response): request_hook=request_hook, response_hook=response_hook) ) +Exclude lists +************* + +To exclude certain URLs from being tracked, set the environment variable ``OTEL_PYTHON_URLLIB3_EXCLUDED_URLS`` +(or ``OTEL_PYTHON_EXCLUDED_URLS`` as fallback) with comma delimited regexes representing which URLs to exclude. + +For example, + +:: + + export OTEL_PYTHON_URLLIB3_EXCLUDED_URLS="client/.*/info,healthcheck" + +will exclude requests such as ``https://site/client/123/info`` and ``https://site/xyz/healthcheck``. + API --- """ @@ -92,8 +106,15 @@ def response_hook(span, request, response): from opentelemetry.semconv.trace import SpanAttributes from opentelemetry.trace import Span, SpanKind, Tracer, get_tracer from opentelemetry.trace.status import Status +from opentelemetry.util.http import ( + ExcludeList, + get_excluded_urls, + parse_excluded_urls, +) from opentelemetry.util.http.httplib import set_ip_on_next_http_connection +_excluded_urls_from_env = get_excluded_urls("URLLIB3") + _UrlFilterT = typing.Optional[typing.Callable[[str], str]] _RequestHookT = typing.Optional[ typing.Callable[ @@ -138,10 +159,14 @@ def _instrument(self, **kwargs): ``response_hook``: An optional callback which is invoked right before the span is finished processing a response. ``url_filter``: A callback to process the requested URL prior to adding it as a span attribute. 
+ ``excluded_urls``: A string containing a comma-delimited + list of regexes used to exclude URLs from tracking """ tracer_provider = kwargs.get("tracer_provider") tracer = get_tracer(__name__, __version__, tracer_provider) + excluded_urls = kwargs.get("excluded_urls") + meter_provider = kwargs.get("meter_provider") meter = get_meter(__name__, __version__, meter_provider) @@ -169,6 +194,9 @@ def _instrument(self, **kwargs): request_hook=kwargs.get("request_hook"), response_hook=kwargs.get("response_hook"), url_filter=kwargs.get("url_filter"), + excluded_urls=_excluded_urls_from_env + if excluded_urls is None + else parse_excluded_urls(excluded_urls), ) def _uninstrument(self, **kwargs): @@ -183,13 +211,17 @@ def _instrument( request_hook: _RequestHookT = None, response_hook: _ResponseHookT = None, url_filter: _UrlFilterT = None, + excluded_urls: ExcludeList = None, ): def instrumented_urlopen(wrapped, instance, args, kwargs): if _is_instrumentation_suppressed(): return wrapped(*args, **kwargs) - method = _get_url_open_arg("method", args, kwargs).upper() url = _get_url(instance, args, kwargs, url_filter) + if excluded_urls and excluded_urls.url_disabled(url): + return wrapped(*args, **kwargs) + + method = _get_url_open_arg("method", args, kwargs).upper() headers = _prepare_headers(kwargs) body = _get_url_open_arg("body", args, kwargs) diff --git a/instrumentation/opentelemetry-instrumentation-urllib3/tests/test_urllib3_integration.py b/instrumentation/opentelemetry-instrumentation-urllib3/tests/test_urllib3_integration.py index be947cc84f..ae59d57c51 100644 --- a/instrumentation/opentelemetry-instrumentation-urllib3/tests/test_urllib3_integration.py +++ b/instrumentation/opentelemetry-instrumentation-urllib3/tests/test_urllib3_integration.py @@ -29,6 +29,7 @@ from opentelemetry.semconv.trace import SpanAttributes from opentelemetry.test.mock_textmap import MockTextMapPropagator from opentelemetry.test.test_base import TestBase +from opentelemetry.util.http import 
get_excluded_urls # pylint: disable=too-many-public-methods @@ -39,6 +40,21 @@ class TestURLLib3Instrumentor(TestBase): def setUp(self): super().setUp() + + self.env_patch = mock.patch.dict( + "os.environ", + { + "OTEL_PYTHON_URLLIB3_EXCLUDED_URLS": "http://localhost/env_excluded_arg/123,env_excluded_noarg" + }, + ) + self.env_patch.start() + + self.exclude_patch = mock.patch( + "opentelemetry.instrumentation.urllib3._excluded_urls_from_env", + get_excluded_urls("URLLIB3"), + ) + self.exclude_patch.start() + URLLib3Instrumentor().instrument() httpretty.enable(allow_net_connect=False) @@ -158,6 +174,36 @@ def test_url_open_explicit_arg_parameters(self): self.assert_success_span(response, url) + def test_excluded_urls_explicit(self): + url_201 = "http://httpbin.org/status/201" + httpretty.register_uri( + httpretty.GET, + url_201, + status=201, + ) + + URLLib3Instrumentor().uninstrument() + URLLib3Instrumentor().instrument(excluded_urls=".*/201") + self.perform_request(self.HTTP_URL) + self.perform_request(url_201) + + self.assert_span(num_spans=1) + + def test_excluded_urls_from_env(self): + url = "http://localhost/env_excluded_arg/123" + httpretty.register_uri( + httpretty.GET, + url, + status=200, + ) + + URLLib3Instrumentor().uninstrument() + URLLib3Instrumentor().instrument() + self.perform_request(self.HTTP_URL) + self.perform_request(url) + + self.assert_span(num_spans=1) + def test_uninstrument(self): URLLib3Instrumentor().uninstrument()